def get_test_data(seed=6789, nDocTotal=100, **kwargs):
    ''' Build a dataset of "heldout" docs, for testing purposes.

    Follows the same recipe as get_data, except that seed and nDocTotal
    are explicit parameters. They are written into kwargs after
    updateKwArgsWithDefaults has run, so they take precedence over
    whatever that helper filled in.

    NOTE(review): the default seed is presumably chosen to differ from
    the one get_data ends up using -- confirm against Defaults.

    Args
    -------
    seed : value stored under kwargs['seed'] (default 6789)
    nDocTotal : value stored under kwargs['nDocTotal'] (default 100)

    Keyword Args
    -------
    Any remaining keyword arguments are forwarded unchanged to
    WordsData.CreateToyDataFromMixModel.

    Returns
    -------
    Data : object produced by WordsData.CreateToyDataFromMixModel,
        with its name and summary attributes filled in.
    '''
    # Defaults go in first; the explicit arguments overwrite them next.
    updateKwArgsWithDefaults(kwargs)
    kwargs.update(seed=seed, nDocTotal=nDocTotal)

    heldout = WordsData.CreateToyDataFromMixModel(**kwargs)
    heldout.name = get_short_name()
    heldout.summary = get_data_info()
    return heldout
def get_data(**kwargs):
    ''' Build and return the toy dataset.

    Missing keyword arguments are filled in by updateKwArgsWithDefaults
    before everything is forwarded to
    WordsData.CreateToyDataFromMixModel.

    Keyword Args
    -------
    nDocTotal
    nWordsPerDoc

    Returns
    -------
    Data : object produced by WordsData.CreateToyDataFromMixModel,
        with its name and summary attributes filled in.
    '''
    updateKwArgsWithDefaults(kwargs)

    dataset = WordsData.CreateToyDataFromMixModel(**kwargs)
    dataset.name = get_short_name()
    dataset.summary = get_data_info()
    return dataset
def get_data_info():
    ''' Return a one-line description of this dataset.

    The text reports the module-level value K as the number of true topics.
    '''
    return 'Toy Bars Data with %d true topics. Each doc uses ONE topic.' % K


def get_data(**kwargs):
    ''' Build and return the toy dataset.

    Missing keyword arguments are filled in by updateKwArgsWithDefaults
    before everything is forwarded to
    WordsData.CreateToyDataFromMixModel.

    Keyword Args
    -------
    nDocTotal
    nWordsPerDoc

    Returns
    -------
    Data : object produced by WordsData.CreateToyDataFromMixModel,
        with its name and summary attributes filled in.
    '''
    updateKwArgsWithDefaults(kwargs)

    dataset = WordsData.CreateToyDataFromMixModel(**kwargs)
    dataset.name = get_short_name()
    dataset.summary = get_data_info()
    return dataset


def updateKwArgsWithDefaults(kwargs):
    ''' Fill in, in-place, every entry of Defaults that kwargs lacks.

    Keys already present in kwargs keep their current values.
    Nothing is returned; the caller's dict is modified directly.
    '''
    for name, value in Defaults.items():
        kwargs.setdefault(name, value)


if __name__ == '__main__':
    # Script mode: build a dataset from the unmodified Defaults and
    # hand it to the bars visualizer.
    import bnpy.viz.BarsViz
    WData = WordsData.CreateToyDataFromMixModel(**Defaults)
    bnpy.viz.BarsViz.plotExampleBarsDocs(WData)