def get_draft_config(topic_model, dataset):
    """Load the draft configuration for a topic model / dataset pair.

    The configuration is read from
    ``configs/<dataset>_<topic_model>_config.json`` through ``LDAConfig``
    when ``topic_model`` is ``"lda"`` and through ``LSIConfig`` when it is
    ``"lsi"``.

    Args:
        topic_model: ``"lda"`` or ``"lsi"``.
        dataset: ``"congress"``, ``"nytimes"``, ``"alexa"``, ``"newsgroup"``
            or ``"anes"``.

    Returns:
        The object produced by the config class's ``from_json_file``, or the
        empty string ``""`` when the topic model or dataset is not one of
        the supported values.
    """
    supported_models = ("lda", "lsi")
    supported_datasets = ("congress", "nytimes", "alexa", "newsgroup", "anes")

    # Unsupported combinations fall through to the empty-string placeholder
    # without touching either config class.
    if topic_model not in supported_models or dataset not in supported_datasets:
        return ""

    config_cls = LDAConfig if topic_model == "lda" else LSIConfig
    config_path = "configs/{}_{}_config.json".format(dataset, topic_model)
    return config_cls.from_json_file(config_path)
def find_default_values(dataset, topic_model):
    """Load the default topic-modeling configuration for a dataset.

    The configuration is read from
    ``configs/<dataset>_<topic_model>_config.json`` through ``LDAConfig``
    when ``topic_model`` is ``"lda"`` and through ``LSIConfig`` when it is
    ``"lsi"``.

    Args:
        dataset: ``"alexa"``, ``"newsgroup"``, ``"congress"``, ``"nytimes"``
            or ``"anes"``.
        topic_model: ``"lda"`` or ``"lsi"``.

    Returns:
        The object produced by the config class's ``from_json_file``.

    Raises:
        ValueError: if ``dataset`` or ``topic_model`` is not one of the
            supported values. Previously this case crashed with an
            ``UnboundLocalError`` at the ``return`` statement because no
            branch had assigned the result.
    """
    supported_datasets = ("alexa", "newsgroup", "congress", "nytimes", "anes")
    supported_models = ("lda", "lsi")

    # Validate up front so an unsupported argument produces a clear error
    # instead of an unbound local at the end of the function.
    if dataset not in supported_datasets:
        raise ValueError(
            "unsupported dataset %r; expected one of %s"
            % (dataset, ", ".join(supported_datasets))
        )
    if topic_model not in supported_models:
        raise ValueError(
            "unsupported topic model %r; expected one of %s"
            % (topic_model, ", ".join(supported_models))
        )

    config_cls = LDAConfig if topic_model == "lda" else LSIConfig
    config_file = "configs/{}_{}_config.json".format(dataset, topic_model)
    return config_cls.from_json_file(config_file)
from gensim.models import TfidfModel, LsiModel from data.corpus import CorpusManager from visualization import topic_modeling_semantic_network as topic_modeling_semantic_network from configs import LSIConfig config_file = "/home/rohola/Codes/Python/topic_modeling_visualization-master/configs/lsi_config.json" config = LSIConfig.from_json_file(config_file) corpus_manager = CorpusManager() corpus, dictionary = corpus_manager.read_corpus(config.dataset_dir) tfidf = TfidfModel(corpus) corpus_tfidf = tfidf[corpus] lsi = LsiModel( corpus_tfidf, id2word=dictionary, num_topics=config.num_topics, power_iters=config.power_iters) # initialize an LSI transformation topic_words = lsi.show_topics(config.num_topics_to_show, num_words=config.num_words, formatted=False) topic_words = [j for (i, j) in topic_words] visualize_method = "" if config.dimension == 2: visualize_method = 'plotly' elif config.dimension == 3: visualize_method = 'plotly3d' else:
generation_config.max_length = length t1 = time.time() text, _, _ = generate_lda_text(prompt_text="The issue is ", selected_topic_index=-1, lda_config=config, generation_config=generation_config, plot=False) t2 = time.time() print(text) results.write("lda: " + str(length) + " " + str(t2 - t1) + "\n") lda_times.append((length, t2 - t1)) ###############LSI lsi_config_file = "/home/rohola/codes/topical_language_generation/configs/alexa_lsi_config.json" generation_config_file = "/home/rohola/codes/topical_language_generation/configs/generation_config.json" lsi_config = LSIConfig.from_json_file(lsi_config_file) generation_config = GenerationConfig.from_json_file(generation_config_file) generation_config.max_length = length t1 = time.time() text, _, _ = generate_lsi_text(prompt_text="The issue is", selected_topic_index=0, lsi_config=lsi_config, generation_config=generation_config, plot=False) t2 = time.time() print(text) print("LSI: ", t2 - t1) results.write("lsi: " + str(length) + " " + str(t2 - t1) + "\n") lsi_times.append((length, t2 - t1)) #############CTRL generation_config_file = "/home/rohola/codes/topical_language_generation/configs/ctrl_generation_config.json"