def get_draft_config(topic_model, dataset):
    """Load the stored config for a topic model / dataset pair.

    Args:
        topic_model: ``"lda"`` or ``"lsi"``.
        dataset: one of ``"congress"``, ``"nytimes"``, ``"alexa"``,
            ``"newsgroup"`` or ``"anes"``.

    Returns:
        Whatever ``LDAConfig.from_json_file`` / ``LSIConfig.from_json_file``
        returns for ``configs/<dataset>_<topic_model>_config.json``, or the
        empty string when either argument is not one of the values above.
    """
    supported_datasets = ("congress", "nytimes", "alexa", "newsgroup", "anes")
    if topic_model not in ("lda", "lsi") or dataset not in supported_datasets:
        # Unsupported combinations fall through to the empty-string sentinel.
        return ""

    # The config class is looked up only once both arguments are validated,
    # so unsupported input never touches LDAConfig / LSIConfig.
    config_cls = LDAConfig if topic_model == "lda" else LSIConfig
    return config_cls.from_json_file(
        "configs/" + dataset + "_" + topic_model + "_config.json"
    )
def find_default_values(dataset, topic_model):
    """Load the default topic-modeling config for a dataset / model pair.

    Args:
        dataset: one of ``"alexa"``, ``"newsgroup"``, ``"congress"``,
            ``"nytimes"`` or ``"anes"``.
        topic_model: ``"lda"`` or ``"lsi"``.

    Returns:
        Whatever ``LDAConfig.from_json_file`` / ``LSIConfig.from_json_file``
        returns for ``configs/<dataset>_<topic_model>_config.json``.

    Raises:
        ValueError: if ``dataset`` or ``topic_model`` is not one of the
            supported values. (Previously this surfaced as an
            ``UnboundLocalError`` at the ``return`` statement.)
    """
    supported_datasets = ("alexa", "newsgroup", "congress", "nytimes", "anes")
    if dataset not in supported_datasets:
        raise ValueError(
            "Unsupported dataset {!r}; expected one of {}".format(
                dataset, ", ".join(supported_datasets)
            )
        )

    # Pick the config class lazily so that invalid input is rejected before
    # any config class is referenced.
    if topic_model == "lda":
        config_cls = LDAConfig
    elif topic_model == "lsi":
        config_cls = LSIConfig
    else:
        raise ValueError(
            "Unsupported topic model {!r}; expected 'lda' or 'lsi'".format(
                topic_model
            )
        )

    # Every config file follows the same naming scheme.
    config_file = "configs/{}_{}_config.json".format(dataset, topic_model)
    return config_cls.from_json_file(config_file)
from gensim.models import TfidfModel, LsiModel
from data.corpus import CorpusManager
from visualization import topic_modeling_semantic_network as topic_modeling_semantic_network
from configs import LSIConfig

# Script section: fit an LSI model on a TF-IDF-weighted corpus and collect the
# per-topic word lists.
# NOTE(review): the config path is a hard-coded absolute path on one
# developer's machine, so this only runs where that file exists.
config_file = "/home/rohola/Codes/Python/topic_modeling_visualization-master/configs/lsi_config.json"
config = LSIConfig.from_json_file(config_file)

# read_corpus is called with the dataset directory taken from the config; its
# result is unpacked into a corpus and a dictionary.
corpus_manager = CorpusManager()
corpus, dictionary = corpus_manager.read_corpus(config.dataset_dir)

# Build the TF-IDF model from the corpus, then apply it to that same corpus.
tfidf = TfidfModel(corpus)
corpus_tfidf = tfidf[corpus]

# Fit LSI on the TF-IDF-weighted corpus; the topic count and power-iteration
# count both come from the config.
lsi = LsiModel(
    corpus_tfidf,
    id2word=dictionary,
    num_topics=config.num_topics,
    power_iters=config.power_iters)  # initialize an LSI transformation

# Request unformatted topics so each entry is a pair rather than a string.
topic_words = lsi.show_topics(config.num_topics_to_show,
                              num_words=config.num_words,
                              formatted=False)
# Keep only the second element of each pair; presumably that is the topic's
# (word, weight) list and the dropped first element is the topic id — confirm
# against the gensim show_topics documentation.
topic_words = [j for (i, j) in topic_words]

visualize_method = ""
if config.dimension == 2:
    visualize_method = 'plotly'
elif config.dimension == 3:
    visualize_method = 'plotly3d'
else:
# Example #4
# 0
    generation_config.max_length = length
    t1 = time.time()
    text, _, _ = generate_lda_text(prompt_text="The issue is ",
                                   selected_topic_index=-1,
                                   lda_config=config,
                                   generation_config=generation_config,
                                   plot=False)
    t2 = time.time()
    print(text)
    results.write("lda: " + str(length) + " " + str(t2 - t1) + "\n")
    lda_times.append((length, t2 - t1))

    ###############LSI
    lsi_config_file = "/home/rohola/codes/topical_language_generation/configs/alexa_lsi_config.json"
    generation_config_file = "/home/rohola/codes/topical_language_generation/configs/generation_config.json"
    lsi_config = LSIConfig.from_json_file(lsi_config_file)
    generation_config = GenerationConfig.from_json_file(generation_config_file)
    generation_config.max_length = length
    t1 = time.time()
    text, _, _ = generate_lsi_text(prompt_text="The issue is",
                                   selected_topic_index=0,
                                   lsi_config=lsi_config,
                                   generation_config=generation_config,
                                   plot=False)
    t2 = time.time()
    print(text)
    print("LSI: ", t2 - t1)
    results.write("lsi: " + str(length) + " " + str(t2 - t1) + "\n")
    lsi_times.append((length, t2 - t1))
    #############CTRL
    generation_config_file = "/home/rohola/codes/topical_language_generation/configs/ctrl_generation_config.json"