Example 1
import gc
import os

# LDA and Database are project-local classes; the module names below are
# assumptions, adjust them to match the actual package layout.
from lda import LDA
from database import Database


def test_lda(model_file, dict_file, dbs_dir):
    """Test a saved LDA model on every database stored in dbs_dir and
    print the per-database results.

    Args:
        model_file (str): path of the saved model file to test
        dict_file (str): path to load the dictionary from
        dbs_dir (str): directory containing pickled Database files
    """

    assert os.path.isdir(dbs_dir), "Invalid data directory path"
    lda = LDA()
    print('Loading existing dictionary...')
    lda.load_dict_from_disk(dict_file)
    test_results = list()
    # Iterate over all saved databases and test the model on each
    for root, dirs, files in os.walk(dbs_dir):
        # Iterate over the database files in this directory
        for d in files:
            db = Database()
            # Load the database object from its saved file
            db.load_from_disk(os.path.join(root, d))

            # Add database to the model
            lda.add_database(db)
            # Test the model on this database
            test_results.append(lda.test(model_file, db_name=db.get_name()))
            lda.remove_database(db.get_name())

            del db
            gc.collect()

    # Print test results
    for idx, result in enumerate(test_results):
        print('Test results for database {}'.format(idx))
        for topic in result[0]:
            print('Topic: {} has probability: {}'.format(topic[0], topic[1]))
        for topic_idx, coherence in enumerate(result[1]):
            print('Topic {} has topic-coherence score: {}'.format(
                topic_idx, coherence[1]))

    # Show the topics of the final model
    print(lda.model.show_topics())
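A minimal invocation sketch for this example (not part of the original code; every path is a hypothetical placeholder for artifacts saved by a training run such as Example 2):

# Hypothetical invocation; adjust the placeholder paths to your own artifacts.
if __name__ == '__main__':
    test_lda(
        model_file='./models/final20',   # checkpoint saved by run_lda (assumed)
        dict_file='./dict/dictionary',   # dictionary saved by run_lda
        dbs_dir='./databases',           # directory of pickled Database files
    )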
Example 2
import gc
import os

# LDA and Database are project-local classes; the module names below are
# assumptions, adjust them to match the actual package layout.
from lda import LDA
from database import Database


def run_lda(data_dir, num_topics, use_mini_batches, batch_size, epochs,
            model_file, create_dict, dict_file, load_dbs):
    """Train an LDA model on every database found under data_dir and save
    intermediate and final model checkpoints.

    Args:
        data_dir (str): directory containing the data directory/directories
        num_topics (int): number of topics to train the model on
        use_mini_batches (bool): if true, train on mini-batches of documents
        batch_size (int): size of the mini-batches used to train the model
        epochs (int): number of epochs to train for on the training set
        model_file (str): saved model file to continue training from
        create_dict (bool): create the dictionary from the data instead of
            loading it from dict_file
        dict_file (str): path to load the dictionary from
        load_dbs (bool): if true, load databases from saved pickle files
    """

    assert os.path.isdir(data_dir), "Invalid data directory path"

    # Only load from the model file once, on the first training call
    use_model_file = bool(model_file)

    # Create model
    lda = LDA(num_topics=num_topics)
    if create_dict:
        print('Creating dictionary from data')
        # Create word-to-id mapping for all texts
        lda.create_dict(data_dir)
        lda.store_dict_to_disk('./dict/dictionary')
    else:
        print('Loading existing dictionary...')
        lda.load_dict_from_disk(dict_file)

    # Iterate over all data and train the model
    for root, dirs, files in os.walk(data_dir):
        if load_dbs:
            print('Training will be done on existing databases')
            entries = files
        else:
            print('Training will be done after creating databases from text files')
            entries = dirs
        # Iterate over the entries in this directory
        for d in entries:
            if not load_dbs:
                # Create a database object from the raw text directory
                db = Database(d, os.path.abspath(os.path.join(root, d)))
            else:
                db = Database()
                # Load the database object from its saved file
                db.load_from_disk(os.path.join(root, d))

            # Add database to the model
            lda.add_database(db)

            if use_model_file:
                # Load model parameters from the model file and resume training
                lda.train(model_file,
                          db_name=db.get_name(),
                          use_mini_batches=use_mini_batches,
                          use_internal_dict=True,
                          batch_size=batch_size,
                          num_epochs=epochs)
                # Load the saved model only once; later databases keep
                # training the same in-memory model
                use_model_file = False
            else:
                # Continue training the in-memory model
                lda.train(db_name=db.get_name(),
                          use_mini_batches=use_mini_batches,
                          use_internal_dict=True,
                          batch_size=batch_size,
                          num_epochs=epochs)
            if not load_dbs:
                # Persist the newly built database so it can be reloaded later
                db.store_to_disk('./databases/' + d)

            # Drop the database from the model and free its memory
            lda.remove_database(db.get_name())
            del db
            gc.collect()
            # Checkpoint the model after each database
            tmp_file = './models/' + d + str(num_topics)
            lda.save_model(tmp_file)

    # Save the final model
    file_name = './models/final' + str(num_topics)
    lda.save_model(file_name)
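A minimal invocation sketch for this example (not part of the original code; every argument value is an illustrative placeholder):

# Hypothetical invocation; all values below are placeholders.
if __name__ == '__main__':
    run_lda(
        data_dir='./data',       # one sub-directory of text files per database
        num_topics=20,
        use_mini_batches=True,
        batch_size=256,
        epochs=5,
        model_file=None,         # no saved model: train from scratch
        create_dict=True,        # build and save the dictionary from the data
        dict_file=None,          # unused when create_dict is True
        load_dbs=False,          # build databases from the text directories
    )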