Exemple #1
0
def generate_model(dictionary, bow_corpus, corpus_path):
    """Load the cached TF-IDF model, or train and cache it if loading fails.

    Args:
        dictionary: Passed as the second positional argument to
            ``TfidfModel`` when a new model has to be trained.
        bow_corpus: Passed as the first positional argument to
            ``TfidfModel`` when a new model has to be trained.
        corpus_path: String prefix; ``'wiki-tfidf.model'`` is appended to it
            to form the path of the cached model file.

    Returns:
        The ``TfidfModel`` that was loaded from disk or freshly trained.
    """
    model_path = corpus_path + 'wiki-tfidf.model'
    try:
        tfidf = TfidfModel.load(model_path)
    except Exception:
        # Any load failure falls back to retraining (best-effort cache), but
        # KeyboardInterrupt/SystemExit are no longer swallowed as they were
        # with a bare ``except:``.
        tfidf = TfidfModel(bow_corpus, dictionary)
        # Public ``save`` replaces the private ``_smart_save`` helper.
        tfidf.save(model_path)
    else:
        # NOTE(review): this message is emitted only when the cached model
        # was loaded successfully; wording kept as in the original output.
        print('tfidf model generated')
    return tfidf
Exemple #2
0
config.read('config.ini')

# TODO: Generalize this step so that corpus_path can point at any corpus;
# the config file should allow Wikipedia, Gutenberg, ...
# The slice drops the first and last character of the configured value
# (presumably surrounding quotes -- verify against config.ini).
corpus_path = config['WIKI']['en'][1:-1]

dictionary = Dictionary.load_from_text(
    os.path.relpath(corpus_path + '_wordids.txt.bz2'))
bow_corpus = MmCorpus(os.path.relpath(corpus_path + '_bow.mm'))

# Reuse the cached TF-IDF model when it can be loaded; otherwise train it
# from the bag-of-words corpus and cache it for the next run.
model_path = corpus_path + 'wiki-tfidf.model'
try:
    tfidf = TfidfModel.load(model_path)
except Exception:
    # Any load failure triggers a rebuild, but KeyboardInterrupt/SystemExit
    # are no longer swallowed as they were with a bare ``except:``.
    tfidf = TfidfModel(bow_corpus, dictionary)
    # Public ``save`` replaces the private ``_smart_save`` helper.
    tfidf.save(model_path)

# Test sentences
sentence1 = 'pilar pescado en la tarde es fatal'
sentence2 = 'machacar pescado al atardecer es terrible'

# Tokenize the sentences on whitespace
sent1 = sentence1.split()
sent2 = sentence2.split()

# Convert the token lists to bag-of-words vectors using the dictionary
bow_sent1 = dictionary.doc2bow(sent1)
bow_sent2 = dictionary.doc2bow(sent2)

# Apply the TF-IDF model to each bag-of-words vector
bow_sent1_tfidf = tfidf[bow_sent1]
bow_sent2_tfidf = tfidf[bow_sent2]