Example #1
0
norm_model = 'stemmedAllCleaned-fq10-cd10.noblanks.cor_log_ent.model'
# the lsi transformation
trans_model = 'stemmedAllCleaned-fq10-cd10.noblanks.cor_500__lsi.model'

matrices = {}

logging.info('load the articles pickle')
with open(results_path + "sparql_wiki.pickle", 'r') as f:
    articles = pickle.load(f)

logging.info('load the dictionary')
id2word, word2id = utils.loadDictionary(working_corpus + word_ids_extension)
dictionary = Dictionary(word2id=word2id, id2word=id2word)

logging.info('load the log_ent model')
log_ent = LogEntropyModel.load(results_path + norm_model)

logging.info('load the LSI model')
lsi = LsiModel.load(results_path + trans_model)

for key in articles.iterkeys():

    logging.info('current term: %s' % key)

    term_list = articles[key].keys()
    text_list = [dictionary.doc2bow(article['text'], allowUpdate=False, returnMissingWords=False) 
            for article in articles[key].values()]
    sim_matrix = np.zeros((len(text_list), len(text_list)))

    logging.info('transform the textlist')
    text_list = lsi[log_ent[text_list]]
Example #2
0
norm_model = 'stemmedAllCleaned-fq10-cd10.noblanks.cor_log_ent.model'
# the lsi transformation
trans_model = 'stemmedAllCleaned-fq10-cd10.noblanks.cor_500__lsi.model'

matrices = {}

logging.info('load the articles pickle')
with open(results_path + "sparql_wiki.pickle", 'r') as f:
    articles = pickle.load(f)

logging.info('load the dictionary')
id2word, word2id = utils.loadDictionary(working_corpus + word_ids_extension)
dictionary = Dictionary(word2id=word2id, id2word=id2word)

logging.info('load the log_ent model')
log_ent = LogEntropyModel.load(results_path + norm_model)

logging.info('load the LSI model')
lsi = LsiModel.load(results_path + trans_model)

for key in articles.iterkeys():

    logging.info('current term: %s' % key)

    term_list = articles[key].keys()
    text_list = [
        dictionary.doc2bow(article['text'],
                           allowUpdate=False,
                           returnMissingWords=False)
        for article in articles[key].values()
    ]