Ejemplo n.º 1
0
def create_sim_matrix(tfidf, corpus, dictionary, outputDir):
    """
    Create a Gensim similarity matrix for document similarity comparison and save it.

    The index is built from ``tfidf[corpus]``. ``<outputDir>/indices`` is
    passed to ``Similarity`` as its first argument (the shard output prefix in
    Gensim's API) and the resulting index object is saved to
    ``<outputDir>/Index``. The final save path is printed to stdout.

    tfidf (Gensim tfidf model): Gensim tfidf model, applied to ``corpus``
    corpus (Gensim corpus object): Gensim corpus
    dictionary (Gensim dictionary object): Gensim dictionary; its length is
        used as ``num_features``
    outputDir (string): Directory to save the matrix in, with or without a
        trailing path separator
    """
    # Function-scope import so the block does not rely on a module-level
    # ``import os`` that may not exist in this file.
    import os

    # Join rather than concatenate: plain ``outputDir + 'indices'`` silently
    # produced ``<dir>indices`` next to the directory whenever the caller
    # omitted the trailing separator.
    indicesFile = os.path.join(outputDir, 'indices')
    simFile = os.path.join(outputDir, 'Index')
    sims = Similarity(indicesFile,
                      tfidf[corpus],
                      num_features=len(dictionary))
    # Close the last open shard before saving the index object.
    sims.close_shard()
    sims.save(simFile)
    print('Similarity matrix created and stored at: ' + simFile)