Beispiel #1
0
def export_model(model_file, out_file):
    """Export a saved LSI model as a plain-text, tab-separated file.

    Layout of the output:

    * one header line holding ``model.numTerms`` and ``model.numTopics``,
      separated by a tab;
    * then, for every term id from 0 up to ``numTerms - 1``, two lines:
      the term itself (trailing non-alphanumeric characters removed) and
      the tab-separated entries of row ``term`` of ``model.projection.u``.

    Args:
        model_file: Location handed to ``LsiModel.load``.
        out_file: Destination handed to ``FileWriter`` (opened with ``'w'``).

    Note:
        The original author states that the output is UTF-8 encoded. The
        actual encoding is decided by ``FileWriter``, which is defined
        outside this function -- TODO confirm.
    """
    # NOTE: only LsiModel is handled; a dispatch on the model type was left
    # commented out by the original author.
    model = LsiModel.load(model_file)
    with FileWriter(out_file, 'w').open() as out:
        out.write(u"{0}\t{1}\n".format(model.numTerms, model.numTopics))
        for term in xrange(model.numTerms):
            # Go through the mapping interface instead of reaching into
            # ``id2token``: presumably ``id2word`` is a gensim Dictionary,
            # whose ``id2token`` is filled lazily and can therefore still be
            # empty on a freshly loaded model (KeyError).
            word = model.id2word[term]
            # Only byte strings need decoding; calling .decode() on text
            # that is already unicode fails for non-ASCII tokens on Python 2.
            if isinstance(word, bytes):
                word = word.decode("utf-8")
            # Drop trailing characters that are not letters or digits. An
            # empty result is still written so every term keeps two lines.
            while word and not word[-1].isalnum():
                word = word[:-1]
            out.write(u"{0}\n".format(word))
            # Column ``term`` of u.T is row ``term`` of u; flatten() yields
            # a 1-D sequence whether u is an ndarray or a matrix.
            weights = numpy.asarray(model.projection.u.T[:, term]).flatten()
            out.write(u"{0}\n".format(u"\t".join(str(w) for w in weights)))