Ejemplo n.º 1
0
def train(mode, model_file, descriptions_file, output_file=None,
          neg_words_mult=2., lbda=50, min_words=1):
    """Train an entity model on top of a word2vec model and optionally save it.

    Args:
        mode: Which entity model to build: ``'centroid'`` selects
            ``EntityModelCentroid``, ``'lr'`` selects ``EntityModelLR``.
        model_file: Passed to ``load_word2vec_model`` (opened with
            ``mmap='r'``).
        descriptions_file: Passed to ``read_entity_word_seqs`` to produce the
            training sequences.
        output_file: When not ``None``, the trained entity model is saved
            there via ``entity_model.save``.
        neg_words_mult: Forwarded to ``EntityModelLR``; only used when
            ``mode == 'lr'``.
        lbda: Forwarded to ``EntityModelLR``; only used when
            ``mode == 'lr'``.
        min_words: Forwarded to ``read_entity_word_seqs``.

    Raises:
        ValueError: If ``mode`` is neither ``'centroid'`` nor ``'lr'``.
    """
    # Reject an unknown mode up front so a typo does not cost a full model
    # load before failing. A tuple (not a set) keeps this an equality test
    # that also works for unhashable values.
    if mode not in ('centroid', 'lr'):
        raise ValueError('unsupported mode %s' % mode)

    model = load_word2vec_model(model_file, mmap='r')
    if mode == 'centroid':
        entity_model = EntityModelCentroid()
    else:  # mode == 'lr', guaranteed by the check above
        # Running total of the per-word counts, in index2word order.
        bins = np.cumsum([model.vocab[word].count
                          for word in model.index2word])
        entity_model = EntityModelLR(bins, neg_words_mult, lbda)

    entity_model.train(model,
                       read_entity_word_seqs(descriptions_file, model, min_words))

    if output_file is not None:
        entity_model.save(output_file)
Ejemplo n.º 2
0
def train(mode,
          model_file,
          descriptions_file,
          output_file=None,
          neg_words_mult=2.,
          lbda=50,
          min_words=1):
    """Build an entity model from a word2vec model, then optionally persist it.

    Args:
        mode: Entity model selector. ``'centroid'`` builds an
            ``EntityModelCentroid``; ``'lr'`` builds an ``EntityModelLR``.
        model_file: Handed to ``load_word2vec_model`` together with
            ``mmap='r'``.
        descriptions_file: Handed to ``read_entity_word_seqs``, whose result
            is the training input.
        output_file: Destination for ``entity_model.save``; nothing is saved
            when this is ``None``.
        neg_words_mult: Second argument of ``EntityModelLR`` (``'lr'`` mode
            only).
        lbda: Third argument of ``EntityModelLR`` (``'lr'`` mode only).
        min_words: Third argument of ``read_entity_word_seqs``.

    Raises:
        ValueError: When ``mode`` is not one of ``'centroid'`` or ``'lr'``.
    """
    # Validate before loading: the model load is the first side effect and
    # there is no point performing it for a mode that cannot be trained.
    # A tuple keeps the membership test equality-based and hash-free.
    if mode not in ('centroid', 'lr'):
        raise ValueError('unsupported mode %s' % mode)

    model = load_word2vec_model(model_file, mmap='r')
    if mode == 'centroid':
        entity_model = EntityModelCentroid()
    else:  # only 'lr' can reach this branch after the validation above
        # Cumulative sum of each vocabulary word's count, following the
        # order of model.index2word.
        bins = np.cumsum(
            [model.vocab[word].count for word in model.index2word])
        entity_model = EntityModelLR(bins, neg_words_mult, lbda)

    entity_model.train(
        model, read_entity_word_seqs(descriptions_file, model, min_words))

    if output_file is not None:
        entity_model.save(output_file)