Example #1
def eval(model_file, lr_entity_file, centroid_entity_file):
    # Enable emacs-style line editing at the interactive prompt.
    import readline
    readline.parse_and_bind('set editing-mode emacs')

    # Load the word vectors and the two entity models, memory-mapped read-only.
    model = load_word2vec_model(model_file, mmap='r')
    lr_entity_model = EntityModel.load(lr_entity_file, mmap='r')
    centroid_entity_model = EntityModel.load(centroid_entity_file, mmap='r')

    # Pair each entity with its lower-cased form so queries can be matched
    # case-insensitively.
    norm_entities = [(entity.lower(), entity)
                     for entity in lr_entity_model.entities]

    # Read queries until EOF (Ctrl-D).
    while True:
        try:
            line = raw_input('> ').strip()
        except EOFError:
            break

        # Split the query into plain words and recognized entities, then rank
        # candidate entities under both models.
        words, entities = parse_query(norm_entities, line)
        lr_top = top_entities(model, lr_entity_model, entities, words)
        centroid_top = top_entities(model, centroid_entity_model, entities,
                                    words)

        # Print the two rankings side by side: LR model on the left,
        # centroid model on the right.
        for (lr_score, lr_ent), (centroid_score,
                                 centroid_ent) in zip(lr_top, centroid_top):
            print '%-50s%10.3f | %-50s%10.3f' % (lr_ent, lr_score,
                                                 centroid_ent, centroid_score)
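
The example above is an interactive comparison loop: each query is parsed into plain words and recognized entities, both entity models rank candidates for it, and the two rankings are printed side by side. Below is a minimal sketch of a command-line entry point that could drive it, assuming it lives in the same module as eval() above; the argparse wiring and argument names are illustrative assumptions, not part of the original code.

# Hypothetical wrapper, not from the original source: read the three file
# paths from the command line and hand them to eval().
import argparse

def main():
    parser = argparse.ArgumentParser(
        description='Interactively compare LR and centroid entity rankings.')
    parser.add_argument('model_file', help='word2vec model file')
    parser.add_argument('lr_entity_file', help='LR EntityModel file')
    parser.add_argument('centroid_entity_file', help='centroid EntityModel file')
    args = parser.parse_args()
    eval(args.model_file, args.lr_entity_file, args.centroid_entity_file)

if __name__ == '__main__':
    main()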

Example #2
def quant_entities(input_file, output_template=None, target_err=0.1):
    entity_model = EntityModel.load(input_file, mmap='r')

    # Quantize the entity vectors to the requested average error, then compute
    # the predicted bits per stored value and the fraction of zeroed values.
    q, pred_bits, zeros, avg_err, quant_vecs, dequant_model = quantize_entities(
        entity_model, target_err)
    pred_bps = float(pred_bits) / quant_vecs.size
    avg_zeros = float(zeros) / quant_vecs.size

    if output_template is not None:
        output_filename = '%s.e%.3f' % (output_template, target_err)
        with open(output_filename + '.txt', 'w') as fout:
            # Invert the entity -> index mapping so vectors can be written in
            # index order alongside their entity names.
            index2entity = [None] * len(entity_model.entities)
            for entity, idx in entity_model.entities.iteritems():
                index2entity[idx] = entity
            save_vectors(fout, index2entity, quant_vecs, q)

        # Save the de-quantized model reconstructed from the quantized vectors.
        dequant_model.save(output_filename + '.model')

    # Print a one-line JSON summary of the quantization statistics
    # (json and OrderedDict are assumed to be imported at module level).
    print json.dumps(
        OrderedDict([
            ('q', q),
            ('pred_bps', float(pred_bps)),
            ('avg_zeros', float(avg_zeros)),
            ('avg_err', float(avg_err)),
        ]))
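
As the code shows, quant_entities loads an EntityModel, quantizes its vectors to a target average error, optionally writes the quantized vectors to a text file together with a de-quantized model, and prints a JSON summary (quantization level q, predicted bits per value, fraction of zeros, achieved average error). A small usage sketch follows; the file names and error levels are made up for illustration.

# Hypothetical usage, not from the original source: sweep a few target error
# levels and keep both the quantized outputs and the printed JSON summaries.
for target_err in (0.05, 0.1, 0.2):
    quant_entities('entities.model',
                   output_template='entities.quant',
                   target_err=target_err)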