def eval(model_file, lr_entity_file, centroid_entity_file): import readline readline.parse_and_bind('set editing-mode emacs') model = load_word2vec_model(model_file, mmap='r') lr_entity_model = EntityModel.load(lr_entity_file, mmap='r') centroid_entity_model = EntityModel.load(centroid_entity_file, mmap='r') norm_entities = [(entity.lower(), entity) for entity in lr_entity_model.entities] while True: try: line = raw_input('> ').strip() except EOFError: break words, entities = parse_query(norm_entities, line) lr_top = top_entities(model, lr_entity_model, entities, words) centroid_top = top_entities(model, centroid_entity_model, entities, words) for (lr_score, lr_ent), (centroid_score, centroid_ent) in zip(lr_top, centroid_top): print '%-50s%10.3f | %-50s%10.3f' % (lr_ent, lr_score, centroid_ent, centroid_score)
def quant_entities(input_file, output_template=None, target_err=0.1):
    # NOTE(review): an identical duplicate definition of quant_entities appears
    # later in this file and shadows this one at import time — one of the two
    # copies should be removed.
    """Quantize an entity model and print compression statistics as JSON.

    Parameters:
        input_file: path to a saved EntityModel (memory-mapped on load).
        output_template: if given, the quantized vectors are written to
            '<template>.e<err>.txt' and the dequantized model is saved to
            '<template>.e<err>.model'.
        target_err: target quantization error passed to quantize_entities;
            also embedded in the output filenames.

    Prints an OrderedDict serialized as JSON with keys: 'q' (quantization
    level), 'pred_bps' (pred_bits per vector element), 'avg_zeros'
    (fraction of zero elements), and 'avg_err' (average error reported by
    quantize_entities).
    """
    entity_model = EntityModel.load(input_file, mmap='r')
    q, pred_bits, zeros, avg_err, quant_vecs, dequant_model = quantize_entities(
        entity_model, target_err)
    # Normalize totals to per-element averages over the quantized matrix.
    pred_bps = float(pred_bits) / quant_vecs.size
    avg_zeros = float(zeros) / quant_vecs.size
    if output_template is not None:
        output_filename = '%s.e%.3f' % (output_template, target_err)
        with open(output_filename + '.txt', 'w') as fout:
            # Invert the entity -> index mapping so vectors are written in
            # index order.
            index2entity = [None] * len(entity_model.entities)
            for entity, idx in entity_model.entities.iteritems():
                index2entity[idx] = entity
            save_vectors(fout, index2entity, quant_vecs, q)
        dequant_model.save(output_filename + '.model')
    print json.dumps(
        OrderedDict([
            ('q', q),
            ('pred_bps', float(pred_bps)),
            ('avg_zeros', float(avg_zeros)),
            ('avg_err', float(avg_err)),
        ]))
def quant_entities(input_file, output_template=None, target_err=0.1): entity_model = EntityModel.load(input_file, mmap='r') q, pred_bits, zeros, avg_err, quant_vecs, dequant_model = quantize_entities(entity_model, target_err) pred_bps = float(pred_bits) / quant_vecs.size avg_zeros = float(zeros) / quant_vecs.size if output_template is not None: output_filename = '%s.e%.3f' % (output_template, target_err) with open(output_filename + '.txt', 'w') as fout: index2entity = [None] * len(entity_model.entities) for entity, idx in entity_model.entities.iteritems(): index2entity[idx] = entity save_vectors(fout, index2entity, quant_vecs, q) dequant_model.save(output_filename + '.model') print json.dumps(OrderedDict([ ('q', q), ('pred_bps', float(pred_bps)), ('avg_zeros', float(avg_zeros)), ('avg_err', float(avg_err)), ]))