Example 1
    # Memory-map the word2vec model and the two entity models (read-only).
    model = load_word2vec_model(model_file, mmap='r')
    lr_entity_model = EntityModel.load(lr_entity_file, mmap='r')
    centroid_entity_model = EntityModel.load(centroid_entity_file, mmap='r')

    # Pair each entity with its lowercased form for case-insensitive matching.
    norm_entities = [(entity.lower(), entity)
                     for entity in lr_entity_model.entities]

    # Read queries from the prompt until EOF (Ctrl-D).
    while True:
        try:
            line = raw_input('> ').strip()
        except EOFError:
            break

        # Split the query into known entities and remaining words, then rank
        # candidate entities under both models.
        words, entities = parse_query(norm_entities, line)
        lr_top = top_entities(model, lr_entity_model, entities, words)
        centroid_top = top_entities(model, centroid_entity_model, entities,
                                    words)

        # Show the two rankings side by side for comparison.
        for (lr_score, lr_ent), (centroid_score,
                                 centroid_ent) in zip(lr_top, centroid_top):
            print '%-50s%10.3f | %-50s%10.3f' % (lr_ent, lr_score,
                                                 centroid_ent, centroid_score)


if __name__ == '__main__':
    # Fixed seed so runs are reproducible.
    np.random.seed(1729)
    logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s',
                        level=logging.INFO)
    baker.run()
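
The prompt loop above is a reusable pattern: raw_input raises EOFError at end
of input, so Ctrl-D ends the session cleanly. A minimal, self-contained sketch
of just that loop (Python 2, matching the snippet; the echo body is
illustrative only):

while True:
    try:
        line = raw_input('> ').strip()
    except EOFError:
        # Ctrl-D sends EOF, which exits the loop cleanly.
        break
    if not line:
        continue
    print 'query: %s' % line
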
Example 2
        ('q', q),
        ('pred_bps', float(pred_bps)),
        ('avg_zeros', float(avg_zeros)),
        ('avg_err', float(avg_err)),
    ]))


def load_quant_data(json_filename):
    # Local import: pandas is only needed by this helper.
    import pandas as pd

    with open(json_filename) as fin:
        data = []
        # One JSON object per line; OrderedDict preserves each record's
        # key order.
        decoder = json.JSONDecoder(object_pairs_hook=OrderedDict)
        for line in fin:
            row = decoder.decode(line)
            # Keep only the final accuracy entry, which should cover the
            # 'total' section, and convert it to a fraction correct.
            accuracy = row['accuracy'][-1]
            assert accuracy['section'] == 'total'  # XXX
            acc_percentage = (float(accuracy['correct']) /
                              (accuracy['correct'] + accuracy['incorrect']))
            row['accuracy'] = acc_percentage

            data.append(row)

        return pd.DataFrame(data)


if __name__ == '__main__':
    logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s',
                        level=logging.INFO)
    baker.run()
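
load_quant_data consumes line-delimited JSON, one record per line. Passing
object_pairs_hook=OrderedDict to json.JSONDecoder keeps each record's keys in
file order, which a plain dict on Python 2 would not, and helps keep the
resulting DataFrame's column order stable. A self-contained sketch of that
decoding step (the two records here are hypothetical):

import json
from collections import OrderedDict

decoder = json.JSONDecoder(object_pairs_hook=OrderedDict)
for line in ['{"q": 4, "pred_bps": 1.25}', '{"q": 8, "pred_bps": 2.5}']:
    row = decoder.decode(line)
    # Keys come back in file order: 'q' first, then 'pred_bps'.
    print row
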