コード例 #1
0
                    # NOTE(review): the commented-out lines below are disabled code that
                    # extended `entities` from the "entity_map" of the long answer, the
                    # long-answer candidates and the short answers of each annotation.
                    #     if 'entity_map' in ann['long_answer'].keys():
                    #         entities.extend([ ent for k, v in ann["long_answer"]["entity_map"].items() for (ids, ent) in v ])
                    # for cand in item["long_answer_candidates"]:
                    #     if 'entity_map' in cand.keys():
                    #         entities.extend([ ent for k, v in cand["entity_map"].items() for (ids, ent) in v ])
                    # for ann in item["annotations"]:
                    #     for sa in ann['short_answers']:
                    #         if 'entity_map' in sa.keys():
                    #             entities.extend([ ent for k, v in sa["entity_map"].items() for (ids, ent) in v ])
                    # Count questions for which no entity was collected; the total is
                    # printed by the final "No ent questions" line.
                    if len(entities) == 0:
                        empty_ents += 1
                    # Start timestamp; only referenced by the commented-out timing
                    # print at the end of this fragment.
                    st = time.time()
                    #print(example_id)
                    #print("Size of all entities: %d", len(entities))
                    # Query `apr` with the collected entities and FLAGS.csr_num_hops;
                    # the call returns an (entities, facts) pair -- presumably the
                    # k-hop neighbourhood of the seeds (TODO confirm against
                    # ApproximatePageRank.get_khop_facts).
                    k_hop_entities, k_hop_facts = apr.get_khop_facts(
                        entities, FLAGS.csr_num_hops)
                    #print("Size of two hop entities: %d", len(k_hop_entities))
                    #print("Size of two hop facts: %d", len(k_hop_facts))
                    # A fresh CsrData is created per question; the k-hop entities and
                    # facts are passed in together with the question id and the
                    # question/relation embeddings. Output location is taken from
                    # FLAGS.apr_files_dir.
                    csr_data = CsrData()
                    csr_data.create_and_save_csr_data(
                        full_wiki=FLAGS.full_wiki,
                        decompose_ppv=FLAGS.decompose_ppv,
                        files_dir=FLAGS.apr_files_dir,
                        sub_entities=k_hop_entities,
                        question_id=example_id,
                        question_embedding=question_embedding,
                        relation_embeddings=relation_embeddings,
                        sub_facts=k_hop_facts)
                    #print('Time taken for CSR: '+str(time.time() - st))
    # Report how many questions had an empty entity list.
    print("No ent questions: " + str(empty_ents))
コード例 #2
0
    # Per-split task/shard counts. They are only referenced by the disabled
    # range() alternatives noted on the loops below; kept so those loops can
    # be re-enabled without hunting for the numbers.
    max_tasks = {"train": 50, "dev": 5}
    max_shards = {"train": 7, "dev": 17}
    apr = ApproximatePageRank()
    # Each loop currently iterates over the single value selected via FLAGS,
    # so one invocation handles exactly one (split, task, shard) combination.
    for mode in [FLAGS.split]:
        # Parse all shards in each mode
        # Currently sequentially, can be parallelized later
        for task_id in [FLAGS.task_id]:  # range(0, max_tasks[mode])
            for shard_id in [FLAGS.shard_split_id]:  # range(0, max_shards[mode])
                # if task_id == 0 and shard_id in range(0, 16):
                #     print("skipping finished job")
                #     continue
                nq_data, entities = get_examples(FLAGS.nq_dir, mode, task_id,
                                                 shard_id)
                # get_examples signals a missing/empty shard with nq_data=None.
                if nq_data is None:
                    print("No examples here")
                    continue
                # print() does not interpolate logging-style arguments, so the
                # count must be formatted into the string explicitly.
                print("Size of all entities: %d" % len(entities))
                two_hop_entities = apr.get_khop_entities(
                    entities, FLAGS.csr_num_hops)
                print("Size of two hop entities: %d" % len(two_hop_entities))
                # Build and save the CSR data for this shard, identified by
                # (mode, task_id, shard_id), under FLAGS.apr_files_dir.
                csr_data = CsrData()
                csr_data.create_and_save_csr_data(
                    full_wiki=FLAGS.full_wiki,
                    decompose_ppv=FLAGS.decompose_ppv,
                    files_dir=FLAGS.apr_files_dir,
                    sub_entities=two_hop_entities,
                    mode=mode,
                    task_id=task_id,
                    shard_id=shard_id)
コード例 #3
0
                    #print("Size of two hop entities: %d", len(k_hop_entities))
                    #print("Size of two hop facts: %d", len(k_hop_facts))
                    # relation_scores stays None unless FLAGS.rel_classifier_scores
                    # is set, in which case it is looked up per question below.
                    relation_scores = None
                    if FLAGS.rel_classifier_scores:
                        print(example_id, half_qid)
                        # que_rel_scores is keyed by the string form of half_qid.
                        # Questions without an entry are logged and skipped, so they
                        # are not counted in proc_q and no CSR data is saved for them.
                        if str(half_qid) not in que_rel_scores:
                            print("not in qid")
                            print(example_id, half_qid)
                            print(item['question_text'], entities)
                            continue
                        print(item['question_text'], entities)
                        print(que_rel_scores[str(half_qid)])
                        #print([(rel ,apr.data.entity_names['r'][str(apr.data.rel2id[rel])], score) for rel, score in list(que_rel_scores[str(half_qid)].items())])
                        relation_scores = que_rel_scores[str(half_qid)]
                    # Count of questions that reach the CSR-saving step.
                    proc_q +=1
                    # A fresh CsrData is created per question. Output goes to
                    # FLAGS.output_apr_files_dir, and apr.data.relations_to_filter is
                    # forwarded as relations_to_filter.
                    csr_data = CsrData()
                    csr_data.create_and_save_csr_data(full_wiki=FLAGS.full_wiki,
                                                      decompose_ppv=FLAGS.decompose_ppv,
                                                      files_dir=FLAGS.output_apr_files_dir,
                                                      sub_entities=k_hop_entities,
                                                      question_id=example_id,
                                                      question_embedding=question_embedding,
                                                      relation_embeddings=relation_embeddings,
                                                      relation_scores=relation_scores,
                                                      sub_facts=k_hop_facts, relations_to_filter=apr.data.relations_to_filter)
                    #print('Time taken for CSR: '+str(time.time() - st))
    # Final summary: the empty_ents counter and the number of processed questions.
    print("No ent questions: "+str(empty_ents))
    print("No proc q: "+str(proc_q))