def conquer_relationmatch():
    """Evaluate relation matching on the easy test split and print the result.

    Each key of the relation-words file has the form ``question###entity...``.
    For every key, the relation words (minus the words returned by
    ``filter_word``) are passed to ``hit_predicate_or_types_dict`` together
    with the predicate/type map linked to the entity and the predicate/type
    synset words.  The helper's result is used as a mapping from hit predicate
    to a set of hit types.

    * A question becomes a *candidate* when that mapping is non-empty.
    * A question is *hit* when its first gold predicate is a key of the
      mapping and at least one of its gold types is in the associated set.

    Prints every question that was never hit, then the number of hit
    questions, then the number of candidate questions.  Returns ``None``.

    Raises:
        KeyError: if a question is missing from the gold predicate or gold
            type files.
        IndexError: if a key contains no ``###`` separator, or a candidate
            question has an empty gold predicate list.
    """
    question_relation_words = read_dict(
        "../data/relation/test.easy.partial.question_entity_pro_relation_words"
    )
    filter_words = filter_word()
    ques_predicate = read_dict("../data/relation/test.easy.ques.edge")
    ques_type = read_dict("../data/relation/test.easy.ques.type")

    # Only the first three of the returned resources are needed here.
    (entity_predicate_type, predicate_synsetwords, type_synsetwords,
     *_unused) = handle_entity_predicate_answer_type()

    questions_all = set()
    questions_hitted = set()
    question_candidate = set()

    for question_entity_pro, relation_words in question_relation_words.items():
        # Split once; field 0 is the question, field 1 the entity.
        fields = question_entity_pro.split("###")
        question, entity = fields[0], fields[1]
        questions_all.add(question)

        # Author's note (translated): an earlier version returned the
        # entity's predicates and types independently, ignoring which types
        # belong to which predicate; that has been corrected by using the
        # linked map below.
        predicates_type_map = predicates_types_linked_map(
            entity_predicate_type, entity)
        predicate_ = ques_predicate[question]
        types_ = ques_type[question]

        relation_word_filter = set(relation_words) - filter_words
        hit_predicate_types = hit_predicate_or_types_dict(
            predicates_type_map, relation_word_filter,
            predicate_synsetwords, type_synsetwords)
        if not hit_predicate_types:
            continue

        question_candidate.add(question)
        # Direct lookup of the first gold predicate instead of scanning every
        # hit predicate for equality.
        hit_types = hit_predicate_types.get(predicate_[0])
        if hit_types is not None and set(types_) & hit_types:
            questions_hitted.add(question)

    for que in questions_all - questions_hitted:
        print(que)
    print(len(questions_hitted))
    print(len(question_candidate))
def allentityreturn(questionpath):
    """Build a mapping from each question to its ``entityreturn`` result.

    Args:
        questionpath: path handed to ``files_handle.read_dict``; the loaded
            object is iterated to obtain the questions (its keys, if it is a
            dict).

    Returns:
        A new dict ``{question: entityreturn(question)}`` with one entry per
        iterated question.
    """
    loaded_questions = files_handle.read_dict(questionpath)
    return {
        question: entityreturn(question)
        for question in loaded_questions
    }
def output():
    """Write the differences between stored and recomputed question entities.

    For the training and the testing split, the question/entity file is
    loaded twice: once through ``allentityreturn`` (recomputed entities) and
    once directly through ``files_handle.read_dict`` (stored entities).
    ``dict_difference_value`` is applied in both directions and the four
    results are written next to the inputs as ``missnodestrain``,
    ``morenodestrain``, ``missnodestest`` and ``morenodestest``.

    NOTE(review): "miss" is ``dict_difference_value(stored, recomputed)`` and
    "more" is the reverse; presumably these are entities missing from /
    additional in the recomputed mapping -- confirm against
    ``dict_difference_value``.

    Paths are built with ``os.path.join`` so they resolve on POSIX as well as
    on Windows (where the resulting strings equal the previous hard-coded
    ``..\\data\\entity\\...`` literals).
    """
    import os

    entity_dir = os.path.join("..", "data", "entity")
    splits = (
        ("train", "graphquestions.training.questionEntity"),
        ("test", "graphquestions.testing.questionEntity"),
    )

    # Compute every difference first so nothing is written if a step fails.
    pending_writes = []
    for suffix, filename in splits:
        question_path = os.path.join(entity_dir, filename)
        recomputed = allentityreturn(question_path)
        stored = files_handle.read_dict(question_path)
        pending_writes.append(
            ("missnodes" + suffix, dict_difference_value(stored, recomputed)))
        pending_writes.append(
            ("morenodes" + suffix, dict_difference_value(recomputed, stored)))

    for out_name, difference in pending_writes:
        files_handle.write_dict(difference, os.path.join(entity_dir, out_name))
def handle_entity_predicate_answer_type():
    """Load the entity/predicate/type file and derive its word resources.

    Prints the current local time (``YYYY-mm-dd HH:MM:SS``) right after the
    file has been read, then derives, for predicates and for types alike:
    important words, less important words, all words
    (``generate_pretype_words``) and the synset words of the important words
    and of all words.

    Returns:
        An 11-tuple, in this order: entity_predicate_type,
        predicate synset words, type synset words,
        predicate important-word synset words,
        type important-word synset words,
        predicate words, type words,
        predicate important words, predicate less important words,
        type important words, type less important words.
    """
    ent_pred_type = read_dict(
        "../data/relation/test.easy.partial.entities.predicates.type")

    now = time.localtime(time.time())
    print(time.strftime('%Y-%m-%d %H:%M:%S', now))

    preds, typs = extract_predicates_types(ent_pred_type)

    # Important / less important words.
    pred_important = generate_important_words(preds)
    pred_less_important = generate_lessimportant_words(preds)
    type_important = generate_important_words(typs)
    type_less_important = generate_lessimportant_words(typs)

    # Full word lists.
    pred_all_words = generate_pretype_words(preds)
    type_all_words = generate_pretype_words(typs)

    # Synset expansions.
    pred_important_synset = generate_synsetwords(pred_important)
    type_important_synset = generate_synsetwords(type_important)
    pred_synset = generate_synsetwords(pred_all_words)
    type_synset = generate_synsetwords(type_all_words)

    return (
        ent_pred_type,
        pred_synset,
        type_synset,
        pred_important_synset,
        type_important_synset,
        pred_all_words,
        type_all_words,
        pred_important,
        pred_less_important,
        type_important,
        type_less_important,
    )
def handle_entity_predicate_answer_type_reverse():
    """Load the entity/predicate/type file and build reversed word indexes.

    Prints the current local time (``YYYY-mm-dd HH:MM:SS``) right after the
    file has been read.  The important and less important words of the
    predicates and of the types are generated and each result is passed
    through ``reverse_dict``.

    Returns:
        A 5-tuple, in this order: entity_predicate_type,
        reversed predicate important words,
        reversed predicate less important words,
        reversed type important words,
        reversed type less important words.
    """
    ent_pred_type = read_dict(
        "../data/relation/test.easy.partial.entities.predicates.type")

    now = time.localtime(time.time())
    print(time.strftime('%Y-%m-%d %H:%M:%S', now))

    preds, typs = extract_predicates_types(ent_pred_type)
    pred_important = generate_important_words(preds)
    pred_less_important = generate_lessimportant_words(preds)
    type_important = generate_important_words(typs)
    type_less_important = generate_lessimportant_words(typs)

    # Tuple elements are evaluated left to right, so reverse_dict runs in the
    # order: predicate important, predicate less important, type important,
    # type less important.
    return (
        ent_pred_type,
        reverse_dict(pred_important),
        reverse_dict(pred_less_important),
        reverse_dict(type_important),
        reverse_dict(type_less_important),
    )
def relation_word_num():
    """Count how often each relation word occurs, once per question run.

    Keys of the relation-words file have the form ``question###...``.  The
    file is walked in its stored order; the words of a key are counted only
    when its question differs from the question of the immediately preceding
    key, so a run of consecutive keys for the same question contributes its
    first entry only.

    Returns:
        A plain dict mapping word -> count, ordered by decreasing count
        (words with equal counts keep the order in which they were first
        seen).
    """
    from collections import Counter

    question_relation_words = read_dict(
        "../data/relation/test.easy.partial.question_entity_pro_relation_words_concerned"
    )

    counts = Counter()
    # None never equals a real question string, so the very first key is
    # always counted -- even when its question part is empty.
    previous_question = None
    for question_entity_pro, words in question_relation_words.items():
        question = question_entity_pro.split("###")[0]
        if question != previous_question:
            # Explicit loop (not Counter.update) so every iterated element
            # counts as one occurrence regardless of the container type.
            for word in words:
                counts[word] += 1
        previous_question = question

    return dict(counts.most_common())