def loadTruth(template_data_path, exclude_ria=None, targets=None):
    '''
    Return the ground truth target matches, building them when necessary.

    The result is resolved in this order:
      1. If targets is truthy, it is returned unchanged.
      2. If exclude_ria is non-empty, the matches are extracted from the
         RIA templates with those files excluded. This dictionary is not
         saved to the shelf.
      3. Otherwise the 'targets' entry of the 'undp' shelf is returned.
         If that entry cannot be read, the matches are extracted from all
         templates and saved in the shelf for future use.

    Args:
        template_data_path (string) : Directory to the RIA templates
            documents.
        exclude_ria (list[string]) : List of files to be excluded from RIA
            data extraction. None or an empty list means nothing is
            excluded.
        targets (dict) : Target matches dictionary of just target
            descriptions.

    Returns:
        target_matches (dict) : Dictionary of ground truth matches for each
            target.
    '''
    if targets:
        return targets

    if exclude_ria is not None and len(exclude_ria) > 0:
        development_matches = parseundp.extract_template_data(
            template_data_path, exclude_ria)
        return parseundp.create_target_dictionary(development_matches)

    # Try the saved copy first. `shelf` starts as None so the cleanup below
    # is safe even when shelve.open() itself fails.
    shelf = None
    try:
        shelf = shelve.open('undp')
        return shelf['targets']
    except Exception:
        # Any failure to read the saved copy (missing key, unreadable shelf)
        # is treated as a cache miss; the matches are rebuilt below.
        # Unlike a bare except, this lets KeyboardInterrupt/SystemExit through.
        pass
    finally:
        if shelf is not None:
            shelf.close()

    development_matches = parseundp.extract_template_data(template_data_path)
    target_matches = parseundp.create_target_dictionary(development_matches)

    # Save the rebuilt matches; close the shelf even if the write fails.
    shelf = shelve.open('undp')
    try:
        shelf['targets'] = target_matches
    finally:
        shelf.close()
    return target_matches
# Score every policy-document set once per paragraph-vector model. Each result
# is stored under "<first excluded file name minus its last 5 chars><vec>",
# where vec is the 1-based position of the model in par_vecs.
vec = 1
for par_vec in par_vecs:
    for i, policy_documents in enumerate(all_policy_documents):
        exclude_ria = all_exclude_ria[i]
        target_matches = getTargetDoc(template_data_path, exclude_ria)
        targs, targ_vecs, sents = getInfo(par_vec, target_matches)
        run_key = exclude_ria[0][:-5] + str(vec)
        print(run_key)
        score_dict = ria(documents_path, policy_documents, par_vec,
                         sents, targ_vecs, targs)
        combined_prior_matches[run_key] = [score_dict]
    vec += 1

# Evaluate each stored score dictionary against the templates that were held
# out for it: everything in the template directory except the files listed in
# all_exclude_ria[i] is excluded from the extraction.
# NOTE(review): i wraps around at a hard-coded 5, which presumably equals
# len(all_policy_documents); this also assumes combined_prior_matches iterates
# in the insertion order of the loop above -- confirm both.
i = 0
for key, val in combined_prior_matches.items():
    held_out = all_exclude_ria[i]
    exclude_test = [fname for fname in os.listdir(template_data_path)
                    if fname not in held_out]
    test_development_matches = parseundp.extract_template_data(
        template_data_path, exclude_test)
    test_target_matches = parseundp.create_target_dictionary(
        test_development_matches)
    print(key, held_out)
    match_by_sent = evaluateByTarget(val[0], test_target_matches, 301)
    val.append(match_by_sent)
    avg_new = avgMatches(match_by_sent, test_target_matches, 301)
    val.append(avg_new)
    i = (i + 1) % 5

# Replace the numeric suffixes of the liberia entries with descriptive ones.
for new_key, old_key in (('liberia_google', 'liberia1'),
                         ('liberia_nbow', 'liberia2'),
                         ('liberia_tfidf', 'liberia3')):
    combined_prior_matches[new_key] = combined_prior_matches.pop(old_key)