def main(q1, q2, articles, batch, input_type, outputFileName, dictType):
    num_articles = int(articles)
    query = queries.main(q1, q2)

    # Fetch PubMed IDs for both queries and run TEES in batch or single mode.
    if batch == "yes":
        q1_id_list, q2_id_list = pmids.main(query, num_articles)
        q1_file_paths = run_tees_batch(q1, q1_id_list)
        q2_file_paths = run_tees_batch(q2, q2_id_list)
    elif batch == "no":
        q1_id_list, q2_id_list = pmids.main(query, num_articles)
        q1_file_paths = run_tees(q1, q1_id_list)
        q2_file_paths = run_tees(q2, q2_id_list)

    q1_dict = get_info_from_interaction_xml(q1_file_paths)
    q2_dict = get_info_from_interaction_xml(q2_file_paths)

    # Score the pair on all extracted words, on protein names only, or both.
    if dictType == 'all' or dictType == 'both':
        all_words_dict = get_all_words_dict(q1, q2, q1_dict, q2_dict)
        normalized_all_words_dict = normalize_dict(all_words_dict, query)
        angle_list_all = Cosine_Sim.main(normalized_all_words_dict, q1, q2)
        print_pair_score_dict(angle_list_all, normalized_all_words_dict,
                              q1, q2, input_type, outputFileName)

    if dictType == 'protein' or dictType == 'both':
        query_dicts = [q1_dict, q2_dict]
        combined_dict = combine_dictionaries(query_dicts)
        normalized_protein_dict = normalize_dict(combined_dict, query)
        angle_list_protein = Cosine_Sim.main(normalized_protein_dict, q1, q2)
        print_pair_score_dict(angle_list_protein, normalized_protein_dict,
                              q1, q2, input_type, outputFileName)
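# Illustrative usage sketch (hypothetical values, not taken from the original
# code): the protein names, article count, and output file below are stand-ins
# that match the string-typed parameters main() expects. Nothing calls this
# helper, and it assumes the queries/pmids/TEES modules are configured.
def example_run_main():
    main('BRCA1', 'TP53',       # q1, q2: hypothetical protein names
         '10',                  # articles: number of articles, passed as a string
         'no',                  # batch: 'yes' for TEES batch mode, 'no' otherwise
         'protein',             # input_type (hypothetical value)
         'example_scores.txt',  # outputFileName (hypothetical)
         'both')                # dictType: 'all', 'protein', or 'both'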
def get_all_predicted_relations_dict(hprd50_paper_dict, max_sentences):
    all_predicted_relations_dict = {}
    for key in hprd50_paper_dict:
        found_relation_score_tuples_list = []
        for relation in hprd50_paper_dict[key].possible_relations_Tnumber:
            q1 = relation[0]
            q2 = relation[1]
            found_relation_score_tuple = ((q1, q2), 0)
            if q1 == q2:
                continue
            query = queries.main(q1, q2)
            ID_sentence_list = find_cooccurrence_sents(hprd50_paper_dict[key], query)
            print 'ID_sentence_list: ', ID_sentence_list
            if not ID_sentence_list:
                # No co-occurrence sentence: record the pair with a zero score.
                found_relation_score_tuple = ((q1, q2), 0)
                print found_relation_score_tuple
                found_relation_score_tuples_list.append(found_relation_score_tuple)
            if ID_sentence_list:
                sentences_with_score1 = Score1.rank_sentences(ID_sentence_list, query, max_sentences)
                sentences_with_score2 = Score2.main(sentences_with_score1, query)
                sorted_sentences_with_score2 = sorted(sentences_with_score2,
                                                      key=operator.attrgetter('score'),
                                                      reverse=True)
                if sorted_sentences_with_score2:
                    if len(sorted_sentences_with_score2) > 1:
                        # Pairs with more than one scored sentence are currently skipped.
                        pass
                    else:
                        for sentence_object in sorted_sentences_with_score2:
                            found_relation_score_tuple = ((q1, q2),
                                                          sentence_object.score,
                                                          sentence_object.order_in_abstract)
                            print found_relation_score_tuple
                            found_relation_score_tuples_list.append(found_relation_score_tuple)
        all_predicted_relations_dict[key] = found_relation_score_tuples_list
    return all_predicted_relations_dict
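# Illustrative sketch (not part of the original code): the dictionary returned
# above maps each paper key to a list of tuples that are either ((q1, q2), 0)
# when no co-occurrence sentence was found, or ((q1, q2), score,
# order_in_abstract) when exactly one sentence was scored. The helper name is
# hypothetical.
def count_scored_relations(all_predicted_relations_dict):
    # Count predicted relations that carry a non-zero sentence score.
    scored = 0
    for key in all_predicted_relations_dict:
        for relation_tuple in all_predicted_relations_dict[key]:
            if len(relation_tuple) == 3 and relation_tuple[1] > 0:
                scored += 1
    return scored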
def index(a1_file, articles, max_sentences):
    global a_file
    a_file = a1_file
    q1 = a1_file.protein1
    q2 = a1_file.protein2
    query = queries.main(q1, q2)  # Creates Queries
    q1_syns = query.q1_syns       # Retrieves Q1 and Q2 synonyms
    q2_syns = query.q2_syns
    print a1_file.protein1, ' synonyms = ', q1_syns
    print a1_file.protein2, ' synonyms = ', q2_syns

    ID_sentence_position_list = Papers.main(query, articles)
    if len(ID_sentence_position_list) > 0:
        print str(len(ID_sentence_position_list)) + " sentences with co-occurrence found"
        sentences_with_score1 = Syntax_Scorer.main(ID_sentence_position_list, query, max_sentences)
        sentences_with_score2 = Semantics_Scorer.main(sentences_with_score1, query)
        sorted_sentences_with_score2 = sorted(sentences_with_score2,
                                              key=operator.attrgetter('score'),
                                              reverse=True)
        if sorted_sentences_with_score2:
            # Append the ranked sentences (with synonyms substituted back in)
            # to the calibration file for later inspection.
            with open(r'txt_files_Testing\calibration unlimited sentences', 'a') as f:
                f.write(query.q1 + '\t' + query.q2 + '\n')
                for sent in sorted_sentences_with_score2:
                    sent_w_replaced_queries = Organize.insert_syns(sent.sentence, q1, q1_syns, q2, q2_syns)
                    if str(sent.sentence)[0] != '<':
                        f.write(str(sent.score) + ' ' + str(sent.method_scored) + '\t' + sent_w_replaced_queries + '\n')
                        print str(sent.score) + ' ' + sent_w_replaced_queries
                f.write('\n')
        print_output_to_file(sorted_sentences_with_score2, q1_syns, q1, q2_syns, q2, a1_file)
    print ""
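# Illustrative sketch (hypothetical, not from the original code): index() itself
# reads only the protein1/protein2 attributes of the a1 file object, so a minimal
# stand-in like the one below can exercise it, although print_output_to_file may
# expect additional attributes. The class name and protein values are examples.
class ExampleA1File(object):
    def __init__(self, protein1, protein2):
        self.protein1 = protein1
        self.protein2 = protein2

# index(ExampleA1File('p53', 'MDM2'), articles=100, max_sentences=20)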
def main(q1, q2, articles, batch, input_type, outputFileName, dictType,
         outputType, evaluation_mode, stemmed, model, text_file):
    models = model.split(' ')
    num_articles = int(articles)
    query = queries.main(q1, q2)
    q1_dict = {}
    q2_dict = {}

    q1_already_downloaded_ids = get_already_downloaded_ids(q1, models)
    q2_already_downloaded_ids = get_already_downloaded_ids(q2, models)
    q1_already_downloaded_file_path_list = get_already_downloaded_file_paths(q1, models, num_articles)
    q2_already_downloaded_file_path_list = get_already_downloaded_file_paths(q2, models, num_articles)

    q1_already_dl_slice = None
    q2_already_dl_slice = None
    q1_file_paths = None
    q2_file_paths = None

    # if num_articles <= len(q1_already_downloaded_file_path_list):
    #     q1_already_dl_slice = q1_already_downloaded_file_path_list[:num_articles]
    #     q1_dict = get_info_from_interaction_xml(q1_already_dl_slice)
    # else:

    # Reuse cached TEES output for q1 when enough files are already on disk;
    # otherwise fetch PubMed IDs and run TEES (batch or single mode).
    if num_articles * 100 <= len(q1_already_downloaded_file_path_list):
        q1_already_dl_slice = q1_already_downloaded_file_path_list[:num_articles]
        q1_dict = get_info_from_interaction_xml(q1_already_dl_slice)
    else:
        q1_id_list = pmids.main(query.q1, num_articles, query.q1_search_string, evaluation_mode)
        if len(q1_id_list) == len(q1_already_downloaded_file_path_list):
            q1_dict = get_info_from_interaction_xml(q1_already_downloaded_file_path_list)
        else:
            if batch == 'yes':
                q1_file_paths = run_tees_batch(q1, q1_id_list, models, text_file)
            elif batch == 'no':
                q1_file_paths = run_tees(q1, q1_id_list, models, text_file)
            if not q1_file_paths:
                q1_file_paths = q1_already_downloaded_file_path_list[:num_articles]
            q1_dict = get_info_from_interaction_xml(q1_file_paths)

    # Same caching logic for q2.
    if num_articles * 100 <= len(q2_already_downloaded_file_path_list):
        q2_already_dl_slice = q2_already_downloaded_file_path_list[:num_articles]
        q2_dict = get_info_from_interaction_xml(q2_already_dl_slice)
    else:
        q2_id_list = pmids.main(query.q2, num_articles, query.q2_search_string, evaluation_mode)
        if len(q2_id_list) == len(q2_already_downloaded_file_path_list):
            q2_dict = get_info_from_interaction_xml(q2_already_downloaded_file_path_list)
        else:
            if batch == 'yes':
                q2_file_paths = run_tees_batch(q2, q2_id_list, models, text_file)
            elif batch == 'no':
                q2_file_paths = run_tees(q2, q2_id_list, models, text_file)
            if not q2_file_paths:
                q2_file_paths = q2_already_downloaded_file_path_list[:num_articles]
            q2_dict = get_info_from_interaction_xml(q2_file_paths)

    # Record how many documents were actually processed for each query.
    if q1_already_dl_slice:
        q1_num_docs_processed = len(q1_already_dl_slice)
    elif q1_file_paths:
        q1_num_docs_processed = len(q1_file_paths)
    else:
        q1_num_docs_processed = len(q1_already_downloaded_file_path_list)

    if q2_already_dl_slice:
        q2_num_docs_processed = len(q2_already_dl_slice)
    elif q2_file_paths:
        q2_num_docs_processed = len(q2_file_paths)
    else:
        q2_num_docs_processed = len(q2_already_downloaded_file_path_list)

    print q1, 'num_docs_processed', q1_num_docs_processed
    print q2, 'num_docs_processed', q2_num_docs_processed
    num_docs_processed = [q1_num_docs_processed, q2_num_docs_processed]

    return_dict_s = []
    if dictType == 'all':
        all_words_dict = get_all_words_dict(q1, q2, q1_dict, q2_dict)
        normalized_all_words_dict = normalize_dict(all_words_dict, query, stemmed)
        return_dict_s.append(normalized_all_words_dict)
        if len(normalized_all_words_dict[query.q1.lower()]) < 1 or len(normalized_all_words_dict[query.q2.lower()]) < 1:
            # One of the query term vectors is empty: fall back to 90 degrees (no similarity).
            angle_list = [90.00]
        else:
            angle_list = Cosine_Sim.main(normalized_all_words_dict, q1, q2)

    if dictType == 'protein':
        query_dicts = [q1_dict, q2_dict]
        combined_dict = combine_dictionaries(query_dicts)
        normalized_protein_dict = normalize_dict(combined_dict, query, stemmed)
        return_dict_s.append(normalized_protein_dict)
        if len(normalized_protein_dict[query.q1.lower()]) < 1 or len(normalized_protein_dict[query.q2.lower()]) < 1:
            angle_list = [90.00]
        else:
            angle_list = Cosine_Sim.main(normalized_protein_dict, q1, q2)

    return angle_list, return_dict_s, num_docs_processed
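# Illustrative sketch (not from the original code): the angles returned above
# run from 0 degrees (identical term vectors) to 90 degrees (no shared
# evidence, the fallback used when a query vector is empty). The threshold
# below is a hypothetical example value, not one used by the pipeline.
def is_likely_related(angle_list, threshold=45.0):
    # Treat the pair as related when the smallest angle is under the threshold.
    return min(angle_list) < threshold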
def get_results_pack(self, pack, dbvar, event_string, ignore_numbers):
    self.log("Start results_pack generation.")
    need_mirror = False
    compare_days_str = pack["result_options"]["compare_to_days"]
    try:
        compare_int = int(compare_days_str)
    except ValueError:
        self.bug("Cannot convert compare_to_days value: {} into int.".format(compare_days_str))
    else:
        if compare_int > 0:
            need_mirror = True
    agg_len = pack["result_options"]["aggregation_period"]
    agg_type = pack["result_options"]["aggregation_type"]

    # COLLECT PRIME DATA
    date_list = []
    m_datelist = []
    list_of_plot_tuples = []
    date_list, result_dict = queries.main(pack, dbvar, ignore_numbers=ignore_numbers)
    date_list = [chunk[0] for chunk in self.get_chunks(date_list, agg_len)]
    list_of_plot_tuples = self.generate_list_of_plot_tuples(
        result_dict, list_of_plot_tuples, agg_len, agg_type)
    if pack["result_options"]["use_new_breakdowns"]:
        breakdown_keys = None
    else:
        breakdown_keys = list(result_dict["lines"].keys())

    # COLLECT MIRROR DATA IF NEEDED
    if need_mirror:
        pack["data_filters"]["start_datetime"] = (
            pack["data_filters"]["start_datetime"] - datetime.timedelta(compare_int))
        pack["data_filters"]["end_datetime"] = (
            pack["data_filters"]["end_datetime"] - datetime.timedelta(compare_int))
        m_datelist, m_resultdict = queries.main(
            pack, dbvar, breakdown_keys=breakdown_keys, ignore_numbers=ignore_numbers)
        m_datelist = [chunk[0] for chunk in self.get_chunks(m_datelist, agg_len)]
        list_of_plot_tuples = self.generate_list_of_plot_tuples(
            m_resultdict, list_of_plot_tuples, agg_len, agg_type, is_mirror=True)

    self.log("Completed results_pack generation.")
    # print(list_of_plot_tuples)
    return date_list, m_datelist, list_of_plot_tuples
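# Illustrative sketch (hypothetical dates, not from the original code) of the
# mirror-window shift used above: both ends of the date filter move back by
# compare_to_days so the mirror query covers an equally long, earlier period.
import datetime

example_start = datetime.datetime(2020, 3, 15)
example_end = datetime.datetime(2020, 3, 22)
example_compare_int = 7
mirror_start = example_start - datetime.timedelta(example_compare_int)  # 2020-03-08
mirror_end = example_end - datetime.timedelta(example_compare_int)      # 2020-03-15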
def send_query(self):
    # 'arg' is not defined in this method; it is presumably an argument list
    # defined at module level (left unchanged here).
    queries.main(arg[1:])
    return True