def extract_terms_for_titles(titles, min_repeat=0, query_id='1'):
    """Download the records for ``titles`` and extract their repeated keywords.

    Args:
        titles: Publication titles, handed unchanged to ``download_records``
            (presumably a newline-separated string of titles -- TODO confirm
            against ``download_records``).
        min_repeat: Repetition threshold forwarded as the second argument of
            ``PubMedParser.extract_repeated_keywords``.
        query_id: Identifier appended to the ``'query_'`` prefix that is
            passed to ``download_records``. Defaults to ``'1'``, the value
            that used to be hard-coded, so existing callers are unaffected.
            TODO: auto increment this instead of relying on the caller.

    Returns:
        Whatever ``PubMedParser.extract_repeated_keywords`` returns for the
        downloaded files.
    """
    filenames = download_records(titles, 'query_%s' % (query_id,))
    # Alternative extractors that take the same file list:
    # PubMedParser.extract_all_keywords(filenames) and
    # PubMedParser.extract_all_mesh(filenames).
    return PubMedParser.extract_repeated_keywords(filenames, min_repeat)
def find_neighbors_for_terms(terms, num_neighbors=10):
    """Query PubMed with ``terms`` and return the top-ranked keywords.

    The terms are combined into one query, the query is run through
    ``query_pubmed`` (which is given a result file and a log file under the
    ``queryresults`` directory next to this module), keywords are extracted
    from the result file and merged with the input terms via ``merge_terms``.

    Args:
        terms: Iterable of term strings that together form the query.
        num_neighbors: Maximum number of keywords to return.

    Returns:
        A dict with two entries:
          * ``'keywords'``: list of ``(keyword, value)`` pairs sorted by
            value in descending order and cut to ``num_neighbors`` entries.
          * ``'log'``: the mapping returned by ``read_counts(logfile)`` with
            ``'keyword_count'`` (number of keywords before truncation) and
            ``'showing_count'`` (number of keywords actually returned) added.
    """
    # Keep the historical query format (every term followed by one space) so
    # the text sent to query_pubmed and the resulting file names do not change.
    query = ''.join(t + ' ' for t in terms)

    # A term may contain a path separator (e.g. "heading/subheading"); used
    # verbatim in a file name it would point into a non-existent subdirectory
    # or outside of the results directory. Only the file name is sanitised,
    # the query itself is passed on untouched.
    file_stem = query
    for sep in (os.sep, os.altsep):
        if sep:  # os.altsep is None on platforms with a single separator
            file_stem = file_stem.replace(sep, '_')

    results_dir = os.path.join(
        os.path.dirname(os.path.realpath(__file__)), 'queryresults')
    f = os.path.join(results_dir, file_stem + '.txt')
    logfile = os.path.join(results_dir, file_stem + '_log' + '.txt')

    query_pubmed(query, f, logfile)
    keywords = PubMedParser.extract_repeated_keywords([f], 0)
    keywords = merge_terms(keywords, terms)

    sorted_keywords = sorted(
        keywords.items(), key=operator.itemgetter(1), reverse=True)
    shown = sorted_keywords[:num_neighbors]

    pub_counts = read_counts(logfile)
    pub_counts['keyword_count'] = len(sorted_keywords)
    # Report what is really returned: there may be fewer keywords available
    # than the caller asked for.
    pub_counts['showing_count'] = len(shown)
    return {'keywords': shown, 'log': pub_counts}
filenames.append(f) sleep(0.5) return filenames # the input should be either a list of titles or a list of terms, separated by \n input = 'The architecture of cognitive control in the human prefrontal cortex\nFMRI evidence for a hierarchical organization of the prefrontal cortex\nRostral–caudal gradients of abstraction revealed by multivariate pattern analysis of working memory' input_option = 'title' # either "term" or "title" query_id = '1' assoc_option = 'group' # either "group" or "individual" # Step 1: generate 0-level keywords that are directly from user inputs if input_option == 'title': # issue query for each publication; cannot post more than 3 queries per second! filenames = download_records(input, 'query_' + query_id) # keywords = PubMedParser.extract_all_keywords(filenames) keywords = PubMedParser.extract_repeated_keywords(filenames, 1) # keywords = PubMedParser.extract_all_mesh(filenames) elif input_option == 'term': print 'not implemented' # Step 2: generate 1-level keywords # can do this in two ways: 1) take multiple terms as the same query (group) 2) take each term as a query if assoc_option == 'group': query = '' for key in keywords: query += key + ' ' f = query + '.txt' query_pubmed(query, f) keywords = PubMedParser.extract_repeated_keywords([f], 30) print keywords elif assoc_option == 'individual':