Example #1
def extract_terms_for_titles(titles, min_repeat=0):
	"""Download the PubMed records for the given titles and return keywords
	that recur across them at least min_repeat times."""
	# TODO: auto-increment this query id
	query_id = '1'
	filenames = download_records(titles, 'query_' + query_id)
	# alternatives: PubMedParser.extract_all_keywords(filenames)
	#               PubMedParser.extract_all_mesh(filenames)
	keywords = PubMedParser.extract_repeated_keywords(filenames, min_repeat)
	return keywords
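
A minimal usage sketch, assuming download_records and PubMedParser are available in scope; the newline-separated title string follows the input convention shown in Example #3:

titles = ('The architecture of cognitive control in the human prefrontal cortex\n'
          'FMRI evidence for a hierarchical organization of the prefrontal cortex')
keywords = extract_terms_for_titles(titles, min_repeat=1)
print(keywords)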
Example #2
import os
import operator

def find_neighbors_for_terms(terms, num_neighbors=10):
	# query_pubmed, PubMedParser, merge_terms and read_counts come from the
	# surrounding module (not shown in this snippet)
	query = ' '.join(terms)
	results_dir = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'queryresults')
	f = os.path.join(results_dir, query + '.txt')
	logfile = os.path.join(results_dir, query + '_log.txt')
	query_pubmed(query, f, logfile)
	keywords = PubMedParser.extract_repeated_keywords([f], 0)
	keywords = merge_terms(keywords, terms)
	sorted_keywords = sorted(keywords.items(), key=operator.itemgetter(1), reverse=True)
	pub_counts = read_counts(logfile)
	pub_counts['keyword_count'] = len(sorted_keywords)
	pub_counts['showing_count'] = num_neighbors
	return {'keywords': sorted_keywords[:num_neighbors], 'log': pub_counts}
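
A hedged usage sketch; the example terms are hypothetical, and the dictionary keys follow the return statement above:

neighbors = find_neighbors_for_terms(['prefrontal cortex', 'working memory'], num_neighbors=5)
print(neighbors['keywords'])  # top (keyword, count) pairs
print(neighbors['log'])       # publication counts plus keyword_count / showing_count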
Example #3
		filenames.append(f)
		sleep(0.5)  # throttle requests: PubMed allows at most 3 queries per second
	return filenames

# the input is either a list of titles or a list of terms, given as one newline-separated string
input = 'The architecture of cognitive control in the human prefrontal cortex\nFMRI evidence for a hierarchical organization of the prefrontal cortex\nRostral–caudal gradients of abstraction revealed by multivariate pattern analysis of working memory'
input_option = 'title' # either "term" or "title"
query_id = '1'
assoc_option = 'group' # either "group" or "individual"

# Step 1: generate 0-level keywords that are directly from user inputs
if input_option == 'title':
	# issue one query per publication; NCBI allows no more than 3 requests per second
	filenames = download_records(input, 'query_' + query_id)
	# alternatives: PubMedParser.extract_all_keywords(filenames)
	#               PubMedParser.extract_all_mesh(filenames)
	keywords = PubMedParser.extract_repeated_keywords(filenames, 1)
elif input_option == 'term':
	print('not implemented')
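# at this point `keywords` is (inferred from its use in Example #2) a dict that
# maps each extracted keyword to its repeat count across the downloaded records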
	
# Step 2: generate 1-level keywords
# two ways to do this: 1) combine all keywords into a single query (group) 2) issue one query per keyword (individual)
if assoc_option == 'group':
	query = ' '.join(keywords)
	f = query + '.txt'
	query_pubmed(query, f)
	keywords = PubMedParser.extract_repeated_keywords([f], 30)
	print(keywords)
elif assoc_option == 'individual':
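	# hedged sketch, not part of the original snippet: one plausible 'individual'
	# strategy is to issue a separate PubMed query per 0-level keyword, reusing
	# only the helpers shown above
	per_term_keywords = {}
	for key in keywords:
		f = key + '.txt'
		query_pubmed(key, f)
		per_term_keywords[key] = PubMedParser.extract_repeated_keywords([f], 30)
	print(per_term_keywords)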