def loadBatchResults(request): if request.method == 'GET': print '>> loading batch result request...' current_dir = os.path.dirname(os.path.realpath(__file__)) query = 'virtual reality' f = os.path.join(current_dir, 'batchresults', query + '.txt') PubMedParser.load_evidence(f, True, 12) return HttpResponse(json.dumps({}), status=status.HTTP_200_OK)
def Main(): pubMedParser = PubMedParser() pubMedParser.extractRecords() documentPreprocessor = PreProcessor() for record in pubMedParser.Records: try: documentPreprocessor.PreprocessAll(str(record)) print documentPreprocessor.partsOfSpeech except KeyError: print "No abstract"
def find_neighbors_for_terms(terms, num_neighbors=10, user_id=1):
    """Query PubMed for *terms* and return the top co-occurring keywords.

    Parameters:
        terms: iterable of query terms.
        num_neighbors: how many top-ranked keywords to return.
        user_id: present for interface compatibility; not used here.

    Returns:
        dict with 'keywords' (top num_neighbors (keyword, count) pairs,
        highest count first) and 'log' (publication counts plus
        'keyword_count' and 'showing_count').
    """
    # Each term is followed by a single space; the trailing space is kept
    # deliberately because the query string doubles as the cache filename.
    query = ''.join(term + ' ' for term in terms)
    base_dir = os.path.dirname(os.path.realpath(__file__))
    result_path = os.path.join(base_dir, 'queryresults', query + '.txt')
    log_path = os.path.join(base_dir, 'queryresults', query + '_log.txt')
    query_pubmed(query, result_path, log_path)
    keywords = PubMedParser.extract_repeated_keywords([result_path], terms, threshold=10)
    PubMedParser.load_evidence(result_path)
    # Rank keywords by occurrence count, most frequent first.
    ranked = sorted(keywords.items(), key=operator.itemgetter(1), reverse=True)
    pub_counts = read_counts(log_path)
    pub_counts['keyword_count'] = len(ranked)
    pub_counts['showing_count'] = num_neighbors
    return {'keywords': ranked[:num_neighbors], 'log': pub_counts}
def search_pubs(query, num_pubs=50):
    """Run a PubMed query and return up to *num_pubs* publication records.

    Parameters:
        query: free-text PubMed query; also used as the cache filename stem.
        num_pubs: maximum number of records to read back.

    Returns:
        dict with 'contents' (parsed records) and 'log' (publication
        counts plus 'showing_count').
    """
    # Hoisted: the module directory is needed for both cache paths.
    here = os.path.dirname(os.path.realpath(__file__))
    result_path = os.path.join(here, 'queryresults', query + '.txt')
    log_path = os.path.join(here, 'queryresults', query + '_log.txt')
    query_pubmed(query, result_path, log_path)
    contents = PubMedParser.read_contents(result_path, num_pubs)
    pub_counts = read_counts(log_path)
    pub_counts['showing_count'] = num_pubs
    return {'contents': contents, 'log': pub_counts}
def find_evidence_for_terms(terms, skip_no_abstract=False, user_id=1): print '>> finding evidence for terms...' query = ' '.join(terms) current_dir = os.path.dirname(os.path.realpath(__file__)) f = os.path.join(current_dir, 'queryresults', query + '.txt') logfile = os.path.join(current_dir, 'queryresults', query + '_log' + '.txt') query_pubmed(query, f, logfile) return PubMedParser.load_evidence(f, skip_no_abstract)
def extract_terms_for_titles(titles, min_repeat=0):
    """Download PubMed records for *titles* and extract repeated keywords.

    Parameters:
        titles: publication titles to resolve via PubMed.
        min_repeat: minimum repetition count a keyword needs to be kept.

    Returns:
        keywords mapping produced by PubMedParser.extract_repeated_keywords.
    """
    # TODO: auto-increment the query id instead of hard-coding it.
    query_id = '1'
    record_files = download_records(titles, 'query_' + query_id)
    return PubMedParser.extract_repeated_keywords(record_files, min_repeat)
# NOTE(review): this chunk begins mid-function — the three statements below
# are the tail of a record-download helper defined above this view. The
# indentation here is a reconstruction (the loop collects each result file
# and throttles requests; the list is returned afterwards) — confirm against
# the full file.
        filenames.append(f)
        sleep(0.5)
    return filenames


# ---------------------------------------------------------------------------
# Module-level demo script.
# NOTE(review): `input` shadows the builtin of the same name.
# the input should be either a list of titles or a list of terms, separated by \n
input = 'The architecture of cognitive control in the human prefrontal cortex\nFMRI evidence for a hierarchical organization of the prefrontal cortex\nRostral–caudal gradients of abstraction revealed by multivariate pattern analysis of working memory'
input_option = 'title'  # either "term" or "title"
query_id = '1'
assoc_option = 'group'  # either "group" or "individual"

# Step 1: generate 0-level keywords that are directly from user inputs
if input_option == 'title':
    # issue query for each publication; cannot post more than 3 queries per second!
    filenames = download_records(input, 'query_' + query_id)
    # keywords = PubMedParser.extract_all_keywords(filenames)
    keywords = PubMedParser.extract_repeated_keywords(filenames, 1)
    # keywords = PubMedParser.extract_all_mesh(filenames)
elif input_option == 'term':
    print 'not implemented'

# Step 2: generate 1-level keywords
# can do this in two ways: 1) take multiple terms as the same query (group) 2) take each term as a query
if assoc_option == 'group':
    # Join all level-0 keywords into one grouped query (trailing space kept,
    # matching the filename convention used elsewhere in this file).
    query = ''
    for key in keywords:
        query += key + ' '
    f = query + '.txt'
    # NOTE(review): query_pubmed is called with two args here but three
    # (query, file, logfile) in the functions above — confirm the helper
    # accepts both forms.
    query_pubmed(query, f)
    keywords = PubMedParser.extract_repeated_keywords([f], 30)
    print keywords
elif assoc_option == 'individual':  # NOTE(review): branch body continues beyond this chunk