def augmentCollection(request, collection_id, seed_level): if request.method == 'GET': if collection_id in names: return HttpResponse(json.dumps({warning: 'Collection already exists! Try with another collection id.'}), status=status.HTTP_304_NOT_MODIFIED) seeds = Evidence.objects.filter(Q(created_by=collection_id)&~Q(abstract='')&Q(augmentation=seed_level)) counter = 0 start = 306 for e in seeds: counter += 1 if counter < start: continue print 'processing entry #' + str(counter) + ' out of ' + str(seeds.count()) unicodeTitle = e.title.encode('utf-8') related_evidence, citation_map, pmid = PubMedQuerier.get_related_evidence(unicodeTitle) print 'found ' + str(len(related_evidence)) + ' related evidence for ' + unicodeTitle refs, citedin = getRefsAndCitedin(pmid, citation_map) for re in related_evidence: if not re.title: continue re_object = Evidence.objects.create_evidence(re.title, re.abstract, serializePaperMetadata(re.pmid,re.authors_str,re.journal,re.year,''), collection_id, int(seed_level)+1) if re.pmid in refs: Citation.objects.get_or_create(paper_id=e.id, citation_id=re_object.id, collection_id=collection_id) if re.pmid in citedin: Citation.objects.get_or_create(paper_id=re_object.id, citation_id=e.id, collection_id=collection_id) return HttpResponse(json.dumps({}), status=status.HTTP_200_OK)
def augmentCollection(request, collection_id, seed_level): if request.method == 'GET': if collection_id in names: return HttpResponse(json.dumps({warning: 'Collection already exists! Try with another collection id.'}), status=status.HTTP_304_NOT_MODIFIED) seeds = Evidence.objects.filter(Q(created_by=collection_id)&~Q(abstract='')&Q(augmentation=seed_level)) counter = 0 for e in seeds: counter += 1 print 'processing entry #' + str(counter) + ' out of ' + str(seeds.count()) # print e.title # counter += 1 # if counter < 183: # continue # if counter > related_evidence = PubMedQuerier.get_related_evidence(e.title) try: print 'found ' + str(len(related_evidence)) + ' related evidence for ' + e.title except UnicodeEncodeError: pass for re in related_evidence: Evidence.objects.create_evidence(re.title, re.abstract, json.dumps({ 'PMID': re.pmid, 'AUTHOR': re.authors_str, 'JOURNAL': re.journal, 'DATE': re.year, 'AFFILIATION': '' }), collection_id, seed_level+1) return HttpResponse(json.dumps({}), status=status.HTTP_200_OK)
def completeCitationInfo(request, collection_id): if request.method == 'GET': if collection_id == '18': completeCitationInfoAMiner(collection_id) else: evidence = Evidence.objects.filter(created_by=collection_id) start = 546 counter = 1 for e in evidence: if counter < start: counter += 1 continue print '>> Processing entry ' + str(counter) + ' out of ' + str(evidence.count()) unicodeTitle = e.title.encode('utf-8') related_evidence, citation_map, pmid = PubMedQuerier.get_related_evidence(unicodeTitle) refs, citedin = getRefsAndCitedin(pmid, citation_map) print 'found ' + str(len(refs)) + ' ref, ' + str(len(citedin)) + ' citedin' for re in related_evidence: re_objects = Evidence.objects.filter(title=re.title,created_by=collection_id) # here paper_id cites citation_id for re_object in re_objects: if re.pmid in refs: try: Citation.objects.get_or_create(paper_id=e.id, citation_id=re_object.id, collection_id=collection_id) except MultipleObjectsReturned: print e.id print re_object.id if re.pmid in citedin: try: Citation.objects.get_or_create(paper_id=re_object.id, citation_id=e.id, collection_id=collection_id) except MultipleObjectsReturned: print re_object.id print e.id counter += 1 return HttpResponse(json.dumps({}), status=status.HTTP_200_OK)
def process_title_query(request): if request.method == 'POST': query = request.POST['query'] response_data = pq.extract_terms_for_titles(query, min_repeat=1) print 'Extracted terms:' print response_data return HttpResponse(json.dumps(response_data), content_type='application/json')
def search_pubs(request): if request.method == 'POST': query = request.POST.get('query') response_data = pq.search_pubs(query) print response_data return HttpResponse(json.dumps(response_data), content_type='application/json')
def get(self, request, format=None):
    """Fetch PubMed evidence for the posted terms, with topic-model output.

    Reads `terms` from the JSON request body, retrieves matching evidence
    (abstract required), and returns a JSON payload containing the
    flattened evidence plus topics and an evidence-to-topic map.
    """
    terms = json.loads(request.body)['terms']
    evidence = PubMedQuerier.find_evidence_for_terms(terms, skip_no_abstract=True)
    flat_json = flattenSerializedJson(serializers.serialize('json', evidence))
    output = {}
    output['evidence'] = json.loads(flat_json)
    # Topic modeling runs over the raw abstracts, keyed by evidence pk.
    pks = [item.pk for item in evidence]
    abstracts = [item.abstract for item in evidence]
    output['topics'], output['evidenceTopicMap'] = getTopicsForDocuments(pks, abstracts)
    return HttpResponse(json.dumps(output), status=status.HTTP_201_CREATED)
def post(self, request, format=None):
    """Find neighbor keywords for the posted concepts and persist them.

    Reads `concepts` and `requested_by` from the JSON body, asks PubMed
    for up to 10 neighbors, creates a Concept row per keyword, and returns
    per-keyword counts alongside the flattened serialized concepts.
    """
    payload = json.loads(request.body)
    neighbors = PubMedQuerier.find_neighbors_for_terms(
        payload['concepts'], num_neighbors=10, user_id=payload['requested_by'])
    counts = {}
    created = []
    for pair in neighbors['keywords']:
        counts[pair[0]] = pair[1]
        created.append(Concept.objects.create_concept(pair[0], payload['requested_by']))
    flat_json = flattenSerializedJson(serializers.serialize('json', created))
    output = {}
    output['counts'] = counts
    output['concepts'] = json.loads(flat_json)
    return HttpResponse(json.dumps(output), status=status.HTTP_201_CREATED)
def post(self, request, user_id, format=None):
    """Create an Evidence row from the posted title/abstract/metadata.

    Expected JSON body keys: 'title', 'abstract', 'metadata', 'created_by'.
    If no abstract is supplied, a PubMed lookup by title is attempted and,
    when it succeeds, its title/abstract replace the posted ones
    (feature added 01/20/2016). Returns the flattened serialized evidence
    with HTTP 201.
    """
    data = json.loads(request.body)
    # FIX: removed unused local `findRelatedEvidence = True;` (dead code
    # with a stray semicolon — never read anywhere in this method).
    # Strip curly braces that BibTeX-style titles often carry.
    title = data['title'].replace('{', '').replace('}', '')
    abstract = data['abstract']
    if abstract == '':
        temp_title, temp_abstract = PubMedQuerier.get_abstract_by_title(data['title'])
        if temp_title is not None:
            title = temp_title
            abstract = temp_abstract
    evidence = Evidence.objects.create_evidence(title, abstract, data['metadata'], data['created_by'], 0)
    serialized_json = serializers.serialize('json', [evidence])
    evidence_json = flattenSerializedJson(serialized_json)
    return HttpResponse(evidence_json, status=status.HTTP_201_CREATED)
def post(self, request, format=None): params = json.loads(request.body) terms = params['terms'] user_id = params['user_id'] texts = Text.objects.filter(created_by=user_id) serialized_json = serializers.serialize('json', texts) texts_json = flattenSerializedJson(serialized_json) evidenceCreated = Evidence.objects.filter(created_by=user_id) evidenceBookmarks = EvidenceBookmark.objects.filter(user_id=user_id) evidenceBookmarkedIds = [eb.evidence.pk for eb in evidenceBookmarks] evidenceBookmarked = Evidence.objects.filter(pk__in=evidenceBookmarkedIds) evidenceRetrieved = PubMedQuerier.find_evidence_for_terms(terms, skip_no_abstract=True) evidence = chain(evidenceCreated, evidenceBookmarked, evidenceRetrieved) print '>> serializing evidence...' serialized_json = serializers.serialize('json', evidence) print '>> flatten serialized evidence...' evidence_json = flattenSerializedJson(serialized_json) # let's provide topic modeling results in addition to the raw evidence output = {} print '>> loading evidence into json...' output['evidence'] = json.loads(evidence_json) contents = [t.content for t in texts] textPks = ['t-'+str(t.pk) for t in texts] abstracts = [e['abstract'] for e in output['evidence']] evidencePks = ['e-'+str(e['id']) for e in output['evidence']] if len(evidencePks + textPks) <= 1: output['topics'] = [] output['evidenceTextTopicMap'] = [] else: output['topics'], output['evidenceTextTopicMap'] = getTopicsForDocuments(evidencePks + textPks, abstracts + contents) return HttpResponse(json.dumps(output), status=status.HTTP_201_CREATED)
def process_term_query(request): if request.method == 'POST': query = request.POST.getlist('query[]') response_data = pq.find_neighbors_for_terms(query) print response_data return HttpResponse(json.dumps(response_data), content_type='application/json')