# page end page_end = row[7] # reprint try: reprint = bool(int(row[10])) except: reprint = False # lookup number for pdf lookup = row[11] paper = Paper(author=author, date=date, title=title, journal=journal, volume = volume, page_start=page_start, page_end=page_end, reprint=reprint, lookup=lookup) paper.save() # keywords kwords = [x.strip() for x in row[8].split(',')] for kword in kwords: obj, created = Keyword.objects.get_or_create(keyword=kword) paper.keywords.add(obj)
def retrieve(self, request, pk=None):
    """
    Return the paper whose PubMed ID is ``pk``.

    The paper is first looked up locally via ``Paper.objects.get(pmid=pk)``.
    If it does not exist, the record is requested from PubMed with
    ``Entrez.efetch`` (``rettype="medline"``), parsed, saved as a new
    ``Paper`` and then serialized.

    Parameters:
        request: the incoming request (not read by this method).
        pk: the PubMed ID to look up.

    Returns:
        ``Response`` with the serialized paper on success, or a
        ``JsonResponse`` with status 404 when the record cannot be
        fetched, parsed or saved.
    """
    import logging

    try:
        paper = Paper.objects.get(pmid=pk)
    except Paper.DoesNotExist:
        # Tags stored as single values; a repeated tag overwrites the
        # previous value. 'AU' and 'MH' repeat and are collected in lists.
        scalar_tags = ('DP', 'TI', 'TA', 'VI', 'PG', 'AB', 'PMC')
        attr = dict.fromkeys(scalar_tags)
        authors = []
        mesh_terms = []

        def store(field):
            # A complete field looks like "TAG - value"; split on the
            # first dash only, because the value may contain dashes.
            tag, value = field.split('-', 1)
            tag = tag.strip()
            value = value.strip()
            if tag in scalar_tags:
                attr[tag] = value
            elif tag == 'AU':
                authors.append(value)
            elif tag == 'MH':
                mesh_terms.append(value)

        try:
            handle = Entrez.efetch("pubmed", id=str(pk), rettype="medline")
            try:
                pending = ''
                for line in handle:
                    text = line.strip()
                    if not text:
                        continue
                    if line.startswith(' '):
                        # Continuation of the field currently buffered.
                        pending = pending + ' ' + text
                        continue
                    if pending:
                        store(pending)
                    pending = text
                # The last field has no following line to trigger its
                # processing, so flush it explicitly.
                if pending:
                    store(pending)
            finally:
                # Release the handle even when parsing fails.
                handle.close()

            # Without a publication date the response is treated as
            # "no such record" (previously an implicit TypeError from
            # re.sub on None produced the same 404).
            if attr['DP'] is None:
                raise LookupError('no DP field in MEDLINE response')

            attr['AU'] = ', '.join(authors)
            attr['MH'] = ', '.join(mesh_terms)
            # Keep only digits; the first four are used as the year.
            attr['DP'] = re.sub("[^0-9]", "", attr['DP'])
            if attr['PMC']:
                attr['PMC'] = re.sub("[^0-9]", "", attr['PMC'])
            if attr['TI']:
                attr['TI'] = attr['TI'].strip('.')
            if attr['DP']:
                attr['DP'] = attr['DP'][:4]

            paper = Paper(
                pmid=pk,
                author=attr['AU'],
                pub_year=attr['DP'],
                title=attr['TI'],
                journal=attr['TA'],
                volume=attr['VI'],
                pages=attr['PG'],
                abstract=attr['AB'],
                keywords=attr['MH'],
                pmc=attr['PMC'],
            )
            paper.save()
        except Exception:
            # Any failure while fetching, parsing or saving is reported to
            # the client as "not found"; log it so the real cause is not
            # lost. KeyboardInterrupt/SystemExit are no longer swallowed.
            logging.getLogger(__name__).exception(
                "Could not retrieve PMID %s from PubMed", pk)
            return JsonResponse(status=404, data={'success':'false','message':'PMID:%s not found' % pk})

    serializer = PaperSerializer(paper)
    return Response(serializer.data)