Ejemplo n.º 1
0
        # page end
        page_end = row[7]

        # reprint
        try:
            reprint = bool(int(row[10]))
        except:
            reprint = False

        # lookup number for pdf
        lookup = row[11]


        paper = Paper(author=author,
                      date=date,
                      title=title,
                      journal=journal,
                      volume = volume,
                      page_start=page_start,
                      page_end=page_end,
                      reprint=reprint,
                      lookup=lookup)
        paper.save()

        # keywords
        kwords = [x.strip() for x in row[8].split(',')]
        for kword in kwords:
            obj, created = Keyword.objects.get_or_create(keyword=kword)
            paper.keywords.add(obj)
Ejemplo n.º 2
0
    def retrieve(self, request, pk=None):
        """
        Return the paper whose PubMed ID is ``pk``.

        The local ``Paper`` table is consulted first. On a miss, the record
        is requested from PubMed through ``Entrez.efetch`` (MEDLINE return
        type), parsed, saved as a new ``Paper`` and then serialized.

        Args:
            request: the incoming request (not inspected here).
            pk: the PubMed ID to look up.

        Returns:
            A ``Response`` carrying the serialized paper, or a ``JsonResponse``
            with status 404 when the record cannot be fetched, parsed or
            saved.
        """
        try:
            paper = Paper.objects.get(pmid=pk)
        except Paper.DoesNotExist:
            try:
                handle = Entrez.efetch("pubmed", id=str(pk), rettype="medline")
                try:
                    attr = self._parse_medline(handle)
                finally:
                    # Release the connection even when parsing fails.
                    handle.close()

                if attr['DP'] is None:
                    # A response without a publication date is treated as
                    # "no such record"; this used to happen only by accident
                    # (re.sub raising TypeError on None).
                    raise ValueError('MEDLINE response has no DP field')

                # Keep only the digits of the date, then the first four
                # of them (the year when DP starts with it).
                attr['DP'] = re.sub("[^0-9]", "", attr['DP'])[:4]

                if attr['PMC']:
                    # Store the numeric part of the PMC identifier only.
                    attr['PMC'] = re.sub("[^0-9]", "", attr['PMC'])

                if attr['TI']:
                    attr['TI'] = attr['TI'].strip('.')

                paper = Paper(
                    pmid=pk,
                    author=attr['AU'],
                    pub_year=attr['DP'],
                    title=attr['TI'],
                    journal=attr['TA'],
                    volume=attr['VI'],
                    pages=attr['PG'],
                    abstract=attr['AB'],
                    keywords=attr['MH'],
                    pmc=attr['PMC'],
                )
                paper.save()
            except Exception:
                # Any failure while importing the record is reported to the
                # client as "not found"; KeyboardInterrupt/SystemExit are no
                # longer swallowed.
                return JsonResponse(status=404, data={'success':'false','message':'PMID:%s not found' % pk})

        serializer = PaperSerializer(paper)
        return Response(serializer.data)

    @staticmethod
    def _parse_medline(lines):
        """
        Collect the MEDLINE fields used to build a ``Paper``.

        Args:
            lines: iterable of text lines in ``KEY - value`` form, where a
                line starting with a space continues the previous field.

        Returns:
            A dict with the keys DP, TI, TA, VI, PG, AB and PMC (``None``
            when the field is absent; the last occurrence wins) plus AU and
            MH, each a ', '-joined string of every occurrence ('' when none).
        """
        attr = dict.fromkeys(('DP', 'TI', 'TA', 'VI', 'PG', 'AB', 'PMC'))
        authors = []
        mesh_terms = []

        def store(field):
            # Split on the first dash only: values may contain dashes too.
            key, sep, val = field.partition('-')
            if not sep:
                # Not a "KEY - value" line; ignore it instead of aborting
                # the whole record.
                return
            key = key.strip()
            val = val.strip()
            if key in attr:
                attr[key] = val
            elif key == 'AU':
                authors.append(val)
            elif key == 'MH':
                mesh_terms.append(val)

        buff = ''
        for line in lines:
            if not line.strip():
                continue
            if line.startswith(' '):
                # Continuation of the field currently being buffered.
                buff = buff + ' ' + line.strip()
                continue
            if buff:
                store(buff)
            buff = line.strip()

        # Flush the final field, which no following line would trigger.
        if buff:
            store(buff)

        attr['AU'] = ', '.join(authors)
        attr['MH'] = ', '.join(mesh_terms)
        return attr