Code Example #1
File: arxiv.py  Project: zeliboba7/gpapers
    def parse_response(self, response):
        """
        Parse the arXiv response, which is in Atom XML format.
        
        The feed itself provides more-or-less all the
        information required without needing any extra requests.
        """

        papers = []
        try:
            parsed = feedparser.parse(response)
        except Exception as ex:
            log_error("arxiv: error while parsing response: %s" % ex[0])
            return papers

        log_debug("arxiv: received response containing %d results" %
                  len(parsed.entries))
        for entry in parsed.entries:
            paper = {}

            try:
                paper['title'] = entry['title']
                for link in entry['links']:
                    if link.get('title', None) == 'pdf':
                        paper['import_url'] = link['href']
                        break

                paper['authors'] = [a['name'] for a in entry['authors']]
                if 'arxiv_journal_ref' in entry:
                    paper['journal'] = entry['arxiv_journal_ref']
                if 'arxiv_doi' in entry:
                    paper['doi'] = entry['arxiv_doi']
                if 'arxiv_comment' in entry:
                    paper['notes'] = entry['arxiv_comment']
                paper['year'] = entry['published_parsed'].tm_year
                paper['arxiv_id'] = entry['id']
                paper['url'] = entry['id']
                paper['abstract'] = entry['summary'].replace('\n', ' ')
                if 'arxiv_primary_category' in entry:
                    paper['arxiv_type'] = entry['arxiv_primary_category'].get(
                        'term', '')

                paper['created'] = datetime.datetime(
                    year=entry['published_parsed'].tm_year,
                    month=entry['published_parsed'].tm_mon,
                    day=entry['published_parsed'].tm_mday)
                paper['updated'] = datetime.datetime(
                    year=entry['updated_parsed'].tm_year,
                    month=entry['updated_parsed'].tm_mon,
                    day=entry['updated_parsed'].tm_mday)

                paper['data'] = paper  # messy

                papers.append(paper)
            except Exception as ex:
                log_error("arxiv: error while reading item: %s" % ex[0])

        return papers
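
For context, here is a minimal sketch of how a response like the one parsed above can be obtained from arXiv's public Atom API. The query URL follows arXiv's documented API format; fetching with urllib instead of the project's own HTTP layer is an assumption for illustration.

import urllib.request

import feedparser

# arXiv's documented query endpoint; the search terms are only an example
query_url = ('http://export.arxiv.org/api/query'
             '?search_query=all:electron&start=0&max_results=5')

with urllib.request.urlopen(query_url) as response:
    atom_xml = response.read()

# feedparser accepts the raw Atom bytes, just as parse_response() above
# passes the response body straight to feedparser.parse()
parsed = feedparser.parse(atom_xml)
print('%d entries' % len(parsed.entries))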
Code Example #2
File: google_scholar.py  Project: zeliboba7/gpapers
    def _got_bibtex(self, message, callback, user_data):
        if message.status_code == Soup.KnownStatusCode.OK:
            bibtex_data = message.response_body.flatten().get_data()

            log_debug('Received BibTeX data:\n%s' % bibtex_data)
            paper_info = paper_info_from_bibtex(bibtex_data)
        else:
            log_error('google scholar got status code %d' % message.status_code)
            paper_info = None
        callback(paper_info, None, user_data)
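
A minimal sketch of the libsoup flow this handler assumes (PyGObject bindings for libsoup 2.4): queue an asynchronous GET, then read the body out of the finished message the same way _got_bibtex does. The URL and the main-loop handling are assumptions for illustration.

from gi.repository import GLib, Soup

loop = GLib.MainLoop()
session = Soup.SessionAsync()
# placeholder URL; the real request targets Google Scholar's BibTeX export
msg = Soup.Message.new('GET', 'https://example.org/citation.bib')

def on_finished(session, message, user_data):
    # same status check and body access pattern as _got_bibtex above
    if message.status_code == Soup.KnownStatusCode.OK:
        print(message.response_body.flatten().get_data())
    loop.quit()

session.queue_message(msg, on_finished, None)
loop.run()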
Code Example #3
File: models.py  Project: zeliboba7/gpapers
 def open(self):
     if self.full_text and os.path.isfile(self.full_text.path):
         uri = 'file://' + self.full_text.path
         if Gtk.show_uri(None, uri, Gdk.CURRENT_TIME):
             self.read_count += 1
             # temporarily disable receivers getting notified by post_save
             receivers = post_save.receivers
             post_save.receivers = []
             self.save()
             post_save.receivers = receivers
         else:
             log_error('Failed to open %s' % uri)
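
The receivers swap above mutes Django's post_save signal while saving, so bumping read_count does not re-trigger the save handlers; as written, though, the signal stays muted if save() raises. A small sketch of the same trick wrapped in a context manager that restores the receivers in a finally block (the helper name is ours, not the project's):

from contextlib import contextmanager

from django.db.models.signals import post_save

@contextmanager
def muted(signal):
    # temporarily detach every receiver; restore them even if the body raises
    saved = signal.receivers
    signal.receivers = []
    try:
        yield
    finally:
        signal.receivers = saved

# usage, equivalent to the swap in open() above:
#     with muted(post_save):
#         self.save()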
Code Example #4
File: pubmed.py  Project: zeliboba7/gpapers
    def _paper_info_received(self, message, callback, user_data):
        if message.status_code != Soup.KnownStatusCode.OK:
            log_error('Pubmed replied with error code %d for paper_info with id: %s' %
                      (message.status_code, user_data[1]))
            paper_info = None
        else:
            parsed_response = BeautifulStoneSoup(message.response_body.data)
            paper_info = {}

            # Journal
            try:
                journal = parsed_response.findAll('journal')[0]
                paper_info['journal'] = journal.findAll('title')[0].text
                try:
                    paper_info['issue'] = journal.findAll('issue')[0].text
                except Exception:
                    pass

                paper_info['pages'] = parsed_response.findAll('medlinepgn')[0].text
                log_debug('Pages: %s' % paper_info['pages'])
            except Exception:
                pass

            # Publication date
            try:
                articledate = parsed_response.findAll('articledate')[0]
                paper_info['year'] = articledate.year.text
            except Exception:
                pass

            # Title and abstract
            try:
                paper_info['title'] = parsed_response.findAll('articletitle')[0].text
                log_debug('Title: %s' % paper_info['title'])
                paper_info['abstract'] = parsed_response.findAll('abstracttext')[0].text
                log_debug('Abstract: %s' % paper_info['abstract'])
            except Exception:
                pass

            # Authors
            try:
                all_authors = []
                authors = parsed_response.findAll('author')
                for author in authors:
                    author_name = (author.forename.text + ' ' +
                                   author.lastname.text)
                    log_debug('\tAuthor: %s' % author_name)
                    all_authors.append(author_name)
                if all_authors:
                    paper_info['authors'] = all_authors
            except Exception:
                pass

            # URL + IDs
            try:
                articleids = parsed_response.findAll('articleid')
                for articleid in articleids:
                    if articleid['idtype'] == 'doi':
                        paper_info['doi'] = articleid.text
                    elif articleid['idtype'] == 'pubmed':
                        paper_info['pubmed_id'] = articleid.text
            except Exception:
                pass

        callback(paper_info=paper_info, user_data=user_data)
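
For context, a minimal sketch of the request that produces the XML parsed above, using NCBI's documented E-utilities efetch endpoint. urllib plus BeautifulSoup 4 stands in for the project's libsoup/BeautifulStoneSoup pair (BeautifulStoneSoup is the BeautifulSoup 3 name); the PubMed ID is a placeholder.

import urllib.request

from bs4 import BeautifulSoup

pubmed_id = '12345678'  # placeholder ID
url = ('https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi'
       '?db=pubmed&retmode=xml&id=' + pubmed_id)

with urllib.request.urlopen(url) as response:
    xml = response.read()

# the lenient HTML parser lowercases tag names, which matches the
# lowercase findAll('articletitle') lookups in the handler above
parsed = BeautifulSoup(xml, 'html.parser')
print(parsed.find('articletitle'))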