def import_paper_after_search(self, paper, callback):
    log_info('Trying to import google scholar citation')
    try:
        data = paper.data
        citations = data.findAll('div', {'class': 'gs_fl'})[0]
        log_debug('Citations: %s' % str(citations))
        for link in citations.findAll('a'):
            log_debug('Link: %s' % str(link))
            if link['href'].startswith('/scholar.bib'):
                log_debug('Found BibTeX link: %s' % link['href'])

                def bibtex_callback(session, message, user_data):
                    self._got_bibtex(message, callback, user_data)

                message = Soup.Message.new(method='GET',
                                           uri_string=BASE_URL + link['href'])
                # The GSP cookie makes Google Scholar serve BibTeX export links
                message.request_headers.append('Cookie',
                                               'GSP=ID=%s:CF=4' % self.google_id)
                soup_session.queue_message(message, bibtex_callback, self.label)
                # FIXME: Google scholar does not always seem to include the
                #        URL in the bibtex data -- in this case add a link
    except Exception:
        traceback.print_exc()
def prepare_search_message(self, search_string):
    uri_string = BASE_URL + 'scholar?' + urllib.urlencode({'q': search_string})
    message = Soup.Message.new(method='GET', uri_string=uri_string)
    log_info('Starting google scholar request with uri_string="%s"' % uri_string)
    # This tells Google Scholar to return links to BibTeX
    message.request_headers.append('Cookie', 'GSP=ID=%s:CF=4' % self.google_id)
    return message
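# --- Illustrative sketch, not part of the original module --------------------
# What prepare_search_message() above ends up requesting, assuming BASE_URL is
# the Google Scholar host (it is defined elsewhere in this module). Only the
# urlencode call below is real code; the request/header lines are comments.
import urllib
print(BASE_URL + 'scholar?' + urllib.urlencode({'q': 'spiking neurons'}))
# -> e.g. "<BASE_URL>scholar?q=spiking+neurons"
# The request additionally carries the header
#     Cookie: GSP=ID=<google_id>:CF=4
# which is what makes Google Scholar include "Import into BibTeX" links in the
# result page that import_paper_after_search() later walks.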
def import_paper_after_search(self, paper, callback):
    pubmed_id = paper.data
    log_info('Trying to import pubmed citation with id %s' % pubmed_id)
    query = BASE_URL + EFETCH_QUERY % pubmed_id
    message = Soup.Message.new(method='GET', uri_string=query)

    def mycallback(session, message, user_data):
        self._paper_info_received(message, callback, user_data)

    soup_session.queue_message(message, mycallback, (self.label, pubmed_id))
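# --- Illustrative sketch, not part of the original module --------------------
# The (self.label, pubmed_id) tuple queued above travels with the request so
# the response can be matched to the right provider and article. A hypothetical
# receiver along the lines of _paper_info_received() would unpack it like this
# (the real method's body is not shown in this excerpt):
def _paper_info_received_sketch(message, callback, user_data):
    label, pubmed_id = user_data  # which provider answered, for which article
    # ... read the efetch XML out of `message`, build a paper_info dict,
    #     then hand it to `callback` ...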
def paper_info_from_bibtex(data):
    if data is None:
        return {}

    # IEEE puts <br>s in their BibTeX
    data = data.replace('<br>', '\n')

    paper_info = {}
    result = parse_str(data)
    if len(result) == 0:
        log_warn('Could not parse BibTeX data')
        return {}

    bibtex = {}
    # FIXME: This does not handle special cases well...
    for r in result[0][2:]:
        bibtex[r[0].lower()] = ''.join([str(r_part) for r_part in r[1:]])

    # ACM (and some other publishers) put a full resolver URL into the doi
    # field -- strip the prefix so only the bare DOI remains
    if bibtex.get('doi', '').startswith('http://dx.doi.org/'):
        bibtex['doi'] = bibtex['doi'][len('http://dx.doi.org/'):]
    if bibtex.get('doi', '').startswith('http://doi.acm.org/'):
        bibtex['doi'] = bibtex['doi'][len('http://doi.acm.org/'):]

    # Mappings from BibTeX to our keys
    # TODO: Handle more fields
    mappings = {'doi': 'doi',
                'url': 'import_url',
                'title': 'title',
                'pages': 'pages',
                'abstract': 'abstract',
                'journal': 'journal',
                'year': 'year',
                'publisher': 'publisher'}

    for bibtex_key, our_key in mappings.items():
        if bibtex_key in bibtex:
            log_debug('Have key %s' % bibtex_key)
            # replace newlines with spaces and strip surrounding whitespace
            paper_info[our_key] = bibtex[bibtex_key].replace('\n', ' ').strip()

    # TODO: Handle editors, etc.?
    if 'author' in bibtex:
        if ' AND ' in bibtex['author']:
            paper_info['authors'] = bibtex['author'].split(' AND ')
        else:
            paper_info['authors'] = bibtex['author'].split(' and ')

    paper_info['bibtex'] = data

    log_info('imported paper_info: %s\nFrom bibtex: %s' % (str(paper_info),
                                                           str(bibtex)))
    return paper_info
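# --- Illustrative sketch, not part of the original module --------------------
# A hedged example of what paper_info_from_bibtex() is expected to return for a
# simple entry, assuming parse_str (the BibTeX parser used above) accepts it.
# The entry and all values are made up for illustration.
example_bibtex = """@article{doe2001example,
    author = {Doe, John and Roe, Jane},
    title = {An example title},
    journal = {Journal of Examples},
    year = {2001},
    doi = {http://dx.doi.org/10.1000/example.2001}
}"""
example_info = paper_info_from_bibtex(example_bibtex)
# Expected mapping (see `mappings` above):
#   example_info['doi']     == '10.1000/example.2001'      resolver prefix stripped
#   example_info['authors'] == ['Doe, John', 'Roe, Jane']  split on ' and '
#   example_info['year']    == '2001'
#   example_info['bibtex']  == example_bibtex              raw data kept verbatim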
def search(self, search_string, callback, error_callback):
    '''
    This method will be called by the GUI with the `search_string` when a
    search is initiated. Returns search results from the cache or initiates
    a new search using :meth:`search_async` if the search has not been
    performed before. Before calling the `callback`, saves the search
    results to the cache.

    This method should normally not be overwritten.
    '''
    # A tuple identifying the search, making it possible for the callback
    # function to deal with the results properly (otherwise results arriving
    # out of order could lead to wrongly displayed results)
    user_data = (self.label, search_string)

    if not search_string:
        callback(user_data, [])
        return

    if search_string in self.search_cache:
        log_debug('Result for "%s" already in cache.' % search_string)
        callback(user_data, self.search_cache[search_string])
        return

    log_info('Search for "%s" is not cached by this provider, '
             'starting new search' % search_string)
    try:
        def callback_wrapper(search_results):
            '''
            Before calling the actual callback, save the result in the cache
            and add `user_data` (tuple identifying request and search
            provider) to the call.
            '''
            log_debug('Saving %s in cache for "%s"' % (search_results,
                                                       search_string))
            self.search_cache[search_string] = search_results
            callback(user_data, search_results)

        self.search_async(search_string, callback_wrapper, error_callback)
    except Exception as ex:
        error_callback(ex, None)
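# --- Illustrative sketch, not part of the original module --------------------
# How a caller might drive search(); `provider` is a hypothetical stand-in for
# a concrete search provider instance (the real GUI wiring differs). The
# callback signatures follow search() above: the result callback receives
# (user_data, results), the error callback receives (exception, None).
def on_search_results(user_data, results):
    label, query = user_data  # which provider answered, and for which query
    print('%d result(s) for "%s" from %s' % (len(results), query, label))

def on_search_error(exception, _):
    print('Search failed: %s' % exception)

provider.search('ion channels', on_search_results, on_search_error)
# A second call with the same string is answered synchronously from
# self.search_cache, without another search_async() round trip.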