def googlebooks_scr(parsed_url, date_format='%Y-%m-%d') -> tuple:
    """Build the citation response for an already-parsed Google Books URL.

    The RIS record of the volume is downloaded from the host named in
    `parsed_url`, completed with the page taken from the ``pg`` query
    parameter (when present) and with a detected language (when the
    record has none), and then passed to `dict_to_sfn_cit_ref`, whose
    result is returned.

    :param parsed_url: the parsed URL; its ``query``, ``path`` and
        ``netloc`` attributes are read.
    :param date_format: stored in the record under ``'date_format'``.
    """
    query = parse_qs(parsed_url.query)

    # The volume id is normally the ``id`` query parameter; in the new
    # URL format it is the last segment of the path instead.
    id_values = query.get('id')
    if id_values is None:
        volume_id = parsed_url.path.rpartition('/')[2]
    else:
        volume_id = id_values[0]

    ris_url = (
        f'https://{parsed_url.netloc}/books/download/?id={volume_id}'
        f'&output=ris')
    response = request(ris_url, spoof=True)
    record = ris_parse(response.content.decode('utf8'))
    record['date_format'] = date_format

    # The page number has to be added to the record manually.
    page_values = query.get('pg')
    if page_values is not None:
        page_param = page_values[0]
        # Skip the first two characters of the value (presumably a prefix
        # such as 'PA' -- confirm against real URLs).
        record['page'] = page_param[2:]
        record['url'] += f'&pg={page_param}'

    # Google does not provide a language field, so classify the title.
    if not record['language']:
        record['language'] = classify(record['title'])[0]

    return dict_to_sfn_cit_ref(record)
def ris_fetcher_thread(url, ris_collection):
    """Copy language and authors of the RIS record at `url`.

    The record is fetched with `get_ris` and parsed with `ris_parse`;
    its ``'language'`` and ``'authors'`` values are stored into
    `ris_collection` under the same keys, but only when they are truthy.
    Nothing is returned: the result is delivered by mutating
    `ris_collection`. This function is called in a thread.
    """
    record = ris_parse(get_ris(url))
    for key in ('language', 'authors'):
        value = record.get(key)
        if value:
            ris_collection[key] = value
def oclc_scr(oclc: str, date_format: str = '%Y-%m-%d') -> tuple:
    """Build the citation response for a WorldCat OCLC number.

    The EndNote export of the record is requested from worldcat.org and
    parsed with `ris_parse`; the cleaned-up record is passed to
    `dict_to_sfn_cit_ref`, whose result is returned.

    If the response contains ``'<html'`` the OCLC number is treated as
    invalid and a 3-tuple of strings (error message, hint, empty string)
    is returned instead.

    :param oclc: the OCLC number, inserted into the request URL as is.
    :param date_format: stored in the record under ``'date_format'``.
    """
    def without_trailing_dot(name):
        # All-uppercase names are left untouched; every other name loses
        # its trailing dots.
        return name if name.isupper() else name.rstrip('.')

    endnote_url = (
        'https://www.worldcat.org/oclc/' + oclc + '?page=endnote'
        '&client=worldcat.org-detailed_record')
    text = request(endnote_url).content.decode()

    if '<html' in text:
        # An HTML page instead of an export means an invalid OCLC number.
        return (
            'Error processing OCLC number: ' + oclc,
            'Perhaps you entered an invalid OCLC number?',
            '',
        )

    record = ris_parse(text)
    authors = record['authors']
    if authors:
        # WorldCat puts a '.' at the end of the first name.
        record['authors'] = [
            (without_trailing_dot(first), without_trailing_dot(last))
            for first, last in authors
        ]
    record['date_format'] = date_format
    record['oclc'] = oclc
    record['title'] = record['title'].rstrip('.')
    return dict_to_sfn_cit_ref(record)
def googlebooks_sfn_cit_ref(url, date_format='%Y-%m-%d') -> tuple:
    """Create the response namedtuple for a Google Books URL.

    The RIS record for `url` is fetched with `get_ris` and parsed with
    `ris_parse`. Its ``'url'`` is rewritten to the
    ``https://<host>/books?id=<volume>`` form, the page from the ``pg``
    query parameter is added when present, and the language is detected
    from the title when the record has none. The record is then passed
    to `dict_to_sfn_cit_ref`, whose result is returned.

    :param url: the Google Books URL as a string.
    :param date_format: stored in the record under ``'date_format'``.
    """
    dictionary = ris_parse(get_ris(url))
    dictionary['date_format'] = date_format
    pu = urlparse(url)
    pq = parse_qs(pu.query)

    # The volume id is normally the ``id`` query parameter. URLs in the
    # new format carry it as the last path segment instead; fall back to
    # that rather than raising KeyError on the missing parameter.
    id_ = pq.get('id')
    if id_ is not None:
        volume_id = id_[0]
    else:
        volume_id = pu.path.rpartition('/')[2]

    # default domain is preferred:
    dictionary['url'] = 'https://' + pu.netloc + '/books?id=' + volume_id

    # manually adding page number to dictionary:
    pg = pq.get('pg')
    if pg is not None:
        pg0 = pg[0]
        # Skip the first two characters of the value (presumably a prefix
        # such as 'PA' -- confirm against real URLs).
        dictionary['page'] = pg0[2:]
        dictionary['url'] += '&pg=' + pg0

    # although google does not provide a language field:
    if not dictionary['language']:
        dictionary['language'] = classify(dictionary['title'])[0]
    return dict_to_sfn_cit_ref(dictionary)
def oclc_sfn_cit_ref(oclc: str, date_format: str = '%Y-%m-%d') -> tuple:
    """Build the citation response for a WorldCat OCLC number.

    The EndNote export of the record is requested from worldcat.org (the
    response's ``text`` is used) and parsed with `ris_parse`; the
    cleaned-up record is passed to `dict_to_sfn_cit_ref`, whose result is
    returned.

    If the response contains ``'<html'`` the OCLC number is treated as
    invalid and a 3-tuple of strings (error message, hint, empty string)
    is returned instead.

    :param oclc: the OCLC number, inserted into the request URL as is.
    :param date_format: stored in the record under ``'date_format'``.
    """
    def clean(name):
        # All-uppercase names are left untouched; every other name loses
        # its trailing dots.
        if name.isupper():
            return name
        return name.rstrip('.')

    export_url = (
        'https://www.worldcat.org/oclc/' + oclc + '?page=endnote'
        '&client=worldcat.org-detailed_record')
    text = request(export_url).text

    if '<html' in text:
        # An HTML page instead of an export means an invalid OCLC number.
        error = 'Error processing OCLC number: ' + oclc
        hint = 'Perhaps you entered an invalid OCLC number?'
        return (error, hint, '')

    record = ris_parse(text)
    authors = record['authors']
    if authors:
        # WorldCat puts a '.' at the end of the first name.
        cleaned = []
        for first, last in authors:
            cleaned.append((clean(first), clean(last)))
        record['authors'] = cleaned
    record['date_format'] = date_format
    record['oclc'] = oclc
    record['title'] = record['title'].rstrip('.')
    return dict_to_sfn_cit_ref(record)