def googlebooks_scr(parsed_url, date_format='%Y-%m-%d') -> tuple:
    """Create the response namedtuple for a Google Books URL."""
    query_params = parse_qs(parsed_url.query)
    id_values = query_params.get('id')
    if id_values is None:
        # the new URL format: the volume id is the last path segment
        volume_id = parsed_url.path.rpartition('/')[2]
    else:
        volume_id = id_values[0]
    ris_text = request(
        f'https://{parsed_url.netloc}/books/download/?id={volume_id}'
        f'&output=ris',
        spoof=True).content.decode('utf8')
    dictionary = ris_parse(ris_text)
    dictionary['date_format'] = date_format
    # manually adding page number to dictionary:
    pg_values = query_params.get('pg')
    if pg_values is not None:
        first_pg = pg_values[0]
        dictionary['page'] = first_pg[2:]
        dictionary['url'] += f'&pg={first_pg}'
    # although google does not provide a language field:
    if not dictionary['language']:
        dictionary['language'] = classify(dictionary['title'])[0]
    return dict_to_sfn_cit_ref(dictionary)
def noorlib_scr(url: str, date_format: str = '%Y-%m-%d') -> tuple:
    """Create the response namedtuple from a NoorLib URL's BibTeX record."""
    bib_dict = bibtex_parse(get_bibtex(url))
    bib_dict['date_format'] = date_format
    # risr = get_ris(url)[1]
    # dictionary = risr.parse(ris)[1]
    return dict_to_sfn_cit_ref(bib_dict)
def noorlib_sfn_cit_ref(url: str, date_format: str = '%Y-%m-%d') -> tuple:
    """Create the response namedtuple from a NoorLib URL's BibTeX record."""
    parsed = bibtex_parse(get_bibtex(url))
    parsed['date_format'] = date_format
    # risr = get_ris(url)[1]
    # dictionary = risr.parse(ris)[1]
    return dict_to_sfn_cit_ref(parsed)
def isbn_scr(
    isbn_container_str: str, pure: bool = False,
    date_format: str = '%Y-%m-%d'
) -> tuple:
    """Create the response namedtuple for an ISBN (or text containing one).

    Looks up ketab.ir (Iranian ISBNs only) and ottobib concurrently, then
    merges in an OCLC number fetched via citoid.
    """
    if pure:
        isbn = isbn_container_str
    else:
        # search for isbn13 first, then fall back to isbn10
        match = ISBN13_SEARCH(isbn_container_str)
        if match is None:
            match = ISBN10_SEARCH(isbn_container_str)
        isbn = match[0]
    iranian_isbn = isbn_info(isbn) == 'Iran'
    if iranian_isbn:
        # ketab.ir is only queried for Iranian ISBNs
        ketabir_results = []
        ketabir_thread = Thread(
            target=ketabir_thread_target, args=(isbn, ketabir_results))
        ketabir_thread.start()
    citoid_results = []
    citoid_thread = Thread(
        target=citoid_thread_target, args=(isbn, citoid_results))
    citoid_thread.start()
    ottobib_bibtex = ottobib(isbn)
    otto_dict = bibtex_parse(ottobib_bibtex) if ottobib_bibtex else None
    if iranian_isbn:
        ketabir_thread.join()
        ketabir_dict = ketabir_results[0] if ketabir_results else None
        dictionary = choose_dict(ketabir_dict, otto_dict)
    else:
        # NOTE(review): otto_dict may be None here if ottobib returned
        # nothing; the assignments below would then raise — confirm callers
        # handle that.
        dictionary = otto_dict
    citoid_thread.join()
    if citoid_results:
        dictionary['oclc'] = citoid_results[0]['oclc']
    dictionary['date_format'] = date_format
    if 'language' not in dictionary:
        dictionary['language'] = classify(dictionary['title'])[0]
    return dict_to_sfn_cit_ref(dictionary)
def urls_scr(url: str, date_format: str = '%Y-%m-%d') -> tuple:
    """Create the response namedtuple for a generic URL.

    On unprocessable content, returns an error triple instead of raising.
    """
    try:
        url_dict = url2dict(url)
    except (ContentTypeError, ContentLengthError) as e:
        logger.exception(url)
        # Todo: i18n
        return 'Could not process the request.', e, ''
    url_dict['date_format'] = date_format
    return dict_to_sfn_cit_ref(url_dict)
def ketabir_sfn_cit_ref(url: str, date_format='%Y-%m-%d') -> tuple:
    """Return the response namedtuple for a ketab.ir URL."""
    book_dict = url2dictionary(url)
    book_dict['date_format'] = date_format
    if 'language' not in book_dict:
        # Assume that language is either fa or en.
        # Todo: give warning about this assumption?
        book_dict['language'] = classify(book_dict['title'])[0]
    return dict_to_sfn_cit_ref(book_dict)
def ketabir_scr(url: str, date_format='%Y-%m-%d') -> tuple:
    """Return the response namedtuple for a ketab.ir URL."""
    d = url2dictionary(url)
    d['date_format'] = date_format
    if 'language' not in d:
        # Assume that language is either fa or en.
        # Todo: give warning about this assumption?
        d['language'] = classify(d['title'])[0]
    return dict_to_sfn_cit_ref(d)
def urls_sfn_cit_ref(url: str, date_format: str = '%Y-%m-%d') -> tuple:
    """Create the response namedtuple for a generic URL.

    On unprocessable content, returns an error triple instead of raising.
    """
    try:
        d = url2dict(url)
    except (ContentTypeError, ContentLengthError) as e:
        logger.exception(url)
        # Todo: i18n
        return 'Could not process the request.', e, ''
    d['date_format'] = date_format
    return dict_to_sfn_cit_ref(d)
def jstor_scr(url: str, date_format: str = '%Y-%m-%d') -> tuple:
    """Create the response namedtuple for a JSTOR URL.

    Checks open-access status concurrently while fetching the BibTeX record.
    """
    access_results = []
    access_thread = Thread(target=is_open_access, args=(url, access_results))
    access_thread.start()
    article_id = urlparse(url).path.rpartition('/')[2]
    bibtex = request(
        'https://www.jstor.org/citation/text/' + article_id
    ).content.decode('utf8')
    d = bibtex_parse(bibtex)
    d['jstor'] = article_id
    d['date_format'] = date_format
    access_thread.join()
    if access_results:
        d['jstor-access'] = 'free'
    return dict_to_sfn_cit_ref(d)
def noormags_sfn_cit_ref(url: str, date_format: str = '%Y-%m-%d') -> tuple:
    """Create the response namedtuple for a NoorMags URL."""
    ris_collection = {}
    ris_thread = Thread(target=ris_fetcher_thread, args=(url, ris_collection))
    ris_thread.start()
    bib_dict = bibtex_parse(get_bibtex(url))
    bib_dict['date_format'] = date_format
    # language parameter needs to be taken from RIS
    # other information are more accurate in bibtex
    # for example: http://www.noormags.ir/view/fa/articlepage/104040
    # "IS - 1" is wrong in RIS but "number = { 45 }," is correct in bibtex
    ris_thread.join()
    bib_dict.update(ris_collection)
    return dict_to_sfn_cit_ref(bib_dict)
def doi_scr(doi_or_url, pure=False, date_format='%Y-%m-%d') -> tuple:
    """Return the response namedtuple for a DOI or a URL containing one."""
    if pure:
        doi = doi_or_url
    else:
        # unescape '&', '<', and '>' in doi_or_url
        # decode percent encodings
        doi = DOI_SEARCH(unquote(unescape(doi_or_url)))[1]
    crossref_dict = get_crossref_dict(doi)
    crossref_dict['date_format'] = date_format
    if LANG == 'fa':
        crossref_dict['language'] = classify(crossref_dict['title'])[0]
    return dict_to_sfn_cit_ref(crossref_dict)
def noormags_sfn_cit_ref(url: str, date_format: str = '%Y-%m-%d') -> tuple:
    """Create the response namedtuple for a NoorMags URL."""
    ris_collection = {}
    fetcher = Thread(target=ris_fetcher_thread, args=(url, ris_collection))
    fetcher.start()
    result = bibtex_parse(get_bibtex(url))
    result['date_format'] = date_format
    # language parameter needs to be taken from RIS
    # other information are more accurate in bibtex
    # for example: http://www.noormags.ir/view/fa/articlepage/104040
    # "IS - 1" is wrong in RIS but "number = { 45 }," is correct in bibtex
    fetcher.join()
    result.update(ris_collection)
    return dict_to_sfn_cit_ref(result)
def isbn_sfn_cit_ref(
    isbn_container_str: str, pure: bool = False,
    date_format: str = '%Y-%m-%d'
) -> tuple:
    """Create the response namedtuple for an ISBN (or text containing one).

    Queries ketab.ir and citoid in background threads while fetching the
    ottobib BibTeX record, then merges the results.
    """
    if pure:
        isbn = isbn_container_str
    else:
        # search for isbn13 first, then fall back to isbn10
        match = ISBN13_SEARCH(isbn_container_str)
        if not match:
            match = ISBN10_SEARCH(isbn_container_str)
        isbn = match[0]
    ketabir_results = []
    ketabir_thread = Thread(
        target=ketabir_thread_target, args=(isbn, ketabir_results))
    ketabir_thread.start()
    citoid_results = []
    citoid_thread = Thread(
        target=citoid_thread_target, args=(isbn, citoid_results))
    citoid_thread.start()
    ottobib_bibtex = ottobib(isbn)
    otto_dict = bibtex_parse(ottobib_bibtex) if ottobib_bibtex else None
    ketabir_thread.join()
    ketabir_dict = ketabir_results[0] if ketabir_results else None
    dictionary = choose_dict(ketabir_dict, otto_dict)
    citoid_thread.join()
    if citoid_results:
        dictionary['oclc'] = citoid_results[0]['oclc']
    dictionary['date_format'] = date_format
    if 'language' not in dictionary:
        dictionary['language'] = classify(dictionary['title'])[0]
    return dict_to_sfn_cit_ref(dictionary)
def waybackmachine_sfn_cit_ref(
    archive_url: str, date_format: str = '%Y-%m-%d'
) -> tuple:
    """Create the response namedtuple for a Wayback Machine archive URL.

    Fetches the live original URL concurrently to decide the dead-url
    status and to prefer fresh data when the titles still match.
    """
    match = URL_FULLMATCH(archive_url)
    if not match:
        # Could not parse the archive_url. Treat as an ordinary URL.
        return urls_sfn_cit_ref(archive_url, date_format)
    archive_year, archive_month, archive_day, original_url = match.groups()
    original_dict = {}
    original_thread = Thread(
        target=original_url2dict, args=(original_url, original_dict))
    original_thread.start()
    try:
        archive_dict = url2dict(archive_url)
    except (ContentTypeError, ContentLengthError) as e:
        logger.exception(archive_url)
        # Todo: i18n
        return 'Invalid content type or length.', e, ''
    archive_dict['date_format'] = date_format
    archive_dict['url'] = original_url
    archive_dict['archive-url'] = archive_url
    archive_dict['archive-date'] = date(
        int(archive_year), int(archive_month), int(archive_day))
    original_thread.join()
    if not original_dict:
        archive_dict['dead-url'] = 'yes'
    elif (
        original_dict['title'] == archive_dict['title']
        or original_dict['html_title'] == archive_dict['html_title']
    ):
        # The original_process has been successful and the original title
        # matches the archive title, so the live data can be trusted.
        archive_dict.update(original_dict)
        archive_dict['dead-url'] = 'no'
    else:
        # The content probably has changed and the original data
        # cannot be trusted.
        archive_dict['dead-url'] = 'unfit'
    if archive_dict['website'] == 'Wayback Machine':
        archive_dict['website'] = \
            urlparse(original_url).hostname.replace('www.', '')
    return dict_to_sfn_cit_ref(archive_dict)
def googlebooks_sfn_cit_ref(url, date_format='%Y-%m-%d') -> tuple:
    """Create the response namedtuple for a Google Books URL."""
    # bibtex_result = get_bibtex(url) [1]
    # dictionary = bibtex.parse(bibtex_result) [1]
    d = ris_parse(get_ris(url))
    d['date_format'] = date_format
    url_parts = urlparse(url)
    query_params = parse_qs(url_parts.query)
    # default domain is prefered:
    d['url'] = 'https://' + url_parts.netloc + '/books?id=' + \
        query_params['id'][0]
    # manually adding page number to dictionary:
    pg = query_params.get('pg')
    if pg is not None:
        d['page'] = pg[0][2:]
        d['url'] += '&pg=' + pg[0]
    # although google does not provide a language field:
    if not d['language']:
        d['language'] = classify(d['title'])[0]
    return dict_to_sfn_cit_ref(d)
def oclc_scr(oclc: str, date_format: str = '%Y-%m-%d') -> tuple:
    """Create the response namedtuple for an OCLC number."""
    text = request(
        'https://www.worldcat.org/oclc/' + oclc + '?page=endnote'
        '&client=worldcat.org-detailed_record').content.decode()
    if '<html' in text:  # invalid OCLC number
        return (
            'Error processing OCLC number: ' + oclc,
            'Perhaps you entered an invalid OCLC number?',
            '')
    d = ris_parse(text)
    if d['authors']:
        # worldcat has a '.' the end of the first name
        cleaned = []
        for first, last in d['authors']:
            cleaned.append((
                first if first.isupper() else first.rstrip('.'),
                last if last.isupper() else last.rstrip('.'),
            ))
        d['authors'] = cleaned
    d['date_format'] = date_format
    d['oclc'] = oclc
    d['title'] = d['title'].rstrip('.')
    return dict_to_sfn_cit_ref(d)
def waybackmachine_scr(
        archive_url: str, date_format: str = '%Y-%m-%d') -> tuple:
    """Create the response namedtuple for a Wayback Machine archive URL.

    Fetches the live original URL concurrently to decide the url-status
    and to prefer fresh data when the titles still match.
    """
    match = URL_FULLMATCH(archive_url)
    if not match:
        # Could not parse the archive_url. Treat as an ordinary URL.
        return urls_scr(archive_url, date_format)
    archive_year, archive_month, archive_day, original_url = match.groups()
    original_dict = {}
    original_thread = Thread(
        target=original_url2dict, args=(original_url, original_dict))
    original_thread.start()
    try:
        archive_dict = url2dict(archive_url)
    except (ContentTypeError, ContentLengthError) as e:
        logger.exception(archive_url)
        # Todo: i18n
        return 'Invalid content type or length.', e, ''
    archive_dict['date_format'] = date_format
    archive_dict['url'] = original_url
    archive_dict['archive-url'] = archive_url
    archive_dict['archive-date'] = date(
        int(archive_year), int(archive_month), int(archive_day))
    original_thread.join()
    if not original_dict:
        archive_dict['url-status'] = 'dead'
    elif (
        original_dict['title'] == archive_dict['title']
        or original_dict['html_title'] == archive_dict['html_title']
    ):
        # The original_process has been successful and the original title
        # matches the archive title, so the live data can be trusted.
        archive_dict.update(original_dict)
        archive_dict['url-status'] = 'live'
    else:
        # The content probably has changed and the original data
        # cannot be trusted.
        archive_dict['url-status'] = 'unfit'
    if archive_dict['website'] == 'Wayback Machine':
        archive_dict['website'] = \
            urlparse(original_url).hostname.replace('www.', '')
    return dict_to_sfn_cit_ref(archive_dict)
def oclc_sfn_cit_ref(oclc: str, date_format: str = '%Y-%m-%d') -> tuple:
    """Create the response namedtuple for an OCLC number."""
    text = request(
        'https://www.worldcat.org/oclc/' + oclc + '?page=endnote'
        '&client=worldcat.org-detailed_record').text
    if '<html' in text:  # invalid OCLC number
        return (
            'Error processing OCLC number: ' + oclc,
            'Perhaps you entered an invalid OCLC number?',
            '')
    d = ris_parse(text)
    if d['authors']:
        # worldcat has a '.' the end of the first name
        d['authors'] = [
            (
                first if first.isupper() else first.rstrip('.'),
                last if last.isupper() else last.rstrip('.'),
            )
            for first, last in d['authors']]
    d['date_format'] = date_format
    d['oclc'] = oclc
    d['title'] = d['title'].rstrip('.')
    return dict_to_sfn_cit_ref(d)
def pmcid_sfn_cit_ref(pmcid: str, date_format='%Y-%m-%d') -> tuple:
    """Return the response namedtuple for a PMC ID."""
    # strip any non-digit characters (e.g. a leading 'PMC' prefix)
    digits_only = NON_DIGITS_SUB('', pmcid)
    d = ncbi('pmcid', digits_only)
    d['date_format'] = date_format
    return dict_to_sfn_cit_ref(d)
def pmcid_scr(pmcid: str, date_format='%Y-%m-%d') -> tuple:
    """Return the response namedtuple for a PMC ID."""
    # strip any non-digit characters (e.g. a leading 'PMC' prefix)
    normalized = NON_DIGITS_SUB('', pmcid)
    result = ncbi('pmcid', normalized)
    result['date_format'] = date_format
    return dict_to_sfn_cit_ref(result)