Ejemplo n.º 1
0
def googlebooks_scr(parsed_url, date_format='%Y-%m-%d') -> tuple:
    """Build the citation result for an already-parsed Google Books URL.

    The volume's RIS export is downloaded from the host named in the URL,
    parsed with ``ris_parse``, completed with the date format, the page
    (when the URL has a ``pg`` parameter) and a fallback language, and then
    passed to ``dict_to_sfn_cit_ref``.

    :param parsed_url: parsed URL object; its ``query``, ``path`` and
        ``netloc`` attributes are read.
    :param date_format: stored unchanged under the ``'date_format'`` key.
    :return: whatever ``dict_to_sfn_cit_ref`` returns for the dictionary.
    """
    query = parse_qs(parsed_url.query)

    ids = query.get('id')
    # URLs without an ``id`` parameter (the new URL format) are expected to
    # carry the volume id as the last segment of the path.
    volume_id = parsed_url.path.rpartition('/')[2] if ids is None else ids[0]

    ris_url = (
        f'https://{parsed_url.netloc}/books/download/?id={volume_id}'
        '&output=ris')
    ris_text = request(ris_url, spoof=True).content.decode('utf8')

    book = ris_parse(ris_text)
    book['date_format'] = date_format

    # The page number is not taken from the RIS data; it is read from the
    # ``pg`` query parameter, and the parameter is appended to the URL.
    pages = query.get('pg')
    if pages is not None:
        page_param = pages[0]
        # the first two characters of the value are dropped
        # (presumably a prefix such as "PA" -- confirm against real URLs)
        book['page'] = page_param[2:]
        book['url'] += f'&pg={page_param}'

    # Google does not provide a language field, so when none is set the
    # first element of ``classify(title)`` is used instead.
    if not book['language']:
        book['language'] = classify(book['title'])[0]
    return dict_to_sfn_cit_ref(book)
Ejemplo n.º 2
0
def ris_fetcher_thread(url, ris_collection):
    """Copy language and authors of the RIS record at url to ris_collection.

    The record is obtained with ``ris_parse(get_ris(url))``. A key of
    ``ris_collection`` is written only when the record holds a truthy value
    for it; otherwise that key is left untouched. Nothing is returned.

    This function is called in a thread.
    """
    record = ris_parse(get_ris(url))
    for field in ('language', 'authors'):
        value = record.get(field)
        if value:
            ris_collection[field] = value
Ejemplo n.º 3
0
def ris_fetcher_thread(url, ris_collection):
    """Transfer the non-empty language/authors RIS fields to ris_collection.

    Fetches the RIS data for ``url`` via ``get_ris``, parses it with
    ``ris_parse`` and stores the ``'language'`` and ``'authors'`` values in
    ``ris_collection`` under the same keys. Missing or falsy values are
    skipped, so existing entries are not overwritten by them.

    This function is called in a thread; the result is delivered through
    ``ris_collection`` rather than a return value.
    """
    parsed = ris_parse(get_ris(url))
    wanted_keys = ('language', 'authors')
    for key in wanted_keys:
        found = parsed.get(key)
        if not found:
            continue
        ris_collection[key] = found
Ejemplo n.º 4
0
def oclc_scr(oclc: str, date_format: str = '%Y-%m-%d') -> tuple:
    """Build the citation result for a WorldCat OCLC number.

    The EndNote export of the record is requested from worldcat.org and
    parsed with ``ris_parse``. If the reply contains ``'<html'`` the number
    is treated as invalid and a three-item tuple of error strings is
    returned instead.

    :param oclc: the OCLC number, concatenated into the request URL as is.
    :param date_format: stored unchanged under the ``'date_format'`` key.
    :return: the error tuple described above, or the result of
        ``dict_to_sfn_cit_ref`` for the parsed record.
    """
    endnote_url = (
        'https://www.worldcat.org/oclc/' + oclc + '?page=endnote'
        '&client=worldcat.org-detailed_record')
    text = request(endnote_url).content.decode()

    # an HTML page instead of an export means the OCLC number is invalid
    if '<html' in text:
        return (
            'Error processing OCLC number: ' + oclc,
            'Perhaps you entered an invalid OCLC number?',
            '',
        )

    record = ris_parse(text)
    authors = record['authors']
    if authors:
        # worldcat has a '.' at the end of the first name; trailing dots are
        # removed from both name parts, except for parts that are entirely
        # upper-case, which are kept as they are.
        cleaned = []
        for first, last in authors:
            if not first.isupper():
                first = first.rstrip('.')
            if not last.isupper():
                last = last.rstrip('.')
            cleaned.append((first, last))
        record['authors'] = cleaned

    record['date_format'] = date_format
    record['oclc'] = oclc
    record['title'] = record['title'].rstrip('.')
    return dict_to_sfn_cit_ref(record)
Ejemplo n.º 5
0
def googlebooks_sfn_cit_ref(url, date_format='%Y-%m-%d') -> tuple:
    """Build the citation result for a Google Books URL string.

    The RIS data fetched by ``get_ris(url)`` is parsed with ``ris_parse``;
    the date format, a rebuilt URL, the page (when the URL has a ``pg``
    parameter) and a fallback language are added before the dictionary is
    passed to ``dict_to_sfn_cit_ref``.

    :param url: the Google Books URL; it must have an ``id`` query
        parameter, otherwise the lookup of ``'id'`` raises ``KeyError``.
    :param date_format: stored unchanged under the ``'date_format'`` key.
    :return: whatever ``dict_to_sfn_cit_ref`` returns for the dictionary.
    """
    book = ris_parse(get_ris(url))
    book['date_format'] = date_format

    parts = urlparse(url)
    query = parse_qs(parts.query)

    # The stored URL is rebuilt in the short "/books?id=..." form on the
    # same host, keeping only the volume id from the original query.
    book['url'] = f"https://{parts.netloc}/books?id={query['id'][0]}"

    # The page number is not taken from the RIS data; it is read from the
    # ``pg`` query parameter, and the parameter is appended to the URL.
    pg_values = query.get('pg')
    if pg_values is not None:
        page_param = pg_values[0]
        # the first two characters of the value are dropped
        # (presumably a prefix such as "PA" -- confirm against real URLs)
        book['page'] = page_param[2:]
        book['url'] += '&pg=' + page_param

    # Google does not provide a language field, so when none is set the
    # first element of ``classify(title)`` is used instead.
    if not book['language']:
        book['language'] = classify(book['title'])[0]
    return dict_to_sfn_cit_ref(book)
Ejemplo n.º 6
0
def oclc_sfn_cit_ref(oclc: str, date_format: str = '%Y-%m-%d') -> tuple:
    """Build the citation result for a WorldCat OCLC number.

    The EndNote export of the record is requested from worldcat.org and its
    ``text`` is parsed with ``ris_parse``. If that text contains ``'<html'``
    the number is treated as invalid and a three-item tuple of error
    strings is returned instead.

    :param oclc: the OCLC number, concatenated into the request URL as is.
    :param date_format: stored unchanged under the ``'date_format'`` key.
    :return: the error tuple described above, or the result of
        ``dict_to_sfn_cit_ref`` for the parsed record.
    """

    def without_trailing_dot(name):
        # entirely upper-case name parts are kept untouched
        return name if name.isupper() else name.rstrip('.')

    endnote_url = (
        'https://www.worldcat.org/oclc/' + oclc + '?page=endnote'
        '&client=worldcat.org-detailed_record')
    text = request(endnote_url).text

    # an HTML page instead of an export means the OCLC number is invalid
    if '<html' in text:
        error_title = 'Error processing OCLC number: ' + oclc
        error_hint = 'Perhaps you entered an invalid OCLC number?'
        return (error_title, error_hint, '')

    record = ris_parse(text)
    authors = record['authors']
    if authors:
        # worldcat has a '.' at the end of the first name
        record['authors'] = [
            (without_trailing_dot(first), without_trailing_dot(last))
            for first, last in authors
        ]
    record['date_format'] = date_format
    record['oclc'] = oclc
    record['title'] = record['title'].rstrip('.')
    return dict_to_sfn_cit_ref(record)