Example #1
0
def googlebooks_scr(parsed_url, date_format='%Y-%m-%d') -> tuple:
    """Create the response namedtuple."""
    query_dict = parse_qs(parsed_url.query)

    # Old-style URLs carry the volume id in the `id` query parameter;
    # new-style URLs carry it as the last path segment.
    id_values = query_dict.get('id')
    if id_values is None:  # the new URL format
        volume_id = parsed_url.path.rpartition('/')[2]
    else:
        volume_id = id_values[0]

    ris_text = request(
        f'https://{parsed_url.netloc}/books/download/?id={volume_id}'
        f'&output=ris',
        spoof=True).content.decode('utf8')
    dictionary = ris_parse(ris_text)
    dictionary['date_format'] = date_format
    # manually adding page number to dictionary:
    page_values = query_dict.get('pg')
    if page_values is not None:
        first_pg = page_values[0]
        dictionary['page'] = first_pg[2:]
        dictionary['url'] += f'&pg={first_pg}'
    # although google does not provide a language field:
    if not dictionary['language']:
        dictionary['language'] = classify(dictionary['title'])[0]
    return dict_to_sfn_cit_ref(dictionary)
Example #2
0
def noorlib_scr(url: str, date_format: str = '%Y-%m-%d') -> tuple:
    """Create the response namedtuple."""
    # RIS output is also available from noorlib, but bibtex is used here.
    bibtex_record = get_bibtex(url)
    dictionary = bibtex_parse(bibtex_record)
    dictionary['date_format'] = date_format
    return dict_to_sfn_cit_ref(dictionary)
Example #3
0
def noorlib_sfn_cit_ref(url: str, date_format: str = '%Y-%m-%d') -> tuple:
    """Create the response namedtuple."""
    # Parse the bibtex record for the URL (RIS parsing is an alternative).
    d = bibtex_parse(get_bibtex(url))
    d['date_format'] = date_format
    return dict_to_sfn_cit_ref(d)
Example #4
0
def isbn_scr(isbn_container_str: str,
             pure: bool = False,
             date_format: str = '%Y-%m-%d') -> tuple:
    """Create the response namedtuple.

    If ``pure`` is true, ``isbn_container_str`` is used as the ISBN
    as-is; otherwise an ISBN-13 (preferred) or ISBN-10 is extracted
    from it. For Iranian ISBNs, ketab.ir is also queried in a
    background thread and the better of the two results is chosen.
    """
    if pure:
        isbn = isbn_container_str
    else:
        # search for isbn13
        m = ISBN13_SEARCH(isbn_container_str)
        if m is not None:
            isbn = m[0]
        else:
            # search for isbn10
            # NOTE(review): if neither pattern matches, m is None and
            # m[0] raises TypeError — confirm callers pre-validate input.
            m = ISBN10_SEARCH(isbn_container_str)
            isbn = m[0]

    iranian_isbn = isbn_info(isbn) == 'Iran'

    # Pre-initializing the thread variable avoids the unbound-local
    # warnings (previously silenced with noinspection comments).
    ketabir_thread = None
    ketabir_result_list = []
    if iranian_isbn:
        ketabir_thread = Thread(target=ketabir_thread_target,
                                args=(isbn, ketabir_result_list))
        ketabir_thread.start()

    citoid_result_list = []
    citoid_thread = Thread(target=citoid_thread_target,
                           args=(isbn, citoid_result_list))
    citoid_thread.start()

    ottobib_bibtex = ottobib(isbn)
    otto_dict = bibtex_parse(ottobib_bibtex) if ottobib_bibtex else None

    if ketabir_thread is not None:
        ketabir_thread.join()
        ketabir_dict = ketabir_result_list[0] if ketabir_result_list else None
        dictionary = choose_dict(ketabir_dict, otto_dict)
    else:
        dictionary = otto_dict

    citoid_thread.join()
    if citoid_result_list:
        dictionary['oclc'] = citoid_result_list[0]['oclc']

    dictionary['date_format'] = date_format
    if 'language' not in dictionary:
        dictionary['language'] = classify(dictionary['title'])[0]
    return dict_to_sfn_cit_ref(dictionary)
Example #5
0
def urls_scr(url: str, date_format: str = '%Y-%m-%d') -> tuple:
    """Create the response namedtuple."""
    try:
        d = url2dict(url)
    except (ContentTypeError, ContentLengthError) as e:
        logger.exception(url)
        # Todo: i18n
        return 'Could not process the request.', e, ''
    d['date_format'] = date_format
    return dict_to_sfn_cit_ref(d)
Example #6
0
def ketabir_sfn_cit_ref(url: str, date_format='%Y-%m-%d') -> tuple:
    """Return the response namedtuple."""
    d = url2dictionary(url)
    d['date_format'] = date_format
    if 'language' not in d:
        # Assume that language is either fa or en.
        # Todo: give warning about this assumption?
        d['language'] = classify(d['title'])[0]
    return dict_to_sfn_cit_ref(d)
Example #7
0
def ketabir_scr(url: str, date_format='%Y-%m-%d') -> tuple:
    """Return the response namedtuple."""
    result = url2dictionary(url)
    result['date_format'] = date_format
    # Assume that language is either fa or en.
    # Todo: give warning about this assumption?
    if 'language' not in result:
        result['language'] = classify(result['title'])[0]
    return dict_to_sfn_cit_ref(result)
Example #8
0
def urls_sfn_cit_ref(url: str, date_format: str = '%Y-%m-%d') -> tuple:
    """Create the response namedtuple."""
    try:
        result = url2dict(url)
    except (ContentTypeError, ContentLengthError) as err:
        logger.exception(url)
        # Todo: i18n
        return 'Could not process the request.', err, ''
    result['date_format'] = date_format
    return dict_to_sfn_cit_ref(result)
Example #9
0
def jstor_scr(url: str, date_format: str = '%Y-%m-%d') -> tuple:
    """Create the response namedtuple."""
    # Check open-access status concurrently with the citation download.
    access_results = []
    access_thread = Thread(target=is_open_access, args=(url, access_results))
    access_thread.start()
    jstor_id = urlparse(url).path.rpartition('/')[2]
    bibtex_text = request(
        'https://www.jstor.org/citation/text/' + jstor_id
    ).content.decode('utf8')
    d = bibtex_parse(bibtex_text)
    d['jstor'] = jstor_id
    d['date_format'] = date_format
    access_thread.join()
    if access_results:
        d['jstor-access'] = 'free'
    return dict_to_sfn_cit_ref(d)
Example #10
0
def noormags_sfn_cit_ref(url: str, date_format: str = '%Y-%m-%d') -> tuple:
    """Create the response namedtuple."""
    ris_data = {}
    fetcher = Thread(target=ris_fetcher_thread, args=(url, ris_data))
    fetcher.start()
    d = bibtex_parse(get_bibtex(url))
    d['date_format'] = date_format
    # language parameter needs to be taken from RIS
    # other information are more accurate in bibtex
    # for example: http://www.noormags.ir/view/fa/articlepage/104040
    # "IS  - 1" is wrong in RIS but "number = { 45 }," is correct in bibtex
    fetcher.join()
    d.update(ris_data)
    return dict_to_sfn_cit_ref(d)
Example #11
0
def doi_scr(doi_or_url, pure=False, date_format='%Y-%m-%d') -> tuple:
    """Return the response namedtuple."""
    if pure:
        doi = doi_or_url
    else:
        # unescape '&', '<', and '>' in doi_or_url
        # decode percent encodings
        doi = DOI_SEARCH(unquote(unescape(doi_or_url)))[1]
    d = get_crossref_dict(doi)
    d['date_format'] = date_format
    if LANG == 'fa':
        d['language'] = classify(d['title'])[0]
    return dict_to_sfn_cit_ref(d)
Example #12
0
def noormags_sfn_cit_ref(url: str, date_format: str = '%Y-%m-%d') -> tuple:
    """Create the response namedtuple."""
    # Fetch the RIS record in the background while bibtex is parsed.
    ris_collection = {}
    ris_worker = Thread(
        target=ris_fetcher_thread, args=(url, ris_collection))
    ris_worker.start()
    dictionary = bibtex_parse(get_bibtex(url))
    dictionary['date_format'] = date_format
    # language parameter needs to be taken from RIS
    # other information are more accurate in bibtex
    # for example: http://www.noormags.ir/view/fa/articlepage/104040
    # "IS  - 1" is wrong in RIS but "number = { 45 }," is correct in bibtex
    ris_worker.join()
    dictionary.update(ris_collection)
    return dict_to_sfn_cit_ref(dictionary)
Example #13
0
def isbn_sfn_cit_ref(
    isbn_container_str: str, pure: bool = False, date_format: str = '%Y-%m-%d'
) -> tuple:
    """Create the response namedtuple."""
    if pure:
        isbn = isbn_container_str
    else:
        # Prefer an ISBN-13 match; fall back to searching for an ISBN-10.
        match = ISBN13_SEARCH(isbn_container_str)
        if not match:
            match = ISBN10_SEARCH(isbn_container_str)
        isbn = match[0]

    # Query ketab.ir and citoid concurrently with the ottobib request.
    ketabir_results = []
    ketabir_thread = Thread(
        target=ketabir_thread_target,
        args=(isbn, ketabir_results))
    ketabir_thread.start()

    citoid_results = []
    citoid_thread = Thread(
        target=citoid_thread_target,
        args=(isbn, citoid_results))
    citoid_thread.start()

    bibtex_text = ottobib(isbn)
    otto_dict = bibtex_parse(bibtex_text) if bibtex_text else None

    ketabir_thread.join()
    ketabir_dict = ketabir_results[0] if ketabir_results else None
    dictionary = choose_dict(ketabir_dict, otto_dict)

    citoid_thread.join()
    if citoid_results:
        dictionary['oclc'] = citoid_results[0]['oclc']

    dictionary['date_format'] = date_format
    if 'language' not in dictionary:
        dictionary['language'] = classify(dictionary['title'])[0]
    return dict_to_sfn_cit_ref(dictionary)
Example #14
0
def waybackmachine_sfn_cit_ref(
    archive_url: str, date_format: str = '%Y-%m-%d'
) -> tuple:
    """Create the response namedtuple."""
    match = URL_FULLMATCH(archive_url)
    if not match:
        # Could not parse the archive_url. Treat as an ordinary URL.
        return urls_sfn_cit_ref(archive_url, date_format)
    archive_year, archive_month, archive_day, original_url = match.groups()
    # Fetch the original URL concurrently with the archived copy.
    original_dict = {}
    original_thread = Thread(
        target=original_url2dict, args=(original_url, original_dict))
    original_thread.start()
    try:
        archive_dict = url2dict(archive_url)
    except (ContentTypeError, ContentLengthError) as e:
        logger.exception(archive_url)
        # Todo: i18n
        return 'Invalid content type or length.', e, ''
    archive_dict['date_format'] = date_format
    archive_dict['url'] = original_url
    archive_dict['archive-url'] = archive_url
    archive_dict['archive-date'] = date(
        int(archive_year), int(archive_month), int(archive_day))
    original_thread.join()
    if not original_dict:
        archive_dict['dead-url'] = 'yes'
    elif (
        original_dict['title'] == archive_dict['title']
        or original_dict['html_title'] == archive_dict['html_title']
    ):
        # The original fetch succeeded and its title agrees with the
        # archive's, so the original data can be trusted.
        archive_dict.update(original_dict)
        archive_dict['dead-url'] = 'no'
    else:
        # The titles differ: the content has probably changed and the
        # original data cannot be trusted.
        archive_dict['dead-url'] = 'unfit'
    if archive_dict['website'] == 'Wayback Machine':
        archive_dict['website'] = \
            urlparse(original_url).hostname.replace('www.', '')
    return dict_to_sfn_cit_ref(archive_dict)
Example #15
0
def googlebooks_sfn_cit_ref(url, date_format='%Y-%m-%d') -> tuple:
    """Create the response namedtuple."""
    dictionary = ris_parse(get_ris(url))
    dictionary['date_format'] = date_format
    pu = urlparse(url)
    pq = parse_qs(pu.query)
    # Old-style URLs carry the volume id in the `id` query parameter;
    # new-style URLs carry it as the last path segment. The previous
    # `pq['id'][0]` raised KeyError for the new format (the sibling
    # googlebooks_scr already handles both).
    id_values = pq.get('id')
    if id_values is not None:
        volume_id = id_values[0]
    else:  # the new URL format
        volume_id = pu.path.rpartition('/')[2]
    # default domain is prefered:
    dictionary['url'] = 'https://' + pu.netloc + '/books?id=' + volume_id
    # manually adding page number to dictionary:
    pg_values = pq.get('pg')
    if pg_values is not None:
        pg0 = pg_values[0]
        dictionary['page'] = pg0[2:]
        dictionary['url'] += '&pg=' + pg0
    # although google does not provide a language field:
    if not dictionary['language']:
        dictionary['language'] = classify(dictionary['title'])[0]
    return dict_to_sfn_cit_ref(dictionary)
Example #16
0
def oclc_scr(oclc: str, date_format: str = '%Y-%m-%d') -> tuple:
    """Create the response namedtuple."""
    text = request('https://www.worldcat.org/oclc/' + oclc + '?page=endnote'
                   '&client=worldcat.org-detailed_record').content.decode()
    if '<html' in text:  # invalid OCLC number
        return ('Error processing OCLC number: ' + oclc,
                'Perhaps you entered an invalid OCLC number?', '')
    d = ris_parse(text)
    if d['authors']:
        # worldcat has a '.' the end of the first name
        cleaned = []
        for first, last in d['authors']:
            if not first.isupper():
                first = first.rstrip('.')
            if not last.isupper():
                last = last.rstrip('.')
            cleaned.append((first, last))
        d['authors'] = cleaned
    d['date_format'] = date_format
    d['oclc'] = oclc
    d['title'] = d['title'].rstrip('.')
    return dict_to_sfn_cit_ref(d)
Example #17
0
def waybackmachine_scr(archive_url: str,
                       date_format: str = '%Y-%m-%d') -> tuple:
    """Create the response namedtuple."""
    match = URL_FULLMATCH(archive_url)
    if not match:
        # Could not parse the archive_url. Treat as an ordinary URL.
        return urls_scr(archive_url, date_format)
    archive_year, archive_month, archive_day, original_url = match.groups()
    # Fetch the original URL concurrently with the archived copy.
    original_dict = {}
    original_thread = Thread(target=original_url2dict,
                             args=(original_url, original_dict))
    original_thread.start()
    try:
        archive_dict = url2dict(archive_url)
    except (ContentTypeError, ContentLengthError) as e:
        logger.exception(archive_url)
        # Todo: i18n
        return 'Invalid content type or length.', e, ''
    archive_dict['date_format'] = date_format
    archive_dict['url'] = original_url
    archive_dict['archive-url'] = archive_url
    archive_dict['archive-date'] = date(int(archive_year),
                                        int(archive_month),
                                        int(archive_day))
    original_thread.join()
    if not original_dict:
        archive_dict['url-status'] = 'dead'
    elif (original_dict['title'] == archive_dict['title']
            or original_dict['html_title'] == archive_dict['html_title']):
        # The original fetch succeeded and its title agrees with the
        # archive's, so the original data can be trusted.
        archive_dict.update(original_dict)
        archive_dict['url-status'] = 'live'
    else:
        # The titles differ: the content has probably changed and the
        # original data cannot be trusted.
        archive_dict['url-status'] = 'unfit'
    if archive_dict['website'] == 'Wayback Machine':
        archive_dict['website'] = \
            urlparse(original_url).hostname.replace('www.', '')
    return dict_to_sfn_cit_ref(archive_dict)
Example #18
0
def oclc_sfn_cit_ref(oclc: str, date_format: str = '%Y-%m-%d') -> tuple:
    """Create the response namedtuple."""
    response = request(
        'https://www.worldcat.org/oclc/' + oclc + '?page=endnote'
        '&client=worldcat.org-detailed_record')
    text = response.text
    if '<html' in text:  # invalid OCLC number
        return (
            'Error processing OCLC number: ' + oclc,
            'Perhaps you entered an invalid OCLC number?',
            '')
    d = ris_parse(text)
    if d['authors']:
        # worldcat has a '.' the end of the first name
        def _strip_dot(name):
            return name if name.isupper() else name.rstrip('.')
        d['authors'] = [
            (_strip_dot(fn), _strip_dot(ln)) for fn, ln in d['authors']]
    d['date_format'] = date_format
    d['oclc'] = oclc
    d['title'] = d['title'].rstrip('.')
    return dict_to_sfn_cit_ref(d)
Example #19
0
def pmcid_sfn_cit_ref(pmcid: str, date_format='%Y-%m-%d') -> tuple:
    """Return the response namedtuple."""
    # Keep only the digits of the PMC id before querying NCBI.
    digits_only = NON_DIGITS_SUB('', pmcid)
    d = ncbi('pmcid', digits_only)
    d['date_format'] = date_format
    return dict_to_sfn_cit_ref(d)
Example #20
0
def pmcid_scr(pmcid: str, date_format='%Y-%m-%d') -> tuple:
    """Return the response namedtuple."""
    # Strip every non-digit character from the PMC id.
    cleaned = NON_DIGITS_SUB('', pmcid)
    result = ncbi('pmcid', cleaned)
    result['date_format'] = date_format
    return dict_to_sfn_cit_ref(result)