예제 #1
0
def _get_search_url(search: Search, start_record: Optional[int] = 0) -> str:
    """
    Build the URL used to retrieve data from the ACM database
    See https://dl.acm.org/search/advanced for query tips

    Parameters
    ----------
    search : Search
        A search instance
    start_record : int, optional
        Value sent as the "startPage" parameter, by default 0.
        When None, the parameter is left out of the URL

    Returns
    -------
    str
        a URL to be used to retrieve data from ACM database
    """

    # when a wildcard is present, the search term cannot be enclosed in quotes
    transformed_query = query_util.replace_search_term_enclosures(search.query, '', '', True)

    # some additional query transformations
    transformed_query = transformed_query.replace(' AND NOT ', ' NOT ')
    transformed_query = query_util.replace_search_term_enclosures(transformed_query, '"', '"')

    # the OR connector between fields is not working properly, so only the
    # abstract is searched for now (no "Keyword:" / "Title:" clauses)
    query = f'Abstract:({transformed_query})'

    url_parameters = {
        'fillQuickSearch': 'false',
        'expand': 'all',
        'AllField': query,
        'pageSize': MAX_ENTRIES_PER_PAGE,
    }

    # urlencode() would serialise None as the literal text "None", so the
    # parameter is only added when a real value was provided. It is inserted
    # here to keep the same parameter order as before for integer values.
    if start_record is not None:
        url_parameters['startPage'] = start_record

    url_parameters['sortBy'] = 'Ppub'

    if search.since is not None:
        url_parameters['AfterMonth'] = search.since.month
        url_parameters['AfterYear'] = search.since.year

    if search.until is not None:
        url_parameters['BeforeMonth'] = search.until.month
        url_parameters['BeforeYear'] = search.until.year

    return f'{BASE_URL}/action/doSearch?{urlencode(url_parameters)}'
예제 #2
0
def _get_search_url(search: Search, start_record: Optional[int] = 0) -> str:
    """
    Build the "esearch" URL used to retrieve data from PubMed database
    See https://www.ncbi.nlm.nih.gov/books/NBK25500/ for query tips

    Parameters
    ----------
    search : Search
        A search instance
    start_record : int, optional
        Sequence number of first record to fetch, by default 0.
        When None, the "retstart" parameter is left out of the URL

    Returns
    -------
    str
        a URL to be used to retrieve data from PubMed database
    """
    term = search.query.replace(' AND NOT ', ' NOT ')
    # NOTE(review): presumably rewrites every search term as "term"[TIAB];
    # confirm against query_util.replace_search_term_enclosures
    term = query_util.replace_search_term_enclosures(term, '"', '"[TIAB]')

    # fixed filters appended to every query
    term += ' AND has abstract [FILT] AND "journal article"[Publication Type]'

    lower_bound = search.since
    upper_bound = search.until
    if not (lower_bound is None and upper_bound is None):
        # an open-ended range falls back to date(1, 1, 1) / today
        if lower_bound is None:
            lower_bound = datetime.date(1, 1, 1)
        if upper_bound is None:
            upper_bound = datetime.date.today()

        date_format = '%Y/%m/%d'
        term += f' AND {lower_bound.strftime(date_format)}:{upper_bound.strftime(date_format)}[Date - Publication]'

    url_parts = [f'{BASE_URL}/entrez/eutils/esearch.fcgi?db=pubmed&term={term}']

    if start_record is not None:
        url_parts.append(f'&retstart={start_record}')

    url_parts.append(f'&retmax={MAX_ENTRIES_PER_PAGE}&sort=pub+date')

    return ''.join(url_parts)
예제 #3
0
def _get_query(search: Search) -> str:
    """
    Get the translated query from search instance to fetch data from Scopus database
    See https://dev.elsevier.com/tips/ScopusSearchTips.htm for query tips

    Parameters
    ----------
    search : Search
        A search instance

    Returns
    -------
    str
        The translated query. The SRCTYPE clause is only present when at
        least one known publication type was requested, and its codes are
        emitted in sorted order so the same search always yields the same text
    """

    # NOTE(review): the trailing True presumably limits the first pass to
    # terms containing wildcards (quoted), the second pass then wraps the
    # remaining terms in braces; confirm against query_util
    query = query_util.replace_search_term_enclosures(search.query, '"', '"', True)
    query = query_util.replace_search_term_enclosures(query, '{', '}')

    query = f'TITLE-ABS-KEY({query})'

    # the comparisons are strict, so each bound is shifted by one year to
    # keep the since/until years themselves inside the range
    if search.since is not None:
        query += f' AND PUBYEAR > {search.since.year - 1}'
    if search.until is not None:
        query += f' AND PUBYEAR < {search.until.year + 1}'

    if search.publication_types is not None:

        source_type_codes = {
            'conference proceedings': ('p',),  # Conference Proceeding
            'journal': ('j',),  # Journal
            'book': ('b', 'k'),  # Book, Book Series
            'other': ('r', 'd'),  # Report, Trade Publication
        }

        selected_codes = set()
        for publication_type, codes in source_type_codes.items():
            if publication_type in search.publication_types:
                selected_codes.update(codes)

        # an empty "SRCTYPE()" clause would make the whole query invalid,
        # so the filter is skipped when nothing matched
        if selected_codes:
            query += f' AND SRCTYPE({" OR ".join(sorted(selected_codes))})'

    return query
예제 #4
0
def _get_search_url(search: Search, api_token: str, start_record: Optional[int] = 1) -> str:
    """
    Build the URL used to retrieve data from the IEEE database
    See https://developer.ieee.org/docs/read/Metadata_API_details for query tips

    Parameters
    ----------
    search : Search
        A search instance
    api_token : str
        The API key used to fetch data from IEEE database
    start_record : int, optional
        Sequence number of first record to fetch, by default 1.
        When None, the "start_record" parameter is left out of the URL

    Returns
    -------
    str
        a URL to be used to retrieve data from IEEE database. The
        "content_type" parameter is only present when at least one known
        publication type was requested, and its values are emitted in sorted
        order so the same search always yields the same URL
    """

    query = search.query.replace(' AND NOT ', ' NOT ')
    # NOTE(review): presumably rewrites every search term as
    # "Abstract":"term"; confirm against query_util
    query = query_util.replace_search_term_enclosures(query, '"Abstract":"', '"')

    url = f'{BASE_URL}/api/v1/search/articles?querytext=({query})&format=json&apikey={api_token}&max_records={MAX_ENTRIES_PER_PAGE}'

    if search.since is not None:
        url += f'&start_year={search.since.year}'

    if search.until is not None:
        url += f'&end_year={search.until.year}'

    if start_record is not None:
        url += f'&start_record={start_record}'

    if search.publication_types is not None:

        content_types_by_publication_type = {
            'conference proceedings': ('Conferences',),
            'journal': ('Journals',),
            'book': ('Books',),
            'other': ('Courses', 'Early Access', 'Magazines', 'Standards'),
        }

        content_types = set()
        for publication_type, names in content_types_by_publication_type.items():
            if publication_type in search.publication_types:
                content_types.update(names)

        # do not send an empty "content_type=" filter when nothing matched
        if content_types:
            url += f'&content_type={",".join(sorted(content_types))}'

    return url
예제 #5
0
def _get_search_url(search: Search, start_record: Optional[int] = 0) -> str:
    """
    Build the URL used to retrieve data from the arXiv database
    See https://arxiv.org/help/api/user-manual for query tips

    Parameters
    ----------
    search : Search
        A search instance
    start_record : int, optional
        Sequence number of first record to fetch, by default 0.
        When None, the "start" parameter is left out of the URL

    Returns
    -------
    str
        a URL to be used to retrieve data from arXiv database
    """

    transformed_query = search.query.replace(' AND NOT ', ' ANDNOT ')
    # the arXiv search engine doesn't support hyphens properly
    transformed_query = transformed_query.replace('-', ' ')

    # startswith() instead of indexing [0] so an empty query cannot raise
    # IndexError. NOTE(review): the reason for the padding is not evident
    # from this function; it is removed again by strip() below
    if transformed_query.startswith('"'):
        transformed_query = ' ' + transformed_query

    # temporary marker in front of every "[", swapped for the real field
    # prefix (ti: / abs:) further down
    transformed_query = transformed_query.replace('[', 'FIELD_TYPE:[')

    # when a wildcard is present, the search term cannot be enclosed in quotes
    transformed_query = query_util.replace_search_term_enclosures(
        transformed_query, '', '', True)
    transformed_query = query_util.replace_search_term_enclosures(
        transformed_query, '"', '"').strip()

    # the same expression is searched in the title and in the abstract
    abstract_query = transformed_query.replace('FIELD_TYPE:', 'abs:')
    title_query = transformed_query.replace('FIELD_TYPE:', 'ti:')
    final_query = f'({title_query}) OR ({abstract_query})'

    url = f'{BASE_URL}/api/query?search_query={final_query}'

    # formatting None would put the literal text "None" in the URL
    if start_record is not None:
        url += f'&start={start_record}'

    url += f'&sortBy=submittedDate&sortOrder=descending&max_results={MAX_ENTRIES_PER_PAGE}'

    return url