def _get_search_url(search: Search, start_record: Optional[int] = 0) -> str:
    """
    Build the URL used to retrieve data from the ACM database.

    See https://dl.acm.org/search/advanced for query tips

    Parameters
    ----------
    search : Search
        A search instance; its ``query``, ``since`` and ``until`` attributes are read
    start_record : int, optional
        Value sent as the ``startPage`` URL parameter, by default 0.
        When None the parameter is left out instead of being sent as the text "None"

    Returns
    -------
    str
        a URL to be used to retrieve data from ACM database
    """

    # when a wildcard is present, the search term cannot be enclosed in quotes
    transformed_query = query_util.replace_search_term_enclosures(search.query, '', '', True)

    # some additional query transformations
    transformed_query = transformed_query.replace(' AND NOT ', ' NOT ')
    transformed_query = query_util.replace_search_term_enclosures(transformed_query, '"', '"')

    # NOTE: only the Abstract field is queried for now. Combining it with the
    # Keyword and Title fields through OR was not working properly.
    query = f'Abstract:({transformed_query})'

    url_parameters = {
        'fillQuickSearch': 'false',
        'expand': 'all',
        'AllField': query,
        'pageSize': MAX_ENTRIES_PER_PAGE,
    }

    # urlencode() would serialize None as the literal string "None"
    if start_record is not None:
        url_parameters['startPage'] = start_record

    # inserted after startPage so the parameter order stays the same as before
    url_parameters['sortBy'] = 'Ppub'

    if search.since is not None:
        url_parameters['AfterMonth'] = search.since.month
        url_parameters['AfterYear'] = search.since.year

    if search.until is not None:
        url_parameters['BeforeMonth'] = search.until.month
        url_parameters['BeforeYear'] = search.until.year

    return f'{BASE_URL}/action/doSearch?{urlencode(url_parameters)}'
def _get_search_url(search: Search, start_record: Optional[int] = 0) -> str:
    """
    Build the URL used to retrieve data from the PubMed database.

    See https://www.ncbi.nlm.nih.gov/books/NBK25500/ for query tips

    Parameters
    ----------
    search : Search
        A search instance; its ``query``, ``since`` and ``until`` attributes are read
    start_record : int, optional
        Sequence number of first record to fetch (sent as ``retstart``), by default 0.
        When None the ``retstart`` parameter is left out

    Returns
    -------
    str
        a URL to be used to retrieve data from PubMed database
    """

    query = search.query.replace(' AND NOT ', ' NOT ')
    # every search term is tagged with [TIAB]
    query = query_util.replace_search_term_enclosures(query, '"', '"[TIAB]')

    url = f'{BASE_URL}/entrez/eutils/esearch.fcgi?db=pubmed&term={query} AND has abstract [FILT] AND "journal article"[Publication Type]'

    if search.since is not None or search.until is not None:
        # a missing bound falls back to the widest possible value
        since = datetime.date(1, 1, 1) if search.since is None else search.since
        until = datetime.date.today() if search.until is None else search.until

        # The components are formatted explicitly rather than with strftime():
        # "%Y" is not zero-padded for years below 1000 on some platforms, so the
        # open lower bound could come out as "1/01/01" instead of "0001/01/01".
        date_range = ':'.join(
            f'{bound.year:04d}/{bound.month:02d}/{bound.day:02d}' for bound in (since, until)
        )
        url += f' AND {date_range}[Date - Publication]'

    if start_record is not None:
        url += f'&retstart={start_record}'

    url += f'&retmax={MAX_ENTRIES_PER_PAGE}&sort=pub+date'

    return url
def _get_query(search: Search) -> str:
    """
    Get the translated query from search instance to fetch data from Scopus database

    See https://dev.elsevier.com/tips/ScopusSearchTips.htm for query tips

    Parameters
    ----------
    search : Search
        A search instance; its ``query``, ``since``, ``until`` and
        ``publication_types`` attributes are read

    Returns
    -------
    str
        The translated query. The ``SRCTYPE`` filter is only appended when at
        least one recognised publication type was requested, and its codes are
        always listed in the same order
    """

    query = query_util.replace_search_term_enclosures(search.query, '"', '"', True)
    query = query_util.replace_search_term_enclosures(query, '{', '}')

    query = f'TITLE-ABS-KEY({query})'

    # PUBYEAR only offers strict comparisons here, so the bounds are widened by
    # one year to make the requested years inclusive
    if search.since is not None:
        query += f' AND PUBYEAR > {search.since.year - 1}'
    if search.until is not None:
        query += f' AND PUBYEAR < {search.until.year + 1}'

    if search.publication_types is not None:
        # publication type -> Scopus source type codes, in a fixed order so the
        # generated query does not depend on set iteration order
        source_type_codes = (
            ('conference proceedings', ('p',)),  # Conference Proceeding
            ('journal', ('j',)),  # Journal
            ('book', ('b', 'k')),  # Book, Book Series
            ('other', ('r', 'd')),  # Report, Trade Publication
        )

        selected_codes = [
            code
            for publication_type, codes in source_type_codes
            if publication_type in search.publication_types
            for code in codes
        ]

        # an empty "SRCTYPE()" clause would not be a usable filter, so skip it
        if selected_codes:
            query += f' AND SRCTYPE({" OR ".join(selected_codes)})'

    return query
def _get_search_url(search: Search, api_token: str, start_record: Optional[int] = 1) -> str:
    """
    Build the URL used to retrieve data from the IEEE database.

    See https://developer.ieee.org/docs/read/Metadata_API_details for query tips

    Parameters
    ----------
    search : Search
        A search instance; its ``query``, ``since``, ``until`` and
        ``publication_types`` attributes are read
    api_token : str
        The API key used to fetch data from IEEE database
    start_record : int, optional
        Sequence number of first record to fetch, by default 1.
        When None the ``start_record`` parameter is left out

    Returns
    -------
    str
        a URL to be used to retrieve data from IEEE database. The
        ``content_type`` parameter is only appended when at least one
        recognised publication type was requested, and its values are always
        listed in the same order
    """

    query = search.query.replace(' AND NOT ', ' NOT ')
    # every search term is turned into an Abstract field lookup
    query = query_util.replace_search_term_enclosures(query, '"Abstract":"', '"')

    url = f'{BASE_URL}/api/v1/search/articles?querytext=({query})&format=json&apikey={api_token}&max_records={MAX_ENTRIES_PER_PAGE}'

    if search.since is not None:
        url += f'&start_year={search.since.year}'

    if search.until is not None:
        url += f'&end_year={search.until.year}'

    if start_record is not None:
        url += f'&start_record={start_record}'

    if search.publication_types is not None:
        # publication type -> IEEE content types, in a fixed order so the
        # generated URL does not depend on set iteration order
        content_types_by_publication_type = (
            ('conference proceedings', ('Conferences',)),
            ('journal', ('Journals',)),
            ('book', ('Books',)),
            ('other', ('Courses', 'Early Access', 'Magazines', 'Standards')),
        )

        content_types = [
            content_type
            for publication_type, mapped_types in content_types_by_publication_type
            if publication_type in search.publication_types
            for content_type in mapped_types
        ]

        # avoid sending an empty "content_type=" parameter
        if content_types:
            url += f'&content_type={",".join(content_types)}'

    return url
def _get_search_url(search: Search, start_record: Optional[int] = 0) -> str:
    """
    Build the URL used to retrieve data from the arXiv database.

    See https://arxiv.org/help/api/user-manual for query tips

    Parameters
    ----------
    search : Search
        A search instance; only its ``query`` attribute is read
    start_record : int, optional
        Sequence number of first record to fetch (sent as ``start``), by default 0.
        When None the ``start`` parameter is left out instead of being sent
        as the text "None"

    Returns
    -------
    str
        a URL to be used to retrieve data from arXiv database
    """

    transformed_query = search.query.replace(' AND NOT ', ' ANDNOT ')

    # the arXiv search engine doesn't support hyphens properly
    transformed_query = transformed_query.replace('-', ' ')

    # startswith() instead of indexing, so an empty query does not raise IndexError
    if transformed_query.startswith('"'):
        transformed_query = ' ' + transformed_query

    # placeholder prefix, swapped for the real field name (ti: / abs:) below
    transformed_query = transformed_query.replace('[', 'FIELD_TYPE:[')

    # when a wildcard is present, the search term cannot be enclosed in quotes
    transformed_query = query_util.replace_search_term_enclosures(
        transformed_query, '', '', True)
    transformed_query = query_util.replace_search_term_enclosures(
        transformed_query, '"', '"').strip()

    # the same expression is searched in both the title and the abstract
    title_query = transformed_query.replace('FIELD_TYPE:', 'ti:')
    abstract_query = transformed_query.replace('FIELD_TYPE:', 'abs:')
    final_query = f'({title_query}) OR ({abstract_query})'

    url = f'{BASE_URL}/api/query?search_query={final_query}'

    if start_record is not None:
        url += f'&start={start_record}'

    url += f'&sortBy=submittedDate&sortOrder=descending&max_results={MAX_ENTRIES_PER_PAGE}'

    return url