def request(query, params):
    """Assemble the Google-Scholar search request.

    Reads ``params['pageno']`` to derive the ``start`` offset, builds the
    query URL on the ``scholar.`` subdomain, appends whatever
    ``time_range_url(params)`` returns and stores the result in
    ``params['url']``.  The headers supplied by ``get_lang_info`` plus a fixed
    ``Accept`` header are written into ``params['headers']``.

    Returns the same (mutated) ``params`` object.
    """
    # offset of the first result on the requested page: (page - 1) * 10
    start = (params['pageno'] - 1) * 10

    lang_info = get_lang_info(
        # pylint: disable=undefined-variable
        params, supported_languages, language_aliases, False
    )
    accept_language = lang_info['headers']['Accept-Language']
    logger.debug("HTTP header Accept-Language --> %s", accept_language)

    # subdomain is: scholar.google.xy
    scholar_host = lang_info['subdomain'].replace("www.", "scholar.")
    lang_info['subdomain'] = scholar_host

    base_url = 'https://' + scholar_host + '/scholar'
    query_string = urlencode({
        'q': query,
        **lang_info['params'],
        'ie': "utf8",
        'oe': "utf8",
        'start': start,
    })
    params['url'] = base_url + "?" + query_string + time_range_url(params)

    request_headers = params['headers']
    request_headers.update(lang_info['headers'])
    request_headers['Accept'] = (
        'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8'
    )

    return params
def request(query, params):
    """Google-Images search request

    The URL is built with ``tbm=isch`` (Google's image search vertical) and
    asks for 30 results (``num``).

    :param query: search terms, sent as the ``q`` URL argument.
    :param params: request parameters; ``time_range``, ``safesearch`` and
        ``headers`` are read, ``url`` and two header fields are written.
    :returns: the same ``params`` object, with ``params['url']`` set and
        ``Accept-Language`` / ``Accept`` stored in ``params['headers']``.
    """
    lang_info = get_lang_info(
        # pylint: disable=undefined-variable
        params, supported_languages, language_aliases
    )

    query_url = 'https://' + lang_info['subdomain'] + '/search' + "?" + urlencode({
        'q': query,
        'tbm': "isch",
        'hl': lang_info['hl'],
        'lr': lang_info['lr'],
        'ie': "utf8",
        'oe': "utf8",
        'num': 30,
    })

    # optional filters are only appended when the caller asked for them;
    # a time range that is not a key of time_range_dict is silently ignored
    if params['time_range'] in time_range_dict:
        query_url += '&' + urlencode({'tbs': 'qdr:' + time_range_dict[params['time_range']]})
    if params['safesearch']:
        query_url += '&' + urlencode({'safe': filter_mapping[params['safesearch']]})

    logger.debug("query_url --> %s", query_url)
    params['url'] = query_url

    logger.debug("HTTP header Accept-Language --> %s", lang_info['Accept-Language'])
    params['headers']['Accept-Language'] = lang_info['Accept-Language']
    params['headers']['Accept'] = (
        'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8'
    )
    return params
def request(query, params):
    """Assemble the Google-News search request.

    Builds the query URL on ``news.google.com`` and stores it in
    ``params['url']``; the ``ceid`` argument (``country:language``) is appended
    without URL-encoding.  The headers supplied by ``get_lang_info``, a fixed
    ``Accept`` header and a dated ``CONSENT`` cookie are written into
    ``params['headers']``.

    Returns the same (mutated) ``params`` object.
    """
    lang_info = get_lang_info(
        # pylint: disable=undefined-variable
        params, supported_languages, language_aliases, False
    )
    accept_language = lang_info['headers']['Accept-Language']
    logger.debug("HTTP header Accept-Language --> %s", accept_language)

    # google news has only one domain
    news_host = 'news.google.com'
    lang_info['subdomain'] = news_host

    country = lang_info['country']
    ceid = "%s:%s" % (country, lang_info['language'])

    # google news redirects en to en-US
    url_args = lang_info['params']
    if url_args['hl'] == 'en':
        url_args['hl'] = 'en-US'

    # Very special to google-news compared to other google engines, the time
    # range is included in the search term.
    time_range = params['time_range']
    if time_range:
        range_term = time_range_dict[time_range]
        query = query + ' ' + range_term

    query_string = urlencode({
        'q': query,
        **url_args,
        'ie': "utf8",
        'oe': "utf8",
        'gl': country,
    })
    # ceid includes a ':' character which must not be urlencoded
    ceid_arg = '&ceid=%s' % ceid
    params['url'] = 'https://' + news_host + '/search' + "?" + query_string + ceid_arg

    request_headers = params['headers']
    request_headers.update(lang_info['headers'])
    request_headers['Accept'] = (
        'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8'
    )

    # the cookie carries the current local date as YYYYMMDD
    today = datetime.now().strftime("%Y%m%d")
    request_headers['Cookie'] = "CONSENT=YES+cb.%s-14-p0.en+F+941;" % today

    return params