Esempio n. 1
0
def request(query, params):
    """Google-Scholar search request.

    Computes the result offset from ``params['pageno']``, builds the Scholar
    URL for ``query`` (language arguments come from ``get_lang_info``), appends
    whatever ``time_range_url(params)`` returns and stores the result in
    ``params['url']``.  The language headers and an ``Accept`` header are
    written into ``params['headers']``.

    Returns the same ``params`` dict that was passed in.
    """
    # pylint: disable=undefined-variable

    # value sent as ``start``: (pageno - 1) * 10
    first_result = (params['pageno'] - 1) * 10

    lang_info = get_lang_info(params, supported_languages, language_aliases, False)
    accept_language = lang_info['headers']['Accept-Language']
    logger.debug("HTTP header Accept-Language --> %s", accept_language)

    # subdomain is: scholar.google.xy  (the "www." prefix is swapped out and
    # the new value is written back into lang_info)
    scholar_host = lang_info['subdomain'].replace("www.", "scholar.")
    lang_info['subdomain'] = scholar_host

    # 'q' goes first, then the language arguments, then the fixed arguments;
    # a later key overrides the value of an earlier one with the same name.
    url_args = {'q': query}
    url_args.update(lang_info['params'])
    url_args.update({
        'ie': "utf8",
        'oe': "utf8",
        'start': first_result,
    })

    search_url = ''.join(('https://', scholar_host, '/scholar', "?", urlencode(url_args)))
    search_url = search_url + time_range_url(params)
    params['url'] = search_url

    headers = params['headers']
    headers.update(lang_info['headers'])
    headers['Accept'] = 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8'

    return params
Esempio n. 2
0
def request(query, params):
    """Google search request for the ``tbm=isch`` vertical.

    Builds the ``/search`` URL for ``query`` on the subdomain reported by
    ``get_lang_info``, optionally appends a ``tbs=qdr:...`` time filter and a
    ``safe=...`` filter, and stores the URL in ``params['url']``.  The
    ``Accept-Language`` and ``Accept`` headers are set on ``params['headers']``.

    Returns the same ``params`` dict that was passed in.

    NOTE(review): this block used to be labelled "Google-Video", but
    ``tbm=isch`` is, to my knowledge, Google's *image* search selector --
    confirm which engine this function belongs to.
    """
    # pylint: disable=undefined-variable
    lang_info = get_lang_info(params, supported_languages, language_aliases)

    # The URL is collected piece by piece and joined once at the end.
    url_parts = [
        'https://',
        lang_info['subdomain'],
        '/search',
        "?",
        urlencode({
            'q': query,
            'tbm': "isch",
            'hl': lang_info['hl'],
            'lr': lang_info['lr'],
            'ie': "utf8",
            'oe': "utf8",
            'num': 30,
        }),
    ]

    # time filter: only added when the requested range is a known key
    time_range = params['time_range']
    if time_range in time_range_dict:
        url_parts.append('&' + urlencode({'tbs': 'qdr:' + time_range_dict[time_range]}))

    # safe-search filter: only added for a truthy safesearch setting
    safesearch = params['safesearch']
    if safesearch:
        url_parts.append('&' + urlencode({'safe': filter_mapping[safesearch]}))

    search_url = ''.join(url_parts)
    logger.debug("query_url --> %s", search_url)
    params['url'] = search_url

    accept_language = lang_info['Accept-Language']
    logger.debug("HTTP header Accept-Language --> %s", accept_language)

    headers = params['headers']
    headers['Accept-Language'] = accept_language
    headers['Accept'] = 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8'

    return params
Esempio n. 3
0
def request(query, params):
    """Google-News search request.

    Builds the ``news.google.com/search`` URL for ``query`` and stores it in
    ``params['url']``; the language headers, an ``Accept`` header and a
    date-stamped ``CONSENT`` cookie are written into ``params['headers']``.

    Returns the same ``params`` dict that was passed in.
    """
    # pylint: disable=undefined-variable
    lang_info = get_lang_info(params, supported_languages, language_aliases, False)
    accept_language = lang_info['headers']['Accept-Language']
    logger.debug("HTTP header Accept-Language --> %s", accept_language)

    # google news has only one domain
    news_host = 'news.google.com'
    lang_info['subdomain'] = news_host

    country = lang_info['country']
    ceid = "%s:%s" % (country, lang_info['language'])

    # google news redirects en to en-US
    google_args = lang_info['params']
    if google_args['hl'] == 'en':
        google_args['hl'] = 'en-US'

    # Very special to google-news compared to other google engines, the time
    # range is included in the search term.
    time_range = params['time_range']
    if time_range:
        query = query + ' ' + time_range_dict[time_range]

    # 'q' goes first, then the language arguments, then the fixed arguments;
    # a later key overrides the value of an earlier one with the same name.
    url_args = {'q': query}
    url_args.update(google_args)
    url_args.update({
        'ie': "utf8",
        'oe': "utf8",
        'gl': lang_info['country'],
    })

    # ceid includes a ':' character which must not be urlencoded, so it is
    # appended to the already encoded query string.
    params['url'] = ''.join((
        'https://',
        news_host,
        '/search',
        "?",
        urlencode(url_args),
        '&ceid=%s' % ceid,
    ))

    headers = params['headers']
    headers.update(lang_info['headers'])
    headers['Accept'] = 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8'

    # the cookie value carries today's date as YYYYMMDD
    today = datetime.now().strftime("%Y%m%d")
    headers['Cookie'] = "CONSENT=YES+cb.%s-14-p0.en+F+941;" % today

    return params