Beispiel #1
0
def search_diffbot_cache(term):
    """Search Diffbot's GLOBAL-INDEX collection for the exact phrase ``term``.

    Args:
        term: Phrase to search for; it is wrapped in double quotes before
            being sent as the ``query`` parameter.

    Returns:
        A list of dicts, one per returned object with non-empty ``text``,
        with the keys ``title``, ``url``, ``search_provider``, ``author``,
        ``date`` (ISO format), ``doc``, ``sentences`` and ``variants``.
        The list is empty when the response has no ``objects``.
    """
    # requests URL-encodes every ``params`` value itself, so the phrase is
    # passed raw; quoting it beforehand double-encodes it (%22 -> %2522).
    response = requests.get('http://api.diffbot.com/v3/search', params={
        'token': config.credentials.diffbot,
        'query': '"{}"'.format(term),
        'col': 'GLOBAL-INDEX'
    }).json()

    objects = response.get('objects')
    if not objects:
        if response.get('error'):
            # .get() so a payload without 'errorCode' still reports the
            # error instead of raising KeyError.
            print("Response Error '{}' (code: {})".format(
                response['error'], response.get('errorCode')))
        else:
            print("NO RESULTS")
        return []

    results = []
    for item in objects:
        text = item.get('text')
        if not text:
            # Objects without body text yield no result entry.
            continue
        # PageRequest is used here only to split the already-fetched text
        # into sentences (run=False presumably skips fetching -- confirm).
        pr = PageRequest(item.get('pageUrl'), term, run=False)
        pr.extract_sentences(text)
        results.append({
            "title": item.get('title'),
            "url": item.get('pageUrl'),
            'search_provider': 'diffbot',
            "author": item.get('author'),
            # NOTE(review): a missing date is passed to parse_date as '';
            # verify parse_date accepts an empty string.
            "date": parse_date(item.get('date', '')).isoformat(),
            "doc": text,
            "sentences": pr.sentences,
            "variants": list(pr.variants)
        })
    return results
Beispiel #2
0
def search_diffbot_cache(term):
    """Look up the exact phrase ``term`` in Diffbot's GLOBAL-INDEX collection.

    Args:
        term: Phrase to search for. It is sent as a double-quoted query.

    Returns:
        A list with one dict per returned object that has non-empty
        ``text``. Each dict holds ``title``, ``url``, ``search_provider``,
        ``author``, ``date`` (ISO format), ``doc``, ``sentences`` and
        ``variants``. An empty list is returned when the response carries
        no ``objects``.
    """
    # The query is handed to requests unescaped: ``params`` values are
    # URL-encoded by requests, so quoting here would encode the phrase twice.
    response = requests.get('http://api.diffbot.com/v3/search',
                            params={
                                'token': config.credentials.diffbot,
                                'query': '"{}"'.format(term),
                                'col': 'GLOBAL-INDEX'
                            }).json()

    hits = response.get('objects')
    if not hits:
        error = response.get('error')
        if error:
            # 'errorCode' is read with .get() so a missing code cannot turn
            # error reporting into a KeyError.
            print("Response Error '{}' (code: {})".format(
                error, response.get('errorCode')))
        else:
            print("NO RESULTS")
        return []

    results = []
    for hit in hits:
        body = hit.get('text')
        if body:
            # Sentence extraction runs on the text Diffbot already returned
            # (run=False presumably avoids a page fetch -- confirm).
            pr = PageRequest(hit.get('pageUrl'), term, run=False)
            pr.extract_sentences(body)
            result = {
                "title": hit.get('title'),
                "url": hit.get('pageUrl'),
                'search_provider': 'diffbot',
                "author": hit.get('author'),
                # NOTE(review): parse_date receives '' when 'date' is
                # absent; confirm it handles that input.
                "date": parse_date(hit.get('date', '')).isoformat(),
                "doc": body,
                "sentences": pr.sentences,
                "variants": list(pr.variants)
            }
            results.append(result)
    return results
Beispiel #3
0
def search_duckduckgo(term):
    """Query the DuckDuckGo JSON API for ``term`` and collect its answers.

    Args:
        term: Search term sent as the ``q`` parameter.

    Returns:
        A list of result dicts built from the response's ``Abstract``
        and/or ``Definition`` fields (zero, one or two entries). The list
        is empty when the request or JSON decoding fails, or when the
        response's ``AbstractSource`` is not in
        ``config.duckduckgo_sources``.
    """
    result = []
    try:
        # Let requests build the query string so characters such as '&',
        # '#' or '+' in ``term`` are escaped instead of corrupting the URL.
        req = requests.get(
            'http://api.duckduckgo.com/',
            params={'q': term, 'format': 'json'}).json()
    except Exception:
        # Best-effort lookup: any request/decoding failure yields no
        # results. Unlike a bare ``except``, this lets KeyboardInterrupt
        # and SystemExit propagate.
        return result

    # The source allow-list gates both the abstract and the definition.
    if req['AbstractSource'] not in config.duckduckgo_sources:
        return result

    if req.get('Abstract'):
        pr = PageRequest(req['AbstractURL'], term, run=False)
        pr.extract_sentences(req['Abstract'])
        result.append({
            'title': req['Heading'],
            'url': req['AbstractURL'],
            'search_provider': 'duckduckgo',
            'author': None,
            'date': None,
            'source': req['AbstractSource'],
            'doc': req['Abstract'],
            "sentences": pr.sentences,
            "variants": list(pr.variants)
        })

    if req.get('Definition'):
        pr = PageRequest(req['DefinitionURL'], term, run=False)
        pr.extract_sentences(req['Definition'])
        result.append({
            'title': req['Heading'],
            'url': req['DefinitionURL'],
            'source': req['DefinitionSource'],
            'search_provider': 'duckduckgo',
            'author': None,
            'date': None,
            'doc': req['Definition'],
            "sentences": pr.sentences,
            "variants": list(pr.variants)
        })

    log.info("Searching DuckDuckGo for '{}' returned {} results".format(
        term, len(result)))
    return result
Beispiel #4
0
def search_duckduckgo(term):
    """Fetch DuckDuckGo's JSON answer for ``term`` as a list of result dicts.

    Args:
        term: Search term, sent as the ``q`` query parameter.

    Returns:
        Up to two dicts: one built from the response's ``Abstract`` field
        and one from its ``Definition`` field, each only when non-empty.
        Returns an empty list if the request or JSON decoding fails, or if
        ``AbstractSource`` is not listed in ``config.duckduckgo_sources``.
    """
    result = []
    # Passing the term through ``params`` makes requests escape it;
    # formatting it into the URL left '&', '#' and '+' unescaped.
    query = {'q': term, 'format': 'json'}
    try:
        req = requests.get('http://api.duckduckgo.com/', params=query).json()
    except Exception:
        # Failures are deliberately treated as "no results", but
        # KeyboardInterrupt/SystemExit are no longer swallowed.
        return result

    # Responses from sources outside the allow-list are discarded entirely,
    # including any definition they carry.
    if req['AbstractSource'] not in config.duckduckgo_sources:
        return result

    abstract = req.get('Abstract')
    if abstract:
        pr = PageRequest(req['AbstractURL'], term, run=False)
        pr.extract_sentences(abstract)
        result.append({
            'title': req['Heading'],
            'url': req['AbstractURL'],
            'search_provider': 'duckduckgo',
            'author': None,
            'date': None,
            'source': req['AbstractSource'],
            'doc': abstract,
            "sentences": pr.sentences,
            "variants": list(pr.variants)
        })

    definition = req.get('Definition')
    if definition:
        pr = PageRequest(req['DefinitionURL'], term, run=False)
        pr.extract_sentences(definition)
        result.append({
            'title': req['Heading'],
            'url': req['DefinitionURL'],
            'source': req['DefinitionSource'],
            'search_provider': 'duckduckgo',
            'author': None,
            'date': None,
            'doc': definition,
            "sentences": pr.sentences,
            "variants": list(pr.variants)
        })

    log.info("Searching DuckDuckGo for '{}' returned {} results".format(term, len(result)))
    return result