예제 #1
0
def handler(event, context):
    """Route an incoming event to the matching task.

    Two event shapes are handled:

    * API call -- ``event`` contains ``term``. The term is validated with
      ``qualify_term``; when ``sentence`` is also present a detect message
      is built and handed to ``tasks.detect``, otherwise the bare message
      goes to ``tasks.search``.
    * S3 notification -- ``event`` contains ``Records``. Each record's
      object key selects ``run_task`` (key holds exactly two ``:``) or
      ``add_words`` (key ends with ``.wordlist``). The first record that
      matches either rule ends the loop, because its result is returned
      immediately; records with any other key are reported and skipped.

    Args:
        event: dict -- The incoming event.
        context: Not used by this function.
    Returns:
        The result of the dispatched task, the dict
        ``{'error': 'Invalid search term'}`` for a rejected term, or None
        when the event matched neither shape or no record key matched.
    """
    if "term" in event:  # API call
        term = event['term']
        if not qualify_term(term):
            return {'error': 'Invalid search term'}
        message = {
            "word": term,
            'hashslug': hashslug(term),
        }
        if "sentence" not in event:  # Search
            return tasks.search(message)

        # Detect: attach the submitted sentence as a single-URL crawl result.
        sentence = event['sentence']
        s_clean, variants = clean_sentence(sentence, term)
        message['crawl_date'] = now()
        url = event.get('url')  # optional; None when the caller sent no url
        message['urls'] = [{
            "url": url,
            "source": get_source_from_url(url),
            "sentences": [{
                "s": sentence,
                "s_clean": s_clean,
            }],
            "variants": list(variants),
        }]
        return tasks.detect(message)

    if "Records" in event:  # This comes from S3
        for record in event['Records']:
            bucket = record['s3']['bucket']['name']
            key = record['s3']['object']['key']
            # NOTE(review): only the escaped colon is decoded here; any
            # other percent-escape in the key is passed through untouched.
            key = key.replace("%3A", ":")
            if key.count(":") == 2:
                return run_task(bucket, key)
            elif key.endswith(".wordlist"):
                return add_words(bucket, key)
            else:
                # Single-argument call form: prints the same text under the
                # Python 2 print statement and the Python 3 print function.
                print("Don't know what to do with '{}'".format(key))

    return None
예제 #2
0
def search(message):
    """Run a search for the message's word and queue a detect task.

    The incoming message must provide at least the ``word`` key, e.g.:

        {
            'word': ...
            'hashslug': ...
        }

    The message is modified in place: ``urls`` is set to the result of
    ``search_all`` for the word and ``crawl_date`` is set to ``now()``.
    The updated message is then passed to ``write_message`` under the
    ``'detect'`` task name.

    Args:
        message: dict --  A message dictionary
    Returns:
        Whatever ``write_message`` returns for the updated message.
    """
    term = message['word']
    hits = search_all(term)
    message['urls'] = hits
    message['crawl_date'] = now()
    next_task = 'detect'
    return write_message(next_task, message)
예제 #3
0
def search(message):
    """Run a search for the message's word and queue a detect task.

    The incoming message must provide at least the ``word`` key, e.g.:

        {
            'word': ...
            'hashslug': ...
        }

    The message is modified in place: ``urls`` is set to the result of
    ``search_all`` for the word and ``crawl_date`` is set to ``now()``.
    The updated message is then passed to ``write_message`` under the
    ``'detect'`` task name.

    Args:
        message: dict --  A message dictionary
    Returns:
        Whatever ``write_message`` returns for the updated message.
    """
    term = message['word']
    hits = search_all(term)
    message['urls'] = hits
    message['crawl_date'] = now()
    next_task = 'detect'
    return write_message(next_task, message)