コード例 #1
0
ファイル: lambda_handler.py プロジェクト: wordnik/serapis
def handler(event, context):
    """Dispatch an incoming event to the matching task.

    Two event shapes are handled:

    * An event with a ``"term"`` key (API call). The term is checked with
      ``qualify_term``; if it fails, an error dict is returned. If the
      event also has a ``"sentence"`` key, a detect message is built and
      passed to ``tasks.detect``; otherwise the basic message goes to
      ``tasks.search``.
    * An event with a ``"Records"`` key (S3 notification). Each record's
      bucket name and object key are read; a key containing exactly two
      colons is passed to ``run_task``, a key ending in ``.wordlist`` is
      passed to ``add_words``, and anything else is reported on stdout.

    Args:
        event: Mapping describing the request (see shapes above).
        context: Not used by this function.

    Returns:
        ``{'error': 'Invalid search term'}`` for a rejected term, the
        result of the task that was invoked, or ``None`` when no branch
        produced a result.
    """
    if "term" in event:  # API call
        term = event['term']
        if not qualify_term(term):
            return {'error': 'Invalid search term'}
        message = {
            "word": term,
            'hashslug': hashslug(term),
        }
        if "sentence" not in event:  # Search
            return tasks.search(message)

        # Detect: attach the supplied sentence (raw and cleaned) to the message.
        s_clean, variants = clean_sentence(event['sentence'], term)
        message['crawl_date'] = now()
        message['urls'] = [{
            "url": event.get('url'),
            "source": get_source_from_url(event.get('url')),
            "sentences": [{
                "s": event['sentence'],
                "s_clean": s_clean,
            }],
            "variants": list(variants),
        }]
        return tasks.detect(message)

    elif "Records" in event:  # This comes from S3
        for record in event['Records']:
            bucket = record['s3']['bucket']['name']
            key = record['s3']['object']['key']
            # NOTE(review): only the "%3A" escape is decoded here; any other
            # escaped character in the key is left untouched -- confirm that
            # this is sufficient for the keys this handler receives.
            key = key.replace("%3A", ":")
            # NOTE(review): the first record that matches either rule ends
            # the loop via ``return``; later records are not processed.
            if key.count(":") == 2:
                return run_task(bucket, key)
            elif key.endswith(".wordlist"):
                return add_words(bucket, key)
            else:
                # Parenthesised single-argument form prints identically on
                # Python 2 and is valid syntax on Python 3.
                print("Don't know what to do with '{}'".format(key))
コード例 #2
0
def add(word):
    """
    Queue one word for the pipeline.

    Builds a message holding the word and its hash slug, writes it with
    ``tasks.write_message`` under ``'search'``, and prints the slug.
    """
    from serapis import tasks
    payload = dict(word=word, hashslug=util.hashslug(word))
    tasks.write_message('search', payload)
    confirmation = "Added task '{}'".format(payload['hashslug'])
    print(confirmation)
コード例 #3
0
ファイル: lambda_handler.py プロジェクト: wordnik/serapis
def add_words(bucket, key):
    """Queue a search task for every usable term in a word-list object.

    The object at ``bucket``/``key`` is fetched through ``config.s3``, its
    body is split into lines, and each line is passed through
    ``clean_and_qualify_term``. A term that comes back truthy and whose
    hash slug has not been seen yet in this call is written as a
    ``'search'`` message; every other line is counted as skipped.

    Args:
        bucket: Name of the bucket holding the word list.
        key: Key of the word-list object inside the bucket.

    Returns:
        None. A summary line with the added/skipped counts is printed.
    """
    contents = config.s3.Object(bucket, key).get()
    words = contents['Body'].read().splitlines()
    added = set()
    skipped = 0  # only the count is ever reported, so no list is kept
    for raw_term in words:
        term = clean_and_qualify_term(raw_term)
        if not term:
            skipped += 1
            continue
        slug = hashslug(term)
        if slug in added:
            # Duplicate slug within this list: do not queue it twice.
            skipped += 1
            continue
        added.add(slug)
        tasks.write_message('search', {'word': term, 'hashslug': slug})
    # Parenthesised single-argument form prints identically on Python 2 and
    # is valid syntax on Python 3.
    print("Added {} terms, skipped {}".format(len(added), skipped))
コード例 #4
0
def add_words(bucket, key):
    """Queue a search task for every usable term in a word-list object.

    Fetches the object at ``bucket``/``key`` through ``config.s3``, splits
    its body into lines, and runs each line through
    ``clean_and_qualify_term``. Terms that come back truthy and whose hash
    slug is new within this call are written as ``'search'`` messages; all
    remaining lines are counted as skipped.

    Args:
        bucket: Name of the bucket holding the word list.
        key: Key of the word-list object inside the bucket.

    Returns:
        None. A summary line with the added/skipped counts is printed.
    """
    contents = config.s3.Object(bucket, key).get()
    words = contents['Body'].read().splitlines()
    added = set()
    skipped = 0  # only the count is ever reported, so no list is kept
    for line in words:
        term = clean_and_qualify_term(line)
        slug = hashslug(term) if term else None
        if slug is None or slug in added:
            # Either the term was rejected or its slug was already queued.
            skipped += 1
            continue
        added.add(slug)
        message = {'word': term, 'hashslug': slug}
        tasks.write_message('search', message)
    # Parenthesised single-argument form prints identically on Python 2 and
    # is valid syntax on Python 3.
    print("Added {} terms, skipped {}".format(len(added), skipped))