def handler(event, context):
    """Dispatch an incoming event to the matching task.

    Two event shapes are recognised:

    * API call -- the event has a ``term`` key. The term is validated with
      ``qualify_term``. If the event also carries a ``sentence``, a detect
      message is built and passed to ``tasks.detect``; otherwise the bare
      message is passed to ``tasks.search``.
    * S3 notification -- the event has a ``Records`` key. Each record's
      object key is URL-decoded and routed to ``run_task`` (keys containing
      exactly two colons) or ``add_words`` (keys ending in ``.wordlist``).

    NOTE(review): the ``(event, context)`` signature and the S3 record
    layout suggest this is an AWS Lambda entry point -- confirm.

    Args:
        event: dict -- The incoming event.
        context: Unused by this function.

    Returns:
        ``{'error': 'Invalid search term'}`` when the term is rejected,
        otherwise whatever the dispatched task returns. ``None`` when the
        event matches neither shape or no record key is recognised.
    """
    # Function-scope import so the block works on both Python 2 and 3
    # without touching the file's import section.
    try:
        from urllib.parse import unquote_plus  # Python 3
    except ImportError:
        from urllib import unquote_plus  # Python 2

    if "term" in event:
        # API call
        term = event['term']
        if not qualify_term(term):
            return {'error': 'Invalid search term'}

        message = {
            "word": term,
            'hashslug': hashslug(term)
        }

        if "sentence" not in event:
            # Search
            return tasks.search(message)

        # Detect
        s_clean, variants = clean_sentence(event['sentence'], term)
        message['crawl_date'] = now()
        url = event.get('url')
        message['urls'] = [{
            "url": url,
            "source": get_source_from_url(url),
            "sentences": [{
                "s": event['sentence'],
                "s_clean": s_clean,
            }],
            "variants": list(variants)
        }]
        return tasks.detect(message)

    if "Records" in event:
        # This comes from S3
        for record in event['Records']:
            bucket = record['s3']['bucket']['name']
            # S3 notifications URL-encode the whole object key (spaces
            # arrive as '+', ':' as '%3A', ...), so decode it properly
            # instead of special-casing a single escape sequence.
            key = unquote_plus(record['s3']['object']['key'])

            # NOTE: returns on the first recognised record; any records
            # after it in the same event are not processed.
            if key.count(":") == 2:
                return run_task(bucket, key)
            if key.endswith(".wordlist"):
                return add_words(bucket, key)

            # Parenthesised single-argument form prints identically on
            # Python 2 (print statement) and Python 3 (print function).
            print("Don't know what to do with '{}'".format(key))

    return None
def search(message):
    """Run a search for the message's word and queue a detect task.

    The incoming message must contain at least:

        {
            'word': ...
            'hashslug': ...
        }

    The message is updated in place: ``urls`` is set to the result of
    ``search_all`` for the word and ``crawl_date`` to ``now()``. It is
    then handed to ``write_message`` under the ``'detect'`` task name.

    Args:
        message: dict -- A message dictionary

    Returns:
        Whatever ``write_message`` returns (presumably the message
        dictionary -- verify against ``write_message``).
    """
    found_urls = search_all(message['word'])
    message['urls'] = found_urls
    message['crawl_date'] = now()
    return write_message('detect', message)