def handler(event, context):
    """Dispatch an incoming event to the matching task.

    Two event shapes are handled:

    * API call: ``event`` contains ``"term"``. The term is checked with
      ``qualify_term``; when ``"sentence"`` is also present the message
      is handed to ``tasks.detect``, otherwise to ``tasks.search``.
    * S3 notification: ``event`` contains ``"Records"``. Records are
      examined in order and the first one whose key is recognised is
      processed and its result returned, so later records are not
      visited.

    Args:
        event: Mapping describing the request (see above).
        context: Unused; accepted for the caller's calling convention
            (presumably the AWS Lambda context object -- confirm).

    Returns:
        ``{'error': 'Invalid search term'}`` for a rejected term, the
        result of the dispatched task otherwise, or ``None`` when the
        event has neither key or no record key is recognised.
    """
    # Local import so the block works unchanged on Python 2 and 3.
    try:
        from urllib.parse import unquote_plus  # Python 3
    except ImportError:
        from urllib import unquote_plus  # Python 2

    if "term" in event:
        # API call
        term = event['term']
        if not qualify_term(term):
            return {'error': 'Invalid search term'}

        message = {
            "word": term,
            'hashslug': hashslug(term),
        }

        if "sentence" not in event:
            # Search
            return tasks.search(message)

        # Detect
        sentence = event['sentence']
        url = event.get('url')
        s_clean, variants = clean_sentence(sentence, term)
        message['crawl_date'] = now()
        message['urls'] = [{
            "url": url,
            "source": get_source_from_url(url),
            "sentences": [{
                "s": sentence,
                "s_clean": s_clean,
            }],
            "variants": list(variants),
        }]
        return tasks.detect(message)

    if "Records" in event:
        # This comes from S3
        for record in event['Records']:
            bucket = record['s3']['bucket']['name']
            # S3 URL-encodes object keys in event notifications (spaces
            # become "+", ":" becomes "%3A", ...), so decode the whole
            # key instead of special-casing a single escape.
            key = unquote_plus(record['s3']['object']['key'])

            if key.count(":") == 2:
                return run_task(bucket, key)
            if key.endswith(".wordlist"):
                return add_words(bucket, key)

            # Call form with one argument prints the same on Python 2
            # and 3; the statement form is a syntax error on Python 3.
            print("Don't know what to do with '{}'".format(key))

    return None
def structured(self):
    """Return this object's fields as a plain dict.

    The dict carries the term, url, the source derived from the url via
    ``get_source_from_url``, the document text (under ``"doc"``),
    features, variants, sentences, author and title. The raw HTML is
    added under ``"html"`` only when ``config.save_html`` is truthy.
    """
    payload = {
        'term': self.term,
        'url': self.url,
        'source': get_source_from_url(self.url),
        'doc': self.text,
        'features': self.features,
        # Copied into a list because sets are not JSON serializable.
        'variants': list(self.variants),
        'sentences': self.sentences,
        'author': self.author,
        'title': self.title,
    }

    if not config.save_html:
        return payload

    payload['html'] = self.html
    return payload