import click

from elasticsearch.exceptions import NotFoundError

# load_config, setup_elasticsearch and parse_frequency are helpers defined
# elsewhere in the project.


def es_cleanup():
    """Delete documents beyond the configured retention time."""
    config = load_config()
    es = setup_elasticsearch(config)

    for application in config['binoas']['applications']:
        # Retention is given as a frequency string (e.g. '1w');
        # default is one hour.
        seconds = parse_frequency(
            config['binoas']['applications'][application].get(
                'retention', '1h'))
        click.echo('Cleaning up %s (%s)' % (application, seconds,))

        # Remove everything whose 'modified' timestamp is older than the
        # retention window, rounded down to whole seconds.
        es_query = {
            "query": {
                "range": {
                    "modified": {
                        "lt": "now-%ss/s" % (seconds,),
                    }
                }
            }
        }

        index_name = 'binoas_%s' % (application,)
        try:
            res = es.delete_by_query(
                index=index_name, doc_type='item', body=es_query)
        except NotFoundError:
            res = None
        click.echo(res)
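# A minimal sketch of the configuration shape es_cleanup() assumes; only the
# keys referenced above are shown, and the application name 'example_app' is
# purely illustrative, not taken from the project.
EXAMPLE_CONFIG = {
    'binoas': {
        'applications': {
            'example_app': {
                'retention': '1w',  # optional; es_cleanup() falls back to '1h'
            },
        },
    },
}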
def make(self, application, frequency):
    if application not in self.config['binoas']['applications']:
        raise ValueError('Application could not be found')

    seconds = parse_frequency(frequency)

    # Select documents modified within the current frequency window,
    # rounded down to whole seconds.
    es_query = {
        "query": {
            "range": {
                "modified": {
                    "gte": "now-%ss/s" % (seconds,),
                    "lt": "now/s"
                }
            }
        }
    }
    index_name = 'binoas_%s' % (application,)

    try:
        scan_results = [
            r for r in scan(
                self.es, es_query, index=index_name, doc_type='item')
        ]
    except NotFoundError:
        scan_results = []

    logging.info(
        'Found %s documents for frequency %s' % (
            len(scan_results), frequency,))

    if not scan_results:
        return

    # Build one msearch request: a header/body pair of percolate queries
    # per document found in the window.
    perc_req = ''
    for r in scan_results:
        req_head, req_body = self._make_percolate_query(index_name, r)
        perc_req += '%s\n' % (json.dumps(req_head),)
        perc_req += '%s\n' % (json.dumps(req_body),)

    try:
        results = self.es.msearch(body=perc_req)
    except ValueError:
        results = {'responses': []}

    # Group matched documents by the id of the stored query that hit them.
    queries = {}
    for d, r in zip(scan_results, results['responses']):
        if r['hits']['total'] <= 0:
            continue
        for q in r['hits']['hits']:
            queries.setdefault(
                q['_id'], {'documents': []})['documents'].append(
                    d['_source'])

    user_queries = self.db.query(UserQueries).filter(
        UserQueries.query_id.in_(queries.keys())).filter(
            UserQueries.frequency == frequency.lower()).all()

    logging.info('Found user queries:')
    logging.info([u.user_id for u in user_queries])

    # Group the matched queries per user so each user gets a single alert.
    users_with_queries = {}
    for uq in user_queries:
        users_with_queries.setdefault(uq.user_id, []).append(uq)

    for user_id, uq in users_with_queries.items():
        logging.info(
            '%s alert for %s with ids: %s and %s documents' % (
                application, uq[0].user.email, [u.query_id for u in uq],
                sum([
                    len(queries[q]['documents'])
                    for q in [u.query_id for u in uq]
                ])))
        pl = {
            'application': application,
            'payload': {
                'alerts': [{
                    'query': {
                        'id': u.query_id,
                        'description': u.description,
                        'frequency': u.frequency
                    },
                    'documents': queries[u.query_id]['documents']
                } for u in uq],
                'user': {
                    'id': uq[0].user_id,
                    'email': uq[0].user.email
                }
            }
        }
        self.produce_message(pl)

    self.db.close()
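# _make_percolate_query() is not shown in this excerpt. Below is a minimal
# sketch of what such a helper could return for the msearch body built in
# make(); the percolator field name 'query' is an assumption, not confirmed
# by the source.
def _make_percolate_query(self, index_name, doc):
    # One msearch header/body pair: match the document's source against
    # the percolator queries stored in the same index.
    req_head = {'index': index_name}
    req_body = {
        'query': {
            'percolate': {
                'field': 'query',
                'document': doc['_source'],
            }
        }
    }
    return req_head, req_body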
def test_seconds(self):
    result = parse_frequency('1s')
    self.assertEqual(result, 1)

def test_empty(self):
    with self.assertRaises(ValueError):
        parse_frequency('')

def test_numbers_only(self):
    with self.assertRaises(ValueError):
        parse_frequency('1234')

def test_garbage(self):
    with self.assertRaises(ValueError):
        parse_frequency('asdq')

def test_invalid_period(self):
    with self.assertRaises(ValueError):
        parse_frequency('1q')

def test_years(self):
    result = parse_frequency('1y')
    self.assertEqual(result, 31536000)

def test_weeks(self):
    result = parse_frequency('1w')
    self.assertEqual(result, 604800)

def test_days(self):
    result = parse_frequency('1d')
    self.assertEqual(result, 86400)

def test_hours(self):
    result = parse_frequency('1h')
    self.assertEqual(result, 3600)

def test_minutes(self):
    result = parse_frequency('1m')
    self.assertEqual(result, 60)
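# parse_frequency() itself is not shown in this excerpt. The tests above pin
# down its contract; a minimal sketch consistent with them (the real
# implementation may differ) could look like this:
import re

PERIODS = {
    's': 1,
    'm': 60,
    'h': 3600,
    'd': 86400,
    'w': 604800,
    'y': 31536000,
}


def parse_frequency(frequency):
    # Expect an integer amount followed by exactly one known period letter,
    # e.g. '1h' or '2w'; anything else (empty strings, bare numbers,
    # unknown periods) raises ValueError.
    match = re.match(r'^(\d+)([smhdwy])$', frequency)
    if match is None:
        raise ValueError('Invalid frequency: %s' % (frequency,))
    amount, period = match.groups()
    return int(amount) * PERIODS[period]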