Example #1
import click
from elasticsearch.exceptions import NotFoundError

# load_config, setup_elasticsearch and parse_frequency are helpers from the
# surrounding binoas project; their imports are omitted in this snippet.


def es_cleanup():
    """
    Delete documents beyond retention time
    """
    config = load_config()
    es = setup_elasticsearch(config)

    for application in config['binoas']['applications']:
        # Retention window per application, in seconds (default: 1 hour).
        seconds = parse_frequency(
            config['binoas']['applications'][application].get(
                'retention', '1h'))
        click.echo('Cleaning up %s (%s)' % (
            application,
            seconds,
        ))
        # Match documents whose 'modified' timestamp is older than the
        # retention window, using Elasticsearch date math rounded to seconds.
        es_query = {
            "query": {
                "range": {
                    "modified": {
                        "lt": "now-%ss/s" % (seconds, ),
                    }
                }
            }
        }
        index_name = 'binoas_%s' % (application, )
        try:
            res = es.delete_by_query(index=index_name,
                                     doc_type='item',
                                     body=es_query)
        except NotFoundError:
            # The application's index does not exist (yet); nothing to delete.
            res = None
        print(res)
Example #2
    # This method assumes module-level imports of json, logging,
    # elasticsearch.helpers.scan, elasticsearch.exceptions.NotFoundError and
    # the UserQueries model, which are omitted from this snippet.
    def make(self, application, frequency):
        """Percolate all documents of ``application`` modified within the
        last ``frequency`` window against the stored queries and produce one
        alert message per subscribed user."""
        if application not in self.config['binoas']['applications']:
            raise ValueError('Application could not be found')

        # Length of the window in seconds, e.g. '1h' -> 3600.
        seconds = parse_frequency(frequency)
        # Select documents modified within the last frequency window, using
        # Elasticsearch date math rounded down to whole seconds.
        es_query = {
            "query": {
                "range": {
                    "modified": {
                        "gte": "now-%ss/s" % (seconds, ),
                        "lt": "now/s"
                    }
                }
            }
        }
        index_name = 'binoas_%s' % (application, )

        perc_req = ''
        try:
            # scan() streams every matching document via the scroll API, so
            # the result set is not capped at the normal search size limit.
            scan_results = [
                r for r in scan(
                    self.es, es_query, index=index_name, doc_type='item')
            ]
        except NotFoundError:
            # The application's index does not exist (yet); nothing to match.
            scan_results = []

        logging.info(
            'Found %s documents for frequency %s' % (
                len(scan_results), frequency))
        if len(scan_results) <= 0:
            return

        # Build an msearch body that percolates each document against the
        # stored queries: one JSON header line plus one body line per document.
        for r in scan_results:
            req_head, req_body = self._make_percolate_query(index_name, r)
            perc_req += '%s \n' % (json.dumps(req_head), )
            perc_req += '%s \n' % (json.dumps(req_body), )

        try:
            results = self.es.msearch(body=perc_req)
        except ValueError:
            results = {'responses': []}

        # Group matched documents by the id of the stored query they hit.
        queries = {}
        for d, r in zip(scan_results, results['responses']):
            if r['hits']['total'] <= 0:
                continue
            #logging.info('Document %s' % (d['_id'],))
            for q in r['hits']['hits']:
                #logging.info('* %s' % (q['_id'],))
                try:
                    queries[q['_id']]['documents'].append(d['_source'])
                except LookupError:
                    queries[q['_id']] = {'documents': [d['_source']]}

        # Users subscribed to any of the matched queries at this frequency.
        user_queries = self.db.query(UserQueries).filter(
            UserQueries.query_id.in_(queries.keys())).filter(
                UserQueries.frequency == frequency.lower()).all()
        logging.info('Found user queries:')
        logging.info([u.user_id for u in user_queries])

        # Group subscriptions per user so each user receives a single alert.
        users_with_queries = {}
        for uq in user_queries:
            try:
                users_with_queries[uq.user_id].append(uq)
            except LookupError:
                users_with_queries[uq.user_id] = [uq]

        # One alert per user, bundling all of that user's matched queries.
        for user_id, uq in users_with_queries.items():
            logging.info(
                '%s alert for %s with ids : %s and %s documents' %
                (application, uq[0].user.email, [u.query_id for u in uq],
                 sum([
                     len(queries[q]['documents'])
                     for q in [u.query_id for u in uq]
                 ])))

            # Alert payload: one entry per matched query (with the documents
            # that triggered it), plus the recipient user.
            pl = {
                'application': application,
                'payload': {
                    'alerts': [{
                        'query': {
                            'id': u.query_id,
                            'description': u.description,
                            'frequency': u.frequency
                        },
                        'documents': queries[u.query_id]['documents']
                    } for u in uq],
                    'user': {
                        'id': uq[0].user_id,
                        'email': uq[0].user.email
                    }
                }
            }

            self.produce_message(pl)
        self.db.close()
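
Example #2 depends on a helper, _make_percolate_query, that is not shown above; it has to return one msearch header/body pair per scanned document. A minimal sketch of such a helper follows, assuming a standard Elasticsearch percolate query against a percolator field named 'query' (the field name and exact query shape are assumptions, not taken from the snippet):

    def _make_percolate_query(self, index_name, doc):
        # msearch header: which index the percolation request targets.
        req_head = {'index': index_name}
        # msearch body: percolate the document's source against the stored
        # queries registered in a percolator field (assumed to be 'query').
        req_body = {
            'query': {
                'percolate': {
                    'field': 'query',
                    'document': doc['_source'],
                }
            }
        }
        return req_head, req_body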
Example #3
    def test_seconds(self):
        result = parse_frequency('1s')
        self.assertEqual(result, 1)
Example #4
    def test_empty(self):
        with self.assertRaises(ValueError):
            parse_frequency('')
Example #5
    def test_numbers_only(self):
        with self.assertRaises(ValueError):
            parse_frequency('1234')
Example #6
    def test_garbage(self):
        with self.assertRaises(ValueError):
            parse_frequency('asdq')
Example #7
    def test_invalid_period(self):
        with self.assertRaises(ValueError):
            parse_frequency('1q')
Example #8
    def test_years(self):
        result = parse_frequency('1y')
        self.assertEqual(result, 31536000)
Example #9
    def test_weeks(self):
        result = parse_frequency('1w')
        self.assertEqual(result, 604800)
Example #10
    def test_days(self):
        result = parse_frequency('1d')
        self.assertEqual(result, 86400)
Example #11
    def test_hours(self):
        result = parse_frequency('1h')
        self.assertEqual(result, 3600)
Example #12
    def test_minutes(self):
        result = parse_frequency('1m')
        self.assertEqual(result, 60)
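
Examples #3 through #12 pin down the expected behaviour of parse_frequency: a positive integer followed by one of the periods s, m, h, d, w or y is converted to seconds, and anything else raises ValueError. A minimal sketch consistent with those tests could look like the following (an illustration inferred from the tests, not the actual binoas implementation; the UNIT_SECONDS name is made up here):

import re

# Seconds per supported period, matching the expectations in the tests above.
UNIT_SECONDS = {
    's': 1,
    'm': 60,
    'h': 3600,
    'd': 86400,
    'w': 604800,      # 7 days
    'y': 31536000,    # 365 days
}


def parse_frequency(frequency):
    """Convert a frequency string such as '1h' into a number of seconds."""
    match = re.match(r'^(\d+)([smhdwy])$', frequency or '')
    if match is None:
        # Covers '', '1234', 'asdq' and unknown periods such as '1q'.
        raise ValueError('Invalid frequency: %s' % (frequency, ))
    amount, period = match.groups()
    return int(amount) * UNIT_SECONDS[period]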