def main():
    """Command-line entry point of the Monolith Aggregator.

    Parses the command line, resolves the date range to process, configures
    the logger, runs ``extract`` and terminates the process with the value
    ``extract`` returned.

    Date range resolution:

    * ``--date WORD`` -- the range is ``word2daterange(WORD)``;
    * neither ``--date`` nor ``--start-date`` -- the range for
      ``'yesterday'``;
    * ``--start-date`` -- the range is ``(--start-date, --end-date)``;
      ``--end-date`` is forwarded as ``None`` when it was not given.

    ``--date`` and ``--start-date`` are mutually exclusive.
    """
    parser = argparse.ArgumentParser(description='Monolith Aggregator')

    # ``action='version'`` prints and exits while parsing, so ``--version``
    # works without the otherwise mandatory ``config`` positional.
    parser.add_argument('--version', action='version',
                        version=str(__version__),
                        help='Displays version and exits.')

    date_group = parser.add_mutually_exclusive_group()
    date_group.add_argument('--date', default=None, choices=_DATES,
                            help='Date')
    date_group.add_argument('--start-date', default=None, type=_mkdate,
                            help='Start date.')
    # NOTE(review): --end-date is only read when --start-date is given.
    parser.add_argument('--end-date', default=None, type=_mkdate,
                        help='End date.')

    parser.add_argument('config', help='Configuration file.')

    # Materialise the keys as a list: on Python 3 ``dict.keys()`` is a view
    # and cannot be concatenated with a list.
    level_names = list(LOG_LEVELS)
    parser.add_argument('--log-level', dest='loglevel', default='info',
                        choices=level_names + [name.upper()
                                               for name in level_names],
                        help="log level")
    parser.add_argument('--log-output', dest='logoutput', default='-',
                        help="log output")

    parser.add_argument('--sequence', dest='sequence', default=None,
                        help='A comma-separated list of sequences.')
    parser.add_argument('--batch-size', dest='batch_size', default=None,
                        type=int,
                        help='The size of the batch when writing')
    parser.add_argument('--force', action='store_true', default=False,
                        help='Forces a run')
    parser.add_argument('--purge-only', action='store_true', default=False,
                        help='Only run the purge of sources.')
    # NOTE(review): --retries is parsed but never forwarded to extract().
    parser.add_argument('--retries', default=3, type=int,
                        help='Number of retries')

    args = parser.parse_args()

    if args.date is not None:
        start, end = word2daterange(args.date)
    elif args.start_date is None:
        start, end = word2daterange('yesterday')
    else:
        start, end = args.start_date, args.end_date

    configure_logger(logger, args.loglevel, args.logoutput)

    res = extract(args.config, start, end, args.sequence, args.batch_size,
                  args.force, args.purge_only)

    # A return value of 0 is reported as success, anything else as an error;
    # the value is also used as the process exit status.
    if res == 0:
        logger.info('SUCCESS')
    else:
        logger.info('ERROR')

    sys.exit(res)
def test_extract(self):
    """Run ``extract`` several times and check the document count is stable.

    The first run is expected to leave 102 documents in the index; every
    following (forced) run must leave that total unchanged.
    """
    config, _ = self._make_config('config_extract.ini')
    first_day, last_day = word2daterange('today')

    def _indexed_docs():
        # Refresh the index, then ask for the number of matching documents.
        self.es_client.refresh()
        response = self.es_client.count({'match_all': {}})
        return response['count']

    # Initial run.
    extract(config, first_day, last_day)
    baseline = _indexed_docs()
    self.assertEqual(baseline, 102)

    # Re-runs, all of them forced: two complete runs followed by one
    # restricted to the 'load' sequence. None of them may change the
    # number of documents.
    # NOTE(review): every re-run passes force=True, so an unforced second
    # run is not covered here.
    reruns = (
        {},
        {},
        {'sequence': 'load'},
    )
    for extra_options in reruns:
        extract(config, first_day, last_day, force=True, **extra_options)
        self.assertEqual(baseline, _indexed_docs())
def test_word2daterange(self):
    """Check the date ranges ``word2daterange`` returns for known words.

    Covers 'today', 'yesterday' and 'last-week'. The checks compare against
    ``date.today()``, so they can fail when the test straddles midnight.
    """

    def _days_between(later, earlier):
        # Whole days from ``earlier`` to ``later``.
        return (later - earlier).days

    # don't run those tests a millisecond before midnight!
    # self.assertRaises(NotImplementedError, word2daterange, 'bleh')
    now = date.today()

    # assertEqual is required here: assertTrue(a, b) treats ``b`` as the
    # failure message and passes for any truthy ``a``.
    today, _ = word2daterange('today')
    self.assertEqual(today, now)

    yesterday, _ = word2daterange('yesterday')
    self.assertEqual(today - yesterday, timedelta(days=1))

    first, last = word2daterange('last-week')
    self.assertGreaterEqual(_days_between(now, last), 0)
    self.assertGreaterEqual(_days_between(now, first), 7)
    self.assertEqual(_days_between(last, first), 7)
def main():
    """Command-line entry point of the Monolith Aggregator.

    Parses the command line, resolves the date range to process, configures
    the logger, runs ``extract`` and terminates the process with the value
    ``extract`` returned.

    Date range resolution:

    * ``--date WORD`` -- the range is ``word2daterange(WORD)``;
    * neither ``--date`` nor ``--start-date`` -- the range for
      ``'yesterday'``;
    * ``--start-date`` -- the range is ``(--start-date, --end-date)``;
      ``--end-date`` is forwarded as ``None`` when it was not given.

    ``--date`` and ``--start-date`` are mutually exclusive.
    """
    parser = argparse.ArgumentParser(description='Monolith Aggregator')

    # ``action='version'`` prints and exits while parsing, so ``--version``
    # works without the otherwise mandatory ``config`` positional.
    parser.add_argument('--version', action='version',
                        version=str(__version__),
                        help='Displays version and exits.')

    date_group = parser.add_mutually_exclusive_group()
    date_group.add_argument('--date', default=None, choices=_DATES,
                            help='Date')
    date_group.add_argument('--start-date', default=None, type=_mkdate,
                            help='Start date.')
    # NOTE(review): --end-date is only read when --start-date is given.
    parser.add_argument('--end-date', default=None, type=_mkdate,
                        help='End date.')

    parser.add_argument('config', help='Configuration file.')

    # Materialise the keys as a list: on Python 3 ``dict.keys()`` is a view
    # and cannot be concatenated with a list.
    level_names = list(LOG_LEVELS)
    parser.add_argument('--log-level', dest='loglevel', default='info',
                        choices=level_names + [name.upper()
                                               for name in level_names],
                        help="log level")
    parser.add_argument('--log-output', dest='logoutput', default='-',
                        help="log output")

    parser.add_argument('--sequence', dest='sequence', default=None,
                        help='A comma-separated list of sequences.')
    parser.add_argument('--batch-size', dest='batch_size', default=None,
                        type=int,
                        help='The size of the batch when writing')
    parser.add_argument('--force', action='store_true', default=False,
                        help='Forces a run')
    parser.add_argument('--purge-only', action='store_true', default=False,
                        help='Only run the purge of sources.')
    # NOTE(review): --retries is parsed but never forwarded to extract().
    parser.add_argument('--retries', default=3, type=int,
                        help='Number of retries')

    args = parser.parse_args()

    if args.date is not None:
        start, end = word2daterange(args.date)
    elif args.start_date is None:
        start, end = word2daterange('yesterday')
    else:
        start, end = args.start_date, args.end_date

    configure_logger(logger, args.loglevel, args.logoutput)

    res = extract(args.config, start, end, args.sequence, args.batch_size,
                  args.force, args.purge_only)

    # A return value of 0 is reported as success, anything else as an error;
    # the value is also used as the process exit status.
    if res == 0:
        logger.info('SUCCESS')
    else:
        logger.info('ERROR')

    sys.exit(res)
def test_retry(self):
    """Extract with the retry configuration and check 102 results landed.

    NOTE(review): per the original comment, 'config_retry.ini' makes the
    load phase retry 3 times before failing -- confirm against the fixture.
    """
    config, _unused = self._make_config('config_retry.ini')

    first_day, last_day = word2daterange('today')
    extract(config, first_day, last_day)

    # ``_res`` is a module-level collection defined outside this method.
    collected = len(_res)
    self.assertEqual(collected, 102)
def test_fails(self):
    """``extract`` must raise ``RunError`` with the failing configuration.

    NOTE(review): per the original comment, 'config_fails.ini' makes the
    extract phase retry 3 times before failing -- confirm against the
    fixture.
    """
    config, _unused = self._make_config('config_fails.ini')
    first_day, last_day = word2daterange('last-month')

    with self.assertRaises(RunError):
        extract(config, first_day, last_day)