def do_parse_images(config: Config, args: argparse.Namespace) -> None:
    db = Db(config.postgresql)
    log.info('Parsing the image ids for old activations')

    with db as dbsession:
        query = dbsession.chunked_query(Machine, chunk_size=args.chunk_size)
        query = query.filter(Machine.image_product.is_(None))
        query = query.reverse_chunks()
        num_records = query.count()

        if num_records == 0:
            log.info('-> No machine record with unparsed image ids')
            return None

        for chunk_number, chunk in enumerate(query, start=1):
            for machine in chunk:
                parsed_image = parse_endless_os_image(machine.image_id)

                for k, v in parsed_image.items():
                    setattr(machine, k, v)

            dbsession.commit()
            progress(chunk_number * args.chunk_size, num_records)

    progress(num_records, num_records, end='\n')

    log.info('All done!')
def do_remove_empty_location_info(config: Config, args: argparse.Namespace) -> None:
    db = Db(config.postgresql)
    log.info('Remove location events with empty info')

    with db as dbsession:
        query = dbsession.query(LocationLabel).filter(
            LocationLabel.info == {
                'id': '',
                'city': '',
                'state': '',
                'street': '',
                'country': '',
                'facility': '',
            })
        num_records = query.count()

        if num_records == 0:
            log.info('-> No location events with empty info in database')
            return None

        query.delete()
        dbsession.commit()

    log.info('All done!')
def do_replay_invalid(config: Config, args: argparse.Namespace) -> None:
    db = Db(config.postgresql)

    log.info('Replaying the invalid singular events…')

    with db as dbsession:
        query = dbsession.chunked_query(InvalidSingularEvent, chunk_size=args.chunk_size)
        query = query.reverse_chunks()
        total = query.count()

        for chunk_number, chunk in enumerate(query, start=1):
            replay_invalid_singular_events(chunk)
            dbsession.commit()
            progress(chunk_number * args.chunk_size, total)

    progress(total, total, end='\n')

    log.info('Replaying the invalid aggregate events…')

    with db as dbsession:
        query = dbsession.chunked_query(InvalidAggregateEvent, chunk_size=args.chunk_size)
        query = query.reverse_chunks()
        total = query.count()

        # FIXME: Stop ignoring from coverage report once we actually have aggregate events
        for chunk_number, chunk in enumerate(query, start=1):  # pragma: no cover
            replay_invalid_aggregate_events(chunk)
            dbsession.commit()
            progress(chunk_number * args.chunk_size, total)

    progress(total, total, end='\n')

    log.info('Replaying the invalid sequences…')

    with db as dbsession:
        query = dbsession.chunked_query(InvalidSequence, chunk_size=args.chunk_size)
        query = query.reverse_chunks()
        total = query.count()

        for chunk_number, chunk in enumerate(query, start=1):
            replay_invalid_sequences(chunk)
            dbsession.commit()
            progress(chunk_number * args.chunk_size, total)

    progress(total, total, end='\n')
def do_dedupe_dualboots(config: Config, args: argparse.Namespace) -> None:
    db = Db(config.postgresql)
    log.info('Deduplicating the metrics requests with multiple "dual boot" (%s) events',
             DualBootBooted.__event_uuid__)

    with db as dbsession:
        query = dbsession.query(DualBootBooted.request_id)
        query = query.group_by(DualBootBooted.request_id)
        query = query.having(func.count(DualBootBooted.id) > 1)
        num_requests_with_dupes = query.count()

        if num_requests_with_dupes == 0:
            log.info('-> No metrics requests with duplicate dual boot events found')
            return None

        log.info('-> Found %s metrics requests with duplicate dual boot events',
                 num_requests_with_dupes)
        request_ids_with_dupes = [x[0] for x in query]

    previous_request_id = None

    for start in range(0, num_requests_with_dupes, args.chunk_size):
        stop = min(num_requests_with_dupes, start + args.chunk_size)
        request_id_chunk = request_ids_with_dupes[start:stop]

        with db as dbsession:
            query = dbsession.query(DualBootBooted)
            query = query.filter(DualBootBooted.request_id.in_(request_id_chunk))
            query = query.order_by(DualBootBooted.request_id, DualBootBooted.id)

            # Events are ordered by request then id: keep the first event of each
            # request and delete the duplicates that follow it
            for dualboot in query:
                if dualboot.request_id == previous_request_id:
                    dbsession.delete(dualboot)

                previous_request_id = dualboot.request_id

        progress(stop, num_requests_with_dupes)

    progress(num_requests_with_dupes, num_requests_with_dupes, end='\n')

    log.info('All done!')
def do_set_open_durations(config: Config, args: argparse.Namespace) -> None:
    db = Db(config.postgresql)
    log.info('Setting open shell apps durations')

    with db as dbsession:
        query = dbsession.query(ShellAppIsOpen).filter(ShellAppIsOpen.duration == 0)
        total = query.count()

        if total == 0:
            log.info('-> No open app with unset duration')
            return None

        query.update({
            ShellAppIsOpen.duration: func.extract(
                'epoch', ShellAppIsOpen.stopped_at - ShellAppIsOpen.started_at),
        }, synchronize_session=False)
        dbsession.commit()

    log.info('All done!')
def do_replay_machine_live_usbs(config: Config, args: argparse.Namespace) -> None:
    db = Db(config.postgresql)

    log.info('Replaying the live USB events…')

    with db as dbsession:
        query = dbsession.query(Request.machine_id)
        query = query.join(LiveUsbBooted)
        query = query.distinct()
        total = query.count()

        for i, (machine_id, ) in enumerate(query, start=1):
            upsert_machine_live(dbsession, machine_id)

            if (i % args.chunk_size) == 0:
                dbsession.commit()
                progress(i, total)

    progress(total, total)
def do_replay_machine_images(config: Config, args: argparse.Namespace) -> None:
    db = Db(config.postgresql)

    log.info('Replaying the image version events…')

    with db as dbsession:
        query = dbsession.query(Request.machine_id, ImageVersion.image_id)
        query = query.filter(Request.id == ImageVersion.request_id)
        query = query.distinct()
        total = query.count()

        for i, (machine_id, image_id) in enumerate(query, start=1):
            upsert_machine_image(dbsession, machine_id, image_id)

            if (i % args.chunk_size) == 0:
                dbsession.commit()
                progress(i, total)

    progress(total, total)
def do_remove_os_info_quotes(config: Config, args: argparse.Namespace) -> None:
    db = Db(config.postgresql)
    log.info('Remove leading and trailing quotes from OS version fields')

    with db as dbsession:
        query = dbsession.query(OSVersion).filter(or_(
            OSVersion.version.startswith('"'),
            OSVersion.version.endswith('"'),
        ))
        num_records = query.count()

        if num_records == 0:
            log.info('-> No OS info with extra quotes in database')
            return None

        query.update({
            OSVersion.version: func.btrim(OSVersion.version, '"'),
        }, synchronize_session=False)
        dbsession.commit()

    log.info('All done!')
def do_normalize_vendors(config: Config, args: argparse.Namespace) -> None:
    db = Db(config.postgresql)
    log.info('Normalizing the vendors for activations')

    with db as dbsession:
        query = dbsession.chunked_query(Activation, chunk_size=args.chunk_size)
        num_records = query.count()

        if num_records == 0:
            log.info('-> No activation record in database')
            return None

        for chunk_number, chunk in enumerate(query, start=1):
            _normalize_chunk(chunk)
            dbsession.commit()
            progress(chunk_number * args.chunk_size, num_records)

    progress(num_records, num_records, end='\n')

    log.info('All done!')
def do_normalize_vendors(config: Config, args: argparse.Namespace) -> None:
    db = Db(config.postgresql)
    log.info('Normalizing the vendors for "updater branch selected" (%s) events',
             UpdaterBranchSelected.__event_uuid__)

    with db as dbsession:
        query = dbsession.chunked_query(UpdaterBranchSelected, chunk_size=args.chunk_size)
        num_records = query.count()

        if num_records == 0:
            log.info('-> No "updater branch selected" record in database')
            return None

        for chunk_number, chunk in enumerate(query, start=1):
            _normalize_chunk(chunk)
            dbsession.commit()
            progress(chunk_number * args.chunk_size, num_records)

    progress(num_records, num_records, end='\n')

    log.info('All done!')
class IntegrationTest:
    def ensure_tables(self, *models):
        for model in models:
            with self.db as dbsession:
                # Just check the query succeeds
                dbsession.query(model).count()

    def ensure_no_tables(self):
        for model in Base._decl_class_registry.values():
            if not isinstance(model, type) or not issubclass(model, Base):
                # Internal SQLAlchemy stuff
                continue

            with pytest.raises(ProgrammingError) as exc_info:
                with self.db as dbsession:
                    dbsession.query(model).all()

            assert model.__tablename__ in str(exc_info.value)
            assert 'UndefinedTable' in str(exc_info.value)

    def clear_queues(self):
        queues = self.redis.keys()

        if queues:
            self.redis.delete(*queues)

    def ensure_no_queues(self):
        for queue_name in self.config.queues:
            assert self.redis.llen(queue_name) == 0
            assert self.redis.llen(f'errors-{queue_name}') == 0

    def run_azafea(self):
        proc = multiprocessing.Process(target=self.run_subcommand, args=('run', ))
        proc.start()
        proc.join()

    def run_subcommand(self, *cmd):
        cli.run_command('-c', self.config_file, *cmd)

    @pytest.fixture(autouse=True)
    def setup_teardown(self, request):
        # Create a config file for the test, with a common base and some per-test options
        _, config_file = tempfile.mkstemp()

        with open(config_file, 'w') as f:
            f.write(toml.dumps({
                'main': {
                    'verbose': True,
                    'number_of_workers': 2,
                    'exit_on_empty_queues': True,
                },
                'postgresql': {
                    'database': 'azafea-tests',
                },
                'queues': {
                    request.node.name: {
                        'handler': self.handler_module,
                    },
                }
            }))

        self.config_file = config_file
        self.config = Config.from_file(self.config_file)
        self.db = Db(self.config.postgresql)
        self.redis = Redis(host=self.config.redis.host, port=self.config.redis.port,
                           password=self.config.redis.password)

        # Ensure we start with a clean slate
        self.ensure_no_queues()
        self.ensure_no_tables()

        # Run the test function
        yield

        # Ensure we finish with a clean DB
        self.db.drop_all()
        self.ensure_no_tables()

        # Deregister the models, tables and events from SQLAlchemy
        Base._decl_class_registry.clear()
        Base.metadata.clear()
        Base.metadata.dispatch._clear()

        # Deregister the handler modules so the next tests reimport them completely; not doing so
        # confuses SQLAlchemy, leading to the tables only being created for the first test. :(
        modules_to_deregister = []

        for queue_config in self.config.queues.values():
            handler_root = queue_config.handler.rsplit('.', 1)[0]

            for module in sys.modules:
                if module.startswith(handler_root):
                    modules_to_deregister.append(module)

        for module in modules_to_deregister:
            sys.modules.pop(module)

        # Ensure we finish with a clean Redis
        self.clear_queues()
        self.ensure_no_queues()

        # And remove the configuration file
        os.unlink(self.config_file)
def do_replay_unknown(config: Config, args: argparse.Namespace) -> None:
    db = Db(config.postgresql)

    log.info('Replaying the unknown singular events…')

    with db as dbsession:
        # Only replay events whose ids have since become known to the handler
        unknown_event_ids = dbsession.query(UnknownSingularEvent.event_id).distinct()
        unknown_event_ids = (str(i[0]) for i in unknown_event_ids)
        unknown_event_ids = [i for i in unknown_event_ids if singular_event_is_known(i)]
        query = dbsession.chunked_query(UnknownSingularEvent, chunk_size=args.chunk_size)
        query = query.filter(UnknownSingularEvent.event_id.in_(unknown_event_ids))
        query = query.reverse_chunks()
        total = query.count()
        progress(0, total)

        for chunk_number, chunk in enumerate(query, start=1):
            replay_unknown_singular_events(chunk)
            dbsession.commit()
            progress(chunk_number * args.chunk_size, total)

    progress(total, total, end='\n')

    log.info('Replaying the unknown aggregate events…')

    with db as dbsession:
        unknown_event_ids = dbsession.query(UnknownAggregateEvent.event_id).distinct()
        unknown_event_ids = (str(i[0]) for i in unknown_event_ids)
        unknown_event_ids = [i for i in unknown_event_ids if aggregate_event_is_known(i)]
        query = dbsession.chunked_query(UnknownAggregateEvent, chunk_size=args.chunk_size)
        query = query.filter(UnknownAggregateEvent.event_id.in_(unknown_event_ids))
        query = query.reverse_chunks()
        total = query.count()
        progress(0, total)

        # FIXME: Stop ignoring from coverage report once we actually have aggregate events
        for chunk_number, chunk in enumerate(query, start=1):  # pragma: no cover
            replay_unknown_aggregate_events(chunk)
            dbsession.commit()
            progress(chunk_number * args.chunk_size, total)

    progress(total, total, end='\n')

    log.info('Replaying the unknown sequences…')

    with db as dbsession:
        unknown_event_ids = dbsession.query(UnknownSequence.event_id).distinct()
        unknown_event_ids = (str(i[0]) for i in unknown_event_ids)
        unknown_event_ids = [i for i in unknown_event_ids if sequence_is_known(i)]
        query = dbsession.chunked_query(UnknownSequence, chunk_size=args.chunk_size)
        query = query.filter(UnknownSequence.event_id.in_(unknown_event_ids))
        query = query.reverse_chunks()
        total = query.count()
        progress(0, total)

        for chunk_number, chunk in enumerate(query, start=1):
            replay_unknown_sequences(chunk)
            dbsession.commit()
            progress(chunk_number * args.chunk_size, total)

    progress(total, total, end='\n')