Example #1
def do_parse_images(config: Config, args: argparse.Namespace) -> None:
    db = Db(config.postgresql)
    log.info('Parsing the image ids for old activations')

    with db as dbsession:
        query = dbsession.chunked_query(Machine, chunk_size=args.chunk_size)
        query = query.filter(Machine.image_product.is_(None))
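        # Iterate the chunks from the last one to the first, so that rows updated in a chunk
        # (which then stop matching the filter) don't shift the rows of the remaining chunks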
        query = query.reverse_chunks()
        num_records = query.count()

        if num_records == 0:
            log.info('-> No machine record with unparsed image ids')
            return None

        for chunk_number, chunk in enumerate(query, start=1):
            for machine in chunk:
                parsed_image = parse_endless_os_image(machine.image_id)

                for k, v in parsed_image.items():
                    setattr(machine, k, v)

            dbsession.commit()
            progress(chunk_number * args.chunk_size, num_records)

    progress(num_records, num_records, end='\n')

    log.info('All done!')
Example #2
def do_remove_empty_location_info(config: Config,
                                  args: argparse.Namespace) -> None:
    db = Db(config.postgresql)
    log.info('Remove location events with empty info')

    with db as dbsession:
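        # Match the location events whose info only contains empty strings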
        query = dbsession.query(LocationLabel).filter(
            LocationLabel.info == {
                'id': '',
                'city': '',
                'state': '',
                'street': '',
                'country': '',
                'facility': '',
            })
        num_records = query.count()

        if num_records == 0:
            log.info('-> No location events with empty info in database')
            return None

        query.delete()
        dbsession.commit()

    log.info('All done!')
Example #3
def do_replay_invalid(config: Config, args: argparse.Namespace) -> None:
    db = Db(config.postgresql)
    log.info('Replaying the invalid singular events…')

    with db as dbsession:
        query = dbsession.chunked_query(InvalidSingularEvent,
                                        chunk_size=args.chunk_size)
        query = query.reverse_chunks()
        total = query.count()

        for chunk_number, chunk in enumerate(query, start=1):
            replay_invalid_singular_events(chunk)
            dbsession.commit()
            progress(chunk_number * args.chunk_size, total)

    progress(total, total, end='\n')

    log.info('Replaying the invalid aggregate events…')

    with db as dbsession:
        query = dbsession.chunked_query(InvalidAggregateEvent,
                                        chunk_size=args.chunk_size)
        query = query.reverse_chunks()
        total = query.count()

        # FIXME: Stop ignoring from coverage report once we actually have aggregate events
        for chunk_number, chunk in enumerate(query,
                                             start=1):  # pragma: no cover
            replay_invalid_aggregate_events(chunk)
            dbsession.commit()
            progress(chunk_number * args.chunk_size, total)

    progress(total, total, end='\n')

    log.info('Replaying the invalid sequences…')

    with db as dbsession:
        query = dbsession.chunked_query(InvalidSequence,
                                        chunk_size=args.chunk_size)
        query = query.reverse_chunks()
        total = query.count()

        for chunk_number, chunk in enumerate(query, start=1):
            replay_invalid_sequences(chunk)
            dbsession.commit()
            progress(chunk_number * args.chunk_size, total)

    progress(total, total, end='\n')
Example #4
def do_dedupe_dualboots(config: Config, args: argparse.Namespace) -> None:
    db = Db(config.postgresql)
    log.info(
        'Deduplicating the metrics requests with multiple "dual boot" (%s) events',
        DualBootBooted.__event_uuid__)

    with db as dbsession:
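        # Find the ids of the metrics requests which have more than one dual boot event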
        query = dbsession.query(DualBootBooted.request_id)
        query = query.group_by(DualBootBooted.request_id)
        query = query.having(func.count(DualBootBooted.id) > 1)
        num_requests_with_dupes = query.count()

        if num_requests_with_dupes == 0:
            log.info(
                '-> No metrics requests with duplicate dual boot events found'
            )
            return None

        log.info(
            '-> Found %s metrics requests with duplicate dual boot events',
            num_requests_with_dupes)
        request_ids_with_dupes = [x[0] for x in query]

    previous_request_id = None

    for start in range(0, num_requests_with_dupes, args.chunk_size):
        stop = min(num_requests_with_dupes, start + args.chunk_size)
        request_id_chunk = request_ids_with_dupes[start:stop]

        with db as dbsession:
            query = dbsession.query(DualBootBooted)
            query = query.filter(
                DualBootBooted.request_id.in_(request_id_chunk))
            query = query.order_by(DualBootBooted.request_id,
                                   DualBootBooted.id)
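            # Ordering by request id means that, in the loop below, the first event of each
            # request is kept and every following one is deleted as a duplicate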

            for dualboot in query:
                if dualboot.request_id == previous_request_id:
                    dbsession.delete(dualboot)

                previous_request_id = dualboot.request_id

        progress(stop, num_requests_with_dupes)

    progress(num_requests_with_dupes, num_requests_with_dupes, end='\n')
    log.info('All done!')
Example #5
def do_set_open_durations(config: Config, args: argparse.Namespace) -> None:
    db = Db(config.postgresql)
    log.info('Setting open shell apps durations')

    with db as dbsession:
        query = dbsession.query(ShellAppIsOpen).filter(ShellAppIsOpen.duration == 0)
        total = query.count()

        if total == 0:
            log.info('-> No open app with unset duration')
            return None

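        # Compute the duration in seconds directly in SQL, as the difference between
        # the stop and start timestamps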
        query.update({
            ShellAppIsOpen.duration: func.extract(
                'epoch', ShellAppIsOpen.stopped_at - ShellAppIsOpen.started_at),
        }, synchronize_session=False)
        dbsession.commit()

    log.info('All done!')
Example #6
def do_replay_machine_live_usbs(config: Config, args: argparse.Namespace) -> None:
    db = Db(config.postgresql)
    log.info('Replaying the live USB events…')

    with db as dbsession:
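        # Gather the distinct ids of the machines which sent at least one live USB event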
        query = dbsession.query(Request.machine_id)
        query = query.join(LiveUsbBooted)
        query = query.distinct()

        total = query.count()

        for i, (machine_id, ) in enumerate(query, start=1):
            upsert_machine_live(dbsession, machine_id)

            if (i % args.chunk_size) == 0:
                dbsession.commit()
                progress(i, total)

        progress(total, total)
Example #7
def do_replay_machine_images(config: Config, args: argparse.Namespace) -> None:
    db = Db(config.postgresql)
    log.info('Replaying the image version events…')

    with db as dbsession:
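        # Gather the distinct (machine id, image id) pairs from the image version events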
        query = dbsession.query(Request.machine_id, ImageVersion.image_id)
        query = query.filter(Request.id == ImageVersion.request_id)
        query = query.distinct()

        total = query.count()

        for i, (machine_id, image_id) in enumerate(query, start=1):
            upsert_machine_image(dbsession, machine_id, image_id)

            if (i % args.chunk_size) == 0:
                dbsession.commit()
                progress(i, total)

        progress(total, total)
Example #8
def do_remove_os_info_quotes(config: Config, args: argparse.Namespace) -> None:
    db = Db(config.postgresql)
    log.info('Remove leading and trailing quotes from OS version fields')

    with db as dbsession:
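        # Find the OS versions recorded with a leading or trailing double quote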
        query = dbsession.query(OSVersion).filter(or_(
            OSVersion.version.startswith('"'), OSVersion.version.endswith('"'),
        ))
        num_records = query.count()

        if num_records == 0:
            log.info('-> No OS info with extra quotes in database')
            return None

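        # Strip the double quotes from both ends of the version, directly in SQL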
        query.update({
            OSVersion.version: func.btrim(OSVersion.version, '"'),
        }, synchronize_session=False)
        dbsession.commit()

    log.info('All done!')
Example #9
def do_normalize_vendors(config: Config, args: argparse.Namespace) -> None:
    db = Db(config.postgresql)
    log.info('Normalizing the vendors for activations')

    with db as dbsession:
        query = dbsession.chunked_query(Activation, chunk_size=args.chunk_size)
        num_records = query.count()

        if num_records == 0:
            log.info('-> No activation record in database')
            return None

        for chunk_number, chunk in enumerate(query, start=1):
            _normalize_chunk(chunk)
            dbsession.commit()
            progress(chunk_number * args.chunk_size, num_records)

    progress(num_records, num_records, end='\n')

    log.info('All done!')
Example #10
def do_normalize_vendors(config: Config, args: argparse.Namespace) -> None:
    db = Db(config.postgresql)
    log.info('Normalizing the vendors for "updater branch selected" (%s) events',
             UpdaterBranchSelected.__event_uuid__)

    with db as dbsession:
        query = dbsession.chunked_query(UpdaterBranchSelected, chunk_size=args.chunk_size)
        num_records = query.count()

        if num_records == 0:
            log.info('-> No "updater branch selected" record in database')
            return None

        for chunk_number, chunk in enumerate(query, start=1):
            _normalize_chunk(chunk)
            dbsession.commit()
            progress(chunk_number * args.chunk_size, num_records)

    progress(num_records, num_records, end='\n')

    log.info('All done!')
Example #11
    def setup_teardown(self, request):
        # Create a config file for the test, with a common base and some per-test options
        _, config_file = tempfile.mkstemp()

        with open(config_file, 'w') as f:
            f.write(toml.dumps({
                'main': {
                    'verbose': True,
                    'number_of_workers': 2,
                    'exit_on_empty_queues': True,
                },
                'postgresql': {
                    'database': 'azafea-tests',
                },
                'queues': {
                    request.node.name: {
                        'handler': self.handler_module,
                    },
                }
            }))

        self.config_file = config_file
        self.config = Config.from_file(self.config_file)

        self.db = Db(self.config.postgresql)
        self.redis = Redis(host=self.config.redis.host, port=self.config.redis.port,
                           password=self.config.redis.password)

        # Ensure we start with a clean slate
        self.ensure_no_queues()
        self.ensure_no_tables()

        # Run the test function
        yield

        # Ensure we finish with a clean DB
        self.db.drop_all()
        self.ensure_no_tables()

        # Deregister the models, tables and events from SQLAlchemy
        Base._decl_class_registry.clear()
        Base.metadata.clear()
        Base.metadata.dispatch._clear()

        # Deregister the handler modules so the next tests reimport them completely; not doing so
        # confuses SQLAlchemy, leading to the tables only being created for the first test. :(
        modules_to_deregister = []

        for queue_config in self.config.queues.values():
            handler_root = queue_config.handler.rsplit('.', 1)[0]

            for module in sys.modules:
                if module.startswith(handler_root):
                    modules_to_deregister.append(module)

        for module in modules_to_deregister:
            sys.modules.pop(module)

        # Ensure we finish with a clean Redis
        self.clear_queues()
        self.ensure_no_queues()

        # And remove the configuration file
        os.unlink(self.config_file)
Example #12
class IntegrationTest:
    def ensure_tables(self, *models):
        for model in models:
            with self.db as dbsession:
                # Just check the query succeeds
                dbsession.query(model).count()

    def ensure_no_tables(self):
        for model in Base._decl_class_registry.values():
            if not isinstance(model, type) or not issubclass(model, Base):
                # Internal SQLAlchemy stuff
                continue

            with pytest.raises(ProgrammingError) as exc_info:
                with self.db as dbsession:
                    dbsession.query(model).all()

            assert model.__tablename__ in str(exc_info.value)
            assert 'UndefinedTable' in str(exc_info.value)

    def clear_queues(self):
        queues = self.redis.keys()

        if queues:
            self.redis.delete(*queues)

    def ensure_no_queues(self):
        for queue_name in self.config.queues:
            assert self.redis.llen(queue_name) == 0
            assert self.redis.llen(f'errors-{queue_name}') == 0

    def run_azafea(self):
        proc = multiprocessing.Process(target=self.run_subcommand, args=('run', ))
        proc.start()
        proc.join()

    def run_subcommand(self, *cmd):
        cli.run_command('-c', self.config_file, *cmd)

    @pytest.fixture(autouse=True)
    def setup_teardown(self, request):
        # Create a config file for the test, with a common base and some per-test options
        _, config_file = tempfile.mkstemp()

        with open(config_file, 'w') as f:
            f.write(toml.dumps({
                'main': {
                    'verbose': True,
                    'number_of_workers': 2,
                    'exit_on_empty_queues': True,
                },
                'postgresql': {
                    'database': 'azafea-tests',
                },
                'queues': {
                    request.node.name: {
                        'handler': self.handler_module,
                    },
                }
            }))

        self.config_file = config_file
        self.config = Config.from_file(self.config_file)

        self.db = Db(self.config.postgresql)
        self.redis = Redis(host=self.config.redis.host, port=self.config.redis.port,
                           password=self.config.redis.password)

        # Ensure we start with a clean slate
        self.ensure_no_queues()
        self.ensure_no_tables()

        # Run the test function
        yield

        # Ensure we finish with a clean DB
        self.db.drop_all()
        self.ensure_no_tables()

        # Deregister the models, tables and events from SQLAlchemy
        Base._decl_class_registry.clear()
        Base.metadata.clear()
        Base.metadata.dispatch._clear()

        # Deregister the handler modules so the next tests reimport them completely; not doing so
        # confuses SQLAlchemy, leading to the tables only being created for the first test. :(
        modules_to_deregister = []

        for queue_config in self.config.queues.values():
            handler_root = queue_config.handler.rsplit('.', 1)[0]

            for module in sys.modules:
                if module.startswith(handler_root):
                    modules_to_deregister.append(module)

        for module in modules_to_deregister:
            sys.modules.pop(module)

        # Ensure we finish with a clean Redis
        self.clear_queues()
        self.ensure_no_queues()

        # And remove the configuration file
        os.unlink(self.config_file)
Example #13
def do_replay_unknown(config: Config, args: argparse.Namespace) -> None:
    db = Db(config.postgresql)
    log.info('Replaying the unknown singular events…')

    with db as dbsession:
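        # Only replay the events whose ids are now known to match a registered event model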
        unknown_event_ids = dbsession.query(UnknownSingularEvent.event_id).distinct()
        unknown_event_ids = (str(i[0]) for i in unknown_event_ids)
        unknown_event_ids = [i for i in unknown_event_ids if singular_event_is_known(i)]

        query = dbsession.chunked_query(UnknownSingularEvent, chunk_size=args.chunk_size)
        query = query.filter(UnknownSingularEvent.event_id.in_(unknown_event_ids))
        query = query.reverse_chunks()
        total = query.count()

        progress(0, total)
        for chunk_number, chunk in enumerate(query, start=1):
            replay_unknown_singular_events(chunk)
            dbsession.commit()
            progress(chunk_number * args.chunk_size, total)

    progress(total, total, end='\n')

    log.info('Replaying the unknown aggregate events…')

    with db as dbsession:
        unknown_event_ids = dbsession.query(UnknownAggregateEvent.event_id).distinct()
        unknown_event_ids = (str(i[0]) for i in unknown_event_ids)
        unknown_event_ids = [i for i in unknown_event_ids if aggregate_event_is_known(i)]

        query = dbsession.chunked_query(UnknownAggregateEvent, chunk_size=args.chunk_size)
        query = query.filter(UnknownAggregateEvent.event_id.in_(unknown_event_ids))
        query = query.reverse_chunks()
        total = query.count()

        progress(0, total)
        # FIXME: Stop ignoring from coverage report once we actually have aggregate events
        for chunk_number, chunk in enumerate(query, start=1):  # pragma: no cover
            replay_unknown_aggregate_events(chunk)
            dbsession.commit()
            progress(chunk_number * args.chunk_size, total)

    progress(total, total, end='\n')

    log.info('Replaying the unknown sequences…')

    with db as dbsession:
        unknown_event_ids = dbsession.query(UnknownSequence.event_id).distinct()
        unknown_event_ids = (str(i[0]) for i in unknown_event_ids)
        unknown_event_ids = [i for i in unknown_event_ids if sequence_is_known(i)]

        query = dbsession.chunked_query(UnknownSequence, chunk_size=args.chunk_size)
        query = query.filter(UnknownSequence.event_id.in_(unknown_event_ids))
        query = query.reverse_chunks()
        total = query.count()

        progress(0, total)
        for chunk_number, chunk in enumerate(query, start=1):
            replay_unknown_sequences(chunk)
            dbsession.commit()
            progress(chunk_number * args.chunk_size, total)

    progress(total, total, end='\n')