def upsert_machine_dualboot(dbsession: DbSession, machine_id: str) -> None:
    """Mark the machine identified by ``machine_id`` as a dual-boot one.

    Inserts a new ``Machine`` row with ``dualboot=True``, or flips the flag on
    the existing row when the machine is already known (upsert on the machine
    id unique constraint).
    """
    statement = (
        insert(Machine.__table__)
        .values(machine_id=machine_id, dualboot=True)
        .on_conflict_do_update(
            constraint='uq_metrics_machine_machine_id',
            set_={'dualboot': True},
        )
    )
    dbsession.connection().execute(statement)
def upsert_machine_location(dbsession: DbSession, machine_id: str,
                            info: Union[Dict[str, Any], List[Any]]) -> None:
    """Update the relevant Machine record with information from a LocationLabel event.

    Although the info is guaranteed to be a `Dict[str, str]` when received from
    the client, the database column is of type JSONB, so the `LocationLabel.info`
    field has a more general type. As a result, the function parameter also has
    a more general type, and this runtime type check is needed to satisfy mypy.
    """
    if not isinstance(info, dict):  # pragma: no cover
        log.warning('Data received for machine location is not a dict: %r', info)
        return

    # Only keep the info entries which map to an actual location_* column
    known_columns = {column.name for column in Machine.__table__.columns}
    location_values = {}

    for key, value in info.items():
        column_name = f'location_{key}'
        if column_name in known_columns:
            location_values[column_name] = value

    statement = (
        insert(Machine.__table__)
        .values(machine_id=machine_id, location=info, **location_values)
        .on_conflict_do_update(
            constraint='uq_metrics_machine_machine_id',
            set_=location_values,
        )
    )
    dbsession.connection().execute(statement)
def new_aggregate_event(request: Request, event_variant: GLib.Variant,
                        dbsession: DbSession) -> Optional[AggregateEvent]:
    """Build an aggregate event from its variant and add it to the session.

    Returns None when the event id is in IGNORED_EVENTS; otherwise returns the
    event instance that was added to ``dbsession``.
    """
    aggregate_id = str(UUID(bytes=get_bytes(event_variant.get_child_value(1))))

    if aggregate_id in IGNORED_EVENTS:
        return None

    user_id = event_variant.get_child_value(0).get_uint32()
    count = event_variant.get_child_value(2).get_int64()
    relative_timestamp = event_variant.get_child_value(3).get_int64()
    payload = event_variant.get_child_value(4)
    occured_at = get_event_datetime(request.absolute_timestamp,
                                    request.relative_timestamp,
                                    relative_timestamp)

    # We don't have any aggregate event yet, therefore it can only be unknown
    # Mypy complains here, even though this should be fine:
    # https://github.com/dropbox/sqlalchemy-stubs/issues/97
    event = UnknownAggregateEvent(
        request=request, user_id=user_id,  # type: ignore
        occured_at=occured_at, count=count, event_id=aggregate_id,
        payload=payload)
    dbsession.add(event)

    return event
def id_from_serialized(cls, serialized: bytes, dbsession: DbSession) -> int:
    """Upsert the ping configuration found in ``serialized`` and return its id.

    The serialized record is a JSON object; only the keys which map to actual
    mapped attributes of this model are kept.
    """
    data = json.loads(serialized.decode('utf-8'))
    mapped_attrs = inspect(cls).attrs
    data = {key: value for (key, value) in data.items() if key in mapped_attrs}
    data['vendor'] = normalize_vendor(data.get('vendor', 'unknown'))

    # Let's make the case of a missing "image" fail at the SQL level
    if 'image' in data:  # pragma: no branch
        data.update(**parse_endless_os_image(data['image']))

    # Postgresql's 'INSERT … ON CONFLICT …' is not available at the ORM layer, so let's
    # drop down to the SQL layer
    statement = (
        insert(PingConfiguration.__table__)
        .values(**data)
        .returning(PingConfiguration.__table__.c.id)
    )

    # We have to use 'ON CONFLICT … DO UPDATE …' because 'ON CONFLICT DO NOTHING' does not
    # return anything, and we need to get the id back; in addition we have to actually
    # update something, anything, so let's arbitrarily update the image to its existing value
    statement = statement.on_conflict_do_update(
        constraint='uq_ping_configuration_v1_image_vendor_product_dualboot',
        set_={'image': data['image']},
    )

    result = dbsession.connection().execute(statement)
    dbsession.commit()

    return result.first()[0]
def new_singular_event(request: Request, event_variant: GLib.Variant,
                       dbsession: DbSession) -> Optional[SingularEvent]:
    """Build a singular event from its variant and add it to the session.

    Returns None for events in IGNORED_EVENTS (and for empty-payload errors on
    events listed in IGNORED_EMPTY_PAYLOAD_ERRORS). Otherwise returns the
    instance added to ``dbsession``: a concrete event model, an
    UnknownSingularEvent for unrecognized ids, or an InvalidSingularEvent when
    building the model raised.
    """
    event_id = str(UUID(bytes=get_bytes(event_variant.get_child_value(1))))

    if event_id in IGNORED_EVENTS:
        return None

    user_id = event_variant.get_child_value(0).get_uint32()
    relative_timestamp = event_variant.get_child_value(2).get_int64()
    payload = event_variant.get_child_value(3)
    occured_at = get_event_datetime(request.absolute_timestamp,
                                    request.relative_timestamp,
                                    relative_timestamp)

    event_model = SINGULAR_EVENT_MODELS.get(event_id)

    if event_model is None:
        # Mypy complains here, even though this should be fine:
        # https://github.com/dropbox/sqlalchemy-stubs/issues/97
        event = UnknownSingularEvent(
            request=request, user_id=user_id,  # type: ignore
            occured_at=occured_at, event_id=event_id, payload=payload)
        dbsession.add(event)
        return event

    try:
        # Mypy complains here, even though this should be fine:
        # https://github.com/dropbox/sqlalchemy-stubs/issues/97
        event = event_model(
            request=request, user_id=user_id,  # type: ignore
            occured_at=occured_at, payload=payload)

    except Exception as e:
        if isinstance(e, EmptyPayloadError) and event_id in IGNORED_EMPTY_PAYLOAD_ERRORS:
            return None

        log.exception('An error occured while processing the event:')

        # Mypy complains here, even though this should be fine:
        # https://github.com/dropbox/sqlalchemy-stubs/issues/97
        event = InvalidSingularEvent(
            request=request, user_id=user_id,  # type: ignore
            occured_at=occured_at, event_id=event_id, payload=payload,
            error=str(e))

    dbsession.add(event)

    return event
def process(dbsession: DbSession, record: bytes) -> None:
    """Deserialize a ping v1 record, link it to its configuration and add it
    to the database session."""
    log.debug('Processing ping v1 record: %s', record)

    config_id = PingConfiguration.id_from_serialized(record, dbsession)

    ping = Ping.from_serialized(record)
    ping.config_id = config_id
    dbsession.add(ping)

    log.debug('Inserting ping record:\n%s', ping)
def upsert_machine_image(dbsession: DbSession, machine_id: str, image_id: str) -> None:
    """Record the OS image of the machine identified by ``machine_id``.

    Inserts a new ``Machine`` row, or updates the image-related columns on the
    existing row (upsert on the machine id unique constraint).
    """
    # Keep the raw image id, then merge in the parsed components (the parsed
    # values take precedence, as in the original dict-literal merge)
    image_values: Dict[str, Any] = {'image_id': image_id}
    image_values.update(parse_endless_os_image(image_id))

    statement = (
        insert(Machine.__table__)
        .values(machine_id=machine_id, **image_values)
        .on_conflict_do_update(
            constraint='uq_metrics_machine_machine_id',
            set_=image_values,
        )
    )
    dbsession.connection().execute(statement)
class MachineIdsByDay(View):
    """View exposing, per day, the distinct machine ids that sent a request.

    Built from the Request table: each row pairs the date part of
    ``Request.received_at`` with a ``machine_id``, de-duplicated.
    """

    __tablename__ = 'machine_ids_by_day'

    # The query backing the view: (day, machine_id), distinct
    __query__ = DbSession().query(
        Request.received_at.cast(Date).label('day'),
        Request.machine_id.label('machine_id')).distinct()

    # Index on 'day' to speed up per-day lookups
    __table_args__ = (Index('ix_machine_ids_by_day_day', 'day'), )
def process(dbsession: DbSession, record: bytes) -> None:
    """Parse a metrics v2 record and insert its request and events.

    Builds the Request from the raw record, adds it and each of its singular,
    aggregate and sequence events to the session, then commits. A request that
    was already processed trips the sha512 unique constraint on commit and is
    silently skipped.
    """
    log.debug('Processing metric v2 record: %s', record)

    request_builder = RequestBuilder.parse_bytes(record)
    request = request_builder.build_request()
    dbsession.add(request)

    for event_variant in request_builder.singulars:
        singular_event = new_singular_event(request, event_variant, dbsession)

        if singular_event is not None:
            log.debug('Inserting singular metric:\n%s', singular_event)

    for event_variant in request_builder.aggregates:
        aggregate_event = new_aggregate_event(request, event_variant, dbsession)

        # Fix: like the singular/sequence loops, skip logging for ignored
        # events, for which new_aggregate_event returns None — previously this
        # logged a misleading "Inserting aggregate metric: None"
        if aggregate_event is not None:
            log.debug('Inserting aggregate metric:\n%s', aggregate_event)

    for event_variant in request_builder.sequences:
        sequence_event = new_sequence_event(request, event_variant, dbsession)

        if sequence_event is not None:
            log.debug('Inserting sequence event:\n%s', sequence_event)

    try:
        dbsession.commit()
    except IntegrityError as e:
        # FIXME: This is fragile, can we do better?
        if "uq_metrics_request_v2_sha512" in str(e):
            log.debug('Request had already been processed in the past')
            return

        # FIXME: Given how the request is built, this shouldn't ever happen; if it does though, we
        # absolutely need an integration test
        raise  # pragma: no cover
def process(dbsession: DbSession, record: bytes) -> None:
    """Deserialize an activation v1 record and add it to the database session."""
    log.debug('Processing activation v1 record: %s', record)

    activation = Activation.from_serialized(record)
    dbsession.add(activation)

    log.debug('Inserting activation record:\n%s', activation)
def new_sequence_event(
    request: Request, sequence_variant: GLib.Variant, dbsession: DbSession
) -> Optional[Union[SequenceEvent, InvalidSequence, UnknownSequence]]:
    """Build a sequence event from its variant and add it to the session.

    Returns None for events in IGNORED_EVENTS (and for empty-payload errors on
    events listed in IGNORED_EMPTY_PAYLOAD_ERRORS). Otherwise returns the
    instance added to ``dbsession``: a concrete sequence model, an
    UnknownSequence for unrecognized ids, or an InvalidSequence when the
    sequence is malformed or building the model raised.
    """
    event_id = str(UUID(bytes=get_bytes(sequence_variant.get_child_value(1))))

    if event_id in IGNORED_EVENTS:
        return None

    user_id = sequence_variant.get_child_value(0).get_uint32()
    events = sequence_variant.get_child_value(2)
    num_events = events.n_children()

    if num_events < 2:
        error = f'Sequence must have at least 2 elements, but only had {num_events}'

        # Mypy complains here, even though this should be fine:
        # https://github.com/dropbox/sqlalchemy-stubs/issues/97
        sequence = InvalidSequence(
            request=request, user_id=user_id,  # type: ignore
            event_id=event_id, payload=events, error=error)
        dbsession.add(sequence)
        return sequence

    start_variant, *_progress_variants, stop_variant = get_child_values(events)

    # For now, we ignore progress events entirely. We also assume the stop event always has a null
    # payload. This works for most sequence events we care about in priority.
    # TODO: Figure this out for the more complex events
    start_relative_timestamp = start_variant.get_child_value(0).get_int64()
    payload = start_variant.get_child_value(1)
    started_at = get_event_datetime(request.absolute_timestamp,
                                    request.relative_timestamp,
                                    start_relative_timestamp)

    stop_relative_timestamp = stop_variant.get_child_value(0).get_int64()
    stopped_at = get_event_datetime(request.absolute_timestamp,
                                    request.relative_timestamp,
                                    stop_relative_timestamp)

    event_model = SEQUENCE_EVENT_MODELS.get(event_id)

    if event_model is None:
        # Mypy complains here, even though this should be fine:
        # https://github.com/dropbox/sqlalchemy-stubs/issues/97
        sequence = UnknownSequence(
            request=request, user_id=user_id,  # type: ignore
            event_id=event_id, payload=events)
        dbsession.add(sequence)
        return sequence

    try:
        # Mypy complains here, even though this should be fine:
        # https://github.com/dropbox/sqlalchemy-stubs/issues/97
        sequence = event_model(
            request=request, user_id=user_id,  # type: ignore
            started_at=started_at, stopped_at=stopped_at, payload=payload)

    except Exception as e:
        if isinstance(e, EmptyPayloadError) and event_id in IGNORED_EMPTY_PAYLOAD_ERRORS:
            return None

        log.exception('An error occured while processing the sequence:')

        # Mypy complains here, even though this should be fine:
        # https://github.com/dropbox/sqlalchemy-stubs/issues/97
        sequence = InvalidSequence(
            request=request, user_id=user_id,  # type: ignore
            event_id=event_id, payload=events, error=str(e))

    dbsession.add(sequence)

    return sequence