def extract_custom(
    self,
    output: MutableMapping[str, Any],
    event: InsertEvent,
    metadata: KafkaMessageMetadata,
) -> None:
    data = event.get("data", {})

    output["message"] = _unicodify(event["message"])

    # USER REQUEST GEO
    user = data.get("user", data.get("sentry.interfaces.User", None)) or {}
    extract_user(output, user)

    geo = user.get("geo", None) or {}
    self.extract_geo(output, geo)

    request = data.get("request", data.get("sentry.interfaces.Http", None)) or {}
    http_data: MutableMapping[str, Any] = {}
    extract_http(http_data, request)
    output["http_method"] = http_data["http_method"]
    output["http_referer"] = http_data["http_referer"]

    output["primary_hash"] = _hashify(event["primary_hash"])
    output["hierarchical_hashes"] = list(
        _hashify(x) for x in data.get("hierarchical_hashes") or ()
    )

    output["culprit"] = _unicodify(data.get("culprit", None))
    output["type"] = _unicodify(data.get("type", None))
    output["title"] = _unicodify(data.get("title", None))
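# Illustrative only: a minimal sketch of the user/geo portion of the event body
# read above. The keys follow the Sentry user interface schema; the exact columns
# written by extract_user/extract_geo are defined elsewhere and not shown here.
#
#   data["user"] = {
#       "id": "123",
#       "username": "someone",
#       "email": "someone@example.com",
#       "ip_address": "127.0.0.1",
#       "geo": {"country_code": "US", "region": "CA", "city": "San Francisco"},
#   }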
def process_unmerge(
    message: Mapping[str, Any],
    all_columns: Sequence[FlattenedColumn],
    state_name: ReplacerState,
) -> Optional[Replacement]:
    hashes = message["hashes"]
    if not hashes:
        return None

    assert all(isinstance(h, str) for h in hashes)

    timestamp = datetime.strptime(
        message["datetime"], settings.PAYLOAD_DATETIME_FORMAT
    )
    all_column_names = [c.escaped for c in all_columns]
    select_columns = map(
        lambda i: i if i != "group_id" else str(message["new_group_id"]),
        all_column_names,
    )

    where = """\
        PREWHERE group_id = %(previous_group_id)s
        WHERE project_id = %(project_id)s
        AND primary_hash IN (%(hashes)s)
        AND received <= CAST('%(timestamp)s' AS DateTime)
        AND NOT deleted
    """

    count_query_template = (
        """\
        SELECT count()
        FROM %(table_name)s FINAL
    """
        + where
    )

    insert_query_template = (
        """\
        INSERT INTO %(table_name)s (%(all_columns)s)
        SELECT %(select_columns)s
        FROM %(table_name)s FINAL
    """
        + where
    )

    query_args = {
        "all_columns": ", ".join(all_column_names),
        "select_columns": ", ".join(select_columns),
        "previous_group_id": message["previous_group_id"],
        "project_id": message["project_id"],
        "timestamp": timestamp.strftime(DATETIME_FORMAT),
    }

    if state_name == ReplacerState.ERRORS:
        query_args["hashes"] = ", ".join(
            ["'%s'" % str(uuid.UUID(_hashify(h))) for h in hashes]
        )
    else:
        query_args["hashes"] = ", ".join("'%s'" % _hashify(h) for h in hashes)

    query_time_flags = (NEEDS_FINAL, message["project_id"])

    return Replacement(
        count_query_template, insert_query_template, query_args, query_time_flags
    )
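# Illustrative only: a minimal sketch of the unmerge message consumed above,
# inferred from the keys the function reads. The producer-side payload (extra
# fields, exact datetime encoding) is an assumption, not taken from this source.
#
#   example_unmerge_message = {
#       "project_id": 1,
#       "previous_group_id": 10,
#       "new_group_id": 20,
#       "hashes": ["d41d8cd98f00b204e9800998ecf8427e"],
#       "datetime": "2021-01-01T00:00:00.000000Z",  # must parse with settings.PAYLOAD_DATETIME_FORMAT
#   }
#
# process_unmerge(example_unmerge_message, all_columns, ReplacerState.ERRORS)
# returns a Replacement whose INSERT ... SELECT rewrites the matching rows under
# new_group_id, and whose count query sizes the replacement; both share the
# WHERE clause built above.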
def _where_clause(self) -> str:
    if self.state_name == ReplacerState.ERRORS:
        hashes = ", ".join(
            ["'%s'" % str(uuid.UUID(_hashify(h))) for h in self.hashes]
        )
    else:
        hashes = ", ".join("'%s'" % _hashify(h) for h in self.hashes)

    timestamp = self.timestamp.strftime(DATETIME_FORMAT)
    return f"""\
def _convert_hash(
    hash: str, state_name: ReplacerState, convert_types: bool = False
) -> str:
    if state_name == ReplacerState.ERRORS or state_name == ReplacerState.ERRORS_V2:
        if convert_types:
            return "toUUID('%s')" % str(uuid.UUID(_hashify(hash)))
        else:
            return "'%s'" % str(uuid.UUID(_hashify(hash)))
    else:
        if convert_types:
            return "toFixedString('%s', 32)" % _hashify(hash)
        else:
            return "'%s'" % _hashify(hash)
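# A rough illustration of the literals _convert_hash produces, assuming _hashify
# returns a 32-character hex digest unchanged when given one (its definition is
# not shown here); `some_other_state` stands in for any non-errors ReplacerState.
#
#   h = "d41d8cd98f00b204e9800998ecf8427e"
#   _convert_hash(h, ReplacerState.ERRORS)                        # "'d41d8cd9-8f00-b204-e980-0998ecf8427e'"
#   _convert_hash(h, ReplacerState.ERRORS, convert_types=True)    # "toUUID('d41d8cd9-8f00-b204-e980-0998ecf8427e')"
#   _convert_hash(h, some_other_state)                            # "'d41d8cd98f00b204e9800998ecf8427e'"
#   _convert_hash(h, some_other_state, convert_types=True)        # "toFixedString('d41d8cd98f00b204e9800998ecf8427e', 32)"
#
# The two spellings are consistent with the column types implied by toUUID vs
# toFixedString(..., 32) for primary_hash in the respective storages.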
def extract_custom(
    self,
    output: MutableMapping[str, Any],
    event: InsertEvent,
    metadata: KafkaMessageMetadata,
) -> None:
    data = event.get("data", {})

    user_dict = data.get("user", data.get("sentry.interfaces.User", None)) or {}
    user_data: MutableMapping[str, Any] = {}
    extract_user(user_data, user_dict)
    output["user_name"] = user_data["username"]
    output["user_id"] = user_data["user_id"]
    output["user_email"] = user_data["email"]

    ip_address = _ensure_valid_ip(user_data["ip_address"])
    if ip_address:
        if ip_address.version == 4:
            output["ip_address_v4"] = str(ip_address)
        elif ip_address.version == 6:
            output["ip_address_v6"] = str(ip_address)

    contexts: MutableMapping[str, Any] = _as_dict_safe(data.get("contexts", None))
    geo = user_dict.get("geo", {})
    if "geo" not in contexts and isinstance(geo, dict):
        contexts["geo"] = geo

    request = data.get("request", data.get("sentry.interfaces.Http", None)) or {}
    http_data: MutableMapping[str, Any] = {}
    extract_http(http_data, request)
    output["http_method"] = http_data["http_method"]
    output["http_referer"] = http_data["http_referer"]

    # _as_dict_safe may not return a reference to the entry in the data
    # dictionary in some cases.
    data["contexts"] = contexts

    output["message"] = _unicodify(event["message"])

    output["primary_hash"] = str(uuid.UUID(_hashify(event["primary_hash"])))
    output["hierarchical_hashes"] = list(
        str(uuid.UUID(_hashify(x))) for x in data.get("hierarchical_hashes") or ()
    )

    output["culprit"] = _unicodify(data.get("culprit", ""))
    output["type"] = _unicodify(data.get("type", ""))
    output["title"] = _unicodify(data.get("title", ""))
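# Hypothetical input/output sketch for the IP handling above, assuming
# _ensure_valid_ip parses the string into an ip_address-like object with a
# `version` attribute and returns None for invalid values (an assumption; its
# definition is not shown here):
#
#   {"user": {"ip_address": "127.0.0.1"}}  -> output["ip_address_v4"] == "127.0.0.1"
#   {"user": {"ip_address": "::1"}}        -> output["ip_address_v6"] == "::1"
#   {"user": {"ip_address": "not-an-ip"}}  -> neither IP column is written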
def extract_common(
    self,
    output: MutableMapping[str, Any],
    event: InsertEvent,
    metadata: KafkaMessageMetadata,
) -> None:
    # Properties we get from the top level of the message payload
    output["platform"] = _unicodify(event["platform"])
    output["primary_hash"] = _hashify(event["primary_hash"])

    # Properties we get from the "data" dict, which is the actual event body.
    data = event.get("data", {})
    received = _collapse_uint32(int(data["received"]))
    output["received"] = (
        datetime.utcfromtimestamp(received) if received is not None else None
    )

    output["culprit"] = _unicodify(data.get("culprit", None))
    output["type"] = _unicodify(data.get("type", None))
    output["version"] = _unicodify(data.get("version", None))
    output["title"] = _unicodify(data.get("title", None))
    output["location"] = _unicodify(data.get("location", None))

    module_names = []
    module_versions = []
    modules = data.get("modules", {})
    if isinstance(modules, dict):
        for name, version in modules.items():
            module_names.append(_unicodify(name))
            # Being extra careful about a stray (incorrect by spec) `null`
            # value blowing up the write.
            module_versions.append(_unicodify(version) or "")

    output["modules.name"] = module_names
    output["modules.version"] = module_versions
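# Worked example for the modules extraction above, assuming _unicodify passes
# plain strings through and returns None for None (values here are made up):
#
#   data["modules"] = {"django": "2.2.24", "celery": None}
#
# yields parallel arrays, with the stray null coerced to an empty string:
#
#   output["modules.name"]    == ["django", "celery"]
#   output["modules.version"] == ["2.2.24", ""]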
def extract_common(self, output, message, data):
    # Properties we get from the top level of the message payload
    output['platform'] = _unicodify(message['platform'])
    output['primary_hash'] = _hashify(message['primary_hash'])

    # Properties we get from the "data" dict, which is the actual event body.
    received = _collapse_uint32(int(data['received']))
    output['received'] = datetime.utcfromtimestamp(
        received) if received is not None else None

    output['culprit'] = _unicodify(data.get('culprit', None))
    output['type'] = _unicodify(data.get('type', None))
    output['version'] = _unicodify(data.get('version', None))
    output['title'] = _unicodify(data.get('title', None))
    output['location'] = _unicodify(data.get('location', None))

    # The following concerns the change to message/search_message
    # There are 2 Scenarios:
    # Pre-rename:
    #  - Payload contains:
    #       "message": "a long search message"
    #  - Does NOT contain a `search_message` property
    #  - "message" value saved in `message` column
    #  - `search_message` column nonexistent or Null
    # Post-rename:
    #  - Payload contains:
    #       "search_message": "a long search message"
    #  - Optionally the payload's "data" dict (event body) contains:
    #       "message": "short message"
    #  - "search_message" value stored in `search_message` column
    #  - "message" value stored in `message` column
    #
    output['search_message'] = _unicodify(message.get('search_message', None))
    if output['search_message'] is None:
        # Pre-rename scenario, we expect to find "message" at the top level
        output['message'] = _unicodify(message['message'])
    else:
        # Post-rename scenario, we check in case we have the optional
        # "message" in the event body.
        output['message'] = _unicodify(data.get('message', None))

    module_names = []
    module_versions = []
    modules = data.get('modules', {})
    if isinstance(modules, dict):
        for name, version in modules.items():
            module_names.append(_unicodify(name))
            # Being extra careful about a stray (incorrect by spec) `null`
            # value blowing up the write.
            module_versions.append(_unicodify(version) or '')

    output['modules.name'] = module_names
    output['modules.version'] = module_versions
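# The two message/search_message scenarios described in the comments above, as
# concrete payloads (field values are made up for illustration):
#
#   Pre-rename:  message = {"message": "a long search message", ...}
#                -> output['search_message'] is None
#                -> output['message'] == "a long search message"
#
#   Post-rename: message = {"search_message": "a long search message", ...}
#                data    = {"message": "short message", ...}
#                -> output['search_message'] == "a long search message"
#                -> output['message'] == "short message"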
def process_unmerge(message, all_column_names) -> Optional[Replacement]:
    hashes = message["hashes"]
    if not hashes:
        return None

    assert all(isinstance(h, str) for h in hashes)

    timestamp = datetime.strptime(
        message["datetime"], settings.PAYLOAD_DATETIME_FORMAT
    )
    select_columns = map(
        lambda i: i if i != "group_id" else str(message["new_group_id"]),
        all_column_names,
    )

    where = """\
        PREWHERE group_id = %(previous_group_id)s
        WHERE project_id = %(project_id)s
        AND primary_hash IN (%(hashes)s)
        AND received <= CAST('%(timestamp)s' AS DateTime)
        AND NOT deleted
    """

    count_query_template = (
        """\
        SELECT count()
        FROM %(dist_read_table_name)s FINAL
    """
        + where
    )

    insert_query_template = (
        """\
        INSERT INTO %(dist_write_table_name)s (%(all_columns)s)
        SELECT %(select_columns)s
        FROM %(dist_read_table_name)s FINAL
    """
        + where
    )

    query_args = {
        "all_columns": ", ".join(all_column_names),
        "select_columns": ", ".join(select_columns),
        "previous_group_id": message["previous_group_id"],
        "project_id": message["project_id"],
        "hashes": ", ".join("'%s'" % _hashify(h) for h in hashes),
        "timestamp": timestamp.strftime(DATETIME_FORMAT),
    }

    query_time_flags = (NEEDS_FINAL, message["project_id"])

    return Replacement(
        count_query_template, insert_query_template, query_args, query_time_flags
    )
def process_unmerge(message, all_column_names):
    hashes = message['hashes']
    if not hashes:
        return None

    assert all(isinstance(h, str) for h in hashes)

    timestamp = datetime.strptime(
        message['datetime'], settings.PAYLOAD_DATETIME_FORMAT)

    select_columns = map(
        lambda i: i if i != 'group_id' else str(message['new_group_id']),
        all_column_names)

    where = """\
        WHERE project_id = %(project_id)s
        AND group_id = %(previous_group_id)s
        AND primary_hash IN (%(hashes)s)
        AND received <= CAST('%(timestamp)s' AS DateTime)
        AND NOT deleted
    """

    count_query_template = """\
        SELECT count()
        FROM %(dist_read_table_name)s FINAL
    """ + where

    insert_query_template = """\
        INSERT INTO %(dist_write_table_name)s (%(all_columns)s)
        SELECT %(select_columns)s
        FROM %(dist_read_table_name)s FINAL
    """ + where

    query_args = {
        'all_columns': ', '.join(all_column_names),
        'select_columns': ', '.join(select_columns),
        'previous_group_id': message['previous_group_id'],
        'project_id': message['project_id'],
        'hashes': ", ".join("'%s'" % _hashify(h) for h in hashes),
        'timestamp': timestamp.strftime(DATETIME_FORMAT),
    }

    query_time_flags = (NEEDS_FINAL, message['project_id'])

    return (count_query_template, insert_query_template, query_args, query_time_flags)