def save_raw_and_processed(self, raw_crash, dumps, processed_crash, crash_id):
    """Combine raw and processed crash data into one crash report and save it.

    Keys from both ``raw_crash`` and ``processed_crash`` are renamed from
    their ``in_database_name`` to their public ``name`` (looked up in
    ``self._all_fields``), then the combined dict is reduced against
    CRASH_REPORT_JSON_SCHEMA and handed to ``self.save_processed``.

    Note: ``dumps`` and ``crash_id`` are accepted for interface
    compatibility but not used here.
    """
    crash_report = {}

    # TODO Opportunity of optimization: We could inspect
    # CRASH_REPORT_JSON_SCHEMA and get a list of all (recursive) keys that
    # are in there and use that to limit the two following loops to not
    # bother filling up `crash_report` with keys that will never be needed.

    # Rename fields in raw_crash
    raw_fields_map = {
        x['in_database_name']: x['name']
        for x in self._all_fields.values()
        if x['namespace'] == 'raw_crash'
    }
    for key, val in raw_crash.items():
        crash_report[raw_fields_map.get(key, key)] = val

    # Rename fields in processed_crash
    processed_fields_map = {
        x['in_database_name']: x['name']
        for x in self._all_fields.values()
        if x['namespace'] == 'processed_crash'
    }
    for key, val in processed_crash.items():
        crash_report[processed_fields_map.get(key, key)] = val

    # Validate crash_report by reducing it to the keys the schema allows
    crash_report = json_schema_reducer.make_reduced_dict(
        CRASH_REPORT_JSON_SCHEMA, crash_report)
    self.save_processed(crash_report)
def save_raw_and_processed(self, raw_crash, dumps, processed_crash, crash_id):
    """Save the raw and processed crash data.

    For Telemetry, we combine the raw and processed crash data into a
    "crash report" which we save to an S3 bucket for the Telemetry
    system to pick up later.

    Keys from both dicts are renamed from their ``in_database_name`` to
    their public ``name`` (looked up in ``self._all_fields``) before the
    combined dict is reduced against CRASH_REPORT_JSON_SCHEMA.

    Note: ``dumps`` and ``crash_id`` are accepted for interface
    compatibility but not used here.
    """
    crash_report = {}

    # TODO Opportunity of optimization: We could inspect
    # CRASH_REPORT_JSON_SCHEMA and get a list of all (recursive) keys that
    # are in there and use that to limit the two following loops to not
    # bother filling up `crash_report` with keys that will never be needed.

    # Rename fields in raw_crash
    raw_fields_map = {
        x["in_database_name"]: x["name"]
        for x in self._all_fields.values()
        if x["namespace"] == "raw_crash"
    }
    for key, val in raw_crash.items():
        crash_report[raw_fields_map.get(key, key)] = val

    # Rename fields in processed_crash
    processed_fields_map = {
        x["in_database_name"]: x["name"]
        for x in self._all_fields.values()
        if x["namespace"] == "processed_crash"
    }
    for key, val in processed_crash.items():
        crash_report[processed_fields_map.get(key, key)] = val

    # Validate crash_report by reducing it to the keys the schema allows
    crash_report = json_schema_reducer.make_reduced_dict(
        CRASH_REPORT_JSON_SCHEMA, crash_report)
    self.save_processed(crash_report)
def save_raw_and_processed(self, raw_crash, dumps, processed_crash, crash_id):
    """Combine raw and processed crash data into one crash report and save it.

    Keys from both ``raw_crash`` and ``processed_crash`` are renamed from
    their ``in_database_name`` to their public ``name`` (looked up in
    ``self._all_fields``), then the combined dict is reduced against
    CRASH_REPORT_JSON_SCHEMA and handed to ``self.save_processed``.

    Note: ``dumps`` and ``crash_id`` are accepted for interface
    compatibility but not used here.
    """
    crash_report = {}

    # TODO Opportunity of optimization;
    # We could inspect CRASH_REPORT_JSON_SCHEMA and get a list
    # of all (recursive) keys that are in there and use that
    # to limit the two following loops to not bother
    # filling up `crash_report` with keys that will never be
    # needed.

    # Rename fields in raw_crash.
    raw_fields_map = {
        x['in_database_name']: x['name']
        for x in self._all_fields.values()
        if x['namespace'] == 'raw_crash'
    }
    for key, val in raw_crash.items():
        crash_report[raw_fields_map.get(key, key)] = val

    # Rename fields in processed_crash.
    processed_fields_map = {
        x['in_database_name']: x['name']
        for x in self._all_fields.values()
        if x['namespace'] == 'processed_crash'
    }
    for key, val in processed_crash.items():
        crash_report[processed_fields_map.get(key, key)] = val

    # Validate crash_report.
    crash_report = json_schema_reducer.make_reduced_dict(
        CRASH_REPORT_JSON_SCHEMA, crash_report)
    self.save_processed(crash_report)
def test_airmozilla_example():
    """make_reduced_dict() drops keys not present in the schema and keeps the rest."""
    schema_file = os.path.join(_here, 'contribute-schema.json')
    sample_file = os.path.join(_here, 'contribute-airmo.json')
    result = make_reduced_dict(schema_file, sample_file)

    # We know *exactly* what the difference is between the schema
    # and the sample file.
    # The sample file is identical plus it has two additional
    # keys. One of them nested.
    # Use a context manager so the file handle is closed instead of leaked.
    with open(sample_file) as f:
        sample = json.load(f)
    sample_keys = sample.keys()
    result_keys = list(result.keys())
    assert sorted(sample_keys) == sorted(result_keys + ['whatsdeployed'])
    assert 'other_stuff' in sample['participate'].keys()
    assert 'other_stuff' not in result['participate'].keys()
def test_airmozilla_example():
    """make_reduced_dict() drops keys not present in the schema and keeps the rest."""
    schema_file = os.path.join(_here, 'contribute-schema.json')
    sample_file = os.path.join(_here, 'contribute-airmo.json')
    result = make_reduced_dict(
        schema_file,
        sample_file
    )

    # We know *exactly* what the difference is between the schema
    # and the sample file.
    # The sample file is identical plus it has two additional
    # keys. One of them nested.
    # Use a context manager so the file handle is closed instead of leaked.
    with open(sample_file) as f:
        sample = json.load(f)
    sample_keys = sample.keys()
    result_keys = list(result.keys())
    assert sorted(sample_keys) == sorted(result_keys + ['whatsdeployed'])
    assert 'other_stuff' in sample['participate'].keys()
    assert 'other_stuff' not in result['participate'].keys()
def save_processed_crash(self, raw_crash, processed_crash):
    """Save processed crash data.

    For Telemetry, we combine the raw and processed crash data into a
    "crash report" which we save to an S3 bucket for the Telemetry
    system to pick up later.
    """
    # TODO Opportunity of optimization: We could inspect
    # TELEMETRY_SOCORRO_CRASH_SCHEMA and get a list of all (recursive) keys that are
    # in there and use that to limit the two following loops to not bother filling
    # up `crash_report` with keys that will never be needed.

    crash_report = {}
    # Both source dicts go through the same rename pass: raw_crash first,
    # then processed_crash, matching the original merge order.
    for namespace, source in (("raw_crash", raw_crash),
                              ("processed_crash", processed_crash)):
        # Map in_database_name -> public name for fields in this namespace.
        name_by_db_name = {
            field["in_database_name"]: field["name"]
            for field in self._all_fields.values()
            if field["namespace"] == namespace
        }
        # Copy values across, renaming keys where a mapping exists.
        for db_name, value in source.items():
            crash_report[name_by_db_name.get(db_name, db_name)] = value

    # Validate crash_report by reducing it to the keys the schema allows.
    crash_report = json_schema_reducer.make_reduced_dict(
        TELEMETRY_SOCORRO_CRASH_SCHEMA, crash_report)

    crash_id = crash_report["uuid"]
    payload = dict_to_str(crash_report).encode("utf-8")
    destination = build_keys("crash_report", crash_id)[0]
    self.conn.save_file(destination, payload)