def test_prune_max_properties(self):
    """Check that sanitizing caps a document at 10,000 properties.

    A document with exactly 10,000 properties must come back with all of
    them; adding further properties must not make the sanitized result
    any larger.
    """
    max_properties = 10000
    filler = 'value'
    property_names = ('prop-' + str(index) for index in range(max_properties))
    doc = dict.fromkeys(property_names, filler)

    # Exactly at the limit: nothing is expected to be dropped.
    result = bigquery_schema.sanitize_property_value(doc)
    self.assertEqual(len(result), max_properties)

    # One property over the limit: the result must stay at the limit.
    doc['prop-10001'] = filler
    result = bigquery_schema.sanitize_property_value(doc)
    self.assertEqual(len(result), max_properties)

    # A further property, 'z', is expected to be the one left out.
    doc['z'] = filler
    result = bigquery_schema.sanitize_property_value(doc)
    self.assertEqual(len(result), max_properties)
    self.assertNotIn('z', result)
def test_sanitize_property_value(self):
    """Check sanitize_property_value against a document with mixed issues.

    Expectations pinned by the assertions below:
      * empty dicts and lists holding only empty dicts are removed,
      * a 200-character property name is truncated to 128 characters,
      * '@2_3' is renamed to '_2_3',
      * the float 9.300000191734863 comes back as 9.300000192,
      * the 'labels' dict becomes a list of {'name': ..., 'value': ...}
        entries.
    Only four properties are expected to survive, so '@!@' is presumably
    dropped as well (it is not checked by name).
    """
    # The empty-value keys must be spelled exactly like the names used in
    # the assertNotIn checks below; otherwise those checks pass vacuously.
    doc = {
        'empty_dict': {},
        'empty_dict_list': [{}, {}],
        'a' * 200: 'value0',
        '@!@': 'deleteme',
        '@2_3': 'value1',
        'invalid_numeric': 9.300000191734863,
        'labels': {
            'label1': 'value1',
            'label2': 'value2',
        },
    }
    sanitized = bigquery_schema.sanitize_property_value(doc)

    self.assertEqual(len(sanitized), 4)
    self.assertNotIn('empty_dict', sanitized)
    self.assertNotIn('empty_dict_list', sanitized)
    self.assertEqual(sanitized['a' * 128], 'value0')
    self.assertEqual(sanitized['invalid_numeric'], 9.300000192)
    self.assertEqual(sanitized['_2_3'], 'value1')

    labels = sanitized['labels']
    self.assertEqual(len(labels), 2)

    # Use unittest assertions rather than bare ``assert`` so the checks
    # survive ``python -O`` and report the mismatching values on failure.
    expected_labels = {'label1': 'value1', 'label2': 'value2'}
    seen_names = set()
    for label in labels:
        name = label['name']
        if name in expected_labels:
            seen_names.add(name)
            self.assertEqual(label['value'], expected_labels[name])
    self.assertEqual(seen_names, set(expected_labels))
def test_remove_duplicate_property(self):
    """Check that property names differing only by case are de-duplicated.

    The same pair of clashing names is used at the top level and inside
    a list element; in both places only 'IPAddress' is expected to remain.
    """
    record = {
        'ipAddress': 'value',
        'IPAddress': 'other_value',
        'array': [
            {
                'ipAddress': 'value',
                'IPAddress': 'other_value',
            },
        ],
    }

    result = bigquery_schema.sanitize_property_value(record)

    # Two survivors at the top level: 'IPAddress' and 'array'.
    self.assertEqual(len(result), 2)
    self.assertIn('IPAddress', result)
    self.assertEqual(result['IPAddress'], 'other_value')

    # The nested dict gets the same treatment.
    expected_array = [{'IPAddress': 'other_value'}]
    self.assertEqual(result['array'], expected_array)
def process(self, element):
    """Yield the element after running it through the schema sanitizer.

    Args:
        element: the value handed to
            bigquery_schema.sanitize_property_value.

    Yields:
        The single value returned by the sanitizer.
    """
    sanitized = bigquery_schema.sanitize_property_value(element)
    yield sanitized
def process(self, element):
    """Sanitize the element, stamp it with the load time, and yield it.

    Args:
        element: the value handed to
            bigquery_schema.sanitize_property_value; the sanitized result
            must support item assignment.

    Yields:
        The sanitized value with a 'timestamp' entry set from
        self.load_time.get().
    """
    row = bigquery_schema.sanitize_property_value(element)
    # Record the load timestamp on the sanitized row.
    # NOTE(review): load_time looks like a deferred/runtime value provider
    # resolved via .get() -- confirm against the class constructor.
    row['timestamp'] = self.load_time.get()
    yield row