def _check_child_rdlog_json_stats(child, check_json_callback, *callback_kwargs):
    # Wait for an "=== Librdkafka stats ===" marker line from the child's log,
    # then repeatedly assemble the JSON stats objects logged on that thread
    # until one satisfies _right_kafka_stats_message(...).
    #
    # NOTE(review): despite its name, *callback_kwargs collects positional
    # arguments, not keyword arguments; and the check_json_callback parameter
    # is never used in this body — verify whether it was meant to replace
    # _right_kafka_stats_message below.
    while True:
        line = child.readline(t_timeout_seconds=5)
        print(line)
        if line.endswith('Librdkafka stats ===\n'):
            # Remember which thread emits the stats so GrepThread can filter
            # subsequent log lines down to just that thread's output.
            stats_thread_id = _rdlog_thread_id(line)
            break
    while True:
        json_builder = ijson.common.ObjectBuilder()
        map_stack_i = 0  # current nesting depth of JSON objects
        g = GrepThread(stats_thread_id, child)
        for event, value in ijson.basic_parse(g):
            json_builder.event(event, value)
            if event == 'start_map':
                map_stack_i += 1
            elif event == 'end_map':
                map_stack_i -= 1
                if map_stack_i == 0:
                    # Depth back to zero: one complete top-level JSON object
                    # has been assembled in json_builder.value.
                    if _right_kafka_stats_message(json_builder.value, *callback_kwargs):
                        return  # all OK!
                    else:
                        break  # Search again
def parse_json(json, **kwargs):
    """Generates RDFlib triples from a file-like object or a string using a direct mapping."""
    # Stream low-level parser events (numbers kept as floats) and hand them
    # to the event-to-triples translator.
    event_stream = ijson.basic_parse(json, use_float=True)
    return _parse_events(event_stream, **kwargs)
def test_object_builder(self):
    # Feed the full event stream into ObjectBuilder and check that the
    # reconstructed value matches the reference document exactly.
    obj_builder = ObjectBuilder()
    for evt, val in basic_parse(StringIO(JSON)):
        obj_builder.event(evt, val)
    expected = {
        'docs': [
            {
                'string': u'строка',
                'null': None,
                'boolean': False,
                'integer': 0,
                'double': Decimal('0.5'),
                'long': 10000000000,
                'decimal': Decimal('10000000000.5'),
            },
            {
                'meta': [[1], {}],
            },
            {
                'meta': {'key': 'value'},
            },
            {
                'meta': None,
            },
        ],
    }
    self.assertEqual(obj_builder.value, expected)
def get_shot(shotn):
    """Collect every 8th 'timestamp' value from each board file of a shot.

    Looks under ``path + shotn`` for a header file and up to four board data
    files (``0..3``). Returns a list of four lists (one per board, empty when
    that board file is absent), or ``{}`` when the shot directory or header
    file is missing.
    """
    shot_path = '%s%s' % (path, shotn)
    if not os.path.isdir(shot_path):
        print('Requested shotn is missing.')
        return {}
    if not os.path.isfile('%s/%s.%s' % (shot_path, HEADER_FILE, FILE_EXT)):
        print('Requested shot is missing header file.')
        return {}
    resp = [[] for _ in range(4)]  # was "for board" — unused name shadowing
    for board_id in range(4):
        # Build the board path once instead of three times.
        board_path = '%s/%d.%s' % (shot_path, board_id, FILE_EXT)
        if not os.path.isfile(board_path):
            continue  # guard clause instead of nesting the whole body
        with open(board_path, 'rb') as board_file:
            print('opened %d' % board_id)
            events = ijson.basic_parse(board_file, use_float=True)
            counter = 0
            for event, value in events:
                if event == 'map_key' and value == 'timestamp':
                    # Consume the value event that follows the 'timestamp' key.
                    event, value = next(events)  # idiomatic; was events.__next__()
                    if not counter:
                        resp[board_id].append(value)
                    elif counter == 7:
                        # Reset the cycle and skip the increment, so the next
                        # timestamp (counter == 0) is appended: keeps 1 in 8.
                        counter = 0
                        continue
                    counter += 1
    return resp
def test_invalid(self):
    # Every malformed document must raise JSONError when fully consumed.
    for invalid_doc in INVALID_JSONS:
        # Yajl1 doesn't complain about additional data after the end
        # of a parsed object. Skipping this test.
        is_yajl1_exception = (self.__class__.__name__ == 'YajlParse'
                              and invalid_doc == YAJL1_PASSING_INVALID)
        if is_yajl1_exception:
            continue
        with self.assertRaises(ijson.JSONError) as ctx:
            list(ijson.basic_parse(BytesIO(invalid_doc)))
def parse(fileobj: typing.IO, materialize=False) -> ValueGenerator:
    """
    parse a JSON document and return the results as nested generators

    :rtype: ValueGenerator
    """
    event_stream = ijson.basic_parse(fileobj)
    # Pull the first event eagerly; _ijson_value dispatches on it and lazily
    # consumes the rest of the stream.
    first_event = next(event_stream)
    return _ijson_value(event_stream, first_event, materialize)
def make_dict_from_json(data_file, output_file, min_timestamp, max_timestamp):
    """Will make dictionary from parsing json"""
    print("Opening data file...")
    with open(data_file, 'r') as source:
        print("Loading into file using ijson...")
        # Lazily stream parser events; the helper below consumes them and
        # writes the numbered-titles file.
        event_stream = ijson.basic_parse(source)
        print("Loaded into ijson object!")
        print("Making dictionary...")
        basic_parse_make_numbered_titles_file(
            event_stream, output_file, min_timestamp, max_timestamp)
def test_basic_parse(self):
    """The parser must emit exactly the reference event sequence for JSON."""
    events = list(basic_parse(StringIO(JSON)))
    reference = [
        ('start_map', None),
        ('map_key', 'docs'),
        ('start_array', None),
        ('start_map', None),
        ('map_key', 'string'),
        ('string', u'строка'),
        ('map_key', 'null'),
        ('null', None),
        ('map_key', 'boolean'),
        ('boolean', False),
        ('map_key', 'integer'),
        ('number', 0),
        ('map_key', 'double'),
        ('number', Decimal('0.5')),
        ('map_key', 'long'),
        ('number', 10000000000),
        ('map_key', 'decimal'),
        ('number', Decimal('10000000000.5')),
        ('end_map', None),
        ('start_map', None),
        ('map_key', 'meta'),
        ('start_array', None),
        ('start_array', None),
        ('number', 1),
        ('end_array', None),
        ('start_array', None),
        ('number', 2),
        ('end_array', None),
        ('end_array', None),
        ('end_map', None),
        ('start_map', None),
        ('map_key', 'meta'),
        ('start_map', None),
        ('map_key', 'key'),
        ('string', 'value'),
        ('end_map', None),
        ('end_map', None),
        ('start_map', None),
        ('map_key', 'meta'),
        ('null', None),
        ('end_map', None),
        ('end_array', None),
        ('end_map', None),
    ]
    # Compare the whole lists instead of zip()-ing pairwise: zip silently
    # truncates to the shorter sequence, so a parser emitting too few or too
    # many events would previously still pass.
    self.assertEqual(events, reference)
def test_A(self):
    print(sys.version)
    filename = os.getcwd() + os.sep + "files" + os.sep + "result-set.txt"
    print("filename: " + filename)
    # First pass: full parse() — (prefix, event, value) triples.
    with open(filename, 'rb') as input_file:
        # load json iteratively
        for prefix, event, value in ijson.parse(input_file):
            print('prefix={}, event={}, value={}'.format(
                prefix, event, value))
    # Second pass: low-level basic_parse() — (event, value) pairs.
    with open(filename, 'rb') as input_file:
        for pair in ijson.basic_parse(input_file):
            print(str(pair))
def simplify_json_file(data_dir, langs, policy="IN_ALL_LANGS", json_file="latest-all.json.bz2"):
    """Extract per-language titles from a bz2 Wikidata JSON dump.

    For each entry, writes a tab-separated line ``id\\tlang:title...`` to a
    bz2 output file. ``policy`` selects whether an entry needs labels in ALL
    of ``langs`` or in ANY of them. Returns the extraction path prefix.
    """
    latest_all_json_file = join(data_dir, json_file)
    if policy not in policies:
        raise ValueError("Policy %s not supported." % policy)
    print("extracting multilingual titles with policy %s (%s)" % (policy, ' '.join(langs)))
    lang_prefix = sorted(langs)
    simple_titles_path = join(data_dir, "extraction_" + "_".join(lang_prefix) + "." + policy)

    def process_entry(last, fo):
        # Write one dump entry's titles to fo; True when a line was written.
        # (The original declared ``global written`` here, which referred to a
        # module-level name and was never used — removed.)
        entry_id = last["id"]  # renamed from ``id`` (shadowed the builtin)
        titles = None
        if policy == "IN_ALL_LANGS" and langs.issubset(last["labels"].keys()):
            titles = {lang: last["labels"][lang]["value"] for lang in langs}
        elif policy == "IN_ANY_LANG":
            titles = {lang: last["labels"][lang]["value"]
                      for lang in langs if lang in last["labels"]}
        if titles:
            fo.write((entry_id + '\t' + '\t'.join(
                [lang + ':' + titles[lang] for lang in titles.keys()]) + '\n').encode('utf-8'))
            return True
        else:
            return False

    written = 0
    with BZ2File(latest_all_json_file, 'r', buffering=1024*1024*16) as fi, \
            BZ2File(join(data_dir, simple_titles_path + ".simple.bz2"), 'w') as fo:
        builder = ObjectBuilder()
        completed = 0
        for event, value in ijson.basic_parse(fi, buf_size=1024*1024*16):
            builder.event(event, value)
            # Once more than one top-level value is buffered, the first one
            # is complete and can be flushed.
            if len(builder.value) > 1:
                if process_entry(builder.value.pop(0), fo):
                    written += 1
                completed += 1
                print("\rCompleted %d\ttitles %d" % (completed, written), end="")
        print("")
        # Process the last buffered entry. BUGFIX: the original called
        # process_entry(...) without the required ``fo`` argument (TypeError)
        # and did so outside the with-block, after ``fo`` was already closed.
        if builder.value:
            if process_entry(builder.value.pop(0), fo):
                written += 1
    return simple_titles_path
def parse_tuples_and_save(data_file, save_file, min_timestamp, max_timestamp):
    '''Function will get tuples from json data file and save to pickle file'''
    print("Opening data file...")
    with open(data_file, 'r') as read_file:
        print("Loading into file using ijson...")
        events = ijson.basic_parse(read_file)
        print("Loaded into ijson object!")
        print("Loading generator object...")
        gen_obj = basic_parse_yield_tuples(events, min_timestamp, max_timestamp)
        print("Expanding object into list...")
        # Materialize before leaving the with-block so the generator can
        # still read from the open file.
        list_obj = list(gen_obj)
        # (Removed a redundant read_file.close(): the with-statement already
        # closes the file on exit.)
    # get list sorted by the first value
    print(f'Sorting tuples list of length {len(list_obj)}...')
    tuples = sorted(list_obj, key=lambda tup: tup[0])
    # Save objects to file. (This was a stray no-op string-literal statement
    # in the original, not a comment.)
    print("Sorting complete and saving to file")
    save_object(tuples, save_file)
def test_boundary_lexeme(self):
    # Force the read-buffer boundary to fall in the middle of the 'false'
    # lexeme; exhausting the stream must not raise.
    split_at = JSON.index(b'false') + 1
    parsed = list(ijson.basic_parse(BytesIO(JSON), buf_size=split_at))
def test_boundary_whitespace(self):
    # Put the read-buffer boundary right after the first whitespace byte and
    # verify the event stream is still parsed correctly.
    split_at = JSON.index(b' ') + 1
    observed = list(ijson.basic_parse(BytesIO(JSON), buf_size=split_at))
    self.assertEqual(observed, JSON_EVENTS)
def test_lazy(self):
    # shouldn't fail since iterator is not exhausted
    lazy_iterator = ijson.basic_parse(BytesIO(INVALID_JSONS[0]))
    self.assertTrue(True)
def test_utf8_split(self):
    # Split the buffer inside a multi-byte UTF-8 sequence (0xd1 starts a
    # two-byte Cyrillic character); decoding across the boundary must work.
    split_at = JSON.index(b'\xd1') + 1
    try:
        list(ijson.basic_parse(BytesIO(JSON), buf_size=split_at))
    except UnicodeDecodeError:
        self.fail('UnicodeDecodeError raised')
def test_incomplete(self):
    # Truncated documents must raise IncompleteJSONError once consumed.
    for truncated in INCOMPLETE_JSONS:
        with self.assertRaises(ijson.IncompleteJSONError):
            list(ijson.basic_parse(BytesIO(truncated)))
def test_numbers(self):
    # Integers come back as int; decimals/floats as Decimal.
    parsed = list(ijson.basic_parse(BytesIO(NUMBERS_JSON)))
    number_types = [type(val) for evt, val in parsed if evt == 'number']
    self.assertEqual(number_types, [int, Decimal, Decimal])
def test_surrogate_pairs(self):
    # A \uXXXX\uXXXX surrogate pair must decode to one astral character.
    first_event = next(ijson.basic_parse(BytesIO(SURROGATE_PAIRS_JSON)))
    _, parsed_string = first_event
    self.assertEqual(parsed_string, '💩')
def test_strings(self):
    # Escape sequences in values and map keys must round-trip correctly.
    parsed = list(ijson.basic_parse(BytesIO(STRINGS_JSON)))
    string_values = [val for evt, val in parsed if evt == 'string']
    self.assertEqual(string_values, ['', '"', '\\', '\\\\', '\b\f\n\r\t'])
    self.assertTrue(('map_key', 'special\t') in parsed)
def test_scalar(self):
    # A document that is a bare scalar yields exactly one number event.
    self.assertEqual(
        list(ijson.basic_parse(BytesIO(SCALAR_JSON))),
        [('number', 0)],
    )
def test_basic_parse(self):
    # The full event stream must match the reference sequence exactly.
    observed = list(ijson.basic_parse(BytesIO(JSON)))
    self.assertEqual(observed, JSON_EVENTS)
def test_basic_parse(self):
    # Exhaust the event stream; presumably a smoke test that only checks
    # parsing completes without error (no assertions in the original).
    parsed = list(ijson.basic_parse(BytesIO(JSON)))
def test_numbers(self):
    # Collect number-event types; presumably a smoke test — the original
    # computes the list but makes no assertion on it.
    parsed = list(ijson.basic_parse(BytesIO(NUMBERS_JSON)))
    types = [type(val) for evt, val in parsed if evt == 'number']
def test_strings(self):
    # Extract string values; presumably a smoke test — the original makes
    # no assertion on the extracted list.
    parsed = list(ijson.basic_parse(BytesIO(STRINGS_JSON)))
    strings = [val for evt, val in parsed if evt == 'string']
def test_scalar(self):
    # Parse a bare-scalar document to completion; presumably a smoke test
    # (no assertions in the original).
    events = list(ijson.basic_parse(BytesIO(SCALAR_JSON)))
def test_scalar_builder(self):
    # ObjectBuilder fed a bare-scalar document must yield that scalar.
    scalar_builder = ObjectBuilder()
    for evt, val in basic_parse(StringIO(SCALAR_JSON)):
        scalar_builder.event(evt, val)
    self.assertEqual(scalar_builder.value, 0)
def test_boundary_whitespace(self):
    # Buffer boundary just past the first whitespace byte; presumably a
    # smoke test — the original makes no assertion on the events.
    boundary = JSON.index(b' ') + 1
    events = list(ijson.basic_parse(BytesIO(JSON), buf_size=boundary))