Example #1
0
def _check_child_rdlog_json_stats(child,
                                  check_json_callback,
                                  *callback_kwargs):
    """Read a child's output until an accepted librdkafka stats JSON is seen.

    The function works in two phases:

    1. Lines are read from ``child`` (and echoed with ``print``) until one
       ends with the ``Librdkafka stats ===`` marker plus newline; the
       thread id of that line is extracted with ``_rdlog_thread_id``.
    2. The output selected by ``GrepThread(stats_thread_id, child)`` is
       parsed incrementally with ``ijson.basic_parse``. Each time a
       top-level JSON object closes, it is passed to
       ``_right_kafka_stats_message``; the function returns as soon as
       that check succeeds, otherwise parsing restarts with a fresh
       ``GrepThread`` and builder.

    Args:
        child: Object exposing ``readline(t_timeout_seconds=...)``; it is
            also handed to ``GrepThread``.
        check_json_callback: NOTE(review): never referenced in the body --
            ``_right_kafka_stats_message`` is called instead. Confirm
            whether the callback was meant to be used.
        *callback_kwargs: Extra positional arguments forwarded to
            ``_right_kafka_stats_message`` after the parsed JSON value
            (despite the name, these are positional, not keyword).

    Returns:
        None, once a stats message has been accepted.
    """
    # Phase 1: locate the stats banner line.
    # NOTE(review): this loop has no exit besides finding the marker;
    # presumably readline() raises on timeout -- confirm.
    while True:
        line = child.readline(t_timeout_seconds=5)
        print(line)

        if line.endswith('Librdkafka stats ===\n'):
            stats_thread_id = _rdlog_thread_id(line)
            break

    # Phase 2: parse JSON objects from the selected output until one passes.
    while True:
        json_builder = ijson.common.ObjectBuilder()
        # Nesting depth of JSON maps; returning to 0 means a top-level
        # object has just been completed.
        map_stack_i = 0
        g = GrepThread(stats_thread_id, child)
        for event, value in ijson.basic_parse(g):
            json_builder.event(event, value)

            if event == 'start_map':
                map_stack_i += 1
            elif event == 'end_map':
                map_stack_i -= 1
                if map_stack_i == 0:
                    if _right_kafka_stats_message(json_builder.value,
                                                  *callback_kwargs):
                        return  # all OK!
                    else:
                        break  # Search again
def parse_json(json, **kwargs):
    """Generate RDFlib triples from a file-like object or a string via a direct mapping.

    The input is turned into a stream of low-level ijson events (numbers
    are parsed with ``use_float=True``) which is handed to ``_parse_events``
    together with any extra keyword arguments.
    """
    event_stream = ijson.basic_parse(json, use_float=True)
    triples = _parse_events(event_stream, **kwargs)
    return triples
Example #3
0
 def test_object_builder(self):
     """Rebuild the JSON fixture with ObjectBuilder and compare it to the expected object."""
     expected = {
         'docs': [
             {
                 'string': u'строка',
                 'null': None,
                 'boolean': False,
                 'integer': 0,
                 'double': Decimal('0.5'),
                 'long': 10000000000,
                 'decimal': Decimal('10000000000.5'),
             },
             {'meta': [[1], {}]},
             {'meta': {'key': 'value'}},
             {'meta': None},
         ],
     }

     assembler = ObjectBuilder()
     # Replay every low-level parser event into the builder.
     for kind, payload in basic_parse(StringIO(JSON)):
         assembler.event(kind, payload)

     self.assertEqual(assembler.value, expected)
Example #4
0
def get_shot(shotn):
    """Collect every eighth ``timestamp`` value from each board file of a shot.

    The shot directory is ``path`` followed by ``shotn``. An empty dict is
    returned (after printing a message) when the directory or its header
    file is missing. Otherwise a list with one entry per board id (0-3) is
    returned; boards without a file keep an empty list.
    """
    shot_dir = '%s%s' % (path, shotn)
    if not os.path.isdir(shot_dir):
        print('Requested shotn is missing.')
        return {}

    header_file = '%s/%s.%s' % (shot_dir, HEADER_FILE, FILE_EXT)
    if not os.path.isfile(header_file):
        print('Requested shot is missing header file.')
        return {}

    per_board = [[] for _ in range(4)]
    for board_id, timestamps in enumerate(per_board):
        board_path = '%s/%d.%s' % (shot_dir, board_id, FILE_EXT)
        if not os.path.isfile(board_path):
            continue
        with open(board_path, 'rb') as board_file:
            print('opened %d' % board_id)
            events = ijson.basic_parse(board_file, use_float=True)
            # Position inside the current group of eight timestamps.
            position = 0
            for kind, payload in events:
                if kind != 'map_key' or payload != 'timestamp':
                    continue
                # The event right after the key carries the timestamp value.
                _, stamp = next(events)
                if position == 0:
                    timestamps.append(stamp)
                position = (position + 1) % 8
    return per_board
Example #5
0
 def test_invalid(self):
     """Every invalid document must make the parser raise ``ijson.JSONError``."""
     running_yajl1 = self.__class__.__name__ == 'YajlParse'
     for document in INVALID_JSONS:
         # Yajl1 doesn't complain about additional data after the end
         # of a parsed object. Skipping this test.
         if running_yajl1 and document == YAJL1_PASSING_INVALID:
             continue
         with self.assertRaises(ijson.JSONError):
             list(ijson.basic_parse(BytesIO(document)))
def parse(fileobj: typing.IO, materialize=False) -> ValueGenerator:
    """
    Parse a JSON document and expose the result as nested generators.

    The first parser event is consumed here and handed, together with the
    remaining event stream and ``materialize``, to ``_ijson_value``.

    :rtype: ValueGenerator
    """
    events = ijson.basic_parse(fileobj)
    first_event = next(events)
    return _ijson_value(events, first_event, materialize)
Example #7
0
def make_dict_from_json(data_file, output_file, min_timestamp, max_timestamp):
    '''Stream the JSON in data_file through ijson and build the titles dictionary file.

    The file is opened in text mode and its basic_parse event stream is
    passed, along with output_file and the timestamp bounds, to
    basic_parse_make_numbered_titles_file. Progress is printed along the way.
    '''
    print("Opening data file...")
    with open(data_file, 'r') as source:
        print("Loading into file using ijson...")
        event_stream = ijson.basic_parse(source)
        print("Loaded into ijson object!")

        print("Making dictionary...")
        basic_parse_make_numbered_titles_file(
            event_stream, output_file, min_timestamp, max_timestamp)
Example #8
0
 def test_basic_parse(self):
     """Check that basic_parse yields exactly the reference event sequence.

     Events are compared one by one so a failure points at the first
     mismatching event; the lengths are then compared as well, because
     zip() stops at the shorter sequence and would otherwise hide missing
     or surplus events.
     """
     events = list(basic_parse(StringIO(JSON)))
     reference = [
         ('start_map', None),
             ('map_key', 'docs'),
             ('start_array', None),
                 ('start_map', None),
                     ('map_key', 'string'),
                     ('string', u'строка'),
                     ('map_key', 'null'),
                     ('null', None),
                     ('map_key', 'boolean'),
                     ('boolean', False),
                     ('map_key', 'integer'),
                     ('number', 0),
                     ('map_key', 'double'),
                     ('number', Decimal('0.5')),
                     ('map_key', 'long'),
                     ('number', 10000000000),
                     ('map_key', 'decimal'),
                     ('number', Decimal('10000000000.5')),
                 ('end_map', None),
                 ('start_map', None),
                     ('map_key', 'meta'),
                     ('start_array', None),
                         ('start_array', None),
                             ('number', 1),
                         ('end_array', None),
                         ('start_array', None),
                             ('number', 2),
                         ('end_array', None),
                     ('end_array', None),
                 ('end_map', None),
                 ('start_map', None),
                     ('map_key', 'meta'),
                     ('start_map', None),
                         ('map_key', 'key'),
                         ('string', 'value'),
                     ('end_map', None),
                 ('end_map', None),
                 ('start_map', None),
                     ('map_key', 'meta'),
                     ('null', None),
                 ('end_map', None),
             ('end_array', None),
         ('end_map', None),
     ]
     for e, r in zip(events, reference):
         self.assertEqual(e, r)
     # zip() truncates silently, so verify nothing was dropped or added.
     self.assertEqual(len(events), len(reference))
Example #9
0
    def test_A(self):
        """Print the interpreter version, then dump the sample file's events twice.

        The file ``files/result-set.txt`` under the current working directory
        is read once with ``ijson.parse`` (prefixed events) and once with
        ``ijson.basic_parse`` (plain event tuples); every event is printed.
        """
        print(sys.version)

        filename = os.sep.join([os.getcwd(), "files", "result-set.txt"])
        print("filename: " + filename)

        # First pass: load json iteratively as (prefix, event, value) triples.
        with open(filename, 'rb') as handle:
            for prefix, event, value in ijson.parse(handle):
                message = 'prefix={}, event={}, value={}'.format(
                    prefix, event, value)
                print(message)

        # Second pass: the low-level event stream.
        with open(filename, 'rb') as handle:
            for item in ijson.basic_parse(handle):
                print(str(item))
Example #10
0
def simplify_json_file(data_dir, langs, policy="IN_ALL_LANGS", json_file="latest-all.json.bz2"):
    """Extract per-language labels from a bz2-compressed JSON array of entries.

    The input ``data_dir/json_file`` is streamed with ijson; each completed
    top-level array element is reduced to one tab-separated line
    ``<id>\\t<lang>:<label>...`` and written, UTF-8 encoded, to
    ``<returned path>.simple.bz2``.

    Args:
        data_dir: Directory holding the input file and receiving the output.
        langs: Set of language codes (``issubset`` is called on it).
        policy: ``"IN_ALL_LANGS"`` keeps an entry only if it has a label in
            every requested language; ``"IN_ANY_LANG"`` keeps the labels it
            has, provided there is at least one. Must be in ``policies``.
        json_file: Name of the bz2-compressed input file inside ``data_dir``.

    Returns:
        The output path without the ``.simple.bz2`` suffix.

    Raises:
        ValueError: If ``policy`` is not one of ``policies``.
    """
    latest_all_json_file = join(data_dir, json_file)

    if policy not in policies:
        raise ValueError("Policy %s not supported." % policy)

    print("extracting multilingual titles with policy %s (%s)" % (policy, ' '.join(langs)))

    simple_titles_path = join(data_dir, "extraction_" + "_".join(sorted(langs)) + "." + policy)

    def process_entry(entry, fo):
        """Write the line for ``entry`` to ``fo`` if the policy selects it; return whether it did."""
        entry_id = entry["id"]
        titles = None
        if policy == "IN_ALL_LANGS" and langs.issubset(entry["labels"].keys()):
            titles = {lang: entry["labels"][lang]["value"] for lang in langs}
        elif policy == "IN_ANY_LANG":
            titles = {lang: entry["labels"][lang]["value"] for lang in langs if lang in entry["labels"]}

        if not titles:
            return False
        fields = [lang + ':' + title for lang, title in titles.items()]
        fo.write((entry_id + '\t' + '\t'.join(fields) + '\n').encode('utf-8'))
        return True

    written = 0
    # simple_titles_path already contains data_dir; joining data_dir again
    # would duplicate it whenever data_dir is a relative path.
    output_path = simple_titles_path + ".simple.bz2"
    # BZ2File's ``buffering`` argument was ignored on Python 3 and removed
    # in 3.9, so it is not passed; ijson's buf_size controls the read size.
    with BZ2File(latest_all_json_file, 'r') as fi, \
            BZ2File(output_path, 'w') as fo:
        builder = ObjectBuilder()
        completed = 0
        for event, value in ijson.basic_parse(fi, buf_size=1024*1024*16):
            builder.event(event, value)
            # A second element in the top-level array means the first one is
            # complete: handle it and drop it to keep memory bounded.
            if len(builder.value) > 1:
                if process_entry(builder.value.pop(0), fo):
                    written += 1
                completed += 1
                print("\rCompleted %d\ttitles %d" % (completed, written), end="")
        print("")

        # Process the last entry, which never gets a successor to trigger it.
        # An empty array leaves nothing to process.
        if builder.value:
            process_entry(builder.value.pop(0), fo)

    return simple_titles_path
Example #11
0
def parse_tuples_and_save(data_file, save_file, min_timestamp, max_timestamp):
    '''Extract tuples from a JSON data file, sort them and save them.

    The file is streamed through ijson.basic_parse and the events are
    passed to basic_parse_yield_tuples together with the timestamp bounds.
    The resulting tuples are sorted by their first element and handed to
    save_object along with save_file. Progress is printed along the way.
    '''

    print("Opening data file...")
    with open(data_file, 'r') as read_file:
        print("Loading into file using ijson...")
        events = ijson.basic_parse(read_file)
        print("Loaded into ijson object!")

        print("Loading generator object...")
        gen_obj = basic_parse_yield_tuples(events, min_timestamp,
                                           max_timestamp)

        print("Expanding object into list...")
        # Materialize inside the with-block: the events are read from the
        # file while iterating. The with statement closes the file itself.
        tuples = list(gen_obj)

    # Sort in place by the first value; avoids holding a second copy of
    # the list.
    print(f'Sorting tuples list of length {len(tuples)}...')
    tuples.sort(key=lambda tup: tup[0])

    # Save objects to files
    print("Sorting complete and saving to file")
    save_object(tuples, save_file)
Example #12
0
 def test_boundary_lexeme(self):
     """Parse JSON with ``buf_size`` set to one past the offset of ``false``."""
     boundary = JSON.index(b'false') + 1
     source = BytesIO(JSON)
     events = list(ijson.basic_parse(source, buf_size=boundary))
Example #13
0
 def test_boundary_whitespace(self):
     """Parsing must yield JSON_EVENTS when ``buf_size`` is one past the first run of three spaces."""
     boundary = JSON.index(b'   ') + 1
     source = BytesIO(JSON)
     parsed = list(ijson.basic_parse(source, buf_size=boundary))
     self.assertEqual(parsed, JSON_EVENTS)
Example #14
0
 def test_lazy(self):
     """Creating a parser over invalid input must not raise by itself."""
     # The returned iterator is never consumed, so this shouldn't fail.
     broken_input = BytesIO(INVALID_JSONS[0])
     ijson.basic_parse(broken_input)
     self.assertTrue(True)
Example #15
0
 def test_utf8_split(self):
     """Parsing with ``buf_size`` one past the first ``0xd1`` byte must not raise UnicodeDecodeError."""
     boundary = JSON.index(b'\xd1') + 1
     try:
         list(ijson.basic_parse(BytesIO(JSON), buf_size=boundary))
     except UnicodeDecodeError:
         self.fail('UnicodeDecodeError raised')
Example #16
0
 def test_incomplete(self):
     """Every document in INCOMPLETE_JSONS must raise ``ijson.IncompleteJSONError``."""
     for truncated in INCOMPLETE_JSONS:
         with self.assertRaises(ijson.IncompleteJSONError):
             # Exhaust the parser; the error is expected while iterating.
             for _ in ijson.basic_parse(BytesIO(truncated)):
                 pass
Example #17
0
 def test_numbers(self):
     """The ``number`` events of NUMBERS_JSON must carry an int followed by two Decimals."""
     number_types = []
     for kind, payload in list(ijson.basic_parse(BytesIO(NUMBERS_JSON))):
         if kind == 'number':
             number_types.append(type(payload))
     self.assertEqual(number_types, [int, Decimal, Decimal])
Example #18
0
 def test_surrogate_pairs(self):
     """The first event of SURROGATE_PAIRS_JSON must carry the combined character."""
     parser = ijson.basic_parse(BytesIO(SURROGATE_PAIRS_JSON))
     first_event = next(parser)
     self.assertEqual(first_event[1], '💩')
Example #19
0
 def test_strings(self):
     """Check the decoded string values and the escaped map key of STRINGS_JSON."""
     events = list(ijson.basic_parse(BytesIO(STRINGS_JSON)))

     strings = []
     for kind, payload in events:
         if kind == 'string':
             strings.append(payload)
     self.assertEqual(strings, ['', '"', '\\', '\\\\', '\b\f\n\r\t'])

     # Escapes inside keys must be decoded as well.
     escaped_key = ('map_key', 'special\t')
     self.assertTrue(escaped_key in events)
Example #20
0
 def test_scalar(self):
     """SCALAR_JSON must produce the single event ``('number', 0)``."""
     parser = ijson.basic_parse(BytesIO(SCALAR_JSON))
     self.assertEqual(list(parser), [('number', 0)])
Example #21
0
 def test_basic_parse(self):
     """The events parsed from JSON must equal JSON_EVENTS."""
     parser = ijson.basic_parse(BytesIO(JSON))
     self.assertEqual(list(parser), JSON_EVENTS)
Example #22
0
 def test_basic_parse(self):
     """Exhaust the basic_parse event stream of JSON into a list."""
     source = BytesIO(JSON)
     events = list(ijson.basic_parse(source))
Example #23
0
 def test_numbers(self):
     """Collect the Python types carried by the ``number`` events of NUMBERS_JSON."""
     events = list(ijson.basic_parse(BytesIO(NUMBERS_JSON)))
     types = []
     for kind, payload in events:
         if kind == 'number':
             types.append(type(payload))
Example #24
0
 def test_strings(self):
     """Collect the values carried by the ``string`` events of STRINGS_JSON."""
     events = list(ijson.basic_parse(BytesIO(STRINGS_JSON)))
     strings = []
     for kind, payload in events:
         if kind == 'string':
             strings.append(payload)
Example #25
0
 def test_scalar(self):
     """Exhaust the basic_parse event stream of SCALAR_JSON into a list."""
     source = BytesIO(SCALAR_JSON)
     events = list(ijson.basic_parse(source))
Example #26
0
 def test_scalar_builder(self):
     """An ObjectBuilder fed the events of SCALAR_JSON must end up holding 0."""
     assembler = ObjectBuilder()
     parser = basic_parse(StringIO(SCALAR_JSON))
     for kind, payload in parser:
         assembler.event(kind, payload)
     self.assertEqual(assembler.value, 0)
Example #27
0
 def test_boundary_whitespace(self):
     """Parse JSON with ``buf_size`` set to one past the first run of three spaces."""
     boundary = JSON.index(b'   ') + 1
     source = BytesIO(JSON)
     events = list(ijson.basic_parse(source, buf_size=boundary))