Ejemplo n.º 1
0
 def __aspect_elements(self):
     """Generate the aspect elements found in the parser's event stream.

     Consumes ``(prefix, event, value)`` triples from ``self.__parser``.
     A ``start_map`` event whose prefix fully matches
     ``self.__ITEM_NAME_ITEM_RE`` opens a new element: group 1 of the match
     is its name and a fresh ``ObjectBuilder`` collects its events. The
     element is complete when ``end_map`` arrives at the prefix
     ``'item.<name>.item'``.

     Completed elements are routed by name:

     * ``CxConstants.NUMBER_VERIFICATION`` is stored in
       ``self.__number_verification``.
     * ``CxConstants.STATUS`` is stored in ``self.__status``.
     * ``CxConstants.META_DATA`` is appended to ``self.__pre_meta_data``
       if no aspect element has been seen yet, otherwise to
       ``self.__post_meta_data``.
     * Any other name is counted in ``self.__aspect_element_counts`` and
       yielded.

     Yields:
         ``AspectElement(name, value)`` for every element that is not one
         of the three special names above.
     """
     current_name = None
     builder = None
     saw_aspect_element = False
     for prefix, event, value in self.__parser:
         if event == 'start_map':
             m = self.__ITEM_NAME_ITEM_RE.fullmatch(prefix)
             if m is not None:
                 current_name = m.group(1)
                 builder = ObjectBuilder()
         if builder is None or current_name is None:
             # Not inside an element: nothing to collect.
             continue
         builder.event(event, value)
         if event != 'end_map' or prefix != 'item.%s.item' % current_name:
             # Still inside the element (possibly closing a nested map).
             continue
         val = builder.value
         builder = None
         if current_name == CxConstants.NUMBER_VERIFICATION:
             self.__number_verification = Element(current_name, val)
         elif current_name == CxConstants.STATUS:
             self.__status = Element(current_name, val)
         elif current_name == CxConstants.META_DATA:
             if saw_aspect_element:
                 self.__post_meta_data.append(Element(current_name, val))
             else:
                 self.__pre_meta_data.append(Element(current_name, val))
         else:
             saw_aspect_element = True
             self.__aspect_element_counts[current_name] = (
                 self.__aspect_element_counts.get(current_name, 0) + 1)
             yield AspectElement(current_name, val)
     # The generator ends by falling off the end. Raising StopIteration
     # explicitly is converted to RuntimeError since PEP 479 (Python 3.7+).
Ejemplo n.º 2
0
 def test_object_builder(self):
     """ObjectBuilder fed with all basic_parse events of JSON rebuilds the document."""
     scalars_doc = {
         'string': u'строка',
         'null': None,
         'boolean': False,
         'integer': 0,
         'double': Decimal('0.5'),
         'long': 10000000000,
         'decimal': Decimal('10000000000.5'),
     }
     expected = {
         'docs': [
             scalars_doc,
             {'meta': [[1], {}]},
             {'meta': {'key': 'value'}},
             {'meta': None},
         ],
     }

     assembler = ObjectBuilder()
     event_stream = basic_parse(StringIO(JSON))
     for kind, payload in event_stream:
         assembler.event(kind, payload)

     self.assertEqual(assembler.value, expected)
Ejemplo n.º 3
0
 def __aspect_elements(self):
     """Generate the aspect elements found in the parser's event stream.

     Consumes ``(prefix, event, value)`` triples from ``self.__parser``.
     A ``start_map`` event whose prefix fully matches
     ``self.__ITEM_NAME_ITEM_RE`` opens a new element: group 1 of the match
     is its name and a fresh ``ObjectBuilder`` collects its events. The
     element is complete when ``end_map`` arrives at the prefix
     ``'item.<name>.item'``.

     Elements named ``CxConstants.META_DATA`` are not yielded as aspect
     elements. They are appended to ``self.__pre_meta_data`` when no aspect
     element has been seen yet (and ``None`` is yielded in that case), or
     to ``self.__post_meta_data`` otherwise. Every other element is counted
     in ``self.__aspect_element_counts`` and yielded.

     Yields:
         ``AspectElement(name, value)`` for each non-metadata element, and
         ``None`` once for each metadata element that precedes the first
         aspect element.
     """
     current_name = None
     builder = None
     saw_aspect_element = False
     for prefix, event, value in self.__parser:
         if event == 'start_map':
             m = self.__ITEM_NAME_ITEM_RE.fullmatch(prefix)
             if m is not None:
                 current_name = m.group(1)
                 builder = ObjectBuilder()
         if builder is None or current_name is None:
             # Not inside an element: nothing to collect.
             continue
         builder.event(event, value)
         if event != 'end_map' or prefix != 'item.%s.item' % current_name:
             # Still inside the element (possibly closing a nested map).
             continue
         val = builder.value
         builder = None
         if current_name == CxConstants.META_DATA:
             if saw_aspect_element:
                 self.__post_meta_data.append(
                     AspectElement(current_name, val))
             else:
                 self.__pre_meta_data.append(
                     AspectElement(current_name, val))
                 yield None
         else:
             saw_aspect_element = True
             self.__aspect_element_counts[current_name] = (
                 self.__aspect_element_counts.get(current_name, 0) + 1)
             yield AspectElement(current_name, val)
     # The generator ends by falling off the end. Raising StopIteration
     # explicitly is converted to RuntimeError since PEP 479 (Python 3.7+).
Ejemplo n.º 4
0
def big_file_stream(unzipFile):
    """Stream ``(key, value)`` pairs for map-valued entries under ``somePrefix``.

    The input is parsed incrementally with ``parse``. Every ``map_key``
    event at prefix ``'somePrefix'`` starts a new entry with a fresh
    ``ObjectBuilder``. All events belonging to that entry (prefix equal to
    ``'somePrefix.<key>'`` or nested below it) are fed to the builder.

    The pair is yielded exactly once, when the entry's own map closes.
    Nested ``end_map`` events no longer yield the partially built object,
    and sibling keys that merely share a leading substring with the
    current key are no longer fed into its builder.

    Args:
        unzipFile: Source handed to ``parse``.

    Yields:
        ``(key, value)`` where ``value`` is the object built for ``key``.
        Entries whose value is not a map are not yielded.
    """
    print('analysing big json file using streaming  ...')
    key = None
    item_prefix = None
    builder = None  # stays None until the first key under 'somePrefix'
    for prefix, event, value in parse(unzipFile):
        if prefix == 'somePrefix' and event == 'map_key':
            key = value
            item_prefix = 'somePrefix.' + str(key)
            builder = ObjectBuilder()
        elif builder is not None and (
                prefix == item_prefix
                or prefix.startswith(item_prefix + '.')):
            builder.event(event, value)
            # Only the end of the entry's top-level map completes the value.
            if event == 'end_map' and prefix == item_prefix:
                yield key, builder.value
Ejemplo n.º 5
0
def items_basecoro(target, prefix, map_type=None, skip_key=None):
    """
    Coroutine adapted from ``ijson/common.py``, extended with ``skip_key``.

    It receives ``(path, event, value)`` triples through ``send()``. A triple
    is dropped when ``skip_key`` is truthy and ``skip_key in path`` holds.
    For a triple whose path equals ``prefix``:

    * a ``start_map`` / ``start_array`` event begins building a value with
      ``ObjectBuilder(map_type=map_type)``; subsequent triples are fed to the
      builder until the matching end event arrives at ``prefix``, and the
      finished value is sent to ``target``;
    * any other event has its value sent to ``target`` directly.

    Triples consumed while a value is being built are not checked against
    ``skip_key``.
    """
    while True:
        path, kind, payload = (yield)

        if skip_key and skip_key in path:
            continue
        if path != prefix:
            continue

        if kind not in ('start_map', 'start_array'):
            # Scalar at the requested prefix: forward it unchanged.
            target.send(payload)
            continue

        assembler = ObjectBuilder(map_type=map_type)
        closing_kind = kind.replace('start', 'end')
        while not (path == prefix and kind == closing_kind):
            assembler.event(kind, payload)
            path, kind, payload = (yield)
        del assembler.containers[:]
        target.send(assembler.value)
Ejemplo n.º 6
0
 def test_scalar_builder(self):
     """ObjectBuilder fed with the events of SCALAR_JSON yields the scalar 0."""
     assembler = ObjectBuilder()
     event_stream = basic_parse(StringIO(SCALAR_JSON))
     for kind, payload in event_stream:
         assembler.event(kind, payload)
     self.assertEqual(assembler.value, 0)
Ejemplo n.º 7
0
def load_json_big(chain, file, merge=False):
    """Stream a JSON document with ijson and add its bags to *chain*.

    The document is read event by event via ``ijson.parse(file)``. Two
    arrays opened at nesting depth 2 are recognised by the most recent map
    key: ``'codes'`` and ``'chain'``. The ``'codes'`` array is turned into a
    lookup dict by pairing consecutive elements; the ``'chain'`` array is
    walked bag by bag and each entry is passed to ``chain.add_to_bag``.

    Args:
        chain: Object providing ``clear()`` and
            ``add_to_bag(start, item, count=...)``.
        file: Source handed to ``ijson.parse``.
        merge: When false (the default), ``chain.clear()`` is called before
            loading.

    NOTE(review): the exact document layout (e.g. that each bag alternates
    item / count) is inferred from the state machine below -- confirm
    against the writer of this format.
    """
    if not merge:
        chain.clear()

    parser = ijson.parse(file)

    # Current nesting depth, updated on every start_*/end_* event.
    depth = 0

    # One builder is reused for the codes array and for every bag item.
    builder = ObjectBuilder()

    # Lookup dict built from the 'codes' array; None until that array is read.
    codes = None

    # True while a container-valued bag item is being assembled.
    building_bag_item = False

    # Most recent number/string seen when the current bag array opened.
    start = None

    # Which part of the document the parser is currently inside.
    in_bag = False
    in_chain = False
    in_codes = False

    last_value = None
    last_key = None

    for prefix, event, value in parser:

        is_start = event.startswith('start_')
        is_end = event.startswith('end_')
        start_array = event == 'start_array'
        end_array = event == 'end_array'

        # Depth is adjusted before the dispatch below, so a start event is
        # tested at its inner depth and an end event at its outer depth.
        if is_start:
            depth += 1
        if is_end:
            depth -= 1
        if event == 'map_key':
            last_key = value
        if event == 'number' or event == 'string':
            last_value = value

        if not (in_codes or in_chain) and start_array and depth == 2:
            # An array opening at depth 2: decide by the key preceding it.
            if last_key == 'codes':
                in_codes = True
                builder.event(event, value)
            elif last_key == 'chain':
                in_chain = True
        elif event == 'end_array' and depth == 1:
            # The depth-2 array just closed.
            if in_codes:
                # Pair consecutive elements: [a, b, c, d] -> {a: b, c: d}.
                liter = iter(builder.value)
                codes = dict(zip(liter, liter))
                builder.event(event, value)
                in_codes = False
            if in_chain:
                in_chain = False
        elif in_chain and start_array and depth == 3:
            # A bag array opens; remember the last scalar seen before it.
            in_bag = True
            start = last_value
        elif in_chain and end_array and depth == 2:
            in_bag = False
        elif in_codes:
            # Every event inside the codes array goes to the builder.
            builder.event(event, value)
        elif in_bag:
            if not building_bag_item and is_start:
                # A container-valued item begins.
                builder.event(event, value)
                building_bag_item = True
            elif building_bag_item and is_end:
                # Any end event finishes the item.
                # NOTE(review): this assumes items hold no nested containers.
                builder.event(event, value)
                building_bag_item = False
            elif building_bag_item:
                builder.event(event, value)
            else:
                # Non-start event while no item is being built: its value is
                # used as the count for the item currently in the builder.
                if codes:
                    # Translate start and item elements through the codes dict.
                    chain.add_to_bag(codes[start],
                                     (codes[i] for i in builder.value),
                                     count=value)
                else:
                    chain.add_to_bag(start, builder.value, count=value)