Ejemplo n.º 1
0
    def eval(self, env):
        """Evaluate ``self.body`` once for every item produced by ``self.source``.

        The source is evaluated immediately against ``env``; the per-item
        evaluation of the body is deferred to a generator function that is
        handed to ``RepeatableIterator``.

        When ``self.name`` is truthy each item is bound under that name with
        ``env.bind``; otherwise the item is passed to ``env.replace``.
        """
        items = self.source.eval(env)

        # The default arguments pin ``env`` and ``items`` at definition time
        # (the usual workaround for Python's late-binding closures).
        def generate(env=env, items=items):
            use_named_binding = bool(self.name)
            for element in items:
                if use_named_binding:
                    scope = env.bind(self.name, element)
                else:
                    scope = env.replace(element)
                yield self.body.eval(scope)

        return RepeatableIterator(generate)
    def test_iteration(self):
        """RepeatableIterator can be re-iterated, reports emptiness, and stays lazy."""

        class LazinessException(Exception):
            pass

        def plain_numbers():
            # A function returning a fresh one-shot generator over 1..99.
            return (number for number in range(1, 100))

        def numbers_that_explode():
            # Yields 1..10 and raises as soon as an 11th value is requested.
            for number in range(1, 100):
                if number > 10:
                    raise LazinessException('Not lazy enough')
                yield number

        expected = list(range(1, 100))

        # Sanity check: a bare generator is exhausted after a single pass,
        # which is exactly the situation RepeatableIterator is meant to fix.
        one_shot = plain_numbers()
        assert list(one_shot) == expected
        assert list(one_shot) == []

        # Wrapped in a RepeatableIterator, a second pass yields the data again.
        repeatable = RepeatableIterator(plain_numbers)
        assert list(repeatable) == expected
        assert list(repeatable) == expected
        assert bool(repeatable) is True

        empty = RepeatableIterator(lambda: (i for i in []))
        assert bool(empty) is False

        # Taking only the first five items must not trigger the exception.
        lazy = RepeatableIterator(numbers_that_explode)
        assert list(islice(lazy, 5)) == list(range(1, 6))

        # Asking for more than ten items must reach the raising branch.
        try:
            list(islice(lazy, 15))
        except LazinessException:
            pass
        else:
            raise Exception('Should have failed')
Ejemplo n.º 3
0
    def lookup(self, name):
        """Find every datum in this environment's bindings matching ``name``.

        ``name`` may be a string, which is parsed with ``self.parse``, or an
        existing ``jsonpath.JSONPath`` expression, which is used as-is.

        Returns a ``RepeatableIterator`` built from a generator function that
        yields the datums found by the expression.

        Raises ``NotFound`` when ``name`` is neither a string nor a
        ``jsonpath.JSONPath``.
        """
        is_text = isinstance(name, six.string_types)
        if not is_text and not isinstance(name, jsonpath.JSONPath):
            raise NotFound(unwrap_val(name))
        expression = self.parse(name) if is_text else name

        # The default argument pins the expression at definition time.
        def matching_datums(expression=expression):
            for match in expression.find(self.__bindings):
                # HACK: the automatic id from jsonpath_rw is useful but is
                # lost once callers read ``.value``, so copy it onto dict
                # values that do not already carry an 'id' key.
                if isinstance(match.value, dict) and 'id' not in match.value:
                    match.value['id'] = jsonpath.AutoIdForDatum(match).value
                yield match

        return RepeatableIterator(matching_datums)
    def iterate(self, resource, paginator, params=None):
        """Lazily walk every page of a list endpoint.

        The response is assumed to look like a tastypie list response: a
        ``meta`` mapping with ``total_count``, ``offset``, ``limit`` and
        ``next`` entries, plus an ``objects`` list whose items have an ``id``.

        ``params`` is copied, so the caller's mapping is never modified.
        Returns a ``RepeatableIterator`` wrapping the page-walking generator.
        """
        params = dict(params or {})

        # The defaults pin the starting resource/params, so each fresh call
        # of the generator function starts again from the first page.
        def iterate_resource(resource=resource, params=params):
            keep_going = True
            previous_ids = set()

            while keep_going:
                started_at = datetime.utcnow()
                batch = self.get(resource, params)

                meta = batch['meta']
                raw_total = meta['total_count']
                total_count = int(raw_total) if raw_total else 'unknown'
                logger.debug(
                    'Received %s-%s of %s', meta['offset'],
                    int(meta['offset']) + int(meta['limit']),
                    total_count)

                objects = batch['objects']
                # Unless a next page with usable params turns up, stop here.
                keep_going = False
                if objects:
                    # Skip anything that was already part of the previous page.
                    for item in objects:
                        if item['id'] not in previous_ids:
                            yield item

                    if meta['next']:
                        previous_ids = {item['id'] for item in objects}
                        params = paginator.next_page_params_from_batch(batch)
                        keep_going = bool(params)

                # Runs after every fetch, including an empty or final one.
                self.checkpoint(started_at)

        return RepeatableIterator(iterate_resource)
Ejemplo n.º 5
0
    def iterate(self, resource, paginator, params=None):
        """Lazily yield serialized objects from a mock API, batch by batch.

        ``resource`` is either 'form' or 'case'; ``params`` are API params
        (e.g. {'limit': 1000, u'type': u'pregnant_mother',
        'order_by': 'server_date_modified'}). ``params`` is copied before use.

        Returns a ``RepeatableIterator`` wrapping the batch-walking generator.
        """
        from commcare_export.cli import logger

        params = dict(params or {})
        mock_api = _get_mock_api(resource, self.project, params)

        def iterate_resource(resource=resource, params=params):
            offset = 0
            previous_ids = set()
            total = mock_api.query_set.count()

            while True:
                raw_batch = self.get(mock_api.query_set, offset, params)
                serialized = [mock_api.serialize(item) for item in raw_batch]
                logger.info('Received {}-{} of {}'.format(
                    offset, offset + self.limit, total))

                # An empty batch ends the iteration without a checkpoint.
                if not serialized:
                    break

                # Skip anything that was already part of the previous batch.
                for record in serialized:
                    if record['id'] not in previous_ids:
                        yield record

                another_batch = offset < total
                if another_batch:
                    previous_ids = {record['id'] for record in serialized}
                    offset += self.limit

                self.checkpoint(paginator, serialized)
                if not another_batch:
                    break

        from commcare_export.repeatable_iterator import RepeatableIterator
        return RepeatableIterator(iterate_resource)
    def test_or(self):
        """``or`` returns the first operand unless it is None, and is lazy."""

        def evaluate_or(left, right, scope):
            # Build and evaluate ``or(left, right)`` in the given environment.
            return Apply(Reference("or"), left, right).eval(scope)

        def exploding_values():
            # Yields 0, then calls die() when a second value is requested.
            return (i if i < 1 else die('Not lazy enough') for i in range(2))

        builtins_env = BuiltInEnv()
        assert evaluate_or(Literal(None), Literal(2), builtins_env) == 2

        laziness_iterator = RepeatableIterator(exploding_values)

        # A first operand that is not None must be returned as-is, falsy
        # values included.
        assert evaluate_or(Literal(1), Literal(laziness_iterator),
                           builtins_env) == 1
        assert evaluate_or(Literal(''), Literal(laziness_iterator),
                           builtins_env) == ''
        assert evaluate_or(Literal(0), Literal(laziness_iterator),
                           builtins_env) == 0

        # With None as the first operand the lazy second operand is used,
        # and this case is expected to raise.
        with pytest.raises(LazinessException):
            evaluate_or(Literal(None), Literal(laziness_iterator),
                        builtins_env)

        json_env = builtins_env | JsonPathEnv({'a': {'c': 'c val'}})
        assert evaluate_or(Reference('a.b'), Reference('a.c'),
                           json_env) == 'c val'
        assert evaluate_or(Reference('a.b'), Reference('a.d'),
                           json_env) is None
    def test_flatmap(self):
        """FlatMap concatenates the body's results per item and stays lazy."""
        env = BuiltInEnv() | DictEnv({})

        def exploding_rows():
            # Rows for i = 0..3, then die() once a fifth row is requested.
            return ({
                'a': range(i)
            } if i < 4 else die('Not lazy enough') for i in range(12))

        laziness_iterator = RepeatableIterator(exploding_rows)

        # A literal body is repeated once per source item.
        mixed_rows = [{'a': [1]}, {'a': 'foo'}, {'a': [3, 4]}]
        constant_result = FlatMap(source=Literal(mixed_rows),
                                  body=Literal([1, 2, 3])).eval(env)
        assert list(constant_result) == [1, 2, 3, 1, 2, 3, 1, 2, 3]

        # A reference body is looked up per item and the lists are flattened.
        list_rows = [{'a': [1]}, {'a': [2]}, {'a': [3, 4]}]
        flattened_result = FlatMap(source=Literal(list_rows),
                                   body=Reference('a')).eval(env)
        assert list(flattened_result) == [1, 2, 3, 4]

        # The first six values come from rows 1..3 only, so die() is not hit.
        lazy_result = FlatMap(source=Literal(laziness_iterator),
                              body=Reference('a')).eval(env)
        assert list(islice(lazy_result, 6)) == [0, 0, 1, 0, 1, 2]

        # Draining the whole result must reach the raising row.
        try:
            list(
                FlatMap(source=Literal(laziness_iterator),
                        body=Reference('a')).eval(env))
        except LazinessException:
            pass
        else:
            raise Exception('Should have failed')
    def test_map(self):
        """Map evaluates the body once per source item and stays lazy."""
        env = BuiltInEnv() | DictEnv({})

        def exploding_rows():
            # Rows for i = 0..4, then die() once a sixth row is requested.
            return ({
                'a': i
            } if i < 5 else die('Not lazy enough') for i in range(12))

        laziness_iterator = RepeatableIterator(exploding_rows)

        # A literal body yields the same value for every source item.
        constant_result = Map(source=Literal([{'a': 1}, {'a': 2}, {'a': 3}]),
                              body=Literal(1)).eval(env)
        assert list(constant_result) == [1, 1, 1]

        # A reference body is looked up against each item in turn.
        reference_result = Map(source=Literal([{'a': 1}, {'a': 2}, {'a': 3}]),
                               body=Reference('a')).eval(env)
        assert list(reference_result) == [1, 2, 3]

        # Only the first five rows are needed here, so die() is not hit.
        lazy_result = Map(source=Literal(laziness_iterator),
                          body=Reference('a')).eval(env)
        assert list(islice(lazy_result, 5)) == [0, 1, 2, 3, 4]

        # Draining the whole result must reach the raising row.
        try:
            list(
                Map(source=Literal(laziness_iterator),
                    body=Reference('a')).eval(env))
        except LazinessException:
            pass
        else:
            raise Exception('Should have failed')
Ejemplo n.º 9
0
 def emitted_tables(self):
     """Return the tables emitted by the left operand followed by the right's.

     Both operands' ``emitted_tables()`` are only called when the returned
     ``RepeatableIterator`` invokes the inner function.
     """
     def combined_tables():
         left_tables = self.left.emitted_tables()
         right_tables = self.right.emitted_tables()
         return chain(left_tables, right_tables)

     return RepeatableIterator(combined_tables)
Ejemplo n.º 10
0
    def iterate(self,
                resource,
                paginator,
                params=None,
                checkpoint_manager=None):
        """
        Assumes the endpoint is a list endpoint, and iterates over it
        making a lot of assumptions that it is like a tastypie endpoint.

        Each batch returned by ``self.get`` is expected to provide
        ``batch['meta']['total_count']``, ``batch['meta']['next']`` and a
        ``batch['objects']`` list whose items have an ``'id'`` key.

        ``params`` is copied before use. Returns a ``RepeatableIterator``
        wrapping the paging generator function defined below.

        Raises ``ResourceRepeatException`` (from inside the generator) when
        the same parameters are requested ``RESOURCE_REPEAT_LIMIT`` times in
        a row after the initial request.
        """
        params = dict(params or {})

        # Defaults pin the starting resource/params, so every fresh call of
        # the generator function starts from the initial parameters again.
        def iterate_resource(resource=resource, params=params):
            more_to_fetch = True
            last_batch_ids = set()
            total_count = None
            fetched = 0
            repeat_counter = 0
            last_params = None

            while more_to_fetch:
                # Guard against paging that never advances: count consecutive
                # requests issued with identical parameters.
                if params == last_params:
                    repeat_counter += 1
                else:
                    repeat_counter = 0
                if repeat_counter >= RESOURCE_REPEAT_LIMIT:
                    raise ResourceRepeatException(
                        "Requested resource '{}' {} times with same parameters"
                        .format(resource, repeat_counter))

                batch = self.get(resource, params)
                # Shallow copy, so rebinding ``params`` below does not affect
                # the value compared on the next pass.
                last_params = copy.copy(params)
                # (Re)read the total and restart the running count when the
                # total is unset/unknown or has already been reached.
                if not total_count or total_count == 'unknown' or fetched >= total_count:
                    total_count = int(
                        batch['meta']['total_count']
                    ) if batch['meta']['total_count'] else 'unknown'
                    fetched = 0

                fetched += len(batch['objects'])
                logger.debug('Received %s of %s', fetched, total_count)

                if not batch['objects']:
                    # Empty batch: stop, and note that no checkpoint is made.
                    more_to_fetch = False
                else:
                    # Skip objects that were already part of the previous
                    # batch.
                    for obj in batch['objects']:
                        if obj['id'] not in last_batch_ids:
                            yield obj

                    if batch['meta']['next']:
                        last_batch_ids = {
                            obj['id']
                            for obj in batch['objects']
                        }
                        params = paginator.next_page_params_from_batch(batch)
                        if not params:
                            more_to_fetch = False
                    else:
                        more_to_fetch = False

                    # Checkpoint after each non-empty batch; the last
                    # argument is True when no further fetch will follow
                    # (presumably marks the final checkpoint -- confirm
                    # against ``checkpoint``).
                    self.checkpoint(checkpoint_manager, paginator, batch,
                                    not more_to_fetch)

        return RepeatableIterator(iterate_resource)
Ejemplo n.º 11
0
def default_to_json(obj):
    """Convert ``obj`` using its own ``toJSON`` method when it has one.

    Objects without a ``toJSON`` attribute are delegated to
    ``RepeatableIterator.to_jvalue``.
    """
    if not hasattr(obj, 'toJSON'):
        return RepeatableIterator.to_jvalue(obj)
    return obj.toJSON()