def _process_request(self, request, spider):
        """
        Swap cacheable Splash argument values for short fingerprints.

        For every name listed in ``meta['splash']['cache_args']`` that is
        also present in ``meta['splash']['args']``, the value is saved in
        ``spider.state[self.local_values_key]`` under a ``'LOCAL+<hash>'``
        key and the argument itself is replaced by that key. The names that
        were replaced are recorded in ``meta['splash']['_replaced_args']``.
        Storing each value once instead of in every queued request keeps
        the disk queue small; the downloader middleware is expected to
        restore the values from the fingerprints.

        Requests without ``meta['splash']`` are returned untouched. The
        same request object is always returned (it is modified in place).
        """
        if 'splash' not in request.meta:
            return request

        splash_meta = request.meta['splash']

        if '_replaced_args' in splash_meta:
            # Already carries the marker, so treat it as a re-scheduled
            # request and leave it alone.
            # XXX: does it work as expected?
            warnings.warn(
                "Unexpected request.meta['splash']['_replaced_args']")
            return request

        # The marker is written even when nothing ends up being replaced.
        replaced_names = splash_meta['_replaced_args'] = []
        cacheable_names = splash_meta.get('cache_args', [])
        splash_args = splash_meta.setdefault('args', {})

        for arg_name in cacheable_names:
            if arg_name in splash_args:
                original_value = splash_args[arg_name]
                fingerprint = 'LOCAL+' + json_based_hash(original_value)
                # Keep the real value so it can be looked up later.
                spider.state[self.local_values_key][fingerprint] = original_value
                splash_args[arg_name] = fingerprint
                replaced_names.append(arg_name)

        return request
Example #2
0
    def _process_request(self, request, spider):
        """
        Replace cached Splash arguments of a request with fingerprints.

        Each name in ``meta['splash']['cache_args']`` that also appears in
        ``meta['splash']['args']`` has its value stored in
        ``spider.state[self.local_values_key]`` under a ``'LOCAL+<hash>'``
        key; the argument is then set to that key and the name is appended
        to ``meta['splash']['_replaced_args']``. This way a value is kept
        only once rather than in every queued request, which helps with
        disk queue size. The downloader middleware should restore the
        values from the fingerprints.

        The request is modified in place and returned; requests without
        ``meta['splash']`` are returned unchanged.
        """
        meta = request.meta
        if 'splash' not in meta:
            return request

        options = meta['splash']
        if '_replaced_args' in options:
            # The marker is already there: assume a re-scheduled request
            # and skip it.
            # XXX: does it work as expected?
            warnings.warn("Unexpected request.meta['splash']['_replaced_args']")
            return request

        # Set the marker up front; it stays empty if nothing is replaced.
        options['_replaced_args'] = []
        names_to_cache = options.get('cache_args', [])
        call_args = options.setdefault('args', {})

        for key in names_to_cache:
            if key in call_args:
                stored = call_args[key]
                digest = 'LOCAL+' + json_based_hash(stored)
                # Remember the full value under its fingerprint.
                spider.state[self.local_values_key][digest] = stored
                call_args[key] = digest
                options['_replaced_args'].append(key)

        return request
def test_json_based_hash(val1, val2):
    """Check json_based_hash is repeatable and differs for unequal inputs.

    Cases where ``val1`` and ``val2`` compare equal are handed to
    ``assume`` (presumably Hypothesis's ``assume``, which would discard
    them -- confirm against the file's imports).
    """
    assume(val1 != val2)

    # Hashing the same value twice must give the same result.
    first_hash = json_based_hash(val1)
    second_hash = json_based_hash(val1)
    assert first_hash == second_hash

    # Two unequal values must not share a hash.
    hash_of_val1 = json_based_hash(val1)
    hash_of_val2 = json_based_hash(val2)
    assert hash_of_val1 != hash_of_val2
Example #4
0
def test_json_based_hash(val1, val2):
    """Verify json_based_hash is stable per value and distinct across values.

    ``assume`` receives ``val1 != val2``; presumably this is Hypothesis's
    ``assume`` and rejects equal pairs -- verify against the imports.
    """
    assume(val1 != val2)

    # Same input, two separate calls: results must match.
    once = json_based_hash(val1)
    again = json_based_hash(val1)
    assert once == again

    # Different inputs: results must differ.
    left = json_based_hash(val1)
    right = json_based_hash(val2)
    assert left != right