Example #1
0
    def archive(self, spiders=None):

        class ArchivingStorage(object):
            def __init__(self, storage):
                self.storage = storage

            def isdir(self, *args, **kwargs):
                return self.storage.isdir(self.rel_path(*args))

            def listdir(self, *args, **kwargs):
                if spiders and args == ['spiders']:
                    return ['{}.json'.format(s) for s in spiders]
                path = self.rel_path(*args)
                return itertools.chain(*self.storage.listdir(path))

            def rel_path(self, *args):
                return '/'.join(args)

            def open(self, *args, **kwargs):
                raw = kwargs.get('raw')
                fp = self.storage.open_with_default(self.rel_path(*args), {})
                return decode(fp.read()) if raw else json.load(fp)

        storage = ArchivingStorage(self.storage)
        schemas, extractors, spiders = load_project_data(storage)
        name = self._process_name()
        return port_project(name, schemas, spiders, extractors)
Example #2
0
    def archive(self, spiders=None, **kwargs):
        class ArchivingStorage(object):
            def __init__(self, storage):
                self.storage = storage

            def isdir(self, *args, **kwargs):
                return self.storage.isdir(self.rel_path(*args))

            def listdir(self, *args, **kwargs):
                if spiders and args == ['spiders']:
                    return ['{}.json'.format(s) for s in spiders]
                path = self.rel_path(*args)
                return itertools.chain(*self.storage.listdir(path))

            def rel_path(self, *args):
                return '/'.join(args)

            def open(self, *args, **kwargs):
                raw = kwargs.get('raw')
                fp = self.storage.open_with_default(self.rel_path(*args), {})
                return decode(fp.read()) if raw else json.load(fp)

        storage = ArchivingStorage(self.storage)
        schemas, extractors, spiders = load_project_data(storage)
        name = self._process_name()
        selector = kwargs.get('selector') or 'css'
        return port_project(name,
                            schemas,
                            spiders,
                            extractors,
                            selector=selector)
Example #3
0
    def archive(self, spiders=None):
        def list_spiders(spiders):
            if spiders:
                return spiders
            _, spiders = self.storage.listdir('spiders')
            len_json = len('.json')
            return [s[:-len_json] for s in spiders if s.endswith('.json')]

        def open_file(*path, **kwargs):
            raw = kwargs.pop('raw', False)
            path = join(*path[1:])
            if not raw and not path.endswith('.json'):
                path = '%s.json' % path
            try:
                content = self.storage.open(path).read()
                return decode(content) if raw else json.loads(content)
            except IOError as e:
                if path in ('items.json', 'extractors.json'):
                    return {}
                raise e

        schemas, extractors, spiders = load_project_data(
            open_file, list_spiders, None)
        name = self._process_name()
        return port_project(name, schemas, spiders, extractors)