예제 #1
0
    def db_prepare(self):
        """Find or create the resource row for this crawl and record its state.

        This is a generator: every ``tx_pool`` call is yielded and its result
        is sent back in (presumably driven by Twisted's ``inlineCallbacks``;
        the decorator is not visible from here -- TODO confirm).

        Steps:
          1. Look up the ``Resources`` row whose name is ``self.data['name']``.
          2. If none exists, insert a ``ResourceMeta`` row followed by a
             ``Resources`` row that points at it, then re-read the resource.
          3. Load the matching ``ResourceMeta`` row and refuse to continue if
             it is flagged ``busy``.
          4. Store ``resource_id``, ``resource_meta_id``, ``resource`` and
             ``resource_meta`` in ``self.data``.
          5. Call ``self.db_busy_crawling_toggle()`` and refresh the crawl
             start date, base path and address of the resource row.

        Raises:
            Exception: if the resource meta row reports ``busy``.
        """
        # @TODO: race condition - use transactions (pool.runInteraction())

        query = (select([Resources]).where(Resources.c.name == self.data['name']))
        resource = yield tx_pool.runQuery(query)

        if not resource:
            # Unknown resource: create the meta row first because the
            # resource row references it through meta_id.
            query = (ResourceMeta.insert().values(
                recursive_sizes=self.data['options']['recursive_foldersizes'],
                web_user_agent=self.data['options']['user-agent'],
                auth_user=self.data['options']['auth_user'],
                auth_pass=self.data['options']['auth_pass']
            ).returning(ResourceMeta.c.id))

            meta_id = yield tx_pool.runQuery(query)
            meta_id = meta_id[0].id

            query = (Resources.insert().values(
                name=self.data['name'],
                address=self.data['address'],
                port=self.data['options']['port'],
                protocol=FileProtocols().id_by_name(self.data['method']),
                display_url=self.data['options']['display_url'],
                date_crawl_start=datetime.now(),
                basepath=self.data['basepath'],
                meta_id=meta_id
            ).returning(Resources.c.id))

            resource_id = yield tx_pool.runQuery(query)
            self.data['resource_id'] = resource_id[0].id

            # The insert only returned the id; fetch the complete row so both
            # branches continue with the same kind of result.
            query = (select([Resources]).where(Resources.c.id == self.data['resource_id']))
            resource = yield tx_pool.runQuery(query)

        resource = resource[0]
        resource_meta = yield tx_pool.runQuery(select([ResourceMeta]).where(ResourceMeta.c.id == resource.meta_id))
        resource_meta = resource_meta[0]

        if resource_meta.busy:
            raise Exception('This resource is already being crawled')

        self.data['resource_id'] = resource.id
        self.data['resource_meta_id'] = resource.meta_id
        self.data['resource'] = resource
        self.data['resource_meta'] = resource_meta

        # NOTE(review): the result of this call is not yielded, so if it
        # returns a deferred the statement below does not wait for it --
        # confirm that this is intended.
        self.db_busy_crawling_toggle()

        # Refresh all per-crawl columns with a single UPDATE instead of one
        # statement per column: one round-trip, and the row can never be left
        # with only some of the three values applied.
        query = (Resources.update().where(Resources.c.id == resource.id).values(
            date_crawl_start=datetime.now(),
            basepath=self.data['basepath'],
            address=self.data['address']
        ))
        yield tx_pool.runOperation(query)
예제 #2
0
    def db_finalize(self):
        """Write the end-of-crawl state for the current resource.

        This is a generator: every ``tx_pool`` call is yielded (presumably
        driven by Twisted's ``inlineCallbacks``; the decorator is not visible
        from here -- TODO confirm).

        Steps, in order:
          1. Stamp ``date_crawl_end`` on the ``Resources`` row.
          2. Delete the ``Files`` rows stored under the resource id.
          3. Re-key the ``Files`` rows stored under ``'-<resource id>'`` to
             the real resource id.
          4. Store ``self.resource.db_files_inserts`` as the meta row's
             ``file_count``.
          5. Calculate folder sizes when the ``recursive_foldersizes`` option
             is enabled.
          6. Call ``db_calculate_filedistribution`` and
             ``db_busy_crawling_toggle``.
        """
        resource_id = self.data['resource_id']
        options = self.data['options']

        query = (Resources.update().where(Resources.c.id == resource_id).values(date_crawl_end=datetime.now()))
        yield tx_pool.runOperation(query)

        # Order matters: the rows under the real id must be gone before the
        # rows under the '-<id>' key are moved onto that id.
        # NOTE(review): presumably the '-<id>' rows are the results of the
        # crawl that just finished -- confirm against the insert code.
        query = (Files.delete().where(Files.c.resource_id == resource_id))
        yield tx_pool.runOperation(query)

        query = (Files.update().where(Files.c.resource_id == '-%s' % str(resource_id)).values(resource_id=resource_id))
        yield tx_pool.runOperation(query)

        query = (ResourceMeta.update().where(ResourceMeta.c.id == self.data['resource_meta_id']).values(file_count=self.resource.db_files_inserts))
        yield tx_pool.runOperation(query)

        # Honour the option's value, not merely its presence: a key that is
        # present but falsy means the calculation was switched off.
        if 'recursive_foldersizes' in options and options['recursive_foldersizes']:
            yield self.db_calculate_foldersizes()

        # NOTE(review): the two calls below are not yielded, so any deferred
        # they return is not waited on before this generator finishes --
        # confirm that this is intended.
        self.db_calculate_filedistribution(resource_id=resource_id)

        self.db_busy_crawling_toggle()