예제 #1
0
    def db_finalize(self):
        """Write the end-of-crawl bookkeeping for the current resource.

        Steps, in order:

        1. Set ``date_crawl_end`` on the resource row to the current time.
        2. Delete every ``Files`` row that belongs to the resource id.
        3. Re-point ``Files`` rows stored under the id ``'-<resource_id>'``
           to the real resource id (presumably rows staged during this
           crawl -- TODO confirm against the insert path).
        4. Store ``self.resource.db_files_inserts`` as the ``file_count`` of
           the resource meta row.
        5. If the ``'recursive_foldersizes'`` option is present, recompute
           folder sizes.
        6. Call the file-distribution and busy-toggle helpers.

        This is a generator that yields the results of ``tx_pool`` calls;
        presumably it is driven by Twisted's ``inlineCallbacks`` (the
        decorator is not visible in this chunk -- TODO confirm).
        """
        # 1. Stamp the resource with the time the crawl finished.
        stamp_crawl_end = Resources.update().where(
            Resources.c.id == self.data['resource_id']
        ).values(date_crawl_end=datetime.now())
        yield tx_pool.runOperation(stamp_crawl_end)

        # 2. Remove the file rows currently stored under the resource id.
        drop_current_files = Files.delete().where(
            Files.c.resource_id == self.data['resource_id']
        )
        yield tx_pool.runOperation(drop_current_files)

        # 3. Rows keyed by the negated id string take over the real id.
        negated_id = '-%s' % str(self.data['resource_id'])
        adopt_negated_rows = Files.update().where(
            Files.c.resource_id == negated_id
        ).values(resource_id=self.data['resource_id'])
        yield tx_pool.runOperation(adopt_negated_rows)

        # 4. Record how many file rows were inserted.
        store_file_count = ResourceMeta.update().where(
            ResourceMeta.c.id == self.data['resource_meta_id']
        ).values(file_count=self.resource.db_files_inserts)
        yield tx_pool.runOperation(store_file_count)

        # 5. Optional, driven by the crawl options.
        wants_foldersizes = 'recursive_foldersizes' in self.data['options']
        if wants_foldersizes:
            yield self.db_calculate_foldersizes()

        # 6. NOTE(review): neither result below is yielded, so if these
        # helpers return Deferreds this generator does not wait for them --
        # confirm that is intended.
        self.db_calculate_filedistribution(resource_id=self.data['resource_id'])
        self.db_busy_crawling_toggle()
예제 #2
0
    def db_calculate_foldersizes(self):
        """Recompute ``file_size`` for every directory row of the resource.

        Walks the stored tree breadth-first, starting at ``'/'``. For each
        directory row found directly under the current path, ``file_size`` is
        summed over all rows whose ``file_path`` lies below that directory,
        and the total is written back onto the directory row.

        Paths are handled with a trailing slash: the contents of ``/foo``
        are looked up with ``file_path == '/foo/'``.

        This is a generator that yields the results of ``tx_pool`` calls and
        uses the values sent back in; presumably it is driven by Twisted's
        ``inlineCallbacks`` (the decorator is not visible in this chunk --
        TODO confirm).
        """
        from collections import deque

        resource_id = self.data['resource_id']
        log.msg("[%s] Recursively calculating folder sizes" % resource_id)

        # FIFO queue of directory paths still to visit. deque.popleft() is
        # O(1), whereas list.pop(0) shifts the whole list on every call.
        pending = deque(['/'])
        while pending:
            file_path = pending.popleft()

            query = (
                select([Files.c.file_name, Files.c.file_path, Files.c.file_size, Files.c.file_isdir])
                .where(Files.c.resource_id == resource_id)
                .where(Files.c.file_path == file_path))
            rows = yield tx_pool.runQuery(query)

            # Materialise the names first; the loop body yields more queries.
            for file_name in [row.file_name for row in rows if row.file_isdir]:
                child_path = '%s%s/' % (file_path, file_name)

                # Anchor the prefix at the trailing '/' so that a sibling such
                # as '/foobar/' is not counted towards the size of '/foo'.
                # NOTE(review): '%' and '_' inside directory names still act
                # as LIKE wildcards here; escaping them would need an ESCAPE
                # clause supported by the backend in use.
                query = (
                    select([func.sum(Files.c.file_size)])
                    .where(Files.c.resource_id == resource_id)
                    .where(Files.c.file_path.like(child_path + '%')))
                result = yield tx_pool.runQuery(query)

                # SUM() over zero rows is NULL/None; treat that as size 0.
                total = result[0].sum_1
                size = long(total) if total else 0

                query = (
                    Files.update()
                    .where(Files.c.resource_id == resource_id)
                    .where(Files.c.file_path == file_path)
                    .where(Files.c.file_name == file_name)
                    # "== True" builds a SQL expression; it is not a Python test.
                    .where(Files.c.file_isdir == True)
                    .values(file_size=size))
                yield tx_pool.runOperation(query)

                pending.append(child_path)

        log.msg("[%s] Recursively calculating folder sizes DONE" % resource_id)