Beispiel #1
0
    def db_calculate_filedistribution(resource_id):
        """Compute the per-category share of a resource's files and store it.

        For every ``(name, file_format)`` entry in ``FileCategories().data``
        the number of ``Files`` rows with that format is expressed as a
        percentage of all ``Files`` rows belonging to ``resource_id``.  The
        resulting mapping is JSON-encoded and written to the
        ``file_distribution`` column of the ``ResourceMeta`` row whose id
        equals ``resource_id``.

        NOTE(review): the function takes no ``self`` and yields the results
        of ``tx_pool`` calls, so it is presumably a static method driven by
        Twisted's ``inlineCallbacks``; the decorators are not visible here.

        NOTE(review): ``ResourceMeta`` is matched on ``resource_id`` here,
        while sibling methods match it on ``resource_meta_id`` -- confirm the
        two ids are interchangeable.

        :param resource_id: id used to filter ``Files`` and to pick the
            ``ResourceMeta`` row that receives the distribution.
        """
        log.msg("[%s] Calculating file distributions" % resource_id)

        file_distribution = {}

        # Total number of files of this resource; denominator for every share.
        query = (select([func.count()]).select_from(Files).where(Files.c.resource_id == resource_id))
        total_file_count = yield tx_pool.runQuery(query)
        total_file_count = int(total_file_count[0].count_1)

        for k, v in FileCategories().data.iteritems():
            query = (select([func.count()]).select_from(Files).where(Files.c.file_format == v).where(Files.c.resource_id == resource_id))
            count = yield tx_pool.runQuery(query)

            if count:
                count = int(count[0].count_1)

                if total_file_count:
                    pct = 100 * float(count) / float(total_file_count)
                else:
                    # A resource without any files would otherwise raise
                    # ZeroDivisionError; every category is simply 0% then.
                    pct = 0.0
                file_distribution[k] = "%.1f" % pct
            else:
                # No result rows at all: keep the original integer 0 marker.
                file_distribution[k] = 0

        query = (ResourceMeta.update().where(ResourceMeta.c.id == resource_id).values(file_distribution=json.dumps(file_distribution)))
        yield tx_pool.runOperation(query)

        log.msg("[%s] Calculating file distributions DONE" % resource_id)
Beispiel #2
0
    def db_finalize(self):
        """Run the database steps that conclude a crawl of this resource.

        In order:

        1. set ``date_crawl_end`` of the ``Resources`` row to the current time;
        2. delete the ``Files`` rows stored under the resource id;
        3. re-key the ``Files`` rows stored under ``'-<resource_id>'`` to the
           real resource id (presumably the rows collected by this crawl);
        4. write ``self.resource.db_files_inserts`` to ``file_count`` of the
           ``ResourceMeta`` row;
        5. recalculate folder sizes when the ``'recursive_foldersizes'``
           option is present;
        6. recalculate the file distribution;
        7. toggle the busy flag.

        NOTE(review): written as a generator that yields the results of
        ``tx_pool`` calls, so presumably driven by Twisted's
        ``inlineCallbacks``; the decorator is not visible here.
        """
        query = (Resources.update().where(Resources.c.id == self.data['resource_id']).values(date_crawl_end=datetime.now()))
        yield tx_pool.runOperation(query)

        # The delete must run before the re-key below, otherwise old and new
        # rows would end up under the same resource id.
        query = (Files.delete().where(Files.c.resource_id == self.data['resource_id']))
        yield tx_pool.runOperation(query)

        query = (Files.update().where(Files.c.resource_id == '-%s' % str(self.data['resource_id'])).values(resource_id=self.data['resource_id']))
        yield tx_pool.runOperation(query)

        query = (ResourceMeta.update().where(ResourceMeta.c.id == self.data['resource_meta_id']).values(file_count=self.resource.db_files_inserts))
        yield tx_pool.runOperation(query)

        if 'recursive_foldersizes' in self.data['options']:
            yield self.db_calculate_foldersizes()

        # Wait for the remaining steps like db_calculate_foldersizes above,
        # instead of firing them off and returning early: this keeps the
        # busy flag set until the statistics are written and lets failures
        # reach the caller.  Assumes both methods return Deferreds.
        try:
            yield self.db_calculate_filedistribution(resource_id=self.data['resource_id'])
        finally:
            # The busy flag was always toggled before, even when the
            # statistics step failed; keep that guarantee.
            yield self.db_busy_crawling_toggle()
Beispiel #3
0
    def db_busy_crawling_toggle(self):
        """Invert the ``busy`` flag of this crawl's ``ResourceMeta`` row.

        Reads the row whose id is ``self.data['resource_meta_id']`` and then
        writes ``busy = 0`` if the stored value was truthy, ``busy = 1``
        otherwise.  The read and the write are two separate ``tx_pool``
        calls.
        """
        lookup = select([ResourceMeta]).where(ResourceMeta.c.id == self.data['resource_meta_id'])
        rows = yield tx_pool.runQuery(lookup)
        current = rows[0]

        # Flip the stored value: truthy -> 0, falsy -> 1.
        if current.busy:
            flipped = 0
        else:
            flipped = 1

        toggle = ResourceMeta.update().where(ResourceMeta.c.id == self.data['resource_meta_id']).values(busy=flipped)
        yield tx_pool.runOperation(toggle)