def db_prepare(self):
    """Look up (or create) the Resource row for this crawl and mark it busy.

    Reads ``self.data`` (``name``, ``address``, ``basepath``, ``method`` and
    the ``options`` dict) and populates ``self.data['resource_id']``,
    ``resource_meta_id``, ``resource`` and ``resource_meta`` for later stages.

    Raises:
        Exception: if the resource's meta row is already flagged ``busy``
            (another crawler holds it).
    """
    # @TODO: race condition - use transactions (pool.runInteraction())
    query = (select([Resources]).where(Resources.c.name == self.data['name']))
    resource = yield tx_pool.runQuery(query)
    if not resource:
        # First crawl of this resource: create the meta row, then the
        # resource row pointing at it.
        query = (ResourceMeta.insert().values(
            recursive_sizes=self.data['options']['recursive_foldersizes'],
            web_user_agent=self.data['options']['user-agent'],
            auth_user=self.data['options']['auth_user'],
            auth_pass=self.data['options']['auth_pass']
        ).returning(ResourceMeta.c.id))
        meta_id = yield tx_pool.runQuery(query)
        meta_id = meta_id[0].id
        query = (Resources.insert().values(
            name=self.data['name'],
            address=self.data['address'],
            port=self.data['options']['port'],
            protocol=FileProtocols().id_by_name(self.data['method']),
            display_url=self.data['options']['display_url'],
            date_crawl_start=datetime.now(),
            basepath=self.data['basepath'],
            meta_id=meta_id
        ).returning(Resources.c.id))
        resource_id = yield tx_pool.runQuery(query)
        self.data['resource_id'] = resource_id[0].id
        # Re-select so the rest of the method handles the fresh row the
        # same way as a pre-existing one.
        query = (select([Resources]).where(Resources.c.id == self.data['resource_id']))
        resource = yield tx_pool.runQuery(query)
    resource = resource[0]
    resource_meta = yield tx_pool.runQuery(
        select([ResourceMeta]).where(ResourceMeta.c.id == resource.meta_id))
    resource_meta = resource_meta[0]
    if resource_meta.busy:
        raise Exception('This resource is already being crawled')
    self.data['resource_id'] = resource.id
    self.data['resource_meta_id'] = resource.meta_id
    self.data['resource'] = resource
    self.data['resource_meta'] = resource_meta
    # Flag the resource busy before touching it further.
    self.db_busy_crawling_toggle()
    # Refresh crawl-start timestamp, basepath and address in one UPDATE
    # instead of three separate round-trips against the same row.
    query = (Resources.update()
             .where(Resources.c.id == resource.id)
             .values(date_crawl_start=datetime.now(),
                     basepath=self.data['basepath'],
                     address=self.data['address']))
    yield tx_pool.runOperation(query)
def db_finalize(self):
    """Commit the results of a finished crawl and release the resource.

    Stamps the crawl-end time, swaps the freshly crawled file rows in for
    the stale ones, stores the file count, then runs the optional
    post-processing passes and clears the busy flag.
    """
    rid = self.data['resource_id']
    meta_id = self.data['resource_meta_id']

    # Record when this crawl finished.
    stmt = (Resources.update()
            .where(Resources.c.id == rid)
            .values(date_crawl_end=datetime.now()))
    yield tx_pool.runOperation(stmt)

    # Drop the previous crawl's file rows...
    stmt = (Files.delete().where(Files.c.resource_id == rid))
    yield tx_pool.runOperation(stmt)

    # ...and promote the staged rows (written under the negated resource
    # id during the crawl) to the real resource id.
    stmt = (Files.update()
            .where(Files.c.resource_id == '-%s' % str(rid))
            .values(resource_id=rid))
    yield tx_pool.runOperation(stmt)

    # Persist how many file rows this crawl inserted.
    stmt = (ResourceMeta.update()
            .where(ResourceMeta.c.id == meta_id)
            .values(file_count=self.resource.db_files_inserts))
    yield tx_pool.runOperation(stmt)

    if 'recursive_foldersizes' in self.data['options']:
        yield self.db_calculate_foldersizes()
    self.db_calculate_filedistribution(resource_id=rid)
    self.db_busy_crawling_toggle()