def dump(self, path, release=None, force=False, job_manager=None, **kwargs):
    """Register an already-downloaded resource located at *path*.

    Resolves the data folder and release name, verifies the folder exists
    and is non-empty, runs post_dump in a worker thread, then records the
    dump as successful (optionally flagging the source for auto-upload).
    """
    # resolve target folder: absolute path used as-is, relative path joined
    # under src_root_folder, empty path means the root folder itself
    if os.path.isabs(path):
        folder = path
    else:
        folder = os.path.join(self.src_root_folder, path) if path else self.src_root_folder
    self.new_data_folder = folder
    # take latest path part, usually it's the release
    self.release = os.path.basename(folder) if release is None else release
    # sanity check: folder must exist and actually contain data
    if not os.path.exists(folder):
        raise DumperException("Can't find folder '%s' (did you download data first ?)" % folder)
    if not os.listdir(folder):
        raise DumperException("Directory '%s' is empty (did you download data first ?)" % folder)
    pinfo = self.get_pinfo()
    pinfo["step"] = "post_dump"
    strargs = "[path=%s,release=%s]" % (self.new_data_folder, self.release)
    # post_dump runs in a thread so the object's state is preserved
    job = yield from job_manager.defer_to_thread(
        pinfo, partial(self.post_dump, job_manager=job_manager))
    yield from asyncio.gather(job)  # consume future
    # ok, good to go
    self.register_status("success")
    if self.__class__.AUTO_UPLOAD:
        set_pending_to_upload(self.src_name)
    self.logger.info("success %s" % strargs, extra={"notify": True})
    self.logger.info("Manually dumped resource (data_folder: '%s')" % self.new_data_folder)
def dump(self, force=False, job_manager=None, *args, **kwargs):
    """No-op dump: downloads nothing, only runs the post-dump step.

    Ensures the (empty) local data folder exists, defers post_dump to a
    worker thread, then registers success and optionally queues the source
    for upload.
    """
    self.logger.debug("Dummy dumper, nothing to download...")
    self.prepare_local_folders(os.path.join(self.new_data_folder, "dummy_file"))
    # this is the only interesting thing happening here
    info = self.get_pinfo()
    info["step"] = "post_dump"
    post_job = yield from job_manager.defer_to_thread(
        info, partial(self.post_dump, job_manager=job_manager))
    yield from asyncio.gather(post_job)  # consume future
    self.logger.info("Registering success")
    self.register_status("success")
    if self.__class__.AUTO_UPLOAD:
        set_pending_to_upload(self.src_name)
    self.logger.info("success", extra={"notify": True})
def post_update_data(self, steps, force, batch_size, job_manager, **kwargs):
    """Dispatch files produced by this upload to dependent uploaders.

    For each generated "gene2<ext>.pyobj" file (PIR and PDB), move it into
    the archive folder of the matching uploader class for the current
    release, then flag that uploader as pending so the dependent uploads
    get triggered.
    """
    # move produced files used for other dependent uploaders
    klass = {"pir": UniprotPIRUploader, "pdb": UniprotPDBUploader}
    # last path component of the data folder is the release
    release = os.path.split(self.data_folder)[-1]
    for ext in ("pir", "pdb"):
        destdir = os.path.join(config.DATA_ARCHIVE_ROOT, klass[ext].name, release)
        destfn = "gene2%s.pyobj" % ext
        # idempotent: folder may already exist from a previous run
        os.makedirs(destdir, exist_ok=True)
        self.logger.info("Dispatching file '%s' to %s upload" % (destfn, ext.upper()))
        # NOTE(review): os.rename fails across filesystems — assumes the
        # archive root lives on the same device as the data folder; confirm,
        # or switch to shutil.move if that's not guaranteed
        os.rename(os.path.join(self.data_folder, destfn),
                  os.path.join(destdir, destfn))
        uploader.set_pending_to_upload(klass[ext].name)
def dump(self, steps=None, force=False, job_manager=None, check_only=False, **kwargs):
    '''
    Dump (ie. download) resource as needed.
    This should be called after instance creation.
    'force' argument will force dump, passing this to
    create_todump_list() method.

    With check_only=True, only report whether a new release is available
    (returns the release) without downloading anything.
    Registers status "success"/"failed" accordingly; re-raises any error.
    '''
    # signature says it's optional but for now it's not...
    assert job_manager
    # check what to do
    self.steps = steps or self.steps
    if isinstance(self.steps, str):
        self.steps = [self.steps]
    strargs = "[steps=%s]" % ",".join(self.steps)
    try:
        if "dump" in self.steps:
            pinfo = self.get_pinfo()
            pinfo["step"] = "check"
            # if last download failed (or was interrupted), we want to force the dump again
            try:
                if self.src_doc["download"]["status"] in ["failed", "downloading"]:
                    self.logger.info("Forcing dump because previous failed (so let's try again)")
                    force = True
            except (AttributeError, KeyError):
                # no src_doc or no download info
                pass
            # TODO: blocking call for now, FTP client can't be properly set in thread after
            self.create_todump_list(force=force, **kwargs)
            # make sure we release (disconnect) client so we don't keep an open
            # connection for nothing
            self.release_client()
            if self.to_dump:
                if check_only:
                    self.logger.info("New release available, '%s', %s file(s) to download" %
                                     (self.release, len(self.to_dump)), extra={"notify": True})
                    return self.release
                # mark the download starts
                self.register_status("downloading", transient=True)
                # unsync to make it pickable
                state = self.unprepare()
                yield from self.do_dump(job_manager=job_manager)
                # then restore state
                self.prepare(state)
            else:
                # if nothing to dump, don't do post process
                self.logger.debug("Nothing to dump", extra={"notify": True})
                return "Nothing to dump"
        if "post" in self.steps:
            got_error = False
            pinfo = self.get_pinfo()
            pinfo["step"] = "post_dump"
            # for some reason (like maintaining object's state between pickling),
            # we can't use a process there. Need to use a thread to maintain that
            # state without building an unmaintainable monster
            job = yield from job_manager.defer_to_thread(
                pinfo, partial(self.post_dump, job_manager=job_manager))

            def postdumped(f):
                # capture any exception raised in the worker so it can be
                # re-raised in this coroutine
                nonlocal got_error
                if f.exception():
                    got_error = f.exception()

            job.add_done_callback(postdumped)
            yield from job
            if got_error:
                raise got_error
            # set it to success at the very end
            self.register_status("success")
            if self.__class__.AUTO_UPLOAD:
                set_pending_to_upload(self.src_name)
            self.logger.info("success %s" % strargs, extra={"notify": True})
    except (KeyboardInterrupt, Exception) as e:
        self.logger.error("Error while dumping source: %s" % e)
        import traceback
        self.logger.error(traceback.format_exc())
        self.register_status("failed", download={"err": str(e)})
        self.logger.error("failed %s: %s" % (strargs, e), extra={"notify": True})
        raise
    finally:
        # always disconnect the client, success or failure
        if self.client:
            self.release_client()