Example #1
0
    def dump(self, path, release=None, force=False, job_manager=None, **kwargs):
        """Register an already-downloaded folder as a dump, then run post-dump.

        `path` may be absolute, relative to the source root folder, or empty
        (meaning the source root folder itself). When `release` is not given,
        the last path component of the resolved folder is used as the release.
        Raises DumperException if the folder is missing or empty.
        """
        # resolve the data folder from the given path
        if not path:
            folder = self.src_root_folder
        elif os.path.isabs(path):
            folder = path
        else:
            folder = os.path.join(self.src_root_folder, path)
        self.new_data_folder = folder
        # take latest path part as the release, unless explicitly provided
        self.release = os.path.basename(folder) if release is None else release
        # sanity check: the folder must exist and actually contain data
        if not os.path.exists(folder):
            raise DumperException("Can't find folder '%s' (did you download data first ?)" % folder)
        if not os.listdir(folder):
            raise DumperException("Directory '%s' is empty (did you download data first ?)" % folder)

        proc_info = self.get_pinfo()
        proc_info["step"] = "post_dump"
        strargs = "[path=%s,release=%s]" % (folder, self.release)
        post_job = yield from job_manager.defer_to_thread(proc_info,
                partial(self.post_dump, job_manager=job_manager))
        yield from asyncio.gather(post_job)  # consume future
        # ok, good to go
        self.register_status("success")
        if self.__class__.AUTO_UPLOAD:
            set_pending_to_upload(self.src_name)
        self.logger.info("success %s" % strargs, extra={"notify": True})
        self.logger.info("Manually dumped resource (data_folder: '%s')" % folder)
Example #2
0
 def dump(self,force=False,job_manager=None, *args, **kwargs):
     """No-op dump: nothing is downloaded; only the post_dump step runs,
     deferred to a thread via job_manager, before the source is marked
     successful (and pending upload when AUTO_UPLOAD is set)."""
     self.logger.debug("Dummy dumper, nothing to download...")
     self.prepare_local_folders(os.path.join(self.new_data_folder,"dummy_file"))
     # post_dump is the only interesting thing happening here
     proc_info = self.get_pinfo()
     proc_info["step"] = "post_dump"
     post_job = yield from job_manager.defer_to_thread(proc_info,
             partial(self.post_dump, job_manager=job_manager))
     yield from asyncio.gather(post_job)  # consume future
     self.logger.info("Registering success")
     self.register_status("success")
     if self.__class__.AUTO_UPLOAD:
         set_pending_to_upload(self.src_name)
     self.logger.info("success", extra={"notify": True})
 def post_update_data(self, steps, force, batch_size, job_manager,
                      **kwargs):
     """Dispatch files produced by this upload to dependent uploaders.

     Moves the generated gene2pir.pyobj / gene2pdb.pyobj files from this
     uploader's data folder into the archive folder of the corresponding
     PIR/PDB uploader (under the same release), then marks those sources
     as pending upload.

     :param steps, force, batch_size, job_manager: part of the standard
         post_update_data signature; unused here.
     """
     # dependent uploaders keyed by the file extension they consume
     klass = {"pir": UniprotPIRUploader, "pdb": UniprotPDBUploader}
     # release is the last path component of the data folder
     release = os.path.split(self.data_folder)[-1]
     for ext in ["pir", "pdb"]:
         destdir = os.path.join(config.DATA_ARCHIVE_ROOT, klass[ext].name,
                                release)
         destfn = "gene2%s.pyobj" % ext
         # idiomatic replacement for try/except FileExistsError
         os.makedirs(destdir, exist_ok=True)
         self.logger.info("Dispatching file '%s' to %s upload" %
                          (destfn, ext.upper()))
         # NOTE(review): os.rename fails across filesystems — presumably
         # DATA_ARCHIVE_ROOT is on the same device as data_folder; confirm
         os.rename(os.path.join(self.data_folder, destfn),
                   os.path.join(destdir, destfn))
         uploader.set_pending_to_upload(klass[ext].name)
Example #4
0
    def dump(self,
             steps=None,
             force=False,
             job_manager=None,
             check_only=False,
             **kwargs):
        '''
        Dump (ie. download) resource as needed.
        This should be called after instance creation.

        Runs as a generator-based coroutine (yield from), driven through
        the provided job_manager's event loop.

        :param steps: step name or list of names; defaults to self.steps.
                      Steps handled here: "dump" (check + download) and
                      "post" (post_dump processing + success registration).
        :param force: passed to create_todump_list() to force the dump;
                      automatically promoted to True when the previous
                      download failed or was interrupted.
        :param job_manager: required despite the default (asserted below);
                            used to defer post_dump to a thread.
        :param check_only: when True, only report whether a new release is
                           available and return self.release without
                           downloading.
        :raises: any exception is logged, status is registered as "failed",
                 then the exception is re-raised.
        '''
        # signature says it's optional but for now it's not...
        assert job_manager
        # check what to do
        self.steps = steps or self.steps
        if type(self.steps) == str:
            self.steps = [self.steps]
        strargs = "[steps=%s]" % ",".join(self.steps)
        try:
            if "dump" in self.steps:
                pinfo = self.get_pinfo()
                pinfo["step"] = "check"
                # if last download failed (or was interrupted), we want to force the dump again
                try:
                    if self.src_doc["download"]["status"] in [
                            "failed", "downloading"
                    ]:
                        self.logger.info(
                            "Forcing dump because previous failed (so let's try again)"
                        )
                        force = True
                except (AttributeError, KeyError) as e:
                    # no src_doc or no download info
                    pass
                # TODO: blocking call for now, FTP client can't be properly set in thread after
                self.create_todump_list(force=force, **kwargs)
                # make sure we release (disconnect) client so we don't keep an open
                # connection for nothing
                self.release_client()
                if self.to_dump:
                    if check_only:
                        self.logger.info("New release available, '%s', %s file(s) to download" % \
                            (self.release,len(self.to_dump)),extra={"notify":True})
                        return self.release
                    # mark the download starts
                    self.register_status("downloading", transient=True)
                    # unsync to make it pickable
                    state = self.unprepare()
                    yield from self.do_dump(job_manager=job_manager)
                    # then restore state
                    self.prepare(state)
                else:
                    # if nothing to dump, don't do post process
                    self.logger.debug("Nothing to dump",
                                      extra={"notify": True})
                    return "Nothing to dump"
            if "post" in self.steps:
                got_error = False
                pinfo = self.get_pinfo()
                pinfo["step"] = "post_dump"
                # for some reason (like maintaining object's state between pickling).
                # we can't use process there. Need to use thread to maintain that state without
                # building an unmaintainable monster
                job = yield from job_manager.defer_to_thread(
                    pinfo, partial(self.post_dump, job_manager=job_manager))

                def postdumped(f):
                    # capture any exception raised inside the threaded
                    # post_dump so it can be re-raised in this coroutine
                    nonlocal got_error
                    if f.exception():
                        got_error = f.exception()

                job.add_done_callback(postdumped)
                yield from job
                if got_error:
                    raise got_error
                # set it to success at the very end
                self.register_status("success")
                if self.__class__.AUTO_UPLOAD:
                    set_pending_to_upload(self.src_name)
                self.logger.info("success %s" % strargs,
                                 extra={"notify": True})
        except (KeyboardInterrupt, Exception) as e:
            self.logger.error("Error while dumping source: %s" % e)
            import traceback
            self.logger.error(traceback.format_exc())
            self.register_status("failed", download={"err": str(e)})
            self.logger.error("failed %s: %s" % (strargs, e),
                              extra={"notify": True})
            raise
        finally:
            # always disconnect the client, success or failure
            if self.client:
                self.release_client()