def load_data(self, data_folder):
    """Find the single zip archive in *data_folder* and load the file it lists.

    The archive is only inspected for its member name; it is not extracted
    here. The member name is joined to *data_folder* and that path is handed
    to ``load_data(input_file)``.
    NOTE(review): this presumably relies on the archive having been unpacked
    into *data_folder* beforehand, and on the bare ``load_data`` name
    resolving to a module-level parser rather than this method -- confirm.

    Args:
        data_folder: directory expected to contain exactly one ``*.zip`` file.

    Returns:
        Whatever ``load_data(input_file)`` returns.

    Raises:
        uploader.ResourceError: if the folder does not contain exactly one
            zip file, or the archive does not list exactly one entry.
    """
    # there's one zip there, let's get the zipped filename
    archives = glob.glob(os.path.join(data_folder, "*.zip"))
    if len(archives) != 1:
        raise uploader.ResourceError("Expecting one zip only, got: %s" % repr(archives))
    archive_path = archives.pop()
    # Only the listing is needed: use a context manager so the archive's file
    # handle is released instead of staying open until garbage collection.
    with zipfile.ZipFile(archive_path) as zf:
        content = zf.namelist()
    if len(content) != 1:
        raise uploader.ResourceError("Expecting only one file in the archive, got: %s" % content)
    input_file = os.path.join(data_folder, content.pop())
    self.logger.info("Load data from file '%s'" % input_file)
    return load_data(input_file)
def jobs(self):
    """Build the list of (file, chromosome) job arguments.

    Globs ``self.data_folder`` with the class-level ``GLOB_PATTERN`` and
    pairs the single matching file with each chromosome label: "1" through
    "22", then "X", "Y" and "MT".

    Returns:
        A list of ``(file_path, chromosome)`` tuples, in chromosome order.

    Raises:
        uploader.ResourceError: if the pattern does not match exactly one file.
    """
    pattern = os.path.join(self.data_folder, self.__class__.GLOB_PATTERN)
    files = glob.glob(pattern)
    if len(files) != 1:
        raise uploader.ResourceError("Expected 1 files, got: %s" % files)
    autosomes = [str(number) for number in range(1, 23)]
    chromosomes = autosomes + ['X', 'Y', 'MT']
    # Same ordering as a cartesian product of files x chromosomes.
    return [(path, chrom) for path in files for chrom in chromosomes]
def load_data(self, data_folder):
    """Load the single ExAC non-TCGA VCF file found in *data_folder*.

    Looks for files matching ``ExAC_nonTCGA.r*.vcf`` and passes the match,
    together with the class-level ``name``, to ``load_data``.

    Args:
        data_folder: directory expected to hold exactly one matching file.

    Returns:
        Whatever ``load_data(self.__class__.name, input_file)`` returns.

    Raises:
        uploader.ResourceError: if the pattern does not match exactly one file.
    """
    pattern = os.path.join(data_folder, "ExAC_nonTCGA.r*.vcf")
    matches = glob.glob(pattern)
    if len(matches) != 1:
        raise uploader.ResourceError(
            "Expecting one single vcf file, got: %s" % repr(matches))
    (vcf_path,) = matches
    self.logger.info("Load data from file '%s'" % vcf_path)
    return load_data(self.__class__.name, vcf_path)
def load_data(self, data_folder):
    """Load the single ``*.vcf`` file found in *data_folder*.

    Args:
        data_folder: directory expected to hold exactly one VCF file.

    Returns:
        Whatever ``load_data(input_file)`` returns.

    Raises:
        uploader.ResourceError: if the folder does not contain exactly one
            ``*.vcf`` file.
    """
    # there's one vcf file there, let's get it
    vcf_pattern = os.path.join(data_folder, "*.vcf")
    candidates = glob.glob(vcf_pattern)
    if len(candidates) != 1:
        raise uploader.ResourceError("Expecting only one VCF file, got: %s" % candidates)
    (vcf_path,) = candidates
    self.logger.info("Load data from file '%s'" % vcf_path)
    return load_data(vcf_path)
def restore_snapshot(self, build_meta, job_manager, **kwargs):
    """Restore the snapshot described by *build_meta* into the target index.

    This is a generator-based coroutine (it uses ``yield from``). It runs
    three steps:

    1. Make sure the snapshot repository named in the metadata exists with
       the expected settings; create it otherwise.
    2. Launch the restore through ``job_manager.defer_to_thread`` and wait
       for that job to finish.
    3. Poll the restore status until it is no longer "INIT"/"IN_PROGRESS".

    Args:
        build_meta: mapping providing ``["metadata"]["repository"]`` (the
            first name -> settings item is used) and
            ``["metadata"]["snapshot_name"]``.
        job_manager: object providing ``defer_to_thread(pinfo, func)``.
        **kwargs: accepted but not used by this method.

    Raises:
        uploader.ResourceError: if the repository cannot be created, or if
            the restore ends with any status other than "DONE" (including a
            failure to query the status at all).
    """
    idxr = self.target_backend.target_esidxer
    # first check if snapshot repo exists
    repo_name, repo_settings = list(
        build_meta["metadata"]["repository"].items())[0]
    try:
        repo = idxr.get_repository(repo_name)
        # ok it exists, check if settings are the same
        if repo[repo_name] != repo_settings:
            # different, raise exception so it's handled in the except below
            self.logger.info(
                "Repository '%s' was found but settings are different, it needs to be created again" % repo_name)
            raise IndexerException
    except IndexerException:
        # ok, it doesn't exist (or settings differ), let's try to create it
        try:
            idxr.create_repository(repo_name, repo_settings)
        except IndexerException as e:
            raise uploader.ResourceError(
                "Could not create snapshot repository. Check elasticsearch.yml configuration "
                + "file, you should have a line like this: "
                + 'repositories.url.allowed_urls: "%s*" ' % repo_settings["settings"]["url"]
                + "allowing snapshot to be restored from this URL. Error was: %s" % e) from e

    # repository is now ready, let's trigger the restore
    snapshot_name = build_meta["metadata"]["snapshot_name"]
    pinfo = self.get_pinfo()
    pinfo["step"] = "restore"
    pinfo["description"] = snapshot_name

    def get_status_info():
        """Return the restore status mapping, or a FAILED one on error."""
        try:
            return idxr.get_restore_status(idxr._index)
        except Exception as e:
            # Something went wrong, report as failure. This must be a mapping
            # with a "status" key: the polling loop below indexes the result
            # with ["status"], which would raise TypeError on a plain string.
            return {"status": "FAILED %s" % e}

    def restore_launched(f):
        """Done-callback: log the outcome of launching the restore."""
        try:
            self.logger.info("Restore launched: %s" % f.result())
        except Exception as e:
            self.logger.error("Error while lauching restore: %s" % e)
            raise

    self.logger.info("Restoring snapshot '%s' to index '%s' on host '%s'" %
                     (snapshot_name, idxr._index, idxr.es_host))
    job = yield from job_manager.defer_to_thread(
        pinfo,
        partial(idxr.restore, repo_name, snapshot_name, idxr._index,
                purge=self.__class__.AUTO_PURGE_INDEX))
    job.add_done_callback(restore_launched)
    yield from job

    # Poll until the restore reaches a terminal state.
    while True:
        status_info = get_status_info()
        status = status_info["status"]
        self.logger.info("Recovery status for index '%s': %s" % (idxr._index, status_info))
        if status in ["INIT", "IN_PROGRESS"]:
            # still running: wait before asking again (60s unless configured)
            yield from asyncio.sleep(
                getattr(btconfig, "MONITOR_SNAPSHOT_DELAY", 60))
            continue
        if status == "DONE":
            self.logger.info("Snapshot '%s' successfully restored to index '%s' (host: '%s')" %
                             (snapshot_name, idxr._index, idxr.es_host), extra={"notify": True})
            break
        # any other status is treated as a failure
        e = uploader.ResourceError("Failed to restore snapshot '%s' on index '%s', status: %s" %
                                   (snapshot_name, idxr._index, status))
        self.logger.error(e)
        raise e