def run(self):
    """Launch processing for every sample of the sample sheet and wait for it.

    Reads ``ANALYSIS_FOLDER``, ``MACHINE_RUN_FOLDER`` and ``SAMPLE_SHEET``
    from the environment, parses the sample sheet, starts one run per
    parsed sample through ``__run_sample`` and then blocks in
    ``__wait_runs_completion``. When that call reports failed runs, each of
    them is logged and the process exits with status 1.

    Raises:
        KeyError: if one of the required environment variables is not set.
    """
    analysis_folder = os.environ['ANALYSIS_FOLDER']
    machine_run_folder = os.environ['MACHINE_RUN_FOLDER']
    sample_sheet = os.environ['SAMPLE_SHEET']
    Logger.info('Starting analytical processing for sample sheet %s'
                % sample_sheet, task_name=self.task)
    samples = SampleSheetParser(
        sample_sheet,
        [SAMPLE_ID, SAMPLE_NAME, SAMPLE_PROJECT]).parse_sample_sheet()

    # Launched runs are keyed by sample name.
    launched_runs = {}
    for sample in samples:
        Logger.info('Starting "%s" sample processing.' % sample[SAMPLE_NAME],
                    task_name=self.task)
        launched_runs[sample[SAMPLE_NAME]] = self.__run_sample(
            sample[SAMPLE_NAME], analysis_folder, machine_run_folder)

    failed_runs = self.__wait_runs_completion(launched_runs)
    if failed_runs:
        # items() rather than iteritems(): the latter exists on Python 2 only.
        for sample_name, run_id in failed_runs.items():
            Logger.fail(
                'Processing failed for sample "%s". '
                'Check run %d logs for more information.'
                % (sample_name, run_id),
                task_name=self.task)
        sys.exit(1)
    Logger.success("All samples processed successfully.", task_name=self.task)
def run(self, upload):
    """Localize remote data for the run, in the direction given by ``upload``.

    When ``upload`` is truthy, input and common parameters are processed:
    DTS paths are transferred first, then the data is localized and, if
    ``self.report_file`` is set, a file of ``export NAME="value"`` lines is
    written with the localized and the original value of every location.
    Otherwise output parameters are processed: file masks of the rules that
    have ``move_to_sts`` set are collected from ``self.rules`` and passed to
    both the localization and the DTS transfer.

    Any exception is logged as a task failure and ends the process with
    exit status 1.
    """
    Logger.info('Starting localization of remote data...',
                task_name=self.task_name)
    try:
        dts_registry = self.fetch_dts_registry()
        if upload:
            parameter_types = {ParameterType.INPUT_PARAMETER,
                               ParameterType.COMMON_PARAMETER}
        else:
            parameter_types = {ParameterType.OUTPUT_PARAMETER}
        remote_locations = self.find_remote_locations(
            dts_registry, parameter_types)
        if len(remote_locations) == 0:
            Logger.info('No remote sources found', task_name=self.task_name)
        else:
            dts_locations = [
                path
                for location in remote_locations
                for path in location.paths
                if path.type == PathType.DTS
            ]
            if upload:
                self.transfer_dts(dts_locations, dts_registry, upload)
                self.localize_data(remote_locations, upload)
                if self.report_file:
                    with open(self.report_file, 'w') as report:
                        for location in remote_locations:
                            env_name = location.env_name
                            original_value = location.original_value
                            localized_value = location.delimiter.join(
                                [path.local_path for path in location.paths])
                            report.write('export {}="{}"\n'.format(
                                env_name, localized_value))
                            report.write('export {}="{}"\n'.format(
                                env_name + '_ORIGINAL', original_value))
            else:
                rule_patterns = DataStorageRule.read_from_file(self.rules)
                rules = [rule.file_mask for rule in rule_patterns
                         if rule.move_to_sts]
                self.localize_data(remote_locations, upload, rules=rules)
                self.transfer_dts(dts_locations, dts_registry, upload,
                                  rules=rules)
        Logger.success('Finished localization of remote data',
                       task_name=self.task_name)
    except BaseException as e:
        # str(e) instead of e.message: the attribute is empty for
        # multi-argument exceptions and absent on Python 3, where reading it
        # here would replace the real error with an AttributeError.
        Logger.fail(
            'Localization of remote data failed due to exception: %s' % str(e),
            task_name=self.task_name)
        exit(1)
def child_run_active(self):
    """Tell whether the child run is currently in the ``RUNNING`` status.

    Returns:
        False when ``self.child_id`` is None; otherwise whether the status
        of the run loaded through ``self.api.load_run`` equals 'RUNNING'.

    Raises:
        RuntimeError: when loading the run failed ``self.RETRY_COUNT`` times.
    """
    if self.child_id is None:
        return False
    attempts = 0
    while attempts < self.RETRY_COUNT:
        try:
            run = self.api.load_run(self.child_id)
            return run['status'] == 'RUNNING'
        except Exception as e:
            # The original template had '' where the run ID placeholder
            # belongs, so the ID filled the error slot and the error text
            # was dropped. str(e) replaces the deprecated e.message.
            Logger.warn(
                "Failed to fetch child run ID '{}' status: {}.".format(
                    str(self.child_id), str(e)),
                task_name=self.TASK_NAME)
            attempts = attempts + 1
            time.sleep(self.POLL_TIMEOUT)
    # Report under the same task as the warnings above.
    Logger.fail("Exceeded maximum attempts to fetch child run status.",
                task_name=self.TASK_NAME)
    raise RuntimeError(
        "Exceeded maximum attempts to fetch child run status.")
def transfer_data(self, data_paths, log_task):
    """Schedule a transfer task per path and poll until all of them finish.

    Args:
        data_paths: paths handed one by one to ``__schedule_transfer_task``.
        log_task: task name used for every log record written here.

    Raises:
        RuntimeError: when a transfer could not be scheduled, or when a
            scheduled transfer ends in the FAILURE status.
    """
    if not data_paths:
        Logger.warn('No files for data transfer were found',
                    task_name=log_task)
        return

    Logger.info('Transferring %d path(s)' % len(data_paths),
                task_name=log_task)
    # Real lists are required: the results are walked twice and the ID list
    # is mutated below, which a lazy map() object does not support.
    transfers = [self.__schedule_transfer_task(path) for path in data_paths]
    if any(transfer is None for transfer in transfers):
        raise RuntimeError('Upload via DTS failed')

    remaining_ids = [transfer['id'] for transfer in transfers]
    while remaining_ids:
        # Iterate over a snapshot because finished IDs are removed in place.
        current_ids = list(remaining_ids)
        for transfer_id in current_ids:
            transfer_task = self.__get_transfer_task(transfer_id)
            source_path = transfer_task['source']['path']
            destination_path = transfer_task['destination']['path']
            status = transfer_task['status']
            if status == _TransferStatus.SUCCESS:
                remaining_ids.remove(transfer_id)
                # Source first, destination second, as the message states
                # (the original passed them in the opposite order).
                Logger.info(
                    'Data transfer from source %s to destination %s '
                    'has finished' % (source_path, destination_path),
                    task_name=log_task)
            elif status == _TransferStatus.FAILURE:
                remaining_ids.remove(transfer_id)
                reason = transfer_task.get('reason', 'No reason available')
                Logger.fail(
                    "Data transfer from source %s to destination %s went bad "
                    "due to the reason: '%s'"
                    % (source_path, destination_path, reason),
                    task_name=log_task)
                raise RuntimeError(
                    'Data transfer went bad for source %s' % source_path)
            else:
                time.sleep(self.pooling_delay)
        # Report progress only when this pass finished at least one transfer
        # and some are still pending.
        if len(remaining_ids) != len(current_ids) and remaining_ids:
            Logger.info('%d data transfers are still being processed'
                        % len(remaining_ids), task_name=log_task)
    Logger.info('All data transfers have finished successfully',
                task_name=log_task)
def get_running_samples(self):
    """Count child pipelines of ``self.run_id`` that are in status 'RUNNING'.

    The lookup through ``self.api.load_child_pipelines`` is retried up to
    ``self.RETRY_COUNT`` times, sleeping ``self.POLL_TIMEOUT`` after each
    failure.

    Returns:
        The number of child runs whose ``status`` equals 'RUNNING'.

    Raises:
        RuntimeError: when every attempt to load the child runs failed.
    """
    attempts = 0
    while attempts < self.RETRY_COUNT:
        try:
            child_runs = self.api.load_child_pipelines(self.run_id)
            return sum(1 for run in child_runs if run['status'] == 'RUNNING')
        except Exception as e:
            # str(e) instead of e.message: the attribute is empty for
            # multi-argument exceptions and does not exist on Python 3.
            Logger.warn(
                "Failed to fetch running samples: {}.".format(str(e)),
                task_name=self.TASK_NAME)
            attempts = attempts + 1
            time.sleep(self.POLL_TIMEOUT)
    # Report under the same task as the warnings above.
    Logger.fail("Exceeded maximum attempts to fetch running samples.",
                task_name=self.TASK_NAME)
    raise RuntimeError(
        "Exceeded maximum attempts to fetch running samples.")
def upload_data(src, dst, f_name_format, c_name, c_type, create_folders,
                entity_id, m_id, ent_api, upd_paths):
    """Copy ``src`` to a destination under ``dst`` with ``pipe storage cp``.

    The destination is ``dst`` (with a trailing slash ensured) followed by:
    ``c_name + '/' + f_name_format`` when both are given and
    ``create_folders`` is truthy; ``f_name_format.format(c_name)`` when both
    are given and it is not; the last '/'-separated component of ``src``
    otherwise.

    The copy is attempted up to ``UPLOAD_RETRY_COUNT`` times with a pause of
    ``UPLOAD_RETRY_TIMEOUT_SEC`` between failed attempts. After a successful
    copy, ``ent_api.update_key`` is called with the destination when
    ``upd_paths`` is truthy.

    Returns:
        The value returned by the last ``os.system`` call (0 on success),
        or 1 when no attempt was made.
    """
    target = dst if dst.endswith('/') else dst + '/'
    if f_name_format is None or c_name is None:
        target = target + src.split('/')[-1]
    elif create_folders:
        target = target + c_name + '/' + f_name_format
    else:
        target = target + f_name_format.format(c_name)

    exit_status = 1
    for attempt in range(1, UPLOAD_RETRY_COUNT + 1):
        Logger.info(
            "Attempt #{}. Uploading {} to {}...".format(attempt, src, target),
            task_name=UPLOAD_TASK_NAME)
        Logger.info(
            'Executing command \'pipe storage cp "{}" "{}" -f > /dev/null\''.
            format(src, target),
            task_name=UPLOAD_TASK_NAME)
        exit_status = os.system(
            'pipe storage cp "{}" "{}" -f > /dev/null'.format(src, target))

        if exit_status == 0:
            Logger.info("Uploading {} to {} done".format(src, target),
                        task_name=UPLOAD_TASK_NAME)
            if upd_paths:
                ent_api.update_key(m_id, entity_id, c_name, c_type, target)
            break

        Logger.fail(
            "Attempt #{}. Error uploading {} to {}".format(
                attempt, src, target),
            task_name=UPLOAD_TASK_NAME)
        if attempt < UPLOAD_RETRY_COUNT:
            time.sleep(UPLOAD_RETRY_TIMEOUT_SEC)
        else:
            Logger.fail(
                "All {} attempts failed for {}. Source is not uploaded".
                format(UPLOAD_RETRY_COUNT, src),
                task_name=UPLOAD_TASK_NAME)
    return exit_status
def run(self):
    """Launch the demultiplex pipeline and wait until the run completes.

    The pipeline is looked up by ``self.pipeline_name`` and launched at
    ``self.version`` with three parameters taken from the environment:
    ``MACHINE_RUN_FOLDER`` and ``SAMPLE_SHEET`` (read from
    ``SAMPLE_SHEET_ORIGINAL``) as inputs and ``ANALYSIS_FOLDER`` as output.
    The current ``RUN_ID`` is passed as the parent run. If the final status
    returned by ``__wait_run_completion`` is not 'SUCCESS', the failure is
    logged and the process exits with status 1.
    """
    Logger.info('Launching demultiplex pipeline "%s" with version "%s"'
                % (self.pipeline_name, self.version),
                task_name=self.task)
    pipeline = self.api.find_pipeline(self.pipeline_name)

    environment = os.environ
    launch_parameters = {
        'MACHINE_RUN_FOLDER': {
            'value': environment['MACHINE_RUN_FOLDER'],
            'type': 'input'
        },
        'SAMPLE_SHEET': {
            'value': environment['SAMPLE_SHEET_ORIGINAL'],
            'type': 'input'
        },
        'ANALYSIS_FOLDER': {
            'value': environment['ANALYSIS_FOLDER'],
            'type': 'output'
        }
    }
    launched = self.api.launch_pipeline(
        pipeline['id'],
        self.version,
        launch_parameters,
        instance=self.instance_type,
        disk=self.instance_disk,
        parent_run_id=environment['RUN_ID'])
    demultiplex_run_id = launched['id']

    Logger.info('Launched demultiplex run %d.' % demultiplex_run_id,
                task_name=self.task)
    Logger.info('Waiting till run %d completion.' % demultiplex_run_id,
                task_name=self.task)

    if self.__wait_run_completion(demultiplex_run_id) == 'SUCCESS':
        Logger.success('Demultiplex processing completed sucessfully.',
                       task_name=self.task)
        return

    Logger.fail(
        'Demultiplex processing does not completed successfully. '
        'Check run %d logs for more information.' % demultiplex_run_id,
        task_name=self.task)
    sys.exit(1)
def run(self, mount_root, tmp_dir):
    """Mount the readable remote storages available to the run.

    Args:
        mount_root: passed to ``self.mount`` for every storage that gets a
            mount manager.
        tmp_dir: directory under which the ``s3fuse`` temporary directory
            is created; ``/tmp`` is used when creating it fails.

    The storage list can be narrowed with the ``CP_CAP_LIMIT_MOUNTS``
    environment variable (comma-separated integer storage IDs); a value
    that cannot be parsed is reported with a warning and ignored. NFS
    support is checked only when at least one NFS storage belongs to the
    region taken from ``AWS_REGION`` (falling back to
    ``AWS_DEFAULT_REGION``, then 'us-east-1').

    Errors are logged as a task failure and are not re-raised.
    """
    try:
        Logger.info('Starting mounting remote data storages.',
                    task_name=self.task_name)
        Logger.info('Fetching list of allowed storages...',
                    task_name=self.task_name)
        available_storages = self.api.load_available_storages()
        if not available_storages:
            Logger.success('No remote storages are available',
                           task_name=self.task_name)
            return
        Logger.info(
            'Found {} available storage(s). Checking mount options.'.
            format(len(available_storages)),
            task_name=self.task_name)

        fuse_tmp = os.path.join(tmp_dir, "s3fuse")
        if not self.create_directory(fuse_tmp):
            fuse_tmp = '/tmp'
        fuse_available = self.check_or_install_fuse()

        aws_default_region = os.getenv('AWS_DEFAULT_REGION', 'us-east-1')
        aws_region = os.getenv('AWS_REGION', aws_default_region)

        limited_storages = os.getenv('CP_CAP_LIMIT_MOUNTS')
        if limited_storages:
            try:
                limited_storages_list = [
                    int(x.strip()) for x in limited_storages.split(',')
                ]
                available_storages = [
                    x for x in available_storages
                    if x.id in limited_storages_list
                ]
                Logger.info(
                    'Run is launched with mount limits ({}) Only {} storages will be mounted'
                    .format(limited_storages, len(available_storages)),
                    task_name=self.task_name)
            except Exception as limited_storages_ex:
                # str(ex) instead of ex.message: the attribute is empty for
                # multi-argument exceptions and missing on Python 3.
                Logger.warn(
                    'Unable to parse CP_CAP_LIMIT_MOUNTS value({}) with error: {}.'
                    .format(limited_storages, str(limited_storages_ex)),
                    task_name=self.task_name)

        # any() replaces len(filter(...)), which fails where filter()
        # returns an iterator. NFS tooling is still probed only when a
        # matching storage exists.
        has_regional_nfs = any(
            ds.storage_type == 'NFS' and ds.region_name == aws_region
            for ds in available_storages)
        nfs_available = has_regional_nfs and self.check_or_install_nfs()

        if not fuse_available and not nfs_available:
            Logger.success(
                'Mounting of remote storages is not available for this image',
                task_name=self.task_name)
            return

        for storage in available_storages:
            if not PermissionHelper.is_storage_readable(storage):
                continue
            mounter = self.get_mount_manager(storage, nfs_available,
                                             fuse_available, fuse_tmp)
            if mounter is not None:
                self.mount(mounter, mount_root)
            elif storage.storage_type != NFS_TYPE and storage.storage_type != S3_TYPE:
                Logger.warn(
                    'Unsupported storage type {}.'.format(
                        storage.storage_type),
                    task_name=self.task_name)
        Logger.success('Finished data storage mounting',
                       task_name=self.task_name)
    except Exception as e:
        Logger.fail(
            'Unhandled error during mount task: {}.'.format(str(e)),
            task_name=self.task_name)
def fail_task(self, message):
    """Log a failure for this task and raise it as a ``RuntimeError``.

    The text '<task_name> task failed: <message>.' is both logged under
    ``self.task_name`` and used as the exception message.

    Raises:
        RuntimeError: always.
    """
    task = self.task_name
    failure_message = '{} task failed: {}.'.format(task, message)
    Logger.fail(failure_message, task_name=self.task_name)
    raise RuntimeError(failure_message)
task_name=UPLOAD_TASK_NAME) else: Logger.info("Uploading {} to {} done".format(src, dst), task_name=UPLOAD_TASK_NAME) if upd_paths: ent_api.update_key(m_id, entity_id, c_name, c_type, dst) break return code if __name__ == '__main__': Logger.info("Checking input parameters", task_name=INPUT_CHECK_TASK_NAME) scripts_dir = os.environ['SCRIPTS_DIR'] if 'DESTINATION_DIRECTORY' not in os.environ: Logger.fail("DESTINATION_DIRECTORY parameter is missing", task_name=INPUT_CHECK_TASK_NAME) exit(1) if 'METADATA_ID' not in os.environ: Logger.fail("METADATA_ID parameter is missing", task_name=INPUT_CHECK_TASK_NAME) exit(1) if 'METADATA_CLASS' not in os.environ: Logger.fail("METADATA_CLASS parameter is missing", task_name=INPUT_CHECK_TASK_NAME) exit(1) if 'METADATA_COLUMNS' not in os.environ: Logger.fail("METADATA_COLUMNS parameter is missing or invalid", task_name=INPUT_CHECK_TASK_NAME) exit(1) destination = os.environ['DESTINATION_DIRECTORY'] api_path = os.environ['API']
def fail_task(self, message):
    """Log ``message`` as a failure of this task and raise it.

    The message is logged under ``self.name`` and then raised unchanged.

    Raises:
        RuntimeError: always, carrying ``message``.
    """
    failure = RuntimeError(message)
    Logger.fail(message, task_name=self.name)
    raise failure
def fail(message, crucial=True, *args, **kwargs):
    """Log an error locally and, when applicable, to ``CloudPipelineLogger``.

    The message is always passed to ``logging.error`` together with
    ``args`` and ``kwargs``. It is additionally sent to
    ``CloudPipelineLogger.fail`` under ``Logger.task`` unless ``Logger.cmd``
    is truthy, and only when ``crucial`` or ``Logger.verbose`` is truthy.
    """
    logging.error(message, *args, **kwargs)
    if Logger.cmd:
        return
    if crucial or Logger.verbose:
        CloudPipelineLogger.fail(message, task_name=Logger.task)