Example #1
0
 def run(self):
     analysis_folder = os.environ['ANALYSIS_FOLDER']
     machine_run_folder = os.environ['MACHINE_RUN_FOLDER']
     sample_sheet = os.environ['SAMPLE_SHEET']
     Logger.info('Starting analytical processing for sample sheet %s' %
                 sample_sheet,
                 task_name=self.task)
     samples = SampleSheetParser(
         sample_sheet,
         [SAMPLE_ID, SAMPLE_NAME, SAMPLE_PROJECT]).parse_sample_sheet()
     launched_runs = {}
     for sample in samples:
         Logger.info('Starting "%s" sample processing.' %
                     sample[SAMPLE_NAME],
                     task_name=self.task)
         launched_runs[sample[SAMPLE_NAME]] = self.__run_sample(
             sample[SAMPLE_NAME], analysis_folder, machine_run_folder)
     failed_runs = self.__wait_runs_completion(launched_runs)
     if failed_runs:
         for sample, run_id in failed_runs.iteritems():
             Logger.fail(
                 'Processing failed for sample "%s". Check run %d logs for more information.'
                 % (sample, run_id),
                 task_name=self.task)
         sys.exit(1)
     Logger.success("All samples processed successfully.",
                    task_name=self.task)
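
The __run_sample and __wait_runs_completion helpers are referenced above but not shown. A minimal sketch of __wait_runs_completion follows, assuming the class exposes an API client with load_run (as in Example #3), that time is imported at module level, and that launched runs map sample names to run IDs; the poll interval and terminal-status handling are illustrative, not from the source.

 def __wait_runs_completion(self, launched_runs, poll_interval=60):
     # launched_runs maps sample name -> run id, as built in run() above.
     # Returns the subset of runs that did not finish with SUCCESS.
     failed_runs = {}
     active_runs = dict(launched_runs)
     while active_runs:
         for sample, run_id in list(active_runs.items()):
             run = self.api.load_run(run_id)  # assumed API client, as in Example #3
             if run['status'] == 'RUNNING':
                 continue
             if run['status'] != 'SUCCESS':
                 failed_runs[sample] = run_id
             del active_runs[sample]
         if active_runs:
             time.sleep(poll_interval)
     return failed_runs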
Example #2
0
 def run(self, upload):
     Logger.info('Starting localization of remote data...',
                 task_name=self.task_name)
     try:
         dts_registry = self.fetch_dts_registry()
         parameter_types = {ParameterType.INPUT_PARAMETER, ParameterType.COMMON_PARAMETER} if upload else \
             {ParameterType.OUTPUT_PARAMETER}
         remote_locations = self.find_remote_locations(
             dts_registry, parameter_types)
         if len(remote_locations) == 0:
             Logger.info('No remote sources found',
                         task_name=self.task_name)
         else:
             dts_locations = [
                 path for location in remote_locations
                 for path in location.paths if path.type == PathType.DTS
             ]
             if upload:
                 self.transfer_dts(dts_locations, dts_registry, upload)
                 self.localize_data(remote_locations, upload)
                 if self.report_file:
                     with open(self.report_file, 'w') as report:
                         for location in remote_locations:
                             env_name = location.env_name
                             original_value = location.original_value
                             localized_value = location.delimiter.join([
                                 path.local_path for path in location.paths
                             ])
                             report.write('export {}="{}"\n'.format(
                                 env_name, localized_value))
                             report.write('export {}="{}"\n'.format(
                                 env_name + '_ORIGINAL', original_value))
             else:
                 rule_patterns = DataStorageRule.read_from_file(self.rules)
                 rules = []
                 for rule in rule_patterns:
                     if rule.move_to_sts:
                         rules.append(rule.file_mask)
                 self.localize_data(remote_locations, upload, rules=rules)
                 self.transfer_dts(dts_locations,
                                   dts_registry,
                                   upload,
                                   rules=rules)
         Logger.success('Finished localization of remote data',
                        task_name=self.task_name)
     except BaseException as e:
         Logger.fail(
             'Localization of remote data failed due to exception: %s' %
             e.message,
             task_name=self.task_name)
         exit(1)
Example #3
0
 def child_run_active(self):
     if self.child_id is None:
         return False
     attempts = 0
     while attempts < self.RETRY_COUNT:
         try:
             run = self.api.load_run(self.child_id)
             return run['status'] == 'RUNNING'
         except Exception as e:
             Logger.warn(
                 "Failed to fetch child run ID '{}' status: {}.".format(
                     str(self.child_id), e.message),
                 task_name=self.TASK_NAME)
             attempts = attempts + 1
             time.sleep(self.POLL_TIMEOUT)
     Logger.fail("Exceeded maximum attempts to fetch child run status.")
     raise RuntimeError(
         "Exceeded maximum attempts to fetch child run status.")
Example #4
0
 def transfer_data(self, data_paths, log_task):
     if len(data_paths) > 0:
         Logger.info('Transferring %d path(s)' % len(data_paths),
                     task_name=log_task)
         transfers = map(self.__schedule_transfer_task, data_paths)
         for transfer in transfers:
             if transfer is None:
                 raise RuntimeError('Upload via DTS failed')
         remaining_ids = map(lambda transfer: transfer['id'], transfers)
         while remaining_ids:
             current_ids = list(remaining_ids)
             for id in current_ids:
                 transfer_task = self.__get_transfer_task(id)
                 source_path = transfer_task['source']['path']
                 destination_path = transfer_task['destination']['path']
                 if transfer_task['status'] == _TransferStatus.SUCCESS:
                     remaining_ids.remove(id)
                     Logger.info(
                         'Data transfer from source %s to destination %s has finished'
                         % (source_path, destination_path),
                         task_name=log_task)
                 elif transfer_task['status'] == _TransferStatus.FAILURE:
                     remaining_ids.remove(id)
                     reason = transfer_task.get('reason', 'No reason available')
                     Logger.fail(
                         "Data transfer from source %s to destination %s failed: '%s'"
                         % (source_path, destination_path, reason),
                         task_name=log_task)
                     raise RuntimeError(
                         'Data transfer failed for source %s' % source_path)
                 else:
                     time.sleep(self.pooling_delay)
             if remaining_ids and len(remaining_ids) != len(current_ids):
                 Logger.info('%d data transfers are still being processed' %
                             len(remaining_ids),
                             task_name=log_task)
         Logger.info('All data transfers have finished successfully',
                     task_name=log_task)
     else:
         Logger.warn('No files for data transfer were found',
                     task_name=log_task)
Example #5
0
 def get_running_samples(self):
     attempts = 0
     while attempts < self.RETRY_COUNT:
         try:
             child_runs = self.api.load_child_pipelines(self.run_id)
             count = 0
             for run in child_runs:
                 if run['status'] == 'RUNNING':
                     count = count + 1
             return count
         except Exception as e:
             Logger.warn("Failed to fetch running samples: {}.".format(
                 e.message),
                         task_name=self.TASK_NAME)
             attempts = attempts + 1
             time.sleep(self.POLL_TIMEOUT)
     Logger.fail("Exceeded maximum attempts to fetch running samples.")
     raise RuntimeError(
         "Exceeded maximum attempts to fetch running samples.")
Example #6
0
def upload_data(src, dst, f_name_format, c_name, c_type, create_folders,
                entity_id, m_id, ent_api, upd_paths):
    if not dst.endswith('/'):
        dst = dst + '/'
    if f_name_format is not None and c_name is not None:
        if create_folders:
            dst = dst + c_name + '/' + f_name_format
        else:
            dst = dst + f_name_format.format(c_name)
    else:
        dst = dst + src.split('/')[-1]

    code = 1
    for upload_try_num in range(1, UPLOAD_RETRY_COUNT + 1):
        Logger.info("Attempt #{}. Uploading {} to {}...".format(
            upload_try_num, src, dst),
                    task_name=UPLOAD_TASK_NAME)
        Logger.info(
            'Executing command \'pipe storage cp "{}" "{}" -f > /dev/null\''.
            format(src, dst),
            task_name=UPLOAD_TASK_NAME)
        code = os.system('pipe storage cp "{}" "{}" -f > /dev/null'.format(
            src, dst))
        if code != 0:
            Logger.fail("Attempt #{}. Error uploading {} to {}".format(
                upload_try_num, src, dst),
                        task_name=UPLOAD_TASK_NAME)
            if upload_try_num < UPLOAD_RETRY_COUNT:
                time.sleep(UPLOAD_RETRY_TIMEOUT_SEC)
            else:
                Logger.fail(
                    "All {} attempts failed for {}. Source is not uploaded".
                    format(UPLOAD_RETRY_COUNT, src),
                    task_name=UPLOAD_TASK_NAME)
        else:
            Logger.info("Uploading {} to {} done".format(src, dst),
                        task_name=UPLOAD_TASK_NAME)
            if upd_paths:
                ent_api.update_key(m_id, entity_id, c_name, c_type, dst)
            break

    return code
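
A hypothetical invocation of upload_data(); every value below is a placeholder chosen for illustration, and the call assumes the module-level constants from the example (UPLOAD_RETRY_COUNT, UPLOAD_RETRY_TIMEOUT_SEC, UPLOAD_TASK_NAME) are defined. With upd_paths=False the metadata client is never used, so None is safe for ent_api.

exit_code = upload_data(src='/tmp/sample_R1.fastq.gz',  # placeholder source path
                        dst='s3://bucket/uploads',      # placeholder destination
                        f_name_format=None, c_name=None, c_type=None,
                        create_folders=False,
                        entity_id=None, m_id=None,
                        ent_api=None, upd_paths=False)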
Example #7
0
 def run(self):
     Logger.info('Launching demultiplex pipeline "%s" with version "%s"' %
                 (self.pipeline_name, self.version),
                 task_name=self.task)
     pipeline = self.api.find_pipeline(self.pipeline_name)
     pipeline_params = {
         'MACHINE_RUN_FOLDER': {
             'value': os.environ['MACHINE_RUN_FOLDER'],
             'type': 'input'
         },
         'SAMPLE_SHEET': {
             'value': os.environ['SAMPLE_SHEET_ORIGINAL'],
             'type': 'input'
         },
         'ANALYSIS_FOLDER': {
             'value': os.environ['ANALYSIS_FOLDER'],
             'type': 'output'
         }
     }
     run = self.api.launch_pipeline(pipeline['id'],
                                    self.version,
                                    pipeline_params,
                                    instance=self.instance_type,
                                    disk=self.instance_disk,
                                    parent_run_id=os.environ['RUN_ID'])
     demultiplex_run_id = run['id']
     Logger.info('Launched demultiplex run %d.' % demultiplex_run_id,
                 task_name=self.task)
     Logger.info('Waiting till run %d completion.' % demultiplex_run_id,
                 task_name=self.task)
     final_status = self.__wait_run_completion(demultiplex_run_id)
     if final_status != 'SUCCESS':
         Logger.fail(
             'Demultiplex processing did not complete successfully. '
             'Check run %d logs for more information.' % demultiplex_run_id,
             task_name=self.task)
         sys.exit(1)
     Logger.success('Demultiplex processing completed successfully.',
                    task_name=self.task)
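
The __wait_run_completion helper called above is not shown. A minimal sketch of what it might look like, reusing api.load_run from Example #3 and assuming time is imported at module level; the poll interval and the assumption that any non-RUNNING status is terminal are illustrative.

 def __wait_run_completion(self, run_id, poll_interval=60):
     # Poll the run with the same API call used in Example #3 until it
     # leaves the RUNNING state, then return the final status string.
     while True:
         run = self.api.load_run(run_id)
         if run['status'] != 'RUNNING':
             return run['status']
         time.sleep(poll_interval)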
Example #8
0
    def run(self, mount_root, tmp_dir):
        try:
            Logger.info('Starting mounting remote data storages.',
                        task_name=self.task_name)

            Logger.info('Fetching list of allowed storages...',
                        task_name=self.task_name)
            available_storages = self.api.load_available_storages()
            if not available_storages:
                Logger.success('No remote storages are available',
                               task_name=self.task_name)
                return
            Logger.info(
                'Found {} available storage(s). Checking mount options.'.
                format(len(available_storages)),
                task_name=self.task_name)

            fuse_tmp = os.path.join(tmp_dir, "s3fuse")
            if not self.create_directory(fuse_tmp):
                fuse_tmp = '/tmp'

            fuse_available = self.check_or_install_fuse()

            aws_default_region = os.getenv('AWS_DEFAULT_REGION', 'us-east-1')
            aws_region = os.getenv('AWS_REGION', aws_default_region)
            limited_storages = os.getenv('CP_CAP_LIMIT_MOUNTS')
            if limited_storages:
                try:
                    limited_storages_list = [
                        int(x.strip()) for x in limited_storages.split(',')
                    ]
                    available_storages = [
                        x for x in available_storages
                        if x.id in limited_storages_list
                    ]
                    Logger.info(
                        'Run is launched with mount limits ({}). Only {} storage(s) will be mounted'
                        .format(limited_storages, len(available_storages)),
                        task_name=self.task_name)
                except Exception as limited_storages_ex:
                    Logger.warn(
                        'Unable to parse CP_CAP_LIMIT_MOUNTS value ({}) with error: {}.'
                        .format(limited_storages,
                                str(limited_storages_ex.message)),
                        task_name=self.task_name)

            nfs_count = len([ds for ds in available_storages
                             if ds.storage_type == 'NFS'
                             and ds.region_name == aws_region])
            nfs_available = nfs_count > 0 and self.check_or_install_nfs()
            if not fuse_available and not nfs_available:
                Logger.success(
                    'Mounting of remote storages is not available for this image',
                    task_name=self.task_name)
                return
            for storage in available_storages:
                if not PermissionHelper.is_storage_readable(storage):
                    continue
                mounter = self.get_mount_manager(storage, nfs_available,
                                                 fuse_available, fuse_tmp)
                if mounter is not None:
                    self.mount(mounter, mount_root)
                elif storage.storage_type != NFS_TYPE and storage.storage_type != S3_TYPE:
                    Logger.warn('Unsupported storage type {}.'.format(
                        storage.storage_type),
                                task_name=self.task_name)
            Logger.success('Finished data storage mounting',
                           task_name=self.task_name)
        except Exception as e:
            Logger.fail('Unhandled error during mount task: {}.'.format(
                str(e.message)),
                        task_name=self.task_name)
Example #9
0
 def fail_task(self, message):
     error_text = '{} task failed: {}.'.format(self.task_name, message)
     Logger.fail(error_text, task_name=self.task_name)
     raise RuntimeError(error_text)
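
A hypothetical call site for fail_task(); the surrounding check and message are placeholders, not from the source, and api.load_run mirrors the call used in Example #3.

 try:
     run = self.api.load_run(run_id)
 except Exception as e:
     # fail_task logs the error under self.task_name and raises RuntimeError.
     self.fail_task('unable to load run {}: {}'.format(run_id, e))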
Example #10
0
                    task_name=UPLOAD_TASK_NAME)
        else:
            Logger.info("Uploading {} to {} done".format(src, dst),
                        task_name=UPLOAD_TASK_NAME)
            if upd_paths:
                ent_api.update_key(m_id, entity_id, c_name, c_type, dst)
            break

    return code


if __name__ == '__main__':
    Logger.info("Checking input parameters", task_name=INPUT_CHECK_TASK_NAME)
    scripts_dir = os.environ['SCRIPTS_DIR']
    if 'DESTINATION_DIRECTORY' not in os.environ:
        Logger.fail("DESTINATION_DIRECTORY parameter is missing",
                    task_name=INPUT_CHECK_TASK_NAME)
        exit(1)
    if 'METADATA_ID' not in os.environ:
        Logger.fail("METADATA_ID parameter is missing",
                    task_name=INPUT_CHECK_TASK_NAME)
        exit(1)
    if 'METADATA_CLASS' not in os.environ:
        Logger.fail("METADATA_CLASS parameter is missing",
                    task_name=INPUT_CHECK_TASK_NAME)
        exit(1)
    if 'METADATA_COLUMNS' not in os.environ:
        Logger.fail("METADATA_COLUMNS parameter is missing or invalid",
                    task_name=INPUT_CHECK_TASK_NAME)
        exit(1)
    destination = os.environ['DESTINATION_DIRECTORY']
    api_path = os.environ['API']
Example #11
0
 def fail_task(self, message):
     Logger.fail(message, task_name=self.name)
     raise RuntimeError(message)
Example #12
0
 def fail(message, crucial=True, *args, **kwargs):
     logging.error(message, *args, **kwargs)
     if not Logger.cmd and (crucial or Logger.verbose):
         CloudPipelineLogger.fail(message, task_name=Logger.task)