def config_file_setup(logger, cf_label, cf_from_cli=None):
    """
    Create a config file if it does not exist, copying it from the package
    default into the user_config_dir.
    Return the configuration file path from the cli args if present,
    otherwise return the path from the user_config_dir.

    :param logger: logger
    :param cf_label: label of the configuration file (required)
    :param cf_from_cli: path to configuration file from cli arg
    :return: Path
    """
    presta_config_dir = os.path.join(user_config_dir(__appname__))
    config_file_from_home = os.path.join(presta_config_dir, cf_label)

    if not path_exists(config_file_from_home, logger, force=False):
        logger.info('Creating config path {}'.format(presta_config_dir))
        ensure_dir(presta_config_dir)
        config_file_path = '/'.join(['config', cf_label])
        config_file_from_package = resource_filename(__appname__,
                                                     config_file_path)
        copyfile(config_file_from_package, config_file_from_home)

    config_file_paths = []
    if cf_from_cli and path_exists(cf_from_cli, logger, force=False):
        config_file_paths.append(WeightedPath(cf_from_cli, 0))
    if path_exists(config_file_from_home, logger, force=False):
        config_file_paths.append(WeightedPath(config_file_from_home, 1))

    logger.debug("config file paths: {}".format(config_file_paths))

    config_file_path = sorted(config_file_paths)[0].path
    logger.info('Reading configuration from {}'.format(config_file_path))
    return config_file_path
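# `WeightedPath` is not defined in this excerpt. A minimal sketch consistent
# with how it is used above (constructed as WeightedPath(path, weight),
# sortable, exposing a `.path` attribute) could be a namedtuple:
from collections import namedtuple

WeightedPath = namedtuple('WeightedPath', ['path', 'weight'])

# Note: namedtuples compare field by field, so sorting a list of WeightedPath
# instances orders them by `path` first and only then by `weight`. If the
# intent is to pick the lowest-weight path regardless of the path string, an
# explicit key is safer:
#   sorted(config_file_paths, key=lambda wp: wp.weight)[0].path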
def get_object(self, src_path, dest_path=None, prefix='irods://'):
    """
    Retrieve an object from an existing path. If dest_path is set,
    data will be copied from iRODS to the filesystem.

    :type src_path: str
    :param src_path: irods path
    :type dest_path: str
    :param dest_path: destination path
    :type prefix: str
    :param prefix: path's prefix (if any)
    :return: an irods.data_object.iRODSDataObject,
             an irods.collection.iRODSCollection or None
    """
    if src_path.startswith(prefix):
        src_path = os.path.join(src_path.replace(prefix, '/'))

    exists, obj = self.exists(src_path, delivery=True)
    if exists and dest_path:
        ensure_dir(os.path.dirname(dest_path))
        with open(dest_path, 'w') as df:
            with obj.open('r') as sf:
                for line in sf:
                    df.write(line)
    return obj
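# Illustrative usage sketch (not part of the original module): fetch a data
# object from iRODS and materialise it locally. The `IrodsClient` name and
# the argument values below are hypothetical stand-ins for whatever class
# this method belongs to.
#
#   client = IrodsClient(...)
#   obj = client.get_object('irods://zone/home/user/run/SampleSheet.csv',
#                           dest_path='/tmp/SampleSheet.csv')
#   if obj is None:
#       logger.error('object not found')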
def main():
    app = App()
    parser = app.make_parser()
    args = parser.parse_args()

    ensure_dir(os.path.dirname(args.logfile))
    logger = a_logger('Main', level=args.loglevel, filename=args.logfile)
    logger.info('{} started'.format(__appname__.capitalize()))

    if hasattr(args, 'func'):
        args.func(logger, args)
    else:
        parser.print_help()
def update(self, label):
    pipeline = Pipeline(self.conf[label],
                        loglevel=self.loglevel,
                        logfile=self.logfile)
    skip = ''
    if self.ask_before_to_refresh:
        msg = "Updating {} - {}".format(pipeline.label.capitalize(),
                                        pipeline.description)
        skip = self.user_input(msg)
    if skip != ord('s'):
        repo_dir = os.path.join(self.cache_dir, label)
        ensure_dir(repo_dir, force=True)
        self.clone(label)
def __init__(self, args=None):
    self.loglevel = args.loglevel
    self.logfile = args.logfile
    self.logger = a_logger(self.__class__.__name__,
                           level=self.loglevel,
                           filename=self.logfile)

    path_from_cli = args.config_file if 'config_file' in vars(args) else None
    cm = ConfigurationManager(args=args, path_from_cli=path_from_cli)
    self.conf = cm.get_pipelines_config
    self.core_environment_file = cm.get_default_config[
        'core_environment_file']
    self.environment_file = cm.get_default_config[
        'project_environment_file']

    self.cache_dir = cache_dir
    ensure_dir(self.cache_dir)

    self.ask_before_to_refresh = args.ask if 'ask' in vars(args) else False
def copy_qc_dirs(trigger=None, **kwargs):
    if trigger is False:
        return trigger

    src = kwargs.get('src')
    dest = kwargs.get('dest')
    dirs = ['Stats', 'Reports', 'fastqc']
    ensure_dir(dest)

    task0 = copy.si(os.path.join(src, dirs[0]), os.path.join(dest, dirs[0]))
    task1 = copy.si(os.path.join(src, dirs[1]), os.path.join(dest, dirs[1]))
    task2 = copy.si(os.path.join(src, dirs[2]), os.path.join(dest, dirs[2]))

    job = group([task0, task1, task2])
    result = job.apply_async()

    return result
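# A standalone sketch (not part of the project) of the Celery fan-out pattern
# used by copy_qc_dirs above: immutable signatures (.si) grouped and submitted
# together with apply_async. The app name, broker URL and the local `copy`
# task defined here are illustrative assumptions; the real project provides
# its own `copy` task and broker configuration.
from celery import Celery, group

app = Celery('qc_copy_sketch', broker='memory://', backend='cache+memory://')
app.conf.task_always_eager = True  # run tasks in-process for this sketch


@app.task(name='qc_copy_sketch.copy')
def copy(src, dst):
    # placeholder body: the real task copies a directory tree from src to dst
    return (src, dst)


def copy_qc_dirs_sketch(src, dest):
    dirs = ['Stats', 'Reports', 'fastqc']
    job = group(copy.si('/'.join([src, d]), '/'.join([dest, d]))
                for d in dirs)
    return job.apply_async()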
def __init__(self, args=None, path_from_cli=None,
             path_from_package='config/config.yaml',
             config_filename='config.yaml'):

    def copy_config_file_from_package(appname, src, dst):
        _from_package = resource_filename(appname, src)
        copyfile(_from_package, dst)

    self.loglevel = args.loglevel
    self.logfile = args.logfile
    logger = a_logger(self.__class__.__name__,
                      level=self.loglevel,
                      filename=self.logfile)

    cfg_dir = os.path.join(config_dir)
    config_file_path = os.path.join(cfg_dir, config_filename)

    # Create configuration file from default if needed
    if not path_exists(cfg_dir, logger, force=False):
        logger.info('Creating config dir {}'.format(cfg_dir))
        ensure_dir(cfg_dir)
    if not path_exists(config_file_path, logger, force=False):
        logger.info('Copying default config file from {} package '
                    'resource'.format(__appname__))
        copy_config_file_from_package(__appname__, path_from_package,
                                      config_file_path)

    config_file_paths = []
    if path_from_cli and path_exists(path_from_cli, logger, force=False):
        config_file_paths.append(WeightedPath(path_from_cli, 0))
    if path_exists(config_file_path, logger, force=False):
        config_file_paths.append(WeightedPath(config_file_path, 1))

    logger.debug("config file paths: {}".format(config_file_paths))

    config_file_path = sorted(config_file_paths)[0].path
    logger.info('Reading configuration from {}'.format(config_file_path))

    c = load_config(config_file_path)
    self.pipes_conf = c['pipelines'] if 'pipelines' in c else None
    self.default_conf = c['default_vars'] if 'default_vars' in c else None
def clone(self, label):
    pipeline = Pipeline(self.conf[label],
                        loglevel=self.loglevel,
                        logfile=self.logfile)
    repo_dir = os.path.join(self.cache_dir, label)
    ensure_dir(repo_dir)

    if path_is_empty(repo_dir):
        print("Cloning {}".format(pipeline.url))
        Repo.clone_from(pipeline.url, repo_dir)
        repo = Repo(repo_dir)
        heads = repo.heads
        master = heads.master

        with open(os.path.join(repo_dir, ".git_repo_last_commit"),
                  'w') as filename:
            filename.write(pipeline.url)
            filename.write("\ncommit id: {}".format(master.commit))

        requirements_path = os.path.join(repo_dir,
                                         self.core_environment_file)
        if not path_exists(requirements_path):
            data = {
                'channels': ['bioconda', 'conda-forge', 'defaults'],
                'dependencies': ['python==3.6.1', 'pip']
            }
            dump(data, requirements_path)

        requirements_path = os.path.join(repo_dir, self.environment_file)
        if not path_exists(requirements_path):
            data = {
                'channels': ['bioconda', 'conda-forge', 'defaults'],
                'dependencies': ['snakemake', 'drmaa==0.7.8']
            }
            dump(data, requirements_path)

        print("commit id: {}".format(master.commit))
        print("Done.\n")
        self.logger.info('Cloned git repo at {} into {} '
                         'directory'.format(pipeline.url, repo_dir))
    else:
        self.logger.warning("Can't clone git repo {} "
                            "into {}".format(pipeline.url, repo_dir))
def run(self):
    msgs = [
        "Generating Fastqc reports",
        "Copying qc dirs from {} to {}".format(self.input_path,
                                               self.output_path)
    ]

    if path_exists(self.qc_path, self.logger, force=False) \
            and len(os.listdir(self.qc_path)) > 0 \
            and not self.rerun:
        self.logger.info(msgs[1])
        copy_task = dispatch_event.si(event='copy_qc_folders',
                                      params=dict(src=self.input_path,
                                                  dest=self.output_path))
        copy_task.delay()
    else:
        self.logger.info("{} and {}".format(msgs[0], msgs[1]))
        ensure_dir(self.qc_path, force=True)

        qc_task = chain(
            dispatch_event.si(
                event='qc_started',
                params=dict(progress_status_file=self.started)),
            rd_collect_fastq.si(ds_path=self.input_path),
            qc_runner.s(outdir=self.qc_path,
                        batch_queuing=self.batch_queuing,
                        queue_spec=self.queues_conf.get('q_fastqc')),
        ).apply_async()

        copy_task = trigger_event.si(event='copy_qc_folders',
                                     params=dict(src=self.input_path,
                                                 dest=self.output_path),
                                     tasks=qc_task.get())
        copy_task.apply_async()

        trigger_event.si(event='qc_completed',
                         params=dict(progress_status_file=self.completed),
                         tasks=qc_task.get()).apply_async()
def run(self): msgs = ["Generating Fastqc reports", "Coping qc dirs from {} to {}".format(self.input_path, self.output_path)] if path_exists(self.qc_path, self.logger, force=False) and len(os.listdir(self.qc_path)) > 0 \ and not self.rerun: self.logger.info(msgs[1]) copy_task = dispatch_event.si(event='copy_qc_folders', params=dict(src=self.input_path, dest=self.output_path) ) copy_task.delay() else: self.logger.info("{} and {}".format(msgs[0], msgs[1])) ensure_dir(self.qc_path, force=True) qc_task = chain(dispatch_event.si(event='qc_started', params=dict(progress_status_file=self.started)), rd_collect_fastq.si(ds_path=self.input_path), qc_runner.s(outdir=self.qc_path, batch_queuing=self.batch_queuing, queue_spec=self.queues_conf.get('q_fastqc')), ).apply_async() copy_task = trigger_event.si(event='copy_qc_folders', params=dict(src=self.input_path, dest=self.output_path), tasks=qc_task.get()) copy_task.apply_async() trigger_event.si(event='qc_completed', params=dict(progress_status_file=self.completed), tasks=qc_task.get()).apply_async()
def __init__(self, args=None, logger=None):
    self.logger = logger
    self.conf = get_conf(logger, args.config_file)
    self.io_conf = self.conf.get_io_section()
    self.batch_queuing = args.batch_queuing
    self.queues_conf = self.conf.get_section('queues')

    rd_label = args.rd_label
    ds_path = args.ds_path if args.ds_path \
        else os.path.join(self.io_conf.get('archive_root_path'),
                          rd_label,
                          self.io_conf.get('ds_folder_name'))
    qc_path = args.qc_path if args.qc_path \
        else os.path.join(ds_path, self.io_conf.get('qc_folder_name'))
    qc_export_path = args.qc_export_path if args.qc_export_path \
        else os.path.join(self.io_conf.get('qc_export_basepath'), rd_label)

    # FIXME: this is a local path; it must be checked that it is created on
    # the right node
    if not path_exists(qc_export_path, logger, force=False):
        ensure_dir(qc_export_path)

    path_exists(ds_path, logger)
    path_exists(qc_export_path, logger)

    self.input_path = ds_path
    self.output_path = qc_export_path
    self.qc_path = qc_path
    self.rerun = args.rerun
    self.started = os.path.join(
        self.qc_path, self.io_conf.get('quality_check_started_file'))
    self.completed = os.path.join(
        self.qc_path, self.io_conf.get('quality_check_completed_file'))
def __fs2fs_carrier(self, input_paths, opath):
    self.delivery_started = os.path.join(
        opath, self.io_conf.get('delivery_started_file'))
    self.delivery_completed = os.path.join(
        opath, self.io_conf.get('delivery_completed_file'))
    self.merge_started = os.path.join(
        opath, self.io_conf.get('merge_started_file'))
    self.merge_completed = os.path.join(
        opath, self.io_conf.get('merge_completed_file'))

    bids = [_ for _ in self.delivery['samples_info'].keys()
            if self.delivery['samples_info'][_].get('type')
            not in SAMPLE_TYPES_TOSKIP]

    if len(bids) > 0:
        for id, info in self.delivery['samples_info'].items():
            batch_id = info.get('batch_id')
            path = os.path.join(opath, batch_id)
            if not self.dry_run and not os.path.exists(path):
                ensure_dir(path)

    self.logger.info('Looking for files related to {} Bika ids'.format(
        len(bids)))

    dm = DatasetsManager(self.logger, bids)
    for path in input_paths:
        if self.runs and isinstance(self.runs, list) and len(self.runs) > 0:
            for run in self.runs:
                ipath = os.path.join(path, run)
                if os.path.exists(ipath):
                    self.logger.info('Searching in {}'.format(ipath))
                    datasets_info, count = dm.collect_fastq_from_fs(ipath)
                    self.logger.info("found {} files in {}".format(count,
                                                                   ipath))
        else:
            ipath = path
            if os.path.exists(ipath):
                self.logger.info('Searching in {}'.format(ipath))
                datasets_info, count = dm.collect_fastq_from_fs(ipath)
                self.logger.info("found {} files in {}".format(count, ipath))

    datasets_info = dm.fastq_collection
    count = dm.fastq_counter
    self.logger.info("found {} files".format(count))

    to_be_merged = dict()

    if not self.dry_run:
        dispatch_event.si(event='delivery_started',
                          params=dict(
                              progress_status_file=self.delivery_started,
                              delivery_id=self.delivery_id)).delay()

    for bid in bids:
        sample_label = self.samples_info[bid].get('client_sample_id')
        if bid not in to_be_merged:
            to_be_merged[bid] = dict()

        if bid in datasets_info:
            for f in datasets_info[bid]:
                src = f.get('filepath')
                read = f.get('read_label')
                lane = f.get('lane')
                ext = f.get('file_ext')
                batch_id = self.samples_info[bid].get('batch_id')
                filename = format_dataset_filename(
                    sample_label=sample_label,
                    lane=lane,
                    read=read,
                    ext=ext,
                    uid=True)
                dst = os.path.join(opath, batch_id, filename)
                self.logger.info("Copying {} into {}".format(src, dst))

                # reset the task handle so a skipped file does not reuse the
                # task of a previously copied one
                tsk = None
                if os.path.isfile(dst):
                    self.logger.info('{} skipped'.format(
                        os.path.basename(dst)))
                else:
                    if not self.dry_run:
                        tsk = copy.si(src, dst).delay()
                        self.logger.info('{} copied'.format(
                            os.path.basename(dst)))

                if self.merge:
                    to_be_merged[bid][ext] = dict() \
                        if ext not in to_be_merged[bid] \
                        else to_be_merged[bid][ext]
                    if read not in to_be_merged[bid][ext]:
                        to_be_merged[bid][ext][read] = dict(src=list(),
                                                            dst=list(),
                                                            tsk=list())
                    to_be_merged[bid][ext][read]['src'].append(src)
                    to_be_merged[bid][ext][read]['dst'].append(dst)
                    if not self.dry_run and tsk:
                        to_be_merged[bid][ext][read]['tsk'].append(
                            tsk.task_id)
                else:
                    if self.md5_check:
                        # MD5 CHECKSUM
                        self.logger.info(
                            "Getting MD5 hash of {}".format(dst))
                        if not self.dry_run:
                            md5_task = trigger_event.si(
                                event='get_md5_checksum',
                                params=dict(src=dst,
                                            dst=".".join([dst, 'md5'])),
                                tasks=[tsk.task_id]).delay()
                            task_id = md5_task.get()
        else:
            msg = 'No files found related to Bika id {}'.format(bid)
            self.logger.warning(msg)
            self.logger.info('{} skipped'.format(bid))
            del to_be_merged[bid]

    if self.merge:
        if not self.dry_run:
            dispatch_event.si(
                event='merge_started',
                params=dict(
                    progress_status_file=self.merge_started)).delay()

        for bid, file_ext in to_be_merged.items():
            sample_label = self.samples_info[bid].get('client_sample_id')
            for ext, reads in file_ext.items():
                for read, datasets in reads.items():
                    filename = format_dataset_filename(
                        sample_label=sample_label, read=read, ext=ext)
                    src = datasets['dst']
                    dst = os.path.join(opath, batch_id, filename)
                    tsk = datasets['tsk']
                    self.logger.info("Merging {} into {}".format(
                        " ".join(src), dst))
                    if not self.dry_run:
                        merge_task = trigger_event.si(
                            event='merge_datasets',
                            params=dict(src=src, dst=dst, remove_src=True),
                            tasks=tsk).delay()
                        task_id = merge_task.get()
                        if self.md5_check:
                            # MD5 CHECKSUM
                            self.logger.info(
                                "Getting MD5 hash of {}".format(dst))
                            md5_task = trigger_event.si(
                                event='get_md5_checksum',
                                params=dict(src=dst,
                                            dst=".".join([dst, 'md5'])),
                                tasks=[task_id]).delay()
                            task_id = md5_task.get()
                        to_be_merged[bid][ext][read]['tsk'] = [task_id]

    if not self.dry_run:
        task_ids = list()
        for bid, file_ext in to_be_merged.items():
            for ext, reads in file_ext.items():
                for read, datasets in reads.items():
                    task_ids.extend(datasets['tsk'])

        trigger_event.si(event='delivery_completed',
                         params=dict(
                             progress_status_file=self.delivery_completed,
                             delivery_id=self.delivery_id),
                         tasks=task_ids).delay()

        if self.merge:
            trigger_event.si(event='merge_completed',
                             params=dict(
                                 progress_status_file=self.merge_completed,
                                 delivery_id=self.delivery_id),
                             tasks=task_ids).delay()
def run(self):
    path_exists(self.rd['path'], self.logger)
    rd_status_checks = rd_ready_to_be_preprocessed(
        user=self.user,
        group=self.group,
        path=self.rd['path'],
        rd_label=self.rd['label'],
        ssht_filename=self.samplesheet['filename'],
        ir_conf=self.ir_conf,
        io_conf=self.io_conf)

    check = rd_status_checks[0] and rd_status_checks[1] and \
        rd_status_checks[2][0] and rd_status_checks[2][1]
    check_sanitize_metadata = not rd_status_checks[3]

    if not check:
        self.logger.error("{} is not ready to be preprocessed".format(
            self.rd['label']))
        sys.exit()

    self.logger.info('Processing {}'.format(self.rd['label']))
    self.logger.info('running path {}'.format(self.rd['path']))
    self.logger.info('datasets path {}'.format(self.ds['path']))
    self.logger.info('samplesheet path {}'.format(self.samplesheet['path']))

    if self.emit_events:
        self.logger.info('quality check output path {}'.format(
            self.qc['path']))
        self.logger.info('quality check export path {}'.format(
            self.qc['export_path']))

    ensure_dir(self.ds['path'])

    irods_task = chain(
        sanitize_metadata.si(conf=self.ir_conf,
                             ssht_filename=self.samplesheet['filename'],
                             rd_label=self.rd['label'],
                             sanitize=check_sanitize_metadata,
                             logbook_path=self.logbook['path']),
        copy_run_info_to_irods.si(conf=self.ir_conf,
                                  run_info_path=self.run_info['path'],
                                  rd_label=self.rd['label'],
                                  logbook_path=self.logbook['path']),
        copy_run_parameters_to_irods.si(
            conf=self.ir_conf,
            run_parameters_path=self.run_parameters['path'],
            rd_label=self.rd['label'],
            logbook_path=self.logbook['path']),
    )

    samplesheet_task = chain(
        copy_samplesheet_from_irods.si(
            conf=self.ir_conf,
            ssht_path=self.samplesheet['path'],
            rd_label=self.rd['label'],
            overwrite_samplesheet=self.overwrite_samplesheet,
            logbook_path=self.logbook['path']),
        replace_values_into_samplesheet.si(
            conf=self.ir_conf,
            ssht_path=self.samplesheet['path'],
            rd_label=self.rd['label'],
            overwrite_samplesheet=self.overwrite_samplesheet,
            logbook_path=self.logbook['path']),
    )

    # full pre-processing sequencing rundir pipeline
    pipeline = chain(
        dispatch_event.si(
            event='preprocessing_started',
            params=dict(ds_path=self.ds['path'],
                        rd_label=self.rd['label'],
                        progress_status_file=self.started_file,
                        emit_events=self.emit_events)),
        irods_task,
        samplesheet_task,
        replace_index_cycles_into_run_info.si(
            conf=self.ir_conf,
            ssht_path=self.samplesheet['path'],
            run_info_path=self.run_info['path'],
            rd_label=self.rd['label'],
            logbook_path=self.logbook['path']),
        bcl2fastq.si(rd_path=self.rd['path'],
                     ds_path=self.ds['path'],
                     ssht_path=self.samplesheet['path'],
                     run_info_path=self.run_info['path'],
                     no_lane_splitting=self.no_lane_splitting,
                     barcode_mismatches=self.barcode_mismatches,
                     with_failed_reads=self.with_failed_reads,
                     batch_queuing=self.batch_queuing,
                     queue_spec=self.queues_conf.get('q_bcl2fastq'),
                     logbook_path=self.logbook['path']),
        replace_index_cycles_into_run_info.si(
            conf=self.ir_conf,
            ssht_path=self.samplesheet['path'],
            run_info_path=self.run_info['path'],
            rd_label=self.rd['label'],
            logbook_path=self.logbook['path']),
        dispatch_event.si(
            event='fastq_ready',
            params=dict(ds_path=self.ds['path'],
                        qc_path=self.qc['path'],
                        qc_export_path=self.qc['export_path'],
                        force=True,
                        rd_label=self.rd['label'],
                        progress_status_file=self.completed_file,
                        emit_events=self.emit_events)),
    )

    pipeline.delay()
def implementation(logger, args):
    def get_profile(profile_label, profile_path, logger_):
        file_path = os.path.join(profile_path,
                                 '{}.yaml'.format(profile_label))
        if path_exists(file_path, logger_, force=False):
            msg = "Profile found at {}".format(file_path)
            print(msg)
            logger.info(msg)
            profile = load(file_path)
            return profile
        logger.info("Profile not found at {}".format(file_path))
        return None

    def write_profile(default_config, pl_, profile_label, profile_path,
                      logger_):
        def merge_two_dicts(x, y):
            z = x.copy()   # start with x's keys and values
            z.update(y)    # modifies z with y's keys and values
            return z

        file_path = os.path.join(profile_path,
                                 '{}.yaml'.format(profile_label))
        if path_exists(file_path, logger_, force=False) and not args.force:
            msg = "{} profile already exists".format(file_path)
            print(msg)
            logger.error(msg)
            # sys.exit()
        else:
            to_dump = merge_two_dicts(
                default_config,
                pl_.playbook_vars_template(project_name=profile_label))
            dump(to_dump, file_path)
            logger.info("Created {} profile".format(file_path))
            print("Edit the variable values in the {} file".format(
                file_path))
        return

    profile_label, ext = os.path.splitext(args.profile)
    profile_path = os.path.join(profile_dir, args.label)
    ensure_dir(profile_path)

    plm = PipelinesManager(args)
    pl = plm.get_pipeline(args.label)

    profile = get_profile(profile_label, profile_path, logger)

    path_from_cli = args.config_file if 'config_file' in vars(args) else None
    cm = ConfigurationManager(args=args, path_from_cli=path_from_cli)
    default_config = cm.get_default_config

    if args.create_profile and not args.deployment:
        write_profile(default_config, pl, profile_label, profile_path,
                      logger)
        return

    if args.deployment and not args.create_profile:
        if profile:
            host = args.host
            remote_user = args.remote_user
            connection = args.connection
            pl.instantiate(host, remote_user, connection, profile)
            return
        if not profile:
            msg = 'Profile "{}" not found. Have you created it?\n' \
                  'Type "solida setup --help" for more details'.format(
                      profile_label)
            print(msg)
            logger.error(msg)