def __execute_playbook(self, playbook, inventory_file, random_user,
                       random_clear_text_password):
    path_exists(playbook, self.logger)
    path_exists(inventory_file, self.logger)

    variable_manager = VariableManager()
    loader = DataLoader()
    inventory = Inventory(loader=loader,
                          variable_manager=variable_manager,
                          host_list=inventory_file)

    Options = namedtuple('Options', [
        'listtags', 'listtasks', 'listhosts', 'syntax', 'connection',
        'module_path', 'forks', 'remote_user', 'private_key_file',
        'ssh_common_args', 'ssh_extra_args', 'sftp_extra_args',
        'scp_extra_args', 'become', 'become_method', 'become_user',
        'verbosity', 'check'])
    options = Options(listtags=False, listtasks=False, listhosts=False,
                      syntax=False, connection='ssh', module_path=None,
                      forks=1, remote_user=None, private_key_file=None,
                      ssh_common_args=None, ssh_extra_args=None,
                      sftp_extra_args=None, scp_extra_args=None,
                      become=True, become_method='sudo',
                      become_user='******', verbosity=None, check=False)

    variable_manager.extra_vars = {'r_user': random_user,
                                   'r_password': random_clear_text_password}
    passwords = {}

    pbex = PlaybookExecutor(playbooks=[playbook],
                            inventory=inventory,
                            variable_manager=variable_manager,
                            loader=loader,
                            options=options,
                            passwords=passwords)
    results = pbex.run()
    return results
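# A minimal sketch (assumed, not shown in the original excerpt) of the
# module-level imports __execute_playbook relies on. The import paths below
# match the pre-2.4 Ansible Python API that the VariableManager/Inventory/
# PlaybookExecutor usage above implies; later Ansible versions moved these.
from collections import namedtuple

from ansible.executor.playbook_executor import PlaybookExecutor
from ansible.inventory import Inventory
from ansible.parsing.dataloader import DataLoader
from ansible.vars import VariableManager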
def check(self):
    def flatten(l):
        out = []
        for item in l:
            if isinstance(item, (list, tuple)):
                out.extend(flatten(item))
            else:
                out.append(item)
        return out

    path_exists(self.root_path, self.logger)
    localroot, dirnames, filenames = next(os.walk(self.root_path))
    positive_labels = ['finished', 'ownership ok', 'SampleSheet found',
                       'Barcodes have the same size', 'Metadata found',
                       'To be processed']
    negative_labels = ['running', "waiting for ownership's modification",
                       'SampleSheet not found',
                       "Barcodes don't have the same size",
                       'Metadata not found', 'Processed']
    dir_dict = dict()
    for d in dirnames:
        dir_dict[d] = []
        d_path = os.path.join(self.root_path, d)
        checks = rd_ready_to_be_preprocessed(user=self.user,
                                             group=self.group,
                                             path=d_path,
                                             rd_label=d,
                                             ir_conf=self.ir_conf,
                                             io_conf=self.io_conf)
        ready_to_be_preprocessed = (checks[0] and checks[1] and
                                    checks[2][0] and checks[4])
        if self.emit_events and ready_to_be_preprocessed:
            dispatch_event.si(event='rd_ready',
                              params=dict(rd_path=d_path,
                                          rd_label=d,
                                          emit_events=self.emit_events)
                              ).delay()
        checks = flatten(checks)
        for i in range(len(checks)):
            if checks[i]:
                dir_dict[d].append(positive_labels[i])
            else:
                dir_dict[d].append(negative_labels[i])

    self.logger.info('Checking rundirs in: {}'.format(self.root_path))
    for d, labels in dir_dict.items():
        self.logger.info(' ')
        self.logger.info('Rundir {}'.format(d))
        self.logger.info('{}'.format(labels))
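# A standalone sketch of how flatten() lines the status checks up with the
# six labels above; the boolean values are hypothetical. The nested pair at
# index 2 (samplesheet found, barcodes same size) is expanded in place, so
# the flat list maps one-to-one onto positive_labels/negative_labels.
def flatten(l):
    out = []
    for item in l:
        if isinstance(item, (list, tuple)):
            out.extend(flatten(item))
        else:
            out.append(item)
    return out

checks = [True, True, (True, False), True, True]
assert flatten(checks) == [True, True, True, False, True, True]
# -> finished, ownership, samplesheet, barcodes, metadata, to-be-processed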
def __init__(self, args=None, logger=None):
    self.logger = logger
    self.output_format = args.output_format
    self.conf = get_conf(logger, args.config_file)
    self.io_conf = self.conf.get_io_section()

    c = Client(conf=self.conf, logger=self.logger)
    c.init_bika()
    self.bika = c.bk

    path_exists(args.input_file, self.logger)
    with open(args.input_file, 'r') as stream:
        input_data = yaml.safe_load(stream)

    self.input_paths = input_data.get(
        'default_paths', [self.io_conf.get('archive_root_path')])
    for _ in self.input_paths:
        path_exists(_, self.logger)

    output_file = args.output_file if args.output_file else None
    if output_file and output_file != os.path.realpath(output_file):
        self.logger.error('{} is not a valid path. Please use an absolute '
                          'path'.format(output_file))
        sys.exit()
    self.output_file = output_file

    batches = input_data.get('batches', dict())
    self.batches_info = dict()
    self.sids = list()
    for _, batch in batches.items():
        bid = batch.get('bid', None)
        samples = batch.get('samples', [])
        if bid:
            self.logger.info("Retrieving info for batch {}".format(bid))
            batch_info = self.bika.get_batch_info(bid, samples)
            if batch_info:
                sids = [_ for _ in batch_info.keys()
                        if batch_info[_].get('type')
                        not in SAMPLE_TYPES_TOSKIP]
                self.sids.extend(sids)
                self.batches_info.update(batch_info)
            else:
                self.logger.error('No samples information found for the '
                                  'batch {}'.format(bid))

    if not self.sids:
        self.logger.error('No sample information retrieved for the '
                          'requested batches')
        sys.exit()
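# A hedged sketch of the YAML layout this __init__ expects. The keys
# (default_paths, batches, bid, samples) come from the code above; the
# paths, batch id and sample ids are hypothetical placeholders.
import yaml

example = yaml.safe_load("""
default_paths:
  - /archive/rundirs
batches:
  batch_1:
    bid: B001
    samples: [S001, S002]
""")
assert example['batches']['batch_1']['bid'] == 'B001'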
def __init__(self, args=None, logger=None):
    self.logger = logger
    conf = get_conf(logger, args.config_file)
    self.batch_queuing = args.batch_queuing
    self.queues_conf = conf.get_section('queues')

    r_dir_label = args.rundir_label
    ds_dir_label = 'datasets'
    fqc_dir_label = 'fastqc'
    input_path = args.ds_path
    output_path = args.export_path

    if not (r_dir_label or (input_path and output_path)):
        logger.error("You must provide the rundir_label or both ds_path "
                     "and export_path")
        sys.exit()

    # input path must exist as parser argument or as config file argument
    if not input_path:
        io_conf = conf.get_io_section()
        input_path = os.path.join(io_conf.get('archive_root_path'),
                                  r_dir_label,
                                  ds_dir_label)
    path_exists(input_path, logger)
    self.input_path = input_path

    # export path must exist as parser argument or as config file argument
    if not output_path:
        io_conf = conf.get_io_section()
        output_path = os.path.join(io_conf.get('qc_export_basepath'),
                                   r_dir_label)
    # FIXME: this is a local path; it must be checked that it runs on the
    # right node
    if not path_exists(output_path, logger, force=False):
        ensure_dir(output_path)
    path_exists(output_path, logger)
    self.output_path = output_path

    self.fqc_path = os.path.join(self.input_path, fqc_dir_label)
def __init__(self, args=None, logger=None):
    self.logger = logger
    self.destination = args.destination
    self.dry_run = args.dry_run
    conf = get_conf(logger, args.config_file)
    self.conf = conf
    self.batch_id = batch_id = args.batch_id

    c = Client(conf=conf, logger=logger)
    c.init_bika()
    batch_info = c.bk.get_batch_info(batch_id)
    if batch_info:
        self.batch_info = batch_info
    else:
        logger.error('No information found for the samples owned by the '
                     'batch {}'.format(batch_id))
        sys.exit()

    # input path must exist as parser argument or as config file argument
    if args.input_path:
        input_path = args.input_path
    else:
        io_conf = conf.get_io_section()
        input_path = io_conf.get('archive_root_path')
    path_exists(input_path, logger)
    self.input_path = input_path

    self.output_path = args.output_path if args.output_path else None
    self.inventory = args.inventory if args.inventory else None
    self.playbook_path = args.playbook_path if args.playbook_path else None
def __init__(self, args=None, logger=None):
    self.logger = logger
    self.conf = get_conf(logger, args.config_file)
    self.io_conf = self.conf.get_io_section()
    self.batch_queuing = args.batch_queuing
    self.queues_conf = self.conf.get_section('queues')

    rd_label = args.rd_label
    ds_path = args.ds_path if args.ds_path \
        else os.path.join(self.io_conf.get('archive_root_path'),
                          rd_label,
                          self.io_conf.get('ds_folder_name'))
    qc_path = args.qc_path if args.qc_path \
        else os.path.join(ds_path, self.io_conf.get('qc_folder_name'))
    qc_export_path = args.qc_export_path if args.qc_export_path \
        else os.path.join(self.io_conf.get('qc_export_basepath'), rd_label)

    # FIXME: this is a local path; it must be checked that it runs on the
    # right node
    if not path_exists(qc_export_path, logger, force=False):
        ensure_dir(qc_export_path)
    path_exists(ds_path, logger)
    path_exists(qc_export_path, logger)

    self.input_path = ds_path
    self.output_path = qc_export_path
    self.qc_path = qc_path
    self.rerun = args.rerun
    self.started = os.path.join(
        self.qc_path, self.io_conf.get('quality_check_started_file'))
    self.completed = os.path.join(
        self.qc_path, self.io_conf.get('quality_check_completed_file'))
def run(self):
    copy_task = copy_qc_dirs.si(self.input_path, self.output_path)
    msgs = ["Generating Fastqc reports",
            "Copying qc dirs from {} to {}".format(self.input_path,
                                                   self.output_path)]
    if not path_exists(self.fqc_path, self.logger, force=False):
        self.logger.info("{} and {}".format(msgs[0], msgs[1]))
        ensure_dir(self.fqc_path)
        qc_task = chain(
            rd_collect_fastq.si(ds_path=self.input_path),
            qc_runner.s(outdir=self.fqc_path,
                        batch_queuing=self.batch_queuing,
                        queue_spec=self.queues_conf.get('q_fastqc')),
            copy_task,
        ).delay()
    else:
        self.logger.info(msgs[1])
        copy_task.delay()
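# A minimal sketch (hypothetical tasks; broker configuration omitted) of
# the Celery idiom used above: .si() builds an immutable signature that
# ignores the previous task's result, while .s() lets the next task receive
# the previous return value as its first positional argument.
from celery import Celery, chain

app = Celery('demo')

@app.task
def collect(path):
    return [path + '/a.fastq.gz', path + '/b.fastq.gz']

@app.task
def run_qc(fastq_files, outdir):
    return 'qc of {} files written to {}'.format(len(fastq_files), outdir)

@app.task
def copy_dirs(src, dest):
    return 'copied {} -> {}'.format(src, dest)

# collect's return value flows into run_qc (.s); copy_dirs ignores it (.si)
workflow = chain(collect.s('/data/ds'),
                 run_qc.s(outdir='/data/ds/fastqc'),
                 copy_dirs.si('/data/ds/fastqc', '/export/qc'))
# workflow.delay() would enqueue the whole chain on a running broker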
def run(self):
    msgs = ["Generating Fastqc reports",
            "Copying qc dirs from {} to {}".format(self.input_path,
                                                   self.output_path)]
    if path_exists(self.qc_path, self.logger, force=False) \
            and len(os.listdir(self.qc_path)) > 0 and not self.rerun:
        self.logger.info(msgs[1])
        copy_task = dispatch_event.si(event='copy_qc_folders',
                                      params=dict(src=self.input_path,
                                                  dest=self.output_path))
        copy_task.delay()
    else:
        self.logger.info("{} and {}".format(msgs[0], msgs[1]))
        ensure_dir(self.qc_path, force=True)
        qc_task = chain(
            dispatch_event.si(
                event='qc_started',
                params=dict(progress_status_file=self.started)),
            rd_collect_fastq.si(ds_path=self.input_path),
            qc_runner.s(outdir=self.qc_path,
                        batch_queuing=self.batch_queuing,
                        queue_spec=self.queues_conf.get('q_fastqc')),
        ).apply_async()

        copy_task = trigger_event.si(event='copy_qc_folders',
                                     params=dict(src=self.input_path,
                                                 dest=self.output_path),
                                     tasks=qc_task.get())
        copy_task.apply_async()

        trigger_event.si(event='qc_completed',
                         params=dict(progress_status_file=self.completed),
                         tasks=qc_task.get()).apply_async()
def run(self):
    if self.destination == 'path':
        io_conf = self.conf.get_io_section()
        if self.output_path:
            output_path = self.output_path
        else:
            output_path = io_conf.get('ds_export_path')
        # if not path_exists(output_path, logger, force=False):
        #     ensure_dir(output_path)
        # path_exists(output_path, logger)
        self.__fs2fs_carrier(self.input_path, output_path)

    if self.destination == 'ftp':
        def pass_gen(length):
            import random
            import string
            # note: 'chars' avoids shadowing the ascii() builtin; the
            # character set contains no duplicates, so choice() over the
            # plain string is equivalent to the original set-based lookup
            chars = string.ascii_letters + string.digits + '@-_'
            return "".join(random.choice(chars) for _ in range(length))

        random_user = pass_gen(8)
        random_clear_text_password = pass_gen(12)
        self.logger.info('Creating random account into the ftp server')
        self.logger.info('user: {}'.format(random_user))
        self.logger.info('password: {}'.format(random_clear_text_password))

        playbook_label = 'create_ftp_user.yml'
        if self.playbook_path:
            playbook_path = self.playbook_path
        else:
            io_conf = self.conf.get_io_section()
            playbook_path = os.path.expanduser(
                io_conf.get('playbooks_path'))
        playbook = os.path.join(playbook_path, playbook_label)
        path_exists(playbook, self.logger)

        inventory_label = 'inventory'
        if self.inventory:
            inventory = self.inventory
        else:
            io_conf = self.conf.get_io_section()
            inventory_path = os.path.expanduser(
                io_conf.get('playbooks_path'))
            inventory = os.path.join(inventory_path, inventory_label)
        path_exists(inventory, self.logger)

        results = self.__execute_playbook(playbook, inventory, random_user,
                                          random_clear_text_password)
        self.logger.info('Playbook result: {}'.format(results))

        if self.output_path:
            output_path = self.output_path
        else:
            io_conf = self.conf.get_io_section()
            output_path = os.path.join(io_conf.get('ftp_export_path'),
                                       random_user)
        path_exists(output_path, self.logger)
        self.__fs2fs_carrier(self.input_path, output_path)
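# random.choice() above uses a non-cryptographic PRNG. For generating FTP
# credentials, a stronger source may be preferable; a hedged alternative
# sketch using the stdlib's OS-backed generator (available on both
# Python 2 and 3):
import random
import string

def pass_gen_secure(length):
    chars = string.ascii_letters + string.digits + '@-_'
    rng = random.SystemRandom()  # draws randomness from os.urandom()
    return "".join(rng.choice(chars) for _ in range(length))

print(pass_gen_secure(12))  # e.g. 'k3@Vq-8ZpW_d' (output varies)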
def run(self):
    path_exists(self.rd['rpath'], self.logger)
    rd_status_checks = rd_ready_to_be_preprocessed(
        user=self.user,
        group=self.group,
        path=self.rd['rpath'],
        rd_label=self.rd['label'],
        ssht_filename=self.samplesheet['filename'],
        ir_conf=self.conf.get_irods_section())

    check = (rd_status_checks[0] and rd_status_checks[1] and
             rd_status_checks[2][0])
    barcodes_have_same_size = rd_status_checks[2][1]
    check_sanitize_metadata = not rd_status_checks[3]

    if not check:
        self.logger.error("{} is not ready to be preprocessed".format(
            self.rd['label']))
        sys.exit()

    self.logger.info('Processing {}'.format(self.rd['label']))
    self.logger.info('running path {}'.format(self.rd['rpath']))
    self.logger.info('completed path {}'.format(self.rd['cpath']))
    self.logger.info('archive path {}'.format(self.rd['apath']))
    self.logger.info('samplesheet path {}'.format(
        self.samplesheet['file_path']))

    ensure_dir(self.ds['path'])
    ensure_dir(self.fqc['path'])

    irods_task = chain(
        sanitize_metadata.si(conf=self.conf.get_irods_section(),
                             ssht_filename=self.samplesheet['filename'],
                             rd_label=self.rd['label'],
                             sanitize=check_sanitize_metadata),
        copy_run_info_to_irods.si(conf=self.conf.get_irods_section(),
                                  run_info_path=self.run_info['file_path'],
                                  rd_label=self.rd['label']),
        copy_run_parameters_to_irods.si(
            conf=self.conf.get_irods_section(),
            run_parameters_path=self.run_parameters['file_path'],
            rd_label=self.rd['label']),
    )

    samplesheet_task = chain(
        copy_samplesheet_from_irods.si(
            conf=self.conf.get_irods_section(),
            ssht_path=self.samplesheet['file_path'],
            rd_label=self.rd['label'],
            overwrite_samplesheet=self.overwrite_samplesheet),
        replace_values_into_samplesheet.si(
            conf=self.conf.get_irods_section(),
            ssht_path=self.samplesheet['file_path'],
            rd_label=self.rd['label'],
            overwrite_samplesheet=self.overwrite_samplesheet),
    )

    qc_task = chain(
        rd_collect_fastq.si(ds_path=self.ds['path']),
        qc_runner.s(outdir=self.fqc['path'],
                    batch_queuing=self.batch_queuing,
                    queue_spec=self.queues_conf.get('low')),
        copy_qc_dirs.si(src=self.fqc['path'],
                        dest=self.fqc['export_path'],
                        copy_qc=self.copy_qc),
    )

    # full pre-processing sequencing rundir pipeline
    pipeline = chain(
        irods_task,
        samplesheet_task,
        replace_index_cycles_into_run_info.si(
            conf=self.conf.get_irods_section(),
            barcodes_have_same_size=barcodes_have_same_size,
            run_info_path=self.run_info['file_path'],
            rd_label=self.rd['label']),
        move.si(self.rd['rpath'], self.rd['apath']),
        bcl2fastq.si(rd_path=self.rd['apath'],
                     ds_path=self.ds['path'],
                     ssht_path=self.samplesheet['file_path'],
                     no_lane_splitting=self.no_lane_splitting,
                     barcode_mismatches=self.barcode_mismatches,
                     batch_queuing=self.batch_queuing,
                     queue_spec=self.queues_conf.get('low')),
        replace_index_cycles_into_run_info.si(
            conf=self.conf.get_irods_section(),
            barcodes_have_same_size=barcodes_have_same_size,
            run_info_path=self.run_info['file_apath'],
            rd_label=self.rd['label']),
        qc_task,
    ).delay()
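# A hedged sketch (dummy tasks, broker config omitted) of the composition
# pattern above: Celery accepts chains as elements of an outer chain, so
# irods_task, samplesheet_task and qc_task run as consecutive stages of a
# single linear pipeline.
from celery import Celery, chain

app = Celery('demo')

@app.task
def step(name):
    return name

stage_a = chain(step.si('sanitize'), step.si('copy_run_info'))
stage_b = chain(step.si('copy_samplesheet'), step.si('replace_values'))
pipeline = chain(stage_a, stage_b, step.si('bcl2fastq'))
# pipeline.delay() enqueues all five steps, in order, on a running broker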
def run(self):
    path_exists(self.rd['path'], self.logger)
    rd_status_checks = rd_ready_to_be_preprocessed(
        user=self.user,
        group=self.group,
        path=self.rd['path'],
        rd_label=self.rd['label'],
        ssht_filename=self.samplesheet['filename'],
        ir_conf=self.ir_conf,
        io_conf=self.io_conf)

    check = (rd_status_checks[0] and rd_status_checks[1] and
             rd_status_checks[2][0] and rd_status_checks[2][1])
    check_sanitize_metadata = not rd_status_checks[3]

    if not check:
        self.logger.error("{} is not ready to be preprocessed".format(
            self.rd['label']))
        sys.exit()

    self.logger.info('Processing {}'.format(self.rd['label']))
    self.logger.info('running path {}'.format(self.rd['path']))
    self.logger.info('datasets path {}'.format(self.ds['path']))
    self.logger.info('samplesheet path {}'.format(self.samplesheet['path']))
    if self.emit_events:
        self.logger.info('quality check output path {}'.format(
            self.qc['path']))
        self.logger.info('quality check export path {}'.format(
            self.qc['export_path']))

    ensure_dir(self.ds['path'])

    irods_task = chain(
        sanitize_metadata.si(conf=self.ir_conf,
                             ssht_filename=self.samplesheet['filename'],
                             rd_label=self.rd['label'],
                             sanitize=check_sanitize_metadata,
                             logbook_path=self.logbook['path']),
        copy_run_info_to_irods.si(conf=self.ir_conf,
                                  run_info_path=self.run_info['path'],
                                  rd_label=self.rd['label'],
                                  logbook_path=self.logbook['path']),
        copy_run_parameters_to_irods.si(
            conf=self.ir_conf,
            run_parameters_path=self.run_parameters['path'],
            rd_label=self.rd['label'],
            logbook_path=self.logbook['path']),
    )

    samplesheet_task = chain(
        copy_samplesheet_from_irods.si(
            conf=self.ir_conf,
            ssht_path=self.samplesheet['path'],
            rd_label=self.rd['label'],
            overwrite_samplesheet=self.overwrite_samplesheet,
            logbook_path=self.logbook['path']),
        replace_values_into_samplesheet.si(
            conf=self.ir_conf,
            ssht_path=self.samplesheet['path'],
            rd_label=self.rd['label'],
            overwrite_samplesheet=self.overwrite_samplesheet,
            logbook_path=self.logbook['path']),
    )

    # full pre-processing sequencing rundir pipeline
    pipeline = chain(
        dispatch_event.si(event='preprocessing_started',
                          params=dict(ds_path=self.ds['path'],
                                      rd_label=self.rd['label'],
                                      progress_status_file=self.started_file,
                                      emit_events=self.emit_events)),
        irods_task,
        samplesheet_task,
        replace_index_cycles_into_run_info.si(
            conf=self.ir_conf,
            ssht_path=self.samplesheet['path'],
            run_info_path=self.run_info['path'],
            rd_label=self.rd['label'],
            logbook_path=self.logbook['path']),
        bcl2fastq.si(rd_path=self.rd['path'],
                     ds_path=self.ds['path'],
                     ssht_path=self.samplesheet['path'],
                     run_info_path=self.run_info['path'],
                     no_lane_splitting=self.no_lane_splitting,
                     barcode_mismatches=self.barcode_mismatches,
                     with_failed_reads=self.with_failed_reads,
                     batch_queuing=self.batch_queuing,
                     queue_spec=self.queues_conf.get('q_bcl2fastq'),
                     logbook_path=self.logbook['path']),
        replace_index_cycles_into_run_info.si(
            conf=self.ir_conf,
            ssht_path=self.samplesheet['path'],
            run_info_path=self.run_info['path'],
            rd_label=self.rd['label'],
            logbook_path=self.logbook['path']),
        dispatch_event.si(
            event='fastq_ready',
            params=dict(ds_path=self.ds['path'],
                        qc_path=self.qc['path'],
                        qc_export_path=self.qc['export_path'],
                        force=True,
                        rd_label=self.rd['label'],
                        progress_status_file=self.completed_file,
                        emit_events=self.emit_events)),
    )
    pipeline.delay()