def copy_qc_dirs(src, dest, copy_qc=True):
    if copy_qc:
        dirs = ['Stats', 'Reports', 'fastqc']
        ensure_dir(dest)
        task0 = copy.si(os.path.join(src, dirs[0]),
                        os.path.join(dest, dirs[0]))
        task1 = copy.si(os.path.join(src, dirs[1]),
                        os.path.join(dest, dirs[1]))
        task2 = copy.si(os.path.join(src, dirs[2]),
                        os.path.join(dest, dirs[2]))

        job = group(task0, task1, task2)()
        while job.waiting():
            pass
        return job.join()

    return None
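
# --- Illustration (not part of the module) ----------------------------------
# copy_qc_dirs relies on two helpers imported elsewhere in the package: a
# Celery `copy` task and an `ensure_dir` function. Their real implementation
# is not shown here; the sketch below only illustrates the minimal behaviour
# copy_qc_dirs seems to assume (recursive copy, mkdir -p). Names are prefixed
# with `_sketch_` to make clear they are hypothetical stand-ins.

import os
import shutil


def _sketch_ensure_dir(path):
    # create the directory (and any missing parents) if it does not exist
    if not os.path.exists(path):
        os.makedirs(path)


def _sketch_copy(src, dest):
    # copy a directory tree or a single file from src to dest
    if os.path.isdir(src):
        shutil.copytree(src, dest)
    else:
        shutil.copy2(src, dest)
    return dest
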
def __fs2fs_carrier(self, ipath, opath):
    bids = [_ for _ in self.batch_info.keys()
            if self.batch_info[_].get('type') not in SAMPLE_TYPES_TOSKIP]

    self.logger.info('Looking for files related to {} Bika ids'.format(
        len(bids)))
    self.logger.info('Starting from {}'.format(ipath))

    if len(bids) > 0:
        ensure_dir(os.path.join(opath, self.batch_id))

        dm = DatasetsManager(self.logger, bids)
        datasets_info, count = dm.collect_fastq_from_fs(ipath)
        self.logger.info("found {} files".format(count))

        for bid in bids:
            if bid in datasets_info:
                for f in datasets_info[bid]:
                    src = f.get('filepath')
                    read = f.get('read_label')
                    lane = f.get('lane')
                    ext = f.get('file_ext')

                    sample_label = self.batch_info[bid].get('client_sample_id')
                    if lane:
                        sample_label = '_'.join(
                            [sample_label.replace(' ', '_'), lane, read])
                    else:
                        sample_label = '_'.join(
                            [sample_label.replace(' ', '_'), read])
                    sample_label = '.'.join([sample_label, ext])

                    dst = os.path.join(opath, self.batch_id, sample_label)
                    self.logger.info("Copying {} into {}".format(src, dst))

                    if os.path.isfile(dst):
                        self.logger.info('{} skipped'.format(
                            os.path.basename(dst)))
                    else:
                        if not self.dry_run:
                            copy.si(src, dst).delay()
                            self.logger.info('{} copied'.format(
                                os.path.basename(dst)))
            else:
                msg = 'I have not found any file related to this ' \
                      'Bika id: {}'.format(bid)
                self.logger.warning(msg)
                self.logger.info('{} skipped'.format(bid))
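
# --- Illustration (not part of the module) ----------------------------------
# The structures consumed by __fs2fs_carrier are defined elsewhere. The shapes
# below are assumptions inferred from the accesses above: each batch_info entry
# carries at least 'type' and 'client_sample_id', and
# DatasetsManager.collect_fastq_from_fs() returns, per Bika id, a list of dicts
# with 'filepath', 'read_label', 'lane' and 'file_ext'. All values are made up.

example_batch_info = {
    'BIKAID-001': {'type': 'DNA', 'client_sample_id': 'sample 01'},
}

example_datasets_info = {
    'BIKAID-001': [
        {'filepath': '/archive/run/datasets/S1_L001_R1.fastq.gz',
         'read_label': 'R1',
         'lane': 'L001',
         'file_ext': 'fastq.gz'},
    ],
}

# With these values the destination filename would be
# 'sample_01_L001_R1.fastq.gz': the client_sample_id with spaces replaced by
# underscores, followed by the lane and read labels, then the original
# file extension.
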
def run(self):
    copy_task = copy_qc_dirs.si(self.input_path, self.output_path)
    msgs = ["Generating FastQC reports",
            "Copying qc dirs from {} to {}".format(self.input_path,
                                                   self.output_path)]

    if not path_exists(self.fqc_path, self.logger, force=False):
        self.logger.info("{} and {}".format(msgs[0], msgs[1]))
        ensure_dir(self.fqc_path)
        qc_task = chain(
            rd_collect_fastq.si(ds_path=self.input_path),
            qc_runner.s(outdir=self.fqc_path,
                        batch_queuing=self.batch_queuing,
                        queue_spec=self.queues_conf.get('q_fastqc')),
            copy_task
        ).delay()
    else:
        self.logger.info(msgs[1])
        copy_task.delay()
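
# --- Illustration (not part of the module) ----------------------------------
# The chain above mixes .si() (immutable signature: ignores the previous
# task's result) and .s() (the previous result is prepended to the arguments),
# so qc_runner receives the fastq list produced by rd_collect_fastq while
# copy_qc_dirs ignores qc_runner's return value. A minimal standalone sketch
# of that pattern, with hypothetical tasks and an in-memory broker:

from celery import Celery, chain

sketch_app = Celery('sketch', broker='memory://')


@sketch_app.task
def collect(path):
    # stands in for rd_collect_fastq: returns a list of fastq files
    return [path + '/a_R1.fastq.gz', path + '/a_R2.fastq.gz']


@sketch_app.task
def run_qc(fastq_list, outdir):
    # stands in for qc_runner: receives collect()'s result as first argument
    return 'qc of {} files written to {}'.format(len(fastq_list), outdir)


@sketch_app.task
def copy_dirs(src, dest):
    # stands in for copy_qc_dirs: .si() makes it ignore run_qc()'s result
    return (src, dest)


workflow = chain(collect.si('/data/run/datasets'),
                 run_qc.s(outdir='/data/run/datasets/fastqc'),
                 copy_dirs.si('/data/run/datasets/fastqc', '/export/qc'))
# workflow.delay() would queue the three tasks to run in order on a worker
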
def __init__(self, args=None, logger=None):
    self.logger = logger
    conf = get_conf(logger, args.config_file)
    self.batch_queuing = args.batch_queuing
    self.queues_conf = conf.get_section('queues')

    r_dir_label = args.rundir_label
    ds_dir_label = 'datasets'
    fqc_dir_label = 'fastqc'

    input_path = args.ds_path
    output_path = args.export_path

    if not (r_dir_label or (input_path and output_path)):
        logger.error("You must provide the rundir_label or both ds_path "
                     "and export_path")
        sys.exit()

    # the input path must exist, provided either as a parser argument or
    # through the config file
    if not input_path:
        io_conf = conf.get_io_section()
        input_path = os.path.join(io_conf.get('archive_root_path'),
                                  r_dir_label,
                                  ds_dir_label)
    path_exists(input_path, logger)
    self.input_path = input_path

    # the export path must exist, provided either as a parser argument or
    # through the config file
    if not output_path:
        io_conf = conf.get_io_section()
        output_path = os.path.join(io_conf.get('qc_export_basepath'),
                                   r_dir_label)
    # FIXME: this is a local path; it must be checked that it runs on the
    # right node
    if not path_exists(output_path, logger, force=False):
        ensure_dir(output_path)
    path_exists(output_path, logger)
    self.output_path = output_path

    self.fqc_path = os.path.join(self.input_path, fqc_dir_label)
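
# --- Illustration (not part of the module) ----------------------------------
# The constructor above only consumes a handful of attributes from `args`.
# A hypothetical parser producing a compatible namespace could look like the
# sketch below; option names are assumptions based on the attribute names,
# not the project's actual CLI.

import argparse

sketch_parser = argparse.ArgumentParser(description='qc workflow (sketch)')
sketch_parser.add_argument('--config_file', help='path to the configuration file')
sketch_parser.add_argument('--rundir_label', help='label of the sequencing rundir')
sketch_parser.add_argument('--ds_path', help='input datasets path')
sketch_parser.add_argument('--export_path', help='output path for the qc reports')
sketch_parser.add_argument('--batch_queuing', action='store_true',
                           help='submit heavy tasks to a batch queue')
# either --rundir_label or both --ds_path and --export_path must be given,
# otherwise __init__ logs an error and exits
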
def run(self):
    path_exists(self.rd['rpath'], self.logger)
    rd_status_checks = rd_ready_to_be_preprocessed(
        user=self.user,
        group=self.group,
        path=self.rd['rpath'],
        rd_label=self.rd['label'],
        ssht_filename=self.samplesheet['filename'],
        ir_conf=self.conf.get_irods_section())

    check = rd_status_checks[0] and rd_status_checks[1] and \
        rd_status_checks[2][0]
    barcodes_have_same_size = rd_status_checks[2][1]
    check_sanitize_metadata = not rd_status_checks[3]

    if not check:
        self.logger.error("{} is not ready to be preprocessed".format(
            self.rd['label']))
        sys.exit()

    self.logger.info('Processing {}'.format(self.rd['label']))
    self.logger.info('running path {}'.format(self.rd['rpath']))
    self.logger.info('completed path {}'.format(self.rd['cpath']))
    self.logger.info('archive path {}'.format(self.rd['apath']))
    self.logger.info('samplesheet path {}'.format(
        self.samplesheet['file_path']))

    ensure_dir(self.ds['path'])
    ensure_dir(self.fqc['path'])

    irods_task = chain(
        sanitize_metadata.si(conf=self.conf.get_irods_section(),
                             ssht_filename=self.samplesheet['filename'],
                             rd_label=self.rd['label'],
                             sanitize=check_sanitize_metadata),

        copy_run_info_to_irods.si(conf=self.conf.get_irods_section(),
                                  run_info_path=self.run_info['file_path'],
                                  rd_label=self.rd['label']),

        copy_run_parameters_to_irods.si(
            conf=self.conf.get_irods_section(),
            run_parameters_path=self.run_parameters['file_path'],
            rd_label=self.rd['label']),
    )

    samplesheet_task = chain(
        copy_samplesheet_from_irods.si(
            conf=self.conf.get_irods_section(),
            ssht_path=self.samplesheet['file_path'],
            rd_label=self.rd['label'],
            overwrite_samplesheet=self.overwrite_samplesheet),

        replace_values_into_samplesheet.si(
            conf=self.conf.get_irods_section(),
            ssht_path=self.samplesheet['file_path'],
            rd_label=self.rd['label'],
            overwrite_samplesheet=self.overwrite_samplesheet),
    )

    qc_task = chain(
        rd_collect_fastq.si(ds_path=self.ds['path']),
        qc_runner.s(outdir=self.fqc['path'],
                    batch_queuing=self.batch_queuing,
                    queue_spec=self.queues_conf.get('low')),
        copy_qc_dirs.si(src=self.fqc['path'],
                        dest=self.fqc['export_path'],
                        copy_qc=self.copy_qc),
    )

    # full pre-processing sequencing rundir pipeline
    pipeline = chain(
        irods_task,
        samplesheet_task,
        replace_index_cycles_into_run_info.si(
            conf=self.conf.get_irods_section(),
            barcodes_have_same_size=barcodes_have_same_size,
            run_info_path=self.run_info['file_path'],
            rd_label=self.rd['label']),
        move.si(self.rd['rpath'], self.rd['apath']),
        bcl2fastq.si(rd_path=self.rd['apath'],
                     ds_path=self.ds['path'],
                     ssht_path=self.samplesheet['file_path'],
                     no_lane_splitting=self.no_lane_splitting,
                     barcode_mismatches=self.barcode_mismatches,
                     batch_queuing=self.batch_queuing,
                     queue_spec=self.queues_conf.get('low')),
        replace_index_cycles_into_run_info.si(
            conf=self.conf.get_irods_section(),
            barcodes_have_same_size=barcodes_have_same_size,
            run_info_path=self.run_info['file_apath'],
            rd_label=self.rd['label']),
        qc_task,
    ).delay()
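
# --- Illustration (not part of the module) ----------------------------------
# The pipeline above is a chain of chains: irods_task, samplesheet_task and
# qc_task are themselves chains, embedded as single steps of the outer chain,
# and the .si() signatures keep each stage independent of the previous one's
# result. Minimal standalone sketch of that composition, with a hypothetical
# task and an in-memory broker:

from celery import Celery, chain

pipe_app = Celery('pipe_sketch', broker='memory://')


@pipe_app.task
def step(name):
    # stands in for any of the pipeline tasks above; just returns its label
    return name


stage_irods = chain(step.si('sanitize_metadata'), step.si('copy_run_info'))
stage_sheet = chain(step.si('copy_samplesheet'), step.si('replace_values'))
pipeline = chain(stage_irods, stage_sheet, step.si('bcl2fastq'), step.si('qc'))
# pipeline.delay() queues every step, to be executed strictly in this order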