def __init__(self, args=None, logger=None):
    """Collect sample information for the requested batches from Bika.

    Reads a YAML input file listing search paths and batches, queries
    Bika for each batch and keeps the sample ids whose type is not in
    SAMPLE_TYPES_TOSKIP. Exits if the output path is not absolute or if
    no sample information could be retrieved at all.

    :param args: parsed CLI namespace (config_file, input_file,
                 output_file and output_format are read)
    :param logger: logger used for progress and error reporting
    """
    self.logger = logger
    self.output_format = args.output_format
    self.conf = get_conf(logger, args.config_file)
    self.io_conf = self.conf.get_io_section()

    c = Client(conf=self.conf, logger=self.logger)
    c.init_bika()
    self.bika = c.bk

    path_exists(args.input_file, self.logger)
    with open(args.input_file, 'r') as stream:
        input_data = yaml.safe_load(stream)

    self.input_paths = input_data.get(
        'default_paths', [self.io_conf.get('archive_root_path')])
    for input_path in self.input_paths:
        path_exists(input_path, self.logger)

    output_file = args.output_file if args.output_file else None
    # BUGFIX: guard against output_file being None before calling
    # os.path.realpath(), which would raise TypeError instead of the
    # intended error message.
    if output_file is not None and output_file != os.path.realpath(output_file):
        self.logger.error('{} is not a valid path. Please use absolute path'.format(output_file))
        sys.exit()
    self.output_file = output_file

    # BUGFIX: tolerate a missing or empty 'batches' section instead of
    # crashing on None.items().
    batches = input_data.get('batches') or dict()
    self.batches_info = dict()
    self.sids = list()
    requested_bids = list()
    for _, batch in batches.items():
        bid = batch.get('bid', None)
        samples = batch.get('samples', [])
        if bid:
            requested_bids.append(str(bid))
            self.logger.info("Retrieving info for batch {}".format(bid))
            batch_info = self.bika.get_batch_info(bid, samples)
            if batch_info:
                # keep only samples whose type is not in the skip list
                sids = [s for s in batch_info.keys()
                        if batch_info[s].get('type') not in SAMPLE_TYPES_TOSKIP]
                self.sids.extend(sids)
                self.batches_info.update(batch_info)
            else:
                self.logger.error('No samples information found for the '
                                  'batch {}'.format(bid))
    if not self.sids:
        # BUGFIX: report the batch ids that were queried; self.sids is
        # always empty inside this branch, so joining it printed nothing.
        self.logger.error('I have not retrieve any information for the '
                          'batches {}'.format(" ".join(requested_bids)))
        sys.exit()
def __init__(self, args=None, logger=None):
    """Load configuration sections and rundir/ownership settings."""
    self.logger = logger
    conf = get_conf(logger, args.config_file)
    self.conf = conf
    self.io_conf = conf.get_io_section()
    self.do_conf = conf.get_section('data_ownership')
    self.ir_conf = conf.get_irods_section()
    # CLI argument takes precedence over the configured default
    if args.root_path:
        self.root_path = args.root_path
    else:
        self.root_path = self.io_conf.get('rundirs_root_path')
    self.user = self.do_conf.get('user')
    self.group = self.do_conf.get('group')
    self.emit_events = args.emit_events
def __init__(self, args=None, logger=None):
    """Collect config sections and CLI options for the sync task."""
    self.logger = logger
    # options driven directly by the command line
    self.rundir_label = args.rundir_label
    self.emit_events = args.emit_events
    self.force = args.force
    self.sync_all_analyses = args.sync_all_analyses
    # configuration sections
    self.conf = get_conf(logger, args.config_file)
    self.irods_conf = self.conf.get_irods_section()
    self.bika_conf = self.conf.get_section('bika')
    self.io_conf = self.conf.get_io_section()
    # fixed samplesheet filename expected inside each run directory
    self.samplesheet_filename = 'SampleSheet.csv'
def __init__(self, args=None, logger=None):
    """Resolve rundir root path, data ownership and iRODS settings."""
    self.logger = logger
    conf = get_conf(logger, args.config_file)
    # CLI override wins; the IO section is only consulted when needed
    self.root_path = (args.root_path
                      if args.root_path
                      else conf.get_io_section().get('rundirs_root_path'))
    ownership = conf.get_section('data_ownership')
    self.user = ownership.get('user')
    self.group = ownership.get('group')
    self.ir_conf = conf.get_irods_section()
    self.proc_rundir = args.proc_rundir
def __init__(self, args=None, logger=None):
    """Resolve dataset input path and QC export path for a run.

    Either a rundir label or both an explicit dataset path and export
    path must be supplied; missing values are derived from the IO
    section of the configuration file. Exits when neither is available.

    :param args: parsed CLI namespace (config_file, batch_queuing,
                 rundir_label, ds_path and export_path are read)
    :param logger: logger used for error reporting
    """
    self.logger = logger
    conf = get_conf(logger, args.config_file)
    self.batch_queuing = args.batch_queuing
    self.queues_conf = conf.get_section('queues')

    r_dir_label = args.rundir_label
    ds_dir_label = 'datasets'
    fqc_dir_label = 'fastqc'
    input_path = args.ds_path
    output_path = args.export_path

    # FIX: guard clause replaces the original 'if cond: pass / else'
    # anti-pattern; behavior is unchanged.
    if not (r_dir_label or (input_path and output_path)):
        logger.error("You must provide the rundir_label or both ds_path "
                     "and export_path")
        sys.exit()

    # input path must exists as parser argument or as config file argument
    if not input_path:
        io_conf = conf.get_io_section()
        input_path = os.path.join(io_conf.get('archive_root_path'),
                                  r_dir_label,
                                  ds_dir_label)
    path_exists(input_path, logger)
    self.input_path = input_path

    # export path must exists as parser argument or as config file argument
    if not output_path:
        io_conf = conf.get_io_section()
        output_path = os.path.join(io_conf.get('qc_export_basepath'),
                                   r_dir_label)
    # FIXME: this is a local path, must be checked that run on right node
    if not path_exists(output_path, logger, force=False):
        ensure_dir(output_path)
    path_exists(output_path, logger)
    self.output_path = output_path

    self.fqc_path = os.path.join(self.input_path, fqc_dir_label)
def __init__(self, args=None, logger=None):
    """Fetch batch information from Bika and resolve transfer paths."""
    self.logger = logger
    self.destination = args.destination
    self.dry_run = args.dry_run

    conf = get_conf(logger, args.config_file)
    self.conf = conf
    batch_id = args.batch_id
    self.batch_id = batch_id

    client = Client(conf=conf, logger=logger)
    client.init_bika()
    batch_info = client.bk.get_batch_info(batch_id)
    # bail out early when Bika knows nothing about this batch
    if not batch_info:
        logger.error('I have not found any information of the samples '
                     'owned by the batch {}'.format(batch_id))
        sys.exit()
    self.batch_info = batch_info

    # input path must exists as parser argument or as config file argument
    input_path = args.input_path
    if not input_path:
        io_conf = conf.get_io_section()
        input_path = io_conf.get('archive_root_path')
    path_exists(input_path, logger)
    self.input_path = input_path

    self.output_path = args.output_path if args.output_path else None
    self.inventory = args.inventory if args.inventory else None
    self.playbook_path = args.playbook_path if args.playbook_path else None
def __init__(self, args=None, logger=None):
    """Set up dataset, QC and QC-export paths plus queue settings."""
    self.logger = logger
    self.conf = get_conf(logger, args.config_file)
    self.io_conf = self.conf.get_io_section()
    self.batch_queuing = args.batch_queuing
    self.queues_conf = self.conf.get_section('queues')

    rd_label = args.rd_label
    # each path falls back to a location built from the IO config section
    if args.ds_path:
        ds_path = args.ds_path
    else:
        ds_path = os.path.join(self.io_conf.get('archive_root_path'),
                               rd_label,
                               self.io_conf.get('ds_folder_name'))
    if args.qc_path:
        qc_path = args.qc_path
    else:
        qc_path = os.path.join(ds_path, self.io_conf.get('qc_folder_name'))
    if args.qc_export_path:
        qc_export_path = args.qc_export_path
    else:
        qc_export_path = os.path.join(
            self.io_conf.get('qc_export_basepath'), rd_label)

    # FIXME: this is a local path, must be checked that run on right node
    if not path_exists(qc_export_path, logger, force=False):
        ensure_dir(qc_export_path)
    path_exists(ds_path, logger)
    path_exists(qc_export_path, logger)

    self.input_path = ds_path
    self.output_path = qc_export_path
    self.qc_path = qc_path
    self.rerun = args.rerun
    # sentinel files marking the state of the quality check
    self.started = os.path.join(
        self.qc_path, self.io_conf.get('quality_check_started_file'))
    self.completed = os.path.join(
        self.qc_path, self.io_conf.get('quality_check_completed_file'))
def __init__(self, args=None, logger=None):
    """Resolve QC working/export paths and queue configuration."""
    self.logger = logger
    self.conf = get_conf(logger, args.config_file)
    io_conf = self.conf.get_io_section()
    self.io_conf = io_conf
    self.batch_queuing = args.batch_queuing
    self.queues_conf = self.conf.get_section('queues')

    label = args.rd_label
    # CLI value or a default assembled from the IO config section
    ds_path = args.ds_path or os.path.join(
        io_conf.get('archive_root_path'), label,
        io_conf.get('ds_folder_name'))
    qc_path = args.qc_path or os.path.join(
        ds_path, io_conf.get('qc_folder_name'))
    qc_export_path = args.qc_export_path or os.path.join(
        io_conf.get('qc_export_basepath'), label)

    # FIXME: this is a local path, must be checked that run on right node
    if not path_exists(qc_export_path, logger, force=False):
        ensure_dir(qc_export_path)
    path_exists(ds_path, logger)
    path_exists(qc_export_path, logger)

    self.input_path = ds_path
    self.output_path = qc_export_path
    self.qc_path = qc_path
    self.rerun = args.rerun
    # sentinel files recording quality-check progress
    self.started = os.path.join(
        qc_path, io_conf.get('quality_check_started_file'))
    self.completed = os.path.join(
        qc_path, io_conf.get('quality_check_completed_file'))
def __init__(self, args=None, logger=None):
    """Build the full path layout and processing options for a rundir.

    Derives the 'completed' and 'raw' counterparts of the running
    directory, the datasets/fastqc locations, and the samplesheet,
    RunInfo.xml and runParameters.xml file paths, then loads ownership
    and queue settings from the configuration.

    :param args: parsed CLI namespace (rd_path, config_file,
                 no_lane_splitting, barcode_mismatches,
                 overwrite_samplesheet, export_qc, batch_queuing read)
    :param logger: logger instance
    """
    self.logger = logger
    rpath = args.rd_path
    # the completed rundir mirrors the running one; raw data sits below it
    cpath = args.rd_path.replace('running', 'completed')
    apath = os.path.join(cpath, 'raw')
    self.rd = {
        'rpath': rpath,
        'cpath': cpath,
        'apath': apath,
        'label': os.path.basename(args.rd_path)
    }
    conf = get_conf(logger, args.config_file)
    self.conf = conf

    dspath = os.path.join(cpath, 'datasets')
    self.ds = {'path': dspath}

    fqc_basepath = os.path.join(dspath, 'fastqc')
    self.fqc = dict(path=fqc_basepath)
    io_conf = conf.get_io_section()
    export_path = os.path.join(io_conf.get('qc_export_basepath'),
                               self.rd['label'])
    self.fqc.update(dict(export_path=export_path))

    # FIX: removed the redundant single-argument os.path.join(cpath) /
    # os.path.join(rpath) calls -- they were no-ops.
    ssheet = {
        'basepath': cpath,
        'filename': 'SampleSheet.csv'
    }
    ssheet['file_path'] = os.path.join(ssheet['basepath'],
                                       ssheet['filename'])
    self.samplesheet = ssheet

    run_info = {'basepath': rpath, 'filename': 'RunInfo.xml'}
    run_info['file_path'] = os.path.join(run_info['basepath'],
                                         run_info['filename'])
    run_info['file_apath'] = os.path.join(apath, run_info['filename'])
    self.run_info = run_info

    run_parameters = {
        'basepath': rpath,
        'filename': 'runParameters.xml'
    }
    run_parameters['file_path'] = os.path.join(run_parameters['basepath'],
                                               run_parameters['filename'])
    run_parameters['file_apath'] = os.path.join(apath,
                                                run_parameters['filename'])
    self.run_parameters = run_parameters

    do_conf = conf.get_section('data_ownership')
    self.user = do_conf.get('user')
    self.group = do_conf.get('group')

    self.no_lane_splitting = args.no_lane_splitting
    self.barcode_mismatches = args.barcode_mismatches
    self.overwrite_samplesheet = args.overwrite_samplesheet
    self.copy_qc = args.export_qc
    self.batch_queuing = args.batch_queuing
    self.queues_conf = conf.get_section('queues')

    self._add_config_from_cli(args)
def __init__(self, args=None, logger=None):
    """Resolve every rundir/dataset/QC path and preprocessing option."""
    self.logger = logger
    conf = get_conf(logger, args.config_file)
    self.conf = conf
    io = conf.get_io_section()
    self.io_conf = io
    self.ir_conf = conf.get_irods_section()
    self.do_conf = conf.get_section('data_ownership')
    self.queues_conf = conf.get_section('queues')

    rd_label = args.rd_label

    def _or_default(value, *parts):
        # CLI value wins; otherwise build the path from config pieces
        return value if value else os.path.join(*parts)

    rd_path = _or_default(args.rd_path,
                          io.get('rundirs_root_path'), rd_label)
    ds_path = _or_default(args.ds_path,
                          io.get('archive_root_path'), rd_label,
                          io.get('ds_folder_name'))
    ssheet_path = _or_default(args.ssheet_path,
                              io.get('archive_root_path'), rd_label,
                              io.get('ssheet_filename'))
    qc_path = _or_default(args.qc_path,
                          ds_path, io.get('qc_folder_name'))
    qc_export_path = _or_default(args.qc_export_path,
                                 io.get('qc_export_basepath'), rd_label)
    logbook_path = _or_default(args.logbook,
                               ds_path, io.get('logbook_filename'))

    run_info_path = os.path.join(rd_path, io.get('run_info_filename'))
    run_parameters_path = os.path.join(
        rd_path, io.get('run_parameters_filename'))

    self.rd = dict(path=rd_path, label=rd_label)
    self.ds = dict(path=ds_path)
    self.qc = dict(path=qc_path, export_path=qc_export_path)
    self.samplesheet = dict(path=ssheet_path,
                            filename=os.path.basename(ssheet_path))
    self.logbook = dict(path=logbook_path,
                        filename=os.path.basename(logbook_path))
    self.run_info = dict(path=run_info_path,
                         filename=os.path.basename(run_info_path))
    self.run_parameters = dict(
        path=run_parameters_path,
        filename=os.path.basename(run_parameters_path))

    self.user = self.do_conf.get('user')
    self.group = self.do_conf.get('group')
    self.no_lane_splitting = args.no_lane_splitting
    self.barcode_mismatches = args.barcode_mismatches
    self.with_failed_reads = args.with_failed_reads
    # regenerate the samplesheet only when none exists on disk yet
    self.overwrite_samplesheet = not os.path.isfile(ssheet_path)
    self.emit_events = args.emit_events
    self.batch_queuing = args.batch_queuing
    self.started_file = os.path.join(
        self.rd['path'], io.get('preprocessing_started_file'))
    self.completed_file = os.path.join(
        self.rd['path'], io.get('preprocessing_completed_file'))