Example #1
def copy_qc_dirs(src, dest, copy_qc=True):
    if copy_qc:
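        # QC-related subdirectories to replicate under dest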
        dirs = ['Stats', 'Reports', 'fastqc']
        ensure_dir(dest)
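        # .si() builds immutable Celery signatures, so each copy ignores upstream results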
        task0 = copy.si(os.path.join(src, dirs[0]),
                        os.path.join(dest, dirs[0]))
        task1 = copy.si(os.path.join(src, dirs[1]),
                        os.path.join(dest, dirs[1]))
        task2 = copy.si(os.path.join(src, dirs[2]),
                        os.path.join(dest, dirs[2]))

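        # dispatch the three copy tasks as a Celery group and wait; join() collects their results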
        job = group(task0, task1, task2)()
        while job.waiting():
            pass
        return job.join()

    return None
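In this snippet, `copy` and `ensure_dir` are presta's own Celery task and directory helper rather than standard-library calls. A minimal sketch of what such helpers could look like, assuming a Celery application named `app` and shutil-based copying (hypothetical names and behaviour; the real presta implementations may differ):

import errno
import os
import shutil

from celery import Celery

app = Celery('presta_sketch')  # hypothetical app instance, for illustration only


def ensure_dir(path):
    # create the directory (and its parents) if it does not exist yet
    try:
        os.makedirs(path)
    except OSError as e:
        if e.errno != errno.EEXIST:
            raise


@app.task(name='presta_sketch.copy')
def copy(src, dest):
    # recursively copy a directory tree, or a single file, to dest
    if os.path.isdir(src):
        shutil.copytree(src, dest)
    else:
        shutil.copy2(src, dest)
    return dest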
Example #2
    def __fs2fs_carrier(self, ipath, opath):
        bids = [
            _ for _ in self.batch_info.keys()
            if self.batch_info[_].get('type') not in SAMPLE_TYPES_TOSKIP
        ]
        self.logger.info('Looking for files related to {} Bika ids'.format(
            len(bids)))
        self.logger.info('Starting from {}'.format(ipath))
        if len(bids) > 0:
            ensure_dir(os.path.join(opath, self.batch_id))

        dm = DatasetsManager(self.logger, bids)
        datasets_info, count = dm.collect_fastq_from_fs(ipath)
        self.logger.info("found {} files".format(count))

        for bid in bids:
            if bid in datasets_info:
                for f in datasets_info[bid]:
                    src = f.get('filepath')
                    read = f.get('read_label')
                    lane = f.get('lane')
                    ext = f.get('file_ext')
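                    # build the destination filename: <client_sample_id>[_<lane>]_<read_label>.<file_ext>,
                    # with spaces in the sample id replaced by underscores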
                    sample_label = self.batch_info[bid].get('client_sample_id')
                    sample_label = '_'.join([
                        sample_label.replace(' ', '_'), lane, read
                    ]) if lane else '_'.join(
                        [sample_label.replace(' ', '_'), read])
                    sample_label = '.'.join([sample_label, ext])
                    dst = os.path.join(opath, self.batch_id, sample_label)

                    self.logger.info("Coping {} into {}".format(src, dst))
                    if os.path.isfile(dst):
                        self.logger.info('{} skipped'.format(
                            os.path.basename(dst)))
                    else:
                        if not self.dry_run:
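                            # dispatch the copy asynchronously (fire-and-forget)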
                            copy.si(src, dst).delay()
                            self.logger.info('{} copied'.format(
                                os.path.basename(dst)))
            else:
                msg = 'I have not found any file related to this ' \
                      'Bika id: {}'.format(bid)
                self.logger.warning(msg)
                self.logger.info('{} skipped'.format(bid))
Example #3
File: qc.py Project: ratzeni/presta
    def run(self):

        copy_task = copy_qc_dirs.si(self.input_path, self.output_path)
        msgs = [
            "Generating Fastqc reports",
            "Coping qc dirs from {} to {}".format(self.input_path,
                                                  self.output_path)
        ]
        if not path_exists(self.fqc_path, self.logger, force=False):
            self.logger.info("{} and {}".format(msgs[0], msgs[1]))
            ensure_dir(self.fqc_path)
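            # collect FASTQ files, run FastQC, then copy the QC dirs to the export area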
            qc_task = chain(
                rd_collect_fastq.si(ds_path=self.input_path),
                qc_runner.s(outdir=self.fqc_path,
                            batch_queuing=self.batch_queuing,
                            queue_spec=self.queues_conf.get('q_fastqc')),
                copy_task).delay()
        else:
            self.logger.info(msgs[1])
            copy_task.delay()
Example #4
File: qc.py Project: ratzeni/presta
    def __init__(self, args=None, logger=None):
        self.logger = logger
        conf = get_conf(logger, args.config_file)
        self.batch_queuing = args.batch_queuing
        self.queues_conf = conf.get_section('queues')

        r_dir_label = args.rundir_label
        ds_dir_label = 'datasets'
        fqc_dir_label = 'fastqc'

        input_path = args.ds_path
        output_path = args.export_path

        if not (r_dir_label or (input_path and output_path)):
            logger.error("You must provide the rundir_label or both ds_path "
                         "and export_path")
            sys.exit()

        # the input path must be provided either as a parser argument or via the config file
        if not input_path:
            io_conf = conf.get_io_section()
            input_path = os.path.join(io_conf.get('archive_root_path'),
                                      r_dir_label, ds_dir_label)
        path_exists(input_path, logger)
        self.input_path = input_path

        # the export path must be provided either as a parser argument or via the config file
        if not output_path:
            io_conf = conf.get_io_section()
            output_path = os.path.join(io_conf.get('qc_export_basepath'),
                                       r_dir_label)
        # FIXME: this is a local path; check that this runs on the right node
        if not path_exists(output_path, logger, force=False):
            ensure_dir(output_path)
        path_exists(output_path, logger)
        self.output_path = output_path

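        # FastQC reports live under the input (datasets) path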
        self.fqc_path = os.path.join(self.input_path, fqc_dir_label)
Example #5
    def run(self):
        path_exists(self.rd['rpath'], self.logger)
        rd_status_checks = rd_ready_to_be_preprocessed(
            user=self.user,
            group=self.group,
            path=self.rd['rpath'],
            rd_label=self.rd['label'],
            ssht_filename=self.samplesheet['filename'],
            ir_conf=self.conf.get_irods_section())

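        # aggregate the readiness flags returned by rd_ready_to_be_preprocessed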
        check = rd_status_checks[0] and rd_status_checks[1] and \
                rd_status_checks[2][0]

        barcodes_have_same_size = rd_status_checks[2][1]
        check_sanitize_metadata = not rd_status_checks[3]

        if not check:
            self.logger.error("{} is not ready to be preprocessed".format(
                self.rd['label']))
            sys.exit()

        self.logger.info('Processing {}'.format(self.rd['label']))
        self.logger.info('running path {}'.format(self.rd['rpath']))
        self.logger.info('completed path {}'.format(self.rd['cpath']))
        self.logger.info('archive path {}'.format(self.rd['apath']))
        self.logger.info('samplesheet path {}'.format(
            self.samplesheet['file_path']))

        ensure_dir(self.ds['path'])
        ensure_dir(self.fqc['path'])

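        # sanitize iRODS metadata and copy the run info / run parameters files into iRODS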
        irods_task = chain(
            sanitize_metadata.si(conf=self.conf.get_irods_section(),
                                 ssht_filename=self.samplesheet['filename'],
                                 rd_label=self.rd['label'],
                                 sanitize=check_sanitize_metadata),
            copy_run_info_to_irods.si(conf=self.conf.get_irods_section(),
                                      run_info_path=self.run_info['file_path'],
                                      rd_label=self.rd['label']),
            copy_run_parameters_to_irods.si(
                conf=self.conf.get_irods_section(),
                run_parameters_path=self.run_parameters['file_path'],
                rd_label=self.rd['label']),
        )

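        # fetch the samplesheet from iRODS and apply value replacements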
        samplesheet_task = chain(
            copy_samplesheet_from_irods.si(
                conf=self.conf.get_irods_section(),
                ssht_path=self.samplesheet['file_path'],
                rd_label=self.rd['label'],
                overwrite_samplesheet=self.overwrite_samplesheet),
            replace_values_into_samplesheet.si(
                conf=self.conf.get_irods_section(),
                ssht_path=self.samplesheet['file_path'],
                rd_label=self.rd['label'],
                overwrite_samplesheet=self.overwrite_samplesheet),
        )

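        # collect FASTQ files, run FastQC, then export the QC directories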
        qc_task = chain(
            rd_collect_fastq.si(ds_path=self.ds['path']),
            qc_runner.s(outdir=self.fqc['path'],
                        batch_queuing=self.batch_queuing,
                        queue_spec=self.queues_conf.get('low')),
            copy_qc_dirs.si(src=self.fqc['path'],
                            dest=self.fqc['export_path'],
                            copy_qc=self.copy_qc),
        )

        # full pre-processing pipeline for the sequencing rundir
        pipeline = chain(
            irods_task,
            samplesheet_task,
            replace_index_cycles_into_run_info.si(
                conf=self.conf.get_irods_section(),
                barcodes_have_same_size=barcodes_have_same_size,
                run_info_path=self.run_info['file_path'],
                rd_label=self.rd['label']),
            move.si(self.rd['rpath'], self.rd['apath']),
            bcl2fastq.si(rd_path=self.rd['apath'],
                         ds_path=self.ds['path'],
                         ssht_path=self.samplesheet['file_path'],
                         no_lane_splitting=self.no_lane_splitting,
                         barcode_mismatches=self.barcode_mismatches,
                         batch_queuing=self.batch_queuing,
                         queue_spec=self.queues_conf.get('low')),
            replace_index_cycles_into_run_info.si(
                conf=self.conf.get_irods_section(),
                barcodes_have_same_size=barcodes_have_same_size,
                run_info_path=self.run_info['file_apath'],
                rd_label=self.rd['label']),
            qc_task,
        ).delay()
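All five examples lean on Celery's canvas primitives: `.s()` builds a signature that receives the previous task's result, `.si()` builds an immutable signature that ignores it, `chain` links signatures so they run one after another, and `.delay()` dispatches the composed workflow to a worker. A minimal, self-contained illustration of the pattern, with hypothetical tasks that are not part of presta:

from celery import Celery, chain

app = Celery('canvas_demo', broker='memory://')  # throwaway app for illustration


@app.task
def add(x, y):
    return x + y


@app.task
def report(result):
    print('pipeline finished with {}'.format(result))


# add.si(1, 2) ignores any upstream result; add.s(3) receives the previous
# task's return value (3) as its first argument, yielding 6; report.s() prints it.
workflow = chain(add.si(1, 2), add.s(3), report.s())
# workflow.delay()  # dispatch asynchronously; requires a running Celery worker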