Example #1
    def __execute_playbook(self, playbook, inventory_file, random_user,
                           random_clear_text_password):
        path_exists(playbook, self.logger)
        path_exists(inventory_file, self.logger)

        variable_manager = VariableManager()
        loader = DataLoader()

        inventory = Inventory(loader=loader,
                              variable_manager=variable_manager,
                              host_list=inventory_file)

        Options = namedtuple('Options', [
            'listtags', 'listtasks', 'listhosts', 'syntax', 'connection',
            'module_path', 'forks', 'remote_user', 'private_key_file',
            'ssh_common_args', 'ssh_extra_args', 'sftp_extra_args',
            'scp_extra_args', 'become', 'become_method', 'become_user',
            'verbosity', 'check'
        ])

        options = Options(listtags=False,
                          listtasks=False,
                          listhosts=False,
                          syntax=False,
                          connection='ssh',
                          module_path=None,
                          forks=1,
                          remote_user=None,
                          private_key_file=None,
                          ssh_common_args=None,
                          ssh_extra_args=None,
                          sftp_extra_args=None,
                          scp_extra_args=None,
                          become=True,
                          become_method='sudo',
                          become_user='******',
                          verbosity=None,
                          check=False)

        variable_manager.extra_vars = {
            'r_user': random_user,
            'r_password': random_clear_text_password
        }
        passwords = {}

        pbex = PlaybookExecutor(playbooks=[playbook],
                                inventory=inventory,
                                variable_manager=variable_manager,
                                loader=loader,
                                options=options,
                                passwords=passwords)
        results = pbex.run()
        return results
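Example #1 targets the pre-2.4 Ansible Python API (ansible.inventory.Inventory plus a no-argument VariableManager). On Ansible 2.4 through 2.7 the inventory side of that API changed; below is a hedged sketch of the lines that would differ, with placeholder values, assuming the InventoryManager/VariableManager classes from those releases. The Options namedtuple and the PlaybookExecutor call from Example #1 stay as they are; exact signatures drift between releases, so treat this as an outline rather than a drop-in port.

    from ansible.parsing.dataloader import DataLoader
    from ansible.inventory.manager import InventoryManager
    from ansible.vars.manager import VariableManager

    inventory_file = 'inventory'               # placeholder path
    loader = DataLoader()
    inventory = InventoryManager(loader=loader, sources=[inventory_file])
    variable_manager = VariableManager(loader=loader, inventory=inventory)
    # the public extra_vars setter is not available on every release, so the
    # private attribute is set here (assumption: same vars as in Example #1)
    variable_manager._extra_vars = {'r_user': 'ftp_user',        # placeholder values
                                    'r_password': 'ftp_password'}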
Example #2
    def check(self):
        def flatten(l):
            out = []
            for item in l:
                if isinstance(item, (list, tuple)):
                    out.extend(flatten(item))
                else:
                    out.append(item)
            return out

        path_exists(self.root_path, self.logger)
        localroot, dirnames, filenames = next(os.walk(self.root_path))

        positive_labels = ['finished', 'ownership ok', 'SampleSheet found',
                           'Barcodes have the same size', 'Metadata found',
                           'To be processed']
        negative_labels = ['running', "waiting for ownership's modification",
                           'SampleSheet not found',
                           "Barcodes don't have the same size",
                           'Metadata not found', 'Processed']

        dir_dict = dict()
        for d in dirnames:
            dir_dict[d] = []
            d_path = os.path.join(self.root_path, d)
            checks = rd_ready_to_be_preprocessed(user=self.user,
                                                 group=self.group,
                                                 path=d_path,
                                                 rd_label=d,
                                                 ir_conf=self.ir_conf,
                                                 io_conf=self.io_conf)

            ready_to_be_preprocessed = checks[0] and checks[1] and checks[2][0] and checks[4]

            if self.emit_events and ready_to_be_preprocessed:
                dispatch_event.si(event='rd_ready',
                                  params=dict(rd_path=d_path,
                                              rd_label=d,
                                              emit_events=self.emit_events)
                                  ).delay()

            checks = flatten(checks)
            for i in range(len(checks)):
                if checks[i]:
                    dir_dict[d].append(positive_labels[i])
                else:
                    dir_dict[d].append(negative_labels[i])

        self.logger.info('Checking rundirs in: {}'.format(self.root_path))

        for d, labels in dir_dict.items():
            self.logger.info(' ')
            self.logger.info('Rundir {}'.format(d))
            self.logger.info('{}'.format(labels))
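The label mapping in Example #2 works because flatten() lines the nested check results up index-by-index with the two label lists. Here is a small, self-contained sketch of that pairing with made-up boolean values; only the labels come from the code above.

    def flatten(l):
        out = []
        for item in l:
            if isinstance(item, (list, tuple)):
                out.extend(flatten(item))
            else:
                out.append(item)
        return out

    positive = ['finished', 'ownership ok', 'SampleSheet found',
                'Barcodes have the same size', 'Metadata found', 'To be processed']
    negative = ['running', "waiting for ownership's modification", 'SampleSheet not found',
                "Barcodes don't have the same size", 'Metadata not found', 'Processed']

    # hypothetical results of rd_ready_to_be_preprocessed for one rundir
    checks = [True, True, (True, False), False, True]
    labels = [pos if ok else neg
              for ok, pos, neg in zip(flatten(checks), positive, negative)]
    # labels -> ['finished', 'ownership ok', 'SampleSheet found',
    #            "Barcodes don't have the same size", 'Metadata not found', 'To be processed']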
Example #3
    def __init__(self, args=None, logger=None):
        self.logger = logger
        self.output_format = args.output_format

        self.conf = get_conf(logger, args.config_file)
        self.io_conf = self.conf.get_io_section()

        c = Client(conf=self.conf, logger=self.logger)
        c.init_bika()
        self.bika = c.bk

        path_exists(args.input_file, self.logger)
        with open(args.input_file, 'r') as stream:
            input_data = yaml.safe_load(stream)

        self.input_paths = input_data.get('default_paths',
                                          [self.io_conf.get('archive_root_path')])
        for input_path in self.input_paths:
            path_exists(input_path, self.logger)

        output_file = args.output_file if args.output_file else None
        if output_file and output_file != os.path.realpath(output_file):
            self.logger.error('{} is not a valid path. Please use an absolute path'.format(output_file))
            sys.exit()
        self.output_file = output_file

        batches = input_data.get('batches') or {}

        self.batches_info = dict()
        self.sids = list()

        for _, batch in batches.items():
            bid = batch.get('bid', None)
            samples = batch.get('samples', [])

            if bid:
                self.logger.info("Retrieving info for batch {}".format(bid))
                batch_info = self.bika.get_batch_info(bid, samples)
                if batch_info:
                    sids = [k for k, v in batch_info.items()
                            if v.get('type') not in SAMPLE_TYPES_TOSKIP]
                    self.sids.extend(sids)
                    self.batches_info.update(batch_info)
                else:
                    self.logger.error('No samples information found for the '
                                      'batch {}'.format(bid))

        if not self.sids:
            self.logger.error('I have not retrieved any information for the '
                              'batches listed in {}'.format(args.input_file))
            sys.exit()
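Example #3 drives everything from a YAML input file. Judging from the .get() calls above, that file boils down to the structure below; this is a hedged sketch written as the dictionary yaml.safe_load would return, with invented batch names, ids and sample ids.

    input_data = {
        'default_paths': ['/archive/root'],     # optional; falls back to io_conf
        'batches': {
            'batch_1': {'bid': 'B16-0001', 'samples': ['S1', 'S2']},
            'batch_2': {'bid': 'B16-0002'},     # 'samples' defaults to []
        },
    }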
Example #4
File: qc.py Project: ratzeni/presta
    def __init__(self, args=None, logger=None):
        self.logger = logger
        conf = get_conf(logger, args.config_file)
        self.batch_queuing = args.batch_queuing
        self.queues_conf = conf.get_section('queues')

        r_dir_label = args.rundir_label
        ds_dir_label = 'datasets'
        fqc_dir_label = 'fastqc'

        input_path = args.ds_path
        output_path = args.export_path

        if not (r_dir_label or (input_path and output_path)):
            logger.error("You must provide the rundir_label or both ds_path "
                         "and export_path")
            sys.exit()

        # input path must exist as parser argument or as config file argument
        if not input_path:
            io_conf = conf.get_io_section()
            input_path = os.path.join(io_conf.get('archive_root_path'),
                                      r_dir_label, ds_dir_label)
        path_exists(input_path, logger)
        self.input_path = input_path

        # export path must exist as parser argument or as config file argument
        if not output_path:
            io_conf = conf.get_io_section()
            output_path = os.path.join(io_conf.get('qc_export_basepath'),
                                       r_dir_label)
        # FIXME: this is a local path; check that this runs on the right node
        if not path_exists(output_path, logger, force=False):
            ensure_dir(output_path)
        path_exists(output_path, logger)
        self.output_path = output_path

        self.fqc_path = os.path.join(self.input_path, fqc_dir_label)
Example #5
    def __init__(self, args=None, logger=None):
        self.logger = logger
        self.destination = args.destination
        self.dry_run = args.dry_run

        conf = get_conf(logger, args.config_file)
        self.conf = conf

        self.batch_id = batch_id = args.batch_id
        c = Client(conf=conf, logger=logger)
        c.init_bika()
        batch_info = c.bk.get_batch_info(batch_id)
        if batch_info:
            self.batch_info = batch_info
        else:
            logger.error('I have not found any information for the samples '
                         'belonging to batch {}'.format(batch_id))
            sys.exit()

        # input path must exist as parser argument or as config file argument
        if args.input_path:
            input_path = args.input_path
        else:
            io_conf = conf.get_io_section()
            input_path = io_conf.get('archive_root_path')
        path_exists(input_path, logger)
        self.input_path = input_path

        output_path = args.output_path if args.output_path else None
        self.output_path = output_path

        inventory = args.inventory if args.inventory else None
        self.inventory = inventory

        playbook_path = args.playbook_path if args.playbook_path else None
        self.playbook_path = playbook_path
Example #6
File: qc.py Project: gmauro/presta
    def __init__(self, args=None, logger=None):
        self.logger = logger
        self.conf = get_conf(logger, args.config_file)
        self.io_conf = self.conf.get_io_section()
        self.batch_queuing = args.batch_queuing
        self.queues_conf = self.conf.get_section('queues')

        rd_label = args.rd_label
        ds_path = args.ds_path if args.ds_path \
            else os.path.join(self.io_conf.get('archive_root_path'),
                              rd_label,
                              self.io_conf.get('ds_folder_name'))

        qc_path = args.qc_path if args.qc_path \
            else os.path.join(ds_path,
                              self.io_conf.get('qc_folder_name'))

        qc_export_path = args.qc_export_path if args.qc_export_path \
            else os.path.join(self.io_conf.get('qc_export_basepath'),
                              rd_label)

        # FIXME: this is a local path; check that this runs on the right node
        if not path_exists(qc_export_path, logger, force=False):
            ensure_dir(qc_export_path)

        path_exists(ds_path, logger)
        path_exists(qc_export_path, logger)

        self.input_path = ds_path
        self.output_path = qc_export_path
        self.qc_path = qc_path
        self.rerun = args.rerun
        self.started = os.path.join(self.qc_path,
                                    self.io_conf.get('quality_check_started_file'))
        self.completed = os.path.join(self.qc_path,
                                      self.io_conf.get('quality_check_completed_file'))
Example #8
File: qc.py Project: ratzeni/presta
    def run(self):

        copy_task = copy_qc_dirs.si(self.input_path, self.output_path)
        msgs = [
            "Generating Fastqc reports",
            "Coping qc dirs from {} to {}".format(self.input_path,
                                                  self.output_path)
        ]
        if not path_exists(self.fqc_path, self.logger, force=False):
            self.logger.info("{} and {}".format(msgs[0], msgs[1]))
            ensure_dir(self.fqc_path)
            qc_task = chain(
                rd_collect_fastq.si(ds_path=self.input_path),
                qc_runner.s(outdir=self.fqc_path,
                            batch_queuing=self.batch_queuing,
                            queue_spec=self.queues_conf.get('q_fastqc')),
                copy_task).delay()
        else:
            self.logger.info(msgs[1])
            copy_task.delay()
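Example #8 (and the qc variants that follow) composes Celery signatures: .si() builds an immutable signature whose arguments are fixed, .s() leaves room for the previous task's return value to arrive as the first positional argument, and chain() runs them in order. A minimal, self-contained sketch of that pattern with hypothetical tasks follows; none of these task names belong to presta.

    from celery import Celery, chain

    app = Celery('sketch', broker='memory://')

    @app.task
    def collect(ds_path):
        # pretend to scan ds_path for fastq files
        return ['a.fastq.gz', 'b.fastq.gz']

    @app.task
    def run_qc(fastq_list, outdir):
        # receives collect()'s return value as its first argument
        return outdir

    @app.task
    def copy_dirs(src, dest):
        # .si() below makes this ignore whatever run_qc returned
        return dest

    workflow = chain(collect.si(ds_path='/data/ds'),
                     run_qc.s(outdir='/data/fastqc'),
                     copy_dirs.si(src='/data/fastqc', dest='/export/qc'))
    # workflow.delay() would enqueue the whole chain on a running worker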
Example #10
File: qc.py Project: gmauro/presta
    def run(self):
        msgs = ["Generating Fastqc reports",
                "Coping qc dirs from {} to {}".format(self.input_path,
                                                      self.output_path)]

        if path_exists(self.qc_path, self.logger, force=False) and len(os.listdir(self.qc_path)) > 0 \
                and not self.rerun:

            self.logger.info(msgs[1])
            copy_task = dispatch_event.si(event='copy_qc_folders',
                                          params=dict(src=self.input_path,
                                                      dest=self.output_path)
                                          )
            copy_task.delay()

        else:
            self.logger.info("{} and {}".format(msgs[0], msgs[1]))
            ensure_dir(self.qc_path, force=True)

            qc_task = chain(dispatch_event.si(event='qc_started',
                                              params=dict(progress_status_file=self.started)),
                            rd_collect_fastq.si(ds_path=self.input_path),
                            qc_runner.s(outdir=self.qc_path,
                                        batch_queuing=self.batch_queuing,
                                        queue_spec=self.queues_conf.get('q_fastqc')),
                            ).apply_async()

            copy_task = trigger_event.si(event='copy_qc_folders',
                                         params=dict(src=self.input_path,
                                                     dest=self.output_path),
                                         tasks=qc_task.get())
            copy_task.apply_async()

            trigger_event.si(event='qc_completed',
                             params=dict(progress_status_file=self.completed),
                             tasks=qc_task.get()).apply_async()
Example #11
    def run(self):
        if self.destination == 'path':
            io_conf = self.conf.get_io_section()
            if self.output_path:
                output_path = self.output_path
            else:
                output_path = io_conf.get('ds_export_path')

            # if not path_exists(output_path, logger, force=False):
            #     ensure_dir(output_path)
            # path_exists(output_path, logger)
            self.__fs2fs_carrier(self.input_path, output_path)

        if self.destination == 'ftp':

            def pass_gen(length):
                import string
                import random

                chars = string.ascii_letters + string.digits + '@-_'

                return "".join(random.choice(chars) for _ in range(length))

            random_user = pass_gen(8)
            random_clear_text_password = pass_gen(12)

            self.logger.info('Creating a random account on the ftp server')
            self.logger.info('user: {}'.format(random_user))
            self.logger.info('password: {}'.format(random_clear_text_password))

            playbook_label = 'create_ftp_user.yml'
            if self.playbook_path:
                playbook_path = self.playbook_path
            else:
                io_conf = self.conf.get_io_section()
                playbook_path = os.path.expanduser(
                    io_conf.get('playbooks_path'))
            playbook = os.path.join(playbook_path, playbook_label)
            path_exists(playbook, self.logger)

            inventory_label = 'inventory'
            if self.inventory:
                inventory = self.inventory
            else:
                io_conf = self.conf.get_io_section()
                inventory_path = os.path.expanduser(
                    io_conf.get('playbooks_path'))
                inventory = os.path.join(inventory_path, inventory_label)
            path_exists(inventory, self.logger)

            results = self.__execute_playbook(playbook, inventory, random_user,
                                              random_clear_text_password)
            self.logger.info('Playbook result: {}'.format(results))

            if self.output_path:
                output_path = self.output_path
            else:
                io_conf = self.conf.get_io_section()
                output_path = os.path.join(io_conf.get('ftp_export_path'),
                                           random_user)
            path_exists(output_path, self.logger)

            self.__fs2fs_carrier(self.input_path, output_path)
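pass_gen above draws from the random module, which is fine for throwaway FTP credentials but not cryptographically strong. A short alternative sketch using the standard-library secrets module (Python 3.6+), shown here without changing the original method:

    import secrets
    import string

    def pass_gen(length):
        chars = string.ascii_letters + string.digits + '@-_'
        return "".join(secrets.choice(chars) for _ in range(length))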
Example #12
    def run(self):
        path_exists(self.rd['rpath'], self.logger)
        rd_status_checks = rd_ready_to_be_preprocessed(
            user=self.user,
            group=self.group,
            path=self.rd['rpath'],
            rd_label=self.rd['label'],
            ssht_filename=self.samplesheet['filename'],
            ir_conf=self.conf.get_irods_section())

        check = rd_status_checks[0] and rd_status_checks[1] and \
                rd_status_checks[2][0]

        barcodes_have_same_size = rd_status_checks[2][1]
        check_sanitize_metadata = not rd_status_checks[3]

        if not check:
            self.logger.error("{} is not ready to be preprocessed".format(
                self.rd['label']))
            sys.exit()

        self.logger.info('Processing {}'.format(self.rd['label']))
        self.logger.info('running path {}'.format(self.rd['rpath']))
        self.logger.info('completed path {}'.format(self.rd['cpath']))
        self.logger.info('archive path {}'.format(self.rd['apath']))
        self.logger.info('samplesheet path {}'.format(
            self.samplesheet['file_path']))

        ensure_dir(self.ds['path'])
        ensure_dir(self.fqc['path'])

        irods_task = chain(
            sanitize_metadata.si(conf=self.conf.get_irods_section(),
                                 ssht_filename=self.samplesheet['filename'],
                                 rd_label=self.rd['label'],
                                 sanitize=check_sanitize_metadata),
            copy_run_info_to_irods.si(conf=self.conf.get_irods_section(),
                                      run_info_path=self.run_info['file_path'],
                                      rd_label=self.rd['label']),
            copy_run_parameters_to_irods.si(
                conf=self.conf.get_irods_section(),
                run_parameters_path=self.run_parameters['file_path'],
                rd_label=self.rd['label']),
        )

        samplesheet_task = chain(
            copy_samplesheet_from_irods.si(
                conf=self.conf.get_irods_section(),
                ssht_path=self.samplesheet['file_path'],
                rd_label=self.rd['label'],
                overwrite_samplesheet=self.overwrite_samplesheet),
            replace_values_into_samplesheet.si(
                conf=self.conf.get_irods_section(),
                ssht_path=self.samplesheet['file_path'],
                rd_label=self.rd['label'],
                overwrite_samplesheet=self.overwrite_samplesheet),
        )

        qc_task = chain(
            rd_collect_fastq.si(ds_path=self.ds['path']),
            qc_runner.s(outdir=self.fqc['path'],
                        batch_queuing=self.batch_queuing,
                        queue_spec=self.queues_conf.get('low')),
            copy_qc_dirs.si(src=self.fqc['path'],
                            dest=self.fqc['export_path'],
                            copy_qc=self.copy_qc),
        )

        # full pre-processing sequencing rundir pipeline
        pipeline = chain(
            irods_task,
            samplesheet_task,
            replace_index_cycles_into_run_info.si(
                conf=self.conf.get_irods_section(),
                barcodes_have_same_size=barcodes_have_same_size,
                run_info_path=self.run_info['file_path'],
                rd_label=self.rd['label']),
            move.si(self.rd['rpath'], self.rd['apath']),
            bcl2fastq.si(rd_path=self.rd['apath'],
                         ds_path=self.ds['path'],
                         ssht_path=self.samplesheet['file_path'],
                         no_lane_splitting=self.no_lane_splitting,
                         barcode_mismatches=self.barcode_mismatches,
                         batch_queuing=self.batch_queuing,
                         queue_spec=self.queues_conf.get('low')),
            replace_index_cycles_into_run_info.si(
                conf=self.conf.get_irods_section(),
                barcodes_have_same_size=barcodes_have_same_size,
                run_info_path=self.run_info['file_apath'],
                rd_label=self.rd['label']),
            qc_task,
        ).delay()
Example #13
    def run(self):
        path_exists(self.rd['path'], self.logger)

        rd_status_checks = rd_ready_to_be_preprocessed(
            user=self.user,
            group=self.group,
            path=self.rd['path'],
            rd_label=self.rd['label'],
            ssht_filename=self.samplesheet['filename'],
            ir_conf=self.ir_conf,
            io_conf=self.io_conf)

        check = rd_status_checks[0] and rd_status_checks[1] and \
                rd_status_checks[2][0] and rd_status_checks[2][1]

        check_sanitize_metadata = not rd_status_checks[3]

        if not check:
            self.logger.error("{} is not ready to be preprocessed".format(
                self.rd['label']))
            sys.exit()

        self.logger.info('Processing {}'.format(self.rd['label']))
        self.logger.info('running path {}'.format(self.rd['path']))
        self.logger.info('datasets path {}'.format(self.ds['path']))
        self.logger.info('samplesheet path {}'.format(self.samplesheet['path']))

        if self.emit_events:
            self.logger.info('quality check output path {}'.format(self.qc['path']))
            self.logger.info('quality check export path {}'.format(self.qc['export_path']))

        ensure_dir(self.ds['path'])

        irods_task = chain(
            sanitize_metadata.si(conf=self.ir_conf,
                                 ssht_filename=self.samplesheet['filename'],
                                 rd_label=self.rd['label'],
                                 sanitize=check_sanitize_metadata,
                                 logbook_path=self.logbook['path']
                                 ),

            copy_run_info_to_irods.si(conf=self.ir_conf,
                                      run_info_path=self.run_info['path'],
                                      rd_label=self.rd['label'],
                                      logbook_path=self.logbook['path']
                                      ),

            copy_run_parameters_to_irods.si(conf=self.ir_conf,
                                            run_parameters_path=self.run_parameters['path'],
                                            rd_label=self.rd['label'],
                                            logbook_path=self.logbook['path']
                                            ),
        )

        samplesheet_task = chain(

            copy_samplesheet_from_irods.si(conf=self.ir_conf,
                                           ssht_path=self.samplesheet['path'],
                                           rd_label=self.rd['label'],
                                           overwrite_samplesheet=self.overwrite_samplesheet,
                                           logbook_path=self.logbook['path']
                                           ),

            replace_values_into_samplesheet.si(conf=self.ir_conf,
                                               ssht_path=self.samplesheet['path'],
                                               rd_label=self.rd['label'],
                                               overwrite_samplesheet=self.overwrite_samplesheet,
                                               logbook_path=self.logbook['path']
                                               ),

        )

        # full pre-processing sequencing rundir pipeline
        pipeline = chain(
            dispatch_event.si(event='preprocessing_started',
                              params=dict(ds_path=self.ds['path'],
                                          rd_label=self.rd['label'],
                                          progress_status_file=self.started_file,
                                          emit_events=self.emit_events)),

            irods_task,
            samplesheet_task,

            replace_index_cycles_into_run_info.si(conf=self.ir_conf,
                                                  ssht_path=self.samplesheet['path'],
                                                  run_info_path=self.run_info['path'],
                                                  rd_label=self.rd['label'],
                                                  logbook_path=self.logbook['path']),

            bcl2fastq.si(rd_path=self.rd['path'],
                         ds_path=self.ds['path'],
                         ssht_path=self.samplesheet['path'],
                         run_info_path=self.run_info['path'],
                         no_lane_splitting=self.no_lane_splitting,
                         barcode_mismatches=self.barcode_mismatches,
                         with_failed_reads=self.with_failed_reads,
                         batch_queuing=self.batch_queuing,
                         queue_spec=self.queues_conf.get('q_bcl2fastq'),
                         logbook_path=self.logbook['path']),

            replace_index_cycles_into_run_info.si(conf=self.ir_conf,
                                                  ssht_path=self.samplesheet['path'],
                                                  run_info_path=self.run_info['path'],
                                                  rd_label=self.rd['label'],
                                                  logbook_path=self.logbook['path']),

            dispatch_event.si(event='fastq_ready',
                              params=dict(ds_path=self.ds['path'],
                                          qc_path=self.qc['path'],
                                          qc_export_path=self.qc['export_path'],
                                          force=True,
                                          rd_label=self.rd['label'],
                                          progress_status_file=self.completed_file,
                                          emit_events=self.emit_events)),
        )
        pipeline.delay()