Beispiel #1
0
def setUpModule():
    """Write the local test configuration and register the config files.

    A small YAML document overriding ``parent_task`` (plus the source
    label/suffix settings of ``IndelRealigner``) is dumped to ``localconf``.
    Afterwards ``ratatosk_conf`` and then ``localconf`` are registered on the
    configuration object returned by ``get_config()``, which is stored in the
    module-level name ``cnf``.
    """
    global cnf
    cnf = get_config()
    local_settings = {
        'picard': {
            'InsertMetrics': {
                'parent_task': 'ratatosk.lib.tools.picard.DuplicationMetrics',
            },
        },
        'gatk': {
            'IndelRealigner': {
                'parent_task': [
                    'ratatosk.lib.tools.picard.MergeSamFiles',
                    'ratatosk.lib.tools.gatk.RealignerTargetCreator',
                    'ratatosk.lib.tools.gatk.UnifiedGenotyper',
                ],
                'source_label': [None, None, 'BOTH.raw'],
                'source_suffix': ['.bam', '.intervals', '.vcf'],
            },
            'RealignerTargetCreator': {
                'parent_task': 'ratatosk.lib.align.bwa.BwaAln',
            },
        },
    }
    with open(localconf, "w") as fp:
        fp.write(yaml.safe_dump(local_settings, default_flow_style=False))
    # Register the paths only once the ``with`` block has closed the file:
    # while the handle is still open the dumped YAML may sit in the write
    # buffer, so anything reading ``localconf`` could see an empty file.
    # Need to add ratatosk first, then override with localconf
    cnf.add_config_path(ratatosk_conf)
    cnf.add_config_path(localconf)
Beispiel #2
0
def setUpModule():
    """Create ``localconf`` and load it on top of the ratatosk configuration.

    The file receives a YAML dump of ``parent_task`` overrides for the picard
    ``InsertMetrics`` task and the gatk ``IndelRealigner`` and
    ``RealignerTargetCreator`` tasks. The configuration object obtained from
    ``get_config()`` is kept in the module-level ``cnf`` and is given
    ``ratatosk_conf`` first and ``localconf`` second.
    """
    global cnf
    cnf = get_config()
    overrides = {
        'picard': {
            'InsertMetrics': {
                'parent_task': 'ratatosk.lib.tools.picard.DuplicationMetrics',
            },
        },
        'gatk': {
            'IndelRealigner': {
                'parent_task': [
                    'ratatosk.lib.tools.picard.MergeSamFiles',
                    'ratatosk.lib.tools.gatk.RealignerTargetCreator',
                    'ratatosk.lib.tools.gatk.UnifiedGenotyper',
                ],
                'source_label': [None, None, 'BOTH.raw'],
                'source_suffix': ['.bam', '.intervals', '.vcf'],
            },
            'RealignerTargetCreator': {
                'parent_task': 'ratatosk.lib.align.bwa.BwaAln',
            },
        },
    }
    with open(localconf, "w") as fp:
        fp.write(yaml.safe_dump(overrides, default_flow_style=False))
    # The file must be closed (and therefore flushed) before it is handed to
    # the configuration object; inside the ``with`` block the content may
    # still be buffered and not yet visible to a reader.
    # Need to add ratatosk first, then override with localconf
    cnf.add_config_path(ratatosk_conf)
    cnf.add_config_path(localconf)
Beispiel #3
0
 def test_config_update_with_config(self):
     """Check that values from the configuration file replace the defaults."""
     genotyper = ratatosk.lib.tools.gatk.UnifiedGenotyper()
     # Map each parameter name to its value for a task built without arguments.
     param_values_dict = {}
     for entry in genotyper.get_param_values(genotyper.get_params(), [], {}):
         param_values_dict[entry[0]] = entry[1]
     # Start from an empty configuration so only mock.yaml contributes.
     config = get_config()
     config.clear()
     config.add_config_path("mock.yaml")
     updated = genotyper._update_config(config, param_values_dict)
     expected_options = ['-stand_call_conf 10.0', '-stand_emit_conf 3.0']
     self.assertEqual(updated['options'], expected_options)
Beispiel #4
0
 def test_job_init(self):
     """Instantiate a bwa ``Aln`` task and a gatk ``UnifiedGenotyper`` task.

     The test only checks that both constructors run with ``localconf``
     registered on the configuration; nothing is asserted on the results.
     """
     config = get_config()
     config.add_config_path(localconf)
     sai_target = "data/sample1_1.sai"
     fastq_parent = ('ratatosk.lib.align.bwa.InputFastqFile', )
     task = ratatosk.lib.align.bwa.Aln(target=sai_target,
                                       parent_task=fastq_parent)
     task = ratatosk.lib.tools.gatk.UnifiedGenotyper(target=sai_target)
Beispiel #5
0
def setUpModule():
    """Reset both configuration objects and load the test configuration.

    The regular configuration (module-level ``cnf``) is cleared and given
    ``../config/ratatosk.yaml`` (relative to this file) followed by
    ``localconf``. The custom configuration (module-level ``custom_cnf``) is
    cleared, given ``localconf`` only, and then reloaded.
    """
    global cnf, custom_cnf
    this_dir = os.path.dirname(__file__)
    ratatosk_yaml = os.path.join(this_dir, os.pardir, "config", "ratatosk.yaml")

    cnf = get_config()
    cnf.clear()
    for path in (ratatosk_yaml, localconf):
        cnf.add_config_path(path)

    custom_cnf = get_custom_config()
    custom_cnf.clear()
    custom_cnf.add_config_path(localconf)
    custom_cnf.reload()
Beispiel #6
0
 def requires(self):
     """Return the tasks this task depends on.

     The first entry of ``self.parent()`` is instantiated once for every
     unique target produced by the ``target_generator_handler`` handler.
     If ``self.target_generator_handler`` is set and no handler is
     registered under that label yet, one is created and registered first.

     Returns:
         A list of parent task instances, or an empty list (after logging
         a warning) when no target generator handler is available.
     """
     cls = self.parent()[0]
     # NOTE(review): the result of get_config() was never used here; the call
     # is kept in case it has initialization side effects -- confirm and drop.
     get_config()
     if self.target_generator_handler and "target_generator_handler" not in self._handlers:
         tgf = RatatoskHandler(label="target_generator_handler", mod=self.target_generator_handler)
         register_task_handler(self, tgf)
     if "target_generator_handler" not in self._handlers:
         # logging.warn is a deprecated alias of logging.warning
         logging.warning("MergeSamFiles requires a target generator handler; no defaults are as of yet implemented")
         return []
     # De-duplicate the generated targets before creating the parent tasks.
     sources = set(self._handlers["target_generator_handler"](self))
     return [cls(target=src) for src in sources]
Beispiel #7
0
def setUpModule():
    """Prepare the regular and the custom configuration for this module.

    ``cnf`` (regular configuration) is emptied and then loaded with the
    packaged ``config/ratatosk.yaml`` one directory above this file, followed
    by ``localconf``. ``custom_cnf`` (custom configuration) is emptied, loaded
    with ``localconf`` and reloaded. Both are module-level globals.
    """
    global cnf, custom_cnf
    module_dir = os.path.dirname(__file__)
    default_conf = os.path.join(module_dir, os.pardir, "config",
                                "ratatosk.yaml")

    # Regular configuration: packaged defaults first, local overrides second.
    cnf = get_config()
    cnf.clear()
    cnf.add_config_path(default_conf)
    cnf.add_config_path(localconf)

    # Custom configuration: local file only.
    custom_cnf = get_custom_config()
    custom_cnf.clear()
    custom_cnf.add_config_path(localconf)
    custom_cnf.reload()
Beispiel #8
0
 def test_config_update_with_custom_config(self):
     """Check that the custom configuration overrides the regular one."""
     genotyper = ratatosk.lib.tools.gatk.UnifiedGenotyper()
     # Parameter name -> value for a task created without any arguments.
     param_values_dict = {}
     for entry in genotyper.get_param_values(genotyper.get_params(), [], {}):
         param_values_dict[entry[0]] = entry[1]

     main_cnf = get_config()
     main_cnf.clear()
     main_cnf.add_config_path("mock.yaml")
     override_cnf = get_custom_config()
     override_cnf.clear()
     override_cnf.add_config_path("custommock.yaml")

     # Regular configuration is applied first ...
     from_main = genotyper._update_config(main_cnf, param_values_dict)
     self.assertEqual(from_main['options'],
                      ['-stand_call_conf 10.0', '-stand_emit_conf 3.0'])
     # ... and the custom configuration afterwards, with parent task
     # updates switched off.
     from_custom = genotyper._update_config(
         override_cnf, param_values_dict, disable_parent_task_update=True)
     self.assertEqual(from_custom['options'],
                      ['-stand_call_conf 20.0', '-stand_emit_conf 30.0'])
Beispiel #9
0
 def test_config_update_with_command_line_parameter(self):
     """Check that an explicitly passed parameter beats both config files."""
     genotyper = ratatosk.lib.tools.gatk.UnifiedGenotyper(options='test')
     param_values_dict = {}
     for entry in genotyper.get_param_values(genotyper.get_params(), [], {'options':'test'}):
         param_values_dict[entry[0]] = entry[1]

     main_cnf = get_config()
     main_cnf.clear()
     main_cnf.add_config_path("mock.yaml")
     override_cnf = get_custom_config()
     override_cnf.clear()
     override_cnf.add_config_path("custommock.yaml")

     kwargs = genotyper._update_config(main_cnf, param_values_dict)
     self.assertEqual(kwargs['options'],
                      ['-stand_call_conf 10.0', '-stand_emit_conf 3.0'])
     kwargs = genotyper._update_config(
         override_cnf, param_values_dict, disable_parent_task_update=True)
     self.assertEqual(kwargs['options'],
                      ['-stand_call_conf 20.0', '-stand_emit_conf 30.0'])

     # Replay the command line step of the task constructor: every parameter
     # whose passed value differs from its default overrides the config value.
     # Currently overriding parent_task *is* possible here (FIX ME?)
     for name, param in genotyper.get_params():
         passed_value = param_values_dict.get(name)
         if param.default != passed_value:
             kwargs[name] = passed_value
     self.assertEqual(kwargs['options'], 'test')
Beispiel #10
0
def setUpModule():
    """Clear the shared configuration and write the local test config file.

    The configuration object from ``get_config()`` is stored in the
    module-level ``cnf`` and emptied. ``localconf`` is then (re)written with a
    YAML dump holding settings for the bwa, picard and gatk task modules.
    """
    global cnf
    cnf = get_config()
    cnf.clear()

    # Repeated string values; every list/dict below is a fresh object so the
    # YAML dump contains no anchors or aliases.
    reference = 'data/chr11.fa'
    sam_to_bam = 'ratatosk.lib.tools.samtools.SamToBam'
    dup_metrics = 'ratatosk.lib.tools.picard.DuplicationMetrics'

    bwa_section = {
        'InputFastqFile': {'target_suffix': '.fastq.gz'},
        'bwaref': reference,
        'Aln': {'read1_suffix': "_1", 'read2_suffix': "_2"},
        'Bampe': {'add_label': ["_1", "_2"]},
        'Sampe': {'add_label': ["_1", "_2"]},
    }
    picard_section = {
        'InputBamFile': {'parent_task': sam_to_bam},
        'SortSam': {'parent_task': sam_to_bam},
        'DuplicationMetrics': {'parent_task': 'ratatosk.lib.tools.picard.SortSam'},
        'AlignmentMetrics': {'parent_task': dup_metrics},
        'InsertMetrics': {'parent_task': dup_metrics},
        'HsMetrics': {
            'parent_task': dup_metrics,
            'bait_regions': 'data/chr11_baits.interval_list',
            'target_regions': 'data/chr11_targets.interval_list',
        },
    }
    gatk_section = {
        'UnifiedGenotyper': {'ref': reference},
        'CombineVariants': {'ref': reference},
    }
    document = {
        'ratatosk.lib.align.bwa': bwa_section,
        'ratatosk.lib.tools.picard': picard_section,
        'ratatosk.lib.tools.gatk': gatk_section,
    }
    with open(localconf, "w") as fp:
        fp.write(yaml.safe_dump(document, default_flow_style=False))
Beispiel #11
0
 def test_job_init(self):
     """Construct a bwa ``Aln`` and a gatk ``UnifiedGenotyper`` task.

     ``localconf`` is registered on the configuration beforehand; the test
     passes as long as neither constructor raises.
     """
     config = get_config()
     config.add_config_path(localconf)
     target = "data/sample1_1.sai"
     task = ratatosk.lib.align.bwa.Aln(
         target=target,
         parent_task=('ratatosk.lib.align.bwa.InputFastqFile', ))
     task = ratatosk.lib.tools.gatk.UnifiedGenotyper(target=target)
Beispiel #12
0
def setUpModule():
    """Fetch both configuration objects and empty them before the tests run.

    The custom configuration is bound to the module-level name ``cnf`` and
    the regular configuration to the module-level name ``conf``.
    """
    global cnf, conf
    # Custom configuration first, regular configuration second.
    custom = get_custom_config()
    custom.clear()
    cnf = custom
    regular = get_config()
    regular.clear()
    conf = regular
Beispiel #13
0
 def test_expand_vars(self):
     """Check that environment variables in ``mock.yaml`` paths are expanded.

     The ``path`` entries of the gatk and picard sections must equal the
     values of ``GATK_HOME_MOCK`` and ``PICARD_HOME_MOCK`` + ``/test``
     respectively. ``mock.yaml`` is removed from the configuration again
     even when an assertion fails, so a failure here cannot leak the path
     into other tests.
     """
     cnf = get_config()
     cnf.add_config_path("mock.yaml")
     try:
         self.assertEqual(os.getenv("GATK_HOME_MOCK"), cnf._sections['ratatosk.lib.tools.gatk']['path'])
         self.assertEqual(os.path.join(os.getenv("PICARD_HOME_MOCK"), "test"), cnf._sections['ratatosk.lib.tools.picard']['path'])
     finally:
         # Always undo the registration made above.
         cnf.del_config_path("mock.yaml")
Beispiel #14
0
def setUpModule():
    """Empty the shared configuration and regenerate ``localconf``.

    ``cnf`` (module-level) is set to the object returned by ``get_config()``
    and cleared. ``localconf`` is overwritten with a block-style YAML dump of
    the bwa, picard and gatk settings used by the tests.
    """
    global cnf
    cnf = get_config()
    cnf.clear()

    def parent(task_name):
        # Fresh single-entry section naming the parent task.
        return {'parent_task': task_name}

    def chr11_ref():
        # Fresh section pointing at the chr11 reference.
        return {'ref': 'data/chr11.fa'}

    duplication = 'ratatosk.lib.tools.picard.DuplicationMetrics'
    hs_metrics = parent(duplication)
    hs_metrics['bait_regions'] = 'data/chr11_baits.interval_list'
    hs_metrics['target_regions'] = 'data/chr11_targets.interval_list'

    settings = {}
    settings['ratatosk.lib.align.bwa'] = {
        'InputFastqFile': {'target_suffix': '.fastq.gz'},
        'bwaref': 'data/chr11.fa',
        'Aln': {'read1_suffix': "_1", 'read2_suffix': "_2"},
        # Separate list literals: a shared list would be dumped as an alias.
        'Bampe': {'add_label': ["_1", "_2"]},
        'Sampe': {'add_label': ["_1", "_2"]},
    }
    settings['ratatosk.lib.tools.picard'] = {
        'InputBamFile': parent('ratatosk.lib.tools.samtools.SamToBam'),
        'SortSam': parent('ratatosk.lib.tools.samtools.SamToBam'),
        'DuplicationMetrics': parent('ratatosk.lib.tools.picard.SortSam'),
        'AlignmentMetrics': parent(duplication),
        'InsertMetrics': parent(duplication),
        'HsMetrics': hs_metrics,
    }
    settings['ratatosk.lib.tools.gatk'] = {
        'UnifiedGenotyper': chr11_ref(),
        'CombineVariants': chr11_ref(),
    }

    dumped = yaml.safe_dump(settings, default_flow_style=False)
    with open(localconf, "w") as fp:
        fp.write(dumped)
Beispiel #15
0
    def __init__(self, *args, **kwargs):
        """Initializes job task. A job task can be customized via
        configuration files. There are currently two configuration
        files:

        1. config_file, passed via option ``--config-file``
        2. custom_config, passed via option ``--custom-config``

        The reason for having two files is that updating
        ``parent_task`` is disabled in the custom configuration,
        thereby ensuring that predefined workflows in the regular
        configuration cannot be tampered with. However, other options
        can be modified.

        Options are updated in the following order (later steps
        override earlier ones):

        1. reads the configuration file, updating the kwargs
        2. reads the custom configuration if present, updating relevant kwargs
        3. checks if any parameter value differs from its default
           (i.e. was passed on the command line), and if so, updates kwargs

        Parameters left untouched by all three steps keep their
        default value.

        Once the configuration has been set, the parent tasks are
        registered via
        :func:`ratatosk.job.BaseJobTask._register_parent_task`.

        """
        self._parent_cls = []
        self._handlers = {}
        params = self.get_params()
        param_values = self.get_param_values(params, args, kwargs)
        # Name -> value mapping of the parameters as passed by the caller
        param_values_dict = {x[0]:x[1] for x in self.get_param_values(params, args, kwargs)}
        # 1. Main configuration file
        for key, value in param_values:
            if key == "config_file":
                config_file = value
                config = get_config()
                config.add_config_path(config_file)
                kwargs = self._update_config(config, param_values_dict, *args, **kwargs)
        # 2. Custom configuration file
        for key, value in param_values:
            if key == "custom_config":
                if not value:
                    continue
                custom_config_file = value
                # This must be a separate instance
                custom_config = get_custom_config()
                custom_config.add_config_path(custom_config_file)
                kwargs = self._update_config(custom_config, param_values_dict, disable_parent_task_update=True, *args, **kwargs)

        # 3. Finally, check if options were passed via the command line
        for key, value in self.get_params():
            new_value = None
            # Got a command line option => override config file. Currently overriding parent_task *is* possible here (FIX ME?)
            if value.default != param_values_dict.get(key, None):
                new_value = param_values_dict.get(key, None)
                logger.debug("option '{0}'; got value '{1}' from command line, overriding configuration file setting and default '{2}' for task class '{3}'".format(key, new_value, value.default, self.__class__))
                kwargs[key] = new_value
        super(BaseJobTask, self).__init__(*args, **kwargs)
        # TODO: now that all parameters have been collected, global sections should be updated here
        # Update global configuration here for printing everything in PrintConfig task
        # backend.__global_config__ = update(backend.__global_config__, vars(config)["_sections"])

        # Register parent tasks; the last "parent_task" entry wins
        parents = [v for k, v in self.get_param_values(params, args, kwargs) if k == "parent_task"].pop()
        # In case parent_task is a single value rather than a tuple, wrap it
        if not isinstance(parents, tuple):
            parents = [parents]
        self._register_parent_task(parents)
        if self.dry_run:
            # Single-argument parenthesised form prints the same text under
            # the Python 2 print statement and the Python 3 print function.
            print("DRY RUN: " + str(self))
Beispiel #16
0
def setUpModule():
    """Obtain and clear the custom and the regular configuration.

    After the call the module-level ``cnf`` refers to the (emptied) custom
    configuration and the module-level ``conf`` to the (emptied) regular
    configuration.
    """
    global cnf, conf
    custom_parser = get_custom_config()
    custom_parser.clear()
    cnf = custom_parser
    regular_parser = get_config()
    regular_parser.clear()
    conf = regular_parser
Beispiel #17
0
    def __init__(self, *args, **kwargs):
        """Initializes job task. A job task can be customized via
        configuration files. There are currently two configuration
        files:

        1. config_file, passed via option ``--config-file``
        2. custom_config, passed via option ``--custom-config``

        The reason for having two files is that updating
        ``parent_task`` is disabled in the custom configuration,
        thereby ensuring that predefined workflows in the regular
        configuration cannot be tampered with. However, other options
        can be modified.

        Options are updated in the following order (later steps
        override earlier ones):

        1. reads the configuration file, updating the kwargs
        2. reads the custom configuration if present, updating relevant kwargs
        3. checks if any parameter value differs from its default
           (i.e. was passed on the command line), and if so, updates kwargs

        Parameters left untouched by all three steps keep their
        default value.

        Once the configuration has been set, the parent tasks are
        registered via
        :func:`ratatosk.job.BaseJobTask._register_parent_task`.

        """
        self._parent_cls = []
        self._handlers = {}
        params = self.get_params()
        param_values = self.get_param_values(params, args, kwargs)
        # Name -> value mapping of the parameters as passed by the caller
        param_values_dict = {
            x[0]: x[1]
            for x in self.get_param_values(params, args, kwargs)
        }
        # 1. Main configuration file
        for key, value in param_values:
            if key == "config_file":
                config_file = value
                config = get_config()
                config.add_config_path(config_file)
                kwargs = self._update_config(config, param_values_dict, *args,
                                             **kwargs)
        # 2. Custom configuration file
        for key, value in param_values:
            if key == "custom_config":
                if not value:
                    continue
                custom_config_file = value
                # This must be a separate instance
                custom_config = get_custom_config()
                custom_config.add_config_path(custom_config_file)
                kwargs = self._update_config(custom_config,
                                             param_values_dict,
                                             disable_parent_task_update=True,
                                             *args,
                                             **kwargs)

        # 3. Finally, check if options were passed via the command line
        for key, value in self.get_params():
            new_value = None
            # Got a command line option => override config file. Currently overriding parent_task *is* possible here (FIX ME?)
            if value.default != param_values_dict.get(key, None):
                new_value = param_values_dict.get(key, None)
                logger.debug(
                    "option '{0}'; got value '{1}' from command line, overriding configuration file setting and default '{2}' for task class '{3}'"
                    .format(key, new_value, value.default, self.__class__))
                kwargs[key] = new_value
        super(BaseJobTask, self).__init__(*args, **kwargs)
        # TODO: now that all parameters have been collected, global sections should be updated here
        # Update global configuration here for printing everything in PrintConfig task
        # backend.__global_config__ = update(backend.__global_config__, vars(config)["_sections"])

        # Register parent tasks; the last "parent_task" entry wins
        parents = [
            v for k, v in self.get_param_values(params, args, kwargs)
            if k == "parent_task"
        ].pop()
        # In case parent_task is a single value rather than a tuple, wrap it
        if not isinstance(parents, tuple):
            parents = [parents]
        self._register_parent_task(parents)
        if self.dry_run:
            # Single-argument parenthesised form prints the same text under
            # the Python 2 print statement and the Python 3 print function.
            print("DRY RUN: " + str(self))