def setUpModule():
    """Create the local YAML configuration and register the config files.

    The object returned by ``get_config()`` is stored in the module global
    ``cnf``. A small set of ``picard`` and ``gatk`` task settings is dumped
    to ``localconf``, after which ``ratatosk_conf`` and ``localconf`` are
    added to the configuration, in that order.
    """
    global cnf
    cnf = get_config()

    picard_settings = {
        'InsertMetrics': {
            'parent_task': 'ratatosk.lib.tools.picard.DuplicationMetrics',
        },
    }
    indel_realigner_settings = {
        'parent_task': [
            'ratatosk.lib.tools.picard.MergeSamFiles',
            'ratatosk.lib.tools.gatk.RealignerTargetCreator',
            'ratatosk.lib.tools.gatk.UnifiedGenotyper',
        ],
        'source_label': [None, None, 'BOTH.raw'],
        'source_suffix': ['.bam', '.intervals', '.vcf'],
    }
    gatk_settings = {
        'IndelRealigner': indel_realigner_settings,
        'RealignerTargetCreator': {
            'parent_task': 'ratatosk.lib.align.bwa.BwaAln',
        },
    }
    local_settings = {'picard': picard_settings, 'gatk': gatk_settings}

    with open(localconf, "w") as fp:
        fp.write(yaml.safe_dump(local_settings, default_flow_style=False))

    # Need to add ratatosk first, then override with localconf
    cnf.add_config_path(ratatosk_conf)
    cnf.add_config_path(localconf)
def setUpModule():
    """Module-level fixture: write ``localconf`` and load the configuration.

    Dumps the ``picard``/``gatk`` overrides used by the tests to the file
    named by ``localconf`` and then registers ``ratatosk_conf`` followed by
    ``localconf`` on the shared configuration object (module global ``cnf``).
    """
    global cnf
    cnf = get_config()

    realigner_parents = [
        'ratatosk.lib.tools.picard.MergeSamFiles',
        'ratatosk.lib.tools.gatk.RealignerTargetCreator',
        'ratatosk.lib.tools.gatk.UnifiedGenotyper',
    ]
    overrides = {}
    overrides['picard'] = {
        'InsertMetrics': {'parent_task': 'ratatosk.lib.tools.picard.DuplicationMetrics'},
    }
    overrides['gatk'] = {
        'IndelRealigner': {
            'parent_task': realigner_parents,
            'source_label': [None, None, 'BOTH.raw'],
            'source_suffix': ['.bam', '.intervals', '.vcf'],
        },
        'RealignerTargetCreator': {'parent_task': 'ratatosk.lib.align.bwa.BwaAln'},
    }

    with open(localconf, "w") as fp:
        yaml_text = yaml.safe_dump(overrides, default_flow_style=False)
        fp.write(yaml_text)

    # Need to add ratatosk first, then override with localconf
    for config_path in (ratatosk_conf, localconf):
        cnf.add_config_path(config_path)
def test_config_update_with_config(self):
    """Check that options from the configuration file replace the defaults."""
    ug = ratatosk.lib.tools.gatk.UnifiedGenotyper()
    # Map parameter name -> value as resolved without any args/kwargs.
    param_values_dict = dict(
        (pair[0], pair[1])
        for pair in ug.get_param_values(ug.get_params(), [], {}))

    # Start from an empty configuration that only knows about mock.yaml.
    main_config = get_config()
    main_config.clear()
    main_config.add_config_path("mock.yaml")

    kwargs = ug._update_config(main_config, param_values_dict)
    expected_options = ['-stand_call_conf 10.0', '-stand_emit_conf 3.0']
    self.assertEqual(kwargs['options'], expected_options)
def test_job_init(self):
    """Check that job tasks can be instantiated with the local configuration."""
    get_config().add_config_path(localconf)

    # Aln is created with an explicit parent task; the test only requires
    # that construction does not raise.
    aln_parents = ('ratatosk.lib.align.bwa.InputFastqFile', )
    aln_task = ratatosk.lib.align.bwa.Aln(
        target="data/sample1_1.sai",
        parent_task=aln_parents)

    # UnifiedGenotyper is created with its configured/default parent task.
    genotyper_task = ratatosk.lib.tools.gatk.UnifiedGenotyper(
        target="data/sample1_1.sai")
def setUpModule():
    """Reset and populate the regular and custom configuration objects.

    The regular configuration (module global ``cnf``) is cleared and given
    ``../config/ratatosk.yaml`` (relative to this file) followed by
    ``localconf``. The custom configuration (module global ``custom_cnf``)
    is cleared, given ``localconf`` and reloaded.
    """
    global cnf, custom_cnf

    cnf = get_config()
    cnf.clear()
    config_dir = os.path.join(os.path.dirname(__file__), os.pardir, "config")
    cnf.add_config_path(os.path.join(config_dir, "ratatosk.yaml"))
    cnf.add_config_path(localconf)

    custom_cnf = get_custom_config()
    custom_cnf.clear()
    custom_cnf.add_config_path(localconf)
    custom_cnf.reload()
def requires(self):
    """Return the parent tasks whose outputs are the inputs of this task.

    The input targets are produced by the handler registered under the
    label ``"target_generator_handler"``. If ``self.target_generator_handler``
    is set and no such handler is registered yet, one is created and
    registered first.

    Returns:
        A list with one instance of the first parent class per unique
        target returned by the handler, or an empty list (after logging a
        warning) when no target generator handler is available. Duplicates
        are removed via a ``set``, so the order of the result is arbitrary.
    """
    parent_cls = self.parent()[0]
    handler_label = "target_generator_handler"

    # Lazily register the handler the first time it is needed.
    if self.target_generator_handler and handler_label not in self._handlers:
        handler = RatatoskHandler(label=handler_label,
                                  mod=self.target_generator_handler)
        register_task_handler(self, handler)

    if handler_label not in self._handlers:
        # logging.warn is a deprecated alias; use logging.warning.
        logging.warning("MergeSamFiles requires a target generator handler; no defaults are as of yet implemented")
        return []

    sources = list(set(self._handlers[handler_label](self)))
    return [parent_cls(target=src) for src in sources]
def setUpModule():
    """Module-level fixture preparing both configuration objects.

    ``cnf`` (from ``get_config()``) is emptied and loaded with the packaged
    ``ratatosk.yaml`` and then ``localconf``; ``custom_cnf`` (from
    ``get_custom_config()``) is emptied, loaded with ``localconf`` and
    reloaded.
    """
    global cnf, custom_cnf

    # Regular configuration: packaged defaults first, local file second.
    cnf = get_config()
    cnf.clear()
    here = os.path.dirname(__file__)
    packaged_conf = os.path.join(here, os.pardir, "config", "ratatosk.yaml")
    for path in (packaged_conf, localconf):
        cnf.add_config_path(path)

    # Custom configuration: only the local file.
    custom_cnf = get_custom_config()
    custom_cnf.clear()
    custom_cnf.add_config_path(localconf)
    custom_cnf.reload()
def test_config_update_with_custom_config(self):
    """Check that the custom configuration overrides the regular one."""
    ug = ratatosk.lib.tools.gatk.UnifiedGenotyper()
    param_values_dict = dict(
        (pair[0], pair[1])
        for pair in ug.get_param_values(ug.get_params(), [], {}))

    # Regular configuration, loaded from mock.yaml only.
    main_config = get_config()
    main_config.clear()
    main_config.add_config_path("mock.yaml")

    # Custom configuration, loaded from custommock.yaml only.
    custom_config = get_custom_config()
    custom_config.clear()
    custom_config.add_config_path("custommock.yaml")

    kwargs = ug._update_config(main_config, param_values_dict)
    self.assertEqual(
        kwargs['options'],
        ['-stand_call_conf 10.0', '-stand_emit_conf 3.0'])

    kwargs = ug._update_config(
        custom_config, param_values_dict, disable_parent_task_update=True)
    self.assertEqual(
        kwargs['options'],
        ['-stand_call_conf 20.0', '-stand_emit_conf 30.0'])
def test_config_update_with_command_line_parameter(self):
    """Check that a command line parameter overrides both configurations."""
    ug = ratatosk.lib.tools.gatk.UnifiedGenotyper(options='test')
    param_values_dict = dict(
        (pair[0], pair[1])
        for pair in ug.get_param_values(ug.get_params(), [], {'options': 'test'}))

    main_config = get_config()
    main_config.clear()
    main_config.add_config_path("mock.yaml")

    custom_config = get_custom_config()
    custom_config.clear()
    custom_config.add_config_path("custommock.yaml")

    kwargs = ug._update_config(main_config, param_values_dict)
    self.assertEqual(
        kwargs['options'],
        ['-stand_call_conf 10.0', '-stand_emit_conf 3.0'])

    kwargs = ug._update_config(
        custom_config, param_values_dict, disable_parent_task_update=True)
    self.assertEqual(
        kwargs['options'],
        ['-stand_call_conf 20.0', '-stand_emit_conf 30.0'])

    for key, value in ug.get_params():
        passed_value = param_values_dict.get(key, None)
        # Got a command line option => override config file. Currently
        # overriding parent_task *is* possible here (FIX ME?)
        if value.default != passed_value:
            kwargs[key] = passed_value
    self.assertEqual(kwargs['options'], 'test')
def setUpModule():
    """Clear the shared configuration and write the local YAML file.

    The configuration object from ``get_config()`` is stored in the module
    global ``cnf`` and cleared. Settings for the bwa, picard and gatk task
    modules are then dumped as block-style YAML to ``localconf``.
    """
    global cnf
    cnf = get_config()
    cnf.clear()

    reference = 'data/chr11.fa'
    sam_to_bam = 'ratatosk.lib.tools.samtools.SamToBam'
    dup_metrics = 'ratatosk.lib.tools.picard.DuplicationMetrics'

    # Only strings are shared between entries; every dict/list below is a
    # separate literal so the dumped YAML contains no anchors/aliases.
    bwa_section = {
        'InputFastqFile': {'target_suffix': '.fastq.gz'},
        'bwaref': reference,
        'Aln': {'read1_suffix': "_1", 'read2_suffix': "_2"},
        'Bampe': {'add_label': ["_1", "_2"]},
        'Sampe': {'add_label': ["_1", "_2"]},
    }
    picard_section = {
        'InputBamFile': {'parent_task': sam_to_bam},
        'SortSam': {'parent_task': sam_to_bam},
        'DuplicationMetrics': {'parent_task': 'ratatosk.lib.tools.picard.SortSam'},
        'AlignmentMetrics': {'parent_task': dup_metrics},
        'InsertMetrics': {'parent_task': dup_metrics},
        'HsMetrics': {
            'parent_task': dup_metrics,
            'bait_regions': 'data/chr11_baits.interval_list',
            'target_regions': 'data/chr11_targets.interval_list',
        },
    }
    gatk_section = {
        'UnifiedGenotyper': {'ref': reference},
        'CombineVariants': {'ref': reference},
    }
    settings = {
        'ratatosk.lib.align.bwa': bwa_section,
        'ratatosk.lib.tools.picard': picard_section,
        'ratatosk.lib.tools.gatk': gatk_section,
    }

    with open(localconf, "w") as fp:
        fp.write(yaml.safe_dump(settings, default_flow_style=False))
def test_job_init(self):
    """Instantiate two job tasks after loading the local configuration."""
    config = get_config()
    config.add_config_path(localconf)

    target = "data/sample1_1.sai"
    # The instances are not inspected; the test passes if neither
    # constructor raises.
    bwa_task = ratatosk.lib.align.bwa.Aln(
        target=target,
        parent_task=('ratatosk.lib.align.bwa.InputFastqFile', ))
    gatk_task = ratatosk.lib.tools.gatk.UnifiedGenotyper(target=target)
def setUpModule():
    """Start the test module with empty configuration objects.

    Binds the custom configuration to the module global ``cnf`` and the
    regular configuration to the module global ``conf``, clearing each one
    right after it is obtained.
    """
    global cnf, conf

    # Custom configuration first ...
    cnf = get_custom_config()
    cnf.clear()

    # ... then the regular configuration.
    conf = get_config()
    conf.clear()
def test_expand_vars(self):
    """Check the ``path`` settings loaded from ``mock.yaml`` against the environment.

    After ``mock.yaml`` is added to the configuration, the ``path`` entry
    of the gatk section must equal the ``GATK_HOME_MOCK`` environment
    variable, and the ``path`` entry of the picard section must equal
    ``PICARD_HOME_MOCK`` joined with ``"test"``.

    ``mock.yaml`` is removed from the configuration again even when an
    assertion fails, so a failure here does not leave it registered in the
    configuration object.
    """
    cnf = get_config()
    cnf.add_config_path("mock.yaml")
    try:
        self.assertEqual(
            os.getenv("GATK_HOME_MOCK"),
            cnf._sections['ratatosk.lib.tools.gatk']['path'])
        self.assertEqual(
            os.path.join(os.getenv("PICARD_HOME_MOCK"), "test"),
            cnf._sections['ratatosk.lib.tools.picard']['path'])
    finally:
        # Always undo the registration done above.
        cnf.del_config_path("mock.yaml")
def setUpModule():
    """Module-level fixture: reset the configuration and create ``localconf``.

    Stores the cleared configuration object in the module global ``cnf``
    and writes the bwa, picard and gatk settings used by the tests to the
    file named by ``localconf`` as block-style YAML.
    """
    global cnf
    cnf = get_config()
    cnf.clear()

    # Each call returns a fresh dict, so no object is shared between
    # entries and the YAML dump contains no anchors/aliases.
    def with_parent(task_name, **extra):
        entry = {'parent_task': task_name}
        entry.update(extra)
        return entry

    def with_ref():
        return {'ref': 'data/chr11.fa'}

    duplication_metrics = 'ratatosk.lib.tools.picard.DuplicationMetrics'
    payload = {
        'ratatosk.lib.align.bwa': {
            'InputFastqFile': {'target_suffix': '.fastq.gz'},
            'bwaref': 'data/chr11.fa',
            'Aln': {'read1_suffix': "_1", 'read2_suffix': "_2"},
            'Bampe': {'add_label': ["_1", "_2"]},
            'Sampe': {'add_label': ["_1", "_2"]},
        },
        'ratatosk.lib.tools.picard': {
            'InputBamFile': with_parent('ratatosk.lib.tools.samtools.SamToBam'),
            'SortSam': with_parent('ratatosk.lib.tools.samtools.SamToBam'),
            'DuplicationMetrics': with_parent('ratatosk.lib.tools.picard.SortSam'),
            'AlignmentMetrics': with_parent(duplication_metrics),
            'InsertMetrics': with_parent(duplication_metrics),
            'HsMetrics': with_parent(
                duplication_metrics,
                bait_regions='data/chr11_baits.interval_list',
                target_regions='data/chr11_targets.interval_list'),
        },
        'ratatosk.lib.tools.gatk': {
            'UnifiedGenotyper': with_ref(),
            'CombineVariants': with_ref(),
        },
    }

    with open(localconf, "w") as fp:
        yaml_text = yaml.safe_dump(payload, default_flow_style=False)
        fp.write(yaml_text)
def __init__(self, *args, **kwargs):
    """Initialize the job task.

    A job task can be customized via two configuration files:

    1. config_file, passed via option ``--config-file``
    2. custom_config, passed via option ``--custom-config``

    Two files exist because updating ``parent_task`` is disabled for the
    custom configuration, which ensures that predefined workflows in the
    regular configuration cannot be tampered with. Other options can
    still be modified.

    The keyword arguments are built in the following steps, later steps
    overriding earlier ones:

    1. read the configuration file, updating kwargs
    2. read the custom configuration if present, updating kwargs
    3. apply any parameter value that differs from the parameter default
       (i.e. was passed on the command line)

    Parameters not set by any step keep their default value.

    Once the configuration has been set, the parent tasks are registered
    via :func:`ratatosk.job.BaseJobTask._register_parent_task`.
    """
    self._parent_cls = []
    self._handlers = {}
    params = self.get_params()
    param_values = self.get_param_values(params, args, kwargs)
    param_values_dict = dict(
        (item[0], item[1])
        for item in self.get_param_values(params, args, kwargs))

    # 1. Main configuration file
    for name, setting in param_values:
        if name != "config_file":
            continue
        config = get_config()
        config.add_config_path(setting)
        kwargs = self._update_config(config, param_values_dict, *args, **kwargs)

    # 2. Custom configuration file (skipped when no file was given)
    for name, setting in param_values:
        if name != "custom_config" or not setting:
            continue
        # This must be a separate instance
        custom_config = get_custom_config()
        custom_config.add_config_path(setting)
        kwargs = self._update_config(custom_config, param_values_dict, disable_parent_task_update=True, *args, **kwargs)

    # 3. Finally, check if options were passed via the command line
    for name, param in self.get_params():
        passed_value = param_values_dict.get(name, None)
        # Got a command line option => override config file. Currently
        # overriding parent_task *is* possible here (FIX ME?)
        if param.default != passed_value:
            logger.debug("option '{0}'; got value '{1}' from command line, overriding configuration file setting and default '{2}' for task class '{3}'".format(name, passed_value, param.default, self.__class__))
            kwargs[name] = passed_value

    super(BaseJobTask, self).__init__(*args, **kwargs)
    # TODO: now that all parameters have been collected, global sections should be updated here
    # Update global configuration here for printing everything in PrintConfig task
    # backend.__global_config__ = update(backend.__global_config__, vars(config)["_sections"])

    # Register parent tasks
    parent_task_values = [
        v for k, v in self.get_param_values(params, args, kwargs)
        if k == "parent_task"]
    parents = parent_task_values.pop()
    # Wrap a single (non-tuple) parent_task value in a list
    if not isinstance(parents, tuple):
        parents = [parents]
    self._register_parent_task(parents)

    if self.dry_run:
        # Single parenthesised argument: prints the same text as the
        # original print statement.
        print("DRY RUN: " + str(self))
def __init__(self, *args, **kwargs):
    """Set up the job task from configuration files and passed options.

    Two configuration files can customize a job task:

    1. config_file, passed via option ``--config-file``
    2. custom_config, passed via option ``--custom-config``

    The custom configuration cannot update ``parent_task``; this keeps
    the predefined workflows of the regular configuration intact while
    still allowing other options to be changed.

    Keyword arguments are assembled as follows, each step overriding the
    previous ones:

    1. values from the configuration file
    2. values from the custom configuration, if one was given
    3. parameter values that differ from the parameter default, i.e.
       options passed on the command line

    Anything left untouched falls back to its default value. Finally the
    parent tasks are registered via
    :func:`ratatosk.job.BaseJobTask._register_parent_task`.
    """
    self._parent_cls = []
    self._handlers = {}
    params = self.get_params()
    param_values = self.get_param_values(params, args, kwargs)
    param_values_dict = {}
    for entry in self.get_param_values(params, args, kwargs):
        param_values_dict[entry[0]] = entry[1]

    # 1. Main configuration file
    for option, option_value in param_values:
        if option == "config_file":
            config = get_config()
            config.add_config_path(option_value)
            kwargs = self._update_config(
                config, param_values_dict, *args, **kwargs)

    # 2. Custom configuration file
    for option, option_value in param_values:
        if option == "custom_config" and option_value:
            # This must be a separate instance
            custom_config = get_custom_config()
            custom_config.add_config_path(option_value)
            kwargs = self._update_config(
                custom_config, param_values_dict,
                disable_parent_task_update=True, *args, **kwargs)

    # 3. Finally, check if options were passed via the command line
    debug_template = "option '{0}'; got value '{1}' from command line, overriding configuration file setting and default '{2}' for task class '{3}'"
    for option, parameter in self.get_params():
        command_line_value = param_values_dict.get(option, None)
        # Got a command line option => override config file. Currently
        # overriding parent_task *is* possible here (FIX ME?)
        if parameter.default != command_line_value:
            logger.debug(debug_template.format(
                option, command_line_value, parameter.default,
                self.__class__))
            kwargs[option] = command_line_value

    super(BaseJobTask, self).__init__(*args, **kwargs)
    # TODO: now that all parameters have been collected, global sections should be updated here
    # Update global configuration here for printing everything in PrintConfig task
    # backend.__global_config__ = update(backend.__global_config__, vars(config)["_sections"])

    # Register parent tasks
    matches = [
        v for k, v in self.get_param_values(params, args, kwargs)
        if k == "parent_task"
    ]
    parents = matches.pop()
    # A parent_task value that is not a tuple is wrapped in a list
    if not isinstance(parents, tuple):
        parents = [parents]
    self._register_parent_task(parents)

    if self.dry_run:
        # One parenthesised expression, so the output matches the
        # original print statement.
        print("DRY RUN: " + str(self))