def testMultiTinyRna(self):
    """Run a one-iteration job on the multi-file RNA directory and check the outputs.

    The command is run with ``-d rna`` and ``-m``; afterwards the inputs and
    the per-input alignments must be RNA, the concatenated alignment must be
    DNA, no alignment may contain gap-only columns, and the saved
    configuration must still report the RNA datatype.
    """
    scratch_dir = self.ts.top_level_temp
    cmd = [
        '-i', self.multi_rna_dir,
        '-d', 'rna',
        '-o', scratch_dir,
        '--temporaries=%s' % scratch_dir,
        '-m',
        '-j', self.job_name,
        '--max-subproblem-size=2',
        '--iter-limit=1',
    ]
    self._exe(cmd)

    # Inputs and final per-input alignments are checked as RNA; only the
    # concatenated alignment is checked as DNA.
    for rna_path in (self.in_path1, self.in_path2,
                     self.aln_path1, self.aln_path2):
        self.assert_is_nuc(rna_path, 'RNA')
    self.assert_is_nuc(self.concat_path, 'DNA')

    self.assertSameInputOutputSequenceData(
        [self.in_path1, self.in_path2],
        [self.aln_path1, self.aln_path2])
    self.assertNoGapColumns(
        [self.aln_path1, self.aln_path2, self.concat_path])

    # NOTE(review): reverse=True presumably maps the DNA-coded concatenation
    # back to RNA before it is compared with the inputs -- confirm against
    # convert_rna_to_dna.
    concatenated = self.convert_rna_to_dna(self.concat_path, reverse=True)
    self.assertSameConcatenatedSequences(
        concatenated_data=concatenated,
        seq_data_list=[self.in_path1, self.in_path2])

    saved_cfg = get_configuration(self.cfg_path)
    self.assertEqual(saved_cfg.commandline.datatype.upper(), 'RNA')
def testTinyRnaAuto(self):
    """Run the tiny RNA input with ``--auto`` and check the outputs.

    Temporaries are kept (``--keeptemp``) so the files at ``init_aln_path``
    and ``iter_aln_path`` can be inspected: both must be DNA, while the
    input and the final alignment must be RNA. All of them must hold the
    same sequences once the two DNA files are passed through
    ``convert_rna_to_dna(..., reverse=True)``.
    """
    scratch_dir = self.ts.top_level_temp
    cmd = [
        '-i', self.tiny_rna,
        '-d', 'rna',
        '-o', scratch_dir,
        '--temporaries=%s' % scratch_dir,
        '-j', self.job_name,
        '--keeptemp',
        '--max-subproblem-size=2',
        '--auto',
    ]
    self._exe(cmd)

    for rna_path in (self.tiny_rna, self.tiny_aln_path):
        self.assert_is_nuc(rna_path, 'RNA')
    self.assertSameInputOutputSequenceData(
        [self.tiny_rna], [self.tiny_aln_path])

    kept_alignments = [self.init_aln_path, self.iter_aln_path]
    self.assertNoGapColumns([self.tiny_aln_path] + kept_alignments)
    for dna_path in kept_alignments:
        self.assert_is_nuc(dna_path, 'DNA')

    # Compare input, final alignment and the back-converted kept files.
    same_sequences = [self.tiny_rna, self.tiny_aln_path]
    for dna_path in kept_alignments:
        same_sequences.append(
            self.convert_rna_to_dna(dna_path, reverse=True))
    self.assertSameSequences(same_sequences)

    saved_cfg = get_configuration(self.cfg_path)
    self.assertEqual(saved_cfg.commandline.datatype.upper(), 'RNA')
def testSmallRna(self):
    """Run the small RNA input with a starting tree (EXHAUSTIVE level only).

    Nothing is executed unless ``is_test_enabled`` allows the
    ``TestLevel.EXHAUSTIVE`` level for this test. When it runs, the input
    and final alignment must be RNA, the kept file at ``iter_aln_path`` must
    be DNA, and all three must hold the same sequences.
    """
    # The frame lookup must stay in this method so that co_name is the
    # name of this test.
    qualified_name = ".".join(
        [self.__class__.__name__, sys._getframe().f_code.co_name])
    if not is_test_enabled(TestLevel.EXHAUSTIVE, _LOG,
                           module_name=qualified_name):
        return

    scratch_dir = self.ts.top_level_temp
    cmd = [
        '-i', self.small_rna,
        '-t', self.small_tree,
        '-d', 'rna',
        '-o', scratch_dir,
        '--temporaries=%s' % scratch_dir,
        '-j', self.job_name,
        '--keeptemp',
        '--iter-limit=1',
    ]
    self._exe(cmd)

    for rna_path in (self.small_rna, self.small_aln_path):
        self.assert_is_nuc(rna_path, 'RNA')
    self.assertSameInputOutputSequenceData(
        [self.small_rna], [self.small_aln_path])
    self.assertNoGapColumns([self.small_aln_path, self.iter_aln_path])
    self.assert_is_nuc(self.iter_aln_path, 'DNA')

    back_converted = self.convert_rna_to_dna(self.iter_aln_path, reverse=True)
    self.assertSameSequences(
        [self.small_rna, self.small_aln_path, back_converted])

    saved_cfg = get_configuration(self.cfg_path)
    self.assertEqual(saved_cfg.commandline.datatype.upper(), 'RNA')
def get_testing_configuration():
    """Return the user's configuration reduced to its 'path' options.

    The installation-specific configuration is read so that the tool paths
    are available, and then every option other than 'path' is removed from
    each category. This keeps users' personal default settings from causing
    failures in tests that involve the configurable tools.
    """
    config = get_configuration()
    for category_name in config._categories:
        group = getattr(config, category_name)
        # Collect the names first; options are removed only after the scan
        # of the option mapping is finished.
        doomed = []
        for option_name in group.options.keys():
            if option_name != 'path':
                doomed.append(option_name)
        for option_name in doomed:
            group.remove_option(option_name)
    return config
def sate_main(argv=sys.argv):
    '''Parse the command line, load any settings files, and run SATe.

    Returns (True, dir, temp_fs) on successful execution or raises an exception.
    Where `dir` is either None or the undeleted directory of temporary files,
    and `temp_fs` is the TempFS object used to create `dir` (if `dir` is not None).
    When the `exportconfig` option is set, the configuration is written to
    that path and (True, None, None) is returned without running anything.

    Note that if `argv` is sys.argv then the first element will be skipped, but
    if it is not the sys.argv list then the first element will be interpreted
    as an argument (and will NOT be skipped). The check is made with `==`, so
    a list that merely compares equal to sys.argv is treated the same way.

    Raises Exception if a settings file named on the command line does not
    exist or cannot be read as a configuration file. Calls sys.exit() if no
    input is specified.
    '''
    _START_TIME = time.time()
    usage = """usage: %prog [options] <settings_file1> <settings_file2> ..."""
    parser = optparse.OptionParser(usage=usage,
                                   description=PROGRAM_LONG_DESCRIPTION,
                                   formatter=IndentedHelpFormatterWithNL(),
                                   version="%s v%s" % (PROGRAM_NAME, PROGRAM_VERSION))

    user_config = get_configuration()
    command_line_group = user_config.get('commandline')
    command_line_group.add_to_optparser(parser)
    sate_group = user_config.get('sate')
    sate_group.add_to_optparser(parser)
    if argv == sys.argv:
        (options, args) = parser.parse_args(argv[1:])
    else:
        (options, args) = parser.parse_args(argv)
    #if options.multilocus:
    #    sys.exit("SATe: Multilocus mode is disabled in this release.")

    # Positional arguments are settings files; they are read in order before
    # the command-line option values are applied on top of them.
    config_filenames = list(args)
    for fn in config_filenames:
        # Strip one pair of surrounding double quotes. The length guard keeps
        # an empty argument from raising IndexError on fn[0].
        if len(fn) >= 2 and fn[0] == '"' and fn[-1] == '"':
            fn = fn[1:-1]
        if not os.path.exists(fn):
            raise Exception('The configuration (settings) file "%s" does not exist' % fn)
        try:
            user_config.read_config_filepath(fn)
        except Exception:
            # Only ordinary errors are reported as a bad settings file;
            # KeyboardInterrupt and SystemExit propagate unchanged.
            raise Exception('The file "%s" does not appear to be a valid configuration file format. It lacks section headers.' % fn)
    user_config.set_values_from_dict(options.__dict__)
    command_line_group.job = coerce_string_to_nice_outfilename(command_line_group.job, 'Job', 'satejob')

    exportconfig = command_line_group.exportconfig
    if exportconfig:
        # Clear the option before saving so that it is not written into the
        # exported configuration itself.
        command_line_group.exportconfig = None
        user_config.save_to_filepath(exportconfig)

        ### TODO: wrap up in messaging system
        sys.stdout.write('Configuration written to "%s". Exiting successfully.\n' % exportconfig)

        return True, None, None

    if user_config.commandline.input is None:
        sys.exit("ERROR: Input file(s) not specified.")

    # note: need to read sequence files first to allow SateProducts to
    # correctly self-configure
    user_config.read_seq_filepaths(src=user_config.commandline.input,
                                   multilocus=user_config.commandline.multilocus)
    sate_products = filemgr.SateProducts(user_config)

    MESSENGER.run_log_streams.append(sate_products.run_log_stream)
    MESSENGER.err_log_streams.append(sate_products.err_log_stream)
    temp_dir, temp_fs = run_sate_from_config(user_config, sate_products)
    _TIME_SPENT = time.time() - _START_TIME
    MESSENGER.send_info("Total time spent: %ss" % _TIME_SPENT)
    return True, temp_dir, temp_fs
def create_and_verify(self,
        job_name="satejob",
        input_subdir=None,
        output_subdir=None,
        expected_index=None):
    """Create input files, build a SateProducts for them, and verify its streams.

    Input files are created under ``test-<job_name>`` via
    ``self.create_input_files`` and the list is then doubled, so every input
    appears twice. The checks are:

    - the output prefix has not been seen before in ``self.output_prefixes``;
    - every meta product stream exists on disk under the expected name;
    - there is one alignment stream per (duplicated) input, each with a
      distinct file name.

    Random content is written to every stream and recorded in
    ``self.product_results`` as ``(path, content)`` pairs; each stream is
    closed afterwards.

    ``expected_index`` is accepted but not used by this method.

    Returns the SateProducts instance, for further tests if needed.
    """

    ## create directories and files

    # job subdirectory
    job_subdir = "test-%s" % job_name

    # basic set of input sequences
    input_seq_filepaths = self.create_input_files(job_subdir=job_subdir,
            input_subdir=input_subdir)

    # check if we can handle redundant input files without overwriting output
    input_seq_filepaths.extend(list(input_seq_filepaths))

    # output directory
    # NOTE(review): output_dir and expected_output_dir are computed but never
    # read below -- confirm whether they were meant to be passed on to the
    # configuration or checked against the output prefix.
    if output_subdir is not None:
        output_dir = os.path.join(self.top_dir, job_subdir, output_subdir)
        expected_output_dir = output_dir
    else:
        output_dir = None
        expected_output_dir = os.path.dirname(input_seq_filepaths[0])

    ## create the product manager
    user_config = get_configuration()
    user_config.input_seq_filepaths = input_seq_filepaths
    user_config.commandline.input = input_seq_filepaths[0]
    sp = filemgr.SateProducts(sate_user_settings=user_config)

    ## job prefix: must be unique
    output_prefix = sp.output_prefix
    self.assertTrue(output_prefix not in self.output_prefixes)
    self.output_prefixes.append(output_prefix)

    ## meta products (score, tree, and log files)
    self.assertTrue(hasattr(sp, "score_stream"))
    self.assertTrue(hasattr(sp, "tree_stream"))
    self.assertTrue(hasattr(sp, "run_log_stream"))
    self.assertTrue(hasattr(sp, "err_log_stream"))
    for stream_name, product_extension in filemgr.SateProducts.meta_product_types.items():
        expected_fn = output_prefix + product_extension
        self.assertTrue(os.path.exists(expected_fn))
        stream_attr_name = stream_name + "_stream"
        self.assertTrue(hasattr(sp, stream_attr_name))
        stream = getattr(sp, stream_attr_name)
        # assertEqual rather than the deprecated assertEquals alias.
        self.assertEqual(
                os.path.abspath(stream.name),
                os.path.abspath(expected_fn))
        random_result = self.generate_random_result()
        self.product_results.append((expected_fn, random_result,))
        stream.write(random_result)
        stream.flush()
        stream.close()

    ## final alignment files
    self.assertEqual(len(sp.alignment_streams), len(input_seq_filepaths))
    align_fnames = []
    for alignment_stream in sp.alignment_streams:
        fn = os.path.abspath(alignment_stream.name)
        self.assertTrue(os.path.exists(fn))
        align_fnames.append(fn)
        random_result = self.generate_random_result()
        self.product_results.append((fn, random_result,))
        alignment_stream.write(random_result)
        alignment_stream.flush()
        alignment_stream.close()
    # duplicated inputs must still map to distinct alignment files
    self.assertEqual(len(set(align_fnames)), len(align_fnames))

    ## return sp, for further tests if needed
    return sp
def sate_main(argv=sys.argv):
    '''Parse the command line, load any settings files, and run SATe.

    Returns (True, dir, temp_fs) on successful execution or raises an exception.
    Where `dir` is either None or the undeleted directory of temporary files,
    and `temp_fs` is the TempFS object used to create `dir` (if `dir` is not None).
    When the `exportconfig` option is set, the configuration is written to
    that path and (True, None, None) is returned without running anything.

    Note that if `argv` is sys.argv then the first element will be skipped, but
    if it is not the sys.argv list then the first element will be interpreted
    as an argument (and will NOT be skipped). The check is made with `==`, so
    a list that merely compares equal to sys.argv is treated the same way.

    Raises Exception if a settings file named on the command line does not
    exist or cannot be read as a configuration file. Calls sys.exit() if no
    input is specified, or if `raxml_search_after` is requested with a tree
    estimator other than FastTree. Errors raised while parsing the input for
    the `auto`/`untrusted` modes are reported and re-raised.
    '''
    _START_TIME = time.time()
    usage = """usage: %prog [options] <settings_file1> <settings_file2> ..."""
    parser = optparse.OptionParser(usage=usage,
                                   description=PROGRAM_LONG_DESCRIPTION,
                                   formatter=IndentedHelpFormatterWithNL(),
                                   version="%s v%s" % (PROGRAM_NAME, PROGRAM_VERSION))

    user_config = get_configuration()
    command_line_group = user_config.get('commandline')
    command_line_group.add_to_optparser(parser)
    sate_group = user_config.get('sate')
    sate_group.add_to_optparser(parser)

    group = optparse.OptionGroup(parser, "SATe tools extra options")
    group.add_option('--tree-estimator-model', type='string',
                     dest='tree_estimator_model',
                     help='Do not use this option.')
    parser.add_option_group(group)

    if argv == sys.argv:
        (options, args) = parser.parse_args(argv[1:])
    else:
        (options, args) = parser.parse_args(argv)
    #if options.multilocus:
    #    sys.exit("SATe: Multilocus mode is disabled in this release.")

    # The model override is applied only when no settings files were given.
    if options.tree_estimator_model and options.tree_estimator and len(args) == 0:
        if options.tree_estimator.lower() == 'raxml':
            user_config.raxml.model = options.tree_estimator_model
        elif options.tree_estimator.lower() == 'fasttree':
            user_config.fasttree.model = options.tree_estimator_model

    # Positional arguments are settings files; they are read in order before
    # the command-line option values are applied on top of them.
    config_filenames = list(args)
    for fn in config_filenames:
        # Strip one pair of surrounding double quotes. The length guard keeps
        # an empty argument from raising IndexError on fn[0].
        if len(fn) >= 2 and fn[0] == '"' and fn[-1] == '"':
            fn = fn[1:-1]
        if not os.path.exists(fn):
            raise Exception('The configuration (settings) file "%s" does not exist' % fn)
        try:
            user_config.read_config_filepath(fn)
        except Exception:
            # Only ordinary errors are reported as a bad settings file;
            # KeyboardInterrupt and SystemExit propagate unchanged.
            raise Exception('The file "%s" does not appear to be a valid configuration file format. It lacks section headers.' % fn)
    user_config.set_values_from_dict(options.__dict__)
    command_line_group.job = coerce_string_to_nice_outfilename(command_line_group.job, 'Job', 'satejob')

    if user_config.commandline.auto or (user_config.commandline.untrusted):
        if user_config.commandline.input is None:
            sys.exit("ERROR: Input file(s) not specified.")
        from sate.usersettingclasses import get_list_of_seq_filepaths_from_dir
        from sate.alignment import summary_stats_from_parse
        try:
            if user_config.commandline.multilocus:
                fn_list = get_list_of_seq_filepaths_from_dir(user_config.commandline.input)
            else:
                fn_list = [user_config.commandline.input]
            datatype_list = [user_config.commandline.datatype.upper()]
            careful_parse = user_config.commandline.untrusted
            summary_stats = summary_stats_from_parse(fn_list, datatype_list, careful_parse=careful_parse)
        except:
            # Report the failure, then re-raise whatever was raised (nothing
            # is swallowed here).
            if user_config.commandline.auto:
                MESSENGER.send_error("Error reading input while setting options for the --auto mode\n")
            else:
                MESSENGER.send_error("Error reading input\n")
            raise
        if user_config.commandline.auto:
            # Turn the flag off before applying the derived defaults to the
            # 'sate', 'commandline' and 'fasttree' groups.
            user_config.commandline.auto = False
            auto_opts = get_auto_defaults_from_summary_stats(summary_stats[0], summary_stats[1], summary_stats[2])
            user_config.get('sate').set_values_from_dict(auto_opts['sate'])
            user_config.get('commandline').set_values_from_dict(auto_opts['commandline'])
            user_config.get('fasttree').set_values_from_dict(auto_opts['fasttree'])

    if user_config.commandline.raxml_search_after:
        if user_config.sate.tree_estimator.upper() != 'FASTTREE':
            sys.exit("ERROR: the 'raxml_search_after' option is only supported when the tree_estimator is FastTree")

    exportconfig = command_line_group.exportconfig
    if exportconfig:
        # Clear the option before saving so that it is not written into the
        # exported configuration itself.
        command_line_group.exportconfig = None
        user_config.save_to_filepath(exportconfig)

        ### TODO: wrap up in messaging system
        sys.stdout.write('Configuration written to "%s". Exiting successfully.\n' % exportconfig )

        return True, None, None

    if user_config.commandline.input is None:
        sys.exit("ERROR: Input file(s) not specified.")

    # note: need to read sequence files first to allow SateProducts to
    # correctly self-configure
    user_config.read_seq_filepaths(src=user_config.commandline.input,
                                   multilocus=user_config.commandline.multilocus)
    sate_products = filemgr.SateProducts(user_config)

    # Always save a copy of the effective configuration next to the other
    # products (the flag is hard-coded to True).
    export_config_as_temp = True
    if export_config_as_temp:
        name_cfg = sate_products.get_abs_path_for_tag('sate_config.txt')
        command_line_group.exportconfig = None
        user_config.save_to_filepath(name_cfg)
        MESSENGER.send_info('Configuration written to "%s".\n' % name_cfg )

    MESSENGER.run_log_streams.append(sate_products.run_log_stream)
    MESSENGER.err_log_streams.append(sate_products.err_log_stream)
    temp_dir, temp_fs = run_sate_from_config(user_config, sate_products)
    _TIME_SPENT = time.time() - _START_TIME
    MESSENGER.send_info("Total time spent: %ss" % _TIME_SPENT)
    return True, temp_dir, temp_fs
def sate_main(argv=sys.argv):
    '''Parse the command line, load any settings files, and run SATe.

    Returns (True, dir, temp_fs) on successful execution or raises an exception.
    Where `dir` is either None or the undeleted directory of temporary files,
    and `temp_fs` is the TempFS object used to create `dir` (if `dir` is not None).
    When the `exportconfig` option is set, the configuration is written to
    that path and (True, None, None) is returned without running anything.

    Note that if `argv` is sys.argv then the first element will be skipped, but
    if it is not the sys.argv list then the first element will be interpreted
    as an argument (and will NOT be skipped). The check is made with `==`, so
    a list that merely compares equal to sys.argv is treated the same way.

    Raises Exception if a settings file named on the command line does not
    exist or cannot be read as a configuration file. Calls sys.exit() if no
    input is specified, or if `raxml_search_after` is requested with a tree
    estimator other than FastTree. Errors raised while parsing the input for
    the `auto`/`untrusted` modes are reported and re-raised.
    '''
    _START_TIME = time.time()
    usage = """usage: %prog [options] <settings_file1> <settings_file2> ..."""
    parser = optparse.OptionParser(
        usage=usage,
        description=PROGRAM_LONG_DESCRIPTION,
        formatter=IndentedHelpFormatterWithNL(),
        version="%s v%s" % (PROGRAM_NAME, PROGRAM_VERSION))

    user_config = get_configuration()
    command_line_group = user_config.get('commandline')
    command_line_group.add_to_optparser(parser)
    sate_group = user_config.get('sate')
    sate_group.add_to_optparser(parser)

    group = optparse.OptionGroup(parser, "SATe tools extra options")
    group.add_option(
        '--tree-estimator-model',
        type='string',
        dest='tree_estimator_model',
        help='Do not use this option.')
    parser.add_option_group(group)

    if argv == sys.argv:
        (options, args) = parser.parse_args(argv[1:])
    else:
        (options, args) = parser.parse_args(argv)
    #if options.multilocus:
    #    sys.exit("SATe: Multilocus mode is disabled in this release.")

    # The model override is applied only when no settings files were given.
    if options.tree_estimator_model and options.tree_estimator and len(
            args) == 0:
        if options.tree_estimator.lower() == 'raxml':
            user_config.raxml.model = options.tree_estimator_model
        elif options.tree_estimator.lower() == 'fasttree':
            user_config.fasttree.model = options.tree_estimator_model

    # Positional arguments are settings files; they are read in order before
    # the command-line option values are applied on top of them.
    config_filenames = list(args)
    for fn in config_filenames:
        # Strip one pair of surrounding double quotes. The length guard keeps
        # an empty argument from raising IndexError on fn[0].
        if len(fn) >= 2 and fn[0] == '"' and fn[-1] == '"':
            fn = fn[1:-1]
        if not os.path.exists(fn):
            raise Exception(
                'The configuration (settings) file "%s" does not exist' % fn)
        try:
            user_config.read_config_filepath(fn)
        except Exception:
            # Only ordinary errors are reported as a bad settings file;
            # KeyboardInterrupt and SystemExit propagate unchanged.
            raise Exception(
                'The file "%s" does not appear to be a valid configuration file format. It lacks section headers.'
                % fn)
    user_config.set_values_from_dict(options.__dict__)
    command_line_group.job = coerce_string_to_nice_outfilename(
        command_line_group.job, 'Job', 'satejob')

    if user_config.commandline.auto or (user_config.commandline.untrusted):
        if user_config.commandline.input is None:
            sys.exit("ERROR: Input file(s) not specified.")
        from sate.usersettingclasses import get_list_of_seq_filepaths_from_dir
        from sate.alignment import summary_stats_from_parse
        try:
            if user_config.commandline.multilocus:
                fn_list = get_list_of_seq_filepaths_from_dir(
                    user_config.commandline.input)
            else:
                fn_list = [user_config.commandline.input]
            datatype_list = [user_config.commandline.datatype.upper()]
            careful_parse = user_config.commandline.untrusted
            summary_stats = summary_stats_from_parse(
                fn_list, datatype_list, careful_parse=careful_parse)
        except:
            # Report the failure, then re-raise whatever was raised (nothing
            # is swallowed here).
            if user_config.commandline.auto:
                MESSENGER.send_error(
                    "Error reading input while setting options for the --auto mode\n"
                )
            else:
                MESSENGER.send_error("Error reading input\n")
            raise
        if user_config.commandline.auto:
            # Turn the flag off before applying the derived defaults to the
            # 'sate', 'commandline' and 'fasttree' groups.
            user_config.commandline.auto = False
            auto_opts = get_auto_defaults_from_summary_stats(
                summary_stats[0], summary_stats[1], summary_stats[2])
            user_config.get('sate').set_values_from_dict(auto_opts['sate'])
            user_config.get('commandline').set_values_from_dict(
                auto_opts['commandline'])
            user_config.get('fasttree').set_values_from_dict(
                auto_opts['fasttree'])

    if user_config.commandline.raxml_search_after:
        if user_config.sate.tree_estimator.upper() != 'FASTTREE':
            sys.exit(
                "ERROR: the 'raxml_search_after' option is only supported when the tree_estimator is FastTree"
            )

    exportconfig = command_line_group.exportconfig
    if exportconfig:
        # Clear the option before saving so that it is not written into the
        # exported configuration itself.
        command_line_group.exportconfig = None
        user_config.save_to_filepath(exportconfig)

        ### TODO: wrap up in messaging system
        sys.stdout.write(
            'Configuration written to "%s". Exiting successfully.\n' %
            exportconfig)

        return True, None, None

    if user_config.commandline.input is None:
        sys.exit("ERROR: Input file(s) not specified.")

    # note: need to read sequence files first to allow SateProducts to
    # correctly self-configure
    user_config.read_seq_filepaths(
        src=user_config.commandline.input,
        multilocus=user_config.commandline.multilocus)
    sate_products = filemgr.SateProducts(user_config)

    # Always save a copy of the effective configuration next to the other
    # products (the flag is hard-coded to True).
    export_config_as_temp = True
    if export_config_as_temp:
        name_cfg = sate_products.get_abs_path_for_tag('sate_config.txt')
        command_line_group.exportconfig = None
        user_config.save_to_filepath(name_cfg)
        MESSENGER.send_info('Configuration written to "%s".\n' % name_cfg)

    MESSENGER.run_log_streams.append(sate_products.run_log_stream)
    MESSENGER.err_log_streams.append(sate_products.err_log_stream)
    temp_dir, temp_fs = run_sate_from_config(user_config, sate_products)
    _TIME_SPENT = time.time() - _START_TIME
    MESSENGER.send_info("Total time spent: %ss" % _TIME_SPENT)
    return True, temp_dir, temp_fs