Example #1
0
def pasta_main(argv=sys.argv):
    '''Run PASTA end to end from command-line style arguments.

    Returns the tuple (True, dir, temp_fs) on success, or raises an exception.
    `dir` is either None or the undeleted directory of temporary files, and
    `temp_fs` is the TempFS object used to create `dir` (when `dir` is not
    None). When the `exportconfig` option is set, the configuration is saved
    to that path and (True, None, None) is returned without running anything.

    The process exits through sys.exit() when `raxml_search_after` is
    requested with a tree estimator other than FastTree, or when no input
    was specified.

    Note on `argv`: if it is sys.argv the first element is skipped; if it is
    any other list, the first element is interpreted as an argument (and is
    NOT skipped).
    '''
    started_at = time.time()

    version_banner = "%s v%s" % (PROGRAM_NAME, PROGRAM_VERSION)
    parser = optparse.OptionParser(
        usage="""usage: %prog [options] <settings_file1> <settings_file2> ...""",
        description=PROGRAM_LONG_DESCRIPTION,
        formatter=IndentedHelpFormatterWithNL(),
        version=version_banner)

    # Register both option groups with the parser.
    user_config = get_configuration()
    command_line_group = user_config.get('commandline')
    command_line_group.add_to_optparser(parser)
    sate_group = user_config.get('sate')
    sate_group.add_to_optparser(parser)

    # First pass: only here so that the settings needed to compute the
    # automatic values are available.
    parse_user_options(argv, parser, user_config, command_line_group)

    # The automatic values are derived from the input data, so load it now.
    cmdline = user_config.commandline
    user_config.read_seq_filepaths(src=cmdline.input,
                                   multilocus=cmdline.multilocus)
    multilocus_dataset = read_input_sequences(
        user_config.input_seq_filepaths,
        datatype=user_config.commandline.datatype,
        missing=user_config.commandline.missing)

    # Install the automatic defaults ...
    populate_auto_options(user_config, multilocus_dataset, force=True)

    # ... then parse again so config files and command-line arguments
    # overwrite those automatic values.
    parse_user_options(argv, parser, user_config, command_line_group)

    # With --auto (or untrusted), the automatic values win over user options.
    if user_config.commandline.auto or (user_config.commandline.untrusted):
        populate_auto_options(user_config, multilocus_dataset)

    check_user_options(user_config)

    if (user_config.commandline.raxml_search_after
            and user_config.sate.tree_estimator.upper() != 'FASTTREE'):
        sys.exit(
            "ERROR: the 'raxml_search_after' option is only supported when the tree_estimator is FastTree"
        )

    export_path = command_line_group.exportconfig
    if export_path:
        # Clear the option before saving the configuration.
        command_line_group.exportconfig = None
        user_config.save_to_filepath(export_path)

        ### TODO: wrap up in messaging system
        sys.stdout.write(
            'Configuration written to "%s". Exiting successfully.\n' %
            export_path)
        return True, None, None

    if user_config.commandline.input is None:
        sys.exit("ERROR: Input file(s) not specified.")

    # note: need to read sequence files first to allow PastaProducts to
    # correctly self-configure
    pasta_products = filemgr.PastaProducts(user_config)

    # Always keep a copy of the effective configuration with the products.
    config_copy_path = pasta_products.get_abs_path_for_tag('pasta_config.txt')
    command_line_group.exportconfig = None
    user_config.save_to_filepath(config_copy_path)
    MESSENGER.send_info('Configuration written to "%s".\n' % config_copy_path)

    MESSENGER.run_log_streams.append(pasta_products.run_log_stream)
    MESSENGER.err_log_streams.append(pasta_products.err_log_stream)
    temp_dir, temp_fs = run_pasta_from_config(user_config, pasta_products,
                                              multilocus_dataset)

    MESSENGER.send_info("Total time spent: %ss" % (time.time() - started_at))
    return True, temp_dir, temp_fs
Example #2
0
    def create_and_verify(self,
            job_name="satejob",
            input_subdir=None,
            output_subdir=None,
            expected_index=None):
        """Create a PastaProducts for a new job and verify the files it opens.

        Input files are created under the job subdirectory "test-<job_name>"
        via `self.create_input_files`, and the list of input paths is then
        duplicated so that redundant inputs are exercised. A PastaProducts is
        built from a fresh configuration pointing at those inputs, and the
        method asserts that:

        * its output prefix has not been seen before in `self.output_prefixes`;
        * every meta product stream listed in
          `filemgr.PastaProducts.meta_product_types` exists on disk under the
          expected name;
        * there is one alignment stream per input path, each backed by an
          existing and distinct file.

        Random content is written to every stream, each stream is closed, and
        the (path, content) pair is recorded in `self.product_results`.

        NOTE(review): `output_subdir` and `expected_index` are accepted but do
        not influence anything in this method -- confirm whether the output
        directory was meant to be passed on to the configuration.

        Returns the PastaProducts instance for further checks by the caller.
        """

        ## create directories and files

        # job subdirectory
        job_subdir = "test-%s" % job_name

        # basic set of input sequences
        input_seq_filepaths = self.create_input_files(job_subdir=job_subdir,
                input_subdir=input_subdir)

        # check if we can handle redundant input files without overwriting output
        input_seq_filepaths.extend(list(input_seq_filepaths))

        ## create the product manager
        user_config = get_configuration()
        user_config.input_seq_filepaths = input_seq_filepaths
        user_config.commandline.input = input_seq_filepaths[0]
        sp = filemgr.PastaProducts(sate_user_settings=user_config)

        ## job prefix: must be unique
        output_prefix = sp.output_prefix
        self.assertNotIn(output_prefix, self.output_prefixes)
        self.output_prefixes.append(output_prefix)

        ## meta products (score, tree, and log files)
        self.assertTrue(hasattr(sp, "score_stream"))
        self.assertTrue(hasattr(sp, "tree_stream"))
        self.assertTrue(hasattr(sp, "run_log_stream"))
        self.assertTrue(hasattr(sp, "err_log_stream"))
        for stream_name, product_extension in filemgr.PastaProducts.meta_product_types.items():
            expected_fn = output_prefix + product_extension
            self.assertTrue(os.path.exists(expected_fn))
            stream_attr_name = stream_name + "_stream"
            self.assertTrue(hasattr(sp, stream_attr_name))
            stream = getattr(sp, stream_attr_name)
            # assertEqual, not the deprecated assertEquals alias (removed in
            # Python 3.12).
            self.assertEqual(
                    os.path.abspath(stream.name),
                    os.path.abspath(expected_fn))
            random_result = self.generate_random_result()
            self.product_results.append((expected_fn, random_result,))
            stream.write(random_result)
            stream.flush()
            stream.close()

        ## final alignment files
        self.assertEqual(len(sp.alignment_streams), len(input_seq_filepaths))
        align_fnames = []
        for alignment_stream in sp.alignment_streams:
            fn = os.path.abspath(alignment_stream.name)
            self.assertTrue(os.path.exists(fn))
            align_fnames.append(fn)
            random_result = self.generate_random_result()
            self.product_results.append((fn, random_result,))
            alignment_stream.write(random_result)
            alignment_stream.flush()
            alignment_stream.close()
        # Duplicate inputs must still map to distinct output files.
        self.assertEqual(len(set(align_fnames)), len(align_fnames))

        ## return sp, for further tests if needed
        return sp