def create_time_plot(config_path,
        info_path,
        out_dir,
        iteration_index = 249):
    """
    Plot marginal divergence times and save the figure as a PDF.

    The posterior summary that is plotted is located by appending
    `<iteration_index>-posterior-summary.txt` to the result path prefix
    reported by `DMCSimulationResults.get_result_path_prefix(1, 1, 1)`.
    The figure is written to
    `<out_dir>/negros-panay-marginal-times.pdf`.

    Args:
        config_path: Path to the msBayes config file; it is parsed for the
            taxon names and also handed to the plotting function.
        info_path: Path handed to `DMCSimulationResults` to locate the
            results.
        out_dir: Directory for the output PDF. It is created (along with
            any missing parent directories) if it does not exist.
        iteration_index: Index embedded in the posterior-summary file name.
    """
    # `makedirs` rather than `mkdir`, so a nested `out_dir` whose parents do
    # not exist yet is created instead of raising OSError.
    if not os.path.exists(out_dir):
        os.makedirs(out_dir)
    cfg = MsBayesConfig(config_path)
    dmc = DMCSimulationResults(info_path)
    sum_path = '{0}{1}-posterior-summary.txt'.format(
            dmc.get_result_path_prefix(1, 1, 1),
            iteration_index)
    labels = []
    for taxon in cfg.taxa:
        # Taxon names are split on '.' and the first two pieces are joined
        # with a space and set in italics. Slicing (rather than indexing
        # [1]) keeps a name without a '.' from raising IndexError.
        name_parts = taxon.strip().split('.')
        labels.append(r'\textit{{{0}}}'.format(' '.join(name_parts[:2])))
    pg = plotting.get_marginal_divergence_time_plot(
            config_path = config_path,
            posterior_summary_path = sum_path,
            labels = labels,
            horizontal = True,
            label_size = 12.0,
            measure_tick_label_size = 12.0,
            measure_axis_label = 'Divergence time ($4N_C$ generations)',
            measure_axis_label_size = 16.0,
            label_axis_label = 'Taxon pair',
            label_axis_label_size = 16.0)
    pg.savefig(os.path.join(out_dir, 'negros-panay-marginal-times.pdf'))
예제 #2
0
def _build_arg_parser():
    """
    Build the command-line argument parser used by `main_cli`.

    Returns:
        An `argparse.ArgumentParser` whose description is the program name
        and version taken from the module-level `_program_info` mapping.
    """
    description = '{name} {version}'.format(**_program_info)
    parser = argparse.ArgumentParser(description=description)
    parser.add_argument('info_path',
                        metavar='PYMSBAYES-INFO-FILE',
                        type=argparse_utils.arg_is_file,
                        help=('Path to `pymsbayes-info.txt` file.'))
    parser.add_argument(
        '-n',
        '--num-prior-samples',
        action='store',
        type=argparse_utils.arg_is_positive_int,
        default=100000,
        help=('The number of prior samples to simulate for estimating '
              'prior probabilities.'))
    parser.add_argument(
        '-i',
        '--sample-index',
        action='store',
        type=argparse_utils.arg_is_positive_int,
        help=('The prior-sample index of results to be summarized. '
              'Output files should have a consistent schema. For '
              'example, a results file for divergence models might look '
              'something like '
              '`d1-m1-s1-1000000-div-model-results.txt`. In this example, '
              'the prior-sample index is "1000000". The default is to '
              'use the largest prior-sample index, which is probably '
              'what you want.'))
    parser.add_argument(
        '-o',
        '--output-dir',
        action='store',
        type=argparse_utils.arg_is_dir,
        help=('The directory in which all output plots will be written. '
              'The default is to use the directory of the pymsbayes info '
              'file.'))
    parser.add_argument(
        '--np',
        action='store',
        type=argparse_utils.arg_is_positive_int,
        default=multiprocessing.cpu_count(),
        help=('The maximum number of processes to run in parallel. The '
              'default is the number of CPUs available on the machine.'))
    parser.add_argument(
        '-m',
        '--mu',
        action='store',
        type=argparse_utils.arg_is_positive_float,
        default=None,
        help=('The mutation rate with which to scale time to units of '
              'generations. By default, time is not scaled to '
              'generations.'))
    parser.add_argument(
        '--extension',
        action='store',
        type=str,
        default='pdf',
        help=('The file format extension of the plots (e.g., "pdf", '
              '"png"). The default is pdf.'))
    parser.add_argument('--seed',
                        action='store',
                        type=argparse_utils.arg_is_positive_int,
                        help='Random number seed to use for the analysis.')
    parser.add_argument('--version',
                        action='version',
                        version='%(prog)s ' + _program_info['version'],
                        help='Report version and exit.')
    parser.add_argument('--quiet',
                        action='store_true',
                        help='Run without verbose messaging.')
    parser.add_argument('--debug',
                        action='store_true',
                        help='Run in debugging mode.')
    return parser


def _get_time_multiplier(prior_cfg, mu):
    """
    Return the factor by which divergence times are multiplied for plotting.

    Args:
        prior_cfg: Prior configuration object; its `time_in_subs_per_site`,
            `theta.mean` and `d_theta.mean` attributes are read.
        mu: Mutation rate given with `-m/--mu`, or None.

    Returns:
        1.0 when `mu` is None; `1.0 / mu` when
        `prior_cfg.time_in_subs_per_site` is true; otherwise the mean of
        the theta prior divided by `mu`.
    """
    if mu is None:
        return 1.0
    if prior_cfg.time_in_subs_per_site:
        return 1.0 / mu
    try:
        mean_theta = prior_cfg.theta.mean
    except Exception:
        # Fall back to `d_theta` when `theta` cannot supply a mean. This
        # was a bare `except:`, which also swallowed KeyboardInterrupt and
        # SystemExit.
        mean_theta = prior_cfg.d_theta.mean
    return mean_theta / mu


def main_cli():
    """
    Command-line entry point: plot the results of a pymsbayes analysis.

    Parses the command line, configures logging, seeds `GLOBAL_RNG`, and
    writes plots into a `plots` sub-directory of the output directory (by
    default the directory containing the info file). For every
    observed-index/prior-index combination in the results it writes:

    * when `results.sort_index == 0`, a marginal divergence-time plot and
      an ordered divergence-model plot; otherwise an unordered
      divergence-model plot;
    * a number-of-divergences plot, a Bayes-factor-only plot, and a
      tab-delimited text file of the number-of-divergences Bayes factors.

    A plot is skipped with a warning when `get_result_path` returns a
    false value for the result file it needs.

    Exits with status 1 if `matplotlib` is unavailable or if the results
    report more than one simulation replicate.
    """
    args = _build_arg_parser().parse_args()

    ##########################################################################
    ## handle args

    from pymsbayes.utils.messaging import LoggingControl

    LoggingControl.set_logging_level("INFO")
    if args.quiet:
        LoggingControl.set_logging_level("WARNING")
    if args.debug:
        LoggingControl.set_logging_level("DEBUG")
    log = LoggingControl.get_logger(__name__)

    # NOTE(review): these imports are kept after the logging level is set,
    # as in the original; presumably the order matters -- confirm before
    # moving them to module level.
    from pymsbayes import plotting
    from pymsbayes.utils import sumresults
    from pymsbayes.utils import GLOBAL_RNG

    if not plotting.MATPLOTLIB_AVAILABLE:
        log.error('`matplotlib` could not be imported, so plots can not be\n'
                  'produced. Please install `matplotlib` and try again.')
        sys.exit(1)

    if not args.seed:
        args.seed = random.randint(1, 999999999)
    GLOBAL_RNG.seed(args.seed)

    if not args.output_dir:
        args.output_dir = os.path.dirname(args.info_path)
    args.output_dir = os.path.join(args.output_dir, 'plots')
    if not os.path.exists(args.output_dir):
        os.mkdir(args.output_dir)

    results = sumresults.DMCSimulationResults(args.info_path)
    if results.num_sim_reps > 1:
        log.error('Results appear to be from simulation-based analysis, '
                  'for which this plotting script is not appropriate.')
        sys.exit(1)

    observed_indices = sorted(results.observed_index_to_config.keys())
    prior_indices = sorted(results.prior_index_to_config.keys())
    for obs_idx in observed_indices:
        for prior_idx in prior_indices:
            # Honour -i/--sample-index when given (it used to be parsed but
            # ignored); otherwise use the largest available index, as the
            # option's help text promises. If the requested index has no
            # result files, the checks below warn and skip each plot.
            if args.sample_index is None:
                result_idx = max(
                    results.get_result_indices(obs_idx, prior_idx, 1))
            else:
                result_idx = args.sample_index
            result_path_prefix = '{0}{1}-'.format(
                results.get_result_path_prefix(obs_idx, prior_idx, 1),
                result_idx)
            out_prefix = os.path.join(args.output_dir,
                                      os.path.basename(result_path_prefix))
            prior_cfg = results.prior_configs[prior_idx]
            posterior_summary_path = get_result_path(result_path_prefix,
                                                     'posterior-summary')
            div_model_path = get_result_path(result_path_prefix,
                                             'div-model-results')
            config_path = results.prior_index_to_config[prior_idx]
            time_multiplier = _get_time_multiplier(prior_cfg, args.mu)

            if results.sort_index == 0:
                #plot marginal times
                if not posterior_summary_path:
                    log.warning('Could not find {0}{1}.txt(.gz); '
                                'Skipping marginal times plot...'.format(
                                    result_path_prefix, 'posterior-summary'))
                else:
                    # The label axis grows with the number of taxon pairs.
                    label_dimension = (0.34 * (prior_cfg.npairs + 1)) + 0.56
                    marginal_times_plot = plotting.get_marginal_divergence_time_plot(
                        config_path=config_path,
                        posterior_summary_path=posterior_summary_path,
                        labels=None,
                        estimate='median',
                        interval='HPD_95_interval',
                        time_multiplier=time_multiplier,
                        horizontal=True,
                        label_dimension=label_dimension,
                        measure_dimension=8.0,
                        label_size=12.0,
                        measure_tick_label_size=12.0,
                        measure_axis_label='Divergence time',
                        measure_axis_label_size=14.0,
                        label_axis_label='Taxon pair',
                        label_axis_label_size=14.0,
                        usetex=False)
                    marginal_times_path = '{0}{1}'.format(
                        out_prefix,
                        'marginal-divergence-times.' + args.extension)
                    marginal_times_plot.savefig(marginal_times_path)

                #plot top ordered models
                if not div_model_path:
                    log.warning('Could not find {0}{1}.txt(.gz); '
                                'Skipping ordered div model plot...'.format(
                                    result_path_prefix, 'div-model-results'))
                else:
                    height = 12.0
                    margin_top = 0.99
                    margin_left = 0.03
                    padding_between_vertical = 0.8
                    # Layout is adjusted when there are fewer than four
                    # taxon pairs.
                    if prior_cfg.npairs < 4:
                        height *= 0.8
                        margin_top -= 0.01
                        margin_left += 0.05
                        padding_between_vertical += 0.3
                    width = (0.38 * prior_cfg.npairs) + 1.5
                    div_model_plot = plotting.OrderedDivergenceModelPlotGrid(
                        div_model_results_path=div_model_path,
                        config_path=config_path,
                        num_top_models=10,
                        time_multiplier=time_multiplier,
                        height=height,
                        width=width,
                        plot_label_schema='uppercase',
                        plot_label_offset=0,
                        plot_label_size=12.0,
                        y_title='Divergence time',
                        y_title_size=14.0,
                        y_tick_label_size=10.0,
                        right_text_size=10.0,
                        margin_left=margin_left,
                        margin_bottom=0.0,
                        margin_right=1,
                        margin_top=margin_top,
                        padding_between_vertical=padding_between_vertical,
                        tab=0.08)
                    plot = div_model_plot.create_grid()
                    div_model_plot_path = '{0}{1}'.format(
                        out_prefix, 'ordered-div-models.' + args.extension)
                    plot.savefig(div_model_plot_path)

            else:
                #plot top unordered models
                if not div_model_path:
                    log.warning('Could not find {0}{1}.txt(.gz); '
                                'Skipping unordered div model plot...'.format(
                                    result_path_prefix, 'div-model-results'))
                else:
                    width = (0.38 * prior_cfg.npairs) + 1.5
                    div_model_plot = plotting.UnorderedDivergenceModelPlotGrid(
                        div_model_results_path=div_model_path,
                        num_top_models=10,
                        time_multiplier=time_multiplier,
                        height=10.0,
                        width=width,
                        data_label_size=10.0,
                        plot_label_schema='uppercase',
                        plot_label_offset=0,
                        plot_label_size=12.0,
                        y_title='Divergence time',
                        y_title_size=14.0,
                        y_tick_label_size=10.0,
                        right_text_size=10.0,
                        margin_left=0.03,
                        margin_bottom=0.0,
                        margin_right=1,
                        margin_top=0.99,
                        padding_between_vertical=0.8,
                        tab=0.08)
                    plot = div_model_plot.create_grid()
                    # NOTE(review): the unordered plot is saved under the
                    # 'ordered-div-models' name. This looks like a
                    # copy/paste slip, but the file name is left unchanged
                    # because existing workflows may rely on it -- confirm.
                    div_model_plot_path = '{0}{1}'.format(
                        out_prefix, 'ordered-div-models.' + args.extension)
                    plot.savefig(div_model_plot_path)

            #plot ndiv plot
            psi_path = get_result_path(result_path_prefix, 'psi-results')
            if not psi_path:
                log.warning('Could not find {0}{1}.txt(.gz); '
                            'Skipping number of divergences plot...'.format(
                                result_path_prefix, 'psi-results'))
            else:
                # Width scales with the number of taxon pairs, with a
                # floor of 2.8.
                width = max((0.25 * prior_cfg.npairs) + 0.55, 2.8)
                num_div_summary = plotting.NumberOfDivergencesSummary(
                    config_path=results.prior_index_to_config[prior_idx],
                    psi_results_path=psi_path,
                    posterior_summary_path=posterior_summary_path,
                    num_prior_samples=args.num_prior_samples,
                    num_processors=args.np)
                num_div_summary.create_plot(plot_label_size=10.0,
                                            right_text_size=10.0,
                                            x_label_size=10.0,
                                            y_label_size=10.0,
                                            xtick_label_size=10.0,
                                            ytick_label_size=8.0,
                                            height=6.0,
                                            width=width,
                                            margin_bottom=0.0,
                                            margin_left=0.0,
                                            margin_top=0.97,
                                            margin_right=1.0,
                                            padding_between_vertical=1.0)
                num_div_plot_path = '{0}{1}'.format(
                    out_prefix, 'number-of-divergences.' + args.extension)
                num_div_summary.save_plot(num_div_plot_path)

                bf_plot_path = '{0}{1}'.format(
                    out_prefix, ('number-of-divergences-bayes-factors-only.' +
                                 args.extension))
                num_div_summary.save_bf_plot(bf_plot_path)

                # Tab-delimited table: one row per number of divergences.
                num_div_bf_path = '{0}{1}'.format(
                    out_prefix, 'number-of-divergences-bayes-factors.txt')
                with open(num_div_bf_path, 'w') as out:
                    out.write('num_of_divs\t2ln(bf)\n')
                    for n in sorted(num_div_summary.psi_bayes_factors.keys()):
                        out.write('{0}\t{1}\n'.format(
                            n, num_div_summary.psi_bayes_factors[n]))

    log.info('The plots are in: {0}'.format(args.output_dir))
예제 #3
0
def main_cli():
    """
    Command-line entry point: plot the results of a pymsbayes analysis.

    Parses the command line, configures logging, seeds `GLOBAL_RNG`, and
    writes plots into a `plots` sub-directory of the output directory (by
    default the directory containing the info file). For every
    observed-index/prior-index combination in the results it writes:

    * when `results.sort_index == 0`, a marginal divergence-time plot and
      an ordered divergence-model plot; otherwise an unordered
      divergence-model plot;
    * a number-of-divergences plot, a Bayes-factor-only plot, and a
      tab-delimited text file of the number-of-divergences Bayes factors.

    A plot is skipped with a warning when `get_result_path` returns a
    false value for the result file it needs.

    Exits with status 1 if `matplotlib` is unavailable or if the results
    report more than one simulation replicate.
    """
    description = '{name} {version}'.format(**_program_info)
    parser = argparse.ArgumentParser(description = description)
    parser.add_argument('info_path',
            metavar='PYMSBAYES-INFO-FILE',
            type=argparse_utils.arg_is_file,
            help=('Path to `pymsbayes-info.txt` file.'))
    parser.add_argument('-n', '--num-prior-samples',
            action = 'store',
            type = argparse_utils.arg_is_positive_int,
            default = 100000,
            help = ('The number of prior samples to simulate for estimating '
                    'prior probabilities.'))
    parser.add_argument('-i', '--sample-index',
            action = 'store',
            type = argparse_utils.arg_is_positive_int,
            help = ('The prior-sample index of results to be summarized. '
                    'Output files should have a consistent schema. For '
                    'example, a results file for divergence models might look '
                    'something like '
                    '`d1-m1-s1-1000000-div-model-results.txt`. In this example, '
                    'the prior-sample index is "1000000". The default is to '
                    'use the largest prior-sample index, which is probably '
                    'what you want.'))
    parser.add_argument('-o', '--output-dir',
            action = 'store',
            type = argparse_utils.arg_is_dir,
            help = ('The directory in which all output plots will be written. '
                    'The default is to use the directory of the pymsbayes info '
                    'file.'))
    parser.add_argument('--np',
            action = 'store',
            type = argparse_utils.arg_is_positive_int,
            default = multiprocessing.cpu_count(),
            help = ('The maximum number of processes to run in parallel. The '
                    'default is the number of CPUs available on the machine.'))
    parser.add_argument('-m', '--mu',
            action = 'store',
            type = argparse_utils.arg_is_positive_float,
            default = None,
            help = ('The mutation rate with which to scale time to units of '
                    'generations. By default, time is not scaled to '
                    'generations.'))
    parser.add_argument('--extension',
            action = 'store',
            type = str,
            default = 'pdf',
            help = ('The file format extension of the plots (e.g., "pdf", '
                    '"png"). The default is pdf.'))
    parser.add_argument('--seed',
            action = 'store',
            type = argparse_utils.arg_is_positive_int,
            help = 'Random number seed to use for the analysis.')
    parser.add_argument('--version',
            action = 'version',
            version = '%(prog)s ' + _program_info['version'],
            help = 'Report version and exit.')
    parser.add_argument('--quiet',
            action = 'store_true',
            help = 'Run without verbose messaging.')
    parser.add_argument('--debug',
            action = 'store_true',
            help = 'Run in debugging mode.')

    args = parser.parse_args()

    ##########################################################################
    ## handle args

    from pymsbayes.utils.messaging import LoggingControl

    LoggingControl.set_logging_level("INFO")
    if args.quiet:
        LoggingControl.set_logging_level("WARNING")
    if args.debug:
        LoggingControl.set_logging_level("DEBUG")
    log = LoggingControl.get_logger(__name__)

    # NOTE(review): these imports are kept after the logging level is set,
    # as in the original; presumably the order matters -- confirm before
    # moving them to module level.
    from pymsbayes import plotting
    from pymsbayes.utils import sumresults
    from pymsbayes.utils import GLOBAL_RNG

    if not plotting.MATPLOTLIB_AVAILABLE:
        log.error(
                '`matplotlib` could not be imported, so plots can not be\n'
                'produced. Please install `matplotlib` and try again.')
        sys.exit(1)

    if not args.seed:
        args.seed = random.randint(1, 999999999)
    GLOBAL_RNG.seed(args.seed)

    if not args.output_dir:
        args.output_dir = os.path.dirname(args.info_path)
    args.output_dir = os.path.join(args.output_dir, 'plots')
    if not os.path.exists(args.output_dir):
        os.mkdir(args.output_dir)

    results = sumresults.DMCSimulationResults(args.info_path)
    if results.num_sim_reps > 1:
        log.error('Results appear to be from simulation-based analysis, '
                'for which this plotting script is not appropriate.')
        sys.exit(1)

    observed_indices = sorted(results.observed_index_to_config.keys())
    prior_indices = sorted(results.prior_index_to_config.keys())
    for obs_idx in observed_indices:
        for prior_idx in prior_indices:
            # Honour -i/--sample-index when given (it used to be parsed but
            # ignored); otherwise use the largest available index, as the
            # option's help text promises. If the requested index has no
            # result files, the checks below warn and skip each plot.
            if args.sample_index is None:
                result_idx = max(results.get_result_indices(
                        obs_idx, prior_idx, 1))
            else:
                result_idx = args.sample_index
            result_path_prefix = '{0}{1}-'.format(
                    results.get_result_path_prefix(obs_idx, prior_idx, 1),
                    result_idx)
            out_prefix = os.path.join(args.output_dir, os.path.basename(
                    result_path_prefix))
            prior_cfg = results.prior_configs[prior_idx]
            posterior_summary_path = get_result_path(result_path_prefix,
                    'posterior-summary')
            div_model_path = get_result_path(result_path_prefix,
                    'div-model-results')
            config_path = results.prior_index_to_config[prior_idx]
            # Times are plotted unscaled unless a mutation rate was given.
            time_multiplier = 1.0
            if args.mu is not None:
                if prior_cfg.time_in_subs_per_site:
                    time_multiplier = 1.0 / args.mu
                else:
                    try:
                        mean_theta = prior_cfg.theta.mean
                    except Exception:
                        # Fall back to `d_theta` when `theta` cannot supply
                        # a mean. This was a bare `except:`, which also
                        # swallowed KeyboardInterrupt and SystemExit.
                        mean_theta = prior_cfg.d_theta.mean
                    time_multiplier = mean_theta / args.mu

            if results.sort_index == 0:
                #plot marginal times
                if not posterior_summary_path:
                    log.warning('Could not find {0}{1}.txt(.gz); '
                            'Skipping marginal times plot...'.format(
                                    result_path_prefix,
                                    'posterior-summary'))
                else:
                    # The label axis grows with the number of taxon pairs.
                    label_dimension = (0.34 * (prior_cfg.npairs + 1)) + 0.56
                    marginal_times_plot = plotting.get_marginal_divergence_time_plot(
                            config_path = config_path,
                            posterior_summary_path = posterior_summary_path,
                            labels = None,
                            estimate = 'median',
                            interval = 'HPD_95_interval',
                            time_multiplier = time_multiplier,
                            horizontal = True,
                            label_dimension = label_dimension,
                            measure_dimension = 8.0,
                            label_size = 12.0,
                            measure_tick_label_size = 12.0,
                            measure_axis_label = 'Divergence time',
                            measure_axis_label_size = 14.0,
                            label_axis_label = 'Taxon pair',
                            label_axis_label_size = 14.0,
                            usetex = False)
                    marginal_times_path = '{0}{1}'.format(out_prefix,
                            'marginal-divergence-times.' + args.extension)
                    marginal_times_plot.savefig(marginal_times_path)

                #plot top ordered models
                if not div_model_path:
                    log.warning('Could not find {0}{1}.txt(.gz); '
                            'Skipping ordered div model plot...'.format(
                                    result_path_prefix,
                                    'div-model-results'))
                else:
                    height = 12.0
                    margin_top = 0.99
                    margin_left = 0.03
                    padding_between_vertical = 0.8
                    # Layout is adjusted when there are fewer than four
                    # taxon pairs.
                    if prior_cfg.npairs < 4:
                        height *= 0.8
                        margin_top -= 0.01
                        margin_left += 0.05
                        padding_between_vertical += 0.3
                    width = (0.38 * prior_cfg.npairs) + 1.5
                    div_model_plot = plotting.OrderedDivergenceModelPlotGrid(
                            div_model_results_path = div_model_path,
                            config_path = config_path,
                            num_top_models = 10,
                            time_multiplier = time_multiplier,
                            height = height,
                            width = width,
                            plot_label_schema = 'uppercase',
                            plot_label_offset = 0,
                            plot_label_size = 12.0,
                            y_title = 'Divergence time',
                            y_title_size = 14.0,
                            y_tick_label_size = 10.0,
                            right_text_size = 10.0,
                            margin_left = margin_left,
                            margin_bottom = 0.0,
                            margin_right = 1,
                            margin_top = margin_top,
                            padding_between_vertical = padding_between_vertical,
                            tab = 0.08)
                    plot = div_model_plot.create_grid()
                    div_model_plot_path = '{0}{1}'.format(out_prefix,
                            'ordered-div-models.' + args.extension)
                    plot.savefig(div_model_plot_path)

            else:
                #plot top unordered models
                if not div_model_path:
                    log.warning('Could not find {0}{1}.txt(.gz); '
                            'Skipping unordered div model plot...'.format(
                                    result_path_prefix,
                                    'div-model-results'))
                else:
                    width = (0.38 * prior_cfg.npairs) + 1.5
                    div_model_plot = plotting.UnorderedDivergenceModelPlotGrid(
                            div_model_results_path = div_model_path,
                            num_top_models = 10,
                            time_multiplier = time_multiplier,
                            height = 10.0,
                            width = width,
                            data_label_size = 10.0,
                            plot_label_schema = 'uppercase',
                            plot_label_offset = 0,
                            plot_label_size = 12.0,
                            y_title = 'Divergence time',
                            y_title_size = 14.0,
                            y_tick_label_size = 10.0,
                            right_text_size = 10.0,
                            margin_left = 0.03,
                            margin_bottom = 0.0,
                            margin_right = 1,
                            margin_top = 0.99,
                            padding_between_vertical = 0.8,
                            tab = 0.08)
                    plot = div_model_plot.create_grid()
                    # NOTE(review): the unordered plot is saved under the
                    # 'ordered-div-models' name. This looks like a
                    # copy/paste slip, but the file name is left unchanged
                    # because existing workflows may rely on it -- confirm.
                    div_model_plot_path = '{0}{1}'.format(out_prefix,
                            'ordered-div-models.' + args.extension)
                    plot.savefig(div_model_plot_path)

            #plot ndiv plot
            psi_path = get_result_path(result_path_prefix,
                    'psi-results')
            if not psi_path:
                log.warning('Could not find {0}{1}.txt(.gz); '
                        'Skipping number of divergences plot...'.format(
                                result_path_prefix,
                                'psi-results'))
            else:
                # Width scales with the number of taxon pairs, with a
                # floor of 2.8.
                width = max((0.25 * prior_cfg.npairs) + 0.55, 2.8)
                num_div_summary = plotting.NumberOfDivergencesSummary(
                        config_path = results.prior_index_to_config[prior_idx],
                        psi_results_path = psi_path,
                        posterior_summary_path = posterior_summary_path,
                        num_prior_samples = args.num_prior_samples,
                        num_processors = args.np)
                num_div_summary.create_plot(
                        plot_label_size = 10.0,
                        right_text_size = 10.0,
                        x_label_size = 10.0,
                        y_label_size = 10.0,
                        xtick_label_size = 10.0,
                        ytick_label_size = 8.0,
                        height = 6.0,
                        width = width,
                        margin_bottom = 0.0,
                        margin_left = 0.0,
                        margin_top = 0.97,
                        margin_right = 1.0,
                        padding_between_vertical = 1.0)
                num_div_plot_path = '{0}{1}'.format(out_prefix,
                        'number-of-divergences.' + args.extension)
                num_div_summary.save_plot(num_div_plot_path)

                bf_plot_path = '{0}{1}'.format(out_prefix,
                        ('number-of-divergences-bayes-factors-only.' +
                                args.extension))
                num_div_summary.save_bf_plot(bf_plot_path)

                # Tab-delimited table: one row per number of divergences.
                num_div_bf_path = '{0}{1}'.format(out_prefix,
                        'number-of-divergences-bayes-factors.txt')
                with open(num_div_bf_path, 'w') as out:
                    out.write('num_of_divs\t2ln(bf)\n')
                    for n in sorted(num_div_summary.psi_bayes_factors.keys()):
                        out.write('{0}\t{1}\n'.format(n,
                                num_div_summary.psi_bayes_factors[n]))

    log.info('The plots are in: {0}'.format(args.output_dir))