def test_new_implementation_round_trip(self):
    # Round-trip check: parse a config, write it back out, re-parse the
    # written text, and require both parsed configs to compare as the same.
    # (Comments are used instead of a docstring so unittest's reported test
    # description is unchanged.)
    overrides = {
        "c_shape": 2,
        "c_scale": 5,
        "theta_shape": 2.0,
        "theta_scale": 0.002,
        "tau_shape": 1.0,
        "tau_scale": 5.0,
        "a_theta_shape": 1,
        "a_theta_scale": 0.01,
        "theta_params": "001",
        "migration_shape": 1.5,
        "migration_scale": 0.1,
        "recombination_shape": 0.0,
        "recombination_scale": 0.0,
        "psi": 0,
    }
    # Apply the overrides to the fixture stream using the `new_impl` flavour
    # of the helper.
    self._update_config(self.cfg, overrides, new_impl=True)
    source_text = self.cfg.getvalue()
    _LOG.debug("testing config:\n\n{0}\n".format(source_text))

    first_pass = MsBayesConfig(self.cfg)

    # Serialize the parsed config into a fresh in-memory stream.
    buffer = StringIO()
    first_pass.write(buffer)
    written_text = buffer.getvalue()
    _LOG.debug("written config:\n\n{0}\n".format(written_text))

    # Rewind so the second parse starts at the beginning of the written text.
    buffer.seek(0)
    second_pass = MsBayesConfig(buffer)

    self.assertSameConfigs([first_pass, second_pass])
def test_new_implementation_round_trip(self):
    # Round-trip check: parse a config, write it back out, re-parse the
    # written text, and require both parsed configs to compare as the same.
    # (Comments are used instead of a docstring so unittest's reported test
    # description is unchanged.)
    overrides = {
        'c_shape': 2,
        'c_scale': 5,
        'theta_shape': 2.0,
        'theta_scale': 0.002,
        'tau_shape': 1.0,
        'tau_scale': 5.0,
        'a_theta_shape': 1,
        'a_theta_scale': 0.01,
        'theta_params': '001',
        'migration_shape': 1.5,
        'migration_scale': 0.1,
        'recombination_shape': 0.0,
        'recombination_scale': 0.0,
        'psi': 0,
    }
    # Apply the overrides to the fixture stream using the `new_impl` flavour
    # of the helper.
    self._update_config(self.cfg, overrides, new_impl=True)
    source_text = self.cfg.getvalue()
    _LOG.debug('testing config:\n\n{0}\n'.format(source_text))

    first_pass = MsBayesConfig(self.cfg)

    # Serialize the parsed config into a fresh in-memory stream.
    buffer = StringIO()
    first_pass.write(buffer)
    written_text = buffer.getvalue()
    _LOG.debug('written config:\n\n{0}\n'.format(written_text))

    # Rewind so the second parse starts at the beginning of the written text.
    buffer.seek(0)
    second_pass = MsBayesConfig(buffer)

    self.assertSameConfigs([first_pass, second_pass])
def test_multi_locus_round_trip(self):
    # Round-trip check for a config updated with `multi_locus=True`: parse,
    # write, re-parse, and require both parsed configs to compare as the
    # same. (Comments are used instead of a docstring so unittest's reported
    # test description is unchanged.)
    overrides = {
        "ltheta": 0.0001,
        "utheta": 0.1,
        "utau": 10.0,
        "psi": 0,
        "umig": 0.0,
        "urec": 0.0,
        "atheta": 1.0,
    }
    self._update_config(self.cfg, overrides, multi_locus=True)
    source_text = self.cfg.getvalue()
    _LOG.debug("testing config:\n\n{0}\n".format(source_text))

    first_pass = MsBayesConfig(self.cfg)

    # Serialize the parsed config into a fresh in-memory stream.
    buffer = StringIO()
    first_pass.write(buffer)
    written_text = buffer.getvalue()
    _LOG.debug("written config:\n\n{0}\n".format(written_text))

    # Rewind so the second parse starts at the beginning of the written text.
    buffer.seek(0)
    second_pass = MsBayesConfig(buffer)

    self.assertSameConfigs([first_pass, second_pass])
def test_multi_locus_round_trip(self):
    # Round-trip check for a config updated with `multi_locus=True`: parse,
    # write, re-parse, and require both parsed configs to compare as the
    # same. (Comments are used instead of a docstring so unittest's reported
    # test description is unchanged.)
    overrides = {
        'ltheta': 0.0001,
        'utheta': 0.1,
        'utau': 10.0,
        'psi': 0,
        'umig': 0.0,
        'urec': 0.0,
        'atheta': 1.0,
    }
    self._update_config(self.cfg, overrides, multi_locus=True)
    source_text = self.cfg.getvalue()
    _LOG.debug('testing config:\n\n{0}\n'.format(source_text))

    first_pass = MsBayesConfig(self.cfg)

    # Serialize the parsed config into a fresh in-memory stream.
    buffer = StringIO()
    first_pass.write(buffer)
    written_text = buffer.getvalue()
    _LOG.debug('written config:\n\n{0}\n'.format(written_text))

    # Rewind so the second parse starts at the beginning of the written text.
    buffer.seek(0)
    second_pass = MsBayesConfig(buffer)

    self.assertSameConfigs([first_pass, second_pass])
def main_cli():
    """Command-line entry point for generating a saturation plot.

    Steps performed:

    1. Parse the command-line options.
    2. Load the msBayes config named by ``--config``, set its
       ``div_model_prior`` to ``'constrained'`` and its ``psi`` to a
       ``DiscreteUniformDistribution(npairs, npairs)``, and write the modified
       config to a temporary file.
    3. Create ``--np`` ``MsBayesWorker`` objects (the last one also takes the
       remainder of the sample count) and hand them to
       ``Manager.run_workers``.
    4. Collect the statistics with ``get_stats_by_time`` and write them to
       ``stats-by-time.txt`` (``.gz`` is appended when ``--compress`` is set)
       in the output directory.
    5. If matplotlib is available, save ``saturation-plot.pdf``; otherwise
       log a warning pointing at the data file.
    6. Purge the temporary files unless ``--keep-temps`` was given.

    Run metadata is recorded in ``pymsbayes-info.txt`` in the output
    directory.

    Raises:
        Exception: if a requested stat prefix is not among the simulated
            statistics.
    """
    description = '{name} {version}'.format(**_program_info)
    parser = argparse.ArgumentParser(description=description)
    parser.add_argument('-c', '--config',
            type=arg_is_config,
            required=True,
            help=('msBayes config file to be used to generate saturation '
                  'plot.'))
    parser.add_argument('-n', '--num-prior-samples',
            action='store',
            type=int,
            default=1000,
            help=('The number of prior samples to simulate for the '
                  'saturation plot.'))
    parser.add_argument('--np',
            action='store',
            type=int,
            default=multiprocessing.cpu_count(),
            help=('The maximum number of processes to run in parallel. The '
                  'default is the number of CPUs available on the machine.'))
    parser.add_argument('-o', '--output-dir',
            action='store',
            type=arg_is_dir,
            help=('The directory in which all output files will be written. '
                  'The default is to use the directory of the first observed '
                  'config file.'))
    parser.add_argument('--temp-dir',
            action='store',
            type=arg_is_dir,
            help=('A directory to temporarily stage files. The default is to '
                  'use the output directory.'))
    parser.add_argument('-s', '--stat-prefixes',
            nargs='*',
            type=str,
            default=['pi', 'pi.net', 'wattTheta', 'tajD.denom'],
            help=('Prefixes of summary statistics to use in the analyses. '
                  'The prefixes should be separated by spaces. '
                  'Default: `-s pi pi.net wattTheta tajD.denom`.'))
    parser.add_argument('--vertical-lines',
            nargs='*',
            type=float,
            default=[],
            help=('Positions along x-axis where vertical lines are to be '
                  'drawn. Default is to draw no vertical lines.'))
    parser.add_argument('--compress',
            action='store_true',
            help='Compress plot data file.')
    parser.add_argument('--keep-temps',
            action='store_true',
            help='Keep all temporary files.')
    parser.add_argument('--seed',
            action='store',
            type=int,
            help='Random number seed to use for the analysis.')
    parser.add_argument('--version',
            action='version',
            version='%(prog)s ' + _program_info['version'],
            help='Report version and exit.')
    parser.add_argument('--quiet',
            action='store_true',
            help='Run without verbose messaging.')
    parser.add_argument('--debug',
            action='store_true',
            help='Run in debugging mode.')

    args = parser.parse_args()

    ##########################################################################
    ## handle args

    # The logging level is set before the remaining pymsbayes modules are
    # imported, which is why these imports live inside the function.
    from pymsbayes.utils.messaging import (LoggingControl, InfoLogger)

    LoggingControl.set_logging_level("INFO")
    if args.quiet:
        LoggingControl.set_logging_level("WARNING")
    if args.debug:
        LoggingControl.set_logging_level("DEBUG")
    log = LoggingControl.get_logger(__name__)

    from pymsbayes.workers import MsBayesWorker
    from pymsbayes.utils.parsing import (get_patterns_from_prefixes,
            DEFAULT_STAT_PATTERNS, get_stats_by_time, dict_line_iter)
    from pymsbayes.manager import Manager
    from pymsbayes.utils.tempfs import TempFileSystem
    from pymsbayes.utils import probability
    from pymsbayes.utils.functions import long_division
    from pymsbayes.config import MsBayesConfig
    from pymsbayes.utils import GLOBAL_RNG, MSBAYES_SORT_INDEX, ToolPathManager
    from pymsbayes.fileio import process_file_arg
    from pymsbayes.plotting import MATPLOTLIB_AVAILABLE, SaturationPlotGrid

    MSBAYES_SORT_INDEX.set_index(0)

    # get full paths to tools
    msbayes_path = ToolPathManager.get_tool_full_path('msbayes.pl')
    dpp_msbayes_path = ToolPathManager.get_tool_full_path('dpp-msbayes.pl')

    if not args.output_dir:
        args.output_dir = os.path.dirname(args.config)
    info = InfoLogger(os.path.join(args.output_dir, 'pymsbayes-info.txt'))

    stats_by_time_path = os.path.join(args.output_dir, 'stats-by-time.txt')
    if args.compress:
        stats_by_time_path += '.gz'
    plot_path = os.path.join(args.output_dir, 'saturation-plot.pdf')

    if not args.temp_dir:
        args.temp_dir = args.output_dir
    temp_fs = TempFileSystem(parent=args.temp_dir, prefix='temp-files-')

    # Normalize the prefixes so each ends in exactly one '.' when turned
    # into a pattern, whether or not the user typed a trailing dot.
    args.stat_prefixes = [s.rstrip('.') for s in args.stat_prefixes]
    stat_patterns = get_patterns_from_prefixes(
            [s + '.' for s in args.stat_prefixes],
            ignore_case=True)

    # Only draw a random seed when none was supplied; an explicit
    # `--seed 0` is a legitimate request and must be honored.
    if args.seed is None:
        args.seed = random.randint(1, 999999999)
    GLOBAL_RNG.seed(args.seed)

    compress_level = None
    if args.compress:
        compress_level = 9

    cfg = MsBayesConfig(args.config)
    num_taxon_pairs = cfg.npairs
    cfg.div_model_prior = 'constrained'
    cfg.psi = probability.DiscreteUniformDistribution(num_taxon_pairs,
            num_taxon_pairs)
    config_path = temp_fs.get_file_path(prefix='cfg-')
    cfg.write(config_path)

    info.write('[pymsbayes]', log.info)
    info.write('\tprogram_name = {name}'.format(**_program_info), log.info)
    info.write('\tversion = {version}'.format(**_program_info), log.info)
    info.write('\tinvocation = {0!r}'.format(' '.join(sys.argv)), log.info)
    info.write('\toutput_directory = {0!r}'.format(args.output_dir), log.info)
    info.write('\ttemp_directory = {0!r}'.format(temp_fs.base_dir), log.info)
    info.write('\tsort_index = {0}'.format(
            MSBAYES_SORT_INDEX.current_value()), log.info)
    info.write('\tstat_patterns = {0!r}'.format(
            ', '.join([p.pattern for p in stat_patterns])), log.info)
    info.write('\tseed = {0}'.format(args.seed), log.info)
    info.write('\tnum_prior_samples = {0}'.format(args.num_prior_samples),
            log.info)
    info.write('\tstats_by_time_path = {0!r}'.format(stats_by_time_path),
            log.info)
    info.write('\t[[tool_paths]]', log.info)
    info.write('\t\tdpp_msbayes = {0}'.format(dpp_msbayes_path), log.info)
    info.write('\t\tmsbayes = {0}'.format(msbayes_path), log.info)

    info.write('\t[[config]]', log.debug)
    info.write('{0}'.format(str(cfg)), log.debug)

    ##########################################################################
    ## begin analysis --- generate samples

    start_time = datetime.datetime.now()

    # Never start more workers than there are samples to simulate.
    if args.np > args.num_prior_samples:
        args.np = args.num_prior_samples
    batch_size, remainder = long_division(args.num_prior_samples, args.np)
    schema = 'abctoolbox'
    workers = []
    for i in range(args.np):
        sample_size = batch_size
        # The last worker absorbs whatever does not divide evenly.
        if i == (args.np - 1):
            sample_size += remainder
        w = MsBayesWorker(
                temp_fs=temp_fs,
                sample_size=sample_size,
                config_path=config_path,
                report_parameters=True,
                schema=schema,
                include_header=True,
                stat_patterns=stat_patterns,
                write_stats_file=False)
        workers.append(w)

    log.info('Generating samples...')
    workers = Manager.run_workers(
            workers=workers,
            num_processors=args.np)
    log.info('Parsing samples...')
    stats_by_time = get_stats_by_time([w.prior_path for w in workers])
    # Copy the keys into a real list: on Python 3 `keys()` is a view object
    # that has no `remove` method.
    stat_keys = list(stats_by_time.keys())
    stat_keys.remove('PRI.t')
    for prefix in args.stat_prefixes:
        if prefix not in stat_keys:
            raise Exception('stat prefix {0!r} not found in simulated stats:'
                    '\n\t{1}'.format(prefix, ', '.join(stat_keys)))
    header = ['PRI.t'] + args.stat_prefixes
    log.info('Writing stats-by-time matrix...')
    out, close = process_file_arg(stats_by_time_path, 'w',
            compresslevel=compress_level)
    # Close the stream even when writing fails, so a partially written
    # (possibly compressed) file is not left with an open handle.
    try:
        for row in dict_line_iter(stats_by_time, sep='\t', header=header):
            out.write(row)
    finally:
        if close:
            out.close()

    log.info('Creating plots...')

    if not MATPLOTLIB_AVAILABLE:
        log.warning(
                '`matplotlib` could not be imported, so the plot can not be\n'
                'produced. The data to create the plot can be found in:\n\t'
                '{0!r}'.format(stats_by_time_path))
    else:
        y_labels = {'pi': r'$\pi$',
                    'pi.net': r'$\pi_{net}$',
                    'wattTheta': r'$\theta_W$',
                    'tajD.denom': r'$SD(\pi - \theta_W)$'}
        spg = SaturationPlotGrid(stats_by_time,
                x_key='PRI.t',
                y_keys=args.stat_prefixes,
                y_labels=y_labels,
                num_columns=2,
                vertical_line_positions=args.vertical_lines)
        fig = spg.create_grid()
        fig.savefig(plot_path)

    stop_time = datetime.datetime.now()
    log.info('Done!')
    info.write('\t[[run_stats]]', log.info)
    info.write('\t\tstart_time = {0}'.format(str(start_time)), log.info)
    info.write('\t\tstop_time = {0}'.format(str(stop_time)), log.info)
    info.write('\t\ttotal_duration = {0}'.format(str(stop_time - start_time)),
            log.info)

    if not args.keep_temps:
        log.debug('purging temps...')
        temp_fs.purge()
def main_cli():
    """Command-line entry point for generating a saturation plot.

    Steps performed:

    1. Parse the command-line options.
    2. Load the msBayes config named by ``--config``, set its
       ``div_model_prior`` to ``'constrained'`` and its ``psi`` to a
       ``DiscreteUniformDistribution(npairs, npairs)``, and write the modified
       config to a temporary file.
    3. Create ``--np`` ``MsBayesWorker`` objects (the last one also takes the
       remainder of the sample count) and hand them to
       ``Manager.run_workers``.
    4. Collect the statistics with ``get_stats_by_time`` and write them to
       ``stats-by-time.txt`` (``.gz`` is appended when ``--compress`` is set)
       in the output directory.
    5. If matplotlib is available, save ``saturation-plot.pdf``; otherwise
       log a warning pointing at the data file.
    6. Purge the temporary files unless ``--keep-temps`` was given.

    Run metadata is recorded in ``pymsbayes-info.txt`` in the output
    directory.

    Raises:
        Exception: if a requested stat prefix is not among the simulated
            statistics.
    """
    description = '{name} {version}'.format(**_program_info)
    parser = argparse.ArgumentParser(description=description)
    parser.add_argument(
        '-c',
        '--config',
        type=arg_is_config,
        required=True,
        help=('msBayes config file to be used to generate saturation '
              'plot.'))
    parser.add_argument(
        '-n',
        '--num-prior-samples',
        action='store',
        type=int,
        default=1000,
        help=('The number of prior samples to simulate for the '
              'saturation plot.'))
    parser.add_argument(
        '--np',
        action='store',
        type=int,
        default=multiprocessing.cpu_count(),
        help=('The maximum number of processes to run in parallel. The '
              'default is the number of CPUs available on the machine.'))
    parser.add_argument(
        '-o',
        '--output-dir',
        action='store',
        type=arg_is_dir,
        help=('The directory in which all output files will be written. '
              'The default is to use the directory of the first observed '
              'config file.'))
    parser.add_argument(
        '--temp-dir',
        action='store',
        type=arg_is_dir,
        help=('A directory to temporarily stage files. The default is to '
              'use the output directory.'))
    parser.add_argument(
        '-s',
        '--stat-prefixes',
        nargs='*',
        type=str,
        default=['pi', 'pi.net', 'wattTheta', 'tajD.denom'],
        help=('Prefixes of summary statistics to use in the analyses. '
              'The prefixes should be separated by spaces. '
              'Default: `-s pi pi.net wattTheta tajD.denom`.'))
    parser.add_argument(
        '--vertical-lines',
        nargs='*',
        type=float,
        default=[],
        help=('Positions along x-axis where vertical lines are to be '
              'drawn. Default is to draw no vertical lines.'))
    parser.add_argument('--compress',
                        action='store_true',
                        help='Compress plot data file.')
    parser.add_argument('--keep-temps',
                        action='store_true',
                        help='Keep all temporary files.')
    parser.add_argument('--seed',
                        action='store',
                        type=int,
                        help='Random number seed to use for the analysis.')
    parser.add_argument('--version',
                        action='version',
                        version='%(prog)s ' + _program_info['version'],
                        help='Report version and exit.')
    parser.add_argument('--quiet',
                        action='store_true',
                        help='Run without verbose messaging.')
    parser.add_argument('--debug',
                        action='store_true',
                        help='Run in debugging mode.')

    args = parser.parse_args()

    ##########################################################################
    ## handle args

    # The logging level is set before the remaining pymsbayes modules are
    # imported, which is why these imports live inside the function.
    from pymsbayes.utils.messaging import (LoggingControl, InfoLogger)

    LoggingControl.set_logging_level("INFO")
    if args.quiet:
        LoggingControl.set_logging_level("WARNING")
    if args.debug:
        LoggingControl.set_logging_level("DEBUG")
    log = LoggingControl.get_logger(__name__)

    from pymsbayes.workers import MsBayesWorker
    from pymsbayes.utils.parsing import (get_patterns_from_prefixes,
                                         DEFAULT_STAT_PATTERNS,
                                         get_stats_by_time, dict_line_iter)
    from pymsbayes.manager import Manager
    from pymsbayes.utils.tempfs import TempFileSystem
    from pymsbayes.utils import probability
    from pymsbayes.utils.functions import long_division
    from pymsbayes.config import MsBayesConfig
    from pymsbayes.utils import GLOBAL_RNG, MSBAYES_SORT_INDEX, ToolPathManager
    from pymsbayes.fileio import process_file_arg
    from pymsbayes.plotting import MATPLOTLIB_AVAILABLE, SaturationPlotGrid

    MSBAYES_SORT_INDEX.set_index(0)

    # get full paths to tools
    msbayes_path = ToolPathManager.get_tool_full_path('msbayes.pl')
    dpp_msbayes_path = ToolPathManager.get_tool_full_path('dpp-msbayes.pl')

    if not args.output_dir:
        args.output_dir = os.path.dirname(args.config)
    info = InfoLogger(os.path.join(args.output_dir, 'pymsbayes-info.txt'))

    stats_by_time_path = os.path.join(args.output_dir, 'stats-by-time.txt')
    if args.compress:
        stats_by_time_path += '.gz'
    plot_path = os.path.join(args.output_dir, 'saturation-plot.pdf')

    if not args.temp_dir:
        args.temp_dir = args.output_dir
    temp_fs = TempFileSystem(parent=args.temp_dir, prefix='temp-files-')

    # Normalize the prefixes so each ends in exactly one '.' when turned
    # into a pattern, whether or not the user typed a trailing dot.
    args.stat_prefixes = [s.rstrip('.') for s in args.stat_prefixes]
    stat_patterns = get_patterns_from_prefixes(
        [s + '.' for s in args.stat_prefixes], ignore_case=True)

    # Only draw a random seed when none was supplied; an explicit
    # `--seed 0` is a legitimate request and must be honored.
    if args.seed is None:
        args.seed = random.randint(1, 999999999)
    GLOBAL_RNG.seed(args.seed)

    compress_level = None
    if args.compress:
        compress_level = 9

    cfg = MsBayesConfig(args.config)
    num_taxon_pairs = cfg.npairs
    cfg.div_model_prior = 'constrained'
    cfg.psi = probability.DiscreteUniformDistribution(num_taxon_pairs,
                                                      num_taxon_pairs)
    config_path = temp_fs.get_file_path(prefix='cfg-')
    cfg.write(config_path)

    info.write('[pymsbayes]', log.info)
    info.write('\tprogram_name = {name}'.format(**_program_info), log.info)
    info.write('\tversion = {version}'.format(**_program_info), log.info)
    info.write('\tinvocation = {0!r}'.format(' '.join(sys.argv)), log.info)
    info.write('\toutput_directory = {0!r}'.format(args.output_dir), log.info)
    info.write('\ttemp_directory = {0!r}'.format(temp_fs.base_dir), log.info)
    info.write('\tsort_index = {0}'.format(MSBAYES_SORT_INDEX.current_value()),
               log.info)
    info.write(
        '\tstat_patterns = {0!r}'.format(', '.join(
            [p.pattern for p in stat_patterns])), log.info)
    info.write('\tseed = {0}'.format(args.seed), log.info)
    info.write('\tnum_prior_samples = {0}'.format(args.num_prior_samples),
               log.info)
    info.write('\tstats_by_time_path = {0!r}'.format(stats_by_time_path),
               log.info)
    info.write('\t[[tool_paths]]', log.info)
    info.write('\t\tdpp_msbayes = {0}'.format(dpp_msbayes_path), log.info)
    info.write('\t\tmsbayes = {0}'.format(msbayes_path), log.info)

    info.write('\t[[config]]', log.debug)
    info.write('{0}'.format(str(cfg)), log.debug)

    ##########################################################################
    ## begin analysis --- generate samples

    start_time = datetime.datetime.now()

    # Never start more workers than there are samples to simulate.
    if args.np > args.num_prior_samples:
        args.np = args.num_prior_samples
    batch_size, remainder = long_division(args.num_prior_samples, args.np)
    schema = 'abctoolbox'
    workers = []
    for i in range(args.np):
        sample_size = batch_size
        # The last worker absorbs whatever does not divide evenly.
        if i == (args.np - 1):
            sample_size += remainder
        w = MsBayesWorker(temp_fs=temp_fs,
                          sample_size=sample_size,
                          config_path=config_path,
                          report_parameters=True,
                          schema=schema,
                          include_header=True,
                          stat_patterns=stat_patterns,
                          write_stats_file=False)
        workers.append(w)

    log.info('Generating samples...')
    workers = Manager.run_workers(workers=workers, num_processors=args.np)
    log.info('Parsing samples...')
    stats_by_time = get_stats_by_time([w.prior_path for w in workers])
    # Copy the keys into a real list: on Python 3 `keys()` is a view object
    # that has no `remove` method.
    stat_keys = list(stats_by_time.keys())
    stat_keys.remove('PRI.t')
    for prefix in args.stat_prefixes:
        if prefix not in stat_keys:
            raise Exception('stat prefix {0!r} not found in simulated stats:'
                            '\n\t{1}'.format(prefix, ', '.join(stat_keys)))
    header = ['PRI.t'] + args.stat_prefixes
    log.info('Writing stats-by-time matrix...')
    out, close = process_file_arg(stats_by_time_path,
                                  'w',
                                  compresslevel=compress_level)
    # Close the stream even when writing fails, so a partially written
    # (possibly compressed) file is not left with an open handle.
    try:
        for row in dict_line_iter(stats_by_time, sep='\t', header=header):
            out.write(row)
    finally:
        if close:
            out.close()

    log.info('Creating plots...')

    if not MATPLOTLIB_AVAILABLE:
        log.warning(
            '`matplotlib` could not be imported, so the plot can not be\n'
            'produced. The data to create the plot can be found in:\n\t'
            '{0!r}'.format(stats_by_time_path))
    else:
        y_labels = {
            'pi': r'$\pi$',
            'pi.net': r'$\pi_{net}$',
            'wattTheta': r'$\theta_W$',
            'tajD.denom': r'$SD(\pi - \theta_W)$'
        }
        spg = SaturationPlotGrid(stats_by_time,
                                 x_key='PRI.t',
                                 y_keys=args.stat_prefixes,
                                 y_labels=y_labels,
                                 num_columns=2,
                                 vertical_line_positions=args.vertical_lines)
        fig = spg.create_grid()
        fig.savefig(plot_path)

    stop_time = datetime.datetime.now()
    log.info('Done!')
    info.write('\t[[run_stats]]', log.info)
    info.write('\t\tstart_time = {0}'.format(str(start_time)), log.info)
    info.write('\t\tstop_time = {0}'.format(str(stop_time)), log.info)
    info.write('\t\ttotal_duration = {0}'.format(str(stop_time - start_time)),
               log.info)

    if not args.keep_temps:
        log.debug('purging temps...')
        temp_fs.purge()