def __init__(self, options):
    """Create the three forwarder lists used by this model.

    ``_prepare_alignments(options, key)`` is called once per option key
    below; afterwards every entry of each result is turned into a
    ``Forwarder`` with ``Forwarder.fromDirectory`` and the resulting lists
    are stored on the instance.
    """
    super(_IsolationInitialMigration3HMMModel, self).__init__()

    # NOTE(review): the second key is spelled 'scr1' (not 'src1').  It is
    # kept verbatim because it presumably has to match an option name
    # defined elsewhere -- confirm before renaming.
    src1_admix_dirs = _prepare_alignments(options, 'ziphmm_src1_admix')
    src1_src1_dirs = _prepare_alignments(options, 'ziphmm_scr1_scr1')
    admix_admix_dirs = _prepare_alignments(options, 'ziphmm_admix_admix')

    load = Forwarder.fromDirectory
    self.forwarders_src1_admix = [load(path) for path in src1_admix_dirs]
    self.forwarders_src1_src1 = [load(path) for path in src1_src1_dirs]
    self.forwarders_admix_admix = [load(path) for path in admix_admix_dirs]
def __init__(self, options):
    """Create the pairwise forwarder lists for the three populations.

    For each of the three option keys, ``_prepare_alignments`` is called
    first; then one ``Forwarder`` is built from every entry it returned.
    """
    super(_ThreePopAdmix23Model, self).__init__()

    # All alignments are prepared before any forwarder is created.
    dirs_src1_admix = _prepare_alignments(options, 'ziphmm_src1_admix')
    dirs_src2_admix = _prepare_alignments(options, 'ziphmm_src2_admix')
    dirs_src1_src2 = _prepare_alignments(options, 'ziphmm_src1_src2')

    def load_all(directories):
        """Return one Forwarder per entry of ``directories``."""
        return [Forwarder.fromDirectory(entry) for entry in directories]

    self.forwarders_src1_admix = load_all(dirs_src1_admix)
    self.forwarders_src2_admix = load_all(dirs_src2_admix)
    self.forwarders_src1_src2 = load_all(dirs_src1_src2)
def __init__(self, options):
    """Create the forwarder collections used by this model.

    The three between-population attributes (``forwarders_src1_admix``,
    ``forwarders_src2_admix``, ``forwarders_src1_src2``) each become a list
    of four forwarder lists, one per option key (the base key and its
    ``_2``/``_3``/``_4`` variants).  The three within-population attributes
    each become a flat list of forwarders.
    """
    super(_ThreePopAdmix23Model15HMM, self).__init__()

    def prepare(key):
        """Shorthand for ``_prepare_alignments(options, key)``."""
        return _prepare_alignments(options, key)

    def load_all(directories):
        """Return one Forwarder per entry of ``directories``."""
        return [Forwarder.fromDirectory(entry) for entry in directories]

    # As in the original ordering, every alignment is prepared before the
    # first forwarder is created.
    grouped_src1_admix = [
        prepare('ziphmm_src1_admix'),
        prepare('ziphmm_src1_admix_2'),
        prepare('ziphmm_src1_admix_3'),
        prepare('ziphmm_src1_admix_4'),
    ]
    grouped_src2_admix = [
        prepare('ziphmm_src2_admix'),
        prepare('ziphmm_src2_admix_2'),
        prepare('ziphmm_src2_admix_3'),
        prepare('ziphmm_src2_admix_4'),
    ]
    grouped_src1_src2 = [
        prepare('ziphmm_src1_src2'),
        prepare('ziphmm_src1_src2_2'),
        prepare('ziphmm_src1_src2_3'),
        prepare('ziphmm_src1_src2_4'),
    ]
    # NOTE(review): the key below is spelled 'scr1' (not 'src1'); kept
    # verbatim since it presumably must match an option defined elsewhere
    # -- confirm before renaming.
    dirs_src1_src1 = prepare('ziphmm_scr1_scr1')
    dirs_src2_src2 = prepare('ziphmm_src2_src2')
    dirs_admix_admix = prepare('ziphmm_admix_admix')

    self.forwarders_src1_admix = [load_all(group) for group in grouped_src1_admix]
    self.forwarders_src2_admix = [load_all(group) for group in grouped_src2_admix]
    self.forwarders_src1_src2 = [load_all(group) for group in grouped_src1_src2]

    self.forwarders_src1_src1 = load_all(dirs_src1_src1)
    self.forwarders_src2_src2 = load_all(dirs_src2_src2)
    self.forwarders_admix_admix = load_all(dirs_admix_admix)
def main():
    """
    Run the main script.

    Parse the command line, build one ``Forwarder`` per alignment directory,
    maximise ``Likelihood(ILSModel(...), forwarders)`` starting from the
    initial guesses given as options, and write the transformed estimate
    followed by its log likelihood as one tab-separated line to the output
    file.  When ``--logfile`` is given, that file is handed to
    ``maximum_likelihood_estimate`` as ``log_file``.
    """
    usage = ("%(prog)s [options] <forwarder dirs> "
             "This program estimates the parameters of an isolation model "
             "with two species and uniform coalescence and recombination "
             "rates.")

    # ``ArgumentParser(version=...)`` is deprecated in Python 2.7 and rejected
    # by Python 3; the explicit "version" action below provides the same
    # -v/--version switch on both.
    parser = ArgumentParser(usage=usage)
    parser.add_argument("-v", "--version", action="version",
                        version="%(prog)s 1.1")

    parser.add_argument("--header", action="store_true", default=False,
                        help="Include a header on the output")
    parser.add_argument("-o", "--outfile", type=str, default="/dev/stdout",
                        help="Output file for the estimate (/dev/stdout)")
    parser.add_argument(
        "--logfile", type=str, default=None,
        help="Log for all points estimated in the optimization")

    parser.add_argument(
        "--states-12", type=int, default=10,
        help="Number of intervals used to discretize the time between the "
             "first and second speciation (10)")
    parser.add_argument(
        "--states-123", type=int, default=10,
        help="Number of intervals used to discretize the time after the "
             "second speciation (10)")

    parser.add_argument(
        "--optimizer", type=str, default="Nelder-Mead",
        help="Optimization algorithm to use for maximizing the likelihood "
             "(Nealder-Mead)",
        choices=['Nelder-Mead', 'Powell', 'L-BFGS-B', 'TNC'])

    parser.add_argument(
        "--outgroup", action="store_true", default=None,
        help="Outgroup is included as fourth sequence in alignment.")

    # The initial guess for the outgroup height used to be registered as a
    # second "--outgroup" option, which argparse refuses ("conflicting option
    # string") so the parser could never be built.  It now has its own name;
    # "--outgroup" stays the on/off flag.
    optimized_params = [
        ('split-12', 'First split time in substitutions', 1e6 / 1e9),
        ('split-123', 'Second split time in substitutions', 1e6 / 1e9),
        ('theta-1',
         'effective population size in 4Ne substitutions for species 1',
         1e6 / 1e9),
        ('theta-2',
         'effective population size in 4Ne substitutions for species 2',
         1e6 / 1e9),
        ('theta-3',
         'effective population size in 4Ne substitutions for species 3',
         1e6 / 1e9),
        ('theta-12',
         'effective population size in 4Ne substitutions for species 12 '
         '(first ancestral)',
         1e6 / 1e9),
        ('theta-123',
         'effective population size in 4Ne substitutions for species 123 '
         '(ancestral to all)',
         1e6 / 1e9),
        ('rho', 'recombination rate in substitutions', 0.4),
        ('outgroup-height', 'total height of tree with outgroup', 1e6 / 1e9),
    ]

    for parameter_name, description, default in optimized_params:
        parser.add_argument("--%s" % parameter_name, type=float,
                            default=default,
                            help="Initial guess at the %s (%g)" %
                            (description, default))

    # nargs='+' already makes argparse reject an empty alignment list, so no
    # separate length check is needed.
    parser.add_argument('alignments', nargs='+',
                        help='Alignments in ZipHMM format')

    options = parser.parse_args()

    # The optimizer is started from 1 / (theta / 2) for each theta option.
    init_parameters = (options.split_12, options.split_123,
                       1 / (options.theta_1 / 2),
                       1 / (options.theta_2 / 2),
                       1 / (options.theta_3 / 2),
                       1 / (options.theta_12 / 2),
                       1 / (options.theta_123 / 2),
                       options.rho)
    if options.outgroup:
        init_parameters += (options.outgroup_height, )

    # NOTE(review): the result line written below carries one more column
    # (the log likelihood) than this header names -- confirm whether
    # downstream readers rely on that layout before changing it.
    output_header = [
        'split.time.12', 'split.time.123', 'theta.1', 'theta.2', 'theta.3',
        'theta.12', 'theta.123', 'rho'
    ]
    if options.outgroup:
        output_header.append("outgroup")

    forwarders = [Forwarder.fromDirectory(arg) for arg in options.alignments]
    log_likelihood = Likelihood(
        ILSModel(options.states_12, options.states_123), forwarders)

    # file.write(text + '\n') emits exactly what ``print >> file, text`` did
    # under Python 2 and also works under Python 3.
    if options.logfile:
        with open(options.logfile, 'w') as logfile:
            if options.header:
                logfile.write('\t'.join(output_header) + '\n')
            mle_parameters = maximum_likelihood_estimate(
                log_likelihood,
                init_parameters,
                optimizer_method=options.optimizer,
                log_file=logfile,
                log_param_transform=transform)
    else:
        mle_parameters = maximum_likelihood_estimate(
            log_likelihood,
            init_parameters,
            optimizer_method=options.optimizer)

    max_log_likelihood = log_likelihood(mle_parameters)

    with open(options.outfile, 'w') as outfile:
        if options.header:
            outfile.write('\t'.join(output_header) + '\n')
        outfile.write('\t'.join(
            map(str, transform(mle_parameters) + (max_log_likelihood, ))) +
            '\n')
def main():
    """Estimate isolation-model parameters and write them to the output file.

    Parses the command line with ``optparse``, loads one ``Forwarder`` per
    positional argument, builds the model with
    ``build_epoch_seperated_model`` and hands both to ``estimate_I``
    together with the starting values derived from the options.  The
    returned log likelihood and estimates are written as one tab-separated
    line, preceded by a header line when ``--header`` is given.
    """
    usage = ("%prog [options] <forwarder dirs> "
             "This program estimates the parameters of an isolation model "
             "with two species and uniform coalescence/recombination rate.")

    parser = OptionParser(usage=usage, version="%prog 1.0")

    parser.add_option("-o", "--out", dest="outfile", type="string",
                      default="/dev/stdout",
                      help="Output file for the estimate (/dev/stdout)")
    parser.add_option(
        "--tmpfile", dest="tmpfile", type="string", default="/dev/null",
        help="Log for all points estimated in the optimization (/dev/null)")

    # Options whose values are only starting points for the optimisation.
    optimized_params = [
        ('splittime', 'split time', 1e6),
        ('Ne', 'effective population size', 20e3),
        ('recomb', 'recombination rate', 0.1),
    ]
    for (cname, desc, default) in optimized_params:
        parser.add_option("--%s" % cname, dest=cname, type="float",
                          default=default,
                          help="Initial guess at the %s (%g)" %
                          (desc, default))

    # Options that are used as given when computing the starting values.
    fixed_params = [
        ('mu', 'mutation rate', 1e-9),
        ('g', 'generation time', 20),
    ]
    for (cname, desc, default) in fixed_params:
        parser.add_option("--%s" % cname, dest=cname, type="float",
                          default=default,
                          help="Value of the %s (%g)" % (desc, default))

    parser.add_option(
        "--intervals", dest="intervals", type="int", default=10,
        help="Number of sub intervals used to discretize the time (10)")

    parser.add_option("--header", dest="include_header", action="store_true",
                      default=False, help="Include a header on the output")
    # The flag switches the progress logging below on; the previous help
    # text ("Print help") described a different behaviour.
    parser.add_option("-v", "--verbose", dest="verbose", action="store_true",
                      default=False, help="Print progress messages")

    (options, args) = parser.parse_args()
    if len(args) < 1:
        parser.error("Needs at least one preprocessed sequence to work on")

    if options.verbose:
        logu = log_unfinished_line
        log = log_finished_line
    else:
        def log(s):
            """Discard the message when not running verbosely."""
            return None
        logu = log

    logu("Loading forwarders...")
    # Named ``directory`` so the builtin ``dir`` is not shadowed.
    forwarders = [Forwarder.fromDirectory(directory) for directory in args]
    log("done")

    logu("Constructing model...")
    intervals = options.intervals
    modelI = build_epoch_seperated_model(2, [[0, 0]], [1, intervals])
    log("done")

    # Starting values handed to estimate_I, derived from the options.
    mu = options.mu
    g = options.g
    T = options.splittime * mu
    C = 1.0 / (g * mu * 2 * options.Ne)
    R = options.recomb

    with open(options.tmpfile, 'w') as tmpfile:
        L, est = estimate_I(modelI, forwarders, T, C, R, outfile=tmpfile)

    vals = "\t".join(map(str, est))
    # write(text + '\n') emits exactly what ``print >> outfile, text`` did
    # under Python 2 and also works under Python 3.
    with open(options.outfile, 'w') as outfile:
        if options.include_header:
            outfile.write('logL\tT\tC\tR\n')
        outfile.write("%f\t%s\n" % (L, vals))
def __init__(self, options):
    """Store one ``Forwarder`` per prepared alignment in ``self.forwarders``.

    The alignments come from
    ``_prepare_alignments(options, 'ziphmm_src1_admix')``.
    """
    super(_IsolationModel, self).__init__()

    prepared = _prepare_alignments(options, 'ziphmm_src1_admix')
    loaded = []
    for location in prepared:
        loaded.append(Forwarder.fromDirectory(location))
    self.forwarders = loaded
def __init__(self, options):
    """Store one ``Forwarder`` per prepared alignment.

    The alignments come from
    ``_prepare_alignments(options, 'ziphmm_src1_admix')``; the resulting
    list is kept in ``self.forwarders_src1_admix``.
    """
    super(_TwoPopAdmix23OneSampleModel, self).__init__()

    src1_admix_dirs = _prepare_alignments(options, 'ziphmm_src1_admix')
    from_directory = Forwarder.fromDirectory
    self.forwarders_src1_admix = [
        from_directory(location) for location in src1_admix_dirs
    ]
def main():
    """Estimate isolation-model parameters and write them to the output file.

    The command line is parsed with ``optparse``.  Every positional argument
    is loaded through ``Forwarder.fromDirectory``, a model is built with
    ``build_epoch_seperated_model`` and both are passed to ``estimate_I``
    along with starting values computed from the options.  The log
    likelihood and the estimates it returns are written as a single
    tab-separated line; ``--header`` adds a header line in front of it.
    """
    usage = ("%prog [options] <forwarder dirs> "
             "This program estimates the parameters of an isolation model "
             "with two species and uniform coalescence/recombination rate.")

    parser = OptionParser(usage=usage, version="%prog 1.0")

    parser.add_option("-o", "--out",
                      dest="outfile",
                      type="string",
                      default="/dev/stdout",
                      help="Output file for the estimate (/dev/stdout)")
    parser.add_option("--tmpfile",
                      dest="tmpfile",
                      type="string",
                      default="/dev/null",
                      help="Log for all points estimated in the optimization (/dev/null)")

    # Starting points for the optimisation.
    optimized_params = [
        ('splittime', 'split time', 1e6),
        ('Ne', 'effective population size', 20e3),
        ('recomb', 'recombination rate', 0.1),
    ]
    for (cname, desc, default) in optimized_params:
        parser.add_option("--%s" % cname,
                          dest=cname,
                          type="float",
                          default=default,
                          help="Initial guess at the %s (%g)" % (desc, default))

    # Values used unchanged when computing the starting values below.
    fixed_params = [
        ('mu', 'mutation rate', 1e-9),
        ('g', 'generation time', 20),
    ]
    for (cname, desc, default) in fixed_params:
        parser.add_option("--%s" % cname,
                          dest=cname,
                          type="float",
                          default=default,
                          help="Value of the %s (%g)" % (desc, default))

    parser.add_option("--intervals",
                      dest="intervals",
                      type="int",
                      default=10,
                      help="Number of sub intervals used to discretize the time (10)")

    parser.add_option("--header",
                      dest="include_header",
                      action="store_true",
                      default=False,
                      help="Include a header on the output")
    # This flag enables the progress logging further down; the earlier help
    # text ("Print help") did not match that behaviour.
    parser.add_option("-v", "--verbose",
                      dest="verbose",
                      action="store_true",
                      default=False,
                      help="Print progress messages")

    (options, args) = parser.parse_args()
    if len(args) < 1:
        parser.error("Needs at least one preprocessed sequence to work on")

    if not options.verbose:
        def logu(s):
            """Discard the message when not running verbosely."""
            return None
        log = logu
    else:
        logu = log_unfinished_line
        log = log_finished_line

    logu("Loading forwarders...")
    # The loop variable avoids the name ``dir`` so the builtin stays usable.
    forwarders = [Forwarder.fromDirectory(path) for path in args]
    log("done")

    logu("Constructing model...")
    intervals = options.intervals
    modelI = build_epoch_seperated_model(2, [[0, 0]], [1, intervals])
    log("done")

    # Starting values for estimate_I, computed from the options.
    mu = options.mu
    g = options.g
    T = options.splittime * mu
    C = 1.0 / (g * mu * 2 * options.Ne)
    R = options.recomb

    with open(options.tmpfile, 'w') as tmpfile:
        L, est = estimate_I(modelI, forwarders, T, C, R, outfile=tmpfile)

    vals = "\t".join(map(str, est))
    # ``print >> outfile, text`` only works on Python 2; writing the text
    # plus a newline produces the same bytes on Python 2 and Python 3.
    with open(options.outfile, 'w') as outfile:
        if options.include_header:
            outfile.write('logL\tT\tC\tR' + '\n')
        outfile.write("%f\t%s" % (L, vals) + '\n')