Esempio n. 1
0
    def __init__(self, options):
        """Build the three forwarder lists used by this model.

        Every alignment set is first obtained from ``_prepare_alignments``
        (in the order src1/admix, src1/src1, admix/admix); only afterwards is
        each entry turned into a forwarder with ``Forwarder.fromDirectory``.

        :param options: passed unchanged to ``_prepare_alignments`` together
            with the name of the alignment set to prepare.
        """
        super(_IsolationInitialMigration3HMMModel, self).__init__()

        # NOTE(review): the second key is spelled "scr1" rather than "src1";
        # presumably it matches the option name declared elsewhere -- confirm
        # before "correcting" it.
        option_keys = ('ziphmm_src1_admix',
                       'ziphmm_scr1_scr1',
                       'ziphmm_admix_admix')
        src1_admix_entries, src1_src1_entries, admix_admix_entries = [
            _prepare_alignments(options, key) for key in option_keys
        ]

        def load(entries):
            # One forwarder per prepared entry, in the order given.
            return [Forwarder.fromDirectory(entry) for entry in entries]

        self.forwarders_src1_admix = load(src1_admix_entries)
        self.forwarders_src1_src1 = load(src1_src1_entries)
        self.forwarders_admix_admix = load(admix_admix_entries)
Esempio n. 2
0
    def __init__(self, options):
        """Build the forwarder lists for the src1/admix, src2/admix and
        src1/src2 alignment sets.

        All three sets are prepared with ``_prepare_alignments`` before any
        forwarder is created with ``Forwarder.fromDirectory``.

        :param options: passed unchanged to ``_prepare_alignments`` together
            with the name of the alignment set to prepare.
        """
        super(_ThreePopAdmix23Model, self).__init__()

        def prepare(key):
            return _prepare_alignments(options, key)

        def load(entries):
            return [Forwarder.fromDirectory(entry) for entry in entries]

        # Preparation happens for all sets first, loading second.
        src1_admix_entries = prepare('ziphmm_src1_admix')
        src2_admix_entries = prepare('ziphmm_src2_admix')
        src1_src2_entries = prepare('ziphmm_src1_src2')

        self.forwarders_src1_admix = load(src1_admix_entries)
        self.forwarders_src2_admix = load(src2_admix_entries)
        self.forwarders_src1_src2 = load(src1_src2_entries)
Esempio n. 3
0
    def __init__(self, options):
        """Build all forwarder collections used by this model.

        The three mixed sets (src1/admix, src2/admix, src1/src2) are each
        prepared from four option keys and stored as a list of four forwarder
        lists.  The three same-population sets (src1/src1, src2/src2,
        admix/admix) are each prepared from one key and stored as a flat list
        of forwarders.  Every ``_prepare_alignments`` call is made before the
        first ``Forwarder.fromDirectory`` call.

        :param options: passed unchanged to ``_prepare_alignments`` together
            with the name of the alignment set to prepare.
        """
        super(_ThreePopAdmix23Model15HMM, self).__init__()

        def prepare(key):
            return _prepare_alignments(options, key)

        def prepare_group(keys):
            # Keys are processed left to right.
            return [prepare(key) for key in keys]

        def load(entries):
            return [Forwarder.fromDirectory(entry) for entry in entries]

        def load_group(groups):
            return [load(entries) for entries in groups]

        src1_admix_groups = prepare_group(('ziphmm_src1_admix',
                                           'ziphmm_src1_admix_2',
                                           'ziphmm_src1_admix_3',
                                           'ziphmm_src1_admix_4'))
        src2_admix_groups = prepare_group(('ziphmm_src2_admix',
                                           'ziphmm_src2_admix_2',
                                           'ziphmm_src2_admix_3',
                                           'ziphmm_src2_admix_4'))
        src1_src2_groups = prepare_group(('ziphmm_src1_src2',
                                          'ziphmm_src1_src2_2',
                                          'ziphmm_src1_src2_3',
                                          'ziphmm_src1_src2_4'))

        # NOTE(review): this key is spelled "scr1" rather than "src1";
        # presumably it matches the option name declared elsewhere -- confirm.
        src1_src1_entries = prepare('ziphmm_scr1_scr1')
        src2_src2_entries = prepare('ziphmm_src2_src2')
        admix_admix_entries = prepare('ziphmm_admix_admix')

        self.forwarders_src1_admix = load_group(src1_admix_groups)
        self.forwarders_src2_admix = load_group(src2_admix_groups)
        self.forwarders_src1_src2 = load_group(src1_src2_groups)
        self.forwarders_src1_src1 = load(src1_src1_entries)
        self.forwarders_src2_src2 = load(src2_src2_entries)
        self.forwarders_admix_admix = load(admix_admix_entries)
Esempio n. 4
0
def main():
    """
    Run the main script.

    Parses the command line, builds the initial parameter vector, maximises
    the likelihood of an ``ILSModel`` over the given ZipHMM alignments and
    writes the transformed estimate followed by the maximum log-likelihood as
    one tab-separated line to the output file.  With ``--logfile`` every point
    visited by the optimiser is logged as well; with ``--header`` a header
    line is written first to the output file and to the log file.

    Exits through ``parser.error`` when the command line is invalid
    (including a non-positive theta, which cannot be inverted into a rate).
    """
    # NOTE(review): the text below says "two species" while the options
    # describe three species (theta-1..3, split-12, split-123); left unchanged
    # because it is user-facing text -- confirm the intended wording.
    usage = """%(prog)s [options] <forwarder dirs>

This program estimates the parameters of an isolation model with two species
and uniform coalescence and recombination rates."""

    parser = ArgumentParser(usage=usage)

    # ``ArgumentParser(version=...)`` is deprecated in Python 2.7 and rejected
    # by Python 3.  The 'version' action is the supported spelling; both
    # '-v' and '--version' are registered because the deprecated keyword
    # provided both.
    parser.add_argument("-v",
                        "--version",
                        action="version",
                        version="%(prog)s 1.1")

    parser.add_argument("--header",
                        action="store_true",
                        default=False,
                        help="Include a header on the output")
    parser.add_argument("-o",
                        "--outfile",
                        type=str,
                        default="/dev/stdout",
                        help="Output file for the estimate (/dev/stdout)")

    parser.add_argument(
        "--logfile",
        type=str,
        default=None,
        help="Log for all points estimated in the optimization")

    parser.add_argument(
        "--states-12",
        type=int,
        default=10,
        help=
        "Number of intervals used to discretize the time between the first and second speciation (10)"
    )

    parser.add_argument(
        "--states-123",
        type=int,
        default=10,
        help=
        "Number of intervals used to discretize the time after the second speciation (10)"
    )

    parser.add_argument(
        "--optimizer",
        type=str,
        default="Nelder-Mead",
        help=
        "Optimization algorithm to use for maximizing the likelihood (Nelder-Mead)",
        choices=['Nelder-Mead', 'Powell', 'L-BFGS-B', 'TNC'])

    parser.add_argument(
        "--outgroup",
        action="store_true",
        default=None,
        help="Outgroup is included as fourth sequence in alignment.")

    # The initial guess for the outgroup height used to be registered as a
    # second '--outgroup' option, which made argparse abort with a
    # "conflicting option string" error before any argument was parsed.  It
    # now has its own option name so the boolean flag above keeps working.
    optimized_params = [
        ('split-12', 'First split time in substitutions', 1e6 / 1e9),
        ('split-123', 'Second split time in substitutions', 1e6 / 1e9),
        ('theta-1',
         'effective population size in 4Ne substitutions for species 1',
         1e6 / 1e9),
        ('theta-2',
         'effective population size in 4Ne substitutions for species 2',
         1e6 / 1e9),
        ('theta-3',
         'effective population size in 4Ne substitutions for species 3',
         1e6 / 1e9),
        ('theta-12',
         'effective population size in 4Ne substitutions for species 12 (first ancestral)',
         1e6 / 1e9),
        ('theta-123',
         'effective population size in 4Ne substitutions for species 123 (ancestral to all)',
         1e6 / 1e9),
        ('rho', 'recombination rate in substitutions', 0.4),
        ('outgroup-height', 'total height of tree with outgroup', 1e6 / 1e9),
    ]

    for parameter_name, description, default in optimized_params:
        parser.add_argument("--%s" % parameter_name,
                            type=float,
                            default=default,
                            help="Initial guess at the %s (%g)" %
                            (description, default))

    # nargs='+' makes argparse itself reject an empty alignment list, so no
    # explicit length check is needed after parsing.
    parser.add_argument('alignments',
                        nargs='+',
                        help='Alignments in ZipHMM format')

    options = parser.parse_args()

    thetas = (options.theta_1, options.theta_2, options.theta_3,
              options.theta_12, options.theta_123)
    if any(theta <= 0 for theta in thetas):
        # Each theta is inverted below; report a usage error instead of
        # failing with a ZeroDivisionError traceback.
        parser.error("theta parameters must be positive")

    # The optimiser works on 1 / (theta / 2) rather than on theta itself.
    init_parameters = ((options.split_12, options.split_123) +
                       tuple(1 / (theta / 2) for theta in thetas) +
                       (options.rho, ))
    output_header = [
        'split.time.12', 'split.time.123', 'theta.1', 'theta.2', 'theta.3',
        'theta.12', 'theta.123', 'rho'
    ]
    if options.outgroup:
        init_parameters += (options.outgroup_height, )
        output_header.append("outgroup")

    forwarders = [Forwarder.fromDirectory(arg) for arg in options.alignments]
    log_likelihood = Likelihood(
        ILSModel(options.states_12, options.states_123), forwarders)

    def write_line(stream, fields):
        # file.write behaves the same on Python 2 and 3, unlike ``print >>``.
        stream.write('\t'.join(fields) + '\n')

    if options.logfile:
        with open(options.logfile, 'w') as logfile:
            if options.header:
                write_line(logfile, output_header)

            mle_parameters = maximum_likelihood_estimate(
                log_likelihood,
                init_parameters,
                optimizer_method=options.optimizer,
                log_file=logfile,
                log_param_transform=transform)
    else:
        mle_parameters = maximum_likelihood_estimate(
            log_likelihood,
            init_parameters,
            optimizer_method=options.optimizer)

    max_log_likelihood = log_likelihood(mle_parameters)

    with open(options.outfile, 'w') as outfile:
        if options.header:
            write_line(outfile, output_header)
        write_line(
            outfile,
            map(str,
                transform(mle_parameters) + (max_log_likelihood, )))
Esempio n. 5
0
def main():
    """Estimate the parameters of the two-species isolation model.

    Parses the command line, loads one forwarder per positional argument,
    builds the model with ``build_epoch_seperated_model`` and runs
    ``estimate_I`` from the initial values given on the command line.  The
    first value returned by ``estimate_I`` and the estimated parameters are
    written as one tab-separated line to the output file (preceded by the
    header ``logL T C R`` when ``--header`` is given); the file named by
    ``--tmpfile`` is handed to ``estimate_I`` as its ``outfile``.

    Exits through ``parser.error`` when no input is given or when ``--mu``,
    ``--g`` or ``--Ne`` is not positive.
    """
    usage = """%prog [options] <forwarder dirs>

This program estimates the parameters of an isolation model with two species
and uniform coalescence/recombination rate."""

    parser = OptionParser(usage=usage, version="%prog 1.0")

    parser.add_option("-o",
                      "--out",
                      dest="outfile",
                      type="string",
                      default="/dev/stdout",
                      help="Output file for the estimate (/dev/stdout)")
    parser.add_option(
        "--tmpfile",
        dest="tmpfile",
        type="string",
        default="/dev/null",
        help="Log for all points estimated in the optimization (/dev/null)")
    optimized_params = [
        ('splittime', 'split time', 1e6),
        ('Ne', 'effective population size', 20e3),
        ('recomb', 'recombination rate', 0.1),
    ]
    for (cname, desc, default) in optimized_params:
        parser.add_option("--%s" % cname,
                          dest=cname,
                          type="float",
                          default=default,
                          help="Initial guess at the %s (%g)" %
                          (desc, default))
    fixed_params = [
        ('mu', 'mutation rate', 1e-9),
        ('g', 'generation time', 20),
    ]
    for (cname, desc, default) in fixed_params:
        parser.add_option("--%s" % cname,
                          dest=cname,
                          type="float",
                          default=default,
                          help="Value of the %s (%g)" % (desc, default))
    parser.add_option(
        "--intervals",
        dest="intervals",
        type="int",
        default=10,
        help="Number of sub intervals used to discretize the time (10)")
    parser.add_option("--header",
                      dest="include_header",
                      action="store_true",
                      default=False,
                      help="Include a header on the output")
    # The help text used to read "Print help", which describes -h, not -v.
    parser.add_option("-v",
                      "--verbose",
                      dest="verbose",
                      action="store_true",
                      default=False,
                      help="Print progress messages while running")

    (options, args) = parser.parse_args()
    if not args:
        parser.error("Needs at least one preprocessed sequence to work on")
    if min(options.mu, options.g, options.Ne) <= 0:
        # The product g * mu * Ne is inverted below; report a usage error up
        # front instead of a ZeroDivisionError after the forwarders are loaded.
        parser.error("--mu, --g and --Ne must be positive")

    if options.verbose:
        logu = log_unfinished_line
        log = log_finished_line
    else:
        def log(_message):
            """Discard the message (quiet mode)."""

        logu = log

    logu("Loading forwarders...")
    # 'directory' rather than 'dir' so the builtin is not shadowed.
    forwarders = [Forwarder.fromDirectory(directory) for directory in args]
    log("done")

    logu("Constructing model...")
    intervals = options.intervals
    modelI = build_epoch_seperated_model(2, [[0, 0]], [1, intervals])
    log("done")

    # Initial values handed to the optimiser, derived from the options.
    mu = options.mu
    g = options.g
    T = options.splittime * mu
    C = 1.0 / (g * mu * 2 * options.Ne)
    R = options.recomb

    with open(options.tmpfile, 'w') as tmpfile:
        L, est = estimate_I(modelI, forwarders, T, C, R, outfile=tmpfile)

    vals = "\t".join(map(str, est))
    with open(options.outfile, 'w') as outfile:
        # file.write behaves the same on Python 2 and 3, unlike ``print >>``.
        if options.include_header:
            outfile.write('logL\tT\tC\tR\n')
        outfile.write("%f\t%s\n" % (L, vals))
Esempio n. 6
0
 def __init__(self, options):
     """Create one forwarder per prepared 'ziphmm_src1_admix' alignment.

     :param options: passed unchanged to ``_prepare_alignments``.
     """
     super(_IsolationModel, self).__init__()
     prepared = _prepare_alignments(options, 'ziphmm_src1_admix')
     loaded = []
     for entry in prepared:
         loaded.append(Forwarder.fromDirectory(entry))
     # Assigned only once every forwarder has been created.
     self.forwarders = loaded
Esempio n. 7
0
    def __init__(self, options):
        """Create the src1/admix forwarder list for this model.

        :param options: passed unchanged to ``_prepare_alignments`` together
            with the key ``'ziphmm_src1_admix'``.
        """
        super(_TwoPopAdmix23OneSampleModel, self).__init__()

        src1_admix_entries = _prepare_alignments(options, 'ziphmm_src1_admix')
        forwarders = []
        for entry in src1_admix_entries:
            forwarders.append(Forwarder.fromDirectory(entry))
        self.forwarders_src1_admix = forwarders
Esempio n. 8
0
def main():
    """Estimate the parameters of the two-species isolation model.

    Parses the command line, loads one forwarder per positional argument,
    builds the model with ``build_epoch_seperated_model`` and runs
    ``estimate_I`` from the initial values given on the command line.  The
    first value returned by ``estimate_I`` and the estimated parameters are
    written as one tab-separated line to the output file (preceded by the
    header ``logL T C R`` when ``--header`` is given); the file named by
    ``--tmpfile`` is handed to ``estimate_I`` as its ``outfile``.

    Exits through ``parser.error`` when no input is given or when ``--mu``,
    ``--g`` or ``--Ne`` is not positive.
    """
    usage="""%prog [options] <forwarder dirs>

This program estimates the parameters of an isolation model with two species
and uniform coalescence/recombination rate."""

    parser = OptionParser(usage=usage, version="%prog 1.0")

    parser.add_option("-o", "--out",
                      dest="outfile",
                      type="string",
                      default="/dev/stdout",
                      help="Output file for the estimate (/dev/stdout)")
    parser.add_option("--tmpfile",
                      dest="tmpfile",
                      type="string",
                      default="/dev/null",
                      help="Log for all points estimated in the optimization (/dev/null)")
    optimized_params = [
            ('splittime', 'split time', 1e6),
            ('Ne', 'effective population size', 20e3),
            ('recomb', 'recombination rate', 0.1),
            ]
    for (cname, desc, default) in optimized_params:
        parser.add_option("--%s" % cname,
                          dest=cname,
                          type="float",
                          default=default,
                          help="Initial guess at the %s (%g)" % (desc, default))
    fixed_params = [
            ('mu', 'mutation rate', 1e-9),
            ('g', 'generation time', 20),
            ]
    for (cname, desc, default) in fixed_params:
        parser.add_option("--%s" % cname,
                          dest=cname,
                          type="float",
                          default=default,
                          help="Value of the %s (%g)" % (desc, default))
    parser.add_option("--intervals",
                      dest="intervals",
                      type="int",
                      default=10,
                      help="Number of sub intervals used to discretize the time (10)")
    parser.add_option("--header",
                      dest="include_header",
                      action="store_true",
                      default=False,
                      help="Include a header on the output")
    # The help text used to read "Print help", which describes -h, not -v.
    parser.add_option("-v", "--verbose",
                      dest="verbose",
                      action="store_true",
                      default=False,
                      help="Print progress messages while running")

    (options, args) = parser.parse_args()
    if not args:
        parser.error("Needs at least one preprocessed sequence to work on")
    if min(options.mu, options.g, options.Ne) <= 0:
        # The product g * mu * Ne is inverted below; report a usage error up
        # front instead of a ZeroDivisionError after the forwarders are loaded.
        parser.error("--mu, --g and --Ne must be positive")

    if options.verbose:
        logu = log_unfinished_line
        log = log_finished_line
    else:
        def log(_message):
            """Discard the message (quiet mode)."""

        logu = log

    logu("Loading forwarders...")
    # 'directory' rather than 'dir' so the builtin is not shadowed.
    forwarders = [Forwarder.fromDirectory(directory) for directory in args]
    log("done")

    logu("Constructing model...")
    intervals = options.intervals
    modelI = build_epoch_seperated_model(2, [[0,0]], [1,intervals])
    log("done")

    # Initial values handed to the optimiser, derived from the options.
    mu = options.mu
    g = options.g
    T = options.splittime * mu
    C = 1.0/(g*mu*2*options.Ne)
    R = options.recomb

    with open(options.tmpfile, 'w') as tmpfile:
        L, est = estimate_I(modelI, forwarders, T, C, R, outfile=tmpfile)

    vals = "\t".join(map(str,est))
    with open(options.outfile, 'w') as outfile:
        # file.write behaves the same on Python 2 and 3, unlike ``print >>``.
        if options.include_header:
            outfile.write('logL\tT\tC\tR\n')
        outfile.write("%f\t%s\n" % (L,vals))