Example #1
0
 def get_orthoplanar_intersection_times(self):
     """
     Get the sorted list of intersection times per axis.
     For each axis this finds the times at which the piecewise linear
     path through the points has a zero coordinate along that axis.
     A knot counts when its coordinate is within a small absolute
     tolerance of zero.
     A segment counts when both endpoint coordinates exceed the
     tolerance in magnitude and have opposite signs.
     @return: a list with one sorted list of times for each axis
     """
     tolerance = 1e-6
     nknots = len(self.points)
     crossings_per_axis = []
     for axis in range(self.ndim):
         # knots that sit on the plane
         crossings = [
             t for p, t in zip(self.points, self.times)
             if abs(p[axis]) < tolerance
         ]
         # segments that pass through the plane between their knots
         for i, j in iterutils.pairwise(range(nknots)):
             pa, pb = self.points[i], self.points[j]
             ta, tb = self.times[i], self.times[j]
             xa, xb = pa[axis], pb[axis]
             if not (abs(xa) > tolerance and abs(xb) > tolerance):
                 continue
             if not xa * xb < 0:
                 continue
             # fraction of the segment traversed before the zero crossing
             fraction = xa / (xa - xb)
             crossings.append(ta + fraction * (tb - ta))
         crossings.sort()
         crossings_per_axis.append(crossings)
     return crossings_per_axis
Example #2
0
 def get_orthoplanar_intersection_times(self):
     """
     Collect, for every axis, the times at which the path has
     a zero coordinate along that axis.
     Knots whose coordinate is within an absolute tolerance of zero
     contribute their own time.
     Segments whose endpoint coordinates are both outside the tolerance
     and of opposite sign contribute a linearly interpolated time.
     @return: one sorted list of times per axis
     """
     eps = 1e-6
     count = len(self.points)
     result = []
     for axis in range(self.ndim):
         found = []
         # exact hits at the knots
         for point, time in zip(self.points, self.times):
             if abs(point[axis]) < eps:
                 found.append(time)
         # sign changes inside the segments
         for first, second in iterutils.pairwise(range(count)):
             p_first, p_second = self.points[first], self.points[second]
             t_first, t_second = self.times[first], self.times[second]
             clear_of_plane = (
                 abs(p_first[axis]) > eps and abs(p_second[axis]) > eps)
             if clear_of_plane and p_first[axis] * p_second[axis] < 0:
                 weight = p_first[axis] / (p_first[axis] - p_second[axis])
                 found.append(t_first + weight * (t_second - t_first))
         result.append(sorted(found))
     return result
Example #3
0
 def annotate_posteriors(self, stickiness, hidden_models):
     """
     Append a list of (MAP state, conditional MAP substate) pairs,
     one pair per observation, to self.annotation_lists.
     @param stickiness: a nonnegative integer that defines the transition matrix
     @param hidden_models: a list of statistical models
     """
     # build the HMM on top of a uniform transition object
     nstates = len(hidden_models)
     randomization = .01**stickiness
     transitions = TransitionMatrix.UniformTransitionObject(
         randomization, nstates)
     hmm = FastHMM.Model(transitions, hidden_models, 100000)
     # each observation is a sorted tuple of coverage values
     observations = []
     for coverage in self.nt_coverages:
         observations.append(tuple(sorted(coverage)))
     # differences between consecutive offsets
     distances = []
     for left, right in iterutils.pairwise(self.offsets):
         distances.append(right - left)
     # posterior distribution over the hidden states per observation
     posteriors = hmm.scaled_posterior_durbin(
         hmm.get_dp_info(observations, distances))
     # comparing (probability, index) pairs breaks ties
     # in favor of the larger state index
     annotations = []
     for obs, distribution in zip(observations, posteriors):
         best = max((p, i) for i, p in enumerate(distribution))
         state = best[1]
         substate = hidden_models[state].get_maximum_posterior(obs)
         annotations.append((state, substate))
     self.annotation_lists.append(annotations)
Example #4
0
 def annotate_posteriors(self, stickiness, hidden_models):
     """
     Annotate every observation with its MAP hidden state
     and with the MAP substate conditional on that state.
     The resulting list is appended to self.annotation_lists.
     @param stickiness: a nonnegative integer that defines the transition matrix
     @param hidden_models: a list of statistical models
     """
     state_count = len(hidden_models)
     p_random = .01**stickiness
     hmm = FastHMM.Model(
         TransitionMatrix.UniformTransitionObject(p_random, state_count),
         hidden_models,
         100000)
     # observations are sorted coverage tuples
     # and distances are gaps between consecutive offsets
     observations = list(tuple(sorted(c)) for c in self.nt_coverages)
     distances = list(b - a for a, b in iterutils.pairwise(self.offsets))
     dp_info = hmm.get_dp_info(observations, distances)
     distribution_list = hmm.scaled_posterior_durbin(dp_info)

     def map_annotation(obs, distribution):
         # the maximum over (probability, index) pairs picks the MAP state
         _, index = max((p, i) for i, p in enumerate(distribution))
         return index, hidden_models[index].get_maximum_posterior(obs)

     self.annotation_lists.append([
         map_annotation(obs, distribution)
         for obs, distribution in zip(observations, distribution_list)
     ])
Example #5
0
def get_table_string_and_scripts(fs):
    """
    Sample mutation-selection rate matrices and summarize them over time.
    @param fs: an object whose attributes supply the options:
    nresidues, nsites, nselections,
    the variance flags (low_var, medium_var, high_var, really_high_var),
    the skew flags (neg_skew, no_skew, pos_skew),
    and the time grid (t_low, t_high, ntimes)
    @return: a (table_string, scripts) pair where scripts is the list
    of outputs of the three get_r_*_script helpers
    @raise ValueError: if there are more than 256 states,
    or if no variance flag or no skew flag is set
    """
    nstates = fs.nresidues**fs.nsites
    if nstates > 256:
        raise ValueError('the mutation rate matrix is too big')
    # get the mutation matrix
    Q_mut = mrate.get_sparse_sequence_rate_matrix(fs.nresidues, fs.nsites)
    # sample a bunch of mutation-selection rate matrices
    Q_sels = []
    for _ in range(fs.nselections):
        # choose the variance of the selection parameters;
        # the first flag that is set wins
        if fs.low_var:
            v = 0.2
        elif fs.medium_var:
            v = 1
        elif fs.high_var:
            v = 5.0
        elif fs.really_high_var:
            v = 25.0
        else:
            # previously this fell through and failed on an unbound name
            raise ValueError('no selection variance option is set')
        s = math.sqrt(v)
        # sample one selection parameter per state
        if fs.neg_skew:
            sels = [-random.expovariate(1 / s) for i in range(nstates)]
        elif fs.no_skew:
            sels = [random.gauss(0, s) for i in range(nstates)]
        elif fs.pos_skew:
            sels = [random.expovariate(1 / s) for i in range(nstates)]
        else:
            raise ValueError('no selection skew option is set')
        # define the mutation-selection rate matrix using Halpern-Bruno
        Q = np.zeros_like(Q_mut)
        for i in range(nstates):
            for j in range(nstates):
                if i != j:
                    tau = math.exp(-(sels[j] - sels[i]))
                    if tau == 1.0:
                        # The formula below is 0/0 here;
                        # its limit as tau approaches 1 is 1.
                        coeff = 1.0
                    else:
                        coeff = math.log(tau) / (1 - 1 / tau)
                    Q[i, j] = Q_mut[i, j] * coeff
        # each diagonal entry makes its row sum to zero
        for i in range(nstates):
            Q[i, i] = -np.sum(Q[i])
        Q_sels.append(Q)
    # define the time points
    if fs.ntimes == 1:
        # a single time point has no increment to compute
        times = [fs.t_low]
    else:
        # force float division so integer endpoints are not truncated
        incr = float(fs.t_high - fs.t_low) / (fs.ntimes - 1)
        times = [fs.t_low + i * incr for i in range(fs.ntimes)]
    # compute the statistics
    nsels = len(Q_sels)
    pairs = [get_time_point_summary(Q_mut, Q_sels, t) for t in times]
    mi_sign_lists, time_stats = zip(*pairs)
    # look at how the signs change over time for each selection sample
    ncrossing_list = []
    for signs in zip(*mi_sign_lists):
        count = 0
        for sign_a, sign_b in iterutils.pairwise(signs):
            if sign_a != sign_b:
                count += 1
        ncrossing_list.append(count)
    # get the R scripts
    scripts = [
        get_r_band_script(nsels, time_stats),
        get_r_prop_script(nsels, time_stats),
        get_r_cross_script(ncrossing_list)
    ]
    table_string = RUtil.get_table_string(time_stats, g_time_stats_headers)
    return table_string, scripts
Example #6
0
 def get_bezier_path(self):
     """
     Build a BezierPath made of one line segment chunk
     per pair of consecutive points.
     The chunk with index i is given the time interval from i to i+1.
     """
     def make_chunk(index, endpoints):
         pa, pb = endpoints
         chunk = bezier.create_bchunk_line_segment(pa, pb)
         chunk.start_time = float(index)
         chunk.stop_time = float(index + 1)
         return chunk

     segments = enumerate(iterutils.pairwise(self.points))
     return pcurve.BezierPath([make_chunk(i, pair) for i, pair in segments])
Example #7
0
 def get_bezier_path(self):
     """
     Connect consecutive points with line segment chunks
     and wrap the chunks in a BezierPath.
     Successive chunks get the unit time intervals
     [0, 1], [1, 2], and so on.
     """
     chunks = []
     start = 0
     for pa, pb in iterutils.pairwise(self.points):
         chunk = bezier.create_bchunk_line_segment(pa, pb)
         chunk.start_time = float(start)
         chunk.stop_time = float(start + 1)
         chunks.append(chunk)
         start += 1
     return pcurve.BezierPath(chunks)
Example #8
0
def get_table_string_and_scripts(fs):
    """
    Build a summary table and R scripts from sampled rate matrices.
    A mutation rate matrix is combined with randomly sampled selection
    parameters to give mutation-selection rate matrices,
    which are then summarized at evenly spaced time points.
    @param fs: an object with the attributes nresidues, nsites,
    nselections, low_var, medium_var, high_var, really_high_var,
    neg_skew, no_skew, pos_skew, t_low, t_high and ntimes
    @return: (table_string, scripts) where scripts is a list of
    the outputs of the three get_r_*_script helpers
    @raise ValueError: if there are more than 256 states,
    or if none of the variance flags or none of the skew flags is set
    """
    nstates = fs.nresidues ** fs.nsites
    if nstates > 256:
        raise ValueError('the mutation rate matrix is too big')
    # get the mutation matrix
    Q_mut = mrate.get_sparse_sequence_rate_matrix(fs.nresidues, fs.nsites)
    # sample a bunch of mutation-selection rate matrices
    Q_sels = []
    for _ in range(fs.nselections):
        # sample the selection parameters;
        # the flags are checked in a fixed priority order
        if fs.low_var:
            v = 0.2
        elif fs.medium_var:
            v = 1
        elif fs.high_var:
            v = 5.0
        elif fs.really_high_var:
            v = 25.0
        else:
            # fail clearly instead of hitting an unbound local below
            raise ValueError('no selection variance option is set')
        s = math.sqrt(v)
        if fs.neg_skew:
            sels = [-random.expovariate(1/s) for i in range(nstates)]
        elif fs.no_skew:
            sels = [random.gauss(0, s) for i in range(nstates)]
        elif fs.pos_skew:
            sels = [random.expovariate(1/s) for i in range(nstates)]
        else:
            raise ValueError('no selection skew option is set')
        # define the mutation-selection rate matrix using Halpern-Bruno
        Q = np.zeros_like(Q_mut)
        for i in range(nstates):
            for j in range(nstates):
                if i == j:
                    continue
                tau = math.exp(-(sels[j] - sels[i]))
                # When tau is exactly 1 the ratio is 0/0,
                # so use its limiting value of 1 instead.
                if tau == 1.0:
                    coeff = 1.0
                else:
                    coeff = math.log(tau) / (1 - 1/tau)
                Q[i, j] = Q_mut[i, j] * coeff
        # the diagonal is the negative sum of the off-diagonal row entries
        for i in range(nstates):
            Q[i, i] = -np.sum(Q[i])
        Q_sels.append(Q)
    # define the time points
    if fs.ntimes == 1:
        # avoid dividing by zero when only one time point is requested
        times = [fs.t_low]
    else:
        # float() keeps integer endpoints from truncating the increment
        incr = float(fs.t_high - fs.t_low) / (fs.ntimes - 1)
        times = [fs.t_low + i*incr for i in range(fs.ntimes)]
    # compute the statistics
    nsels = len(Q_sels)
    pairs = [get_time_point_summary(Q_mut, Q_sels, t) for t in times]
    mi_sign_lists, time_stats = zip(*pairs)
    # count the sign changes over time for each selection sample
    ncrossing_list = []
    for signs in zip(*mi_sign_lists):
        count = 0
        for sign_a, sign_b in iterutils.pairwise(signs):
            if sign_a != sign_b:
                count += 1
        ncrossing_list.append(count)
    # get the R scripts
    scripts = [
            get_r_band_script(nsels, time_stats),
            get_r_prop_script(nsels, time_stats),
            get_r_cross_script(ncrossing_list)]
    table_string = RUtil.get_table_string(time_stats, g_time_stats_headers)
    return table_string, scripts
Example #9
0
def get_tikz_body(fs):
    """
    Build the text of the TikZ drawing commands for a plot of two curves.
    The output is accumulated with the Python 2 print-chevron statement.
    @param fs: an object with the attributes plot_width, plot_height,
    t_max, a_mu, a_N, b_mu and b_N
    @return: the accumulated text with trailing whitespace removed
    """
    out = StringIO()
    # define user variables
    plot_width = fs.plot_width
    plot_height = fs.plot_height
    timescale = fs.t_max
    # create the function objects
    f_a = JC69.IdentitySlopeInformation(fs.a_mu, fs.a_N)
    f_b = JC69.IdentitySlopeInformation(fs.b_mu, fs.b_N)
    # Define some times for evaluation of the curve.
    # These are the time scale repeatedly halved, ten values in total.
    times = [timescale*2**-i for i in range(10)]
    # define some more intermediate values
    # The vertical extent is 1.2 times the larger of the two function
    # values at the smallest evaluation time.
    ymax = max(f_a(min(times)), f_b(min(times))) * 1.2
    # per-axis factors that map (time, value) pairs to plot coordinates
    plotscale = np.array((plot_width / timescale, plot_height / ymax))
    origin = (0, 0)
    # draw the boundary of the plot
    # The horizontal axis carries the label $t$ below it.
    print >> out, r'\draw[color=gray] %s %s {%s} %s;' % (
            tikz.point_to_tikz(origin),
            'edge node[color=black,below]',
            '$t$',
            tikz.point_to_tikz((plot_width, 0)))
    print >> out, r'\draw[color=gray] ' + get_segment(
            origin, (0, plot_height))
    # draw the bezier curves hitting the right knots
    # Each pair of consecutive sorted times gives one hermite chunk
    # built from the scaled endpoint positions and tangents.
    for f in (f_a, f_b):
        bchunks = []
        for a, b in iterutils.pairwise(sorted(times)):
            pta = np.array((a, f(a)))
            ptb = np.array((b, f(b)))
            dta = np.array((1, f.deriv(a)))
            dtb = np.array((1, f.deriv(b)))
            bchunk = bezier.create_bchunk_hermite(
                    a, b,
                    pta * plotscale, ptb * plotscale,
                    dta * plotscale, dtb * plotscale)
            bchunks.append(bchunk)
        print >> out, r'\draw[color=gray] ' + get_tikz_bezier(bchunks)
    # draw filled black dots at some intersections
    # The dots are at the origin and at each function's value at time zero.
    dot_points = [origin]
    dot_points.append((0, f_a(0)))
    dot_points.append((0, f_b(0)))
    for p in dot_points:
        # A trailing comma makes the Python 2 print statement
        # continue on the same output line.
        print >> out, r'\fill[color=black,inner sep=0pt]',
        print >> out, tikz.point_to_tikz(np.array(p) * plotscale),
        print >> out, 'circle (1pt);'
    # draw some text annotations
    pt_txt_pairs = [
            ((0, 0), '0'),
            ]
    for i, (pt, txt) in enumerate(pt_txt_pairs):
        print >> out, r'\node[anchor=east] (%s) at %s {%s};' % (
                'ylabel%d' % i,
                tikz.point_to_tikz(pt),
                txt)
    # return the text without the final newline
    return out.getvalue().rstrip()
Example #10
0
 def test_clocklike(self):
     """
     Check that every path to the root in a sample has the same total.
     The total of a path is the sum of the values of B over
     its pairs of consecutive vertices.
     """
     nleaves = 10
     R, B = sample(nleaves)
     ages = [
         sum(B[frozenset(edge)] for edge in iterutils.pairwise(path))
         for path in get_paths_to_root(R)
     ]
     # one path per leaf, all with an identical total
     self.assertEqual(len(ages), nleaves)
     self.assertEqual(len(set(ages)), 1)
Example #11
0
 def test_clocklike(self):
     """
     The sampled structure should give one path to the root per leaf,
     and the values of B summed along each path should all agree.
     """
     nleaves = 10
     R, B = sample(nleaves)

     def path_total(path):
         # add up the values of B over consecutive vertex pairs
         total = 0
         for pair in iterutils.pairwise(path):
             total += B[frozenset(pair)]
         return total

     ages = []
     for path in get_paths_to_root(R):
         ages.append(path_total(path))
     self.assertEqual(len(ages), nleaves)
     self.assertEqual(len(set(ages)), 1)
Example #12
0
 def get_bezier_path(self):
     """
     Build a BezierPath with one line segment chunk
     per pair of consecutive points.
     Each chunk starts and stops at the times stored
     for its two endpoints.
     """
     chunks = []
     for i, j in iterutils.pairwise(range(len(self.points))):
         endpoints = (self.points[i], self.points[j])
         interval = (self.times[i], self.times[j])
         chunk = bezier.create_bchunk_line_segment(*endpoints)
         chunk.start_time, chunk.stop_time = interval
         chunks.append(chunk)
     return pcurve.BezierPath(chunks)
Example #13
0
def create_adjacency_matrix(affinity, nvertices):
    """
    Create the weighted adjacency matrix of a path graph.
    Vertex i is adjacent to vertex i+1 and to no other later vertex,
    so only the first superdiagonal and the first subdiagonal are nonzero.
    @param affinity: affinity between adjacent vertices
    @param nvertices: the number of vertices in the graph
    @return: a symmetric numpy array with nvertices rows and columns
    """
    A = np.zeros((nvertices, nvertices))
    # first endpoint of each of the edges between consecutive vertices;
    # this is empty when there are fewer than two vertices
    i = np.arange(nvertices - 1)
    A[i, i + 1] = affinity
    A[i + 1, i] = affinity
    return A
Example #14
0
 def get_bezier_path(self):
     """
     Join consecutive points with line segment chunks
     and return them as a BezierPath.
     The start and stop times of a chunk are the times
     stored for its first and second endpoint.
     """
     def timed_segment(first, second):
         pa, pb = self.points[first], self.points[second]
         ta, tb = self.times[first], self.times[second]
         segment = bezier.create_bchunk_line_segment(pa, pb)
         segment.start_time = ta
         segment.stop_time = tb
         return segment

     index_pairs = iterutils.pairwise(range(len(self.points)))
     return pcurve.BezierPath([timed_segment(i, j) for i, j in index_pairs])
Example #15
0
 def draw_curve(self):
     """
     Return a TikZ draw command for the curve of self.f.
     One hermite chunk is created per pair of consecutive knot times,
     using the function value and derivative at both knots,
     with positions and tangents scaled to plot coordinates.
     """
     scale = np.array((self.plot_width / self.timescale, self.plot_height))
     bchunks = []
     for t0, t1 in iterutils.pairwise(self._get_knot_times()):
         value0, value1 = self.f(t0), self.f(t1)
         slope0, slope1 = self.f.deriv(t0), self.f.deriv(t1)
         bchunks.append(bezier.create_bchunk_hermite(
             t0, t1,
             np.array((t0, value0)) * scale,
             np.array((t1, value1)) * scale,
             np.array((1, slope0)) * scale,
             np.array((1, slope1)) * scale))
     return r"\draw " + get_tikz_bezier(bchunks)
Example #16
0
def get_expected_transitions_brute(prandom, nstates, nsteps):
    """
    This function is for transition matrices defined by their size and a single parameter.
    Use brute force to compute transition expectations.
    Every state sequence of length nsteps+1 is enumerated,
    so the cost grows like nstates to the power nsteps+1.
    This function returns two values.
    The first value is the expected number of transitions
    when the endpoints are the same.
    The second value is the expected number of transitions
    when the endpoints are different.
    The second value is nan when no enumerated sequence
    has different endpoints, for example when there is a single state.
    @param prandom: the probability of randomization at each step
    @param nstates: the number of states in the chain
    @param nsteps: one fewer than the length of the sequence
    @return: (expected_t_same, expected_t_different)
    """
    nan = float('nan')
    # handle corner cases
    if not nsteps:
        return 0.0, nan
    if nsteps == 1:
        return 0.0, 1.0
    if not prandom:
        return 0.0, nan
    # precalculate stuff
    # float() guards against integer division of integer arguments
    p_particular_trans = prandom / float(nstates)
    p_notrans = p_particular_trans + (1 - prandom)
    # initialize probabilities
    total_p_different = 0
    total_p_same = 0
    # initialize expectations
    e_same = 0
    e_different = 0
    # define expectations
    for sequence in itertools.product(range(nstates), repeat=nsteps+1):
        # Calculate the probability of the sequence
        # and the number of transitions.
        # The initial state is uniformly distributed.
        ntransitions = 0
        p = 1.0 / nstates
        for a, b in zip(sequence, sequence[1:]):
            if a == b:
                p *= p_notrans
            else:
                p *= p_particular_trans
                ntransitions += 1
        # add to the expectation
        if sequence[0] == sequence[-1]:
            total_p_same += p
            e_same += p * ntransitions
        else:
            total_p_different += p
            e_different += p * ntransitions
    # condition on the endpoint relation
    e_same /= total_p_same
    if total_p_different:
        e_different /= total_p_different
    else:
        # the conditioning event has probability zero
        e_different = nan
    return e_same, e_different
Example #17
0
def get_expected_transitions_brute(prandom, nstates, nsteps):
    """
    This function is for transition matrices defined by their size and a single parameter.
    Use brute force to compute transition expectations
    by enumerating every state sequence of length nsteps+1.
    This function returns two values.
    The first value is the expected number of transitions
    when the endpoints are the same.
    The second value is the expected number of transitions
    when the endpoints are different,
    or nan when sequences with different endpoints have zero total
    probability (this happens when there is only one state).
    @param prandom: the probability of randomization at each step
    @param nstates: the number of states in the chain
    @param nsteps: one fewer than the length of the sequence
    @return: (expected_t_same, expected_t_different)
    """
    undefined = float('nan')
    # handle corner cases
    if not nsteps:
        return 0.0, undefined
    if nsteps == 1:
        return 0.0, 1.0
    if not prandom:
        return 0.0, undefined
    # precalculate stuff
    # Dividing by a float keeps integer arguments from being truncated.
    p_particular_trans = prandom / float(nstates)
    p_notrans = p_particular_trans + (1 - prandom)
    # probability mass and weighted transition counts,
    # split by whether the endpoints agree
    total_p_same = 0
    total_p_different = 0
    e_same = 0
    e_different = 0
    for sequence in itertools.product(range(nstates), repeat=nsteps + 1):
        # Calculate the probability of the sequence
        # and the number of transitions.
        p = 1.0 / nstates
        ntransitions = 0
        for a, b in zip(sequence[:-1], sequence[1:]):
            if a != b:
                p *= p_particular_trans
                ntransitions += 1
            else:
                p *= p_notrans
        # add to the expectation
        if sequence[0] == sequence[-1]:
            total_p_same += p
            e_same += p * ntransitions
        else:
            total_p_different += p
            e_different += p * ntransitions
    e_same /= total_p_same
    # avoid dividing by zero when no sequence has different endpoints
    if not total_p_different:
        return e_same, undefined
    e_different /= total_p_different
    return e_same, e_different
Example #18
0
 def shatter(self, times):
     """
     Return a collection of BezierPath objects.
     The returned objects should be annotated
     with characteristic times corresponding to intersections.
     With zero or one intersection time, this path itself is annotated
     in place and returned as the only element.
     Otherwise the bchunks are cut at the midpoints between consecutive
     intersection times, and one new path is built per intersection time.
     @param times: sorted filtered intersection times
     @return: a collection of BezierPath objects
     """
     # handle the edge case of no intersections
     # The characteristic time is the midpoint of the whole path.
     if not times:
         self.characteristic_time = 0.5 * (self.get_start_time() +
                                           self.get_stop_time())
         return [self]
     # handle the edge case of a single intersection
     if len(times) == 1:
         self.characteristic_time = times[0]
         return [self]
     # Compute quiescence times.
     # TODO use weak spatially quiescent midpoints
     # instead of naive temporally quiescent midpoints
     quiescence_times = [
         0.5 * (a + b) for a, b in iterutils.pairwise(times)
     ]
     # Construct the bchunks sequences.
     # Use whole bchunks when possible,
     # but at quiescence times we might have to split the bchunks.
     remaining = deque(self.bchunks)
     groups = []
     g = []
     # repeatedly split the remaining sequence
     for q in quiescence_times:
         # Move chunks into the current group until one contains q.
         # NOTE(review): popleft raises IndexError if the deque empties
         # before a chunk containing q is found -- confirm that q always
         # lies within the time range of the path.
         while True:
             b = remaining.popleft()
             if b.start_time <= q <= b.stop_time:
                 # The first piece closes the current group
                 # and the second piece is pushed back for the next group.
                 ba, bb = b.split_global(q)
                 g.append(ba)
                 remaining.appendleft(bb)
                 groups.append(g)
                 g = []
                 break
             else:
                 g.append(b)
     # whatever is left after the last quiescence time is the final group
     g.extend(remaining)
     groups.append(g)
     # Create a piecewise bezier curve from each group,
     # and give each piecewise curve a characteristic time.
     # There is one more group than there are quiescence times,
     # so the groups pair up with the intersection times.
     piecewise_curves = []
     for t, group in zip(times, groups):
         curve = self.__class__(group)
         curve.characteristic_time = t
         piecewise_curves.append(curve)
     return piecewise_curves
Example #19
0
def create_laplacian_matrix(nvertices):
    """
    Create the Laplacian matrix of a weighted path graph.
    Consecutive vertices are joined by an edge whose affinity
    is twice the number of vertices.
    @param nvertices: the number of vertices in the graph
    @return: the result of converting the adjacency matrix to a Laplacian
    """
    weight = nvertices * 2.0
    adjacency = np.zeros((nvertices, nvertices), dtype=float)
    # fill both symmetric entries for each pair of consecutive vertices
    for a, b in iterutils.pairwise(range(nvertices)):
        adjacency[a, b] = adjacency[b, a] = weight
    return Euclid.adjacency_to_laplacian(adjacency)
Example #20
0
 def annotate_posteriors(self, T, hidden_models):
     """
     Compute the posterior distributions and store them
     in self.posterior_distributions.
     The observations are the stored coverages and the distances
     are the differences between consecutive offsets.
     @param T: a matrix of transition probabilities among the hidden states
     @param hidden_models: a list of statistical models
     """
     model = MissingHMM.MissingHMM(T, hidden_models)
     gaps = []
     for earlier, later in iterutils.pairwise(self.offsets):
         gaps.append(later - earlier)
     posteriors = model.scaled_posterior_durbin(self.nt_coverages, gaps)
     self.posterior_distributions = posteriors
Example #21
0
 def get_orthoplanar_intersection_times(self):
     """
     Get the intersection times for the plane orthogonal to each axis.
     Note that this function assumes interlacing roots:
     each function in self.fps is solved for one root in every interval
     between consecutive roots of the previous function,
     with the initial and final times as the outermost bounds.
     @return: a list with one list of roots per function
     """
     previous_roots = []
     root_seqs = []
     for f in self.fps:
         bounds = [self.t_initial] + previous_roots + [self.t_final]
         roots = [
             scipy.optimize.brentq(f, low, high)
             for low, high in iterutils.pairwise(bounds)
         ]
         root_seqs.append(roots)
         previous_roots = roots
     return root_seqs
Example #22
0
 def get_transition_expectations_brute(self, initial_state, final_state, nsteps):
     """
     Enumerate every sequence of intermediate states between the given
     endpoints and accumulate probability-weighted transition counts.
     @return: a matrix of expected transition counts
     """
     T = self.transition_object.get_transition_probability
     # one row and one column per state
     expectations = np.zeros((self.nstates, self.nstates))
     # probability of reaching the final state from the initial state
     p_total = T(initial_state, final_state, nsteps)
     for interior in itertools.product(range(self.nstates), repeat=nsteps-1):
         path = [initial_state]
         path.extend(interior)
         path.append(final_state)
         steps = list(iterutils.pairwise(path))
         # probability of this path given its initial state
         p = 1.0
         for a, b in steps:
             p *= T(a, b)
         # every transition on the path is weighted by the path probability
         for a, b in steps:
             expectations[a, b] += p
     # divide by the total probability so that the conditioning is correct
     expectations /= p_total
     return expectations
Example #23
0
def get_tikz_body(fs):
    """
    Build the text of the TikZ drawing commands for a plot of two curves.
    The output is accumulated with the Python 2 print-chevron statement.
    @param fs: an object with the attributes plot_width, plot_height,
    t_max, a_mu, a_N, b_mu and b_N
    @return: the accumulated text with trailing whitespace removed
    """
    out = StringIO()
    # define user variables
    plot_width = fs.plot_width
    plot_height = fs.plot_height
    timescale = fs.t_max
    # create the function objects
    f_a = JC69.IdentitySlopeInformation(fs.a_mu, fs.a_N)
    f_b = JC69.IdentitySlopeInformation(fs.b_mu, fs.b_N)
    # Define some times for evaluation of the curve.
    # These are the time scale repeatedly halved, ten values in total.
    times = [timescale * 2**-i for i in range(10)]
    # define some more intermediate values
    # The vertical extent is 1.2 times the larger of the two function
    # values at the smallest evaluation time.
    ymax = max(f_a(min(times)), f_b(min(times))) * 1.2
    # per-axis factors that map (time, value) pairs to plot coordinates
    plotscale = np.array((plot_width / timescale, plot_height / ymax))
    origin = (0, 0)
    # draw the boundary of the plot
    # The horizontal axis carries the label $t$ below it.
    print >> out, r'\draw[color=gray] %s %s {%s} %s;' % (
        tikz.point_to_tikz(origin), 'edge node[color=black,below]', '$t$',
        tikz.point_to_tikz((plot_width, 0)))
    print >> out, r'\draw[color=gray] ' + get_segment(origin, (0, plot_height))
    # draw the bezier curves hitting the right knots
    # Each pair of consecutive sorted times gives one hermite chunk
    # built from the scaled endpoint positions and tangents.
    for f in (f_a, f_b):
        bchunks = []
        for a, b in iterutils.pairwise(sorted(times)):
            pta = np.array((a, f(a)))
            ptb = np.array((b, f(b)))
            dta = np.array((1, f.deriv(a)))
            dtb = np.array((1, f.deriv(b)))
            bchunk = bezier.create_bchunk_hermite(a, b, pta * plotscale,
                                                  ptb * plotscale,
                                                  dta * plotscale,
                                                  dtb * plotscale)
            bchunks.append(bchunk)
        print >> out, r'\draw[color=gray] ' + get_tikz_bezier(bchunks)
    # draw filled black dots at some intersections
    # The dots are at the origin and at each function's value at time zero.
    dot_points = [origin]
    dot_points.append((0, f_a(0)))
    dot_points.append((0, f_b(0)))
    for p in dot_points:
        # A trailing comma makes the Python 2 print statement
        # continue on the same output line.
        print >> out, r'\fill[color=black,inner sep=0pt]',
        print >> out, tikz.point_to_tikz(np.array(p) * plotscale),
        print >> out, 'circle (1pt);'
    # draw some text annotations
    pt_txt_pairs = [
        ((0, 0), '0'),
    ]
    for i, (pt, txt) in enumerate(pt_txt_pairs):
        print >> out, r'\node[anchor=east] (%s) at %s {%s};' % (
            'ylabel%d' % i, tikz.point_to_tikz(pt), txt)
    # return the text without the final newline
    return out.getvalue().rstrip()
Example #24
0
 def get_patches(self, times):
     """
     The idea is to patch over the quiescent joints.
     This will erase the small imperfection caused by
     drawing two background-erased curves butted against each other
     or overlapping each other.
     The characteristic times of the returned bpaths should
     be equal to the quiescence time.
     The endpoints of each patch are placed one third and two thirds
     of the way between the two neighboring intersection times,
     and the quiescence time is the midpoint between them.
     @param times: sorted filtered intersection times
     @return: a collection of BezierPath objects
     """
     # if no quiescence time exists then no patch is needed
     if len(times) < 2:
         return []
     # avoid numerical error at piecewise boundaries
     abstol = 1e-6
     # define the patch endtimes and characteristic times
     patch_triples = []
     for intersect_a, intersect_b in iterutils.pairwise(times):
         # midpoint, one-third point and two-thirds point of the interval
         tq = 0.5 * (intersect_a + intersect_b)
         ta = (2.0 / 3.0) * intersect_a + (1.0 / 3.0) * intersect_b
         tb = (1.0 / 3.0) * intersect_a + (2.0 / 3.0) * intersect_b
         patch_triples.append((ta, tq, tb))
     # make the patches
     # The deque is consumed from the left across all patches,
     # so the patches are processed in increasing time order.
     patches = []
     remaining = deque(self.bchunks)
     for ta, tq, tb in patch_triples:
         # chop until we are near time ta
         # Chunks before ta are dropped; a chunk that straddles ta is
         # split and only the part starting at ta is pushed back.
         # NOTE(review): remaining[0] raises IndexError if the deque
         # empties first -- confirm that ta and tb lie within the path.
         while remaining[0].start_time < ta - abstol:
             b = remaining.popleft()
             if ta < b.stop_time:
                 ba, bb = b.split_global(ta)
                 remaining.appendleft(bb)
         # eat until we are near time tb
         # Whole chunks before tb go into the patch; a chunk that
         # straddles tb is split and its remainder is pushed back.
         g = []
         while remaining[0].start_time < tb - abstol:
             b = remaining.popleft()
             if tb < b.stop_time:
                 ba, bb = b.split_global(tb)
                 g.append(ba)
                 remaining.appendleft(bb)
             else:
                 g.append(b)
         # add the patch
         patch = self.__class__(g)
         patch.characteristic_time = tq
         patches.append(patch)
     return patches
Example #25
0
 def get_patches(self, times):
     """
     The idea is to patch over the quiescent joints.
     This will erase the small imperfection caused by
     drawing two background-erased curves butted against each other
     or overlapping each other.
     The characteristic times of the returned bpaths should
     be equal to the quiescence time.
     Each patch runs from the one-third point to the two-thirds point
     of the interval between two neighboring intersection times,
     and its quiescence time is the midpoint of that interval.
     @param times: sorted filtered intersection times
     @return: a collection of BezierPath objects
     """
     # if no quiescence time exists then no patch is needed
     if len(times) < 2:
         return []
     # avoid numerical error at piecewise boundaries
     abstol = 1e-6
     # define the patch endtimes and characteristic times
     patch_triples = []
     for intersect_a, intersect_b in iterutils.pairwise(times):
         # tq is the midpoint; ta and tb divide the interval into thirds
         tq = 0.5 * (intersect_a + intersect_b)
         ta = (2.0 / 3.0) * intersect_a + (1.0 / 3.0) * intersect_b
         tb = (1.0 / 3.0) * intersect_a + (2.0 / 3.0) * intersect_b
         patch_triples.append((ta, tq, tb))
     # make the patches
     # One deque of chunks is shared by all patches and is
     # consumed from the left as the patch times increase.
     patches = []
     remaining = deque(self.bchunks)
     for ta, tq, tb in patch_triples:
         # chop until we are near time ta
         # Anything before ta is discarded, including the first piece
         # of a chunk that gets split at ta.
         # NOTE(review): indexing an emptied deque raises IndexError --
         # confirm that the patch times always fall inside the path.
         while remaining[0].start_time < ta - abstol:
             b = remaining.popleft()
             if ta < b.stop_time:
                 ba, bb = b.split_global(ta)
                 remaining.appendleft(bb)
         # eat until we are near time tb
         # The patch collects whole chunks, plus the first piece
         # of a chunk that gets split at tb.
         g = []
         while remaining[0].start_time < tb - abstol:
             b = remaining.popleft()
             if tb < b.stop_time:
                 ba, bb = b.split_global(tb)
                 g.append(ba)
                 remaining.appendleft(bb)
             else:
                 g.append(b)
         # add the patch
         patch = self.__class__(g)
         patch.characteristic_time = tq
         patches.append(patch)
     return patches
Example #26
0
 def draw_curve(self):
     """
     Return the TikZ draw command for the curve of self.f.
     The curve is a sequence of hermite chunks between consecutive
     knot times, each defined by the scaled positions and tangents
     of the function at its two knots.
     """
     scale = np.array((self.plot_width / self.timescale, self.plot_height))
     knot_times = self._get_knot_times()

     def hermite_chunk(a, b):
         # positions first and tangents second, for both knots
         pta = np.array((a, self.f(a)))
         ptb = np.array((b, self.f(b)))
         dta = np.array((1, self.f.deriv(a)))
         dtb = np.array((1, self.f.deriv(b)))
         return bezier.create_bchunk_hermite(
             a, b, pta * scale, ptb * scale, dta * scale, dtb * scale)

     bchunks = [hermite_chunk(a, b) for a, b in iterutils.pairwise(knot_times)]
     return r'\draw ' + get_tikz_bezier(bchunks)
Example #27
0
 def annotate_posteriors(self, transition_object, hidden_models):
     """
     Compute the posterior distributions and store them
     in self.posterior_distributions.
     Each observation is the sorted tuple of a coverage
     with its last entry dropped.
     @param transition_object: has transition matrix information
     @param hidden_models: a list of statistical models
     """
     hmm = FastHMM.Model(transition_object, hidden_models, 10000)
     # build the observations and the gaps between consecutive offsets
     observations = []
     for coverage in self.nt_coverages:
         observations.append(tuple(sorted(coverage[:-1])))
     distances = []
     for earlier, later in iterutils.pairwise(self.offsets):
         distances.append(later - earlier)
     # do the annotation
     self.posterior_distributions = hmm.scaled_posterior_durbin(
         hmm.get_dp_info(observations, distances))
Example #28
0
 def shatter(self, times):
     """
     Return a collection of BezierPath objects.
     The returned objects should be annotated
     with characteristic times corresponding to intersections.
     If there are fewer than two intersection times then this path
     is annotated in place and returned as the only element.
     Otherwise the path is cut at the midpoint between each pair of
     consecutive intersection times, giving one piece per intersection.
     @param times: sorted filtered intersection times
     @return: a collection of BezierPath objects
     """
     # handle the edge case of no intersections
     # The midpoint of the path serves as the characteristic time.
     if not times:
         self.characteristic_time = 0.5 * (
                 self.get_start_time() + self.get_stop_time())
         return [self]
     # handle the edge case of a single intersection
     if len(times) == 1:
         self.characteristic_time = times[0]
         return [self]
     # Compute quiescence times.
     # TODO use weak spatially quiescent midpoints
     # instead of naive temporally quiescent midpoints
     quiescence_times = [0.5*(a+b) for a, b in iterutils.pairwise(times)]
     # Construct the bchunks sequences.
     # Use whole bchunks when possible,
     # but at quiescence times we might have to split the bchunks.
     remaining = deque(self.bchunks)
     groups = []
     g = []
     # repeatedly split the remaining sequence
     for q in quiescence_times:
         # Collect chunks until reaching the one whose interval holds q.
         # NOTE(review): popleft raises IndexError on an empty deque,
         # which would happen if no remaining chunk contains q --
         # confirm that q is always inside the path's time range.
         while True:
             b = remaining.popleft()
             if b.start_time <= q <= b.stop_time:
                 # The piece before q finishes this group;
                 # the piece after q is put back for the next one.
                 ba, bb = b.split_global(q)
                 g.append(ba)
                 remaining.appendleft(bb)
                 groups.append(g)
                 g = []
                 break
             else:
                 g.append(b)
     # the chunks after the last quiescence time form the last group
     g.extend(remaining)
     groups.append(g)
     # Create a piecewise bezier curve from each group,
     # and give each piecewise curve a characteristic time.
     # The number of groups is one more than the number of quiescence
     # times, which matches the number of intersection times.
     piecewise_curves = []
     for t, group in zip(times, groups):
         curve = self.__class__(group)
         curve.characteristic_time = t
         piecewise_curves.append(curve)
     return piecewise_curves
Example #29
0
 def get_orthoplanar_intersection_times(self):
     """
     Get the intersection times for the plane orthogonal to each axis.
     One root is sought for each function in self.fps
     inside each bracket formed by t_initial,
     the roots found for the previous function, and t_final.
     Note that this function assumes interlacing roots.
     @return: a list with one list of roots per function in self.fps
     """
     all_roots = []
     previous = []
     for f in self.fps:
         # the previous roots split the time interval into brackets
         brackets = [self.t_initial] + previous + [self.t_final]
         current = []
         for low, high in iterutils.pairwise(brackets):
             current.append(scipy.optimize.brentq(f, low, high))
         all_roots.append(current)
         previous = current
     return all_roots
Example #30
0
 def evaluate(self, t_target):
     """
     Linearly interpolate the position at the target time.
     This is slow because the segments are scanned in order.
     If no segment contains the target time then
     the function falls through and implicitly returns None.
     @param t_target: target time
     @return: the interpolated point on the first containing segment
     @raise ValueError: if the target time is outside the range of times
     """
     if not self.times[0] <= t_target <= self.times[-1]:
         raise ValueError('out of range')
     npoints = len(self.points)
     for i, j in iterutils.pairwise(range(npoints)):
         pa, pb = self.points[i], self.points[j]
         ta, tb = self.times[i], self.times[j]
         if ta <= t_target <= tb:
             # Force true division so that integer times are not
             # truncated to zero by Python 2 integer division.
             t_local = float(t_target - ta) / (tb - ta)
             p = (1 - t_local) * pa + t_local * pb
             return p
Example #31
0
 def get_transition_expectations_brute(self, initial_state, final_state,
                                       nsteps):
     """
     Get expected transition counts by enumerating every path.
     Every sequence of states from the initial state to the final state
     contributes its probability to each transition that it uses.
     @param initial_state: the state at the first position
     @param final_state: the state at the last position
     @param nsteps: the number of steps between the two positions
     @return: a matrix of expected transition counts
     """
     T = self.transition_object.get_transition_probability
     # start with a matrix of zeros
     counts = np.zeros((self.nstates, self.nstates))
     # the probability of the final state conditional on the initial state
     p_total = T(initial_state, final_state, nsteps)
     # enumerate the unobserved interior states
     for interior in itertools.product(range(self.nstates),
                                       repeat=nsteps - 1):
         path = [initial_state] + list(interior) + [final_state]
         steps = list(iterutils.pairwise(path))
         # the probability of this path given the initial state
         weight = 1.0
         for a, b in steps:
             weight *= T(a, b)
         # credit each transition on the path with that probability
         for a, b in steps:
             counts[a, b] += weight
     # condition on the final state
     counts /= p_total
     return counts
Example #32
0
 def evaluate(self, t_target):
     """
     Linearly interpolate the position at the target time.
     This is slow because the segments are scanned in order.
     If no segment contains the target time then
     the function falls through and implicitly returns None.
     @param t_target: target time
     @return: the interpolated point on the first containing segment
     @raise ValueError: if the target time is outside the range of times
     """
     if not self.times[0] <= t_target <= self.times[-1]:
         raise ValueError('out of range')
     npoints = len(self.points)
     for i, j in iterutils.pairwise(range(npoints)):
         pa, pb = self.points[i], self.points[j]
         ta, tb = self.times[i], self.times[j]
         if ta <= t_target <= tb:
             # Force true division so that integer times are not
             # truncated to zero by Python 2 integer division.
             t_local = float(t_target - ta) / (tb - ta)
             p = (1 - t_local) * pa + t_local * pb
             return p
Example #33
0
 def annotate_posteriors(self, transition_object, hidden_models):
     """
     Compute posterior distributions and store them on this object.
     The result is saved as self.posterior_distributions.
     @param transition_object: has transition matrix information
     @param hidden_models: a list of statistical models
     """
     # build the HMM with a cache size of ten thousand
     hmm = FastHMM.Model(transition_object, hidden_models, 10000)
     # an observation is the sorted coverage with its last entry dropped
     observations = []
     for coverage in self.nt_coverages:
         trimmed = coverage[:-1]
         observations.append(tuple(sorted(trimmed)))
     # a distance is the difference between consecutive offsets
     distances = []
     for previous, current in iterutils.pairwise(self.offsets):
         distances.append(current - previous)
     # run the dynamic programming and keep the scaled posteriors
     dp_info = hmm.get_dp_info(observations, distances)
     self.posterior_distributions = hmm.scaled_posterior_durbin(dp_info)
Example #34
0
def get_segmentation(p, t0, t1):
    """
    Split the time interval at the roots of p and record a sign per piece.
    A segmentation is a sequence of triples (left, right, sign).
    The sign is -1 when p is not positive at the midpoint of the piece,
    and it is 1 otherwise.
    @param p: a sympy Poly
    @param t0: initial time
    @param t1: final time
    @return: a segmentation
    """
    # the breakpoints are the endpoints with the sorted roots in between
    breakpoints = [t0]
    breakpoints.extend(sorted(float(r) for r in sympy.roots(p)))
    breakpoints.append(t1)
    segmentation = []
    for left, right in iterutils.pairwise(breakpoints):
        midpoint = (left + right) / 2
        if p.eval(midpoint) <= 0:
            sign = -1
        else:
            sign = 1
        segmentation.append((left, right, sign))
    return segmentation
Example #35
0
 def test_check_sign_lacing_true(self):
     """
     Each pair of consecutive valuations should pass the sign lacing check.
     """
     # adjacency lists keyed by vertex id
     id_to_adj = {
             1: [5], 2: [5],
             3: [6], 4: [6],
             5: [1, 2, 6],
             6: [3, 4, 5]}
     # each row gives the signs of vertices 1 through 6
     ids = (1, 2, 3, 4, 5, 6)
     sign_rows = [
             (1, 1, 1, 1, 1, 1),
             (-1, -1, 1, 1, -1, 1),
             (-1, 1, 1, -1, 1, 1),
             (-1, -1, 1, -1, 1, 1)]
     valuations = [dict(zip(ids, row)) for row in sign_rows]
     for first, second in iterutils.pairwise(valuations):
         self.assertEqual(check_sign_lacing(id_to_adj, first, second), True)
Example #36
0
def get_segmentation(p, t0, t1):
    """
    Split the time interval at the roots of p and record a sign per piece.
    A segmentation is a sequence of triples (left, right, sign).
    The sign is -1 when p is not positive at the midpoint of the piece,
    and it is 1 otherwise.
    @param p: a sympy Poly
    @param t0: initial time
    @param t1: final time
    @return: a segmentation
    """
    # the breakpoints are the endpoints with the sorted roots in between
    interior = sorted(float(r) for r in sympy.roots(p))
    breakpoints = [t0] + interior + [t1]
    segmentation = []
    for left, right in iterutils.pairwise(breakpoints):
        midpoint = (left + right) / 2
        if p.eval(midpoint) <= 0:
            segmentation.append((left, right, -1))
        else:
            segmentation.append((left, right, 1))
    return segmentation
Example #37
0
 def annotate_posteriors(self, stickiness, hidden_models):
     """
     Accumulate expected counts, the log likelihood,
     and the expected number of hidden state transitions.
     One vector is appended to self.expected_count_vectors per model,
     and self.log_likelihood and self.ntransitions_expected are set.
     @param stickiness: a nonnegative integer that defines the transition matrix
     @param hidden_models: a list of statistical models
     @raise ValueError: if a nan is found in a distribution
     """
     # None is passed as the cache limit so that caches are unlimited
     cache_limit = None
     # the transition matrix is uniform with a stickiness dependent rate
     nhidden = len(hidden_models)
     prandom = .1**stickiness
     transition_object = TransitionMatrix.UniformTransitionObject(
             prandom, nhidden, cache_limit)
     hmm = FastHMM.Model(transition_object, hidden_models, cache_limit)
     # observations are sorted coverages; distances are offset differences
     observations = []
     for coverage in self.nt_coverages:
         observations.append(tuple(sorted(coverage)))
     distances = []
     for left, right in iterutils.pairwise(self.offsets):
         distances.append(right - left)
     # one posterior distribution over hidden models per observation
     dp_info = hmm.get_dp_info(observations, distances)
     distribution_list = hmm.scaled_posterior_durbin(dp_info)
     # one zero vector of counts per hidden model
     for model in hidden_models:
         self.expected_count_vectors.append(np.zeros(len(model.states)))
     # weight each model posterior by the probability of that model
     for observation, distribution in zip(observations, distribution_list):
         if any(math.isnan(p) for p in distribution):
             raise ValueError('nan in distribution: %s' % distribution)
         vectors = []
         for model in hidden_models:
             vectors.append(model.get_posterior_distribution(observation))
         for v in vectors:
             if any(math.isnan(x) for x in v):
                 raise ValueError('nan in posterior mixture: %s' % v)
         weighted = [v * p for v, p in zip(vectors, distribution)]
         for i, contribution in enumerate(weighted):
             self.expected_count_vectors[i] += contribution
     # summary statistics from the dynamic programming tables
     self.log_likelihood = hmm.get_log_likelihood(dp_info)
     self.ntransitions_expected = hmm.scaled_ntransitions_expected(dp_info)
Example #38
0
def get_bezier_path(fp, fv, t_initial, t_final, nchunks):
    """
    Approximate a curve by a piecewise bezier path.
    The time interval is divided into equal chunks,
    and each chunk is built from the positions and velocities
    at its two endpoints.
    @param fp: a python function from t to position vector
    @param fv: a python function from t to velocity vector
    @param t_initial: initial time
    @param t_final: final time
    @param nchunks: use this many chunks in the piecewise approximation
    @return: a BezierPath
    """
    bchunks = []
    npoints = nchunks + 1
    # Use a float duration so that integer endpoints do not make
    # the increment truncate to zero under Python 2 integer division.
    duration = float(t_final - t_initial)
    incr = duration / nchunks
    times = [t_initial + i*incr for i in range(npoints)]
    for ta, tb in iterutils.pairwise(times):
        b = bezier.create_bchunk_hermite(
                ta, tb, fp(ta), fp(tb), fv(ta), fv(tb))
        bchunks.append(b)
    return BezierPath(bchunks)
Example #39
0
def get_bezier_path(fp, fv, t_initial, t_final, nchunks):
    """
    Approximate a curve by a piecewise bezier path.
    The time interval is divided into equal chunks,
    and each chunk is built from the positions and velocities
    at its two endpoints.
    @param fp: a python function from t to position vector
    @param fv: a python function from t to velocity vector
    @param t_initial: initial time
    @param t_final: final time
    @param nchunks: use this many chunks in the piecewise approximation
    @return: a BezierPath
    """
    bchunks = []
    npoints = nchunks + 1
    # Use a float duration so that integer endpoints do not make
    # the increment truncate to zero under Python 2 integer division.
    duration = float(t_final - t_initial)
    incr = duration / nchunks
    times = [t_initial + i * incr for i in range(npoints)]
    for ta, tb in iterutils.pairwise(times):
        b = bezier.create_bchunk_hermite(ta, tb, fp(ta), fp(tb), fv(ta),
                                         fv(tb))
        bchunks.append(b)
    return BezierPath(bchunks)
Example #40
0
 def test_check_sign_lacing_true(self):
     """
     Each pair of consecutive valuations should pass the sign lacing check.
     """
     # adjacency lists keyed by vertex id
     id_to_adj = {
         1: [5],
         2: [5],
         3: [6],
         4: [6],
         5: [1, 2, 6],
         6: [3, 4, 5],
     }
     # each row gives the signs of vertices 1 through 6
     sign_rows = [
         (1, 1, 1, 1, 1, 1),
         (-1, -1, 1, 1, -1, 1),
         (-1, 1, 1, -1, 1, 1),
         (-1, -1, 1, -1, 1, 1),
     ]
     valuations = []
     for row in sign_rows:
         valuations.append(dict(zip((1, 2, 3, 4, 5, 6), row)))
     for first, second in iterutils.pairwise(valuations):
         result = check_sign_lacing(id_to_adj, first, second)
         self.assertEqual(result, True)
Example #41
0
 def transition_expectations(self, observations, forward, backward):
     """
     Sum the contribution of each pair of consecutive positions
     to the expected count of each hidden state transition.
     @param observations: an observation source
     @param forward: a source of forward vectors
     @param backward: a source of backward vectors
     @return: a matrix of expected hidden state transition counts
     """
     nhidden = len(self.hidden_state_objects)
     # start with a matrix of zeros
     expectations = np.zeros((nhidden, nhidden))
     # walk over consecutive (observation, forward, backward) triples
     triples = itertools.izip(observations, forward, backward)
     for old, new in iterutils.pairwise(triples):
         _o_old, f_old, _b_old = old
         o_new, _f_new, b_new = new
         likelihoods = self.get_likelihoods(o_new)
         for i in range(nhidden):
             for j in range(nhidden):
                 tprob = self.T.get_transition_probability(i, j)
                 term = f_old[i] * tprob * likelihoods[j] * b_new[j]
                 expectations[i, j] += term
     return expectations
Example #42
0
 def transition_expectations(self, observations, forward, backward):
     """
     Sum the contribution of each pair of consecutive positions
     to the expected count of each hidden state transition.
     @param observations: an observation source
     @param forward: a source of forward vectors
     @param backward: a source of backward vectors
     @return: a matrix of expected hidden state transition counts
     """
     nhidden = len(self.hidden_state_objects)
     # start with a matrix of zeros
     expectations = np.zeros((nhidden, nhidden))
     # walk over consecutive (observation, forward, backward) triples
     triples = itertools.izip(observations, forward, backward)
     for old, new in iterutils.pairwise(triples):
         _o_old, f_old, _b_old = old
         o_new, _f_new, b_new = new
         likelihoods = self.get_likelihoods(o_new)
         for i in range(nhidden):
             for j in range(nhidden):
                 tprob = self.T.get_transition_probability(i, j)
                 term = f_old[i] * tprob * likelihoods[j] * b_new[j]
                 expectations[i, j] += term
     return expectations
Example #43
0
def get_linear_tikz_pane(
        shape, width, height, time_lists,
        t_initial, t_final,
        vgap, cut_radius):
    """
    Get tikz code for four stacked horizontal time lines.
    Each row has a thin colored line over the whole time interval.
    Every row except the first gets thick segments
    where the corresponding function in shape.fps is positive,
    and every row except the last gets a black cut at each of its times.
    The height parameter is not used.
    @return: a multi-line string of tikz draw commands
    """
    duration = float(t_final - t_initial)

    def to_x(t):
        # map a time to a horizontal coordinate
        return t * (width / duration)

    commands = []
    for row in range(4):
        color = g_colors[row]
        # the thin line spans the whole time interval
        x_start = to_x(t_initial)
        x_stop = to_x(t_final)
        commands.append('\\draw[%s] %s -- %s;' % (
                color,
                tikz.point_to_tikz((x_start, -row*vgap)),
                tikz.point_to_tikz((x_stop, -row*vgap))))
        # thick segments mark where the function is positive
        if row:
            breakpoints = [t_initial] + time_lists[row-1] + [t_final]
            for ta, tb in iterutils.pairwise(breakpoints):
                t_mid = (ta + tb) / 2.0
                x_left = to_x(ta)
                x_right = to_x(tb)
                if shape.fps[row-1](t_mid) > 0:
                    commands.append('\\draw[very thick,%s] %s -- %s;' % (
                            color,
                            tikz.point_to_tikz((x_left, -row*vgap)),
                            tikz.point_to_tikz((x_right, -row*vgap))))
        # short vertical black cuts mark the times of this row
        if row < 3:
            for t in time_lists[row]:
                x = to_x(t)
                commands.append('\\draw %s -- %s;' % (
                        tikz.point_to_tikz((x, cut_radius-row*vgap)),
                        tikz.point_to_tikz((x, -cut_radius-row*vgap))))
    return '\n'.join(commands)
Example #44
0
 def annotate_posteriors(self, stickiness, hidden_models):
     """
     Annotate using a uniform transition matrix of the given stickiness.
     The posterior distributions and the stickiness are appended
     to self.posterior_distribution_lists and self.stickinesses.
     @param stickiness: a nonnegative integer that defines the transition matrix
     @param hidden_models: a list of statistical models
     """
     # the transition matrix is uniform with a stickiness dependent rate
     nhidden = len(hidden_models)
     prandom = .1**stickiness
     transition_object = TransitionMatrix.UniformTransitionObject(
             prandom, nhidden)
     # build the HMM with a cache size of ten thousand
     hmm = FastHMM.Model(transition_object, hidden_models, 10000)
     # an observation is the sorted coverage with its last entry dropped
     observations = []
     for coverage in self.nt_coverages:
         observations.append(tuple(sorted(coverage[:-1])))
     # a distance is the difference between consecutive offsets
     distances = []
     for left, right in iterutils.pairwise(self.offsets):
         distances.append(right - left)
     # run the dynamic programming
     dp_info = hmm.get_dp_info(observations, distances)
     posteriors = hmm.scaled_posterior_durbin(dp_info)
     # keep the annotation together with its stickiness
     self.posterior_distribution_lists.append(posteriors)
     self.stickinesses.append(stickiness)
Example #45
0
 def test_lfdi_approximation(self):
     """
     As N increases, the approximation should become closer.
     More precisely, as N becomes large,
     multiplying N by ten should
     add one decimal place of accuracy to the approximation.
     Where the accuracy of the approximation is taken
     to be the frobenius norm of the error matrix.
     """
     lfdo = tree_string_to_LFDO(g_tree_string)
     lfdi = LFDO_to_LFDI(lfdo)
     # For these values of N,
     # the error for N should be more than 9 times the error for 10N.
     # When N is very large,
     # the error for N should approach 10 times the error for 10N.
     lfdns = []
     for N in (10, 100, 1000, 10000):
         lfdns.append(LFDO_to_LFDN(lfdo, N))
     error_norms = []
     for lfdn in lfdns:
         error_norms.append(np.linalg.norm(lfdi.M - lfdn.M))
     # each error should be more than nine times as bad as the next one
     for coarse, fine in iterutils.pairwise(error_norms):
         self.assertTrue(coarse / fine > 9)
Example #46
0
 def get_orthoplanar_intersections(self):
     """
     Get the list of intersection points per axis.
     For each axis the points that lie on the orthogonal plane
     are listed first, followed by the interpolated crossings
     of the line segments between consecutive points.
     This is a geometric concept.
     @return: a list with one list of points per axis
     """
     abstol = 1e-6
     point_seqs = []
     for axis in range(self.ndim):
         # points whose coordinate is within tolerance of the plane
         on_plane = [p for p in self.points if abs(p[axis]) < abstol]
         # segments whose endpoints are clearly on opposite sides
         crossings = []
         for pa, pb in iterutils.pairwise(self.points):
             if not (abs(pa[axis]) > abstol and abs(pb[axis]) > abstol):
                 continue
             if pa[axis]*pb[axis] < 0:
                 numerator = pb[axis]*pa - pa[axis]*pb
                 crossings.append(numerator / (pb[axis] - pa[axis]))
         point_seqs.append(on_plane + crossings)
     return point_seqs
Example #47
0
def get_response_content(fs):
    """
    Write a new combo log whose ranges are sized by column variability.
    Each named group in the combo data covers a range of alignment columns.
    In the output, the width of the range of each group is
    the sum over its columns of the number of distinct characters.
    NOTE(review): this relies on zip returning a list,
    because ranges is iterated twice and len is taken of phylip_columns.
    @param fs: an object whose combo and phylip attributes support splitlines
    @return: the text of the new combo log
    @raise ValueError: if the ranges do not start at 1, are not contiguous,
    or do not end at the number of columns of the alignment
    """
    # get the combo info
    combo_triples = list(gen_combo_line_triples(fs.combo.splitlines()))
    names, lows, highs = zip(*combo_triples)
    ranges = zip(lows, highs)
    # the ranges should start at 1 and each should begin after the previous
    if lows[0] != 1:
        raise ValueError('expected the first lower bound to be 1')
    for (low, high), (nlow, nhigh) in iterutils.pairwise(ranges):
        if high + 1 != nlow:
            raise ValueError(
                    'expected the next lower bound '
                    'to be one more than the current upper bound')
    # get the phylip info
    headers, sequences = Phylip.decode(fs.phylip.splitlines())
    # transpose the sequences to get columns
    phylip_columns = zip(*sequences)
    # count the distinct characters in each column
    counts = [len(set(col)) for col in phylip_columns]
    # validate the compatibility between the combo and phylip data
    if highs[-1] != len(phylip_columns):
        raise ValueError(
                'expected the last upper bound to be '
                'equal to the number of columns of the phylip alignment')
    # get the sum of counts in each combination group
    combo_counts = []
    for i, (low, high) in enumerate(ranges):
        combo_count = 0
        # note that low and high are 1-based and inclusive
        for j in range(low-1, high):
            combo_count += counts[j]
        combo_counts.append(combo_count)
    # write the new combo log
    out = StringIO()
    print >> out, 'Loci combined'
    print >> out
    # k is the upper bound of the most recently written range
    k = 0
    for name, count in zip(names, combo_counts):
        low = k + 1
        high = k + count
        print >> out, '%s\t%d-%d' % (name, low, high)
        k += count
    return out.getvalue()
Example #48
0
 def annotate_posteriors(self, stickiness, hidden_models):
     """
     Accumulate expected counts, the log likelihood,
     and the expected number of hidden state transitions.
     One vector is appended to self.expected_count_vectors per model,
     and self.log_likelihood and self.ntransitions_expected are set.
     @param stickiness: a nonnegative integer that defines the transition matrix
     @param hidden_models: a list of statistical models
     @raise ValueError: if a nan is found in a distribution
     """
     # None is passed as the cache limit so that caches are unlimited
     cache_limit = None
     # the transition matrix is uniform with a stickiness dependent rate
     nhidden = len(hidden_models)
     prandom = .1**stickiness
     transition_object = TransitionMatrix.UniformTransitionObject(
             prandom, nhidden, cache_limit)
     hmm = FastHMM.Model(transition_object, hidden_models, cache_limit)
     # observations are sorted coverages; distances are offset differences
     observations = []
     for coverage in self.nt_coverages:
         observations.append(tuple(sorted(coverage)))
     distances = []
     for left, right in iterutils.pairwise(self.offsets):
         distances.append(right - left)
     # one posterior distribution over hidden models per observation
     dp_info = hmm.get_dp_info(observations, distances)
     distribution_list = hmm.scaled_posterior_durbin(dp_info)
     # one zero vector of counts per hidden model
     for model in hidden_models:
         self.expected_count_vectors.append(np.zeros(len(model.states)))
     # weight each model posterior by the probability of that model
     for observation, distribution in zip(observations, distribution_list):
         if any(math.isnan(p) for p in distribution):
             raise ValueError('nan in distribution: %s' % distribution)
         vectors = []
         for model in hidden_models:
             vectors.append(model.get_posterior_distribution(observation))
         for v in vectors:
             if any(math.isnan(x) for x in v):
                 raise ValueError('nan in posterior mixture: %s' % v)
         weighted = [v*p for v, p in zip(vectors, distribution)]
         for i, contribution in enumerate(weighted):
             self.expected_count_vectors[i] += contribution
     # summary statistics from the dynamic programming tables
     self.log_likelihood = hmm.get_log_likelihood(dp_info)
     self.ntransitions_expected = hmm.scaled_ntransitions_expected(dp_info)
Example #49
0
 def brute_posterior_decoding(self, observations):
     """
     Get the distribution of hidden states at each position given the observed sequence.
     This is done inefficiently by summing over each possible hidden state sequence.
     @param observations: the sequence of observations
     @return: hidden state distributions at each position, and total probability
     """
     states = self.hidden_state_objects
     nhidden = len(states)
     path_log_likelihoods = []
     # tabulate the log likelihood of each observation under each state
     position_log_likelihoods = []
     for obs in observations:
         row = [state.get_log_likelihood(obs) for state in states]
         position_log_likelihoods.append(row)
     # one list of path log likelihoods per position per hidden state
     per_position = []
     for obs in observations:
         per_position.append([[] for state_index in range(nhidden)])
     # enumerate every hidden state sequence
     npositions = len(observations)
     for path in itertools.product(range(nhidden), repeat=npositions):
         # initial state, then transitions, then emissions
         log_p = 0
         log_p += math.log(self.initial_distribution[path[0]])
         for source, sink in iterutils.pairwise(path):
             log_p += math.log(self.transition_matrix[source, sink])
         for state_index, row in zip(path, position_log_likelihoods):
             log_p += row[state_index]
         # credit the path to the state that it visits at each position
         for position, state_index in enumerate(path):
             per_position[position][state_index].append(log_p)
         # the path also contributes to the total probability
         path_log_likelihoods.append(log_p)
     # normalize the accumulated log likelihoods at each position
     distributions = []
     for log_lists in per_position:
         logs = [scipy.misc.logsumexp(x) for x in log_lists]
         # shift by the maximum before exponentiating
         shifted = [d - max(logs) for d in logs]
         weights = [math.exp(d) for d in shifted]
         distributions.append([w / sum(weights) for w in weights])
     total_probability = math.exp(scipy.misc.logsumexp(path_log_likelihoods))
     return distributions, total_probability
Example #50
0
 def get_orthoplanar_intersections(self):
     """
     Get the list of intersection points per axis.
     For each axis the points that lie on the orthogonal plane
     are listed first, followed by the interpolated crossings
     of the line segments between consecutive points.
     This is a geometric concept.
     @return: a list with one list of points per axis
     """
     abstol = 1e-6
     point_seqs = []
     for axis in range(self.ndim):
         # points whose coordinate is within tolerance of the plane
         on_plane = [p for p in self.points if abs(p[axis]) < abstol]
         # segments whose endpoints are clearly on opposite sides
         crossings = []
         for pa, pb in iterutils.pairwise(self.points):
             if not (abs(pa[axis]) > abstol and abs(pb[axis]) > abstol):
                 continue
             if pa[axis] * pb[axis] < 0:
                 numerator = pb[axis] * pa - pa[axis] * pb
                 crossings.append(numerator / (pb[axis] - pa[axis]))
         point_seqs.append(on_plane + crossings)
     return point_seqs
Example #51
0
def parse_hky_output(lines):
    """
    Extract the parameter estimates from lines of output.
    The base frequencies are read in the order T, C, A, G
    from the line that follows the frequency header line.
    @param lines: lines of output
    @return: a dictionary with keys 'kappa', 'A', 'C', 'G', 'T', and 'lnL'
    """
    result = {}
    lines = Util.get_stripped_lines(lines)
    for line in lines:
        # kappa is the text after the first colon
        if line.startswith('kappa under HKY85'):
            fields = [field.strip() for field in line.split(':')]
            result['kappa'] = float(fields[1])
        # the log likelihood is the second to last token
        if line.startswith('lnL('):
            result['lnL'] = float(line.split()[-2])
    # the frequencies are on the line after their header
    for header, values in iterutils.pairwise(lines):
        if not header.startswith('base frequency parameters'):
            continue
        frequencies = [float(x) for x in values.split()]
        result.update(zip(list('TCAG'), frequencies))
    return result
Example #52
0
 def get_joint_log_likelihood(self, hidden_seq, observed_seq):
     """
     Add the log probabilities of the initial state,
     of each hidden state transition, and of each emission.
     The two arguments are conformantly ordered.
     @param hidden_seq: a sequence of hidden state indices
     @param observed_seq: a conformant sequence of observation objects
     @return: the joint log likelihood of the hidden and observed sequences
     @raise ValueError: if the sequences have different lengths
     """
     if len(hidden_seq) != len(observed_seq):
         raise ValueError('expected conformant input sequences')
     total = 0
     # the initial hidden state
     first_state = hidden_seq[0]
     total += math.log(self.initial_distribution[first_state])
     # the hidden state transitions
     for source, sink in iterutils.pairwise(hidden_seq):
         total += math.log(self.transition_matrix[source, sink])
     # the emissions
     for state_index, observation in zip(hidden_seq, observed_seq):
         state = self.hidden_state_objects[state_index]
         total += state.get_log_likelihood(observation)
     return total
Example #53
0
 def get_expectations_brute(self, initial_state, final_state, nsteps):
     """
     Get the number of times each state was expected to occur between the initial and final positions.
     This enumerates every sequence of interior states,
     so the initial and final states themselves are not counted.
     @param initial_state: the state at the first position
     @param final_state: the state at the last position
     @param nsteps: the number of steps between the two positions
     @return: an expectation for each state
     """
     T = self.transition_object.get_transition_probability
     # start with a vector of zeros
     expected = np.zeros(self.nstates)
     # the probability of the final state conditional on the initial state
     p_total = T(initial_state, final_state, nsteps)
     # enumerate the unobserved interior states
     for interior in itertools.product(range(self.nstates), repeat=nsteps-1):
         path = [initial_state] + list(interior) + [final_state]
         # the probability of this path given the initial state
         weight = 1.0
         for a, b in iterutils.pairwise(path):
             weight *= T(a, b)
         # credit each interior state with that probability
         for state in interior:
             expected[state] += weight
     # condition on the final state
     expected /= p_total
     return expected
Example #54
0
def get_linear_tikz_pane(shape, width, height, time_lists, t_initial, t_final,
                         vgap, cut_radius):
    """
    Get tikz code for four stacked horizontal time lines.
    Each row has a thin colored line over the whole time interval.
    Every row except the first gets thick segments
    where the corresponding function in shape.fps is positive,
    and every row except the last gets a black cut at each of its times.
    The height parameter is not used.
    @return: a multi-line string of tikz draw commands
    """
    duration = float(t_final - t_initial)

    def to_x(t):
        # map a time to a horizontal coordinate
        return t * (width / duration)

    commands = []
    for row in range(4):
        color = g_colors[row]
        # the thin line spans the whole time interval
        x_start = to_x(t_initial)
        x_stop = to_x(t_final)
        thin_start = tikz.point_to_tikz((x_start, -row * vgap))
        thin_stop = tikz.point_to_tikz((x_stop, -row * vgap))
        commands.append('\\draw[%s] %s -- %s;' % (color, thin_start,
                                                  thin_stop))
        # thick segments mark where the function is positive
        if row:
            breakpoints = [t_initial] + time_lists[row - 1] + [t_final]
            for ta, tb in iterutils.pairwise(breakpoints):
                t_mid = (ta + tb) / 2.0
                x_left = to_x(ta)
                x_right = to_x(tb)
                if shape.fps[row - 1](t_mid) > 0:
                    thick_start = tikz.point_to_tikz((x_left, -row * vgap))
                    thick_stop = tikz.point_to_tikz((x_right, -row * vgap))
                    commands.append('\\draw[very thick,%s] %s -- %s;' % (
                        color, thick_start, thick_stop))
        # short vertical black cuts mark the times of this row
        if row < 3:
            for t in time_lists[row]:
                x = to_x(t)
                cut_top = tikz.point_to_tikz((x, cut_radius - row * vgap))
                cut_bottom = tikz.point_to_tikz((x, -cut_radius - row * vgap))
                commands.append('\\draw %s -- %s;' % (cut_top, cut_bottom))
    return '\n'.join(commands)