Example #1
0
    def zernike(self, G, N):
        """Accumulate the complex moment array ``Z`` of order up to ``N``.

        The computation is a chain of five accumulations, each stored in a
        ``(N + 1, N + 1, N + 1)`` complex array: ``G -> V -> W -> X -> Y -> Z``.

        Parameters
        ----------
        G : array-like
            Indexed with three integer indices ``G[i, j, k]``
            (presumably geometric moments -- TODO confirm against caller).
        N : int
            Maximum order; every intermediate array has ``N + 1`` entries
            per axis.

        Returns
        -------
        numpy.ndarray
            Complex array ``Z`` of shape ``(N + 1, N + 1, N + 1)``.
        """
        V = np.zeros([N + 1, N + 1, N + 1], dtype=complex)
        for a, b, c, alpha in nest(lambda: range(int(N / 2) + 1),
                                   lambda _a: range(N - 2 * _a + 1),
                                   lambda _a, _b: range(N - 2 * _a - _b + 1),
                                   lambda _a, _b, _c: range(_a + _c + 1),
                                   ):
            V[a, b, c] += np.power(IMAG_CONST, alpha) * \
                nchoosek(a + c, alpha) * G[2 * a + c - alpha, alpha, b]

        W = np.zeros([N + 1, N + 1, N + 1], dtype=complex)
        for a, b, c, alpha in nest(lambda: range(int(N / 2) + 1),
                                   lambda _a: range(N - 2 * _a + 1),
                                   lambda _a, _b: range(N - 2 * _a - _b + 1),
                                   lambda _a, _b, _c: range(_a + 1),
                                   ):
            W[a, b, c] += np.power(-1, alpha) * np.power(2, a - alpha) * \
                nchoosek(a, alpha) * V[a - alpha, b, c + 2 * alpha]

        X = np.zeros([N + 1, N + 1, N + 1], dtype=complex)
        for a, b, c, alpha in nest(lambda: range(int(N / 2) + 1),
                                   lambda _a: range(N - 2 * _a + 1),
                                   lambda _a, _b: range(N - 2 * _a - _b + 1),
                                   lambda _a, _b, _c: range(_a + 1),
                                   ):
            X[a, b, c] += nchoosek(a, alpha) * W[a - alpha, b + 2 * alpha, c]

        Y = np.zeros([N + 1, N + 1, N + 1], dtype=complex)
        for l, nu, m, j in nest(lambda: range(N + 1),
                                lambda _l: range(int((N - _l) / 2) + 1),
                                lambda _l, _nu: range(_l + 1),
                                lambda _l, _nu, _m: range(int((_l - _m) / 2) + 1),
                                ):
            Y[l, nu, m] += self.Yljm(l, j, m) * X[nu + j, l - m - 2 * j, m]

        Z = np.zeros([N + 1, N + 1, N + 1], dtype=complex)
        for n, l, m, nu, in nest(lambda: range(N + 1),
                                 lambda _n: range(_n + 1),
                                 # there's an if...mod missing in this but it
                                 # still works?
                                 lambda _n, _l: range(_l + 1),
                                 lambda _n, _l, _m: range(int((_n - _l) / 2) + 1),
                                 ):
            # integer required for k when used as power in Qklnu below:
            k = int((n - l) / 2)
            Z[n, l, m] += (3 / (4 * PI_CONST)) * \
                self.Qklnu(k, l, nu) * np.conj(Y[l, nu, m])

        # The range lambdas must use their own arguments (_n, _l).  Reading
        # the enclosing loop variables n / l instead picks up whatever value
        # the previous iteration (or the previous loop) left behind.
        for n, l, m in nest(lambda: range(N + 1),
                            lambda _n: range(_n + 1),
                            lambda _n, _l: range(_l + 1),
                            ):
            # Even n + l + m: real - imag * IMAG_CONST; odd: the negation of
            # that (conjugate / negated conjugate, assuming IMAG_CONST is 1j).
            if np.mod(np.sum([n, l, m]), 2) == 0:
                Z[n, l, m] = np.real(
                    Z[n, l, m]) - np.imag(Z[n, l, m]) * IMAG_CONST
            else:
                Z[n, l, m] = -np.real(Z[n, l, m]) + \
                    np.imag(Z[n, l, m]) * IMAG_CONST

        return Z
Example #2
0
 def Qklnu(self, k, l, nu):
     """Return the scalar coefficient combined from ``k``, ``l`` and ``nu``.

     The value is ``(aux_1 * aux_2 * aux_3) / aux_4`` where the factors are
     a signed power of 1/4, a square-root normalisation, a trinomial times a
     central binomial, and a central binomial divisor.
     """
     # Builtin float() replaces the np.float alias, which was removed in
     # NumPy 1.24 and raises AttributeError there.
     aux_1 = np.power(-1, k + nu) / float(np.power(4, k))
     aux_2 = np.sqrt((2 * l + 4 * k + 3) / 3.0)
     aux_3 = self.trinomial(
         nu, k - nu, l + nu + 1) * nchoosek(2 * (l + nu + 1 + k), l + nu + 1 + k)
     aux_4 = nchoosek(2.0 * (l + nu + 1), l + nu + 1)
     return (aux_1 * aux_2 * aux_3) / aux_4
Example #3
0
 def _get_minion_probability(self, barcode, e_s=0.051, e_i=0.049, e_d=0.078):
     """Store the MinION error-model probabilities for ``barcode``.

     Results are written to ``self.minionprobs[barcode]`` under the keys
     'params', 'p_minion_aln', 'p_minion_un', 'p_minion',
     'norm_p_minion_aln' and 'norm_p_minion'.

     The read is the alignment "ref", so the counted insertions/deletions
     are relative to the read.  From the barcode's point of view they are
     swapped, which is why e_d is raised to the 'i' count and e_i to the
     'd' count.  Probabilities for barcodes of different lengths are not
     necessarily comparable.
     """
     self._aln_string_check(barcode)

     # Match probability and the relative share of each error type.
     p_m = 1 - e_i - e_d - e_s
     err_total = e_s + e_i + e_d
     share_s = e_s / err_total
     share_i = e_i / err_total
     share_d = e_d / err_total

     # Read every count up front, in the order the values are needed.
     aln_len = self.counts[barcode]['alnlen']
     n_m = self.counts[barcode]['m']
     n_mm = self.counts[barcode]['mm']
     n_i = self.counts[barcode]['i']
     n_d = self.counts[barcode]['d']

     # Multinomial-style count of arrangements of the aligned columns.
     left_after_m = aln_len - n_m
     left_after_mm = left_after_m - n_mm
     left_after_i = left_after_mm - n_i
     arrangements = (nchoosek(aln_len, n_m) * nchoosek(left_after_m, n_mm)
                     * nchoosek(left_after_mm, n_i) * nchoosek(left_after_i, n_d))
     n_u = self.counts[barcode]['u']

     entry = self.minionprobs[barcode] = {}
     entry['params'] = [e_s, e_i, e_d]
     # i and d are deliberately swapped here (see docstring).
     entry['p_minion_aln'] = (p_m**n_m) * (e_s**n_mm) * (e_d**n_i) * (e_i**n_d)
     # It is unclear whether unaligned bases were subs/dels/ins, so all three
     # error rates contribute, weighted by their relative share.
     entry['p_minion_un'] = (e_s**(share_s*n_u)) * (e_i**(share_i*n_u)) * (e_d**(share_d*n_u))
     entry['p_minion'] = entry['p_minion_aln'] * entry['p_minion_un']
     entry['norm_p_minion_aln'] = arrangements * entry['p_minion_aln']
     # The unaligned part is fixed at the ends, so it gets no binomial factor.
     entry['norm_p_minion'] = entry['norm_p_minion_aln'] * entry['p_minion_un']
Example #4
0
def func4_matrix_diag(beta, k, V):
    """Build diagonal matrices from binomial-weighted powers of ``beta``.

    The diagonal is assembled from blocks: for each power ``r`` from ``k``
    down to 0, a block of ``k - r + 1`` equal entries
    ``nchoosek(k, r) * beta**r`` is prepended (the ``r == k`` block is the
    single entry ``beta**k``).  A second diagonal holds the derivative of
    each entry with respect to ``beta``.

    ``V`` is accepted but not used by this function.

    Returns
    -------
    D, U, dD, dU
        ``D`` / ``dD`` are the full diagonal matrices of values /
        derivatives; ``U`` / ``dU`` are copies of them with the last row and
        column dropped.
    """
    diag_ = beta**k
    diff_diag_ = k * beta**(k - 1)

    for power in range(k - 1, -1, -1):
        width = k - power + 1
        coeff = nchoosek(k, power)

        value_block = numpy.ones((width, )) * coeff * (beta**power)
        diag_ = numpy.r_[value_block, diag_]

        if power == 0:
            # Derivative of a constant term is zero.
            deriv_block = numpy.zeros((width, ))
        else:
            deriv_block = numpy.ones(
                (width, )) * coeff * power * (beta**(power - 1))
        diff_diag_ = numpy.r_[deriv_block, diff_diag_]

    D = numpy.diag(diag_)
    dD = numpy.diag(diff_diag_)

    trimmed = len(diag_) - 1

    U = numpy.zeros([trimmed, trimmed])
    U[:, :] = D[0:trimmed, 0:trimmed]

    dU = numpy.zeros([trimmed, trimmed])
    dU[:, :] = dD[0:trimmed, 0:trimmed]

    return D, U, dD, dU
def equidistant_barycentric_weights( n ):
    """Return ``n`` alternating-sign binomial weights.

    Entry ``i`` is ``(-1)**i * nchoosek(n - 1, i)``, stored as a double
    array of length ``n``.
    """
    w = np.zeros( n, np.double )
    for idx in range( n ):
        # Even positions are positive, odd positions negative.
        sign = -1. if idx % 2 else 1.
        w[idx] = sign * nchoosek( n-1, idx )
    return w
Example #6
0
 def get_nonseparable_basis(n, L):
     """Evaluate the degree-``n`` two-variable basis on an integer grid.

     The grid is ``meshgrid(linspace(0, L, L + 1), linspace(0, L, L + 1))``.
     One array is produced per exponent pair ``(i, j)`` with
     ``i + j <= n``, ordered by ``i`` then ``j``:
     ``C(n, i) * C(n - i, j) * x**i * y**j * (L - x - y)**(n - i - j) / L**n``.
     """
     axis = np.linspace(0, L, L + 1)
     x, y = np.meshgrid(axis, np.linspace(0, L, L + 1))
     return [
         nchoosek(n, i) * nchoosek(n - i, j) * x**i * y**j *
         (L - x - y)**(n - i - j) / L**n
         for i in range(n + 1)
         for j in range(n - i + 1)
     ]
Example #7
0
File: brute.py Project: zha/SALib
    def find_most_distant(self, input_sample, num_samples,
                          num_params, k_choices, num_groups=None):
        """
        Score every combination of 'k_choices' trajectories drawn from the
        'num_samples' trajectories contained in 'input_sample'

        Arguments
        ---------
        input_sample : numpy.ndarray
        num_samples : int
            The number of trajectories to choose from
        num_params : int
            The number of parameters
        k_choices : int
            The number of trajectories in each combination
        num_groups : int, default=None
            The number of groups

        Returns
        -------
        numpy.ndarray
            One float32 score per combination, in the order produced by
            ``itertools.combinations``

        Raises
        ------
        ValueError
            If the number of combinations is not below ``sys.maxsize``
        """
        # Size of the search space: (num_samples choose k_choices)
        total = nchoosek(num_samples, k_choices)
        if total >= sys.maxsize:
            raise ValueError("Number of combinations is too large")
        number_of_combinations = int(total)

        # Distances between every pair of trajectories
        distance_matrix = self.compute_distance_matrix(input_sample,
                                                       num_samples,
                                                       num_params,
                                                       num_groups)

        # Work through the combinations in slices of at most one million
        chunk = min(int(1e6), number_of_combinations)

        combo_gen = combinations(range(num_samples), k_choices)
        scores = np.zeros(number_of_combinations, dtype=np.float32)
        # Index pairs within a single combination, computed once
        pairwise = np.array(list(combinations(range(k_choices), 2)))

        mappable = self.mappable
        for counter, combos in enumerate(self.grouper(chunk, combo_gen)):
            start = counter * chunk
            scores[start:start + chunk] \
                = mappable(combos, pairwise, distance_matrix)
        return scores
Example #8
0
 def _get_binomial_probability(self, barcode, e_s=0.051, e_i=0.049, e_d=0.078):
     """Store three binomial match probabilities for ``barcode``.

     Writes ``[prob_aln, prob_aln_plus_unaligned, prob_barcode, p_m]`` to
     ``self.binomprobs[barcode]``, where ``p_m = 1 - e_i - e_d - e_s`` is the
     per-position match probability.
     """
     p_m = 1 - e_i - e_d - e_s
     tally = self.counts[barcode]
     n_m = tally['m']
     # Mismatches, insertions and deletions are all counted as failures.
     n_err = tally['mm'] + tally['i'] + tally['d']
     n_u = tally['u']

     # k matches within the alignment only
     within_aln = nchoosek(tally['alnlen'], n_m) * (p_m**n_m) * ((1-p_m)**(n_err))
     # k matches within the alignment plus the unaligned barcode portion
     with_unaligned = nchoosek(tally['alnlen']+n_u, n_m) * (p_m**n_m) * ((1-p_m)**(n_err+n_u))
     # k matches over the whole barcode (query) length
     within_barcode = nchoosek(tally['queryLen'], n_m) * (p_m**n_m) * ((1-p_m)**(tally['queryLen']-n_m))

     self.binomprobs[barcode] = [within_aln, with_unaligned, within_barcode, p_m]
Example #9
0
File: brute.py Project: SALib/SALib
    def find_most_distant(self, input_sample, num_samples,
                          num_params, k_choices, num_groups=None):
        """
        Score every combination of 'k_choices' trajectories drawn from the
        'num_samples' trajectories contained in 'input_sample'

        Arguments
        ---------
        input_sample : numpy.ndarray
        num_samples : int
            The number of trajectories to choose from
        num_params : int
            The number of parameters
        k_choices : int
            The number of trajectories in each combination
        num_groups : int, default=None
            The number of groups

        Returns
        -------
        numpy.ndarray
            One float32 score per combination, in the order produced by
            ``itertools.combinations``

        Raises
        ------
        ValueError
            If the number of combinations is not below ``sys.maxsize``
        """
        # Size of the search space: (num_samples choose k_choices)
        combo_count = nchoosek(num_samples, k_choices)
        if combo_count >= sys.maxsize:
            raise ValueError("Number of combinations is too large")
        number_of_combinations = int(combo_count)

        # Distances between every pair of trajectories
        distance_matrix = self.compute_distance_matrix(input_sample,
                                                       num_samples,
                                                       num_params,
                                                       num_groups)

        # Process at most one million combinations per slice
        chunk = int(1e6)
        if number_of_combinations < chunk:
            chunk = number_of_combinations

        combo_gen = combinations(list(range(num_samples)), k_choices)
        scores = np.zeros(number_of_combinations, dtype=np.float32)
        # Index pairs within a single combination, computed once
        pairwise = np.array(list(combinations(list(range(k_choices)), 2)))

        for counter, combos in enumerate(self.grouper(chunk, combo_gen)):
            lo = counter * chunk
            hi = (counter + 1) * chunk
            scores[lo:hi] = self.mappable(combos, pairwise, distance_matrix)
        return scores
Example #10
0
 def Yljm(self, l, j, m):
     """Return the scalar coefficient combined from ``l``, ``j`` and ``m``.

     The value is a signed, normalised product of a trinomial and a central
     binomial, divided by the square root of another trinomial.
     """
     prefactor = np.power(-1, j) * (np.sqrt(2 * l + 1) / np.power(2, l))
     numerator = self.trinomial(
         m, j, l - m - 2 * j) * nchoosek(2 * (l - j), l - j)
     denominator = np.sqrt(self.trinomial(m, m, l - m))
     return (prefactor * numerator) / denominator
Example #11
0
def amb6(d, exponent=2.2):  # weighted Bernoulli Trials
    """Average of the binomial(d, .5) probabilities raised to ``exponent``.

    Returns 0 for ``d <= 0``; degrees above 250 are treated as 250.  The sum
    of the raw probabilities is passed to ``verify`` before returning.
    """
    if d <= 0:
        return 0
    if d > 250:
        return amb6(250, exponent)

    p = .5  # probability that an edge is beneficial
    q = 1 - p  # probability that an edge is detrimental

    # Probability of exactly k beneficial edges, for k = 0..d.
    masses = [
        nchoosek(d, k, exact=True) * p**k * q**(d - k)
        for k in range(0, d + 1, 1)
    ]
    ambiguity = [mass**exponent for mass in masses]

    verify(sum(masses))
    return np.average(ambiguity)
Example #12
0
def amb7(d, n2e, e2n):  # weighted Bernoulli Trials
    """Average of ``n2e * prob**(e2n * log10(d))`` over binomial(d, .5).

    Returns 0 for ``d <= 0``; degrees above 250 are treated as 250.  The sum
    of the raw probabilities is passed to ``verify`` before returning.
    """
    if d <= 0:
        return 0
    if d > 250:
        return amb7(250, n2e, e2n)

    p = .5  # probability that an edge is beneficial
    q = 1 - p  # probability that an edge is detrimental
    power = e2n * log10(d)

    ambiguity = []
    unity = 0
    for k in range(d + 1):
        # Probability of exactly k beneficial edges.
        mass = nchoosek(d, k, exact=True) * p**k * q**(d - k)
        ambiguity.append(n2e * (mass**power))
        unity += mass

    verify(unity)
    return np.average(ambiguity)
Example #13
0
def amb8(d, exponent):  # weighted Bernoulli Trials
    """Average of ``1 / (prob**impact * d**2)`` over binomial(d, .5).

    ``impact`` is ``k / d`` folded around the midpoint (``1 - k / d`` when
    ``k > d / 2``).  The result is printed and the sum of the raw
    probabilities is passed to ``verify`` before returning.

    ``exponent`` is accepted but not used by this function.

    Returns 0 for ``d <= 0``, matching ``amb6`` / ``amb7``.  Without this
    guard ``d == 0`` fails with ZeroDivisionError on ``k / d``.
    """
    if d <= 0:
        return 0

    universe = d**2
    p = .5  # probability that an edge is beneficial
    q = 1 - p  # probability that an edge is detrimental

    ambiguity = []
    unity = 0
    for k in range(0, d + 1, 1):
        # Probability of exactly k beneficial edges.
        dCk = nchoosek(d, k, exact=True)
        count = dCk * p**k * q**(d - k)

        impact = k / d
        if k > d / 2:
            impact = 1 - impact
        ambiguity.append(1 / ((count**impact) * universe))

        unity += count
    print('\td:' + l(d) + ' ambiguity: ' + r(np.average(ambiguity)) +
          '\t 1/d**2: ' + str(1 / (d**2)))
    verify(unity)
    return np.average(ambiguity)
Example #14
0
def get_total_degree(num_dims, num_pts):
    """Return the smallest degree >= 1 with at least ``num_pts`` terms.

    The number of terms at a given degree is
    ``nchoosek(num_dims + degree, degree)``, rounded to an int.
    """
    degree = 1
    while int(round(nchoosek( num_dims+degree, degree ))) < num_pts:
        degree += 1
    return degree
Example #15
0
 def MMLSCurvatureTensor2D3D(self, U, PX0, X0, W, deg=2):
     '''
     Build a symmetric curvature tensor from the second-order coefficients
     of a weighted least-squares fit.

     The function assumes we are dealing with 2d surface in R^3.

     The normal is the cross product of the first two columns of U; the
     fitted data is X0 projected onto that normal.  For every basis term of
     total degree 2 the coefficient goes on the diagonal (pure term) or is
     split in half across the two symmetric off-diagonal entries (mixed
     term).

     NOTE(review): the `deg` argument is not used; the fit is done with
     self.poly_deg -- confirm this is intended.
     '''
     Normal = np.cross(U[:,0], U[:,1])
     y_data = np.dot(Normal.T, X0)
     coeffs, Base = self.weightedLeastSquares(PX0, W, y_data, self.poly_deg)
     # Builtin int() replaces the np.int alias, which was removed in
     # NumPy 1.24 and raises AttributeError there.
     size = int(nchoosek(self.manifold_dim, 2)+1)
     Curv_tensor = np.zeros((size, size))
     for c, b in zip(coeffs.T[0], Base):
         if sum(b) == 2:
             # Positions of the non-zero exponents in this basis term.
             indices = np.arange(len(b))[np.array(b, dtype=bool)]
             if len(indices)>1:
                 Curv_tensor[indices[0], indices[1]] = c/2
                 Curv_tensor[indices[1], indices[0]] = c/2
             else:
                 Curv_tensor[indices[0], indices[0]] = c
     return Curv_tensor
Example #16
0
    def binomial(self, n,k):
        """Return ``nchoosek(n, k)``, memoised in ``self.binomial_dict``."""
        key = (n, k)
        cache = self.binomial_dict
        if key not in cache:
            # First request for this pair: compute and remember it.
            cache[key] = nchoosek(n, k)
        return cache[key]
Example #17
0
 def calculateSigmaFromPoint(self, point):
     '''Set self.sigma to an approximate distance for the weight function,
     measured from `point`.

     The value is a percentile of the distances from `point` to every column
     of self.data.  The percentile is chosen so that roughly
     sparse_factor * C(manifold_dim + poly_deg, manifold_dim) + 1 columns
     (capped at the number of columns) fall within it.

     This is a very naive implementation!!!'''
     point = point.squeeze()
     N = self.data.shape[1]
     N_THRESH = min([(self.sparse_factor * nchoosek(self.manifold_dim+self.poly_deg,self.manifold_dim)) + 1, N])
     # Builtin float() replaces the np.float alias, which was removed in
     # NumPy 1.24 and raises AttributeError there.
     N_PERC = 100*float(N_THRESH)/N
     DISTS = np.linalg.norm(self.data - nlib.repmat(point,self.data.shape[1],1).T, axis = 0)
     sig_approximation = np.percentile(DISTS, N_PERC)
     self.sigma = sig_approximation
Example #18
0
def compute_combinations(num_vars, level):
    """Enumerate the index vectors produced for ``level`` in ``num_vars`` variables.

    For ``level > 0`` the vectors come from repeated calls to
    ``compute_next_combination`` and are stored one per column; the number
    of columns is ``C(num_vars + level, num_vars) -
    C(num_vars + level - 1, num_vars)``.  For ``level <= 0`` a single zero
    column is returned.

    Returns
    -------
    numpy.ndarray
        Integer array with ``num_vars`` rows.
    """
    if level <= 0:
        return np.zeros((num_vars, 1), dtype=int)

    # nchoosek may return a float (e.g. scipy.special.comb), which np.empty
    # rejects as a shape entry, so convert to a true int first.
    num_indices = int(round(
        nchoosek(num_vars + level, num_vars) -
        nchoosek(num_vars + level - 1, num_vars)))
    indices = np.empty((num_vars, num_indices), dtype=int)

    extend = False
    h = 0
    t = 0
    i = 0
    # important this is initialized to zero
    index = np.zeros((num_vars), dtype=int)
    while True:
        index, extend, h, t = compute_next_combination(
            num_vars, level, extend, h, t, index)
        indices[:, i] = index.copy()
        i += 1

        if not extend:
            break

    return indices
Example #19
0
 def calculateSigma(self, n_iter=100):
     '''Set self.sigma to an approximate distance for the weight function.

     For n_iter randomly chosen columns of self.data, take a percentile of
     the distances from that column to every column; self.sigma is the
     largest of those percentiles.  The percentile is chosen so that roughly
     sparse_factor * C(manifold_dim + poly_deg, manifold_dim) + 1 columns
     fall within it, clamped to [0, 100].

     This is a very naive implementation!!!'''
     N = self.data.shape[1]
     N_THRESH = (self.sparse_factor * nchoosek(self.manifold_dim + self.poly_deg,self.manifold_dim)) + 1
     # Builtin float() replaces the np.float alias, which was removed in
     # NumPy 1.24 and raises AttributeError there.
     N_PERC = max(min(100*float(N_THRESH)/N, 100), 0)
     sig_approximation = np.zeros(n_iter)
     # Column indices are drawn from [3, N-3).
     for i, r_index in enumerate(np.random.randint(3,N-3, n_iter)):
         q = np.asarray(self.data[:,r_index])
         DISTS = np.linalg.norm(self.data - nlib.repmat(q,self.data.shape[1],1).T, axis = 0)
         sig_approximation[i] = np.percentile(DISTS, N_PERC)

     self.sigma = np.max(sig_approximation)
Example #20
0
def matrix_A(k, th):
    """Return the ``(len(th), k + 1)`` matrix of binomial cos/sin terms.

    Column ``j`` holds ``cos(th)**(k - j) * sin(th)**j * nchoosek(k, j)``.
    For ``k == 0`` this is a single column of ones.

    The cosine and sine are computed unconditionally so that the ``k == 0``
    case no longer hits undefined names inside the column loop.
    """
    V = len(th)
    c = numpy.cos(th)
    s = numpy.sin(th)
    A = numpy.zeros((V, k + 1))
    for j in range(0, k + 1):
        vec = (c**(k - j)) * (s**(j)) * nchoosek(k, j)
        # Flatten so column inputs of shape (V, 1) also fit a column of A.
        A[:, j] = numpy.reshape(vec, (V, ))
    return A
Example #21
0
def amb5(d):  # Bernoulli Trials
    """Cube of the binomial(d, .5) probability of ``ceil(log2(d))`` successes.

    ``d`` is the node's degree.  The intermediate probability is printed
    before the cubed value is returned.
    """
    p_good = .5  # probability that an edge is beneficial
    p_bad = 1 - p_good  # probability that an edge is detrimental

    # Number of beneficial edges whose probability is evaluated.
    k1 = math.ceil(math.log2(d))

    # Equivalently fact(d) / (fact(k1) * fact(d - k1)).
    ways = nchoosek(d, k1, exact=True)
    amb1 = ways * p_good**k1 * p_bad**(d - k1)

    print('d:' + l(d) + 'k:' + l(k1) + 'amb1:' + r(amb1))
    return amb1**3
def get_coefficients_for_plotting(pce, qoi_idx):
    """Collect the coefficients of one quantity of interest, grouped by degree.

    For each total degree from 0 to the largest degree present in
    ``pce.indices``, the level indices of that degree are walked in reverse
    column order.  The matching coefficient is appended when the index is
    present in the expansion, otherwise 0.0.  Processing stops early, with a
    printed message, at the first degree that has 1e6 or more terms.

    Returns
    -------
    coeff_sorted : numpy.ndarray
        Coefficients (or 0.0 placeholders) in visiting order.
    degree_indices_set : numpy.ndarray
        Horizontally stacked index columns, one stack per processed degree.
    degree_breaks : list of int
        Running count of entries in ``coeff_sorted`` after each degree.
    """
    coeff = pce.get_coefficients()[:, qoi_idx]
    indices = pce.indices.copy()
    assert coeff.shape[0] == indices.shape[1]

    num_vars = pce.num_vars()
    max_degree = indices.sum(axis=0).max()

    # Map each multi-index (by hash) to its column in the expansion.
    indices_dict = dict()
    for ii in range(indices.shape[1]):
        key = hash_array(indices[:, ii])
        indices_dict[key] = ii

    i = 0
    degree_breaks = []
    coeff_sorted = []
    degree_indices_set = np.empty((num_vars, 0))
    for degree in range(max_degree+1):
        nterms = nchoosek(num_vars+degree, degree)
        if nterms < 1e6:
            degree_indices = compute_hyperbolic_level_indices(
                num_vars, degree, 1.)
        else:
            # This message used to be a bare string expression, which is a
            # no-op; print it so the truncation is visible.
            print('Could not plot coefficients of terms with degree >= %d'
                  % degree)
            break
        # NOTE(review): this stacks the full `indices` array once per degree;
        # `degree_indices` may have been intended -- confirm before changing,
        # since callers receive this array.
        degree_indices_set = np.hstack((degree_indices_set, indices))
        for ii in range(degree_indices.shape[1]-1, -1, -1):
            index = degree_indices[:, ii]
            key = hash_array(index)
            if key in indices_dict:
                coeff_sorted.append(coeff[indices_dict[key]])
            else:
                coeff_sorted.append(0.0)
            i += 1
        degree_breaks.append(i)

    return np.array(coeff_sorted), degree_indices_set, degree_breaks
Example #23
0
def num_total_degree_indices(num_vars,degree):
    """Return ``nchoosek(num_vars + degree, num_vars)``."""
    return nchoosek(num_vars + degree, num_vars)
Example #24
0
def get_formatted_pairwise_alignment(alignment, blocksize=100, e_s=0.051, e_i=0.049, e_d=0.078, report_prob=False, report_evalue=False, with_unaligned_portion=False):
    """Return a multi-line text report for one pairwise alignment.

    The report contains header lines for ref and query, match/mismatch/indel
    statistics, an e-value style line, two sets of "p_minion" probabilities
    (the second with insertion/deletion error rates swapped), three binomial
    probabilities, an optional marginalized-probability line, and finally the
    aligned ref / match-sticks / query strings in chunks of ``blocksize``
    columns.

    ``alignment`` must provide ``cigar`` (iterated as ``(length, op)`` pairs
    with ops 'M', 'D', 'I'), ``orig_ref``, ``orig_query``, ``r_pos``,
    ``r_end``, ``q_pos``, ``q_end``, ``r_name``, ``q_name`` and ``score``.

    ``e_s``, ``e_i`` and ``e_d`` are substitution/insertion/deletion error
    rates; the match probability is ``p_m = 1 - e_i - e_d - e_s``.
    ``report_prob`` is appended to the report when it is not ``False``.
    ``report_evalue`` and ``with_unaligned_portion`` are not used in the body.
    """
    ## TODO: printout unaligned portion as part of alignment viz
    ## For report_prob, you need to give the prob
    ## e_i, e_d, and e_s are insertion/deletion/substitution errors found in early MinION sequencing by Jain et al: Improved data analysis for the MinION nanopore sequencer
    ## Can compute prob of alignment by p_m=1-e_i-e_d-e_s; 
    # Running counts of matches, mismatches, deletions and insertions.
    n_m = 0
    n_mm = 0
    n_d = 0
    n_i = 0
    # Gapped ref/query strings and the '|' match markers between them.
    ref = ''
    query = ''
    sticks = ''
    # Current positions in the ungapped ref and query sequences.
    r_i = alignment.r_pos
    q_i = alignment.q_pos
    for e in alignment.cigar:
        if e[1] == 'M':
            # Aligned columns: consume both sequences and compare case-insensitively.
            newref = alignment.orig_ref[r_i:r_i+e[0]]
            newquer = alignment.orig_query[q_i:q_i+e[0]]
            ref += newref
            query += newquer
            r_i += e[0]
            q_i += e[0]
            for i in range(len(newref)):
                sticks += '|' if newref[i].upper() == newquer[i].upper() else ' '
                n_m += 1 if newref[i].upper() == newquer[i].upper() else 0
                n_mm += 0 if newref[i].upper() == newquer[i].upper() else 1
        if e[1] == 'D':
            # Deletion: ref bases with a gap in the query.
            ref += alignment.orig_ref[r_i:r_i+e[0]]
            query += '-'*e[0]
            sticks += ' '*e[0]
            r_i += e[0]
            n_d += e[0] #1
        elif e[1] == 'I':
            # Insertion: query bases with a gap in the ref.
            ref += '-'*e[0]
            query += alignment.orig_query[q_i:q_i+e[0]]
            sticks += ' '*e[0]
            q_i += e[0]
            n_i += e[0] #1
    qbases = n_m + n_mm + n_i
    rbases = n_m + n_mm + n_d
    n_u = len(alignment.orig_query) - qbases ## these were bases not in the alignment. Since barcode is query... i.e. pieces of barcode not found in read
    # Sanity checks: the counts must agree with the alignment coordinates.
    assert qbases == alignment.q_end-alignment.q_pos and rbases == alignment.r_end-alignment.r_pos
    assert len(ref) == len(query)
    assert len(ref) == sum([n_m, n_mm, n_d, n_i])
    outstring = alignment.q_name + '\n'
    outstring += "Ref (top): " + alignment.r_name + ' ' + str(alignment.r_pos) + '-' + str(alignment.r_end) + ' r_bases_aligned:' + str(rbases) + ' pct_r_bases_aligned:' + str(100.0*rbases/len(alignment.orig_ref)) + ' refLen:' + str(len(alignment.orig_ref)) + ' bp\n'
    outstring += "Query (bottom): " + alignment.q_name + ' ' + str(alignment.q_pos) + '-' + str(alignment.q_end) + ' q_bases_aligned:' + str(qbases) + ' pct_q_bases_aligned:'+str(100.0*qbases/len(alignment.orig_query)) + ' queryLen:' + str(len(alignment.orig_query)) + ' bp\n'
    stats = ["AS:" + str(alignment.score), "Match:"+str(n_m), "Mismatch:"+str(n_mm), "Deletion:"+str(n_d), "Insertion:"+str(n_i), "AlignmentLength:"+str(len(ref)), "PercentIdentity:"+str(100.0*n_m/sum([n_m, n_mm, n_d, n_i])),  "Barcode_Unaligned:"+str(n_u), "PercentIdentity_with_unaligned:"+str(100.0*n_m/sum([n_m, n_mm, n_d, n_i, n_u]))]
    outstring += (' ').join(stats) + '\n'
    # e-value style statistic: p = e**(-score), n = refLen * queryLen.
    p = np.e**(-1*alignment.score)
    n = len(alignment.orig_ref)*len(alignment.orig_query)
    evalue = n*p
    stats = ['n:' + str(n), 'p:' + str(p), 'e_value:' + str(evalue)]
    outstring += (' ').join(stats) + '\n'
    # First p_minion block: e_d paired with n_d and e_i with n_i.
    p_m = 1 - e_i - e_d - e_s
    p_minion_aln =  (p_m**n_m) * (e_s**n_mm) * (e_d**n_d) * (e_i**n_i)
    Esum = e_s + e_i + e_d
    E_s = e_s/Esum
    E_i = e_i/Esum
    E_d = e_d/Esum
    p_minion_un = (e_s**(E_s*n_u)) * (e_i**(E_i*n_u)) *(e_d**(E_d*n_u)) ## since it is unclear if the unaligned were subs/dels/ins I am making use of all
    p_minion = p_minion_aln * p_minion_un
    # Multinomial-style count of arrangements of the aligned columns.
    # NOTE: N5 and N_u below are assigned but not read afterwards.
    N=sum([n_m, n_mm, n_i, n_d])
    N2 = N - n_m
    N3 = N2 - n_mm
    N4 = N3 - n_i
    N5 = N4 - n_d
    NcK = nchoosek(N, n_m) * nchoosek(N2, n_mm) * nchoosek(N3, n_i) * nchoosek(N4, n_d) 
    norm_p_minion_aln = NcK * p_minion_aln
    N_u = N + n_u
    norm_p_minion = norm_p_minion_aln * p_minion_un ## There seems to be no reason to multiply the unaligned by a nchoosek b/c they are fixed at the ends and correspond to some "single" unknown composite error
    stats = ['p_minion_aln:' + str(p_minion_aln), 'p_minion_un:' + str(p_minion_un), 'p_minion:' + str(p_minion), 'norm_p_minion_aln:'+str(norm_p_minion_aln), 'norm_p_minion:'+str(norm_p_minion)] ## p_minion not necessarily comparable when barcodes are different lengths - can divide by bc_len or q*r maybe
    outstring += (' ').join(stats) + '\n'
    
    ## actually since the read is the "ref" here.... n_d and n_i are dels/ins from/in read. Barcode is the real "reference" meaning for minion probs we need e_i**n_d and e_d**n_i 
    # Second p_minion block: same statistics with e_d / e_i swapped.
    p_minion_aln =  (p_m**n_m) * (e_s**n_mm) * (e_d**n_i) * (e_i**n_d)
    p_minion = p_minion_aln * p_minion_un
    norm_p_minion_aln = NcK * p_minion_aln
    norm_p_minion = norm_p_minion_aln * p_minion_un
    stats = ['p_minion_aln:' + str(p_minion_aln), 'p_minion_un:' + str(p_minion_un), 'p_minion:' + str(p_minion), 'norm_p_minion_aln:'+str(norm_p_minion_aln), 'norm_p_minion:'+str(norm_p_minion)] ## p_minion not necessarily comparable when barcodes are different lengths - can divide by bc_len or q*r maybe
    outstring += (' ').join(stats) + '\n'
    
    # Binomial probability of n_m matches among the alignment columns.
    binom_prob = nchoosek(sum([n_m, n_mm, n_d, n_i]), n_m) * (p_m**n_m) * ((1-p_m)**(n_mm+n_i+n_d))
    outstring += 'binom_prob_k_matches_in_alignment:' + str(binom_prob) + '\n'

    # Same, with the unaligned query bases counted as additional failures.
    binom_prob = nchoosek(sum([n_m, n_mm, n_d, n_i, n_u]), n_m) * (p_m**n_m) * ((1-p_m)**(n_mm+n_i+n_d+n_u))
    outstring += 'binom_prob_k_matches_in_alignment_incl_unaligned_portions_of_barcode:' + str(binom_prob) + '\n'
 
    #in barcode only
    binom_prob = nchoosek(len(alignment.orig_query), n_m) * (p_m**n_m) * ((1-p_m)**(len(alignment.orig_query)-n_m))
    outstring += 'binom_prob_k_matches_in_barcode:' + str(binom_prob) + '\n'

    if report_prob is not False:
        outstring += "Marginalized Probability Given Barcode Set: " + str(report_prob) + "\n"
    # Emit the gapped alignment in chunks of `blocksize` columns.
    for i in range(0, len(ref), blocksize):
        outstring += ref[i:i+blocksize] + '\n'
        outstring += sticks[i:i+blocksize] + '\n'
        outstring += query[i:i+blocksize] + '\n\n'
    return outstring
Example #25
0
from scipy.special import comb as nchoosek


#-----------------------------------------------------------------------------------
def l(n):
    """Return ``str(n)`` left-justified in a field of 20 spaces."""
    return '{:<20}'.format(str(n))


#-----------------------------------------------------------------------------------
def r(n):
    """Return ``str(n)`` right-justified in a field of 20 spaces."""
    return '{:>20}'.format(str(n))


# Demo: print a two-column table for a binomial(d, p) distribution.
# The left column is k; the right column is the probability of k successes
# multiplied by 2**d (with p = q = .5 that is the binomial coefficient).
d = 20
p = .5
q = .5
for k in range(0, d + 1, 1):

    # exact=True requests exact integer arithmetic from scipy's comb.
    dCk = nchoosek(d, k, exact=True)
    count = dCk * p**k * q**(d - k)
    print(l(k) + r(count * 2**d))
Example #26
0
 def get_bernstein_basis(n, a, b):
     """Evaluate the ``n + 1`` degree-``n`` Bernstein terms on ``[a, b]``.

     The sample points are ``linspace(a, b, b - a + 1)``.  Term ``k`` is
     ``C(n, k) * t**k * (1 - t)**(n - k)`` with ``t = (x - a) / (b - a)``.
     Returns a list of arrays, one per ``k`` in ``0..n``.
     """
     x = np.linspace(a, b, b - a + 1)
     offset = x - a
     span = b - a
     basis = []
     for k in range(n + 1):
         # Position of each sample within the interval, scaled to [0, 1].
         t = offset / span
         basis.append(nchoosek(n, k) * t**k * (1 - t)**(n - k))
     return basis