def hypergeom_lh(ho, ha, trial, n, g, N):
    """ Returns likelihood ratio for independently distributed hypergeometric
    random variables.

    Parameters
    ----------
    ho : float
        null hypothesis
    ha : float
        alternative hypothesis
    trial : float
        number of good elements in recent sample
    n : float or int
        sample size
    g : float or int
        number of good elements in sample
    N : float or int
        total population size

    Returns
    -------
    float
        likelihood ratio of model
    """
    ho_G, ha_G = ho * (N / n), ha * (N / n)
    null_lh = (comb(ho_G, g) * comb(N - ho_G, n - g))
    alt_lh = (comb(ha_G, g) * comb(N - ha_G, n - g))
    return alt_lh / null_lh
def test_prior():
    K = 10
    T = 100

    es = EventSegment(K)
    mp = es.model_prior(T)[0]

    p_bound = np.zeros((T, K - 1))
    norm = comb(T - 1, K - 1)
    for t in range(T - 1):
        for k in range(K - 1):
            # See supplementary material of Neuron paper
            # https://doi.org/10.1016/j.neuron.2017.06.041
            p_bound[t + 1, k] = comb(t, k) * comb(T - t - 2, K - k - 2) / norm
    p_bound = np.cumsum(p_bound, axis=0)

    mp_gt = np.zeros((T, K))
    for k in range(K):
        if k == 0:
            mp_gt[:, k] = 1 - p_bound[:, 0]
        elif k == K - 1:
            mp_gt[:, k] = p_bound[:, k - 1]
        else:
            mp_gt[:, k] = p_bound[:, k - 1] - p_bound[:, k]

    assert np.all(np.isclose(mp, mp_gt)), \
        "Prior does not match analytic solution"
def pdf(self, x, k, n, p):
    '''distribution of success runs of length k or more

    Parameters
    ----------
    x : float
        count of runs of length n
    k : int
        length of runs
    n : int
        total number of observations or trials
    p : float
        probability of success in each Bernoulli trial

    Returns
    -------
    pdf : float
        probability that x runs of length of k are observed

    Notes
    -----
    not yet vectorized

    References
    ----------
    Muselli 1996, theorem 3
    '''
    q = 1 - p
    m = np.arange(x, (n + 1) // (k + 1) + 1)[:, None]
    terms = (-1)**(m - x) * comb(m, x) * p**(m * k) * q**(m - 1) \
        * (comb(n - m * k, m - 1) + q * comb(n - m * k, m))
    return terms.sum(0)
def rand_score(labels_true, labels_pred):
    labels_true, labels_pred = check_clusterings(labels_true, labels_pred)
    n_samples = labels_true.shape[0]
    classes = np.unique(labels_true)
    clusters = np.unique(labels_pred)
    # Special limit cases: no clustering since the data is not split;
    # or trivial clustering where each document is assigned a unique cluster.
    # These are perfect matches hence return 1.0.
    if (classes.shape[0] == clusters.shape[0] == 1
            or classes.shape[0] == clusters.shape[0] == 0
            or classes.shape[0] == clusters.shape[0] == len(labels_true)):
        return 1.0

    contingency = contingency_matrix(labels_true, labels_pred)

    # Compute the ARI using the contingency data
    sum_comb_c = sum(comb2(n_c) for n_c in contingency.sum(axis=1))
    sum_comb_k = sum(comb2(n_k) for n_k in contingency.sum(axis=0))
    sum_comb = sum(comb2(n_ij) for n_ij in contingency.flatten())

    t_p = sum_comb
    f_p = sum_comb_c - sum_comb
    f_n = sum_comb_k - sum_comb
    t_n = float(comb(n_samples, 2)) - t_p - f_p - f_n
    result = (t_n + t_p) / float(comb(n_samples, 2))
    return result
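A hedged smoke test for rand_score, assuming check_clusterings and contingency_matrix behave like the sklearn.metrics.cluster helpers and comb2(n) computes comb(n, 2): two labelings that agree up to a renaming of cluster ids should score a perfect 1.0.

import numpy as np

# [0, 0, 1, 1] and [1, 1, 0, 0] induce the same partition, so every pair of
# samples is treated consistently and the Rand index is exactly 1.0.
print(rand_score(np.array([0, 0, 1, 1]), np.array([1, 1, 0, 0])))  # -> 1.0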
def recurTraversal(mean_sep_time, sample):
    # base case
    global total_branch_length, total_mutations
    weight = 0
    if sample.left is None and sample.right is None:
        total_branch_length += sample.time
        identity = str(sample.getIdentity())
        if 'A' not in identity:
            k = 1
        else:
            k = len(sample.descendent_list)
        weight = (k * (sample_size - k)) / comb(sample_size, 2)
        mean_sep_time = mean_sep_time + (weight * sample.time)
        sample.mutations = poisson.rvs(mu * sample.time)
        total_mutations += sample.getMutations()
        return mean_sep_time
    mean_sep_time = recurTraversal(mean_sep_time, sample.right)
    current = sample.right
    while current.next is not None:
        mean_sep_time = recurTraversal(mean_sep_time, current.next)
        current = current.next
    total_branch_length += sample.time
    identity = str(sample.getIdentity())
    if 'A' not in identity:
        k = 1
    else:
        k = len(sample.descendent_list)
    weight = (k * (sample_size - k)) / comb(sample_size, 2)
    mean_sep_time = mean_sep_time + (weight * sample.time)
    sample.mutations = poisson.rvs(mu * sample.time)
    total_mutations += sample.getMutations()
    return mean_sep_time
def runs_prob_odd(self, r):
    n0, n1 = self.n0, self.n1
    k = (r + 1) // 2
    tmp0 = comb(n0 - 1, k - 1)
    tmp1 = comb(n1 - 1, k - 2)
    tmp3 = comb(n0 - 1, k - 2)
    tmp4 = comb(n1 - 1, k - 1)
    return (tmp0 * tmp1 + tmp3 * tmp4) / self.comball
def validateInputData(self):
    self.getDataFrame()
    # check if dataframe -> df exists
    try:
        self.df
    except NameError:
        df_exists = False
    else:
        df_exists = True

    if df_exists:
        df_names = self.df.columns.values
    else:
        print("There was an error loading the Data frame")

    # test the non promo column names
    if df_exists:
        if np.array_equal(stdColNames, df_names[0:stdColCount + 1]):
            stColNamesPass = True
        else:
            stColNamesPass = False

        # test the number of promo columns
        # get the promo columns from the dataframe
        df_pnames = df_names[stdColCount + 1:df_names.size]
        possibleValues = np.zeros((limit, limit))
        for x in range(1, limit):
            for z in range(1, limit):
                if z < 2 and x < 2:
                    possibleValues[x, z] = int(comb(z, 1, exact=False))
                if z >= 2 and z >= x:
                    possibleValues[x, z] = int(possibleValues[x - 1, z]
                                               + comb(z, x, exact=False))
        if df_pnames.size in possibleValues[:, :]:
            print("VALUE FOUND")
            print(df_pnames.size)
            possiblePromoFormat = self.returnIndex2DArray(possibleValues,
                                                          df_pnames.size)
            print(possiblePromoFormat)
        else:
            print("VALUE NOT FOUND. Promos not set up correctly in input file")
            print(df_pnames.size)
        # print(df_pnames)
        # print(df_names)
        print(possibleValues)
        self.findSoloPromos(df_pnames, possiblePromoFormat)
        print('Do the standard variables pass? %s' % (stColNamesPass))
        self.findMultiPromos(df_pnames, possiblePromoFormat)
        # this is the boolean to say the validation passed
        objInitDataVal = True
def _acombr(n, k):
    """Combinations with repetitions"""
    # pylint: disable-msg=invalid-name
    # This uses dynamic programming to compute everything
    num = sps.comb(n, k, repetition=True, exact=True)
    grid = np.zeros((num, n), dtype=int)
    memoized = {}

    # This recursion breaks if asking for numbers that are too large (stack
    # overflow), but the order to fill n and k is predictable; it may be
    # better to use a for loop.
    def fill_region(n, k, region):
        """Recursively fill a region"""
        if n == 1:
            region[0, 0] = k
            return
        elif k == 0:
            region.fill(0)
            return
        if (n, k) in memoized:
            np.copyto(region, memoized[n, k])
            return
        memoized[n, k] = region
        o = 0
        for ki in range(k, -1, -1):
            n_ = n - 1
            k_ = k - ki
            m = sps.comb(n_, k_, repetition=True, exact=True)
            region[o:o + m, 0] = ki
            fill_region(n_, k_, region[o:o + m, 1:])
            o += m

    fill_region(n, k, grid)
    return grid
def return_CAs(amps, N=7):
    """
    Short Summary
    -------------
    Calculate closure amplitudes

    Parameters
    ----------
    amps: 1D float array
        fringe amplitudes

    N: integer
        number of holes

    Returns
    -------
    CAs: 1D float array
        closure amplitudes
    """
    arr = populate_symmamparray(amps, N=N)  # fringe amp array
    nn = 0

    CAs = np.zeros(int(comb(N, 4)))
    for ii in range(N - 3):
        for jj in range(N - ii - 3):
            for kk in range(N - jj - ii - 3):
                for ll in range(N - jj - ii - kk - 3):
                    CAs[nn + ll] = arr[ii, jj + ii + 1] \
                        * arr[ll + ii + jj + kk + 3, kk + jj + ii + 2] \
                        / (arr[ii, kk + ii + jj + 2] *
                           arr[jj + ii + 1, ll + ii + jj + kk + 3])
                nn = nn + ll + 1

    return CAs
def __init__(self, matrix, m_list, num_to_return=1, algo=ALGO_FAST):
    # Setup and checking of inputs
    self._matrix = copy(matrix)
    # Make the matrix diagonally symmetric (so matrix[i,:] == matrix[:,j])
    for i in range(len(self._matrix)):
        for j in range(i, len(self._matrix)):
            value = (self._matrix[i, j] + self._matrix[j, i]) / 2
            self._matrix[i, j] = value
            self._matrix[j, i] = value

    # sort the m_list based on number of permutations
    self._m_list = sorted(m_list,
                          key=lambda x: comb(len(x[2]), x[1]),
                          reverse=True)

    for mlist in self._m_list:
        if mlist[0] > 1:
            raise ValueError('multiplication fractions must be <= 1')
    self._current_minimum = float('inf')
    self._num_to_return = num_to_return
    self._algo = algo
    if algo == EwaldMinimizer.ALGO_COMPLETE:
        raise NotImplementedError('Complete algo not yet implemented for '
                                  'EwaldMinimizer')

    self._output_lists = []
    # Tag that the recurse function looks at at each level. If a method
    # sets this to true it breaks the recursion and stops the search.
    self._finished = False

    self._start_time = datetime.utcnow()

    self.minimize_matrix()

    self._best_m_list = self._output_lists[0][1]
    self._minimized_sum = self._output_lists[0][0]
def k_array_rank(a):
    """
    Given an array `a` of k distinct nonnegative integers, sorted in
    ascending order, return its ranking in the lexicographic ordering of
    the descending sequences of the elements [1]_.

    Parameters
    ----------
    a : ndarray(int, ndim=1)
        Array of length k.

    Returns
    -------
    idx : scalar(int)
        Ranking of `a`.

    References
    ----------
    .. [1] `Combinatorial number system
       <https://en.wikipedia.org/wiki/Combinatorial_number_system>`_,
       Wikipedia.

    """
    k = len(a)
    idx = int(a[0])  # Convert to Python int
    for i in range(1, k):
        idx += comb(a[i], i + 1, exact=True)
    return idx
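A worked instance of the ranking formula (assuming comb is scipy.special.comb, as the function requires): for a = [0, 2, 3] the rank is comb(0, 1) + comb(2, 2) + comb(3, 3) = 0 + 1 + 1 = 2 in the combinatorial number system.

import numpy as np

print(k_array_rank(np.array([0, 2, 3])))  # -> 2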
def filter_df_by_particles_in_frame(data_frame, num_particles, mode='equal'):
    '''Return a DataFrame where just the frames with the requested number of
    particles are present. This only works on DataFrames that have gone
    through find_nn_ver_2

    :param data_frame: The input data_frame
    :param num_particles: The number of particles you want in each frame
    :param mode: Can be 'equal', meaning only frames with exactly
        num_particles are returned. 'less' means only frames with less than
        or equal to num_particles are returned. 'greater' means only frames
        with greater than or equal to num_particles are returned.
    :return data_frame:
    '''
    # The function does not compute the number of particles in each frame
    # but instead the number of entries for a given number of particles.
    # The number of entries = 2 * (num_particles nCr 2)
    from scipy.special import comb
    if num_particles != 1:
        num_particles = 2 * (comb(num_particles, 2))

    data = data_frame.copy()
    part_num_in_frame = data.groupby('frame').apply(len)

    if mode == 'equal':
        return data.set_index('frame')[part_num_in_frame == num_particles].reset_index()
    elif mode == 'less':
        return data.set_index('frame')[part_num_in_frame <= num_particles].reset_index()
    elif mode == 'greater':
        return data.set_index('frame')[part_num_in_frame >= num_particles].reset_index()
def redundant_cps(deltaps, N=7):
    """
    Short Summary
    -------------
    Calculate closure phases for each set of 3 holes

    Parameters
    ----------
    deltaps: 1D float array
        pistons between each pair of holes

    N: integer
        number of holes

    Returns
    -------
    cps: 1D float array
        closure phases
    """
    arr = populate_antisymmphasearray(deltaps, N=N)  # fringe phase array

    cps = np.zeros(int(comb(N, 3)))
    nn = 0
    for kk in range(N - 2):
        for ii in range(N - kk - 2):
            for jj in range(N - kk - ii - 2):
                cps[nn + jj] = arr[kk, ii + kk + 1] \
                    + arr[ii + kk + 1, jj + ii + kk + 2] \
                    + arr[jj + ii + kk + 2, kk]
            nn += jj + 1

    return cps
def coeffs(M):
    """
    Generate the "smooth noise-robust differentiators" as defined in
    Pavel Holoborodko's formula for c_k

    Parameters
    ----------
    M : int
        the order of the differentiator

    Returns
    -------
    c : float
        array of length M with coefficients for k = 1 to M
    """
    m = (2*M - 2)/2
    k = np.arange(1, M+1)
    c = 1./2.**(2*m + 1)*(comb(2*m, m - k + 1) - comb(2*m, m - k - 1))
    return c
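A quick numerical check of these coefficients, under the assumption (not stated in the snippet) that comb is scipy.special.comb and that the derivative estimate is the centered scheme f'(x0) ≈ sum_k c_k * (f(x0 + k*h) - f(x0 - k*h)) / h from Holoborodko's construction; for a smooth function the estimate should match the analytic derivative.

import numpy as np
from scipy.special import comb

h, x0, M = 1e-3, 0.3, 2
c = coeffs(M)                 # for M = 2 this evaluates to [0.25, 0.125]
k = np.arange(1, M + 1)
est = np.sum(c * (np.sin(x0 + k * h) - np.sin(x0 - k * h))) / h
print(est, np.cos(x0))        # the two values should agree closely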
def _incremental_similarity(self, scan, *args, **kwargs):
    new_sims = self._calculate_similarity_with(scan, *args, **kwargs)
    aggregate_size = comb(len(self), 2)
    n = (aggregate_size + len(new_sims))
    if n == 0:
        n = 1
    self._average_similarity = (aggregate_size * self.average_similarity()
                                + sum(new_sims)) / n
def comb(n, k):
    """Return n choose k

    This function works on arrays, and will properly return a python integer
    object if the number is too large to be stored in a 64 bit integer.
    """
    # pylint: disable-msg=invalid-name
    res = np.rint(sps.comb(n, k, False))
    if np.all(res < _MAX_INT_FLOAT):  # pylint: disable=no-else-return
        return res.astype(int)
    elif isinstance(n, abc.Iterable) or isinstance(k, abc.Iterable):
        broad = np.broadcast(np.asarray(n), np.asarray(k))
        res = np.empty(broad.shape, dtype=object)
        res.flat = [sps.comb(n_, k_, True) for n_, k_ in broad]
        return res
    else:
        return sps.comb(n, k, True)
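A sketch of the wrapper's two paths, with assumed stand-ins for the module context it relies on (sps as scipy.special, abc from collections, and _MAX_INT_FLOAT as the largest float that safely round-trips to a 64-bit integer): small results come back on the fast floating-point path, oversized ones as exact Python integers.

import scipy.special as sps
import numpy as np
from collections import abc

_MAX_INT_FLOAT = float(2**53)  # assumed value of the module constant

print(comb(10, 3))         # 120 via the rounded floating-point path
print(comb([200], [100]))  # object array holding the exact 59-digit integer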
def binomial_pmf(n, i, p):
    try:
        return comb(n, i, exact=True) * (p ** i) * ((1 - p) ** (n - i))
    except OverflowError:
        dn = Decimal(n)
        di = Decimal(i)
        dp = Decimal(p)
        x = math.factorial(dn) / (math.factorial(di) * math.factorial(dn - di))
        return float(x * dp ** di * ((1 - dp) ** (dn - di)))
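As a cross-check (not part of the original), the fast path should agree with scipy.stats.binom, assuming comb here is scipy.special.comb:

from scipy.stats import binom

print(binomial_pmf(20, 12, 0.7))  # exact-comb evaluation
print(binom.pmf(12, 20, 0.7))     # should match to floating-point precision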
def test_payoff_values(self):
    possible_values = [0, 1]
    for payoff_array in self.g.payoff_arrays:
        ok_(np.isin(payoff_array, possible_values).all())

    max_num_dominated_subsets = \
        sum([comb(i, self.k, exact=True) for i in range(self.n)])
    ok_(self.g.payoff_arrays[0].sum() <= max_num_dominated_subsets)
    ok_((self.g.payoff_arrays[1].sum(axis=1) == self.k).all())
def getcomb():
    import scipy.special as sp
    fr = open('/Users/shengdongliu/Downloads/0401.txt')
    for line in fr.readlines():
        lineArr = line.strip().split()
        totalcolumn = len(lineArr) - 2
        break
    number = sp.comb(totalcolumn, 2, exact=False)
    return number
def __init__(self, dimension, degree, varnamelist):
    # The original bare expressions (e.g. `isinstance(varnamelist, list)`)
    # discarded their results, so the checks never fired; they are asserted
    # explicitly here so invalid input actually raises.
    try:
        assert isinstance(varnamelist, list)
        assert isinstance(dimension, int)
        assert isinstance(degree, int)
        assert len(varnamelist) == degree
        for var in varnamelist:
            assert isinstance(var, str)
        assert 0 < dimension <= 3
        assert 0 < degree
    except AssertionError:
        raise NameError('dimension and degree of type integer with 0<dimension<=3, 0<degree, '
                        'list with element of type string and length varname == degree')

    varsymbollist = []
    for var in varnamelist:
        varsymbollist.append(symbols(var))

    # calculate the number of degrees of freedom
    self.dofnumber = int(comb(dimension + degree, degree))
    coefvec = MatrixSymbol('c', 1, self.dofnumber)

    monomiallist = [1]
    if dimension == 1:
        for i in range(1, degree + 1):
            for k in range(0, dimension):
                monomiallist.append(pow(varsymbollist[0], i))
    elif dimension == 2:
        for i in range(1, degree + 1):
            monomiallist.append(pow(varsymbollist[0], i))
            for j in range(1, i):
                monomiallist.append(pow(varsymbollist[0], i - j) * pow(varsymbollist[1], j))
            monomiallist.append(pow(varsymbollist[1], i))
    elif dimension == 3:
        for i in range(1, degree + 1):
            monomiallist.append(pow(varsymbollist[0], i))
            for j in range(1, i):
                monomiallist.append(pow(varsymbollist[0], i - j) * pow(varsymbollist[1], j))
            monomiallist.append(pow(varsymbollist[1], i))
            for j in range(1, i):
                monomiallist.append(pow(varsymbollist[0], i - j) * pow(varsymbollist[2], j))
            for j in range(1, i):
                monomiallist.append(pow(varsymbollist[1], i - j) * pow(varsymbollist[2], j))
            monomiallist.append(pow(varsymbollist[2], i))

    self.basis = monomiallist
    self.var = varsymbollist
    funmat = Matrix(coefvec) * Matrix(monomiallist)
    fun = funmat[0]
    self.fun = fun
def indextocomb(ind, k=6, n=45):
    subs = k
    var = 0
    komb = []
    for var in range(k):
        while (n >= subs) and (ind < comb(n, subs)):
            n -= 1
        if n >= subs:
            komb.append(n + 1)
            ind = ind - comb(n, subs)
        else:
            komb.append(subs)
        subs -= 1
    komb.reverse()
    return komb
def theoryE(beta, epsilon=1, m=2, N=4):
    if N == m:
        return -N * np.ones(len(beta))
    else:
        kmax = min(N - m + 1, m)
        E = np.zeros(len(beta))
        temp1 = np.zeros([kmax, len(beta)])
        temp2 = np.zeros([kmax, len(beta)])
        for t in range(kmax):
            k = t + np.float64(1.)
            temp = 1. / k * comb(m - 1, k - 1) * comb(N - m - 1, k - 1) \
                * np.exp(-k * epsilon * beta)
            temp1[t] = temp * (m - k) * epsilon
            temp2[t] = temp
        tempSum1 = np.sum(temp1, axis=0)
        tempSum2 = np.sum(temp2, axis=0)
        # tempSum2 += np.ones(len(beta)) * 1e-10  # for numerical stability
        # tempSum1 += np.ones(len(beta)) * 9e-10
        # pdb.set_trace()
        E = -tempSum1 / tempSum2
        return E
def kbits(n, k):
    # comb must return an exact integer here so it can size the array
    result = np.zeros((comb(n, k, exact=True), n), dtype=bool)
    idx = 0
    for bits in itertools.combinations(range(n), k):
        s = np.zeros(n, dtype=bool)
        for bit in bits:
            s[bit] = 1
        result[idx, :] = s
        idx += 1
    return result
def _local_counts(mnc):
    mnc = [1] + list(mnc)
    kappa = [1]
    for nn, m in enumerate(mnc[1:]):
        n = nn + 1
        kappa.append(m)
        for k in range(1, n):
            num_ways = comb(n - 1, k - 1, exact=True)
            kappa[n] -= num_ways * kappa[k] * mnc[n - k]
    return kappa[1:]
def state_count(self):
    """
    Return the number of combinations, starting with a single attribute
    if Mosaic is colored by class distributions, and two if by Pearson
    """
    n_attrs = len(self.master.discrete_data.domain.attributes)
    min_attrs = 1 if self._compute_class_dists() else 2
    max_attrs = min(n_attrs, self.max_attrs)
    return sum(comb(n_attrs, k, exact=True)
               for k in range(min_attrs, max_attrs + 1))
def Distance(x, y):
    # x = "010011"
    # y = "010101"
    score_run = []
    word_len = len(x)
    score = 0
    run_len = 0
    if x == y:
        print("identical")
        return comb(word_len, 2)
    print("word_len=", word_len)
    # initialization
    for run_len in range(word_len):
        score_run.append(comb(run_len, 2))
    # score_run.pop(0)
    print("score_run= ", score_run)
    num_x = int(x, 2)
    num_y = int(y, 2)
    diff = num_x ^ num_y
    diff_shifted = diff << 1
    change_bit = diff ^ diff_shifted
    print("change_bit=", change_bit)
    for i in range(word_len):
        print("score=", score)
        run_len = leading_zeros(change_bit, word_len)
        print("run_len= ", run_len)
        if run_len == -1:
            break
        # print("run_len= ", run_len)
        # print("change_bit=", change_bit)
        print("score_run[run_len]=", score_run[run_len])
        score += score_run[run_len]
        change_bit <<= run_len
    return score
def _local_counts(kappa):
    mc = [1, 0.0]  # _kappa[0]]  # insert 0-moment and mean
    kappa0 = kappa[0]
    kappa = [1] + list(kappa)
    for nn, m in enumerate(kappa[2:]):
        n = nn + 2
        mc.append(0)
        for k in range(n - 1):
            mc[n] += comb(n - 1, k, exact=True) * kappa[n - k] * mc[k]
    mc[1] = kappa0  # insert mean as first moment by convention
    return mc[1:]
def combtoindex(arg):
    '''
    Compute the index corresponding to the combination given as the
    argument list.
    '''
    arg.reverse()
    result = 0
    k = len(arg)
    for var in range(0, k):
        if (arg[var] - 1) < (k - var):
            result += 0
        else:
            result += comb(arg[var] - 1, k - var)
    return int(result)
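A hedged round-trip smoke test pairing combtoindex with indextocomb (defined earlier in this collection), assuming both see scipy.special.comb: index 0 should correspond to the smallest 6-of-45 draw and map back to 0.

print(indextocomb(0, k=6, n=45))        # -> [1, 2, 3, 4, 5, 6]
print(combtoindex([1, 2, 3, 4, 5, 6]))  # -> 0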
def _local_counts(mc):
    mean = mc[0]
    mc = [1] + list(mc)  # add zero moment = 1
    mc[1] = 0  # define central mean as zero for formula
    mnc = [1, mean]  # zero and first raw moments
    for nn, m in enumerate(mc[2:]):
        n = nn + 2
        mnc.append(0)
        for k in range(n + 1):
            mnc[n] += comb(n, k, exact=True) * mc[k] * mean ** (n - k)
    return mnc[1:]
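A small sanity check of the central-to-raw conversion directly above (this test is an addition, not part of the original module): for a distribution with mean 2 and central moments (1, 0, 3), e.g. N(2, 1), the raw moments come out as 2, 5, 14, 43.

print(_local_counts([2, 1, 0, 3]))  # -> [2, 5, 14, 43]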
def _generate_multielement_entries(self, entries, forced_include=None,
                                   nproc=None):
    """
    Create entries for multi-element Pourbaix construction.

    This works by finding all possible linear combinations of entries that
    can result in the specified composition from the initialized comp_dict.

    Args:
        entries ([PourbaixEntries]): list of pourbaix entries to process
            into MultiEntries
        forced_include ([PourbaixEntries]): list of pourbaix entries that
            must be included in multielement entries
        nproc (int): number of processes to be used in parallel treatment
            of entry combos
    """
    N = len(self._elt_comp)  # No. of elements
    total_comp = Composition(self._elt_comp)
    forced_include = forced_include or []

    # generate all combinations of compounds that have all elements
    entry_combos = [itertools.combinations(
        entries, j + 1 - len(forced_include)) for j in range(N)]
    entry_combos = itertools.chain.from_iterable(entry_combos)
    if forced_include:
        entry_combos = [forced_include + list(ec) for ec in entry_combos]
    entry_combos = filter(lambda x: total_comp < MultiEntry(x).composition,
                          entry_combos)

    # Generate and filter entries
    processed_entries = []
    total = sum([comb(len(entries), j + 1 - len(forced_include))
                 for j in range(N)])
    if total > 1e6:
        warnings.warn("Your pourbaix diagram includes {} entries and may "
                      "take a long time to generate.".format(total))

    # Parallel processing of multi-entry generation
    if nproc is not None:
        f = partial(self.process_multientry, prod_comp=total_comp)
        with Pool(nproc) as p:
            processed_entries = list(tqdm(p.imap(f, entry_combos),
                                          total=total))
        processed_entries = list(filter(bool, processed_entries))
    # Serial processing of multi-entry generation
    else:
        for entry_combo in entry_combos:
            processed_entry = self.process_multientry(entry_combo, total_comp)
            if processed_entry is not None:
                processed_entries.append(processed_entry)

    return processed_entries
def expansion_coeff(angmom, mag, i, j, k):
    r"""Calculate the real solid harmonic expansion coefficient.

    .. math::

        C^{angmom, mag}_{i, j, k} = (-1)^{i + k - shift\_factor}
        \left(\frac{1}{4}\right)^{i} \binom{angmom}{i}
        \binom{angmom - i}{\left|mag\right| + i} \binom{i}{j}
        \binom{\left|mag\right|}{2k},

    where :math:`shift\_factor = 0` if :math:`mag \geq 0` and
    :math:`shift\_factor = 1/2` if :math:`mag < 0`.

    Parameters
    ----------
    angmom : int
        The angular momentum of the Gaussian primitive(s).
    mag : int
        The magnetic quantum number(s) of the Gaussian primitive(s).
    i, j : int
        The generator indices for the expansion coefficient.
    k : float
        The generator index for the expansion coefficient.

    Returns
    -------
    coeff : float
        The real solid harmonic expansion coefficient.

    Raises
    ------
    TypeError
        If `angmom` is not an integer.
        If `mag` is not an integer.
        If `i` is not an integer.
        If `j` is not an integer.
        If `k` is not a float.
    ValueError
        If `angmom` is negative.
        If `mag` has a greater magnitude than `angmom`.
        If `k` is not either an integer (mag >= 0) or a half integer
        (mag < 0).

    """
    if not isinstance(angmom, int):
        raise TypeError("Angular momentum must be an integer.")
    if angmom < 0:
        raise ValueError("Angular momentum must be a non-negative integer.")
    if not isinstance(mag, int):
        raise TypeError("The magnetic quantum number must be an integer.")
    if np.abs(mag) > angmom:
        raise ValueError(
            "The magnetic quantum number must be between -(`angmom`) and `angmom`."
        )
    if not isinstance(i, int):
        raise TypeError("Index `i` must be an integer")
    if not isinstance(j, int):
        raise TypeError("Index `j` must be an integer")
    if isinstance(k, int):
        k = float(k)
    if not isinstance(k, float):
        raise TypeError("Index `k` must be a float.")
    if k != int(k) and mag >= 0:
        raise ValueError(
            "Index `k` must be an integer for non-negative magnetic quantum numbers."
        )
    if k != int(k) + 0.5 and mag < 0:
        raise ValueError(
            "Index `k` must be a half integer for negative magnetic quantum numbers."
        )

    if mag < 0:
        return np.real(
            (complex(-1)) ** (i + k - shift_factor(mag)) * (1 / 4) ** i
            * comb(angmom, i) * comb(angmom - i, np.abs(mag) + i)
            * comb(i, j) * comb(np.abs(mag), 2 * k)
        )
    return ((-1) ** (i + k - shift_factor(mag)) * (1 / 4) ** i
            * comb(angmom, i) * comb(angmom - i, np.abs(mag) + i)
            * comb(i, j) * comb(np.abs(mag), 2 * k))
def shapley_full(df, model, endog, exog, fout):
    # n is the size of the largest sets of permutations
    n = len(exog)
    n_combs = sum([comb(16, k) for k in range(16)])
    # final_matrix = np.zeros((n, n_combs/n))
    final_matrix = [[] for x in range(n)]

    def get_rsquared_for_sets(sets):
        for s in sets:
            features = [exog[i] for i in s]
            fs = []
            for f in features:
                if '+' in f:
                    for ef in f.split('+'):
                        fs.append(ef)
                else:
                    fs.append(f)
            features = fs
            this_model = model(data=df,
                               formula="%s ~ %s" % (endog[0], '+'.join(features)))
            results = this_model.fit(maxiter=5000, disp=False)
            # for OLS
            rsquared = results.rsquared_adj
            # for poisson
            # rsquared = pearsonr(df[endog[0]], this_model.predict(results.params))[0]
            yield (s, rsquared)

    def concat_tuple(tup, final):
        state = ()
        for i in tup:
            state = state + (i, )
        state = state + (final, )
        return state

    start_time = time()

    # these are our R2 for single variable models
    rsquareds = dict()
    for combo, rsquared in get_rsquared_for_sets([(i, ) for i in range(n)]):
        rsquareds[str(combo[-1])] = rsquared
        # the prior rsquared is 0, for the model with no dependent variables
        adjusted_value = (comb(n - 1, 0))**-1 * (rsquared - 0)
        final_matrix[combo[-1]].append(adjusted_value)

    for k in tqdm(range(2, n + 1)):
        for combo, rsquared in get_rsquared_for_sets(
                combinations(range(n), k - 1)):
            combo_string = '.'.join(map(str, sorted(combo)))
            rsquareds[combo_string] = rsquared

    combo, rsquared = list(get_rsquared_for_sets([tuple(range(n))]))[0]
    rsquareds['.'.join(map(str, sorted(combo)))] = rsquared

    # calculate the difference for adding in the new variable
    for k in range(2, n + 1):
        for i in range(n):
            all_but_i = list(range(n))
            del all_but_i[i]
            for prior_combo in combinations(all_but_i, k - 1):
                combo = '.'.join(map(str, sorted(list(prior_combo) + [i])))
                prior_combo = '.'.join(map(str, sorted(prior_combo)))
                diff = rsquareds[combo] - rsquareds[prior_combo]
                final_matrix[i].append((comb(n - 1, k - 1))**-1 * diff)

    print("Ran for %d minutes." % int((time() - start_time) / 60))

    # final model
    combo, rsquared = list(get_rsquared_for_sets([tuple(range(n))]))[0]
    phis = [1 / n * sum(final_matrix[i]) for i in range(n)]
    fout.write('rsquared: ' + str(rsquared) + '\n')
    fout.write('shapely_computed: ' + str(sum(phis)) + '\n')
    for i in range(n):
        # print("%s: %s, %i" % (exog[i], ' '.join(map(str, final_matrix[i])), np.mean(final_matrix[i])))
        fout.write("%s: %.4f, %.2f%%\n"
                   % (exog[i], phis[i], (phis[i] / rsquared) * 100))
def do_problem_four_part_a(sample_size: int, num_students_observed: int,
                           p: float):
    """
    Homework 2, Problem 2, Part A

    We execute a solution to this problem in two parts. First, we compute
    the theoretical solution. That is to say, we compute an exact value for:

    P(Y >= 12 ; p = 0.7) = sum from k=12 to k=20 of (20 choose k) p^k (1-p)^(20-k)
    """
    pmf_y = [
        comb(sample_size, k) * np.power(p, k) * np.power(1 - p, sample_size - k)
        for k in range(0, sample_size + 1)
    ]
    cdf_y = []
    cum_prob = 0
    for i in range(0, 21):
        cum_prob += pmf_y[i]
        cdf_y.append(cum_prob)
    probability = sum(pmf_y[num_students_observed:])
    print(f"""The probability of observing at least {num_students_observed}
students applying probability is: {probability}""")

    width = 0.35
    labels = [f'X={x}' for x in range(0, 21)]
    x_pts = [x for x in range(0, 21)]
    fig, ax = plt.subplots()
    x = np.arange(len(labels))
    rects1 = ax.bar(x - width / 2, pmf_y, width, label='PMF')
    rects2 = ax.bar(x + width / 2, cdf_y, width, label='CDF')

    # Add some text for labels, title and custom x-axis tick labels, etc.
    ax.set_ylabel('Probability')
    ax.set_title('PMF and CDF for Bin(n, k)')
    ax.set_xticks(x)
    ax.set_xticklabels(labels)
    ax.legend()

    def autolabel(rects):
        """Attach a text label above each bar in *rects*, displaying its
        height."""
        for rect in rects:
            height = round(rect.get_height(), 3)
            ax.annotate(
                '{}'.format(height),
                xy=(rect.get_x() + rect.get_width() / 2, height),
                xytext=(0, 3),  # 3 points vertical offset
                textcoords="offset points",
                ha='center',
                va='bottom')

    autolabel(rects1)
    autolabel(rects2)
    fig.tight_layout()
    plt.show()
    return probability
def dicke(state, G, k):
    for i in range(0, 2**len(G.nodes)):
        if num_ones(i) == k:
            state[i] = 1 / (np.sqrt(comb(len(G.nodes), k)))
    return state
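A hedged normalization check for the Dicke state above, on a 4-node graph with k = 2 excitations; num_ones is assumed to be a popcount helper (defined here for the test), and comb is scipy.special.comb as in the snippet.

import numpy as np
import networkx as nx
from scipy.special import comb


def num_ones(i):
    # assumed popcount helper used by dicke()
    return bin(i).count('1')


G = nx.complete_graph(4)
state = dicke(np.zeros(2 ** 4), G, 2)
print(np.sum(state ** 2))  # -> 1.0: C(4, 2) = 6 basis states, each 1/sqrt(6)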
import math
from scipy.special import comb

states = 1
for i in range(1, 10):
    top = math.factorial(9) / math.factorial(9 - i)
    try:
        bottom = math.factorial((i + 1) // 2) * math.factorial(i // 2)
    except ValueError:
        bottom = 1
    states += (top / bottom)
states = states - 8 * (comb(6, 5) + comb(6, 4) + comb(6, 3) + 2 * (comb(6, 4)))
print(states)
# print('round {}, loss={}'.format(round_num, loss))

m = np.dot(test_images, np.asarray(model['weights']))
test_result = m + np.asarray(model['bias'])
y = tf.nn.softmax(test_result)
# tf.arg_max is a deprecated alias of tf.argmax
correct_prediction = tf.equal(tf.argmax(y, 1),
                              tf.argmax(test_labels_onehot, 1))
# print(list(tf.argmax(y, 1).numpy()))
# print(list(tf.argmax(test_labels_onehot, 1).numpy()))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
group_shapley_value.append(accuracy.numpy())
print("combination finished ", time.time() - start_time)
print(str(ss) + "\t" +
      str(group_shapley_value[len(group_shapley_value) - 1]))

agent_shapley = []
for index in range(NUM_AGENT):
    shapley = 0.0
    for j in all_sets:
        if index in j:
            remove_list_index = remove_list_indexed(index, j, all_sets)
            if remove_list_index != -1:
                shapley += (group_shapley_value[shapley_list_indexed(j, all_sets)]
                            - group_shapley_value[remove_list_index]) \
                    / (comb(NUM_AGENT - 1, len(all_sets[remove_list_index])))
    agent_shapley.append(shapley)

for ag_s in agent_shapley:
    print(ag_s)

print("end_time", time.time() - start_time)
def _munp(self, n, c):
    k = np.arange(0, n + 1)
    val = (1.0 / c)**n * np.sum(comb(n, k) * (-1)**k / (1.0 + c * k), axis=0)
    return where(c * n > -1, val, inf)
def b(n, k, CR):
    return comb(n, k, exact=True) * np.power(CR, k) * np.power(1 - CR, n - k)
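Since b(n, k, CR) is the binomial pmf, its values over k should sum to one; a quick check, assuming numpy as np and scipy.special.comb are in scope as the one-liner requires:

print(sum(b(10, k, 0.3) for k in range(11)))  # -> 1.0 (up to rounding)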
# https://atcoder.jp/contests/abc159/tasks/abc159_a
from scipy.special import comb

N, M = map(int, input().split())
c = comb(N + M, 2, exact=True) - ((comb(M, 1, exact=True)) * (comb(N, 1, exact=True)))
print(c)
def invhilbert(n, exact=False):
    """
    Compute the inverse of the Hilbert matrix of order `n`.

    The entries in the inverse of a Hilbert matrix are integers. When `n`
    is greater than 14, some entries in the inverse exceed the upper limit
    of 64 bit integers. The `exact` argument provides two options for
    dealing with these large integers.

    Parameters
    ----------
    n : int
        The order of the Hilbert matrix.
    exact : bool
        If False, the data type of the array that is returned is np.float64,
        and the array is an approximation of the inverse.
        If True, the array is the exact integer inverse array. To represent
        the exact inverse when n > 14, the returned array is an object array
        of long integers. For n <= 14, the exact inverse is returned as an
        array with data type np.int64.

    Returns
    -------
    invh : (n, n) ndarray
        The data type of the array is np.float64 if `exact` is False.
        If `exact` is True, the data type is either np.int64 (for n <= 14)
        or object (for n > 14). In the latter case, the objects in the
        array will be long integers.

    See Also
    --------
    hilbert : Create a Hilbert matrix.

    Notes
    -----
    .. versionadded:: 0.10.0

    Examples
    --------
    >>> from scipy.linalg import invhilbert
    >>> invhilbert(4)
    array([[   16.,  -120.,   240.,  -140.],
           [ -120.,  1200., -2700.,  1680.],
           [  240., -2700.,  6480., -4200.],
           [ -140.,  1680., -4200.,  2800.]])
    >>> invhilbert(4, exact=True)
    array([[   16,  -120,   240,  -140],
           [ -120,  1200, -2700,  1680],
           [  240, -2700,  6480, -4200],
           [ -140,  1680, -4200,  2800]], dtype=int64)
    >>> invhilbert(16)[7,7]
    4.2475099528537506e+19
    >>> invhilbert(16, exact=True)[7,7]
    42475099528537378560

    """
    from scipy.special import comb
    if exact:
        if n > 14:
            dtype = object
        else:
            dtype = np.int64
    else:
        dtype = np.float64
    invh = np.empty((n, n), dtype=dtype)
    for i in range(n):
        for j in range(0, i + 1):
            s = i + j
            invh[i, j] = ((-1)**s * (s + 1) *
                          comb(n + i, n - j - 1, exact) *
                          comb(n + j, n - i - 1, exact) *
                          comb(s, i, exact)**2)
            if i != j:
                invh[j, i] = invh[i, j]
    return invh
# coding:utf-8
import pylab as pl
import numpy as np
from scipy import stats
from scipy.special import comb, perm
import math

n = 16
p = 1 / 3
print(p)
# k = np.arange(15, 16)
# binomail = stats.binom.pmf(k, n, p)
# print(binomail)
print(comb(15, 12) * math.pow(perm(3, 1), 3))
print(math.pow(perm(3, 1), 15))
res = comb(15, 12) / math.pow(perm(3, 1), 15)
print(res * 100)
print(math.pow(1 / 3, 12) * comb(15, 12))
def main():
    ####### Parsing parameters and preparing data #######
    parser = argparse.ArgumentParser(prog='GMM-demux',
                                     conflict_handler='resolve')

    # Positional arguments have * number of arguments atm.
    parser.add_argument('input_path', help="The input path of mtx files from cellRanger pipeline.", nargs="*")
    parser.add_argument('hto_array', help="Names of the HTO tags, separated by ','.", nargs="*")

    # Optional arguments.
    parser.add_argument("-k", "--skip", help="Load a full classification report and skip the mtx folder. Requires a path argument to the full report folder. When specified, the user no longer needs to provide the mtx folder.", type=str)
    parser.add_argument("-x", "--extract", help="Names of the HTO tag(s) to extract, separated by ','. Joint HTO samples are combined with '+', such as 'HTO_1+HTO_2'.", type=str)
    parser.add_argument("-o", "--output", help="The path for storing the Same-Sample-Droplets (SSDs). SSDs are stored in mtx format. Requires a path argument.", type=str, default="SSD_mtx")
    parser.add_argument("-f", "--full", help="Generate the full classification report. Requires a path argument.", type=str)
    parser.add_argument("-c", "--csv", help="Take input in csv format, instead of mmx format.", action='store_true')
    parser.add_argument("-t", "--threshold", help="Provide the confidence threshold value. Requires a float in (0,1). Default value: 0.8", type=float, default=0.8)
    parser.add_argument("-s", "--simplified", help="Generate the simplified classification report. Requires a path argument.", type=str)
    parser.add_argument("-u", "--summary", help="Generate the statistic summary of the dataset, including MSM and SSM rates. Requires an estimated total number of cells in the assay as input.", type=int)
    parser.add_argument("-r", "--report", help="Store the data summary report. Requires a file argument. Only executes if -u is set.", type=str)
    parser.add_argument("-e", "--examine", help="Provide the cell list. Requires a file argument. Only executes if -u is set.", type=str)
    parser.add_argument("-a", "--ambiguous", help="The estimated chance of having a phony GEM getting included in a pure type GEM cluster by the clustering algorithm. Requires a float in (0, 1). Default value: 0.05. Only executes if -e executes.", type=float, default=0.05)

    print("==============================GMM-Demux Initialization==============================")

    args = parser.parse_args()

    confidence_threshold = args.threshold
    print("Confidence threshold:", confidence_threshold)

    # Classify droplets
    if not args.skip:
        # Overwrite the positional arguments
        parser.add_argument('input_path', help="The input path of mtx files from cellRanger pipeline.")
        parser.add_argument('hto_array', help="Names of the HTO tags, separated by ','.")
        args = parser.parse_args()

        input_path = args.input_path
        hto_array = args.hto_array.split(',')

        output_path = args.output
        print("Output directory:", output_path)

        # TODO: add CLR to csv data.
        if args.csv:
            full_df, GMM_df = GMM_IO.read_csv(input_path, hto_array)
        else:
            full_df, GMM_df = GMM_IO.read_cellranger(input_path, hto_array)

        GEM_num = GMM_df.shape[0]
        sample_num = GMM_df.shape[1]

        ####### Run classifier #######
        base_bv_array = compute_venn.obtain_base_bv_array(sample_num)
        # print([int(i) for i in base_bv_array])
        (high_array, low_array) = classify_drops.obtain_arrays(GMM_df)

        # Obtain extract array.
        if args.extract:
            extract_id_ary = []
            tag_name_ary = []

            for tag_name in args.extract.split(','):
                tag_name_ary.append(tag_name.split('+'))

            for tag_ary in tag_name_ary:
                mask = compute_venn.init_mask(sample_num)
                for tag in tag_ary:
                    hto_idx = hto_array.index(tag)
                    bv = compute_venn.set_bit(mask, hto_idx)

                for idx in range(0, len(base_bv_array)):
                    if base_bv_array[idx] == mask:
                        extract_id = idx
                extract_id_ary.append(extract_id)
        else:
            extract_id_ary = None

        # Obtain classification result
        GMM_full_df, class_name_ary = \
            classify_drops.classify_drops(base_bv_array, high_array, low_array,
                                          sample_num, GEM_num, GMM_df.index,
                                          GMM_df.columns.values)

        # Store classification results
        if args.full:
            print("Full classification result is stored in", args.full)
            classify_drops.store_full_classify_result(GMM_full_df,
                                                      class_name_ary,
                                                      args.full)

        if args.simplified:
            ########## Paper Specific ############
            # purified_df = classify_drops.purify_droplets(GMM_full_df, confidence_threshold)
            ########## Paper Specific ############
            print("Simplified classification result is stored in", args.simplified)
            classify_drops.store_simplified_classify_result(GMM_full_df,
                                                            class_name_ary,
                                                            args.simplified,
                                                            sample_num,
                                                            confidence_threshold)

        # Clean up bad drops
        purified_df = classify_drops.purify_droplets(GMM_full_df,
                                                     confidence_threshold)

        # Store SSD result
        print("MSM-free droplets are stored in folder", output_path, "\n")
        SSD_idx = classify_drops.obtain_SSD_list(purified_df, sample_num,
                                                 extract_id_ary)
        SSD_df = GMM_IO.store_cellranger(full_df, SSD_idx, output_path)

        # Record sample names for summary report.
        sample_names = GMM_df.columns

    # Parse the full report.
    else:
        GMM_full_df, sample_num, class_name_ary, sample_names = \
            classify_drops.read_full_classify_result(args.skip)
        base_bv_array = compute_venn.obtain_base_bv_array(sample_num)
        purified_df = classify_drops.purify_droplets(GMM_full_df,
                                                     confidence_threshold)
        SSD_idx = classify_drops.obtain_SSD_list(purified_df, sample_num)

    ####### If extract is enabled, other functions are disabled #######
    if args.extract:
        exit()

    ####### Estimate SSM #######
    if args.summary:
        # Count bad drops
        negative_num, unclear_num = \
            classify_drops.count_bad_droplets(GMM_full_df, confidence_threshold)

        estimated_total_cell_num = args.summary

        # Infer parameters
        HTO_GEM_ary = compute_venn.obtain_HTO_GEM_num(purified_df,
                                                      base_bv_array,
                                                      sample_num)

        params0 = [80000, 0.5]
        for i in range(sample_num):
            params0.append(round(HTO_GEM_ary[i] * estimated_total_cell_num
                                 / sum(HTO_GEM_ary[:sample_num])))

        combination_counter = 0
        try:
            for i in range(1, sample_num + 1):
                combination_counter += comb(sample_num, i, True)
                HTO_GEM_ary_main = HTO_GEM_ary[0:combination_counter]
                params0 = compute_venn.obtain_experiment_params(base_bv_array,
                                                                HTO_GEM_ary_main,
                                                                sample_num,
                                                                estimated_total_cell_num,
                                                                params0)
        except:
            print("GMM cannot find a viable solution that satisfies the "
                  "droplet formation model. SSM rate estimation terminated.")
            sys.exit(0)

        # Legacy parameter estimation
        # (cell_num_ary, drop_num, capture_rate) = compute_venn.obtain_HTO_cell_n_drop_num(purified_df, base_bv_array, sample_num, estimated_total_cell_num, confidence_threshold)
        (drop_num, capture_rate, *cell_num_ary) = params0

        SSM_rate_ary = [estimator.compute_SSM_rate_with_cell_num(cell_num_ary[i], drop_num)
                        for i in range(sample_num)]
        rounded_cell_num_ary = [round(cell_num) for cell_num in cell_num_ary]
        SSD_count_ary = classify_drops.get_SSD_count_ary(purified_df, SSD_idx,
                                                         sample_num)
        count_ary = classify_drops.count_by_class(purified_df, base_bv_array)
        MSM_rate, SSM_rate, singlet_rate = \
            compute_venn.gather_multiplet_rates(count_ary, SSM_rate_ary,
                                                sample_num)

        # Generate report
        full_report_dict = {
            "#Drops": round(drop_num),
            "Capture rate": "%5.2f" % (capture_rate * 100),
            "#Cells": sum(rounded_cell_num_ary),
            "Singlet": "%5.2f" % (singlet_rate * 100),
            "MSM": "%5.2f" % (MSM_rate * 100),
            "SSM": "%5.2f" % (SSM_rate * 100),
            "RSSM": "%5.2f" % (estimator.compute_relative_SSM_rate(SSM_rate, singlet_rate) * 100),
            "Negative": "%5.2f" % (negative_num / GMM_full_df.shape[0] * 100),
            "Unclear": "%5.2f" % (unclear_num / GMM_full_df.shape[0] * 100)
        }
        full_report_columns = [
            "#Drops", "Capture rate", "#Cells", "Singlet", "MSM", "SSM",
            "RSSM", "Negative", "Unclear"
        ]
        full_report_df = pd.DataFrame(full_report_dict, index=["Total"],
                                      columns=full_report_columns)
        print("==============================Full Report==============================")
        print(tabulate(full_report_df, headers='keys', tablefmt='psql'))
        print("\n\n")
        print("==============================Per Sample Report==============================")
        sample_df = pd.DataFrame(data=[
            ["%d" % num for num in rounded_cell_num_ary],
            ["%d" % num for num in SSD_count_ary],
            ["%5.2f" % (num * 100) for num in SSM_rate_ary]
        ], columns=sample_names, index=["#Cells", "#SSDs", "RSSM"])
        print(tabulate(sample_df, headers='keys', tablefmt='psql'))

        if args.report:
            print("\n\n***Summary report is stored in folder", args.report)
            with open(args.report, "w") as report_file:
                report_file.write("==============================Full Report==============================\n")
            with open(args.report, "a") as report_file:
                report_file.write(tabulate(full_report_df, headers='keys', tablefmt='psql'))
            with open(args.report, "a") as report_file:
                report_file.write("\n\n")
                report_file.write("==============================Per Sample Report==============================\n")
            with open(args.report, "a") as report_file:
                report_file.write(tabulate(sample_df, headers='keys', tablefmt='psql'))

        # Verify cell type
        if args.examine:
            print("\n\n==============================Verifying the GEM Cluster==============================")
            ambiguous_rate = args.ambiguous
            print("Ambiguous rate:", ambiguous_rate)
            simplified_df = classify_drops.store_simplified_classify_result(purified_df,
                                                                            class_name_ary,
                                                                            None,
                                                                            sample_num,
                                                                            confidence_threshold)
            cell_list_path = args.examine
            cell_list = [line.rstrip('\n') for line in open(args.examine)]
            cell_list = list(set(cell_list).intersection(simplified_df.index.tolist()))

            ########## Paper Specific ############
            # cell_list_df = pd.read_csv(args.examine, index_col=0)
            # cell_list = cell_list_df.index.tolist()
            ########## Paper Specific ############

            MSM_list = classify_drops.obtain_MSM_list(simplified_df,
                                                      sample_num, cell_list)

            GEM_num = len(cell_list)
            MSM_num = len(MSM_list)

            print("GEM count: ", GEM_num, " | MSM count: ", MSM_num)

            phony_test_pvalue = estimator.test_phony_hypothesis(MSM_num,
                                                                GEM_num,
                                                                rounded_cell_num_ary,
                                                                capture_rate)
            pure_test_pvalue = estimator.test_pure_hypothesis(MSM_num,
                                                              drop_num,
                                                              GEM_num,
                                                              rounded_cell_num_ary,
                                                              capture_rate,
                                                              ambiguous_rate)

            print("Phony-type testing. P-value: ", phony_test_pvalue)
            print("Pure-type testing. P-value: ", pure_test_pvalue)

            cluster_type = ""
            if phony_test_pvalue < 0.01 and pure_test_pvalue > 0.01:
                cluster_type = " pure"
            elif pure_test_pvalue < 0.01 and phony_test_pvalue > 0.01:
                cluster_type = " phony"
            else:
                cluster_type = "n unclear"

            print("Conclusion: The cluster is a" + cluster_type + " cluster.")
for a in range(len(v1)):
    # fetch the Sharpe ratio (`夏普值`) from the properties table (`性質表`)
    sql = "select `夏普值` from `性質表` where name = '" + v1[a] + "'"
    cursor.execute(sql)
    result_select = cursor.fetchall()
    sharp[a] = result_select[0][0]
db.close()

v = len(v1)
while v > 4:
    # while(v > 23):
    # print("gogowhile")
    choose_code = []
    for i in range(int(comb(v, 4))):
        choose_code.append(0)
    db = pymysql.connect("localhost", "root", "esfortest", "etf")
    cursor = db.cursor()
    code = []
    for produce in range(0, len(v1)):
        if record_v1[produce] == 0:
            # code[a] = produce
            code.append(produce)
            a += 1
            # v -= 1
    # w: weight ratio, name: asset name, min_risk: risk
def f1_score(model_generated_cluster_labels, target_labels, feature_coll,
             computed_centroids):
    from scipy.special import comb

    d = np.zeros(len(feature_coll))
    for i in range(len(feature_coll)):
        d[i] = np.linalg.norm(
            feature_coll[i, :] - computed_centroids[model_generated_cluster_labels[i], :])

    labels_pred = np.zeros(len(feature_coll))
    for i in np.unique(model_generated_cluster_labels):
        index = np.where(model_generated_cluster_labels == i)[0]
        ind = np.argmin(d[index])
        cid = index[ind]
        labels_pred[index] = cid

    N = len(target_labels)

    # cluster n_labels
    avail_labels = np.unique(target_labels)
    n_labels = len(avail_labels)

    # count the number of objects in each cluster
    count_cluster = np.zeros(n_labels)
    for i in range(n_labels):
        count_cluster[i] = len(np.where(target_labels == avail_labels[i])[0])

    # build a mapping from item_id to item index
    keys = np.unique(labels_pred)
    num_item = len(keys)
    values = range(num_item)
    item_map = dict()
    for i in range(len(keys)):
        item_map.update([(keys[i], values[i])])

    # count the number of objects of each item
    count_item = np.zeros(num_item)
    for i in range(N):
        index = item_map[labels_pred[i]]
        count_item[index] = count_item[index] + 1

    # compute True Positive (TP) plus False Positive (FP)
    tp_fp = 0
    for k in range(n_labels):
        if count_cluster[k] > 1:
            tp_fp = tp_fp + comb(count_cluster[k], 2)

    # compute True Positive (TP)
    tp = 0
    for k in range(n_labels):
        member = np.where(target_labels == avail_labels[k])[0]
        member_ids = labels_pred[member]
        count = np.zeros(num_item)
        for j in range(len(member)):
            index = item_map[member_ids[j]]
            count[index] = count[index] + 1
        for i in range(num_item):
            if count[i] > 1:
                tp = tp + comb(count[i], 2)

    # False Positive (FP)
    fp = tp_fp - tp

    # compute False Negative (FN)
    count = 0
    for j in range(num_item):
        if count_item[j] > 1:
            count = count + comb(count_item[j], 2)
    fn = count - tp

    # compute F measure
    P = tp / (tp + fp)
    R = tp / (tp + fn)
    beta = 1
    F = (beta * beta + 1) * P * R / (beta * beta * P + R)

    return F
def V(p, M, h):
    v = 0
    for i in range(h, M + 1):
        v = comb(M, i) * (p * pow(S(p, i), i) * pow((1 - S(p, i)), M - i)) + v
    return v
def test_shape(testCOB):
    assert len(testCOB.coex) == comb(testCOB.num_genes(), 2)
    kmeans = KMeans(num_clusters).fit(ints)
    centers = []
    for c in kmeans.cluster_centers_:
        cv2.circle(frame, tuple([int(x) for x in c]), 15, (255, 255, 0), 5, -1)
        if c[1] > heigh_const and c[1] < frame.shape[0] and c[0] < frame.shape[1]:
            centers.append(c)
    centers = np.array(centers)
    centers = centers[np.all(centers > 0, axis=1)]
else:
    h_best = h_last_frame
    white_pixels_old = white_pixels_last_frame[(count - 1) % 10]

used = None
epsilon = 500
done_loops = 0
iters = int(comb(num_clusters, 4) * .9) + 1
if h_best is None and len(centers) > 3 and est:
    est = False
    for _ in range(iters):
        corners = centers[np.random.choice(centers.shape[0], 4, replace=False)]
        if np.linalg.det(np.hstack((corners[:3], [[1], [1], [1]]))) < epsilon:
            continue
        if np.linalg.det(np.hstack((corners[[0, 2, 3]], [[1], [1], [1]]))) < epsilon:
            continue
        if np.linalg.det(np.hstack((corners[[0, 1, 3]], [[1], [1], [1]]))) < epsilon:
            continue
        if np.linalg.det(np.hstack((corners[1:], [[1], [1], [1]]))) < epsilon:
            continue
        done_loops += 1
        unused = corners
# print('\ntest 4')
# ls = np.array([4, 2, 1]).astype(int)
# n = 3
# maxs = np.array([2, 4, 1]).astype(int)
# for i in p_wc(ls, n, maxs):
#     print(i)

# print('\ntest 5')
# ls = np.array([4, 2, 1]).astype(int)
# n = 3
# maxs = np.array([0, 10, 3]).astype(int)
# for i in p_wc(ls, n, maxs):
#     print(i)
# quit()

# count total number of wcs, n=balls, k=boxes
wc_count = lambda n, k: int(comb(n + k - 1, k - 1))


def ind2mass_genseries(N, n, indMat):
    '''
    INPUT:
        N :: Integer                  # number of people
        n :: Integer                  # number of bins
        indMat :: List<List<Float>>   # individual matrix
    OUTPUT:
        List<List<Float>>             # mass matrix according to my generalized
                                      # series formula (without Java speedup)
    '''
cluster_labels = np.load(
    os.path.join(embedding_directory,
                 'cluster_labels_{}.npy'.format(preference)))
n = len(cluster_labels)
assert len(cluster_centres) == (cluster_labels.max() + 1)
cluster_sizes = np.array([(cluster_labels == l).sum()
                          for l in range(len(cluster_centres))])
cluster_centres = cluster_centres[cluster_sizes >= 3]
take_indices = []
for i in cluster_centres:
    for j in cluster_centres:
        if j > i:
            take_indices.append(
                comb(n, 2, exact=True) - comb(n - i, 2, exact=True) + (j - i - 1))
take_indices = np.array(take_indices)
assert len(cluster_centres) == len(squareform(take_indices))
Ds.append(distance_matrix[take_indices])
del distance_matrix

eigenvalues = []
reconerrors = []
np.random.seed(2019)

print('Generating embeddings')
for pref, distances in zip(preferences, Ds):
    print(pref)
def calculateProb(self):
    self.Probs = np.zeros(self.currStep + 1)
    for i in range(self.currStep + 1):
        self.Probs[i] = comb(self.currStep, i) * math.pow(self.pr, i) \
            * math.pow(1 - self.pr, self.currStep - i)
    # reshape returns a new array, so the result must be assigned back
    self.Probs = self.Probs.reshape(1, -1)
print('\nStarting SCF and integral build...')
t = time.time()

# First compute SCF energy using Psi4
scf_e, wfn = psi4.energy('SCF', return_wfn=True)

# Grab data from wavefunction class
C = wfn.Ca()
ndocc = wfn.doccpi()[0]
nmo = wfn.nmo()
nvirt = nmo - ndocc

# Compute size of Hamiltonian in GB
from scipy.special import comb
nDet_S = ndocc * nvirt * 2
nDet_D = 2 * comb(ndocc, 2) * comb(nvirt, 2) + ndocc**2 * nvirt**2
nDet = 1 + nDet_S + nDet_D
H_Size = nDet**2 * 8e-9
print('\nSize of the Hamiltonian Matrix will be %4.2f GB.' % H_Size)
if H_Size > numpy_memory:
    clean()
    raise Exception("Estimated memory utilization (%4.2f GB) exceeds "
                    "numpy_memory limit of %4.2f GB."
                    % (H_Size, numpy_memory))

# Integral generation from Psi4's MintsHelper
t = time.time()
mints = psi4.core.MintsHelper(wfn.basisset())
H = np.asarray(mints.ao_kinetic()) + np.asarray(mints.ao_potential())
print('\nTotal time taken for ERI integrals: %.3f seconds.\n'
      % (time.time() - t))
def __init__(self, triangle, p_critical=.1, total=True):
    def pZlower(z, n, p=0.5):
        return min(1, 2 * binom.cdf(z, n, p))

    self.p_critical = p_critical
    self.total = total
    if triangle.array_backend != 'numpy':
        triangle = triangle.set_backend('numpy')
    else:
        triangle = copy.deepcopy(triangle)
    xp = triangle.get_array_module()
    lr = triangle.link_ratio
    m1 = xp.apply_along_axis(rankdata, 2, lr.values) * (lr.values * 0 + 1)
    med = xp.nanmedian(m1, axis=2, keepdims=True)
    m1large = (xp.nan_to_num(m1) > med) + (lr.values * 0)
    m1small = (xp.nan_to_num(m1) < med) + (lr.values * 0)
    m2large = triangle.link_ratio
    m2large.values = m1large
    m2small = triangle.link_ratio
    m2small.values = m1small
    S = xp.nan_to_num(m2small.dev_to_val().sum(axis=2).values)
    L = xp.nan_to_num(m2large.dev_to_val().sum(axis=2).values)
    z = xp.minimum(L, S)
    n = L + S
    m = xp.floor((n - 1) / 2)
    c = comb(n - 1, m)
    EZ = (n / 2) - c * n / (2**n)
    VarZ = n * (n - 1) / 4 - c * n * (n - 1) / (2**n) + EZ - EZ**2
    if not self.total:
        T = []
        for i in range(0, xp.max(m1large.shape[2:]) + 1):
            T.append([
                pZlower(i, j, 0.5)
                for j in range(0, xp.max(m1large.shape[2:]) + 1)
            ])
        T = np.array(T)
        z_idx, n_idx = z.astype(int), n.astype(int)
        self.probs = T[z_idx, n_idx]
        z_critical = triangle[
            triangle.valuation > triangle.valuation.min()]
        z_critical = z_critical.dev_to_val().dropna().sum('origin') * 0
        z_critical.values = (np.array(self.probs) < p_critical)
        z_critical.odims = ['(All)']
        self.z_critical = z_critical
        self.z = copy.deepcopy(self.z_critical)
        self.z.values = z
        self.z_expectation = copy.deepcopy(self.z_critical)
        self.z_expectation.values = EZ
        self.z_variance = copy.deepcopy(self.z_critical)
        self.z_variance.values = VarZ
    else:
        ci2 = norm.ppf(0.5 - (1 - p_critical) / 2) * xp.sqrt(
            xp.sum(VarZ, axis=-1))
        self.range = (xp.sum(VarZ, axis=-1) + ci2,
                      xp.sum(VarZ, axis=-1) - ci2)
        idx = triangle._idx_table().index
        self.z_critical = pd.DataFrame(
            ((self.range[0] > VarZ.sum(axis=-1)) |
             (VarZ.sum(axis=-1) > self.range[1]))[..., 0],
            columns=triangle.vdims, index=idx)
        self.z = pd.DataFrame(z.sum(axis=-1)[..., 0],
                              columns=triangle.vdims, index=idx)
        self.z_expectation = pd.DataFrame(EZ.sum(axis=-1)[..., 0],
                                          columns=triangle.vdims, index=idx)
        self.z_variance = pd.DataFrame(VarZ.sum(axis=-1)[..., 0],
                                       columns=triangle.vdims, index=idx)
def invpascal(n, kind='symmetric', exact=True):
    """
    Returns the inverse of the n x n Pascal matrix.

    The Pascal matrix is a matrix containing the binomial coefficients as
    its elements.

    Parameters
    ----------
    n : int
        The size of the matrix to create; that is, the result is an n x n
        matrix.
    kind : str, optional
        Must be one of 'symmetric', 'lower', or 'upper'.
        Default is 'symmetric'.
    exact : bool, optional
        If `exact` is True, the result is either an array of type
        ``numpy.int64`` (if `n` <= 35) or an object array of Python integers.
        If `exact` is False, the coefficients in the matrix are computed using
        `scipy.special.comb` with `exact=False`. The result will be a
        floating point array, and for large `n`, the values in the array
        will not be the exact coefficients.

    Returns
    -------
    invp : (n, n) ndarray
        The inverse of the Pascal matrix.

    See Also
    --------
    pascal

    Notes
    -----
    .. versionadded:: 0.16.0

    References
    ----------
    .. [1] "Pascal matrix", https://en.wikipedia.org/wiki/Pascal_matrix
    .. [2] Cohen, A. M., "The inverse of a Pascal matrix", Mathematical
           Gazette, 59(408), pp. 111-112, 1975.

    Examples
    --------
    >>> from scipy.linalg import invpascal, pascal
    >>> invp = invpascal(5)
    >>> invp
    array([[  5, -10,  10,  -5,   1],
           [-10,  30, -35,  19,  -4],
           [ 10, -35,  46, -27,   6],
           [ -5,  19, -27,  17,  -4],
           [  1,  -4,   6,  -4,   1]])

    >>> p = pascal(5)
    >>> p.dot(invp)
    array([[ 1.,  0.,  0.,  0.,  0.],
           [ 0.,  1.,  0.,  0.,  0.],
           [ 0.,  0.,  1.,  0.,  0.],
           [ 0.,  0.,  0.,  1.,  0.],
           [ 0.,  0.,  0.,  0.,  1.]])

    An example of the use of `kind` and `exact`:

    >>> invpascal(5, kind='lower', exact=False)
    array([[ 1., -0.,  0., -0.,  0.],
           [-1.,  1., -0.,  0., -0.],
           [ 1., -2.,  1., -0.,  0.],
           [-1.,  3., -3.,  1., -0.],
           [ 1., -4.,  6., -4.,  1.]])

    """
    from scipy.special import comb

    if kind not in ['symmetric', 'lower', 'upper']:
        raise ValueError("'kind' must be 'symmetric', 'lower' or 'upper'.")

    if kind == 'symmetric':
        if exact:
            if n > 34:
                dt = object
            else:
                dt = np.int64
        else:
            dt = np.float64
        invp = np.empty((n, n), dtype=dt)
        for i in range(n):
            for j in range(0, i + 1):
                v = 0
                for k in range(n - i):
                    v += comb(i + k, k, exact=exact) * comb(i + k,
                                                            i + k - j,
                                                            exact=exact)
                invp[i, j] = (-1)**(i - j) * v
                if i != j:
                    invp[j, i] = invp[i, j]
    else:
        # For the 'lower' and 'upper' cases, we compute the inverse by
        # changing the sign of every other diagonal of the pascal matrix.
        invp = pascal(n, kind=kind, exact=exact)
        if invp.dtype == np.uint64:
            # This cast from np.uint64 to int64 is OK, because if `kind` is
            # not "symmetric", the values in invp are all much less than
            # 2**63.
            invp = invp.view(np.int64)

        # The toeplitz matrix has alternating bands of 1 and -1.
        invp *= toeplitz((-1)**np.arange(n)).astype(invp.dtype)

    return invp
def _comb2(n):
    # the exact version is faster for k == 2: use it by default globally in
    # this module instead of the float approximate variant
    return comb(n, 2, exact=1)
def c(n, k):
    t = (n, k)
    if t not in c_map:
        c_map[t] = sp.comb(n, k, exact=True)
    return c_map[t]
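The memoized wrapper above relies on a module-level cache and on sp being scipy.special; a minimal usage sketch with both assumptions made explicit:

import scipy.special as sp

c_map = {}  # assumed module-level cache consulted by c()

print(c(10, 3))  # 120, computed via sp.comb and stored in c_map
print(c(10, 3))  # 120, served from the cache on the second call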
def compute_clutering_metric(idx, item_ids):
    N = len(idx)

    # cluster centers
    centers = np.unique(idx)
    num_cluster = len(centers)
    # print('Number of clusters: %d\n' % num_cluster)

    # count the number of objects in each cluster
    count_cluster = np.zeros(num_cluster)
    for i in range(num_cluster):
        count_cluster[i] = len(np.where(idx == centers[i])[0])

    # build a mapping from item_id to item index
    keys = np.unique(item_ids)
    num_item = len(keys)
    values = range(num_item)
    item_map = dict()
    for i in range(len(keys)):
        item_map.update([(keys[i], values[i])])

    # count the number of objects of each item
    count_item = np.zeros(num_item)
    for i in range(N):
        index = item_map[item_ids[i]]
        count_item[index] = count_item[index] + 1

    # compute purity
    purity = 0
    for i in range(num_cluster):
        member = np.where(idx == centers[i])[0]
        member_ids = item_ids[member]
        count = np.zeros(num_item)
        for j in range(len(member)):
            index = item_map[member_ids[j]]
            count[index] = count[index] + 1
        purity = purity + max(count)

    # compute Normalized Mutual Information (NMI)
    count_cross = np.zeros((num_cluster, num_item))
    for i in range(N):
        index_cluster = np.where(idx[i] == centers)[0]
        index_item = item_map[item_ids[i]]
        count_cross[index_cluster, index_item] = \
            count_cross[index_cluster, index_item] + 1

    # mutual information
    I = 0
    for k in range(num_cluster):
        for j in range(num_item):
            if count_cross[k, j] > 0:
                s = count_cross[k, j] / N * math.log(
                    N * count_cross[k, j] / (count_cluster[k] * count_item[j]))
                I = I + s

    # entropy
    H_cluster = 0
    for k in range(num_cluster):
        s = -count_cluster[k] / N * math.log(count_cluster[k] / float(N))
        H_cluster = H_cluster + s

    H_item = 0
    for j in range(num_item):
        s = -count_item[j] / N * math.log(count_item[j] / float(N))
        H_item = H_item + s

    NMI = 2 * I / (H_cluster + H_item)

    # compute True Positive (TP) plus False Positive (FP)
    tp_fp = 0
    for k in range(num_cluster):
        if count_cluster[k] > 1:
            tp_fp = tp_fp + comb(count_cluster[k], 2)

    # compute True Positive (TP)
    tp = 0
    for k in range(num_cluster):
        member = np.where(idx == centers[k])[0]
        member_ids = item_ids[member]
        count = np.zeros(num_item)
        for j in range(len(member)):
            index = item_map[member_ids[j]]
            count[index] = count[index] + 1
        for i in range(num_item):
            if count[i] > 1:
                tp = tp + comb(count[i], 2)

    # False Positive (FP)
    fp = tp_fp - tp

    # compute False Negative (FN)
    count = 0
    for j in range(num_item):
        if count_item[j] > 1:
            count = count + comb(count_item[j], 2)
    fn = count - tp

    # compute F measure
    P = tp / (tp + fp)
    R = tp / (tp + fn)
    beta = 1
    F = (beta * beta + 1) * P * R / (beta * beta * P + R)

    return NMI, F
def rarefy(x, method='rarefy', size=None, breakNA=True):
    '''
    Docstring for function ecopy.rarefy
    ========================

    Various rarefaction techniques for a site x species matrix.
    All indices computed along rows (axis = 1)

    Use
    ----
    rarefy(x, method='rarefy', size=None, breakNA=True)

    Parameters
    ----------
    x: numpy array or pandas dataframe with observations as rows
        and descriptors as columns
    method: a method used for rarefaction
        rarefy: Calculates estimated richness rarefied to a given sample
            size (see size parameter).
            sum(1 - nCr(N-Ni, size) / nCr(N, size))
        rarecurve: Draws a rarefaction curve for each site (row).
            Rarefaction curves use the following function
            Sn - sum(1 - nCr(N-Ni, i)) / nCr(N, i)
    size: the sample size used in rarefaction. Can be left empty, in which
        case size is the minimum of row sums (number of individuals from
        the sparsest site). Can be a single number, which applies the same
        size to all rows. Can be a numpy array that contains different
        sizes for each site.
    breakNA: should the process halt if the matrix contains any NAs?
        if False, then NA's undergo pairwise deletion during distance
        calculation, such that when calculating the distance between two
        rows, if any species is missing from a row, that species is
        removed from both rows

    Example
    --------
    import ecopy as ep
    BCI = ep.load_data('BCI')

    # calculate rarefied species richness
    rareRich = ep.rarefy(BCI, 'rarefy')

    # draw rarefaction curves
    ep.rarefy(BCI, 'rarecurve')
    '''
    listofmethods = ['rarefy', 'rarecurve']
    if not isinstance(breakNA, bool):
        msg = 'breakNA argument must be boolean'
        raise ValueError(msg)
    if method not in listofmethods:
        msg = 'method argument {0!s} is not an accepted rarefaction method'.format(method)
        raise ValueError(msg)
    if not isinstance(x, (DataFrame, np.ndarray)):
        msg = 'x argument must be a numpy array or pandas dataframe'
        raise ValueError(msg)
    if size is not None:
        if not isinstance(size, (int, float, np.ndarray)):
            msg = 'size must be integer, float, or numpy array'
            raise ValueError(msg)
    if isinstance(x, DataFrame):
        if (x.dtypes == 'object').any():
            msg = 'DataFrame can only contain numeric values'
        if breakNA:
            if x.isnull().any().any():
                msg = 'DataFrame contains null values'
                raise ValueError(msg)
        if (x < 0).any().any():
            msg = 'DataFrame contains negative values'
            raise ValueError(msg)
        if method == 'rarefy':
            if size is None:
                sums = x.apply(sum, axis=1)
                size = np.min(sums)
                rich = x.apply(rare, axis=1, args=(size, ))
                return rich
            else:
                if isinstance(size, (int, float)):
                    rich = x.apply(rare, axis=1, args=(size, ))
                    return rich
                else:
                    if len(size) != len(x):
                        msg = 'length of size does not match number of rows'
                        raise ValueError(msg)
                    z = x.copy()
                    z['size'] = size
                    rich = z.apply(rare_wrapper, axis=1)
                    return rich
        if method == 'rarecurve':
            z = x.copy()
            z.reset_index(inplace=True)
            z.apply(rCurve, axis=1)
            plt.xlabel('Number of Individuals')
            plt.ylabel('Number of Species')
            plt.show()
    if isinstance(x, np.ndarray):
        if breakNA:
            if np.isnan(np.sum(x)):
                msg = 'Array contains null values'
                raise ValueError(msg)
        if (x < 0).any():
            msg = 'Array contains negative values'
            raise ValueError(msg)
        if method == 'rarefy':
            if size is None:
                sums = np.apply_along_axis(np.nansum, 1, x)
                size = np.min(sums)
                rich = np.apply_along_axis(rare, 1, x, size)
                return rich
            else:
                if isinstance(size, (int, float)):
                    rich = np.apply_along_axis(rare, 1, x, size)
                    return rich
                else:
                    if len(size) != x.shape[0]:
                        msg = 'length of size does not match number of rows'
                        raise ValueError(msg)
                    N = np.nansum(x, axis=1)
                    diff = (N[:, np.newaxis] - x).T
                    return np.sum(1 - comb(diff, size) / comb(N, size), axis=0)
        if method == 'rarecurve':
            z = DataFrame(x)
            z.reset_index(inplace=True)
            z.apply(rCurve, axis=1)
            plt.xlabel('Number of Individuals')
            plt.ylabel('Number of Species')
            plt.show()
def mendel_dominant_prob(dom, het, rec):
    # `total` avoids shadowing the built-in `sum`
    total = dom + het + rec
    pairs = comb(total, 2)
    print(pairs)
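The function above only prints the number of parent pairings; a sketch of the full calculation its name suggests (probability that an offspring displays the dominant phenotype, as in Rosalind's IPRB problem) might look like this, assuming random mating between any two of the dom + het + rec organisms:

from scipy.special import comb


def mendel_dominant_probability(dom, het, rec):
    total = dom + het + rec
    pairs = comb(total, 2)
    # pairings that can yield recessive offspring, weighted by that chance:
    # rec x rec -> 1, het x rec -> 1/2, het x het -> 1/4
    recessive = comb(rec, 2) + 0.5 * het * rec + 0.25 * comb(het, 2)
    return 1 - recessive / pairs


print(mendel_dominant_probability(2, 2, 2))  # -> 0.78333...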
def pascal(n, kind='symmetric', exact=True):
    """
    Returns the n x n Pascal matrix.

    The Pascal matrix is a matrix containing the binomial coefficients as
    its elements.

    .. versionadded:: 0.11.0

    Parameters
    ----------
    n : int
        The size of the matrix to create; that is, the result is an n x n
        matrix.
    kind : str, optional
        Must be one of 'symmetric', 'lower', or 'upper'.
        Default is 'symmetric'.
    exact : bool, optional
        If `exact` is True, the result is either an array of type
        numpy.uint64 (if n <= 35) or an object array of Python long integers.
        If `exact` is False, the coefficients in the matrix are computed using
        `scipy.special.comb` with `exact=False`. The result will be a floating
        point array, and the values in the array will not be the exact
        coefficients, but this version is much faster than `exact=True`.

    Returns
    -------
    p : (n, n) ndarray
        The Pascal matrix.

    Notes
    -----
    See http://en.wikipedia.org/wiki/Pascal_matrix for more information
    about Pascal matrices.

    Examples
    --------
    >>> from scipy.linalg import pascal
    >>> pascal(4)
    array([[ 1,  1,  1,  1],
           [ 1,  2,  3,  4],
           [ 1,  3,  6, 10],
           [ 1,  4, 10, 20]], dtype=uint64)
    >>> pascal(4, kind='lower')
    array([[1, 0, 0, 0],
           [1, 1, 0, 0],
           [1, 2, 1, 0],
           [1, 3, 3, 1]], dtype=uint64)
    >>> pascal(50)[-1, -1]
    25477612258980856902730428600
    >>> from scipy.special import comb
    >>> comb(98, 49, exact=True)
    25477612258980856902730428600

    """
    from scipy.special import comb
    if kind not in ['symmetric', 'lower', 'upper']:
        raise ValueError("kind must be 'symmetric', 'lower', or 'upper'")

    if exact:
        if n > 35:
            L_n = np.empty((n, n), dtype=object)
            L_n.fill(0)
        else:
            L_n = np.zeros((n, n), dtype=np.uint64)
        for i in range(n):
            for j in range(i + 1):
                L_n[i, j] = comb(i, j, exact=True)
    else:
        L_n = comb(*np.ogrid[:n, :n])

    # string comparison must use `==`, not identity (`is`)
    if kind == 'lower':
        p = L_n
    elif kind == 'upper':
        p = L_n.T
    else:
        p = np.dot(L_n, L_n.T)

    return p
def test_big(self):
    p = pascal(50)
    assert_equal(p[-1, -1], comb(98, 49, exact=True))