def hypergeom_lh(ho, ha, trial, n, g, N):
    """ Returns likelihood ratio for independently distributed hypergeometric
    random variables.

    Parameters
    ----------
    ho : float
        null hypothesis
    ha : float
        alternative hypothesis
    trial : float
        number of good elements in recent sample
    n : float or int
        sample size
    g : float or int
        number of good elements in sample
    N : float or int
        total population size

    Returns
    -------
    float
        likelihood ratio of model
    """
    ho_G, ha_G = ho * (N / n), ha * (N / n)
    null_lh = (comb(ho_G, g) * comb(N - ho_G, n - g))
    alt_lh = (comb(ha_G, g) * comb(N - ha_G, n - g))
    return alt_lh / null_lh
def test_prior():
    K = 10
    T = 100

    es = EventSegment(K)
    mp = es.model_prior(T)[0]

    p_bound = np.zeros((T, K - 1))
    norm = comb(T - 1, K - 1)
    for t in range(T - 1):
        for k in range(K - 1):
            # See supplementary material of Neuron paper
            # https://doi.org/10.1016/j.neuron.2017.06.041
            p_bound[t + 1, k] = comb(t, k) * comb(T - t - 2, K - k - 2) / norm
    p_bound = np.cumsum(p_bound, axis=0)

    mp_gt = np.zeros((T, K))
    for k in range(K):
        if k == 0:
            mp_gt[:, k] = 1 - p_bound[:, 0]
        elif k == K - 1:
            mp_gt[:, k] = p_bound[:, k - 1]
        else:
            mp_gt[:, k] = p_bound[:, k - 1] - p_bound[:, k]

    assert np.all(np.isclose(mp, mp_gt)), \
        "Prior does not match analytic solution"
def pdf(self, x, k, n, p):
    '''distribution of success runs of length k or more

    Parameters
    ----------
    x : float
        count of runs of length n
    k : int
        length of runs
    n : int
        total number of observations or trials
    p : float
        probability of success in each Bernoulli trial

    Returns
    -------
    pdf : float
        probability that x runs of length of k are observed

    Notes
    -----
    not yet vectorized

    References
    ----------
    Muselli 1996, theorem 3
    '''
    q = 1 - p
    m = np.arange(x, (n + 1) // (k + 1) + 1)[:, None]
    terms = (-1)**(m - x) * comb(m, x) * p**(m * k) * q**(m - 1) \
        * (comb(n - m * k, m - 1) + q * comb(n - m * k, m))
    return terms.sum(0)
def rand_score(labels_true, labels_pred):
    labels_true, labels_pred = check_clusterings(labels_true, labels_pred)
    n_samples = labels_true.shape[0]
    classes = np.unique(labels_true)
    clusters = np.unique(labels_pred)
    # Special limit cases: no clustering since the data is not split;
    # or trivial clustering where each document is assigned a unique cluster.
    # These are perfect matches hence return 1.0.
    if (classes.shape[0] == clusters.shape[0] == 1
            or classes.shape[0] == clusters.shape[0] == 0
            or classes.shape[0] == clusters.shape[0] == len(labels_true)):
        return 1.0

    contingency = contingency_matrix(labels_true, labels_pred)

    # Compute the ARI using the contingency data
    sum_comb_c = sum(comb2(n_c) for n_c in contingency.sum(axis=1))
    sum_comb_k = sum(comb2(n_k) for n_k in contingency.sum(axis=0))
    sum_comb = sum(comb2(n_ij) for n_ij in contingency.flatten())

    t_p = sum_comb
    f_p = sum_comb_c - sum_comb
    f_n = sum_comb_k - sum_comb
    t_n = float(comb(n_samples, 2)) - t_p - f_p - f_n
    result = (t_n + t_p) / float(comb(n_samples, 2))
    return result
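A hedged smoke test for rand_score, assuming check_clusterings and contingency_matrix behave like the sklearn.metrics.cluster helpers and comb2(n) computes comb(n, 2): two labelings that agree up to a renaming of cluster ids should score a perfect 1.0.

import numpy as np

# [0, 0, 1, 1] and [1, 1, 0, 0] induce the same partition, so every pair of
# samples is treated consistently and the Rand index is exactly 1.0.
print(rand_score(np.array([0, 0, 1, 1]), np.array([1, 1, 0, 0])))  # -> 1.0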
def recurTraversal(mean_sep_time, sample):
    # base case
    global total_branch_length, total_mutations
    weight = 0
    if sample.left is None and sample.right is None:
        total_branch_length += sample.time
        identity = str(sample.getIdentity())
        if 'A' not in identity:
            k = 1
        else:
            k = len(sample.descendent_list)
        weight = (k * (sample_size - k)) / comb(sample_size, 2)
        mean_sep_time = mean_sep_time + (weight * sample.time)
        sample.mutations = poisson.rvs(mu * sample.time)
        total_mutations += sample.getMutations()
        return mean_sep_time
    mean_sep_time = recurTraversal(mean_sep_time, sample.right)
    current = sample.right
    while current.next is not None:
        mean_sep_time = recurTraversal(mean_sep_time, current.next)
        current = current.next
    total_branch_length += sample.time
    identity = str(sample.getIdentity())
    if 'A' not in identity:
        k = 1
    else:
        k = len(sample.descendent_list)
    weight = (k * (sample_size - k)) / comb(sample_size, 2)
    mean_sep_time = mean_sep_time + (weight * sample.time)
    sample.mutations = poisson.rvs(mu * sample.time)
    total_mutations += sample.getMutations()
    return mean_sep_time
def runs_prob_odd(self, r):
    n0, n1 = self.n0, self.n1
    k = (r + 1) // 2
    tmp0 = comb(n0 - 1, k - 1)
    tmp1 = comb(n1 - 1, k - 2)
    tmp3 = comb(n0 - 1, k - 2)
    tmp4 = comb(n1 - 1, k - 1)
    return (tmp0 * tmp1 + tmp3 * tmp4) / self.comball
def validateInputData(self):
    self.getDataFrame()
    # check if dataframe -> df exists
    try:
        self.df
    except NameError:
        df_exists = False
    else:
        df_exists = True

    if df_exists:
        df_names = self.df.columns.values
    else:
        print("There was an error loading the Data frame")

    # test the non promo column names
    if df_exists:
        if np.array_equal(stdColNames, df_names[0:stdColCount + 1]):
            stColNamesPass = True
        else:
            stColNamesPass = False

        # test the number of promo columns
        # get the promo columns from the dataframe
        df_pnames = df_names[stdColCount + 1:df_names.size]
        possibleValues = np.zeros((limit, limit))
        for x in range(1, limit):
            for z in range(1, limit):
                if z < 2 and x < 2:
                    possibleValues[x, z] = int(comb(z, 1, exact=False))
                if z >= 2 and z >= x:
                    possibleValues[x, z] = int(possibleValues[x - 1, z]
                                               + comb(z, x, exact=False))
        if df_pnames.size in possibleValues[:, :]:
            print("VALUE FOUND")
            print(df_pnames.size)
            possiblePromoFormat = self.returnIndex2DArray(possibleValues,
                                                          df_pnames.size)
            print(possiblePromoFormat)
        else:
            print("VALUE NOT FOUND. Promos not set up correctly in input file")
            print(df_pnames.size)
        # print(df_pnames)
        # print(df_names)
        print(possibleValues)
        self.findSoloPromos(df_pnames, possiblePromoFormat)
        print('Do the standard variables pass? %s' % (stColNamesPass))
        self.findMultiPromos(df_pnames, possiblePromoFormat)
        # this is the boolean to say the validation passed
        objInitDataVal = True
def _acombr(n, k):
    """Combinations with repetitions"""
    # pylint: disable-msg=invalid-name
    # This uses dynamic programming to compute everything
    num = sps.comb(n, k, repetition=True, exact=True)
    grid = np.zeros((num, n), dtype=int)
    memoized = {}

    # This recursion breaks if asking for numbers that are too large (stack
    # overflow), but the order to fill n and k is predictable; it may be
    # better to use a for loop.
    def fill_region(n, k, region):
        """Recursively fill a region"""
        if n == 1:
            region[0, 0] = k
            return
        elif k == 0:
            region.fill(0)
            return
        if (n, k) in memoized:
            np.copyto(region, memoized[n, k])
            return
        memoized[n, k] = region
        o = 0
        for ki in range(k, -1, -1):
            n_ = n - 1
            k_ = k - ki
            m = sps.comb(n_, k_, repetition=True, exact=True)
            region[o:o + m, 0] = ki
            fill_region(n_, k_, region[o:o + m, 1:])
            o += m

    fill_region(n, k, grid)
    return grid
def return_CAs(amps, N=7):
    """
    Short Summary
    -------------
    Calculate closure amplitudes

    Parameters
    ----------
    amps: 1D float array
        fringe amplitudes

    N: integer
        number of holes

    Returns
    -------
    CAs: 1D float array
        closure amplitudes
    """
    arr = populate_symmamparray(amps, N=N)  # fringe amp array
    nn = 0

    CAs = np.zeros(int(comb(N, 4)))
    for ii in range(N - 3):
        for jj in range(N - ii - 3):
            for kk in range(N - jj - ii - 3):
                for ll in range(N - jj - ii - kk - 3):
                    CAs[nn + ll] = arr[ii, jj + ii + 1] \
                        * arr[ll + ii + jj + kk + 3, kk + jj + ii + 2] \
                        / (arr[ii, kk + ii + jj + 2] *
                           arr[jj + ii + 1, ll + ii + jj + kk + 3])
                nn = nn + ll + 1

    return CAs
def __init__(self, matrix, m_list, num_to_return=1, algo=ALGO_FAST):
    # Setup and checking of inputs
    self._matrix = copy(matrix)
    # Make the matrix diagonally symmetric (so matrix[i,:] == matrix[:,j])
    for i in range(len(self._matrix)):
        for j in range(i, len(self._matrix)):
            value = (self._matrix[i, j] + self._matrix[j, i]) / 2
            self._matrix[i, j] = value
            self._matrix[j, i] = value

    # sort the m_list based on number of permutations
    self._m_list = sorted(m_list,
                          key=lambda x: comb(len(x[2]), x[1]),
                          reverse=True)

    for mlist in self._m_list:
        if mlist[0] > 1:
            raise ValueError('multiplication fractions must be <= 1')
    self._current_minimum = float('inf')
    self._num_to_return = num_to_return
    self._algo = algo
    if algo == EwaldMinimizer.ALGO_COMPLETE:
        raise NotImplementedError('Complete algo not yet implemented for '
                                  'EwaldMinimizer')

    self._output_lists = []
    # Tag that the recurse function looks at at each level. If a method
    # sets this to true it breaks the recursion and stops the search.
    self._finished = False

    self._start_time = datetime.utcnow()

    self.minimize_matrix()

    self._best_m_list = self._output_lists[0][1]
    self._minimized_sum = self._output_lists[0][0]
def k_array_rank(a):
    """
    Given an array `a` of k distinct nonnegative integers, sorted in
    ascending order, return its ranking in the lexicographic ordering of
    the descending sequences of the elements [1]_.

    Parameters
    ----------
    a : ndarray(int, ndim=1)
        Array of length k.

    Returns
    -------
    idx : scalar(int)
        Ranking of `a`.

    References
    ----------
    .. [1] `Combinatorial number system
       <https://en.wikipedia.org/wiki/Combinatorial_number_system>`_,
       Wikipedia.

    """
    k = len(a)
    idx = int(a[0])  # Convert to Python int
    for i in range(1, k):
        idx += comb(a[i], i + 1, exact=True)
    return idx
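A worked instance of the ranking formula (assuming comb is scipy.special.comb, as the function requires): for a = [0, 2, 3] the rank is comb(0, 1) + comb(2, 2) + comb(3, 3) = 0 + 1 + 1 = 2 in the combinatorial number system.

import numpy as np

print(k_array_rank(np.array([0, 2, 3])))  # -> 2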
def filter_df_by_particles_in_frame(data_frame, num_particles, mode='equal'):
    '''Return a DataFrame where just the frames with the requested number of
    particles are present. This only works on DataFrames that have gone
    through find_nn_ver_2

    :param data_frame: The input data_frame
    :param num_particles: The number of particles you want in each frame
    :param mode: Can be 'equal', meaning only frames with exactly
        num_particles are returned. 'less' means only frames with less than
        or equal to num_particles are returned. 'greater' means only frames
        with greater than or equal to num_particles are returned.
    :return data_frame:
    '''
    # The function does not compute the number of particles in each frame
    # but instead the number of entries for a given number of particles.
    # The number of entries = 2 * (num_particles nCr 2)
    from scipy.special import comb
    if num_particles != 1:
        num_particles = 2 * (comb(num_particles, 2))

    data = data_frame.copy()
    part_num_in_frame = data.groupby('frame').apply(len)

    if mode == 'equal':
        return data.set_index('frame')[part_num_in_frame == num_particles].reset_index()
    elif mode == 'less':
        return data.set_index('frame')[part_num_in_frame <= num_particles].reset_index()
    elif mode == 'greater':
        return data.set_index('frame')[part_num_in_frame >= num_particles].reset_index()
def redundant_cps(deltaps, N=7):
    """
    Short Summary
    -------------
    Calculate closure phases for each set of 3 holes

    Parameters
    ----------
    deltaps: 1D float array
        pistons between each pair of holes

    N: integer
        number of holes

    Returns
    -------
    cps: 1D float array
        closure phases
    """
    arr = populate_antisymmphasearray(deltaps, N=N)  # fringe phase array

    cps = np.zeros(int(comb(N, 3)))
    nn = 0
    for kk in range(N - 2):
        for ii in range(N - kk - 2):
            for jj in range(N - kk - ii - 2):
                cps[nn + jj] = arr[kk, ii + kk + 1] \
                    + arr[ii + kk + 1, jj + ii + kk + 2] \
                    + arr[jj + ii + kk + 2, kk]
            nn += jj + 1

    return cps
def coeffs(M):
    """
    Generate the "smooth noise-robust differentiators" as defined in
    Pavel Holoborodko's formula for c_k

    Parameters
    ----------
    M : int
        the order of the differentiator

    Returns
    -------
    c : float
        array of length M with coefficients for k = 1 to M
    """
    m = (2*M - 2)/2
    k = np.arange(1, M+1)
    c = 1./2.**(2*m + 1)*(comb(2*m, m - k + 1) - comb(2*m, m - k - 1))
    return c
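A quick numerical check of these coefficients, under the assumption (not stated in the snippet) that comb is scipy.special.comb and that the derivative estimate is the centered scheme f'(x0) ≈ sum_k c_k * (f(x0 + k*h) - f(x0 - k*h)) / h from Holoborodko's construction; for a smooth function the estimate should match the analytic derivative.

import numpy as np
from scipy.special import comb

h, x0, M = 1e-3, 0.3, 2
c = coeffs(M)                 # for M = 2 this evaluates to [0.25, 0.125]
k = np.arange(1, M + 1)
est = np.sum(c * (np.sin(x0 + k * h) - np.sin(x0 - k * h))) / h
print(est, np.cos(x0))        # the two values should agree closely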
def _incremental_similarity(self, scan, *args, **kwargs):
    new_sims = self._calculate_similarity_with(scan, *args, **kwargs)
    aggregate_size = comb(len(self), 2)
    n = (aggregate_size + len(new_sims))
    if n == 0:
        n = 1
    self._average_similarity = (aggregate_size * self.average_similarity()
                                + sum(new_sims)) / n
def comb(n, k):
    """Return n choose k

    This function works on arrays, and will properly return a python integer
    object if the number is too large to be stored in a 64 bit integer.
    """
    # pylint: disable-msg=invalid-name
    res = np.rint(sps.comb(n, k, False))
    if np.all(res < _MAX_INT_FLOAT):  # pylint: disable=no-else-return
        return res.astype(int)
    elif isinstance(n, abc.Iterable) or isinstance(k, abc.Iterable):
        broad = np.broadcast(np.asarray(n), np.asarray(k))
        res = np.empty(broad.shape, dtype=object)
        res.flat = [sps.comb(n_, k_, True) for n_, k_ in broad]
        return res
    else:
        return sps.comb(n, k, True)
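A sketch of the wrapper's two paths, with assumed stand-ins for the module context it relies on (sps as scipy.special, abc from collections, and _MAX_INT_FLOAT as the largest float that safely round-trips to a 64-bit integer): small results come back on the fast floating-point path, oversized ones as exact Python integers.

import scipy.special as sps
import numpy as np
from collections import abc

_MAX_INT_FLOAT = float(2**53)  # assumed value of the module constant

print(comb(10, 3))         # 120 via the rounded floating-point path
print(comb([200], [100]))  # object array holding the exact 59-digit integer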
def binomial_pmf(n, i, p):
    try:
        return comb(n, i, exact=True) * (p ** i) * ((1 - p) ** (n - i))
    except OverflowError:
        dn = Decimal(n)
        di = Decimal(i)
        dp = Decimal(p)
        x = math.factorial(dn) / (math.factorial(di) * math.factorial(dn - di))
        return float(x * dp ** di * ((1 - dp) ** (dn - di)))
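As a cross-check (not part of the original), the fast path should agree with scipy.stats.binom, assuming comb here is scipy.special.comb:

from scipy.stats import binom

print(binomial_pmf(20, 12, 0.7))  # exact-comb evaluation
print(binom.pmf(12, 20, 0.7))     # should match to floating-point precision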
def test_payoff_values(self):
    possible_values = [0, 1]
    for payoff_array in self.g.payoff_arrays:
        ok_(np.isin(payoff_array, possible_values).all())

    max_num_dominated_subsets = \
        sum([comb(i, self.k, exact=True) for i in range(self.n)])
    ok_(self.g.payoff_arrays[0].sum() <= max_num_dominated_subsets)
    ok_((self.g.payoff_arrays[1].sum(axis=1) == self.k).all())
def getcomb():
    import scipy.special as sp
    fr = open('/Users/shengdongliu/Downloads/0401.txt')
    for line in fr.readlines():
        lineArr = line.strip().split()
        totalcolumn = len(lineArr) - 2
        break
    number = sp.comb(totalcolumn, 2, exact=False)
    return number
def __init__(self, dimension, degree, varnamelist):
    # The original bare expressions (e.g. `isinstance(varnamelist, list)`)
    # discarded their results, so the checks never fired; they are asserted
    # explicitly here so invalid input actually raises.
    try:
        assert isinstance(varnamelist, list)
        assert isinstance(dimension, int)
        assert isinstance(degree, int)
        assert len(varnamelist) == degree
        for var in varnamelist:
            assert isinstance(var, str)
        assert 0 < dimension <= 3
        assert 0 < degree
    except AssertionError:
        raise NameError('dimension and degree of type integer with 0<dimension<=3, 0<degree, '
                        'list with element of type string and length varname == degree')

    varsymbollist = []
    for var in varnamelist:
        varsymbollist.append(symbols(var))

    # calculate the number of degrees of freedom
    self.dofnumber = int(comb(dimension + degree, degree))
    coefvec = MatrixSymbol('c', 1, self.dofnumber)

    monomiallist = [1]
    if dimension == 1:
        for i in range(1, degree + 1):
            for k in range(0, dimension):
                monomiallist.append(pow(varsymbollist[0], i))
    elif dimension == 2:
        for i in range(1, degree + 1):
            monomiallist.append(pow(varsymbollist[0], i))
            for j in range(1, i):
                monomiallist.append(pow(varsymbollist[0], i - j) * pow(varsymbollist[1], j))
            monomiallist.append(pow(varsymbollist[1], i))
    elif dimension == 3:
        for i in range(1, degree + 1):
            monomiallist.append(pow(varsymbollist[0], i))
            for j in range(1, i):
                monomiallist.append(pow(varsymbollist[0], i - j) * pow(varsymbollist[1], j))
            monomiallist.append(pow(varsymbollist[1], i))
            for j in range(1, i):
                monomiallist.append(pow(varsymbollist[0], i - j) * pow(varsymbollist[2], j))
            for j in range(1, i):
                monomiallist.append(pow(varsymbollist[1], i - j) * pow(varsymbollist[2], j))
            monomiallist.append(pow(varsymbollist[2], i))

    self.basis = monomiallist
    self.var = varsymbollist
    funmat = Matrix(coefvec) * Matrix(monomiallist)
    fun = funmat[0]
    self.fun = fun
def indextocomb(ind, k=6, n=45):
    subs = k
    var = 0
    komb = []
    for var in range(k):
        while (n >= subs) and (ind < comb(n, subs)):
            n -= 1
        if n >= subs:
            komb.append(n + 1)
            ind = ind - comb(n, subs)
        else:
            komb.append(subs)
        subs -= 1
    komb.reverse()
    return komb
def theoryE(beta, epsilon=1, m=2, N=4):
    if N == m:
        return -N * np.ones(len(beta))
    else:
        kmax = min(N - m + 1, m)
        E = np.zeros(len(beta))
        temp1 = np.zeros([kmax, len(beta)])
        temp2 = np.zeros([kmax, len(beta)])
        for t in range(kmax):
            k = t + np.float64(1.)
            temp = 1. / k * comb(m - 1, k - 1) * comb(N - m - 1, k - 1) \
                * np.exp(-k * epsilon * beta)
            temp1[t] = temp * (m - k) * epsilon
            temp2[t] = temp
        tempSum1 = np.sum(temp1, axis=0)
        tempSum2 = np.sum(temp2, axis=0)
        # tempSum2 += np.ones(len(beta)) * 1e-10  # for numerical stability
        # tempSum1 += np.ones(len(beta)) * 9e-10
        # pdb.set_trace()
        E = -tempSum1 / tempSum2
        return E
def kbits(n, k):
    # comb must return an exact integer here so it can size the array
    result = np.zeros((comb(n, k, exact=True), n), dtype=bool)
    idx = 0
    for bits in itertools.combinations(range(n), k):
        s = np.zeros(n, dtype=bool)
        for bit in bits:
            s[bit] = 1
        result[idx, :] = s
        idx += 1
    return result
def _local_counts(mnc):
    mnc = [1] + list(mnc)
    kappa = [1]
    for nn, m in enumerate(mnc[1:]):
        n = nn + 1
        kappa.append(m)
        for k in range(1, n):
            num_ways = comb(n - 1, k - 1, exact=True)
            kappa[n] -= num_ways * kappa[k] * mnc[n - k]
    return kappa[1:]
def state_count(self):
    """
    Return the number of combinations, starting with a single attribute
    if Mosaic is colored by class distributions, and two if by Pearson
    """
    n_attrs = len(self.master.discrete_data.domain.attributes)
    min_attrs = 1 if self._compute_class_dists() else 2
    max_attrs = min(n_attrs, self.max_attrs)
    return sum(comb(n_attrs, k, exact=True)
               for k in range(min_attrs, max_attrs + 1))
def Distance(x, y):
    # x = "010011"
    # y = "010101"
    score_run = []
    word_len = len(x)
    score = 0
    run_len = 0
    if x == y:
        print("identical")
        return comb(word_len, 2)
    print("word_len=", word_len)
    # initialization
    for run_len in range(word_len):
        score_run.append(comb(run_len, 2))
    # score_run.pop(0)
    print("score_run= ", score_run)
    num_x = int(x, 2)
    num_y = int(y, 2)
    diff = num_x ^ num_y
    diff_shifted = diff << 1
    change_bit = diff ^ diff_shifted
    print("change_bit=", change_bit)
    for i in range(word_len):
        print("score=", score)
        run_len = leading_zeros(change_bit, word_len)
        print("run_len= ", run_len)
        if run_len == -1:
            break
        # print("run_len= ", run_len)
        # print("change_bit=", change_bit)
        print("score_run[run_len]=", score_run[run_len])
        score += score_run[run_len]
        change_bit <<= run_len
    return score
def _local_counts(kappa):
    mc = [1, 0.0]  # _kappa[0]]  # insert 0-moment and mean
    kappa0 = kappa[0]
    kappa = [1] + list(kappa)
    for nn, m in enumerate(kappa[2:]):
        n = nn + 2
        mc.append(0)
        for k in range(n - 1):
            mc[n] += comb(n - 1, k, exact=True) * kappa[n - k] * mc[k]
    mc[1] = kappa0  # insert mean as first moment by convention
    return mc[1:]
def combtoindex(arg):
    '''
    Compute the index corresponding to the combination given as the
    argument list.
    '''
    arg.reverse()
    result = 0
    k = len(arg)
    for var in range(0, k):
        if (arg[var] - 1) < (k - var):
            result += 0
        else:
            result += comb(arg[var] - 1, k - var)
    return int(result)
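A hedged round-trip smoke test pairing combtoindex with indextocomb (defined earlier in this collection), assuming both see scipy.special.comb: index 0 should correspond to the smallest 6-of-45 draw and map back to 0.

print(indextocomb(0, k=6, n=45))        # -> [1, 2, 3, 4, 5, 6]
print(combtoindex([1, 2, 3, 4, 5, 6]))  # -> 0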
def _local_counts(mc):
    mean = mc[0]
    mc = [1] + list(mc)  # add zero moment = 1
    mc[1] = 0  # define central mean as zero for formula
    mnc = [1, mean]  # zero and first raw moments
    for nn, m in enumerate(mc[2:]):
        n = nn + 2
        mnc.append(0)
        for k in range(n + 1):
            mnc[n] += comb(n, k, exact=True) * mc[k] * mean ** (n - k)
    return mnc[1:]
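A small sanity check of the central-to-raw conversion directly above (this test is an addition, not part of the original module): for a distribution with mean 2 and central moments (1, 0, 3), e.g. N(2, 1), the raw moments come out as 2, 5, 14, 43.

print(_local_counts([2, 1, 0, 3]))  # -> [2, 5, 14, 43]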
def _generate_multielement_entries(self, entries, forced_include=None,
                                   nproc=None):
    """
    Create entries for multi-element Pourbaix construction.

    This works by finding all possible linear combinations of entries that
    can result in the specified composition from the initialized comp_dict.

    Args:
        entries ([PourbaixEntries]): list of pourbaix entries to process
            into MultiEntries
        forced_include ([PourbaixEntries]): list of pourbaix entries that
            must be included in multielement entries
        nproc (int): number of processes to be used in parallel treatment
            of entry combos
    """
    N = len(self._elt_comp)  # No. of elements
    total_comp = Composition(self._elt_comp)
    forced_include = forced_include or []

    # generate all combinations of compounds that have all elements
    entry_combos = [itertools.combinations(
        entries, j + 1 - len(forced_include)) for j in range(N)]
    entry_combos = itertools.chain.from_iterable(entry_combos)
    if forced_include:
        entry_combos = [forced_include + list(ec) for ec in entry_combos]
    entry_combos = filter(lambda x: total_comp < MultiEntry(x).composition,
                          entry_combos)

    # Generate and filter entries
    processed_entries = []
    total = sum([comb(len(entries), j + 1 - len(forced_include))
                 for j in range(N)])
    if total > 1e6:
        warnings.warn("Your pourbaix diagram includes {} entries and may "
                      "take a long time to generate.".format(total))

    # Parallel processing of multi-entry generation
    if nproc is not None:
        f = partial(self.process_multientry, prod_comp=total_comp)
        with Pool(nproc) as p:
            processed_entries = list(tqdm(p.imap(f, entry_combos),
                                          total=total))
        processed_entries = list(filter(bool, processed_entries))
    # Serial processing of multi-entry generation
    else:
        for entry_combo in entry_combos:
            processed_entry = self.process_multientry(entry_combo, total_comp)
            if processed_entry is not None:
                processed_entries.append(processed_entry)

    return processed_entries
def expansion_coeff(angmom, mag, i, j, k):
    r"""Calculate the real solid harmonic expansion coefficient.

    .. math::

        C^{angmom, mag}_{i, j, k} = (-1)^{i + k - shift\_factor}
        \left(\frac{1}{4}\right)^{i} \binom{angmom}{i}
        \binom{angmom - i}{\left|mag\right| + i} \binom{i}{j}
        \binom{\left|mag\right|}{2k},

    where :math:`shift\_factor = 0` if :math:`mag \geq 0` and
    :math:`shift\_factor = 1/2` if :math:`mag < 0`.

    Parameters
    ----------
    angmom : int
        The angular momentum of the Gaussian primitive(s).
    mag : int
        The magnetic quantum number(s) of the Gaussian primitive(s).
    i, j : int
        The generator indices for the expansion coefficient.
    k : float
        The generator index for the expansion coefficient.

    Returns
    -------
    coeff : float
        The real solid harmonic expansion coefficient.

    Raises
    ------
    TypeError
        If `angmom` is not an integer.
        If `mag` is not an integer.
        If `i` is not an integer.
        If `j` is not an integer.
        If `k` is not a float.
    ValueError
        If `angmom` is negative.
        If `mag` has a greater magnitude than `angmom`.
        If `k` is not either an integer (mag >= 0) or a half integer
        (mag < 0).

    """
    if not isinstance(angmom, int):
        raise TypeError("Angular momentum must be an integer.")
    if angmom < 0:
        raise ValueError("Angular momentum must be a non-negative integer.")
    if not isinstance(mag, int):
        raise TypeError("The magnetic quantum number must be an integer.")
    if np.abs(mag) > angmom:
        raise ValueError(
            "The magnetic quantum number must be between -(`angmom`) and `angmom`."
        )
    if not isinstance(i, int):
        raise TypeError("Index `i` must be an integer")
    if not isinstance(j, int):
        raise TypeError("Index `j` must be an integer")
    if isinstance(k, int):
        k = float(k)
    if not isinstance(k, float):
        raise TypeError("Index `k` must be a float.")
    if k != int(k) and mag >= 0:
        raise ValueError(
            "Index `k` must be an integer for non-negative magnetic quantum numbers."
        )
    if k != int(k) + 0.5 and mag < 0:
        raise ValueError(
            "Index `k` must be a half integer for negative magnetic quantum numbers."
        )

    if mag < 0:
        return np.real(
            (complex(-1)) ** (i + k - shift_factor(mag)) * (1 / 4) ** i
            * comb(angmom, i) * comb(angmom - i, np.abs(mag) + i)
            * comb(i, j) * comb(np.abs(mag), 2 * k)
        )
    return ((-1) ** (i + k - shift_factor(mag)) * (1 / 4) ** i
            * comb(angmom, i) * comb(angmom - i, np.abs(mag) + i)
            * comb(i, j) * comb(np.abs(mag), 2 * k))
def shapley_full(df, model, endog, exog, fout):
    # n is the size of the largest sets of permutations
    n = len(exog)
    n_combs = sum([comb(16, k) for k in range(16)])
    # final_matrix = np.zeros((n, n_combs/n))
    final_matrix = [[] for x in range(n)]

    def get_rsquared_for_sets(sets):
        for s in sets:
            features = [exog[i] for i in s]
            fs = []
            for f in features:
                if '+' in f:
                    for ef in f.split('+'):
                        fs.append(ef)
                else:
                    fs.append(f)
            features = fs
            this_model = model(data=df,
                               formula="%s ~ %s" % (endog[0], '+'.join(features)))
            results = this_model.fit(maxiter=5000, disp=False)
            # for OLS
            rsquared = results.rsquared_adj
            # for poisson
            # rsquared = pearsonr(df[endog[0]], this_model.predict(results.params))[0]
            yield (s, rsquared)

    def concat_tuple(tup, final):
        state = ()
        for i in tup:
            state = state + (i, )
        state = state + (final, )
        return state

    start_time = time()

    # these are our R2 for single variable models
    rsquareds = dict()
    for combo, rsquared in get_rsquared_for_sets([(i, ) for i in range(n)]):
        rsquareds[str(combo[-1])] = rsquared
        # the prior rsquared is 0, for the model with no dependent variables
        adjusted_value = (comb(n - 1, 0))**-1 * (rsquared - 0)
        final_matrix[combo[-1]].append(adjusted_value)

    for k in tqdm(range(2, n + 1)):
        for combo, rsquared in get_rsquared_for_sets(
                combinations(range(n), k - 1)):
            combo_string = '.'.join(map(str, sorted(combo)))
            rsquareds[combo_string] = rsquared

    combo, rsquared = list(get_rsquared_for_sets([tuple(range(n))]))[0]
    rsquareds['.'.join(map(str, sorted(combo)))] = rsquared

    # calculate the difference for adding in the new variable
    for k in range(2, n + 1):
        for i in range(n):
            all_but_i = list(range(n))
            del all_but_i[i]
            for prior_combo in combinations(all_but_i, k - 1):
                combo = '.'.join(map(str, sorted(list(prior_combo) + [i])))
                prior_combo = '.'.join(map(str, sorted(prior_combo)))
                diff = rsquareds[combo] - rsquareds[prior_combo]
                final_matrix[i].append((comb(n - 1, k - 1))**-1 * diff)

    print("Ran for %d minutes." % int((time() - start_time) / 60))

    # final model
    combo, rsquared = list(get_rsquared_for_sets([tuple(range(n))]))[0]
    phis = [1 / n * sum(final_matrix[i]) for i in range(n)]
    fout.write('rsquared: ' + str(rsquared) + '\n')
    fout.write('shapely_computed: ' + str(sum(phis)) + '\n')
    for i in range(n):
        # print("%s: %s, %i" % (exog[i], ' '.join(map(str, final_matrix[i])), np.mean(final_matrix[i])))
        fout.write("%s: %.4f, %.2f%%\n"
                   % (exog[i], phis[i], (phis[i] / rsquared) * 100))
def do_problem_four_part_a(sample_size: int, num_students_observed: int,
                           p: float):
    """
    Homework 2, Problem 2, Part A

    We execute a solution to this problem in two parts. First, we compute
    the theoretical solution. That is to say, we compute an exact value for:

    P(Y >= 12 ; p = 0.7) = sum from k=12 to k=20 of (20 choose k) p^k (1-p)^(20-k)
    """
    pmf_y = [
        comb(sample_size, k) * np.power(p, k) * np.power(1 - p, sample_size - k)
        for k in range(0, sample_size + 1)
    ]
    cdf_y = []
    cum_prob = 0
    for i in range(0, 21):
        cum_prob += pmf_y[i]
        cdf_y.append(cum_prob)
    probability = sum(pmf_y[num_students_observed:])
    print(f"""The probability of observing at least {num_students_observed}
students applying probability is: {probability}""")

    width = 0.35
    labels = [f'X={x}' for x in range(0, 21)]
    x_pts = [x for x in range(0, 21)]
    fig, ax = plt.subplots()
    x = np.arange(len(labels))
    rects1 = ax.bar(x - width / 2, pmf_y, width, label='PMF')
    rects2 = ax.bar(x + width / 2, cdf_y, width, label='CDF')

    # Add some text for labels, title and custom x-axis tick labels, etc.
    ax.set_ylabel('Probability')
    ax.set_title('PMF and CDF for Bin(n, k)')
    ax.set_xticks(x)
    ax.set_xticklabels(labels)
    ax.legend()

    def autolabel(rects):
        """Attach a text label above each bar in *rects*, displaying its
        height."""
        for rect in rects:
            height = round(rect.get_height(), 3)
            ax.annotate(
                '{}'.format(height),
                xy=(rect.get_x() + rect.get_width() / 2, height),
                xytext=(0, 3),  # 3 points vertical offset
                textcoords="offset points",
                ha='center',
                va='bottom')

    autolabel(rects1)
    autolabel(rects2)
    fig.tight_layout()
    plt.show()
    return probability
def dicke(state, G, k):
    for i in range(0, 2**len(G.nodes)):
        if num_ones(i) == k:
            state[i] = 1 / (np.sqrt(comb(len(G.nodes), k)))
    return state
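A hedged normalization check for the Dicke state above, on a 4-node graph with k = 2 excitations; num_ones is assumed to be a popcount helper (defined here for the test), and comb is scipy.special.comb as in the snippet.

import numpy as np
import networkx as nx
from scipy.special import comb


def num_ones(i):
    # assumed popcount helper used by dicke()
    return bin(i).count('1')


G = nx.complete_graph(4)
state = dicke(np.zeros(2 ** 4), G, 2)
print(np.sum(state ** 2))  # -> 1.0: C(4, 2) = 6 basis states, each 1/sqrt(6)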
import math
from scipy.special import comb

states = 1
for i in range(1, 10):
    top = math.factorial(9) / math.factorial(9 - i)
    try:
        bottom = math.factorial((i + 1) // 2) * math.factorial(i // 2)
    except ValueError:
        bottom = 1
    states += (top / bottom)
states = states - 8 * (comb(6, 5) + comb(6, 4) + comb(6, 3) + 2 * (comb(6, 4)))
print(states)
# print('round {}, loss={}'.format(round_num, loss))

m = np.dot(test_images, np.asarray(model['weights']))
test_result = m + np.asarray(model['bias'])
y = tf.nn.softmax(test_result)
# tf.arg_max is a deprecated alias of tf.argmax
correct_prediction = tf.equal(tf.argmax(y, 1),
                              tf.argmax(test_labels_onehot, 1))
# print(list(tf.argmax(y, 1).numpy()))
# print(list(tf.argmax(test_labels_onehot, 1).numpy()))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
group_shapley_value.append(accuracy.numpy())
print("combination finished ", time.time() - start_time)
print(str(ss) + "\t" +
      str(group_shapley_value[len(group_shapley_value) - 1]))

agent_shapley = []
for index in range(NUM_AGENT):
    shapley = 0.0
    for j in all_sets:
        if index in j:
            remove_list_index = remove_list_indexed(index, j, all_sets)
            if remove_list_index != -1:
                shapley += (group_shapley_value[shapley_list_indexed(j, all_sets)]
                            - group_shapley_value[remove_list_index]) \
                    / (comb(NUM_AGENT - 1, len(all_sets[remove_list_index])))
    agent_shapley.append(shapley)

for ag_s in agent_shapley:
    print(ag_s)

print("end_time", time.time() - start_time)
def _munp(self, n, c):
    k = np.arange(0, n + 1)
    val = (1.0 / c)**n * np.sum(comb(n, k) * (-1)**k / (1.0 + c * k), axis=0)
    return where(c * n > -1, val, inf)
def b(n, k, CR):
    return comb(n, k, exact=True) * np.power(CR, k) * np.power(1 - CR, n - k)
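Since b(n, k, CR) is the binomial pmf, its values over k should sum to one; a quick check, assuming numpy as np and scipy.special.comb are in scope as the one-liner requires:

print(sum(b(10, k, 0.3) for k in range(11)))  # -> 1.0 (up to rounding)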
# https://atcoder.jp/contests/abc159/tasks/abc159_a
from scipy.special import comb

N, M = map(int, input().split())
c = comb(N + M, 2, exact=True) - ((comb(M, 1, exact=True)) * (comb(N, 1, exact=True)))
print(c)
def invhilbert(n, exact=False):
    """
    Compute the inverse of the Hilbert matrix of order `n`.

    The entries in the inverse of a Hilbert matrix are integers. When `n`
    is greater than 14, some entries in the inverse exceed the upper limit
    of 64 bit integers. The `exact` argument provides two options for
    dealing with these large integers.

    Parameters
    ----------
    n : int
        The order of the Hilbert matrix.
    exact : bool
        If False, the data type of the array that is returned is np.float64,
        and the array is an approximation of the inverse.
        If True, the array is the exact integer inverse array. To represent
        the exact inverse when n > 14, the returned array is an object array
        of long integers. For n <= 14, the exact inverse is returned as an
        array with data type np.int64.

    Returns
    -------
    invh : (n, n) ndarray
        The data type of the array is np.float64 if `exact` is False.
        If `exact` is True, the data type is either np.int64 (for n <= 14)
        or object (for n > 14). In the latter case, the objects in the
        array will be long integers.

    See Also
    --------
    hilbert : Create a Hilbert matrix.

    Notes
    -----
    .. versionadded:: 0.10.0

    Examples
    --------
    >>> from scipy.linalg import invhilbert
    >>> invhilbert(4)
    array([[   16.,  -120.,   240.,  -140.],
           [ -120.,  1200., -2700.,  1680.],
           [  240., -2700.,  6480., -4200.],
           [ -140.,  1680., -4200.,  2800.]])
    >>> invhilbert(4, exact=True)
    array([[   16,  -120,   240,  -140],
           [ -120,  1200, -2700,  1680],
           [  240, -2700,  6480, -4200],
           [ -140,  1680, -4200,  2800]], dtype=int64)
    >>> invhilbert(16)[7,7]
    4.2475099528537506e+19
    >>> invhilbert(16, exact=True)[7,7]
    42475099528537378560

    """
    from scipy.special import comb
    if exact:
        if n > 14:
            dtype = object
        else:
            dtype = np.int64
    else:
        dtype = np.float64
    invh = np.empty((n, n), dtype=dtype)
    for i in range(n):
        for j in range(0, i + 1):
            s = i + j
            invh[i, j] = ((-1)**s * (s + 1) *
                          comb(n + i, n - j - 1, exact) *
                          comb(n + j, n - i - 1, exact) *
                          comb(s, i, exact)**2)
            if i != j:
                invh[j, i] = invh[i, j]
    return invh
# coding:utf-8
import pylab as pl
import numpy as np
from scipy import stats
from scipy.special import comb, perm
import math

n = 16
p = 1 / 3
print(p)
# k = np.arange(15, 16)
# binomail = stats.binom.pmf(k, n, p)
# print(binomail)
print(comb(15, 12) * math.pow(perm(3, 1), 3))
print(math.pow(perm(3, 1), 15))
res = comb(15, 12) / math.pow(perm(3, 1), 15)
print(res * 100)
print(math.pow(1 / 3, 12) * comb(15, 12))
def main():
    ####### Parsing parameters and preparing data #######
    parser = argparse.ArgumentParser(prog='GMM-demux',
                                     conflict_handler='resolve')

    # Positional arguments have * number of arguments atm.
    parser.add_argument('input_path', help="The input path of mtx files from cellRanger pipeline.", nargs="*")
    parser.add_argument('hto_array', help="Names of the HTO tags, separated by ','.", nargs="*")

    # Optional arguments.
    parser.add_argument("-k", "--skip", help="Load a full classification report and skip the mtx folder. Requires a path argument to the full report folder. When specified, the user no longer needs to provide the mtx folder.", type=str)
    parser.add_argument("-x", "--extract", help="Names of the HTO tag(s) to extract, separated by ','. Joint HTO samples are combined with '+', such as 'HTO_1+HTO_2'.", type=str)
    parser.add_argument("-o", "--output", help="The path for storing the Same-Sample-Droplets (SSDs). SSDs are stored in mtx format. Requires a path argument.", type=str, default="SSD_mtx")
    parser.add_argument("-f", "--full", help="Generate the full classification report. Requires a path argument.", type=str)
    parser.add_argument("-c", "--csv", help="Take input in csv format, instead of mmx format.", action='store_true')
    parser.add_argument("-t", "--threshold", help="Provide the confidence threshold value. Requires a float in (0,1). Default value: 0.8", type=float, default=0.8)
    parser.add_argument("-s", "--simplified", help="Generate the simplified classification report. Requires a path argument.", type=str)
    parser.add_argument("-u", "--summary", help="Generate the statistic summary of the dataset, including MSM and SSM rates. Requires an estimated total number of cells in the assay as input.", type=int)
    parser.add_argument("-r", "--report", help="Store the data summary report. Requires a file argument. Only executes if -u is set.", type=str)
    parser.add_argument("-e", "--examine", help="Provide the cell list. Requires a file argument. Only executes if -u is set.", type=str)
    parser.add_argument("-a", "--ambiguous", help="The estimated chance of having a phony GEM getting included in a pure type GEM cluster by the clustering algorithm. Requires a float in (0, 1). Default value: 0.05. Only executes if -e executes.", type=float, default=0.05)

    print("==============================GMM-Demux Initialization==============================")

    args = parser.parse_args()

    confidence_threshold = args.threshold
    print("Confidence threshold:", confidence_threshold)

    # Classify droplets
    if not args.skip:
        # Overwrite the positional arguments
        parser.add_argument('input_path', help="The input path of mtx files from cellRanger pipeline.")
        parser.add_argument('hto_array', help="Names of the HTO tags, separated by ','.")
        args = parser.parse_args()

        input_path = args.input_path
        hto_array = args.hto_array.split(',')

        output_path = args.output
        print("Output directory:", output_path)

        # TODO: add CLR to csv data.
        if args.csv:
            full_df, GMM_df = GMM_IO.read_csv(input_path, hto_array)
        else:
            full_df, GMM_df = GMM_IO.read_cellranger(input_path, hto_array)

        GEM_num = GMM_df.shape[0]
        sample_num = GMM_df.shape[1]

        ####### Run classifier #######
        base_bv_array = compute_venn.obtain_base_bv_array(sample_num)
        # print([int(i) for i in base_bv_array])
        (high_array, low_array) = classify_drops.obtain_arrays(GMM_df)

        # Obtain extract array.
        if args.extract:
            extract_id_ary = []
            tag_name_ary = []

            for tag_name in args.extract.split(','):
                tag_name_ary.append(tag_name.split('+'))

            for tag_ary in tag_name_ary:
                mask = compute_venn.init_mask(sample_num)
                for tag in tag_ary:
                    hto_idx = hto_array.index(tag)
                    bv = compute_venn.set_bit(mask, hto_idx)

                for idx in range(0, len(base_bv_array)):
                    if base_bv_array[idx] == mask:
                        extract_id = idx
                extract_id_ary.append(extract_id)
        else:
            extract_id_ary = None

        # Obtain classification result
        GMM_full_df, class_name_ary = \
            classify_drops.classify_drops(base_bv_array, high_array, low_array,
                                          sample_num, GEM_num, GMM_df.index,
                                          GMM_df.columns.values)

        # Store classification results
        if args.full:
            print("Full classification result is stored in", args.full)
            classify_drops.store_full_classify_result(GMM_full_df,
                                                      class_name_ary,
                                                      args.full)

        if args.simplified:
            ########## Paper Specific ############
            # purified_df = classify_drops.purify_droplets(GMM_full_df, confidence_threshold)
            ########## Paper Specific ############
            print("Simplified classification result is stored in", args.simplified)
            classify_drops.store_simplified_classify_result(GMM_full_df,
                                                            class_name_ary,
                                                            args.simplified,
                                                            sample_num,
                                                            confidence_threshold)

        # Clean up bad drops
        purified_df = classify_drops.purify_droplets(GMM_full_df,
                                                     confidence_threshold)

        # Store SSD result
        print("MSM-free droplets are stored in folder", output_path, "\n")
        SSD_idx = classify_drops.obtain_SSD_list(purified_df, sample_num,
                                                 extract_id_ary)
        SSD_df = GMM_IO.store_cellranger(full_df, SSD_idx, output_path)

        # Record sample names for summary report.
        sample_names = GMM_df.columns

    # Parse the full report.
    else:
        GMM_full_df, sample_num, class_name_ary, sample_names = \
            classify_drops.read_full_classify_result(args.skip)
        base_bv_array = compute_venn.obtain_base_bv_array(sample_num)
        purified_df = classify_drops.purify_droplets(GMM_full_df,
                                                     confidence_threshold)
        SSD_idx = classify_drops.obtain_SSD_list(purified_df, sample_num)

    ####### If extract is enabled, other functions are disabled #######
    if args.extract:
        exit()

    ####### Estimate SSM #######
    if args.summary:
        # Count bad drops
        negative_num, unclear_num = \
            classify_drops.count_bad_droplets(GMM_full_df, confidence_threshold)

        estimated_total_cell_num = args.summary

        # Infer parameters
        HTO_GEM_ary = compute_venn.obtain_HTO_GEM_num(purified_df,
                                                      base_bv_array,
                                                      sample_num)

        params0 = [80000, 0.5]
        for i in range(sample_num):
            params0.append(round(HTO_GEM_ary[i] * estimated_total_cell_num
                                 / sum(HTO_GEM_ary[:sample_num])))

        combination_counter = 0
        try:
            for i in range(1, sample_num + 1):
                combination_counter += comb(sample_num, i, True)
                HTO_GEM_ary_main = HTO_GEM_ary[0:combination_counter]
                params0 = compute_venn.obtain_experiment_params(base_bv_array,
                                                                HTO_GEM_ary_main,
                                                                sample_num,
                                                                estimated_total_cell_num,
                                                                params0)
        except:
            print("GMM cannot find a viable solution that satisfies the "
                  "droplet formation model. SSM rate estimation terminated.")
            sys.exit(0)

        # Legacy parameter estimation
        # (cell_num_ary, drop_num, capture_rate) = compute_venn.obtain_HTO_cell_n_drop_num(purified_df, base_bv_array, sample_num, estimated_total_cell_num, confidence_threshold)
        (drop_num, capture_rate, *cell_num_ary) = params0

        SSM_rate_ary = [estimator.compute_SSM_rate_with_cell_num(cell_num_ary[i], drop_num)
                        for i in range(sample_num)]
        rounded_cell_num_ary = [round(cell_num) for cell_num in cell_num_ary]
        SSD_count_ary = classify_drops.get_SSD_count_ary(purified_df, SSD_idx,
                                                         sample_num)
        count_ary = classify_drops.count_by_class(purified_df, base_bv_array)
        MSM_rate, SSM_rate, singlet_rate = \
            compute_venn.gather_multiplet_rates(count_ary, SSM_rate_ary,
                                                sample_num)

        # Generate report
        full_report_dict = {
            "#Drops": round(drop_num),
            "Capture rate": "%5.2f" % (capture_rate * 100),
            "#Cells": sum(rounded_cell_num_ary),
            "Singlet": "%5.2f" % (singlet_rate * 100),
            "MSM": "%5.2f" % (MSM_rate * 100),
            "SSM": "%5.2f" % (SSM_rate * 100),
            "RSSM": "%5.2f" % (estimator.compute_relative_SSM_rate(SSM_rate, singlet_rate) * 100),
            "Negative": "%5.2f" % (negative_num / GMM_full_df.shape[0] * 100),
            "Unclear": "%5.2f" % (unclear_num / GMM_full_df.shape[0] * 100)
        }
        full_report_columns = [
            "#Drops", "Capture rate", "#Cells", "Singlet", "MSM", "SSM",
            "RSSM", "Negative", "Unclear"
        ]
        full_report_df = pd.DataFrame(full_report_dict, index=["Total"],
                                      columns=full_report_columns)
        print("==============================Full Report==============================")
        print(tabulate(full_report_df, headers='keys', tablefmt='psql'))
        print("\n\n")
        print("==============================Per Sample Report==============================")
        sample_df = pd.DataFrame(data=[
            ["%d" % num for num in rounded_cell_num_ary],
            ["%d" % num for num in SSD_count_ary],
            ["%5.2f" % (num * 100) for num in SSM_rate_ary]
        ], columns=sample_names, index=["#Cells", "#SSDs", "RSSM"])
        print(tabulate(sample_df, headers='keys', tablefmt='psql'))

        if args.report:
            print("\n\n***Summary report is stored in folder", args.report)
            with open(args.report, "w") as report_file:
                report_file.write("==============================Full Report==============================\n")
            with open(args.report, "a") as report_file:
                report_file.write(tabulate(full_report_df, headers='keys', tablefmt='psql'))
            with open(args.report, "a") as report_file:
                report_file.write("\n\n")
                report_file.write("==============================Per Sample Report==============================\n")
            with open(args.report, "a") as report_file:
                report_file.write(tabulate(sample_df, headers='keys', tablefmt='psql'))

        # Verify cell type
        if args.examine:
            print("\n\n==============================Verifying the GEM Cluster==============================")
            ambiguous_rate = args.ambiguous
            print("Ambiguous rate:", ambiguous_rate)
            simplified_df = classify_drops.store_simplified_classify_result(purified_df,
                                                                            class_name_ary,
                                                                            None,
                                                                            sample_num,
                                                                            confidence_threshold)
            cell_list_path = args.examine
            cell_list = [line.rstrip('\n') for line in open(args.examine)]
            cell_list = list(set(cell_list).intersection(simplified_df.index.tolist()))

            ########## Paper Specific ############
            # cell_list_df = pd.read_csv(args.examine, index_col=0)
            # cell_list = cell_list_df.index.tolist()
            ########## Paper Specific ############

            MSM_list = classify_drops.obtain_MSM_list(simplified_df,
                                                      sample_num, cell_list)

            GEM_num = len(cell_list)
            MSM_num = len(MSM_list)

            print("GEM count: ", GEM_num, " | MSM count: ", MSM_num)

            phony_test_pvalue = estimator.test_phony_hypothesis(MSM_num,
                                                                GEM_num,
                                                                rounded_cell_num_ary,
                                                                capture_rate)
            pure_test_pvalue = estimator.test_pure_hypothesis(MSM_num,
                                                              drop_num,
                                                              GEM_num,
                                                              rounded_cell_num_ary,
                                                              capture_rate,
                                                              ambiguous_rate)

            print("Phony-type testing. P-value: ", phony_test_pvalue)
            print("Pure-type testing. P-value: ", pure_test_pvalue)

            cluster_type = ""
            if phony_test_pvalue < 0.01 and pure_test_pvalue > 0.01:
                cluster_type = " pure"
            elif pure_test_pvalue < 0.01 and phony_test_pvalue > 0.01:
                cluster_type = " phony"
            else:
                cluster_type = "n unclear"

            print("Conclusion: The cluster is a" + cluster_type + " cluster.")
for a in range(len(v1)):
    # fetch the Sharpe ratio (`夏普值`) from the properties table (`性質表`)
    sql = "select `夏普值` from `性質表` where name = '" + v1[a] + "'"
    cursor.execute(sql)
    result_select = cursor.fetchall()
    sharp[a] = result_select[0][0]
db.close()

v = len(v1)
while v > 4:
    # while(v > 23):
    # print("gogowhile")
    choose_code = []
    for i in range(int(comb(v, 4))):
        choose_code.append(0)
    db = pymysql.connect("localhost", "root", "esfortest", "etf")
    cursor = db.cursor()
    code = []
    for produce in range(0, len(v1)):
        if record_v1[produce] == 0:
            # code[a] = produce
            code.append(produce)
            a += 1
            # v -= 1
    # w: weight ratio, name: asset name, min_risk: risk
def f1_score(model_generated_cluster_labels, target_labels, feature_coll,
             computed_centroids):
    from scipy.special import comb

    d = np.zeros(len(feature_coll))
    for i in range(len(feature_coll)):
        d[i] = np.linalg.norm(
            feature_coll[i, :] - computed_centroids[model_generated_cluster_labels[i], :])

    labels_pred = np.zeros(len(feature_coll))
    for i in np.unique(model_generated_cluster_labels):
        index = np.where(model_generated_cluster_labels == i)[0]
        ind = np.argmin(d[index])
        cid = index[ind]
        labels_pred[index] = cid

    N = len(target_labels)

    # cluster n_labels
    avail_labels = np.unique(target_labels)
    n_labels = len(avail_labels)

    # count the number of objects in each cluster
    count_cluster = np.zeros(n_labels)
    for i in range(n_labels):
        count_cluster[i] = len(np.where(target_labels == avail_labels[i])[0])

    # build a mapping from item_id to item index
    keys = np.unique(labels_pred)
    num_item = len(keys)
    values = range(num_item)
    item_map = dict()
    for i in range(len(keys)):
        item_map.update([(keys[i], values[i])])

    # count the number of objects of each item
    count_item = np.zeros(num_item)
    for i in range(N):
        index = item_map[labels_pred[i]]
        count_item[index] = count_item[index] + 1

    # compute True Positive (TP) plus False Positive (FP)
    tp_fp = 0
    for k in range(n_labels):
        if count_cluster[k] > 1:
            tp_fp = tp_fp + comb(count_cluster[k], 2)

    # compute True Positive (TP)
    tp = 0
    for k in range(n_labels):
        member = np.where(target_labels == avail_labels[k])[0]
        member_ids = labels_pred[member]
        count = np.zeros(num_item)
        for j in range(len(member)):
            index = item_map[member_ids[j]]
            count[index] = count[index] + 1
        for i in range(num_item):
            if count[i] > 1:
                tp = tp + comb(count[i], 2)

    # False Positive (FP)
    fp = tp_fp - tp

    # compute False Negative (FN)
    count = 0
    for j in range(num_item):
        if count_item[j] > 1:
            count = count + comb(count_item[j], 2)
    fn = count - tp

    # compute F measure
    P = tp / (tp + fp)
    R = tp / (tp + fn)
    beta = 1
    F = (beta * beta + 1) * P * R / (beta * beta * P + R)

    return F
def V(p, M, h):
    v = 0
    for i in range(h, M + 1):
        v = comb(M, i) * (p * pow(S(p, i), i) * pow((1 - S(p, i)), M - i)) + v
    return v
def test_shape(testCOB):
    assert len(testCOB.coex) == comb(testCOB.num_genes(), 2)
    kmeans = KMeans(num_clusters).fit(ints)
    centers = []
    for c in kmeans.cluster_centers_:
        cv2.circle(frame, tuple([int(x) for x in c]), 15, (255, 255, 0), 5, -1)
        if c[1] > heigh_const and c[1] < frame.shape[0] and c[0] < frame.shape[1]:
            centers.append(c)
    centers = np.array(centers)
    centers = centers[np.all(centers > 0, axis=1)]
else:
    h_best = h_last_frame
    white_pixels_old = white_pixels_last_frame[(count - 1) % 10]

used = None
epsilon = 500
done_loops = 0
iters = int(comb(num_clusters, 4) * .9) + 1
if h_best is None and len(centers) > 3 and est:
    est = False
    for _ in range(iters):
        corners = centers[np.random.choice(centers.shape[0], 4, replace=False)]
        if np.linalg.det(np.hstack((corners[:3], [[1], [1], [1]]))) < epsilon:
            continue
        if np.linalg.det(np.hstack((corners[[0, 2, 3]], [[1], [1], [1]]))) < epsilon:
            continue
        if np.linalg.det(np.hstack((corners[[0, 1, 3]], [[1], [1], [1]]))) < epsilon:
            continue
        if np.linalg.det(np.hstack((corners[1:], [[1], [1], [1]]))) < epsilon:
            continue
        done_loops += 1
        unused = corners
# print('\ntest 4')
# ls = np.array([4, 2, 1]).astype(int)
# n = 3
# maxs = np.array([2, 4, 1]).astype(int)
# for i in p_wc(ls, n, maxs):
#     print(i)

# print('\ntest 5')
# ls = np.array([4, 2, 1]).astype(int)
# n = 3
# maxs = np.array([0, 10, 3]).astype(int)
# for i in p_wc(ls, n, maxs):
#     print(i)
# quit()

# count total number of wcs, n=balls, k=boxes
wc_count = lambda n, k: int(comb(n + k - 1, k - 1))


def ind2mass_genseries(N, n, indMat):
    '''
    INPUT:
        N :: Integer                  # number of people
        n :: Integer                  # number of bins
        indMat :: List<List<Float>>   # individual matrix
    OUTPUT:
        List<List<Float>>             # mass matrix according to my generalized
                                      # series formula (without Java speedup)
    '''
cluster_labels = np.load(
    os.path.join(embedding_directory,
                 'cluster_labels_{}.npy'.format(preference)))
n = len(cluster_labels)
assert len(cluster_centres) == (cluster_labels.max() + 1)
cluster_sizes = np.array([(cluster_labels == l).sum()
                          for l in range(len(cluster_centres))])
cluster_centres = cluster_centres[cluster_sizes >= 3]
take_indices = []
for i in cluster_centres:
    for j in cluster_centres:
        if j > i:
            take_indices.append(
                comb(n, 2, exact=True) - comb(n - i, 2, exact=True) + (j - i - 1))
take_indices = np.array(take_indices)
assert len(cluster_centres) == len(squareform(take_indices))
Ds.append(distance_matrix[take_indices])
del distance_matrix

eigenvalues = []
reconerrors = []
np.random.seed(2019)

print('Generating embeddings')
for pref, distances in zip(preferences, Ds):
    print(pref)
def calculateProb(self):
    self.Probs = np.zeros(self.currStep + 1)
    for i in range(self.currStep + 1):
        self.Probs[i] = comb(self.currStep, i) * math.pow(self.pr, i) \
            * math.pow(1 - self.pr, self.currStep - i)
    # reshape returns a new array, so the result must be assigned back
    self.Probs = self.Probs.reshape(1, -1)
print('\nStarting SCF and integral build...')
t = time.time()

# First compute SCF energy using Psi4
scf_e, wfn = psi4.energy('SCF', return_wfn=True)

# Grab data from wavefunction class
C = wfn.Ca()
ndocc = wfn.doccpi()[0]
nmo = wfn.nmo()
nvirt = nmo - ndocc

# Compute size of Hamiltonian in GB
from scipy.special import comb
nDet_S = ndocc * nvirt * 2
nDet_D = 2 * comb(ndocc, 2) * comb(nvirt, 2) + ndocc**2 * nvirt**2
nDet = 1 + nDet_S + nDet_D
H_Size = nDet**2 * 8e-9
print('\nSize of the Hamiltonian Matrix will be %4.2f GB.' % H_Size)
if H_Size > numpy_memory:
    clean()
    raise Exception("Estimated memory utilization (%4.2f GB) exceeds "
                    "numpy_memory limit of %4.2f GB."
                    % (H_Size, numpy_memory))

# Integral generation from Psi4's MintsHelper
t = time.time()
mints = psi4.core.MintsHelper(wfn.basisset())
H = np.asarray(mints.ao_kinetic()) + np.asarray(mints.ao_potential())
print('\nTotal time taken for ERI integrals: %.3f seconds.\n'
      % (time.time() - t))
def __init__(self, triangle, p_critical=.1, total=True):
    def pZlower(z, n, p=0.5):
        return min(1, 2 * binom.cdf(z, n, p))

    self.p_critical = p_critical
    self.total = total
    if triangle.array_backend != 'numpy':
        triangle = triangle.set_backend('numpy')
    else:
        triangle = copy.deepcopy(triangle)
    xp = triangle.get_array_module()
    lr = triangle.link_ratio
    m1 = xp.apply_along_axis(rankdata, 2, lr.values) * (lr.values * 0 + 1)
    med = xp.nanmedian(m1, axis=2, keepdims=True)
    m1large = (xp.nan_to_num(m1) > med) + (lr.values * 0)
    m1small = (xp.nan_to_num(m1) < med) + (lr.values * 0)
    m2large = triangle.link_ratio
    m2large.values = m1large
    m2small = triangle.link_ratio
    m2small.values = m1small
    S = xp.nan_to_num(m2small.dev_to_val().sum(axis=2).values)
    L = xp.nan_to_num(m2large.dev_to_val().sum(axis=2).values)
    z = xp.minimum(L, S)
    n = L + S
    m = xp.floor((n - 1) / 2)
    c = comb(n - 1, m)
    EZ = (n / 2) - c * n / (2**n)
    VarZ = n * (n - 1) / 4 - c * n * (n - 1) / (2**n) + EZ - EZ**2
    if not self.total:
        T = []
        for i in range(0, xp.max(m1large.shape[2:]) + 1):
            T.append([
                pZlower(i, j, 0.5)
                for j in range(0, xp.max(m1large.shape[2:]) + 1)
            ])
        T = np.array(T)
        z_idx, n_idx = z.astype(int), n.astype(int)
        self.probs = T[z_idx, n_idx]
        z_critical = triangle[
            triangle.valuation > triangle.valuation.min()]
        z_critical = z_critical.dev_to_val().dropna().sum('origin') * 0
        z_critical.values = (np.array(self.probs) < p_critical)
        z_critical.odims = ['(All)']
        self.z_critical = z_critical
        self.z = copy.deepcopy(self.z_critical)
        self.z.values = z
        self.z_expectation = copy.deepcopy(self.z_critical)
        self.z_expectation.values = EZ
        self.z_variance = copy.deepcopy(self.z_critical)
        self.z_variance.values = VarZ
    else:
        ci2 = norm.ppf(0.5 - (1 - p_critical) / 2) * xp.sqrt(
            xp.sum(VarZ, axis=-1))
        self.range = (xp.sum(VarZ, axis=-1) + ci2,
                      xp.sum(VarZ, axis=-1) - ci2)
        idx = triangle._idx_table().index
        self.z_critical = pd.DataFrame(
            ((self.range[0] > VarZ.sum(axis=-1)) |
             (VarZ.sum(axis=-1) > self.range[1]))[..., 0],
            columns=triangle.vdims, index=idx)
        self.z = pd.DataFrame(z.sum(axis=-1)[..., 0],
                              columns=triangle.vdims, index=idx)
        self.z_expectation = pd.DataFrame(EZ.sum(axis=-1)[..., 0],
                                          columns=triangle.vdims, index=idx)
        self.z_variance = pd.DataFrame(VarZ.sum(axis=-1)[..., 0],
                                       columns=triangle.vdims, index=idx)
def invpascal(n, kind='symmetric', exact=True):
    """
    Returns the inverse of the n x n Pascal matrix.

    The Pascal matrix is a matrix containing the binomial coefficients as
    its elements.

    Parameters
    ----------
    n : int
        The size of the matrix to create; that is, the result is an n x n
        matrix.
    kind : str, optional
        Must be one of 'symmetric', 'lower', or 'upper'.
        Default is 'symmetric'.
    exact : bool, optional
        If `exact` is True, the result is either an array of type
        ``numpy.int64`` (if `n` <= 35) or an object array of Python integers.
        If `exact` is False, the coefficients in the matrix are computed using
        `scipy.special.comb` with `exact=False`. The result will be a
        floating point array, and for large `n`, the values in the array
        will not be the exact coefficients.

    Returns
    -------
    invp : (n, n) ndarray
        The inverse of the Pascal matrix.

    See Also
    --------
    pascal

    Notes
    -----
    .. versionadded:: 0.16.0

    References
    ----------
    .. [1] "Pascal matrix", https://en.wikipedia.org/wiki/Pascal_matrix
    .. [2] Cohen, A. M., "The inverse of a Pascal matrix", Mathematical
           Gazette, 59(408), pp. 111-112, 1975.

    Examples
    --------
    >>> from scipy.linalg import invpascal, pascal
    >>> invp = invpascal(5)
    >>> invp
    array([[  5, -10,  10,  -5,   1],
           [-10,  30, -35,  19,  -4],
           [ 10, -35,  46, -27,   6],
           [ -5,  19, -27,  17,  -4],
           [  1,  -4,   6,  -4,   1]])

    >>> p = pascal(5)
    >>> p.dot(invp)
    array([[ 1.,  0.,  0.,  0.,  0.],
           [ 0.,  1.,  0.,  0.,  0.],
           [ 0.,  0.,  1.,  0.,  0.],
           [ 0.,  0.,  0.,  1.,  0.],
           [ 0.,  0.,  0.,  0.,  1.]])

    An example of the use of `kind` and `exact`:

    >>> invpascal(5, kind='lower', exact=False)
    array([[ 1., -0.,  0., -0.,  0.],
           [-1.,  1., -0.,  0., -0.],
           [ 1., -2.,  1., -0.,  0.],
           [-1.,  3., -3.,  1., -0.],
           [ 1., -4.,  6., -4.,  1.]])

    """
    from scipy.special import comb

    if kind not in ['symmetric', 'lower', 'upper']:
        raise ValueError("'kind' must be 'symmetric', 'lower' or 'upper'.")

    if kind == 'symmetric':
        if exact:
            if n > 34:
                dt = object
            else:
                dt = np.int64
        else:
            dt = np.float64
        invp = np.empty((n, n), dtype=dt)
        for i in range(n):
            for j in range(0, i + 1):
                v = 0
                for k in range(n - i):
                    v += comb(i + k, k, exact=exact) * comb(i + k,
                                                            i + k - j,
                                                            exact=exact)
                invp[i, j] = (-1)**(i - j) * v
                if i != j:
                    invp[j, i] = invp[i, j]
    else:
        # For the 'lower' and 'upper' cases, we compute the inverse by
        # changing the sign of every other diagonal of the pascal matrix.
        invp = pascal(n, kind=kind, exact=exact)
        if invp.dtype == np.uint64:
            # This cast from np.uint64 to int64 is OK, because if `kind` is
            # not "symmetric", the values in invp are all much less than
            # 2**63.
            invp = invp.view(np.int64)

        # The toeplitz matrix has alternating bands of 1 and -1.
        invp *= toeplitz((-1)**np.arange(n)).astype(invp.dtype)

    return invp
def _comb2(n):
    # the exact version is faster for k == 2: use it by default globally in
    # this module instead of the float approximate variant
    return comb(n, 2, exact=1)
def c(n, k):
    t = (n, k)
    if t not in c_map:
        c_map[t] = sp.comb(n, k, exact=True)
    return c_map[t]
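The memoized wrapper above relies on a module-level cache and on sp being scipy.special; a minimal usage sketch with both assumptions made explicit:

import scipy.special as sp

c_map = {}  # assumed module-level cache consulted by c()

print(c(10, 3))  # 120, computed via sp.comb and stored in c_map
print(c(10, 3))  # 120, served from the cache on the second call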
def compute_clutering_metric(idx, item_ids):
    N = len(idx)

    # cluster centers
    centers = np.unique(idx)
    num_cluster = len(centers)
    # print('Number of clusters: %d\n' % num_cluster)

    # count the number of objects in each cluster
    count_cluster = np.zeros(num_cluster)
    for i in range(num_cluster):
        count_cluster[i] = len(np.where(idx == centers[i])[0])

    # build a mapping from item_id to item index
    keys = np.unique(item_ids)
    num_item = len(keys)
    values = range(num_item)
    item_map = dict()
    for i in range(len(keys)):
        item_map.update([(keys[i], values[i])])

    # count the number of objects of each item
    count_item = np.zeros(num_item)
    for i in range(N):
        index = item_map[item_ids[i]]
        count_item[index] = count_item[index] + 1

    # compute purity
    purity = 0
    for i in range(num_cluster):
        member = np.where(idx == centers[i])[0]
        member_ids = item_ids[member]
        count = np.zeros(num_item)
        for j in range(len(member)):
            index = item_map[member_ids[j]]
            count[index] = count[index] + 1
        purity = purity + max(count)

    # compute Normalized Mutual Information (NMI)
    count_cross = np.zeros((num_cluster, num_item))
    for i in range(N):
        index_cluster = np.where(idx[i] == centers)[0]
        index_item = item_map[item_ids[i]]
        count_cross[index_cluster, index_item] = \
            count_cross[index_cluster, index_item] + 1

    # mutual information
    I = 0
    for k in range(num_cluster):
        for j in range(num_item):
            if count_cross[k, j] > 0:
                s = count_cross[k, j] / N * math.log(
                    N * count_cross[k, j] / (count_cluster[k] * count_item[j]))
                I = I + s

    # entropy
    H_cluster = 0
    for k in range(num_cluster):
        s = -count_cluster[k] / N * math.log(count_cluster[k] / float(N))
        H_cluster = H_cluster + s

    H_item = 0
    for j in range(num_item):
        s = -count_item[j] / N * math.log(count_item[j] / float(N))
        H_item = H_item + s

    NMI = 2 * I / (H_cluster + H_item)

    # compute True Positive (TP) plus False Positive (FP)
    tp_fp = 0
    for k in range(num_cluster):
        if count_cluster[k] > 1:
            tp_fp = tp_fp + comb(count_cluster[k], 2)

    # compute True Positive (TP)
    tp = 0
    for k in range(num_cluster):
        member = np.where(idx == centers[k])[0]
        member_ids = item_ids[member]
        count = np.zeros(num_item)
        for j in range(len(member)):
            index = item_map[member_ids[j]]
            count[index] = count[index] + 1
        for i in range(num_item):
            if count[i] > 1:
                tp = tp + comb(count[i], 2)

    # False Positive (FP)
    fp = tp_fp - tp

    # compute False Negative (FN)
    count = 0
    for j in range(num_item):
        if count_item[j] > 1:
            count = count + comb(count_item[j], 2)
    fn = count - tp

    # compute F measure
    P = tp / (tp + fp)
    R = tp / (tp + fn)
    beta = 1
    F = (beta * beta + 1) * P * R / (beta * beta * P + R)

    return NMI, F
def rarefy(x, method='rarefy', size=None, breakNA=True):
    '''
    Docstring for function ecopy.rarefy
    ========================

    Various rarefaction techniques for a site x species matrix.
    All indices computed along rows (axis = 1)

    Use
    ----
    rarefy(x, method='rarefy', size=None, breakNA=True)

    Parameters
    ----------
    x: numpy array or pandas dataframe with observations as rows
        and descriptors as columns
    method: a method used for rarefaction
        rarefy: Calculates estimated richness rarefied to a given sample
            size (see size parameter).
            sum(1 - nCr(N-Ni, size) / nCr(N, size))
        rarecurve: Draws a rarefaction curve for each site (row).
            Rarefaction curves use the following function
            Sn - sum(1 - nCr(N-Ni, i)) / nCr(N, i)
    size: the sample size used in rarefaction. Can be left empty, in which
        case size is the minimum of row sums (number of individuals from
        the sparsest site). Can be a single number, which applies the same
        size to all rows. Can be a numpy array that contains different
        sizes for each site.
    breakNA: should the process halt if the matrix contains any NAs?
        if False, then NA's undergo pairwise deletion during distance
        calculation, such that when calculating the distance between two
        rows, if any species is missing from a row, that species is
        removed from both rows

    Example
    --------
    import ecopy as ep
    BCI = ep.load_data('BCI')

    # calculate rarefied species richness
    rareRich = ep.rarefy(BCI, 'rarefy')

    # draw rarefaction curves
    ep.rarefy(BCI, 'rarecurve')
    '''
    listofmethods = ['rarefy', 'rarecurve']
    if not isinstance(breakNA, bool):
        msg = 'breakNA argument must be boolean'
        raise ValueError(msg)
    if method not in listofmethods:
        msg = 'method argument {0!s} is not an accepted rarefaction method'.format(method)
        raise ValueError(msg)
    if not isinstance(x, (DataFrame, np.ndarray)):
        msg = 'x argument must be a numpy array or pandas dataframe'
        raise ValueError(msg)
    if size is not None:
        if not isinstance(size, (int, float, np.ndarray)):
            msg = 'size must be integer, float, or numpy array'
            raise ValueError(msg)
    if isinstance(x, DataFrame):
        if (x.dtypes == 'object').any():
            msg = 'DataFrame can only contain numeric values'
        if breakNA:
            if x.isnull().any().any():
                msg = 'DataFrame contains null values'
                raise ValueError(msg)
        if (x < 0).any().any():
            msg = 'DataFrame contains negative values'
            raise ValueError(msg)
        if method == 'rarefy':
            if size is None:
                sums = x.apply(sum, axis=1)
                size = np.min(sums)
                rich = x.apply(rare, axis=1, args=(size, ))
                return rich
            else:
                if isinstance(size, (int, float)):
                    rich = x.apply(rare, axis=1, args=(size, ))
                    return rich
                else:
                    if len(size) != len(x):
                        msg = 'length of size does not match number of rows'
                        raise ValueError(msg)
                    z = x.copy()
                    z['size'] = size
                    rich = z.apply(rare_wrapper, axis=1)
                    return rich
        if method == 'rarecurve':
            z = x.copy()
            z.reset_index(inplace=True)
            z.apply(rCurve, axis=1)
            plt.xlabel('Number of Individuals')
            plt.ylabel('Number of Species')
            plt.show()
    if isinstance(x, np.ndarray):
        if breakNA:
            if np.isnan(np.sum(x)):
                msg = 'Array contains null values'
                raise ValueError(msg)
        if (x < 0).any():
            msg = 'Array contains negative values'
            raise ValueError(msg)
        if method == 'rarefy':
            if size is None:
                sums = np.apply_along_axis(np.nansum, 1, x)
                size = np.min(sums)
                rich = np.apply_along_axis(rare, 1, x, size)
                return rich
            else:
                if isinstance(size, (int, float)):
                    rich = np.apply_along_axis(rare, 1, x, size)
                    return rich
                else:
                    if len(size) != x.shape[0]:
                        msg = 'length of size does not match number of rows'
                        raise ValueError(msg)
                    N = np.nansum(x, axis=1)
                    diff = (N[:, np.newaxis] - x).T
                    return np.sum(1 - comb(diff, size) / comb(N, size), axis=0)
        if method == 'rarecurve':
            z = DataFrame(x)
            z.reset_index(inplace=True)
            z.apply(rCurve, axis=1)
            plt.xlabel('Number of Individuals')
            plt.ylabel('Number of Species')
            plt.show()
def mendel_dominant_prob(dom, het, rec):
    # `total` avoids shadowing the built-in `sum`
    total = dom + het + rec
    pairs = comb(total, 2)
    print(pairs)
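The function above only prints the number of parent pairings; a sketch of the full calculation its name suggests (probability that an offspring displays the dominant phenotype, as in Rosalind's IPRB problem) might look like this, assuming random mating between any two of the dom + het + rec organisms:

from scipy.special import comb


def mendel_dominant_probability(dom, het, rec):
    total = dom + het + rec
    pairs = comb(total, 2)
    # pairings that can yield recessive offspring, weighted by that chance:
    # rec x rec -> 1, het x rec -> 1/2, het x het -> 1/4
    recessive = comb(rec, 2) + 0.5 * het * rec + 0.25 * comb(het, 2)
    return 1 - recessive / pairs


print(mendel_dominant_probability(2, 2, 2))  # -> 0.78333...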
def pascal(n, kind='symmetric', exact=True):
    """
    Returns the n x n Pascal matrix.

    The Pascal matrix is a matrix containing the binomial coefficients as
    its elements.

    .. versionadded:: 0.11.0

    Parameters
    ----------
    n : int
        The size of the matrix to create; that is, the result is an n x n
        matrix.
    kind : str, optional
        Must be one of 'symmetric', 'lower', or 'upper'.
        Default is 'symmetric'.
    exact : bool, optional
        If `exact` is True, the result is either an array of type
        numpy.uint64 (if n <= 35) or an object array of Python long integers.
        If `exact` is False, the coefficients in the matrix are computed using
        `scipy.special.comb` with `exact=False`. The result will be a floating
        point array, and the values in the array will not be the exact
        coefficients, but this version is much faster than `exact=True`.

    Returns
    -------
    p : (n, n) ndarray
        The Pascal matrix.

    Notes
    -----
    See http://en.wikipedia.org/wiki/Pascal_matrix for more information
    about Pascal matrices.

    Examples
    --------
    >>> from scipy.linalg import pascal
    >>> pascal(4)
    array([[ 1,  1,  1,  1],
           [ 1,  2,  3,  4],
           [ 1,  3,  6, 10],
           [ 1,  4, 10, 20]], dtype=uint64)
    >>> pascal(4, kind='lower')
    array([[1, 0, 0, 0],
           [1, 1, 0, 0],
           [1, 2, 1, 0],
           [1, 3, 3, 1]], dtype=uint64)
    >>> pascal(50)[-1, -1]
    25477612258980856902730428600
    >>> from scipy.special import comb
    >>> comb(98, 49, exact=True)
    25477612258980856902730428600

    """
    from scipy.special import comb
    if kind not in ['symmetric', 'lower', 'upper']:
        raise ValueError("kind must be 'symmetric', 'lower', or 'upper'")

    if exact:
        if n > 35:
            L_n = np.empty((n, n), dtype=object)
            L_n.fill(0)
        else:
            L_n = np.zeros((n, n), dtype=np.uint64)
        for i in range(n):
            for j in range(i + 1):
                L_n[i, j] = comb(i, j, exact=True)
    else:
        L_n = comb(*np.ogrid[:n, :n])

    # string comparison must use `==`, not identity (`is`)
    if kind == 'lower':
        p = L_n
    elif kind == 'upper':
        p = L_n.T
    else:
        p = np.dot(L_n, L_n.T)

    return p
def test_big(self):
    p = pascal(50)
    assert_equal(p[-1, -1], comb(98, 49, exact=True))