def aggregate_vector_quantities(v_fine, fine_bds, coarse_bds, pyramid):
    '''Aggregates an age-structured contact matrix to return the corresponding
    transmission matrix under a coarser age structure.'''
    aggregator = make_aggregator(coarse_bds, fine_bds)

    # The Prem et al. estimates cut off at 80, so we group all >75 year olds
    # into one class for consistency with these estimates:
    pyramid[len(fine_bds) - 1] = sum(pyramid[len(fine_bds) - 1:])
    pyramid = pyramid[:len(fine_bds) - 1]
    # Normalise to give proportions
    pyramid = pyramid / nsum(pyramid)
    # sparse matrix defined here just splits pyramid into rows corresponding to
    # coarse boundaries, then summing each row gives the aggregated pyramid
    row_cols = (aggregator, arange(len(aggregator)))
    agg_pop_pyramid = nsum(
        sparse((pyramid, row_cols)), axis=1).getA().squeeze()

    rel_weights = pyramid / agg_pop_pyramid[aggregator]

    # Now define contact matrix with age classes from Li et al data
    pop_weight_matrix = sparse((rel_weights, row_cols))

    return pop_weight_matrix * v_fine
def test_mst_matrix_as_array(self):
    # Verifies MST matrix func returns array with dict/trees in each cell
    for i in self.ifgs[3:]:
        i.phase_data[0, 1] = 0  # partial stack of NODATA to one cell

    for i in self.ifgs:
        i.convert_to_nans()  # zeros to NaN/NODATA

    epochs = algorithm.get_epochs(self.ifgs)[0]
    res = mst._mst_matrix_as_array(self.ifgs)
    ys, xs = res.shape

    for y, x in product(range(ys), range(xs)):
        r = res[y, x]
        num_nodes = len(r)
        self.assertTrue(num_nodes < len(epochs.dates))

        stack = array([i.phase_data[y, x] for i in self.ifgs])  # 17 ifg stack
        self.assertTrue(0 == nsum(stack == 0))  # all 0s should be converted
        nc = nsum(isnan(stack))
        exp_count = len(epochs.dates) - 1

        if nc == 0:
            self.assertEqual(num_nodes, exp_count)
        elif nc > 5:
            # rough test: too many nans must reduce the total tree size
            self.assertTrue(num_nodes <= (17 - nc))
def grow(self):
    """Grow the population to carrying capacity

    The final population size is determined based on the proportion of
    producers present. This population is determined by drawing from a
    multinomial with the probability of each genotype proportional to its
    abundance times its fitness.
    """
    if self.is_empty():
        return

    if not self.diluted:
        return

    landscape = self.metapopulation.fitness_landscape

    final_size = self.capacity_min + \
        (self.capacity_max - self.capacity_min) * \
        self.prop_producers()

    grow_probs = self.abundances * (landscape / nsum(landscape))

    if nsum(grow_probs) > 0:
        norm_grow_probs = grow_probs / nsum(grow_probs)
        self.abundances = multinomial(final_size, norm_grow_probs, 1)[0]
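# Illustrative sketch of the growth draw above (numpy only; the abundance and
# fitness values are made up for illustration): final abundances are drawn
# from a multinomial whose probabilities are abundance * relative fitness,
# normalised to sum to one.
import numpy as np

abundances = np.array([100, 50, 10])
landscape = np.array([1.0, 1.2, 0.8])            # per-genotype fitness
probs = abundances * (landscape / landscape.sum())
probs = probs / probs.sum()
final_size = 500
print(np.random.multinomial(final_size, probs, 1)[0])   # sums to final_size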
def get_pos_norm(pos):
    """Translate positions so that centroid is in the origin and return mean
    norm of the translated positions."""
    pos = Positions.create(pos)
    assert (len(pos) == 1)
    n = len(pos[0])
    p = zeros((n, 2))
    for i, node in enumerate(pos[0]):
        p[i, :] = pos[0][node][:2]
    centroid = p.sum(axis=0) / n
    p -= tile(centroid, (n, 1))
    p_norm = nsum(sqrt(nsum(square(p), axis=1))) / n
    return p_norm
def get_pos_norm(pos):
    """Translate positions so that centroid is in the origin and return mean
    norm of the translated positions."""
    pos = Positions.create(pos)
    assert(len(pos) == 1)
    n = len(pos[0])
    p = zeros((n, 2))
    for i, node in enumerate(pos[0]):
        p[i, :] = pos[0][node]
    centroid = p.sum(axis=0) / n
    p -= tile(centroid, (n, 1))
    p_norm = nsum(sqrt(nsum(square(p), axis=1))) / n
    return p_norm
def lu_inplace(A, L, U):
    """
    LU Decomposition

    Takes a square `numpy.array` A and decomposes it into the lower and upper
    matrices `L` and `U`. This scales as O(n^3).

    This is a special variant that takes the `L` and `U` arrays pre-initialised
    to zero. If you're iterating over a lot of matrices, this can save you from
    allocating new `L` and `U` matrices on each iteration. From profiling, this
    saves ~10-20% of the runtime for small matrices.

    Examples
    --------
    This is the example from wikipedia:
    https://en.wikipedia.org/wiki/LU_decomposition

    >>> import numpy as np
    >>> A = np.array([[4, 3], [6, 3]], dtype=np.float32)
    >>> L, U = np.zeros_like(A), np.zeros_like(A)
    >>> lu_inplace(A, L, U)
    >>> L
    array([[ 1. , -0. ],
           [ 1.5,  1. ]], dtype=float32)
    >>> U
    array([[ 4. ,  3. ],
           [ 0. , -1.5]], dtype=float32)
    """
    assert A.shape[0] == A.shape[1]
    assert L.shape[0] == L.shape[1] == A.shape[0]
    assert U.shape[0] == U.shape[1] == A.shape[0]

    size = len(A)
    for i in range(size):
        for k in range(size):
            total = nsum(L[i, 0:i] * U[0:i, k])
            U[i, k] = A[i, k] - total
        for k in range(size):
            if i == k:
                L[i, i] = 1.0
            else:
                total = nsum(L[k, 0:i] * U[0:i, i])
                L[k, i] = (A[k, i] - total) / U[i, i]
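# A minimal verification sketch (assumes numpy as np and the lu_inplace above;
# the test matrix is made up): the product L @ U should reconstruct A up to
# floating-point rounding.
import numpy as np

A = np.array([[2.0, 1.0, 1.0],
              [4.0, -6.0, 0.0],
              [-2.0, 7.0, 2.0]], dtype=np.float32)
L, U = np.zeros_like(A), np.zeros_like(A)
lu_inplace(A, L, U)
assert np.allclose(L @ U, A, atol=1e-5)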
def sig_q_e(e):
    '''Summation / integration over the random domain'''
    q_e_grid = q(e, Theta_la[:, None, None, None, None],
                 Theta_xi[None, :, None, None, None],
                 Theta_E[None, None, :, None, None],
                 Theta_th[None, None, None, :, None],
                 Theta_A[None, None, None, None, :])
    q_dG_grid = q_e_grid ** 2 * dG_grid
    return sqrt(nsum(q_dG_grid) - mu_q_e(e) ** 2)
def mu_q_e(e):
    '''Summation / integration over the random domain'''
    q_e_grid = q(e, Theta_la[:, None, None, None, None],
                 Theta_xi[None, :, None, None, None],
                 Theta_E[None, None, :, None, None],
                 Theta_th[None, None, None, :, None],
                 Theta_A[None, None, None, None, :])
    # element-by-element product of two (m, m) arrays
    q_dG_grid = q_e_grid * dG_grid
    return nsum(q_dG_grid)  # nsum has been imported at line 3 from numpy
def sig_q_e_LHS(e):
    '''Summation / integration over the random domain'''
    q_e_grid = q(e, T_la[:, None, None, None, None],
                 T_xi[None, :, None, None, None],
                 T_E[None, None, :, None, None],
                 T_th[None, None, None, :, None],
                 T_A[None, None, None, None, :])
    q_dG_grid = q_e_grid ** 2 / n_int ** n_k
    return sqrt(nsum(q_dG_grid) - mu_q_e_LHS(e) ** 2)
def update(self, Y, S, YY=None):
    '''
    mur.update(Y, S, YY=None)
    Y: data_dim x data_len
    S: n_states x data_len
    prior: NormalWishart
    post: NormalWishart
    '''
    sum_S = nsum(S, 1)
    # --- beta
    beta_h = self.prior.beta + sum_S
    # --- nu
    nu_h = self.prior.nu + sum_S
    # --- mu
    sum_SY = einsum('kt,dt->dk', S, Y)
    beta_m = einsum('k,dk->dk', self.prior.beta, self.prior.mu)
    mu_h = einsum('dk,k->dk', sum_SY + beta_m, 1.0 / beta_h)
    # --- W
    if YY is None:
        sum_SYY = einsum('kt,dt,et->dek', S, Y, Y)
    else:
        sum_SYY = einsum('kt,det->dek', S, YY)
    bmm = einsum('dk,ek->dek', beta_m, self.prior.mu)
    bmmh = einsum('k,dk,ek->dek', beta_h, mu_h, mu_h)
    inv_W_h = sum_SYY + bmm - bmmh + self.prior.inv_W
    self.post.set_params(beta=beta_h, nu=nu_h, mu=mu_h, inv_W=inv_W_h)
def __init__(self, spec):
    self.spec = deepcopy(spec)

    self.trange = arange(0, spec['final_time'], spec['h'])
    self.prev = zeros_like(self.trange)
    self.pdi = zeros_like(self.trange)  # Person-days in isolation

    self.rep = 1.0 / spec['latent_period']
    self.rpi = 1.0 / spec['prodrome_period']
    self.rir = 1.0 / spec['infectious_period']
    self.beta_p = spec['RGp'] / spec['prodrome_period']
    self.beta_i = spec['RGi'] / spec['infectious_period']

    self.tau_p = (spec['SAPp'] * self.rpi * (2.0**self.spec['eta'])) \
        / (1.0 - spec['SAPp'])
    self.tau_i = (spec['SAPi'] * self.rir * (2.0**spec['eta'])) \
        / (1.0 - self.spec['SAPi'])

    self.weights = array(spec['pages']) / nsum(spec['pages'])
    self.nmax = len(self.weights)
    self.nbar = self.weights @ arange(1, self.nmax + 1)

    self.prav = zeros(self.nmax)  # Probability of avoiding by household size

    self._initalise_matrices()

    if 'doubling_time' in spec:
        self._rescale_beta()
def _resample(data, xscale, yscale, thresh):
    """
    Resamples/averages 'data' to return an array from the averaging of blocks
    of several tiles in 'data'. NB: Assumes incoherent cells are NaNs.

    :param data: source array to resample to different size
    :param xscale: number of cells to average along X axis
    :param yscale: number of Y axis cells to average
    :param thresh: minimum allowable proportion of NaN cells (range from
        0.0-1.0), eg. 0.25 = 1/4 or more as NaNs results in a NaN value for
        the output cell.
    """
    # TODO: make more efficient
    if thresh < 0 or thresh > 1:
        raise ValueError("threshold must be >= 0 and <= 1")

    xscale = int(xscale)
    yscale = int(yscale)
    ysize, xsize = data.shape
    xres, yres = int(xsize / xscale), int(ysize / yscale)
    dest = zeros((yres, xres), dtype=float32) * nan
    tile_cell_count = xscale * yscale

    # calc mean without nans (fractional threshold ignores tiles
    # with excess NaNs)
    for x in range(xres):
        for y in range(yres):
            tile = data[y * yscale:(y + 1) * yscale,
                        x * xscale:(x + 1) * xscale]
            nan_fraction = nsum(isnan(tile)) / float(tile_cell_count)
            if nan_fraction < thresh or (nan_fraction == 0 and thresh == 0):
                dest[y, x] = nanmean(tile)
    return dest
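# A minimal usage sketch for _resample (assumes numpy is available and the
# numpy names used by the function are imported in its module; the input grid
# is made up): downsample a 4x4 grid to 2x2 with a 50% NaN threshold, so the
# block containing two NaNs stays NaN in the output.
import numpy as np

data = np.arange(16, dtype=np.float32).reshape(4, 4)
data[0, 0] = np.nan
data[0, 1] = np.nan
out = _resample(data, xscale=2, yscale=2, thresh=0.5)
print(out)   # top-left block -> nan (2/4 NaNs >= 0.5), others -> block means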
def calc_r2(y_hat, y):
    try:
        y_mean = y.mean()
    except:
        try:
            y_form = []
            for i in range(len(y)):
                y_form.append(y[i][0])
            y_mean = sum(y_form) / len(y_form)
        except:
            raise ValueError('Check input data format.')
    s_res = nsum(square(_get_diff(y_hat, y)))
    s_tot = nsum(square(_get_diff(y, y_mean)))
    return 1 - (s_res / s_tot)
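# Illustrative check of the R^2 formula above (a sketch assuming numpy only;
# it does not rely on the module's _get_diff helper, and the toy values are
# made up): R^2 = 1 - SS_res / SS_tot.
import numpy as np

y = np.array([3.0, -0.5, 2.0, 7.0])
y_hat = np.array([2.5, 0.0, 2.0, 8.0])
ss_res = np.sum(np.square(y_hat - y))
ss_tot = np.sum(np.square(y - y.mean()))
print(round(1 - ss_res / ss_tot, 4))   # ~0.9486 for this toy data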
def RHS(self, N, t):
    dNdt = zeros_like(N)

    if self.gamma is not None and self.betadxi is not None:
        # Death breakup term
        dNdt[1:] -= N[1:] * self.gamma[1:]
        dNdt[:-1] += self.nu * dot(
            self.betadxi[:-1, 1:], N[1:] * self.gamma[1:])

    if self.Q is not None:
        Cd = zeros_like(dNdt)
        # integer division so the loop indices stay valid array indices
        for i in arange(self.number_of_classes // 2):
            ind = slice(i, self.number_of_classes - i - 1)
            Cb = self.Q[i, ind] * N[i] * N[ind]
            Cd[i] += nsum(Cb)
            Cd[(i + 1):(self.number_of_classes - i - 1)] += Cb[1:]
            Cb[0] = 0.5 * Cb[0]
            dNdt[(2 * i + 1):] += Cb
        dNdt -= Cd

    if self.theta is not None:
        dNdt += (self.n0 * self.A0 - N / self.theta)
    # print('Time = {0:g}'.format(t))
    return dNdt
def _ref_pixel_multi(g, half_patch_size, phase_data_or_ifg_paths, thresh, params):
    """
    Convenience function for ref pixel optimisation
    """
    # pylint: disable=invalid-name
    # phase_data_or_ifg is list of ifgs
    y, x = g
    if isinstance(phase_data_or_ifg_paths[0], str):
        # this consumes a lot less memory
        # one ifg.phase_data in memory at any time
        data = []
        output_dir = params[C.TMPDIR]
        for p in phase_data_or_ifg_paths:
            data_file = os.path.join(
                output_dir,
                'ref_phase_data_{b}_{y}_{x}.npy'.format(
                    b=os.path.basename(p).split('.')[0], y=y, x=x))
            data.append(np.load(file=data_file))
    else:  # phase_data_or_ifg is phase_data list
        data = [p[y - half_patch_size:y + half_patch_size + 1,
                  x - half_patch_size:x + half_patch_size + 1]
                for p in phase_data_or_ifg_paths]
    valid = [nsum(~isnan(d)) > thresh for d in data]
    if all(valid):  # ignore if 1+ ifgs have too many incoherent cells
        sd = [std(i[~isnan(i)]) for i in data]
        return mean(sd)
    else:
        return np.nan
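# A hedged usage sketch (assumes the numpy names imported by this module; the
# phase arrays are made up): with in-memory phase data, the function returns
# the mean of the per-ifg standard deviations over a (2*half_patch_size + 1)^2
# window centred on g, or np.nan if too many window cells are NaN.
import numpy as np

phase_a = np.random.default_rng(1).normal(size=(20, 20)).astype(np.float32)
phase_b = phase_a + 0.1
sd = _ref_pixel_multi((10, 10), 2, [phase_a, phase_b], thresh=10, params=None)
print(sd)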
def __init__(self, hamiltonian, beta, mu, verbose=False):
    self.mu = mu
    CanonicalEnsemble.__init__(self, hamiltonian, beta, verbose)
    c = AnnihilationOperator(self.singleParticleBasis)
    muMatrix = mu * nsum([c[orb].H.dot(c[orb])
                          for orb in self.orderedSingleParticleStates], axis=0)
    self.hamiltonian.matrix = self.hamiltonian.matrix - muMatrix
    self.filling = None
def cosmic_count(self, data, la=True):
    if la:
        _, cos = self.lacosmic(data)
    else:
        _, cos = self.mecosmic(data)
    return nsum(cos)
def getGain(new, old, penalty=3):
    old = deepcopy(old)
    for item in new:
        old[arange(old.shape[0]), item] -= 1
    m = sum(old[0, :]) / old.shape[1]
    old = old - m
    old = old**penalty  # apply the penalty exponent
    return nsum(old)
def getCorPerformance(scores, cutoff=0.7):
    scores = pd.DataFrame(scores)
    cors = abs(scores.corr(method="spearman"))
    highs = sum(nsum(cors > cutoff))
    tiebreaker = nanmedian(cors[cors > cutoff])
    return [highs, tiebreaker]
def __init__(self, blocksizes, all_real=True):
    Blocks.__init__(self, blocksizes)
    for size in blocksizes:
        if all_real:
            self.datablocks.append(asmatrix(zeros([size, size])))
        else:
            self.datablocks.append(asmatrix(zeros([size, size], dtype=complex)))
    self.shape = [nsum(blocksizes)] * 2
def getDisbalance(new, old, penalty=3, mod=1):
    old = deepcopy(old)
    for item in new:
        old[arange(old.shape[0]), item] += 1 * mod
    m = sum(old[0, :]) / old.shape[1]
    old = old - m
    old = old**penalty  # apply the penalty exponent
    return nsum(old)
def num_diff(fx, h, t=1, O=8):
    F = Stencil_points(O + 1, h, t)
    B = F * (-1)**t
    xf = fx[::-1][0:2 * O]
    FN = [f * fx[i:-O + i] for i, f in enumerate(F)]
    BN = [b * xf[i:-O + i] for i, b in enumerate(B)]
    FN[-1] = F[-1] * fx[O:]
    BN[-1] = B[-1] * xf[O:]
    BN = nsum(BN, axis=0)
    FN = nsum(FN, axis=0)
    return r_[FN, BN[::-1]] / h**t
def mst_matrix_networkx(ifgs):
    """
    Generates MST network for a single pixel for the given ifgs using
    NetworkX-package algorithms.

    :param list ifgs: Sequence of interferogram objects

    :return: y: pixel y coordinate
    :rtype: int
    :return: x: pixel x coordinate
    :rtype: int
    :return: mst: list of tuples for edges in the minimum spanning tree
    :rtype: list
    """
    # make default MST to optimise result when no Ifg cells in a stack are nans
    edges_with_weights = [(i.master, i.slave, i.nan_fraction) for i in ifgs]
    edges, g_nx = _minimum_spanning_edges_from_mst(edges_with_weights)

    # TODO: memory efficiencies can be achieved here with tiling
    list_of_phase_data = [i.phase_data for i in ifgs]
    log.debug("list_of_phase_data length: " + str(len(list_of_phase_data)))
    for row in list_of_phase_data:
        log.debug("row length in list_of_phase_data: " + str(len(row)))
        log.debug("row in list_of_phase_data: " + str(row))
    data_stack = array(list_of_phase_data, dtype=float32)

    # create MSTs for each pixel in the ifg data stack
    nifgs = len(ifgs)

    for y, x in product(range(ifgs[0].nrows), range(ifgs[0].ncols)):
        values = data_stack[:, y, x]  # vertical stack of ifg values for a pixel
        nan_count = nsum(isnan(values))

        # optimisations: use pre-created results for all nans/no nans
        if nan_count == 0:
            yield y, x, edges
            continue
        elif nan_count == nifgs:
            yield y, x, nan
            continue

        # dynamically modify graph to reuse a single graph: this should avoid
        # repeatedly creating new graph objs & reduce RAM use
        ebunch_add = []
        ebunch_delete = []
        for value, edge in zip(values, edges_with_weights):
            if not isnan(value):
                if not g_nx.has_edge(edge[0], edge[1]):
                    ebunch_add.append(edge)
            else:
                if g_nx.has_edge(edge[0], edge[1]):
                    ebunch_delete.append(edge)
        if ebunch_add:
            g_nx.add_weighted_edges_from(ebunch_add)
        if ebunch_delete:
            g_nx.remove_edges_from(ebunch_delete)
        yield y, x, nx.minimum_spanning_tree(g_nx).edges()
def clean_correlation_matrix(evals, evecs, max_eig, phylogeny=True):
    """
    Cleans the correlation matrix of noise, and provides an option to remove
    the largest eigenvector to clean the matrix of phylogeny.

    Arguments:
    evals -- Eigenvalues of correlation matrix.
    evecs -- Eigenvectors of correlation matrix.
    max_eig -- Theoretical random maximum eigenvalue. We ignore anything below
        the minimum eigenvalue.
    """
    # sum eigval * outer(vec, vec) over the eigenpairs; a list (rather than a
    # generator) with axis=0 keeps numpy's sum well defined here
    return real(nsum([x * outer(y, y) for x, y in zip(evals, evecs)], axis=0))
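# Sanity sketch for the spectral reconstruction used above (numpy only; note
# it assumes eigenvectors are passed as rows, matching zip(evals, evecs), and
# the test matrix is made up): summing eigval * outer(vec, vec) over all
# eigenpairs rebuilds the original symmetric matrix.
import numpy as np

C = np.array([[1.0, 0.3], [0.3, 1.0]])
w, v = np.linalg.eigh(C)
rebuilt = np.real(np.sum([wi * np.outer(vi, vi) for wi, vi in zip(w, v.T)], axis=0))
assert np.allclose(rebuilt, C)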
def Bv_cartesian(self):
    from numpy import sum as nsum, array
    if self.coordsys[0].upper() != 'C':
        return [nsum([B[i] * self.lattice[i] * self.latpar for i in [0, 1, 2]],
                     axis=0) for B in self.basis]
    else:
        return self.basis
def contract_end(self):
    # sums over one leg of ADT/mps as indicated in figures by contraction with
    # a 1-leg semicircle
    # first sum over south and east legs of end site then dot into second last
    # site and make 3d again by giving a 1d dummy index with expand_dims
    self.sites[-2].update(tens=expand_dims(
        dot(self.sites[-2].m, nsum(self.sites[-1].m, (0, 2))), -1))
    # delete the useless end site and change N_sites accordingly
    del self.sites[-1]
    self.N_sites = self.N_sites - 1
def likelihood_func_deriv(theta, N, X, B, z_A, z_B, mutation):
    # derivative of the likelihood of the function for theta
    p_A, p_B, f_A, f_B = p_read(theta, N, X, B, mutation)
    p_A_deriv = B * (1.0 - B) * ((1 - theta + B * theta)**(-2.0))
    p_B_deriv = ((1.0 - B * theta) * (-B) - (B - B * theta) * (-B)) / ((1.0 - B * theta)**2)
    f_A_deriv = nchoosek(N, X) * X * (p_A_deriv) * ((1.0 - p_A)**(N - X)) + \
        nchoosek(N, X) * (p_A**X) * (N - X) * (-p_A_deriv)
    f_B_deriv = nchoosek(N, X) * X * (p_B_deriv) * ((1.0 - p_B)**(N - X)) + \
        nchoosek(N, X) * (p_B**X) * (N - X) * (-p_B_deriv)
    l_deriv = -1.0 * nsum(1.0 / (z_A * f_A + z_B * f_B) *
                          (z_A * f_A_deriv + z_B * f_B_deriv))
    return l_deriv
def getFockspaceNr(self, occupationOfSingleParticleStates=None,
                   singleParticleStateNr=None, singleParticleState=None):
    """Fock state number."""
    if occupationOfSingleParticleStates is not None:
        return nsum([int(occ) * 2**i
                     for i, occ in enumerate(occupationOfSingleParticleStates)])
    elif singleParticleStateNr is not None:
        return 2**singleParticleStateNr
    elif singleParticleState is not None:
        return 2**self.getSingleParticleStateNr(*singleParticleState)
    else:
        assert False, 'Need parameter.'
def setHubbardMatrix(t, u, spins, orbitals, siteSpaceTransformation=None):
    # TODO rm siteSTrafo
    spins = range(len(spins))
    c = AnnihilationOperator([spins, orbitals])
    no = len(orbitals)
    ns = len(spins)
    spininds = range(len(spins))
    uMatrix = zeros([no, no, no, no, ns, ns])
    for i, j, k, l, s1, s2 in product(orbitals, orbitals, orbitals, orbitals,
                                      spins, spins):
        if i == k and j == l and i == j and s1 != s2:
            uMatrix[i, j, k, l, s1, s2] = u * .5
    if siteSpaceTransformation is not None:
        p = array(siteSpaceTransformation)
        t = p.transpose().dot(t).dot(p)
        temp = uMatrix.copy()
        for i, j, k, l, s1, s2 in product(orbitals, orbitals, orbitals,
                                          orbitals, spins, spins):
            uMatrix[i, j, k, l, s1, s2] = nsum(
                [p[i, m] * p[j, n] * temp[m, n, o, q, s1, s2] *
                 p.transpose()[o, l] * p.transpose()[q, k]
                 for m, n, o, q in product(orbitals, orbitals, orbitals,
                                           orbitals)], axis=0)
    ht = [t[i, j] * c[s, i].H.dot(c[s, j]) for s in spins
          for i, j in product(orbitals, orbitals) if t[i, j] != 0]
    hu = [uMatrix[i, j, k, l, s1, s2] *
          c[s1, i].H.dot(c[s2, j].H).dot(c[s2, l]).dot(c[s1, k])
          for i, j, k, l in product(orbitals, orbitals, orbitals, orbitals)
          for s1, s2 in product(spins, spins)
          if s1 != s2 and uMatrix[i, j, k, l, s1, s2] != 0]
    return nsum(ht + hu, axis=0)
def combine(self, files, combine_method):
    """Combines (median, average, sum) given files"""
    self.logger.info("Combine for {} files with {} method".format(
        len(files), combine_method))
    try:
        if combine_method == "median":
            if len(files) > 2:
                arrays = []
                for file in files:
                    data = self.data(file)
                    if data is not None:
                        arrays.append(data)
                arrays = ar(arrays)
                medi = nmed(arrays, axis=0)
                return medi
            else:
                raise Exception('Not enough files for median method')
        elif combine_method == "average":
            if len(files) > 1:
                arrays = []
                for file in files:
                    data = self.data(file)
                    if data is not None:
                        arrays.append(data)
                arrays = ar(arrays)
                mean = nmea(arrays, axis=0)
                return mean
            else:
                raise Exception('Not enough files for average method')
        elif combine_method == "sum":
            if len(files) > 1:
                arrays = []
                for file in files:
                    data = self.data(file)
                    if data is not None:
                        arrays.append(data)
                arrays = ar(arrays)
                ssum = nsum(arrays, axis=0)
                return ssum
            else:
                raise Exception('Not enough files for sum method')
        else:
            raise Exception('Unknown method')
    except Exception as e:
        self.logger.error(e)
        return False
def setMu(self, mu):
    c = AnnihilationOperator(self.singleParticleBasis)
    nMatrix = nsum([c[orb].H.dot(c[orb])
                    for orb in self.orderedSingleParticleStates], axis=0)
    self.hamiltonian.matrix = self.hamiltonian.matrix + self.mu * nMatrix
    self.mu = mu
    self.hamiltonian.matrix = self.hamiltonian.matrix - mu * nMatrix
    self.energyEigenvalues = None
    self.energyEigenstates = None
    self.partitionFunction = None
    self.occupation = dict()
    report('Chemical potential set to ' + str(mu), self.verbose)
def __getitem__(self, spState):
    """The single particle state is given by a tuple of quantum numbers.
    Returns scipy.sparse.coo_matrix"""
    instates = list()
    outstates = list()
    spStateOR = self.getOccupationRep(singleParticleState=spState)
    for fockStateNr in range(self.fockspaceSize):
        instateOR = self.getOccupationRep(fockStateNr)
        if instateOR[self.orderedSingleParticleStates.index(spState)] == '1':
            instates.append(fockStateNr)
            outstates.append(self.getFockspaceNr(
                annihilateOccRep(spStateOR, instateOR)))
    signs = [(-1)**nsum([1 for k in range(self.getSingleParticleStateNr(*spState))
                         if self.getOccupationRep(fockstateNr)[k] == '1'])
             for fockstateNr in instates]
    return coo_matrix((signs, (outstates, instates)), [self.fockspaceSize] * 2)
def Point(R, ANG, h, frames):
    R = [r * ones((frames)) if isinstance(r, float) or isinstance(r, int) else r
         for r in R]
    position = nsum([r * exp(1j * ang) for r, ang in zip(R, ANG)], axis=0)
    velocity = stencil.num_diff(position, h, 1)
    acceleration = stencil.num_diff(position, h, 2)
    return position, velocity, acceleration
def __init__(self, number_of_classes, t, dxi, N0=None, xi0=None,
             beta=None, gamma=None, Q=None, theta=None, n0=None, A0=None):
    self.number_of_classes = number_of_classes
    if xi0 is None:
        self.xi0 = dxi
    else:
        self.xi0 = xi0
    self.n0 = n0
    self.theta = theta
    # Uniform grid
    self.xi = self.xi0 + dxi * arange(self.number_of_classes)
    if N0 is None:
        N0 = zeros_like(self.xi)
    else:
        N0 = array([
            quad(N0, self.xi[i] - dxi / 2., self.xi[i] + dxi / 2.)[0]
            for i in range(number_of_classes)])
    self.nu = 2.0  # Binary breakup
    # Kernels setup
    if gamma is not None:
        self.gamma = gamma(self.xi)
        self.betadxi = zeros(
            (self.number_of_classes, self.number_of_classes))
        for i in range(1, len(self.xi)):
            for j in range(i):
                self.betadxi[j, i] = beta(self.xi[j], self.xi[i])
            self.betadxi[:, i] = \
                self.betadxi[:, i] / nsum(self.betadxi[:, i])
    else:
        self.gamma = None
        self.betadxi = None

    if Q is not None:
        self.Q = zeros((self.number_of_classes, self.number_of_classes))
        for i in range(len(self.xi)):
            for j in range(len(self.xi)):
                self.Q[i, j] = Q(self.xi[i], self.xi[j])
    else:
        self.Q = None
    if A0 is None:
        self.A0 = None
    else:
        self.A0 = array([
            quad(A0, self.xi[i] - dxi / 2., self.xi[i] + dxi / 2.)[0]
            for i in range(number_of_classes)])
    # Solve procedure
    self.N = odeint(lambda NN, t: self.RHS(NN, t), N0, t)
def lehmannSumDynamic(elementProducts, energyDifferences, partitionFunction,
                      mesh, zeroFrequencyTerms, imaginaryOffset,
                      nominatorCoefficients):
    if type(imaginaryOffset) != list:
        imaginaryOffset = [imaginaryOffset] * 2
    result = dict()
    for statePair, nominators, denominators in zip(elementProducts.keys(),
                                                   elementProducts.values(),
                                                   energyDifferences.values()):
        data_p = list()
        for w in scatter_list(mesh):
            terms = [coeff * nom / (w + denom + complex(0, offset))
                     for noms, denom in zip(nominators, denominators)
                     for nom, coeff, offset in zip(noms, nominatorCoefficients,
                                                   imaginaryOffset)]
            if len(zeroFrequencyTerms.values()) > 0 and w == 0:
                terms += zeroFrequencyTerms[statePair]
            # divide the summed terms by the partition function
            data_p.append(nsum(terms) / partitionFunction)
        result.update({statePair: array(allgather_list(data_p))})
    return result
def test_mst_matrix_as_ifgs(self):
    # ensure only ifgs are returned, not individual MST graphs
    ifgs = small5_mock_ifgs()
    nifgs = len(ifgs)
    ys, xs = ifgs[0].shape
    result = mst._mst_matrix_ifgs_only(ifgs)

    for coord in product(range(ys), range(xs)):
        stack = (i.phase_data[coord] for i in self.ifgs)
        nc = nsum([isnan(n) for n in stack])
        self.assertTrue(len(result[coord]) <= (nifgs - nc))

    # HACK: type testing here is a bit grubby
    self.assertTrue(all([isinstance(i, MockIfg) for i in ifgs]))
def do_pcoa(dists):
    'It does a Principal Coordinate Analysis on a distance matrix'
    # the code for this function is taken from pycogent metric_scaling.py
    # Principles of Multivariate analysis: A User's Perspective.
    # W.J. Krzanowski Oxford University Press, 2000. p106.
    dists = squareform(dists)
    e_matrix = (dists * dists) / -2.0
    f_matrix = _make_f_matrix(e_matrix)

    eigvals, eigvecs = eigh(f_matrix)
    eigvecs = eigvecs.transpose()
    # drop imaginary component, if we got one
    eigvals, eigvecs = eigvals.real, eigvecs.real

    # convert eigvals and eigvecs to point matrix
    # normalized eigenvectors with eigenvalues
    # get the coordinates of the n points on the jth axis of the Euclidean
    # representation as the elements of (sqrt(eigvalj))eigvecj
    # must take the absolute value of the eigvals since they can be negative
    pca_matrix = eigvecs * sqrt(abs(eigvals))[:, newaxis]

    # output
    # get order to output eigenvectors values. reports the eigvecs according
    # to their corresponding eigvals from greatest to least
    vector_order = list(argsort(eigvals))
    vector_order.reverse()

    eigvals = eigvals[vector_order]

    # eigenvalues
    pcnts = (eigvals / nsum(eigvals)) * 100.0

    # the outputs
    # eigenvectors in the original pycogent implementation, here we name them
    # princoords
    # I think that we're doing: if the eigenvectors are written as columns,
    # the rows of the resulting table are the coordinates of the objects in
    # PCO space
    projections = []
    for name_i in range(dists.shape[0]):
        eigvect = [pca_matrix[vec_i, name_i] for vec_i in vector_order]
        projections.append(eigvect)
    projections = array(projections)

    return {'projections': projections, 'var_percentages': pcnts}
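# A hedged usage sketch (assumes scipy.spatial.distance plus the numpy/scipy
# names imported by this module; the points are made up): three collinear
# points at 0, 1 and 3 should come back with essentially all variance on the
# first coordinate axis.
import numpy as np
from scipy.spatial.distance import pdist

points = np.array([[0.0], [1.0], [3.0]])
res = do_pcoa(pdist(points))               # condensed distances, as squareform expects
print(np.round(res['var_percentages'], 1))  # first axis should dominate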
def likelihood_func_deriv(theta, N, X, B, z_A, z_B, mutation):
    # derivative of the likelihood of the function for theta
    p_A, p_B, f_A, f_B = p_read(theta, N, X, B, mutation)
    p_A_deriv = B * (1.0 - B) * ((1 - theta + B * theta)**(-2.0))
    p_B_deriv = ((1.0 - B * theta) * (-B) - (B - B * theta) * (-B)) / ((1.0 - B * theta)**2)
    f_A_deriv = nchoosek(N, X) * X * (p_A_deriv) * (
        (1.0 - p_A)**(N - X)) + nchoosek(N, X) * (p_A**X) * (N - X) * (-p_A_deriv)
    f_B_deriv = nchoosek(N, X) * X * (p_B_deriv) * (
        (1.0 - p_B)**(N - X)) + nchoosek(N, X) * (p_B**X) * (N - X) * (-p_B_deriv)
    l_deriv = -1.0 * nsum(1.0 / (z_A * f_A + z_B * f_B) *
                          (z_A * f_A_deriv + z_B * f_B_deriv))
    return l_deriv
def update(self, dict_in):
    """
    Expects a single value or array. If array, store the whole vector and stop.
    """
    if self.data == []:
        self.fmetrics.compute_support(dict_in)
        self.x_f = np.ravel(self.fmetrics.x_f, order='F')
        self.x_f_shape = self.fmetrics.x_f.shape
    self.fmetrics.compute_support(dict_in)
    x_n_f = dict_in['x_n']
    if x_n_f.shape != self.x_f.shape:
        x_n_f = crop_center(x_n_f, self.x_f_shape)
    x_n_f = np.ravel(fftn(x_n_f), order='F')
    d_e_bar = self.x_f - x_n_f
    d_e_bar = conj(d_e_bar) * d_e_bar
    e_bar = conj(self.x_f) * self.x_f
    G = [nsum(np.take(e_bar, self.fmetrics.s_indices[k]))
         for k in xrange(self.fmetrics.K)]
    value = tuple(np.real(
        [(G[k] - nsum(np.take(d_e_bar, self.fmetrics.s_indices[k]))) / G[k]
         for k in arange(self.fmetrics.K)]))
    self.data.append(value)
    super(RER, self).update()
def nan_fraction(self):
    """
    Returns decimal fraction of NaN cells in the phase band.
    """
    if (self._nodata_value is None) or (self.dataset is None):
        msg = 'nodata_value needs to be set for nan fraction calc. ' \
              'Use ifg.nodata = NoDataValue to set nodata'
        raise RasterException(msg)
    # don't cache nan_count as client code may modify phase data
    nan_count = self.nan_count
    # handle datasets with no 0 -> NaN replacement
    if not self.nan_converted and (nan_count == 0):
        nan_count = nsum(np.isclose(self.phase_data, self._nodata_value, atol=1e-6))
    return nan_count / float(self.num_cells)
def mst_matrix_networkx(ifgs):
    """
    Generates/emits MST trees on a pixel-by-pixel basis for the given
    interferograms.

    :param ifgs: Sequence of interferogram objects
    :return: (y, x, mst) tuples, one per pixel, where mst is the list of edges
        in that pixel's minimum spanning tree
    """
    # make default MST to optimise result when no Ifg cells in a stack are nans
    edges_with_weights = [(i.master, i.slave, i.nan_fraction) for i in ifgs]
    edges, g_nx = minimum_spanning_edges_from_mst(edges_with_weights)

    # TODO: memory efficiencies can be achieved here with tiling
    data_stack = array([i.phase_data for i in ifgs], dtype=float32)

    # create MSTs for each pixel in the ifg data stack
    nifgs = len(ifgs)

    for y, x in product(range(ifgs[0].nrows), range(ifgs[0].ncols)):
        values = data_stack[:, y, x]  # vertical stack of ifg values for a pixel
        nan_count = nsum(isnan(values))

        # optimisations: use pre-created results for all nans/no nans
        if nan_count == 0:
            yield y, x, edges
            continue
        elif nan_count == nifgs:
            yield y, x, nan
            continue

        # dynamically modify graph to reuse a single graph: this should avoid
        # repeatedly creating new graph objs & reduce RAM use
        ebunch_add = []
        ebunch_delete = []
        for value, edge in zip(values, edges_with_weights):
            if not isnan(value):
                if not g_nx.has_edge(edge[0], edge[1]):
                    ebunch_add.append(edge)
            else:
                if g_nx.has_edge(edge[0], edge[1]):
                    ebunch_delete.append(edge)
        if ebunch_add:
            g_nx.add_weighted_edges_from(ebunch_add)
        if ebunch_delete:
            g_nx.remove_edges_from(ebunch_delete)
        yield y, x, minimum_spanning_tree(g_nx).edges()
def setMuByFilling(self, filling, muMin, muMax, muTol=.001, maxiter=100):
    c = AnnihilationOperator(self.singleParticleBasis)
    nMatrix = nsum([c[orb].H.dot(c[orb])
                    for orb in self.orderedSingleParticleStates], axis=0)
    self.hamiltonian.matrix = self.hamiltonian.matrix + self.mu * nMatrix

    def fillingFunction(muTrial):
        self.hamiltonian.matrix = self.hamiltonian.matrix - muTrial * nMatrix
        self.calcEigensystem()
        self.calcPartitionFunction()
        self.calcOccupation()
        fillingTrial = self.getTotalOccupation()
        self.hamiltonian.matrix = self.hamiltonian.matrix + muTrial * nMatrix
        report('Filling(mu=' + str(muTrial) + ') = ' + str(fillingTrial),
               self.verbose)
        self.filling = fillingTrial
        return fillingTrial - filling

    mu = bisect(fillingFunction, muMin, muMax, xtol=muTol, maxiter=maxiter)
    self.mu = mu
    self.hamiltonian.matrix = self.hamiltonian.matrix - mu * nMatrix
    report('Chemical potential set to ' + str(mu), self.verbose)
def update(self, Y, z, lamb):
    '''
    r.update(Y, z, lamb)
    Y: array(data_dim, data_len)
    z: <Z>, array(aug_dim, data_len)
    lamb: <lamb lamb'>, array(aug_dim, aug_dim, data_dim)
    '''
    data_len = Y.shape[-1]
    # --- a
    a = self.prior.a + 0.5 * data_len
    # --- b
    y2 = Y**2
    yz = einsum('dt,lt->dlt', Y, z.mu)
    yzl = einsum('dlt,ldk->dt', yz, lamb.post.mu)
    tr_z2l2 = einsum('ljt,jldk->dt', z.expt2, lamb.post.expt2)
    sum_y2_yzl_tr_z2l2 = 0.5 * nsum(y2 - 2 * yzl + tr_z2l2, 1)
    b = self.prior.b + sum_y2_yzl_tr_z2l2[:, newaxis]
    self.post.set_params(a=a, b=b)
def readout(self):
    # contracts all but the 'present time' leg of ADT/mps and returns the
    # 1-leg reduced density matrix
    # for special case of rank-1 ADT just sum over 1d dummy legs and return
    if self.N_sites == 1:
        return nsum(nsum(self.sites[0].m, -1), -1)
    # otherwise sum over all but 1 leg of last site, store as out, then
    # successively sum legs of new end sites to make matrices then multiply
    # into vector 'out'
    out = nsum(nsum(self.sites[self.N_sites - 1].m, 0), -1)
    for jj in range(self.N_sites - 2):
        out = dot(nsum(self.sites[self.N_sites - 2 - jj].m, 0), out)
    out = dot(nsum(self.sites[0].m, 1), out)
    # after the last site, 'out' should now be the reduced density matrix
    return out
def embedV(subspaceVectors, blocksizes):
    fockspaceSize = nsum(blocksizes)
    assert fockspaceSize == len(subspaceVectors), 'embedding will fail'
    iBlock = 0
    iBlockOrigin = 0
    vectors = zeros([len(subspaceVectors), fockspaceSize])
    x = list()
    y = list()
    data = list()
    for i, v in enumerate(subspaceVectors):
        for j, vj in enumerate(v):
            if not equals(vj, 0):
                y.append(j + iBlockOrigin)  # TODO understand row/col exchange
                x.append(i)
                data.append(vj)
        if i == iBlockOrigin + blocksizes[iBlock] - 1:
            iBlockOrigin += blocksizes[iBlock]
            iBlock += 1
    return coo_matrix((data, (x, y)), [fockspaceSize] * 2)
def get_vcal_disto(self, cut=None, col=None, row=None, pix=None, vcal=True,
                   _redo=False):
    cut = self.Cut(cut) + self.Cut.generate_masks(
        col, row, pix, exclude=False).Value + self.Cut.get_ncluster()
    n, v = self.get_nhits(cut), self.Calibration.get_vcals(
        *self.get_tree_vec(['col', 'row', 'adc'],
                           cut + self.Cut.get_plane(), dtype='i2'))
    # fill arrays with zeros where there are less than max hits
    v = nsum(insert(v, cumsum(n).astype('i').repeat(max(n) - n),
                    0).reshape(n.size, max(n)), axis=1)
    v *= (1 if vcal else Bins.Vcal2El)
    return self.Draw.distribution(
        v, title='Pulse Height Distribution',
        x_tit=f'Pulse Height [{"VCAL" if vcal else "e"}]', show=False)
def _make_f_matrix(matrix):
    """It takes an E matrix and returns an F matrix

    The input is the output of make_E_matrix

    For each element in matrix subtract mean of corresponding row and column
    and add the mean of all elements in the matrix
    """
    num_rows, num_cols = matrix.shape
    # make a vector of the means for each row and column
    # column_means = (add.reduce(E_matrix) / num_rows)
    column_means = (add.reduce(matrix) / num_rows)[:, newaxis]
    trans_matrix = transpose(matrix)
    row_sums = add.reduce(trans_matrix)
    row_means = row_sums / num_cols
    # calculate the mean of the whole matrix
    matrix_mean = nsum(row_sums) / (num_rows * num_cols)
    # adjust each element in the E matrix to make the F matrix
    matrix -= row_means
    matrix -= column_means
    matrix += matrix_mean
    return matrix
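# Illustrative check of the double-centering above (numpy only; the E matrix
# is made up): after subtracting row and column means and adding the grand
# mean, every row and every column of F averages to zero.
import numpy as np

E = np.array([[0.0, -0.5, -2.0],
              [-0.5, 0.0, -0.5],
              [-2.0, -0.5, 0.0]])
F = E - E.mean(axis=1, keepdims=True) - E.mean(axis=0, keepdims=True) + E.mean()
print(np.allclose(F.mean(axis=0), 0), np.allclose(F.mean(axis=1), 0))   # True True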
def likelihood_func(theta, N, X, B, z_A, z_B, mutation):
    # Negative log-likelihood for theta
    _, _, f_A, f_B = p_read(theta, N, X, B, mutation)
    l = -1.0 * nsum(log(z_A * f_A + z_B * f_B))
    return l
def convert_cartesian(lattice, directVec):
    from numpy import sum as nsum, array
    return nsum([directVec[i] * lattice[i] for i in [0, 1, 2]], axis=0)
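# A minimal usage sketch (numpy only; the lattice is made up): fractional
# coordinates [0.5, 0.5, 0.0] in a simple cubic lattice of edge 2.0 map to
# the Cartesian point [1, 1, 0].
import numpy as np

lattice = np.array([[2.0, 0.0, 0.0],
                    [0.0, 2.0, 0.0],
                    [0.0, 0.0, 2.0]])
print(convert_cartesian(lattice, [0.5, 0.5, 0.0]))   # [1. 1. 0.]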
def resample(self):
    from sklearn.svm import SVC
    from sklearn.neighbors import NearestNeighbors

    svc = SVC()
    svc.set_params(**self.svm_args)

    # Fit SVM and find the support vectors
    svc.fit(self.x, self.y)
    support_index = svc.support_[self.y[svc.support_] == self.minc]
    support_vector = self.x[support_index]

    # Start with the minority class
    minx = self.x[self.y == self.minc]

    # First, find the NN of all the samples to identify samples in danger and noisy ones
    print("Finding the %i nearest neighbours..." % self.m, end="")
    NN = NearestNeighbors(n_neighbors=self.m + 1)
    NN.fit(self.x)
    print("done!")

    # Now, get rid of noisy support vectors
    # Boolean array with True for noisy support vectors
    noise_bool = asarray([is_noise(x, self.y, self.m, self.minc, NN)
                          for x in support_vector])

    # Remove noisy support vectors
    support_vector = support_vector[logical_not(noise_bool)]

    # Find support vectors that are in danger (interpolation) or not (extrapolation)
    danger_bool = asarray([in_danger(x, self.y, self.m, self.minc, NN)
                           for x in support_vector])
    safety_bool = logical_not(danger_bool)

    print_stats = (len(support_vector), nsum(noise_bool),
                   nsum(danger_bool), nsum(safety_bool))
    print("Out of %i support vectors, %i are noisy, %i are in danger and %i are safe."
          % print_stats)

    # Proceed to find support vectors NNs among the minority class
    print("Finding the %i nearest neighbours..." % self.k, end="")
    NN.set_params(**{'n_neighbors': self.k + 1})
    NN.fit(minx)
    print("done!")

    print("Creating synthetic samples...", end="")
    # Split the number of synthetic samples between interpolation and extrapolation
    Pyseed(self.rs)
    fractions = min(max(gauss(0.5, 0.1), 0), 1)

    # Interpolate samples in danger
    nns = NN.kneighbors(support_vector[danger_bool], return_distance=False)[:, 1:]
    sx1, sy1 = make_samples(support_vector[danger_bool], minx, self.minc, nns,
                            fractions * (int(self.ratio * len(minx)) + 1),
                            step_size=1,
                            random_state=self.rs)

    # Extrapolate safe samples
    nns = NN.kneighbors(support_vector[safety_bool], return_distance=False)[:, 1:]
    sx2, sy2 = make_samples(support_vector[safety_bool], minx, self.minc, nns,
                            (1 - fractions) * int(self.ratio * len(minx)),
                            step_size=-self.out_step,
                            random_state=self.rs)
    print("done!")

    # Concatenate the newly generated samples to the original data set
    ret_x = concatenate((self.x, sx1, sx2), axis=0)
    ret_y = concatenate((self.y, sy1, sy2), axis=0)

    return ret_x, ret_y
from itertools import product
from numpy import sum as nsum, trace
from util import dot
from operators import SingleParticleBasis, AnnihilationOperator

inds = [['u', 'd'], [0, 1]]
basis = SingleParticleBasis(inds)
print basis.getSingleParticleBasis()
print basis.orderedSingleParticleStates
print 'u0 d0 d1:'
print basis.getStateAlgebraically(13)
print basis.getFockspaceNr((1, 0, 1, 1))
print basis.getOccupationRep(13)
print 'up1:'
print basis.getFockspaceNr((0, 1, 0, 0))
print basis.getSingleParticleStateNr('u', 1)
print basis.getOccupationRep(2)
print 'all single particle states:'
for sps in basis.orderedSingleParticleStates:
    print basis.getFockspaceNr(singleParticleState=sps)
print

c = AnnihilationOperator([['u', 'd'], range(2)])
print c.orderedSingleParticleStates
print c['u', 1].toarray()
print
print c['u', 1].transpose().dot(c['u', 1]).toarray()
print nsum([c[s, i].transpose().dot(c[s, i])
            for s, i in product(['u', 'd'], range(2))], axis=0)
no_pergroup = 30
n_observed = no_pergroup * n_groups
n_group_predictors = 1
n_predictors = 3

group = concatenate([[i] * no_pergroup for i in range(n_groups)])
group_predictors = random.normal(
    size=(n_groups, n_group_predictors))  # random.normal(size = (n_groups, n_group_predictors))
predictors = random.normal(size=(n_observed, n_predictors))

group_effects_a = random.normal(size=(n_group_predictors, n_predictors))
effects_a = random.normal(size=(n_groups, n_predictors)) + dot(
    group_predictors, group_effects_a)

y = nsum(effects_a[group, :] * predictors, 1) + random.normal(size=(n_observed))

model = Model()
with model:
    # m_g ~ N(0, .1)
    group_effects = Normal("group_effects", 0, .1,
                           shape=(1, n_group_predictors, n_predictors))
    # sg ~ Uniform(.05, 10)
    sg = Uniform("sg", .05, 10, testval=2.)

    # m ~ N(mg * pg, sg)
    effects = Normal("effects",
def sumScatteredLists(x):
    return nsum(allgather_list(x))
n_groups = 10
no_pergroup = 30
n_observed = no_pergroup * n_groups
n_group_predictors = 1
n_predictors = 3

group = concatenate([[i] * no_pergroup for i in range(n_groups)])
group_predictors = random.normal(
    size=(n_groups, n_group_predictors))  # random.normal(size = (n_groups, n_group_predictors))
predictors = random.normal(size=(n_observed, n_predictors))

group_effects_a = random.normal(size=(n_group_predictors, n_predictors))
effects_a = random.normal(
    size=(n_groups, n_predictors)) + dot(group_predictors, group_effects_a)

y = nsum(
    effects_a[group, :] * predictors, 1) + random.normal(size=(n_observed))

model = Model()
with model:
    # m_g ~ N(0, .1)
    group_effects = Normal(
        "group_effects", 0, .1, shape=(1, n_group_predictors, n_predictors))

    # sg ~ Uniform(.05, 10)
    sg = Uniform("sg", .05, 10, testval=2.)

    # m ~ N(mg * pg, sg)
    effects = Normal("effects",
def getNEigenvalues(self):
    fockstates = arange(self.fockspaceSize)
    fockstates = self.transformation.dot(fockstates)
    nEigenvalues = [nsum([1 for digit in self.getOccupationRep(fockstate)
                          if digit == '1'])
                    for fockstate in fockstates]
    return nEigenvalues
def bilateral_filter(src, sigmaDistance, sigmaRange, d=-1,
                     borderType=ipcv.BORDER_WRAP, borderVal=0, maxCount=255):
    orig_shape = src.shape
    orig_size = src.size
    if len(orig_shape) == 2:
        # convert to mxnx1 array for ease of calculation
        src = numpy.reshape(src, (orig_shape[0], orig_shape[1], 1))
    dst = numpy.zeros(src.shape)

    # d = filter radius. If negative, must equal double the sigma d value.
    # Use this value to adjust the array borders too.
    if d < 0:
        d = 2 * sigmaDistance
    else:
        pass
    d = int(d)

    # work on border modes
    npad = ((d, d), (d, d), (0, 0))
    if borderType == ipcv.BORDER_WRAP:
        src = numpy.pad(src, npad, mode='wrap')
    elif borderType == ipcv.BORDER_CONSTANT:
        src = numpy.pad(src, npad, mode='constant', constant_values=borderVal)
    elif borderType == ipcv.BORDER_REFLECT:
        src = numpy.pad(src, npad, mode='reflect')
    else:
        print("Border mode not supported. Please use 'BORDER_WRAP', "
              "'BORDER_CONSTANT', or 'BORDER_REFLECT'.")
        exit()

    if len(orig_shape) == 3:
        # This is a color image. Perform CIELAB calculation to convert color space.
        if src.dtype == numpy.uint8:
            src = cv2.cvtColor(src, cv2.COLOR_RGB2LAB)
        else:
            print("Error: Must input 8-bit-image.")
            exit()
    elif len(orig_shape) == 2:
        # This is a greyscale image. Values will represent luminance. Pass through
        pass
    else:
        print("Error: source image passed is neither a color or greyscale image.\n"
              " 'src' should be either a 3D 3-channel color image or a 2D "
              "greyscale image.")

    # Now that our arrays are padded appropriately, we can start the filtering
    # process.
    closeness = numpy.zeros((1 + (2 * d), 1 + (2 * d)))  # Definitions for the size/shape of filter.
    similarity = numpy.zeros((1 + (2 * d), 1 + (2 * d)))
    bilateralfilter = numpy.zeros((1 + (2 * d), 1 + (2 * d)))
    center = numpy.array(find_center(bilateralfilter))

    # These two loops are the initiators for the entire image's filter. We now
    # iterate pixel-by-pixel to calculate the bilateral filter.
    # Create the closeness filter once to prevent re-iteration.
    for i in range(0, bilateralfilter.shape[0]):
        for j in range(0, bilateralfilter.shape[1]):
            distance = numpy.array((i, j))
            closeness[i, j] = e**(-.5 * ((norm(center - distance) / sigmaDistance)**2))

    count = 0
    if len(orig_shape) == 2:
        for columns in range(d, orig_shape[0] + d):
            startTime = time.time()
            for rows in range(d, orig_shape[1] + d):
                # These loops initiate the iterative process for creating the
                # bilateral filter.
                for i in range(0, bilateralfilter.shape[0]):
                    for j in range(0, bilateralfilter.shape[1]):
                        similarity[i, j] = e**(-.5 * (
                            (abs(src[columns, rows, 0] -
                                 src[columns + (i - d), rows + (j - d), 0]) /
                             sigmaRange)**2))
                bilateralfilter = matmul(closeness, similarity)
                bilateralfilter = bilateralfilter / nsum(nsum(bilateralfilter))
                srcrange = src[columns - d:columns + d + 1, rows - d:rows + d + 1]
                dst[columns - d, rows - d] = ndot(reshape(srcrange, (-1)),
                                                  reshape(bilateralfilter, (-1)))
                count = count + 1
            # print('Row Completion Time: = {0} [s]'.format(time.time() - startTime))
            # print("Percentage Complete: ", 100 * (count / orig_size))

    # For Color Images
    else:
        luminance = numpy.zeros(dst[:, :, 0].shape)
        for columns in range(d, orig_shape[0] + d):
            startTime = time.time()
            for rows in range(d, orig_shape[1] + d):
                # These loops initiate the iterative process for creating the
                # bilateral filter.
                for i in range(0, bilateralfilter.shape[0]):
                    for j in range(0, bilateralfilter.shape[1]):
                        similarity[i, j] = e**(-.5 * (
                            (abs(src[columns, rows, 0] -
                                 src[columns + (i - d), rows + (j - d), 0]) /
                             sigmaRange)**2))
                bilateralfilter = matmul(closeness, similarity)
                bilateralfilter = bilateralfilter / nsum(nsum(bilateralfilter))
                srcrange = src[columns - d:columns + d + 1, rows - d:rows + d + 1, 0]
                luminance[columns - d, rows - d] = ndot(reshape(srcrange, (-1)),
                                                        reshape(bilateralfilter, (-1)))
                count = count + 1
            # print('Row Completion Time: = {0} [s]'.format(time.time() - startTime))
            # print("Percentage Complete: \n {0}%".format(100 * (count*3 / orig_size)))
            # print("")

    # dst is created. Now we need to return to original image state. If 2D,
    # simply is quantized and clipped. If 3D, convert back to RGB colorspace.
    if len(orig_shape) == 2:
        dst = dst.astype(int)
        dst = numpy.reshape(
            numpy.clip(dst, 0, maxCount).astype(numpy.uint8),
            (orig_shape[0], orig_shape[1]))
    else:
        dst[:, :, 0] = luminance
        dst[:, :, 1] = src[d:orig_shape[0] + d, d:orig_shape[1] + d, 1]
        dst[:, :, 2] = src[d:orig_shape[0] + d, d:orig_shape[1] + d, 2]
        dst = dst.astype(numpy.uint8)
        dst = cv2.cvtColor(dst, cv2.COLOR_LAB2RGB)
    dst = numpy.clip(dst, 0, maxCount).astype(numpy.uint8)
    return dst
def mu_q_e(e):
    '''Summation / integration over the random domain'''
    q_e_grid = q(e, c_arr[:, None], x_arr[None, :])
    q_dG_grid = q_e_grid * dG_grid
    return nsum(q_dG_grid)
def getNsEigenvalues(self):
    fockstates = arange(self.fockspaceSize)
    fockstates = self.transformation.dot(fockstates)
    nsEigenvalues = [nsum([1 for digit in self.getOccupationRep(fockstate)
                           [:int(self.nrOfSingleParticleStates * .5)]
                           if digit == '1'])
                     for fockstate in fockstates]
    return nsEigenvalues
def EM_Clonal_Abundance(N, X, B=0.5, mutation='DEL', theta_tol=1E-9,
                        maxiter=1000, consecutive_convergence=10,
                        full_output=False, disp=False):
    """
    Inputs:
    N: a vector of total number of reads for each SNP
    X: a vector of total number of allele A reads, same length as N
    B: a vector of read bias, estimated by the number of reads of allele A
       divided by the total number of reads in normal sample. Default is .5
    mutation: type of mutation ['DEL' | 'UPD' | {numeric}], chromosomal
       deletion, uniparental disomy, or copy number (total number of copies of
       the chromosomes) in the case of amplification. Default 'DEL'.
    theta_tol (optional): tolerance of theta change for convergence, default is 1E-9
    maxiter: maximum number of iterations to run, default is 1000
    consecutive_convergence: number of times that the change of theta has to be
       less than theta_tol, consecutively, to be deemed convergence
    full_output: flag to return additional outputs
    disp: display each iteration of EM

    Output:
    theta: estimated proportion of tumor cells (between 0 and 1)
    If full_output is set to True, the following can also be returned
    it_count: number of iterations used
    """
    # Initialize / guess parameters
    theta = 0.5
    d_theta = 1.0
    it_count = 0
    d_theta_count = 0
    z_A = X / N       # probability of z_A
    z_B = 1.0 - z_A   # probability of z_B

    # define objective functions for theta
    # Maximize theta so that the log likelihood is maximized
    # l(theta) = sum_i[sum_zi: Qi_zi*log(p(x_i, z_i; theta)/Qi_zi)]
    # l(theta) = sum_i[z_Ai*log(r_A) + z_Bi*log(r_B) + log(z_Ai*f_A + z_Bi*f_B)]
    def likelihood_func(theta, N, X, B, z_A, z_B, mutation):
        # Negative log-likelihood for theta
        _, _, f_A, f_B = p_read(theta, N, X, B, mutation)
        l = -1.0 * nsum(log(z_A * f_A + z_B * f_B))
        return l

    def likelihood_func_deriv(theta, N, X, B, z_A, z_B, mutation):
        # derivative of the likelihood of the function for theta
        p_A, p_B, f_A, f_B = p_read(theta, N, X, B, mutation)
        p_A_deriv = B * (1.0 - B) * ((1 - theta + B * theta)**(-2.0))
        p_B_deriv = ((1.0 - B * theta) * (-B) - (B - B * theta) * (-B)) / ((1.0 - B * theta)**2)
        f_A_deriv = nchoosek(N, X) * X * (p_A_deriv) * ((1.0 - p_A)**(N - X)) + \
            nchoosek(N, X) * (p_A**X) * (N - X) * (-p_A_deriv)
        f_B_deriv = nchoosek(N, X) * X * (p_B_deriv) * ((1.0 - p_B)**(N - X)) + \
            nchoosek(N, X) * (p_B**X) * (N - X) * (-p_B_deriv)
        l_deriv = -1.0 * nsum(1.0 / (z_A * f_A + z_B * f_B) *
                              (z_A * f_A_deriv + z_B * f_B_deriv))
        return l_deriv

    while d_theta_count <= consecutive_convergence and it_count <= maxiter:
        it_count += 1

        # M-Step
        # r_A, r_B part
        # r_A = sum_i2K(z_Ai/N)
        r_A = nsum(z_A / np.size(z_A))
        r_B = nsum(z_B / np.size(z_B))

        # Maximize the log likelihood to estimate for theta, (minimize the
        # negative of the log-likelihood)
        xopt, fval, ierr, numfunc = fminbound(
            likelihood_func, 0.0, 1.0,
            args=(N, X, B, z_A, z_B, mutation), full_output=True)
        if disp:
            print("theta:%f, fval:%f, ierr:%d, numfunc:%d"
                  % (xopt, fval, ierr, numfunc))
        # returns a new theta
        d_theta = np.abs(xopt - theta)
        theta = xopt
        if d_theta < theta_tol:
            d_theta_count += 1
        else:  # if not consecutive convergence, set convergence count to zero
            d_theta_count = 0

        # E-Step
        # Set Q_i(Z_i) = p(z_i = 1 | x_i ; theta)
        # Recalculate probabilities
        _, _, f_A, f_B = p_read(theta, N, X, B, mutation)
        f_X = r_A * f_A + r_B * f_B
        z_A = r_A * f_A / f_X
        z_B = 1.0 - z_A
    # end of while loop

    if it_count > maxiter and d_theta_count < 1:
        print("Theta not converged!")

    if full_output:
        return (theta, it_count)
    else:
        return theta
def moment(data, order=1):
    x_bar = nsum(data) / len(data)
    x_i = power(data - x_bar, order)
    return nsum(x_i) / len(data)
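# A minimal usage sketch (numpy only; the data values are made up): the second
# central moment returned by moment(data, 2) matches numpy's population
# variance.
import numpy as np

data = np.array([2.0, 4.0, 4.0, 4.0, 5.0, 5.0, 7.0, 9.0])
print(moment(data, 2), np.var(data))   # both 4.0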