def stats(t, snp=None):
    '''Return a record array with imputation statistics.'''
    T = t.sample_index_to_impute
    imputed = t.imputed_data[:, T, :]
    tot_to_impute = 2 * imputed.shape[1]
    snp = snp if snp is not None else np.arange(t.num_snps)
    stats = np.zeros((len(snp),), dtype=[
        ('dist_cm', 'f4'),            # Genetic distance from beginning of chromosome
        ('count', '(2,)i4'),          # Allele count
        ('frequency', '(2,)f4'),      # Allele frequency
        ('call_rate', 'f4'),          # Imputation call rate
        ('call_rate_training', 'f4')  # Call rate among training samples
    ])
    call_rate_training = 1.0 * np.sum(np.sum(t.imputed_data[:, t.sample_index, :] != 0, axis=2), axis=1)  # / (2 * len(t.sample_index))
    for row, snp_index in enumerate(snp):
        # TODO: replace by a bulk group-by/hist?
        # g = t.__t.training_data[snp_index, :, :]
        i = imputed[snp_index, :]
        (c1, c2) = (len(np.where(i == 1)[0]), len(np.where(i == 2)[0]))
        c = c1 + c2 + SMALL_FLOAT
        f1, f2 = (1.0 * c1) / c, (1.0 * c2) / c
        call_rate = 1.0 * len(i.nonzero()[0]) / tot_to_impute
        # print 'c1 %4d c2 %4d f1 %.2f f2 %.2f call rate %5.2f' % (c1, c2, f1, f2, call_rate)
        stats[row] = (t.snp['dist_cm'][snp_index], [c1, c2], [f1, f2],
                      call_rate, call_rate_training[snp_index])
    return stats
def f1(u, eigvals, Z10, Z11):
    """
    A component of exact time pdf (Eq. 22, HJC92).

    Parameters
    ----------
    u : float
        u = t - tres
    eigvals : array_like, shape (k,)
        Eigenvalues of -Q matrix.
    Z10, Z11 (or gama10, gama11) : list of array_likes
        Constants for the exact open/shut time pdf. Z10, Z11 for likelihood
        calculation or gama10, gama11 for time distributions.

    Returns
    -------
    f : ndarray
    """
    # f = np.zeros(Z10[0].shape)
    # for i in range(len(eigvals)):
    #     f += (Z10[i] + Z11[i] * u) * math.exp(-eigvals[i] * u)

    if Z10.ndim > 1:
        f = np.sum((Z10 + Z11 * u) *
                   np.exp(-eigvals * u).reshape(Z10.shape[0], 1, 1), axis=0)
    else:
        f = np.sum((Z10 + Z11 * u) * np.exp(-eigvals * u))
    return f
def test_matrix_assemble(dim): eps = 1000*DOLFIN_EPS (u, uu), (v, vv), (U, UU), dPP, bc = _create_dp_problem(dim) # Scalar assemble mat = assemble(u*v*U*dPP) # Create a numpy matrix based on the local size of the vector # and populate it with values from local vector loc_range = u.vector().local_range() vec_mat = np.zeros_like(mat.array()) vec_mat[range(loc_range[1] - loc_range[0]), range(loc_range[0], loc_range[1])] = u.vector().get_local() assert np.sum(np.absolute(mat.array() - vec_mat)) < eps # Vector assemble mat = assemble((uu[0]*vv[0]*UU[0] + uu[1]*vv[1]*UU[1])*dPP) # Create a numpy matrix based on the local size of the vector # and populate it with values from local vector loc_range = uu.vector().local_range() vec_mat = np.zeros_like(mat.array()) vec_mat[range(loc_range[1] - loc_range[0]), range(loc_range[0], loc_range[1])] = uu.vector().get_local() assert np.sum(np.absolute(mat.array() - vec_mat)) < eps
def test_decimate(): """Test decimation of digitizer headshapes with too many points.""" # load headshape and convert to meters hsp_mm = _get_ico_surface(5)['rr'] * 100 hsp_m = hsp_mm / 1000. # save headshape to a file in mm in temporary directory tempdir = _TempDir() sphere_hsp_path = op.join(tempdir, 'test_sphere.txt') np.savetxt(sphere_hsp_path, hsp_mm) # read in raw data using spherical hsp, and extract new hsp with warnings.catch_warnings(record=True) as w: raw = read_raw_kit(sqd_path, mrk_path, elp_txt_path, sphere_hsp_path) assert_true(any('more than' in str(ww.message) for ww in w)) # collect headshape from raw (should now be in m) hsp_dec = np.array([dig['r'] for dig in raw.info['dig']])[8:] # with 10242 points and _decimate_points set to resolution of 5 mm, hsp_dec # should be a bit over 5000 points. If not, something is wrong or # decimation resolution has been purposefully changed assert_true(len(hsp_dec) > 5000) # should have similar size, distance from center dist = np.sqrt(np.sum((hsp_m - np.mean(hsp_m, axis=0))**2, axis=1)) dist_dec = np.sqrt(np.sum((hsp_dec - np.mean(hsp_dec, axis=0))**2, axis=1)) hsp_rad = np.mean(dist) hsp_dec_rad = np.mean(dist_dec) assert_almost_equal(hsp_rad, hsp_dec_rad, places=3)
def measure_objects(self, operand, workspace): '''Performs the measurements on the requested objects''' objects = workspace.get_objects(operand.operand_objects.value) if objects.has_parent_image: area_occupied = np.sum(objects.segmented[objects.parent_image.mask]>0) perimeter = np.sum(outline(np.logical_and(objects.segmented != 0,objects.parent_image.mask))) total_area = np.sum(objects.parent_image.mask) else: area_occupied = np.sum(objects.segmented > 0) perimeter = np.sum(outline(objects.segmented) > 0) total_area = np.product(objects.segmented.shape) m = workspace.measurements m.add_image_measurement(F_AREA_OCCUPIED%(operand.operand_objects.value), np.array([area_occupied], dtype=float )) m.add_image_measurement(F_PERIMETER%(operand.operand_objects.value), np.array([perimeter], dtype=float )) m.add_image_measurement(F_TOTAL_AREA%(operand.operand_objects.value), np.array([total_area], dtype=float)) if operand.should_save_image.value: binary_pixels = objects.segmented > 0 output_image = cpi.Image(binary_pixels, parent_image = objects.parent_image) workspace.image_set.add(operand.image_name.value, output_image) return[[operand.operand_objects.value, str(area_occupied),str(perimeter),str(total_area)]]
def chol_logdet(U):
    if isinstance(U, np.ndarray):
        return 2*np.sum(np.log(np.diag(U)))
    elif isinstance(U, cholmod.Factor):
        return np.sum(np.log(U.D()))
    else:
        raise ValueError("Unknown type of Cholesky factor")
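# A minimal usage sketch (added here, not part of the original module) for
# chol_logdet, assuming `np` is numpy: the log-determinant computed from an
# upper-triangular Cholesky factor should match np.linalg.slogdet on the
# original matrix.
def _example_chol_logdet():
    import numpy as np
    A = np.array([[4.0, 2.0],
                  [2.0, 3.0]])
    U = np.linalg.cholesky(A).T  # upper-triangular factor, A = U.T @ U
    assert np.isclose(chol_logdet(U), np.linalg.slogdet(A)[1])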
def _lmvnpdffull(obs, means, covars): """ Log probability for full covariance matrices. WARNING: In certain cases, this function will modify in-place some of the covariance matrices """ from scipy import linalg import itertools if hasattr(linalg, 'solve_triangular'): # only in scipy since 0.9 solve_triangular = linalg.solve_triangular else: # slower, but works solve_triangular = linalg.solve n_obs, n_dim = obs.shape nmix = len(means) log_prob = np.empty((n_obs, nmix)) for c, (mu, cv) in enumerate(itertools.izip(means, covars)): try: cv_chol = linalg.cholesky(cv, lower=True) except linalg.LinAlgError: # The model is most probabily stuck in a component with too # few observations, we need to reinitialize this components cv[:] = 10 * np.eye(cv.shape[0]) cv_chol = cv cv_log_det = 2 * np.sum(np.log(np.diagonal(cv_chol))) cv_sol = solve_triangular(cv_chol, (obs - mu).T, lower=True).T log_prob[:, c] = -.5 * (np.sum(cv_sol ** 2, axis=1) + \ n_dim * np.log(2 * np.pi) + cv_log_det) return log_prob
def transform_scalars(self, dataset, N=25):
    """Add Poisson noise to tilt images"""
    self.progress.maximum = 1

    tiltSeries = utils.get_array(dataset)
    if tiltSeries is None:
        raise RuntimeError("No scalars found!")
    tiltSeries = tiltSeries.astype(float)

    Ndata = tiltSeries.shape[0] * tiltSeries.shape[1]
    self.progress.maximum = tiltSeries.shape[2]
    step = 0

    for i in range(tiltSeries.shape[2]):
        if self.canceled:
            return
        tiltImage = tiltSeries[:, :, i].copy()
        tiltImage = tiltImage / np.sum(tiltSeries[:, :, i]) * (Ndata * N)
        tiltImage = np.random.poisson(tiltImage)
        tiltImage = tiltImage * np.sum(tiltSeries[:, :, i]) / (Ndata * N)
        tiltSeries[:, :, i] = tiltImage.copy()

        step += 1
        self.progress.value = step

    utils.set_array(dataset, tiltSeries)
def sum_to_shape(X, s):
    """
    Sum axes of the array such that the resulting shape is as given.

    Thus, the shape of the result will be s or an error is raised.
    """
    # First, sum and remove axes that are not in s
    if np.ndim(X) > len(s):
        axes = tuple(range(-np.ndim(X), -len(s)))
    else:
        axes = ()
    Y = np.sum(X, axis=axes)

    # Second, sum axes that are 1 in s but keep the axes
    axes = ()
    for i in range(-np.ndim(Y), 0):
        if s[i] == 1:
            if np.shape(Y)[i] > 1:
                axes = axes + (i,)
        else:
            if np.shape(Y)[i] != s[i]:
                raise ValueError("Shape %s can't be summed to shape %s" %
                                 (np.shape(X), s))
    Y = np.sum(Y, axis=axes, keepdims=True)

    return Y
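# A small illustrative check (added, not from the original source) of how
# sum_to_shape collapses axes: axes missing from the target shape are summed
# away, and axes that are 1 in the target are summed but kept.
def _example_sum_to_shape():
    import numpy as np
    X = np.ones((3, 4, 5))
    Y = sum_to_shape(X, (3, 1, 5))
    assert Y.shape == (3, 1, 5) and np.all(Y == 4.0)  # length-4 axis summed, kept as size 1
    Z = sum_to_shape(X, (4, 5))
    assert Z.shape == (4, 5) and np.all(Z == 3.0)     # leading length-3 axis summed away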
def log_sum_exp(x, axis=0):
    """ Calculates the logarithm of the sum of e to the power of input 'x'.
        The method tries to avoid overflows by using the relationship:
        log(sum(exp(x))) = alpha + log(sum(exp(x-alpha))).

    :Parameter:
        x:    data.
              -type: float or numpy array

        axis: Sums along the given axis.
              -type: int

    :Return:
        Logarithm of the sum of exp of x.
        -type: float or numpy array.
    """
    alpha = x.max(axis) - numx.log(numx.finfo(numx.float64).max) / 2.0
    if axis == 1:
        return numx.squeeze(alpha + numx.log(numx.sum(numx.exp(x.T - alpha), axis=0)))
    else:
        return numx.squeeze(alpha + numx.log(numx.sum(numx.exp(x - alpha), axis=0)))
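# A hedged usage sketch (added, not from the original source); it assumes
# `numx` in log_sum_exp is the module's alias for numpy. The shifted form
# matches the naive log(sum(exp(x))) on small inputs and stays finite where
# the naive form would overflow.
def _example_log_sum_exp():
    import numpy as np
    x = np.array([[1.0, 2.0],
                  [3.0, 4.0]])
    naive = np.log(np.sum(np.exp(x), axis=0))
    assert np.allclose(log_sum_exp(x, axis=0), naive)
    big = np.array([1000.0, 1000.0])  # exp(1000) would overflow the naive form
    assert np.isclose(log_sum_exp(big, axis=0), 1000.0 + np.log(2.0))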
def torgerson(distances, n_components=2):
    """
    Perform classical mds (Torgerson scaling).

    ..note ::
        If the distances are euclidean then this is equivalent to projecting
        the original data points to the first `n` principal components.
    """
    distances = np.asarray(distances)
    assert distances.shape[0] == distances.shape[1]
    N = distances.shape[0]
    # O ^ 2
    D_sq = distances ** 2

    # double center the D_sq
    rsum = np.sum(D_sq, axis=1, keepdims=True)
    csum = np.sum(D_sq, axis=0, keepdims=True)
    total = np.sum(csum)
    D_sq -= rsum / N
    D_sq -= csum / N
    D_sq += total / (N ** 2)
    B = np.multiply(D_sq, -0.5, out=D_sq)

    U, L, _ = np.linalg.svd(B)
    if n_components > N:
        U = np.hstack((U, np.zeros((N, n_components - N))))
        L = np.hstack((L, np.zeros((n_components - N))))
    U = U[:, :n_components]
    L = L[:n_components]
    D = np.diag(np.sqrt(L))
    return np.dot(U, D)
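# An added sanity-check sketch (not from the original source): for a Euclidean
# distance matrix, classical MDS should reproduce the pairwise distances of
# the embedded points.
def _example_torgerson():
    import numpy as np
    rng = np.random.RandomState(0)
    points = rng.rand(6, 2)
    dist = np.sqrt(((points[:, None, :] - points[None, :, :]) ** 2).sum(-1))
    embedding = torgerson(dist, n_components=2)
    redist = np.sqrt(((embedding[:, None, :] - embedding[None, :, :]) ** 2).sum(-1))
    assert np.allclose(dist, redist, atol=1e-6)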
def svm_loss(x, y):
    """
    Computes the loss and gradient for multiclass SVM classification.

    Inputs:
    - x: Input data, of shape (N, C) where x[i, j] is the score for the jth
      class for the ith input.
    - y: Vector of labels, of shape (N,) where y[i] is the label for x[i] and
      0 <= y[i] < C

    Returns a tuple of:
    - loss: Scalar giving the loss
    - dx: Gradient of the loss with respect to x
    """
    N = x.shape[0]
    correct_class_scores = x[np.arange(N), y]
    margins = np.maximum(0, x - correct_class_scores[:, np.newaxis] + 1.0)
    margins[np.arange(N), y] = 0
    loss = np.sum(margins) / N
    num_pos = np.sum(margins > 0, axis=1)
    dx = np.zeros_like(x)
    dx[margins > 0] = 1
    dx[np.arange(N), y] -= num_pos
    dx /= N
    return loss, dx
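# A small worked example (added, not from the original assignment code) of the
# multiclass hinge loss computed by svm_loss, with the margins written out by hand.
def _example_svm_loss():
    import numpy as np
    x = np.array([[3.2, 5.1, -1.7],
                  [1.3, 4.9, 2.0]])
    y = np.array([0, 2])
    loss, dx = svm_loss(x, y)
    # row 0: max(0, 5.1 - 3.2 + 1) + max(0, -1.7 - 3.2 + 1) = 2.9 + 0.0
    # row 1: max(0, 1.3 - 2.0 + 1) + max(0, 4.9 - 2.0 + 1) = 0.3 + 3.9
    assert np.isclose(loss, (2.9 + 4.2) / 2.0)
    assert dx.shape == x.shape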
def dist_tree(self, other, tol=1e-8): if self.ndim == 2: from . import d2 as sim else: from . import d3 as sim assert self.ndim == other.ndim if self.gamma != 0 or other.gamma != 0: assert np.abs(self.gamma - other.gamma) <= tol gamma = (self.gamma + other.gamma) / 2.0 box = sim.LeesEdwardsBox(sim.Vec(1,1), gamma) else: box = sim.OriginBox(1.0) sz1 = self.size_indices() assert(len(sz1) == 2) cutoff1 = int(np.sum(sz1[0])) sz2 = other.size_indices() assert(len(sz2) == 2) cutoff2 = int(np.sum(sz2[0])) vs1 = [sim.Vec(*xy) for idx in sz1 for xy in self.rs[idx]] vs2 = [sim.Vec(*xy) for idx in sz2 for xy in other.rs[idx]] tree = sim.JammingTreeBD(box, sim.vecvector(vs1), sim.vecvector(vs2), cutoff1, cutoff2) return tree
def compute_distances_no_loops(self, X):
    """
    Compute the distance between each test point in X and each training point
    in self.X_train using no explicit loops.

    Input / Output: Same as compute_distances_two_loops
    """
    num_test = X.shape[0]
    num_train = self.X_train.shape[0]
    dists = np.zeros((num_test, num_train))
    # l2(A, X) = l2([a,b], [x,y]) = ...
    #   sqrt( (a-x)^2 + (b-y)^2 ) = ...
    # = sqrt( a^2 -2ax + x^2 + b^2 -2by + y^2 ) =
    # = sqrt( (a^2 + b^2) + (x^2 + y^2) -2(ax + by) ) = ...
    # = sqrt( A^2 - 2 * A.dot(X.T) + X^2 ) =
    # = np.sqrt( np.sum(A ** 2) + np.sum(X ** 2) -2 * A.dot(X.T) )

    # -2*A.dot(X)
    res = -2 * X.dot(self.X_train.T)
    # -2*A.dot(X) + A^2 + X^2
    res = res + np.sum(X ** 2, axis=1).reshape((-1, 1)) + np.sum(self.X_train ** 2, axis=1)
    dists = np.sqrt(res)
    return dists
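# Since compute_distances_no_loops is a method of a classifier class that is
# not shown here, this added sketch demonstrates the same fully vectorized L2
# identity on plain arrays and checks it against a direct broadcasting
# computation.
def _example_vectorized_l2():
    import numpy as np
    rng = np.random.RandomState(0)
    X, X_train = rng.rand(4, 3), rng.rand(5, 3)
    res = -2 * X.dot(X_train.T)
    res = res + np.sum(X ** 2, axis=1).reshape((-1, 1)) + np.sum(X_train ** 2, axis=1)
    direct = np.sqrt(((X[:, None, :] - X_train[None, :, :]) ** 2).sum(-1))
    assert np.allclose(np.sqrt(res), direct)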
def dice_(seg, gt):
    intersection = 2. * np.sum(seg * gt)
    denominator = (np.sum(np.square(seg)) + np.sum(np.square(gt)))
    if denominator == 0:
        return 1.
    similarity = intersection / denominator
    return similarity
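# An added toy example (not from the original source) for dice_: two binary
# masks overlapping in 2 of their 3 foreground pixels give 2*2 / (3 + 3).
def _example_dice():
    import numpy as np
    seg = np.array([0., 1., 1., 1., 0.])
    gt = np.array([0., 0., 1., 1., 1.])
    assert np.isclose(dice_(seg, gt), 4.0 / 6.0)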
def discrepancy(observed, simulated, expected):
    r"""Calculates Freeman-Tukey statistics (Freeman and Tukey 1950) as
    a measure of discrepancy between observed and r replicates of simulated data.
    This is a convenient method for assessing goodness-of-fit (see Brooks et al. 2000).

    D(x|\theta) = \sum_j (\sqrt{x_j} - \sqrt{e_j})^2

    :Parameters:
      observed : Iterable of observed values (length n)
      simulated : Iterable of simulated values (length rxn)
      expected : Iterable of expected values (length rxn)

    :Returns:
      D_obs : Discrepancy of observed values
      D_sim : Discrepancy of simulated values
    """
    try:
        simulated = simulated.astype(float)
    except AttributeError:
        simulated = simulated.trace().astype(float)
    try:
        expected = expected.astype(float)
    except AttributeError:
        expected = expected.trace().astype(float)

    D_obs = np.sum([(np.sqrt(observed) - np.sqrt(e))**2 for e in expected], 1)
    D_sim = np.sum([(np.sqrt(s) - np.sqrt(e))**2 for s, e in zip(simulated, expected)], 1)

    # Print p-value
    count = sum(s > o for o, s in zip(D_obs, D_sim))
    print_('Bayesian p-value: p=%.3f' % (1. * count / len(D_obs)))

    return D_obs, D_sim
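# A short worked example of the Freeman-Tukey statistic itself (added, not from
# the original source). It computes D = sum_j (sqrt(x_j) - sqrt(e_j))^2
# directly rather than calling discrepancy(), which also prints a Bayesian
# p-value via a module-level print_ helper.
def _example_freeman_tukey():
    import numpy as np
    observed = np.array([4.0, 9.0, 16.0])
    expected = np.array([1.0, 9.0, 25.0])
    D = np.sum((np.sqrt(observed) - np.sqrt(expected)) ** 2)
    assert np.isclose(D, (2 - 1) ** 2 + (3 - 3) ** 2 + (4 - 5) ** 2)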
def get_num_diffs(cummulativeFrames):
    shape = cummulativeFrames.shape
    xMax = shape[0]
    yMax = shape[1]
    numBlocks = (xMax * yMax / PIXINCR)
    print(numBlocks)
    totalDiffs = np.sum(cummulativeFrames)
    totalDiffsAvg = totalDiffs / numBlocks
    print(totalDiffsAvg)
    xPos = 0
    yPos = 0
    allDiffs = []
    # print "Starting diffs.."
    diff_count = 0
    while yPos < yMax:
        xPos = 0
        while xPos < xMax:
            chunk = cummulativeFrames[xPos:xPos+PIXINCR, yPos:yPos+PIXINCR]
            diffs = np.sum(chunk)
            allDiffs.append(diffs)
            xPos += PIXINCR
        yPos += PIXINCR
    for diff in allDiffs:
        if diff > totalDiffsAvg:
            diff_count += 1
    return diff_count
def smooth_objective(self, x, mode='both', check_feasibility=False): """ Evaluate a smooth function and/or its gradient if mode == 'both', return both function value and gradient if mode == 'grad', return only the gradient if mode == 'func', return only the function value """ x = self.apply_offset(x) exp_x = np.exp(x) #TODO: Using transposes to scale the rows of a 2d array - should we use an affine_transform to do this? #JT: should be able to do this with np.newaxis if mode == 'both': ratio = ((self.trials/(1. + np.sum(exp_x, axis=1))) * exp_x.T).T f, g = -2. * self.scale(np.sum(self.firstcounts * x) - np.dot(self.trials, np.log(1. + np.sum(exp_x, axis=1)))), - 2 * self.scale(self.firstcounts - ratio) return f, g elif mode == 'grad': ratio = ((self.trials/(1. + np.sum(exp_x, axis=1))) * exp_x.T).T f, g = None, - 2 * self.scale(self.firstcounts - ratio) return g elif mode == 'func': f, g = -2. * self.scale(np.sum(self.firstcounts * x) - np.dot(self.trials, np.log(1. + np.sum(exp_x, axis=1)))), None return f else: raise ValueError("mode incorrectly specified")
def __init__(self, data, classes, tree_features, n_trees=100): self.n_features = np.shape(data)[1] n_rows = np.shape(data)[0] n_nans = np.sum(np.isnan(data), 0) data = data[:, n_nans < n_rows] self.n_features = np.shape(data)[1] n_nans = np.sum(np.isnan(data), 1) data = data[n_nans < self.n_features, :] self.n_rows = np.shape(data)[0] if (tree_features > self.n_features): tree_features = self.n_features self.col_list = np.zeros((n_trees, tree_features), dtype='int') self.n_trees = n_trees self.bags = [] for i in range(n_trees): cols = sample(range(self.n_features), tree_features) cols.sort() self.col_list[i, :] = cols data_temp = data[:, cols] n_nans = np.sum(np.isnan(data_temp), 1) data_temp = data_temp[n_nans == 0, :] classes_temp = classes[n_nans == 0] #bag = BaggingClassifier(n_estimators=1, max_features=tree_features) bag = RandomForestClassifier(n_estimators=1, max_features=tree_features) bag.fit(data_temp, classes_temp) self.bags.append(bag) print(np.shape(data_temp))
def __update_b_vec(self,cur_obs): # convert measurement vector into emission probabilities # repeat the observation in columns cur_obs_mat = np.tile(cur_obs,(self.V_mat.shape[1],1)).T masked_mat = cur_obs_mat == self.V_mat # Extract the probability of the observation on each link for each state p_obs_given_off_link = np.sum(self.off_links*masked_mat,axis=1) p_obs_given_on_link = np.sum(self.on_links*masked_mat,axis=1) # replicate the probability of each measurement on each link for each state p_obs_mat_off = np.tile(p_obs_given_off_link,(self.num_states,1)).T p_obs_mat_on = np.tile(p_obs_given_on_link,(self.num_states,1)).T # Compute emission probabilities tmp1 = self.codewords*p_obs_mat_on tmp2 = np.logical_not(self.codewords)*p_obs_mat_off tmp3 = tmp1 + tmp2 # divide tmp3 into groups of 4. Multiply and normalize prev = np.ones(self.num_states) start_mark = 0 end_mark = 4 group = end_mark while start_mark < self.num_links: current = np.product(tmp3[start_mark:np.minimum(self.num_links,end_mark),:],axis=0) current = current/np.sum(current) prev = (prev*current)/np.sum(prev*current) end_mark += group start_mark += group # add emission probabilities to the circular buffer self.C.add_observation(prev)
def __set_static_gaus_pmfs(self):
    if np.logical_not(self.off_buff.is_full()):
        print("The long term buffer is not yet full. This may give undesirable results")

    # median RSS of off-state buffer
    cal_med = self.off_buff.get_no_nan_median()

    if (np.sum(cal_med == 127) > 0) | (np.sum(np.isnan(cal_med)) > 0):
        sys.stderr.write('At least one link has a median of 127 or is nan\n\n')
        quit()

    if (np.sum(np.isnan(self.off_buff.get_nanvar())) > 0):
        sys.stderr.write('the long term buffer has a nan')
        quit()

    cal_med_mat = np.tile(cal_med, (self.V_mat.shape[1], 1)).T

    # variance of RSS during calibration
    cal_var = np.maximum(self.off_buff.get_nanvar(), self.omega)  # 3.0
    cal_var_mat = np.tile(cal_var, (self.V_mat.shape[1], 1)).T

    # Compute the off_link emission probabilities for each link
    x = np.exp(- (self.V_mat - cal_med_mat)**2 / (2 * cal_var_mat / 1.0))  # 1.0
    self.off_links = self.__normalize_pmf(x)

    # Compute the on_link emission probabilities for each link
    x = np.exp(- (self.V_mat - (cal_med_mat - self.Delta))**2 / (self.eta * 2 * cal_var_mat))  # 3
    self.on_links = self.__normalize_pmf(x)
def findrotation_kabsch(coords1, coords2, align_com=True):
    '''
    Kabsch, Wolfgang, (1976) "A solution of the best rotation to relate two sets
    of vectors", Acta Crystallographica 32:922
    '''
    # check if arrays are of same size
    if coords1.size != coords2.size:
        raise BaseException("dimension of arrays does not match")

    # reshape the arrays
    x1 = coords1.reshape([-1, 3])
    x2 = coords2.reshape([-1, 3])

    # determine number of atoms
    natoms = x1.shape[0]

    # set both com to zero
    if align_com:
        com1 = np.sum(x1, axis=0) / float(natoms)
        com2 = np.sum(x2, axis=0) / float(natoms)
        x1 -= com1
        x2 -= com2

    # calculate covariance matrix
    A = np.dot(x2.transpose(), x1)

    # and do single value decomposition
    u, s, v = np.linalg.svd(A)

    if np.linalg.det(u) * np.linalg.det(v) + 1.0 < 1e-8:
        s[-1] = -s[-1]
        u[:, -1] = -u[:, -1]

    return np.dot(u, v).transpose()
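# An added round-trip check (not from the original source) for
# findrotation_kabsch: build coordinates related by a known rotation and verify
# that the returned matrix maps the second (centered) set back onto the first.
# The copies matter because the function centers its inputs in place.
def _example_kabsch():
    import numpy as np
    rng = np.random.RandomState(0)
    theta = 0.3
    R = np.array([[np.cos(theta), -np.sin(theta), 0.0],
                  [np.sin(theta),  np.cos(theta), 0.0],
                  [0.0,            0.0,           1.0]])
    x1 = rng.rand(10, 3)
    x2 = x1.dot(R.T)  # rotate every point by R
    rot = findrotation_kabsch(x1.copy(), x2.copy())
    x1c = x1 - x1.mean(axis=0)
    x2c = x2 - x2.mean(axis=0)
    assert np.allclose(x2c.dot(rot.T), x1c)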
def compute_size(fonts, layout): """Compute width + height for the layout + line boxes + text boxes.""" for line in layout.children: for box in line.children: if isinstance(box, TextBox): font = fonts.font(box.style) box.width = font.width(box.text) # Box baseline is the relative offset from the line baseline in canvas coordinates alignment_baseline = box.style["alignment-baseline"] if alignment_baseline == "alphabetic": box.baseline = 0 elif alignment_baseline == "central": box.baseline = font.ascent * 0.5 elif alignment_baseline == "hanging": box.baseline = font.ascent elif alignment_baseline == "middle": box.baseline = font.ascent * 0.35 else: raise ValueError("Unknown alignment-baseline value: %s" % alignment_baseline) # Box top is the relative offset from the line baseline in canvas coordinates box.top = box.baseline - font.ascent # Box bottom is the relative offset from the line baseline in canvas coordinates box.bottom = box.baseline - font.descent box.height = box.bottom - box.top elif isinstance(box, MarkerBox): font = fonts.font(box.style) box.baseline = 0 box.top = box.baseline - font.ascent box.bottom = box.baseline - font.descent box.height = box.bottom - box.top box.width = box.height elif isinstance(box, (PushHyperlink, PopHyperlink)): box.baseline = 0 box.top = box.baseline box.bottom = box.baseline box.height = box.bottom - box.top box.width = 0 else: raise Exception("Unexpected box type: %s" % box) # pragma: no cover # Line top is the relative offset from the line baseline in canvas coordinates line.top = numpy.min([child.top for child in line.children]) if line.children else 0 # Line bottom is the relative offset from the line baseline in canvas coordinates line.bottom = numpy.max([child.bottom for child in line.children]) if line.children else 0 actual_line_height = line.bottom - line.top explicit_line_height = line.style["line-height"] offset = (explicit_line_height - actual_line_height) * 0.5 if offset > 0: line.top -= offset line.bottom += offset line.width = numpy.sum([child.width for child in line.children]) if line.children else 0 line.height = line.bottom - line.top layout.height = numpy.sum([line.height for line in layout.children]) if layout.children else 0
def fitBrokenToStrips(self): """Fits broken-line to strip data""" # convenience views to avoid typos bStrip = (self.stripCounts > 10) if np.sum(bStrip) < 1: return x = self.stripFeHs[bStrip] y = self.stripMedns[bStrip] # could come back to this later # guess set of parameters #guess = [-0.4, -40., -240., 40.] guess = [-0.3, 0.0, -250., 0.0] # Yes this could all be looped through... meritStraight = lambda pars, x, y: oneStraight(x, pars) - y meritBroken = lambda pars, x, y: twoStraight(x, pars) - y # do the fitting self.parsStraight, self.successStraight = \ leastsq(meritStraight,guess[0:2] ,args=(x,y)) self.parsBroken, self.successBroken = \ leastsq(meritBroken,guess[:] ,args=(x,y)) # compute the sum of residuals, over the strips (not the # stars) self.chisqStraight = np.sum(meritStraight(self.parsStraight, x, y)**2) self.chisqBroken = np.sum(meritBroken(self.parsBroken, x, y)**2)
def decider(pp_matrix, ML_dec, t, R_lim, num_annotators, C):
    # concatenate transpose of ML_decision to pp_matrix
    pp_matrix2 = np.hstack((pp_matrix, ML_dec.reshape((C, 1))))
    # create vector of normalized sums of pp_matrix2
    v = np.sum(pp_matrix2, axis=1) / np.sum(np.sum(pp_matrix2))
    maximum = np.amax(v)   # max value of v
    maxIdx = np.argmax(v)  # index of max value of v
    if maximum >= t[maxIdx]:
        # maximum is above the threshold for the given class: retire the image
        decision = 1
        print('Image is retired')
    elif num_annotators >= R_lim:
        # R_lim or more annotators have looked at the image and no decision was
        # reached: pass it to more experienced users
        decision = 2
        print('Image is given to the upper class')
    else:
        # fewer than R_lim annotators have looked at the image: keep it
        decision = 3
        print('More labels are needed for the image')
    image_class = maxIdx
    return decision, image_class
def std(f):
    x = np.array(range(len(f)))
    # normalize; we do not prefer attributes with many values
    x = x / x.mean()
    xf = np.multiply(f, x)
    x2f = np.multiply(f, np.power(x, 2))
    return np.sqrt((np.sum(x2f) - np.power(np.sum(xf), 2) / np.sum(f)) / (np.sum(f) - 1))
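# An added consistency check (not from the original source): the
# frequency-weighted formula in std(f) should agree with numpy's sample
# standard deviation on the expanded observations.
def _example_std():
    import numpy as np
    f = np.array([2.0, 3.0, 5.0])  # frequencies of the values 0, 1, 2
    x = np.arange(len(f)) / np.arange(len(f)).mean()
    sample = np.repeat(x, f.astype(int))
    assert np.isclose(std(f), np.std(sample, ddof=1))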
def _LMLgrad_beta(self, hyperparams):
    LATildeInfTP = numpy.dot(self._LAtildeInv.T, self._P)
    gBeta = .5 * (self.d * (numpy.trace(self._C) + (self.n - self.m) * self._beta_inv - self._psi_0)
                  - self.TrYY + self._TrPP
                  + self._beta_inv ** 2 * self.d * numpy.sum(self._LAtildeInv * self._LAtildeInv)
                  + self._beta_inv * numpy.sum(LATildeInfTP ** 2))
    return -gBeta  # negative because gradient is w.r.t loglikelihood
def cond_entropy(period, data, p_bins=10, m_bins=5):
    '''
    Compute the conditional entropy for the normalized observations
    '''
    if period <= 0:
        return np.PINF
    r = rephase(data, period)
    bins, *_ = np.histogram2d(r[:, 0], r[:, 1], [p_bins, m_bins], [[0, 1], [0, 1]])
    size = r.shape[0]
    if size > 0:
        divided_bins = bins / size
        arg_positive = divided_bins > 0
        column_sums = np.sum(divided_bins, axis=1)
        column_sums = np.repeat(np.reshape(column_sums, (p_bins, 1)), m_bins, axis=1)

        select_divided_bins = divided_bins[arg_positive]
        select_column_sums = column_sums[arg_positive]

        A = np.empty((p_bins, m_bins), dtype=float)
        A[arg_positive] = select_divided_bins \
            * np.log(select_column_sums / select_divided_bins)
        A[~arg_positive] = 0

        return np.sum(A)
    else:
        return np.PINF
def _beam_map_single(self, bl_index, f_index): p_stokes = [ 0.5 * np.array([[1.0, 0.0], [0.0, 1.0]]), 0.5 * np.array([[1.0, 0.0], [0.0, -1.0]]), 0.5 * np.array([[0.0, 1.0], [1.0, 0.0]]), 0.5 * np.array([[0.0, -1.0J], [1.0J, 0.0]]) ] # Get beam maps for each feed. feedi, feedj = self.uniquepairs[bl_index] beami, beamj = self.beam(feedi, f_index), self.beam(feedj, f_index) # Get baseline separation and fringe map. uv = self.baselines[bl_index] / self.wavelengths[f_index] fringe = visibility.fringe(self._angpos, self.zenith, uv) pow_stokes = [ np.sum(beami * np.dot(beamj.conjugate(), polproj), axis=1) * self._horizon for polproj in p_stokes] # Calculate the solid angle of each beam pxarea = (4*np.pi / beami.shape[0]) om_i = np.sum(np.abs(beami)**2 * self._horizon[:, np.newaxis]) * pxarea om_j = np.sum(np.abs(beamj)**2 * self._horizon[:, np.newaxis]) * pxarea omega_A = (om_i * om_j)**0.5 # Calculate the complex visibility transfer function cv_stokes = [ p * (2 * fringe / omega_A) for p in pow_stokes ] return cv_stokes
def pick_parents(self): # compute fitness w = self.calc_fitness() # sample parents proportional to their fitness males = np.array(np.nonzero(self.sex == 0)).flatten() females = np.array(np.nonzero(self.sex == 1)).flatten() w_male = w[males]/np.sum(w[males]) w_female = w[females]/np.sum(w[females]) males = np.random.choice(males, size = self.size/2, p = w_male) females = np.random.choice(females, size = self.size/2, p = w_female) assert(len(males) == len(females)) idx = [] sexes = [] for i in range(0, len(males)): idx.append(males[i]) idx.append(females[i]) sexes.append(0) sexes.append(1) # make a copy of current population newpop = copy.deepcopy(self) newpop.sex = np.array(sexes) # now copy chromosomes from current population to new one for j in range(0, len(self.chrlen)): for i in range(0, self.size): newpop.tr[i] = self.tr[ idx[i] ] newpop.chroms[j][2*i] = self.chroms[j][ 2*idx[i] ] newpop.chroms[j][2*i+1] = self.chroms[j][ 2*idx[i]+1 ] return newpop
def GetPascalVOCMetrics( self, boundingboxes, IOUThreshold=0.5, method=MethodAveragePrecision.EveryPointInterpolation): """Get the metrics used by the VOC Pascal 2012 challenge. Get Args: boundingboxes: Object of the class BoundingBoxes representing ground truth and detected bounding boxes; IOUThreshold: IOU threshold indicating which detections will be considered TP or FP (default value = 0.5); method (default = EveryPointInterpolation): It can be calculated as the implementation in the official PASCAL VOC toolkit (EveryPointInterpolation), or applying the 11-point interpolatio as described in the paper "The PASCAL Visual Object Classes(VOC) Challenge" or EveryPointInterpolation" (ElevenPointInterpolation); Returns: A list of dictionaries. Each dictionary contains information and metrics of each class. The keys of each dictionary are: dict['class']: class representing the current dictionary; dict['precision']: array with the precision values; dict['recall']: array with the recall values; dict['AP']: average precision; dict['interpolated precision']: interpolated precision values; dict['interpolated recall']: interpolated recall values; dict['total positives']: total number of ground truth positives; dict['total TP']: total number of True Positive detections; dict['total FP']: total number of False Negative detections; """ ret = [ ] # list containing metrics (precision, recall, average precision) of each class # List with all ground truths (Ex: [imageName,class,confidence=1, (bb coordinates XYX2Y2)]) groundTruths = [] # List with all detections (Ex: [imageName,class,confidence,(bb coordinates XYX2Y2)]) detections = [] # Get all classes classes = [] # Loop through all bounding boxes and separate them into GTs and detections # print(f'bb {len(boundingboxes.getBoundingBoxes())}') for bb in boundingboxes.getBoundingBoxes(): # [imageName, class, confidence, (bb coordinates XYX2Y2)] if bb.getBBType() == BBType.GroundTruth: groundTruths.append([ bb.getImageName(), bb.getClassId(), 1, bb.getAbsoluteBoundingBox(BBFormat.XYX2Y2) ]) else: detections.append([ bb.getImageName(), bb.getClassId(), bb.getConfidence(), bb.getAbsoluteBoundingBox(BBFormat.XYX2Y2) ]) # get class if bb.getClassId() not in classes: classes.append(bb.getClassId()) classes = sorted(classes) print(f'gts {groundTruths}') print(f'predict {detections}') # Precision x Recall is obtained individually by each class # Loop through by classes for c in classes: # Get only detection of class c dects = [] [dects.append(d) for d in detections if d[1] == c] # Get only ground truths of class c gts = [] [gts.append(g) for g in groundTruths if g[1] == c] npos = len(gts) # sort detections by decreasing confidence dects = sorted(dects, key=lambda conf: conf[2], reverse=True) print(f'sorted detections {dects}') TP = np.zeros(len(dects)) FP = np.zeros(len(dects)) # create dictionary with amount of gts for each image det = Counter([cc[0] for cc in gts]) # print (f'det {det}') for key, val in det.items(): det[key] = np.zeros(val) # print(det['00003']) # print("Evaluating class: %s (%d detections)" % (str(c), len(dects))) # Loop through detections print(len(dects)) for d in range(len(dects)): # print('dect %s => %s' % (dects[d][0], dects[d][3],)) # Find ground truth image # for gt in gts: # if gt[0] == dects[d][0]: # print (gt) gt = [gt for gt in gts if gt[0] == dects[d][0]] print(f'checked_gt {gt}') iouMax = sys.float_info.min print(f'ioumax {iouMax}') for j in range(len(gt)): # print('Ground truth gt => %s' % (gt[j][3],)) print(f'iou {dects[d][3], 
gt[j][3]}') iou = Evaluator.iou(dects[d][3], gt[j][3]) if iou > iouMax: iouMax = iou jmax = j # Assign detection as true positive/don't care/false positive if iouMax >= IOUThreshold: print(det[dects[d][0]]) if det[dects[d][0]][jmax] == 0: TP[d] = 1 # count as true positive det[dects[d][0]][jmax] = 1 # flag as already 'seen' # print("TP") else: FP[d] = 1 # count as false positive # print("FP") # - A detected "cat" is overlaped with a GT "cat" with IOU >= IOUThreshold. else: FP[d] = 1 # count as false positive # print("FP") print(f'true pos {TP}') print(f'flase pos {FP}') # compute precision, recall and average precision acc_FP = np.cumsum(FP) acc_TP = np.cumsum(TP) rec = acc_TP / npos prec = np.divide(acc_TP, (acc_FP + acc_TP)) # Depending on the method, call the right implementation if method == MethodAveragePrecision.EveryPointInterpolation: [ap, mpre, mrec, ii] = Evaluator.CalculateAveragePrecision(rec, prec) else: [ap, mpre, mrec, _] = Evaluator.ElevenPointInterpolatedAP(rec, prec) # add class result in the dictionary to be returned r = { 'class': c, 'precision': prec, 'recall': rec, 'AP': ap, 'interpolated precision': mpre, 'interpolated recall': mrec, 'total positives': npos, 'total TP': np.sum(TP), 'total FP': np.sum(FP) } ret.append(r) return ret
def surf_semisupervised( vol_names, atlas_vol, atlas_seg, nb_surface_pts, labels=None, batch_size=1, surf_bidir=True, surface_pts_upsample_factor=2, smooth_seg_std=1, nb_labels_sample=None, sdt_vol_resize=1, align_segs=False, add_feat_axis=True ): """ Scan-to-atlas generator for semi-supervised learning using surface point clouds from segmentations. Parameters: vol_names: List of volume files to load. atlas_vol: Atlas volume array. atlas_seg: Atlas segmentation array. nb_surface_pts: Total number surface points for all structures. labels: Label list to include. If None, all labels in atlas_seg are used. Default is None. batch_size: Batch size. NOTE some features only implemented for 1. Default is 1. surf_bidir: Train with bidirectional surface distance. Default is True. surface_pts_upsample_factor: Upsample factor for surface pointcloud. Default is 2. smooth_seg_std: Segmentation smoothness sigma. Default is 1. nb_labels_sample: Number of labels to sample. Default is None. sdt_vol_resize: Resize factor for signed distance transform volumes. Default is 1. align_segs: Whether to pass in segmentation image instead. Default is False. add_feat_axis: Load volume arrays with added feature axis. Default is True. """ # some input checks assert nb_surface_pts > 0, 'number of surface point should be greater than 0' # prepare some shapes vol_shape = atlas_seg.shape sdt_shape = [int(f * sdt_vol_resize) for f in vol_shape] # compute labels from atlas, and the number of labels to sample. if labels is not None: atlas_seg = voxelmorph.py.utils.filter_labels(atlas_seg, labels) else: labels = np.sort(np.unique(atlas_seg))[1:] # use all labels by default if nb_labels_sample is None: nb_labels_sample = len(labels) # prepare keras format atlases atlas_vol_bs = np.repeat(atlas_vol[np.newaxis, ..., np.newaxis], batch_size, axis=0) atlas_seg_bs = np.repeat(atlas_seg[np.newaxis, ..., np.newaxis], batch_size, axis=0) # prepare surface extraction function std_to_surf = lambda x, y: voxelmorph.py.utils.sdt_to_surface_pts(x, y, surface_pts_upsample_factor=surface_pts_upsample_factor, thr=(1 / surface_pts_upsample_factor + 1e-5)) # prepare zeros, which will be used for outputs unused in cost functions zero_flow = np.zeros((batch_size, *vol_shape, len(vol_shape))) zero_surface_values = np.zeros((batch_size, nb_surface_pts, 1)) # precompute label edge volumes atlas_sdt = [None] * len(labels) atlas_label_vols = [None] * len(labels) nb_edges = np.zeros(len(labels)) for li, label in enumerate(labels): # if only one label, get surface points here atlas_label_vols[li] = atlas_seg == label atlas_label_vols[li] = voxelmorph.py.utils.clean_seg(atlas_label_vols[li], smooth_seg_std) atlas_sdt[li] = voxelmorph.py.utils.vol_to_sdt(atlas_label_vols[li], sdt=True, sdt_vol_resize=sdt_vol_resize) nb_edges[li] = np.sum(np.abs(atlas_sdt[li]) < 1.01) layer_edge_ratios = nb_edges / np.sum(nb_edges) # if working with all the labels passed in (i.e. no label sampling per batch), # pre-compute the atlas surface points atlas_surface_pts = np.zeros((batch_size, nb_surface_pts, len(vol_shape) + 1)) if nb_labels_sample == len(labels): nb_surface_pts_sel = voxelmorph.py.utils.get_surface_pts_per_label(nb_surface_pts, layer_edge_ratios) for li, label in enumerate(labels): # if only one label, get surface points here atlas_surface_pts_ = std_to_surf(atlas_sdt[li], nb_surface_pts_sel[li])[np.newaxis, ...] 
# get the surface point stack indexes for this element srf_idx = slice(int(np.sum(nb_surface_pts_sel[:li])), int(np.sum(nb_surface_pts_sel[:li + 1]))) atlas_surface_pts[:, srf_idx, :-1] = np.repeat(atlas_surface_pts_, batch_size, 0) atlas_surface_pts[:, srf_idx, -1] = li # generator gen = volgen(vol_names, return_segs=True, batch_size=batch_size, add_feat_axis=add_feat_axis) assert batch_size == 1, 'only batch size 1 supported for now' while True: # prepare data X = next(gen) X_img = X[0] X_seg = voxelmorph.py.utils.filter_labels(X[1], labels) # get random labels sel_label_idxs = range(len(labels)) # all labels if nb_labels_sample != len(labels): sel_label_idxs = np.sort(np.random.choice(range(len(labels)), size=nb_labels_sample, replace=False)) sel_layer_edge_ratios = [layer_edge_ratios[li] for li in sel_label_idxs] nb_surface_pts_sel = voxelmorph.py.utils.get_surface_pts_per_label(nb_surface_pts, sel_layer_edge_ratios) # prepare signed distance transforms and surface point arrays X_sdt_k = np.zeros((batch_size, *sdt_shape, nb_labels_sample)) atl_dt_k = np.zeros((batch_size, *sdt_shape, nb_labels_sample)) subj_surface_pts = np.zeros((batch_size, nb_surface_pts, len(vol_shape) + 1)) if nb_labels_sample != len(labels): atlas_surface_pts = np.zeros((batch_size, nb_surface_pts, len(vol_shape) + 1)) for li, sli in enumerate(sel_label_idxs): # get the surface point stack indexes for this element srf_idx = slice(int(np.sum(nb_surface_pts_sel[:li])), int(np.sum(nb_surface_pts_sel[:li+1]))) # get atlas surface points for this label if nb_labels_sample != len(labels): atlas_surface_pts_ = std_to_surf(atlas_sdt[sli], nb_surface_pts_sel[li])[np.newaxis, ...] atlas_surface_pts[:, srf_idx, :-1] = np.repeat(atlas_surface_pts_, batch_size, 0) atlas_surface_pts[:, srf_idx, -1] = sli # compute X distance from surface X_label = X_seg == labels[sli] X_label = voxelmorph.py.utils.clean_seg_batch(X_label, smooth_seg_std) X_sdt_k[..., li] = voxelmorph.py.utils.vol_to_sdt_batch(X_label, sdt=True, sdt_vol_resize=sdt_vol_resize)[..., 0] if surf_bidir: atl_dt = atlas_sdt[li][np.newaxis, ...] atl_dt_k[..., li] = np.repeat(atl_dt, batch_size, 0) ssp_lst = [std_to_surf(f[...], nb_surface_pts_sel[li]) for f in X_sdt_k[..., li]] subj_surface_pts[:, srf_idx, :-1] = np.stack(ssp_lst, 0) subj_surface_pts[:, srf_idx, -1] = li # check if returning segmentations instead of images # this is a bit hacky for basically building a segmentation-only network (no images) X_ret = X_img atlas_ret = atlas_vol_bs if align_segs: assert len(labels) == 1, 'align_seg generator is only implemented for single label' X_ret = X_seg == labels[0] atlas_ret = atlas_seg_bs == labels[0] # finally, output if surf_bidir: inputs = [X_ret, atlas_ret, X_sdt_k, atl_dt_k, subj_surface_pts, atlas_surface_pts] outputs = [atlas_ret, X_ret, zero_flow, zero_surface_values, zero_surface_values] else: inputs = [X_ret, atlas_ret, X_sdt_k, atlas_surface_pts] outputs = [atlas_ret, X_ret, zero_flow, zero_surface_values] yield (inputs, outputs)
def refresh(i): # i=3, a full epoch occurs every i=798/48 if i % self.best_args['update_interval'] == 0: z = model.extract_feature(self.feature, args, None, train_iter, N, self.xpu).values()[0] p = np.zeros((z.shape[0], self.best_args['num_centers'])) self.dec_op.forward([z, args['dec_mu'].asnumpy()], [p]) # the soft assignments qi (pred) y_pred = p.argmax(axis=1) print np.std(np.bincount(y_pred)), np.bincount(y_pred) # use a y that only considers the filledbyBC examples # compare soft assignments with known labels print '\n... Updating i = %f' % i print np.std( np.bincount(self.best_args['y_dec'].astype( np.int))), np.bincount(self.best_args['y_dec'].astype( np.int)) # do for overall class B and M Z_embedding_tree = sklearn.neighbors.BallTree(z, leaf_size=5) # This finds the indices of 5 closest neighbors labels = np.asarray(self.best_args['roi_labels']) Neg = sum(labels == np.unique(labels)[0]) #for B Pos = sum(labels == np.unique(labels)[2]) #for M TP = [] TN = [] for k in range(z.shape[0]): iclass = labels[k] dist, ind = Z_embedding_tree.query([z[k]], k=6) dist5nn, ind5nn = dist[k != ind], ind[k != ind] class5nn = labels[ind5nn] # exlcude U class class5nn = class5nn[class5nn != 'K'] if (len(class5nn) > 0): predc = [] for c in np.unique(class5nn): predc.append(sum(class5nn == c)) # predicion based on majority predclass = np.unique(class5nn)[predc == max(predc)] if (len(predclass) == 1): # compute TP if M if (iclass == 'M'): TP.append(predclass[0] == iclass) # compute TN if B if (iclass == 'B'): TN.append(predclass[0] == iclass) if (len(predclass) == 2): # compute TP if M if (iclass == 'M'): TP.append(predclass[1] == iclass) # compute TN if B if (iclass == 'B'): TN.append(predclass[0] == iclass) # compute TPR and TNR TPR = sum(TP) / float(Pos) TNR = sum(TN) / float(Neg) Acc = sum(TP + TN) / float(Pos + Neg) print "True Posite Rate (TPR) = %f " % TPR print "True Negative Rate (TNR) = %f " % TNR print "Accuracy (Acc) = %f " % Acc # save best args self.best_args['acci'].append(Acc) if (Acc >= self.maxAcc): print 'Improving maxAcc = {}'.format(Acc) for key, v in args.items(): self.best_args[key] = args[key] self.maxAcc = Acc self.best_args['bestacci'].append(Acc) if (i % self.best_args['plot_interval'] == 0 and self.ploti <= 15): # Visualize the progression of the embedded representation in a subsample of data # For visualization we use t-SNE (van der Maaten & Hinton, 2008) applied to the embedded points zi. It tsne = TSNE(n_components=2, perplexity=self.perplexity, learning_rate=self.learning_rate, init='pca', random_state=0, verbose=2, method='exact') Z_tsne = tsne.fit_transform(z) ax = fig.add_subplot(4, 4, 1 + self.ploti) plot_embedding_unsuper_NMEdist_intenh( Z_tsne, named_y, ax, title="Epoch %d z_tsne iter (%d)" % (self.ploti, i), legend=False) self.ploti = self.ploti + 1 ## COMPUTING target distributions P ## we compute pi by first raising qi to the second power and then normalizing by frequency per cluster: weight = 1.0 / p.sum(axis=0) # p.sum provides fj weight *= self.best_args['num_centers'] / weight.sum() p = (p**2) * weight train_iter.data_list[1][:] = (p.T / p.sum(axis=1)).T print np.sum(y_pred != self.best_args['y_pred'] ), 0.001 * y_pred.shape[0] # For the purpose of discovering cluster assignments, we stop our procedure when less than tol% of points change cluster assignment between two consecutive iterations. 
# tol% = 0.001 if i == self.best_args[ 'update_interval'] * 200: # performs 1epoch = 615/3 = 205*1000epochs self.best_args['y_pred'] = y_pred self.best_args['p'] = p self.best_args['z'] = z self.best_args['acci'].append(Acc) return True self.best_args['y_pred'] = y_pred self.best_args['p'] = p self.best_args['z'] = z
def ce(label, pred):
    return np.sum(label * np.log(label / (pred + 0.000001))) / label.shape[0]
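# A brief added check (not from the original source): ce is the mean (per-row)
# Kullback-Leibler divergence between the label and predicted distributions,
# with a small epsilon guarding against log(0).
def _example_ce():
    import numpy as np
    p = np.array([[0.5, 0.5],
                  [0.9, 0.1]])
    q = np.array([[0.9, 0.1],
                  [0.5, 0.5]])
    assert np.isclose(ce(p, p), 0.0, atol=1e-5)  # KL(p || p) is ~0 up to the epsilon
    assert ce(p, q) > 0.0                        # mismatched predictions give positive divergence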
def test_02_03_same_size(self): '''Align two images keeping sizes the same''' np.random.seed(0) shape = (50, 45) i, j = np.mgrid[0:shape[0], 0:shape[1]] for offset in ((1, 0), (0, 1), (1, 1), (3, 5), (-3, 5), (3, -5), (-3, -5), (0, 5), (3, 0), (0, 0)): for mask1 in (None, np.random.uniform(size=shape) > .1): for mask2 in (None, np.random.uniform(size=shape) > .1): for method in (A.M_MUTUAL_INFORMATION, A.M_CROSS_CORRELATION): if method == A.M_CROSS_CORRELATION and ( (mask1 is not None) or (mask2 is not None)): continue image1 = np.random.randint( 0, 10, size=shape).astype(float) / 10.0 image1[np.sqrt(((i - shape[0] / 2)**2 + (j - shape[1] / 2)**2)) < 20] = .5 si1, si2 = self.slice_helper(offset[0], image1.shape[0]) sj1, sj2 = self.slice_helper(offset[1], image1.shape[1]) image2 = np.zeros(image1.shape) if method == A.M_MUTUAL_INFORMATION: image2[si2, sj2] = 1 - image1[si1, sj1] else: image2 = image1[(i + shape[0] - offset[0]) % shape[0], (j + shape[1] - offset[1]) % shape[1]] image2 += (np.random.uniform(size=shape) - .5) * .1 * np.std(image2) if mask1 is not None: image1[~mask1] = np.random.uniform( size=np.sum(~mask1)) if mask2 is not None: image2[~mask2] = np.random.uniform( size=np.sum(~mask2)) workspace, module = self.make_workspace( (image1, image2), (mask1, mask2)) self.assertTrue(isinstance(module, A.Align)) module.alignment_method.value = method module.crop_mode.value = A.C_SAME_SIZE module.run(workspace) output = workspace.image_set.get_image('Aligned0') m = workspace.measurements self.assertTrue(isinstance(m, cpmeas.Measurements)) off_i0 = -m.get_current_image_measurement( 'Align_Yshift_Aligned0') off_j0 = -m.get_current_image_measurement( 'Align_Xshift_Aligned0') off_i1 = -m.get_current_image_measurement( 'Align_Yshift_Aligned1') off_j1 = -m.get_current_image_measurement( 'Align_Xshift_Aligned1') self.assertEqual(off_i0 - off_i1, offset[0]) self.assertEqual(off_j0 - off_j1, offset[1]) si_in, si_out = self.slice_same(off_i0, shape[0]) sj_in, sj_out = self.slice_same(off_j0, shape[1]) np.testing.assert_almost_equal( image1[si_in, sj_in], output.pixel_data[si_out, sj_out]) if mask1 is not None: self.assertTrue( np.all(output.mask[si_out, sj_out] == mask1[si_in, sj_in])) temp = output.mask.copy() temp[si_out, sj_out] = False self.assertTrue(np.all(~temp)) output = workspace.image_set.get_image("Aligned1") si_in, si_out = self.slice_same(off_i1, shape[0]) sj_in, sj_out = self.slice_same(off_j1, shape[1]) np.testing.assert_almost_equal( image2[si_in, sj_in], output.pixel_data[si_out, sj_out]) if mask2 is not None: self.assertTrue( np.all(mask2[si_in, sj_in] == output.mask[si_out, sj_out])) temp = output.mask.copy() temp[si_out, sj_out] = False self.assertTrue(np.all(~temp))
def test_02_01_crop(self): '''Align two images and crop the result''' np.random.seed(0) shape = (50, 45) i, j = np.mgrid[0:shape[0], 0:shape[1]] for offset in ((3, 5), (-3, 5), (3, -5), (-3, -5), (0, 5), (3, 0), (0, 0)): # # Do something to give the image some information over # the distance, 5,5 # for mask1 in (None, np.random.uniform(size=shape) > .1): for mask2 in (None, np.random.uniform(size=shape) > .1): for method in (A.M_MUTUAL_INFORMATION, A.M_CROSS_CORRELATION): if method == A.M_CROSS_CORRELATION and ( (mask1 is not None) or (mask2 is not None)): continue image1 = np.random.randint( 0, 10, size=shape).astype(float) / 10.0 image1[np.sqrt(((i - shape[0] / 2)**2 + (j - shape[1] / 2)**2)) < 20] = .5 si1, si2 = self.slice_helper(offset[0], image1.shape[0]) sj1, sj2 = self.slice_helper(offset[1], image1.shape[1]) image2 = np.zeros(image1.shape) if method == A.M_MUTUAL_INFORMATION: image2[si2, sj2] = 1 - image1[si1, sj1] else: image2 = image1[(i + shape[0] - offset[0]) % shape[0], (j + shape[1] - offset[1]) % shape[1]] image2 += (np.random.uniform(size=shape) - .5) * .1 * np.std(image2) if mask1 is not None: image1[~mask1] = np.random.uniform( size=np.sum(~mask1)) if mask2 is not None: image2[~mask2] = np.random.uniform( size=np.sum(~mask2)) workspace, module = self.make_workspace( (image1, image2), (mask1, mask2)) self.assertTrue(isinstance(module, A.Align)) module.alignment_method.value = method module.crop_mode.value = A.C_CROP module.run(workspace) output = workspace.image_set.get_image('Aligned0') m = workspace.measurements self.assertTrue(isinstance(m, cpmeas.Measurements)) off_i0 = -m.get_current_image_measurement( 'Align_Yshift_Aligned0') off_j0 = -m.get_current_image_measurement( 'Align_Xshift_Aligned0') off_i1 = -m.get_current_image_measurement( 'Align_Yshift_Aligned1') off_j1 = -m.get_current_image_measurement( 'Align_Xshift_Aligned1') self.assertEqual(off_i0 - off_i1, offset[0]) self.assertEqual(off_j0 - off_j1, offset[1]) out_shape = output.pixel_data.shape self.assertEqual(out_shape[0], shape[0] - abs(offset[0])) self.assertEqual(out_shape[1], shape[1] - abs(offset[1])) i_slice = self.single_slice_helper( -off_i0, out_shape[0]) j_slice = self.single_slice_helper( -off_j0, out_shape[1]) np.testing.assert_almost_equal( image1[i_slice, j_slice], output.pixel_data) if mask1 is not None: self.assertTrue( np.all(output.mask == mask1[i_slice, j_slice])) if offset[0] == 0 and offset[1] == 0: self.assertFalse(output.has_crop_mask) else: temp = output.crop_mask.copy() self.assertEqual(tuple(temp.shape), shape) self.assertTrue(np.all(temp[i_slice, j_slice])) temp[i_slice, j_slice] = False self.assertTrue(np.all(~temp)) output = workspace.image_set.get_image("Aligned1") i_slice = self.single_slice_helper( -off_i1, out_shape[0]) j_slice = self.single_slice_helper( -off_j1, out_shape[1]) np.testing.assert_almost_equal( image2[i_slice, j_slice], output.pixel_data) if mask2 is not None: self.assertTrue( np.all(output.mask == mask2[i_slice, j_slice])) if offset[0] == 0 and offset[1] == 0: self.assertFalse(output.has_crop_mask) else: temp = output.crop_mask.copy() self.assertEqual(tuple(temp.shape), shape) self.assertTrue(np.all(temp[i_slice, j_slice])) temp[i_slice, j_slice] = False self.assertTrue(np.all(~temp))
def _extract_template_features( mmcif_object: mmcif_parsing.MmcifObject, pdb_id: str, mapping: Mapping[int, int], template_sequence: str, query_sequence: str, template_chain_id: str, kalign_binary_path: str) -> Tuple[Dict[str, Any], Optional[str]]: """Parses atom positions in the target structure and aligns with the query. Atoms for each residue in the template structure are indexed to coincide with their corresponding residue in the query sequence, according to the alignment mapping provided. Args: mmcif_object: mmcif_parsing.MmcifObject representing the template. pdb_id: PDB code for the template. mapping: Dictionary mapping indices in the query sequence to indices in the template sequence. template_sequence: String describing the amino acid sequence for the template protein. query_sequence: String describing the amino acid sequence for the query protein. template_chain_id: String ID describing which chain in the structure proto should be used. kalign_binary_path: The path to a kalign executable used for template realignment. Returns: A tuple with: * A dictionary containing the extra features derived from the template protein structure. * A warning message if the hit was realigned to the actual mmCIF sequence. Otherwise None. Raises: NoChainsError: If the mmcif object doesn't contain any chains. SequenceNotInTemplateError: If the given chain id / sequence can't be found in the mmcif object. QueryToTemplateAlignError: If the actual template in the mmCIF file can't be aligned to the query. NoAtomDataInTemplateError: If the mmcif object doesn't contain atom positions. TemplateAtomMaskAllZerosError: If the mmcif object doesn't have any unmasked residues. """ if mmcif_object is None or not mmcif_object.chain_to_seqres: raise NoChainsError('No chains in PDB: %s_%s' % (pdb_id, template_chain_id)) warning = None try: seqres, chain_id, mapping_offset = _find_template_in_pdb( template_chain_id=template_chain_id, template_sequence=template_sequence, mmcif_object=mmcif_object) except SequenceNotInTemplateError: # If PDB70 contains a different version of the template, we use the sequence # from the mmcif_object. chain_id = template_chain_id warning = ( f'The exact sequence {template_sequence} was not found in ' f'{pdb_id}_{chain_id}. Realigning the template to the actual sequence.' ) logger.warning(warning) # This throws an exception if it fails to realign the hit. seqres, mapping = _realign_pdb_template_to_query( old_template_sequence=template_sequence, template_chain_id=template_chain_id, mmcif_object=mmcif_object, old_mapping=mapping, kalign_binary_path=kalign_binary_path) logger.info('Sequence in %s_%s: %s successfully realigned to %s', pdb_id, chain_id, template_sequence, seqres) # The template sequence changed. template_sequence = seqres # No mapping offset, the query is aligned to the actual sequence. mapping_offset = 0 try: # Essentially set to infinity - we don't want to reject templates unless # they're really really bad. 
all_atom_positions, all_atom_mask = _get_atom_positions( mmcif_object, chain_id, max_ca_ca_distance=150.0) except (CaDistanceError, KeyError) as ex: raise NoAtomDataInTemplateError('Could not get atom data (%s_%s): %s' % (pdb_id, chain_id, str(ex))) from ex all_atom_positions = np.split(all_atom_positions, all_atom_positions.shape[0]) all_atom_masks = np.split(all_atom_mask, all_atom_mask.shape[0]) output_templates_sequence = [] templates_all_atom_positions = [] templates_all_atom_masks = [] for _ in query_sequence: # Residues in the query_sequence that are not in the template_sequence: templates_all_atom_positions.append( np.zeros((residue_constants.atom_type_num, 3))) templates_all_atom_masks.append( np.zeros(residue_constants.atom_type_num)) output_templates_sequence.append('-') for k, v in mapping.items(): template_index = v + mapping_offset templates_all_atom_positions[k] = all_atom_positions[template_index][0] templates_all_atom_masks[k] = all_atom_masks[template_index][0] output_templates_sequence[k] = template_sequence[v] # Alanine (AA with the lowest number of atoms) has 5 atoms (C, CA, CB, N, O). if np.sum(templates_all_atom_masks) < 5: raise TemplateAtomMaskAllZerosError( 'Template all atom mask was all zeros: %s_%s. Residue range: %d-%d' % (pdb_id, chain_id, min(mapping.values()) + mapping_offset, max(mapping.values()) + mapping_offset)) output_templates_sequence = ''.join(output_templates_sequence) templates_aatype = residue_constants.sequence_to_onehot( output_templates_sequence, residue_constants.HHBLITS_AA_TO_ID) return ({ 'template_all_atom_positions': np.array(templates_all_atom_positions), 'template_all_atom_masks': np.array(templates_all_atom_masks), 'template_sequence': output_templates_sequence.encode(), 'template_aatype': np.array(templates_aatype), 'template_domain_names': f'{pdb_id.lower()}_{chain_id}'.encode(), }, warning)
def generate_h2_pT(h5py_file, LDSC_results_file, output_h2, PS, output_pT, annotation_flag): # generate two types of prior files ### load the fixed input file ### h5f1 = h5py.File('ref/AnnotMatrix/baseline.h5', 'r') baseline = h5f1['annot'][:] h5f1.close() if annotation_flag == 'tier0': h5f1 = h5py.File('ref/AnnotMatrix/tier0.h5', 'r') tier = h5f1['annot'][:] h5f1.close() elif annotation_flag == 'tier1': h5f1 = h5py.File('ref/AnnotMatrix/tier1.h5', 'r') tier = h5f1['annot'][:] h5f1.close() elif annotation_flag == 'tier2': h5f1 = h5py.File('ref/AnnotMatrix/tier2.h5', 'r') tier = h5f1['annot'][:] h5f1.close() elif annotation_flag == 'tier3': h5f1 = h5py.File('ref/AnnotMatrix/tier3.h5', 'r') tier = h5f1['annot'][:] h5f1.close() else: exit("Illegal tier name!") annot = np.concatenate((baseline, tier), axis=1) h5f2 = h5py.File('ref/AnnotMatrix/1000G_SNP_info.h5', 'r') snp_chr = h5f2['snp_chr'][:] h5f2.close() ### get the snp list from h5py ### chromosomes_list = ['chrom_%d' % (x) for x in range(1, 23)] chromosomes_list.append('chrom_X') df = h5py.File(h5py_file, 'r') cord_data_g = df['cord_data'] SNPids = [] for chrom_str in chromosomes_list: if chrom_str in cord_data_g.keys(): g = cord_data_g[chrom_str] #Filter monomorphic SNPs (SNPs with variance equal to 0) snp_stds = g['snp_stds_ref'][...] snp_stds = snp_stds.flatten() ok_snps_filter = snp_stds > 0 pval_derived_betas = g['betas'][...] pval_derived_betas = pval_derived_betas[ok_snps_filter] sids = g['sids'][...] SNPids = np.append(SNPids, sids[ok_snps_filter]) num_snps = len(SNPids) ### overlap with SNP in annot files ### stt1 = np.in1d(snp_chr[:, 2], SNPids) ant1 = annot[stt1] snp_chr1 = snp_chr[stt1] ### check order ### if sum(snp_chr1[:, 2] == SNPids) == len(SNPids): print 'Good!' else: print 'Shit happens, sorting ant1 to have the same order as SNPids' O1 = np.argsort(snp_chr1[:, 2]) O2 = np.argsort(SNPids) O3 = np.argsort(O2) ant1 = ant1[O1][O3] ### load LDSC results ### LD_results = np.genfromtxt(LDSC_results_file, dtype=None, names=True) tau0 = LD_results['Coefficient'] ### get heritability ### sig2_0 = np.dot(ant1, tau0) ### adjust for minus terms ### sig2_0[sig2_0 < 0] = np.repeat(min(sig2_0[sig2_0 > 0]), np.sum(sig2_0 < 0)) np.sum(sig2_0) ### save prior file (h2) ### h2_out = [] for i in range(len(sig2_0)): h2_out.append( str(snp_chr1[:, 0][i]) + ' ' + str(snp_chr1[:, 2][i]) + ' ' + str(sig2_0[i]) + '\n') #np.savetxt(output_h2,(snp_chr1[:,0],snp_chr1[:,1],sig2_0),fmt="%s") ff = open(output_h2, "w") ff.writelines(h2_out) ff.close() ### start calculating p_T ### M = np.empty(annot.shape[1]) for i in range(len(M)): M[i] = np.sum(np.logical_and(annot[:, 0], annot[:, i])) bgt = datetime.datetime.now() M_T = defaultdict(int) for i in range(annot.shape[0]): tup_i = tuple(annot[i]) M_T[tup_i] += 1 edt = datetime.datetime.now() print edt - bgt bgt = datetime.datetime.now() N_T = defaultdict(int) for i in range(ant1.shape[0]): tup_i = tuple(ant1[i]) N_T[tup_i] += 1 edt = datetime.datetime.now() print edt - bgt H0 = np.dot(M, tau0) N0 = float(len(SNPids)) sig2V = np.dot(ant1, tau0) # N_T = {x:annotV1.count(x) for x in annotV1} M_TV = np.empty(ant1.shape[0]) N_TV = np.empty(ant1.shape[0]) for i in range(ant1.shape[0]): tup_i = tuple(ant1[i]) M_TV[i] = M_T[tup_i] N_TV[i] = N_T[tup_i] # for ps in PS: pr_p = (PS * N0 / H0) * M_TV * sig2V / N_TV sig2 = M_TV * sig2V / N_TV m1 = min(pr_p[pr_p > 0]) m2 = min(sig2[sig2 > 0]) pr_p[pr_p < 0] = np.repeat(m1, np.sum(pr_p < 0)) sig2[sig2 < 0] = np.repeat(m2, np.sum(sig2 < 0)) pr_p[pr_p > 1] = np.repeat(1, 
np.sum(pr_p > 1)) pT_out = [] for i in range(len(sig2)): pT_out.append( str(snp_chr1[:, 0][i]) + ' ' + str(snp_chr1[:, 2][i]) + ' ' + str(pr_p[i]) + ' ' + str(sig2[i]) + '\n') # ff = open(output_pT+'_'+str(ps)+'_file.txt',"w") ff = open(output_pT, "w") ff.writelines(pT_out) ff.close() return math.ceil(num_snps / 3000.0)
def _compute_norms(self, state):
    self.XYZ = norm(state["position"])
    self.V = norm(state["velocity"])
    self.legs_contact = np.sum(state["leg_contact"])
def updateAndGetBeliefStates(self, evidences): """ Given a list of (noised) distances from pacman to ghosts, returns a list of belief states about ghosts positions Arguments: ---------- - `evidences`: list of (noised) ghost positions at state x_{t} where 't' is the current time step Return: ------- - A list of Z belief states at state x_{t} about ghost positions as N*M numpy matrices of probabilities where N and M are respectively width and height of the maze layout and Z is the number of ghosts. N.B. : [0,0] is the bottom left corner of the maze """ # XXX: Your code here # if self.iter < 0: # np.save('Entropy{}_{}'.format(self.w, self.p), self.entropy) # sys.exit() # # self.iter = self.iter - 1 if (self.m or self.n) is None: self.m = self.walls.height self.n = self.walls.width if not self.board: for x in np.arange(self.n): for y in np.arange(self.m): self.board.append((x, y)) if self.transitionMatrix is None: self.transitionMatrix = self.createTransitionMatrix() if self.sensorMatrix is None: self.sensorMatrix = self.createSensorModel() beliefStates = self.beliefGhostStates # self.entropy.append(self.entropyF(beliefStates)) for i, e in enumerate(evidences): """ To manage multiple ghosts. """ col_beliefStates = np.reshape(beliefStates[i, :, :], (-1, 1)) index = self.board.index(e) O_col = self.sensorMatrix[:, index] O = np.diag(O_col) """ O = Observation matrix. """ col_bel = np.dot(O, self.transitionMatrix) col_beliefStates = np.dot(col_bel, col_beliefStates) alpha = 1 / (np.sum(col_beliefStates)) col_beliefStates = alpha * col_beliefStates beliefState = col_beliefStates.reshape((self.n, self.m)) beliefStates[i, :, :] = beliefState # XXX: End of your code self.beliefGhostStates = beliefStates return beliefStates
    V = np.dot(A, q_i)
    Bq, Br = gram_schmidt_qr(V)
    diagonal_Br = np.diagonal(Br)
    return Bq, diagonal_Br


input_data = np.loadtxt(open('iris1.csv', 'rb'), delimiter=',', usecols=range(0, 4))
n, c = input_data.shape

# Center each feature column at zero mean
for i in range(c):
    col_mean = np.mean(input_data[:, i])
    input_data[:, i] = input_data[:, i] - col_mean

# Covariance matrix of the centered data
A = np.dot(np.transpose(input_data), input_data) / n

eigen_vectors, eigen_values = my_pca(A)

standard_deviation = np.sqrt(eigen_values)
print("Standard deviation")
print(standard_deviation)

proportion_of_variance = eigen_values / np.sum(eigen_values)
print("Proportion of Variance")
print(proportion_of_variance)
def fit(self, X, y): """Fit a semi-supervised label propagation model based All the input data is provided matrix X (labeled and unlabeled) and corresponding label matrix y with a dedicated marker value for unlabeled samples. Parameters ---------- X : array-like, shape = [n_samples, n_features] A {n_samples by n_samples} size matrix will be created from this y : array_like, shape = [n_samples] n_labeled_samples (unlabeled points are marked as -1) All unlabeled samples will be transductively assigned labels Returns ------- self : returns an instance of self. """ if sparse.isspmatrix(X): self.X_ = X else: self.X_ = np.asarray(X) # actual graph construction (implementations should override this) graph_matrix = self._build_graph() # label construction # construct a categorical distribution for classification only classes = np.unique(y) classes = (classes[classes != -1]) self.classes_ = classes n_samples, n_classes = len(y), len(classes) y = np.asarray(y) unlabeled = y == -1 clamp_weights = np.ones((n_samples, 1)) clamp_weights[unlabeled, 0] = self.alpha # initialize distributions self.label_distributions_ = np.zeros((n_samples, n_classes)) for label in classes: self.label_distributions_[y == label, classes == label] = 1 y_static = np.copy(self.label_distributions_) if self.alpha > 0.: y_static *= 1 - self.alpha y_static[unlabeled] = 0 l_previous = np.zeros((self.X_.shape[0], n_classes)) remaining_iter = self.max_iter if sparse.isspmatrix(graph_matrix): graph_matrix = graph_matrix.tocsr() while (_not_converged(self.label_distributions_, l_previous, self.tol) and remaining_iter > 1): l_previous = self.label_distributions_ self.label_distributions_ = safe_sparse_dot( graph_matrix, self.label_distributions_) # clamp self.label_distributions_ = np.multiply( clamp_weights, self.label_distributions_) + y_static remaining_iter -= 1 normalizer = np.sum(self.label_distributions_, axis=1)[:, np.newaxis] self.label_distributions_ /= normalizer # set the transduction item transduction = self.classes_[np.argmax(self.label_distributions_, axis=1)] self.transduction_ = transduction.ravel() return self
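For reference, each pass of the propagation loop above smooths the current label distributions over the graph, clamps points back toward their static labels, and row-normalizes. A self-contained sketch of one such iteration, mirroring the update as written in fit(), with a tiny hand-built affinity matrix (hypothetical values, alpha = 0.2):

import numpy as np

alpha = 0.2
# Hypothetical 4-sample affinity graph and labels (-1 = unlabeled).
G = np.array([[0.0, 1.0, 0.5, 0.0],
              [1.0, 0.0, 0.0, 0.5],
              [0.5, 0.0, 0.0, 1.0],
              [0.0, 0.5, 1.0, 0.0]])
y = np.array([0, 1, -1, -1])
classes = np.array([0, 1])

# One-hot initial label distributions.
Y = np.zeros((len(y), len(classes)))
for k, label in enumerate(classes):
    Y[y == label, k] = 1.0

unlabeled = (y == -1)
clamp = np.ones((len(y), 1))
clamp[unlabeled, 0] = alpha
y_static = (1 - alpha) * Y
y_static[unlabeled] = 0

# One propagation step: graph smoothing, clamping, row normalization.
Y = clamp * (G @ Y) + y_static
Y /= Y.sum(axis=1, keepdims=True)
print(Y)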
def main(): l_rate = 10 repeat = 12000 Lambda = 0.005 data = [] for i in range(18): data.append([]) n_row = 0 text = open(train_path, 'r') #, encoding='big5') row = csv.reader(text, delimiter=",") for r in row: if n_row != 0: for i in range(3, 27): if r[i] != "NR": data[(n_row - 1) % 18].append(float(r[i])) else: data[(n_row - 1) % 18].append(float(0)) n_row = n_row + 1 text.close() x = [] y = [] for i in range(12): for j in range(471): x.append([]) for t in range(18): for s in range(9): x[471 * i + j].append(data[t][480 * i + j + s]) y.append(data[9][480 * i + j + 9]) x = np.array(x) y = np.array(y) # pick Variable X = [1] * len(x) for row in range(len(x)): X[row] = x[row][Variable_list] x = np.array(X) ''' x_transpose = list(map(list, zip(*x))) Varlist = [] Minlist = [] for row in range(x.shape[1]): Varlist.append(np.std(x_transpose[row])) Minlist.append(np.mean(x_transpose[row])) for row in range(x.shape[0]): for col in range(x.shape[1]): x[row][col] = (x[row][col] - Minlist[col]) / Varlist[col] ''' # add square term x = np.concatenate((x, x**2), axis=1) # add bias x = np.concatenate((np.ones((x.shape[0], 1)), x), axis=1) w = np.zeros(len(x[0])) ## Linear regression x_t = x.transpose() s_gra = np.zeros(len(x[0])) for i in range(repeat): hypo = np.dot(x, w) loss = hypo - y + Lambda * np.sum(w**2) cost = np.sum(loss**2) / len(x) cost_a = math.sqrt(cost) gra = np.dot(x_t, loss) s_gra += gra**2 ada = np.sqrt(s_gra) w = w - l_rate * gra / ada if i % 100 == 0: print('iteration: %d | Cost: %f ' % (i, cost_a)) # save model np.save('model_best.npy', w)
def track(self, bb, prev, curr): self._n_samples = 100 self._fb_max_dist = 1 self._ds_factor = 0.95 self._min_n_points = 10 # sample points inside the bounding box p0 = np.empty((self._n_samples, 2)) p0[:, 0] = np.random.randint(bb[0], bb[2] + 1, self._n_samples) p0[:, 1] = np.random.randint(bb[1], bb[3] + 1, self._n_samples) p0 = p0.astype(np.float32) # forward-backward tracking p1, st, err = cv2.calcOpticalFlowPyrLK(prev, curr, p0, None, **self.lk_params) indx = np.where(st == 1)[0] p0 = p0[indx, :] p1 = p1[indx, :] p0r, st, err = cv2.calcOpticalFlowPyrLK(curr, prev, p1, None, **self.lk_params) if err is None: return None # check forward-backward error and min number of points fb_dist = np.abs(p0 - p0r).max(axis=1) good = fb_dist < self._fb_max_dist # keep half of the points err = err[good].flatten() if len(err) < self._min_n_points: return None indx = np.argsort(err) half_indx = indx[:len(indx) // 2] p0 = (p0[good])[half_indx] p1 = (p1[good])[half_indx] # estimate displacement dx = np.median(p1[:, 0] - p0[:, 0]) dy = np.median(p1[:, 1] - p0[:, 1]) # all pairs in prev and curr i, j = np.triu_indices(len(p0), k=1) pdiff0 = p0[i] - p0[j] pdiff1 = p1[i] - p1[j] # estimate change in scale p0_dist = np.sum(pdiff0 ** 2, axis=1) p1_dist = np.sum(pdiff1 ** 2, axis=1) ds = np.sqrt(np.median(p1_dist / (p0_dist + 2**-23))) ds = (1.0 - self._ds_factor) + self._ds_factor * ds; # update bounding box dx_scale = (ds - 1.0) * 0.5 * (bb[3] - bb[1] + 1) dy_scale = (ds - 1.0) * 0.5 * (bb[2] - bb[0] + 1) bb_curr = (int(bb[0] + dx - dx_scale + 0.5), int(bb[1] + dy - dy_scale + 0.5), int(bb[2] + dx + dx_scale + 0.5), int(bb[3] + dy + dy_scale + 0.5)) if bb_curr[0] >= bb_curr[2] or bb_curr[1] >= bb_curr[3]: return None bb_curr = (min(max(0, bb_curr[0]), curr.shape[1]), min(max(0, bb_curr[1]), curr.shape[0]), min(max(0, bb_curr[2]), curr.shape[1]), min(max(0, bb_curr[3]), curr.shape[0])) return bb_curr
fieldnum = 10 fieldsize = 30. #fieldsize in arcseconds smoothingscale = 2 mapsave = "n" galaxynum = 20000 gesnum = pointnum*fieldnum kernelnum1 = int(gesnum) kernelnum = kernelnum1-1 fieldsize = fieldsize/3600. startt = time() print("Filling the D-functions") Dstar1,D1 = makefield.filld(kernelnum,fieldsize,pointnum) Dstar1 = Dstar1/np.sum(np.abs(Dstar1)) D1 = D1/np.sum(np.abs(D1)) Dstar1r = Dstar1.real Dstar1i = Dstar1.imag D1r = D1.real D1i = D1.imag print("Finished filling. Time:",round(time()-startt),"s") #print (np.sum(Dstar1)) #print (np.sum(D1)) def getgamma(kappa): kappa = kappa.astype("complex") print("Convolving kappa to gamma...") kappar = kappa.real kappai = kappa.imag gammar = convolve_fft(kappar,D1r,boundary="wrap") - convolve_fft(kappai,D1i,boundary="wrap")
def loss(desired, final):
    # Sum-of-squared-errors loss: the square must be taken inside the sum,
    # otherwise residuals of opposite sign cancel before squaring.
    return 0.5 * np.sum((desired - final) ** 2)
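A quick check of the corrected loss above; the second expression shows what the original parenthesization computed (the residuals cancel before squaring):

import numpy as np

desired = np.array([1.0, 0.0, 1.0])
final = np.array([0.0, 1.0, 1.0])

print(0.5 * np.sum((desired - final) ** 2))   # 1.0: sum of squared residuals
print(0.5 * np.sum(desired - final) ** 2)     # 0.0: square of the summed residuals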
def create_plot_height_quantile_remove_over_height(in_dir, out_dir, medial_level, gridSize, sensor_d='e'): if not os.path.isdir(out_dir): os.makedirs(out_dir) out_sub_dir = os.path.join(out_dir, str(medial_level), str(gridSize)) if not os.path.isdir(out_sub_dir): os.makedirs(out_sub_dir) d1 = date(2018, 4, 15) # start date d2 = date(2018, 7, 29) # end date delta = d2 - d1 # timedelta for i in range(delta.days + 1): str_date = str(d1 + timedelta(days=i)) base_name = '{}_heightHist_{}_{}.npy'.format(str_date, sensor_d, str(gridSize)) #2017-06-30_heightHist_e_0.npy file_path = os.path.join(in_dir, base_name) if not os.path.isfile(file_path): continue heightHist = np.load(file_path) heightHist = heightHist[medial_level] if np.amax(heightHist) == 0: continue nonZeroIndex = np.nonzero(heightHist) max_y_ind = np.amax(nonZeroIndex[2]) out_hist = np.zeros((864, 3)) out_hist[:] = np.nan array_ind = 0 save_flag = False for x in range(54): for y in range(16): if np.amax(heightHist[x,y])>0: if heightHist[x,y,max_y_ind] < 10: save_item = np.zeros(3) save_item[0] = x save_item[1] = y targetHist = heightHist[x,y] targetHist = targetHist/np.sum(targetHist) quantiles = np.cumsum(targetHist) b = np.arange(len(quantiles)) c = b[quantiles>0.98] save_item[2] = min(c) out_hist[array_ind] = save_item save_flag = True array_ind += 1 if save_flag: out_csv_path = os.path.join(out_sub_dir, '{}_heightHist.csv'.format(str_date)) with open(out_csv_path, 'w') as f: for i in range(len(out_hist)): out_np_line = out_hist[i] if np.amax(out_np_line) > 0: out_np_line = out_np_line.astype(int) print_line = ','.join(map(str,out_np_line)) f.write(print_line+'\n') else: print_line = ','.join(map(str,out_np_line)) f.write(print_line+'\n') return
def convert_model(from_popn, from_model, from_vars, to_popn, to_model, to_vars): """ Convert from one model to another model of a different type Generally this will involve projecting impulse responses, etc. It's hairy business. """ # Idea: Get the state of the GLMs, e.g. the impulse responses, etc. # Project those states onto the parameters of the to-model N = from_popn.N from_state = from_popn.eval_state(from_vars) to_state = to_popn.eval_state(to_vars) conv_vars = None if from_model['impulse']['type'].lower() == 'basis': if to_model['impulse']['type'].lower() == 'normalized' or \ to_model['impulse']['type'].lower() == 'dirichlet': import copy conv_vars = copy.deepcopy(to_vars) # To convert from basis -> normalized, project the impulse # responses onto the normalized basis, divide by the area # under the curve to get the weight. W = np.zeros((N,N)) for n2 in np.arange(N): B = to_state['glms'][n2]['imp']['basis'].shape[1] w_ir_n2 = np.zeros((N,B)) for n1 in np.arange(N): # Solve a nonnegative least squares problem (w_ir_n1n2p, residp) = nnls(to_state['glms'][n2]['imp']['basis'], from_state['glms'][n2]['imp']['impulse'][n1,:]) (w_ir_n1n2n, residn) = nnls(to_state['glms'][n2]['imp']['basis'], -1.0*from_state['glms'][n2]['imp']['impulse'][n1,:]) # Take the better of the two solutions if residp < residn: Wsgn = 1.0 w_ir_n1n2 = w_ir_n1n2p else: Wsgn = -1.0 w_ir_n1n2 = w_ir_n1n2n # Normalized weights must be > 0, sum to 1 w_ir_n1n2 = w_ir_n1n2 w_ir_n1n2 = np.clip(w_ir_n1n2,0.001,np.Inf) # Normalize the impulse response to get a weight W[n1,n2] = Wsgn*np.sum(w_ir_n1n2) # Set impulse response to normalized impulse response w_ir_n2[n1,:] = w_ir_n1n2 / np.sum(w_ir_n1n2) # Update to_vars if to_model['impulse']['type'].lower() == 'normalized': conv_vars['glms'][n2]['imp']['w_lng'] = np.log(w_ir_n2.flatten()) if to_model['impulse']['type'].lower() == 'dirichlet': for n1 in range(N): # Scale up the weights such that the average is preserved alpha = to_popn.glm.imp_model.alpha B = to_popn.glm.imp_model.B conv_vars['glms'][n2]['imp']['g_%d' % n1] = alpha * B * w_ir_n2[n1,:] # Update to_vars conv_vars['net']['weights']['W'] = W.flatten() # Threshold the adjacency matrix to start with the right level of sparsity if 'rho' in to_model['network']['graph'].keys(): W_sorted = np.sort(np.abs(W.ravel())) thresh = W_sorted[np.floor((1.0-2.0*to_model['network']['graph']['rho'])*(N**2-N)-N)] conv_vars['net']['graph']['A'] = (np.abs(W) >= thresh).astype(np.int8) else: conv_vars['net']['graph']['A'] = np.ones((N,N), dtype=np.int8) # Copy over the bias for n in np.arange(N): conv_vars['glms'][n]['bias']['bias'] = from_vars['glms'][n]['bias']['bias'] # Copy over the background params if 'sharedtuningcurves' in to_model['latent'] and \ from_model['bkgd']['type'] == 'spatiotemporal': convert_stimulus_filters_to_sharedtc(from_popn, from_model, from_vars, to_popn, to_model, conv_vars) return conv_vars
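The basis-to-normalized conversion above hinges on nonnegative least squares: each impulse response is projected onto the target basis under a nonnegativity constraint, once as-is and once negated, keeping whichever fit has the smaller residual. A standalone sketch of that projection step with a toy basis and signal (hypothetical shapes, not the model's actual objects):

import numpy as np
from scipy.optimize import nnls

# Hypothetical basis (T time bins x B basis functions) and a signal to project.
T, B = 50, 3
t = np.linspace(0, 1, T)
basis = np.column_stack([np.exp(-t / tau) for tau in (0.1, 0.3, 0.9)])
signal = -0.7 * basis[:, 1] + 0.2 * basis[:, 2]     # partly negative signal

# Fit the signal and its negation; keep the projection with the smaller residual.
w_pos, resid_pos = nnls(basis, signal)
w_neg, resid_neg = nnls(basis, -signal)
if resid_pos <= resid_neg:
    sign, w = 1.0, w_pos
else:
    sign, w = -1.0, w_neg

weight = sign * np.sum(w)                        # overall (signed) connection weight
shape = w / np.sum(w) if np.sum(w) > 0 else w    # normalized impulse-response shape
print(weight, shape)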
def create_plot_betyCsv(in_dir, out_dir, medial_level, gridSize, para = 0.98, sensor_d='e'): if not os.path.isdir(out_dir): os.makedirs(out_dir) out_sub_dir = os.path.join(out_dir, str(medial_level), str(gridSize)) if not os.path.isdir(out_sub_dir): os.makedirs(out_sub_dir) d1 = date(2018, 4, 25) # start date d2 = date(2018, 8, 1) # end date delta = d2 - d1 # timedelta (fields, traits) = get_traits_table_height_quantile() for i in range(delta.days + 1): str_date = str(d1 + timedelta(days=i)) base_name = '{}_heightHist_{}_{}.npy'.format(str_date, sensor_d, str(gridSize)) #2017-06-30_heightHist_e_0.npy file_path = os.path.join(in_dir, base_name) if not os.path.isfile(file_path): continue heightHist = np.load(file_path) heightHist = heightHist[medial_level] if np.amax(heightHist) == 0: continue nonZeroIndex = np.nonzero(heightHist) max_y_ind = np.amax(nonZeroIndex[2]) out_hist = np.zeros((864, 3)) out_hist[:] = np.nan array_ind = 0 save_flag = False for x in range(54): for y in range(16): if np.amax(heightHist[x,y])>0: if heightHist[x,y,max_y_ind] < 50: save_item = np.zeros(3) save_item[0] = x save_item[1] = y targetHist = heightHist[x,y] targetHist = targetHist/np.sum(targetHist) quantiles = np.cumsum(targetHist) b = np.arange(len(quantiles)) c = b[quantiles>para] save_item[2] = min(c)-5 out_hist[array_ind] = save_item save_flag = True array_ind += 1 if save_flag: out_csv_path = os.path.join(out_sub_dir, '{}_99th_quantile.csv'.format(str_date)) csvHandle = open(out_csv_path, 'w') csvHandle.write(','.join(map(str, fields)) + '\n') for j in range(len(out_hist)): out_np_line = out_hist[j] if np.amax(out_np_line) > 0: str_time = str_date+'T12:00:00' traits['local_datetime'] = str_time traits['99th_quantile_canopy_height'] = str(int(out_np_line[2])) traits['site'] = parse_site_from_range_column(out_np_line[0], out_np_line[1], 6) trait_list = generate_traits_list_height(traits) csvHandle.write(','.join(map(str, trait_list)) + '\n') csvHandle.close() #submitToBety(out_file) #betydb.submit_traits(out_file, filetype='csv', betykey=betydb.get_bety_key(), betyurl=betydb.get_bety_url()) return
def circuit_integrals(u_abs, u, v, w, lon, lat, glon, glat, z, r): # Integrate u.dl around the circuit of trajectories # 1st and last 2 trajectories are the same so don't double count dlambda, dx, dy, dz = [], [], [], [] for n in range(1, len(u) - 1): # dlambda is length along true longitudes to match the direction of # the Earth rotation dlambda.append(r[n] * np.cos(lat[n]) * 0.5 * (lon[n + 1] - lon[n - 1])) # dx and dy are in the direction of the rotated grid which corresponds # to the wind fields in the forecast dx.append(r[n] * np.cos(glat[n]) * 0.5 * (glon[n + 1] - glon[n - 1])) dy.append(r[n] * 0.5 * (glat[n + 1] - glat[n - 1])) # dz is independent of grid rotation dz.append(0.5 * (z[n + 1] - z[n - 1])) dlambda = np.array(dlambda) dx = np.array(dx) dy = np.array(dy) dz = np.array(dz) # \int dl: Tracks the errors in each calculation (should be zero) dx_tot = np.sum(dx) dy_tot = np.sum(dy) dz_tot = np.sum(dz) dlambda_tot = np.sum(dlambda) # \int |dl| length = np.sum(np.sqrt(dx ** 2 + dy ** 2 + dz ** 2)) # u * r cos(phi) dlambda circ_u = u[1:-1] * dx # v * r dphi circ_v = v[1:-1] * dy # w * dz circ_w = w[1:-1] * dz # u_abs * r cos(phi) dlambda circ_p = u_abs[1:-1] * dlambda """ r_ave = 0.5 * (r[1:] + r[:-1]) dlambda = r_ave * np.cos(0.5 * (lat[1:] + lat[:-1])) * (lon[1:] - lon[:-1]) dx = r_ave * np.cos(0.5 * (glat[1:] + glat[:-1])) * (glon[1:] - glon[:-1]) dy = r_ave * (glat[1:] - glat[:-1]) dz = (z[1:] - z[:-1]) # \int dl dx_tot = np.sum(dx) dy_tot = np.sum(dy) dz_tot = np.sum(dz) dlambda_tot = np.sum(dlambda) # \int |dl| length = np.sum(np.sqrt(dx ** 2 + dy ** 2 + dz ** 2)) # u * r cos(phi) dlambda circ_u = 0.5 * (u[1:] + u[:-1]) * dx # v * r dphi circ_v = 0.5 * (v[1:] + v[:-1]) * dy # w * dz circ_w = 0.5 * (w[1:] + w[:-1]) * dz # u_abs * r cos(phi) dlambda circ_p = 0.5 * (u_abs[1:] + u_abs[:-1]) * dlambda """ rel_circulation = np.sum(circ_u + circ_v + circ_w) planetary_circulation = np.sum(circ_p) abs_circulation = np.sum(circ_u + circ_v + circ_w + circ_p) return (dx_tot, dy_tot, dz_tot, dlambda_tot, length, rel_circulation, planetary_circulation, abs_circulation)
def projections(iswt, iorient, K=20, inc=1, aspace=None, arange=None): if (K < 4 or K > 1024): print( 'Invalid average value. Accepted values between 4 and half the size of your image. Setting default value.' ) K = 12 if (inc > 90 or inc < 0): print('Invalid Delta, must be positive and less than 90') inc = 1 # pad the image with zeros so we don't lose anything when we rotate. iLength, iWidth = iswt.shape iDiag = math.sqrt(iLength**2 + iWidth**2) LengthPad = math.ceil(iDiag - iLength) + 1 WidthPad = math.ceil(iDiag - iWidth) + 1 padIMG = np.zeros((iLength + LengthPad, iWidth + WidthPad)) pad1 = int(math.ceil(LengthPad / 2)) pad2 = int(math.ceil(LengthPad / 2) + iLength) pad3 = int(math.ceil(WidthPad / 2)) pad4 = int(math.ceil(WidthPad / 2) + iWidth) padIMG[pad1:pad2, pad3:pad4] = iswt padIMGOR = np.zeros((iLength + LengthPad, iWidth + WidthPad)) padIMGOR[pad1:pad2, pad3:pad4] = iorient # # loop over the number of angles, rotate 90-theta (because we can easily sum # if we look at stuff from the top), and then add up. Don't perform any # interpolation on the rotating. # # -90 and 90 are the same, we must remove 90 THETA = list(range(-90, 90, inc)) th = np.zeros(len(THETA)) + np.inf if (arange): for ang in aspace: k = ang + 90 kplus = k + arange kminus = k - arange if (kplus > 179): kplus = 179 if (kminus < 0): kminus = 0 th[k:kplus] = THETA[k:kplus] th[kminus:k] = THETA[kminus:k] else: th = THETA th = np.array(th, dtype=np.float32) * np.pi * (1 / 180.0) n = len(THETA) PR = np.zeros((padIMG.shape[1], n)) M = padIMG # > 0 iPL, iPW = padIMG.shape center = (iPL / 2, iPW / 2) for i in range(n): if (th[i] != np.inf): final = oft(M, K, padIMGOR, th[i]) Mt = cv2.getRotationMatrix2D(center, -THETA[i], 1.0) rotated = cv2.warpAffine(final, Mt, (iPL, iPW)) PR[:, i] = (np.sum(rotated, axis=0)) else: PR[:, i] = 0 PR[np.nonzero(PR < 0)] = 0.0 PR = PR / iDiag PR = PR * 10 PR = np.multiply(PR, PR) PR = PR * 0.1 PR = PR / np.max(PR) return PR
def stereo_hist_to_bety_format(in_dir, out_dir): if not os.path.isdir(out_dir): os.makedirs(out_dir) d1 = date(2016, 10, 16) # start date d2 = date(2016, 11, 7) # end date delta = d2 - d1 # timedelta (fields, traits) = get_traits_table_stereo_height_quantile() for i in range(delta.days + 1): str_date = str(d1 + timedelta(days=i)) base_name = '{}_stereoHeight.npy'.format(str_date) #2016-10-16_stereoHeight.npy file_path = os.path.join(in_dir, base_name) if not os.path.isfile(file_path): continue heightHist = np.load(file_path) if np.amax(heightHist) == 0: continue out_hist = np.zeros((864, 3)) out_hist[:] = np.nan array_ind = 0 save_flag = False for i in range(864): if np.amax(heightHist[2*i])>0: save_item = np.zeros(3) plot_row, plot_col = plotNum_to_fieldPartition_864(i+1) save_item[0] = plot_row-1 save_item[1] = plot_col-1 targetHist = heightHist[2*i] targetHist = targetHist/np.sum(targetHist) quantiles = np.cumsum(targetHist) b = np.arange(len(quantiles)) c = b[quantiles>0.98] save_item[2] = min(c) out_hist[array_ind] = save_item save_flag = True array_ind += 1 if save_flag: out_csv_path = os.path.join(out_dir, '{}_98th_quantile_stereoHeight.csv'.format(str_date)) csvHandle = open(out_csv_path, 'w') csvHandle.write(','.join(map(str, fields)) + '\n') for j in range(len(out_hist)): out_np_line = out_hist[j] if np.amax(out_np_line) > 0: str_time = str_date+'T12:00:00' traits['local_datetime'] = str_time traits['98th_quantile_canopy_height'] = str(int(out_np_line[2])) traits['site'] = parse_site_from_range_column(out_np_line[0], out_np_line[1], 2) trait_list = generate_traits_list_height(traits) csvHandle.write(','.join(map(str, trait_list)) + '\n') csvHandle.close() #submitToBety(out_file) #betydb.submit_traits(out_file, filetype='csv', betykey=betydb.get_bety_key(), betyurl=betydb.get_bety_url()) return
def get_posterior(prior, P, outcome): n_models = len(prior) total_probability = prior * P[:, outcome] # get total_probability[i] = prior[i] * P[i, outcome] posterior = total_probability / np.sum(total_probability) ## So probability of outcome for model i is just... return posterior
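A worked usage of get_posterior with two hypothetical models and a binary outcome, showing the Bayes update the function implements (posterior proportional to prior times the likelihood of the observed outcome):

import numpy as np

prior = np.array([0.5, 0.5])            # two competing models
P = np.array([[0.9, 0.1],               # P[i, outcome]: outcome probabilities under model i
              [0.4, 0.6]])

posterior = get_posterior(prior, P, outcome=1)
# Unnormalized: [0.5*0.1, 0.5*0.6] = [0.05, 0.30] -> normalized: [0.143, 0.857]
print(posterior)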
def discardwrongpeaks(R, positivePairs, negativePairs, gridsize, verbose=True):
    """
    Discard wrong peaks.

    Discarding wrong peaks is based on the following facts:
    - In a grid you will find positive and negative angles.
    - Make pairs of positive and negative angles; positive angles are grouped
      first, then negative ones.
    - If we don't have enough for a square, we finish at that point. Otherwise,
      we check that the pairs are complementary (their angles sum to 90 degrees),
      so we group by square: for each positive angle there must exist a negative
      one that complements it, and vice versa.
    ------------------------------------------------------------------------
    Function ERRORS:
    - couldn't find enough peaks fitting grid conditions
    """
    goodpeaks = []
    error = 0
    error = enoughpeaks(positivePairs, negativePairs)  # Error can be 0, 1 or 2.
    anglepos = np.array([peak[1] for peak in positivePairs])
    angleneg = np.array([peak[1] for peak in negativePairs])
    if error < 0:
        return -1, [], []
    if error == 1:
        positivePairs = findCrossSequence(anglepos, gridsize)
        negativePairs = predictGrid(positivePairs, R, gridsize)
        if len(negativePairs) == 0 or len(positivePairs) == 0:
            return -1, [], []
    if error == 2:
        negativePairs = findCrossSequence(angleneg, gridsize)
        positivePairs = predictGrid(negativePairs, R, gridsize)
        if len(negativePairs) == 0 or len(positivePairs) == 0:
            return -2, [], []

    # We have 2 exceptions: 0 and -1; in that case the majority wins and is converted.
    ### TODO: To be tested
    total_0 = np.sum(angleneg == 0)
    total_1 = np.sum(anglepos == 1)
    total_m1 = np.sum(angleneg == -1)
    total_90 = np.sum([anglepos == 90, anglepos == 89])  # we consider 90 and 89
    total_m89 = np.sum([angleneg == -90, angleneg == -89, angleneg == -88])  # we consider -90, -89 and -88

    # We have to group angles in complementary sets:
    # -1, 0, 1 can show up together
    # -89, 90 can also show up together
    # However, we only admit correspondences such as (-1, 0)+(90, 89) or (1, 2)+(-89, -88).
    # If we have many pairs, we only keep the majority.
    if (total_0 > 0 and total_1 > 0) or (total_90 > 0 and total_m89 > 0):
        if (total_90 + total_0 + total_m1 > total_m89 + total_1):
            # Remove all 1's, -89's and -88's
            anglepos = anglepos[np.where(anglepos != 1)]
            angleneg = angleneg[np.where(angleneg != -89)]
            angleneg = angleneg[np.where(angleneg != -88)]
        else:
            # Remove all 0's, -1's, 89's and 90's
            anglepos = anglepos[np.where(anglepos != 0)]
            angleneg = angleneg[np.where(angleneg != -1)]
            anglepos = anglepos[np.where(anglepos != 89)]
            anglepos = anglepos[np.where(anglepos != 90)]
    # To be tested

    # Now make 90-degree pairs: take the positive angles and check which negative
    # angles complement them to 90 +/- 5 degrees.
    pos_ang = np.unique(anglepos)
    neg_ang = np.unique(angleneg)
    good_angles_pos = set()
    good_angles_neg = set()
    for elp in pos_ang:
        for eln in neg_ang:
            nty = elp + np.abs(eln)
            if (nty > 85 and nty < 95):
                # good combination
                good_angles_pos.add(elp)
                good_angles_neg.add(eln)
    good_pos = [positivePairs[ind] for ind, angle in enumerate(anglepos)
                if angle in good_angles_pos]
    good_neg = [negativePairs[ind] for ind, angle in enumerate(angleneg)
                if angle in good_angles_neg]

    error = enoughpeaks(good_pos, good_neg)
    if error < 0:
        return -3, [], []
    positivePairs = findCrossSequence(good_pos, gridsize)
    negativePairs = findCrossSequence(good_neg, gridsize)
    if len(positivePairs) == 0 and len(negativePairs) == 0:
        return -3, [], []
    error = enoughpeaks(positivePairs, negativePairs)
    if error == 1:
        negativePairs = predictGrid(positivePairs, R, gridsize)
        if len(negativePairs) < 1:
            return -1, [], []
    if error == 2:
        positivePairs = predictGrid(negativePairs, R, gridsize)
        if len(positivePairs) < 1:
            return -2, [], []

    # get angles
    positivePairs = np.squeeze(np.array(positivePairs))
    negativePairs = np.squeeze(np.array(negativePairs))
    total_pos = positivePairs.shape[0]
    total_neg = negativePairs.shape[0]
    topval = 91
    topscore = 0
    for elp in positivePairs:
        for eln in negativePairs:
            val = np.abs((elp[1] - eln[1]) - 90)
            score = elp[2] + eln[2]
            if val < topval:
                fangpos = elp[1]
                fangneg = eln[1]
                topscore = score
                topval = val
            elif val == topval:
                if score > topscore:
                    fangpos = elp[1]
                    fangneg = eln[1]
                    topscore = score
                    topval = val
    nty = fangpos - fangneg
    for i in range(total_pos):
        positivePairs[i, 1] = fangpos
    for i in range(total_neg):
        negativePairs[i, 1] = fangneg
    if verbose:
        print('Angle sum :' + str(nty))
    if (nty < 85 and nty > 60) and verbose:
        print('The angle orientations are not 90 degrees. Adjust properly.')
    return 0, positivePairs, negativePairs
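To illustrate the final pairing step above: every positive-side (position, angle, score) peak is compared with every negative-side peak, and the pair whose angle difference is closest to 90 degrees wins, with the summed score breaking ties. A small hand-computed sketch with hypothetical peaks:

import numpy as np

# Hypothetical (position, angle, score) triples.
positivePairs = np.array([[10.0, 45.0, 0.9],
                          [40.0, 44.0, 0.7]])
negativePairs = np.array([[12.0, -46.0, 0.8],
                          [42.0, -44.0, 0.6]])

best = None
for elp in positivePairs:
    for eln in negativePairs:
        val = abs((elp[1] - eln[1]) - 90)      # deviation from a perfect 90-degree pair
        score = elp[2] + eln[2]
        if best is None or val < best[0] or (val == best[0] and score > best[1]):
            best = (val, score, elp[1], eln[1])

print(best)   # (0.0, 1.5, 44.0, -46.0): 44 - (-46) = 90 exactly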
def benchmark_mean(y_true, checkMatrix):
    # Mask unrated entries, then broadcast each row's mean rating (over its
    # rated entries only) across the whole row.
    y_true = y_true * checkMatrix
    row_means = (np.sum(y_true, axis=1) / np.sum(checkMatrix == 1.0, axis=1)).reshape(y_true.shape[0], 1)
    return row_means * np.ones(y_true.shape)
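A small usage check for benchmark_mean, assuming a hypothetical 2x3 rating matrix where checkMatrix marks which entries were actually rated; each row's mean over its rated entries is broadcast across the row:

import numpy as np

y_true = np.array([[4.0, 2.0, 5.0],
                   [3.0, 0.0, 1.0]])
checkMatrix = np.array([[1.0, 1.0, 0.0],    # row 0: entries 0 and 1 rated
                        [1.0, 0.0, 1.0]])   # row 1: entries 0 and 2 rated

print(benchmark_mean(y_true, checkMatrix))
# Row 0 mean over rated entries: (4+2)/2 = 3.0; row 1: (3+1)/2 = 2.0
# -> [[3., 3., 3.], [2., 2., 2.]]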
def findCrossSequence(ipeaks, gridsize=None): # Checks which is the best combination of peaks that fits the gridsize # The allowed grid sequence can be one strike after another # or just one line. # Peaks need to be tuples [position,angle,X] # Grid size must be in pixels. This findCrossSequence expects a separation of n pixels ------------*----*-------- # If we need n,m pixels (periodical) ---*--*--------*--*------ needs modification # If gridsize is None, the pairing is omitted # Always return [] if error total_peaks = len(ipeaks) gpeaks = [] if gridsize is None: return [] # Get all possible combinations of peaks possible_pairs = list(itertools.combinations(ipeaks, 2)) error = [] saved_group = [] for pair in possible_pairs: pair = np.array(pair) total_p = pair[pair.argsort(axis=0)[:, 0]] terror = 0 dist_p = np.abs(total_p[0, 0] - total_p[1, 0]) good = 1 dif_error = np.abs(dist_p - gridsize[0]) / gridsize[0] terror = terror + dif_error # first test, distance of spacing if (dif_error > 0.35): good = 0 if (good == 1): if ( np.abs(total_p[0, 1] - total_p[1, 1]) > 2 ): # second test, difference between angles not bigger than 3 deg good = 0 if (good == 1): saved_group.append(total_p) error.append(terror) if len(saved_group) == 0: return [] # For each group check that they are not competing in the same distances k = 0 good_group = [] if (len(saved_group) == 1): return saved_group for i, g1 in enumerate(saved_group): for j in range(i + 1, len(saved_group)): if (i != j): g2 = saved_group[j] err_dif = np.abs(error[i] - error[j]) if (err_dif > 0.1): # We keep the set with minimum error if (error[i] < error[j]): good_group.append(g1) else: good_group.append(g2) else: #potential candidates, leave them good_group.append(g1) good_group.append(g2) if len(good_group) == 0: return [] # Now is time to select the BEST candidate # Is going to be the one that sums up the most sg = [] for g in good_group: sg.append(np.sum(g[:, 2])) maxind = np.argsort(np.array(sg))[::-1] return good_group[maxind[0]]
def eval(self, x, y, train=False): x, y = to_variable(var=(x, y.long()), cuda=self.cuda) out, _, _ = self.model(x) pred = out.data.max(dim=1, keepdim=False)[1] # get the index of the max log-probability acc = float(np.sum(pred.detach().cpu().numpy() == y.detach().cpu().numpy())) / float(len(y)) return acc, pred
def accuracy(out, labels):
    # Returns the number of correct predictions (not the fraction);
    # divide by len(labels) if a rate is needed.
    outputs = np.argmax(out, axis=1)
    return np.sum(outputs == labels)
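Usage sketch for accuracy as written; note that it returns the count of correct predictions, so divide by the number of samples to get a rate:

import numpy as np

out = np.array([[0.1, 0.9],      # predicted class 1
                [0.8, 0.2],      # predicted class 0
                [0.3, 0.7]])     # predicted class 1
labels = np.array([1, 0, 0])

n_correct = accuracy(out, labels)
print(n_correct, n_correct / len(labels))   # 2 and 0.666...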
def vector_norm(v_in):
    # Euclidean (L2) norm; np.abs makes this correct for complex inputs as well.
    return np.sqrt(np.sum(np.abs(v_in) ** 2))
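Quick check that vector_norm matches NumPy's built-in L2 norm, including for complex vectors (the np.abs handles the complex case):

import numpy as np

v = np.array([3.0, 4.0])
print(vector_norm(v), np.linalg.norm(v))    # 5.0 5.0

z = np.array([1 + 1j, 2 - 2j])
print(vector_norm(z), np.linalg.norm(z))    # both sqrt(10) ~= 3.1623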
random.seed(0) check = True while check: training_set = checkMatrix.copy() for i in range(training_set.shape[1]): # use only rated restaurants index = list(np.where(training_set[:,i] == 1)[0]) # randomly select 30% of whole dataset flag = int(round(len(index)*0.3)) index_flag = random.sample(index,flag) training_set[index_flag,i] = 0 if np.sum(training_set,axis=1).min() > 1: check = False testing_set = checkMatrix - training_set print(f"Training set: {training_set.sum()}\nTesting set: {testing_set.sum()}\n") # ---- Use evaluation metrics to get the quantified values of model performances --- # # Use root-mean-squared-error method for model performance employing linear regression def root_mean_squared(y_true, y_predicte, R): rsme = np.sqrt(mean_squared_error(y_true[R == 1], y_predicte[R == 1])) return rsme # --- Create a benchmark model which returns average predicted rating and use it to compare ratings ----- # # Function for benchmark model mean def benchmark_mean (y_true, checkMatrix):