def getFourMoments(sequence, ax=1):
    """Return summary statistics of ``sequence`` followed by its quantiles.

    The statistics are, in order: mean, variance, skewness, kurtosis and
    standard error of the mean, followed by whatever ``mquantiles`` returns
    for its default probabilities.

    Parameters
    ----------
    sequence : array_like
        Input data.
    ax : int or None, optional
        Axis that is reduced.  ``None`` reduces over all of the data.

    Returns
    -------
    numpy.ndarray
        With ``ax=None`` a 1-D array ``[statistics..., quantiles...]``.
        Otherwise the same values are laid out along the last axis, so a
        2-D input gives one row per reduced series regardless of which
        axis was reduced.
    """
    finalArray = [
        np.mean(sequence, axis=ax),
        np.var(sequence, axis=ax),
        skew(sequence, axis=ax),
        kurtosis(sequence, axis=ax),
        sem(sequence, axis=ax),
    ]
    if ax is None:
        finalArray.extend(mquantiles(sequence, axis=ax))
        return np.array(finalArray)

    # One row per series, one column per statistic.
    moments = np.array(finalArray).T
    # mquantiles leaves the quantile dimension at position ``ax``; move it
    # to the end so it lines up with the statistics for any axis (the old
    # code concatenated along ``ax`` and only worked when that happened to
    # be the last axis).
    quantiles = np.moveaxis(np.array(mquantiles(sequence, axis=ax)), ax, -1)
    return np.concatenate((moments, quantiles), axis=-1)
def getFourMoments(sequence, ax=1):
    """Compute mean, variance, skewness, kurtosis, SEM and quantiles.

    Parameters
    ----------
    sequence : array_like
        Data to summarise.
    ax : int or None, optional
        Axis along which the data is reduced; ``None`` summarises all of
        the data at once.

    Returns
    -------
    numpy.ndarray
        ``ax=None``: 1-D array holding the five statistics followed by the
        default ``mquantiles`` values.  Any other ``ax``: the same values
        along the last axis (one row per reduced series for 2-D input).
    """
    finalArray = [
        np.mean(sequence, axis=ax),
        np.var(sequence, axis=ax),
        skew(sequence, axis=ax),
        kurtosis(sequence, axis=ax),
        sem(sequence, axis=ax),
    ]
    quantiles = mquantiles(sequence, axis=ax)

    if ax is None:
        finalArray.extend(quantiles)
        return np.array(finalArray)

    # The statistics are stacked on axis 0 and transposed so that they end
    # up last; the quantiles sit on axis ``ax`` and must be moved there as
    # well.  Concatenating along ``ax`` (as before) broke for ax=0 on 2-D
    # input because the two shapes no longer matched.
    stats_last = np.array(finalArray).T
    quantiles_last = np.moveaxis(np.array(quantiles), ax, -1)
    return np.concatenate((stats_last, quantiles_last), axis=-1)
def _fit_scale(self, X):
    """Record the 5% and 95% quantiles of every column of ``X``.

    The values are stored, in column order, in the lists ``self.Q5`` and
    ``self.Q95``.  Nothing is returned.
    """
    lower_bounds, upper_bounds = [], []
    self.Q5, self.Q95 = lower_bounds, upper_bounds
    for column in range(X.shape[1]):
        low, high = mquantiles(X[:, column], [0.05, 0.95])
        lower_bounds.append(low)
        upper_bounds.append(high)
def _fit_scale(self, X):
    """Compute per-column 5% / 95% quantiles of ``X``.

    Fills ``self.Q5`` and ``self.Q95`` with one entry per column of ``X``
    (same order as the columns); returns ``None``.
    """
    self.Q5, self.Q95 = [], []
    keep_low, keep_high = self.Q5.append, self.Q95.append
    n_columns = X.shape[1]
    for index in range(n_columns):
        bounds = mquantiles(X[:, index], [0.05, 0.95])
        q_low, q_high = bounds
        keep_low(q_low)
        keep_high(q_high)
def get_separators(self, nbr):
    """Return the quantile separators of ``self.stats`` and its extremes.

    The separators are the quantiles of ``self.stats`` at the probabilities
    ``1/nbr, 2/nbr, ..., (nbr-1)/nbr``.  The result is the tuple
    ``(separators, minimum, maximum)`` with the separators as a plain list.
    (Min and max could also be taken at 2 sigma.)
    """
    lowest = min(self.stats)
    highest = max(self.stats)
    probabilities = [i * 1.0 / nbr for i in range(1, nbr)]
    separators = mquantiles(list(self.stats), prob=probabilities)
    return (separators.tolist(), lowest, highest)
def get_separators(self, nbr):
    """Split ``self.stats`` into ``nbr`` quantile groups.

    Returns ``(separators, mini, maxi)``: the list of the ``nbr - 1``
    quantile values that separate the groups, followed by the smallest and
    the largest value of ``self.stats``.  (Min and max could also be taken
    at 2 sigma.)
    """
    values = self.stats
    mini, maxi = min(values), max(values)
    cut_points = mquantiles(
        list(self.stats),
        prob=[i * 1.0 / nbr for i in range(1, nbr)],
    ).tolist()
    return (cut_points, mini, maxi)
def flip_measure_regression(self, i, snp, min_clique_size=3, small_clique=10):
    '''Suggested by Oren. Flip measure for sample i at a snp. This is the separation angle between
    the regression lines of the kinship coefficient of clique-father vs. the kinship coefficient
    of clique-mother for each of the two cliques. The angle is scaled to [-1,1].

    Returns (sep, (size of clique 0, size of clique 1), (k, line, c)) where
    k = (k00, k10, k01, k11) are the kinship arrays (kPC: parent index P, clique index C),
    line = (a0, b0, a1, b1) are the slope/intercept pairs of the two fits and
    c is the list of the two cliques. If either clique has fewer than min_clique_size
    members, sep, k, line and c are all None.'''
    cliques = [self.ibd_clique_sample_ids(i, snp, 0), self.ibd_clique_sample_ids(i, snp, 1)]
    sizes = (len(cliques[0]), len(cliques[1]))
    if min(cliques[0].shape[0], cliques[1].shape[0]) < min_clique_size:
        return None, sizes, (None, None, None)

    parents = [self.ped.sample_id[self.ped.graph.predecessors(i)[a]]
               for a in im.constants.ALLELES]
    # Quantile region to include in fit
    quantile_range = [0.25, 0.75]

    def kinship_with(parent, clique):
        # Kinship between one parent and every member of one clique
        return np.array([self.params.kinship(parents[parent], member)
                         for member in cliques[clique]])

    def fit_clique(clique):
        # Fit a line through (kinship with parent 0, kinship with parent 1) for one clique.
        # linefit(x, y, 1) is presumably a degree-1 least-squares fit - confirm against its definition.
        k_first = kinship_with(0, clique)
        k_second = kinship_with(1, clique)
        range_first = mquantiles(k_first, prob=quantile_range)
        range_second = mquantiles(k_second, prob=quantile_range)
        if len(cliques[clique]) >= small_clique:
            # Large clique: keep only points inside the quantile region of both coordinates
            selected = ((k_first >= range_first[0]) & (k_first <= range_first[1])
                        & (k_second >= range_second[0]) & (k_second <= range_second[1]))
        else:
            # Small clique: use every point
            selected = np.arange(len(cliques[clique]))
        slope, intercept = linefit(k_first[selected], k_second[selected], 1)
        return k_first, k_second, slope, intercept

    k00, k10, a0, b0 = fit_clique(0)
    k01, k11, a1, b1 = fit_clique(1)
    k = (k00, k10, k01, k11)
    line = (a0, b0, a1, b1)

    # Calculate the separation angle
    if np.abs(a0 - a1) < 1e-15:
        # (Numerically) parallel lines
        sep = 0
    else:
        # (p, q) is the intersection point of the two lines
        p = (b1 - b0) / (a0 - a1)
        q = a0 * p + b0
        x0 = np.sign(a0 + b0)
        # NOTE(review): the first line is evaluated at x0, the second at 1 - confirm this asymmetry is intended
        angle0 = np.arctan2(a0 * x0 + b0 - q, x0 - p)
        angle1 = np.arctan2(a1 + b1 - q, 1 - p)
        sep = max(-1, min(1, ANGLE_SCALING_FACTOR * (angle0 - angle1)))
    return sep, sizes, (k, line, cliques)
def _calculateStatistics(self, img, haralick=False, zernike=False):
    """Build a flat feature list describing ``img``.

    The list always starts with the default ``mquantiles`` values of the
    image followed by its mean, variance, skewness and kurtosis (all taken
    over the whole image).  Haralick texture features and Zernike moments
    are appended, in that order, when the corresponding flag is true.
    """
    # Quantiles of the pixel values (mquantiles default probabilities)
    features = list(mquantiles(img))
    # First four moments
    features += [
        img.mean(),
        img.var(),
        skew(img, axis=None),
        kurtosis(img, axis=None),
    ]
    # Haralick features, computed on the image converted by img_as_ubyte
    if haralick:
        as_ubyte = dtype.img_as_ubyte(img)
        features.extend(texture.haralick(as_ubyte).flatten())
    # Zernike moments
    if zernike:
        # NOTE(review): under Python 3 "/" makes this radius a float - confirm that is acceptable
        radius = int(self.rows) / 2 + 1
        features.extend(zernike_moments(img, radius))
    return features
def _calculateStatistics(self, img, haralick=False, zernike=False):
    """Return a list of statistics computed from ``img``.

    Layout of the returned list: default ``mquantiles`` values, then mean,
    variance, skewness and kurtosis of the whole image, then (optionally)
    flattened Haralick features, then (optionally) Zernike moments.
    """
    # Quantiles of the image values
    quantiles = mquantiles(img)
    # First four moments
    moments = [img.mean(), img.var(), skew(img, axis=None), kurtosis(img, axis=None)]
    result = list(quantiles) + moments

    # Haralick features
    if haralick:
        result.extend(texture.haralick(dtype.img_as_ubyte(img)).flatten())

    # Zernike moments
    if zernike:
        # NOTE(review): "/" is true division on Python 3, so the radius may be a float - confirm
        result.extend(zernike_moments(img, int(self.rows) / 2 + 1))

    return result
word, featurelst = tokens[0], [float(f) for f in tokens[1:]] features.append(featurelst) wordVector.append(word) featureMatrix = np.array(features) print >> sys.stderr, featureMatrix print >> sys.stderr, "*" * 100 TfeatureMatrix = featureMatrix.transpose() TdiscritizedfeatureNestedList = [] for column in TfeatureMatrix: print >> sys.stderr, column.tolist()[1:10], len(column.tolist()), np.average(column) bins = mquantiles(column, prob=np.array([0.01 * q for q in range(1, 101)]), alphap=0, betap=1) print >> sys.stderr, bins Bcolumn = np.digitize(column, bins).tolist() print >> sys.stderr, Bcolumn[1:10] TdiscritizedfeatureNestedList.append(Bcolumn) TdiscritizedfeatureMatrix = np.array(TdiscritizedfeatureNestedList) if VERBOSE: for word, dfeatures, cfeatures in zip(wordVector, TdiscritizedfeatureMatrix.transpose(), featureMatrix): print >> sys.stdout, "%s\t%s" % (word, "\t".join("%d(%f)" % (df, cf) for df, cf in zip(dfeatures, cfeatures)) ) else:
"""
Process the 'out'put: for every request, collect the requests that were
outstanding at that time, group them by the (bucketed) number of outstanding
requests and write page-time quantiles per bucket to 'analyzed'.
"""
import csv

from scipy.stats.mstats import mquantiles

# Bucket width: counts of outstanding requests are merged in groups of this size.
combine_requests = 2


def clen(timeset):
    """Return len(timeset) rounded down to a multiple of combine_requests."""
    # Floor division is required here: with "/" Python 3 yields a float and
    # the rounding to the bucket boundary silently disappears.
    return (len(timeset) // combine_requests) * combine_requests


times = {}
times_via_len = {}
with open("out", "r") as f:
    # csv.reader returns blank lines as empty rows, which cannot be indexed
    # below, so they are dropped.
    data = [row for row in csv.reader(f) if row]

for d in data:
    # Rows whose [0]..[1] interval strictly contains this row's first column.
    # NOTE(review): columns are compared as strings - presumably timestamps in
    # a format that sorts lexicographically; confirm against the producer.
    times.setdefault(d[0], set()).update(
        tuple(test) for test in data if test[0] < d[0] < test[1])

for timeset in times.values():
    times_via_len.setdefault(clen(timeset), set()).update(timeset)

with open("analyzed", "w") as f2:
    writer = csv.writer(f2)
    for length, rowset in times_via_len.items():
        if length > 0:
            # Third column is parsed as the page time of the request
            page_times = [float(p[2]) for p in rowset]
            label = "'{}-{}'".format(length, length + combine_requests)
            writer.writerow(
                [label] + list(mquantiles(page_times, [0.1, 0.25, 0.75, 0.9])))
def flip_measure_regression(self, i, snp, min_clique_size=3, small_clique=10):
    '''Suggested by Oren. Flip measure for sample i at a snp: the separation angle between the
    two lines fitted, one per clique, to the kinship coefficients of the clique members with the
    two parents of sample i (clique-father vs. clique-mother). The angle is scaled to [-1,1].

    The return value is (sep, (len(c0), len(c1)), (k, line, c)). k holds the four kinship arrays
    (k00, k10, k01, k11), line the fit coefficients (a0, b0, a1, b1) and c the two cliques
    [c0, c1]. sep, k, line and c are None when a clique is smaller than min_clique_size.'''
    c0 = self.ibd_clique_sample_ids(i, snp, 0)
    c1 = self.ibd_clique_sample_ids(i, snp, 1)
    sep = k = line = c = None
    if min(c0.shape[0], c1.shape[0]) >= min_clique_size:
        c = [c0, c1]
        parents = [
            self.ped.sample_id[self.ped.graph.predecessors(i)[a]]
            for a in im.constants.ALLELES
        ]
        prob = [0.25, 0.75]  # Quantile region to include in fit

        # Perform least-squares fit of both cliques
        kinships, coefficients = [], []
        for clique in c:
            k_p0 = np.array([self.params.kinship(parents[0], x) for x in clique])
            k_p1 = np.array([self.params.kinship(parents[1], x) for x in clique])
            lo0, hi0 = mquantiles(k_p0, prob=prob)
            lo1, hi1 = mquantiles(k_p1, prob=prob)
            if len(clique) >= small_clique:
                # Only points within the quantile region of both coordinates take part in the fit
                good = (k_p0 >= lo0) & (k_p0 <= hi0) & (k_p1 >= lo1) & (k_p1 <= hi1)
            else:
                # Too few points to trim: fit all of them
                good = np.arange(len(clique))
            a, b = linefit(k_p0[good], k_p1[good], 1)
            kinships += [k_p0, k_p1]
            coefficients += [a, b]

        k = tuple(kinships)
        line = tuple(coefficients)
        a0, b0, a1, b1 = line

        # Calculate the separation angle
        slope_gap = a0 - a1
        if np.abs(slope_gap) < 1e-15:
            sep = 0
        else:
            # Intersection point (p, q) of the two fitted lines
            e = (b1 - b0) / (a0 - a1)
            p, q = e, a0 * e + b0
            x0 = np.sign(a0 + b0)
            # NOTE(review): first angle uses x0, second uses 1 as the abscissa - confirm intended
            angle = (np.arctan2(a0 * x0 + b0 - q, x0 - p)
                     - np.arctan2(a1 + b1 - q, 1 - p))
            sep = max(-1, min(1, ANGLE_SCALING_FACTOR * angle))
    return sep, (len(c0), len(c1)), (k, line, c)
def ci_non_parametric(x, p):
    """Return the central quantile interval of ``x`` with coverage ``p``.

    The two bounds are the ``(1 - p) / 2`` and ``(1 + p) / 2`` quantiles of
    ``x``, computed by ``mquantiles`` with ``alphap=1`` and ``betap=1``.
    The result is whatever ``mquantiles`` returns for those two
    probabilities.
    """
    lower_tail = (1 - p) / 2
    upper_tail = (1 + p) / 2
    return mquantiles(x, prob=[lower_tail, upper_tail], alphap=1, betap=1)