def averageHopDistance(transmissions):
    """
    Compute the average number of hops per original information source.

    Each element of ``transmissions`` is a 2-D numpy array in which every row
    is one transmission: column 0 holds the sender (ego) and column 1 the
    receiver (alter). The total number of rows over all arrays is divided by
    the number of original senders, i.e. senders that never show up as a
    receiver in any of the arrays.

    Returns the ratio as a float, or 0 when there is no original sender.
    """
    senders = numpy.array([])
    receivers = numpy.array([])
    hopCount = 0

    # Transmissions are assumed to be unique, so every row counts as one hop
    for step in transmissions:
        hopCount += step.shape[0]
        receivers = numpy.union1d(receivers, step[:, 1])
        senders = numpy.union1d(senders, step[:, 0])

    # Whoever received information at some point is not an original source
    sources = numpy.setdiff1d(senders, receivers)
    numSources = sources.shape[0]

    if numSources == 0:
        return 0
    return float(hopCount) / numSources
def chi(self, customattribute):
    """
    Compute the chi-square value of ``customattribute`` against the class
    attribute.

    For every chunk in ``self.chunks`` the unique ``studentID`` values are
    collected per (attribute value, class value) pair and per class value.
    The observed table holds the number of unique students per pair; the
    expected table is each attribute value's total multiplied by the overall
    share of each class.

    Returns a tuple ``(chisquare(observed, expected), attributeObs)``.

    NOTE(review): ``.iteritems()`` and ``.ix`` only exist in older pandas
    releases - confirm the pandas version this is meant to run on.
    """
    attributeDict = dict()
    classAttributeDict = dict()
    for piece in self.chunks:
        # Accumulate the unique student ids of every (attribute, class) pair across chunks.
        for (attribute, classAttribute), arrays in piece.groupby([customattribute, self.classAttribute]).studentID.unique().iteritems():
            attributeDict.setdefault((attribute, classAttribute), np.array([]))
            attributeDict[(attribute, classAttribute)] = np.union1d(attributeDict[(attribute, classAttribute)], arrays)
        # Accumulate the unique student ids of every class value across chunks.
        for classAttribute, arrays in piece.groupby(self.classAttribute).studentID.unique().iteritems():
            classAttributeDict.setdefault(classAttribute, np.array([]))
            classAttributeDict[classAttribute] = np.union1d(classAttributeDict[classAttribute], arrays)
    # Share of each class (graduation destination) within the whole population.
    classSeries = Series(classAttributeDict).apply(lambda x:len(x))
    classSeries /= classSeries.sum()
    # Observed counts for every attribute value (rows) and class (columns).
    attributeObs = Series(attributeDict).apply(lambda x:len(x)).unstack(fill_value=0)
    attributeExp = DataFrame(index=attributeObs.index, columns=attributeObs.columns)
    # Initial values: every cell of a row is set to that row's observed total.
    for index in attributeExp.index:
        attributeExp.ix[index] = attributeObs.ix[index].sum()
    # Scale the row totals by the class shares to obtain the expected values.
    attributeExp = attributeExp.mul(classSeries).fillna(0)
    # Compute the chi-square value (and p-value) from observed vs. expected values.
    # presumably `chisquare` is scipy.stats.chisquare - verify against the file's imports
    return chisquare(attributeObs.stack(), attributeExp.stack()), attributeObs
def cr_reject2(fl, er, nsig=10.0, fwhm=2, grow=1, debug=True):
    """ Interpolate across features that have widths smaller than the
    expected fwhm resolution.

    Parameters
    ----------
    fl : array of floats, shape (N,)
      Flux
    er : array of floats, shape (N,)
      Error
    nsig : float
      A pixel is flagged when the smoothed flux differs from the flux by
      more than ``nsig`` times the error.
    fwhm : int
      Resolution fwhm in pixels (passed to ``convolve_psf``).
    grow : int
      Number of neighbouring pixels on each side of a flagged pixel that
      are flagged as well. ``0`` flags only the pixel itself.
    debug : bool
      If True, print the number of initially flagged pixels.

    Returns
    -------
    fl, er : arrays of floats
      Copies of the inputs in which flagged pixels have their flux linearly
      interpolated from the unflagged pixels and their error set to NaN.
    """
    # np.array copies, so the caller's arrays are left untouched
    fl, er = (np.array(a, dtype=float) for a in (fl, er))
    # flag pixels that deviate strongly from the smoothed spectrum
    fl1 = convolve_psf(fl, fwhm)
    ibad = np.where(np.abs(fl1 - fl) > nsig*er)[0]
    if debug:
        print(len(ibad))
    # Grow every flagged pixel by `grow` pixels on both sides. Broadcasting
    # over the offsets also covers grow=0, where concatenating an empty list
    # of arrays used to raise.
    reach = max(grow, 0)
    offsets = np.arange(-reach, reach + 1)
    ibad = np.unique(ibad[:, None] + offsets)
    # drop neighbours that fall off either end of the spectrum
    ibad = ibad[(ibad > -1) & (ibad < len(fl))]
    igood = np.setdiff1d(np.arange(len(fl1)), ibad)
    fl[ibad] = np.interp(ibad, igood, fl[igood])
    er[ibad] = np.nan
    return fl,er
def fit(self, X, y):
    """
    Fit the learner and one confidence estimator per classifier feature.

    The number of columns of ``X`` is remembered, the learner is obtained via
    ``self.get_learner(X, y)``, and the features reported by
    ``self.getClassifierFeatures()`` are scored against the remaining
    (non-excluded) columns. For each of those features a fresh copy of
    ``self.feature_confidence_estimator`` is fitted and registered, and the
    features it reports are merged into ``self._second_layer_features``.

    Returns ``self``.
    """
    self._X_colcount = X.shape[1]
    # (fitting self.learner on self._transform(X) directly is disabled)
    self.get_learner(X, y)
    classifier_features = self.getClassifierFeatures()

    fe = SecondLayerFeatureEvaluator()
    # columns that may not be used as second-layer inputs
    local_excluded_features = np.union1d(self.excluded_features,
                                         classifier_features)
    local_X = utilities.exclude_cols(X, local_excluded_features)
    scores = fe.evaluate(local_X, X[:,classifier_features],
                         n_jobs = self.n_jobs)

    for position, feature in enumerate(classifier_features):
        # clone the template estimator and re-apply its parameters
        fc = sklearn.base.clone(self.feature_confidence_estimator)
        fc = fc.set_params(**self.feature_confidence_estimator.get_params())
        fc.fit(X, feature, scores[position], local_excluded_features)
        self.setFeatureConfidenceEstimator(feature, fc)
        self._second_layer_features = np.union1d(
            self._second_layer_features, fc.getFeatures())
    return self
def set_cavity_walls(self, walls=('left', 'right', 'bottom', 'west', 'east')):
    """
    Set up to 5 walls as solid walls for the simulation.

    ``walls`` is an iterable of wall names. Recognised names and the nodes
    they select (as implemented here):

    * ``'right'``  - nodes with ``x == 0``
    * ``'left'``   - nodes with ``x > Lx_p - dx/2``
    * ``'bottom'`` - nodes with ``y == 0``
    * ``'west'``   - nodes with ``z == 0``
    * ``'east'``   - nodes with ``z > Lz_p - dx/2``

    Unknown names are ignored. The sorted union of the selected node indices
    is stored in ``self.solid_list`` (always an integer array, also when
    some or all walls are omitted). ``self.lid_list`` is set to the nodes
    with ``y > Ly_p - dx/2``.

    NOTE(review): 'right' selects x == 0 and 'left' the far x side - confirm
    this naming is intended.
    """
    def node_indices(condition):
        # flattened output of np.where, as used throughout this method
        return np.array(np.where(condition)).flatten()

    # Conditions are wrapped in lambdas so that a coordinate array is only
    # touched when the corresponding wall is actually requested.
    selectors = {
        'right': lambda: self.x == 0.,
        'left': lambda: self.x > (self.Lx_p - self.dx/2.),
        'bottom': lambda: self.y == 0.,
        'west': lambda: self.z == 0.,
        'east': lambda: self.z > (self.Lz_p - self.dx/2.),
    }

    # Start from an *integer* empty array: a float placeholder would turn the
    # whole union into floats, which cannot be used as an index array.
    solid_list = np.empty(0, dtype=np.intp)
    for w in walls:
        if w in selectors:
            solid_list = np.union1d(solid_list, node_indices(selectors[w]()))

    self.solid_list = solid_list
    self.lid_list = node_indices(self.y > (self.Ly_p - self.dx/2.))
def __call__(self, s, *pargs, **kargs):
    """
    Return the entry of ``self.m`` that corresponds to abscissa ``s``.

    ``self.a`` holds the abscissas and ``self.m`` the matching entries.
    When no keyword argument is given:

    * if ``s`` is one of the abscissas, the matching entry is returned;
    * if ``s`` lies at or beyond either end of ``self.a``, the first or last
      entry is returned;
    * otherwise the two neighbouring entries are interpolated linearly.
      Interpolation is only implemented for ``mesh3d`` entries: a new
      ``mesh3d`` is built on the union of both X axes and the union of both
      Y axes. For any other entry type a message is printed and ``None`` is
      returned.

    With keyword arguments nothing is done and ``None`` is returned.

    NOTE(review): the original condition tested ``len(kargs)`` twice and
    never looked at ``pargs``; that behaviour is kept - confirm whether
    positional extras were meant to be checked as well.
    """
    if not kargs:
        if s in self.a:
            at = np.searchsorted(self.a, s)
            return self.m[at]
        if s <= self.a[0]:
            return self.m[0]
        if s >= self.a[-1]:
            return self.m[-1]

        at = np.searchsorted(self.a, s)
        # fractional position of s between its two neighbouring abscissas
        d = (s - self.a[at-1]) / (self.a[at] - self.a[at-1])
        r1 = self.m[at-1]
        r2 = self.m[at]
        # (interpolation of mesh2d entries is not implemented)
        if isinstance(r1, mesh3d):
            X = np.union1d(r1.X, r2.X)
            # The Y axis must come from the Y attributes; it was previously
            # built from the X attributes.
            Y = np.union1d(r1.Y, r2.Y)
            res = np.zeros((X.size, Y.size))
            for ix, vx in enumerate(X):
                for iy, vy in enumerate(Y):
                    a = r1(vx, vy)
                    b = r2(vx, vy)
                    res[ix, iy] = a + d * (b - a)
            return mesh3d(X=X, Y=Y, Z=res)
        else:
            print(u"Pas dans la liste")
def get_obstList(self,X,Y,Z):
    """
    Return the indices of all nodes that lie outside the pipe.

    The pipe runs along Z, centred on X = 1, Y = 1, and consists of an inlet
    section of diameter ``self.diam_in`` (Z <= 4), a conical contraction
    (4 <= Z <= 4 + diam_out/2) and an outlet section of diameter
    ``self.diam_out`` (Z >= 4 + diam_out/2).

    X, Y, Z are the node coordinate arrays; the result is a sorted list of
    node indices.
    """
    def where_flat(condition):
        return np.array(np.where(condition)).flatten()

    # squared distance from the pipe axis
    radial_sq = (X - 1)**2 + (Y - 1)**2

    # Inlet: outside the large pipe, up to the start of the contraction
    outside_inlet = np.intersect1d(
        where_flat(radial_sq > (self.diam_in/2)**2),
        where_flat(Z <= 4))

    # Contraction: outside the cone, between its start and stop planes
    r_cone = self.diam_out
    h_cone = self.diam_out
    outside_cone = where_flat(
        radial_sq > (r_cone/h_cone)**2*(Z - (4 + h_cone))**2)
    for z_limit in (where_flat(Z >= 4),
                    where_flat(Z <= 4 + .5*self.diam_out)):
        outside_cone = np.intersect1d(outside_cone, z_limit)

    # Outlet: outside the small pipe, from the end of the contraction on
    outside_outlet = np.intersect1d(
        where_flat(radial_sq > (self.diam_out/2)**2),
        where_flat(Z >= 4 + .5*self.diam_out))

    # Put the pieces together
    obstacles = np.union1d(outside_cone, outside_inlet)
    obstacles = np.union1d(obstacles, outside_outlet)
    return list(obstacles)
def get_obstList(self,X,Y,Z):
    """
    Return the indices of all nodes that lie outside the expanding pipe.

    The pipe runs along Z, centred on X = 1, Y = 1. With
    ``z_switch = 3 + 0.5*(diam_out - diam_in)`` the obstacle set is the union
    of

    * nodes outside the ``diam_in`` pipe with Z <= z_switch,
    * nodes outside the expansion cone (apex at Z = 3) with Z >= z_switch,
    * nodes outside the ``diam_out`` pipe with
      Z >= 3 + 0.5*(diam_in - diam_out).

    X, Y, Z are the node coordinate arrays; the result is a sorted list of
    node indices.
    """
    def where_flat(condition):
        return np.array(np.where(condition)).flatten()

    # squared distance from the pipe axis
    radial_sq = (X - 1)**2 + (Y - 1)**2
    z_switch = 3 + 0.5*(self.diam_out - self.diam_in)

    # Inlet section
    outside_inlet = np.intersect1d(
        where_flat(radial_sq > (self.diam_in/2)**2),
        where_flat(Z <= z_switch))

    # Expansion cone (no upper Z limit is applied)
    r_cone = self.diam_in
    h_cone = self.diam_in
    outside_cone = np.intersect1d(
        where_flat(radial_sq > (r_cone/h_cone)**2*(Z - 3)**2),
        where_flat(Z >= z_switch))

    # Outlet section
    outside_outlet = np.intersect1d(
        where_flat(radial_sq > (self.diam_out/2)**2),
        where_flat(Z >= 3 + 0.5*(self.diam_in - self.diam_out)))

    # Put the pieces together
    obstacles = np.union1d(np.union1d(outside_cone, outside_inlet),
                           outside_outlet)
    return list(obstacles)
def dDCR_moments(SED1, SED2, bandpass):
    """
    Differences of the first and second refraction moments of two SEDs.

    Refraction is evaluated at a zenith angle of pi/4 radians, relative to
    the refraction at 500 and divided by ``galsim.arcsec``. For each SED
    the flux-weighted mean refraction and the flux-weighted variance about
    that mean are integrated with the trapezoid rule over the union of the
    bandpass and SED wavelength lists, restricted to the bandpass limits.

    Returns ``(R1 - R2, V1 - V2, n)``, where ``n`` is the number of
    wavelength samples used for SED2.
    """
    zenith_angle = np.pi/4.0 * galsim.radians
    R500 = galsim.dcr.get_refraction(500, zenith_angle)

    def R(w):
        return (galsim.dcr.get_refraction(w, zenith_angle) - R500) / galsim.arcsec

    def wave_grid(sed):
        waves = np.union1d(bandpass.wave_list, sed.wave_list)
        inside = (waves >= bandpass.blue_limit) & (waves <= bandpass.red_limit)
        return waves[inside]

    def weighted_integral(kernel, sed, waves):
        return np.trapz(kernel * bandpass(waves) * sed(waves), waves)

    x1 = wave_grid(SED1)
    x2 = wave_grid(SED2)

    # analytic first moment differences
    refraction1 = R(x1)
    refraction2 = R(x2)
    numR1 = weighted_integral(refraction1, SED1, x1)
    numR2 = weighted_integral(refraction2, SED2, x2)
    den1 = SED1.calculateFlux(bandpass)
    den2 = SED2.calculateFlux(bandpass)
    R1 = numR1/den1
    R2 = numR2/den2

    # analytic second moment differences
    V1 = weighted_integral((refraction1 - R1)**2, SED1, x1)/den1
    V2 = weighted_integral((refraction2 - R2)**2, SED2, x2)/den2

    return R1 - R2, V1 - V2, len(x2)
def reference_naive_aggregation(C):
    """
    Reference implementation of a single-pass naive aggregation.

    ``C`` is a CSR matrix whose row ``i`` lists the neighbours of node ``i``.
    Nodes are visited in order; each node that is not yet part of an
    aggregate becomes the root of a new aggregate and takes all of its
    still-unaggregated neighbours with it.

    Returns ``(P, Cpts)``: ``P`` is a CSR matrix with a single 1 per row, in
    the column of the aggregate that row's node belongs to, and ``Cpts`` is
    the array of root nodes.
    """
    n = C.shape[0]
    rows = np.array_split(C.indices, C.indptr[1:-1])

    # aggregates[k] is the aggregate of node k, -1 while unassigned
    aggregates = np.full(n, -1, dtype=C.indices.dtype)
    roots = []
    next_id = 0

    # Only one aggregation pass
    for i, neighbors in enumerate(rows):
        if aggregates[i] != -1:
            continue
        # node i is free: grab all of its neighbours that are free as well
        free = neighbors[aggregates[neighbors] == -1]
        aggregates[free] = next_id
        aggregates[i] = next_id
        next_id += 1
        roots.append(i)

    # every node must have ended up in some aggregate
    assert (aggregates != -1).all()

    Pj = aggregates
    Pp = np.arange(n+1)
    Px = np.ones(n)
    return csr_matrix((Px, Pj, Pp)), np.array(roots)
def update_dimensions(self, test_cases):
    """
    Extend the x/y/z ranges with the parameter values of ``test_cases``.

    For every test case, the values of the first, second and third entry of
    ``tc.test_parameters.items()`` are merged (sorted, without duplicates)
    into ``self.x_range``, ``self.y_range`` and ``self.z_range``.
    """
    range_names = ('x_range', 'y_range', 'z_range')
    for tc in test_cases:
        for position, attr in enumerate(range_names):
            values = tc.test_parameters.items()[position].values()
            setattr(self, attr, num.union1d(getattr(self, attr), values))
def change(self):
    '''
    Suggest a potential change to the state and return the changed state.

    One of the ``params['m']`` entries is picked at random and exactly one
    kind of move is proposed for it:

    * form change  - ``state[2][idx]`` is set to 1 (only tried when
      ``params['TtoDIsPhysical']`` is set and the entry is currently 0);
    * translation  - ``state[0][idx]`` is changed;
    * rotation     - ``state[1][idx]`` is reversed and the counts in
      ``state[3]`` are updated accordingly.

    ``self.state`` itself is not modified; the modified component is a copy.
    Returns the proposed state as a 4-tuple in the same layout as
    ``self.state``.

    The order of the random draws below determines the outcome for a given
    seed, so statements must not be reordered.
    '''
    ratio=self.params['transRotRatio']
    # index of the entry (protein) the move is proposed for
    position_idx=int(np.random.rand()*(self.params['m']))
    # NOTE(review): noChangeFlag is assigned but never read in this method
    noChangeFlag=1
    if self.params['TtoDIsPhysical'] and np.random.rand()<=self.params['probFormChange'] and self.state[2][position_idx]==0:
        # Form change: flip the form flag of the chosen entry from 0 to 1.
        changedForm=self.state[2].copy()
        changedForm[position_idx]=1
        changedState=(self.state[0],self.state[1],changedForm,self.state[3])
        noChangeFlag=0
    elif np.random.rand()<=ratio:
        noChangeFlag=0
        # Translation.
        changedPosition=self.state[0].copy()
        if self.params['isAlt']:
            # Alternative dynamics, where proteins only move if the nearby position is open
            u=np.random.uniform()
            # Perform change of position: step down with probability altProb, otherwise step up,
            # in both cases only if the target slot (modulo N) is free.
            # NOTE(review): the occupancy test wraps modulo N but the stored position does not - confirm.
            if u<= self.params['altProb'] and (self.state[0][position_idx]-1)%self.params['N'] not in self.state[0]:
                changedPosition[position_idx]=changedPosition[position_idx]-1
            elif u> self.params['altProb'] and (self.state[0][position_idx]+1)%self.params['N'] not in self.state[0]:
                changedPosition[position_idx]=changedPosition[position_idx]+1
        else:
            # Dynamics where jumping through proteins is allowed.
            # To be more physical, we force proteins to get into empty slots
            # not connected to any other proteins before they can get back
            # into a slot next to other proteins.
            # Check whether the chosen protein is currently connected to
            # other proteins (i.e. one of its neighbouring slots is occupied).
            if (self.state[0][position_idx]+1)%self.params['N'] in self.state[0] or (self.state[0][position_idx]-1)%self.params['N'] in self.state[0]:
                # The protein is currently connected to other proteins,
                # so we force it to go into empty slots not connected to proteins.
                # connectedEmptySlots holds every occupied slot plus both neighbours of every occupied slot.
                connectedEmptySlots=np.union1d(np.union1d(self.state[0],(self.state[0]+1)%self.params['N']),(self.state[0]-1)%self.params['N'])
                slotsToChoose=np.array([x for x in xrange(self.params['N']) if x not in connectedEmptySlots])
            else:
                # If the protein is currently not connected to proteins, any slot not occupied can be taken.
                slotsToChoose = np.array([x for x in xrange(self.params['N']) if x not in self.state[0]])
            # With no eligible slot the position is left unchanged.
            if len(slotsToChoose)!=0:
                changedPosition[position_idx]=random.choice(slotsToChoose)
        changedState=(changedPosition,self.state[1],self.state[2],self.state[3])
    else:
        # Rotation: reverse the type entry of the chosen protein.
        changedType=self.state[1].copy()
        changedType[position_idx]=changedType[position_idx][::-1]
        changedM=self.state[3].copy()
        # Move one count from the cell of the old type to the cell of the reversed type.
        changedM[self.state[1][position_idx][1],self.state[1][position_idx][0]]-=1
        changedM[changedType[position_idx][1],changedType[position_idx][0]]+=1
        # Note on changedM above: idx 1 is for the row, while idx 0 is for the column in the Type array.
        # This is due to np.ravel of np.indices.
        changedState=(self.state[0],changedType,self.state[2],changedM)
    # changedState=self.checkATP(changedState)
    return changedState
def CONSTRUCT_TREE(df_nodes, df_edges,source=-1,sources=[-1,-2]):
    """
    Reduce the edge table to one incoming edge per node and compute the
    A/C values of the resulting tree.

    For every id that appears as ``Pred_ID`` or ``Prey_ID`` in ``df_edges``
    and is not listed in ``sources``, a single edge row with that id as
    ``Pred_ID`` is kept: the last row after sorting by ``BiomassIngested``,
    preferring edges whose ``Prey_ID`` is one of ``sources``. A node with no
    such edge raises ``Exception('nodes without prey')``.

    ``df_nodes`` is restricted to the ids present in the tree and receives
    the integer columns ``A_value`` and ``C_value``.

    Returns ``(df_nodes, df_tree)``; the returned ``df_nodes`` is indexed by
    ``ID``.

    NOTE(review): ``sources`` is a mutable default argument (it is only read
    here). ``DataFrame.sort``/``irow``/``append``, ``dict.iteritems`` and the
    print statements require Python 2 with an old pandas release.
    """
    ids = np.union1d(df_edges.Pred_ID.unique(),df_edges.Prey_ID.unique())
    df_tree = pd.DataFrame(columns=df_edges.columns)
    for node_id in ids:
        # source nodes have no incoming edge in the tree
        if node_id in sources:
            continue
        # edges of this node whose prey is one of the sources, sorted by ingested biomass
        df = df_edges[(df_edges.Pred_ID==node_id) & (df_edges.Prey_ID.isin(sources))].sort('BiomassIngested')
        if len(df) > 0:
            row = df.irow(-1)
        else:
            # no edge to a source: fall back to the last edge among all edges of this node
            try:
                row = df_edges[(df_edges.Pred_ID==node_id)].sort('BiomassIngested').irow(-1)
            except:
                # NOTE(review): bare except - presumably meant to catch the lookup failure on an empty frame
                print 'Discarding node without prey...'
                print node_id
                print df_edges[(df_edges.Pred_ID==node_id)].sort('BiomassIngested')
                #continue
                raise Exception('nodes without prey')
        df_tree = df_tree.append(row)
    # the frame was built row by row, so the column dtypes are restored explicitly
    df_tree[['Pred_ID', 'Prey_ID']] = df_tree[['Pred_ID', 'Prey_ID']].astype(int)
    df_tree[['Biomass_Assimilated','BiomassIngested']] = df_tree[['Biomass_Assimilated','BiomassIngested']].astype(float)
    # keep only the nodes that take part in the tree
    ids = np.union1d(df_tree.Pred_ID.unique(),df_tree.Prey_ID.unique())
    df_nodes = df_nodes[df_nodes.ID.isin(ids)]
    a_values = {ii:0 for ii in ids}
    c_values = {ii:0 for ii in ids}
    # debugging output
    print len(a_values)
    print len(df_nodes)
    print a_values
    print df_tree.Prey_ID.unique()
    # Both columns start at zero. C_value is initialised from a_values as well,
    # which gives the same result because both dicts hold only zeros at this point.
    df_nodes['A_value'] = a_values.values()
    df_nodes['C_value'] = a_values.values()
    #return df_nodes,df_tree
    # presumably fills a_values and c_values in place, starting from `source` - verify against Tree.GET_A
    Tree.GET_A(df_tree,source,a_values,c_values)
    df_nodes = df_nodes.set_index('ID',drop = False)
    # copy the computed values into the node table
    for key,value in a_values.iteritems():
        #print key,value
        df_nodes.loc[key,'A_value'] = value
    for key,value in c_values.iteritems():
        df_nodes.loc[key,'C_value'] = value
    df_nodes[['A_value','C_value']] = df_nodes[['A_value','C_value']].astype(int)
    return df_nodes,df_tree
def get_obstList(self,X,Y,Z):
    """
    Return the indices of all nodes that lie outside the S-shaped pipe.

    The pipe has radius 0.5 around X = 1 and is made of three straight
    sections (centred on Y = 4, Y = 2.5 and Y = 1) joined by two turns of
    bend radius 0.75 (around Y = 3.25, Z = 3 and around Y = 1.75, Z = 1.5).
    Each piece contributes the nodes outside its own tube within the Y/Z
    region it is responsible for; the result is the sorted union as a list.

    X, Y, Z are the node coordinate arrays.
    """
    def where_flat(condition):
        return np.array(np.where(condition)).flatten()

    def restrict(indices, *regions):
        # keep only the indices that are contained in every region
        for region in regions:
            indices = np.intersect1d(indices, region)
        return indices

    #Pipe_1
    pipe_1_region = np.intersect1d(where_flat(Z <= 3.0), where_flat(Y >= 3.25))
    pipe_1 = restrict(where_flat((X - 1)**2 + (Y - 4)**2 >= 0.5**2),
                      pipe_1_region)

    #Turn_1
    turn_1_region = np.intersect1d(where_flat(Z >= 3.0), where_flat(Y>= 1.75))
    turn_1 = restrict(
        where_flat((0.75 - np.sqrt((Y - 3.25)**2 + (Z -3)**2))**2 + (X - 1)**2 >= 0.5**2),
        turn_1_region)

    #Pipe_2
    pipe_2_band = np.intersect1d(where_flat(Y <= 3.25), where_flat(Y >= 1.75))
    pipe_2_start = np.intersect1d(where_flat(Z >= 1.5), pipe_2_band)
    pipe_2_stop = np.intersect1d(where_flat(Z <= 3.0), where_flat(Y <= 3.25))
    pipe_2 = restrict(where_flat((X - 1)**2 + (Y - 2.5)**2 >= 0.5**2),
                      pipe_2_start, pipe_2_stop)

    #Turn_2
    turn_2_region = np.intersect1d(where_flat(Z <= 1.5), where_flat(Y <= 3.25))
    turn_2 = restrict(
        where_flat((0.75 - np.sqrt((Y - 1.75)**2 + (Z -1.5)**2))**2 + (X - 1)**2 >= 0.5**2),
        turn_2_region)

    #Pipe_3
    pipe_3_region = np.intersect1d(where_flat(Z >= 1.5), where_flat(Y <= 1.75))
    pipe_3 = restrict(where_flat((X - 1)**2 + (Y - 1.0)**2 >= 0.5**2),
                      pipe_3_region)

    #Put the pieces together
    obstacles = np.union1d(pipe_1, turn_1)
    for piece in (pipe_2, turn_2, pipe_3):
        obstacles = np.union1d(obstacles, piece)
    return list(obstacles)
def num_bipartisan_donors(df):
    """Find the people that have donated to more than one party.

    Only rows with a positive ``contb_receipt_amt`` count as donations. The
    contributors of the "Democrat", "Republican" and "Libertarian" parties
    are intersected pairwise and the union of the three intersections is
    returned.

    Args:
        df: A DataFrame generated from the campaign finance data csv file
            with the column "party" added.

    Returns:
        A sorted numpy array with the names (``contbr_nm``) of the
        contributors that donated to at least two of the three parties.

    NOTE(review): the name of the function suggests an integer count, but
    the array of names is what is returned - callers have to take its
    length themselves.
    """
    # Unique (contributor, party) pairs among the actual donations
    donations = df[df['contb_receipt_amt'] > 0]
    pairs = donations[['contbr_nm', 'party']].drop_duplicates(
        ['contbr_nm', 'party'])

    def contributors_of(party):
        return np.array(pairs[pairs['party'] == party]["contbr_nm"])

    democrats = contributors_of("Democrat")
    republicans = contributors_of("Republican")
    libertarians = contributors_of("Libertarian")

    # Pairwise overlaps between the three parties
    dem_rep = np.intersect1d(democrats, republicans)
    rep_lib = np.intersect1d(republicans, libertarians)
    lib_dem = np.intersect1d(libertarians, democrats)

    return np.union1d(dem_rep, np.union1d(rep_lib, lib_dem))
def create_error_matrix(obs_data, prd_data, compact=True, classes=None):
    """
    Create an error (confusion) matrix from observed and predicted data.
    The data is assumed to represent classes rather than continuous data.

    Parameters
    ----------
    obs_data : array-like
        Observed classes

    prd_data : array-like
        Predicted classes

    compact : bool
        Flag for whether or not to return error matrix in compact form. If
        True, only the classes that are represented in the data will be
        returned. If False, all classes in the ``classes`` keyword are
        returned. Defaults to True.

    classes : array-like
        If compact is False, return error matrix for all of these classes.
        Defaults to None, in which case the classes present in the data
        are used.

    Returns
    -------
    err_mat : np.array
        Error matrix of classes; rows are observed classes, columns are
        predicted classes.

    class_xwalk : dict
        Dictionary of class value to row or column number
    """
    # Local import: Counter is not otherwise needed by this module.
    from collections import Counter

    # `is None` instead of `== None`: comparing an array with == is
    # element-wise and cannot be used as a condition.
    if compact or classes is None:
        # Find all classes present in either the observed or predicted data
        classes = np.union1d(np.unique(obs_data), np.unique(prd_data))
    else:
        # Use the user-defined classes
        classes = np.array(classes)

    n = classes.size

    # Count every (observed, predicted) pair once instead of re-scanning the
    # data for each cell of the matrix.
    pair_counts = Counter(zip(obs_data, prd_data))
    err_mat = np.array([pair_counts[pair] for pair in
        itertools.product(classes, repeat=2)]).reshape(n, n)

    # Create the dictionary of class value to row/column number
    class_xwalk = dict((c, i) for (i, c) in enumerate(classes))

    return err_mat, class_xwalk
def check_taxa_names(SpeciesList_file):
    """
    Interactively list pairs of taxa names that look like misspellings of
    each other.

    The first column of ``SpeciesList_file`` (the header line is skipped) is
    read, and every pair of distinct names is compared with ``get_score`` as
    a whole, by genus (the part before the first "_") and by species (the
    part after it, when both names have one). Pairs that pass the
    sensitivity thresholds are printed in batches, best matches first; after
    each batch the user is asked whether the thresholds should be relaxed to
    show more.

    Exits via ``sys.exit`` with a message when no candidate pair is found.
    """
    try:
        ask = raw_input  # Python 2
    except NameError:
        ask = input  # Python 3

    # `skip_header` is the supported spelling of the former `skiprows` alias
    names = np.genfromtxt(SpeciesList_file, dtype=str, skip_header=1)[:,0]
    words = np.unique(names)
    print("\nTaxa names with possible misspells (if any) will be listed below...")

    # sensitivity settings
    max_length_diff = 2 # maximum allowed difference between string lengths
    threshold_score = 0.7
    threshold_s_diff = 3

    all_scores = []
    for taxon1, taxon2 in itertools.combinations(words, 2):
        score_all, diff_all = get_score(taxon1, taxon2, max_length_diff)
        parts1 = taxon1.split("_")
        parts2 = taxon2.split("_")
        # GENUS
        score_genus, diff_genus = get_score(parts1[0], parts2[0], max_length_diff)
        # SPECIES
        # The guard must look at the number of name parts (not at the length
        # of the genus), otherwise names without "_" fail on parts[1].
        if len(parts1) > 1 and len(parts2) > 1:
            score_species, diff_species = get_score(parts1[1], parts2[1], max_length_diff)
        else:
            score_species, diff_species = score_genus, 0
        s_diff = diff_genus + diff_species

        # presumably a combined score of 2 means both parts match exactly
        if (score_genus + score_species) >= 2:
            continue
        if score_all <= threshold_score or diff_all > threshold_s_diff:
            continue
        if np.mean([score_genus, score_species]) > threshold_score and s_diff <= threshold_s_diff:
            all_scores.append([taxon1, taxon2, round(score_all, 3), round(score_genus, 3),
                               round(score_species, 3), int(s_diff)])

    # Check for "nothing found" before slicing columns: an empty result has
    # no second axis to index.
    if len(all_scores) == 0:
        sys.exit("No typos founds!")

    all_scores = np.array(all_scores)
    score_float = all_scores[:,2].astype(float)
    diff_int = all_scores[:,5].astype(int)

    # top hits first; thresholds are relaxed on request
    th1, th2 = 0.9, 1
    passed = np.array([])
    while True:
        pass1 = (score_float > th1).nonzero()[0]
        pass2 = (diff_int <= th2).nonzero()[0]
        res = np.union1d(pass1, pass2)
        for i in res:
            if i not in passed:
                print('\t'.join(all_scores[i]))
        passed = np.union1d(res, passed)
        if len(passed) == len(all_scores):
            break
        answ = ask("\nShow more results (y or n)? ")
        if answ == "y":
            th1 -= 0.1
            th2 += 1
        else:
            break
def process_eyelid_traces(traces,time_vect,idx_CS_US,idx_US,idx_CS,thresh_CR=.1,time_CR_on=-.1,time_US_on=.05):
    """
    Normalize eyelid traces and sort trials by behavioral response.

    Parameters:
    ----------
    traces: ndarray (N trials X t time points)
        eyelid traces output of get_behavior_traces.

    time_vect: ndarray (t time points)
        time of each sample of the traces

    idx_CS_US, idx_US, idx_CS: ndarray
        trial indices of the CS+US, US-only and CS-only trials

    thresh_CR: float
        fraction of eyelid closure considered a CR

    time_CR_on: float
        time of alleged beginning of CRs

    time_US_on: float
        time when US is considered to induce have a UR

    Returns:
    -------
    eye_traces: ndarray
        traces divided by the maximum of the median US response (median
        over CS+US and US trials, within 0.4 after time_US_on)

    amplitudes_at_US: ndarray
        mean normalized amplitude of each trial between time_CR_on and
        time_US_on

    trigs: dict
        dictionary containing various subdivision of the triggers according
        to behavioral responses (amplitude strictly above / below thresh_CR)

        'idxCSUSCR': index of trials with CS+US with CR
        'idxCSUSNOCR': index of trials with CS+US without CR
        'idxCSCR': index of trials with CS with CR
        'idxCSNOCR': index of trials with CS without CR
        'idxNOCR': index of trials with no CRs
        'idxCR': index of trials with CRs
        'idxUS': index of US-only trials
    """
    # normalize by max amplitudes at US
    us_window = np.logical_and(time_vect>time_US_on,time_vect<time_US_on +.4 )
    us_trials = np.hstack([idx_CS_US,idx_US])
    peak_us_response = np.nanmax(np.nanmedian(traces[us_trials][:,us_window],0))
    eye_traces = old_div(traces,peak_us_response)

    # mean amplitude in the window where a CR is expected
    cr_window = np.logical_and( time_vect > time_CR_on , time_vect <= time_US_on )
    amplitudes_at_US = np.mean(eye_traces[:,cr_window],1)

    def trials_with_cr(trials):
        return trials[np.where(amplitudes_at_US[trials]>thresh_CR)[-1]]

    def trials_without_cr(trials):
        return trials[np.where(amplitudes_at_US[trials]<thresh_CR)[-1]]

    trigs = dict()
    trigs['idxCSUSCR'] = trials_with_cr(idx_CS_US)
    trigs['idxCSUSNOCR'] = trials_without_cr(idx_CS_US)
    trigs['idxCSCR'] = trials_with_cr(idx_CS)
    trigs['idxCSNOCR'] = trials_without_cr(idx_CS)
    trigs['idxNOCR'] = np.union1d(trigs['idxCSUSNOCR'],trigs['idxCSNOCR'])
    trigs['idxCR'] = np.union1d(trigs['idxCSUSCR'],trigs['idxCSCR'])
    trigs['idxUS'] = idx_US

    return eye_traces,amplitudes_at_US, trigs
def _intersect_nCk(self, mx): # import pdb; pdb.set_trace() f0fps = np.where(mx[:, 0] > .25)[0] f1fps = np.where(mx[:, 1] > .25)[0] f2fps, f3fps = np.nonzero(mx[:, 2] > .25)[0], np.nonzero(mx[:, 3] > .25)[0] f0_f1_fps, f2_f3_fps = np.intersect1d(f0fps, f1fps), np.intersect1d(f2fps, f3fps) intersect_nck = np.union1d(np.intersect1d(f0_f1_fps, f2fps), np.intersect1d(f0_f1_fps, f3fps)) intersect_nck = np.union1d(intersect_nck, np.union1d(np.intersect1d(f0fps, f2_f3_fps), np.intersect1d(f1fps, f2_f3_fps))) # intersect_nck = np.union1d(intersect_nck, f2_f3_fps) return intersect_nck
def get_obstList(self,X,Y,Z):
    """
    Return the indices of the nodes that belong to the solid grid.

    The grid lies in the plane ``Z = self.gridZ`` with thickness
    ``self.zT``. It consists of bars of width ``self.xT`` centred on evenly
    spaced X positions and bars of width ``self.yT`` centred on evenly
    spaced Y positions; the number of bars per direction is derived from the
    coordinate extent and ``self.xPitch`` / ``self.yPitch``. Nodes closer
    than ``self.hD/2`` to the point (``self.hX``, ``self.hY``) in the X-Y
    plane form a hole and are excluded.

    X, Y, Z are node coordinate arrays of equal shape. Returns a sorted
    integer array of (flat) node indices.
    """
    x = np.array(X); y = np.array(Y); z = np.array(Z)
    xMax = np.max(x); xMin = np.min(x) # expect the minimum to be zero
    yMax = np.max(y); yMin = np.min(y) # expect the minimum to be zero

    xT = self.xT
    yT = self.yT

    # Nodes within the thickness of the grid plane; identical for every bar,
    # so it is computed once.
    in_grid_plane = np.abs(z - self.gridZ) < self.zT/2.
    solid = np.zeros(x.shape, dtype=bool)

    # np.linspace requires an integer sample count; truncation matches the
    # implicit int() conversion that older NumPy applied to a float count.
    n_x_bars = int((xMax - xMin)/self.xPitch + 1)
    n_y_bars = int((yMax - yMin)/self.yPitch + 1)

    # bars centred on fixed x positions
    for xC in np.linspace(xMin + xT/2., xMax - xT/2., n_x_bars):
        solid |= (np.abs(x - xC) < xT/2.) & in_grid_plane

    # bars centred on fixed y positions
    for yC in np.linspace(yMin + yT/2., yMax - yT/2., n_y_bars):
        solid |= (np.abs(y - yC) < yT/2.) & in_grid_plane

    # remove grids within the hole region
    distH = np.sqrt((y - self.hY)**2. + (x - self.hX)**2.)
    solid &= ~(distH < self.hD/2.)

    # flatnonzero yields integer indices directly, so no cast through the
    # removed np.int alias is needed
    return np.flatnonzero(solid)
def __init__(self, cpfile1, cpfile2, smapfile, areafile, crop):
    """
    Combine two crop-progress county adapters into a single view.

    ``cpfile1`` is loaded with the ``'nadata'`` mode and ``cpfile2`` with
    the ``'data'`` mode. Years of both adapters are concatenated, counties
    and states are merged (sorted, without duplicates), and ``per`` is taken
    from the first adapter.

    For ``crop == 'wheat.winter'`` the year axis is shifted back by one and
    the original last year is appended.
    """
    self.cp1 = CropProgressCountyAdapter(cpfile1, smapfile, areafile, crop, 'nadata')
    self.cp2 = CropProgressCountyAdapter(cpfile2, smapfile, areafile, crop, 'data')

    self.year = append(self.cp1.year, self.cp2.year)
    self.county = union1d(self.cp1.county, self.cp2.county)
    # Merge the states of *both* adapters, in line with the counties above
    # (the first adapter used to be merged with itself).
    self.state = union1d(self.cp1.state, self.cp2.state)
    self.per = self.cp1.per

    if crop == 'wheat.winter':
        self.year = append(self.year - 1, self.year[-1])

    self.crop = crop
def getview (self, view, pbar):
# {{{
    """
    Evaluate a three-point finite difference along ``self.daxis``.

    For every requested index along the differentiation axis the values at
    the left neighbour, the point itself and the right neighbour are
    combined with weights derived from the (possibly non-uniform) axis
    values ``self.dx``. The weights are those of the second derivative of
    the parabola through the three points. At the two boundaries the
    missing neighbour is replaced by the second-next interior point, so the
    stencil always uses three distinct points.

    ``view`` describes the requested region and ``pbar`` is passed through
    to the data request. Returns an array of dtype ``self.dtype``.
    """
    import numpy as np

    daxis = self.daxis
    Nd = self.shape[daxis]

    # Get integer indices along the differentiation axis
    ind = view.integer_indices[daxis]

    # Want to do the finite difference with values to the left & right
    left = ind-1
    centre = ind
    right = ind+1

    # Truncate to the left & right boundaries
    left[left==-1] = 2
    right[right==Nd] = Nd-3

    # All the points we need to request (unique occurrences only)
    allpoints = np.union1d(left, np.union1d(centre, right))

    allview = view.modify_slice(daxis, allpoints)

    # Get the data and axis values for these points
    allvalues = allview.get(self.var, pbar=pbar)
    allaxis = allview.get(self.dx)

    def along_daxis(points):
        # Map the requested points to their position in `allpoints` and
        # build a full index for the view. The index must be a tuple: a
        # list of slices/arrays is not accepted as a multidimensional index.
        position = np.searchsorted(allpoints, points)
        before = (slice(None),) * daxis
        after = (slice(None),) * (self.naxes-daxis-1)
        return before + (position,) + after

    getleft = along_daxis(left)
    getcentre = along_daxis(centre)
    getright = along_daxis(right)

    # Finally, get the left & right values, and do the finite difference
    L = allvalues[getleft]
    C = allvalues[getcentre]
    R = allvalues[getright]
    La = allaxis[getleft]
    Ca = allaxis[getcentre]
    Ra = allaxis[getright]

    den = 2. / ((Ra - Ca) * (Ca - La) * (Ra - La))
    dL = (Ra - Ca) * den
    dC = (La - Ra) * den
    dR = (Ca - La) * den

    return np.asarray(dL * L + dC * C + dR * R, self.dtype)
def get_matching_pulses(cls, task_pulse_ms, eeg_pulse_ms):
    """
    Finds the pulses in the EEG recording which correspond to the times at which pulses were sent on the task laptop

    A window of matching inter-pulse differences is located at the start and
    at the end of the recordings; a fit is computed for each window and the
    largest absolute residual of the two fits is reported.

    :param task_pulse_ms: Times at which pulses were sent on task
    :param eeg_pulse_ms: Samples at which pulses were received on eeg system
    :return: matching task pulses, matching eeg pulses, max residual from fit
    """
    # Differences between consecutive pulse times are what gets matched
    task_diff = np.diff(task_pulse_ms)
    eeg_diff = np.diff(eeg_pulse_ms)

    # The beginning and the end are matched separately
    logger.debug('Scanning for start window')
    task_start_range, eeg_start_range = cls.find_matching_window(eeg_diff, task_diff, True)
    logger.debug('Scanning for end window')
    task_end_range, eeg_end_range = cls.find_matching_window(eeg_diff, task_diff, False)

    task_start = task_pulse_ms[task_start_range[0]: task_start_range[1]]
    eeg_start = eeg_pulse_ms[eeg_start_range[0]: eeg_start_range[1]]
    task_end = task_pulse_ms[task_end_range[0]: task_end_range[1]]
    eeg_end = eeg_pulse_ms[eeg_end_range[0]: eeg_end_range[1]]

    # The fits only serve to report how well the matched windows line up
    start_fit = cls.get_fit(task_start, eeg_start)
    end_fit = cls.get_fit(task_end, eeg_end)
    slope_start, intercept_start = start_fit[0], start_fit[1]
    slope_end, intercept_end = end_fit[0], end_fit[1]

    residuals_start = eeg_start - (slope_start * task_start + intercept_start)
    max_residual_start = max(abs(residuals_start))
    logger.debug('Max residual start %.1f' % max_residual_start)

    residuals_end = eeg_end - (slope_end * task_end + intercept_end)
    max_residual_end = max(abs(residuals_end))
    logger.debug('Max residual end %.1f;' % max_residual_end)

    max_residual = max(max_residual_start, max_residual_end)

    # Join the beginning and the end
    task_range = np.union1d(range(task_start_range[0], task_start_range[1]),
                            range(task_end_range[0], task_end_range[1]))
    eeg_range = np.union1d(range(eeg_start_range[0], eeg_start_range[1]),
                           range(eeg_end_range[0], eeg_end_range[1]))

    # Return the times that were used
    return task_pulse_ms[task_range], eeg_pulse_ms[eeg_range], max_residual
def deleteMolecule(atoms,bonds,angles,diheds,molId):
    """
    Remove one molecule and every bond, angle and dihedral that involves it.

    Table layout as used here: in ``atoms`` column 0 is the atom id and
    column 1 the molecule id; the atom ids of a bond are in columns 2-3, of
    an angle in columns 2-4 and of a dihedral in columns 2-5.

    Returns ``(newatoms, newbonds, newangles, newdiheds)`` - copies of the
    inputs without the rows that belong to molecule ``molId`` or reference
    one of its atoms. The inputs are not modified.
    """
    in_molecule = atoms[:,1] == molId
    molatoms = atoms[in_molecule][:,0]

    def without_molecule(table, stop_col):
        # A row is dropped when any of its atom-id columns names an atom of
        # the molecule. A boolean mask avoids both the per-atom loop and the
        # float-typed index arrays that np.delete cannot use.
        touched = np.isin(table[:, 2:stop_col], molatoms).any(axis=1)
        return table[~touched]

    newatoms = atoms[~in_molecule]
    newbonds = without_molecule(bonds, 4)
    newangles = without_molecule(angles, 5)
    newdiheds = without_molecule(diheds, 6)
    return (newatoms,newbonds,newangles,newdiheds)
def common_nites_deep(nitelist1,nitelist2,obs1=None,obs2=None,SNRsel1=None,SNRsel2=None,SNRand=0):
    """
    Select the nights two night lists have in common, allowing a lag of one
    night in either direction.

    Parameters
    ----------
    nitelist1, nitelist2 : arrays of night numbers.
    obs1, obs2 : optional arrays aligned with the night lists; only nights
        whose entry equals 2 are considered. Without them the whole list is
        used (and no lag is applied to list 1).
    SNRsel1, SNRsel2 : optional boolean arrays aligned with the night lists.
        When ``SNRsel1`` is None the SNR selection is skipped altogether.
    SNRand : with SNR selections, 0 requires the SNR selection on at least
        one side of a match, 1 requires it on both sides. Any other value
        selects nothing.

    Returns
    -------
    sel1, sel2 : boolean masks over ``nitelist1`` / ``nitelist2``.
    cnites : array of the common nights that were found.

    NOTE(review): in the SNR branches the masks are AND-ed with
    ``obs == 2``; with ``obs1``/``obs2`` left at None this selects nothing -
    confirm that the SNR selections are only used together with obs arrays.
    """
    sel1 = np.zeros(len(nitelist1),dtype='bool')
    sel2 = np.zeros(len(nitelist2),dtype='bool')
    cnites = np.array([])
    lags = [-1,0,1]

    # `is None` rather than `== None`: comparing an array with == is
    # element-wise and cannot be used as a condition.
    if SNRsel1 is None:
        for lag in lags:
            goodnites1 = nitelist1 if obs1 is None else (nitelist1[obs1 == 2]+lag)
            goodnites2 = nitelist2 if obs2 is None else nitelist2[obs2 == 2]
            cnites_intersect = np.intersect1d(goodnites1,goodnites2)
            cnites = np.union1d(cnites,cnites_intersect)
            sel1 = sel1 | np.isin(nitelist1,cnites-lag)
            sel2 = sel2 | np.isin(nitelist2,cnites)
    elif SNRand == 0:
        for lag in lags:
            goodnites1 = nitelist1 if obs1 is None else (nitelist1[obs1 == 2]+lag)
            goodnites1SNR = nitelist1 if obs1 is None else (nitelist1[(obs1 == 2) & SNRsel1]+lag)
            goodnites2 = nitelist2 if obs2 is None else nitelist2[obs2 == 2]
            goodnites2SNR = nitelist2 if obs2 is None else (nitelist2[(obs2 == 2) & SNRsel2])
            # a match counts when the SNR selection holds on either side
            cnitesSNR1 = np.intersect1d(goodnites1SNR,goodnites2)
            cnitesSNR2 = np.intersect1d(goodnites1,goodnites2SNR)
            cnites_intersect = np.union1d(cnitesSNR1,cnitesSNR2)
            # keep the matched night and its un-lagged counterpart
            cnites = np.union1d(np.union1d(cnites,cnites_intersect),np.union1d(cnites,cnites_intersect-lag))
            sel1 = (sel1 | np.isin(nitelist1,cnites-lag)) & (obs1 == 2)
            sel2 = (sel2 | np.isin(nitelist2,cnites)) & (obs2 == 2)
    elif SNRand == 1:
        for lag in lags:
            goodnites1SNR = nitelist1 if obs1 is None else (nitelist1[(obs1 == 2) & SNRsel1]+lag)
            goodnites2SNR = nitelist2 if obs2 is None else (nitelist2[(obs2 == 2) & SNRsel2])
            # a match counts only when the SNR selection holds on both sides
            cnites_intersect = np.intersect1d(goodnites1SNR,goodnites2SNR)
            # keep the matched night and its un-lagged counterpart
            cnites = np.union1d(np.union1d(cnites,cnites_intersect),np.union1d(cnites,cnites_intersect-lag))
            sel1 = (sel1 | np.isin(nitelist1,cnites-lag)) & (obs1 == 2)
            sel2 = (sel2 | np.isin(nitelist2,cnites)) & (obs2 == 2)
    return sel1, sel2,cnites
def get_obstList(self, X, Y, Z):
    """
    Return the indices of the points that lie inside a solid part.

    The solid is the union of five pieces, each described by inequalities
    on the coordinate arrays X, Y and Z: pipe, seat, pivot, front disc and
    back disc.  The result is a list of sorted, unique indices.
    """
    def indices_where(condition):
        # Same index extraction for every piece.
        return np.array(np.where(condition)).flatten()

    def common(first, *others):
        # Indices present in every one of the given index arrays.
        shared = first
        for other in others:
            shared = np.intersect1d(shared, other)
        return shared

    # Pipe
    pipe = indices_where((X - 0.6)**2 + (Y - 0.6)**2 >= 0.5**2)

    # Seat
    seat = common(
        indices_where((X - 0.6)**2 + (Y - 0.6)**2 >= 0.42**2),
        indices_where(Z >= 2.975),
        indices_where(Z <= 3.025),
    )

    # Pivot
    pivot = indices_where((X - 0.6)**2 + (Z - 3)**2 <= 0.075**2)

    # Front disc
    front_disc = common(
        indices_where((Y - 0.6)**2 + (Z - 3)**2 <= 0.5**2),
        indices_where(Z <= 3.0),
        indices_where(X >= 0.525),
        indices_where(X <= 0.575),
    )

    # Back disc
    back_disc = common(
        indices_where((Y - 0.6)**2 + (Z - 3)**2 <= 0.5**2),
        indices_where(Z >= 3.0),
        indices_where(X >= 0.625),
        indices_where(X <= 0.675),
    )

    # Put the pieces together
    solid = pipe
    for piece in (seat, pivot, front_disc, back_disc):
        solid = np.union1d(solid, piece)
    return list(solid)
def testRepCrossValidation(self):
    """Each of the first `folds` index pairs must cover every example.

    Checked for one and for two repetitions: the union of the two index
    arrays of a pair has to equal 0..numExamples-1.
    """
    numExamples = 10
    folds = 3
    everyExample = numpy.arange(numExamples)

    for repetitions in (1, 2):
        indices = Sampling.repCrossValidation(folds, numExamples, repetitions)

        for fold in range(folds):
            covered = numpy.union1d(indices[fold][0], indices[fold][1])
            self.assertTrue((covered == everyExample).all())
def CalcSimilarUsersSongs(userid):
    """Collect songs that similar users have and `userid` does not.

    Similarity is the number of songs two users have in common.  The five
    most similar users are visited from most to least similar and their
    songs that `userid` lacks are accumulated until at least five songs
    have been gathered.

    Reads the module-level ``userDict``; only the keys of each user's entry
    are used (presumably song ids -- verify against the loader).

    Parameters
    ----------
    userid : hashable
        Key of the user in ``userDict``.

    Returns
    -------
    numpy.ndarray
        Sorted, unique songs; empty when nothing can be suggested.
    """
    usersongsset = list(userDict[userid])

    # Shared-song count per other user.  Iterate over the keys (the old
    # code iterated over (key, value) pairs and then used the pair as a
    # key) and skip the user themselves, who can contribute no new songs.
    overlap = {}
    for otheruserid in userDict:
        if otheruserid == userid:
            continue
        otherusersongsset = list(userDict[otheruserid])
        overlap[otheruserid] = len(
            np.intersect1d(usersongsset, otherusersongsset))

    top5similarusers = sorted(overlap, key=overlap.get, reverse=True)[:5]

    unlistenedsongs = None
    for similaruserid in top5similarusers:
        candidates = np.setdiff1d(list(userDict[similaruserid]),
                                  usersongsset)
        # Keep the union: np.union1d returns a new array, it does not
        # update its argument in place.
        if unlistenedsongs is None:
            unlistenedsongs = candidates
        else:
            unlistenedsongs = np.union1d(unlistenedsongs, candidates)
        if len(unlistenedsongs) >= 5:
            break

    if unlistenedsongs is None:
        return np.array([])
    return unlistenedsongs
def where_near_rope(demo, xyz, thresh=.04, add_other_points=-1):
    """Return trajectory indices at which a used gripper is close to `xyz`.

    Parameters
    ----------
    demo : mapping
        Must provide ``"arms_used"`` (``"l"``, ``"r"`` or ``"b"`` for both)
        and, for every used arm, ``"<l|r>_gripper_tool_frame"`` with a
        ``"position"`` sequence of points.
    xyz : array-like
        Points to measure the distance to; passed to ``ssd.cdist`` together
        with the gripper positions.
    thresh : float
        A trajectory step counts as close when its minimum distance to
        `xyz` is below this value.
    add_other_points : int
        When positive, roughly this many evenly spaced extra indices are
        added.

    Returns
    -------
    numpy.ndarray of int
        Sorted, unique indices.  The first and last trajectory index are
        always included.
    """
    near_rope = []
    traj_len = 0
    for lr in "lr":
        if demo["arms_used"] in ["b", lr]:
            pos = np.array(demo["%s_gripper_tool_frame" % lr]["position"])
            traj_len = len(pos)
            dist_to_rope = ssd.cdist(pos, xyz).min(axis=1)
            close_inds = np.nonzero(dist_to_rope < thresh)[0]
            # Thin the close indices to about 50.  Floor division keeps
            # the slice step an integer, which slicing requires.
            slice_step = len(close_inds) // 50 or 1
            near_rope = np.union1d(near_rope, close_inds[::slice_step])
    if add_other_points > 0:
        # Never step by less than one index, so asking for more points
        # than the trajectory has cannot give a zero or fractional step.
        stride = traj_len // add_other_points or 1
        near_rope = np.union1d(near_rope, np.arange(0, traj_len, stride))
    near_rope = np.union1d(near_rope, [0, traj_len - 1])
    return near_rope.astype(int)
def setUp(self):
    """Create a refined triangular mesh and split its nodes in two sets.

    ``self.D`` collects the nodes with a coordinate equal to 0 or 1 in
    either of the first two rows of ``mesh.p``; ``self.I`` holds all the
    remaining node indices.
    """
    self.mesh = fmsh.MeshTri()
    self.mesh.refine(5)

    # boundary and interior node sets
    sides = [np.nonzero(self.mesh.p[axis, :] == level)[0]
             for level in (0, 1)
             for axis in (0, 1)]
    boundary = sides[0]
    for side in sides[1:]:
        boundary = np.union1d(boundary, side)
    self.D = boundary

    every_node = np.arange(0, self.mesh.p.shape[1])
    self.I = np.setdiff1d(every_node, self.D)
def simulate(self, numpoints=None, tstep=None, integrator=None,
             varying_inputs=None, initcon=None, integrator_options=None):
    """
    Simulate the model. Integrator-specific options may be specified as
    keyword arguments and will be passed on to the integrator.

    Parameters
    ----------
    numpoints : int
        The number of points for the profiles returned by the simulator.
        Default is 100

    tstep : int or float
        The time step to use in the profiles returned by the simulator.
        This is not the time step used internally by the integrators.
        This is an optional parameter that may be specified in place of
        'numpoints'.

    integrator : string
        The string name of the integrator to use for simulation. The
        default is 'lsoda' when using Scipy and 'idas' when using CasADi

    varying_inputs : ``pyomo.environ.Suffix``
        A :py:class:`Suffix<pyomo.environ.Suffix>` object containing the
        piecewise constant profiles to be used for certain time-varying
        algebraic variables.

    initcon : list of floats
        The initial conditions for the differential variables. This
        is an optional argument. If not specified then the simulator
        will use the current value of the differential variables at the
        lower bound of the ContinuousSet for the initial condition.

    integrator_options : dict
        Dictionary containing options that should be passed to the
        integrator. See the documentation for a specific integrator for a
        list of valid options.

    Returns
    -------
    numpy array, numpy array
        The first return value is a 1D array of time points corresponding
        to the second return value which is a 2D array of the profiles for
        the simulated differential and algebraic variables.

    Raises
    ------
    ValueError
        If numpy (or, for the scipy package, scipy) is unavailable, if
        'tstep' exceeds the span of the ContinuousSet, if both 'tstep' and
        'numpoints' are given, if a switching point lies outside the
        ContinuousSet, or if 'initcon' has the wrong length.
    TypeError
        If 'varying_inputs' is not a Suffix.
    DAE_Error
        If the integrator name is not recognized, or if the scipy package
        is used while algebraic variables are left without input profiles.
    """

    if not numpy_available:
        raise ValueError("The numpy module is not available. "
                         "Cannot simulate the model.")

    if integrator_options is None:
        integrator_options = {}

    # NOTE: strings are compared with '==' throughout.  Identity ('is')
    # only happens to work when both strings are interned.
    if self._intpackage == 'scipy':
        # Specify the scipy integrator to use for simulation
        valid_integrators = ['vode', 'zvode', 'lsoda', 'dopri5', 'dop853']
        if integrator is None:
            integrator = 'lsoda'
        elif integrator == 'odeint':
            # 'odeint' is accepted as an alias of 'lsoda'
            integrator = 'lsoda'
    else:
        # Specify the casadi integrator to use for simulation.
        # Only a subset of these integrators may be used for
        # DAE simulation. We defer this check to CasADi.
        valid_integrators = ['cvodes', 'idas', 'collocation', 'rk']
        if integrator is None:
            integrator = 'idas'

    if integrator not in valid_integrators:
        raise DAE_Error("Unrecognized %s integrator \'%s\'. Please select"
                        " an integrator from %s" % (self._intpackage,
                                                    integrator,
                                                    valid_integrators))

    # Set the time step or the number of points for the lists
    # returned by the integrator
    if tstep is not None and \
       tstep > (self._contset.last() - self._contset.first()):
        raise ValueError(
            "The step size %6.2f is larger than the span of the "
            "ContinuousSet %s" % (tstep, self._contset.name()))

    if tstep is not None and numpoints is not None:
        raise ValueError(
            "Cannot specify both the step size and the number of "
            "points for the simulator")
    if tstep is None and numpoints is None:
        # Use 100 points by default
        numpoints = 100

    if tstep is None:
        tsim = np.linspace(
            self._contset.first(), self._contset.last(), num=numpoints)

        # Consider adding an option for log spaced time points. Can be
        # important for simulating stiff systems.
        # tsim = np.logspace(-4,6, num=100)
        # np.log10(self._contset.first()),np.log10(
        # self._contset.last()),num=1000, endpoint=True)

    else:
        tsim = np.arange(
            self._contset.first(), self._contset.last(), tstep)

    switchpts = []
    self._siminputvars = {}
    self._simalgvars = []
    if varying_inputs is not None:
        if type(varying_inputs) is not Suffix:
            raise TypeError(
                "Varying input values must be specified using a "
                "Suffix. Please refer to the simulator documentation.")

        for alg in self._algvars:
            if alg._base in varying_inputs:
                # Find all the switching points
                switchpts += varying_inputs[alg._base].keys()
                # Add to dictionary of siminputvars
                self._siminputvars[alg._base] = alg
            else:
                self._simalgvars.append(alg)

        if self._intpackage == 'scipy' and len(self._simalgvars) != 0:
            raise DAE_Error("When simulating with Scipy you must "
                            "provide values for all parameters "
                            "and algebraic variables that are indexed "
                            "by the ContinuoutSet using the "
                            "'varying_inputs' keyword argument. "
                            "Please refer to the simulator documentation "
                            "for more information.")

        # Get the set of unique points
        switchpts = list(set(switchpts))
        switchpts.sort()

        # Make sure all the switchpts are within the bounds of
        # the ContinuousSet
        if switchpts[0] < self._contset.first() or \
                switchpts[-1] > self._contset.last():
            raise ValueError("Found a switching point for one or more of "
                             "the time-varying inputs that is not within "
                             "the bounds of the ContinuousSet.")

        # Update tsim to include input switching points
        # This numpy function returns the unique, sorted points
        tsim = np.union1d(tsim, switchpts)
    else:
        self._simalgvars = self._algvars

    # Check if initial conditions were provided, otherwise obtain
    # them from the current variable values
    if initcon is not None:
        if len(initcon) > len(self._diffvars):
            raise ValueError(
                "Too many initial conditions were specified. The "
                "simulator was expecting a list with %i values."
                % len(self._diffvars))
        if len(initcon) < len(self._diffvars):
            raise ValueError(
                "Too few initial conditions were specified. The "
                "simulator was expecting a list with %i values."
                % len(self._diffvars))
    else:
        initcon = []
        for v in self._diffvars:
            # Locate the position of the time index in the variable's
            # arguments and substitute the first point of the set there.
            for idx, i in enumerate(v._args):
                if type(i) is IndexTemplate:
                    break
            initpoint = self._contset.first()
            vidx = tuple(v._args[0:idx]) + (initpoint,) + \
                tuple(v._args[idx + 1:])
            # This line will raise an error if no value was set
            initcon.append(value(v._base[vidx]))

    # Call the integrator
    if self._intpackage == 'scipy':
        if not scipy_available:
            raise ValueError("The scipy module is not available. "
                             "Cannot simulate the model.")
        tsim, profile = self._simulate_with_scipy(initcon, tsim, switchpts,
                                                  varying_inputs,
                                                  integrator,
                                                  integrator_options)
    else:

        if len(switchpts) != 0:
            tsim, profile = \
                self._simulate_with_casadi_with_inputs(initcon, tsim,
                                                       varying_inputs,
                                                       integrator,
                                                       integrator_options)
        else:
            tsim, profile = \
                self._simulate_with_casadi_no_inputs(initcon, tsim,
                                                     integrator,
                                                     integrator_options)

    self._tsim = tsim
    self._simsolution = profile

    return [tsim, profile]
# Tail of the preceding exercise: `x` is defined earlier in the script.
print(x)
print("x.sort():")
# NOTE(review): if x is a NumPy array, x.sort() sorts in place and returns
# None, so this prints "None" -- presumably the point of the exercise.
print(x.sort())
print("x:")
print(x)

# <demo> --- stop ---
# Exercise 10
# Largest value of x, obtained directly and through its index.
print("x.max():")
print(x.max())
i = x.argmax()
print("x[i]:")
print(x[i])

# <demo> --- stop ---
# Exercise 11a
# Elements of y that are not also in x.
x = array([1, 2, 3, 4, 5])
y = array([1, 2, 4, 6])
print("y[logical_not(in1d(y,intersect1d(x,y)))]:")
print(y[logical_not(in1d(y, intersect1d(x, y)))])

# <demo> --- stop ---
# Exercise 11b
# Elements that are in exactly one of x and y: the union minus the
# intersection.
z = union1d(x, y)
print("z[logical_not(in1d(z,intersect1d(x,y)))]:")
print(z[logical_not(in1d(z, intersect1d(x, y)))])
# Data with missing values for the next exercise; T counts the non-NaN
# entries.
y = array([nan, 2.2, 3.9, 4.6, nan, 2.4, 6.1, 1.8])
T = sum(logical_not(isnan(y)))

# <demo> --- stop ---
# Exercise 12
# Variance ignoring NaNs, computed as E[y^2] - E[y]^2 over the T valid
# entries.
Ey2 = nansum(y**2) / T
Ey = nansum(y) / T
nanvar = Ey2 - Ey**2
def get_all_nodes(self):
    """Return the sorted, unique indices of all nodes in use.

    An index is included when its entry in ``self.nodes`` is non-zero or
    its entry in ``self.node_to_n_edges`` is positive.
    """
    nonzero_nodes = np.nonzero(self.nodes != 0)[0]
    nodes_with_edges = np.nonzero(self.node_to_n_edges > 0)[0]
    return np.union1d(nonzero_nodes, nodes_with_edges)
param_bounds=np.loadtxt('../Qgen/uncertain_params_CMIP.txt',usecols=(1,2))[7:13,:] elif design == 'CMIPunscaled_SOWs': param_bounds=np.loadtxt('../Qgen/uncertain_params_CMIPunscaled.txt',usecols=(1,2))[7:13,:] SOW_values = np.array([1,1,1,1,0,0]) #Default parameter values for base SOW realizations = 10 param_names=['XBM_mu0','XBM_sigma0','XBM_mu1','XBM_sigma1','XBM_p00','XBM_p11'] params_no = len(param_names) problem = { 'num_vars': params_no, 'names': param_names, 'bounds': param_bounds.tolist() } # remove samples no longer in param_bounds rows_to_keep = np.union1d(np.where(LHsamples[:,0]>=0)[0],np.where(LHsamples[:,0]<=0)[0]) for i in range(params_no): within_rows = np.intersect1d(np.where(LHsamples[:,i] >= param_bounds[i][0])[0], np.where(LHsamples[:,i] <= param_bounds[i][1])[0]) rows_to_keep = np.intersect1d(rows_to_keep,within_rows) LHsamples = LHsamples[rows_to_keep,:] samples = len(LHsamples[:,0]) percentiles = np.arange(10, 110, 10) all_IDs = np.genfromtxt('../Structures_files/metrics_structures.txt',dtype='str').tolist() nStructures = len(all_IDs) # deal with fact that calling result.summary() in statsmodels.api # calls scipy.stats.chisqprob, which no longer exists scipy.stats.chisqprob = lambda chisq, df: scipy.stats.chi2.sf(chisq, df)
# 집합 함수 : 수학의 집합 연산을 수행. 1차원 배열만을 대상 # 배열 요소에서 중복을 제거하고 정렬한 결과 반환 np.unique(), 합집합 np.union1d(), 교집합 np.intersect1d(), 차집합 np.setdified1d() import numpy as np arr1 = [2, 5, 7, 9, 5, 2] arr2 = [2, 5, 8, 3, 1] # np.unique() 사용해서 new_arr1에 대입 new_arr1 = np.unique(arr1) print(new_arr1) # [2 5 7 9] # 합집합 print(np.union1d(new_arr1, arr2)) # [1 2 3 5 7 8 9] # 교집합 print(np.intersect1d(new_arr1, arr2)) # [2 5] # 차집합 print(np.setdiff1d(new_arr1, arr2)) # [7 9]
def selcomps(seldict, comptable, mmix, manacc, n_echos):
    """
    Classify components in seldict as "accepted," "rejected," "midk," or
    "ignored."

    The selection process uses previously calculated parameters listed in
    `seldict` for each ICA component such as Kappa (a T2* weighting metric),
    Rho (an S0 weighting metric), and variance explained.
    See `Notes` for additional calculated metrics used to classify each
    component into one of the four listed groups.

    Parameters
    ----------
    seldict : :obj:`dict`
        A dictionary with component-specific features used for
        classification. As output from `fitmodels_direct`
    comptable : (C x X) :obj:`pandas.DataFrame`
        Component metric table. One row for each component, with a column
        for each metric. The index should be the component number.
        The table is modified in place (classification and metric columns
        are added) before the reordered table is returned.
    mmix : (T x C) array_like
        Mixing matrix for converting input data to component space, where
        `C` is components and `T` is the number of volumes in the original
        data
    manacc : :obj:`str`
        Comma-separated indices of manually accepted components. When
        non-empty, all other components are rejected and no further
        selection is performed.
    n_echos : :obj:`int`
        Number of echos in original data

    Returns
    -------
    comptable : :obj:`pandas.DataFrame`
        Updated component table with additional metrics and with
        classification (accepted, rejected, or ignored)

    Notes
    -----
    The selection algorithm used in this function was originated in ME-ICA
    by Prantik Kundu, and his original implementation is available at:
    https://github.com/ME-ICA/me-ica/blob/b2781dd087ab9de99a2ec3925f04f02ce84f0adc/meica.libs/select_model.py

    This component selection process uses multiple, previously calculated
    metrics that include: kappa, rho, variance explained, component spatial
    weighting maps, noise and spatial frequency metrics, and measures of
    spatial overlap across metrics.

    Prantik began to update these selection criteria to use SVMs to
    distinguish components, a hypercommented version of this attempt is
    available at:
    https://gist.github.com/emdupre/ca92d52d345d08ee85e104093b81482e
    """
    cols_at_end = ['classification', 'rationale']
    comptable['classification'] = 'accepted'
    comptable['rationale'] = ''

    Z_maps = seldict['Z_maps']
    Z_clmaps = seldict['Z_clmaps']
    F_R2_maps = seldict['F_R2_maps']
    F_S0_clmaps = seldict['F_S0_clmaps']
    F_R2_clmaps = seldict['F_R2_clmaps']
    Br_S0_clmaps = seldict['Br_S0_clmaps']
    Br_R2_clmaps = seldict['Br_R2_clmaps']

    n_vols, n_comps = mmix.shape

    # Set knobs
    LOW_PERC = 25
    HIGH_PERC = 90
    if n_vols < 100:
        EXTEND_FACTOR = 3
    else:
        EXTEND_FACTOR = 2
    RESTRICT_FACTOR = 2

    # List of components
    midk = []
    ign = []
    all_comps = np.arange(comptable.shape[0])
    # acc remains a full list that is whittled down over criteria
    acc = np.arange(comptable.shape[0])

    # If user has specified the accepted components, skip the selection
    if manacc:
        acc = sorted([int(vv) for vv in manacc.split(',')])
        rej = sorted(np.setdiff1d(all_comps, acc))
        comptable.loc[acc, 'classification'] = 'accepted'
        comptable.loc[rej, 'classification'] = 'rejected'
        comptable.loc[rej, 'rationale'] += 'I001;'
        # Move decision columns to end
        comptable = comptable[[c for c in comptable if c not in cols_at_end] +
                              [c for c in cols_at_end if c in comptable]]
        comptable['rationale'] = comptable['rationale'].str.rstrip(';')
        return comptable

    # Tally number of significant voxels for cluster-extent thresholded R2
    # and S0 model F-statistic maps.
    comptable['countsigFR2'] = F_R2_clmaps.sum(axis=0)
    comptable['countsigFS0'] = F_S0_clmaps.sum(axis=0)

    # Generate Dice values for R2 and S0 models
    # - dice_FR2: Dice value of cluster-extent thresholded maps of R2-model
    #   betas and F-statistics.
    # - dice_FS0: Dice value of cluster-extent thresholded maps of S0-model
    #   betas and F-statistics.
    comptable['dice_FR2'] = np.zeros(all_comps.shape[0])
    comptable['dice_FS0'] = np.zeros(all_comps.shape[0])
    for i_comp in acc:
        comptable.loc[i_comp, 'dice_FR2'] = utils.dice(
            Br_R2_clmaps[:, i_comp], F_R2_clmaps[:, i_comp])
        comptable.loc[i_comp, 'dice_FS0'] = utils.dice(
            Br_S0_clmaps[:, i_comp], F_S0_clmaps[:, i_comp])

    comptable.loc[np.isnan(comptable['dice_FR2']), 'dice_FR2'] = 0
    comptable.loc[np.isnan(comptable['dice_FS0']), 'dice_FS0'] = 0

    # Generate three metrics of component noise:
    # - countnoise: Number of "noise" voxels (voxels highly weighted for
    #   component, but not from clusters)
    # - signal-noise_t: T-statistic for two-sample t-test of F-statistics
    #   from "signal" voxels (voxels in clusters) against "noise" voxels
    #   (voxels not in clusters) for R2 model.
    # - signal-noise_p: P-value from t-test.
    comptable['countnoise'] = 0
    # Float columns from the start: they receive t-statistics and p-values.
    comptable['signal-noise_t'] = 0.0
    comptable['signal-noise_p'] = 0.0
    for i_comp in all_comps:
        # index voxels significantly loading on component but not from
        # clusters
        comp_noise_sel = ((np.abs(Z_maps[:, i_comp]) > 1.95) &
                          (Z_clmaps[:, i_comp] == 0))
        # Builtin int: the np.int alias no longer exists in current NumPy.
        comptable.loc[i_comp, 'countnoise'] = np.array(
            comp_noise_sel, dtype=int).sum()
        # NOTE: Why only compare distributions of *unique* F-statistics?
        noise_FR2_Z = np.log10(np.unique(F_R2_maps[comp_noise_sel, i_comp]))
        signal_FR2_Z = np.log10(
            np.unique(F_R2_maps[Z_clmaps[:, i_comp] == 1, i_comp]))
        (comptable.loc[i_comp, 'signal-noise_t'],
         comptable.loc[i_comp, 'signal-noise_p']) = stats.ttest_ind(
             signal_FR2_Z, noise_FR2_Z, equal_var=False)

    comptable.loc[np.isnan(comptable['signal-noise_t']), 'signal-noise_t'] = 0
    comptable.loc[np.isnan(comptable['signal-noise_p']), 'signal-noise_p'] = 0

    # Assemble decision table with five metrics:
    # - Kappa values ranked from largest to smallest
    # - R2-model F-score map/beta map Dice scores ranked from largest to
    #   smallest
    # - Signal F > Noise F t-statistics ranked from largest to smallest
    # - Number of "noise" voxels (voxels highly weighted for component, but
    #   not from clusters) ranked from smallest to largest
    # - Number of voxels with significant R2-model F-scores within clusters
    #   ranked from largest to smallest
    #
    # Smaller values (i.e., higher ranks) across metrics indicate more BOLD
    # dependence and less noise.
    d_table_rank = np.vstack([
        n_comps - stats.rankdata(comptable['kappa'], method='ordinal'),
        n_comps - stats.rankdata(comptable['dice_FR2'], method='ordinal'),
        n_comps - stats.rankdata(comptable['signal-noise_t'],
                                 method='ordinal'),
        stats.rankdata(comptable['countnoise'], method='ordinal'),
        n_comps - stats.rankdata(comptable['countsigFR2'],
                                 method='ordinal')
    ]).T
    n_decision_metrics = d_table_rank.shape[1]
    comptable['d_table_score'] = d_table_rank.sum(axis=1)

    # Step 1: Reject anything that's obviously an artifact
    # a. Estimate a null variance

    # Rho is higher than Kappa
    temp_rej0a = all_comps[(comptable['rho'] > comptable['kappa'])]
    comptable.loc[temp_rej0a, 'classification'] = 'rejected'
    comptable.loc[temp_rej0a, 'rationale'] += 'I002;'

    # Number of significant voxels for S0 model is higher than number for R2
    # model *and* number for R2 model is greater than zero.
    temp_rej0b = all_comps[(
        (comptable['countsigFS0'] > comptable['countsigFR2']) &
        (comptable['countsigFR2'] > 0))]
    comptable.loc[temp_rej0b, 'classification'] = 'rejected'
    comptable.loc[temp_rej0b, 'rationale'] += 'I003;'
    rej = np.union1d(temp_rej0a, temp_rej0b)

    # Dice score for S0 maps is higher than Dice score for R2 maps and
    # variance explained is higher than the median across components.
    temp_rej1 = all_comps[(comptable['dice_FS0'] > comptable['dice_FR2']) &
                          (comptable['variance explained'] > np.median(
                              comptable['variance explained']))]
    comptable.loc[temp_rej1, 'classification'] = 'rejected'
    comptable.loc[temp_rej1, 'rationale'] += 'I004;'
    rej = np.union1d(temp_rej1, rej)

    # T-value is less than zero (noise has higher F-statistics than signal
    # in map) and variance explained is higher than the median across
    # components.
    temp_rej2 = acc[(comptable.loc[acc, 'signal-noise_t'] < 0) &
                    (comptable.loc[acc, 'variance explained'] > np.median(
                        comptable['variance explained']))]
    comptable.loc[temp_rej2, 'classification'] = 'rejected'
    comptable.loc[temp_rej2, 'rationale'] += 'I005;'
    rej = np.union1d(temp_rej2, rej)

    acc = np.setdiff1d(acc, rej)

    # Step 2: Make a guess for what the good components are, in order to
    # estimate good component properties
    # a. Not outlier variance
    # b. Kappa>kappa_elbow
    # c. Rho<Rho_elbow
    # d. High R2* dice compared to S0 dice
    # e. Gain of F_R2 in clusters vs noise
    # f. Estimate a low and high variance

    # Step 2a
    # Upper limit for variance explained is median across components with
    # high Kappa values. High Kappa is defined as Kappa above Kappa elbow.
    varex_upper_p = np.median(comptable.loc[
        comptable['kappa'] > getelbow(comptable['kappa'], return_val=True),
        'variance explained'])
    ncls = acc.copy()
    # NOTE: We're not sure why this is done, nor why it's specifically done
    # three times. Need to look into this deeper, esp. to make sure the 3
    # isn't a hard-coded reference to the number of echoes.
    # Reduce components to investigate as "good" to ones in which change in
    # variance explained is less than the limit defined above.... What?
    for nn in range(3):
        ncls = comptable.loc[ncls].loc[comptable.loc[
            ncls, 'variance explained'].diff() < varex_upper_p].index.values

    # Compute elbows from other elbows
    kappas_under_f01 = (
        comptable.loc[comptable['kappa'] < utils.getfbounds(n_echos)[-1],
                      'kappa'])
    # NOTE: Would an elbow from all Kappa values *ever* be lower than one
    # from a subset of lower values?
    kappa_elbow = np.min((getelbow(kappas_under_f01, return_val=True),
                          getelbow(comptable['kappa'], return_val=True)))
    rho_elbow = np.mean(
        (getelbow(comptable.loc[ncls, 'rho'], return_val=True),
         getelbow(comptable['rho'], return_val=True),
         utils.getfbounds(n_echos)[0]))

    # Provisionally accept components based on Kappa and Rho elbows
    acc_prov = ncls[(comptable.loc[ncls, 'kappa'] >= kappa_elbow) &
                    (comptable.loc[ncls, 'rho'] < rho_elbow)]

    if len(acc_prov) == 0:
        LGR.warning('No BOLD-like components detected')
        ign = sorted(np.setdiff1d(all_comps, rej))
        comptable.loc[ign, 'classification'] = 'ignored'
        comptable.loc[ign, 'rationale'] += 'I006;'

        # Move decision columns to end
        comptable = comptable[[c for c in comptable if c not in cols_at_end] +
                              [c for c in cols_at_end if c in comptable]]
        comptable['rationale'] = comptable['rationale'].str.rstrip(';')
        return comptable

    # Calculate "rate" for kappa: kappa range divided by variance explained
    # range, for potentially accepted components
    # NOTE: What is the logic behind this?
    kappa_rate = ((np.max(comptable.loc[acc_prov, 'kappa']) -
                   np.min(comptable.loc[acc_prov, 'kappa'])) /
                  (np.max(comptable.loc[acc_prov, 'variance explained']) -
                   np.min(comptable.loc[acc_prov, 'variance explained'])))
    kappa_ratios = kappa_rate * comptable['variance explained'] / comptable[
        'kappa']
    varex_lower = stats.scoreatpercentile(
        comptable.loc[acc_prov, 'variance explained'], LOW_PERC)
    varex_upper = stats.scoreatpercentile(
        comptable.loc[acc_prov, 'variance explained'], HIGH_PERC)

    # Step 3: Get rid of midk components; i.e., those with higher than max
    # decision score and high variance
    max_good_d_score = EXTEND_FACTOR * len(acc_prov) * n_decision_metrics
    midk = acc[(comptable.loc[acc, 'd_table_score'] > max_good_d_score) &
               (comptable.loc[acc, 'variance explained'] >
                EXTEND_FACTOR * varex_upper)]
    comptable.loc[midk, 'classification'] = 'rejected'
    comptable.loc[midk, 'rationale'] += 'I007;'
    acc = np.setdiff1d(acc, midk)
    acc_prov = np.setdiff1d(acc_prov, midk)

    # Step 4: Find components to ignore
    high_varex = np.union1d(
        acc_prov,
        acc[comptable.loc[acc, 'variance explained'] > varex_lower])
    ign = np.setdiff1d(acc, high_varex)  # ignore low variance components
    ign = np.setdiff1d(
        ign, ign[comptable.loc[ign, 'd_table_score'] < max_good_d_score])
    ign = np.setdiff1d(ign, ign[comptable.loc[ign, 'kappa'] > kappa_elbow])
    comptable.loc[ign, 'classification'] = 'ignored'
    comptable.loc[ign, 'rationale'] += 'I008;'
    acc = np.setdiff1d(acc, ign)

    # Step 5: Scrub the set if there are components that haven't been
    # rejected or ignored, but are still not listed in the possible accepted
    # group.
    if len(acc) > len(acc_prov):
        comptable['d_table_score_scrub'] = np.nan
        # Recompute the midk steps on the limited set to clean up the tail
        d_table_rank = np.vstack([
            len(acc) - stats.rankdata(comptable.loc[acc, 'kappa'],
                                      method='ordinal'),
            len(acc) - stats.rankdata(comptable.loc[acc, 'dice_FR2'],
                                      method='ordinal'),
            len(acc) - stats.rankdata(comptable.loc[acc, 'signal-noise_t'],
                                      method='ordinal'),
            stats.rankdata(comptable.loc[acc, 'countnoise'],
                           method='ordinal'),
            len(acc) - stats.rankdata(comptable.loc[acc, 'countsigFR2'],
                                      method='ordinal')
        ]).T
        comptable.loc[acc, 'd_table_score_scrub'] = d_table_rank.sum(1)
        num_acc_guess = int(
            np.mean([
                np.sum((comptable.loc[acc, 'kappa'] > kappa_elbow) &
                       (comptable.loc[acc, 'rho'] < rho_elbow)),
                np.sum(comptable.loc[acc, 'kappa'] > kappa_elbow)
            ]))
        conservative_guess = (num_acc_guess * n_decision_metrics /
                              RESTRICT_FACTOR)

        # Rejection candidate based on artifact type A: candartA
        candartA = np.intersect1d(
            acc[comptable.loc[acc, 'd_table_score_scrub'] >
                conservative_guess],
            acc[kappa_ratios[acc] > EXTEND_FACTOR * 2])
        candartA = np.intersect1d(
            candartA,
            candartA[comptable.loc[candartA, 'variance explained'] >
                     varex_upper * EXTEND_FACTOR])
        comptable.loc[candartA, 'classification'] = 'rejected'
        comptable.loc[candartA, 'rationale'] += 'I009;'
        midk = np.union1d(midk, candartA)

        # Rejection candidate based on artifact type B: candartB
        candartB = comptable.loc[acc].loc[
            comptable.loc[acc, 'd_table_score_scrub'] >
            num_acc_guess * n_decision_metrics * HIGH_PERC / 100.
        ].index.values
        candartB = np.intersect1d(
            candartB,
            candartB[comptable.loc[candartB, 'variance explained'] >
                     varex_lower * EXTEND_FACTOR])
        comptable.loc[candartB, 'classification'] = 'rejected'
        comptable.loc[candartB, 'rationale'] += 'I010;'
        midk = np.union1d(midk, candartB)

        # Find components to ignore
        new_varex_lower = stats.scoreatpercentile(
            comptable.loc[acc[:num_acc_guess], 'variance explained'],
            LOW_PERC)
        candart = comptable.loc[acc].loc[
            comptable.loc[acc, 'd_table_score_scrub'] >
            num_acc_guess * n_decision_metrics].index.values
        ign_add0 = np.intersect1d(
            candart[comptable.loc[candart, 'variance explained'] >
                    new_varex_lower],
            candart)
        ign_add0 = np.setdiff1d(ign_add0, midk)
        comptable.loc[ign_add0, 'classification'] = 'ignored'
        comptable.loc[ign_add0, 'rationale'] += 'I011;'
        ign = np.union1d(ign, ign_add0)

        ign_add1 = np.intersect1d(
            acc[comptable.loc[acc, 'kappa'] <= kappa_elbow],
            acc[comptable.loc[acc, 'variance explained'] > new_varex_lower])
        ign_add1 = np.setdiff1d(ign_add1, midk)
        comptable.loc[ign_add1, 'classification'] = 'ignored'
        comptable.loc[ign_add1, 'rationale'] += 'I012;'

    # Move decision columns to end
    comptable = comptable[[c for c in comptable if c not in cols_at_end] +
                          [c for c in cols_at_end if c in comptable]]
    comptable['rationale'] = comptable['rationale'].str.rstrip(';')
    return comptable
def estimate_w_CK__given_pi_DK(
        dataset=None,
        pi_DK=None,
        lambda_w=0.001,
        seed=42,
        prefix='',
        verbose=False,
        **kwargs):
    """ Fit one regularized regressor per label on the probability features.

    A logistic regression is used when the values found in dataset['y_DC']
    are a subset of {0, 1}; otherwise a ridge regression is used.  Both are
    fit without an intercept.  When the dataset has a 'y_rowmask' entry,
    only the rows whose mask equals 1 are used.

    Returns
    -------
    w_CK : 2D array, size C x K
        Regression weights
    """
    n_feats = pi_DK.shape[1]
    n_labels = int(dataset['n_labels'])
    if verbose:
        pprint('%s Fitting %d regressions...' % (prefix, n_labels))

    w_CK = np.zeros((n_labels, n_feats))

    # Binary means: at most two distinct values, all of them in {0, 1}.
    # This is decided on the full label array, before any row masking.
    distinct_y = np.unique(dataset['y_DC'].flatten())
    is_binary = (distinct_y.size <= 2
                 and np.union1d([0.0, 1.0], distinct_y).size == 2)

    if 'y_rowmask' not in dataset:
        y_DC = dataset['y_DC']
    else:
        keep_rows = 1 == dataset['y_rowmask']
        y_DC = dataset['y_DC'][keep_rows]
        pi_DK = pi_DK[keep_rows]
        row_totals = np.unique(y_DC.sum(axis=1))
        assert row_totals.size > 1

    for c in range(n_labels):
        # Do a quick regression to get initial weights!
        if is_binary:
            clf = LogisticRegression(
                fit_intercept=False,
                C=0.5 / lambda_w,
                random_state=seed,
            )
        else:
            clf = RidgeRegression(
                fit_intercept=False,
                alpha=lambda_w,
                random_state=seed,
            )
        clf.fit(pi_DK, y_DC[:, c])
        w_CK[c] = clf.coef_
        if verbose:
            leading_weights = ' '.join('% .2f' % w for w in w_CK[c, :5])
            pprint(' w_CK[%d, :5]=' % c + leading_weights)
            pprint(' label id %d / %d done with lambda_w = %.5f' % (
                c + 1, n_labels, lambda_w))
    return w_CK
def _detect_outliers_core(self, imgfile, motionfile, runidx, cwd=None):
    """
    Core routine for detecting outliers

    Computes a global intensity signal per timepoint (the masking strategy
    is selected by ``self.inputs.mask_type``), z-scores it, reads the motion
    parameters from `motionfile`, and flags timepoints whose intensity or
    motion exceeds the configured thresholds.  Results are written to the
    files named by ``self._get_output_filenames``: outlier indices,
    intensity values, the mask image, optionally the motion norm, a
    displacement image and a plot, and a JSON statistics summary.

    Parameters
    ----------
    imgfile : str or list of str
        Functional image file, or list of files that are concatenated.
    motionfile : str
        Text file with the motion parameters, read with ``np.loadtxt``.
    runidx : int
        Not used inside this routine.
    cwd : str, optional
        Output directory; defaults to the current working directory.
    """
    if not cwd:
        cwd = os.getcwd()

    # read in functional image
    if isinstance(imgfile, str):
        nim = load(imgfile)
    elif isinstance(imgfile, list):
        if len(imgfile) == 1:
            nim = load(imgfile[0])
        else:
            images = [load(f) for f in imgfile]
            nim = funcs.concat_images(images)

    # compute global intensity signal
    (x, y, z, timepoints) = nim.get_shape()

    data = nim.get_data()
    affine = nim.get_affine()
    g = np.zeros((timepoints, 1))
    masktype = self.inputs.mask_type
    if masktype == 'spm_global':  # spm_global like calculation
        iflogger.debug('art: using spm global')
        intersect_mask = self.inputs.intersect_mask
        if intersect_mask:
            # One mask shared by all timepoints: the intersection of the
            # per-volume masks.
            mask = np.ones((x, y, z), dtype=bool)
            for t0 in range(timepoints):
                vol = data[:, :, :, t0]
                # Use an SPM like approach
                mask_tmp = vol > \
                    (_nanmean(vol) / self.inputs.global_threshold)
                mask = mask * mask_tmp
            for t0 in range(timepoints):
                vol = data[:, :, :, t0]
                g[t0] = _nanmean(vol[mask])
            # Fall back to per-volume masks when the intersection keeps
            # fewer than a tenth of the voxels.
            if len(find_indices(mask)) < (np.prod((x, y, z)) / 10):
                intersect_mask = False
                g = np.zeros((timepoints, 1))
        if not intersect_mask:
            iflogger.info('not intersect_mask is True')
            mask = np.zeros((x, y, z, timepoints))
            for t0 in range(timepoints):
                vol = data[:, :, :, t0]
                mask_tmp = vol > \
                    (_nanmean(vol) / self.inputs.global_threshold)
                mask[:, :, :, t0] = mask_tmp
                g[t0] = np.nansum(vol * mask_tmp)/np.nansum(mask_tmp)
    elif masktype == 'file':  # uses a mask image to determine intensity
        maskimg = load(self.inputs.mask_file)
        mask = maskimg.get_data()
        affine = maskimg.get_affine()
        mask = mask > 0.5
        for t0 in range(timepoints):
            vol = data[:, :, :, t0]
            g[t0] = _nanmean(vol[mask])
    elif masktype == 'thresh':  # uses a fixed signal threshold
        for t0 in range(timepoints):
            vol = data[:, :, :, t0]
            mask = vol > self.inputs.mask_threshold
            g[t0] = _nanmean(vol[mask])
    else:
        mask = np.ones((x, y, z))
        g = _nanmean(data[mask > 0, :], 1)

    # compute normalized intensity values
    gz = signal.detrend(g, axis=0)  # detrend the signal
    if self.inputs.use_differences[1]:
        gz = np.concatenate((np.zeros((1, 1)), np.diff(gz, n=1, axis=0)),
                            axis=0)
    gz = (gz - np.mean(gz)) / np.std(gz)  # normalize the detrended signal
    iidx = find_indices(abs(gz) > self.inputs.zintensity_threshold)

    # read in motion parameters
    mc_in = np.loadtxt(motionfile)
    mc = deepcopy(mc_in)

    (artifactfile, intensityfile, statsfile, normfile, plotfile,
     displacementfile, maskfile) = self._get_output_filenames(imgfile, cwd)
    mask_img = Nifti1Image(mask.astype(np.uint8), affine)
    mask_img.to_filename(maskfile)

    if self.inputs.use_norm:
        brain_pts = None
        if self.inputs.bound_by_brainmask:
            voxel_coords = np.nonzero(mask)
            coords = np.vstack((voxel_coords[0],
                                np.vstack((voxel_coords[1],
                                           voxel_coords[2])))).T
            brain_pts = np.dot(affine,
                               np.hstack((coords,
                                          np.ones((coords.shape[0], 1)))).T)
        # calculate the norm of the motion parameters
        normval, displacement = _calc_norm(mc,
                                           self.inputs.use_differences[0],
                                           self.inputs.parameter_source,
                                           brain_pts=brain_pts)
        tidx = find_indices(normval > self.inputs.norm_threshold)
        ridx = find_indices(normval < 0)
        if displacement is not None:
            # NOTE(review): voxel_coords only exists when
            # bound_by_brainmask is set; presumably _calc_norm returns a
            # displacement only in that case -- confirm.
            # Builtin float: the np.float alias no longer exists in
            # current NumPy.
            dmap = np.zeros((x, y, z, timepoints), dtype=float)
            for i in range(timepoints):
                dmap[voxel_coords[0],
                     voxel_coords[1],
                     voxel_coords[2], i] = displacement[i, :]
            dimg = Nifti1Image(dmap, affine)
            dimg.to_filename(displacementfile)
    else:
        if self.inputs.use_differences[0]:
            mc = np.concatenate((np.zeros((1, 6)),
                                 np.diff(mc_in, n=1, axis=0)),
                                axis=0)
        traval = mc[:, 0:3]  # translation parameters (mm)
        rotval = mc[:, 3:6]  # rotation parameters (rad)
        tidx = find_indices(np.sum(abs(traval) >
                                   self.inputs.translation_threshold, 1) >
                            0)
        ridx = find_indices(np.sum(abs(rotval) >
                                   self.inputs.rotation_threshold, 1) > 0)

    outliers = np.unique(np.union1d(iidx, np.union1d(tidx, ridx)))

    # write output to outputfile
    np.savetxt(artifactfile, outliers, fmt='%d', delimiter=' ')
    np.savetxt(intensityfile, g, fmt='%.2f', delimiter=' ')
    if self.inputs.use_norm:
        np.savetxt(normfile, normval, fmt='%.4f', delimiter=' ')

    if isdefined(self.inputs.save_plot) and self.inputs.save_plot:
        # The backend has to be selected before pyplot is imported.
        import matplotlib
        matplotlib.use(config.get("execution", "matplotlib_backend"))
        import matplotlib.pyplot as plt
        fig = plt.figure()
        if isdefined(self.inputs.use_norm) and self.inputs.use_norm:
            plt.subplot(211)
        else:
            plt.subplot(311)
        self._plot_outliers_with_wave(gz, iidx, 'Intensity')
        if isdefined(self.inputs.use_norm) and self.inputs.use_norm:
            plt.subplot(212)
            self._plot_outliers_with_wave(normval, np.union1d(tidx, ridx),
                                          'Norm (mm)')
        else:
            diff = ''
            if self.inputs.use_differences[0]:
                diff = 'diff'
            plt.subplot(312)
            self._plot_outliers_with_wave(traval, tidx,
                                          'Translation (mm)' + diff)
            plt.subplot(313)
            self._plot_outliers_with_wave(rotval, ridx,
                                          'Rotation (rad)' + diff)
        plt.savefig(plotfile)
        plt.close(fig)

    motion_outliers = np.union1d(tidx, ridx)
    stats = [{'motion_file': motionfile,
              'functional_file': imgfile},
             {'common_outliers': len(np.intersect1d(iidx, motion_outliers)),
              'intensity_outliers': len(np.setdiff1d(iidx,
                                                     motion_outliers)),
              'motion_outliers': len(np.setdiff1d(motion_outliers, iidx)),
              },
             {'motion': [{'using differences': self.inputs.use_differences[0]},
                         {'mean': np.mean(mc_in, axis=0).tolist(),
                          'min': np.min(mc_in, axis=0).tolist(),
                          'max': np.max(mc_in, axis=0).tolist(),
                          'std': np.std(mc_in, axis=0).tolist()},
                         ]},
             {'intensity': [{'using differences': self.inputs.use_differences[1]},
                            {'mean': np.mean(gz, axis=0).tolist(),
                             'min': np.min(gz, axis=0).tolist(),
                             'max': np.max(gz, axis=0).tolist(),
                             'std': np.std(gz, axis=0).tolist()},
                            ]},
             ]
    if self.inputs.use_norm:
        stats.insert(3, {'motion_norm':
                         {'mean': np.mean(normval, axis=0).tolist(),
                          'min': np.min(normval, axis=0).tolist(),
                          'max': np.max(normval, axis=0).tolist(),
                          'std': np.std(normval, axis=0).tolist(),
                          }})
    save_json(statsfile, stats)
def _plot_component_size_counts(original, random_graph, title, log=False):
    """Draw overlaid bar charts of component-size counts on the current axes.

    ``original`` and ``random_graph`` are Counters mapping a component size
    to the number of components of that size.
    """
    # list() is required on Python 3, where keys() is a view that NumPy
    # cannot concatenate.
    sizes = list(np.union1d(list(original.keys()),
                            list(random_graph.keys())))
    positions = range(len(sizes))
    # A Counter returns 0 for sizes that only occur in the other network.
    plt.bar(positions, [original[size] for size in sizes],
            color='tomato', label='Original', log=log, alpha=.5)
    plt.bar(positions, [random_graph[size] for size in sizes],
            color='steelblue', label=u'Erdős–Rényi', log=log, alpha=.5)
    plt.xticks(positions, sizes)
    plt.title(title)
    plt.xlabel('Length')
    plt.ylabel('Connected components')
    plt.legend()


def connected_components_analysis(g, er_g, ba_g):
    """Report and plot the connectivity of three networks.

    Prints whether the original graph ``g`` and the Erdos-Renyi graph
    ``er_g`` are strongly or weakly connected, and whether the
    Barabasi-Albert graph ``ba_g`` is connected. Then plots, for ``g`` and
    ``er_g``, how many weakly and strongly connected components exist per
    component size and saves the figure to ``./imgs/connectivity.pdf``.

    The single-argument ``print(...)`` form is used so the function behaves
    identically on Python 2 and Python 3.
    """
    if nx.is_strongly_connected(g):
        print('\nThe original network is strongly connected with ' +
              str(nx.number_strongly_connected_components(g)) +
              ' strongly connected components')
    elif nx.is_weakly_connected(g):
        print('\nThe original network is weakly connected with ' +
              str(nx.number_weakly_connected_components(g)) +
              ' weakly connected components')
    else:
        print('\nThe original network is neither strongly connected nor ' +
              'weakly connected')

    if nx.is_strongly_connected(er_g):
        print('The Erdős–Rényi network is strongly connected with ' +
              str(nx.number_strongly_connected_components(er_g)) +
              ' strongly connected components')
    elif nx.is_weakly_connected(er_g):
        print('The Erdős–Rényi is weakly connected with ' +
              str(nx.number_weakly_connected_components(er_g)) +
              ' weakly connected components')
    else:
        print('The Erdős–Rényi is neither strongly connected nor weakly ' +
              'connected')

    if nx.is_connected(ba_g):
        print('The Barabási–Albert network is connected with ' +
              str(nx.number_connected_components(ba_g)) +
              ' connected components')
    else:
        print('The Barabási–Albert is not connected')

    # component size -> number of components of that size
    g_weak = collections.Counter(
        len(c) for c in nx.weakly_connected_components(g))
    g_strong = collections.Counter(
        len(c) for c in nx.strongly_connected_components(g))
    er_weak = collections.Counter(
        len(c) for c in nx.weakly_connected_components(er_g))
    er_strong = collections.Counter(
        len(c) for c in nx.strongly_connected_components(er_g))

    plt.subplot(211)
    _plot_component_size_counts(
        g_weak, er_weak, 'Weakly connected components by length')
    plt.subplot(212)
    _plot_component_size_counts(
        g_strong, er_strong, 'Strongly connected components by length',
        log=True)

    plt.subplots_adjust(left=0.125, right=0.9, bottom=0.1, top=1.4,
                        wspace=0.2, hspace=0.2)
    plt.tight_layout()
    plt.savefig('./imgs/connectivity.pdf', format='pdf')
    plt.clf()
def __init__(self, **kwargs):
    """Load a spectrum from a FITS file and optionally mask outlier pixels.

    Keyword arguments (all read with ``kwargs.get``):
        instrument: 'nirspec' (default), 'apogee' or 'igrins'.
        name, order: nirspec/igrins only; used to build the file name.
        path: nirspec/igrins: directory of the file (defaults to './');
            apogee: path of the FITS file itself.
        applymask: when true, remove pixels whose flux deviates from the
            mean by at least 3 standard deviations (apogee: also pixels
            whose noise is at least 3 times the median noise).
            Default False.
        datatype: apogee only; 'aspcap' (default), 'ap1d', 'apvisit' or
            'apstar'.
        applytell: apogee 'apvisit' only; multiply the flux by the
            telluric spectrum. Default False.
        chip: apogee 'apvisit' only; 'all' (default), 'a', 'b' or 'c'.

    Raises:
        ValueError: for an apogee 'apvisit' file when ``chip`` is not one
            of the four supported values.
    """
    self.instrument = kwargs.get('instrument', 'nirspec')

    if self.instrument == 'nirspec':
        self.name = kwargs.get('name')
        self.order = kwargs.get('order')
        self.path = kwargs.get('path')
        self.applymask = kwargs.get('applymask', False)

        if self.path is None:
            self.path = './'

        fullpath = self.path + '/' + self.name + '_' + str(
            self.order) + '_all.fits'
        hdulist = fits.open(fullpath, ignore_missing_end=True)

        # The indices 0 to 3 correspond to wavelength, flux, noise, and sky
        self.header = hdulist[0].header
        self.wave = hdulist[0].data
        self.flux = hdulist[1].data
        self.noise = hdulist[2].data
        try:
            self.sky = hdulist[3].data
        except IndexError:
            print("No sky line data.")
            self.sky = np.zeros(self.wave.shape)

        self.mask = []
        # define a list for storing the best wavelength shift
        self.bestshift = []

        # store the original parameters
        self.oriWave = hdulist[0].data
        self.oriFlux = hdulist[1].data
        self.oriNoise = hdulist[2].data

    elif self.instrument == 'apogee':
        self.name = kwargs.get('name')
        self.path = kwargs.get('path')
        self.datatype = kwargs.get('datatype', 'aspcap')
        self.applymask = kwargs.get('applymask', False)
        self.applytell = kwargs.get('applytell', False)
        self.chip = kwargs.get('chip', 'all')

        hdulist = fits.open(self.path)

        if self.datatype == 'aspcap':
            crval1 = hdulist[1].header['CRVAL1']
            cdelt1 = hdulist[1].header['CDELT1']
            naxis1 = hdulist[1].header['NAXIS1']
            self.header4 = hdulist[4].header
            self.param = hdulist[4].data['PARAM']
            # log-linear wavelength grid rebuilt from the header keywords
            self.wave = np.array(
                pow(10, crval1 + cdelt1 * np.arange(naxis1)))
            self.oriWave = np.array(
                pow(10, crval1 + cdelt1 * np.arange(naxis1)))
            self.flux = np.array(hdulist[1].data)
            self.noise = np.array(hdulist[2].data)

        elif self.datatype == 'ap1d':
            self.header4 = hdulist[4].header
            self.header5 = hdulist[5].header
            # use aspcap data as wavelength calibrators
            self.wave = np.array(hdulist[4].data)
            self.flux = np.array(hdulist[1].data)
            self.noise = np.array(hdulist[2].data)
            # store the original parameters
            self.oriWave = np.array(hdulist[4].data)
            self.oriFlux = np.array(hdulist[1].data)
            self.oriNoise = np.array(hdulist[2].data)

        elif self.datatype == 'apvisit':
            # keep the headers of extensions 1..10 as header1..header10
            for ext in range(1, 11):
                setattr(self, 'header%d' % ext, hdulist[ext].header)

            # read the bitmask
            self.bitmask = hdulist[3].data

            # rows of each extension that belong to the requested chip(s)
            chip_rows = {'all': (0, 1, 2), 'a': (0,), 'b': (1,), 'c': (2,)}
            if self.chip not in chip_rows:
                # previously this only failed later with an AttributeError
                # on the never-assigned self.wave
                raise ValueError("chip must be 'all', 'a', 'b' or 'c'")
            rows = chip_rows[self.chip]

            # a pixel is dropped when any of these bits is set in its bitmask
            flagged_bits = (0, 1, 2, 3, 4, 5, 6, 12, 14)
            bad_pixels = {}
            for row in rows:
                flagged = []
                for i, value in enumerate(hdulist[3].data[row]):
                    bits = smart.bits_set(value)
                    if any(bit in bits for bit in flagged_bits):
                        flagged.append(i)
                bad_pixels[row] = flagged

            def stack(ext, drop_bad=False):
                # Concatenate the selected chip rows of one extension,
                # optionally removing the flagged pixels of each row first.
                pieces = []
                for row in rows:
                    values = hdulist[ext].data[row]
                    if drop_bad:
                        values = np.delete(values, bad_pixels[row])
                    pieces += list(values)
                return np.array(pieces)

            self.wave = stack(4, drop_bad=True)
            self.flux = stack(1, drop_bad=True)
            self.noise = stack(2, drop_bad=True)
            # NOTE(review): sky and skynoise are not filtered by the bitmask,
            # so they can be longer than wave/flux - confirm this is intended.
            self.sky = stack(5)
            self.skynoise = stack(6)
            self.tell = stack(7, drop_bad=True)
            self.tellnoise = stack(8, drop_bad=True)

            # store the original parameters
            self.oriWave = stack(4)
            self.oriFlux = stack(1)
            self.oriNoise = stack(2)

            if self.applytell:
                self.flux *= self.tell

            self.wavecoeff = hdulist[9].data
            self.lsfcoeff = hdulist[10].data

            # make every stored spectrum run in increasing wavelength
            if self.wave[0] > self.wave[-1]:
                self.wave = self.wave[::-1]
                self.flux = self.flux[::-1]
                self.noise = self.noise[::-1]
                self.sky = self.sky[::-1]
                self.skynoise = self.skynoise[::-1]
                self.tell = self.tell[::-1]
                self.tellnoise = self.tellnoise[::-1]
                self.oriWave = self.oriWave[::-1]
                self.oriFlux = self.oriFlux[::-1]
                self.oriNoise = self.oriNoise[::-1]

            # to separate the continuum end points
            self.oriWave0 = hdulist[4].data
            self.oriFlux0 = hdulist[1].data

        elif self.datatype == 'apstar':
            crval1 = hdulist[0].header['CRVAL1']
            cdelt1 = hdulist[0].header['CDELT1']
            naxis1 = hdulist[0].header['NWAVE']
            self.header4 = hdulist[4].header
            self.header5 = hdulist[5].header
            self.header6 = hdulist[6].header
            self.header7 = hdulist[7].header
            self.header8 = hdulist[8].header
            self.header9 = hdulist[9].header

            self.wave = np.array(
                pow(10, crval1 + cdelt1 * np.arange(1, naxis1 + 1)))
            self.flux = hdulist[1].data
            self.noise = hdulist[2].data
            self.sky = hdulist[4].data
            self.skynoise = hdulist[5].data
            self.tell = hdulist[6].data
            self.tellnoise = hdulist[7].data
            self.lsfcoeff = hdulist[8].data
            self.binary = hdulist[9].data

            # store the original parameters
            self.oriWave = np.array(
                pow(10, crval1 + cdelt1 * np.arange(1, naxis1 + 1)))
            self.oriFlux = hdulist[1].data
            self.oriNoise = hdulist[2].data

        # common to every apogee data type
        self.header = hdulist[0].header
        self.header1 = hdulist[1].header
        self.header2 = hdulist[2].header
        self.header3 = hdulist[3].header
        self.model = np.array(hdulist[3].data)
        self.mask = []

    elif self.instrument == 'igrins':
        self.name = kwargs.get('name')
        self.order = kwargs.get('order')
        self.path = kwargs.get('path')
        self.applymask = kwargs.get('applymask', False)

        if self.path is None:
            self.path = './'

        fullpath = self.path + '/' + self.name + '_' + str(
            self.order) + '.fits'
        hdulist = fits.open(fullpath, ignore_missing_end=True)

        # The indices 0 to 2 correspond to wavelength, flux, and noise
        self.header = hdulist[0].header
        self.wave = hdulist[0].data * 10000.0  # convert to Angstrom
        self.flux = hdulist[1].data
        self.noise = hdulist[2].data
        # same default as the other instruments
        self.mask = []

    if self.applymask:
        # set up masking criteria
        self.avgFlux = np.mean(self.flux)
        self.stdFlux = np.std(self.flux)
        self.smoothFlux = self.flux

        # indices of pixels whose flux is a >= 3 sigma outlier
        outliers = np.where(
            np.abs(self.flux - self.avgFlux) >= 3. * self.stdFlux)[0]

        if self.instrument == 'apogee':
            # also drop pixels with unusually large noise
            noise_median = np.median(self.noise)
            outliers = np.union1d(
                outliers, np.where(self.noise >= 3. * noise_median)[0])

        self.wave = np.delete(self.wave, outliers)
        self.flux = np.delete(self.flux, outliers)
        self.noise = np.delete(self.noise, outliers)
        if self.instrument == 'nirspec':
            self.sky = np.delete(self.sky, outliers)

        # always a flat index array (used to become a scalar for apogee)
        self.mask = outliers
if(total_sum > min_val): min_val = total_sum m_min = m c_min = c y_min = y x_min = x check_array[m_min][c_min][y_min][x_min] for m in range(m_min, m_min+args.m): for c in range(c_min, c_min+args.c): for y in range(y_min, y_min+args.y): for x in range(x_min, x_min+args.x): prune_weight(net, layer_name, m, c, y, x) pruning_signals[layer_name] = net.layer_dict[layer_name].blobs[0].data[m][c][y][x] prune_state[layer_name] = np.union1d(prune_state[layer_name], pruning_signals[layer_name]) #make the test_acc, ce_loss = test(pruning_solver, args.test_iterations, args.accuracy_layer_name, args.loss_layer_name) removed_weights = 0 total_weights = 0 for layer_name in layer_list: removed_weights += prune_state[layer_name].size total_weights += net.layer_dict[layer_name].blobs[0].data.size print("Test accuracy:", test_acc) print("Removed", removed_weights, "of", total_weights, "weights", " in layer ", layer_name) sys.stdout.flush() pruning_solver.net.save(args.output)
def convert_btag_csv_file(csvFilePath):
    """Convert a b-tagging correction CSV file into dense lookup tables.

    The first line of the file has the form ``<name>;<col>,<col>,...``.
    The remaining lines are parsed with ``np.genfromtxt`` using those
    column names. Rows are grouped by the distinct values of the first
    four columns; for each group the lower/upper edges in columns 4/5,
    6/7 and 8/9 define the eta, pt and discriminant bins, and column 10 is
    the stored value.

    Returns:
        dict mapping ``(label, 'dense_evaluated_lookup')`` to
        ``(vals, (etaBins, ptBins, discrBins), feval_dims)`` where ``vals``
        is indexed ``[discr, pt, eta]`` and ``label`` is the file name
        joined with the four grouping values by ``'_'``.

    Raises:
        IndexError: if a (eta, pt, discr) bin of a group has no row.
    """
    # read only the header line; the context manager closes the file even
    # when parsing it fails
    with open(csvFilePath) as btag_f:
        nameandcols = btag_f.readline().split(';')
    name = nameandcols[0].strip()
    columns = [column.strip()
               for column in nameandcols[1].strip().split(',')]

    corrections = np.genfromtxt(csvFilePath,
                                dtype=None,
                                names=tuple(columns),
                                converters={
                                    1: lambda s: s.strip(),
                                    2: lambda s: s.strip(),
                                    10: lambda s: s.strip(' "')
                                },
                                delimiter=',',
                                skip_header=1,
                                unpack=True,
                                encoding='ascii')

    all_names = corrections[[columns[i] for i in range(4)]]
    labels = np.unique(all_names)

    def bin_edges(rows, low_column, high_column):
        # sorted union of the lower and upper bin edges of the given rows
        return np.union1d(np.unique(rows[low_column]),
                          np.unique(rows[high_column]))

    wrapped_up = {}
    for label in labels:
        # select the rows of this group once instead of once per column
        is_label = all_names == label
        rows = corrections[np.where(is_label)]
        etaBins = bin_edges(rows, columns[4], columns[5])
        ptBins = bin_edges(rows, columns[6], columns[7])
        discrBins = bin_edges(rows, columns[8], columns[9])

        vals = np.zeros(shape=(len(discrBins) - 1, len(ptBins) - 1,
                               len(etaBins) - 1),
                        dtype=corrections.dtype[10])
        for i, eta_bin in enumerate(etaBins[:-1]):
            for j, pt_bin in enumerate(ptBins[:-1]):
                for k, discr_bin in enumerate(discrBins[:-1]):
                    # rows are matched on the lower edge of each bin
                    this_bin = np.where(
                        is_label
                        & (corrections[columns[4]] == eta_bin)
                        & (corrections[columns[6]] == pt_bin)
                        & (corrections[columns[8]] == discr_bin))
                    vals[k, j, i] = corrections[this_bin][columns[10]][0]

        # the grouping values may be bytes or numbers; make them all str
        label_decode = []
        for i in range(len(label)):
            part = label[i]
            if isinstance(part, bytes):
                label_decode.append(part.decode())
            else:
                label_decode.append(str(part))
        str_label = '_'.join([name] + label_decode)

        feval_dim = btag_feval_dims[label[0]]
        wrapped_up[(str_label, 'dense_evaluated_lookup')] = (
            vals, (etaBins, ptBins, discrBins), tuple(feval_dim))
    return wrapped_up
def concatenate(datasets, missing='intersect'):
    '''Concatenate a list of datasets

    Args:
        datasets (list): list of singlet.Dataset objects to be
            concatenated. The function runs over it more than once so a
            consuming lazy iterator is not a valid input.
        missing (str): What to do with genes that are missing from any of
            the datasets. 'pad' means pad the missing genes with zeros,
            'intersect' means take only the intersection

    Returns:
        concatenated singlet.Dataset. With missing='pad' the result has no
        featuresheet; with 'intersect' it is taken from the first dataset.
        The counts use a dtype that can hold the counts of every input
        dataset.

    Raises:
        ValueError: if `datasets` is empty, `missing` is not a supported
            value, or two samples share a name.
    '''
    if len(datasets) == 0:
        raise ValueError('Cannot concatenate empty list')
    if missing not in ('pad', 'intersect'):
        raise ValueError('missing must be "pad" or "intersect"')

    ns = [ds.n_samples for ds in datasets]
    feas = [ds.featurenames for ds in datasets]

    features = feas[0]
    if missing == 'intersect':
        for fea in feas[1:]:
            features = np.intersect1d(features, fea)
    else:
        for fea in feas[1:]:
            features = np.union1d(features, fea)
        features = np.sort(features)

    samplesheet = pd.concat([ds.samplesheet for ds in datasets], axis=0)
    # is_unique also works for an empty index, where the old Counter-based
    # check raised IndexError
    if not samplesheet.index.is_unique:
        raise ValueError(
            'Samples cannot share names, make sure they are unique',
        )

    # row position of each output feature
    fea_vect = pd.Series(np.arange(len(features)), index=features)
    m = len(features)
    ntot = sum(ns)
    # promote across all inputs so that e.g. float counts are not silently
    # truncated when the first dataset holds integers
    dtype = np.result_type(*[ds.counts.values.dtype for ds in datasets])
    mat = np.zeros((m, ntot), dtype=dtype)

    i = 0
    for ii, (ds, n) in enumerate(zip(datasets, ns)):
        # Shortcut if the features are alright
        if (len(feas[ii]) == m) and (feas[ii] == features).all():
            mat[:, i: i+n] = ds.counts.values
        elif missing == 'intersect':
            mat[:, i: i+n] = ds.counts.loc[features].values
        else:
            # scatter this dataset's rows; missing features stay zero
            js = fea_vect.loc[feas[ii]].values
            mat[js, i: i+n] = ds.counts.values
        i += n

    counts = datasets[0].counts.__class__(
        mat,
        index=features,
        columns=samplesheet.index,
    )

    if missing == 'intersect':
        featuresheet = datasets[0].featuresheet.loc[features].copy()
    else:
        featuresheet = None

    return datasets[0].__class__(
        counts_table=counts,
        samplesheet=samplesheet,
        featuresheet=featuresheet,
    )
#CUDA SETUP CUDA = torch.cuda.is_available() print("On GPU: ", CUDA) if CUDA: dtype = torch.cuda.FloatTensor else: dtype = torch.FloatTensor #SIGNAL GENERATION signal = s.gen_data(LENGTH, 0.5, 1) target_range = np.array(range( 400, 430)) #the range in the signal where the target appears target_range = np.union1d(target_range, np.array(range(800, 830))) target_len = len(list(target_range)) signal[target_range] += 0.04 #add artificial target to the background signal signal += np.random.normal(loc=0, scale=0.0075, size=LENGTH) #add background noise signal = inverse_utils.normalise(signal) #normalise signal to range [-1, 1] x = np.zeros((LENGTH, 1)) x[:, 0] = signal """ plt.figure() plt.plot(range(LENGTH), signal) plt.title("Original Signal") plt.show()
def test_union1d_1(self): a = np.union1d([-1, 0, 1], [-2, 0, 2]) print(a)
# Apply per-class non-max suppression pre_nms_boxes = refined_proposals[keep] pre_nms_scores = roi_scores[keep] pre_nms_class_ids = roi_class_ids[keep] nms_keep = [] for class_id in np.unique(pre_nms_class_ids): # Pick detections of this class ixs = np.where(pre_nms_class_ids == class_id)[0] # Apply NMS class_keep = utils.non_max_suppression(pre_nms_boxes[ixs], pre_nms_scores[ixs], config.DETECTION_NMS_THRESHOLD) # Map indicies class_keep = keep[ixs[class_keep]] nms_keep = np.union1d(nms_keep, class_keep) print("{:22}: {} -> {}".format(dataset.class_names[class_id][:20], keep[ixs], class_keep)) keep = np.intersect1d(keep, nms_keep).astype(np.int32) print("\nKept after per-class NMS: {}\n{}".format(keep.shape[0], keep)) # In[28]: # Show final detections ixs = np.arange(len(keep)) # Display all # ixs = np.random.randint(0, len(keep), 10) # Display random sample captions = ["{} {:.3f}".format(dataset.class_names[c], s) if c > 0 else "" for c, s in zip(roi_class_ids[keep][ixs], roi_scores[keep][ixs])]
def proclus(X, k=2, l=3, minDeviation=0.1, A=30, B=3, niters=30, seed=1234):
    """Cluster the rows of ``X`` with PROCLUS (projected clustering).

    Parameters
    ----------
    X : the data set, one point per row
    k : the desired number of clusters
    l : average number of dimensions per cluster (must be >= 2)
    minDeviation : threshold passed to the bad-medoid selection
    A : constant sizing the initial random sample (A * k points)
    B : constant sizing the candidate medoid set (B * k points);
        must not exceed A
    niters : maximum number of iterations of the iterative phase
    seed : seed for NumPy's global random number generator

    Returns
    -------
    (Mcurr, Dis, assigns) : the final medoid indices, the dimensions
        selected for each cluster and the per-point assignment, where -1
        marks an outlier.
    """
    np.random.seed(seed)
    N, _ = X.shape

    if B > A:
        raise Exception("B has to be smaller than A.")
    if l < 2:
        raise Exception("l must be >=2.")

    # ---- 1) Initialization: sample A*k points, greedily keep B*k of them
    shuffled = np.arange(N)
    np.random.shuffle(shuffled)
    M = greedy(X, shuffled[0:(A * k)], B * k)

    # ---- 2) Iterative phase: hill-climb over subsets of k medoids from M
    BestObjective = np.inf
    Mcurr = np.random.permutation(M)[0:k]
    Mbest = None
    D = squareform(pdist(X))  # all pairwise euclidean distances
    it = 0
    while True:
        it += 1

        # locality of each medoid: points no farther away than its
        # nearest fellow medoid
        L = [np.where(D[mi] <= D[mi, np.setdiff1d(Mcurr, mi)].min())[0]
             for mi in Mcurr]

        Dis = findDimensions(X, k, l, L, Mcurr)
        assigns = assignPoints(X, Mcurr, Dis)
        ObjectiveFunction = evaluateClusters(X, assigns, Dis, Mcurr)

        Mold = Mcurr.copy()
        badM = []
        if ObjectiveFunction < BestObjective:
            # new best: remember it and find its bad medoids
            BestObjective = ObjectiveFunction
            Mbest = Mcurr.copy()
            badM = computeBadMedoids(X, assigns, Dis, Mcurr, minDeviation)
            print("bad medoids:")
            print(badM)

        if len(badM) > 0:
            # swap the bad medoids of the best set for unused candidates
            print("old mcurr:")
            print(Mcurr)
            replacements = np.random.choice(np.setdiff1d(M, Mbest),
                                            size=len(badM), replace=False)
            Mcurr = np.union1d(np.setdiff1d(Mbest, badM), replacements)
            print("new mcurr:")
            print(Mcurr)

        print("finished iter: %d" % it)

        if np.allclose(Mold, Mcurr) or it >= niters:
            break

    print("finished iterative phase...")

    # ---- 3) Refinement: redo dimensions from the clusters themselves
    L = [np.where(assigns == mi)[0] for mi in Mcurr]
    Dis = findDimensions(X, k, l, L, Mcurr)
    assigns = assignPoints(X, Mcurr, Dis)

    # radius of each cluster: smallest Manhattan segmental distance from
    # its medoid to any other medoid, measured in its own dimensions
    deltais = np.zeros(k)
    for i in range(k):
        nearest = np.inf
        for j in range(k):
            if j == i:
                continue
            dist = manhattanSegmentalDist(X[Mcurr[i]], X[Mcurr[j]], Dis[i])
            if dist < nearest:
                nearest = dist
        deltais[i] = nearest

    # points outside the radius of every medoid are outliers
    for i in range(len(assigns)):
        within_reach = any(
            manhattanSegmentalDist(X[Mcurr[j]], X[i], Dis[j]) <= deltais[j]
            for j in range(k))
        if not within_reach:
            assigns[i] = -1

    return (Mcurr, Dis, assigns)
def get_negative_pool(self, adj, entities, all_entities):
    """Return the entities that are neither in ``entities`` nor adjacent to them.

    ``adj[entities]`` is concatenated into one array of neighbour ids; the
    result is the sorted, unique part of ``all_entities`` outside the given
    entities and those neighbours.
    """
    neighbours = np.unique(np.concatenate(adj[entities]))
    excluded = np.union1d(entities, neighbours)
    return np.setdiff1d(all_entities, excluded)
def _sample(self, X, y):
    """Resample the dataset.

    Two groups of samples are removed: A1, the samples rejected by an
    Edited Nearest Neighbours pass, and A2, neighbours of minority-class
    samples whose neighbourhood disagrees with the minority class and
    whose own class is large enough to be cleaned.

    Parameters
    ----------
    X : {array-like, sparse matrix}, shape (n_samples, n_features)
        Matrix containing the data which have to be sampled.

    y : array-like, shape (n_samples,)
        Corresponding label for each sample in X.

    Returns
    -------
    X_resampled : {ndarray, sparse matrix}, shape \
(n_samples_new, n_features)
        The array containing the resampled data.

    y_resampled : ndarray, shape (n_samples_new,)
        The corresponding label of `X_resampled`

    idx_under : ndarray, shape (n_samples_new,)
        If `return_indices` is `True`, the integer indices of the samples
        that have been kept are returned as well.

    Raises
    ------
    NotImplementedError
        If `kind_sel` is neither 'mode' nor 'all'.

    """
    self._validate_estimator()
    enn = EditedNearestNeighbours(ratio=self.ratio, return_indices=True,
                                  n_neighbors=self.n_neighbors,
                                  kind_sel='mode',
                                  n_jobs=self.n_jobs)
    _, _, index_not_a1 = enn.fit_sample(X, y)
    # A1 = every sample that ENN did not keep
    index_a1 = np.ones(y.shape, dtype=bool)
    index_a1[index_not_a1] = False
    index_a1 = np.flatnonzero(index_a1)

    # clean the neighborhood
    target_stats = Counter(y)
    class_minority = min(target_stats, key=target_stats.get)
    # compute which classes to consider for cleaning for the A2 group
    classes_under_sample = [c for c, n_samples in target_stats.items()
                            if (c in self.ratio_.keys() and
                                (n_samples > X.shape[0] *
                                 self.threshold_cleaning))]
    self.nn_.fit(X)
    class_minority_indices = np.flatnonzero(y == class_minority)
    X_class = safe_indexing(X, class_minority_indices)
    y_class = safe_indexing(y, class_minority_indices)
    # first column is dropped: it is the query sample itself
    nnhood_idx = self.nn_.kneighbors(
        X_class, return_distance=False)[:, 1:]
    nnhood_label = y[nnhood_idx]
    if self.kind_sel == 'mode':
        nnhood_label_majority, _ = mode(nnhood_label, axis=1)
        nnhood_bool = np.ravel(nnhood_label_majority) == y_class
    elif self.kind_sel == 'all':
        nnhood_label_majority = nnhood_label == class_minority
        # reduce the agreement mask, not the raw labels: the raw labels
        # only test "label != 0", which is unrelated to the minority class
        nnhood_bool = np.all(nnhood_label_majority, axis=1)
    else:
        raise NotImplementedError
    # compute a2 group: neighbours of the minority samples whose
    # neighbourhood disagrees, restricted to the classes to clean
    index_a2 = np.ravel(nnhood_idx[~nnhood_bool])
    index_a2 = np.unique([index for index in index_a2
                          if y[index] in classes_under_sample])

    union_a1_a2 = np.union1d(index_a1, index_a2).astype(int)
    selected_samples = np.ones(y.shape, dtype=bool)
    selected_samples[union_a1_a2] = False
    index_target_class = np.flatnonzero(selected_samples)

    if self.return_indices:
        return (safe_indexing(X, index_target_class),
                safe_indexing(y, index_target_class),
                index_target_class)
    else:
        return (safe_indexing(X, index_target_class),
                safe_indexing(y, index_target_class))
def connectivity(labels, neigh=None):
    """
    CONNECTIVITY: given a labeling of an image, find the neighbors for each
    label.

    cmap, borders = connectivity(labels, neigh=None)

    Parameters
    ----------
    labels: [M x N, integer] a pseudo-image with a labeling of objects;
        label==0 indicates background
    neigh: square neighborhood matrix with an odd side length, or None.
        Positions equal to 1 are the neighbors that are inspected. If None,
        a 4-connected neighborhood is assumed,
                        | 0 1 0 |
                neigh = | 1 0 1 |
                        | 0 1 0 |

    Returns
    -------
    cmap: a dictionary containing the connectivity list: cmap[label] is an
        array of all neighboring labels for that label (background, 0, is
        included when the object touches it)
    borders: a dictionary containing the pixels on the borders between
        objects. For each object, all the pixels on the border that are in
        the neighborhood of another object (but not background), are stored
        in a list with elements of the form
        ((row, column), [list of neighboring labels]). The coordinates
        refer to the image padded by half the neighborhood size on each
        side.

    Raises
    ------
    ValueError: if ``neigh`` is not square or its side length is even.
    """
    if neigh is None:
        neigh = np.array([[0, 1, 0], [1, 0, 1], [0, 1, 0]])
    neigh = np.asarray(neigh)

    if neigh.shape[0] != neigh.shape[1]:
        raise ValueError('Improper neighborhood specification')

    if neigh.shape[0] % 2 != 1:
        raise ValueError('Neighborhood size must be odd')

    # integer half-width; `/` would give a float on Python 3, which is not
    # a valid pad width or slice bound
    h = neigh.shape[0] // 2

    # pad with background so every labeled pixel has a full neighborhood
    labels = np.pad(labels, ((h, h), (h, h)), mode='constant',
                    constant_values=0)
    (n, m) = labels.shape

    cmap = {}     # connectivity map
    borders = {}  # borders
    selected = neigh == 1

    idx = np.where(labels > 0)
    for (r, c) in zip(idx[0], idx[1]):
        own = labels[r, c]
        roi = labels[max(0, r-h):min(n, r+h+1), max(0, c-h):min(m, c+h+1)]
        # labels at the selected neighborhood positions, excluding the
        # pixel's own label (previously the whole window was used and the
        # neighborhood matrix had no effect)
        lb = np.setdiff1d(np.unique(roi[selected]), [own],
                          assume_unique=True)

        if own in cmap:
            cmap[own] = np.union1d(cmap[own], lb)
        else:
            cmap[own] = lb

        lb = lb.tolist()
        if 0 in lb:  # remove bkg
            lb.remove(0)
        if len(lb) == 0:  # no other neighbors for this pixel
            continue

        # otherwise, record this border pixel with its neighboring objects
        borders.setdefault(own, []).append(((r, c), lb))

    return cmap, borders
# Boundary node index sets; mesh.p[0, :] / mesh.p[1, :] are compared against
# 0 and 1 to pick the nodes lying on each wall.

# left side wall (minus upmost and lowermost nodes)
Dleftx = np.nonzero(mesh.p[0, :] == 0)[0]
Dleftx = np.setdiff1d(Dleftx, np.nonzero(mesh.p[1, :] == 0)[0])
Dleftx = np.setdiff1d(Dleftx, np.nonzero(mesh.p[1, :] == 1)[0])

# right side wall (minus upmost and lowermost nodes)
Drightx = np.nonzero(mesh.p[0, :] == 1)[0]
Drightx = np.setdiff1d(Drightx, np.nonzero(mesh.p[1, :] == 0)[0])
Drightx = np.setdiff1d(Drightx, np.nonzero(mesh.p[1, :] == 1)[0])

# lower and upper side wall
Dlowerx = np.nonzero(mesh.p[1, :] == 0)[0]
Dupperx = np.nonzero(mesh.p[1, :] == 1)[0]

# all dirichlet nodes: the boundary nodes, once per block offset by N
Dallx = mesh.boundary_nodes()
Dally = Dallx + N
D = np.union1d(Dallx, Dally)
# every remaining index of the 3 * N unknowns
I = np.setdiff1d(np.arange(0, 3 * N), D)

# index sets for accessing different components
I1 = np.arange(0, N)
I2 = I1 + N
Ip = I2 + N

# single-argument print(...) behaves the same on Python 2 and Python 3
print("NDOF: " + str(N))
print("NELS: " + str(mesh.t.shape[1]))

a = fem.asm.AssemblerTriP1(mesh)
mu = 1
def IndexByTeamSeason(seasonyear, d, c, Matzero=True):
    """Build one games-by-players table per team for a season.

    For every distinct team with games in the season, the games of that
    team are listed in ascending GameId order and the players of all those
    games are collected with ``UniqueplayersperMatch``.

    Args:
        seasonyear: season year; used as a prefix filter on GameId.
        d: database connection, passed on to ``UniqueplayersperMatch``.
        c: database cursor used to run the queries.
        Matzero: if True the player columns are filled with zeros,
            otherwise with ones.

    Returns:
        A list with one pd.DataFrame per team. Each frame is indexed by
        GameId and has the columns 'GameId', 'CountGame' (running game
        number starting at 0) and one column per player id.
    """
    season_pattern = str(seasonyear) + '%'

    team_query = """SELECT DISTINCT TeamId FROM Plays_In where GameId LIKE '{0}' ORDER BY TeamId ASC """.format(season_pattern)
    c.execute(team_query)

    Alltables = []
    for team_row in c.fetchall():
        games_query = """ SELECT GameId , Venue FROM Plays_in WHERE TeamId = {0} AND GameId LIKE '{1}' ORDER BY GameId ASC """.format(team_row[0], season_pattern)
        c.execute(games_query)
        games = c.fetchall()
        n_games = len(games)
        game_ids = [int(game[0]) for game in games]

        # sorted union of the players seen in any game of this team
        players = []
        for game_id, venue in ((game[0], game[1]) for game in games):
            players = np.union1d(players,
                                 UniqueplayersperMatch(game_id, venue, d))
        player_columns = players.astype(int).astype(str).tolist()

        make_matrix = np.ones if Matzero != True else np.zeros
        player_matrix = make_matrix([n_games, len(player_columns)])

        id_and_counter = np.column_stack((game_ids, range(n_games)))
        table = pd.DataFrame(
            np.column_stack((id_and_counter, player_matrix)),
            columns=['GameId', 'CountGame'] + player_columns,
            index=game_ids)
        Alltables.append(table)

    return Alltables
# In[213]: y=np.array([2,5,6,9,2,1]) # In[214]: np.intersect1d(x,y) #返回两个数组共有的元素(有序) # In[215]: np.union1d(x,y) #返回两个数组并集(有序) # In[216]: np.setdiff1d(x,y) #数组的差,即元素在x中不在y中 # In[217]: np.setxor1d(x,y) #数组的对称差,即在一个数组中不在两个数组中
al27_sab[i] = al27_sab[i] + xs # combined al27_total_e = numpy.concatenate((al27_sab_e, al27_e[al27_dex:])) al27_total = numpy.concatenate((al27_sab, al27_xs[al27_dex:])) + numpy.interp( al27_total_e, al27_e, al27_a) ### sapphire al_frac = 0.4 o_frac = 0.6 Md = (26.9815385 * al_frac + 15.99491461956 * o_frac) * 1.008664916 Na = 6.022e23 b = 1e-24 density = 3.98 Nd = density * Na * b / Md Nd_sapp = density * Na * b / Md sapp_e = numpy.union1d(o16_lib.energy, al27_lib.energy) sapp_xs = ( o_frac * numpy.interp(sapp_e, o16_lib.energy, o16_lib.reactions[2].sigma) + al_frac * numpy.interp(sapp_e, al27_lib.energy, al27_lib.reactions[2].sigma)) * Nd sapp_a = ( o_frac * numpy.interp(sapp_e, o16_lib.energy, o16_lib.reactions[102].sigma) + al_frac * numpy.interp(sapp_e, al27_lib.energy, al27_lib.reactions[102].sigma)) * Nd sapp_sab = (o_frac * osapp_sab_lib.inelastic_sigma + al_frac * alsapp_sab_lib.inelastic_sigma) * Nd sapp_sab_e = osapp_sab_lib.inelastic_e_in #find e dex where sab stops sapp_dex = numpy.where(sapp_e >= sapp_sab_e[-1])[0][0] ## add carbon to deuterium inelastic #for i in range(0,len(sapp_sab_e)):
elif offline_file == '/opt/local/Data/JGauthier-J115/offline_results/results_analysis_offline_JEFF_90k.npz': idx_components_r = np.where(r_values >= .95)[0] idx_components_raw = np.where(fitness_raw < -55)[0] idx_components_delta = np.where(fitness_delta < -55)[0] else: idx_components_r = np.where(r_values >= .8)[0] idx_components_raw = np.where(fitness_raw < -40)[0] idx_components_delta = np.where(fitness_delta < -40)[0] #min_radius = gSig[0] - 2 # masks_ws, idx_blobs, idx_non_blobs = extract_binary_masks_blob( # A.tocsc(), min_radius, dims, num_std_threshold=1, # minCircularity=0.7, minInertiaRatio=0.2, minConvexity=.5) idx_components = np.union1d(idx_components_r, idx_components_raw) idx_components = np.union1d(idx_components, idx_components_delta) #idx_blobs = np.intersect1d(idx_components, idx_blobs) idx_components_bad = np.setdiff1d(list(range(len(r_values))), idx_components) print(' ***** ') print((len(r_values))) print((len(idx_components))) #%% try: A_off = A_off.toarray()[:, idx_components] except: A_off = A_off[:, idx_components] C_off = C_off[idx_components] # OASISinstances = OASISinstances[()] #%%
def _save_count_histogram(labels, working_dir, stem):
    """Save a bar chart of label frequencies, most frequent first.

    Writes ``<stem>.pdf`` and ``<stem>.png`` into ``working_dir`` and returns
    the unique labels ordered by descending count.
    """
    db_types, counts = np.unique(labels, return_counts=True)
    sort_idx = np.argsort(counts)[::-1]
    db_types = db_types[sort_idx]
    counts = counts[sort_idx]

    plt.figure()
    plt.bar(np.arange(1, len(db_types) + 1), counts, tick_label=db_types)
    plt.xticks(rotation='vertical', fontsize=8)
    plt.title("# of query cells that are classified as each cell type")
    plt.savefig(join(working_dir, stem + ".pdf"), bbox_inches="tight")
    plt.savefig(join(working_dir, stem + ".png"), bbox_inches="tight")
    plt.close()
    return db_types


def _save_grouped_fraction_chart(labels, control_fracs, disease_fracs, width,
                                 working_dir, stem):
    """Save a control-vs-disease grouped bar chart annotated with p-values.

    Each pair of bars is labelled with a two-sided binomial test (p=0.5) on
    the two fractions scaled to integer counts out of 1000.
    """
    x_locations = np.arange(1, len(labels) + 1)

    plt.figure()
    rects1 = plt.bar(x_locations, control_fracs, width, color='b',
                     label='control')
    rects2 = plt.bar(x_locations + width, disease_fracs, width, color='r',
                     label='disease')
    plt.xticks(x_locations + width / 2, labels, rotation='vertical',
               fontsize=8)
    plt.ylabel('Fraction of query cells')
    plt.legend()
    plt.title("Portion of query cells classified as each cell type")

    # add p-vals
    for rect1, rect2 in zip(rects1, rects2):
        height1 = rect1.get_height()
        height2 = rect2.get_height()
        # The test needs integers, so fractions become counts per 1000.
        count1 = math.floor(1000 * height1)
        count2 = math.floor(1000 * height2)
        pval = binom_test(count1, count1 + count2, p=0.5)
        plt.gca().text(rect1.get_x(), 1.05 * max(height1, height2),
                       '{:.2e}'.format(pval), ha='left', va='bottom',
                       fontsize=7)

    plt.savefig(join(working_dir, stem + ".pdf"), bbox_inches="tight")
    plt.savefig(join(working_dir, stem + ".png"), bbox_inches="tight")
    plt.close()


def make_classification_histograms(overall_classifications, classifications,
                                   working_dir):
    """Plot classification histograms and control-vs-disease comparisons.

    Figures are written to ``<working_dir>/D_classification_histograms``
    (created if missing):

    * ``overall`` -- histogram of ``overall_classifications``;
    * one histogram per key of ``classifications``, named after the key;
    * ``control_all`` / ``disease_all`` -- pooled histograms over all keys
      containing ``'control'`` / ``'disease'`` (control is checked first);
    * ``grouped_bar`` -- per-label fractions of control vs disease cells;
    * ``grouped_bar_non_zero`` -- the same, restricted to labels seen in both
      groups, with fractions renormalised over the labels kept.

    The 2 x n_labels contingency table (row 0 control, row 1 disease) is
    saved to the current working directory, not the figure directory, as
    ``contingency_table.npy`` / ``.csv`` and, after dropping labels with a
    zero count, as ``contingency_table2.npy`` /
    ``contingency_table_no_zeros.csv``.

    Parameters
    ----------
    overall_classifications : sequence of str
        Predicted label for every query cell.
    classifications : dict
        Maps a query-type name to the sequence of predicted labels for it.
    working_dir : str
        Parent directory for the output folder.
    """
    from collections import Counter

    working_dir = join(working_dir, "D_classification_histograms")
    if not exists(working_dir):
        makedirs(working_dir)

    _save_count_histogram(overall_classifications, working_dir, "overall")

    control_clfs = []
    disease_clfs = []
    for query_type, clfs in classifications.items():
        _save_count_histogram(clfs, working_dir, query_type)
        if 'control' in query_type:
            control_clfs.extend(clfs)
        elif 'disease' in query_type:
            disease_clfs.extend(clfs)

    db_types_control = _save_count_histogram(control_clfs, working_dir,
                                             "control_all")
    db_types_disease = _save_count_histogram(disease_clfs, working_dir,
                                             "disease_all")

    # merge the cases
    merged_db_types = np.union1d(db_types_control, db_types_disease)

    # Tally each group once; a Counter returns 0 for labels it never saw.
    control_tally = Counter(control_clfs)
    control_counts = np.array(
        [control_tally[label] for label in merged_db_types], dtype=float)
    print(control_counts)
    control_fracs = control_counts / len(control_clfs)

    disease_tally = Counter(disease_clfs)
    disease_counts = np.array(
        [disease_tally[label] for label in merged_db_types], dtype=float)
    print(disease_counts)
    disease_fracs = disease_counts / len(disease_clfs)

    width = 0.35
    # Order every per-label array by descending control fraction.
    labels_sort_idx = np.argsort(control_fracs)[::-1]
    merged_db_types = merged_db_types[labels_sort_idx]
    disease_fracs = disease_fracs[labels_sort_idx]
    disease_counts = disease_counts[labels_sort_idx]
    control_fracs = control_fracs[labels_sort_idx]
    control_counts = control_counts[labels_sort_idx]
    # Drop the first whitespace-separated token of each label for display.
    # NOTE(review): presumably an ID prefix -- confirm against the label format.
    merged_db_types = np.array(
        [' '.join(name.split()[1:]) for name in merged_db_types])

    _save_grouped_fraction_chart(merged_db_types, control_fracs,
                                 disease_fracs, width, working_dir,
                                 "grouped_bar")

    # Fisher's test comparing the two (control vs disease)
    contingency_table = np.stack((control_counts, disease_counts), axis=0)
    np.save('contingency_table', contingency_table)
    np.savetxt('contingency_table.csv', contingency_table, delimiter=',')
    print(contingency_table.shape)

    # Keep only labels observed at least once in BOTH groups.
    keep = np.flatnonzero(np.all(contingency_table != 0, axis=0))
    merged_db_types = merged_db_types[keep]
    control_counts = control_counts[keep]
    control_fracs = control_counts / np.sum(control_counts)
    disease_counts = disease_counts[keep]
    disease_fracs = disease_counts / np.sum(disease_counts)
    contingency_table = np.stack((control_counts, disease_counts), axis=0)

    _save_grouped_fraction_chart(merged_db_types, control_fracs,
                                 disease_fracs, width, working_dir,
                                 "grouped_bar_non_zero")

    np.save('contingency_table2', contingency_table)
    np.savetxt('contingency_table_no_zeros.csv', contingency_table,
               delimiter=',')
    print(contingency_table.shape)
def _find_events(data, first_samp, verbose=None, output='onset',
                 consecutive='increasing', min_samples=0, mask=0,
                 uint_cast=False):
    """Find events in a trigger channel (helper for find events).

    Parameters
    ----------
    data : ndarray
        Trigger-channel data; cast to integers before processing.
    first_samp : int
        Passed through to ``_find_stim_steps``.
    verbose : bool | str | int | None
        Not referenced in this body (presumably consumed by a decorator or
        kept for signature compatibility).
    output : 'onset' | 'step' | 'offset'
        Which rows to return: onsets only, the union of onsets and offsets,
        or offsets (relabelled with the matching onset's value and moved one
        sample earlier).
    consecutive : bool | 'increasing'
        How value-to-value transitions are treated. ``'increasing'`` counts
        only rising steps as onsets, any other truthy value counts every
        non-zero new value, and a falsy value counts only steps starting
        from zero.
    min_samples : float
        Converted to the ``merge`` argument of ``_find_stim_steps``: 0 when
        not positive, otherwise its floor, reduced by one when
        ``min_samples`` is a whole number.
    mask : int
        Passed to ``_mask_trigs``.
    uint_cast : bool
        If True, round-trip the data through ``uint16`` before processing.

    Returns
    -------
    events : ndarray, shape (n_events, 3)
        Columns are presumably (sample, previous value, new value), as
        produced by ``_find_stim_steps``. An empty ``int32`` array of shape
        (0, 3) is returned when there are no onsets or no offsets.

    Raises
    ------
    ValueError
        If ``output`` is not one of the supported values. The check runs
        only after events were found, as before.
    """
    if min_samples > 0:
        merge = int(min_samples // 1)
        if merge == min_samples:
            merge -= 1
    else:
        merge = 0

    # ``np.int`` was only an alias of the builtin ``int`` and was removed in
    # NumPy 1.24; the builtin gives the same dtype on every NumPy version.
    data = data.astype(int)
    if uint_cast:
        data = data.astype(np.uint16).astype(int)
    if data.min() < 0:
        warn('Trigger channel contains negative values, using absolute '
             'value. If data were acquired on a Neuromag system with '
             'STI016 active, consider using uint_cast=True to work around '
             'an acquisition bug')
        data = np.abs(data)  # make sure trig channel is positive

    events = _find_stim_steps(data, first_samp, pad_stop=0, merge=merge)
    events = _mask_trigs(events, mask)

    # Determine event onsets and offsets
    if consecutive == 'increasing':
        onsets = (events[:, 2] > events[:, 1])
        offsets = np.logical_and(np.logical_or(onsets, (events[:, 2] == 0)),
                                 (events[:, 1] > 0))
    elif consecutive:
        onsets = (events[:, 2] > 0)
        offsets = (events[:, 1] > 0)
    else:
        onsets = (events[:, 1] == 0)
        offsets = (events[:, 2] == 0)

    onset_idx = np.where(onsets)[0]
    offset_idx = np.where(offsets)[0]

    if len(onset_idx) == 0 or len(offset_idx) == 0:
        return np.empty((0, 3), dtype='int32')

    # delete orphaned onsets/offsets
    if onset_idx[0] > offset_idx[0]:
        logger.info("Removing orphaned offset at the beginning of the file.")
        offset_idx = np.delete(offset_idx, 0)

    if onset_idx[-1] > offset_idx[-1]:
        logger.info("Removing orphaned onset at the end of the file.")
        onset_idx = np.delete(onset_idx, -1)

    if output == 'onset':
        events = events[onset_idx]
    elif output == 'step':
        idx = np.union1d(onset_idx, offset_idx)
        events = events[idx]
    elif output == 'offset':
        # Report each offset with its onset's value as the event id, one
        # sample before the step.
        event_id = events[onset_idx, 2]
        events = events[offset_idx]
        events[:, 1] = events[:, 2]
        events[:, 2] = event_id
        events[:, 0] -= 1
    else:
        # ValueError subclasses Exception, so existing handlers still match.
        raise ValueError("Invalid output parameter %r" % output)

    logger.info("%s events found" % len(events))
    logger.info("Events id: %s" % np.unique(events[:, 2]))

    return events
# Parse command-line options and record CUDA availability on the namespace.
args = parser.parse_args()
args.cuda = torch.cuda.is_available()
print('cuda: %s' % args.cuda)
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# Seed NumPy and PyTorch (and CUDA when available) from the same seed.
np.random.seed(args.seed)
torch.manual_seed(args.seed)
if args.cuda:
    torch.cuda.manual_seed(args.seed)

# Load the dataset named on the command line, rooted at /tmp/.
data = Dataset(root='/tmp/', name=args.dataset)
adj, features, labels = data.adj, data.features, data.labels
idx_train, idx_val, idx_test = data.idx_train, data.idx_val, data.idx_test
# Validation and test indices together form the unlabeled set.
idx_unlabeled = np.union1d(idx_val, idx_test)

# Setup Attack Model
model = DICE()
# Perturbation budget: ptb_rate times half the adjacency sum.
# NOTE(review): adj.sum()//2 is presumably the undirected edge count of a
# symmetric 0/1 adjacency matrix -- confirm.
n_perturbations = int(args.ptb_rate * (adj.sum()//2))
modified_adj = model.attack(adj, labels, n_perturbations)

# Preprocess the clean graph onto the device; the attacked adjacency is
# normalized and converted to a torch sparse tensor separately.
adj, features, labels = preprocess(adj, features, labels, preprocess_adj=False, sparse=True, device=device)
modified_adj = normalize_adj(modified_adj)
modified_adj = sparse_mx_to_torch_sparse_tensor(modified_adj)
modified_adj = modified_adj.to(device)

def test(adj):
# define our X variables:
# add technical analysis features, filling NaN values
data = add_all_ta_features(data, "Open", "High", "Low", "Close", "Volume", fillna=True)
print(data.tail(2))

# define our Y variable:
# evaluating the percentage of change in stock prices.
# This is log(previous close) - log(current close), i.e. the negated one-step
# log return; the first row is NaN because it has no previous close.
data["diff"] = np.log(data["Close"].shift(1)) - np.log(data["Close"])
target = 'diff'

# drop features corresponding to current stock price (e.g. Open, High, Low, Close, Volume)
features = ['momentum_ao', 'momentum_mfi', 'volume_adi', 'volume_em',
            'volatility_bbhi', 'volatility_bbli', 'trend_adx']
# union1d returns the sorted, de-duplicated column names.
columns = np.union1d(['diff', 'Close'], features)
data = data[columns]

# plot heatmap of feature correlation:
corr = data.corr().abs()  # construction of correlation matrix
# generate a mask to ignore upper triangle
# (``np.bool`` was removed in NumPy 1.24; the builtin ``bool`` is the same dtype)
mask = np.zeros_like(corr, dtype=bool)
mask[np.triu_indices_from(mask)] = True
sns.heatmap(corr, xticklabels=corr.columns, yticklabels=corr.columns,
            annot=True, mask=mask, cmap='coolwarm', fmt='.2f')
plt.show()

# drop rows and columns with NaN:
# first drop columns that mostly have NaN: keep only columns with at least
# 100 non-NaN values. The original also passed ``how=any`` (the builtin, not
# the string 'any'). Older pandas ignored ``how`` when ``thresh`` was given,
# and pandas >= 2.0 raises TypeError if both are set, so ``thresh`` alone
# keeps the result and works on current pandas.
data.dropna(axis=1, thresh=100, inplace=True)
def performConvolution(self, out_times=None, ntrunc=None, topo=None,
                       verbose=False, eliter=5, nrem=1, massconerr=1e-2):
    """Convolve an ice load and an earth response model in fft space.

    Calculate the uplift associated with the stored earth and ice models.

    Parameters
    ----------
    out_times : array or None
        Times at which to calculate the convolution. If None (default), the
        previously stored ``self.out_times`` is used; otherwise the given
        value is also stored on ``self``.
    ntrunc : int
        The truncation number of the spherical harmonic expansion. Default
        is ``min(earth.nmax, ice.nlat-1)``; must be <= ``ice.nlat-1``.
    topo : array
        Topography on which to compute. If None, ``self.topo`` is used when
        set; if that is also None, no water-load redistribution is done and
        the load is just the ice change. Must have the same shape as ice.
    verbose : boolean
        If True, print a message for each load stage whose relative n=0
        load term is at or above ``massconerr``. Default is False.
    eliter : int
        The maximum number of iterations allowed to compute the initial
        elastic response to the redistributed load at each stage. If 0, the
        instantaneous elastic response is not computed. Default 5.
    nrem : int
        Number of removal stages between the provided ice stages
        (intermediate steps are interpolated linearly). Default 1.
    massconerr : float
        Tolerance used twice: as the convergence threshold for the elastic
        iteration, and as the mass-conservation level above which the
        ``verbose`` message is printed. Default 1e-2.

    Returns
    -------
    observerDict : GiaSimOutput
        A dictionary whose keys are fields of interest, such as uplift
        ('upl'), geoid ('geo'), and solid surface topography ('sstopo'),
        computed on the input grid at out_times.

    Raises
    ------
    AssertionError
        If ``topo`` and ice shapes differ, ``ntrunc`` exceeds ``ice.nlat-1``,
        or no output times are available.
    """

    DENICE = 931.    #934.    # kg/m^3
    DENWAT = 1000.   #999.    # kg/m^3
    DENSEA = 1000.   #1029.   # kg/m^3
    GSURF = 9.815    # m/s^2
    #PAperM = DENSEA*GSURF
    # NOTE(review): DENWAT and GSURF are not referenced again in this method.
    NREM = nrem      # number of intermediate steps

    earth = self.earth
    ice = self.ice
    grid = self.grid

    # Fall back to the stored topography when none is passed in.
    if topo is None and self.topo is not None:
        topo = self.topo
    if topo is not None:
        assert topo.shape == ice.shape, 'Topo and Ice must have the same shape'

    # Resolution
    ntrunc = ntrunc or min(earth.nmax, ice.nlat-1)
    assert ntrunc <= ice.nlat-1, 'ntrunc > ice.nlat-1'
    ms, ns = spharm.getspecindx(ice.nlat-1)
    # npad is the indices in the larger (padded) array of spherical
    # harmonics that correspond to the smaller (response) array.
    npad = (ns <= ntrunc)

    # Store out_times
    if out_times is None:
        out_times = self.out_times
    else:
        out_times = out_times  # no-op self-assignment
        self.out_times = out_times
    assert out_times is not None, 'out_times is not set'

    # Calculate times of intermediate removal stages.
    diffs = np.diff(ice.times)
    addRemovalTimes = []
    for i in range(1, NREM+1):
        addRemovalTimes.append(ice.times[:-1]+i*diffs/NREM)
    addRemovalTimes = np.array(addRemovalTimes).flatten()
    # union1d sorts ascending and removes duplicates; [::-1] then makes both
    # time arrays descending.
    remTimes = np.union1d(ice.times, addRemovalTimes)[::-1]
    calcTimes = np.union1d(remTimes, out_times)[::-1]

    # Initialize output observer
    observerDict = initialize_output(self, out_times, calcTimes,
                                     ice.nlat-1, ntrunc, ns, ice.shape)

    for o in observerDict:
        o.loadStageUpdate(ice.times[0], sstopo=topo)

    esl = 0                 # Equivalent sea level assumed to start at 0.

    # Response at zero elapsed time, used below as the instantaneous response.
    elRespArray = earth.getResp(0.)
    # NOTE(review): zeros_like(ns) inherits the dtype of ns; if ns is an
    # integer index array, the values assigned below are truncated to
    # integers -- confirm the dtype, or that this is intended.
    ssResp = np.zeros_like(ns)
    ssResp[npad] = observerDict['SS'].isolateRespArray(elRespArray)

    # Convolve each ice stage to the each output time.
    # Primary loop: over ice load changes.
    for icea, ta, iceb, tb in ice.pairIter():
        ################### LOAD STAGE CALCULATION ###################
        # Determine the water load redistribution for ice, uplift, and
        # geoid changes between ta and tb,
        if topo is not None:
            # Get index for starting time.
            nta = observerDict['SS'].locateByTime(ta)
            # Collect the solid-surface topography at beginning of step.
            Ta = observerDict['sstopo'].array[nta]

            # Redistribute the ocean by change in ocean floor / surface.
            ssa, ssb = observerDict['SS'].array[[nta, nta+1]]
            dSS = self.harmTrans.spectogrd(ssb-ssa)
            dhwBarU = sealevelChangeByUplift(dSS, Ta+DENICE/DENSEA*icea,
                                             grid)
            dhwU = oceanUpliftLoad(dhwBarU, Ta+DENICE/DENSEA*icea, dSS)

            # Update the solid-surface topography with uplift / geoid.
            Tb = Ta + dSS - dhwBarU
            esl += dhwBarU

            dLoad = dhwU.copy()
            dwLoad = dhwU.copy()        # Save the water load

            # Redistribute ice, consistent with current floating ice.
            dILoad, dhwBarI = floatingIceRedistribute(icea, iceb, Tb, grid,
                                                      DENICE/DENSEA)

            # Combine loads from ocean changes and ice volume changes.
            dLoad += dILoad
            esl += dhwBarI
            dwLoad += volumeChangeLoad(dhwBarI, Tb+DENICE/DENSEA*iceb)
            Tb -= dhwBarI

            # Calculate instantaneous (elastic and gravity) responses to
            # the load shift and redistribute ocean accordingly.
            # Note: WE DO NOT CURRENTLY RECHECK FOR FLOATING ICE LOADS.
            if eliter:
                # Get elastic and geoid response to the water load.
                # Find the elastic uplift in response to stage's load
                # redistribution.
                dSSel = self.harmTrans.spectogrd((ssResp)*\
                            self.harmTrans.grdtospec(dLoad))
                dhwBarUel = sealevelChangeByUplift(dSSel,
                                                   Tb+DENICE/DENSEA*iceb,
                                                   grid)
                dhwUel = oceanUpliftLoad(dhwBarUel,
                                         Tb+DENICE/DENSEA*iceb, dSSel)

                Tb = Tb + dSSel - dhwBarUel
                esl += dhwBarUel
                dLoad = dLoad + dhwUel
                dwLoad += dhwUel

                # Iterate elastic responses until they are sufficiently small.
                for i in range(eliter):
                    # Need to save elastic uplift and geoid at each iteration
                    # to compare to previous steps for convergence.
                    dSSelp = self.harmTrans.spectogrd((ssResp)*\
                                self.harmTrans.grdtospec(dhwUel))
                    dhwBarUel = sealevelChangeByUplift(dSSelp,
                                                       Tb+DENICE/DENSEA*iceb,
                                                       grid)
                    dhwUel = oceanUpliftLoad(dhwBarUel,
                                             Tb+DENICE/DENSEA*iceb, dSSelp)

                    # Correct topography
                    Tb = Tb + dSSelp - dhwBarUel
                    esl += dhwBarUel
                    dLoad = dLoad + dhwUel
                    dwLoad += dhwUel

                    # Truncation error from further iteration
                    # Ratio of this correction's mean magnitude to that of
                    # the elastic uplift accumulated so far.
                    err = np.mean(np.abs(dSSelp))/np.mean(np.abs(dSSel))
                    if err <= massconerr:
                        # The converged correction is not folded into dSSel.
                        break
                    else:
                        dSSel = dSSel + dSSelp
                        continue

                # Add the accumulated elastic uplift to the stored 'SS'
                # spectrum for the end of this stage.
                observerDict['SS'].array[nta+1] += self.harmTrans.grdtospec(dSSel)

            for o in observerDict:
                # Topography and load for time tb are updated and saved.
                # topo adds the ice thickness only where
                # Tb + DENICE/DENSEA*iceb >= 0 (presumably grounded ice --
                # confirm).
                o.loadStageUpdate(tb, dLoad=dLoad,
                                  topo=Tb+iceb*(Tb + DENICE/DENSEA*iceb>=0),
                                  esl=esl, dwLoad=dwLoad, sstopo=Tb)

        else:
            # No topography: the load is the ice change scaled by
            # DENICE/DENSEA, with no ocean redistribution.
            dLoad = (iceb-icea)*DENICE/DENSEA
            Tb = None

            for o in observerDict:
                # Topography and load for time tb are updated and saved.
                o.loadStageUpdate(tb, dLoad=dLoad)

        # Transform load change into spherical harmonics.
        # Dividing by NREM spreads the change evenly over the removal stages.
        loadChangeSpec = self.harmTrans.grdtospec(dLoad)/NREM

        # Check for mass conservation.
        massConCheck = np.abs(loadChangeSpec[0])/np.abs(loadChangeSpec.max())
        if verbose and massConCheck >= massconerr:
            print("Load at {0} doesn't conserve mass: {1}.".format(ta,
                                                        massConCheck))
        # N.B. the n=0 load should be zero in cases of glacial isostasy, as
        # mass is conserved during redistribution.

        ################# RESPONSE STAGE CALCULATION #################
        # Secondary loop: over output times.
        # linspace(tb, ta, NREM, endpoint=False) yields NREM removal times
        # starting at tb and excluding ta; [::-1] reverses their order.
        for inter_time in np.linspace(tb, ta, NREM, endpoint=False)[::-1]:
            # Perform the time convolution for each output time
            # Only times with a smaller value than the removal time respond.
            for t_out in calcTimes[calcTimes < inter_time]:
                respArray = earth.getResp(inter_time-t_out)
                for o in observerDict:
                    o.respStageUpdate(t_out, respArray,
                                      DENSEA*loadChangeSpec)

    # Don't keep the intermediate uplift stages for water redistribution
    #observerDict.removeObserver('eslUpl', 'eslGeo')

    return observerDict