def test_morph_labels():
    """Test morph_labels."""
    # Just process the first 5 labels for speed
    parc_fsaverage = read_labels_from_annot(
        'fsaverage', 'aparc', subjects_dir=subjects_dir)[:5]
    parc_sample = read_labels_from_annot(
        'sample', 'aparc', subjects_dir=subjects_dir)[:5]
    parc_fssamp = morph_labels(
        parc_fsaverage, 'sample', subjects_dir=subjects_dir)
    for lf, ls, lfs in zip(parc_fsaverage, parc_sample, parc_fssamp):
        assert lf.hemi == ls.hemi == lfs.hemi
        assert lf.name == ls.name == lfs.name
        perc_1 = np.in1d(lfs.vertices, ls.vertices).mean() * 100
        perc_2 = np.in1d(ls.vertices, lfs.vertices).mean() * 100
        # Ideally this would be 100%, but we do not use the same algorithm
        # as FreeSurfer ...
        assert perc_1 > 92
        assert perc_2 > 88
    with pytest.raises(ValueError, match='wrong and fsaverage'):
        morph_labels(parc_fsaverage, 'sample', subjects_dir=subjects_dir,
                     subject_from='wrong')
    with pytest.raises(RuntimeError, match='Number of surface vertices'):
        _load_vert_pos('sample', subjects_dir, 'white', 'lh', 1)
    for label in parc_fsaverage:
        label.subject = None
    with pytest.raises(ValueError, match='subject_from must be provided'):
        morph_labels(parc_fsaverage, 'sample', subjects_dir=subjects_dir)
def compare(neurons1, spikes1, neurons2, spikes2):
    import matplotlib.pyplot as plt
    sn1, ss1 = sort(neurons1, spikes1)
    sn2, ss2 = sort(neurons2, spikes2)
    # sn1 = neurons1
    # ss1 = spikes1
    # sn2 = neurons2
    # ss2 = spikes2
    in1 = np.in1d(sn1, sn2)
    in2 = np.in1d(sn2, sn1)
    nin = len(sn1[in1])
    print("Neuron in 1 but not in 2:", len(sn1) - nin)
    print("Neuron in 2 but not in 1:", len(sn2) - nin)
    for i in range(nin):
        if np.any(ss1[in1][i] > 0) and np.any(ss2[in2][i] > 0):
            if len(ss1[in1][i]) == len(ss2[in2][i]):
                plt.plot(ss1[in1][i] - ss2[in2][i],
                         i * np.ones([len(ss1[in1][i]), 1]), '*')
            else:
                print("For neuron", sn1[in1][i],
                      "difference in length of spiketrains: ",
                      len(ss1[in1][i]) - len(ss2[in2][i]))
                print("ss1:", ss1[in1][i])
                print("ss2:", ss2[in2][i])
                # plt.plot(ss1[in1][i][:np.min(len(ss1[in1][i]), len(ss2[in2][i]))] - ss2[in2][i][:np.min(len(ss1[in1][i]), len(ss2[in2][i]))],
                #          i*np.ones([np.min(len(ss1[in1][i]), len(ss2[in2][i])), 1]), '*')
    plt.show()
def apply(self, group):
    res = self.sel.apply(group)
    if not res:
        return group[[]]  # empty selection

    # Fragment must come before self.prop_trans lookups!
    if self.prop == 'fragment':
        # Combine all fragments together, then check where group
        # indices are same as fragment(s) indices
        allfrags = functools.reduce(lambda x, y: x + y, res.fragments)
        mask = np.in1d(group.indices, allfrags.indices)
        return group[mask].unique

    # [xyz] must come before self.prop_trans lookups too!
    try:
        pos_idx = {'x': 0, 'y': 1, 'z': 2}[self.prop]
    except KeyError:
        # The self.prop string was already checked,
        # so don't need error checking here.
        # KeyError at this point is impossible!
        attrname = self.prop_trans[self.prop]
        vals = getattr(res, attrname)
        mask = np.in1d(getattr(group, attrname), vals)
        return group[mask].unique
    else:
        vals = res.positions[:, pos_idx]
        pos = group.positions[:, pos_idx]
        # isclose only does one value at a time
        mask = np.vstack([np.isclose(pos, v) for v in vals]).any(axis=0)
        return group[mask].unique
def get_destination_pathline_data(self, dest_cells):
    """Get pathline data for set of destination cells.

    Parameters
    ----------
    dest_cells : list or array of tuples
        (k, i, j) of each destination cell (zero-based)

    Returns
    -------
    pthldest : np.recarray
        Slice of pathline data array (e.g. PathlineFile._data)
        containing only pathlines with final k, i, j in dest_cells.
    """
    ra = self._data.view(np.recarray)
    # find the intersection of endpoints and dest_cells
    # convert dest_cells to same dtype for comparison
    raslice = ra[['k', 'i', 'j']]
    dest_cells = np.array(dest_cells, dtype=raslice.dtype)
    inds = np.in1d(raslice, dest_cells)
    epdest = ra[inds].copy().view(np.recarray)
    # use particle ids to get the rest of the paths
    inds = np.in1d(ra.particleid, epdest.particleid)
    pthldest = ra[inds].copy()
    pthldest.sort(order=['particleid', 'time'])
    return pthldest
def generateBatch(curinds, elements, atomArraysAll, nAtomsDict,
                  atomsIndsReverse, atomArraysAllDerivs):
    """This method generates batches from a large dataset using a set of
    selected indices curinds."""
    atomArraysFinal = {}
    atomArraysDerivsFinal = {}
    for element in elements:
        validKeys = np.in1d(atomsIndsReverse[element], curinds)
        if len(validKeys) > 0:
            atomArraysFinal[element] = atomArraysAll[element][validKeys]
            if len(atomArraysAllDerivs[element]) > 0:
                atomArraysDerivsFinal[element] = atomArraysAllDerivs[
                    element][validKeys, :, :, :]
            else:
                atomArraysDerivsFinal[element] = []
        else:
            atomArraysFinal[element] = []
            atomArraysDerivsFinal[element] = []

    atomInds = {}
    for element in elements:
        validKeys = np.in1d(atomsIndsReverse[element], curinds)
        if len(validKeys) > 0:
            atomIndsTemp = np.sum(atomsIndsReverse[element][validKeys], 1)
            atomInds[element] = atomIndsTemp * 0.
            for i in range(len(curinds)):
                atomInds[element][atomIndsTemp == curinds[i]] = i
        else:
            atomInds[element] = []

    return atomArraysFinal, atomArraysDerivsFinal, atomInds
def _sensoryComputeLearningMode(self, anchorInput):
    """
    Associate this location with a sensory input. Subsequently, anchorInput
    will activate the current location during anchor().

    @param anchorInput (numpy array)
    A sensory input. This will often come from a feature-location pair layer.
    """
    overlaps = self.connections.computeActivity(anchorInput,
                                                self.connectedPermanence)
    activeSegments = np.where(overlaps >= self.activationThreshold)[0]

    potentialOverlaps = self.connections.computeActivity(anchorInput)
    matchingSegments = np.where(potentialOverlaps >=
                                self.learningThreshold)[0]

    # Cells with an active segment: reinforce the segment
    cellsForActiveSegments = self.connections.mapSegmentsToCells(
        activeSegments)
    learningActiveSegments = activeSegments[
        np.in1d(cellsForActiveSegments, self.activeCells)]
    remainingCells = np.setdiff1d(self.activeCells, cellsForActiveSegments)

    # Remaining cells with a matching segment: reinforce the best
    # matching segment.
    candidateSegments = self.connections.filterSegmentsByCell(
        matchingSegments, remainingCells)
    cellsForCandidateSegments = (
        self.connections.mapSegmentsToCells(candidateSegments))
    candidateSegments = candidateSegments[
        np.in1d(cellsForCandidateSegments, remainingCells)]
    onePerCellFilter = np2.argmaxMulti(potentialOverlaps[candidateSegments],
                                       cellsForCandidateSegments)
    learningMatchingSegments = candidateSegments[onePerCellFilter]

    newSegmentCells = np.setdiff1d(remainingCells, cellsForCandidateSegments)

    for learningSegments in (learningActiveSegments,
                             learningMatchingSegments):
        self._learn(self.connections, self.rng, learningSegments,
                    anchorInput, potentialOverlaps,
                    self.initialPermanence, self.sampleSize,
                    self.permanenceIncrement, self.permanenceDecrement,
                    self.maxSynapsesPerSegment)

    # Remaining cells without a matching segment: grow one.
    numNewSynapses = len(anchorInput)

    if self.sampleSize != -1:
        numNewSynapses = min(numNewSynapses, self.sampleSize)

    if self.maxSynapsesPerSegment != -1:
        numNewSynapses = min(numNewSynapses, self.maxSynapsesPerSegment)

    newSegments = self.connections.createSegments(newSegmentCells)

    self.connections.growSynapsesToSample(
        newSegments, anchorInput, numNewSynapses,
        self.initialPermanence, self.rng)

    self.activeSegments = activeSegments
def test_match_mask():
    msk = np.array([True, False, True, False, False], dtype=bool)
    idx = np.array([0, 2])
    arr = np.array([1, 2, 3, 4, 5])
    values = np.array([1, 3])

    assert (num.match_mask(arr, values) == msk).all()
    ret = num.match_mask(arr, values, fullout=True)
    assert (ret[0] == msk).all()
    assert (ret[1] == idx).all()
    assert (arr[msk] == np.array([1, 3])).all()
    assert (ret[0] == np.in1d(arr, values)).all()

    # handle cases where len(values) > len(arr) and values not contained in arr
    values = np.array([1, 3, 3, 3, 7, 9, -3, -4, -5])
    ret = num.match_mask(arr, values, fullout=True)
    assert (ret[0] == msk).all()
    assert (ret[1] == idx).all()
    assert (ret[0] == np.in1d(arr, values)).all()

    # float values: use eps
    ret = num.match_mask(arr + 0.1, values, fullout=True, eps=0.2)
    assert (ret[0] == msk).all()
    assert (ret[1] == idx).all()

    msk = num.match_mask(np.array([1, 2]), np.array([3, 4]))
    assert (msk == np.array([False] * 2)).all()
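# For reference, a minimal sketch of what a match_mask-style helper can look
# like when built on np.in1d. The num.match_mask exercised above comes from the
# project's own num module, so this re-implementation (in particular the eps
# branch, written here as a symmetric pairwise tolerance compare) is only an
# illustrative assumption, not the actual source.
def match_mask_sketch(arr, values, fullout=False, eps=None):
    """Boolean mask of the entries of `arr` that occur in `values`.

    Hypothetical helper for illustration only.
    """
    arr = np.asarray(arr)
    values = np.asarray(values)
    if eps is None:
        # exact membership test
        msk = np.in1d(arr, values)
    else:
        # tolerant float matching: True where any value is within eps
        msk = (np.abs(arr[:, None] - values[None, :]) <= eps).any(axis=1)
    if fullout:
        return msk, np.flatnonzero(msk)
    return msk

# e.g. match_mask_sketch([1, 2, 3, 4, 5], [1, 3], fullout=True)
# -> (array([ True, False,  True, False, False]), array([0, 2]))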
def __init__(self, sensorLst):
    self.firstByte = 19
    self.packetInfo = [csp3.PacketDct[i] for i in sensorLst]
    self.sizeLst = np.array([i["size"] for i in self.packetInfo])
    self.numBytes = np.sum(self.sizeLst)
    self.packetTypes = [i["dtype"] for i in self.packetInfo]
    self.dataFormat = ">" + "".join(self.packetTypes)
    # Total size is based on the number of sensors, the size
    # of the data from the sensors, plus 3 bytes for checking integrity
    self.totalSize = len(sensorLst) + self.numBytes + 3
    # We want the indices where the sensor IDs (not their data) are located
    self.idIx = np.cumsum(np.append(np.array(2), self.sizeLst + 1))[:-1]
    self.idMask = np.in1d(np.arange(self.totalSize), self.idIx)
    # The indices where non-data (i.e., header, packet id, checksum) is located
    self.nonDataIx = np.concatenate((np.array([0, 1]),
                                     self.idIx,
                                     np.array([self.totalSize - 1, ])))
    # An index array for where non-data bytes appear
    self.nonDataMask = np.in1d(np.arange(self.totalSize - 1), self.nonDataIx)
    # The indices where data appears
    self.dataIx = np.arange(self.totalSize - 1)[~self.nonDataMask]
    # Make an array of the checkbits, of size equal to total length of packet
    tmp = np.zeros(self.totalSize)
    tmp[self.idMask] = np.array(sensorLst)
    self.packetCheck = tmp
    # Initialize the actual packet construction machinery
    self.lastPacket = []
    self.curPacket = []
    self.count = 0
    self.checksum = 0
    self.state = csp3.WAIT_HEADER
def _do_one_inner_iteration(self, inv_val):
    r"""
    Determine which throats are invaded at a given applied capillary
    pressure.
    """
    # Generate a tlist containing boolean values for throat state
    Tinvaded = self['throat.entry_pressure'] <= inv_val
    # Find all pores that can be invaded at specified pressure
    [pclusters, tclusters] = self._net.find_clusters2(mask=Tinvaded,
                                                      t_labels=True)
    if self._AL:
        # Identify clusters connected to invasion sites
        inv_clusters = sp.unique(pclusters[self['pore.inlets']])
    else:
        # All clusters are invasion sites
        inv_clusters = pclusters
    inv_clusters = inv_clusters[inv_clusters >= 0]
    # Find pores on the invading clusters
    pmask = np.in1d(pclusters, inv_clusters)
    # Store current applied pressure in newly invaded pores
    pinds = (self['pore.inv_Pc'] == sp.inf) * (pmask)
    self['pore.inv_Pc'][pinds] = inv_val
    # Find throats on the invading clusters
    tmask = np.in1d(tclusters, inv_clusters)
    # Store current applied pressure in newly invaded throats
    tinds = (self['throat.inv_Pc'] == sp.inf) * (tmask)
    self['throat.inv_Pc'][tinds] = inv_val
    # Store total network saturation
    tsat = sp.sum(self._net['throat.volume'][self['throat.inv_Pc'] <= inv_val])
    psat = sp.sum(self._net['pore.volume'][self['pore.inv_Pc'] <= inv_val])
    total = sp.sum(self._net['throat.volume']) + sp.sum(self._net['pore.volume'])
    self['pore.inv_sat'][pinds] = (tsat + psat) / total
    self['throat.inv_sat'][tinds] = (tsat + psat) / total
def average_AM_firing_rate(spikeTimestamps, eventOnsetTimes, behavData,
                           timeRange):
    currentFreq = behavData['currentFreq']
    possibleFreq = np.unique(currentFreq)
    fr_array = np.array([])
    # Only need to calculate this once, the loop then selects for each freq
    spikeTimesFromEventOnset, trialIndexForEachSpike, indexLimitsEachTrial = \
        spikesanalysis.eventlocked_spiketimes(
            spikeTimestamps, eventOnsetTimes, timeRange)
    for freq in possibleFreq:
        select = np.flatnonzero(currentFreq == freq)
        selectspikes = spikeTimesFromEventOnset[
            np.in1d(trialIndexForEachSpike, select)]
        selectinds = trialIndexForEachSpike[
            np.in1d(trialIndexForEachSpike, select)]
        selectlimits = indexLimitsEachTrial[:, select]
        numSpikesEachTrial = np.squeeze(np.diff(selectlimits, axis=0))
        spikeRateEachTrial = numSpikesEachTrial / float(timeRange[1] -
                                                        timeRange[0])
        averageFR = spikeRateEachTrial.mean()
        fr_array = np.concatenate((fr_array, np.array([averageFR])))
    return fr_array
def Check_Result(self, Str_DataName, Int_DataNum, List_PeakIdx):
    Array_MyAnswer = np.array(List_PeakIdx)
    Array_MyAnswer = np.unique(Array_MyAnswer)
    Array_Anno = self.Load_Answer(Str_DataName, Int_DataNum)

    Int_TP = 0
    Int_FP = 0
    Int_FN = 0
    Int_BufferSize = 2

    for myanswer in Array_MyAnswer:
        Array_BufferMyAnswer = range(myanswer - Int_BufferSize,
                                     myanswer + Int_BufferSize)
        Array_BufferMyAnswer = np.array(Array_BufferMyAnswer)
        Array_InorNOT = np.in1d(Array_BufferMyAnswer, Array_Anno)
        if True in Array_InorNOT:
            Int_TP += 1
        elif True not in Array_InorNOT:
            Int_FP += 1

    for trueanswer in Array_Anno:
        Array_BufferMyAnswer = range(trueanswer - Int_BufferSize,
                                     trueanswer + Int_BufferSize)
        Array_BufferMyAnswer = np.array(Array_BufferMyAnswer)
        Array_InorNOT = np.in1d(Array_BufferMyAnswer, Array_MyAnswer)
        if True not in Array_InorNOT:
            Int_FN += 1

    Flt_Se = float(Int_TP) / float(Int_TP + Int_FN)
    Flt_PP = float(Int_TP) / float(Int_TP + Int_FP)
    return Str_DataName, Int_DataNum, Flt_Se, Flt_PP
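# The buffer logic above is tolerance matching: a detected peak counts as a
# true positive if any index within a small window around it appears in the
# annotation. A tiny standalone illustration of the same np.in1d pattern, with
# made-up peak indices and a symmetric +/- 2 window (the method above uses an
# asymmetric range), and the resulting sensitivity / positive predictivity:
import numpy as np

annotated = np.array([100, 250, 400])        # hypothetical reference peaks
detected = np.array([101, 251, 399, 520])    # hypothetical detector output
buffer_size = 2

tp = sum(np.in1d(np.arange(d - buffer_size, d + buffer_size + 1),
                 annotated).any() for d in detected)
fp = len(detected) - tp
fn = sum(~np.in1d(np.arange(a - buffer_size, a + buffer_size + 1),
                  detected).any() for a in annotated)

se = tp / (tp + fn)    # sensitivity
ppv = tp / (tp + fp)   # positive predictivity
print(tp, fp, fn, se, ppv)  # 3 1 0 1.0 0.75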
def generate_throats(self):
    r"""
    Generate the throats (connections, numbering and types)
    """
    self._logger.info("generate_throats: Define connections between pores")

    img = self._net_img
    [Nx, Ny, Nz] = np.shape(img)
    Np = Nx * Ny * Nz
    ind = np.arange(0, Np)

    # Generate throats based on pattern of the adjacency matrix
    tpore1_1 = ind[(ind % Nx) < (Nx - 1)]
    tpore2_1 = tpore1_1 + 1
    tpore1_2 = ind[(ind % (Nx * Ny)) < (Nx * (Ny - 1))]
    tpore2_2 = tpore1_2 + Nx
    tpore1_3 = ind[(ind % Np) < (Nx * Ny * (Nz - 1))]
    tpore2_3 = tpore1_3 + Nx * Ny

    tpore1 = np.hstack((tpore1_1, tpore1_2, tpore1_3))
    tpore2 = np.hstack((tpore2_1, tpore2_2, tpore2_3))
    connections = np.vstack((tpore1, tpore2)).T
    connections = connections[np.lexsort((connections[:, 1],
                                          connections[:, 0]))]

    # Remove throats to non-active pores
    img_ind = np.ravel_multi_index(np.nonzero(img), dims=np.shape(img),
                                   order='F')
    temp0 = np.in1d(connections[:, 0], img_ind)
    temp1 = np.in1d(connections[:, 1], img_ind)
    tind = temp0 * temp1
    connections = connections[tind]

    self._net.throat_properties['connections'] = \
        self._voxel_to_pore_map[connections]
    self._net.throat_properties['type'] = np.zeros(np.sum(tind))
    self._net.throat_properties['numbering'] = np.arange(0, np.sum(tind))
    self._logger.debug("generate_throats: End of method")
def AM_vector_strength(spikeTimestamps, eventOnsetTimes, behavData,
                       timeRange):
    currentFreq = behavData['currentFreq']
    possibleFreq = np.unique(currentFreq)
    vs_array = np.array([])
    ral_array = np.array([])
    pval_array = np.array([])
    timeRange = [0, 0.5]
    spikeTimesFromEventOnset, trialIndexForEachSpike, indexLimitsEachTrial = \
        spikesanalysis.eventlocked_spiketimes(
            spikeTimestamps, eventOnsetTimes, timeRange)
    for freq in possibleFreq:
        select = np.flatnonzero(currentFreq == freq)
        selectspikes = spikeTimesFromEventOnset[
            np.in1d(trialIndexForEachSpike, select)]
        selectinds = trialIndexForEachSpike[
            np.in1d(trialIndexForEachSpike, select)]
        squeezedinds = np.array([list(np.unique(selectinds)).index(x)
                                 for x in selectinds])
        spikesAfterFirstCycle = selectspikes[selectspikes > (1.0 / freq)]
        indsAfterFirstCycle = selectinds[selectspikes > (1.0 / freq)]
        strength, phase = vectorstrength(spikesAfterFirstCycle, 1.0 / freq)
        vs_array = np.concatenate((vs_array, np.array([strength])))
        # Compute the pval for the vector strength
        radsPerSec = freq * 2 * np.pi
        spikeRads = (spikesAfterFirstCycle * radsPerSec) % (2 * np.pi)
        ral_test = circstats.rayleigh_test(spikeRads)
        pval = np.array([ral_test['pvalue']])
        ral = np.array([2 * len(spikesAfterFirstCycle) * (strength ** 2)])
        pval_array = np.concatenate((pval_array, pval))
        ral_array = np.concatenate((ral_array, ral))
    return vs_array, pval_array, ral_array
def test_group_shuffle_split():
    for groups_i in test_groups:
        X = y = np.ones(len(groups_i))
        n_splits = 6
        test_size = 1. / 3
        slo = GroupShuffleSplit(n_splits, test_size=test_size, random_state=0)

        # Make sure the repr works
        repr(slo)

        # Test that the length is correct
        assert_equal(slo.get_n_splits(X, y, groups=groups_i), n_splits)

        l_unique = np.unique(groups_i)
        l = np.asarray(groups_i)

        for train, test in slo.split(X, y, groups=groups_i):
            # First test: no train group is in the test set and vice versa
            l_train_unique = np.unique(l[train])
            l_test_unique = np.unique(l[test])
            assert_false(np.any(np.in1d(l[train], l_test_unique)))
            assert_false(np.any(np.in1d(l[test], l_train_unique)))

            # Second test: train and test add up to all the data
            assert_equal(l[train].size + l[test].size, l.size)

            # Third test: train and test are disjoint
            assert_array_equal(np.intersect1d(train, test), [])

            # Fourth test:
            # unique train and test groups are correct, +- 1 for rounding error
            assert_true(abs(len(l_test_unique) -
                            round(test_size * len(l_unique))) <= 1)
            assert_true(abs(len(l_train_unique) -
                            round((1.0 - test_size) * len(l_unique))) <= 1)
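# The same group-disjointness property can be checked outside the test
# harness. A minimal, self-contained sketch with toy group labels and plain
# asserts instead of the nose-style helpers used above:
import numpy as np
from sklearn.model_selection import GroupShuffleSplit

groups = np.array([1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6])
X = y = np.ones(len(groups))

gss = GroupShuffleSplit(n_splits=4, test_size=1. / 3, random_state=0)
for train, test in gss.split(X, y, groups=groups):
    # no group may appear on both sides of the split
    assert not np.any(np.in1d(groups[train], np.unique(groups[test])))
    assert not np.any(np.in1d(groups[test], np.unique(groups[train])))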
def Pred_EOF_CCA(self):
    '''
    Prediction module. Still needs refinement; several parts require
    further work.
    '''
    I_Year = self.I_Year
    I_YearP = self.I_YearP

    print('I_Year=', I_Year)
    print('I_YearP=', I_YearP)
    # print(self.Field[:, 0, 0])
    # print(self.FieldP[:, 0, 0])
    # sys.exit(0)

    Region = self.Region[:, np.in1d(I_Year, I_YearP)]
    print('I_YearR=', I_Year[np.in1d(I_Year, I_YearP)])

    FieldP = self.FieldP[:, self.p_np3]  # the field data after filtering
    FieldP = FieldP.T
    FieldP2 = FieldP[:, np.in1d(I_YearP, I_Year)]

    print(FieldP2.shape, np.atleast_2d(FieldP[:, -1]).T.shape)
    print('FieldP.shape = ', FieldP.shape)
    print('FieldP2.shape = ', FieldP2.shape)
    print('Region.shape = ', Region.shape)

    self.X_Pre = dclim.dpre_eof_cca(FieldP2, Region,
                                    np.atleast_2d(FieldP[:, -1]).T, 4)
    print(self.X_Pre.shape)

    self.out = np.hstack((self.StaLatLon, self.X_Pre))
    print('Pred Year is ', I_YearP[-1])
    np.savetxt('out.txt', self.out, fmt='%5d %7.2f %7.2f %7.2f',
               delimiter=' ')
def __init__(self, filename_list):
    n_file = np.size(filename_list)
    for i_file in np.arange(n_file):
        print('Adding %s to build TargetSurvey %d files to go'
              % (filename_list[i_file], n_file - i_file))
        tmp = TargetTile(filename_list[i_file])
        # The first file is a simple initialization
        if(i_file == 0):
            self.type = tmp.type.copy()
            self.id = tmp.id.copy()
            self.n_observed = tmp.n_observed.copy()
            self.assigned_type = tmp.assigned_type.copy()
            self.assigned_z = tmp.assigned_z.copy()
            self.tile_names = []
            for i in np.arange(np.size(self.id)):
                self.tile_names.append([filename_list[i_file]])
        else:
            # the other files have to take into account the overlap
            mask = np.in1d(self.id, tmp.id)
            if((len(self.tile_names) != np.size(self.id))):
                raise ValueError('Building TargetSurvey the number of items '
                                 'in the filenames is not the same as in the '
                                 'ids.')
            for i in np.arange(np.size(self.id)):
                if(mask[i] == True):
                    self.tile_names[i].append(filename_list[i_file])
            mask = np.in1d(tmp.id, self.id, invert=True)
            n_new = np.size(np.where(mask == True))
            self.id = np.append(self.id, tmp.id[mask])
            self.type = np.append(self.type, tmp.type[mask])
            self.n_observed = np.append(self.n_observed, tmp.n_observed[mask])
            self.assigned_type = np.append(self.assigned_type,
                                           tmp.assigned_type[mask])
            self.assigned_z = np.append(self.assigned_z, tmp.assigned_z[mask])
            for i in np.arange(n_new):
                self.tile_names.append([filename_list[i_file]])
    self.n_targets = np.size(self.id)
def filter_effects(self):
    """
    Merge effects and data, and flip effect alleles
    """
    effect_positions = self.effects[["CHR", "POS"]]
    data_positions = self.data.snp[["CHR", "POS"]]

    effect_include = np.in1d(effect_positions, data_positions)
    data_include = np.in1d(data_positions, effect_positions)

    self.data.filter_snps(data_include)
    self.effects = self.effects[effect_include]

    # Just give up and convert to float. I have no idea why int doesn't work
    # here but it's something to do with the fact that you can't have None as
    # a numpy int, whereas float gets converted to nan.
    tmp_data = nprec.append_fields(self.data.snp, "GENO", None,
                                   dtypes=[(float, self.data.geno.shape[1])],
                                   usemask=False)
    tmp_data["GENO"] = self.data.geno

    self.effects = nprec.join_by(["CHR", "POS"], self.effects, tmp_data,
                                 usemask=False, jointype="inner")

    flipped = 0
    removed = 0
    for rec in self.effects:
        if rec["EFFECT"] == rec["REF"] and rec["OTHER"] == rec["ALT"]:
            pass
        elif rec["OTHER"] == rec["REF"] and rec["EFFECT"] == rec["ALT"]:
            flipped += 1
            rec["OTHER"] = rec["ALT"]
            rec["EFFECT"] = rec["REF"]
            rec["BETA"] = -rec["BETA"]
        else:
            removed += 1
            rec["EFFECT"] = rec["OTHER"] = "N"

    self.effects = self.effects[self.effects["EFFECT"] != "N"]
    print("Removed " + str(removed) + " non-matching alleles", file=sys.stderr)
    print("Flipped " + str(flipped) + " alleles", file=sys.stderr)
def edgetype(G):
    """ edge type

    Examples
    --------

    .. plot::
        :include-source:

        >>> from pylayers.util.geomutil import *
        >>> import shapely.geometry as shg
        >>> import matplotlib.pyplot as plt
        >>> points = shg.MultiPoint([(0, 0), (0, 1), (1, 1), (1.5, 1), (2.5, 1), (2.5, 2), (2.8, 2), (2.8, 1.1), (3.2, 1.1), (3.2, 0.7), (0.4, 0.7), (0.4, 0)])
        >>> polyg = Polygon(points)
        >>> Gv = polyg.buildGv(show=True)
        >>> plt.show()

    """
    edges = np.array(G.edges())
    tedg = np.array(G.edges())
    eprod = np.prod(tedg, axis=1)
    esum = np.sum(tedg, axis=1)
    inded = np.nonzero(eprod < 0)[0]
    ekeep = np.nonzero(eprod > 0)[0]
    ieded = np.nonzero(esum > 0)[0]
    indnd = np.nonzero(esum < 0)[0]
    u1 = np.in1d(ieded, ekeep)
    u2 = np.in1d(indnd, ekeep)

    nded = list(edges[inded])
    eded = list(edges[ieded[u1]])
    ndnd = list(edges[indnd[u2]])

    return(ndnd, nded, eded)
def fn_get_COV_from_JK(kappa_qe_arr, noofsims):
    nx, ny = kappa_qe_arr[0].shape
    dx = dy = 0.5
    boxsize = nx * dx
    mapparams = [nx, ny, dx, dy]

    # cluster stuff
    clra, cldec = 0., 0.
    minval, maxval = clra - boxsize / 2 / 60., clra + boxsize / 2 / 60.
    ra = dec = np.linspace(minval, maxval, nx)
    RA, DEC = np.meshgrid(ra, dec)
    RADEC = [RA, DEC]

    totalclus = len(kappa_qe_arr)

    # make several splits now
    each_split_should_contain = int(totalclus * 1. / noofsims)
    fullarr = np.arange(totalclus)
    inds_to_pick = np.copy(fullarr)

    STACKED_KAPPA_QE_JK = []
    for n in range(noofsims):
        logfile = open(log_file, 'a')
        logline = '\t\tsimno = %s\n' % (n)
        logfile.writelines('%s\n' % (logline))
        logfile.close()
        print(logline)

        inds = np.random.choice(inds_to_pick, size=each_split_should_contain,
                                replace=0)
        inds_to_delete = np.where(np.in1d(inds_to_pick, inds) == True)[0]
        inds_to_pick = np.delete(inds_to_pick, inds_to_delete)

        # stack everything that is not in inds - for each JK realisation we
        # ignore the clusters belonging to this respective split
        tmp = np.in1d(fullarr, inds)
        non_inds = np.where(tmp == False)[0]

        STACKED_KAPPA = np.mean(kappa_qe_arr[non_inds], axis=0)
        # STACKED_KAPPA = STACKED_KAPPA - MEAN_FIELD
        if sims.is_seq(MEAN_FIELD):
            STACKED_KAPPA = STACKED_KAPPA - MEAN_FIELD

        STACKED_KAPPA_QE_JK.append(STACKED_KAPPA)

    STACKED_KAPPA_QE_JK = np.asarray(STACKED_KAPPA_QE_JK)
    RADPROFILES = np.asarray([sims.fn_radial_profile(x, RADEC,
                                                     bin_size=binsize,
                                                     minbin=0.0, maxbin=maxbin)
                              for x in STACKED_KAPPA_QE_JK])

    # calculate covariance between radial bins now
    RADPRF = RADPROFILES[:, :, 1]
    totbins = np.shape(RADPRF)[1]
    RADPRF_MEAN = np.mean(RADPRF, axis=0)
    RADPRF = RADPRF - RADPRF_MEAN

    kappa_COV = sims.calcCov(RADPRF, noofsims, npixels=totbins)
    # kappa_COV_2 = sims.calcCov(RADPRF, noofsims, npixels=totbins, perform_mean_sub=0)
    return kappa_COV
def compute_mAP(index, qc, good_index, junk_index):
    ap = 0
    cmc = torch.IntTensor(len(index)).zero_()
    if good_index.size == 0:   # if empty
        cmc[0] = -1
        return ap, cmc

    # remove junk_index
    ranked_camera = gallery_cam[index]
    mask = np.in1d(index, junk_index, invert=True)
    # mask2 = np.in1d(index, np.append(good_index, junk_index), invert=True)
    index = index[mask]
    ranked_camera = ranked_camera[mask]
    for i in range(10):
        cam_metric[qc - 1, ranked_camera[i] - 1] += 1

    # find good_index index
    ngood = len(good_index)
    mask = np.in1d(index, good_index)
    rows_good = np.argwhere(mask == True)
    rows_good = rows_good.flatten()

    cmc[rows_good[0]:] = 1
    for i in range(ngood):
        d_recall = 1.0 / ngood
        precision = (i + 1) * 1.0 / (rows_good[i] + 1)
        if rows_good[i] != 0:
            old_precision = i * 1.0 / rows_good[i]
        else:
            old_precision = 1.0
        ap = ap + d_recall * (old_precision + precision) / 2

    return ap, cmc
def close_obj(coord, size):
    coord = coord_pack(coord)
    ba, ab = np.indices((len(coord), len(coord)), dtype=np.int16)
    sep = coord[ab].separation(coord[ba])
    c = np.where(sep < size)
    close = np.where(c[0] < c[1])
    pairs = np.vstack((c[0][close], c[1][close]))
    samefov = np.delete(np.arange(len(coord), dtype=np.int16),
                        np.hstack((c[0][close], c[1][close])))
    samefov = samefov.reshape(len(samefov), 1).tolist()
    n, m = np.unique(pairs[0], return_counts=True)
    y = np.in1d(pairs[0], n[np.where(m == 1)])
    n1, m1 = np.unique(pairs[1], return_counts=True)
    y1 = np.in1d(pairs[1], n1[np.where(m1 == 1)])
    samefov = samefov + pairs.T[y * y1].tolist()
    q = pairs.T[~(y * y1)]
    for z in np.unique(q.T[0]):
        b = q.T[1][np.where(q.T[0] == z)]
        combs = []
        for i in np.arange(len(b), 0, -1, dtype=np.int16):
            els = [[z] + list(x) for x in itertools.combinations(b, i)]
            combs.append(els)
        for i in combs:
            for a in i:
                d = False
                w = [list(x) for x in itertools.combinations(a, 2)]
                if not np.all([i in q.tolist() for i in w]):
                    continue
                for v in samefov:
                    e = all(k in v for k in a)
                    d = bool(d + e)
                if d == False:
                    samefov = samefov + [a]
    return np.sort(samefov).tolist()
def classifyPerCountry(T, V, Y, Y_country_hat):
    Y_country = np.floor(Y / 1000)
    print("\nClassifying per Country")
    Y_city = Y
    country_codes = list(set(Y_country))
    nCountryCodes = len(country_codes)
    Y_hat = np.zeros(len(Y_country_hat))
    for i in range(nCountryCodes):
        print('%s\r' % (' ' * 20), end='')
        print(' ', i * 100 // nCountryCodes, end='')
        # clf = MultinomialNB(0.5)
        clf = SVC()
        country_idx = np.in1d(Y_country, country_codes[i])
        country_idx_sparse = country_idx.nonzero()[0]
        T_country = T[country_idx_sparse, :]
        Y_cityPerCountry = Y_city[country_idx]
        unique_Y_cityPerCountry = list(set(Y_cityPerCountry))
        predict_idx = np.in1d(Y_country_hat, country_codes[i])
        predict_idx_sparse = predict_idx.nonzero()[0]
        if len(unique_Y_cityPerCountry) == 1:
            Y_hat[predict_idx] = unique_Y_cityPerCountry
            continue
        clf.fit(T_country, Y_cityPerCountry)
        if sum(predict_idx) > 1:
            Y_cityPerCountry_hat = clf.predict(V[predict_idx_sparse, :])
            Y_hat[predict_idx] = Y_cityPerCountry_hat
    print("\n")
    return Y_hat
def sim_top_doc(self, topic_or_topics, weights=[], filter_words=[],
                print_len=10, as_strings=True, label_fn=_def_label_fn_,
                filter_nan=True):
    """
    """
    d_arr = _sim_top_doc_(self.corpus, self.model.doc_top, topic_or_topics,
                          self.model.context_type, weights=weights,
                          norms=self._doc_norms, print_len=print_len,
                          as_strings=False, label_fn=label_fn,
                          filter_nan=filter_nan)

    topics = _res_top_type_(topic_or_topics)

    if len(filter_words) > 0:
        white = set()
        for w in filter_words:
            l = self.word_topics(w, as_strings=False)
            d = l['i'][np.in1d(l['value'], topics)]
            white.update(d)

        d_arr = d_arr[(np.in1d(d_arr['i'], white))]

    if as_strings:
        md = self.corpus.view_metadata(self.model.context_type)
        docs = label_fn(md)
        d_arr = _map_strarr_(d_arr, docs, k='i', new_k='doc')

    return d_arr
def find_matches(mock, obs, opts):
    """
    Function to find matching galaxy members between mock haloes and
    observed clusters.
    """
    obs = obs[np.in1d(obs.mem_id, mock.m_mem_id, assume_unique=True)]
    mock = mock[np.in1d(mock.m_mem_id, obs.mem_id, assume_unique=True)]

    merged = np.lib.recfunctions.merge_arrays([obs, mock], flatten=True,
                                              usemask=False)

    clusters = []
    count = 0

    for id_val in np.unique(obs.id):
        clusters.append(Clusterx(count))
        for member in merged[obs.id == id_val]:
            clusters[count].add_mem(member)
        count += 1

    for cluster in clusters:
        cluster.props()
        cluster.halo_count()
        cluster.mass_hist(opts.mass_bin)

    return clusters
def map_to_external_reference(self, roi, refname='HXB2', in_patient=True):
    '''
    return a map of positions in the patient to a reference genomewide

    Args:
        roi -- region of interest given as a string or a tuple (start, end)
        refname -- reference to compare to
        in_patient -- specifies whether the (start, end) refers to reference
                      or patient coordinates

    returns:
        a (len(roi), 3) array with reference coordinates in the first column,
        patient coordinates in the second, and roi coordinates in the third
        column
    '''
    from .filenames import get_coordinate_map_filename
    coo_fn = get_coordinate_map_filename(self.name, 'genomewide',
                                         refname=refname)
    genomewide_map = np.loadtxt(coo_fn, dtype=int)

    if roi in self.annotation:
        roi_pos = np.array([x for x in self.annotation[roi]], dtype=int)
        ind = np.in1d(genomewide_map[:, 1], roi_pos)
        roi_indices = np.in1d(roi_pos, genomewide_map[:, 1]).nonzero()[0]
        return np.vstack((genomewide_map[ind].T, [roi_indices])).T

    elif roi == "genomewide":
        return np.vstack((genomewide_map.T, [genomewide_map[:, 1]])).T

    else:
        try:
            start, stop = map(int, roi)
            start_ind = np.searchsorted(genomewide_map[:, in_patient], start)
            stop_ind = np.searchsorted(genomewide_map[:, in_patient], stop)
            return np.vstack((genomewide_map[start_ind:stop_ind].T,
                              [genomewide_map[start_ind:stop_ind, in_patient]
                               - start])).T
        except:
            raise ValueError("ROI not understood")
def check_filter_labels(inverse=False):
    # create a feature set
    fs, _ = make_classification_data(num_examples=1000,
                                     num_features=4,
                                     num_labels=5,
                                     train_test_ratio=1.0)

    # keep just the instances with 0, 1 and 2 labels
    labels_to_filter = [0, 1, 2]

    # do the actual filtering
    fs.filter(labels=labels_to_filter, inverse=inverse)

    # make sure that we removed the right things
    if inverse:
        ids_kept = fs.ids[np.where(np.logical_not(np.in1d(fs.labels,
                                                          labels_to_filter)))]
    else:
        ids_kept = fs.ids[np.where(np.in1d(fs.labels, labels_to_filter))]

    assert_array_equal(fs.ids, np.array(ids_kept))

    # make sure that number of ids, labels and features are the same
    eq_(fs.ids.shape[0], fs.labels.shape[0])
    eq_(fs.labels.shape[0], fs.features.shape[0])
def _limit_features(self, csr_matrix, low=2, high=None, limit=None):
    """
    Lower bound on features, so that > n docs must contain the feature
    """
    # won't work with other sparse matrices
    # (most can be converted with .tocsr() method)
    assert isinstance(csr_matrix, scipy.sparse.csr_matrix)

    # csr_matrix.sum(axis=0) < low: Boolean matrix where total feature counts < low
    # np.asarray: converts np.matrix to np.array
    # [0]: the array of interest is the first (and only) item in an outer array
    # np.where: go from True/False to indices of Trues
    indices_to_remove = np.where(np.asarray(csr_matrix.sum(axis=0) < low)[0])[0]

    # boolean array marking the stored values whose column is to be removed
    # (i.e. their column index is in the indices_to_remove array)
    data_filter = np.in1d(csr_matrix.indices, indices_to_remove)

    # following three lines for info/debugging purposes,
    # to show how many unique features are being removed
    num_total_features = len(np.unique(csr_matrix.indices))
    num_features_to_remove = np.sum(np.in1d(indices_to_remove,
                                            np.unique(csr_matrix.indices)))
    print("%d/%d features will be removed"
          % (num_features_to_remove, num_total_features))

    # set the values to be removed to 0 to start with, then run the optimised
    # routine to delete those 0's (and free a little memory); explicit zeros
    # are superfluous in a sparse matrix
    csr_matrix.data[data_filter] = 0
    csr_matrix.eliminate_zeros()

    return csr_matrix
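# A small, self-contained usage sketch of the same pruning idea on a toy
# document-term matrix. The data and threshold are made up; the method above
# additionally lives on a class and prints a removal summary.
import numpy as np
import scipy.sparse

# toy document-term counts: 4 documents x 5 features
X = scipy.sparse.csr_matrix(np.array([[1, 0, 0, 2, 0],
                                      [1, 0, 0, 0, 0],
                                      [0, 1, 0, 3, 0],
                                      [0, 0, 0, 1, 1]]))

low = 2  # features whose column total is below this are dropped
rare = np.where(np.asarray(X.sum(axis=0)).ravel() < low)[0]

# zero out every stored value whose column is a rare feature ...
X.data[np.in1d(X.indices, rare)] = 0
# ... then let scipy drop the explicit zeros
X.eliminate_zeros()
print(X.toarray())  # only columns 0 and 3 keep their counts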
def compute_mAP(index, good_index, junk_index):
    ap = 0
    cmc = torch.IntTensor(len(index)).zero_()
    if good_index.size == 0:   # if empty
        cmc[0] = -1
        return ap, cmc

    # remove junk_index
    mask = np.in1d(index, junk_index, invert=True)
    index = index[mask]

    # find good_index index
    ngood = len(good_index)
    mask = np.in1d(index, good_index)
    rows_good = np.argwhere(mask == True)
    rows_good = rows_good.flatten()

    cmc[rows_good[0]:] = 1
    for i in range(ngood):
        d_recall = 1.0 / ngood
        precision = (i + 1) * 1.0 / (rows_good[i] + 1)
        if rows_good[i] != 0:
            old_precision = i * 1.0 / rows_good[i]
        else:
            old_precision = 1.0
        ap = ap + d_recall * (old_precision + precision) / 2

    return ap, cmc
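# A minimal, hypothetical usage sketch for the compute_mAP function above
# (torch and numpy assumed in scope; all array values below are invented).
# The ranked gallery indices, the known good matches, and the junk entries are
# plain integer arrays; the call returns the average precision for this single
# query together with its CMC curve.
import numpy as np

index = np.array([7, 3, 9, 1, 4, 8])   # gallery ids sorted by similarity
good_index = np.array([3, 4])          # true matches for the query
junk_index = np.array([9])             # distractors to be ignored

ap, cmc = compute_mAP(index, good_index, junk_index)
print(ap)        # average precision of this query (~0.33 for these values)
print(cmc[:5])   # first ranks of the cumulative match curve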
def untie(a, b):
    """
    Parameters
    ----------
    a
    b

    Returns
    -------
    boolean
    a
    r
    """
    la = len(a)
    lb = len(b)
    u = np.intersect1d(a, b)
    lu = len(u)
    if lu >= min(la, lb) / 2:
        # segment of a not shared with b
        aa = a[~np.in1d(a, u)]
        # segment of b not shared with a
        bb = b[~np.in1d(b, u)]
        r = np.hstack((aa, bb))
        if la < lb:
            return(True, a, r)
        else:
            return(True, b, r)
    else:
        return(False, -1, -1)
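# A quick worked example of untie on two overlapping integer arrays (values
# invented for illustration): the shared segment [3, 4] covers at least half
# of the shorter array, so the call succeeds and returns the non-shared pieces
# stacked together.
import numpy as np

a = np.array([1, 2, 3, 4])
b = np.array([3, 4, 5, 6])

ok, kept, r = untie(a, b)
print(ok)    # True
print(kept)  # [3 4 5 6]  (b is returned because len(a) is not < len(b))
print(r)     # [1 2 5 6]  (elements of a and b outside the shared segment)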
def make_lineup(pa_transitions, non_pa_transitions, by_batting_order=True):
    # If by_batting_order is false, grab by position instead
    constructed_lineup = []
    for lineup_spot in range(1, num_lineup_spots + 1):
        final_pa_transitions = []
        final_non_pa_transitions = []
        spot_to_take = (lineup_spot if by_batting_order
                        else batting_order_positions_1[lineup_spot - 1])
        print(lineup_spot, spot_to_take)
        current_pa_transitions = pa_transitions[
            np.in1d(pa_transitions[:, 0], spot_to_take)]
        print(current_pa_transitions)
        current_non_pa_transitions = non_pa_transitions[
            np.in1d(non_pa_transitions[:, 0], spot_to_take)]
        for start_state in range(0, num_start_states):
            pa_row = [0] * num_end_states
            non_pa_row = [0] * num_end_states
            pa_for_start_state = current_pa_transitions[
                np.in1d(current_pa_transitions[:, 1], start_state)]
            non_pa_for_start_state = current_non_pa_transitions[
                np.in1d(current_non_pa_transitions[:, 1], start_state)]
            for row in pa_for_start_state:
                pa_row[row[2]] = row[3]
            final_pa_transitions.append(pa_row)
            for row in non_pa_for_start_state:
                non_pa_row[row[2]] = row[3]
            final_non_pa_transitions.append(non_pa_row)
        # print(final_pa_transitions)
        constructed_lineup.append(LineupSpot(final_pa_transitions,
                                             final_non_pa_transitions))
    return constructed_lineup
def _calculate_v_fits(self): """ Find lines that run vertically on the screen. """ # binary_warped shape: (height, width) binary_warped = self.binary_warped left_lane_inds = [] right_lane_inds = [] left_margin = self.left_search_margin right_margin = self.right_search_margin # Pixels closer to the car are more important, so we apply weights the histogram weights = np.array([range(binary_warped.shape[0]) ])**self.closer_importance weighted = binary_warped * weights.T # weighted = weights.T ** binary_warped # weighted = binary_warped # For center weights, convert as follows: # 0 1 2 3 4 5 6 7 to 1 2 3 4 4 3 2 1 # (changes: 1, 1, 1, 1, 0, -2, -4. -6) # In other words, points closer to the center have higher scores. cweights = np.array([range(binary_warped.shape[1])]) hlen = int(cweights.shape[1] / 2) # half-length adj = [1] * hlen # adjustments for i in range(hlen): v = -i * 2 adj.append(v) cweights += adj weighted *= cweights**self.center_importance # Sums all weighted points in the bottom 50% section (remember that bigger numbers are at the bottom). histogram = np.sum(weighted[int(weighted.shape[0] * self.v_hist_crop_top):, :], axis=0) midpoint = np.int(histogram.shape[0] / 2) histogram_l = histogram[:(midpoint)] histogram_r = histogram[(midpoint):] # === SLIDING WINDOWS === leftx_base = np.argmax(histogram_l) + self.vert_x_adjust[0] rightx_base = np.argmax(histogram_r) + midpoint + self.vert_x_adjust[1] # Making sure bases do not pass the center. We do not want to have left line starts from # right section and vice versa. if (leftx_base + left_margin) > midpoint: leftx_base = midpoint - left_margin if (rightx_base - right_margin) < midpoint: rightx_base = midpoint + right_margin # At this point, leftx_base and rightx_base should contain x position of each respective line. window_height = np.int((binary_warped.shape[0] * (1.0 - self.v_win_crop_top)) / self.nwindows) # Identify the x and y positions of all nonzero pixels in the image nonzero = binary_warped.nonzero() nonzeroy = np.array(nonzero[0]) nonzerox = np.array(nonzero[1]) # Current positions to be updated for each window leftx_current = leftx_base rightx_current = rightx_base # Only used for debugging. if self.debug: out_img = np.dstack( (binary_warped, binary_warped, binary_warped)) * 255 left_patience_counter = 0 right_patience_counter = 0 # The window takes into account x position found in the previous # centroid. It finds a centroid closest to it in the next iteration of window. # If there is no centroid x, use window center. # Global coordinate is used here. left_window_patience = self.window_patience right_window_patience = self.window_patience # Step through the windows one by one for window in range(self.nwindows): # Identify window boundaries in x and y (and right and left) # The higher win_y_low is, the closer to the top of the plot. 
win_y_low = binary_warped.shape[0] - (window + 1) * window_height win_y_high = binary_warped.shape[0] - window * window_height win_xleft_low = leftx_current - left_margin win_xleft_high = leftx_current + left_margin win_xright_low = rightx_current - right_margin win_xright_high = rightx_current + right_margin # Identify the nonzero pixels in x and y within the window good_left_inds = ((nonzeroy >= win_y_low) & (nonzeroy < win_y_high) & \ (nonzerox >= win_xleft_low) & (nonzerox < win_xleft_high)).nonzero()[0] good_right_inds = ((nonzeroy >= win_y_low) & (nonzeroy < win_y_high) & \ (nonzerox >= win_xright_low) & (nonzerox < win_xright_high)).nonzero()[0] # === Select better centroid (Left) === # Find the centroids of pixels in the current window. connectivity = 4 left_pixels_x = nonzerox[good_left_inds] - win_xleft_low left_pixels_y = nonzeroy[good_left_inds] - win_y_low pixels = np.zeros((window_height, (left_margin * 2))) best_centroid_x, best_pixels_pos = self._choose_best_centroid( pixels, leftx_current, win_xleft_low, left_pixels_x, left_pixels_y) if best_pixels_pos is not None: # Currently, best_pixels_pos contains the most relevant positions. # We just need to convert them into good_[left/right]_inds. best_pixels_pos += [win_y_low, win_xleft_low] bestx = best_pixels_pos[:, 1] besty = best_pixels_pos[:, 0] good_left_inds = np.intersect1d(np.argwhere(np.in1d(nonzerox,bestx)).flatten(), \ np.argwhere(np.in1d(nonzeroy,besty)).flatten()).tolist() # === END Select better centroid (Left)=== # === Select better centroid (Right) === # Find the centroids of pixels in the current window. connectivity = 4 right_pixels_x = nonzerox[good_right_inds] - win_xright_low right_pixels_y = nonzeroy[good_right_inds] - win_y_low pixels = np.zeros((window_height, (right_margin * 2))) best_centroid_x, best_pixels_pos = self._choose_best_centroid( pixels, rightx_current, win_xright_low, right_pixels_x, right_pixels_y) if best_pixels_pos is not None: # Currently, best_pixels_pos contains the most relevant positions. # We just need to convert them into good_[left/right]_inds. best_pixels_pos += [win_y_low, win_xright_low] bestx = best_pixels_pos[:, 1] besty = best_pixels_pos[:, 0] good_right_inds = np.intersect1d(np.argwhere(np.in1d(nonzerox,bestx)).flatten(), \ np.argwhere(np.in1d(nonzeroy,besty)).flatten()).tolist() # === END Select better centroid (Right)=== # right_previous_centroid_x, best_pixels_pos = self._choose_best_centroid( # pixels, right_previous_centroid_x, right_pixels_x, right_pixels_y) # If any of the pixels touches left/right section, stop when there is no more pixel to add. # We do this by setting the patience to 1. if np.any(nonzerox[good_left_inds] == 0): left_window_patience = 1 if np.any(nonzerox[good_right_inds] == binary_warped.shape[1]): right_window_patience = 1 # If sliding windows do not find enough pixels for some iterations, give up. 
if left_patience_counter > left_window_patience: pass else: if len(good_left_inds) <= self.window_empty_px: left_patience_counter += 1 else: left_patience_counter = 0 # Append these indices to the lists left_lane_inds.append(good_left_inds) # If you found > minpix pixels, recenter next window on their mean position if len(good_left_inds) > self.window_minpix: leftx_current = np.int(np.mean(nonzerox[good_left_inds])) # === DEBUGGING SLIDING WINDOWS === if self.debug: cv2.rectangle(out_img, (win_xleft_low, win_y_low), (win_xleft_high, win_y_high), (255, 0, 0), math.ceil(2 * self.scale)) # === END DEBUGGING SLIDING WINDOWS === if right_patience_counter > right_window_patience: pass else: if len(good_right_inds) <= self.window_empty_px: right_patience_counter += 1 else: right_patience_counter = 0 # Append these indices to the lists right_lane_inds.append(good_right_inds) # If you found > minpix pixels, recenter next window on their mean position if len(good_right_inds) > self.window_minpix: rightx_current = np.int(np.mean(nonzerox[good_right_inds])) # === DEBUGGING SLIDING WINDOWS === if self.debug: cv2.rectangle(out_img, (win_xright_low, win_y_low), (win_xright_high, win_y_high), (0, 255, 0), math.ceil(2 * self.scale)) # === END DEBUGGING SLIDING WINDOWS === # === END SLIDING WINDOWS === fits = self._wrap_up_windows(left_lane_inds, right_lane_inds, nonzerox, nonzeroy, 'v') # === DEBUGGING === if self.debug: if self.debug_dir == 'v': plt.imshow(out_img, cmap='gray') # Normalize histogram values so they don't go beyond image height. maxval = np.amax(histogram) hist_viz = np.copy(histogram) if maxval != 0: hist_viz = (hist_viz / maxval) * binary_warped.shape[0] # Subtract histogram values from max values so the histogram can be drawn # at the bottom of the plot. hist_viz = binary_warped.shape[0] - hist_viz # Plot histogram if self.debug_axes: self.debug_axes.plot(hist_viz, '-', c='#00FFFF', lw=2) # === END DEBUGGING === return (fits, histogram_l, histogram_r)
"""
@Project :
@File   : 唯一化以及其他的集合逻辑.py
@Author : liuwz
@time   : 2021/12/26 11:11 AM
@desc   :
"""
import numpy as np

names = np.array(['Joe', 'Bob', 'Will', 'Bob', 'Will', 'Joe', 'Joe'])
# unique: deduplicate and sort
names = np.unique(names)
print(names)

# intersect1d: return the common elements, sorted
x = np.arange(5)
y = np.arange(3, 8)
print(np.intersect1d(x, y))

# in1d: return a boolean array indicating whether each value of one array
# is contained in the other
print(np.in1d(x, y))

# setdiff1d: set difference, elements in x that are not in y
print(np.setdiff1d(x, y))

# setxor1d: symmetric difference, elements present in only one of the arrays
print(np.setxor1d(x, y))
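# For reference, the expected output of the calls above (the values follow
# directly from x = [0..4] and y = [3..7]):
#   np.unique(names)      -> ['Bob' 'Joe' 'Will']
#   np.intersect1d(x, y)  -> [3 4]
#   np.in1d(x, y)         -> [False False False  True  True]
#   np.setdiff1d(x, y)    -> [0 1 2]
#   np.setxor1d(x, y)     -> [0 1 2 5 6 7]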
def set_neuron_param(self, params, neurons=None, group=None): ''' Set the parameters of specific neurons or of a whole group. .. versionadded:: 1.0 Parameters ---------- params : dict Dictionary containing parameters for the neurons. Entries can be either a single number (same for all neurons) or a list (one entry per neuron). neurons : list of ints, optional (default: None) Ids of the neurons whose parameters should be modified. group : list of strings, optional (default: None) List of strings containing the names of the groups whose parameters should be updated. When modifying neurons from a single group, it is still usefull to specify the group name to speed up the pace. Note ---- If both `neurons` and `group` are None, all neurons will be modified. Warning ------- No check is performed on the validity of the parameters, which means that errors will only be detected when building the graph in NEST. ''' if self._to_nest: raise RuntimeError("Parameters cannot be changed after the " "network has been sent to NEST!") if neurons is not None: # specific neuron ids groups = [] # get the groups they could belong to if group is not None: if nonstring_container(group): groups.extend((self[g] for g in group)) else: groups.append(self[group]) else: groups.extend(self.values()) # update the groups parameters for g in groups: idx = np.where(np.in1d(g.ids, neurons, assume_unique=True))[0] # set the properties of the nodes for each entry in params for k, v in params.items(): default = np.NaN if k in g.neuron_param: default = g.neuron_param[k] elif nngt.get_config('with_nest'): try: import nest try: default = nest.GetDefaults(g.neuron_model, k) except nest.NESTError: pass except ImportError: pass vv = np.repeat(default, g.size) vv[idx] = v # update g.neuron_param[k] = vv else: # all neurons in one or several groups group = self.keys() if group is None else group if not nonstring_container(group): group = [group] start = 0 for name in group: g = self[name] for k, v in params.items(): if nonstring_container(v): g.neuron_param[k] = v[start:start+g.size] else: g.neuron_param[k] = v start += g.size
def get_adsorption_edges( self, symmetric=True, periodic=True): ''' Return the edges of adsorption sties defined as all regions with adjacent vertices. Parameters ---------- symmetric : bool Return only the symmetrically reduced edges. periodic : bool Return edges which are unique via periodicity. Returns ------- edges : ndarray (n, 2) All edges crossing ridge or vertices indexed by the expanded unit slab. ''' vt = scipy.spatial.Voronoi(self.coordinates[:, :2], qhull_options='Qbb Qc Qz C{}'.format(1e-2)) select, lens = [], [] for i, p in enumerate(vt.point_region): select += [vt.regions[p]] lens += [len(vt.regions[p])] dmax = max(lens) regions = np.zeros((len(select), dmax), int) mask = np.arange(dmax) < np.array(lens)[:, None] regions[mask] = np.concatenate(select) site_id = self.get_symmetric_sites(unique=False, screen=False) site_id = site_id + self.connectivity / 10 per = self.get_periodic_sites(screen=False) uper = self.get_periodic_sites() edges, symmetry, uniques = [], [], [] for i, p in enumerate(uper): poi = vt.point_region[p] voi = vt.regions[poi] for v in voi: nr = np.where(regions == v)[0] for n in nr: edge = sorted((p, n)) if n in uper[:i + 1] or edge in edges: continue if (np.in1d(per[edge], per[uper[:i]]).any()) and periodic: continue sym = sorted(site_id[edge]) if sym in symmetry: uniques += [False] else: uniques += [True] symmetry += [sym] edges += [edge] edges = np.array(edges) if symmetric: edges = edges[uniques] return edges
def __init__( self, slab, surface_atoms=None, tol=1e-5): ''' Create an extended unit cell of the surface sites for use in identifying other sites. Parameters ---------- slab : Gatoms object The slab associated with the adsorption site network to be attached. tol : float Absolute tolerance for floating point errors. ''' index, coords, offsets = Utils().expand_cell(slab, cutoff=5.0) if surface_atoms is None: surface_atoms = slab.get_surface_atoms() if surface_atoms is None: raise ValueError('Slab must contain surface atoms') extended_top = np.where(np.in1d(index, surface_atoms))[0] self.tol = tol self.coordinates = coords[extended_top].tolist() self.connectivity = np.ones(extended_top.shape[0]).tolist() self.r1_topology = [[i] for i in np.arange(len(extended_top))] self.index = index[extended_top] sites = self._get_higher_coordination_sites(coords[extended_top]) self.r2_topology = sites['top'][2] # Put data into array format selection = ['bridge', 'hollow', '4fold'] for i, k in enumerate(selection): coordinates, r1top, r2top = sites[k] if k in ['hollow', '4fold']: r2top = [[] for _ in coordinates] self.connectivity += (np.ones(len(coordinates)) * (i + 2)).tolist() self.coordinates += coordinates self.r1_topology += r1top self.r2_topology += r2top self.coordinates = np.array(self.coordinates) self.connectivity = np.array(self.connectivity, dtype=int) self.r1_topology = np.array(self.r1_topology, dtype=object) self.r2_topology = np.array(self.r2_topology, dtype=object) self.frac_coords = np.dot(self.coordinates, np.linalg.pinv(slab.cell)) self.slab = slab screen = (self.frac_coords[:, 0] > 0 - self.tol) & \ (self.frac_coords[:, 0] < 1 - self.tol) & \ (self.frac_coords[:, 1] > 0 - self.tol) & \ (self.frac_coords[:, 1] < 1 - self.tol) self.screen = screen self._symmetric_sites = None
if True in np.isnan(surf_data):
    print(rest_interp_f)
    surf_data = surf_data.squeeze()
    surf_f = '%s/fsaverage5/surf/%s.orig' % (fsDir, hemi)
    surf_faces = nib.freesurfer.io.read_geometry(surf_f)[1]
    mask = np.zeros((10242))
    while True in np.isnan(surf_data):
        nans = np.unique(np.where(np.isnan(surf_data))[0])
        mask[nans] = 1
        bad = []
        good = {}
        for node in nans:
            neighbors = np.unique(surf_faces[np.where(
                np.in1d(surf_faces.ravel(),
                        [node]).reshape(surf_faces.shape))[0]])
            bad_neighbors = neighbors[np.unique(
                np.where(np.isnan(surf_data[neighbors]))[0])]
            good_neighbors = np.setdiff1d(neighbors, bad_neighbors)
            bad.append((node, len(bad_neighbors)))
            good[node] = good_neighbors
        bad = np.array(bad).transpose()
        nodes_with_least_bad_neighbors = bad[0][bad[1] == np.min(bad[1])]
        for node in nodes_with_least_bad_neighbors:
            surf_data[node] = np.mean(surf_data[list(good[node])], axis=0)

    brain = Brain('fsaverage5', hemi, 'pial', curv=False)
    brain.add_data(mask, mask.min(),
def test_write_labels_to_annot(): """Test writing FreeSurfer parcellation from labels""" tempdir = _TempDir() labels = read_labels_from_annot('sample', subjects_dir=subjects_dir) # create temporary subjects-dir skeleton surf_dir = op.join(subjects_dir, 'sample', 'surf') temp_surf_dir = op.join(tempdir, 'sample', 'surf') os.makedirs(temp_surf_dir) shutil.copy(op.join(surf_dir, 'lh.white'), temp_surf_dir) shutil.copy(op.join(surf_dir, 'rh.white'), temp_surf_dir) os.makedirs(op.join(tempdir, 'sample', 'label')) # test automatic filenames dst = op.join(tempdir, 'sample', 'label', '%s.%s.annot') write_labels_to_annot(labels, 'sample', 'test1', subjects_dir=tempdir) assert_true(op.exists(dst % ('lh', 'test1'))) assert_true(op.exists(dst % ('rh', 'test1'))) # lh only for label in labels: if label.hemi == 'lh': break write_labels_to_annot([label], 'sample', 'test2', subjects_dir=tempdir) assert_true(op.exists(dst % ('lh', 'test2'))) assert_true(op.exists(dst % ('rh', 'test2'))) # rh only for label in labels: if label.hemi == 'rh': break write_labels_to_annot([label], 'sample', 'test3', subjects_dir=tempdir) assert_true(op.exists(dst % ('lh', 'test3'))) assert_true(op.exists(dst % ('rh', 'test3'))) # label alone assert_raises(TypeError, write_labels_to_annot, labels[0], 'sample', 'test4', subjects_dir=tempdir) # write left and right hemi labels with filenames: fnames = [op.join(tempdir, hemi + '-myparc') for hemi in ['lh', 'rh']] with warnings.catch_warnings(record=True): # specify subject_dir param for fname in fnames: write_labels_to_annot(labels, annot_fname=fname) # read it back labels2 = read_labels_from_annot('sample', subjects_dir=subjects_dir, annot_fname=fnames[0]) labels22 = read_labels_from_annot('sample', subjects_dir=subjects_dir, annot_fname=fnames[1]) labels2.extend(labels22) names = [label.name for label in labels2] for label in labels: idx = names.index(label.name) assert_labels_equal(label, labels2[idx]) # same with label-internal colors for fname in fnames: write_labels_to_annot(labels, 'sample', annot_fname=fname, overwrite=True, subjects_dir=subjects_dir) labels3 = read_labels_from_annot('sample', subjects_dir=subjects_dir, annot_fname=fnames[0]) labels33 = read_labels_from_annot('sample', subjects_dir=subjects_dir, annot_fname=fnames[1]) labels3.extend(labels33) names3 = [label.name for label in labels3] for label in labels: idx = names3.index(label.name) assert_labels_equal(label, labels3[idx]) # make sure we can't overwrite things assert_raises(ValueError, write_labels_to_annot, labels, 'sample', annot_fname=fnames[0], subjects_dir=subjects_dir) # however, this works write_labels_to_annot(labels, 'sample', annot_fname=fnames[0], overwrite=True, subjects_dir=subjects_dir) # label without color labels_ = labels[:] labels_[0] = labels_[0].copy() labels_[0].color = None write_labels_to_annot(labels_, 'sample', annot_fname=fnames[0], overwrite=True, subjects_dir=subjects_dir) # duplicate color labels_[0].color = labels_[2].color assert_raises(ValueError, write_labels_to_annot, labels_, 'sample', annot_fname=fnames[0], overwrite=True, subjects_dir=subjects_dir) # invalid color inputs labels_[0].color = (1.1, 1., 1., 1.) 
assert_raises(ValueError, write_labels_to_annot, labels_, 'sample', annot_fname=fnames[0], overwrite=True, subjects_dir=subjects_dir) # overlapping labels labels_ = labels[:] cuneus_lh = labels[6] precuneus_lh = labels[50] labels_.append(precuneus_lh + cuneus_lh) assert_raises(ValueError, write_labels_to_annot, labels_, 'sample', annot_fname=fnames[0], overwrite=True, subjects_dir=subjects_dir) # unlabeled vertices labels_lh = [label for label in labels if label.name.endswith('lh')] write_labels_to_annot(labels_lh[1:], 'sample', annot_fname=fnames[0], overwrite=True, subjects_dir=subjects_dir) labels_reloaded = read_labels_from_annot('sample', annot_fname=fnames[0], subjects_dir=subjects_dir) assert_equal(len(labels_lh), len(labels_reloaded)) label0 = labels_lh[0] label1 = labels_reloaded[-1] assert_equal(label1.name, "unknown-lh") assert_true(np.all(np.in1d(label0.vertices, label1.vertices))) # unnamed labels labels4 = labels[:] labels4[0].name = None assert_raises(ValueError, write_labels_to_annot, labels4, annot_fname=fnames[0])
def ClusterTree(D, adj_list): """ Compute Ward clustering linkage matrix for given similarity matrix adjacency structure. Parameters: - - - - - D : array similarity matrix adj_list : dictionary adjacency list Returns: - - - - Z : array linkage matrix """ X = D # Compute squared euclidean distance Y between rows Qx = np.tile(np.linalg.norm(X, axis=1)**2,(X.shape[0],1)) Y = Qx + Qx.transpose()-2*np.dot(X, X.transpose()) Y = spatial.distance.squareform(Y,checks=False) Y[Y<0] = 0 # Correct for numerical errors in very similar rows print('Similarity shape: {:}'.format(Y.shape)) # Construct adjacency matrix N = len(adj_list) A = np.zeros([N, N], dtype=bool) for i in range(N): A[i, adj_list[i]] = True connected = spatial.distance.squareform(A).astype(bool) print('Connected shape: {:}'.format(connected.shape)) # Initialize all data structures valid_clusts = np.ones(N, dtype=bool) # which clusters still remain col_limits = np.cumsum(np.concatenate((np.array([N-2]), np.arange(N-2, 0, -1)))) # During updating clusters, cluster index is constantly changing, R is # a index vector mapping the original index to the current (row, column) # index in Y. C denotes how many points are contained in each cluster. m = int(np.ceil(np.sqrt(2*Y.shape[0]))) C = np.zeros(2*m-1) C[0:m] = 1 R = np.arange(m) all_inds = np.arange(Y.shape[0]) # pairs of adjacent clusters that can be merged conn_inds = all_inds[connected] Z = np.zeros([m-1, 4]) for s in range(m-1): if conn_inds.size == 0: # The graph was disconnected (e.g. two hemispheres) # Just add all connections to finish up cluster tree connected = np.zeros(len(connected)) conn_inds = [] valid_clust_inds = np.flatnonzero(valid_clusts) for i in valid_clust_inds: U = valid_clusts U[i] = 0 new_conns = PdistInds(i, N, U) connected[new_conns] = True conn_inds = np.concatenate((conn_inds, new_conns)) conn_inds = np.unique(conn_inds) # Find closest pair of clusters v = np.amin(Y[conn_inds]) k = conn_inds[np.argmin(Y[conn_inds])] j = np.where(k <= col_limits)[0][0] i = N - (col_limits[j] - k) - 1 # Add row to output linkage Z[s, 0:3] = np.array([R[i], R[j], v]) # Update Y with this new cluster i containing old clusters i and j U = valid_clusts U[np.array([i, j])] = 0 oldI = PdistInds(i, N, U) oldJ = PdistInds(j, N, U) Y[oldI] = (( C[R[U]]+C[R[i]])*Y[oldI] + (C[R[U]]+C[R[j]])*Y[oldJ] - C[R[U]]*v)/(C[R[i]]+C[R[j]] + C[R[U]]) # Add j's connections to new cluster i new_conns = connected[oldJ] & ~connected[oldI] connected[oldI] = connected[oldI] | new_conns conn_inds = np.sort(np.concatenate((conn_inds, oldI[new_conns]))) # Remove all of j's connections from conn_inds and connected U[i] = 1 J = PdistInds(j, N, U) conn_inds = conn_inds[np.in1d(conn_inds, J, assume_unique=True, invert=True).astype(np.int)] connected[J] = np.zeros(len(J)) valid_clusts[j] = 0 # update m, N, R C[m+s] = C[R[i]] + C[R[j]] Z[s, 3] = C[m+s] R[i] = m+s Z[:, 2] = np.sqrt(Z[:, 2]) return Z
def plot_projs_joint(projs, evoked, picks_trace=None, *, topomap_kwargs=None, show=True, verbose=None): """Plot projectors and evoked jointly. Parameters ---------- projs : list of Projection The projectors to plot. evoked : instance of Evoked The data to plot. Typically this is the evoked instance created from averaging the epochs used to create the projection. %(picks_plot_projs_joint_trace)s topomap_kwargs : dict | None Keyword arguments to pass to :func:`mne.viz.plot_projs_topomap`. %(show)s %(verbose)s Returns ------- fig : instance of matplotlib Figure The figure. Notes ----- This function creates a figure with three columns: 1. The left shows the evoked data traces before (black) and after (green) projection. 2. The center shows the topomaps associated with each of the projectors. 3. The right again shows the data traces (black), but this time with: 1. The data projected onto each projector with a single normalization factor (solid lines). This is useful for seeing the relative power in each projection vector. 2. The data projected onto each projector with individual normalization factors (dashed lines). This is useful for visualizing each time course regardless of its power. 3. Additional data traces from ``picks_trace`` (solid yellow lines). This is useful for visualizing the "ground truth" of the time course, e.g. the measured EOG or ECG channel time courses. .. versionadded:: 1.1 """ import matplotlib.pyplot as plt from ..evoked import Evoked _validate_type(evoked, Evoked, 'evoked') _validate_type(topomap_kwargs, (None, dict), 'topomap_kwargs') projs = _check_type_projs(projs) topomap_kwargs = dict() if topomap_kwargs is None else topomap_kwargs if picks_trace is not None: picks_trace = _picks_to_idx(evoked.info, picks_trace, allow_empty=False) info = evoked.info ch_types = evoked.get_channel_types(unique=True, only_data_chs=True) proj_by_type = dict() # will be set up like an enumerate key->[pi, proj] ch_names_by_type = dict() used = np.zeros(len(projs), int) for ch_type in ch_types: these_picks = _picks_to_idx(info, ch_type, allow_empty=True) these_chs = [evoked.ch_names[pick] for pick in these_picks] ch_names_by_type[ch_type] = these_chs for pi, proj in enumerate(projs): if not set(these_chs).intersection(proj['data']['col_names']): continue if ch_type not in proj_by_type: proj_by_type[ch_type] = list() proj_by_type[ch_type].append([pi, deepcopy(proj)]) used[pi] += 1 missing = (~used.astype(bool)).sum() if missing: warn(f'{missing} projector{_pl(missing)} had no channel names ' 'present in epochs') del projs ch_types = list(proj_by_type) # reduce to number we actually need # room for legend max_proj_per_type = max(len(x) for x in proj_by_type.values()) cs_trace = 3 cs_topo = 2 n_col = max_proj_per_type * cs_topo + 2 * cs_trace n_row = len(ch_types) shape = (n_row, n_col) fig = plt.figure(figsize=(n_col * 1.1 + 0.5, n_row * 1.8 + 0.5), constrained_layout=True) ri = 0 # pick some sufficiently distinct colors (6 per proj type, e.g., ECG, # should be enough hopefully!) 
# https://personal.sron.nl/~pault/data/colourschemes.pdf # "Vibrant" color scheme proj_colors = [ '#CC3311', # red '#009988', # teal '#0077BB', # blue '#EE3377', # magenta '#EE7733', # orange '#33BBEE', # cyan ] trace_color = '#CCBB44' # yellow after_color, after_name = '#228833', 'green' type_titles = DEFAULTS['titles'] last_ax = [None] * 2 first_ax = dict() pe_kwargs = dict(show=False, draw=False) for ch_type, these_projs in proj_by_type.items(): these_idxs, these_projs = zip(*these_projs) ch_names = ch_names_by_type[ch_type] idx = np.where([ np.in1d(ch_names, proj['data']['col_names']).all() for proj in these_projs ])[0] used[idx] += 1 count = len(these_projs) for proj in these_projs: sub_idx = [ proj['data']['col_names'].index(name) for name in ch_names ] proj['data']['data'] = proj['data']['data'][:, sub_idx] proj['data']['col_names'] = ch_names ba_ax = plt.subplot2grid(shape, (ri, 0), colspan=cs_trace, fig=fig) topo_axes = [ plt.subplot2grid(shape, (ri, ci * cs_topo + cs_trace), colspan=cs_topo, fig=fig) for ci in range(count) ] tr_ax = plt.subplot2grid(shape, (ri, n_col - cs_trace), colspan=cs_trace, fig=fig) # topomaps _plot_projs_topomap(these_projs, info=info, show=False, axes=topo_axes, **topomap_kwargs) for idx, proj, ax_ in zip(these_idxs, these_projs, topo_axes): ax_.set_title('') # could use proj['desc'] but it's long ax_.set_xlabel(f'projs[{idx}]', fontsize='small') unit = DEFAULTS['units'][ch_type] # traces this_evoked = evoked.copy().pick_channels(ch_names) p = np.concatenate([p['data']['data'] for p in these_projs]) assert p.shape == (len(these_projs), len(this_evoked.data)) traces = np.dot(p, this_evoked.data) traces *= np.sign(np.mean(np.dot(this_evoked.data, traces.T), 0))[:, np.newaxis] if picks_trace is not None: ch_traces = evoked.data[picks_trace] ch_traces -= np.mean(ch_traces, axis=1, keepdims=True) ch_traces /= np.abs(ch_traces).max() _plot_evoked(this_evoked, picks='all', axes=[tr_ax], **pe_kwargs) for line in tr_ax.lines: line.set(lw=0.5, zorder=3) for t in list(tr_ax.texts): t.remove() scale = 0.8 * np.abs(tr_ax.get_ylim()).max() hs, labels = list(), list() traces /= np.abs(traces).max() # uniformly scaled for ti, trace in enumerate(traces): hs.append( tr_ax.plot(this_evoked.times, trace * scale, color=proj_colors[ti % len(proj_colors)], zorder=5)[0]) labels.append(f'projs[{these_idxs[ti]}]') traces /= np.abs(traces).max(1, keepdims=True) # independently for ti, trace in enumerate(traces): tr_ax.plot(this_evoked.times, trace * scale, color=proj_colors[ti % len(proj_colors)], zorder=3.5, ls='--', lw=1., alpha=0.75) if picks_trace is not None: trace_ch = [evoked.ch_names[pick] for pick in picks_trace] if len(picks_trace) == 1: trace_ch = trace_ch[0] hs.append( tr_ax.plot(this_evoked.times, ch_traces.T * scale, color=trace_color, lw=3, zorder=4, alpha=0.75)[0]) labels.append(str(trace_ch)) tr_ax.set(title='', xlabel='', ylabel='') # This will steal space from the subplots in a constrained layout # https://matplotlib.org/3.5.0/tutorials/intermediate/constrainedlayout_guide.html#legends # noqa: E501 tr_ax.legend(hs, labels, loc='center left', borderaxespad=0.05, bbox_to_anchor=[1.05, 0.5]) last_ax[1] = tr_ax key = 'Projected time course' if key not in first_ax: first_ax[key] = tr_ax # Before and after traces _plot_evoked(this_evoked, picks='all', axes=[ba_ax], **pe_kwargs) for line in ba_ax.lines: line.set(lw=0.5, zorder=3) loff = len(ba_ax.lines) this_proj_evoked = this_evoked.copy().add_proj(these_projs) # with meg='combined' any existing mag projectors (those 
already part # of evoked before we add_proj above) will have greatly # reduced power, so we ignore the warning about this issue this_proj_evoked.apply_proj(verbose='error') _plot_evoked(this_proj_evoked, picks='all', axes=[ba_ax], **pe_kwargs) for line in ba_ax.lines[loff:]: line.set(lw=0.5, zorder=4, color=after_color) for t in list(ba_ax.texts): t.remove() ba_ax.set(title='', xlabel='') ba_ax.set(ylabel=f'{type_titles[ch_type]}\n{unit}') last_ax[0] = ba_ax key = f'Before (black) and after ({after_name})' if key not in first_ax: first_ax[key] = ba_ax ri += 1 for ax in last_ax: ax.set(xlabel='Time (sec)') for title, ax in first_ax.items(): ax.set_title(title, fontsize='medium') plt_show(show) return fig
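# Hedged illustration (synthetic arrays, not MNE objects): the right-hand trace panel above
# boils down to projecting the data onto each projection vector and rescaling the result.
import numpy as np

rng = np.random.RandomState(0)
proj_vecs = rng.normal(size=(2, 8))          # 2 projectors over 8 channels (assumed shapes)
data = rng.normal(size=(8, 100))             # 8 channels x 100 time points

traces = proj_vecs @ data                                      # time course along each projector
traces_uniform = traces / np.abs(traces).max()                 # single normalization factor
traces_indep = traces / np.abs(traces).max(1, keepdims=True)   # per-projector normalization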
def calculate_bayes_factors(fold_change, output_file, essential_genes, non_essential_genes, columns_to_test, network_file, align_info, use_bootstrapping, use_small_sample, filter_multi_target, loci_without_mismatch, loci_with_mismatch, bootstrap_iterations, no_of_cross_validations, sgrna_bayes_factors, equalise_sgrna_no, seed, run_test_mode, equalise_rep_no): """ \b Calculate Bayes Factors from an input fold change file: \b BAGEL.py bf -i [fold change] -o [output file] -e [essentials genes] -n [nonessentials genes] -c [columns] \b Calculates a log2 Bayes Factor for each gene. Positive BFs indicate confidence that the gene is essential. Output written to the [output file] contains: gene name, mean Bayes Factor across all iterations, std deviation of BFs, and number of iterations in which the gene was part of the test set (and a BF was calculated[output file]. \b Required options: -i --fold-change [fold change file] Tab-delimited file of reagents and fold changes (see documentation for format). -o, --output-file [output file] Output filename -e, --essential-genes [reference essentials] File with list of training set of essential genes -n, --non-essential-genes [reference nonessentials] File with list of training set of nonessential genes -c [columns to test] comma-delimited list of columns in input file to include in analyisis \b Network options: -w [network file] Enable Network boosting. Tab-delmited file of edges. [GeneA (\\t) GeneB]\n' \b Multi-target guides filtering options: -m, --filter-multi-target Enable filtering multi-targeting guide RNAs --align-info [file] Input precalculated align-info file -m0, --loci-without-mismatch Filtering guide RNAs without mismatch targeting over than [N] loci, default = 10 -m1, --loci-with-mismatch Filtering guide RNAs with 1-bp mismatch targeting over than [N] loci, default = 10 \b Other options: -b, --bootstrapping Use bootstrapping instead of cross-validation (Slow) -s, --small-sample Low-fat BAGEL, Only resampled training set (Bootstrapping, iteration = 100) -r --sgrna-bayes-factors Calculate sgRNA-wise Bayes Factor -f --equalise-sgrna-no Equalize the number of sgRNAs per gene to particular value [Number] -p --equalise-rep-no Equalize the number of repicates to particular value [Number] -N --no-of-cross-validations Number of sections for cross validation (default 10) -NB --bootstraps-iterations Number of bootstrap iterations (default 1000) -s, --seed=N Define random seed -h, --help Show this help text \b Example: \b BAGEL.py bf -i fc_file.txt -o results.bf -e ess_training_set.txt -n noness_training_set.txt -c 1,2,3 """ np.random.seed(seed) # set random seed if network_file: network_boost = True else: network_boost = False if sgrna_bayes_factors: rna_level = True else: rna_level = False if network_file and sgrna_bayes_factors: network_boost = False if equalise_sgrna_no: flat_sgrna = True else: flat_sgrna = False if equalise_rep_no: flat_rep = True else: flat_rep = False if use_small_sample: train_method = 0 bootstrap_iterations = 100 elif use_bootstrapping: train_method = 0 else: train_method = 1 genes = {} fc = {} gene2rna = {} rna2gene = {} multi_targeting_sgrnas = dict() multi_targeting_sgrnas_info = dict() if filter_multi_target: try: aligninfo = pd.read_csv(align_info, header=None, index_col=0, sep="\t").fillna("") for seqid in aligninfo.index: perfectmatch = 0 mismatch_1bp = 0 perfectmatch_gene = 0 mismatch_1bp_gene = 0 if aligninfo[1][seqid] != "": perfectmatch = len(aligninfo[1][seqid].split(",")) if aligninfo[2][seqid] != "": 
perfectmatch_gene = len(aligninfo[2][seqid].split(",")) if aligninfo[3][seqid] != "": mismatch_1bp = len(aligninfo[3][seqid].split(",")) if aligninfo[4][seqid] != "": mismatch_1bp_gene = len(aligninfo[4][seqid].split(",")) if perfectmatch > loci_without_mismatch or mismatch_1bp > loci_with_mismatch: multi_targeting_sgrnas[seqid] = True elif perfectmatch > 1 or mismatch_1bp > 0: multi_targeting_sgrnas_info[seqid] = (perfectmatch, mismatch_1bp, perfectmatch_gene, mismatch_1bp_gene) except: print("Please check align-info file") sys.exit(1) print("Total %d multi-targeting gRNAs are discarded" % len(multi_targeting_sgrnas)) # # LOAD FOLDCHANGES # rnatagset = set() with open(fold_change) as fin: fieldname = fin.readline().rstrip().split('\t') # # DEFINE CONTROLS # columns = columns_to_test.split(',') try: try: column_list = list(map(int, columns)) column_labels = [fieldname[x + 1] for x in column_list] except ValueError: column_labels = columns column_list = [ x for x in range(len(fieldname) - 1) if fieldname[x + 1] in column_labels ] # +1 because of First column start 2 print("Using column: " + ", ".join(column_labels)) # print "Using column: " + ", ".join(map(str,column_list)) except: print("Invalid columns") sys.exit(1) for line in fin: fields = line.rstrip().split('\t') rnatag = fields[0] if filter_multi_target is True: # multitargeting sgrna filtering if rnatag in multi_targeting_sgrnas: continue # skip multitargeting sgrna. if rnatag in rnatagset: print("Error! sgRNA tag duplicates") sys.exit(1) rnatagset.add(rnatag) gsym = fields[1] genes[gsym] = 1 if gsym not in gene2rna: gene2rna[gsym] = [] gene2rna[gsym].append(rnatag) rna2gene[rnatag] = gsym fc[rnatag] = {} for i in column_list: fc[rnatag][i] = float( fields[i + 1] ) # per user docs, GENE is column 0, first data column is col 1. genes_array = np.array(list(genes.keys())) gene_idx = np.arange(len(genes)) print("Number of unique genes: " + str(len(genes))) # # DEFINE REFERENCE SETS # coreEss = [] with open(essential_genes) as fin: skip_header = fin.readline() for line in fin: coreEss.append(line.rstrip().split('\t')[0]) coreEss = np.array(coreEss) print("Number of reference essentials: " + str(len(coreEss))) nonEss = [] with open(non_essential_genes) as fin: skip_header = fin.readline() for line in fin: nonEss.append(line.rstrip().split('\t')[0]) nonEss = np.array(nonEss) print("Number of reference nonessentials: " + str(len(nonEss))) # # LOAD NETWORK # if network_boost is True: network = {} edgecount = 0 with open(network_file) as fin: for line in fin: linearray = line.rstrip().split('\t') # GeneA \t GeneB format if linearray[0] in genes_array and linearray[1] in genes_array: for i in [0, 1]: if linearray[i] not in network: network[linearray[i]] = {} network[linearray[i]][linearray[ -1 * (i - 1)]] = 1 # save edge information edgecount += 1 print("Number of network edges: " + str(edgecount)) # # INITIALIZE BFS # # Define foldchange dynamic threshold. logarithm decay. # Parameters are defined by regression (achilles data) 2**-7 was used in previous version. 
FC_THRESH = 2**( -1.1535 * np.log(len(np.intersect1d(genes_array, nonEss)) + 13.324) + 0.7728) bf = {} boostedbf = {} for g in genes_array: for rnatag in gene2rna[g]: bf[rnatag] = [] boostedbf[g] = [] # boosted bf at gene level # # TRAINING # if use_small_sample: # declare training class # training_data = Training(setdiff1d(gene_idx,np.where(in1d(genes_array,coreEss))),cvnum=NUMCV) # declare training class (only for Gold-standard gene set) training_data = Training(np.where( np.in1d(genes_array, np.union1d(coreEss, nonEss)))[0], cvnum=no_of_cross_validations) # all non-goldstandards all_non_gs = np.where( np.logical_not(np.in1d(genes_array, np.union1d(coreEss, nonEss))))[0] else: training_data = Training( gene_idx, cvnum=no_of_cross_validations) # declare training class if train_method == 0: LOOPCOUNT = bootstrap_iterations elif train_method == 1: LOOPCOUNT = no_of_cross_validations # 10-folds if run_test_mode == True: fp = open(output_file + ".traininfo", "w") fp.write("#1: Loopcount\n#2: Training set\n#3: Testset\n") print("Iter TrainEss TrainNon TestSet") sys.stdout.flush() for loop in range(LOOPCOUNT): currentbf = {} printstr = "" printstr += str(loop) # # bootstrap resample (10-folds cross-validation) from gene list to get the training set # test set for this iteration is everything not selected in bootstrap resampled (10-folds cross-validation) # training set # define essential and nonessential training sets: arrays of indexes # gene_train_idx, gene_test_idx = training_data.get_data(train_method) if use_small_sample: # test set is union of rest of training set (gold-standard) and the other genes (all of non-gold-standard) gene_test_idx = np.union1d(gene_test_idx, all_non_gs) if run_test_mode: fp.write("%d\n%s\n%s\n" % (loop, ",".join(genes_array[gene_train_idx]), ",".join( genes_array[gene_test_idx]))) train_ess = np.where(np.in1d(genes_array[gene_train_idx], coreEss))[0] train_non = np.where(np.in1d(genes_array[gene_train_idx], nonEss))[0] printstr += " " + str(len(train_ess)) printstr += " " + str(len(train_non)) printstr += " " + str(len(gene_test_idx)) print(printstr) sys.stdout.flush() # # define ess_train: vector of observed fold changes of essential genes in training set # ess_train_fc_list_of_lists = [ fc[rnatag] for g in genes_array[gene_train_idx[train_ess]] for rnatag in gene2rna[g] ] ess_train_fc_flat_list = [ obs for sublist in ess_train_fc_list_of_lists for obs in list(sublist.values()) ] # # define non_train vector of observed fold changes of nonessential genes in training set # non_train_fc_list_of_lists = [ fc[rnatag] for g in genes_array[gene_train_idx[train_non]] for rnatag in gene2rna[g] ] non_train_fc_flat_list = [ obs for sublist in non_train_fc_list_of_lists for obs in list(sublist.values()) ] # # calculate empirical fold change distributions for both # kess = stats.gaussian_kde(ess_train_fc_flat_list) knon = stats.gaussian_kde(non_train_fc_flat_list) # # define empirical upper and lower bounds within which to calculate BF = f(fold change) # x = np.arange(-10, 2, 0.01) nonfitx = knon.evaluate(x) # define lower bound empirical fold change threshold: minimum FC np.where knon is above threshold f = np.where(nonfitx > FC_THRESH) xmin = round_to_hundredth(min(x[f])) # define upper bound empirical fold change threshold: minimum value of log2(ess/non) subx = np.arange(xmin, max(x[f]), 0.01) logratio_sample = np.log2(kess.evaluate(subx) / knon.evaluate(subx)) f = np.where(logratio_sample == logratio_sample.min()) xmax = round_to_hundredth(subx[f]) # # round foldchanges 
to nearest 0.01 # precalculate logratios and build lookup table (for speed) # logratio_lookup = {} for i in np.arange(xmin, xmax + 0.01, 0.01): logratio_lookup[np.around(i * 100)] = np.log2( kess.evaluate(i) / knon.evaluate(i)) # # calculate BFs from lookup table for withheld test set # # liner interpolation testx = list() testy = list() for g in genes_array[gene_train_idx]: for rnatag in gene2rna[g]: for foldchange in list(fc[rnatag].values()): if foldchange >= xmin and foldchange <= xmax: testx.append(np.around(foldchange * 100) / 100) testy.append(logratio_lookup[np.around(foldchange * 100)][0]) try: slope, intercept, r_value, p_value, std_err = stats.linregress( np.array(testx), np.array(testy)) except: print("Regression failed. Check quality of the screen") sys.exit(1) # # BF calculation # for g in genes_array[gene_test_idx]: for rnatag in gene2rna[g]: bayes_factor = [] for rep in column_list: bayes_factor.append(slope * fc[rnatag][rep] + intercept) bf[rnatag].append(bayes_factor) if run_test_mode == True: fp.close() num_obs = dict() if rna_level is False: bf_mean = dict() bf_std = dict() bf_norm = dict() # sgRNA number complement if rna_level or filter_multi_target: bf_mean_rna_rep = dict() bf_std_rna_rep = dict() # bf_norm_rna_rep = dict() for g in gene2rna: num_obs[g] = len(bf[gene2rna[g][0]]) if rna_level or filter_multi_target: for rnatag in gene2rna[g]: bf_mean_rna_rep[rnatag] = dict() bf_std_rna_rep[rnatag] = dict() t = list(zip(*bf[rnatag])) for rep in range(len(column_list)): bf_mean_rna_rep[rnatag][column_list[rep]] = np.mean(t[rep]) bf_std_rna_rep[rnatag][column_list[rep]] = np.std(t[rep]) if rna_level == False: sumofbf_list = list() for i in range(num_obs[g]): sumofbf = 0.0 for rnatag in gene2rna[g]: sumofbf += sum(bf[rnatag][i]) sumofbf_list.append(sumofbf) # append each iter bf_mean[g] = np.mean(sumofbf_list) bf_std[g] = np.std(sumofbf_list) # # BUILD MULTIPLE REGRESSION MODEL FOR MULTI TARGETING GUIDE RNAs # if filter_multi_target: count = 0 trainset = dict() bf_multi_corrected_gene = dict() bf_multi_corrected_rna = dict() for gene in gene2rna: # multi_targeting_sgrnas_info[seqid] = (perfectmatch, mismatch_1bp, perfectmatch_gene, mismatch_1bp_gene) multitarget = list() onlytarget = list() for seqid in gene2rna[gene]: if seqid not in aligninfo.index: continue if seqid in multi_targeting_sgrnas_info: multitarget.append(seqid) else: onlytarget.append(seqid) if len( onlytarget ) > 0: # comparsion between sgRNAs targeting one locus and multiple loci if len(multitarget) > 0: bf_only = np.mean([ sum(list(bf_mean_rna_rep[seqid].values())) for seqid in onlytarget ]) for seqid in onlytarget: trainset[seqid] = [1, 0, 0] for seqid in multitarget: if multi_targeting_sgrnas_info[seqid][ 2] > 1 or multi_targeting_sgrnas_info[seqid][ 3] > 0: # train model using multi-targeting only targeting one protein coding gene continue count += 1 increment = sum(list( bf_mean_rna_rep[seqid].values())) - bf_only trainset[seqid] = [ multi_targeting_sgrnas_info[seqid][0], multi_targeting_sgrnas_info[seqid][1], increment ] if count < 10: print( "Not enough train set for calculating multi-targeting effect.\n" ) print( "It may cause due to unmatched gRNA names between the foldchange file and the align info file.\n" ) print("Filtering is not finished\n") filter_multi_target = False else: trainset = pd.DataFrame().from_dict(trainset).T X = trainset[[0, 1]] y = trainset[2] regressor = LinearRegression() regressor.fit(X, y) coeff_df = pd.DataFrame(regressor.coef_, X.columns, columns=['Coefficient']) for i 
in [0, 1]: if coeff_df['Coefficient'][i] < 0: print( "Regression coefficient is below than zero. Substituted to zero\n" ) coeff_df['Coefficient'][i] = 0.0 print( "Multiple effects from perfect matched loci = %.3f and 1bp mis-matched loci = %.3f" % (coeff_df['Coefficient'][0], coeff_df['Coefficient'][1])) if rna_level == False: for g in gene2rna: penalty = 0.0 for seqid in gene2rna[g]: if seqid in multi_targeting_sgrnas_info: penalty += float( multi_targeting_sgrnas_info[seqid][0] - 1) * coeff_df['Coefficient'][0] + float( multi_targeting_sgrnas_info[seqid] [1]) * coeff_df['Coefficient'][1] bf_multi_corrected_gene[g] = bf_mean[g] - penalty else: for g in gene2rna: for seqid in gene2rna[g]: if seqid in multi_targeting_sgrnas_info: penalty = float( multi_targeting_sgrnas_info[seqid][0] - 1) * coeff_df['Coefficient'][0] + float( multi_targeting_sgrnas_info[seqid] [1]) * coeff_df['Coefficient'][1] else: penalty = 0.0 bf_multi_corrected_rna[seqid] = sum( list(bf_mean_rna_rep[seqid].values())) - penalty # # NORMALIZE sgRNA COUNT # if rna_level is False and flat_sgrna == True: if filter_multi_target == True: targetbf = bf_multi_corrected_gene else: targetbf = bf_mean for g in gene2rna: multiple_factor = equalise_sgrna_no / float(len(gene2rna[g])) bf_norm[g] = targetbf[g] * multiple_factor ''' if bf_std[rnatag] == 0.0: bf_norm[rnatag] = float('inf') else: bf_norm[g] = ( bf[rnatag] - bf_mean[rnatag] ) / bf_std[rnatag] ''' training_data = Training(gene_idx) # set training class reset # # calculate network scores # if network_boost == True and rna_level == False: # Network boost is only working for gene level if run_test_mode == True: # TEST MODE fp = open(output_file + ".netscore", "w") print("\nNetwork score calculation start\n") networkscores = {} for g in genes_array[gene_idx]: if g in network: templist = list() for neighbor in network[g]: if neighbor in bf_mean: templist.append(bf_mean[neighbor]) templist.sort(reverse=True) networkscores[g] = fibo_weighted_sum(templist) # # start training # for loop in range(LOOPCOUNT): currentnbf = {} printstr = "" printstr += str(loop) # # draw train, test sets # gene_train_idx, gene_test_idx = training_data.get_data( train_method) # # define essential and nonessential training sets: arrays of indexes # train_ess = np.where(np.in1d(genes_array[gene_train_idx], coreEss))[0] train_non = np.where(np.in1d(genes_array[gene_train_idx], nonEss))[0] printstr += " " + str(len(train_ess)) printstr += " " + str(len(train_non)) printstr += " " + str(len(gene_test_idx)) sys.stdout.flush() # # calculate Network BF for test set # ess_ns_list = [ networkscores[x] for x in genes_array[gene_train_idx[train_ess]] if x in networkscores ] non_ns_list = [ networkscores[x] for x in genes_array[gene_train_idx[train_non]] if x in networkscores ] kess = stats.gaussian_kde(ess_ns_list) knon = stats.gaussian_kde(non_ns_list) # # set x boundary for liner regression # testx = list() testy = list() xmin = float(np.inf) xmax = float(-np.inf) for networkscore in np.arange(max(ess_ns_list), min(ess_ns_list), -0.01): density_ess = kess.evaluate(networkscore)[0] density_non = knon.evaluate(networkscore)[0] if density_ess == 0.0 or density_non == 0.0: continue if np.log2(density_ess / density_non) > -5 and networkscore < np.array( ess_ns_list).mean(): # reverse xmin = min(xmin, networkscore) for networkscore in np.arange(min(non_ns_list), max(non_ns_list), 0.01): density_ess = kess.evaluate(networkscore)[0] density_non = knon.evaluate(networkscore)[0] if density_ess == 0.0 or density_non == 0.0: 
continue if np.log2(density_ess / density_non) < 5 and networkscore > np.array( non_ns_list).mean(): # reverse xmax = max(xmax, networkscore) # # liner regression # testx = list() testy = list() for g in genes_array[gene_train_idx]: if g in networkscores: if networkscores[g] >= xmin and networkscores[g] <= xmax: testx.append(np.around(networkscores[g] * 100) / 100) testy.append( np.log2( kess.evaluate(networkscores[g])[0] / knon.evaluate(networkscores[g])[0])) slope, intercept, r_value, p_value, std_err = stats.linregress( np.array(testx), np.array(testy)) for g in genes_array[gene_test_idx]: if g in networkscores: if run_test_mode == True: fp.write("%s\t%f\t%f\n" % (g, networkscores[g], slope * networkscores[g] + intercept)) nbf = slope * networkscores[g] + intercept else: nbf = 0.0 boostedbf[g].append(bf_mean[g] + nbf) if flat_sgrna == True: boostedbf[g].append(bf_norm[g] + nbf) if run_test_mode == True: fp.close() # # print out results # # Equalizing factor (Replicates) if flat_rep == True: eqf = equalise_rep_no / float(len(column_labels)) else: eqf = 1 # print out with open(output_file, 'w') as fout: if rna_level == True: fout.write('RNA\tGENE') for i in range(len(column_list)): fout.write('\t{0:s}'.format(column_labels[i])) if train_method == 0: fout.write('\t{0:s}'.format(column_labels[i] + "_STD")) fout.write('\tBF') if train_method == 0: fout.write('\tNumObs') fout.write('\n') for rnatag in sorted(bf.keys()): # RNA tag fout.write('{0:s}\t'.format(rnatag)) # Gene gene = rna2gene[rnatag] fout.write('{0:s}\t'.format(gene)) # BF of replicates for rep in column_list: fout.write('{0:4.3f}\t'.format( bf_mean_rna_rep[rnatag][rep])) if train_method == 0: fout.write('{0:4.3f}\t'.format( bf_std_rna_rep[rnatag][rep])) # Sum BF of replicates if filter_multi_target == True: fout.write('{0:4.3f}'.format( float(bf_multi_corrected_rna[rnatag]) * eqf )) # eqf = equalizing factor for the number of replicates else: fout.write('{0:4.3f}'.format( float(sum(list(bf_mean_rna_rep[rnatag].values()))) * eqf)) # Num obs if train_method == 0: fout.write('\t{0:d}'.format(num_obs[gene])) fout.write('\n') else: fout.write('GENE') if network_boost == True: fout.write('\tBoostedBF') if train_method == 0: fout.write('\tSTD_BoostedBF') fout.write('\tBF') if train_method == 0: fout.write('\tSTD\tNumObs') if flat_sgrna == True: fout.write('\tNormBF') fout.write('\n') for g in sorted(genes.keys()): # Gene fout.write('{0:s}'.format(g)) if network_boost == True: boostedbf_mean = np.mean(boostedbf[g]) boostedbf_std = np.std(boostedbf[g]) fout.write('\t{0:4.3f}'.format( float(boostedbf_mean) * eqf)) if train_method == 0: fout.write('\t{0:4.3f}'.format( float(boostedbf_std) * eqf)) # BF if filter_multi_target == True: fout.write('\t{0:4.3f}'.format( float(bf_multi_corrected_gene[g]) * eqf )) # eqf = equalizing factor for the number of replicates else: fout.write('\t{0:4.3f}'.format(float(bf_mean[g]) * eqf)) # STD, Count if train_method == 0: fout.write('\t{0:4.3f}\t{1:d}'.format( float(bf_std[g]), num_obs[g])) # Normalized BF if flat_sgrna == True: fout.write('\t{0:4.3f}'.format(float(bf_norm[g]))) fout.write('\n')
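# Hedged sketch of the core Bayes-factor step above: fit Gaussian KDEs to the essential and
# non-essential training fold changes and take the log2 density ratio. Synthetic data only.
import numpy as np
from scipy import stats

rng = np.random.RandomState(0)
ess_fc = rng.normal(-3.0, 1.0, 500)          # fold changes of "essential" training guides
non_fc = rng.normal(0.0, 1.0, 500)           # fold changes of "non-essential" training guides

kess = stats.gaussian_kde(ess_fc)
knon = stats.gaussian_kde(non_fc)

fc = -2.0
log2_bf = np.log2(kess.evaluate(fc) / knon.evaluate(fc))[0]
print('log2 BF at fold change %.1f: %.2f' % (fc, log2_bf))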
def _make_bibc_bcbv(bus, branch, graph): """ performs depth-first-search bus ordering and creates Direct Load Flow (DLF) matrix which establishes direct relation between bus current injections and voltage drops from each bus to the root bus :param ppc: matpower-type case data :return: DLF matrix DLF = BIBC * BCBV where BIBC - Bus Injection to Branch-Current BCBV - Branch-Current to Bus-Voltage ppc with bfs ordering original bus names bfs ordered (used to convert voltage array back to normal) """ nobus = bus.shape[0] nobranch = branch.shape[0] # reference bus is assumed as root bus for a radial network refs = bus[bus[:, BUS_TYPE] == 3, BUS_I] norefs = len(refs) G = graph.copy() # network graph # dictionary with impedance values keyed by branch tuple (frombus, tobus) # TODO use list or array, not both branches_lst = list( zip(branch[:, F_BUS].real.astype(int), branch[:, T_BUS].real.astype(int))) branches_arr = branch[:, F_BUS:T_BUS + 1].real.astype(int) branches_ind_dict = dict( zip(zip(branches_arr[:, 0], branches_arr[:, 1]), range(0, nobranch))) branches_ind_dict.update( dict( zip(zip(branches_arr[:, 1], branches_arr[:, 0]), range(0, nobranch)))) tap = branch[:, TAP] # * np.exp(1j * np.pi / 180 * branch[:, SHIFT]) z_ser = (branch[:, BR_R].real + 1j * branch[:, BR_X].real) * tap # series impedance z_brch_dict = dict(zip(branches_lst, z_ser)) # initialization of lists for building sparse BIBC and BCBV matrices rowi_BIBC = [] coli_BIBC = [] data_BIBC = [] data_BCBV = [] buses_ordered_bfs_nets = [] for ref in refs: # ordering buses according to breadth-first-search (bfs) buses_ordered_bfs, predecs_bfs = csgraph.breadth_first_order( G, ref, directed=False, return_predecessors=True) buses_ordered_bfs_nets.append(buses_ordered_bfs) branches_ordered_bfs = list( zip(predecs_bfs[buses_ordered_bfs[1:]], buses_ordered_bfs[1:])) G_tree = csgraph.breadth_first_tree(G, ref, directed=False) # if multiple networks get subnetwork branches if norefs > 1: branches_sub_mask = ( np.in1d(branches_arr[:, 0], buses_ordered_bfs) & np.in1d(branches_arr[:, 1], buses_ordered_bfs)) branches = np.sort(branches_arr[branches_sub_mask, :], axis=1) else: branches = np.sort(branches_arr, axis=1) # identify loops if graph is not a tree branches_loops = [] if G_tree.nnz < branches.shape[0]: G_tree_nnzs = G_tree.nonzero() branches_tree = np.sort(np.array([G_tree_nnzs[0], G_tree_nnzs[1]]).T, axis=1) branches_loops = ( set(zip(branches[:, 0], branches[:, 1])) - set(zip(branches_tree[:, 0], branches_tree[:, 1]))) # #------ building BIBC and BCBV martrices ------ # branches in trees brchi = 0 for brch in branches_ordered_bfs: tree_down, predecs = csgraph.breadth_first_order( G_tree, brch[1], directed=True, return_predecessors=True) if len(tree_down) == 1: # If at leaf pass if brch in z_brch_dict: z_br = z_brch_dict[brch] else: z_br = z_brch_dict[brch[::-1]] rowi_BIBC += [branches_ind_dict[brch]] * len(tree_down) coli_BIBC += list(tree_down) data_BCBV += [z_br] * len(tree_down) data_BIBC += [1] * len(tree_down) # branches from loops for loop_i, brch_loop in enumerate(branches_loops): path_lens, path_preds = csgraph.shortest_path( G_tree, directed=False, indices=brch_loop, return_predecessors=True) init, end = brch_loop loop = [end] while init != end: end = path_preds[0, end] loop.append(end) loop_size = len(loop) coli_BIBC += [nobus + loop_i] * loop_size for i in range(len(loop)): brch = (loop[i - 1], loop[i]) if np.argwhere(buses_ordered_bfs == brch[0]) < np.argwhere( buses_ordered_bfs == brch[1]): brch_direct = 1 else: brch_direct 
= -1 data_BIBC.append(brch_direct) if brch in branches_ind_dict: rowi_BIBC.append(branches_ind_dict[brch]) else: rowi_BIBC.append(branches_ind_dict[brch[::-1]]) if brch in z_brch_dict: data_BCBV.append(z_brch_dict[brch] * brch_direct) else: data_BCBV.append(z_brch_dict[brch[::-1]] * brch_direct) brchi += 1 # construction of the BIBC matrix # column indices correspond to buses: assuming root bus is always 0 after ordering indices are subtracted by 1 BIBC = csr_matrix((data_BIBC, (rowi_BIBC, np.array(coli_BIBC) - norefs)), shape=(nobranch, nobranch)) BCBV = csr_matrix((data_BCBV, (rowi_BIBC, np.array(coli_BIBC) - norefs)), shape=(nobranch, nobranch)).transpose() if BCBV.shape[0] > nobus - 1: # if nbrch > nobus - 1 -> network has loops DLF_loop = BCBV * BIBC # DLF = [A M.T ] # [M N ] A = DLF_loop[0:nobus - 1, 0:nobus - 1] M = DLF_loop[nobus - 1:, 0:nobus - 1] N = DLF_loop[nobus - 1:, nobus - 1:].A # considering the fact that number of loops is relatively small, N matrix is expected to be small and dense # ...in that case dense version is more efficient, i.e. N is transformed to dense and # inverted using sp.linalg.inv(N) DLF = A - M.T * csr_matrix(sp.linalg.inv(N)) * M # Kron's Reduction else: # no loops -> radial network DLF = BCBV * BIBC return DLF, buses_ordered_bfs_nets
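# Hedged toy example of the sub-network masking used above: np.in1d keeps only branches
# whose endpoints both belong to the buses reached by one breadth-first search.
import numpy as np

branches_demo = np.array([[0, 1], [1, 2], [2, 3], [4, 5]])   # from-bus / to-bus pairs
bfs_buses = np.array([0, 1, 2, 3])                           # buses of one sub-network

mask = (np.in1d(branches_demo[:, 0], bfs_buses)
        & np.in1d(branches_demo[:, 1], bfs_buses))
print(branches_demo[mask])                                   # branch [4, 5] is dropped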
def own_def(roilist, sub, nClusters, scan, scan_type, savepng=0, session=1, algo=0, type_cor=0): p_dir = '/home/ajoshi/data/HCP_data' r_factor = 3 ref_dir = os.path.join(p_dir, 'reference') ref = '100307' fn1 = ref + '.reduce' + str(r_factor) + '.LR_mask.mat' fname1 = os.path.join(ref_dir, fn1) msk = scipy.io.loadmat(fname1) # h5py.File(fname1); #dfs_left = readdfs(os.path.join(p_dir, 'reference', ref + '.aparc.a2009s.32k_fs.reduce3.' + 'left' + '.dfs')) #dfs_left_sm = readdfs(os.path.join(p_dir, 'reference', ref + '.aparc.\ #a2009s.32k_fs.reduce3.very_smooth.' + 'left' + '.dfs')) dfs_left_sm = readdfs( os.path.join('/home/ajoshi/for_gaurav', '100307.BCI2reduce3.very_smooth.' + scan_type + '.dfs')) dfs_left = readdfs( os.path.join('/home/ajoshi/for_gaurav', '100307.BCI2reduce3.very_smooth.' + scan_type + '.dfs')) data = scipy.io.loadmat( os.path.join( p_dir, 'data', sub, sub + '.rfMRI_REST' + str(session) + scan + '.reduce3.ftdata.NLM_11N_hvar_25.mat')) LR_flag = msk['LR_flag'] # 0= right hemisphere && 1== left hemisphere if scan_type == 'right': LR_flag = np.squeeze(LR_flag) == 0 else: LR_flag = np.squeeze(LR_flag) == 1 data = data['ftdata_NLM'] temp = data[LR_flag, :] m = np.mean(temp, 1) temp = temp - m[:, None] s = np.std(temp, 1) + 1e-16 temp = temp / s[:, None] msk_small_region = np.in1d(dfs_left.labels, roilist) d = temp[msk_small_region, :] rho = np.corrcoef(d) rho[~np.isfinite(rho)] = 0 if algo == 0: SC = SpectralClustering(n_clusters=nClusters, affinity='precomputed') labels = SC.fit_predict(np.abs(rho)) if savepng > 0: r = dfs_left_sm r.labels = np.zeros([r.vertices.shape[0]]) r.labels[msk_small_region] = labels + 1 mlab.triangular_mesh(r.vertices[:, 0], r.vertices[:, 1], r.vertices[:, 2], r.faces, representation='surface', opacity=1, scalars=np.float64(r.labels)) mlab.gcf().scene.parallel_projection = True mlab.view(azimuth=0, elevation=-90) mlab.colorbar(orientation='horizontal') #mlab.show() mlab.savefig(filename='clusters_' + str(nClusters) + '_rois_' + str(roilist) + 'subject_' + sub + 'session' + str(session) + '_labels.png') mlab.show()
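# Hedged, self-contained sketch of the ROI selection and clustering pattern above:
# np.in1d builds a vertex mask from a label list, and the ROI correlation matrix is fed to
# spectral clustering as a precomputed affinity. All data here are synthetic.
import numpy as np
from sklearn.cluster import SpectralClustering

vertex_labels = np.array([1, 2, 2, 5, 1, 7, 2])          # hypothetical per-vertex ROI labels
roilist = [1, 2]
msk_small_region = np.in1d(vertex_labels, roilist)       # vertices inside the chosen ROIs

ts = np.random.RandomState(0).normal(size=(msk_small_region.sum(), 50))  # ROI time series
rho = np.abs(np.corrcoef(ts))                            # non-negative affinity matrix
labels = SpectralClustering(n_clusters=2, affinity='precomputed').fit_predict(rho)
print(labels)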
def biot_convergence_in_space(N): # coding: utf-8 # ### Source terms and analytical solutions # In[330]: def source_flow(g, tau): x1 = g.cell_centers[0] x2 = g.cell_centers[1] f_flow = tau*(2*np.sin(2*np.pi*x2) - \ 4*x1*np.pi**2*np.sin(2*np.pi*x2)*(x1 - 1)) - \ x1*np.sin(2*np.pi*x2) - \ np.sin(2*np.pi*x2)*(x1 - 1) + \ 2*np.pi*np.cos(2*np.pi*x2)*np.sin(2*np.pi*x1) return f_flow def source_mechanics(g): x1 = g.cell_centers[0] x2 = g.cell_centers[1] f_mech = np.zeros(g.num_cells * g.dim) f_mech[::2] = 6*np.sin(2*np.pi*x2) - \ x1*np.sin(2*np.pi*x2) - \ np.sin(2*np.pi*x2)*(x1 - 1) - \ 8*np.pi**2*np.cos(2*np.pi*x1)*np.cos(2*np.pi*x2) - \ 4*x1*np.pi**2*np.sin(2*np.pi*x2)*(x1 - 1) f_mech[1::2] = 4*np.pi*np.cos(2*np.pi*x2)*(x1 - 1) + \ 16*np.pi**2*np.sin(2*np.pi*x1)*np.sin(2*np.pi*x2) + \ 4*x1*np.pi*np.cos(2*np.pi*x2) - \ 2*x1*np.pi*np.cos(2*np.pi*x2)*(x1 - 1) return f_mech def analytical(g): sol = dict() x1 = g.cell_centers[0] x2 = g.cell_centers[1] sol['u'] = np.zeros(g.num_cells * g.dim) sol['u'][::2] = x1 * (1 - x1) * np.sin(2 * np.pi * x2) sol['u'][1::2] = np.sin(2 * np.pi * x1) * np.sin(2 * np.pi * x2) sol['p'] = sol['u'][::2] return sol # ### Getting mechanics boundary conditions # In[331]: def get_bc_mechanics(g, b_faces, x_min, x_max, west, east, y_min, y_max, south, north): # Setting the tags at each boundary side for the mechanics problem labels_mech = np.array([None] * b_faces.size) labels_mech[west] = 'dir' labels_mech[east] = 'dir' labels_mech[south] = 'dir' labels_mech[north] = 'dir' # Constructing the bc object for the mechanics problem bc_mech = pp.BoundaryConditionVectorial(g, b_faces, labels_mech) # Constructing the boundary values array for the mechanics problem bc_val_mech = np.zeros(g.num_faces * g.dim) return bc_mech, bc_val_mech # ### Getting flow boundary conditions # In[332]: def get_bc_flow(g, b_faces, x_min, x_max, west, east, y_min, y_max, south, north): # Setting the tags at each boundary side for the mechanics problem labels_flow = np.array([None] * b_faces.size) labels_flow[west] = 'dir' labels_flow[east] = 'dir' labels_flow[south] = 'dir' labels_flow[north] = 'dir' # Constructing the bc object for the flow problem bc_flow = pp.BoundaryCondition(g, b_faces, labels_flow) # Constructing the boundary values array for the flow problem bc_val_flow = np.zeros(g.num_faces) return bc_flow, bc_val_flow # ### Setting up the grid # In[333]: Nx = Ny = N Lx = 1 Ly = 1 g = pp.CartGrid([Nx, Ny], [Lx, Ly]) g.compute_geometry() V = g.cell_volumes # ### Physical parameters # In[334]: # Skeleton parameters mu_s = 1 # [Pa] Shear modulus lambda_s = 1 # [Pa] Lame parameter K_s = (2 / 3) * mu_s + lambda_s # [Pa] Bulk modulus E_s = mu_s * ((9 * K_s) / (3 * K_s + mu_s)) # [Pa] Young's modulus nu_s = (3 * K_s - 2 * mu_s) / (2 * (3 * K_s + mu_s) ) # [-] Poisson's coefficient k_s = 1 # [m^2] Permeabiliy # Fluid parameters mu_f = 1 # [Pa s] Dynamic viscosity # Porous medium parameters alpha_biot = 1. 
# [m^2] Intrinsic permeability S_m = 0 # [1/Pa] Specific Storage # ### Creating second and fourth order tensors # In[335]: # Permeability tensor perm = pp.SecondOrderTensor(g.dim, k_s * np.ones(g.num_cells)) # Stiffness matrix constit = pp.FourthOrderTensor(g.dim, mu_s * np.ones(g.num_cells), lambda_s * np.ones(g.num_cells)) # ### Time parameters # In[336]: t0 = 0 # [s] Initial time tf = 1 # [s] Final simulation time tLevels = 1 # [-] Time levels times = np.linspace(t0, tf, tLevels + 1) # [s] Vector of time evaluations dt = np.diff(times) # [s] Vector of time steps # ### Boundary conditions pre-processing # In[337]: b_faces = g.tags['domain_boundary_faces'].nonzero()[0] # Extracting indices of boundary faces w.r.t g x_min = b_faces[g.face_centers[0, b_faces] < 0.0001] x_max = b_faces[g.face_centers[0, b_faces] > 0.9999 * Lx] y_min = b_faces[g.face_centers[1, b_faces] < 0.0001] y_max = b_faces[g.face_centers[1, b_faces] > 0.9999 * Ly] # Extracting indices of boundary faces w.r.t b_faces west = np.in1d(b_faces, x_min).nonzero() east = np.in1d(b_faces, x_max).nonzero() south = np.in1d(b_faces, y_min).nonzero() north = np.in1d(b_faces, y_max).nonzero() # Mechanics boundary conditions bc_mech, bc_val_mech = get_bc_mechanics(g, b_faces, x_min, x_max, west, east, y_min, y_max, south, north) # FLOW BOUNDARY CONDITIONS bc_flow, bc_val_flow = get_bc_flow(g, b_faces, x_min, x_max, west, east, y_min, y_max, south, north) # ### Initialiazing solution and solver dicitionaries # In[338]: # Solution dictionary sol = dict() sol['time'] = np.zeros(tLevels + 1, dtype=float) sol['displacement'] = np.zeros((tLevels + 1, g.num_cells * g.dim), dtype=float) sol['displacement_faces'] = np.zeros( (tLevels + 1, g.num_faces * g.dim * 2), dtype=float) sol['pressure'] = np.zeros((tLevels + 1, g.num_cells), dtype=float) sol['traction'] = np.zeros((tLevels + 1, g.num_faces * g.dim), dtype=float) sol['flux'] = np.zeros((tLevels + 1, g.num_faces), dtype=float) sol['iter'] = np.array([], dtype=int) sol['time_step'] = np.array([], dtype=float) sol['residual'] = np.array([], dtype=float) # Solver dictionary newton_param = dict() newton_param['tol'] = 1E-8 # maximum tolerance newton_param['max_iter'] = 20 # maximum number of iterations newton_param['res_norm'] = 1000 # initializing residual newton_param['iter'] = 1 # iteration # ### Discrete operators and discrete equations # ### Flow operators # In[339]: F = lambda x: biot_F * x # Flux operator boundF = lambda x: biot_boundF * x # Bound Flux operator compat = lambda x: biot_compat * x # Compatibility operator (Stabilization term) divF = lambda x: biot_divF * x # Scalar divergence operator # ### Mechanics operators # In[340]: S = lambda x: biot_S * x # Stress operator boundS = lambda x: biot_boundS * x # Bound Stress operator divU = lambda x: biot_divU * x # Divergence of displacement field divS = lambda x: biot_divS * x # Vector divergence operator gradP = lambda x: biot_divS * biot_gradP * x # Pressure gradient operator boundDivU = lambda x: biot_boundDivU * x # Bound Divergence of displacement operator boundUCell = lambda x: biot_boundUCell * x # Contribution of displacement at cells -> Face displacement boundUFace = lambda x: biot_boundUFace * x # Contribution of bc_mech at the boundaries -> Face displacement boundUPressure = lambda x: biot_boundUPressure * x # Contribution of pressure at cells -> Face displacement # ### Discrete equations # In[341]: # Source terms f_mech = source_mechanics(g) f_flow = source_flow(g, dt[0]) # Generalized Hooke's law T = lambda u: S(u) + 
boundS(bc_val_mech) # Momentum conservation equation (I) u_eq1 = lambda u: divS(T(u)) # Momentum conservation equation (II) u_eq2 = lambda p: -gradP(p) + f_mech * V[0] # Darcy's law Q = lambda p: (1. / mu_f) * (F(p) + boundF(bc_val_flow)) # Mass conservation equation (I) p_eq1 = lambda u, u_n: alpha_biot * divU(u - u_n) # Mass conservation equation (II) p_eq2 = lambda p, p_n, dt: (p - p_n) * S_m * V + divF(Q( p)) * dt + alpha_biot * compat(p - p_n) * V[0] - (f_flow / dt) * V[0] # ## Creating AD variables # In[343]: # Create displacement AD-variable u_ad = Ad_array(np.zeros(g.num_cells * 2), sps.diags(np.ones(g.num_cells * g.dim))) # Create pressure AD-variable p_ad = Ad_array(np.zeros(g.num_cells), sps.diags(np.ones(g.num_cells))) # ## Performing discretization # In[344]: d = dict() # initialize dictionary to store data # Mechanics data object specified_parameters_mech = { "fourth_order_tensor": constit, "bc": bc_mech, "biot_alpha": 1., "bc_values": bc_val_mech } pp.initialize_default_data(g, d, "mechanics", specified_parameters_mech) # Flow data object specified_parameters_flow = { "second_order_tensor": perm, "bc": bc_flow, "biot_alpha": 1., "bc_values": bc_val_flow } pp.initialize_default_data(g, d, "flow", specified_parameters_flow) # Biot discretization solver_biot = pp.Biot("mechanics", "flow") solver_biot.discretize(g, d) # Mechanics discretization matrices biot_S = d['discretization_matrices']['mechanics']['stress'] biot_boundS = d['discretization_matrices']['mechanics']['bound_stress'] biot_divU = d['discretization_matrices']['mechanics']['div_d'] biot_gradP = d['discretization_matrices']['mechanics']['grad_p'] biot_boundDivU = d['discretization_matrices']['mechanics']['bound_div_d'] biot_boundUCell = d['discretization_matrices']['mechanics'][ 'bound_displacement_cell'] biot_boundUFace = d['discretization_matrices']['mechanics'][ 'bound_displacement_face'] biot_boundUPressure = d['discretization_matrices']['mechanics'][ 'bound_displacement_pressure'] biot_divS = pp.fvutils.vector_divergence(g) # Flow discretization matrices biot_F = d['discretization_matrices']['flow']['flux'] biot_boundF = d['discretization_matrices']['flow']['bound_flux'] biot_compat = d['discretization_matrices']['flow']['biot_stabilization'] biot_divF = pp.fvutils.scalar_divergence(g) # Saving initial condition sol['pressure'][0] = p_ad.val sol['displacement'][0] = u_ad.val sol['displacement_faces'][0] = (boundUCell(sol['displacement'][0]) + boundUFace(bc_val_mech) + boundUPressure(sol['pressure'][0])) sol['time'][0] = times[0] sol['traction'][0] = T(u_ad.val) sol['flux'][0] = Q(p_ad.val) # ## The time loop # In[345]: tt = 0 # time counter while times[tt] < times[-1]: tt += 1 # increasing time counter # Displacement and pressure at the previous time step u_n = u_ad.val.copy() p_n = p_ad.val.copy() # Updating residual and iteration at each time step newton_param.update({'res_norm': 1000, 'iter': 1}) # Newton loop while newton_param['res_norm'] > newton_param['tol'] and newton_param[ 'iter'] <= newton_param['max_iter']: # Calling equations eq1 = u_eq1(u_ad) eq2 = u_eq2(p_ad) eq3 = p_eq1(u_ad, u_n) eq4 = p_eq2(p_ad, p_n, dt[tt - 1]) # Assembling Jacobian of the coupled system J_mech = np.hstack( (eq1.jac, eq2.jac)) # Jacobian blocks (mechanics) J_flow = np.hstack((eq3.jac, eq4.jac)) # Jacobian blocks (flow) J = sps.bmat(np.vstack((J_mech, J_flow)), format='csc') # Jacobian (coupled) # Determining residual of the coupled system R_mech = eq1.val + eq2.val # Residual (mechanics) R_flow = eq3.val + eq4.val # Residual 
(flow) R = np.hstack((R_mech, R_flow)) # Residual (coupled) y = sps.linalg.spsolve(J, -R) # u_ad.val = u_ad.val + y[:g.dim * g.num_cells] # Newton update p_ad.val = p_ad.val + y[g.dim * g.num_cells:] # newton_param['res_norm'] = np.linalg.norm(R) # Updating residual if newton_param['res_norm'] <= newton_param[ 'tol'] and newton_param['iter'] <= newton_param['max_iter']: print('Iter: {} \t Error: {:.8f} [m]'.format( newton_param['iter'], newton_param['res_norm'])) elif newton_param['iter'] > newton_param['max_iter']: print('Error: Newton method did not converge!') else: newton_param['iter'] += 1 # Saving variables sol['iter'] = np.concatenate( (sol['iter'], np.array([newton_param['iter']]))) sol['residual'] = np.concatenate( (sol['residual'], np.array([newton_param['res_norm']]))) sol['time_step'] = np.concatenate((sol['time_step'], dt)) sol['pressure'][tt] = p_ad.val sol['displacement'][tt] = u_ad.val sol['displacement_faces'][tt] = (boundUCell(sol['displacement'][tt]) + boundUFace(bc_val_mech) + boundUPressure(sol['pressure'][tt])) sol['time'][tt] = times[tt] sol['traction'][tt] = T(u_ad.val) sol['flux'][tt] = Q(p_ad.val) # Determining analytical solution sol_anal = analytical(g) # Determining norms p_norm = np.linalg.norm(sol_anal['p'] - sol['pressure'][-1]) / ( np.linalg.norm(sol['pressure'][-1])) u_mag_num = np.sqrt(sol['displacement'][-1][::2]**2 + sol['displacement'][-1][1::2]**2) u_mag_ana = np.sqrt(sol_anal['u'][::2]**2 + sol_anal['u'][1::2]**2) u_norm = np.linalg.norm(u_mag_ana - u_mag_num) / np.linalg.norm(u_mag_num) return p_norm, u_norm
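# Hedged toy example of the boundary-face indexing used above: np.in1d locates the position
# of each side's faces within the global boundary-face array.
import numpy as np

b_faces_demo = np.array([0, 3, 7, 9, 12])        # global indices of boundary faces
x_min_demo = np.array([0, 9])                    # faces detected on the west side

west_demo = np.in1d(b_faces_demo, x_min_demo).nonzero()[0]
print(west_demo)                                 # -> [0 3], positions w.r.t. b_faces_demo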
def _bravyi_kitaev_mode(self, n): """ Bravyi-Kitaev mode. Args: n (int): number of modes Returns: numpy.ndarray: Array of mode indexes """ def parity_set(j, n): """Computes the parity set of the j-th orbital in n modes. Args: j (int) : the orbital index n (int) : the total number of modes Returns: numpy.ndarray: Array of mode indexes """ indexes = np.array([]) if n % 2 != 0: return indexes if j < n / 2: indexes = np.append(indexes, parity_set(j, n / 2)) else: indexes = np.append(indexes, np.append( parity_set(j - n / 2, n / 2) + n / 2, n / 2 - 1)) return indexes def update_set(j, n): """Computes the update set of the j-th orbital in n modes. Args: j (int) : the orbital index n (int) : the total number of modes Returns: numpy.ndarray: Array of mode indexes """ indexes = np.array([]) if n % 2 != 0: return indexes if j < n / 2: indexes = np.append(indexes, np.append( n - 1, update_set(j, n / 2))) else: indexes = np.append(indexes, update_set(j - n / 2, n / 2) + n / 2) return indexes def flip_set(j, n): """Computes the flip set of the j-th orbital in n modes. Args: j (int) : the orbital index n (int) : the total number of modes Returns: numpy.ndarray: Array of mode indexes """ indexes = np.array([]) if n % 2 != 0: return indexes if j < n / 2: indexes = np.append(indexes, flip_set(j, n / 2)) elif j >= n / 2 and j < n - 1: # pylint: disable=chained-comparison indexes = np.append(indexes, flip_set(j - n / 2, n / 2) + n / 2) else: indexes = np.append(np.append(indexes, flip_set( j - n / 2, n / 2) + n / 2), n / 2 - 1) return indexes a_list = [] # FIND BINARY SUPERSET SIZE bin_sup = 1 # pylint: disable=comparison-with-callable while n > np.power(2, bin_sup): bin_sup += 1 # DEFINE INDEX SETS FOR EVERY FERMIONIC MODE update_sets = [] update_pauli = [] parity_sets = [] parity_pauli = [] flip_sets = [] remainder_sets = [] remainder_pauli = [] for j in range(n): update_sets.append(update_set(j, np.power(2, bin_sup))) update_sets[j] = update_sets[j][update_sets[j] < n] parity_sets.append(parity_set(j, np.power(2, bin_sup))) parity_sets[j] = parity_sets[j][parity_sets[j] < n] flip_sets.append(flip_set(j, np.power(2, bin_sup))) flip_sets[j] = flip_sets[j][flip_sets[j] < n] remainder_sets.append(np.setdiff1d(parity_sets[j], flip_sets[j])) update_pauli.append(Pauli(np.zeros(n, dtype=np.bool), np.zeros(n, dtype=np.bool))) parity_pauli.append(Pauli(np.zeros(n, dtype=np.bool), np.zeros(n, dtype=np.bool))) remainder_pauli.append(Pauli(np.zeros(n, dtype=np.bool), np.zeros(n, dtype=np.bool))) for k in range(n): if np.in1d(k, update_sets[j]): update_pauli[j].update_x(True, k) if np.in1d(k, parity_sets[j]): parity_pauli[j].update_z(True, k) if np.in1d(k, remainder_sets[j]): remainder_pauli[j].update_z(True, k) x_j = Pauli(np.zeros(n, dtype=np.bool), np.zeros(n, dtype=np.bool)) x_j.update_x(True, j) y_j = Pauli(np.zeros(n, dtype=np.bool), np.zeros(n, dtype=np.bool)) y_j.update_z(True, j) y_j.update_x(True, j) a_list.append((update_pauli[j] * x_j * parity_pauli[j], update_pauli[j] * y_j * remainder_pauli[j])) return a_list
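# Hedged illustration of how the update/parity/remainder index sets above become Pauli
# masks: np.in1d over the qubit indices yields the boolean pattern of affected qubits.
import numpy as np

n_modes = 4
parity_demo = np.array([0, 2])                        # hypothetical parity set of one mode
z_mask = np.in1d(np.arange(n_modes), parity_demo)     # qubits that receive a Z factor
print(z_mask)                                         # -> [ True False  True False]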
def detect_peaks(x, mph=None, mpd=1, threshold=0, edge='rising', kpsh=False, valley=False, show=False, ax=None): """ Returns ------- ind : 1D array_like indeces of the peaks in `x`. Notes ----- The detection of valleys instead of peaks is performed internally by simply negating the data: `ind_valleys = detect_peaks(-x)` The function can handle NaN's Copyright 2015. Marcos Duarte. MIT license. """ x = np.atleast_1d(x).astype('float64') if x.size < 3: return np.array([], dtype=int) if valley: x = -x # find indices of all peaks dx = x[1:] - x[:-1] # handle NaN's indnan = np.where(np.isnan(x))[0] if indnan.size: x[indnan] = np.inf dx[np.where(np.isnan(dx))[0]] = np.inf ine, ire, ife = np.array([[], [], []], dtype=int) if not edge: ine = np.where((np.hstack((dx, 0)) < 0) & (np.hstack((0, dx)) > 0))[0] else: if edge.lower() in ['rising', 'both']: ire = np.where((np.hstack((dx, 0)) <= 0) & (np.hstack((0, dx)) > 0))[0] if edge.lower() in ['falling', 'both']: ife = np.where((np.hstack((dx, 0)) < 0) & (np.hstack((0, dx)) >= 0))[0] ind = np.unique(np.hstack((ine, ire, ife))) # handle NaN's if ind.size and indnan.size: # NaN's and values close to NaN's cannot be peaks ind = ind[np.in1d(ind, np.unique(np.hstack( (indnan, indnan - 1, indnan + 1))), invert=True)] # first and last values of x cannot be peaks if ind.size and ind[0] == 0: ind = ind[1:] if ind.size and ind[-1] == x.size - 1: ind = ind[:-1] # remove peaks < minimum peak height if ind.size and mph is not None: ind = ind[x[ind] >= mph] # remove peaks - neighbors < threshold if ind.size and threshold > 0: dx = np.min(np.vstack([x[ind] - x[ind - 1], x[ind] - x[ind + 1]]), axis=0) ind = np.delete(ind, np.where(dx < threshold)[0]) # detect small peaks closer than minimum peak distance if ind.size and mpd > 1: ind = ind[np.argsort(x[ind])][::-1] # sort ind by peak height idel = np.zeros(ind.size, dtype=bool) for i in range(ind.size): if not idel[i]: # keep peaks with the same height if kpsh is True idel = idel | (ind >= ind[i] - mpd) & (ind <= ind[i] + mpd) \ & (x[ind[i]] > x[ind] if kpsh else True) idel[i] = 0 # Keep current peak # remove the small peaks and sort back the indices by their occurrence ind = np.sort(ind[~idel]) return ind
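# Minimal usage sketch for detect_peaks (synthetic signal; parameter values are arbitrary):
import numpy as np

rng = np.random.RandomState(0)
x_demo = np.sin(np.linspace(0, 4 * np.pi, 200)) + 0.1 * rng.randn(200)
peaks = detect_peaks(x_demo, mph=0.5, mpd=20)   # indices of prominent, well-separated peaks
print(peaks)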
def _bfswpf(DLF, bus, gen, branch, baseMVA, Ybus, Sbus, V0, ref, pv, pq, buses_ordered_bfs_nets, options, **kwargs): """ distribution power flow solution according to [1] :param DLF: direct-Load-Flow matrix which relates bus current injections to voltage drops from the root bus :param bus: buses martix :param gen: generators matrix :param branch: branches matrix :param baseMVA: :param Ybus: bus admittance matrix :param Sbus: vector of power injections :param V0: initial voltage state vector :param ref: reference bus index :param pv: PV buses indices :param pq: PQ buses indices :param buses_ordered_bfs_nets: buses ordered according to breadth-first search :return: power flow result """ enforce_q_lims = options["enforce_q_lims"] tolerance_mva = options["tolerance_mva"] max_iteration = options["max_iteration"] voltage_depend_loads = options["voltage_depend_loads"] # setting options max_it = max_iteration # maximum iterations verbose = kwargs["VERBOSE"] # verbose is set in run._runpppf() # # tolerance for the inner loop for PV nodes if 'tolerance_mva_pv' in kwargs: tol_mva_inner = kwargs['tolerance_mva_pv'] else: tol_mva_inner = 1.e-2 if 'max_iter_pv' in kwargs: max_iter_pv = kwargs['max_iter_pv'] else: max_iter_pv = 20 nobus = bus.shape[0] ngen = gen.shape[0] mask_root = ~(bus[:, BUS_TYPE] == 3) # mask for eliminating root bus norefs = len(ref) Ysh = _makeYsh_bfsw(bus, branch, baseMVA) # detect generators on PV buses which have status ON gen_pv = np.in1d(gen[:, GEN_BUS], pv) & (gen[:, GEN_STATUS] > 0) qg_lim = np.zeros( ngen, dtype=bool) # initialize generators which violated Q limits Iinj = np.conj(Sbus / V0) - Ysh * V0 # Initial current injections # initiate reference voltage vector V_ref = np.ones(nobus, dtype=complex) for neti, buses_ordered_bfs in enumerate(buses_ordered_bfs_nets): V_ref[buses_ordered_bfs] *= V0[ref[neti]] V = V0.copy() n_iter = 0 converged = 0 if verbose: print(' -- AC Power Flow (Backward/Forward sweep)\n') while not converged and n_iter < max_it: n_iter_inner = 0 n_iter += 1 deltaV = DLF * Iinj[mask_root] V[mask_root] = V_ref[mask_root] + deltaV # ## # inner loop for considering PV buses # TODO improve PV buses inner loop inner_loop_converged = False while not inner_loop_converged and len(pv) > 0: pvi = pv - norefs # internal PV buses indices, assuming reference node is always 0 Vmis = (np.abs(gen[gen_pv, VG]))**2 - (np.abs(V[pv]))**2 # TODO improve getting values from sparse DLF matrix - DLF[pvi, pvi] is unefficient dQ = (Vmis / (2 * DLF[pvi, pvi].A1.imag)).flatten() gen[gen_pv, QG] += dQ if enforce_q_lims: # check Q violation limits ## find gens with violated Q constraints qg_max_lim = (gen[:, QG] > gen[:, QMAX]) & gen_pv qg_min_lim = (gen[:, QG] < gen[:, QMIN]) & gen_pv if qg_min_lim.any(): gen[qg_min_lim, QG] = gen[qg_min_lim, QMIN] bus[gen[qg_min_lim, GEN_BUS].astype(int), BUS_TYPE] = 1 # convert to PQ bus if qg_max_lim.any(): gen[qg_max_lim, QG] = gen[qg_max_lim, QMAX] bus[gen[qg_max_lim, GEN_BUS].astype(int), BUS_TYPE] = 1 # convert to PQ bus # TODO: correct: once all the PV buses are converted to PQ buses, conversion back to PV is not possible qg_lim_new = qg_min_lim | qg_max_lim if qg_lim_new.any(): pq2pv = (qg_lim != qg_lim_new) & qg_lim # convert PQ to PV bus if pq2pv.any(): bus[gen[qg_max_lim, GEN_BUS].astype(int), BUS_TYPE] = 2 # convert to PV bus qg_lim = qg_lim_new.copy() ref, pv, pq = bustypes(bus, gen) # avoid calling makeSbus, update only Sbus for pv nodes Sbus = (makeSbus(baseMVA, bus, gen, vm=abs(V)) if voltage_depend_loads else makeSbus(baseMVA, 
bus, gen)) Iinj = np.conj(Sbus / V) - Ysh * V deltaV = DLF * Iinj[mask_root] V[mask_root] = V_ref[mask_root] + deltaV if n_iter_inner > max_iter_pv: raise LoadflowNotConverged( " FBSW Power Flow did not converge - inner iterations for PV nodes " "reached maximum value of {0}!".format(max_iter_pv)) n_iter_inner += 1 if np.all(np.abs(dQ) < tol_mva_inner ): # inner loop termination criterion inner_loop_converged = True # testing termination criterion - if voltage_depend_loads: Sbus = makeSbus(baseMVA, bus, gen, vm=abs(V)) F = _evaluate_Fx(Ybus, V, Sbus, ref, pv, pq) # check tolerance converged = _check_for_convergence(F, tolerance_mva) if converged and verbose: print("\nFwd-back sweep power flow converged in " "{0} iterations.\n".format(n_iter)) # updating injected currents Iinj = np.conj(Sbus / V) - Ysh * V return V, converged
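# Hedged toy example of the PV-generator mask used above: np.in1d marks generators whose
# bus is in the PV list and which are switched on. The column positions are assumed here.
import numpy as np

GEN_BUS_DEMO, GEN_STATUS_DEMO = 0, 7
gen_demo = np.array([[2., 0, 0, 0, 0, 0, 0, 1],
                     [5., 0, 0, 0, 0, 0, 0, 0]])
pv_demo = np.array([2, 4])

gen_pv_demo = np.in1d(gen_demo[:, GEN_BUS_DEMO], pv_demo) & (gen_demo[:, GEN_STATUS_DEMO] > 0)
print(gen_pv_demo)    # -> [ True False]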
#####################################
terms = getTerms.get()  # get dictionary of terms for each subject

tmp = np.recfromcsv('filename_subject_list.csv')  # read in list of file vs subj
docSubj = dict(zip(tmp['filename'], tmp['subject']))  # dict of name and subj

tmp = np.recfromtxt('./outFiles/part-r-00000', delimiter='\t')  # output and topN
files = tmp[:, 0]
topN = tmp[:, 1:]

val = np.empty(topN.shape, dtype=bool)
for n, (f, t) in enumerate(zip(files, topN)):
    # see if topN term is within the subject's term list
    val[n] = np.in1d(t, terms[docSubj[f]])

head = np.char.add('term', np.char.mod('%d', range(1, val.shape[1] + 1)))  # header
valString = np.char.mod('%i', val)  # convert True/False into '1'/'0'
np.savetxt('./outFiles/validation.csv',
           np.hstack((files[:, None], valString)),
           fmt='%s', delimiter=',',
           header='fileName,' + ','.join(head), comments='')
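# Hedged toy example of the per-document check above: np.in1d tests which of a document's
# top-N terms appear in its subject's term list.
import numpy as np

topn_terms = np.array(['cell', 'protein', 'car'])
subject_terms = ['cell', 'protein', 'gene']
print(np.in1d(topn_terms, subject_terms).astype(int))   # -> [1 1 0]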
def run(self, workspace): x_name = self.x_name.value y_name = self.y_name.value object_set = workspace.object_set x = object_set.get_objects(x_name) x_data = x.segmented dimensions = x.dimensions y_data = x.segmented.copy() reference_name = self.reference_name.value reference = object_set.get_objects(reference_name) reference_data = reference.segmented # Get the parent object labels outer_labels = numpy.unique(reference_data) if self.remove_orphans.value: # Get the child object labels inner_labels = numpy.unique(x_data) # Find the discrepancies between child and parent orphans = numpy.setdiff1d(inner_labels, outer_labels) # Remove them from the original array orphan_mask = numpy.in1d(x_data, orphans) # orphan_mask here is a 1D array, but it has the same number of elements # as y_data. Since we know that, we can reshape it to the original array # shape and use it as a boolean mask to take out the orphaned objects y_data[orphan_mask.reshape(x_data.shape)] = 0 for obj in outer_labels: # Ignore the background if obj == 0: continue # Find where in the array the child object is outside of the # parent object (i.e. where the original array is *not* that # object *and* where the child array is that object) constrain_mask = (reference_data != obj) & (x_data == obj) # Remove those parts outside the parent y_data[constrain_mask] = 0 # Only remove intruding pieces if the user has requested it if self.coersion_method.value == METHOD_REMOVE: intrude_mask = (reference_data == obj) & (x_data != obj) & (x_data != 0) y_data[intrude_mask] = 0 objects = cellprofiler_core.object.Objects() objects.segmented = y_data objects.parent_image = x.parent_image workspace.object_set.add_objects(objects, y_name) self.add_measurements(workspace) if self.show_window: workspace.display_data.x_data = x_data workspace.display_data.y_data = y_data workspace.display_data.reference = reference_data workspace.display_data.dimensions = dimensions
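# Hedged toy example of the orphan-removal pattern above: child labels with no matching
# parent label are found with setdiff1d and zeroed out through a reshaped in1d mask.
import numpy as np

child = np.array([[1, 1, 2],
                  [3, 3, 0]])
parent_labels = np.array([0, 1, 3])                 # label 2 has no parent object

orphans = np.setdiff1d(np.unique(child), parent_labels)
orphan_mask = np.in1d(child, orphans).reshape(child.shape)
child[orphan_mask] = 0                              # the orphaned object is removed
print(child)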
print("found ", nr.nodeIndex, " nodes") print("found ", wr.edgeIndex, " edges") # sort edges by source node index print("sorting nodes by id...") nodes = nodes[nodes[:, 0].argsort()] print("done...") print("sorting edges by source node...") edgesOut = edgesOut[edgesOut[:, 0].argsort()] edgesIn = edgesIn[edgesIn[:, 0].argsort()] print("done...") print("remove edges with no node in sources") se1 = len(edgesOut) edgesOut = edgesOut[numpy.in1d(edgesOut[:, 0], nodes[:, 0])] edgesIn = edgesIn[numpy.in1d(edgesIn[:, 0], nodes[:, 0])] se2 = len(edgesOut) print("removed ", (se1 - se2), " edges because of missing source node") print("count number of edges per source node") offsetsOut[0] = 0 offsetsIn[0] = 0 for i in range(0, len(nodes) - 1): if i % 100000 == 0: print(round(float(i) / len(nodes) * 100.0), "% done") #print(i+offsets[i]) startIndex = numpy.searchsorted(edgesOut[:, 0], nodes[i][0])
from sklearn.metrics.pairwise import euclidean_distances  # required by the distance computation below

for index, center in enumerate(centers):
    centers[index] = np.random.uniform(minValue, maxValue, 5)

for iteration in range(iterations):
    # Set pixels to their cluster
    for idx, data in enumerate(pixel_matrix_scaled):
        distanceToCenters = np.zeros(K)
        for index, center in enumerate(centers):
            distanceToCenters[index] = euclidean_distances(
                data.reshape(1, -1), center.reshape(1, -1))
        pixel_cluster_vector[idx] = np.argmin(distanceToCenters)

    ##################################################################################################
    # Check if a cluster is ever empty; if so, assign a random datapoint to it
    clusterToCheck = np.arange(K)  # array with all cluster ids,
    # e.g. for K=10, array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
    clustersEmpty = np.in1d(clusterToCheck, pixel_cluster_vector)
    # ^ boolean array of length K; False means that cluster is empty
    for index, item in enumerate(clustersEmpty):
        if not item:
            pixel_cluster_vector[np.random.randint(len(pixel_cluster_vector))] = index
            # ^ assigns a random pixel to that cluster, as mentioned in the homework writeup

    ##################################################################################################
    # Move centers to the centroid of their cluster
    for i in range(K):
        dataInCenter = []
        for index, item in enumerate(pixel_cluster_vector):
            if item == i:
                dataInCenter.append(pixel_matrix_scaled[index])
        dataInCenter = np.array(dataInCenter)
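# Hedged toy example of the empty-cluster check above: a cluster id that never occurs in
# the assignment vector shows up as False in the np.in1d result.
import numpy as np

K_demo = 4
assignments = np.array([0, 0, 2, 3, 3])
empty = ~np.in1d(np.arange(K_demo), assignments)
print(np.flatnonzero(empty))    # -> [1], cluster 1 currently has no members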
import time
import numpy as n
import EmergeIterate

t0 = time.time()
iterate = EmergeIterate.EmergeIterate(22, 'MD10')
iterate.open_snapshots()
iterate.init_new_quantities()
#iterate.map_halos_between_snapshots()

import pandas as pd

print('N f1 halos', len(iterate.f1['/halo_properties/id'].value))
print('N f0 halos', len(iterate.f0['/halo_properties/id'].value))

f1_new_halos = (n.in1d(iterate.f1['/halo_properties/id'].value,
                       iterate.f0['/halo_properties/desc_id'].value) == False)
# new halos [f1_new_halos] to be fed to EmergeIterate.compute_qtys_new_halos_pk
f1_evolved_halos = (f1_new_halos == False)

f0_propagated_halos = n.in1d(iterate.f0['/halo_properties/desc_id'].value,
                             iterate.f1['/halo_properties/id'].value)
f0_lost_halos = (f0_propagated_halos == False)

print('lost halos', len(iterate.f0['/halo_properties/desc_id'].value[f0_lost_halos]))
print('propagated halos', len(iterate.f0['/halo_properties/desc_id'].value[f0_propagated_halos]))
print('new halos', len(iterate.f1['/halo_properties/desc_id'].value[f1_new_halos]))
print('evolved halos', len(iterate.f1['/halo_properties/id'].value[f1_evolved_halos]))

# among f0_propagated_halos
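# Hedged sketch of the matching logic above with plain numpy arrays: ids in the
# later snapshot that do not appear among the earlier snapshot's descendant ids
# are "new", the rest are "evolved". The id values are invented for illustration.
import numpy as np

f1_id = np.array([10, 11, 12, 13])     # halo ids in the later snapshot
f0_desc_id = np.array([10, 12, 99])    # descendant ids predicted by the earlier snapshot

f1_new = ~np.in1d(f1_id, f0_desc_id)          # not descended from any f0 halo
f1_evolved = ~f1_new
f0_propagated = np.in1d(f0_desc_id, f1_id)    # descendant actually found in f1
f0_lost = ~f0_propagated

print('new', f1_id[f1_new])            # [11 13]
print('evolved', f1_id[f1_evolved])    # [10 12]
print('lost', f0_desc_id[f0_lost])     # [99]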
label='Supplied Data') ax0.plot(qdata, Idata, 'bo', alpha=0.5, label='Interpolated Data') ax0.plot(qbinsc, Imean, 'r.', label='Scattering from Density') handles, labels = ax0.get_legend_handles_labels() handles = [handles[2], handles[0], handles[1]] labels = [labels[2], labels[0], labels[1]] ymin = np.min(np.hstack((I, Idata, Imean))) ymax = np.max(np.hstack((I, Idata, Imean))) ax0.set_ylim([0.5 * ymin, 1.5 * ymax]) ax0.legend(handles, labels) ax0.semilogy() ax0.set_ylabel('I(q)') ax1 = plt.subplot(gs[1]) ax1.plot(qdata, qdata * 0, 'k--') residuals = np.log10(Imean[np.in1d(qbinsc, qdata)]) - np.log10(Idata) ax1.plot(qdata, residuals, 'ro-') ylim = ax1.get_ylim() ymax = np.max(np.abs(ylim)) n = int(.9 * len(residuals)) ymax = np.max(np.abs(residuals[:-n])) ax1.set_ylim([-ymax, ymax]) ax1.yaxis.major.locator.set_params(nbins=5) xlim = ax0.get_xlim() ax1.set_xlim(xlim) ax1.set_ylabel('Residuals') ax1.set_xlabel(r'q ($\mathrm{\AA^{-1}}$)') #plt.setp(ax0.get_xticklabels(), visible=False) plt.tight_layout() plt.savefig(args.output + '_fit.png', dpi=150) plt.close()
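# Small sketch of the residual matching above: np.in1d picks out the entries of
# the binned q grid that exactly equal the experimental q values, so model and
# data intensities can be compared point by point (this relies on exact float
# equality between the two q arrays). All values below are illustrative only.
import numpy as np

qbinsc = np.array([0.01, 0.02, 0.03, 0.04, 0.05])   # binned q grid of the model
Imean = np.array([100.0, 80.0, 60.0, 45.0, 30.0])   # model intensity on that grid
qdata = np.array([0.02, 0.04])                      # experimental q values
Idata = np.array([82.0, 44.0])                      # experimental intensities

matched = Imean[np.in1d(qbinsc, qdata)]             # model I(q) at the data q's
residuals = np.log10(matched) - np.log10(Idata)
print(residuals)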
def evaluateRecommender(self, recommender_object, n_processes=None): """ :param recommender_object: the trained recommender object, a Recommender subclass :param URM_test_list: list of URMs to test the recommender against, or a single URM object :param cutoff_list: list of cutoffs to be use to report the scores, or a single cutoff """ if n_processes is None: n_processes = int(multiprocessing.cpu_count() / 2) start_time = time.time() # Split the users to evaluate n_processes = min(n_processes, len(self.usersToEvaluate)) batch_len = int(len(self.usersToEvaluate) / n_processes) batch_len = max(batch_len, 1) sequential_evaluators_list = [] sequential_evaluators_n_users_list = [] for n_evaluator in range(n_processes): stat_user = n_evaluator * batch_len end_user = min((n_evaluator + 1) * batch_len, len(self.usersToEvaluate)) if n_evaluator == n_processes - 1: end_user = len(self.usersToEvaluate) batch_users = self.usersToEvaluate[stat_user:end_user] sequential_evaluators_n_users_list.append(len(batch_users)) not_in_batch_users = np.in1d(self.usersToEvaluate, batch_users, invert=True) not_in_batch_users = np.array( self.usersToEvaluate)[not_in_batch_users] new_evaluator = _ParallelEvaluator_batch( self.URM_test, self.cutoff_list, ignore_users=not_in_batch_users) sequential_evaluators_list.append(new_evaluator) if self.ignore_items_flag: recommender_object.set_items_to_ignore(self.ignore_items_ID) run_parallel_evaluator_partial = partial( _run_parallel_evaluator, recommender_object=recommender_object) pool = multiprocessing.Pool(processes=n_processes, maxtasksperchild=1) resultList = pool.map(run_parallel_evaluator_partial, sequential_evaluators_list) print( "ParallelEvaluator: Processed {} ( {:.2f}% ) in {:.2f} seconds. Users per second: {:.0f}" .format( len(self.usersToEvaluate), 100.0 * float(len(self.usersToEvaluate)) / len(self.usersToEvaluate), time.time() - start_time, float(len(self.usersToEvaluate)) / (time.time() - start_time))) sys.stdout.flush() sys.stderr.flush() results_dict = {} n_users_evaluated = 0 for cutoff in self.cutoff_list: results_dict[cutoff] = create_empty_metrics_dict( self.n_items, self.n_users, recommender_object.URM_train, self.ignore_items_ID, self.ignore_users_ID, cutoff, self.diversity_object) for new_result_index in range(len(resultList)): new_result, n_users_evaluated_batch = resultList[new_result_index] n_users_evaluated += n_users_evaluated_batch results_dict = _merge_results_dict(results_dict, new_result, n_users_evaluated_batch) for cutoff in self.cutoff_list: for key in results_dict[cutoff].keys(): results_dict[cutoff][key] /= len(self.usersToEvaluate) if n_users_evaluated > 0: for cutoff in self.cutoff_list: results_current_cutoff = results_dict[cutoff] for key in results_current_cutoff.keys(): value = results_current_cutoff[key] if isinstance(value, Metrics_Object): results_current_cutoff[key] = value.get_metric_value() else: results_current_cutoff[key] = value / n_users_evaluated precision_ = results_current_cutoff[ EvaluatorMetrics.PRECISION.value] recall_ = results_current_cutoff[EvaluatorMetrics.RECALL.value] if precision_ + recall_ != 0: results_current_cutoff[EvaluatorMetrics.F1.value] = 2 * ( precision_ * recall_) / (precision_ + recall_) else: print( "WARNING: No users had a sufficient number of relevant items") sequential_evaluators_list = None sequential_evaluators_n_users_list = None if self.ignore_items_flag: recommender_object.reset_items_to_ignore() results_run_string = self.get_result_string(results_dict) return (results_dict, 
results_run_string)
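# Sketch of the batch/complement split used above: np.in1d with invert=True
# yields the users *outside* the current batch, which each per-process evaluator
# is then told to ignore. The user ids below are made up.
import numpy as np

users_to_evaluate = np.arange(10)        # user ids 0..9
batch_users = users_to_evaluate[0:4]     # users assigned to this process

not_in_batch = np.in1d(users_to_evaluate, batch_users, invert=True)
ignore_users = users_to_evaluate[not_in_batch]
print(ignore_users)                      # [4 5 6 7 8 9]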
def sampling(self, data=None, pars=None, chains=4, iter=2000,
             warmup=None, thin=1, seed=None, init='random',
             sample_file=None, diagnostic_file=None, verbose=False,
             **kwargs):
    """Draw samples from the model.

    Parameters
    ----------
    data : dict
        A Python dictionary providing the data for the model. Variables
        for Stan are stored in the dictionary as expected. Variable
        names are the keys and the values are their associated values.
        Stan only accepts certain kinds of values; see Notes.
    pars : list of string, optional
        A list of strings indicating parameters of interest. By default
        all parameters specified in the model will be stored.
    chains : int, optional
        Positive integer specifying number of chains. 4 by default.
    iter : int, 2000 by default
        Positive integer specifying how many iterations for each chain
        including warmup.
    warmup : int, iter//2 by default
        Positive integer specifying number of warmup (aka burn-in)
        iterations. As `warmup` also specifies the number of iterations
        used for step-size adaptation, warmup samples should not be used
        for inference.
    thin : int, 1 by default
        Positive integer specifying the period for saving samples.
    seed : int, optional
        The seed, a positive integer for random number generation. Only
        one seed is needed when multiple chains are used, as the other
        chain's seeds are generated from the first chain's to prevent
        dependency among random number streams. By default, seed is
        ``random.randint(0, MAX_UINT)``.
    init : {0, '0', 'random', function returning dict, list of dict}, optional
        Specifies how initial parameter values are chosen: 0 or '0'
        initializes all to be zero on the unconstrained support; 'random'
        generates random initial values; list of size equal to the number
        of chains (`chains`), where the list contains a dict with initial
        parameter values; function returning a dict with initial parameter
        values. The function may take an optional argument `chain_id`.
    sample_file : string, optional
        File name specifying where samples for *all* parameters and other
        saved quantities will be written. If not provided, no samples
        will be written. If the folder given is not writable, a temporary
        directory will be used. When there are multiple chains, an
        underscore and chain number are appended to the file name.
        By default do not write samples to file.
    diagnostic_file : str, optional
        File name indicating where diagnostic data for all parameters
        should be written. If not writable, a temporary directory is used.
    verbose : boolean, False by default
        Indicates whether intermediate output should be piped to the
        console. This output may be useful for debugging.

    Returns
    -------
    fit : StanFit4<model_name>
        Instance containing the fitted results.

    Other parameters
    ----------------
    chain_id : int, optional
        Iterable of unique ints naming chains or int with which to start.
    leapfrog_steps : int, optional
    epsilon : float, optional
    gamma : float, optional
    delta : float, optional
    equal_step_sizes : bool, optional
    max_treedepth : int, optional
    nondiag_mass : bool, optional
    test_grad : bool
        If True, Stan will not perform any sampling. Instead the gradient
        calculation is tested and printed out and the fitted stanfit4model
        object will be in test gradient mode. False is the default.
    refresh : int, optional
        Controls how to indicate progress during sampling. By default,
        `refresh` = max(iter//10, 1).

    Notes
    -----
    More details can be found in Stan's manual. The default sampler is
    NUTS2, where `leapfrog_steps` is ``-1`` and `equal_step_sizes` is
    False. To use NUTS with full mass matrix, set `nondiag_mass` to True.
    Examples
    --------
    >>> from pystan import StanModel
    >>> m = StanModel(model_code='parameters {real y;} model {y ~ normal(0,1);}')
    >>> m.sampling(iter=100)

    """
    # NOTE: in this function, iter masks iter() the python function.
    # If this ever turns out to be a problem just add:
    # iter_ = iter
    # del iter  # now builtins.iter is available
    if sample_file is not None:
        raise NotImplementedError("sample_file not supported yet")
    if diagnostic_file is not None:
        raise NotImplementedError("diagnostic_file not supported yet")
    if data is None:
        data = {}
    if warmup is None:
        warmup = int(iter // 2)

    data_r, data_i = pystan.misc._split_data(data)
    fit = self.fit_class(data_r, data_i)
    # store a copy of the data passed to fit in the class
    fit.data = {}
    fit.data.update(data_i)
    fit.data.update(data_r)

    m_pars = fit._get_param_names()
    p_dims = fit._get_param_dims()
    if pars is not None and len(pars) > 0:
        if not all(p in m_pars for p in pars):
            pars = np.asarray(pars)
            unmatched = pars[np.invert(np.in1d(pars, m_pars))]
            msg = "No parameter(s): {}; sampling not done."
            raise ValueError(msg.format(', '.join(unmatched)))
    if chains < 1:
        raise ValueError("The number of chains is less than one; "
                         "sampling not done.")
    if seed is None:
        seed = random.randint(0, MAX_UINT)
    seed = int(seed)

    args_list = pystan.misc._config_argss(chains=chains, iter=iter,
                                          warmup=warmup, thin=thin,
                                          init=init, seed=seed,
                                          sample_file=sample_file,
                                          diagnostic_file=diagnostic_file,
                                          **kwargs)

    # number of samples saved after thinning
    warmup2 = 1 + (warmup - 1) // thin
    n_kept = 1 + (iter - warmup - 1) // thin
    n_save = n_kept + warmup2

    samples, rets = [], []  # samples and return values
    if kwargs.get('test_grad') is None:
        mode = "SAMPLING"
    else:
        mode = "TESTING GRADIENT"
    # FIXME: use concurrent.futures to parallelize this
    for i in range(chains):
        if kwargs.get('refresh') is None or kwargs.get('refresh') > 0:
            chain_num = i + 1
            msg = "{} FOR MODEL {} NOW (CHAIN {})."
            logging.info(msg.format(mode, self.model_name, chain_num))
        ret, samples_i = fit._call_sampler(args_list[i])
        samples.append(samples_i)
        # call_sampler in stan_fit.hpp will raise a std::runtime_error
        # if the return value is non-zero. Cython will generate a
        # RuntimeError from this.
        # FIXME: should one mimic rstan and "return" an empty StanFit?
        # That is, should I wipe fit's attributes and return that?

    inits_used = pystan.misc._organize_inits([s['inits'] for s in samples],
                                             m_pars, p_dims)

    # test_gradient mode: don't sample
    if samples[0]['test_grad']:
        fit.sim = {'num_failed': [s['num_failed'] for s in samples]}
        return fit

    perm_lst = [np.random.permutation(n_kept) for _ in range(chains)]
    fnames_oi = fit._get_param_fnames_oi()
    n_flatnames = len(fnames_oi)
    fit.sim = {
        'samples': samples,
        # rstan has this; name clashes with 'chains' in samples[0]['chains']
        'chains': len(samples),
        'iter': iter,
        'warmup': warmup,
        'thin': thin,
        'n_save': [n_save] * chains,
        'warmup2': [warmup2] * chains,
        'permutation': perm_lst,
        'pars_oi': fit._get_param_names_oi(),
        'dims_oi': fit._get_param_dims(),
        'fnames_oi': fnames_oi,
        'n_flatnames': n_flatnames
    }
    fit.model_name = self.model_name
    fit.model_pars = m_pars
    fit.par_dims = p_dims
    fit.mode = 0
    fit.inits = inits_used
    fit.stan_args = args_list
    fit.stanmodel = self
    fit.date = datetime.datetime.now()
    return fit
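# Hedged sketch of the parameter-name check above: requested names that are not
# among the model's parameters are collected with in1d and reported together.
# The names here are invented and the error is printed rather than raised.
import numpy as np

m_pars = ['mu', 'sigma', 'lp__']     # parameters defined by the model
pars = np.asarray(['mu', 'tau'])     # parameters requested by the caller

unmatched = pars[np.invert(np.in1d(pars, m_pars))]
if unmatched.size:
    print("No parameter(s): {}; sampling not done.".format(', '.join(unmatched)))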
def _run_evaluation_on_selected_users(self, recommender_object, usersToEvaluate): start_time = time.time() start_time_print = time.time() results_dict = {} for cutoff in self.cutoff_list: results_dict[cutoff] = create_empty_metrics_dict( self.n_items, self.n_users, recommender_object.URM_train, self.ignore_items_ID, self.ignore_users_ID, cutoff, self.diversity_object) n_users_evaluated = 0 for test_user in usersToEvaluate: # Being the URM CSR, the indices are the non-zero column indexes relevant_items = self.get_user_relevant_items(test_user) n_users_evaluated += 1 recommended_items = recommender_object.recommend( test_user, remove_seen_flag=self.exclude_seen, cutoff=self.max_cutoff, remove_top_pop_flag=False, remove_CustomItems_flag=self.ignore_items_flag) is_relevant = np.in1d(recommended_items, relevant_items, assume_unique=True) for cutoff in self.cutoff_list: results_current_cutoff = results_dict[cutoff] is_relevant_current_cutoff = is_relevant[0:cutoff] recommended_items_current_cutoff = recommended_items[0:cutoff] results_current_cutoff[ EvaluatorMetrics.ROC_AUC.value] += roc_auc( is_relevant_current_cutoff) results_current_cutoff[ EvaluatorMetrics.PRECISION.value] += precision( is_relevant_current_cutoff, len(relevant_items)) results_current_cutoff[ EvaluatorMetrics.RECALL.value] += recall( is_relevant_current_cutoff, relevant_items) results_current_cutoff[EvaluatorMetrics.RECALL_TEST_LEN. value] += recall_min_test_len( is_relevant_current_cutoff, relevant_items) results_current_cutoff[EvaluatorMetrics.MAP.value] += map( is_relevant_current_cutoff, relevant_items) results_current_cutoff[EvaluatorMetrics.MRR.value] += rr( is_relevant_current_cutoff) results_current_cutoff[EvaluatorMetrics.NDCG.value] += ndcg( recommended_items_current_cutoff, relevant_items, relevance=self.get_user_test_ratings(test_user), at=cutoff) results_current_cutoff[ EvaluatorMetrics.HIT_RATE. value] += is_relevant_current_cutoff.sum() results_current_cutoff[EvaluatorMetrics.ARHR.value] += arhr( is_relevant_current_cutoff) results_current_cutoff[ EvaluatorMetrics.NOVELTY.value].add_recommendations( recommended_items_current_cutoff) results_current_cutoff[ EvaluatorMetrics.DIVERSITY_GINI.value].add_recommendations( recommended_items_current_cutoff) results_current_cutoff[EvaluatorMetrics.SHANNON_ENTROPY. value].add_recommendations( recommended_items_current_cutoff) results_current_cutoff[ EvaluatorMetrics.COVERAGE_ITEM.value].add_recommendations( recommended_items_current_cutoff) results_current_cutoff[ EvaluatorMetrics.COVERAGE_USER.value].add_recommendations( recommended_items_current_cutoff, test_user) results_current_cutoff[ EvaluatorMetrics.DIVERSITY_MEAN_INTER_LIST. value].add_recommendations( recommended_items_current_cutoff) results_current_cutoff[EvaluatorMetrics.DIVERSITY_HERFINDAHL. value].add_recommendations( recommended_items_current_cutoff) if EvaluatorMetrics.DIVERSITY_SIMILARITY.value in results_current_cutoff: results_current_cutoff[ EvaluatorMetrics.DIVERSITY_SIMILARITY. value].add_recommendations( recommended_items_current_cutoff) if time.time() - start_time_print > 30 or n_users_evaluated == len( self.usersToEvaluate): print( "SequentialEvaluator: Processed {} ( {:.2f}% ) in {:.2f} seconds. Users per second: {:.0f}" .format( n_users_evaluated, 100.0 * float(n_users_evaluated) / len(self.usersToEvaluate), time.time() - start_time, float(n_users_evaluated) / (time.time() - start_time))) sys.stdout.flush() sys.stderr.flush() start_time_print = time.time() return results_dict, n_users_evaluated
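# Toy illustration of the per-user accumulation above: np.in1d marks which
# recommended items are relevant (assume_unique is safe because both id lists
# hold unique items), and precision/recall at a cutoff follow from that boolean
# mask. Item ids and cutoff are made up.
import numpy as np

recommended_items = np.array([7, 3, 9, 1, 4])   # ranked recommendation list
relevant_items = np.array([3, 4, 8])            # test items of this user

is_relevant = np.in1d(recommended_items, relevant_items, assume_unique=True)

cutoff = 3
hits = is_relevant[:cutoff].sum()
print(is_relevant)                  # [False  True False False  True]
print(hits / cutoff)                # precision@3
print(hits / len(relevant_items))   # recall@3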
def evaluateRecommender(self, recommender_object): """ :param recommender_object: the trained recommender object, a Recommender subclass :param URM_test_list: list of URMs to test the recommender against, or a single URM object :param cutoff_list: list of cutoffs to be use to report the scores, or a single cutoff """ results_dict = {} for cutoff in self.cutoff_list: results_dict[cutoff] = create_empty_metrics_dict( self.n_items, self.n_users, recommender_object.URM_train, self.ignore_items_ID, self.ignore_users_ID, cutoff, self.diversity_object) start_time = time.time() start_time_print = time.time() n_eval = 0 self.__all_items = np.arange(0, self.n_items, dtype=np.int) self.__all_items = set(self.__all_items) if self.ignore_items_flag: recommender_object.set_items_to_ignore(self.ignore_items_ID) for test_user in self.usersToEvaluate: # Being the URM CSR, the indices are the non-zero column indexes relevant_items = self.get_user_relevant_items(test_user) n_eval += 1 self.user_specific_remove_items(recommender_object, test_user) # recommended_items = recommender_object.recommend(np.array(test_user), remove_seen_flag=self.exclude_seen, # cutoff = self.max_cutoff, remove_top_pop_flag=False, remove_CustomItems_flag=self.ignore_items_flag) recommended_items = recommender_object.recommend( np.atleast_1d(test_user), remove_seen_flag=self.exclude_seen, cutoff=self.max_cutoff, remove_top_pop_flag=False, remove_CustomItems_flag=self.ignore_items_flag) recommended_items = np.array(recommended_items[0]) recommender_object.reset_items_to_ignore() is_relevant = np.in1d(recommended_items, relevant_items, assume_unique=True) for cutoff in self.cutoff_list: results_current_cutoff = results_dict[cutoff] is_relevant_current_cutoff = is_relevant[0:cutoff] recommended_items_current_cutoff = recommended_items[0:cutoff] results_current_cutoff[ EvaluatorMetrics.ROC_AUC.value] += roc_auc( is_relevant_current_cutoff) results_current_cutoff[ EvaluatorMetrics.PRECISION.value] += precision( is_relevant_current_cutoff, len(relevant_items)) results_current_cutoff[ EvaluatorMetrics.RECALL.value] += recall( is_relevant_current_cutoff, relevant_items) results_current_cutoff[EvaluatorMetrics.RECALL_TEST_LEN. value] += recall_min_test_len( is_relevant_current_cutoff, relevant_items) results_current_cutoff[EvaluatorMetrics.MAP.value] += map( is_relevant_current_cutoff, relevant_items) results_current_cutoff[EvaluatorMetrics.MRR.value] += rr( is_relevant_current_cutoff) results_current_cutoff[EvaluatorMetrics.NDCG.value] += ndcg( recommended_items_current_cutoff, relevant_items, relevance=self.get_user_test_ratings(test_user), at=cutoff) results_current_cutoff[ EvaluatorMetrics.HIT_RATE. value] += is_relevant_current_cutoff.sum() results_current_cutoff[EvaluatorMetrics.ARHR.value] += arhr( is_relevant_current_cutoff) results_current_cutoff[ EvaluatorMetrics.NOVELTY.value].add_recommendations( recommended_items_current_cutoff) results_current_cutoff[ EvaluatorMetrics.DIVERSITY_GINI.value].add_recommendations( recommended_items_current_cutoff) results_current_cutoff[EvaluatorMetrics.SHANNON_ENTROPY. value].add_recommendations( recommended_items_current_cutoff) results_current_cutoff[ EvaluatorMetrics.COVERAGE_ITEM.value].add_recommendations( recommended_items_current_cutoff) results_current_cutoff[ EvaluatorMetrics.COVERAGE_USER.value].add_recommendations( recommended_items_current_cutoff, test_user) results_current_cutoff[ EvaluatorMetrics.DIVERSITY_MEAN_INTER_LIST. 
value].add_recommendations( recommended_items_current_cutoff) results_current_cutoff[EvaluatorMetrics.DIVERSITY_HERFINDAHL. value].add_recommendations( recommended_items_current_cutoff) if EvaluatorMetrics.DIVERSITY_SIMILARITY.value in results_current_cutoff: results_current_cutoff[ EvaluatorMetrics.DIVERSITY_SIMILARITY. value].add_recommendations( recommended_items_current_cutoff) if time.time() - start_time_print > 30 or n_eval == len( self.usersToEvaluate): print( "SequentialEvaluator: Processed {} ( {:.2f}% ) in {:.2f} seconds. Users per second: {:.0f}" .format(n_eval, 100.0 * float(n_eval) / len(self.usersToEvaluate), time.time() - start_time, float(n_eval) / (time.time() - start_time))) sys.stdout.flush() sys.stderr.flush() start_time_print = time.time() if (n_eval > 0): for cutoff in self.cutoff_list: results_current_cutoff = results_dict[cutoff] for key in results_current_cutoff.keys(): value = results_current_cutoff[key] if isinstance(value, Metrics_Object): results_current_cutoff[key] = value.get_metric_value() else: results_current_cutoff[key] = value / n_eval precision_ = results_current_cutoff[ EvaluatorMetrics.PRECISION.value] recall_ = results_current_cutoff[EvaluatorMetrics.RECALL.value] if precision_ + recall_ != 0: results_current_cutoff[EvaluatorMetrics.F1.value] = 2 * ( precision_ * recall_) / (precision_ + recall_) else: print( "WARNING: No users had a sufficient number of relevant items") if self.ignore_items_flag: recommender_object.reset_items_to_ignore() results_run_string = self.get_result_string(results_dict) return (results_dict, results_run_string)
def main(subject, session, bids_folder, modalities=None, registration_scheme='linear_precise'): if modalities is None: modalities = ['T2starw', 'MTw', 'TSE'] curdir = op.dirname(op.realpath(__file__)) registration_scheme = op.join(curdir, f'{registration_scheme}.json') anat_dir = op.join(bids_folder, f'sub-{subject}', f'ses-{session}', 'anat') target = op.join(bids_folder, 'derivatives', 'fmriprep', f'sub-{subject}', 'anat', f'sub-{subject}_desc-preproc_T1w.nii.gz') target_mask = op.join(bids_folder, 'derivatives', 'fmriprep', f'sub-{subject}', 'anat', f'sub-{subject}_desc-brain_mask.nii.gz') init_regs = glob.glob( op.join(bids_folder, 'derivatives', 'fmriprep', f'sub-{subject}', f'ses-{session}', 'anat', '*from-orig_to-T1w_*.txt')) t1w_to_mni_transform = op.join( bids_folder, 'derivatives', 'fmriprep', f'sub-{subject}', 'anat', f'sub-{subject}_from-T1w_to-MNI152NLin2009cAsym_mode-image_xfm.h5') t1w_in_mni = op.join( bids_folder, 'derivatives', 'fmriprep', f'sub-{subject}', 'anat', f'sub-{subject}_space-MNI152NLin2009cAsym_desc-preproc_T1w.nii.gz') mni_brain_mask = op.join( bids_folder, 'derivatives', 'fmriprep', f'sub-{subject}', 'anat', f'sub-{subject}_space-MNI152NLin2009cAsym_desc-brain_mask.nii.gz') if len(init_regs) > 0: init_reg = init_regs[0] else: init_reg = None print(f'INITIAL TRANSFORM: {init_reg}') def make_registration_wf(input_file, name, subject=subject, target=target, target_mask=target_mask, init_reg=init_reg, t1w_to_mni_transform=t1w_to_mni_transform, t1w_in_mni=t1w_in_mni, mni_brain_mask=mni_brain_mask, ants_numthreads=8): workflow = pe.Workflow(base_dir='/tmp/workflow_folders', name=name) input_node = pe.Node(niu.IdentityInterface(fields=[ 'input_file', 'target', 'target_mask', 't1w_to_mni_transform', 't1w_in_mni', 'mni_brain_mask' ]), name='inputspec') input_node.inputs.input_file = input_file input_node.inputs.target = target input_node.inputs.target_mask = target_mask input_node.inputs.init_reg = init_reg input_node.inputs.t1w_to_mni_transform = t1w_to_mni_transform input_node.inputs.t1w_in_mni = t1w_in_mni input_node.inputs.mni_brain_mask = mni_brain_mask convert_dtype = pe.Node(fsl.maths.MathsCommand(), name='convert_dtype') convert_dtype.inputs.output_datatype = 'double' workflow.connect(input_node, 'input_file', convert_dtype, 'in_file') inu_n4 = pe.Node( N4BiasFieldCorrection( dimension=3, save_bias=True, num_threads=ants_numthreads, rescale_intensities=True, copy_header=True, ), n_procs=ants_numthreads, name="inu_n4", ) workflow.connect(convert_dtype, 'out_file', inu_n4, 'input_image') register = pe.Node(Registration(from_file=registration_scheme, num_threads=ants_numthreads, verbose=True), name='registration') workflow.connect(inu_n4, 'output_image', register, 'moving_image') if init_reg: workflow.connect(input_node, 'init_reg', register, 'initial_moving_transform') workflow.connect(input_node, 'target', register, 'fixed_image') workflow.connect(input_node, 'target_mask', register, 'fixed_image_masks') def get_mask(input_image): from nilearn import image from nipype.utils.filemanip import split_filename import os.path as op _, fn, _ = split_filename(input_image) mask = image.math_img('im != 0', im=input_image) new_fn = op.abspath(fn + '_mask.nii.gz') mask.to_filename(new_fn) return new_fn mask_node = pe.Node(niu.Function(function=get_mask, input_names=['input_image'], output_names=['mask']), name='mask_node') workflow.connect(register, 'warped_image', mask_node, 'input_image') gen_grid_node = pe.Node(GenerateSamplingReference(), name='gen_grid_node') 
workflow.connect(mask_node, 'mask', gen_grid_node, 'fov_mask') workflow.connect(inu_n4, 'output_image', gen_grid_node, 'moving_image') workflow.connect(input_node, 'target', gen_grid_node, 'fixed_image') datasink_image_t1w = pe.Node(DerivativesDataSink( out_path_base='registration', compress=True, base_directory=op.join(bids_folder, 'derivatives')), name='datasink_image_t1w') workflow.connect(input_node, 'input_file', datasink_image_t1w, 'source_file') datasink_image_t1w.inputs.space = 'T1w' datasink_image_t1w.inputs.desc = 'registered' datasink_report_t1w = pe.Node(DerivativesDataSink( out_path_base='registration', space='T1w', base_directory=op.join(bids_folder, 'derivatives'), datatype='figures'), name='datasink_report_t1w') workflow.connect(input_node, 'input_file', datasink_report_t1w, 'source_file') datasink_report_t1w.inputs.space = 'T1w' transformer = pe.Node(ApplyTransforms( interpolation='LanczosWindowedSinc', generate_report=True, num_threads=ants_numthreads), n_procs=ants_numthreads, name='transformer') workflow.connect(transformer, 'output_image', datasink_image_t1w, 'in_file') workflow.connect(transformer, 'out_report', datasink_report_t1w, 'in_file') workflow.connect(inu_n4, 'output_image', transformer, 'input_image') workflow.connect(gen_grid_node, 'out_file', transformer, 'reference_image') workflow.connect(register, 'composite_transform', transformer, 'transforms') concat_transforms = pe.Node(niu.Merge(2), name='concat_transforms') workflow.connect(register, 'composite_transform', concat_transforms, 'in2') workflow.connect(input_node, 't1w_to_mni_transform', concat_transforms, 'in1') transformer_to_mni1 = pe.Node(ApplyTransforms( interpolation='LanczosWindowedSinc', generate_report=False, num_threads=ants_numthreads), n_procs=ants_numthreads, name='transformer_to_mni1') workflow.connect(inu_n4, 'output_image', transformer_to_mni1, 'input_image') workflow.connect(input_node, 't1w_in_mni', transformer_to_mni1, 'reference_image') workflow.connect(concat_transforms, 'out', transformer_to_mni1, 'transforms') mask_node_mni = pe.Node(niu.Function(function=get_mask, input_names=['input_image'], output_names=['mask']), name='mask_node_mni') workflow.connect(transformer_to_mni1, 'output_image', mask_node_mni, 'input_image') def join_masks(mask1, mask2): from nilearn import image from nipype.utils.filemanip import split_filename import os.path as op _, fn, _ = split_filename(mask1) new_mask = image.math_img('(im1 > 0) & (im2 > 0)', im1=mask1, im2=mask2) new_fn = op.abspath(fn + '_jointmask' + '.nii.gz') new_mask.to_filename(new_fn) return new_fn combine_masks_node = pe.Node(niu.Function( function=join_masks, input_names=['mask1', 'mask2'], output_names=['combined_mask']), name='combine_mask_node') workflow.connect(mask_node_mni, 'mask', combine_masks_node, 'mask1') workflow.connect(input_node, 'mni_brain_mask', combine_masks_node, 'mask2') gen_grid_node_mni = pe.Node(GenerateSamplingReference(), name='gen_grid_node_mni') workflow.connect(combine_masks_node, 'combined_mask', gen_grid_node_mni, 'fov_mask') workflow.connect(inu_n4, 'output_image', gen_grid_node_mni, 'moving_image') workflow.connect(input_node, 't1w_in_mni', gen_grid_node_mni, 'fixed_image') transformer_to_mni2 = pe.Node(ApplyTransforms( interpolation='LanczosWindowedSinc', generate_report=False, num_threads=ants_numthreads), n_procs=ants_numthreads, name='transformer_to_mni2') workflow.connect(inu_n4, 'output_image', transformer_to_mni2, 'input_image') workflow.connect(gen_grid_node_mni, 'out_file', 
                         transformer_to_mni2, 'reference_image')
        workflow.connect(concat_transforms, 'out',
                         transformer_to_mni2, 'transforms')

        datasink_image_mni = pe.Node(DerivativesDataSink(
            out_path_base='registration',
            compress=True,
            base_directory=op.join(bids_folder, 'derivatives')),
            name='datasink_mni')
        datasink_image_mni.inputs.source_file = input_file
        datasink_image_mni.inputs.space = 'MNI152NLin2009cAsym'
        datasink_image_mni.inputs.desc = 'registered'
        workflow.connect(input_node, 'input_file', datasink_image_mni,
                         'source_file')
        workflow.connect(transformer_to_mni2, 'output_image',
                         datasink_image_mni, 'in_file')

        return workflow

    df = BIDSLayout(anat_dir, validate=False).to_df()
    print(df['extension'])
    df = df[np.in1d(df.extension, ['.nii', '.nii.gz'])]

    if 'acquisition' in df.columns:
        df = df[~((df.suffix == 'T2starw') & (df.acquisition != 'average'))]

    print(df)

    df = df[np.in1d(df['suffix'], modalities)]

    for ix, row in df.iterrows():
        logging.info(f'Registering {row.path}')

        wf_name = f'register_{subject}_{session}_{row.suffix}'

        if ('run' in row) and row.run:
            wf_name += f'_{row.run}'

        wf = make_registration_wf(row.path, wf_name)

        wf.run()
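# Sketch of the layout filtering above with a plain DataFrame: np.in1d on a
# column keeps only rows whose value is in the allowed set (pandas' own
# Series.isin is the more idiomatic equivalent). The rows below are invented.
import numpy as np
import pandas as pd

df = pd.DataFrame({'suffix': ['T1w', 'T2starw', 'MTw', 'bold'],
                   'extension': ['.nii.gz', '.nii.gz', '.json', '.nii']})

df = df[np.in1d(df.extension, ['.nii', '.nii.gz'])]         # keep NIfTI files only
df = df[np.in1d(df['suffix'], ['T2starw', 'MTw', 'TSE'])]   # keep requested modalities
print(df)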
def _run_evaluation_on_selected_users(self, recommender_object, usersToEvaluate, block_size=1000): start_time = time.time() start_time_print = time.time() results_dict = {} for cutoff in self.cutoff_list: results_dict[cutoff] = create_empty_metrics_dict( self.n_items, self.n_users, recommender_object.get_URM_train(), self.ignore_items_ID, self.ignore_users_ID, cutoff, self.diversity_object) n_users_evaluated = 0 # Start from -block_size to ensure it to be 0 at the first block user_batch_start = 0 user_batch_end = 0 while user_batch_start < len(self.usersToEvaluate): user_batch_end = user_batch_start + block_size user_batch_end = min(user_batch_end, len(usersToEvaluate)) test_user_batch_array = np.array( usersToEvaluate[user_batch_start:user_batch_end]) user_batch_start = user_batch_end # Compute predictions for a batch of users using vectorization, much more efficient than computing it one at a time recommended_items_batch_list = recommender_object.recommend( test_user_batch_array, remove_seen_flag=self.exclude_seen, cutoff=self.max_cutoff, remove_top_pop_flag=False, remove_CustomItems_flag=self.ignore_items_flag) # Compute recommendation quality for each user in batch for batch_user_index in range(len(recommended_items_batch_list)): user_id = test_user_batch_array[batch_user_index] recommended_items = recommended_items_batch_list[ batch_user_index] # Being the URM CSR, the indices are the non-zero column indexes relevant_items = self.get_user_relevant_items(user_id) is_relevant = np.in1d(recommended_items, relevant_items, assume_unique=True) n_users_evaluated += 1 for cutoff in self.cutoff_list: results_current_cutoff = results_dict[cutoff] is_relevant_current_cutoff = is_relevant[0:cutoff] recommended_items_current_cutoff = recommended_items[ 0:cutoff] results_current_cutoff[ EvaluatorMetrics.ROC_AUC.value] += roc_auc( is_relevant_current_cutoff) results_current_cutoff[ EvaluatorMetrics.PRECISION.value] += precision( is_relevant_current_cutoff, len(relevant_items)) results_current_cutoff[ EvaluatorMetrics.RECALL.value] += recall( is_relevant_current_cutoff, relevant_items) results_current_cutoff[EvaluatorMetrics.RECALL_TEST_LEN. value] += recall_min_test_len( is_relevant_current_cutoff, relevant_items) results_current_cutoff[EvaluatorMetrics.MAP.value] += map( is_relevant_current_cutoff, relevant_items) results_current_cutoff[EvaluatorMetrics.MRR.value] += rr( is_relevant_current_cutoff) results_current_cutoff[ EvaluatorMetrics.NDCG.value] += ndcg( recommended_items_current_cutoff, relevant_items, relevance=self.get_user_test_ratings(user_id), at=cutoff) results_current_cutoff[ EvaluatorMetrics.HIT_RATE. value] += is_relevant_current_cutoff.sum() results_current_cutoff[ EvaluatorMetrics.ARHR.value] += arhr( is_relevant_current_cutoff) results_current_cutoff[ EvaluatorMetrics.NOVELTY.value].add_recommendations( recommended_items_current_cutoff) results_current_cutoff[ EvaluatorMetrics.DIVERSITY_GINI. value].add_recommendations( recommended_items_current_cutoff) results_current_cutoff[ EvaluatorMetrics.SHANNON_ENTROPY. value].add_recommendations( recommended_items_current_cutoff) results_current_cutoff[ EvaluatorMetrics.COVERAGE_ITEM. value].add_recommendations( recommended_items_current_cutoff) results_current_cutoff[ EvaluatorMetrics.COVERAGE_USER. value].add_recommendations( recommended_items_current_cutoff, user_id) results_current_cutoff[ EvaluatorMetrics.DIVERSITY_MEAN_INTER_LIST. 
value].add_recommendations( recommended_items_current_cutoff) results_current_cutoff[ EvaluatorMetrics.DIVERSITY_HERFINDAHL. value].add_recommendations( recommended_items_current_cutoff) if EvaluatorMetrics.DIVERSITY_SIMILARITY.value in results_current_cutoff: results_current_cutoff[ EvaluatorMetrics.DIVERSITY_SIMILARITY. value].add_recommendations( recommended_items_current_cutoff) if time.time( ) - start_time_print > 30 or n_users_evaluated == len( self.usersToEvaluate): print( "SequentialEvaluator: Processed {} ( {:.2f}% ) in {:.2f} seconds. Users per second: {:.0f}" .format( n_users_evaluated, 100.0 * float(n_users_evaluated) / len(self.usersToEvaluate), time.time() - start_time, float(n_users_evaluated) / (time.time() - start_time))) sys.stdout.flush() sys.stderr.flush() start_time_print = time.time() return results_dict, n_users_evaluated
print x.mean()

#standard deviation
print 'std deviation'
print x.std()

#variance
print 'variance'
print x.var()

#logical operations -and / or-
condition2 = np.array([True, False, True])
print 'or operator'
print condition2.any()  #or operator

print 'and operator'
print condition2.all()  #and operator

#sorting in numpy arrays
unsorted_array = np.array([1, 2, 8, 10, 7, 3])
unsorted_array.sort()
print "sorting"
print unsorted_array

#unique
arr2 = np.array(['solid', 'solid', 'liquid', 'liquid', 'gas', 'gas'])
print "unique"
print np.unique(arr2)

#in1d: element-wise membership test of one array in another
print "in1d membership"
print np.in1d(['solid', 'gas', 'plasma'], arr2)
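# A short, self-contained version of the membership test above: np.in1d checks,
# element by element, whether the values of the first array occur anywhere in
# the second, returning a boolean array with the first array's length.
import numpy as np

states = np.array(['solid', 'solid', 'liquid', 'liquid', 'gas', 'gas'])
print(np.unique(states))                             # ['gas' 'liquid' 'solid']
print(np.in1d(['solid', 'gas', 'plasma'], states))   # [ True  True False]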
def load_km(self, as_sparse=True, sort=False):
    """Load and construct mass and stiffness matrices from an ANSYS full file.

    Parameters
    ----------
    as_sparse : bool, optional
        Outputs the mass and stiffness matrices as scipy csc sparse
        arrays when True by default.

    sort : bool, optional
        Rearranges the k and m matrices such that the rows correspond
        to the sorted rows and columns in dof_ref.  Also sorts dof_ref.

    Returns
    -------
    dof_ref : (n x 2) np.int32 array
        This array contains the node and degree corresponding to each
        row and column in the mass and stiffness matrices.  In a 3 DOF
        analysis the dof integers will correspond to:
        0 - x
        1 - y
        2 - z
        Sort these values by node number and DOF by enabling the sort
        parameter.

    k : (n x n) np.float or scipy.csc array
        Stiffness array

    m : (n x n) np.float or scipy.csc array
        Mass array

    Examples
    --------
    >>> import pyansys
    >>> full = pyansys.read_binary('file.full')
    >>> dof_ref, k, m = full.load_km()
    >>> print(k)
    (0, 0)       163408119.6581276
    (0, 1)               0.0423270
    (1, 1)       163408119.6581276
    :            :
    (342, 344)     6590544.8717949
    (343, 344)    -6590544.8717950
    (344, 344)    20426014.9572689

    Notes
    -----
    Constrained entries are removed from the mass and stiffness
    matrices.

    Constrained DOF can be accessed from ``const``, which returns the
    node number and DOF constrained in ANSYS.
    """
    if not os.path.isfile(self.filename):
        raise Exception('%s not found' % self.filename)

    if as_sparse:
        try:
            from scipy.sparse import csc_matrix, coo_matrix
        except ImportError:
            raise ImportError('Unable to load scipy, use ``load_km`` with '
                              '``as_sparse=False``')

    # number of terms in stiffness matrix
    ntermK = two_ints_to_long(self._header['ntermKl'], self._header['ntermKh'])

    ptrSTF = self._header['ptrSTF']  # Location of stiffness matrix
    ptrMAS = self._header['ptrMAS']  # Location of mass matrix

    # number of terms in mass matrix
    ntermM = two_ints_to_long(self._header['ntermMl'], self._header['ntermMh'])

    ptrDOF = self._header['ptrDOF']  # pointer to DOF info

    # DOF information
    with open(self.filename, 'rb') as f:
        read_table(f, skip=True)  # standard header
        read_table(f, skip=True)  # full header
        read_table(f, skip=True)  # number of degrees of freedom

        # Nodal equivalence table
        neqv = read_table(f, cython=True)

        # read number of degrees of freedom for each node and constant tables
        f.seek(ptrDOF*4)
        ndof = read_table(f, cython=True)
        const = read_table(f, cython=True)

    # degree of freedom reference and number of degrees of freedom per node
    dof_ref = [ndof, neqv]
    self.ndof = ndof

    # Read k and m blocks (see help(ReadArray) for block description)
    if ntermK:
        krow, kcol, kdata = _binary_reader.read_array(self.filename,
                                                      ptrSTF,
                                                      ntermK,
                                                      self.neqn,
                                                      const)
    else:
        warnings.warn('Missing stiffness matrix')
        kdata = None

    if ntermM:
        mrow, mcol, mdata = _binary_reader.read_array(self.filename,
                                                      ptrMAS,
                                                      ntermM,
                                                      self.neqn,
                                                      const)
    else:
        warnings.warn('Missing mass matrix')
        mdata = None

    # remove constrained entries
    if np.any(const < 0):
        if kdata is not None:
            remove = np.nonzero(const < 0)[0]
            mask = ~np.logical_or(np.in1d(krow, remove), np.in1d(kcol, remove))
            krow = krow[mask]
            kcol = kcol[mask]
            kdata = kdata[mask]

        if mdata is not None:
            mask = ~np.logical_or(np.in1d(mrow, remove), np.in1d(mcol, remove))
            mrow = mrow[mask]
            mcol = mcol[mask]
            mdata = mdata[mask]

    # sort nodal equivalence
    dof_ref, index, nref, dref = _binary_reader.sort_nodal_eqlv(self.neqn,
                                                                neqv, ndof)

    # store constrained dof information
    unsort_dof_ref = np.vstack((nref, dref)).T
    self._const = unsort_dof_ref[const < 0]

    if sort:  # make sorting the same as ANSYS rdfull would output
        # 
resort to make in upper triangle krow = index[krow] kcol = index[kcol] krow, kcol = np.sort(np.vstack((krow, kcol)), 0) if mdata is not None: mrow = index[mrow] mcol = index[mcol] mrow, mcol = np.sort(np.vstack((mrow, mcol)), 0) else: dof_ref = unsort_dof_ref # store data for later reference if kdata is not None: self._krow = krow self._kcol = kcol self._kdata = kdata if mdata is not None: self._mrow = mrow self._mcol = mcol self._mdata = mdata # output as a sparse matrix if as_sparse: if kdata is not None: k = coo_matrix((self.neqn,) * 2) k.data = kdata # data has to be set first k.row = krow k.col = kcol # convert to csc matrix (generally faster for sparse solvers) k = csc_matrix(k) else: k = None if mdata is not None: m = coo_matrix((self.neqn,) * 2) m.data = mdata m.row = mrow m.col = mcol # convert to csc matrix (generally faster for sparse solvers) m = csc_matrix(m) else: m = None else: if kdata is not None: k = np.zeros((self.neqn,) * 2) k[krow, kcol] = kdata else: k = None if mdata is not None: m = np.zeros((self.neqn,) * 2) m[mrow, mcol] = mdata else: m = None return dof_ref, k, m
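# Minimal sketch of the constrained-entry filtering above: triplets whose row or
# column index appears in `remove` are dropped before the sparse matrix is
# assembled. The tiny 4x4 system below is invented for illustration.
import numpy as np
from scipy.sparse import coo_matrix, csc_matrix

row = np.array([0, 1, 2, 3, 1])
col = np.array([0, 1, 2, 3, 2])
data = np.array([4.0, 5.0, 6.0, 7.0, 1.0])

remove = np.array([2])    # constrained equation index
mask = ~np.logical_or(np.in1d(row, remove), np.in1d(col, remove))

k = csc_matrix(coo_matrix((data[mask], (row[mask], col[mask])), shape=(4, 4)))
print(k.toarray())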