def extract_sub_matrix(mat, inds):
    """ Extract submatrix of `mat` by deleting `inds` rows/cols """
    for i in sorted(inds, reverse=True):
        mat = np.delete(mat, i, axis=0)
        mat = np.delete(mat, i, axis=1)
    return mat
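# A minimal usage sketch for extract_sub_matrix, assuming numpy is imported as np.
# Deleting rows/cols 1 and 3 from a 4x4 matrix keeps rows/cols 0 and 2.
import numpy as np

m = np.arange(16).reshape(4, 4)
sub = extract_sub_matrix(m, [1, 3])
# equivalent to m[np.ix_([0, 2], [0, 2])]
assert sub.shape == (2, 2)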
def removeOldestEntriesFromDataSet(self, quantity):
    datasetSize = self.getDatasetLength()
    if datasetSize <= quantity:
        self.clearDataSet()
        return
    self._data['input'] = _np.delete(self._data['input'], xrange(quantity), 0)
    self._data['target'] = _np.delete(self._data['target'], xrange(quantity), 0)
def gstamp(self, ports_v, time=0, reduced=True):
    """Returns the differential (trans)conductance wrt the port specified by port_index
    when the element has the voltages specified in ports_v across its ports,
    at (simulation) time.

    ports_v: a list in the form: [voltage_across_port0, voltage_across_port1, ...]
    port_index: an integer, 0 <= port_index < len(self.get_ports())
    time: the simulation time at which the evaluation is performed.
          Set it to None during DC analysis.
    """
    indices = ([self.n1 - 1]*2 + [self.n2 - 1]*2,
               [self.n1 - 1, self.n2 - 1]*2)
    gm = self.model.get_gm(self.model, 0, utilities.tuplinator(ports_v), 0, self.device)
    if gm == 0:
        gm = options.gmin*2
    stamp = np.array(((gm, -gm),
                      (-gm, gm)), dtype=np.float64)
    if reduced:
        zap_rc = [pos for pos, i in enumerate(indices[1][:2]) if i == -1]
        stamp = np.delete(stamp, zap_rc, axis=0)
        stamp = np.delete(stamp, zap_rc, axis=1)
        indices = tuple(zip(*[(i, y) for i, y in zip(*indices) if (i != -1 and y != -1)]))
        stamp_flat = stamp.reshape(-1)
        stamp_folded = []
        indices_folded = []
        for ix, it in enumerate([(i, y) for i, y in zip(*indices)]):
            if it not in indices_folded:
                indices_folded.append(it)
                stamp_folded.append(stamp_flat[ix])
            else:
                w = indices_folded.index(it)
                stamp_folded[w] += stamp_flat[ix]
        indices = tuple(zip(*indices_folded))
        stamp = np.array(stamp_folded)
    return indices, stamp
def model_and_predict(self, X_train, y_train, X_test):
    district_idx = self.columns.index('PdDistrict')
    districts = set(X_train[:, district_idx])
    district_ys = {}
    # Grow forest and predict separately for each district's records
    for d in districts:
        district_X_train = X_train[X_train[:, district_idx] == d]
        district_X_train = np.delete(district_X_train, district_idx, 1)
        district_y_train = y_train[X_train[:, district_idx] == d]
        district_X_test = X_test[X_test[:, district_idx] == d]
        district_X_test = np.delete(district_X_test, district_idx, 1)
        print "Growing forest for", d
        # Not saving output in Git so make this deterministic
        # with random_state
        rf = RandomForestClassifier(n_estimators=self.n_trees, n_jobs=-1,
                                    random_state=782629)
        rf.fit(district_X_train, district_y_train)
        district_ys[d] = list(rf.predict(district_X_test))
        print "Finished", d
    print "All predictions made"
    y_hat = []
    for row in X_test:
        d_ys = district_ys[row[district_idx]]
        y_hat.append(d_ys.pop(0))
    return y_hat
def append_new_point(self, y, x=None):
    self._axis_y_array = np.append(self._axis_y_array, y)
    if x is not None:  # explicit None check so x=0 is still accepted
        self._axis_x_array = np.append(self._axis_x_array, x)
    else:
        self._axis_x_array = np.arange(len(self._axis_y_array))
    if self.max_plot_points:
        if self._axis_y_array.size > self.max_plot_points:
            self._axis_y_array = np.delete(self._axis_y_array, 0)
            self._axis_x_array = np.delete(self._axis_x_array, 0)
    if self.single_curve is None:
        self.single_curve, = self.axes.plot(
            self._axis_y_array, linewidth=2, marker="s"
        )
    else:
        self.axes.fill(self._axis_y_array, "r", linewidth=2)
    self._axis_y_limits[1] = (
        self._axis_y_array.max() + self._axis_y_array.max() * 0.05
    )
    self.axes.set_ylim(self._axis_y_limits)
    self.single_curve.set_xdata(self._axis_x_array)
    self.single_curve.set_ydata(self._axis_y_array)
    self.axes.relim()
    self.axes.autoscale_view()
    self.fig.canvas.draw()
    self.fig.canvas.flush_events()
    self.axes.grid(True)
    # TODO move y lims as property
    self.axes.set_ylim(
        (0, self._axis_y_array.max() + self._axis_y_array.max() * 0.05)
    )
def solveBlockGlasso(signal):
    start = int(signal[0])  # inclusive
    S_Matrix = S_Matrix_bc.value
    W_matrix = W_Matrix_bc.value
    old_W = np.copy(W_matrix)
    end = min(int(signal[1]), S_Matrix.shape[0])  # non-inclusive
    deltamatrix = np.zeros(S_Matrix.shape)
    NN = S_Matrix.shape[0]
    for n in range(start, end):
        W11 = np.delete(W_matrix, n, 0)
        W11 = np.delete(W11, n, 1)
        Z = linalg.sqrtm(W11)
        s11 = S_Matrix[:, n]
        s11 = np.delete(s11, n)
        Y = np.dot(nplinalg.inv(linalg.sqrtm(W11)), s11)
        Y = np.real(Y)
        Z = np.real(Z)
        B = lasso(Z, Y, beta_value)
        updated_column = np.dot(W11, B)
        matrix_ind = np.array(range(0, NN))
        matrix_ind = np.delete(matrix_ind, n)
        column_ind = 0
        for k in matrix_ind:
            deltamatrix[k, n] = updated_column[column_ind] - W_matrix[k, n]
            deltamatrix[n, k] = updated_column[column_ind] - W_matrix[k, n]
            W_matrix[k, n] = updated_column[column_ind]
            W_matrix[n, k] = updated_column[column_ind]
            column_ind = column_ind + 1
def non_max_suppression(boxes, scores, threshold):
    """Performs non-maximum suppression and returns indices of kept boxes.
    boxes: [N, (y1, x1, y2, x2)]. Notice that (y2, x2) lies outside the box.
    scores: 1-D array of box scores.
    threshold: Float. IoU threshold to use for filtering.
    """
    assert boxes.shape[0] > 0
    if boxes.dtype.kind != "f":
        boxes = boxes.astype(np.float32)

    # Compute box areas
    y1 = boxes[:, 0]
    x1 = boxes[:, 1]
    y2 = boxes[:, 2]
    x2 = boxes[:, 3]
    area = (y2 - y1) * (x2 - x1)

    # Get indices of boxes sorted by scores (highest first)
    ixs = scores.argsort()[::-1]

    pick = []
    while len(ixs) > 0:
        # Pick top box and add its index to the list
        i = ixs[0]
        pick.append(i)
        # Compute IoU of the picked box with the rest
        iou = compute_iou(boxes[i], boxes[ixs[1:]], area[i], area[ixs[1:]])
        # Identify boxes with IoU over the threshold. This
        # returns indices into ixs[1:], so add 1 to get
        # indices into ixs.
        remove_ixs = np.where(iou > threshold)[0] + 1
        # Remove indices of the picked and overlapped boxes.
        ixs = np.delete(ixs, remove_ixs)
        ixs = np.delete(ixs, 0)
    return np.array(pick, dtype=np.int32)
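# A small, self-contained sketch of how non_max_suppression might be exercised.
# compute_iou is not shown above, so the helper below is an assumed stand-in with
# the same call shape (one box against many, with precomputed areas).
import numpy as np

def compute_iou(box, boxes, box_area, boxes_area):
    # Hypothetical helper: intersection-over-union of one box vs. an array of boxes.
    y1 = np.maximum(box[0], boxes[:, 0])
    x1 = np.maximum(box[1], boxes[:, 1])
    y2 = np.minimum(box[2], boxes[:, 2])
    x2 = np.minimum(box[3], boxes[:, 3])
    intersection = np.maximum(y2 - y1, 0) * np.maximum(x2 - x1, 0)
    return intersection / (box_area + boxes_area - intersection)

boxes = np.array([[0, 0, 10, 10], [1, 1, 10, 10], [20, 20, 30, 30]], dtype=np.float32)
scores = np.array([0.9, 0.8, 0.7])
keep = non_max_suppression(boxes, scores, threshold=0.5)
# The two heavily overlapping boxes collapse to the higher-scoring one.
assert list(keep) == [0, 2]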
def stftFiltering(x, fs, w, N, H, filter):
    # apply a filter to a sound by using the STFT
    # x: input sound, w: analysis window, N: FFT size, H: hop size
    # filter: magnitude response of filter with frequency-magnitude pairs (in dB)
    # returns y: output sound
    M = w.size                                    # size of analysis window
    hM1 = int(math.floor((M+1)/2))                # half analysis window size by rounding
    hM2 = int(math.floor(M/2))                    # half analysis window size by floor
    x = np.append(np.zeros(hM2), x)               # add zeros at beginning to center first window at sample 0
    x = np.append(x, np.zeros(hM1))               # add zeros at the end to analyze last sample
    pin = hM1                                     # initialize sound pointer in middle of analysis window
    pend = x.size - hM1                           # last sample to start a frame
    w = w / sum(w)                                # normalize analysis window
    y = np.zeros(x.size)                          # initialize output array
    while pin <= pend:                            # while sound pointer is smaller than last sample
        # -----analysis-----
        x1 = x[pin-hM1:pin+hM2]                   # select one frame of input sound
        mX, pX = DFT.dftAnal(x1, w, N)            # compute dft
        # ------transformation-----
        mY = mX + filter                          # filter input magnitude spectrum
        # -----synthesis-----
        y1 = DFT.dftSynth(mY, pX, M)              # compute idft
        y[pin-hM1:pin+hM2] += H*y1                # overlap-add to generate output sound
        pin += H                                  # advance sound pointer
    y = np.delete(y, range(hM2))                  # delete half of first window which was added in stftAnal
    y = np.delete(y, range(y.size-hM1, y.size))   # delete the zeros added at the end for the last frame
    return y
def update_extra_mat(matfile, to_remove):
    """
    updates the time_frames, confounds and mask_suppressed arrays to reflect
    the removed volumes. However, does not change other items in _extra.mat file
    """
    mat = loadmat(matfile)

    # update time_frames
    ntf = np.delete(mat['time_frames'][0], to_remove)
    mat.update({'time_frames': ntf})

    # update confounds
    ncon = np.delete(mat['confounds'], to_remove, axis=0)
    mat.update({'confounds': ncon})

    # update mask_suppressed
    ms = mat['mask_suppressed']
    for supp in to_remove:
        ms[supp][0] = 1
    mat.update({'mask_suppressed': ms})

    # save updated mat file
    jnk, flnme = os.path.split(matfile)
    savemat(os.path.join(output_dir, flnme), mat)
def sineModelSynth(tfreq, tmag, tphase, N, H, fs):
    """
    Synthesis of a sound using the sinusoidal model
    tfreq, tmag, tphase: frequencies, magnitudes and phases of sinusoids
    N: synthesis FFT size, H: hop size, fs: sampling rate
    returns y: output array sound
    """
    hN = N/2                                             # half of FFT size for synthesis
    L = tfreq.shape[0]                                   # number of frames
    pout = 0                                             # initialize output sound pointer
    ysize = H*(L+3)                                      # output sound size
    y = np.zeros(ysize)                                  # initialize output array
    sw = np.zeros(N)                                     # initialize synthesis window
    ow = triang(2*H)                                     # triangular window
    sw[hN-H:hN+H] = ow                                   # add triangular window
    bh = blackmanharris(N)                               # blackmanharris window
    bh = bh / sum(bh)                                    # normalized blackmanharris window
    sw[hN-H:hN+H] = sw[hN-H:hN+H] / bh[hN-H:hN+H]        # normalized synthesis window
    lastytfreq = tfreq[0,:]                              # initialize synthesis frequencies
    ytphase = 2*np.pi*np.random.rand(tfreq[0,:].size)    # initialize synthesis phases
    for l in range(L):                                   # iterate over all frames
        if (tphase.size > 0):                            # if phases are given, use them
            ytphase = tphase[l,:]
        else:                                            # otherwise propagate phases
            ytphase += (np.pi*(lastytfreq+tfreq[l,:])/fs)*H
        Y = UF.genSpecSines(tfreq[l,:], tmag[l,:], ytphase, N, fs)  # generate sines in the spectrum
        lastytfreq = tfreq[l,:]                          # save frequency for phase propagation
        ytphase = ytphase % (2*np.pi)                    # make phase inside 2*pi
        yw = np.real(fftshift(ifft(Y)))                  # compute inverse FFT
        y[pout:pout+N] += sw*yw                          # overlap-add and apply a synthesis window
        pout += H                                        # advance sound pointer
    y = np.delete(y, range(hN))                          # delete half of first window
    y = np.delete(y, range(y.size-hN, y.size))           # delete half of the last window
    return y
def project_into_plane(index, r0, rm):
    r'''Projects out-of-plane resolution into a specified plane by performing
    a gaussian integral over the third axis.

    Parameters
    ----------
    index : int
        Index of the axis that should be integrated out

    r0 : float
        Resolution prefactor

    rm : ndarray
        Resolution array

    Returns
    -------
    mp : ndarray
        Resolution matrix in a specified plane

    '''
    r = np.sqrt(2 * np.pi / rm[index, index]) * r0

    mp = rm
    b = rm[:, index] + rm[index, :].T
    b = np.delete(b, index, 0)

    mp = np.delete(mp, index, 0)
    mp = np.delete(mp, index, 1)

    mp -= 1 / (4. * rm[index, index]) * np.outer(b, b.T)

    return [r, mp]
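# A quick usage sketch for project_into_plane, assuming numpy is imported as np.
# The 3x3 matrix below is an arbitrary positive-definite example, not real
# instrument data; axis 2 is integrated out, leaving a 2x2 in-plane matrix.
import numpy as np

rm = np.array([[4.0, 0.5, 0.2],
               [0.5, 3.0, 0.1],
               [0.2, 0.1, 2.0]])
r, mp = project_into_plane(2, r0=1.0, rm=rm)
assert mp.shape == (2, 2)   # remaining in-plane resolution matrix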
def edit_description(instance):
    # twenty different categories
    scores = [0] * 20

    # Strip out all the punctuation
    unstripped = instance[9].lower()
    for c in string.punctuation:
        unstripped = unstripped.replace(c, "")
    description = unstripped.split()

    # add to the score if a word matches a category
    # 10 is the description
    for word in description:
        for i, category in enumerate(LDA):
            if word in category:
                scores[i] += 1

    # save the target
    target = instance[-1]

    # get rid of the description and target columns
    instance = np.delete(instance, 10, 0)  # 10 is which column, 1 means column, 0 means row
    instance = np.delete(instance, -1, 0)

    # add the scores
    instance = np.append(instance, scores)

    # add the target back on the end
    return np.append(instance, target)
def build_tree(data, labels, word_data, level):
    if (level == 0):
        # return label value which is dominant
        return LabelConv[st.mode(labels)[0][0] - 1]

    # select appropriate attribute for the node:
    best, best_ig = attribute_selection(data, labels)
    best_data = data[:, best]
    best_word = word_data[best]

    # remove all regarding that attribute from the data:
    word_data = np.delete(word_data, best, 0)
    left_data = np.delete(data[best_data == 0, :], best, 1)
    right_data = np.delete(data[best_data == 1, :], best, 1)

    # divide labels into two subarrays based on selected attribute:
    left_labl = labels[best_data == 0]
    right_labl = labels[best_data == 1]

    if (check_label(left_labl) == 2 and level != 0):
        # since label is mono-valued:
        left = LabelConv[left_labl[0] - 1]
    else:
        left = build_tree(left_data, left_labl, word_data, level - 1)

    if (check_label(right_labl) == 2 and level != 0):
        # since label is mono-valued:
        right = LabelConv[right_labl[0] - 1]
    else:
        right = build_tree(right_data, right_labl, word_data, level - 1)

    subtrees = {0: left, 1: right}
    return (best_word, best_ig, subtrees)
def reduce_dimension(m):
    """
    Reduce the dimension of the game matrix based on domination --
    player I is better off if one row is consistently larger than another;
    player II is better off if one column is consistently smaller than another.

    Output: the reduced-size game matrix

    Note: This implements strict domination. TODO: convex reduction
    """
    local = np.array(m)
    while True:
        rbefore = len(local)
        candidates = []
        for nr in permutations(range(len(local)), 2):
            bigger = reduce(lambda x, y: x and y, local[nr[0]] > local[nr[1]])
            if bigger:
                candidates.append(nr[1])
        if candidates:
            # delete all dominated rows in one call so indices do not shift
            local = np.delete(local, sorted(set(candidates)), 0)

        cbefore = len(local[0])
        candidates = []
        for nc in permutations(range(len(local[0])), 2):
            smaller = reduce(lambda x, y: x and y, local[:, nc[0]] < local[:, nc[1]])
            if smaller:
                candidates.append(nc[1])
        if candidates:
            # delete all dominated columns in one call so indices do not shift
            local = np.delete(local, sorted(set(candidates)), 1)

        if len(local[0]) == cbefore and len(local) == rbefore:
            break
    return local
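# A quick sanity check for reduce_dimension under strict domination, assuming the
# same imports the function relies on (numpy as np, itertools.permutations,
# functools.reduce). The payoff matrix is a made-up example.
import numpy as np

payoff = np.array([[3, 2, 5],
                   [1, 0, 4],   # strictly dominated by row 0
                   [2, 1, 6]])
reduced = reduce_dimension(payoff)
assert reduced.shape == (1, 1)   # iterated elimination leaves the single entry [[2]]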
def pixel_coordinates(nx, ny, mode="centers"):
    """Get pixel coordinates from a regular grid with dimension nx by ny.

    Parameters
    ----------
    nx : int
        x size
    ny : int
        y size
    mode : string
        `centers` or `edges` to return the pixel coordinates
        defaults to centers

    Returns
    -------
    coordinates : :class:`numpy:numpy.ndarray`
        Array of shape (ny, nx) with pixel coordinates (x, y)

    """
    x = np.linspace(0, nx, num=nx + 1)
    y = np.linspace(0, ny, num=ny + 1)
    if mode == "centers":
        x = x + 0.5
        y = y + 0.5
        x = np.delete(x, -1)
        y = np.delete(y, -1)
    X, Y = np.meshgrid(x, y)
    coordinates = np.empty(X.shape + (2,))
    coordinates[:, :, 0] = X
    coordinates[:, :, 1] = Y
    return coordinates
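# A small usage sketch for pixel_coordinates, assuming numpy is imported as np.
import numpy as np

centers = pixel_coordinates(4, 3)                 # default mode="centers"
edges = pixel_coordinates(4, 3, mode="edges")
assert centers.shape == (3, 4, 2)                 # (ny, nx, 2): one (x, y) pair per pixel center
assert edges.shape == (4, 5, 2)                   # edge grid has one extra row and column
assert tuple(centers[0, 0]) == (0.5, 0.5)         # first pixel center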
def stochasticModelSynth(stocEnv, H, N):
    """
    Stochastic synthesis of a sound
    stocEnv: stochastic envelope; H: hop size; N: fft size
    returns y: output sound
    """

    if not(UF.isPower2(N)):                                  # raise error if N not a power of two
        raise ValueError("N is not a power of two")

    hN = N/2+1                                               # positive size of fft
    No2 = N/2                                                # half of N
    L = stocEnv[:,0].size                                    # number of frames
    ysize = H*(L+3)                                          # output sound size
    y = np.zeros(ysize)                                      # initialize output array
    ws = 2*hanning(N)                                        # synthesis window
    pout = 0                                                 # output sound pointer
    for l in range(L):
        mY = resample(stocEnv[l,:], hN)                      # interpolate to original size
        pY = 2*np.pi*np.random.rand(hN)                      # generate phase random values
        Y = np.zeros(N, dtype=complex)                       # initialize synthesis spectrum
        Y[:hN] = 10**(mY/20) * np.exp(1j*pY)                 # generate positive freq.
        Y[hN:] = 10**(mY[-2:0:-1]/20) * np.exp(-1j*pY[-2:0:-1])  # generate negative freq.
        fftbuffer = np.real(ifft(Y))                         # inverse FFT
        y[pout:pout+N] += ws*fftbuffer                       # overlap-add
        pout += H
    y = np.delete(y, range(No2))                             # delete half of first window
    y = np.delete(y, range(y.size-No2, y.size))              # delete half of the last window
    return y
def MaxImpedanceComputation(InputGraph):
    MaxTotalImpedance = 0
    G = InputGraph.copy()
    number_of_vertices = G.order()
    vertexlist = G.nodes()
    for top_node in vertexlist:
        for ground_node in vertexlist:
            if ground_node < top_node:
                ordered_vertexlist = vertexlist[:]
                ordered_vertexlist.remove(top_node)
                ordered_vertexlist.remove(ground_node)
                ordered_vertexlist.insert(0, top_node)
                ordered_vertexlist.insert(0, ground_node)
                LaplacianMatrix = nx.laplacian(G, ordered_vertexlist)
                ConductanceMatrix = np.delete(LaplacianMatrix, 0, 0)
                ConductanceMatrix = np.delete(ConductanceMatrix, np.s_[0], 1)
                InputVector = [0]*(number_of_vertices-1)
                InputVector[0] = 1
                VoltageVector = linalg.solve(ConductanceMatrix, InputVector)
                TotalImpedance = VoltageVector[0]
                if TotalImpedance > MaxTotalImpedance:
                    MaxTotalImpedance = TotalImpedance
    return MaxTotalImpedance
def err_plot(output, basedir='.', field='MLWA', err='dMLWA', suffix='syserr',
             label=r'$\tau_L$', err_label=r'$\delta\tau_{L,\mathrm{sys}}$',
             exclude=exclude):

    fig = plt.figure()
    ax = fig.add_subplot(111)
    ax.set_xlabel(label)
    ax.set_ylabel(err_label)

    for p in range(6):
        coef = '{}/NGC_891_P{}_bin30_allz2.{}.fits'.format(basedir, p+1, suffix)
        print coef
        c = pyfits.open(coef)[1].data
        exarr = np.array(exclude[p]) - 1
        d = np.delete(c[field], exarr)
        e = np.delete(c[err], exarr)
        ax.scatter(d, e/d, c='k', alpha=0.7, linewidth=0)

    ax.set_yticks([0.1, 0.2, 0.3, 0.4, 0.5])

    pp = PDF(output)
    pp.savefig(fig)
    pp.close()
    plt.close(fig)

    return
def carbonylorcarboxyl(allligand, index, bond_dist):
    allligandcoods = allligand.positions
    ocoods = np.zeros((1, 3), dtype=float)
    ocoods[0, :] = allligandcoods[index, :]
    ocoods = np.float32(ocoods)

    tempdist = MDAnalysis.lib.distances.distance_array(ocoods, allligandcoods)
    A = np.argsort(tempdist)
    temp = int(A[0, 1])

    Omatecood = np.zeros((1, 3), dtype=float)
    Omatecood[0, :] = allligandcoods[temp, :]
    Omatecood = np.float32(Omatecood)

    tempdist2 = MDAnalysis.lib.distances.distance_array(Omatecood, allligandcoods)
    B = np.argsort(tempdist2)
    B = np.delete(B, 0, axis=1)

    for i in xrange(0, B.size):
        if B[0, i] == index:
            C = np.delete(B, i, axis=1)
            break

    base1 = int(C[0, 0])
    base2 = int(C[0, 1])
    type1 = allligand[base1].type
    type2 = allligand[base2].type

    if type1 == 'O' or type2 == 'O':
        atype = 'carboxyl'
    else:
        atype = 'carbonyl'

    return atype
def make_DeviationPlot(self, year):
    average = np.array(self.average)
    deviation = np.array(self.deviation)

    dates = []
    dis = []
    count = 0
    for d in self.time:
        if self.time[count].year == year:
            dates.append(datetime.date(self.time[count].year,
                                       self.time[count].month,
                                       self.time[count].day))
            dis.append(self.discharge[count])
        count += 1

    dis = np.array(dis)
    dates = np.array(dates)

    if len(dates) == 365:
        average = np.delete(average, -1)
        deviation = np.delete(deviation, -1)

    plus1 = np.array(average + deviation)
    minus1 = np.array(average - deviation)

    plt.plot(dates, dis, 'r')
    x = np.linspace(1, 366, 366)
    plt.plot(dates, average, 'k')
    plt.fill_between(dates, average, plus1, facecolor='gray')
    plt.fill_between(dates, average, minus1, facecolor='gray')
def findDistance(record, data, result):
    transidsfortrain = data[:, [0]]
    data = np.delete(data, 0, 1)
    numAttributes = len(data[0])
    trainClasses = data[:, [numAttributes - 1]]
    data = np.delete(data, (numAttributes - 1), 1)
    counter = 0
    for row in record:
        currentRecord = row[1:-1]
        print currentRecord
        tempResult = (data - currentRecord) ** 2
        tempResult = np.sum(tempResult, axis=1).reshape(len(tempResult), 1)
        tempResult = np.sqrt(tempResult)
        tempResult = np.hstack((tempResult, trainClasses))
        tempResult = tempResult[np.argsort(tempResult[:, 0])]
        result[counter][1] = tempResult[0][0]
        result[counter][2] = tempResult[0][1]
        result[counter][3] = tempResult[1][0]
        result[counter][4] = tempResult[1][1]
        result[counter][5] = tempResult[2][0]
        result[counter][6] = tempResult[2][1]
        result[counter][7] = tempResult[3][0]
        result[counter][8] = tempResult[3][1]
        result[counter][9] = tempResult[4][0]
        result[counter][10] = tempResult[4][1]
        # TODO more things will be appended to result if value of n changes
        result[counter][11] = tempResult[5][0]
        result[counter][12] = tempResult[5][1]
        counter += 1
def execEnd(self, eventIdx):
    # execute an end-breaking or depolymerization event.
    oligoEndBreak = self.ald['end'][eventIdx/2]
    leftRight = eventIdx%2*2-1
    lr = -(leftRight+1)/2
    unitMoving = oligoEndBreak.ends[lr]
    oligo_vanish, form_oligo, self.event_code = oligoEndBreak.end_break(leftRight, self.units)
    if form_oligo:  # not empty
        mono = form_oligo['monomer']
        if mono:  # monomer + monomer (mergeOligo)
            idx = np.where([x in [mono, unitMoving] for x in self.monomers])[0]
            self.monomers = np.delete(self.monomers, idx)
            self.oligos = np.insert(self.oligos, 0, form_oligo['oligo'])
        else:  # monomer + multimer (mergeOligo)
            idx = np.where([unitMoving is x for x in self.monomers])[0]
            self.monomers = np.delete(self.monomers, idx)
    else:  # empty, add the end to monomers
        self.monomers = np.insert(self.monomers, 0, unitMoving)
        unitMoving.energize()
    if oligo_vanish:
        idx = np.where([oligoEndBreak is x for x in self.oligos])[0]
        self.oligos = np.delete(self.oligos, idx)
        idx = np.where([unitMoving is not x for x in oligoEndBreak.subunits])[0]
        nonmoving_unit = oligoEndBreak.subunits[idx[0]]
        self.monomers = np.insert(self.monomers, 0, nonmoving_unit)
        nonmoving_unit.energize()
def StoreTransition(self, s_t, a_t, r_t, s_t_next, d_t=0):
    s_t = s_t.reshape(1, self.state_size)
    s_t_next = s_t_next.reshape(1, self.state_size)
    a_t = a_t.reshape(1, self.action_size)
    r_t = r_t.reshape(1, 1)
    d_t = np.array([d_t]).reshape(1, 1)

    self.S = np.concatenate((self.S, s_t))
    self.Stag = np.concatenate((self.Stag, s_t_next))
    self.A = np.concatenate((self.A, a_t))
    self.R = np.concatenate((self.R, r_t))
    self.D = np.concatenate((self.D, d_t))

    if self.populated < self.buffer_size:
        if self.populated == 0:
            # drop the placeholder row used to initialize the arrays
            self.S = np.delete(self.S, 0, 0)
            self.A = np.delete(self.A, 0, 0)
            self.R = np.delete(self.R, 0, 0)
            self.Stag = np.delete(self.Stag, 0, 0)
            self.D = np.delete(self.D, 0, 0)
        self.populated += 1
    else:
        # buffer is full: drop the oldest transition
        self.S = np.delete(self.S, 0, 0)
        self.A = np.delete(self.A, 0, 0)
        self.R = np.delete(self.R, 0, 0)
        self.Stag = np.delete(self.Stag, 0, 0)
        self.D = np.delete(self.D, 0, 0)
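# A minimal, self-contained sketch of the replay-buffer setup StoreTransition assumes:
# each array starts with a single placeholder row so np.concatenate has something to
# append to, and the placeholder (or, once full, the oldest row) is dropped with
# np.delete. The class and attribute names mirror the method above but the
# constructor itself is an assumption for illustration.
import numpy as np

class ReplayBuffer(object):
    def __init__(self, state_size, action_size, buffer_size):
        self.state_size = state_size
        self.action_size = action_size
        self.buffer_size = buffer_size
        self.populated = 0
        # placeholder rows, removed on the first store
        self.S = np.zeros((1, state_size))
        self.Stag = np.zeros((1, state_size))
        self.A = np.zeros((1, action_size))
        self.R = np.zeros((1, 1))
        self.D = np.zeros((1, 1))

    StoreTransition = StoreTransition  # reuse the module-level function above as a method

buf = ReplayBuffer(state_size=4, action_size=2, buffer_size=100)
buf.StoreTransition(np.ones(4), np.zeros(2), np.array(1.0), np.ones(4), d_t=0)
assert buf.S.shape == (1, 4) and buf.populated == 1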
def loadGlob(self, simu, Z, S):
    file_root = self._fileRoot(simu, Z, S)
    data = np.loadtxt(self.dir + file_root + '/' + file_root + '_all.deus_histo.txt')
    data = np.delete(data, self.nb_histo-1)
    densityscale = np.linspace(self.glob_start, self.glob_end, self.nb_histo, 0)
    densityscale = np.delete(densityscale, self.nb_histo-1)
    return densityscale, data
def update_proximity_matrix(self, old_prox, new_centroid, a, b):
    old_prox = np.delete(old_prox, [a, b], 0)  # delete rows
    old_prox = np.delete(old_prox, [a, b], 1)  # delete cols

    # add a line of zeroes on the right and bottom edges
    mid = np.hstack((old_prox, np.zeros((old_prox.shape[0], 1), dtype=old_prox.dtype)))
    pprint(("mid", mid, mid.shape))
    new_prox = np.vstack((mid, np.zeros((1, mid.shape[1]), dtype=mid.dtype)))
    pprint(("expanded", new_prox, new_prox.shape))

    old_length = len(old_prox) - 1
    new_length = len(new_prox) - 1

    # fill them in with new comparisons
    new_prox[new_length, new_length] = float(HIGH)
    for i, centroid in enumerate(self.centroids[:-1]):
        diff = np.linalg.norm(centroid - new_centroid)
        pprint(("checking", diff, i))
        new_prox[new_length, i] = diff
        new_prox[i, new_length] = diff

    pprint(("new prox", new_prox, new_prox.shape))
    return new_prox
def _rebuild_iso(self, sel):
    g = self.graph
    ss = [p.plots[pp][0] for p in g.plots
          for pp in p.plots
          if pp == 'data{}'.format(self.group_id)]
    self._set_renderer_selection(ss, sel)

    if self._cached_data:
        reg = self._cached_reg
        xs, ys, xerr, yerr = self._cached_data
        nxs = delete(xs, sel)
        nys = delete(ys, sel)
        nxerr = delete(xerr, sel)
        nyerr = delete(yerr, sel)

        # reg = ReedYorkRegressor(xs=nxs, ys=nys,
        #                         xserr=nxerr, yserr=nyerr)
        reg.trait_set(xs=nxs, ys=nys, xserr=nxerr, yserr=nyerr)
        reg.calculate()

        fit = self.graph.plots[0].plots['fit{}'.format(self.group_id)][0]
        mi, ma = self.graph.get_x_limits()
        rxs = linspace(mi, ma, 10)
        rys = reg.predict(rxs)
        fit.index.set_data(rxs)
        fit.value.set_data(rys)

        if self._plot_label:
            self._add_info(self.graph.plots[0], reg, label=self._plot_label)
def clean_features(features, labels):
    # remove features missing in a lot of samples
    feature_threshold = [300, 250, 200, 150, 100]
    sample_threshold = [20, 15, 10, 5, 0]
    for f, s in zip(feature_threshold, sample_threshold):
        remove_cols = explore_features(features, f)
        features = np.delete(features, remove_cols, axis=1)
        print features.shape
        print '---'

        # remove samples missing data
        remove_rows = explore_samples(features, s)
        features = np.delete(features, remove_rows, axis=0)
        labels = np.delete(labels, remove_rows)
        print features.shape, labels.shape
        print '---'

    # RATIONALE: any feature missing in more than 5% of
    # samples has no guarantee of being collected so do
    # not include in model and any sample still missing
    # data probably is fairly unknown or poorly recorded
    # TODO: efficiently remove NaNs while keeping as much data as possible
    return features, labels
def find_offset_old(self, datafile, nonlinmin, nonlinmax, exclude, threshold):
    '''find_offset is used to determine the systematic offset present in the
    experimental setup that causes data to not be symmetric about zero input
    angle. It reads in the output of laserBench and returns the offset (in degrees)'''

    input_a, output_a = np.loadtxt(datafile, usecols=(0, 1), unpack=True)

    for e in exclude:
        did = np.where(input_a == e)
        output_a = np.delete(output_a, did)
        input_a = np.delete(input_a, did)

    pidx = np.where(input_a > nonlinmax)
    nidx = np.where(input_a < nonlinmin)

    in_a = np.append(input_a[nidx], input_a[pidx])
    out_a = np.append(-1*output_a[nidx], output_a[pidx])
    error = np.zeros(in_a.size) + 1

    b = 1000.
    offset = 0.
    while abs(b) > threshold:
        m, b = ADE.fit_line(in_a, out_a, error)
        offset += b
        in_a += b

    return offset
def sortArray(x, y):
    order_indicies = []
    delete_indicies = []

    for i in range(1, len(x)):
        if (x[i] == x[i-1]):
            print 'duplicate at', i, x[i]
            delete_indicies.append(i)
            if (len(delete_indicies) > 4):
                return x, y, True

    if delete_indicies:
        # delete all duplicates in one call so earlier deletions
        # do not shift the later indices
        x = np.delete(x, delete_indicies)
        y = np.delete(y, delete_indicies)

    for i in range(1, len(x)):
        if (x[i] < x[i-1]):
            x_temp = x[i]
            y_temp = y[i]
            x[i] = x[i-1]
            x[i-1] = x_temp
            y[i] = y[i-1]
            y[i-1] = y_temp
            print 'reorder data at', i, x[i]
            order_indicies.append(i)
            if ((len(order_indicies) + len(delete_indicies)) > 4):
                return x, y, True

    return x, y, False
def load_pics(path):
    # get all images
    npts = 32
    # find all the png files in the current path
    images = [os.path.join(path, f) for f in os.listdir(path)
              if os.path.splitext(f)[1] == '.png']

    desc_list = np.array(np.zeros(npts*128))
    # numerical classes array
    classy = []
    for pic in images:
        desc, kp = getDescriptorKp(pic, npts)
        # sometimes not all 32 descriptors are returned because there's not enough,
        # in which case we just pad up to 32 descriptors * 128 values/desc
        desc_list = np.vstack((desc_list, np.resize(desc.flatten(), (1, npts*128))))
        # figure what pokemon it is from the file name
        match = re.search(r"pokemon-(\d+)-", pic).group(1)
        classy.append(int(match))

    # convert classy array to a set of logical arrays
    classfication = np.array(np.zeros(NPOKEMON))
    for i in classy:
        tmp = np.zeros(NPOKEMON)
        tmp[i-1] = 1
        classfication = np.vstack((classfication, tmp))

    # remove the first row of dummy values
    desc_list = np.delete(desc_list, 0, 0)
    classfication = np.delete(classfication, 0, 0)

    # normalize training features
    normalize(desc_list)
    return desc_list, classfication
def test_frequency(self): hv = Hierarchy(db_name='vec_store.sqlite', file_name='hierarchy') # Produce frequency plots between the lower and upp bound. for i in range(20, 22): select_limit = [i - 1, i + 1] data1 = np.empty(1, ) data2 = np.empty(1, ) hit1, hit2 = 0, 0 for k in range(1, 4): selected_features1 = feature_frequency( hv, 243, 3, 8, new_data=True, ridge=True, scale=True, globalscale=True, normalization=True, featselect_featvar=False, featselect_featconst=True, select_limit=select_limit, feat_sub=i) selected_features2 = feature_frequency( hv, 243, 3, 8, smallest=True, new_data=False, ridge=True, scale=True, globalscale=True, normalization=True, featselect_featvar=False, featselect_featconst=True, select_limit=select_limit, feat_sub=i) if bool(selected_features1): hit1 += 1 if bool(selected_features2): hit2 += 1 if bool(selected_features1) and bool(selected_features2): data1 = np.concatenate( (data1, (list(selected_features1.items())[0])[1][0][:]), axis=0) data2 = np.concatenate( (data2, (list(selected_features2.items())[0])[1][0][:]), axis=0) data1 = np.delete(data1, 0) data2 = np.delete(data2, 0) data_all = np.concatenate((data1, data2), axis=0) if len(data_all) > 0: bins = np.arange(min(data_all) - 2, max(data_all) + 2, 0.5) hist1 = np.histogram(data1, bins=bins) hist2 = np.histogram(data2, bins=bins) r1_hist1 = np.delete(hist1[0], np.where(hist1[0] == 0)) r1_hist1 = np.divide(r1_hist1.astype('float'), len(data1)) * 100 r2_hist1 = np.delete( np.delete(hist1[1], np.where(hist1[0] == 0)), -1) r1_hist2 = np.delete(hist2[0], np.where(hist2[0] == 0)) r1_hist2 = np.divide(r1_hist2.astype('float'), len(data2)) * 100 r2_hist2 = np.delete( np.delete(hist2[1], np.where(hist2[0] == 0)), -1) if np.shape(r1_hist2)[0] > np.shape(r1_hist1)[0]: dif = np.shape(r1_hist2)[0] - np.shape(r1_hist1)[0] r1_hist1 = np.concatenate((r1_hist1, np.zeros(dif)), axis=0) r2_hist1 = np.concatenate((r2_hist1, np.zeros(dif)), axis=0) elif np.shape(r1_hist1)[0] > np.shape(r1_hist2)[0]: dif = np.shape(r1_hist1)[0] - np.shape(r1_hist2)[0] r1_hist2 = np.concatenate((r1_hist2, np.zeros(dif)), axis=0) r2_hist2 = np.concatenate((r2_hist2, np.zeros(dif)), axis=0)
def chance_level(paths, unit, nr_run_time): data1 = pd.read_csv(paths[0], header=None).values data2 = pd.read_csv(paths[1], header=None).values data1 = np.array(data1) data2 = np.array(data2) results = [] # remove the trials that contain 0 indexes_to_remove = np.array([]) for i in range(data2.shape[0]): if row_has_zero_values(data2[i]): indexes_to_remove = np.append(indexes_to_remove, i) data2 = np.delete(data2, indexes_to_remove, axis=0) data1 = np.delete(data1, indexes_to_remove, axis=0) y1 = get_labels(data1) y2 = get_labels(data2) # for i in range(8): # o = i * 45 # print o # print data1[y1 == o].shape x1 = get_data(data1) x2 = get_data(data2) unit_fr = x1[:, unit] unit_ma = x2[:, unit] init_correlation = pearsonr(unit_fr, unit_ma)[0] for n in range(nr_run_time): np.random.shuffle(data1) np.random.shuffle(data2) x1 = get_data(data1) x2 = get_data(data2) unit_fr = x1[:, unit] unit_ma = x2[:, unit] results.append(pearsonr(unit_fr, unit_ma)[0]) x = np.array(results) x.sort() f = plt.figure() ax = f.add_subplot(111) plt.text(0.05, 0.97, "Mean: %f" % np.mean(x), ha='left', va='top', transform=ax.transAxes) plt.text(0.05, 0.94, "Std: %f" % np.std(x), ha='left', va='top', transform=ax.transAxes) magenta_patch = mpatches.Patch(color='cyan', label='Mean') green_patch = mpatches.Patch(color='yellow', label='Initial correlation') plt.legend(handles=[magenta_patch, green_patch]) fit = norm.pdf(x, np.mean(x), np.std(x)) plt.plot(x, fit, color='red') plt.axvline(x.mean(), color='cyan', linewidth=1.5, label='Mean') plt.axvline(init_correlation, color='yellow', linewidth=1.5, label='Init_correlation') plt.hist(x, bins='auto', normed=True) plt.title("Correlation FR_MA_UNIT: %d" % unit) figure = plt.gcf() figure.set_size_inches(15, 9) #plt.show() plt.savefig("correlation_FR_MA_UNIT_%s.png" % unit, dpi=100)
def allocate_aps(self, plot=False): x = [] y = [] z = [] for i in range(self.grid.shape[0]): for j in range(self.grid.shape[1]): x.append(i) y.append(j) z.append(self.grid[i, j]) d = {'x': x, 'y': y, 'z': z} data = pd.DataFrame(data=d) X = data.x Y = data.y D = np.array(list(zip(X, Y))) flag = 0 # Number of clusters k = 1 # X coordinates of random centroids C_x = np.random.uniform(0, np.max(X), size=k) # Y coordinates of random centroids C_y = np.random.uniform(0, np.max(Y), size=k) clusters = np.zeros(len(D)) dense_cluster = 0 len_list = [] while flag != 1: flag = 1 if len(C_x) != k: candidates = [] for j in range(len(D)): if clusters[j] == dense_cluster: candidates.append(D[j, :]) cand = random.choice(candidates) C_x_aux = [cand[0]] C_y_aux = [cand[1]] C_x = np.concatenate((C_x, C_x_aux), axis=0) C_y = np.concatenate((C_y, C_y_aux), axis=0) C = np.array(list(zip(C_x, C_y)), dtype=np.float32) # To store the value of centroids when it updates C_old = np.zeros(C.shape) # Cluster Lables(0, 1, 2) # Error func. - Distance between new centroids and old centroids error = dist(C, C_old) # Loop will run till the error becomes zero while sum(error) != 0: # Assigning each value to its closest cluster for i in range(len(D)): distances = dist(D[i], C) cluster = np.argmin(distances) clusters[i] = cluster # Storing the old centroid values C_old = deepcopy(C) # Finding the new centroids by taking the average value for i in range(k): points_x = [np.repeat(D[j, 0], data.z[j]) for j in range(len(D)) if clusters[j] == i] l_x = [] for item in points_x: l_x = np.concatenate((l_x, item), axis=0) points_y = [np.repeat(D[j, 1], data.z[j]) for j in range(len(D)) if clusters[j] == i] l_y = [] for item in points_y: l_y = np.concatenate((l_y, item), axis=0) if np.isnan(np.mean(l_x)): C[i] = np.array([-1, -1]) break else: C[i] = np.array([np.mean(l_x), np.mean(l_y)]) error = dist(C, C_old) print(error, k) l_max = 0 for i in range(k): points_x = [np.repeat(D[j, 0], data.z[j]) for j in range(len(D)) if clusters[j] == i] l_x = [] for item in points_x: l_x = np.concatenate((l_x, item), axis=0) len_list.append(len(l_x)) if len(l_x) > l_max: l_max = len(l_x) dense_cluster = i if len(l_x) > 200 and flag != -1: flag = 0 if len(l_x) == 0: flag = -1 print(i, len(l_x)) if flag == -1: k = k - 1 C_x = C[:, 0] C_y = C[:, 1] k = k + 1 k = k - 1 mean_density = np.sum(len_list) / k print('Clusters:', k) print('Mean density:', mean_density) print('Max_cluster:', np.max(len_list)) print('Min_cluster:', np.min(len_list)) print(len_list) C_aux = C count = 0 for i in range(k): p = 0 p = [p + 0 for j in range(len(D)) if clusters[j] == i] points_x = [np.repeat(D[j, 0], data.z[j]) for j in range(len(D)) if clusters[j] == i] l_x = [] for item in points_x: l_x = np.concatenate((l_x, item), axis=0) points_y = [np.repeat(D[j, 1], data.z[j]) for j in range(len(D)) if clusters[j] == i] l_y = [] for item in points_y: l_y = np.concatenate((l_y, item), axis=0) if len(l_x) == 0: C_aux = np.delete(C_aux, i - count, 0) count = count + 1 else: print(C[i], len(l_x), len(p)) C = C_aux k = len(C) self.aps = C * self.grid_step print(k) if plot: plt.figure(figsize=(10, 8)) ax = sns.heatmap(data=clusters.reshape(self.grid.shape).transpose(), annot=True, cbar=False) ax.invert_yaxis() ax.scatter(C[:, 0], C[:, 1], marker='*', s=100, color='yellow') plt.title("APs clusters by density", fontsize=20) plt.show() plt.figure(figsize=(10, 8)) ax = sns.heatmap(data=self.grid.transpose(), annot=True) ax.invert_yaxis() ax.scatter(C[:, 0], C[:, 1], marker='*', s=100, 
color='yellow') plt.title("APs clusters and scenario density", fontsize=20) plt.show()
def MINE(Start: np.ndarray): Cycle = 100 Digits = 2 a, b = 20, -10 MemNum = 30 x0 = mat.repmat(Start, MemNum, 1) Dir = a * np.random.rand(MemNum, Digits) - b x0 += Dir Dir = Dir / Dir.min() if Dir.dtype != "float64": Dir = Dir.astype("float") Dir = Dir / (la.norm(Dir, axis=1).reshape(MemNum, 1)) Hbest = np.zeros((MemNum, Digits)) Hbestv = np.zeros((MemNum, 1)) for inner in range(MemNum): Hbestv[inner,:] = Evaluate(x0[inner,:]) Hbest[inner,:] = x0[inner,:] value = [] Pace = np.std(Hbest, axis=0) Pace = 4 * la.norm(Pace) P = [] R = [] Po = [] for i in range(Cycle): radius = np.std(Hbest, axis=0) radius = la.norm(radius) po = np.mean(Hbest, axis=0) Po.append(po) # Pace = 400 / (1 + np.exp(7*(i - (3*Cycle / 2)) / Cycle)) Judge = 1 / (1 + np.exp(Pace /(radius+1e-15) - 1)) Pace = 3.5 * radius *Judge Bias = 1.2 * Judge P.append(Pace) R.append(radius) # Bias = 1.2 / (1 + np.exp(2 * (i - (Cycle / 2)) / Cycle)) Sort = np.argsort(Hbestv, axis=0) c1 = np.linspace(1.2, 1, Digits) c1.reshape(Digits, 1) T0, Tend = 0.45, 0.9 A = (Tend - T0) / (Cycle ** 0.5) B = T0 Ref = Hbest[Sort[:, 0],:] for Inner in range(MemNum): T = B + A * (Inner ** 0.5) base = Ref - mat.repmat(x0[Inner,:], MemNum, 1) Normal = la.norm(base, axis=0) det = Normal.argmin() base = np.delete(base, det, axis=0) base = base[0:Digits,:] if base.dtype != "float": base = base.astype("float") norm = la.norm(base,axis=1).reshape(Digits,1) for index in range(Digits): if norm[index, 0] == 0: base[index,:] = np.ones(Digits) else: base[index,:] = base[index,:] / norm[index,0] belief1 = 2 * np.random.rand(1, Digits) - Bias * c1 belief2 = np.random.logistic(T,np.abs((1-T)/3), Digits) direction = np.dot(belief1, base) direction = direction / la.norm(direction) Dir[Inner,:] = (1-belief2) * Dir[Inner,:] + belief2 * direction if Dir[Inner,:].dtype != "float64": Dir = Dir.astype("float") Dir[Inner,:] = Dir[Inner,:] / la.norm(Dir[Inner,:]) x0[Inner,:] = x0[Inner,:] + Pace * Dir[Inner,:] Check = Evaluate(x0[Inner,:]) if Hbestv[Inner,:] > Check: Hbest[Inner,:] = x0[Inner,:] Hbestv[Inner,:] = Check value.append(Hbestv.min()) Po = np.array(Po) plt.figure("MINE:Time-Position") for i in range(Digits): plt.plot(np.arange(Cycle), Po[:, i]) plt.xlabel("Times") plt.ylabel("Value") plt.figure("MINE:Time-Radius/Pace") Pace,=plt.plot(range(Cycle), P, label="Pace") Radius,=plt.plot(range(Cycle), R, label="Radius") plt.legend(loc="upper right") plt.xlabel("Times") plt.ylabel("Value") return value,Hbestv.min(),Hbest[np.argmin(Hbestv),:]
avPOW_NMRSE = 0 avE_NMRSE = 0 avT_NMRSE = 0 distNMRSE = [] # Random seed used to debug #np.random.seed(1234567890) for iter in range(iters): # Dataset shuffle shuffledData = np.array([WH, C, K, N, LAT, POW, E, T]) shuffledData = shuffledData[:, np.random.permutation(shuffledData.shape[1])] # Dataset split in k-folds foldSize = shuffledData.shape[1] / k_folds for i in range(k_folds): # Split for train data trainData = np.delete(shuffledData, np.arange(i*foldSize,i*foldSize+foldSize,dtype=int), 1) # Split for validation data validationData = shuffledData[:,np.arange(i*foldSize,i*foldSize+foldSize,dtype=int)] # Identification over training Dataset LAT_parameters, LAT_covariance = curve_fit(LatAggModel, trainData[:4,:], trainData[4,:], maxfev=1000) POW_parameters, POW_covariance = curve_fit(PowAggModel, trainData[:4,:], trainData[5,:], maxfev=1000) E_parameters, E_covariance = curve_fit(EneAggModel, trainData[:4,:], trainData[6,:], maxfev=1000) T_parameters, T_covariance = curve_fit(ThrAggModel, trainData[:4,:], trainData[7,:], maxfev=1000) # Compute resulting NRMSE on validation Dataset fold distNMRSE.append(NRMSE(validationData[6,:], validationData[:4,:], EneAggModel, E_parameters)) avLAT_NMRSE += NRMSE(validationData[4,:], validationData[:4,:], LatAggModel, LAT_parameters) avPOW_NMRSE += NRMSE(validationData[5,:], validationData[:4,:], PowAggModel, POW_parameters) avE_NMRSE += NRMSE(validationData[6,:], validationData[:4,:], EneAggModel, E_parameters) avT_NMRSE += NRMSE(validationData[7,:], validationData[:4,:], ThrAggModel, T_parameters) # Store obtained distribution per fold iteration parameterDistLAT.append(np.concatenate((LAT_parameters[0]*selectedParameters[0], \
for i in range(0, len(species)):
    try:
        ftp = FTP('ftp.ncbi.nlm.nih.gov')
        ftp.login()
        ftp.cwd('/genomes/refseq/bacteria/%s/latest_assembly_versions/' % species[i, 0][3:])
        file = ''.join(ftp.nlst()[0])
        ftp.cwd(file)
        filename = file + '_genomic.fna.gz'
        genomes.append(filename[:-3])
        ftp.retrbinary('RETR ' + filename, open(filename, 'wb').write)
        subprocess.Popen('gzip -d %s' % filename, shell=True, stdout=subprocess.PIPE).wait()
        ftp.close()
    except Exception as e:
        print(e)
        species = np.delete(species, (i), axis=0)
        continue

subprocess.Popen('echo ...done >> logfile.txt', shell=True, stdout=subprocess.PIPE).wait()

# find length of each genome to determine needed depth of sequencing
glengths = []  # accumulate one length per genome (initialized once, before the loop)
for j in range(0, len(genomes)):
    glengths.append(int(subprocess.Popen("awk 'NR>1' %s | wc -c" % genomes[j],
                                         shell=True, stdout=subprocess.PIPE).stdout.read()))

# calculate length percentages and relative coverage
total = sum(glengths)
maxSeqCov = 20
glengths = np.array([100*x/total for x in glengths])
depths = np.divide(glengths, species[:, 1].astype(float))*maxSeqCov
def remove_samples(self, new_sample_amount, zscore_high=2, weighted_dist_value=1.0, annotate=False, remove_noise=True, remove_similar=True, apply_changes=False, display_all_graphs=False, show_gif=False, shelve_relative_path=None, create_visuals=True): self.__index_array = None self.__total_indexes = None self.__tmp_reduced_scaled = None self.__all_dp_dist_list = None self.__pbar = None self.__all_dp_dist_dict = None new_sample_amount = int(new_sample_amount) if new_sample_amount >= self.__scaled.shape[0]: print("THROW ERROR HERE: Sample removal must be less then") elif new_sample_amount <= 0: print("THROW ERROR HERE: Val must be a positive number!") elif remove_noise == False and remove_similar == False: print("THROW ERROR HERE: At least one operation must be made!") else: # Store data for removal removed_dps_dict = dict() # Stored removed datapoints for visualizations noise_removal_dps_dict = dict() similarity_dps_dict = dict() df_index_scaled_dict = dict() # Index to shape for i, df_index in enumerate(self.__df_index_values): df_index_scaled_dict[df_index] = i if not remove_noise: folder_dir_name = "Data_Point_Removal_Weight={1}".format( zscore_high, weighted_dist_value) elif not remove_similar: folder_dir_name = "Data_Point_Removal_Zscore={0}".format( zscore_high, weighted_dist_value) else: folder_dir_name = "Data_Point_Removal_Zscore={0}_Weight={1}".format( zscore_high, weighted_dist_value) # Display graph before augmentation; Create centroid centroid = np.mean(self.__scaled, axis=0) column_list = [i for i in range(0, self.__scaled.shape[1])] reduced_scaled = np.column_stack( (self.__scaled, self.__df_index_values.reshape( (self.__scaled.shape[0], 1)).astype(self.__scaled.dtype))) if create_visuals: self.__visualize_data_points(centroid=centroid, scaled_data=self.__scaled, noise_removal_dps=[], similar_removal_dps=[], new_sample_amount=new_sample_amount, zscore_high=zscore_high, weighted_dist_value=weighted_dist_value, annotate=annotate, output_path=folder_dir_name, title="Starting point", remove_noise=remove_noise, remove_similar=remove_similar, display_all_graphs=display_all_graphs) if remove_noise: dp_distances = np.zeros(len(reduced_scaled)) # Keep looping until new sample amount has been reached or # the distances are properly. 
while reduced_scaled.shape[0] > new_sample_amount: for index, dp in enumerate(reduced_scaled): dp_distances[index] = distance.euclidean( centroid, dp[:column_list[-1] + 1]) farthest_dp_index = np.argmax(dp_distances) zscores_dp_distances = zscore(np.concatenate(( dp_distances, np.array([distance.euclidean(centroid, self.__scaled[ dp_index]) for dp_index in list(removed_dps_dict.values()) ])), axis=0)) if zscores_dp_distances[farthest_dp_index] >= zscore_high: farthest_dp = reduced_scaled[farthest_dp_index][ :column_list[-1] + 1] # Add original dataframe index to the dict; # remove actual row from the data df_index = int(reduced_scaled[farthest_dp_index][-1]) removed_dps_dict[df_index] = df_index_scaled_dict[ df_index] if shelve_relative_path: shelf = shelve.open(shelve_relative_path) shelf[shelve_relative_path.split("/")[-1]] = list( removed_dps_dict.keys()) shelf.close() if create_visuals: noise_removal_dps_dict[df_index] = \ df_index_scaled_dict[df_index] reduced_scaled = np.delete(reduced_scaled, farthest_dp_index, 0) # Update centroid centroid = np.mean(reduced_scaled[:, column_list], axis=0) if create_visuals: self.__visualize_data_points(centroid=centroid, scaled_data=reduced_scaled[ :, column_list], noise_removal_dps=list( noise_removal_dps_dict.values()), similar_removal_dps=[], new_sample_amount=new_sample_amount, zscore_high=zscore_high, weighted_dist_value=weighted_dist_value, annotate=annotate, output_path=folder_dir_name, new_dp_meta_noise_removal=( farthest_dp, zscores_dp_distances[ farthest_dp_index], dp_distances[ farthest_dp_index]), title="Data Removal: Noise reduction", remove_noise=remove_noise, remove_similar=remove_similar, display_all_graphs=display_all_graphs) else: print( "Scaled size is now {0} and Z-Score {1:.2f}.".format( reduced_scaled.shape[0], zscores_dp_distances[farthest_dp_index])) # Break loop distances are below z-score val else: break if create_visuals: self.__create_gif_dp_amount(n_start=self.__scaled.shape[0], n_end=reduced_scaled.shape[0], folder_dir_name=folder_dir_name, filename="Noise Reduction", show_gif=show_gif) if remove_similar: starting_shape = reduced_scaled.shape[0] farthest_dp_distance = None dp_distances = np.zeros(len(reduced_scaled)) while reduced_scaled.shape[0] > new_sample_amount: # Following unconventional programming for multi threading # speed and memory increase self.__index_array = [i for i in range(0, len(reduced_scaled))] self.__total_indexes = len(self.__index_array) self.__tmp_reduced_scaled = copy.deepcopy( reduced_scaled[:, column_list]) if not farthest_dp_distance: for index, dp in enumerate(self.__tmp_reduced_scaled): dp_distances[index] = distance.euclidean( centroid, dp[:column_list[-1] + 1]) farthest_dp_distance = np.amax(dp_distances) farthest_dp_distance *= weighted_dist_value removal_index, keep_index, smallest_dist = self.__shortest_dist_relationship( centroid) if farthest_dp_distance < smallest_dist: print("Target distance reached!!!") break new_dp_meta_similar_removal = ( self.__tmp_reduced_scaled[removal_index], self.__tmp_reduced_scaled[keep_index]) df_index = int(reduced_scaled[removal_index][-1]) removed_dps_dict[df_index] = df_index_scaled_dict[df_index] if create_visuals: similarity_dps_dict[df_index] = df_index_scaled_dict[ df_index] if shelve_relative_path: shelf = shelve.open(shelve_relative_path) shelf[shelve_relative_path.split("/")[-1]] = list( removed_dps_dict.keys()) shelf.close() # Remove from temp scaled reduced_scaled = np.delete(reduced_scaled, removal_index, 0) # Update centroid centroid = 
np.mean(reduced_scaled[:, column_list], axis=0) if create_visuals: self.__visualize_data_points(centroid=centroid, scaled_data=reduced_scaled[ :, column_list], noise_removal_dps=list( noise_removal_dps_dict.values()), similar_removal_dps=list( similarity_dps_dict.values()), new_sample_amount=new_sample_amount, zscore_high=zscore_high, weighted_dist_value=weighted_dist_value, annotate=annotate, output_path=folder_dir_name, new_dp_meta_similar_removal=new_dp_meta_similar_removal, title="Data Removal: Similarity removal", remove_noise=remove_noise, remove_similar=remove_similar, display_all_graphs=display_all_graphs) else: print("Scaled size is now {0}.".format( reduced_scaled.shape[0])) # De-init multithreading artifacts self.__index_array = None self.__total_indexes = None self.__tmp_reduced_scaled = None self.__all_dp_dist_list = None if create_visuals: self.__create_gif_dp_amount(n_start=starting_shape - 1, n_end=reduced_scaled.shape[0], folder_dir_name=folder_dir_name, filename="Similar Reduction", show_gif=show_gif) if remove_similar and remove_noise and create_visuals: self.__visualize_data_points(centroid=centroid, scaled_data=reduced_scaled[:, column_list], noise_removal_dps=list( noise_removal_dps_dict.values()), similar_removal_dps=list( similarity_dps_dict.values()), new_sample_amount=new_sample_amount, zscore_high=zscore_high, weighted_dist_value=weighted_dist_value, annotate=annotate, output_path=folder_dir_name, new_dp_meta_similar_removal=None, title="Final Result", remove_noise=remove_noise, remove_similar=remove_similar, white_out_mode=True, no_print_output=True, display_all_graphs=display_all_graphs) self.__create_gif_dp_amount(n_start=self.__scaled.shape[0], n_end=reduced_scaled.shape[0], folder_dir_name=folder_dir_name, filename="Noise and Similar Reduction", flash_final_results=True, show_gif=show_gif) if apply_changes: self.__scaled = reduced_scaled[:, column_list] return list(removed_dps_dict.keys())
def allocate_edcs(self, opt='n', n=3, EDC_min=2, EDC_max=2): flag2 = 0 if opt == 'n': E_n = n kmeans = KMeans(n_clusters=E_n, random_state=0).fit(self.aps) E = kmeans.cluster_centers_ C_EDC = kmeans.predict(self.aps) print(E) print(len(E)) elif opt == 'max': E_n = len(self.aps) while flag2 != 1: kmeans = KMeans(n_clusters=E_n, random_state=0).fit(self.aps) E = kmeans.cluster_centers_ C_EDC = kmeans.predict(self.aps) for i in range(E_n): if (C_EDC.tolist()).count(i) > EDC_max: flag2 = 1 E_n = E_n - 1 E_n = E_n + 2 kmeans = KMeans(n_clusters=E_n, random_state=0).fit(self.aps) E = kmeans.cluster_centers_ C_EDC = kmeans.predict(self.aps) print(E) print(len(E)) elif opt == 'min': E_n = 1 while flag2 != 1: kmeans = KMeans(n_clusters=E_n, random_state=0).fit(self.aps) E = kmeans.cluster_centers_ C_EDC = kmeans.predict(self.aps) for i in range(E_n): if (C_EDC.tolist()).count(i) < EDC_min: flag2 = 1 E_n = E_n + 1 E_n = E_n - 2 kmeans = KMeans(n_clusters=E_n, random_state=0).fit(self.aps) E = kmeans.cluster_centers_ C_EDC = kmeans.predict(self.aps) print(E) print(len(E)) elif opt == 'minmax': flag3 = 0 C_E_aux = self.aps E_n = 1 r_c = [] while flag3 != 1: # print(r_c) while flag2 != 1: kmeans = KMeans(n_clusters=E_n, random_state=0).fit(C_E_aux) E = kmeans.cluster_centers_ C_EDC = kmeans.predict(C_E_aux) # print(C_EDC) for i in range(E_n): if (C_EDC.tolist()).count(i) < EDC_max: flag2 = 1 E_n = E_n + 1 flag2 = 0 E_n = E_n - 1 kmeans = KMeans(n_clusters=E_n, random_state=0).fit(C_E_aux) E = kmeans.cluster_centers_ C_EDC = kmeans.predict(C_E_aux) # print('now') # print(C_EDC) f = [] # print(C_E_aux) # print(E) count = 0 # print(C_E_aux) for index, item in enumerate(C_EDC.tolist()): # print(index,item) if len(C_EDC.tolist()) / EDC_max < 1: r_c = np.vstack((r_c, E)) flag3 = 1 if (C_EDC.tolist()).count(item) <= EDC_max: C_E_aux = np.delete(C_E_aux, index - count, 0) count = count + 1 if item not in f: if len(r_c) == 0: r_c = np.concatenate((r_c, E[item]), axis=0) else: r_c = np.vstack((r_c, E[item])) f.append(item) if len(C_E_aux) <= EDC_max: flag3 = 1 # print(r_c) kmeans = KMeans(n_clusters=len(r_c), init=r_c) E = r_c C_EDC = kmeans.fit_predict(self.aps) print(E) # print(kmeans.cluster_centers_) E_n = len(E) print(len(E)) self.edcs = E
def remove_throats(self, ti_list_delete):
    """
    Deletes throats from pore network

    Parameters
    ----------
    ti_list_delete: intarray
        Indices of throats to be deleted

    Notes
    ----------
    Throat indices will be adjusted to remain contiguous after deletion

    """
    assert len(np.unique(ti_list_delete)) == len(ti_list_delete)

    ti_list_old = np.arange(self.nr_t)
    ti_new_to_old = np.delete(ti_list_old, ti_list_delete)
    ti_old_to_new = {
        ti_new_to_old[i]: i for i in xrange(len(ti_new_to_old))
    }

    assert np.max(ti_old_to_new.values()) < self.nr_t - len(ti_list_delete)

    # Remove entries in ngh_pores and ngh_tubes arrays corresponding to deleted tubes
    for ti in ti_list_delete:
        pi_1, pi_2 = self.edgelist[ti, :]

        assert ti in self.ngh_tubes[pi_1]
        assert ti in self.ngh_tubes[pi_2]

        mask_pi_1 = self.ngh_tubes[pi_1] != ti
        mask_pi_2 = self.ngh_tubes[pi_2] != ti

        self.ngh_pores[pi_1] = self.ngh_pores[pi_1][mask_pi_1]
        self.ngh_pores[pi_2] = self.ngh_pores[pi_2][mask_pi_2]
        self.ngh_tubes[pi_1] = self.ngh_tubes[pi_1][mask_pi_1]
        self.ngh_tubes[pi_2] = self.ngh_tubes[pi_2][mask_pi_2]

        self.nr_nghs[pi_1] -= 1
        self.nr_nghs[pi_2] -= 1

        self.nr_t -= 1

    assert self.nr_t == len(ti_new_to_old)

    # Change indices of tubes in ngh_tubes arrays
    for pi in xrange(self.nr_p):
        new_ngh_tubes_pi = [
            ti_old_to_new[self.ngh_tubes[pi][x]] for x in xrange(self.nr_nghs[pi])
        ]
        self.ngh_tubes[pi] = np.asarray(new_ngh_tubes_pi, dtype=np.int32)
        if len(new_ngh_tubes_pi) > 0:
            assert np.max(new_ngh_tubes_pi) < self.nr_t

    self.edgelist = np.delete(self.edgelist, ti_list_delete, 0)
    self.tubes.remove_tubes(ti_list_delete)

    assert self.edgelist.shape[0] == self.nr_t
    assert self.edgelist.shape[1] == 2

    self._create_helper_properties()
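# A standalone sketch of the re-indexing pattern used in remove_throats above:
# after np.delete removes some throat indices, the surviving old indices are mapped
# to a new contiguous range. The array contents here are made up for illustration.
import numpy as np

nr_t = 6
ti_list_delete = np.array([1, 4])
ti_new_to_old = np.delete(np.arange(nr_t), ti_list_delete)          # [0, 2, 3, 5]
ti_old_to_new = {old: new for new, old in enumerate(ti_new_to_old)}
# {0: 0, 2: 1, 3: 2, 5: 3} -- surviving throats are renumbered contiguously
edgelist = np.array([[0, 1], [0, 2], [1, 2], [2, 3], [3, 4], [1, 4]])
edgelist = np.delete(edgelist, ti_list_delete, 0)                   # rows 1 and 4 removed
assert edgelist.shape == (nr_t - len(ti_list_delete), 2)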
def __shortest_dist_relationship(self, centroid): """ Finds the two datapoints that have the smallest distance. """ if not self.__all_dp_dist_list: total = 0 for i in range(0, self.__tmp_reduced_scaled.shape[0]): total += i print("The total time required is:", str( datetime.timedelta(seconds=total * 1.3e-5))) self.__all_dp_dist_list = find_all_distances_in_matrix( matrix=self.__tmp_reduced_scaled, index_array=self.__index_array, total_indexes=self.__total_indexes,) # :::ADD WEIGHTED DISTANCE IDEA HERE FUTURE ERIC::: all_shortest = [ [target_dp_index, np.argmin(distances) + target_dp_index + 1, np.amin(distances)] for target_dp_index, distances in enumerate(self.__all_dp_dist_list) if len(distances) > 0] smallest_dps_relationship = min(all_shortest, key=lambda x: x[2]) dp_1_index = smallest_dps_relationship[0] dp_2_index = smallest_dps_relationship[1] smallest_distance = smallest_dps_relationship[2] dp_1_dist = distance.euclidean(self.__tmp_reduced_scaled[dp_1_index], centroid) dp_2_dist = distance.euclidean(self.__tmp_reduced_scaled[dp_2_index], centroid) # Decide of the two dps which to remove removal_index = None keep_index = None if dp_1_dist < dp_2_dist: removal_index = dp_2_index keep_index = dp_1_index else: removal_index = dp_1_index keep_index = dp_2_index # Return distances values to everyone above the removed index for sub_removal_index, dp_index_key in enumerate( range(removal_index - 1, -1, -1)): self.__all_dp_dist_list[dp_index_key] = np.delete( self.__all_dp_dist_list[dp_index_key], sub_removal_index, 0) self.__all_dp_dist_list.pop(removal_index) # Return back the indexes and distance return removal_index, keep_index, smallest_distance