def __init__(self, wcs, axes, counts=None, roi_radius_deg=180., roi_msk=None):
    super(FITSImage, self).__init__(axes, counts=counts,
                                    var=copy.deepcopy(counts))

    self._wcs = wcs
    self._roi_radius_deg = roi_radius_deg
    self._header = self._wcs.to_header(True)

    self._lon = self._header['CRVAL1']
    self._lat = self._header['CRVAL2']

    self._roi_msk = np.empty(shape=self._counts.shape[:2], dtype=bool)
    self._roi_msk.fill(False)

    if roi_msk is not None:
        self._roi_msk |= roi_msk

    xpix, ypix = np.meshgrid(self.axis(0).center, self.axis(1).center)
    xpix = np.ravel(xpix)
    ypix = np.ravel(ypix)

    # self._pix_lon, self._pix_lat = self._wcs.wcs_pix2sky(xpix, ypix, 0)
    self._pix_lon, self._pix_lat = self._wcs.wcs_pix2world(xpix, ypix, 0)

    self.add_roi_msk(self._lon, self._lat, roi_radius_deg, True,
                     self.axis(1)._coordsys)
def column_or_1d(y, warn=False):
    """Ravel column or 1d numpy array, else raises an error.

    Parameters
    ----------
    y : array-like

    warn : boolean, default False
        To control display of warnings.

    Returns
    -------
    y : array
    """
    shape = np.shape(y)
    if len(shape) == 1:
        return np.ravel(y)
    if len(shape) == 2 and shape[1] == 1:
        if warn:
            warnings.warn("A column-vector y was passed when a 1d array was"
                          " expected. Please change the shape of y to "
                          "(n_samples, ), for example using ravel().",
                          DataConversionWarning, stacklevel=2)
        return np.ravel(y)
    raise ValueError("bad input shape {0}".format(shape))
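# A minimal usage sketch for column_or_1d above (assumes numpy is available;
# the warnings/DataConversionWarning names are only needed when warn=True):
# a (n, 1) column vector is flattened to shape (n,), while a 2-D array with
# more than one column raises ValueError.
import numpy as np

y_col = np.array([[1.0], [2.0], [3.0]])
print(column_or_1d(y_col).shape)      # (3,)
try:
    column_or_1d(np.zeros((3, 2)))
except ValueError as err:
    print(err)                        # bad input shape (3, 2)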
def __find_index(self, lat, lon, latvar, lonvar, n=1): if self._kdt.get(latvar.name) is None: latvals = latvar[:] * RAD_FACTOR lonvals = lonvar[:] * RAD_FACTOR clat, clon = np.cos(latvals), np.cos(lonvals) slat, slon = np.sin(latvals), np.sin(lonvals) triples = np.array(list(zip(np.ravel(clat * clon), np.ravel(clat * slon), np.ravel(slat)))) self._kdt[latvar.name] = KDTree(triples) del clat, clon del slat, slon del triples if not hasattr(lat, "__len__"): lat = [lat] lon = [lon] lat = np.array(lat) lon = np.array(lon) lat_rad = lat * RAD_FACTOR lon_rad = lon * RAD_FACTOR clat, clon = np.cos(lat_rad), np.cos(lon_rad) slat, slon = np.sin(lat_rad), np.sin(lon_rad) q = np.array([clat * clon, clat * slon, slat]).transpose() dist_sq_min, minindex_1d = self._kdt[latvar.name].query( np.float32(q), k=n ) iy_min, ix_min = np.unravel_index(minindex_1d, latvar.shape) return iy_min, ix_min, dist_sq_min * EARTH_RADIUS
def similarness(image1, image2):
    """
    Return the correlation distance between the histograms. This is 'normalized'
    so that 1 is a perfect match while -1 is a complete mismatch and 0 is no match.
    """
    # Open and resize images to 200x200
    i1 = Image.open(image1).resize((200, 200))
    i2 = Image.open(image2).resize((200, 200))

    # Get histogram and separate into RGB channels
    i1hist = numpy.array(i1.histogram()).astype('float32')
    i1r, i1b, i1g = i1hist[0:256], i1hist[256:256*2], i1hist[256*2:]
    # Re-bin the histogram from 256 bins per channel down to 16 (48 values total)
    i1rh = numpy.array([sum(i1r[i*16:16*(i+1)]) for i in range(16)]).astype('float32')
    i1bh = numpy.array([sum(i1b[i*16:16*(i+1)]) for i in range(16)]).astype('float32')
    i1gh = numpy.array([sum(i1g[i*16:16*(i+1)]) for i in range(16)]).astype('float32')
    # Combine all the channels back into one array
    i1histbin = numpy.ravel([i1rh, i1bh, i1gh]).astype('float32')

    # Same steps for the second image
    i2hist = numpy.array(i2.histogram()).astype('float32')
    i2r, i2b, i2g = i2hist[0:256], i2hist[256:256*2], i2hist[256*2:]
    i2rh = numpy.array([sum(i2r[i*16:16*(i+1)]) for i in range(16)]).astype('float32')
    i2bh = numpy.array([sum(i2b[i*16:16*(i+1)]) for i in range(16)]).astype('float32')
    i2gh = numpy.array([sum(i2g[i*16:16*(i+1)]) for i in range(16)]).astype('float32')
    i2histbin = numpy.ravel([i2rh, i2bh, i2gh]).astype('float32')

    return cv2.compareHist(i1histbin, i2histbin, 0)
def counts(self, a):
    """Returns array containing counts of each item in a.

    For example, on the enumeration 'UCAG', the sequence 'CCUG' would return
    the array [1,2,0,1] reflecting one count for the first item in the
    enumeration ('U'), two counts for the second item ('C'), no counts for
    the third item ('A'), and one count for the last item ('G').

    The result will always be a vector of Int with length equal to the length
    of the enumeration. We return Int and not an unsigned type because it's
    common to subtract counts, which produces surprising results on unsigned
    types (i.e. wraparound to maxint) unless the type is explicitly coerced
    by the user.

    Silently ignores any unrecognized indices, e.g. if your enumeration
    contains 'TCAG' and you get an 'X', the 'X' will be ignored because it
    has no index in the enumeration.
    """
    try:
        data = ravel(a)
    except ValueError:  # ravel failed; try coercing to array
        try:
            data = ravel(array(a))
        except ValueError:  # try mapping to string
            data = ravel(array(map(str, a)))
    return sum(asarray(self._allowed_range == data, Int), axis=-1)
def exact_roc(actuals, controls):
    """
    Computes the area under the ROC curve for separating two sets. Uses all
    possible thresholds and trapezoidal interpolation. Also returns arrays of
    the true positive rate and the false positive rate.
    """
    actuals = np.ravel(actuals)
    controls = np.ravel(controls)
    if np.isnan(actuals).any():
        raise RuntimeError('NaN found in actuals')
    if np.isnan(controls).any():
        raise RuntimeError('NaN found in controls')

    thresholds = np.hstack([-np.inf,
                            np.unique(np.concatenate((actuals, controls))),
                            np.inf])[::-1]
    true_pos_rate = np.empty(thresholds.size)
    false_pos_rate = np.empty(thresholds.size)
    num_act = float(len(actuals))
    num_ctr = float(len(controls))

    for i, value in enumerate(thresholds):
        true_pos_rate[i] = (actuals >= value).sum() / num_act
        false_pos_rate[i] = (controls >= value).sum() / num_ctr

    auc = np.dot(np.diff(false_pos_rate),
                 (true_pos_rate[0:-1] + true_pos_rate[1:]) / 2)
    return (auc, true_pos_rate, false_pos_rate)
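# A minimal sketch exercising exact_roc above (plain numpy): actuals that are
# all larger than the controls give an AUC of 1, while identical distributions
# would give an AUC near 0.5.
import numpy as np

actuals = np.array([0.9, 0.8, 0.75, 0.6])
controls = np.array([0.4, 0.3, 0.55, 0.1])
auc, tpr, fpr = exact_roc(actuals, controls)
print(auc)   # 1.0 here, since every actual exceeds every control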
from numpy import pi, cos, sin, ravel, unravel_index
from scipy.spatial import cKDTree


def kdtree_fast(latvar, lonvar, lat0, lon0):
    '''
    Find the (iy, ix) index of the grid point in latvar/lonvar closest to
    (lat0, lon0), using a kd-tree built on 3D Cartesian coordinates.

    :param latvar: 2D array (or netCDF variable) of latitudes in degrees
    :param lonvar: 2D array (or netCDF variable) of longitudes in degrees
    :param lat0: query latitude in degrees
    :param lon0: query longitude in degrees
    :return: (iy_min, ix_min) indices of the nearest grid point
    '''
    rad_factor = pi / 180.0  # for trigonometry, need angles in radians
    # Read latitude and longitude from file into numpy arrays
    latvals = latvar[:] * rad_factor
    lonvals = lonvar[:] * rad_factor
    ny, nx = latvals.shape
    clat, clon = cos(latvals), cos(lonvals)
    slat, slon = sin(latvals), sin(lonvals)
    # Build kd-tree from big arrays of 3D coordinates
    triples = list(zip(ravel(clat*clon), ravel(clat*slon), ravel(slat)))
    kdt = cKDTree(triples)
    lat0_rad = lat0 * rad_factor
    lon0_rad = lon0 * rad_factor
    clat0, clon0 = cos(lat0_rad), cos(lon0_rad)
    slat0, slon0 = sin(lat0_rad), sin(lon0_rad)
    dist_sq_min, minindex_1d = kdt.query([clat0*clon0, clat0*slon0, slat0])
    iy_min, ix_min = unravel_index(minindex_1d, latvals.shape)
    return iy_min, ix_min
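# A small usage sketch for kdtree_fast above: plain 2-D numpy arrays stand in
# for the netCDF latitude/longitude variables (the 1-degree grid is only an
# illustrative assumption).
import numpy as np

lats2d, lons2d = np.meshgrid(np.linspace(-30.0, 30.0, 61),
                             np.linspace(0.0, 90.0, 91),
                             indexing='ij')
iy, ix = kdtree_fast(lats2d, lons2d, 10.2, 44.9)
print(iy, ix, lats2d[iy, ix], lons2d[iy, ix])  # nearest grid cell to (10.2, 44.9)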
def equal(a, b, exact):
    if array_equal(a, b):
        return True

    if hasattr(a, 'dtype') and a.dtype in ['f4', 'f8']:
        nnans = isnan(a).sum()
        if nnans > 0:
            # For results containing NaNs, just check that the number
            # of NaNs is the same in both arrays.  This check could be
            # made more exhaustive, but checking element by element in
            # python space is very expensive in general.
            return nnans == isnan(b).sum()
        ninfs = isinf(a).sum()
        if ninfs > 0:
            # Ditto for Inf's
            return ninfs == isinf(b).sum()

    if exact:
        return (shape(a) == shape(b)) and alltrue(ravel(a) == ravel(b), axis=0)
    else:
        if hasattr(a, 'dtype') and a.dtype == 'f4':
            atol = 1e-5  # Relax precision for special opcodes, like fmod
        else:
            atol = 1e-8
        return (shape(a) == shape(b) and
                allclose(ravel(a), ravel(b), atol=atol))
def __eq__(self, other):
    if (not isinstance(other, DenseMatrix) or
            self.numRows != other.numRows or
            self.numCols != other.numCols):
        return False

    self_values = np.ravel(self.toArray(), order="F")
    other_values = np.ravel(other.toArray(), order="F")
    return all(self_values == other_values)
def predict(self, X, return_std=False): """ Perform classification on test vectors X. Parameters ---------- X : {array-like, object with finite length or shape} Training data, requires length = n_samples return_std : boolean, optional Whether to return the standard deviation of posterior prediction. All zeros in this case. Returns ------- y : array, shape = [n_samples] or [n_samples, n_outputs] Predicted target values for X. y_std : array, shape = [n_samples] or [n_samples, n_outputs] Standard deviation of predictive distribution of query points. """ check_is_fitted(self, "constant_") n_samples = _num_samples(X) y = np.full((n_samples, self.n_outputs_), self.constant_, dtype=np.array(self.constant_).dtype) y_std = np.zeros((n_samples, self.n_outputs_)) if self.n_outputs_ == 1 and not self.output_2d_: y = np.ravel(y) y_std = np.ravel(y_std) return (y, y_std) if return_std else y
def create_edisp(event_class, event_type, erec, egy, cth): """Create an array of energy response values versus energy and inclination angle. Parameters ---------- egy : `~numpy.ndarray` Energy in MeV. cth : `~numpy.ndarray` Cosine of the incidence angle. """ irf = create_irf(event_class, event_type) theta = np.degrees(np.arccos(cth)) v = np.zeros((len(erec), len(egy), len(cth))) m = (erec[:,None] / egy[None,:] < 3.0) & (erec[:,None] / egy[None,:] > 0.33333) # m |= ((erec[:,None] / egy[None,:] < 3.0) & # (erec[:,None] / egy[None,:] > 0.5) & (egy[None,:] < 10**2.5)) m = np.broadcast_to(m[:,:,None], v.shape) try: x = np.ones(v.shape)*erec[:,None,None] y = np.ones(v.shape)*egy[None,:,None] z = np.ones(v.shape)*theta[None,None,:] v[m] = irf.edisp().value(np.ravel(x[m]), np.ravel(y[m]), np.ravel(z[m]), 0.0) except: for i, x in enumerate(egy): for j, y in enumerate(theta): m = (erec / x < 3.0) & (erec / x > 0.333) v[m, i, j] = irf.edisp().value(erec[m], x, y, 0.0) return v
def _returnXY(self):
    """Return the gridded points as an (N, 2) array of (x, y) pairs."""
    X, Y = np.meshgrid(self.xgrd, self.ygrd)
    return np.column_stack((np.ravel(X), np.ravel(Y)))
def _check_hessian(self): if self.ff.system.cell.nvec != 0: # external rotations should be implemented properly for periodic systems. # 1D -> one external rotation, 2D and 3D -> no external rotation raise NotImplementedError('The hessian test is only working for isolated systems') # compute hessian hessian = estimate_cart_hessian(self.ff) # construct basis of external/internal degrees (rows) x, y, z = self.ff.system.pos.T natom = self.ff.system.natom ext_basis = np.array([ [1.0, 0.0, 0.0]*natom, [0.0, 1.0, 0.0]*natom, [0.0, 0.0, 1.0]*natom, # TODO: this assumes geometry is centered for good conditioning np.ravel(np.array([np.zeros(natom), z, -y]).T), np.ravel(np.array([-z, np.zeros(natom), x]).T), np.ravel(np.array([y, -x, np.zeros(natom)]).T), ]).T u, s, vt = np.linalg.svd(ext_basis, full_matrices=True) rank = (s > s.max()*1e-10).sum() # for linear and int_basis = u[:,rank:] # project hessian int_hessian = np.dot(int_basis.T, np.dot(hessian, int_basis)) evals = np.linalg.eigvalsh(int_hessian) self.num_neg_evals = (evals < 0).sum() # call tamkin as double check import tamkin system = self.ff.system mol = tamkin.Molecule(system.numbers, system.pos, system.masses, self.energy, self.gpos, hessian) nma = tamkin.NMA(mol, tamkin.ConstrainExt()) invcm = lightspeed/centimeter #print nma.freqs/invcm self.num_neg_evals = (nma.freqs < 0).sum()
def plot_3d_covariance(mean, cov):
    o, w, h = covariance_ellipse(cov, 3)
    # rotate width and height to x,y axis
    wx = abs(w*np.cos(o) + h*np.sin(o)) * 1.2
    wy = abs(h*np.cos(o) - w*np.sin(o)) * 1.2

    # reuse w as the larger of the two rotated widths
    if wx > wy:
        w = wx
    else:
        w = wy

    minx = mean[0] - w
    maxx = mean[0] + w
    miny = mean[1] - w
    maxy = mean[1] + w

    xs = np.arange(minx, maxx, (maxx-minx)/40.)
    ys = np.arange(miny, maxy, (maxy-miny)/40.)
    xv, yv = np.meshgrid(xs, ys)

    zs = np.array([100. * stats.multivariate_normal.pdf(np.array([x, y]), mean, cov)
                   for x, y in zip(np.ravel(xv), np.ravel(yv))])
    zv = zs.reshape(xv.shape)

    ax = plt.figure().add_subplot(111, projection='3d')
    ax.plot_surface(xv, yv, zv, rstride=1, cstride=1, cmap=cm.autumn)
    ax.set_xlabel('X')
    ax.set_ylabel('Y')

    ax.contour(xv, yv, zv, zdir='x', offset=minx-1, cmap=cm.autumn)
    ax.contour(xv, yv, zv, zdir='y', offset=maxy, cmap=cm.BuGn)
def __init__(self, con_id=None, onset=None, amplitude=None):
    """
    Parameters
    ----------
    con_id: array of shape (n_events), type = string, optional
        identifier of the events
    onset: array of shape (n_events), type = float, optional,
        onset time (in s.) of the events
    amplitude: array of shape (n_events), type = float, optional,
        amplitude of the events (if applicable)
    """
    self.con_id = con_id
    self.onset = onset
    self.amplitude = amplitude
    self.n_events = 0
    if con_id is not None:
        self.n_events = len(con_id)
        try:
            # this is only for backward compatibility:
            # if con_id were integers, they become a string
            self.con_id = np.array(["c" + str(int(float(c)))
                                    for c in con_id])
        except (ValueError, TypeError):
            self.con_id = np.ravel(np.array(con_id)).astype("str")

    if onset is not None:
        if len(onset) != self.n_events:
            raise ValueError("inconsistent definition of ids and onsets")
        self.onset = np.ravel(np.array(onset)).astype(np.float64)

    if amplitude is not None:
        if len(amplitude) != self.n_events:
            raise ValueError("inconsistent definition of amplitude")
        self.amplitude = np.ravel(np.array(amplitude))

    self.type = "event"
    self.n_conditions = len(np.unique(self.con_id))
def gen_batch_in_memory(self, X, nn_finder, nb_q, prior_factor): """Generate batch, assuming X is loaded in memory in the main program""" while True: # Select idx at random for the batch idx = np.random.choice(X.shape[0], self.batch_size, replace=False) X_batch_color = X[idx] X_batch_black = X_batch_color[:, :1, :, :] X_batch_ab = X_batch_color[:, 1:, :, :] npts, c, h, w = X_batch_ab.shape X_a = np.ravel(X_batch_ab[:, 0, :, :]) X_b = np.ravel(X_batch_ab[:, 1, :, :]) X_batch_ab = np.vstack((X_a, X_b)).T Y_batch = self.get_soft_encoding(X_batch_ab, nn_finder, nb_q) # Add prior weight to Y_batch idx_max = np.argmax(Y_batch, axis=1) weights = prior_factor[idx_max].reshape(Y_batch.shape[0], 1) Y_batch = np.concatenate((Y_batch, weights), axis=1) # # Reshape Y_batch Y_batch = Y_batch.reshape((npts, h, w, nb_q + 1)) yield X_batch_black, X_batch_color, Y_batch
def producer(): try: # Load the data from HDF5 file with h5py.File(self.hdf5_file, "r") as hf: num_chan, height, width = self.X_shape[-3:] # Select start_idx at random for the batch idx_start = np.random.randint(0, self.X_shape[0] - self.batch_size) idx_end = idx_start + self.batch_size # Get X and y X_batch_color = hf["%s_lab_data" % self.dset][idx_start: idx_end, :, :, :] X_batch_black = X_batch_color[:, :1, :, :] X_batch_ab = X_batch_color[:, 1:, :, :] npts, c, h, w = X_batch_ab.shape X_a = np.ravel(X_batch_ab[:, 0, :, :]) X_b = np.ravel(X_batch_ab[:, 1, :, :]) X_batch_ab = np.vstack((X_a, X_b)).T Y_batch = self.get_soft_encoding(X_batch_ab, nn_finder, nb_q) # Add prior weight to Y_batch idx_max = np.argmax(Y_batch, axis=1) weights = prior_factor[idx_max].reshape(Y_batch.shape[0], 1) Y_batch = np.concatenate((Y_batch, weights), axis=1) # # Reshape Y_batch Y_batch = Y_batch.reshape((npts, h, w, nb_q + 1)) # Put the data in a queue queue.put((X_batch_black, X_batch_color, Y_batch)) except: print("Nothing here")
def __init__(self,pix,name=None,title=None,z1=None,z2=None): # pix must be a numpy array... self.a = 1.0 self.b = self.c = 0. self.d = -1.0 _shape = pix.shape # Start assuming full image can fit in frame buffer self.tx = _shape[1] / 2 + _shape[1] % 2 self.ty = _shape[0] / 2 + _shape[0] % 2 self.dtx = _shape[1] / 2 + _shape[1] % 2 self.dty = _shape[0] / 2 + _shape[0] % 2 # Determine full range of pixel values for image if not z1: self.z1 = n.minimum.reduce(n.ravel(pix)) else: self.z1 = z1 if not z2: self.z2 = n.maximum.reduce(n.ravel(pix)) else: self.z2 = z2 self.zt = self._W_LINEAR if not name: self.name = 'Image' else: self.name = name self.title = title self.ny,self.nx = pix.shape self.full_ny, self.full_nx = pix.shape
def __call__(self, filt, mask=None):
    '''
    Provide the iterator over the levels.
    '''
    self._check_filter(filt, mask)
    # This cover method is only for one-dimensional filter functions.
    assert(self.dim == 1)

    # The interval length measures indices, not filter values in this case.
    self.interval_length = 1. / \
        (self.intervals[0] - (self.intervals[0]-1)*self.fract_overlap)
    self.step_size = self.interval_length*(1-self.fract_overlap)

    if mask is None:
        self.n = len(self.filt)
        self.sortorder = np.argsort(np.ravel(self.filt))
    else:
        idx = np.flatnonzero(mask)
        self.n = len(idx)
        sortorder = np.argsort(np.ravel(self.filt[mask]))
        self.sortorder = idx[sortorder]
        assert len(self.sortorder) == self.n

    self.iter = range(self.intervals[0]).__iter__()
    return self
def reproject(self, nj_obj, field): """Reproject a field of another njord inst. to the current grid""" if not hasattr(self,'nj_ivec'): self.add_njijvec(nj_obj) field = getattr(nj_obj, field) if type(field) is str else field if hasattr(nj_obj, 'tvec') and (len(nj_obj.tvec) == field.shape[0]): newfield = np.zeros(nj_obj.tvec.shape + self.llat.shape) for tpos in range(len(nj_obj.tvec)): newfield[tpos,:,:] = self.reproject(nj_obj, field[tpos,...]) return newfield di = self.i2 - self.i1 dj = self.j2 - self.j1 xy = np.vstack((self.nj_jvec, self.nj_ivec)) if type(field) == str: weights = np.ravel(nj_obj.__dict__[field])[self.nj_mask] else: weights = np.ravel(field)[self.nj_mask] mask = ~np.isnan(weights) flat_coord = np.ravel_multi_index(xy[:,mask],(dj, di)) sums = np.bincount(flat_coord, weights[mask]) cnts = np.bincount(flat_coord) fld = np.zeros((dj, di)) * np.nan fld.flat[:len(sums)] = sums.astype(np.float)/cnts try: self.add_landmask() fld[self.landmask] = np.nan except: print "Couldn't load landmask for %s" % self.projname return fld
def doSetNoDataInSeriesOld(infile, nodata, outfile, outformat, options): fileH = gdal.Open(infile, GA_ReadOnly) if fileH is None: exitMessage('Could not open file {0}. Exit(1).'.format(infile), 1) # does not data exist? data = numpy.ravel( fileH.GetRasterBand(1).ReadAsArray()) wnodata = (data==nodata) if wnodata.any(): print 'No data already set. Return(0)' return(0) common = numpy.ones(data.shape) for iband in range(1, fileH.RasterCount): newdata = numpy.ravel(fileH.GetRasterBand(iband + 1).ReadAsArray()) wnequal = data!=newdata common[wnequal] = 0 gdal.TermProgress_nocb( (iband+1)/float( 2*fileH.RasterCount ) ) # is there any constant time series? if common.any(): outDrv = gdal.GetDriverByName(outformat) outDS = outDrv.Create(outfile, fileH.RasterXSize, fileH.RasterYSize, fileH.RasterCount, fileH.GetRasterBand(1).GetRasterDataType, options) outDS.SetProjection( fileH.GetProjection() ) outDS.SetGeoTransform( fileH.GetGeoTransform() ) #then set these time series to nodata for iband in range(fileH.RasterCount): data = numpy.ravel(fileH.GetRasterBand(iband + 1).ReadAsArray(0, 0, fileH.RasterXSize, fileH.RasterYSize)) data[common] = nodata outDS.GetRasterBand( iband + 1 ).WriteArray( data.reshape(fileH.RasterYSize, fileH.RasterXSize), 0, 0) gdal.TermProgress_nocb( (iband+1+fileH.RasterCount) / float( 2*fileH.RasterCount ) ) gdal.TermProgress_nocb(1)
def _lf_acc(self, subset, lf_idx): gt = self.gt._gt_vec pred = np.ravel(self.lf_matrix.tocsc()[:,lf_idx].todense()) has_label = np.where(pred != 0) has_gt = np.where(gt != 0) # Get labels/gt for candidates in dev set, with label, with gt gd_idxs = np.intersect1d(has_label, subset) gd_idxs = np.intersect1d(has_gt, gd_idxs) gt = np.ravel(gt[gd_idxs]) pred_sub = np.ravel(pred[gd_idxs]) n_neg = np.sum(pred_sub == -1) n_pos = np.sum(pred_sub == 1) if np.sum(pred == -1) == 0: neg_acc = -1 elif n_neg == 0: neg_acc = 0 else: neg_acc = float(np.sum((pred_sub == -1) * (gt == -1))) / n_neg if np.sum(pred == 1) == 0: pos_acc = -1 elif n_pos == 0: pos_acc = 0 else: pos_acc = float(np.sum((pred_sub == 1) * (gt == 1))) / n_pos return (pos_acc, n_pos, neg_acc, n_neg)
def make_kernel_grid(freq, kernel_size, n_pix, placement_grid):
    """
    make_kernel_grid(freq, kernel_size, n_pix, placement_grid)

    freq ~ cyc/n_pix
    kernel_size ~ pix. the fwhm of the gaussian envelope, effectively the kernel radius.
    n_pix ~ pixels per side of square image
    placement_grid = (X, Y) grid of kernel centers, as from meshgrid

    return: kernel_set = 3D numpy array of complex ripple filters
            ~ [number_of_filters] x [n_pix] x [n_pix]
    """
    iter_x = np.ravel(placement_grid[0])
    iter_y = np.ravel(placement_grid[1])
    kernel_set = np.zeros((len(iter_x), n_pix, n_pix)).astype(complex)
    count = 0
    print("constructing %d filters" % (len(iter_x)))
    for x, y in zip(iter_x, iter_y):
        kernel_set[count, :, :] = complex_ripple_filter(freq, (x, y), kernel_size, n_pix)
        count += 1
    return kernel_set
def _binopt(self, other, op, in_shape=None, out_shape=None): """apply the binary operation fn to two sparse matrices""" # ideally we'd take the GCDs of the blocksize dimensions # and explode self and other to match other = self.__class__(other, blocksize=self.blocksize) # e.g. bsr_plus_bsr, etc. fn = getattr(sparsetools, self.format + op + self.format) R,C = self.blocksize max_bnnz = len(self.data) + len(other.data) indptr = np.empty_like(self.indptr) indices = np.empty(max_bnnz, dtype=np.intc) data = np.empty(R*C*max_bnnz, dtype=upcast(self.dtype,other.dtype)) fn(self.shape[0]//R, self.shape[1]//C, R, C, self.indptr, self.indices, np.ravel(self.data), other.indptr, other.indices, np.ravel(other.data), indptr, indices, data) actual_bnnz = indptr[-1] indices = indices[:actual_bnnz] data = data[:R*C*actual_bnnz] if actual_bnnz < max_bnnz/2: indices = indices.copy() data = data.copy() data = data.reshape(-1,R,C) return self.__class__((data, indices, indptr), shape=self.shape)
def __call__(self, transform_xy, x1, y1, x2, y2):
    """
    get extreme values.

    x1, y1, x2, y2 in image coordinates (0-based)
    nx, ny : number of divisions in each axis
    """
    x_, y_ = np.linspace(x1, x2, self.nx), np.linspace(y1, y2, self.ny)
    x, y = np.meshgrid(x_, y_)
    lon, lat = transform_xy(np.ravel(x), np.ravel(y))

    # iron out jumps, but the algorithm should be improved.
    # This is just a naive way of doing it and may fail for some cases.
    if self.lon_cycle is not None:
        lon0 = np.nanmin(lon)
        lon -= 360.0 * ((lon - lon0) > 180.0)
    if self.lat_cycle is not None:
        lat0 = np.nanmin(lat)
        lat -= 360.0 * ((lat - lat0) > 180.0)

    lon_min, lon_max = np.nanmin(lon), np.nanmax(lon)
    lat_min, lat_max = np.nanmin(lat), np.nanmax(lat)

    lon_min, lon_max, lat_min, lat_max = \
        self._adjust_extremes(lon_min, lon_max, lat_min, lat_max)

    return lon_min, lon_max, lat_min, lat_max
def objective_function(self, fps, fjac=None, **kwargs): """ Function to minimize. Parameters ---------- fps : list parameters returned by the fitter fjac : None or list parameters for which to compute the jacobian args : list [model, [weights], [input coordinates]] """ status = 0 model = kwargs['model'] weights = kwargs['weights'] model.parameters = fps meas = kwargs['err'] if 'y' in kwargs: args = (kwargs['x'], kwargs['y']) else: args = (kwargs['x'],) r = [status] if weights is None: residuals = np.ravel(model(*args) - meas) r.append(residuals) else: residuals = np.ravel(weights * (model(*args) - meas)) r.append(residuals) if fjac is not None: args = args + (meas,) fderiv = np.array(self._wrap_deriv(fps, model, weights, *args)) r.append(fderiv) return r
def cost(params, Y, R, num_features, lambdas):
    Y = np.matrix(Y)  # (1682, 943)
    R = np.matrix(R)  # (1682, 943)
    num_movies = Y.shape[0]
    num_users = Y.shape[1]

    # reshape the parameter array into parameter matrices
    X = np.matrix(np.reshape(params[:num_movies * num_features],
                             (num_movies, num_features)))    # (1682, 10)
    Theta = np.matrix(np.reshape(params[num_movies * num_features:],
                                 (num_users, num_features)))  # (943, 10)

    # initializations
    J = 0
    X_grad = np.zeros(X.shape)          # (1682, 10)
    Theta_grad = np.zeros(Theta.shape)  # (943, 10)

    # compute the cost
    error = np.multiply((X * Theta.T) - Y, R)  # (1682, 943)
    squared_error = np.power(error, 2)         # (1682, 943)
    J = (1. / 2) * np.sum(squared_error)

    # add the cost regularization
    J = J + ((lambdas / 2) * np.sum(np.power(Theta, 2)))
    J = J + ((lambdas / 2) * np.sum(np.power(X, 2)))

    # calculate the gradients with regularization
    X_grad = (error * Theta) + (lambdas * X)
    Theta_grad = (error.T * X) + (lambdas * Theta)

    # unravel the gradient matrices into a single array
    grad = np.concatenate((np.ravel(X_grad), np.ravel(Theta_grad)))

    return J, grad
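# A small sketch of how cost above is typically driven (plain numpy; the tiny
# problem sizes are illustrative assumptions): the movie and user feature
# matrices are flattened with np.ravel into one parameter vector, which is the
# form gradient-based optimizers expect.
import numpy as np

num_movies, num_users, num_features = 5, 4, 3
Y = np.random.rand(num_movies, num_users)
R = (np.random.rand(num_movies, num_users) > 0.5).astype(float)
X0 = np.random.randn(num_movies, num_features)
Theta0 = np.random.randn(num_users, num_features)
params = np.concatenate((np.ravel(X0), np.ravel(Theta0)))
J, grad = cost(params, Y, R, num_features, 1.0)
print(J, grad.shape)  # scalar cost and a flat gradient of length 5*3 + 4*3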
def on_epoch_end(self, epoch, logs={}): model.save_weights(weightSavePath + "bestWeights_regressMOS_smallNetwork_latestModel.h5",overwrite=True) logging.info(" -- Epoch "+str(epoch)+" done, loss : "+ str(logs.get('loss'))) predictedScoresVal = np.ravel(model.predict(valData,batch_size=batchSize)) predictedScoresTest = np.ravel(model.predict(testData,batch_size=batchSize)) sroccVal = scipy.stats.spearmanr(predictedScoresVal, valLabels) plccVal = scipy.stats.pearsonr(predictedScoresVal, valLabels) sroccTest = scipy.stats.spearmanr(predictedScoresTest, testLabels) plccTest = scipy.stats.pearsonr(predictedScoresTest, testLabels) t_str_val = '\nSpearman corr for validation set is ' + str(sroccVal[0]) + '\nPearson corr for validation set is '+ str(plccVal[0]) + '\nMean absolute error for validation set is ' + str(np.mean(np.abs(predictedScoresVal-valLabels))) t_str_test = '\nSpearman corr for test set is ' + str(sroccTest[0]) + '\nPearson corr for test set is '+ str(plccTest[0]) + '\nMean absolute error for test set is ' + str(np.mean(np.abs(predictedScoresTest-testLabels))) print t_str_val print t_str_test mean_corr = sroccVal[0] + plccVal[0] if mean_corr > self.best_mean_corr: self.best_mean_corr = mean_corr model.save_weights(weightSavePath + "bestWeights_regressMOS_smallNetwork_bestCorr.h5",overwrite=True) printing("Best correlation loss model saved at Epoch " + str(epoch) + "\n") self.metric.append(logs.get("val_loss")) if epoch % 5 == 0: model.optimizer.lr.set_value(round(Decimal(0.8*model.optimizer.lr.get_value()),8)) learningRate = model.optimizer.lr.get_value() printing("") printing("The current learning rate is: " + str(learningRate))
def test_cvxopt(): mycvxopt.solvers().qp(0,0,0,0,0,0) path = '/Users/Admin/Dropbox/ml/MachineLearning_CS6140' with open(os.path.join(path, 'cvxopt.pkl'), 'rb') as f: arr = pickle.load(f) print 'pickle loaded' P = arr[0] q = arr[1] G = arr[2] h = arr[3] A = arr[4] b = arr[5] print 'input assigned' # pcost dcost gap pres dres #0: -6.3339e+03 -5.5410e+05 2e+06 2e+00 2e-14 #1: 5.8332e+02 -3.1277e+05 5e+05 2e-01 2e-14 #2: 1.3585e+03 -1.3003e+05 2e+05 7e-02 2e-14 #return np.ravel(solution['x']) with open(os.path.join(path, 'cvxopt_solution.pkl'), 'rb') as f: solution = pickle.load(f) print 'solution pickle loaded' mysolution = cvxopt.solvers.qp(P, q, G, h, A, b) print 'convex optimizer solved' if np.allclose(np.ravel(mysolution['x']), np.ravel(solution['x'])): print 'EQUAL!!!' else: print 'WROng!!!'
def add_ij(self):
    self.imat, self.jmat = np.meshgrid(np.arange(self.i2 - self.i1),
                                       np.arange(self.j2 - self.j1))
    self.kdijvec = np.vstack((np.ravel(self.imat),
                              np.ravel(self.jmat))).T
    self._ijvec = np.arange(np.prod(self.imat.shape))
datalvl0=pd.concat([train_X10,test_X10],axis=0) datalvl1=pd.get_dummies(datalvl0) train_X1=datalvl1[0:train_X10.shape[0]] test_X1=datalvl1[train_X10.shape[0]:] #train_X1=train_data[train_data.columns[4]] train_X22=train_data[train_data.columns[6]] train_X3=train_data[train_data.columns[7]] train_y1=train_label[train_label.columns[1:]] train_X2=(train_X22 - train_X22.min()) / (train_X22.max() - train_X22.min()) train_X3=pd.get_dummies(train_X3) train_X = pd.concat([train_X1, train_X2,train_X3,train_despo,train_ext,train_name,train_nameneg], axis=1) train_y=np.ravel(train_y1) X_train,X_test, y_train, y_test = train_test_split(train_X,train_y,test_size=0.4, random_state=0) rf1 = RandomForestRegressor(n_estimators= 70,max_depth=40, min_samples_split=55, min_samples_leaf=50,max_features='auto',oob_score=True, random_state=100,n_jobs=-1) rf1.fit(X_train,y_train) y_pred=rf1.predict(X_test) print(log_loss(y_test, y_pred)) """fill NA test_X11=test_data[test_data.columns[3]] test_X12=test_data[test_data.columns[4]] test_X13=pd.concat([test_X11, test_X12], axis=1)
def attack_svm(self, server, predictor_name, kernel_type, attack_type, dimension, query_budget, dataset=None, roundsize=5): if dataset is None or len(dataset) < 2: print("[!] Dataset too small") print("[*] Aborting attack...") raise ValueError if not isinstance(dataset, list): dataset = dataset.tolist() if attack_type == "retraining": my_model = svm.SVC(kernel=kernel_type) X = [] y = [] for datum in random.sample(dataset, query_budget): b = self.client.poll_server(server, predictor_name, [datum]) X.append(datum) y.append(b) my_model.fit(X, numpy.ravel(y)) return my_model elif attack_type == "adaptive retraining": if len(dataset) >= query_budget > roundsize: pool = random.sample(dataset, query_budget) x = [] y = [] n = roundsize t = math.ceil(query_budget / n) for i in range(0, n): a = pool.pop(0) b = self.client.poll_server(server, predictor_name, [a])[0] x.append(a) y.append(b) while min(y) == max(y): for i in range(0, n): a = pool.pop(0) b = self.client.poll_server(server, predictor_name, [a])[0] x.append(a) y.append(b) t -= 1 print("[*] Additional initial random round had to be done due to no variance") my_model = svm.SVC(kernel=kernel_type) for i in range(0, t-1): my_model.fit(x, numpy.ravel(y)) for j in range(0, n): if not pool: break distances = my_model.decision_function(pool).tolist() closest = pool.pop(distances.index(min(distances))) x.append(closest) y.append(self.client.poll_server(server, predictor_name, [closest])[0]) my_model.fit(x, numpy.ravel(y)) return my_model else: print("[!] Error: dataset to small or roundsize bigger than query_budget") raise ValueError elif attack_type == "lowd-meek": if len(dataset) != 2: print("[!] Error: For Lowd-Meek attack, please provide exactly a positive and a negative sample") raise ValueError elif kernel_type != "linear": print("[!] Error: Unsupported Kernel by lowd-meek attack") raise ValueError else: print("[*] Initiating lowd-meek attack.") epsilon = 0.01 d = 0.01 vector1 = dataset[0] vector2 = dataset[1] vector1_category = numpy.ravel(self.client.poll_server(server, predictor_name, [vector1])) vector2_category = numpy.ravel(self.client.poll_server(server, predictor_name, [vector2])) if vector1_category == vector2_category: print("[!] Error: Provided Samples are in same category") raise ValueError else: if vector1_category == [0]: print(vector1_category, "is 0") negative_instance = vector1 positive_instance = vector2 else: print(vector2_category, "is 0") negative_instance = vector2 positive_instance = vector1 #sign_witness_p = positive_instance sign_witness_n = negative_instance print("[+] Positive and Negative Instance confirmed.") for feature in range(0, len(sign_witness_n)): print("[*] Finding Signwitness. 
Checking feature", feature) f = sign_witness_n[feature] sign_witness_n[feature] = positive_instance[feature] if numpy.ravel(self.client.poll_server(server, predictor_name, [sign_witness_n])) == [1]: sign_witness_p = sign_witness_n.copy() sign_witness_n[feature] = f f_index = feature print("[+] Sign Witnesses found with feature index:", f_index) break weight_f = 1 * (sign_witness_p[feature] - sign_witness_n[feature]) / abs(sign_witness_p[feature] - sign_witness_n[feature]) # Find Negative Instance of x with gap(x) < epsilon/4 delta = sign_witness_p[feature] - sign_witness_n[feature] seeker = sign_witness_n #seeker[feature] = sign_witness_p[feature] - delta #print(sign_witness_p) #print(sign_witness_n) while True: #print("S - ", seeker) pred = self.client.poll_server(server, predictor_name, [seeker]) #print("p:", pred) if pred == [1]: #print("Positive. delta", delta) delta = delta / 2 seeker[feature] = seeker[feature] - delta else: #print("Negative. delta", delta) if abs(delta) < epsilon/4: print("[+] found hyperplane crossing", seeker) break delta = delta / 2 seeker[feature] = seeker[feature] + delta # seeker should be that negative instance now. crossing = seeker.copy() seeker[feature] += 1 classification = numpy.ravel(self.client.poll_server(server, predictor_name, [seeker])) dooble = seeker.copy() # dooble is negative instance weight = [0]*len(dooble) #print("Weight on initieal feature", weight_f) for otherfeature in range(0, len(dooble)): if otherfeature == feature: weight[otherfeature] = weight_f continue # line search on the other features dooble[otherfeature] += 1/d if numpy.ravel(self.client.poll_server(server, predictor_name, [dooble])) == classification: #print("DIDNOTCHANGE") doox = dooble.copy() dooble[otherfeature] -= 2/d if numpy.ravel(self.client.poll_server(server, predictor_name, [dooble])) == classification: # if even though added 1/d class stays the same -> weigh = 0 weight[otherfeature] = 0 dooble[otherfeature] = seeker[otherfeature] #print("found weightless feature,", otherfeature) continue else: distance_max = -1/d else: distance_max = 1/d distance_min = 0 distance_mid = (distance_max + distance_min) / 2 dooble[otherfeature] = seeker[otherfeature] + distance_mid while abs(distance_mid - distance_min) > epsilon / 4: if numpy.ravel(self.client.poll_server(server, predictor_name, [dooble])) != classification: distance_min = distance_min distance_max = distance_mid distance_mid = (distance_min + distance_max) / 2 dooble[otherfeature] = seeker[otherfeature] + distance_mid else: distance_min = distance_mid distance_mid = (distance_min + distance_max) / 2 distance_max = distance_max dooble[otherfeature] = seeker[otherfeature] + distance_mid test = seeker[otherfeature]-dooble[otherfeature] weight[otherfeature] = weight_f / test continue print("[+] Found Weights", weight) a = -(weight[0] / weight[1]) intercept = crossing[1] - a * crossing[0] print("[+] Found Intercept (2d)", intercept) class LinearMockSVM: def __init__(self, w__, b__): self.w__ = w__ self.b__ = b__*w__[1] # norm def predict(self, val): rv = [] for v in val: #print(numpy.sign(numpy.dot(self.w__, v) - self.b__)) rv.append(0) if numpy.sign(numpy.dot(self.w__, v) - self.b__) == -1 else rv.append(1) return rv return LinearMockSVM(weight, intercept) else: print("Error: Unknown attack type") raise ValueError
def predict(self, X): """Perform classification on test vectors X. Parameters ---------- X : {array-like, object with finite length or shape} Training data, requires length = n_samples Returns ------- y : array, shape = [n_samples] or [n_samples, n_outputs] Predicted target values for X. """ check_is_fitted(self, 'classes_') # numpy random_state expects Python int and not long as size argument # under Windows n_samples = _num_samples(X) rs = check_random_state(self.random_state) n_classes_ = self.n_classes_ classes_ = self.classes_ class_prior_ = self.class_prior_ constant = self.constant if self.n_outputs_ == 1: # Get same type even for self.n_outputs_ == 1 n_classes_ = [n_classes_] classes_ = [classes_] class_prior_ = [class_prior_] constant = [constant] # Compute probability only once if self.strategy == "stratified": proba = self.predict_proba(X) if self.n_outputs_ == 1: proba = [proba] if self.sparse_output_: class_prob = None if self.strategy in ("most_frequent", "prior"): classes_ = [np.array([cp.argmax()]) for cp in class_prior_] elif self.strategy == "stratified": class_prob = class_prior_ elif self.strategy == "uniform": raise ValueError("Sparse target prediction is not " "supported with the uniform strategy") elif self.strategy == "constant": classes_ = [np.array([c]) for c in constant] y = random_choice_csc(n_samples, classes_, class_prob, self.random_state) else: if self.strategy in ("most_frequent", "prior"): y = np.tile([classes_[k][class_prior_[k].argmax()] for k in range(self.n_outputs_)], [n_samples, 1]) elif self.strategy == "stratified": y = np.vstack(classes_[k][proba[k].argmax(axis=1)] for k in range(self.n_outputs_)).T elif self.strategy == "uniform": ret = [classes_[k][rs.randint(n_classes_[k], size=n_samples)] for k in range(self.n_outputs_)] y = np.vstack(ret).T elif self.strategy == "constant": y = np.tile(self.constant, (n_samples, 1)) if self.n_outputs_ == 1 and not self.output_2d_: y = np.ravel(y) return y
from sklearn.ensemble import GradientBoostingRegressor from sklearn import linear_model import matplotlib.pyplot as plt import numpy as np classifier = 'GBR' crossval = 1; loc_train = 'train.vw' loc_test = 'test.vw' loc_submission = 'treePreds.txt' # features,labels,ids = makeFeatureArray(loc_train) # extract train features # testFeatures,testLabels,testids = makeFeatureArray(loc_test) # extract test features if crossval==1: x_train, x_test, y_train, y_test = train_test_split(features, np.ravel(labels), test_size=0.33, random_state=42) # separate train and test sets if classifier == 'RF': clf = RandomForestClassifier(n_estimators=400,n_jobs=4) # create random forest clf.fit(x_train,y_train) # train classifier predictions = clf.predict_proba(np.ravel(x_test)) # generate predictions fpr,tpr,tr = roc_curve(y_test,predictions,1) score = roc_auc_score(y_test,predictions) if classifier == 'RR': clf = linear_model.Ridge(alpha = 0.5) # create Ridge regression clf.fit(x_train,y_train) # train classifier predictions = clf.predict(x_test) # generate predictions fpr,tpr,tr = roc_curve(y_test,predictions,1) score = roc_auc_score(y_test,predictions) if classifier == 'GBR': clf = GradientBoostingRegressor(n_estimators=100, loss='ls').fit(x_train, y_train)
print ('Collecting files for {} experiment'.format(name)) ZSOCRE_CUT = 2.0 #•Get protocol type protocol = str(filelist.loc[[manip],['Protocol']].values.ravel()[0]) print (protocol) global_reshape = global_reshape-1 scanspot_list = ['Scanspot 1','Scanspot 2','Scanspot 3','Scanspot 4'] files = [] for i in range(global_reshape.size): idx_list = np.ravel(filelist.loc[[manip],[scanspot_list[i]]].values) record_list = [] for idx in idx_list : if isinstance(idx,str) == True: record_list.append(idx) files.append(record_list) files = np.asarray(files) #----------------------ZEBRIN BANDS & ORIENTATION------------------------------- _BANDS_micron = df.loc[['%s'%name],'P2- contra':'P2- ipsi'] #Selects values in xcl file from P2- contra to P2- ipsi _BANDS_norm = df.loc[['%s norm_P1-'%name],'P2- contra':'P2- ipsi'] #Selects values in xcl file from P2- contra to P2- ipsi
print("Loss val: " + str(values[0])) print("Accuracy val: " + str(values[1])) values_t = model.evaluate(x=[Q1_test, Q2_test], y=y_test) print("Loss test: " + str(values_t[0])) print("Accuracy test: " + str(values_t[1])) #Model evaluation """ i/p:validation, test o/p: results_val.txt,results_test.txt """ yhat_probs_val = model.predict([Q1_val, Q2_val], verbose=0) yhat_probs_test = model.predict([Q1_test, Q2_test], verbose=0) y_pred = np.ravel(yhat_probs_val).tolist() df_pred = pd.DataFrame() df_pred['pred_classes'] = y_pred y_val = np.ravel(y_val).tolist() df_pred['True Y_val'] = y_val excel_ = df_pred.to_excel(path + "Pred_val_2.xlsx", index=None, header=True) y_pred_t = np.ravel(yhat_probs_test).tolist() df_pred_t = pd.DataFrame() df_pred_t['pred_classes'] = y_pred_t y_test = np.ravel(y_test).tolist() df_pred_t['True Y_test'] = y_test excel_t = df_pred_t.to_excel(path + "Pred_test.xlsx", index=None, header=True) y_pr_val = (df_pred['pred_classes']).tolist() y_val_tr = (df_pred['True Y_val']).tolist()
def get_data(audio, eeg, audio_unatt=None, idx_eeg=None, num_batch=None, idx_sample=None, num_context=1, num_predict=1, dct_params=None): """Select a sequence of audio, audio_unattnd, and eeg data Reshape the selected data into num_batch frames for prediction. Arguments --------- audio : (num_part, num_samples) eeg : (num_part, num_ch, num_samples) idx_sample : row idx of audio and eeg data Defaults to a random sample if not specified num_context : scalar Total number of samples of input used to predict an output sample. If one-to-one mapping with no delay, num_context=1 num_predict : scalar Total number of time samples to be predicted in the output dct_params['idx_keep_audioTime']: index of samples into the time vector Returns ------- X : Variable (num_batch, num_ch * num_context + num_context) eeg + audio y : Variable (num_batch, class) z_unatt : None """ if (dct_params is not None) and ('idx_keep_audioTime' in dct_params): idx_keep_audioTime = dct_params['idx_keep_audioTime'] else: idx_keep_audioTime = None ###################################################################################################### import numpy as np import scipy import scipy.signal import torch from torch.autograd import Variable import sklearn import sklearn.preprocessing import time a = audio[idx_sample] # selected audio part e = eeg[idx_sample] # selected eeg part au = audio_unatt[idx_sample] # selected unattended audio # Trim off NaNs idx_a = np.logical_not(np.isnan(a)) idx_e = np.logical_not(np.isnan(e[1])) if np.abs(np.sum(idx_a) - np.sum(idx_e)) > 3: print('unequal samples') idx_keep = np.logical_and(idx_a, idx_e) a = a[idx_keep] e = e[:, idx_keep] au = au[idx_keep] if a.shape[0] >= num_context: # Make a conv matrix out of the eeg # Make a conv matrix out of the attended audio # Make a conv matrix out of the unattended audio # Cat [X_eeg, X_audio], y = 1 # Cat [X_eeg, X_audio_unatt], y = 0 # Return X, y # No frame shifts are needed. num_time = a.size - num_context + 1 num_ch = e.shape[0] if idx_keep_audioTime is None: num_column_audio = num_context idx_keep_audioTime = np.arange(num_context) else: num_column_audio = np.size(idx_keep_audioTime) X_eeg = np.nan * np.ones((num_time, num_ch, num_column_audio)) X_audio = np.nan * np.ones((num_time, num_column_audio)) X_audio_unatt = np.nan * np.ones((num_time, num_column_audio)) print(X_eeg.shape) for idx in range(num_time): idx_keep = np.arange(num_context) + idx for idx_ch in range(num_ch): X_eeg[idx, idx_ch] = np.ravel(e[idx_ch, idx_keep])[idx_keep_audioTime] X_audio[idx] = np.ravel(a[idx_keep])[idx_keep_audioTime] X_audio_unatt[idx] = np.ravel(au[idx_keep])[idx_keep_audioTime] X_audio = X_audio[:, None, :] X_audio_unatt = X_audio_unatt[:, None, :] X1 = np.concatenate((X_eeg, X_audio), axis=1) X0 = np.concatenate((X_eeg, X_audio_unatt), axis=1) X = np.concatenate((X0, X1), axis=0) y = np.concatenate((np.zeros((num_time, 1)), np.ones((num_time, 1))), axis=0) X = Variable(torch.from_numpy(X).type('torch.FloatTensor')) y = Variable(torch.from_numpy(np.array(y)).type('torch.FloatTensor')) z_unatt = None else: print('-warning, too little data-') X = None y = None z_unatt = None a = None a_unatt = None return X, y, z_unatt
count = 0 for file in filelist[550:]: data = [] with open('/projects/kumar-lab/mehtav/normalised_vd/' + file, 'rb') as f: f.seek(0) data = pickle.load(f) # data = [points/conf/vel, traj_number] # Each traj has shape (nrow x 12 x 2) or (nrow by 12) print(count) count = count + 1 for t in range(len(data[1])): p_traj = data[0][t] c_traj = data[1][t] v_traj = data[2][t] op_traj = [] for i in range(v_traj.shape[0]): s = np.ravel(np.delete( p_traj[i], CENTER_SPINE_INDEX, 0)) # Removing center spine, as it is always at the origin a = np.ravel( np.delete(v_traj[i], CENTER_SPINE_INDEX, 0)) # Removing center spine, as it is always at rest s = np.delete(s, 17, 0) # Removing x coordinate of base tail a = np.delete(a, 17, 0) # Removing x coordinate of base tail op_traj.append((s, a)) with open( '/projects/kumar-lab/mehtav/sa_traj/' + file[:-23] + '_' + str(t) + '_sa.pkl', 'wb') as f: pickle.dump(op_traj, f)
def calc_SS(self, smooth_jacobian=True): """ now set up the secondary spectrum defined by: delay = theta^2, i.e. 0.5 L/c = 1 doppler = theta, i.e. V/lambda = 1 therefore differential delay (td), and differential doppler (fd) are: td = (thetax+thetagx)^2 +(thetay+thetagy)^2 - thetagx^2-thetagy^2 fd = (thetax + thetagx) - thetagx = thetax Jacobian = 1/(thetay+thetagy) thetay + thetagy = sqrt(td - (thetax + thetagx)^2 + thetagx^2 + thetagy^2) the arc is defined by (thetay+thetagy) == 0 where there is a half order singularity. The singularity creates a problem in the code because the sampling in fd,td is not synchronized with the arc position, so there can be some very bright points if the sample happens to lie very close to the singularity. this is not a problem in interpreting the secondary spectrum, but it causes large artifacts when Fourier transforming it to get the ACF. So I [Bill Coles, in original Matlab code] have limited the Jacobian by not allowing (thetay+thetagy) to be less than half the step size in thetax and thetay. """ fd = np.arange(-self.nf, self.nf, self.df) td = np.arange(-self.nt, self.nt, self.dt) self.fd = fd self.td = td # now get the thetax and thetay corresponding to fd and td # first initialize arrays all of same size amp = np.zeros((len(td), len(fd))) thetax = np.zeros((len(td), len(fd))) thetay = np.zeros((len(td), len(fd))) SS = np.zeros((len(td), len(fd))) for ifd in range(0, len(fd)): for itd in range(0, len(td)): thetax[itd, ifd] = fd[ifd] - self.thetagx + self.thetarx thetayplusthetagysq = td[itd] - \ (thetax[itd, ifd] + self.thetagx)**2 + self.thetarx**2 + \ self.thetary**2 if thetayplusthetagysq > 0: thymthgy = np.sqrt(thetayplusthetagysq) # thetay-thetagy thetay[itd, ifd] = thymthgy - self.thetagy if thymthgy < 0.5*self.df: if smooth_jacobian: amp[itd, ifd] = (np.arcsin(1) - np.arcsin((thetax[itd, ifd] - 0.5*self.df) / thymthgy))/self.df else: amp[itd, ifd] = 2/self.df # bound Jacobian else: amp[itd, ifd] = 1/thymthgy # Jacobian else: amp[itd, ifd] = 10**(-6) # on or outside primary arc self.thetax = thetax self.thetay = thetay # now get secondary spectrum by interpolating in the brightness array # and multiplying by the Jacobian of the tranformation from (td,fd) to # (thx,thy) SS = griddata((np.ravel(self.X), np.ravel(self.Y)), np.ravel(self.B), (np.ravel(thetax), np.ravel(thetay)), method='linear') \ * np.ravel(amp) SS = np.reshape(SS, (len(td), len(fd))) # now add the SS with the sign of td and fd changed # unfortunately that is not simply reversing the matrix # however if you take just SS(1:, 1:) then it can be reversed and # added to the original SSrev = np.flip(np.flip(SS[1:, 1:], axis=0), axis=1) SS[1:, 1:] += SSrev self.SS = SS self.LSS = 10*np.log10(SS) return
data_path_merge = '/media/sf_2_PhD_2013_-2014/1PhD_WorkDocs/PhD_Data-calculations/data/sms-call-internet-mi/csv/csv/' #data_path_merge = '/media/sf_2_PhD_2013_-2014/1PhD_WorkDocs/PhD_Data-calculations/data/OI_precipitazione_nov_dic_2013/csv2/' data_files = glob.glob(data_path + '*.tif') for data_file in data_files: ts = data_file[108:118] date_time = ''.join( [ts[0:4], '-', ts[4:6], '-', ts[6:8], 'T', ts[8:10], ':00:00+0100']) grid = gdal.Open(data_file) #grid = gdal.Open('/media/sf_2_PhD_2013_-2014/1PhD_WorkDocs/PhD_Data-calculations/data/OI_precipitazione_nov_dic_2013/gtiff/PR_2013111505UTCplus1.tif') array = np.array(grid.GetRasterBand(1).ReadAsArray()) arrayt = array.T arrayflip = np.fliplr(arrayt) datavector = np.ravel(arrayflip, order='K') df = pd.DataFrame() df['sid'] = sid df['date_time'] = date_time #df.drop(['sid'],inplace=True,axis=1) df['rain'] = datavector df = df[['date_time', 'sid', 'rain']] df.to_csv(path_or_buf=data_path_out + ts + '.csv', sep='\t', index=False, header=False, columns=['date_time', 'sid', 'rain']) # merging all created txt into one txt data_files_out = glob.glob(data_path_out + '*.csv') with open(data_path_merge + 'rain.csv', 'a') as out_file:
def train(self): if (self.status != 'init'): print("Please load train data and init W first.") return self.W self.status = 'train' # P = Q, q = p, G = -A, h = -c if (self.svm_kernel == 'soft_polynomial_kernel' or self.svm_kernel == 'soft_gaussian_kernel'): original_X = self.train_X[:, 1:] K = utility.Kernel.kernel_matrix(self, original_X) P = cvxopt.matrix(np.outer(self.train_Y, self.train_Y) * K) q = cvxopt.matrix(np.ones(self.data_num) * -1) constrain1 = np.diag(np.ones(self.data_num) * -1) constrain2 = np.identity(self.data_num) G = cvxopt.matrix(np.vstack((constrain1, constrain2))) constrain1 = np.zeros(self.data_num) * -1 constrain2 = np.ones(self.data_num) * self.C h = cvxopt.matrix(np.hstack((constrain1, constrain2))) A = cvxopt.matrix(self.train_Y, (1, self.data_num)) b = cvxopt.matrix(0.0) cvxopt.solvers.options['show_progress'] = False solution = cvxopt.solvers.qp(P, q, G, h, A, b) # Lagrange multipliers a = np.ravel(solution['x']) self.alpha = a # Support vectors have non zero lagrange multipliers sv = a > 1e-5 self.sv_index = np.arange(len(a))[sv] self.sv_alpha = a[sv] self.sv_X = original_X[sv] self.sv_Y = self.train_Y[sv] free_sv = np.logical_and(a > 1e-5, a < self.C) self.free_sv_index = np.arange(len(a))[free_sv] self.free_sv_alpha = a[free_sv] self.free_sv_X = original_X[free_sv] self.free_sv_Y = self.train_Y[free_sv] ''' sum_short_b = 0 for i in range(len(self.free_sv_alpha)): sum_short_b += self.free_sv_Y[i] for j in range(len(self.free_sv_alpha)): if (self.svm_kernel == 'soft_polynomial_kernel'): sum_short_b -= self.free_sv_alpha[j] * self.free_sv_Y[j] * utility.Kernel.polynomial_kernel(self, original_X[self.free_sv_index[j]], original_X[self.free_sv_index[i]]) elif (self.svm_kernel == 'soft_gaussian_kernel'): sum_short_b -= self.free_sv_alpha[j] * self.free_sv_Y[j] * utility.Kernel.gaussian_kernel(self, original_X[self.free_sv_index[j]], original_X[self.free_sv_index[i]]) short_b = sum_short_b / len(self.free_sv_alpha) ''' short_b = (np.sum(self.free_sv_Y) - np.sum( np.ravel(self.free_sv_alpha * self.free_sv_Y * utility.Kernel.kernel_matrix(self, self.free_sv_X))) ) / len(self.free_sv_alpha) self.sv_avg_b = short_b elif (self.svm_kernel == 'polynomial_kernel' or self.svm_kernel == 'gaussian_kernel'): original_X = self.train_X[:, 1:] K = utility.Kernel.kernel_matrix(self, original_X) P = cvxopt.matrix(np.outer(self.train_Y, self.train_Y) * K) q = cvxopt.matrix(np.ones(self.data_num) * -1) G = cvxopt.matrix(np.diag(np.ones(self.data_num) * -1)) h = cvxopt.matrix(np.zeros(self.data_num) * -1) A = cvxopt.matrix(self.train_Y, (1, self.data_num)) b = cvxopt.matrix(0.0) cvxopt.solvers.options['show_progress'] = False solution = cvxopt.solvers.qp(P, q, G, h, A, b) # Lagrange multipliers a = np.ravel(solution['x']) self.alpha = a # Support vectors have non zero lagrange multipliers sv = a > 1e-5 self.sv_index = np.arange(len(a))[sv] self.sv_alpha = a[sv] self.sv_X = original_X[sv] self.sv_Y = self.train_Y[sv] ''' sum_short_b = 0 for i in range(len(self.sv_alpha)): sum_short_b += self.sv_Y[i] for j in range(len(self.sv_alpha)): if (self.svm_kernel == 'polynomial_kernel'): sum_short_b -= self.sv_alpha[j] * self.sv_Y[j] * utility.Kernel.polynomial_kernel(self, original_X[self.sv_index[j]], original_X[self.sv_index[i]]) elif (self.svm_kernel == 'gaussian_kernel'): sum_short_b -= self.sv_alpha[j] * self.sv_Y[j] * utility.Kernel.gaussian_kernel(self, original_X[self.sv_index[j]], original_X[self.sv_index[i]]) short_b = sum_short_b / len(self.sv_alpha) ''' short_b = (np.sum(self.sv_Y) 
- np.sum( np.ravel(self.sv_alpha * self.sv_Y * utility.Kernel.kernel_matrix(self, self.sv_X))) ) / len(self.sv_alpha) self.sv_avg_b = short_b elif (self.svm_kernel == 'dual_hard_margin'): original_X = self.train_X[:, 1:] P = cvxopt.matrix( np.outer(self.train_Y, self.train_Y) * np.dot(original_X, np.transpose(original_X))) q = cvxopt.matrix(np.ones(self.data_num) * -1) G = cvxopt.matrix(np.diag(np.ones(self.data_num) * -1)) h = cvxopt.matrix(np.zeros(self.data_num) * -1) A = cvxopt.matrix(self.train_Y, (1, self.data_num)) b = cvxopt.matrix(0.0) cvxopt.solvers.options['show_progress'] = False solution = cvxopt.solvers.qp(P, q, G, h, A, b) # Lagrange multipliers a = np.ravel(solution['x']) self.alpha = a # Support vectors have non zero lagrange multipliers sv = a > 1e-5 self.sv_index = np.arange(len(a))[sv] self.sv_alpha = a[sv] self.sv_X = original_X[sv] self.sv_Y = self.train_Y[sv] short_w = np.zeros(self.data_demension - 1) for i in range(len(self.sv_alpha)): short_w += self.sv_alpha[i] * self.sv_Y[i] * self.sv_X[i] sum_short_b = 0 for i in range(len(self.sv_alpha)): sum_short_b += self.sv_Y[i] - np.dot( np.transpose(short_w), original_X[self.sv_index[i]]) short_b = sum_short_b / len(self.sv_alpha) self.sv_avg_b = short_b self.W = np.insert(short_w, 0, short_b) else: # primal_hard_margin eye_process = np.eye(self.data_demension) eye_process[0][0] = 0 P = cvxopt.matrix(eye_process) q = cvxopt.matrix(np.zeros(self.data_demension)) G = cvxopt.matrix( np.reshape(self.train_Y, (-1, 1)) * self.train_X * -1) h = cvxopt.matrix(np.ones(self.data_num) * -1) cvxopt.solvers.options['show_progress'] = False solution = cvxopt.solvers.qp(P, q, G, h) self.W = np.array(solution['x']) self.W = np.ravel(self.W) return self.W
def _min_or_max_filter(input, size, footprint, structure, output, mode, cval, origin, minimum): if structure is None: if footprint is None: if size is None: raise RuntimeError("no footprint provided") separable = True else: footprint = numpy.asarray(footprint) footprint = footprint.astype(bool) if numpy.alltrue(numpy.ravel(footprint), axis=0): size = footprint.shape footprint = None separable = True else: separable = False else: structure = numpy.asarray(structure, dtype=numpy.float64) separable = False if footprint is None: footprint = numpy.ones(structure.shape, bool) else: footprint = numpy.asarray(footprint) footprint = footprint.astype(bool) input = numpy.asarray(input) if numpy.iscomplexobj(input): raise TypeError('Complex type not supported') output, return_value = _ni_support._get_output(output, input) origins = _ni_support._normalize_sequence(origin, input.ndim) if separable: sizes = _ni_support._normalize_sequence(size, input.ndim) axes = list(range(input.ndim)) axes = [(axes[ii], sizes[ii], origins[ii]) for ii in range(len(axes)) if sizes[ii] > 1] if minimum: filter_ = minimum_filter1d else: filter_ = maximum_filter1d if len(axes) > 0: for axis, size, origin in axes: filter_(input, int(size), axis, output, mode, cval, origin) input = output else: output[...] = input[...] else: fshape = [ii for ii in footprint.shape if ii > 0] if len(fshape) != input.ndim: raise RuntimeError('footprint array has incorrect shape.') for origin, lenf in zip(origins, fshape): if (lenf // 2 + origin < 0) or (lenf // 2 + origin >= lenf): raise ValueError('invalid origin') if not footprint.flags.contiguous: footprint = footprint.copy() if structure is not None: if len(structure.shape) != input.ndim: raise RuntimeError('structure array has incorrect shape') if not structure.flags.contiguous: structure = structure.copy() mode = _ni_support._extend_mode_to_code(mode) _nd_image.min_or_max_filter(input, footprint, structure, output, mode, cval, origins, minimum) return return_value
def plot_density( ax, all_labels, to_plot, colors, bw, figsize, length_plotters, rows, cols, titlesize, xt_labelsize, linewidth, markersize, credible_interval, point_estimate, hpd_markers, outline, shade, n_data, data_labels, backend_kwargs, show, ): """Matplotlib densityplot.""" if ax is None: _, ax = _create_axes_grid( length_plotters, rows, cols, figsize=figsize, squeeze=False, backend="matplotlib", backend_kwargs=backend_kwargs, ) else: ax = np.atleast_2d(ax) axis_map = {label: ax_ for label, ax_ in zip(all_labels, np.ravel(ax))} for m_idx, plotters in enumerate(to_plot): for var_name, selection, values in plotters: label = make_label(var_name, selection) _d_helper( values.flatten(), label, colors[m_idx], bw, titlesize, xt_labelsize, linewidth, markersize, credible_interval, point_estimate, hpd_markers, outline, shade, axis_map[label], ) if n_data > 1: for m_idx, label in enumerate(data_labels): ax[0].plot([], label=label, c=colors[m_idx], markersize=markersize) ax[0].legend(fontsize=xt_labelsize) if backend_show(show): plt.show() return ax
def numpy_ravel_array(a):
    """Return a flattened (1-D) view or copy of `a` using np.ravel."""
    return np.ravel(a)
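# A quick sketch of the view-vs-copy distinction behind np.ravel (plain numpy):
# ravel returns a view when the memory layout allows it, so writes propagate
# back to the original array, whereas flatten always copies.
import numpy as np

m = np.arange(6).reshape(2, 3)
v = np.ravel(m)      # view for this contiguous array
v[0] = 99
print(m[0, 0])       # 99: the original array was modified through the view
c = m.flatten()      # always an independent copy
c[1] = -1
print(m[0, 1])       # unchanged (1)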
def kneighbors_graph(self, X=None, n_neighbors=None, mode='connectivity'): """Computes the (weighted) graph of k-Neighbors for points in X Parameters ---------- X : array-like, shape (n_queries, n_features), \ or (n_queries, n_indexed) if metric == 'precomputed' The query point or points. If not provided, neighbors of each indexed point are returned. In this case, the query point is not considered its own neighbor. n_neighbors : int Number of neighbors for each sample. (default is value passed to the constructor). mode : {'connectivity', 'distance'}, optional Type of returned matrix: 'connectivity' will return the connectivity matrix with ones and zeros, in 'distance' the edges are Euclidean distance between points. Returns ------- A : sparse graph in CSR format, shape = [n_queries, n_samples_fit] n_samples_fit is the number of samples in the fitted data A[i, j] is assigned the weight of edge that connects i to j. Examples -------- >>> X = [[0], [3], [1]] >>> from sklearn.neighbors import NearestNeighbors >>> neigh = NearestNeighbors(n_neighbors=2) >>> neigh.fit(X) NearestNeighbors(n_neighbors=2) >>> A = neigh.kneighbors_graph(X) >>> A.toarray() array([[1., 0., 1.], [0., 1., 1.], [1., 0., 1.]]) See also -------- NearestNeighbors.radius_neighbors_graph """ check_is_fitted(self) if n_neighbors is None: n_neighbors = self.n_neighbors # check the input only in self.kneighbors # construct CSR matrix representation of the k-NN graph if mode == 'connectivity': A_ind = self.kneighbors(X, n_neighbors, return_distance=False) n_queries = A_ind.shape[0] A_data = np.ones(n_queries * n_neighbors) elif mode == 'distance': A_data, A_ind = self.kneighbors( X, n_neighbors, return_distance=True) A_data = np.ravel(A_data) else: raise ValueError( 'Unsupported mode, must be one of "connectivity" ' 'or "distance" but got "%s" instead' % mode) n_queries = A_ind.shape[0] n_samples_fit = self.n_samples_fit_ n_nonzero = n_queries * n_neighbors A_indptr = np.arange(0, n_nonzero + 1, n_neighbors) kneighbors_graph = csr_matrix((A_data, A_ind.ravel(), A_indptr), shape=(n_queries, n_samples_fit)) return kneighbors_graph
def to_str(row): return ''.join(map(str, np.ravel(row.astype(int))))
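A quick usage sketch (the input row is invented): floats are truncated by astype(int) before being joined into a digit string.

import numpy as np

row = np.array([1.0, 0.0, 1.0, 1.0])
print(to_str(row))   # '1011'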
def setForces(self, force): self.force = force self.nodal.n_f[:] = np.ravel(force)
def _f(x): return -np.ravel(np.cos(x) + np.sin(3 * x))
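This negated objective is the usual trick for maximizing with a minimizer; the scipy call below is my own illustration of how such a function might be driven, not taken from the surrounding project.

import numpy as np
from scipy.optimize import minimize_scalar

# maximize cos(x) + sin(3x) on [0, 2*pi] by minimizing its negation
res = minimize_scalar(lambda x: -(np.cos(x) + np.sin(3 * x)),
                      bounds=(0.0, 2.0 * np.pi), method='bounded')
print(res.x, -res.fun)   # argmax and the corresponding maximum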
def __mul__(self, other): """interpret other and call one of the following self._mul_scalar() self._mul_vector() self._mul_multivector() self._mul_sparse_matrix() """ M, N = self.shape if other.__class__ is np.ndarray: # Fast path for the most common case if other.shape == (N, ): return self._mul_vector(other) elif other.shape == (N, 1): return self._mul_vector(other.ravel()).reshape(M, 1) elif other.ndim == 2 and other.shape[0] == N: return self._mul_multivector(other) if isscalarlike(other): # scalar value return self._mul_scalar(other) if issparse(other): if self.shape[1] != other.shape[0]: raise ValueError('dimension mismatch') return self._mul_sparse_matrix(other) try: other.shape except AttributeError: # If it's a list or whatever, treat it like a matrix other = np.asanyarray(other) other = np.asanyarray(other) if other.ndim == 1 or other.ndim == 2 and other.shape[1] == 1: # dense row or column vector if other.shape != (N, ) and other.shape != (N, 1): raise ValueError('dimension mismatch') result = self._mul_vector(np.ravel(other)) if isinstance(other, np.matrix): result = np.asmatrix(result) if other.ndim == 2 and other.shape[1] == 1: # If 'other' was an (nx1) column vector, reshape the result result = result.reshape(-1, 1) return result elif other.ndim == 2: ## # dense 2D array or matrix ("multivector") if other.shape[0] != self.shape[1]: raise ValueError('dimension mismatch') result = self._mul_multivector(np.asarray(other)) if isinstance(other, np.matrix): result = np.asmatrix(result) return result else: raise ValueError('could not interpret dimensions')
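A minimal illustration of the dispatch above (matrix and operands are mine): a 1-D ndarray takes the _mul_vector fast path, an (N, 1) column is raveled and reshaped back, and a scalar goes through _mul_scalar.

import numpy as np
from scipy.sparse import csr_matrix

A = csr_matrix(np.array([[1, 0], [0, 2]]))
v = np.array([3.0, 4.0])
print(A * v)                          # 1-D path -> [3. 8.]
print((A * v.reshape(2, 1)).shape)    # column-vector path -> (2, 1)
print((A * 2).toarray())              # scalar path -> [[2 0], [0 4]]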
def add_sample(self, sample): s = [] for k in range(len(sample)): s.append(np.ravel(sample[k])) self.samples.append(s)
def fit(self, X, y, sample_weight=None): """ Fit linear model. Parameters ---------- X : {array-like, sparse matrix} of shape (n_samples, n_features) Training data y : array-like of shape (n_samples,) or (n_samples, n_targets) Target values. Will be cast to X's dtype if necessary sample_weight : array-like of shape (n_samples,), default=None Individual weights for each sample .. versionadded:: 0.17 parameter *sample_weight* support to LinearRegression. Returns ------- self : returns an instance of self. """ n_jobs_ = self.n_jobs X, y = self._validate_data(X, y, accept_sparse=['csr', 'csc', 'coo'], y_numeric=True, multi_output=True) if sample_weight is not None: sample_weight = _check_sample_weight(sample_weight, X, dtype=X.dtype) X, y, X_offset, y_offset, X_scale = self._preprocess_data( X, y, fit_intercept=self.fit_intercept, normalize=self.normalize, copy=self.copy_X, sample_weight=sample_weight, return_mean=True) if sample_weight is not None: # Sample weight can be implemented via a simple rescaling. X, y = _rescale_data(X, y, sample_weight) if sp.issparse(X): X_offset_scale = X_offset / X_scale def matvec(b): return X.dot(b) - b.dot(X_offset_scale) def rmatvec(b): return X.T.dot(b) - X_offset_scale * np.sum(b) X_centered = sparse.linalg.LinearOperator(shape=X.shape, matvec=matvec, rmatvec=rmatvec) if y.ndim < 2: out = sparse_lsqr(X_centered, y) self.coef_ = out[0] self._residues = out[3] else: # sparse_lstsq cannot handle y with shape (M, K) outs = Parallel(n_jobs=n_jobs_)( delayed(sparse_lsqr)(X_centered, y[:, j].ravel()) for j in range(y.shape[1])) self.coef_ = np.vstack([out[0] for out in outs]) self._residues = np.vstack([out[3] for out in outs]) else: self.coef_, self._residues, self.rank_, self.singular_ = \ linalg.lstsq(X, y) self.coef_ = self.coef_.T if y.ndim == 1: self.coef_ = np.ravel(self.coef_) self._set_intercept(X_offset, y_offset, X_scale) return self
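A brief usage sketch with invented data: sample_weight rescales rows before the least-squares solve, and because y is 1-D the fitted coef_ comes back raveled to shape (n_features,).

import numpy as np
from sklearn.linear_model import LinearRegression

X = np.array([[0.0], [1.0], [2.0], [3.0]])
y = np.array([0.1, 1.1, 1.9, 3.2])
w = np.array([1.0, 1.0, 2.0, 2.0])   # put extra weight on the last two samples
reg = LinearRegression().fit(X, y, sample_weight=w)
print(reg.coef_.shape, reg.coef_, reg.intercept_)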
def main(): usage = 'usage: %prog [options] <params_file> <model_file> <data_file>' parser = OptionParser(usage) parser.add_option( '-a', dest='act_t', default=0.5, type='float', help= 'Activation threshold (as proportion of max) to consider for PWM [Default: %default]' ) parser.add_option('-d', dest='model_hdf5_file', default=None, help='Pre-computed model output as HDF5.') parser.add_option('-o', dest='out_dir', default='.') parser.add_option('-m', dest='meme_db', default='%s/data/motifs/Homo_sapiens.meme' % os.environ['BASSETDIR'], help='MEME database used to annotate motifs') parser.add_option( '-p', dest='plot_heats', default=False, action='store_true', help= 'Plot heat maps describing filter activations in the test sequences [Default: %default]' ) parser.add_option( '-s', dest='sample', default=None, type='int', help='Sample sequences from the test set [Default:%default]') parser.add_option( '-t', dest='trim_filters', default=False, action='store_true', help= 'Trim uninformative positions off the filter ends [Default: %default]') (options, args) = parser.parse_args() if len(args) != 3: parser.error( 'Must provide Basenji parameters and model files and test data in HDF5' ' format.') else: params_file = args[0] model_file = args[1] data_file = args[2] if not os.path.isdir(options.out_dir): os.mkdir(options.out_dir) ################################################################# # load data data_open = h5py.File(data_file) test_seqs1 = data_open['test_in'] test_targets = data_open['test_out'] try: target_names = list(data_open['target_labels']) except KeyError: target_names = ['t%d' % ti for ti in range(test_targets.shape[1])] if options.sample is not None: # choose sampled indexes sample_i = sorted( random.sample(range(test_seqs1.shape[0]), options.sample)) # filter test_seqs1 = test_seqs1[sample_i] test_targets = test_targets[sample_i] # convert to letters test_seqs = basenji.dna_io.hot1_dna(test_seqs1) ################################################################# # model parameters and placeholders job = basenji.dna_io.read_job_params(params_file) job['seq_length'] = test_seqs1.shape[1] job['seq_depth'] = test_seqs1.shape[2] job['num_targets'] = test_targets.shape[2] job['target_pool'] = int(np.array(data_open.get('pool_width', 1))) t0 = time.time() dr = basenji.seqnn.SeqNN() dr.build(job) print('Model building time %ds' % (time.time() - t0)) # adjust for fourier job['fourier'] = 'train_out_imag' in data_open if job['fourier']: test_targets_imag = data_open['test_out_imag'] if options.valid: test_targets_imag = data_open['valid_out_imag'] ################################################################# # predict # initialize batcher if job['fourier']: batcher_test = basenji.batcher.BatcherF(test_seqs1, test_targets, test_targets_imag, batch_size=dr.batch_size, pool_width=job['target_pool']) else: batcher_test = basenji.batcher.Batcher(test_seqs1, test_targets, batch_size=dr.batch_size, pool_width=job['target_pool']) # initialize saver saver = tf.train.Saver() with tf.Session() as sess: # load variables into session saver.restore(sess, model_file) # get weights filter_weights = sess.run(dr.filter_weights[0]) filter_weights = np.transpose(np.squeeze(filter_weights), [2, 1, 0]) print(filter_weights.shape) # test t0 = time.time() layer_filter_outs, _ = dr.hidden(sess, batcher_test, layers=[0]) filter_outs = layer_filter_outs[0] print(filter_outs.shape) # store useful variables num_filters = filter_weights.shape[0] filter_size = filter_weights.shape[2] 
################################################################# # individual filter plots ################################################################# # also save information contents filters_ic = [] meme_out = meme_intro('%s/filters_meme.txt' % options.out_dir, test_seqs) for f in range(num_filters): print('Filter %d' % f) # plot filter parameters as a heatmap plot_filter_heat(filter_weights[f, :, :], '%s/filter%d_heat.pdf' % (options.out_dir, f)) # write possum motif file filter_possum(filter_weights[f, :, :], 'filter%d' % f, '%s/filter%d_possum.txt' % (options.out_dir, f), options.trim_filters) # plot weblogo of high scoring outputs plot_filter_logo(filter_outs[:, :, f], filter_size, test_seqs, '%s/filter%d_logo' % (options.out_dir, f), maxpct_t=options.act_t) # make a PWM for the filter filter_pwm, nsites = make_filter_pwm('%s/filter%d_logo.fa' % (options.out_dir, f)) if nsites < 10: # no information filters_ic.append(0) else: # compute and save information content filters_ic.append(info_content(filter_pwm)) # add to the meme motif file meme_add(meme_out, f, filter_pwm, nsites, options.trim_filters) meme_out.close() ################################################################# # annotate filters ################################################################# # run tomtom subprocess.call( 'tomtom -dist pearson -thresh 0.1 -oc %s/tomtom %s/filters_meme.txt %s' % (options.out_dir, options.out_dir, options.meme_db), shell=True) # read in annotations filter_names = name_filters(num_filters, '%s/tomtom/tomtom.txt' % options.out_dir, options.meme_db) ################################################################# # print a table of information ################################################################# table_out = open('%s/table.txt' % options.out_dir, 'w') # print header for later panda reading header_cols = ('', 'consensus', 'annotation', 'ic', 'mean', 'std') print('%3s %19s %10s %5s %6s %6s' % header_cols, file=table_out) for f in range(num_filters): # collapse to a consensus motif consensus = filter_motif(filter_weights[f, :, :]) # grab annotation annotation = '.' name_pieces = filter_names[f].split('_') if len(name_pieces) > 1: annotation = name_pieces[1] # plot density of filter output scores fmean, fstd = plot_score_density( np.ravel(filter_outs[:, :, f]), '%s/filter%d_dens.pdf' % (options.out_dir, f)) row_cols = (f, consensus, annotation, filters_ic[f], fmean, fstd) print('%-3d %19s %10s %5.2f %6.4f %6.4f' % row_cols, file=table_out) table_out.close() ################################################################# # global filter plots ################################################################# if options.plot_heats: # plot filter-sequence heatmap plot_filter_seq_heat(filter_outs, '%s/filter_seqs.pdf' % options.out_dir) # plot filter-segment heatmap plot_filter_seg_heat(filter_outs, '%s/filter_segs.pdf' % options.out_dir) plot_filter_seg_heat(filter_outs, '%s/filter_segs_raw.pdf' % options.out_dir, whiten=False) # plot filter-target correlation heatmap plot_target_corr(filter_outs, seq_targets, filter_names, target_names, '%s/filter_target_cors_mean.pdf' % options.out_dir, 'mean') plot_target_corr(filter_outs, seq_targets, filter_names, target_names, '%s/filter_target_cors_max.pdf' % options.out_dir, 'max')
def lerp(sdata, condition): xx, yy = np.meshgrid(np.arange(sdata.shape[1]), np.arange(sdata.shape[0])) xym = np.vstack((np.ravel(xx[condition]), np.ravel(yy[condition]))).T values = np.ravel(sdata[:, :][condition]) interp = scipy.interpolate.LinearNDInterpolator(xym, values) return interp(np.ravel(xx), np.ravel(yy)).reshape(xx.shape)
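A small usage sketch (grid and mask invented): condition marks the known pixels, and the NaN column is filled by linear interpolation from its neighbours; points falling outside the convex hull of the known pixels would come back as NaN.

import numpy as np
import scipy.interpolate

sdata = np.array([[0.0, np.nan, 2.0],
                  [3.0, np.nan, 5.0],
                  [6.0, 7.0,    8.0]])
known = ~np.isnan(sdata)
print(lerp(sdata, known))   # the missing middle-column values come back as 1 and 4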
def measure_fock(self, modes, select=None): """ Measures a list of modes. """ # pylint: disable=singleton-comparison if select is not None and np.any(np.array(select) == None): raise NotImplementedError( "Post-selection lists must only contain numerical values.") # Make sure the state is mixed if self._pure: state = ops.mix(self._state, self._num_modes) else: state = self._state if select is not None: # perform post-selection # make sure modes and select are the same length if len(select) != len(modes): raise ValueError( "When performing post-selection, the number of " "selected values (including None) must match the number of measured modes" ) # make sure the select values are all integers or nones if not all(isinstance(s, int) or s is None for s in select): raise TypeError( "The post-select list elements either be integers or None") # modes to measure measure = [i for i, s in zip(modes, select) if s is None] # modes already post-selected: selected = [i for i, s in zip(modes, select) if s is not None] select_values = [s for s in select if s is not None] # project out postselected modes self._state = ops.project_reset(selected, select_values, self._state, self._pure, self._num_modes, self._trunc) if self.norm() == 0: raise ZeroDivisionError("Measurement has zero probability.") self._state = self._state / self.norm() else: # no post-selection; modes to measure are the modes provided measure = modes if len(measure) > 0: # sampling needs to be performed # Compute distribution by tracing out modes not measured, then computing the diagonal unmeasured = [ i for i in range(self._num_modes) if i not in measure ] reduced = ops.partial_trace(state, self._num_modes, unmeasured) dist = np.ravel(ops.diagonal(reduced, len(measure)).real) # Make a random choice if sum(dist) != 1: # WARNING: distribution is not normalized, could hide errors i = np.random.choice(list(range(len(dist))), p=dist / sum(dist)) else: i = np.random.choice(list(range(len(dist))), p=dist) permuted_outcome = ops.unIndex(i, len(measure), self._trunc) # Permute the outcome to match the order of the modes in 'measure' permutation = np.argsort(measure) outcome = [0] * len(measure) for i in range(len(measure)): outcome[permutation[i]] = permuted_outcome[i] # Project the state onto the measurement outcome & reset in vacuum self._state = ops.project_reset(measure, outcome, self._state, self._pure, self._num_modes, self._trunc) if self.norm() == 0: raise ZeroDivisionError("Measurement has zero probability.") self._state = self._state / self.norm() # include post-selected values in measurement outcomes if select is not None: outcome = copy.copy(select) return outcome
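The renormalization before sampling matters because np.random.choice rejects probability vectors that do not sum to 1 within tolerance; a tiny standalone illustration (numbers are mine):

import numpy as np

dist = np.array([0.2, 0.3, 0.4])            # sums to 0.9, e.g. after truncation
# np.random.choice(3, p=dist)               # ValueError: probabilities do not sum to 1
print(np.random.choice(3, p=dist / dist.sum()))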
def display_split_metrics(rf, Xt, yt, Xv, yv, target_names = None): if len(rf.classes_) == 2: numpy_yt = np.ravel(yt) numpy_yv = np.ravel(yv) if type(numpy_yt[0])==str: classes_ = rf.classes_ else: classes_ = [str(int(rf.classes_[0])), str(int(rf.classes_[1]))] zt = np.zeros(len(yt)) zv = np.zeros(len(yv)) #zt = deepcopy(yt) for i in range(len(yt)): if numpy_yt[i] == 1: zt[i] = 1 for i in range(len(yv)): if numpy_yv[i] == 1: zv[i] = 1 predict_t = rf.predict(Xt) predict_v = rf.predict(Xv) conf_matt = confusion_matrix(y_true=yt, y_pred=predict_t) conf_matv = confusion_matrix(y_true=yv, y_pred=predict_v) prob_t = rf.predict_proba(Xt) prob_v = rf.predict_proba(Xv) print("\n") print("{:.<23s}{:>15s}{:>15s}".format('Model Metrics', 'Training', 'Validation')) print("{:.<23s}{:15d}{:15d}".format('Observations', Xt.shape[0], Xv.shape[0])) print("{:.<23s}{:15d}{:15d}".format('Features', Xt.shape[1], Xv.shape[1])) if rf.max_depth==None: print("{:.<23s}{:>15s}{:>15s}".format('Maximum Tree Depth', "None", "None")) else: print("{:.<23s}{:15d}{:15d}".format('Maximum Tree Depth', rf.max_depth, rf.max_depth)) print("{:.<23s}{:15d}{:15d}".format('Minimum Leaf Size', rf.min_samples_leaf, rf.min_samples_leaf)) print("{:.<23s}{:15d}{:15d}".format('Minimum split Size', rf.min_samples_split, rf.min_samples_split)) print("{:.<23s}{:15.4f}{:15.4f}".format('Mean Absolute Error', mean_absolute_error(zt,prob_t[:,1]), mean_absolute_error(zv,prob_v[:,1]))) print("{:.<23s}{:15.4f}{:15.4f}".format('Avg Squared Error', mean_squared_error(zt,prob_t[:,1]), mean_squared_error(zv,prob_v[:,1]))) acct = accuracy_score(yt, predict_t) accv = accuracy_score(yv, predict_v) print("{:.<23s}{:15.4f}{:15.4f}".format('Accuracy', acct, accv)) if type(numpy_yt[0])==str: pre_t = precision_score(yt, predict_t, pos_label=classes_[1]) tpr_t = recall_score(yt, predict_t, pos_label=classes_[1]) f1_t = f1_score(yt,predict_t, pos_label=classes_[1]) pre_v = precision_score(yv, predict_v, pos_label=classes_[1]) tpr_v = recall_score(yv, predict_v, pos_label=classes_[1]) f1_v = f1_score(yv,predict_v, pos_label=classes_[1]) else: pre_t = precision_score(yt, predict_t) tpr_t = recall_score(yt, predict_t) f1_t = f1_score(yt,predict_t) pre_v = precision_score(yv, predict_v) tpr_v = recall_score(yv, predict_v) f1_v = f1_score(yv,predict_v) print("{:.<27s}{:11.4f}{:15.4f}".format('Precision', pre_t, pre_v)) print("{:.<27s}{:11.4f}{:15.4f}".format('Recall (Sensitivity)', tpr_t, tpr_v)) print("{:.<27s}{:11.4f}{:15.4f}".format('F1-score', f1_t, f1_v)) misct_ = conf_matt[0][1]+conf_matt[1][0] miscv_ = conf_matv[0][1]+conf_matv[1][0] misct = 100*misct_/len(yt) miscv = 100*miscv_/len(yv) n_t = [conf_matt[0][0]+conf_matt[0][1], \ conf_matt[1][0]+conf_matt[1][1]] n_v = [conf_matv[0][0]+conf_matv[0][1], \ conf_matv[1][0]+conf_matv[1][1]] misc_ = [[0,0], [0,0]] misc_[0][0] = 100*conf_matt[0][1]/n_t[0] misc_[0][1] = 100*conf_matt[1][0]/n_t[1] misc_[1][0] = 100*conf_matv[0][1]/n_v[0] misc_[1][1] = 100*conf_matv[1][0]/n_v[1] print("{:.<27s}{:11d}{:15d}".format(\ 'Total Misclassifications', misct_, miscv_)) print("{:.<27s}{:10.1f}{:s}{:14.1f}{:s}".format(\ 'MISC (Misclassification)', misct, '%', miscv, '%')) for i in range(2): print("{:s}{:.<16s}{:>10.1f}{:<1s}{:>14.1f}{:<1s}".format( ' class ', classes_[i], misc_[0][i], '%', misc_[1][i], '%')) print("\n\nTraining Class Class") print("{:<21s}{:>10s}{:>10s}".format("Confusion Matrix", classes_[0], classes_[1]) ) for i in range(2): print("{:6s}{:.<15s}".format('Class ', classes_[i]), end="") for j in range(2): 
print("{:>10d}".format(conf_matt[i][j]), end="") print("") print("\n\nValidation Class Class") print("{:<21s}{:>10s}{:>10s}".format("Confusion Matrix", classes_[0], classes_[1]) ) for i in range(2): print("{:6s}{:.<15s}".format('Class ', classes_[i]), end="") for j in range(2): print("{:>10d}".format(conf_matv[i][j]), end="") print("") # In the binary case, the classification report is incorrect #cr = classification_report(yv, predict_v, rf.classes_) #print("\n",cr) else: try: if len(rf.classes_) < 2: raise RuntimeError(" Call to display_nominal_split_metrics "+ "invalid.\n Target has less than two classes.\n") sys.exit() except: raise RuntimeError(" Call to display_nominal_split_metrics "+ "invalid.\n Target has less than two classes.\n") sys.exit() predict_t = rf.predict(Xt) predict_v = rf.predict(Xv) conf_mat_t = confusion_matrix(y_true=yt, y_pred=predict_t) conf_mat_v = confusion_matrix(y_true=yv, y_pred=predict_v) prob_t = rf.predict_proba(Xt) # or is this rf._predict_proba_dt ? prob_v = rf.predict_proba(Xv) n_classes = len(rf.classes_) ase_sumt = 0 ase_sumv = 0 misc_t = 0 misc_v = 0 misct = [] miscv = [] n_t = [] n_v = [] nt_obs = yt.shape[0] nv_obs = yv.shape[0] conf_matt = [] conf_matv = [] for i in range(n_classes): conf_matt.append(np.zeros(n_classes)) conf_matv.append(np.zeros(n_classes)) y_t = np.ravel(yt) # necessary because yt is a df with row keys y_v = np.ravel(yv) # likewise for i in range(n_classes): misct.append(0) n_t.append(0) miscv.append(0) n_v.append(0) for i in range(nt_obs): for j in range(n_classes): if y_t[i] == rf.classes_[j]: ase_sumt += (1-prob_t[i,j])*(1-prob_t[i,j]) idx = j else: ase_sumt += prob_t[i,j]*prob_t[i,j] for j in range(n_classes): if predict_t[i] == rf.classes_[j]: conf_matt[idx][j] += 1 break n_t[idx] += 1 if predict_t[i] != y_t[i]: misc_t += 1 misct[idx] += 1 for i in range(nv_obs): for j in range(n_classes): if y_v[i] == rf.classes_[j]: ase_sumv += (1-prob_v[i,j])*(1-prob_v[i,j]) idx = j else: ase_sumv += prob_v[i,j]*prob_v[i,j] for j in range(n_classes): if predict_v[i] == rf.classes_[j]: conf_matv[idx][j] += 1 break n_v[idx] += 1 if predict_v[i] != y_v[i]: misc_v += 1 miscv[idx] += 1 misct_ = misc_t miscv_ = misc_v misc_t = 100*misc_t/nt_obs misc_v = 100*misc_v/nv_obs aset = ase_sumt/(n_classes*nt_obs) asev = ase_sumv/(n_classes*nv_obs) print("\n") print("{:.<23s}{:>15s}{:>15s}".format('Model Metrics', 'Training', 'Validation')) print("{:.<23s}{:15d}{:15d}".format('Observations', \ Xt.shape[0], Xv.shape[0])) print("{:.<23s}{:15d}{:15d}".format('Features', Xt.shape[1], Xv.shape[1])) if type(rf) == RandomForestClassifier: print("{:.<23s}{:15d}{:15d}".format(\ 'Trees in Forest', \ rf.n_estimators, rf.n_estimators)) if rf.max_depth==None: print("{:.<23s}{:>15s}{:>15s}".format('Maximum Tree Depth', "None", "None")) else: print("{:.<23s}{:15d}{:15d}".format('Maximum Tree Depth', rf.max_depth, rf.max_depth)) print("{:.<23s}{:15d}{:15d}".format('Minimum Leaf Size', rf.min_samples_leaf, rf.min_samples_leaf)) print("{:.<23s}{:15d}{:15d}".format('Minimum split Size', rf.min_samples_split, rf.min_samples_split)) print("{:.<23s}{:15.4f}{:15.4f}".format('Avg Squared Error', aset, asev)) print("{:.<23s}{:15.4f}{:15.4f}".format(\ 'Root ASE', sqrt(aset), sqrt(asev))) acct = accuracy_score(yt, predict_t) accv = accuracy_score(yv, predict_v) print("{:.<23s}{:15.4f}{:15.4f}".format('Accuracy', acct, accv)) print("{:.<23s}{:15.4f}{:15.4f}".format('Precision', precision_score(yt,predict_t, average='macro'), precision_score(yv,predict_v, average='macro'))) 
print("{:.<23s}{:15.4f}{:15.4f}".format('Recall (Sensitivity)', recall_score(yt,predict_t, average='macro'), recall_score(yv,predict_v, average='macro'))) print("{:.<23s}{:15.4f}{:15.4f}".format('F1-score', f1_score(yt,predict_t, average='macro'), f1_score(yv,predict_v, average='macro'))) print("{:.<27s}{:11d}{:15d}".format(\ 'Total Misclassifications', misct_, miscv_)) print("{:.<27s}{:10.1f}{:s}{:14.1f}{:s}".format(\ 'MISC (Misclassification)', misc_t, '%', misc_v, '%')) classes_ = [] if type(rf.classes_[0])==str: classes_ = rf.classes_ else: for i in range(n_classes): classes_.append(str(int(rf.classes_[i]))) for i in range(n_classes): misct[i] = 100*misct[i]/n_t[i] miscv[i] = 100*miscv[i]/n_v[i] print("{:s}{:.<16s}{:>10.1f}{:<1s}{:>14.1f}{:<1s}".format( ' class ', classes_[i], misct[i], '%', miscv[i], '%')) print("\n\nTraining") print("Confusion Matrix ", end="") for i in range(n_classes): print("{:>7s}{:<3s}".format('Class ', classes_[i]), end="") print("") for i in range(n_classes): print("{:s}{:.<6s}".format('Class ', classes_[i]), end="") for j in range(n_classes): print("{:>10d}".format(conf_mat_t[i][j]), end="") print("") ct = classification_report(yt, predict_t, target_names) print("\nTraining \nMetrics:\n",ct) print("\n\nValidation") print("Confusion Matrix ", end="") for i in range(n_classes): print("{:>7s}{:<3s}".format('Class ', classes_[i]), end="") print("") for i in range(n_classes): print("{:s}{:.<6s}".format('Class ', classes_[i]), end="") for j in range(n_classes): print("{:>10d}".format(conf_mat_v[i][j]), end="") print("") cv = classification_report(yv, predict_v, target_names) print("\nValidation \nMetrics:\n",cv)
def precon_norm(v, ml): '''Helper function to calculate the preconditioner norm of v.''' v = ravel(v) w = ml.aspreconditioner() * v return sqrt(dot(v.conjugate(), w))
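For orientation, a hedged usage sketch; the Poisson test matrix and the smoothed-aggregation setup are my own choices, and the helper assumes numpy's ravel, dot and sqrt are in scope as in its original module. The result is sqrt(v* M v) with M the multigrid preconditioner.

import numpy as np
import pyamg

A = pyamg.gallery.poisson((50, 50), format='csr')   # 2-D Poisson test problem
ml = pyamg.smoothed_aggregation_solver(A)
v = np.random.rand(A.shape[0])
print(precon_norm(v, ml))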
constraints = [] for i in range(len(matrices)): constraints.append(quad_form(q_var, matrices[i]) <= t_var) constraints.append(s_vec.T * q_var == s_vec.T * s_vec) constraints.append(quad_form(q_var, I) <= alpha * s_vec.T * s_vec) #===Solve System===# obj = Minimize(t_var) prob = Problem(obj, constraints) prob.solve() #===Extract===# print("f0* =", obj.value) xopt = prob.variables()[1].value print("q* =", xopt) q = numpy.ravel(xopt) q_vec = numpy.asmatrix(q).T #===Convolve===# rss = numpy.convolve(s, s[::-1]) csq = numpy.convolve(s, q[::-1]) print("s*s = ", rss) print("s*q = ", csq) #===Make F Matrix===# ones = numpy.r_[numpy.ones(N - 1), 0, numpy.ones(N - 1)] F = numpy.asmatrix(numpy.diag(ones)) #===Print M/S Ratio Improvement===# sidelobes = F * numpy.asmatrix(rss).T mainlobe = numpy.asmatrix(rss).T - sidelobes
def get_energy_dependent_integration_weights(self, spin, energy): integration_weights = np.zeros(self._ir_weights_shape[spin]) tetrahedra_mask = self.get_intersecting_tetrahedra(spin, energy) if not np.any(tetrahedra_mask): return integration_weights energies = self.ir_tetrahedra_energies[spin][tetrahedra_mask] e21 = self.e21[spin][tetrahedra_mask] e31 = self.e31[spin][tetrahedra_mask] e41 = self.e41[spin][tetrahedra_mask] e32 = self.e32[spin][tetrahedra_mask] e42 = self.e42[spin][tetrahedra_mask] e43 = self.e43[spin][tetrahedra_mask] cond_a_mask = (energies[:, 0] < energy) & (energy < energies[:, 1]) cond_b_mask = (energies[:, 1] <= energy) & (energy < energies[:, 2]) cond_c_mask = (energies[:, 2] <= energy) & (energy < energies[:, 3]) ee1 = energy - energies[:, 0] ee2 = energy - energies[:, 1] ee3 = energy - energies[:, 2] e2e = energies[:, 1] - energy e3e = energies[:, 2] - energy e4e = energies[:, 3] - energy kpoints_idx = self.ir_tetrahedra[spin][tetrahedra_mask] ir_kpoints_idx = self.ir_kpoint_mapping[kpoints_idx] # calculate the integrand for each vertices vert_weights = np.zeros_like(energies) vert_weights[cond_a_mask] = _get_energy_dependent_weight_a( ee1[cond_a_mask], e2e[cond_a_mask], e3e[cond_a_mask], e4e[cond_a_mask], e21[cond_a_mask], e31[cond_a_mask], e41[cond_a_mask], ) vert_weights[cond_b_mask] = _get_energy_dependent_weight_b( ee1[cond_b_mask], ee2[cond_b_mask], e3e[cond_b_mask], e4e[cond_b_mask], e31[cond_b_mask], e41[cond_b_mask], e32[cond_b_mask], e42[cond_b_mask], ) vert_weights[cond_c_mask] = _get_energy_dependent_weight_c( ee1[cond_c_mask], ee2[cond_c_mask], ee3[cond_c_mask], e4e[cond_c_mask], e41[cond_c_mask], e42[cond_c_mask], e43[cond_c_mask], ) # finally, get the integrand for each ir_kpoint by summing over all # tetrahedra and multiplying by the tetrahedra multiplicity and # tetrahedra weight; Finally, divide by the k-point multiplicity # to get the final weight band_idx, tetrahedra_idx = np.where(tetrahedra_mask) # include tetrahedra multiplicity vert_weights *= self.ir_tetrahedra_weights[tetrahedra_idx][:, None] flat_ir_kpoints = np.ravel(ir_kpoints_idx) flat_ir_weights = np.ravel(vert_weights) flat_bands = np.repeat(band_idx, 4) # sum integrand, note this sums in place and is insanely fast np.add.at(integration_weights, (flat_bands, flat_ir_kpoints), flat_ir_weights) integration_weights *= (self._tetrahedron_volume / self.ir_kpoint_weights[None, :]) return integration_weights
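The np.add.at call is the crucial step: unlike a fancy-indexed +=, it accumulates every contribution when indices repeat, which is exactly what summing tetrahedron vertices into shared k-points requires. A tiny standalone illustration (values are mine):

import numpy as np

weights = np.zeros(3)
idx = np.array([0, 1, 1, 2, 1])
vals = np.array([1.0, 2.0, 3.0, 4.0, 5.0])
np.add.at(weights, idx, vals)   # index 1 accumulates 2 + 3 + 5
print(weights)                  # [ 1. 10.  4.]
buffered = np.zeros(3)
buffered[idx] += vals           # buffered assignment keeps only the last write per index
print(buffered)                 # [1. 5. 4.]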
def read_class_npvar_red(datadir='./data', pfile='pvar.dat', proc=0, verbose=False, reduce_to=-1, set_reduce=-1): dims = pc.read_dim(datadir, proc) pdims = pc.read_pdim(datadir) npar_loc = read_npar_loc(datadir=datadir, pfile=pfile, proc=proc) # # the Next bit calculates how many particles are written for all but # the last processor. The last processor is assigned a number of particles # to write so that the required number of particles is obtained # if (reduce_to > 0): if (set_reduce <= 0): reductionfactor = float(reduce_to) / float(pdims.npar) npar_red = int(round(npar_loc * reductionfactor)) else: npar_red = set_reduce if (verbose): #print 'reducing '+str(npar_loc)+' to '+str(npar_red)+ ' on proc'+str(proc) print('reducing {} to {} on proc {}'.format( npar_loc, npar_red, proc)) written_parts = npar_red else: written_parts = set_reduce if (verbose): #print npar_loc,' particles on processor: ',proc # Python 2 print(str(npar_loc) + ' particles on processor: ' + str(proc)) mvars = pdims.mpaux + pdims.mpvar ltot = npar_loc * mvars if (dims.precision == 'S'): REAL = '<f4' else: REAL = '<f8' array_shape = np.dtype([('header', '<i4'), ('npar_loc', '<i4'), ('footer', '<i4'), ('header2', '<i4'), ('ipar', '<i4', npar_loc), ('footer2', '<i4'), ('header3', '<i4'), ('fp', REAL, ltot), ('footer3', '<i4'), ('header4', '<i4'), ('t', REAL), ('x', REAL, dims.mx), ('y', REAL, dims.my), ('z', REAL, dims.mz), ('dx', REAL), ('dy', REAL), ('dz', REAL), ('footer4', '<i4')]) p_data = np.fromfile(datadir + '/proc' + str(proc) + '/' + pfile, dtype=array_shape) partpars = np.array(p_data['fp'].reshape(mvars, npar_loc)) if (reduce_to > 0): particle_list = map( lambda x: int(x), np.linspace(0.0, npar_loc, num=npar_red, endpoint=False)) red_parts = partpars[:, particle_list] red_shape = np.dtype([('header', '<i4'), ('npar_loc', '<i4'), ('footer', '<i4'), ('header2', '<i4'), ( 'ipar', '<i4', (npar_red), ), ('footer2', '<i4'), ('header3', '<i4'), ('fp', REAL, npar_red * mvars), ('footer3', '<i4'), ('header4', '<i4'), ('t', REAL), ('x', REAL, (dims.mx, )), ('y', REAL, (dims.my, )), ('z', REAL, (dims.mz, )), ('dx', REAL), ('dy', REAL), ('dz', REAL), ('footer4', '<i4')]) p_red = np.array( [(4, npar_red, 4, (npar_red * 4), (np.squeeze(p_data['ipar'][0, :npar_red])), (npar_red * 4), (npar_red * mvars * 8), (np.squeeze(np.ravel(red_parts))), (npar_red * mvars * 8), (p_data['header4'][0]), (p_data['t']), (p_data['x']), (p_data['y']), (p_data['z']), (p_data['dx']), (p_data['dy']), (p_data['dz']), p_data['footer4'][0])], dtype=red_shape) p_red.tofile(datadir + '/proc' + str(proc) + '/' + str(reduce_to) + '_' + pfile) ipar = np.squeeze(p_data['ipar'].reshape(p_data['ipar'].size)) return ipar, partpars, written_parts
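The structured dtype above mirrors Fortran unformatted records, where every write is bracketed by 4-byte length markers (the header/footer fields). A minimal sketch of the idea with a synthetic buffer (field names and values are mine):

import numpy as np

rec = np.dtype([('header', '<i4'), ('npar_loc', '<i4'), ('footer', '<i4')])
buf = np.array([(4, 123, 4)], dtype=rec).tobytes()   # fake record: markers around a 4-byte payload
data = np.frombuffer(buf, dtype=rec)
print(int(data['npar_loc'][0]))                      # 123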
def display_metrics(rf, X, y): if len(rf.classes_) == 2: numpy_y = np.ravel(y) if type(numpy_y[0])==str: classes_ = rf.classes_ else: classes_ = [str(int(rf.classes_[0])), str(int(rf.classes_[1]))] z = np.zeros(len(y)) predictions = rf.predict(X) # get binary class predictions conf_mat = confusion_matrix(y_true=y, y_pred=predictions) tmisc = conf_mat[0][1]+conf_mat[1][0] misc = 100*(tmisc)/(len(y)) for i in range(len(y)): if numpy_y[i] == 1: z[i] = 1 probability = rf.predict_proba(X) # get binary probabilities #probability = rf.predict_proba(X) print("\nModel Metrics") print("{:.<27s}{:10d}".format('Observations', X.shape[0])) print("{:.<27s}{:10d}".format('Features', X.shape[1])) if rf.max_depth==None: print("{:.<27s}{:>10s}".format('Maximum Tree Depth',\ "None")) else: print("{:.<27s}{:10d}".format('Maximum Tree Depth',\ rf.max_depth)) print("{:.<27s}{:10d}".format('Minimum Leaf Size', \ rf.min_samples_leaf)) print("{:.<27s}{:10d}".format('Minimum split Size', \ rf.min_samples_split)) print("{:.<27s}{:10.4f}".format('Mean Absolute Error', \ mean_absolute_error(z,probability[:, 1]))) print("{:.<27s}{:10.4f}".format('Avg Squared Error', \ mean_squared_error(z,probability[:, 1]))) acc = accuracy_score(y, predictions) print("{:.<27s}{:10.4f}".format('Accuracy', acc)) if type(numpy_y[0]) == str: pre = precision_score(y, predictions, pos_label=classes_[1]) tpr = recall_score(y, predictions, pos_label=classes_[1]) f1 = f1_score(y,predictions, pos_label=classes_[1]) else: pre = precision_score(y, predictions) tpr = recall_score(y, predictions) f1 = f1_score(y,predictions) print("{:.<27s}{:10.4f}".format('Precision', pre)) print("{:.<27s}{:10.4f}".format('Recall (Sensitivity)', tpr)) print("{:.<27s}{:10.4f}".format('F1-Score', f1)) print("{:.<27s}{:10d}".format(\ 'Total Misclassifications', tmisc)) print("{:.<27s}{:9.1f}{:s}".format(\ 'MISC (Misclassification)', misc, '%')) n_ = [conf_mat[0][0]+conf_mat[0][1], conf_mat[1][0]+conf_mat[1][1]] miscc = [100*conf_mat[0][1]/n_[0], 100*conf_mat[1][0]/n_[1]] for i in range(2): print("{:s}{:<16s}{:>9.1f}{:<1s}".format(\ ' class ', classes_[i], miscc[i], '%')) print("\n\n Confusion Class Class") print(" Matrix", end="") print("{:1s}{:>10s}{:>10s}".format(" ", classes_[0], classes_[1])) for i in range(2): print("{:s}{:.<6s}".format(' Class ', classes_[i]), end="") for j in range(2): print("{:>10d}".format(conf_mat[i][j]), end="") print("") print("") else: n_classes = len(rf.classes_) n_obs = len(y) try: if n_classes < 2: raise RuntimeError(" Call to display_nominal_metrics "+ "invalid.\n Target has less than two classes.\n") sys.exit() except: raise RuntimeError(" Call to display_nominal_metrics "+ "invalid.\n Target has less than two classes.\n") sys.exit() np_y = np.ravel(y) classes_ = [" "]*len(rf.classes_) if type(np_y[0])==str: classes_ = rf.classes_ else: for i in range(len(rf.classes_)): classes_[i] = str(int(rf.classes_[i])) probability = rf.predict_proba(X) # get class probabilitie predictions = rf.predict(X) # get nominal class predictions conf_mat = confusion_matrix(y_true=y, y_pred=predictions) misc = 0 miscc = [] n_ = [] for i in range(n_classes): miscc.append(0) n_.append(0) for j in range(n_classes): n_[i] = n_[i] + conf_mat[i][j] if i != j: misc = misc + conf_mat[i][j] miscc[i] = miscc[i] + conf_mat[i][j] miscc[i] = 100*miscc[i]/n_[i] tmisc = misc misc = 100*misc/n_obs ase_sum = 0 mase_sum = 0 for i in range(n_obs): for j in range(n_classes): if np_y[i] == rf.classes_[j]: ase_sum += (1-probability[i,j])*(1-probability[i,j]) mase_sum += 
1-probability[i,j] else: ase_sum += probability[i,j]*probability[i,j] mase_sum += probability[i,j] ase = ase_sum/(n_classes*n_obs) mase = mase_sum/(n_classes*n_obs) print("\nModel Metrics") print("{:.<27s}{:10d}".format('Observations', X.shape[0])) print("{:.<27s}{:10d}".format('Features', X.shape[1])) if type(rf) == RandomForestClassifier: print("{:.<27s}{:10d}".format('Trees in Forest', \ rf.n_estimators)) if rf.max_depth==None: print("{:.<27s}{:>10s}".format('Maximum Tree Depth',\ "None")) else: print("{:.<27s}{:10d}".format('Maximum Tree Depth',\ rf.max_depth)) print("{:.<27s}{:10d}".format('Minimum Leaf Size', \ rf.min_samples_leaf)) print("{:.<27s}{:10d}".format('Minimum split Size', \ rf.min_samples_split)) print("{:.<27s}{:10.4f}".format('ASE', ase)) print("{:.<27s}{:10.4f}".format('Root ASE', sqrt(ase))) print("{:.<27s}{:10.4f}".format('Mean Absolute Error', mase)) acc = accuracy_score(np_y, predictions) print("{:.<27s}{:10.4f}".format('Accuracy', acc)) pre = precision_score(np_y, predictions, average='macro') print("{:.<27s}{:10.4f}".format('Precision', pre)) tpr = recall_score(np_y, predictions, average='macro') print("{:.<27s}{:10.4f}".format('Recall (Sensitivity)', tpr)) f1 = f1_score(np_y,predictions, average='macro') print("{:.<27s}{:10.4f}".format('F1-Score', f1)) print("{:.<27s}{:10d}".format(\ 'Total Misclassifications', tmisc)) print("{:.<27s}{:9.1f}{:s}".format(\ 'MISC (Misclassification)', misc, '%')) if type(rf.classes_[0]) == str: fstr = "{:s}{:.<16s}{:>9.1f}{:<1s}" else: fstr = "{:s}{:.<16.0f}{:>9.1f}{:<1s}" for i in range(len(rf.classes_)): print(fstr.format(\ ' class ', rf.classes_[i], miscc[i], '%')) print("\n\n Confusion") print(" Matrix ", end="") if type(rf.classes_[0]) == str: fstr1 = "{:>7s}{:<3s}" fstr2 = "{:s}{:.<6s}" else: fstr1 = "{:>7s}{:<3.0f}" fstr2 = "{:s}{:.<6.0f}" for i in range(n_classes): print(fstr1.format('Class ', rf.classes_[i]), end="") print("") for i in range(n_classes): print(fstr2.format('Class ', rf.classes_[i]), end="") for j in range(n_classes): print("{:>10d}".format(conf_mat[i][j]), end="") print("") print("") cr = classification_report(np_y, predictions, rf.classes_) print("\n",cr)