def plot_embedding(features, classes, labels, title=None): x_min, x_max = np.min(features, 0), np.max(features, 0) features = (features - x_min) / (x_max - x_min) plt.figure() ax = plt.subplot(111) for i in range(features.shape[0]): plt.text(features[i, 0], features[i, 1], str(labels[i]), color=plt.cm.Set1(float(classes[i]) / 10), fontdict={'weight': 'bold', 'size': 9}) if hasattr(offsetbox, 'AnnotationBbox'): # only print thumbnails with matplotlib > 1.0 shown_images = np.array([[1., 1.]]) # just something big for i in range(features.shape[0]): dist = np.sum((features[i] - shown_images) ** 2, 1) if np.min(dist) < 4e-3: # don't show points that are too close continue shown_images = np.r_[shown_images, [features[i]]] """imagebox = offsetbox.AnnotationBbox( offsetbox.OffsetImage(digits.images[i], cmap=plt.cm.gray_r), X[i]) ax.add_artist(imagebox)""" plt.xticks([]), plt.yticks([]) if title is not None: plt.title(title)
def __compute_alternative_params(self): # Copied directly from skopt transformed_bounds = np.array(self.__opt.space.transformed_bounds) est = clone(self.__opt.base_estimator) with warnings.catch_warnings(): warnings.simplefilter("ignore") est.fit(self.__opt.space.transform(self.__opt.Xi), self.__opt.yi) X = self.__opt.space.transform(self.__opt.space.rvs( n_samples=self.__opt.n_points, random_state=self.__opt.rng)) values = _gaussian_acquisition(X=X, model=est, y_opt=np.min(self.__opt.yi), acq_func='EI', acq_func_kwargs=dict(n_points=10000)) print('original point ei: %s' % np.min(values)) discount_width = .5 values = self.__discount_leased_params(X, values, discount_width) while np.min(values) > -1e-5 and discount_width > 1e-2: discount_width *= .9 values = _gaussian_acquisition(X=X, model=est, y_opt=np.min(self.__opt.yi), acq_func='EI', acq_func_kwargs=dict(n_points=10000)) values = self.__discount_leased_params(X, values, discount_width) next_x = X[np.argmin(values)] print('new point ei: %s' % np.min(values)) if not self.__opt.space.is_categorical: next_x = np.clip(next_x, transformed_bounds[:, 0], transformed_bounds[:, 1]) return self.__opt.space.inverse_transform(next_x.reshape((1, -1)))[0]
def beta_limiter(r,cfl,theta=0.95,beta=0.66666666666666666):
    r"""
    Modification of CFL Superbee limiter with theta and beta parameters

    Additional Input:
     - *theta*
     - *beta*
    """
    a = np.empty((2,len(r)))
    b = np.zeros((2,len(r)))

    a[0,:] = 0.001
    a[1,:] = cfl
    cfmod1 = np.max(a,axis=0)
    a[0,:] = 0.999
    cfmod2 = np.min(a,axis=0)
    s1 = theta * 2.0 / cfmod1
    s2 = (1.0 + cfl) / 3.0
    phimax = theta * 2.0 / (1.0 - cfmod2)

    # all reductions are taken along axis 0 so the limiter stays element-wise
    a[0,:] = s1*r
    a[1,:] = phimax
    b[1,:] = np.min(a,axis=0)
    ultra = np.max(b,axis=0)

    a[0,:] = 1.0 + (s2 - beta/2.0) * (r-1.0)
    a[1,:] = 1.0 + (s2 + beta/2.0) * (r-1.0)
    b[0,:] = ultra
    b[1,:] = np.max(a,axis=0)
    a[0,:] = 0.0
    a[1,:] = np.min(b,axis=0)
    return np.max(a,axis=0)
def inverse_magnification_tensors(self, imagepositions): tiny = 1e-12 # to deal with lens and image both exactly at origin ipos = np.atleast_2d(imagepositions) mag = np.zeros((len(ipos), 2, 2)) mag[:,0,0] = 1. mag[:,1,1] = 1. # print "inverse_magnification_tensors: ipos = ",ipos dpos = ipos - self.position rcubed = np.sum(dpos * dpos, axis=1)**1.5 + tiny # tiny little hack if np.min(rcubed) <= 0.0: print "image positions = ",ipos print "lens position = ",self.position print "differences = ",dpos print "rcubed = ",rcubed print self else: mag[:,0,0] -= self.einsteinradius * dpos[:,1] * dpos[:,1] / rcubed mag[:,0,1] += self.einsteinradius * dpos[:,1] * dpos[:,0] / rcubed mag[:,1,0] += self.einsteinradius * dpos[:,0] * dpos[:,1] / rcubed mag[:,1,1] -= self.einsteinradius * dpos[:,0] * dpos[:,0] / rcubed mag[:,0,0] -= self.gammacos2phi mag[:,0,1] -= self.gammasin2phi mag[:,1,0] -= self.gammasin2phi mag[:,1,1] += self.gammacos2phi assert(np.min(rcubed) > 0.0) return mag
def cada_torrilhon_limiter(r,cfl,epsilon=1.0e-3):
    r"""
    Cada-Torrilhon modified

    Additional Input:
     - *epsilon*
    """
    a = np.ones((2,len(r))) * 0.95
    b = np.empty((3,len(r)))

    # reductions along axis 0 keep one value per cell
    a[0,:] = cfl
    cfl = np.min(a,axis=0)
    a[1,:] = 0.05
    cfl = np.max(a,axis=0)

    # Multiply all parts except b[0,:] by (1.0 - epsilon) as well
    b[0,:] = 1.0 + (1+cfl) / 3.0 * (r - 1)
    b[1,:] = 2.0 * np.abs(r) / (cfl + epsilon)
    b[2,:] = (8.0 - 2.0 * cfl) / (np.abs(r) * (cfl - 1.0 - epsilon)**2)
    b[1:,:] *= (1.0 - epsilon)

    a[0,:] = np.min(b,axis=0)
    a[1,:] = (-2.0 * (cfl**2 - 3.0 * cfl + 8.0) * (1.0-epsilon)
              / (np.abs(r) * (cfl**3 - cfl**2 - cfl + 1.0 + epsilon)))
    return np.max(a,axis=0)
def theta_limiter(r,cfl,theta=0.95): r""" Theta limiter Additional Input: - *theta* = """ a = np.empty((2,len(r))) b = np.empty((3,len(r))) a[0,:] = 0.001 a[1,:] = cfl cfmod1 = np.max(a,axis=0) a[0,:] = 0.999 cfmod2 = np.min(a,axis=0) s1 = 2.0 / cfmod1 s2 = (1.0 + cfl) / 3.0 phimax = 2.0 / (1.0 - cfmod2) a[0,:] = (1.0 - theta) * s1 a[1,:] = 1.0 + s2 * (r - 1.0) left = np.max(a,axis=0) a[0,:] = (1.0 - theta) * phimax * r a[1,:] = theta * s1 * r middle = np.max(a,axis=0) b[0,:] = left b[1,:] = middle b[2,:] = theta*phimax return np.min(b,axis=0)
def cfl_superbee_theta(r,cfl,theta=0.95): r""" CFL-Superbee (Roe's Ultrabee) with theta parameter """ a = np.empty((2,len(r))) b = np.zeros((2,len(r))) a[0,:] = 0.001 a[1,:] = cfl cfmod1 = np.max(a,axis=0) a[0,:] = 0.999 cfmod2 = np.min(a,axis=0) s1 = theta * 2.0 / cfmod1 phimax = theta * 2.0 / (1.0 - cfmod2) a[0,:] = s1*r a[1,:] = phimax b[1,:] = np.min(a,axis=0) ultra = np.max(b,axis=0) a[0,:] = ultra b[0,:] = 1.0 b[1,:] = r a[1,:] = np.max(b,axis=0) return np.min(a,axis=0)
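# A minimal usage sketch for the CFL-aware limiters above (an assumption here:
# numpy is imported as np, r holds the usual ratios of successive differences
# and cfl the local CFL numbers, one entry per cell).
r = np.array([-0.5, 0.2, 1.0, 2.5])
cfl = np.full(len(r), 0.8)
phi = cfl_superbee_theta(r, cfl, theta=0.95)
print(phi.shape)  # (4,) -- one limiter value per cell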
def draw_ohl_graph(ax, data): # sort data along args.x_column and make it np.array again all_data = sorted(data, key=itemgetter(args.x_column)) scores = list({e[0] for e in all_data}) scores.sort() print("scores=", scores) np_all_data = np.array(all_data) all_x = np_all_data[:, args.x_column] all_y = np_all_data[:, args.y_column] x_max = np.max(all_x) x_min = np.min(all_x) y_max = np.max(all_y) y_min = np.min(all_y) # print("ymax=", y_max, "ymin=", y_min) y_width = y_max - y_min if y_width == 0: if y_max == 0: y_width = 1.0 else: y_min = 0 y_width = y_max ax.set_xlim(xmax = x_max / args.scale) ax.set_xlim(xmin = 0) ax.set_ylim(ymax = y_max + y_width * 0.05) ax.set_ylim(ymin = y_min - y_width * 0.05) for score in scores: # print("score=", score) data = list(filter(lambda e: e[0] == score, all_data)) data = np.array(data) x = data[:, args.x_column] y = data[:, args.y_column] x = x / args.scale ans = args.ans if len(data) < 5: ax.plot(x, y, '.', label=str(score)) continue elif len(data) * 0.1 < args.ans: ans = int(len(data) * 0.1) if ans < 4: ans = 4 # print("ans=", ans) weight = np.ones(ans, dtype=np.float)/ans y_average = np.convolve(y, weight, 'valid') rim = ans - 1 rim_l = rim // 2 rim_r = rim - rim_l ax.plot(x[rim_l:-rim_r], y_average, label=str(score)) ax.legend(loc=2) ax.set_xlabel(args.xlabel) ax.set_ylabel(args.ylabel) ax.grid(linewidth=1, linestyle="-", alpha=0.1)
def _crinfo_from_specific_data (self, data, margin):
    # find the automatic crop; np.nonzero gives the indices of non-empty voxels
    nzi = np.nonzero(data)

    x1 = np.min(nzi[0]) - margin[0]
    x2 = np.max(nzi[0]) + margin[0] + 1
    y1 = np.min(nzi[1]) - margin[0]
    y2 = np.max(nzi[1]) + margin[0] + 1
    z1 = np.min(nzi[2]) - margin[0]
    z2 = np.max(nzi[2]) + margin[0] + 1

    # clamp to the array bounds
    if x1 < 0: x1 = 0
    if y1 < 0: y1 = 0
    if z1 < 0: z1 = 0
    if x2 > data.shape[0]: x2 = data.shape[0]-1
    if y2 > data.shape[1]: y2 = data.shape[1]-1
    if z2 > data.shape[2]: z2 = data.shape[2]-1

    # crop
    crinfo = [[x1, x2],[y1,y2],[z1,z2]]
    #dataout = self._crop(data,crinfo)
    #dataout = data[x1:x2, y1:y2, z1:z2]
    return crinfo
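# A hedged sketch of how a crinfo returned by _crinfo_from_specific_data might
# be applied (crop_by_crinfo is a hypothetical helper, not part of the class;
# bounds are treated as half-open slices, matching the commented-out code above).
def crop_by_crinfo(data, crinfo):
    (x1, x2), (y1, y2), (z1, z2) = crinfo
    return data[x1:x2, y1:y2, z1:z2]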
def normalize_dataset(x): """ Normalize dataset such that minimum is 0 and maximum is 1. x: NxK matrix of input data Returns normalized data matrix """ return (x - np.min(x)) / (np.max(x) - np.min(x))
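# Quick example for normalize_dataset (assumes numpy is imported as np); note
# that the scaling is global, not per column.
x = np.array([[1., 5.],
              [3., 9.]])
print(normalize_dataset(x))  # global min 1 -> 0.0, global max 9 -> 1.0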
def __init__(self, shape, successes, trials=None, coef=1., offset=None, quadratic=None, initial=None): smooth_atom.__init__(self, shape, offset=offset, quadratic=quadratic, initial=initial, coef=coef) if sparse.issparse(successes): #Convert sparse success vector to an array self.successes = successes.toarray().flatten() else: self.successes = np.asarray(successes) if trials is None: if not set([0,1]).issuperset(np.unique(self.successes)): raise ValueError("Number of successes is not binary - must specify number of trials") self.trials = np.ones(self.successes.shape, np.float) else: if np.min(trials-self.successes) < 0: raise ValueError("Number of successes greater than number of trials") if np.min(self.successes) < 0: raise ValueError("Response coded as negative number - should be non-negative number of successes") self.trials = trials * 1. saturated = self.successes / self.trials deviance_terms = np.log(saturated) * self.successes + np.log(1-saturated) * (self.trials - self.successes) deviance_constant = -2 * coef * deviance_terms[~np.isnan(deviance_terms)].sum() devq = identity_quadratic(0,0,0,-deviance_constant) self.quadratic += devq
def plot_embedding(X, title=None): x_min, x_max = np.min(X, 0), np.max(X, 0) X = (X - x_min) / (x_max - x_min) pl.figure() ax = pl.subplot(111) for i in range(digits.data.shape[0]): pl.text( X[i, 0], X[i, 1], str(digits.target[i]), color=pl.cm.Set1(digits.target[i] / 10.0), fontdict={"weight": "bold", "size": 9}, ) if hasattr(offsetbox, "AnnotationBbox"): # only print thumbnails with matplotlib > 1.0 shown_images = np.array([[1.0, 1.0]]) # just something big for i in range(digits.data.shape[0]): dist = np.sum((X[i] - shown_images) ** 2, 1) if np.min(dist) < 4e-3: # don't show points that are too close continue shown_images = np.r_[shown_images, [X[i]]] imagebox = offsetbox.AnnotationBbox(offsetbox.OffsetImage(digits.images[i], cmap=pl.cm.gray_r), X[i]) ax.add_artist(imagebox) pl.xticks([]), pl.yticks([]) if title is not None: pl.title(title)
def grid_xyz(xyz, n_x, n_y, **kwargs): """ Grid data as a list of X,Y,Z coords into a 2D array Parameters ---------- xyz: np.array Numpy array of X,Y,Z values, with shape (n_points, 3) n_x: int Number of points in x direction (fastest varying!) n_y: int Number of points in y direction Returns ------- gridded_data: np.array 2D array of gridded data, with shape (n_x, n_y) Notes ----- 'x' is the inner dimension, i.e. image dimensions are (n_y, n_x). This is counterintuitive (to me at least) but in line with numpy definitions. """ x, y, z = xyz[:, 0], xyz[:, 1], xyz[:, 2] x_ax = np.linspace(np.min(x), np.max(x), n_x) y_ax = np.linspace(np.min(y), np.max(y), n_y) xg, yg = np.meshgrid(x_ax, y_ax) data = griddata(xyz[:, :2], z, (xg, yg), **kwargs) return data
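# A small usage sketch for grid_xyz, assuming numpy is imported as np and
# griddata comes from scipy.interpolate (as the function body implies).
n_x, n_y = 4, 3
xg, yg = np.meshgrid(np.linspace(0., 1., n_x), np.linspace(0., 1., n_y))
xyz = np.column_stack([xg.ravel(), yg.ravel(), (xg + yg).ravel()])
img = grid_xyz(xyz, n_x, n_y, method='nearest')
print(img.shape)  # (3, 4), i.e. (n_y, n_x) as described in the Notes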
def min(self, axis=None, out=None, keepdims=False):
    self._prepare_out(out=out)
    try:
        value = np.min(self.value, axis=axis, out=out, keepdims=keepdims)
    except TypeError:  # numpy < 1.7 does not accept the keepdims argument
        value = np.min(self.value, axis=axis, out=out)
    return self._new_view(value)
def testEncodeAdjacentPositions(self, verbose=False): repetitions = 100 n = 999 w = 25 radius = 10 minThreshold = 0.75 avgThreshold = 0.90 allOverlaps = np.empty(repetitions) for i in range(repetitions): overlaps = overlapsForRelativeAreas(n, w, np.array([i * 10, i * 10]), radius, dPosition=np.array([0, 1]), num=1) allOverlaps[i] = overlaps[0] self.assertGreater(np.min(allOverlaps), minThreshold) self.assertGreater(np.average(allOverlaps), avgThreshold) if verbose: print ("===== Adjacent positions overlap " "(n = {0}, w = {1}, radius = {2}) ===").format(n, w, radius) print "Max: {0}".format(np.max(allOverlaps)) print "Min: {0}".format(np.min(allOverlaps)) print "Average: {0}".format(np.average(allOverlaps))
def dist_avg_closest_pair(feats1,feats2,alpha=10): """ Distance measure between two sets of fingerprint maxes feats is a 2xN matrix first row - time in seconds, usually starting from the beat second row - frequency, usually a row index Computes euclidean distance between feats1 and their closest point in feats2, samething reverse, average alpha is a multiplier of the seconds """ # special cases with no maxes if feats1.shape[1] == 0 and feats2.shape[1] == 0: return 0 if feats1.shape[1] == 0 and feats2.shape[1] > 0: return np.inf # we'll find better #return 250. / 100 * feats2.shape[1] if feats1.shape[1] > 0 and feats2.shape[1] == 0: return np.inf # we'll find better #return 250. / 100 * feats1.shape[1] # compute distance from each of the points in a N x M matrix distmat = np.zeros([feats1.shape[1],feats2.shape[1]]) for l in range(distmat.shape[0]): for c in range(distmat.shape[1]): distmat[l,c] = math.hypot(alpha*(feats1[0,l]-feats2[0,c]), feats1[1,l]-feats2[1,c]) # measure closest ones shortest_from_feats1 = map(lambda x: np.min(distmat[x,:]),range(feats1.shape[1])) shortest_from_feats2 = map(lambda x: np.min(distmat[:,x]),range(feats2.shape[1])) # return average of both return np.min([np.average(shortest_from_feats1), np.average(shortest_from_feats2)])
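# A minimal sanity check for dist_avg_closest_pair (assumes numpy as np and the
# math module are available): identical sets of maxes give 0, an empty second
# set gives inf.
feats = np.array([[0.0, 1.0, 2.0],      # times in seconds
                  [10.0, 20.0, 30.0]])  # frequency row indices
print(dist_avg_closest_pair(feats, feats))         # 0.0
print(dist_avg_closest_pair(feats, feats[:, :0]))  # inf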
def quantify(self): """Quantify shape of the contours.""" four_pi = 4. * np.pi for edge in self.edges: # Positions x = edge['x'] y = edge['y'] A, perimeter, x_center, y_center, distances = \ self.get_shape_factor(x, y) # Set values. edge['area'] = A edge['perimeter'] = perimeter edge['x_center'] = x_center edge['y_center'] = y_center # Circle is 1. Rectangle is 0.78. Thread-like is close to zero. edge['shape_factor'] = four_pi * edge['area'] / \ edge['perimeter'] ** 2. # We assume that the radius of the edge # as the median value of the distances from the center. radius = np.median(distances) edge['radius_deviation'] = np.std(distances - radius) / radius edge['x_min'] = np.min(x) edge['x_max'] = np.max(x) edge['y_min'] = np.min(y) edge['y_max'] = np.max(y)
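# Illustration of the shape_factor definition used above (assumes numpy as np):
# 4*pi*A/P**2 is 1.0 for a circle and pi/4 ~ 0.785 for a square, which is the
# "rectangle is 0.78" rule of thumb in the comment.
print(4. * np.pi * (np.pi * 1.0**2) / (2. * np.pi * 1.0)**2)  # unit circle -> 1.0
print(4. * np.pi * 1.0 / 4.0**2)                              # unit square -> ~0.785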
def get_spherical_bounding_box(lons, lats): """ Given a collection of points find and return the bounding box, as a pair of longitudes and a pair of latitudes. Parameters define longitudes and latitudes of a point collection respectively in a form of lists or numpy arrays. :return: A tuple of four items. These items represent western, eastern, northern and southern borders of the bounding box respectively. Values are floats in decimal degrees. :raises ValueError: If points collection has the longitudinal extent of more than 180 degrees (it is impossible to define a single hemisphere bound to poles that would contain the whole collection). """ north, south = numpy.max(lats), numpy.min(lats) west, east = numpy.min(lons), numpy.max(lons) assert (-180 <= west <= 180) and (-180 <= east <= 180) if get_longitudinal_extent(west, east) < 0: # points are lying on both sides of the international date line # (meridian 180). the actual west longitude is the lowest positive # longitude and east one is the highest negative. west = min(lon for lon in lons if lon > 0) east = max(lon for lon in lons if lon < 0) if not all((get_longitudinal_extent(west, lon) >= 0 and get_longitudinal_extent(lon, east) >= 0) for lon in lons): raise ValueError('points collection has longitudinal extent ' 'wider than 180 deg') return west, east, north, south
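# Usage sketch for get_spherical_bounding_box (assumes numpy and the module's
# get_longitudinal_extent helper are importable).  Points straddling the date
# line yield a positive west and a negative east longitude.
lons = numpy.array([179.0, -179.5, 179.5])
lats = numpy.array([10.0, 12.0, 11.0])
print(get_spherical_bounding_box(lons, lats))  # (179.0, -179.5, 12.0, 10.0)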
def min(self, axis=None, out=None, keepdims=False):
    self._prepare_out(out=out)
    try:
        value = np.min(self.value, axis=axis, out=out, keepdims=keepdims)
    except TypeError:  # numpy < 1.7 does not accept the keepdims argument
        value = np.min(self.value, axis=axis, out=out)
    return self.__quantity_instance__(value, self.unit, copy=False)
def check_min_samples_split(name): X, y = hastie_X, hastie_y ForestEstimator = FOREST_ESTIMATORS[name] # test boundary value assert_raises(ValueError, ForestEstimator(min_samples_split=-1).fit, X, y) assert_raises(ValueError, ForestEstimator(min_samples_split=0).fit, X, y) assert_raises(ValueError, ForestEstimator(min_samples_split=1.1).fit, X, y) est = ForestEstimator(min_samples_split=10, n_estimators=1, random_state=0) est.fit(X, y) node_idx = est.estimators_[0].tree_.children_left != -1 node_samples = est.estimators_[0].tree_.n_node_samples[node_idx] assert_greater(np.min(node_samples), len(X) * 0.5 - 1, "Failed with {0}".format(name)) est = ForestEstimator(min_samples_split=0.5, n_estimators=1, random_state=0) est.fit(X, y) node_idx = est.estimators_[0].tree_.children_left != -1 node_samples = est.estimators_[0].tree_.n_node_samples[node_idx] assert_greater(np.min(node_samples), len(X) * 0.5 - 1, "Failed with {0}".format(name))
def check_min_samples_leaf(name): X, y = hastie_X, hastie_y # Test if leaves contain more than leaf_count training examples ForestEstimator = FOREST_ESTIMATORS[name] # test boundary value assert_raises(ValueError, ForestEstimator(min_samples_leaf=-1).fit, X, y) assert_raises(ValueError, ForestEstimator(min_samples_leaf=0).fit, X, y) est = ForestEstimator(min_samples_leaf=5, n_estimators=1, random_state=0) est.fit(X, y) out = est.estimators_[0].tree_.apply(X) node_counts = np.bincount(out) # drop inner nodes leaf_count = node_counts[node_counts != 0] assert_greater(np.min(leaf_count), 4, "Failed with {0}".format(name)) est = ForestEstimator(min_samples_leaf=0.25, n_estimators=1, random_state=0) est.fit(X, y) out = est.estimators_[0].tree_.apply(X) node_counts = np.bincount(out) # drop inner nodes leaf_count = node_counts[node_counts != 0] assert_greater(np.min(leaf_count), len(X) * 0.25 - 1, "Failed with {0}".format(name))
def allclose_with_out(x, y, atol=0.0, rtol=1.0e-5): # run the np.allclose on x and y # if it fails print some stats # before returning ac = np.allclose(x, y, rtol=rtol, atol=atol) if not ac: dd = np.abs(x - y) neon_logger.display('abs errors: %e [%e, %e] Abs Thresh = %e' % (np.median(dd), np.min(dd), np.max(dd), atol)) amax = np.argmax(dd) if np.isscalar(x): neon_logger.display('worst case: %e %e' % (x, y.flat[amax])) elif np.isscalar(y): neon_logger.display('worst case: %e %e' % (x.flat[amax], y)) else: neon_logger.display('worst case: %e %e' % (x.flat[amax], y.flat[amax])) dd = np.abs(dd - atol) / np.abs(y) neon_logger.display('rel errors: %e [%e, %e] Rel Thresh = %e' % (np.median(dd), np.min(dd), np.max(dd), rtol)) amax = np.argmax(dd) if np.isscalar(x): neon_logger.display('worst case: %e %e' % (x, y.flat[amax])) elif np.isscalar(y): neon_logger.display('worst case: %e %e' % (x.flat[amax], y)) else: neon_logger.display('worst case: %e %e' % (x.flat[amax], y.flat[amax])) return ac
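# Minimal usage sketch: same semantics as np.allclose, but the summary stats
# above are logged through neon_logger whenever the comparison fails.
x = np.array([1.0, 2.0, 3.0])
print(allclose_with_out(x, x + 1e-7, rtol=1.0e-5))  # True, nothing gets logged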
def function1D(self, t): A = self.getParamValue(0) B = self.getParamValue(1) R = self.getParamValue(2) T0 = self.getParamValue(3) Scale = self.getParamValue(4) HatWidth = self.getParamValue(5) KConv = self.getParamValue(6) # A/2 Scale factor has been removed to make A and Scale independent f_int = Scale*((1-R)*np.power((A*(t-T0)),2)* np.exp(-A*(t-T0))+2*R*A**2*B/np.power((A-B),3) * (np.exp(-B*(t-T0))-np.exp(-A*(t-T0))*(1+(A-B)*(t-T0)+0.5*np.power((A-B),2)*np.power((t-T0),2)))) f_int[t<T0] = 0 mid_point_hat = len(f_int)//2 gc_x = np.array(range(len(f_int))).astype(float) ppd = 0.0*gc_x lowIDX = int(np.floor(np.max([mid_point_hat-np.abs(HatWidth),0]))) highIDX = int(np.ceil(np.min([mid_point_hat+np.abs(HatWidth),len(gc_x)]))) ppd[lowIDX:highIDX] = 1.0 ppd = ppd/sum(ppd) gc_x = np.array(range(len(f_int))).astype(float) gc_x = 2*(gc_x-np.min(gc_x))/(np.max(gc_x)-np.min(gc_x))-1 gc_f = np.exp(-KConv*np.power(gc_x,2)) gc_f = gc_f/np.sum(gc_f) npad = len(f_int) - 1 first = npad - npad//2 f_int = np.convolve(f_int,ppd,'full')[first:first+len(f_int)] f_int = np.convolve(f_int,gc_f,'full')[first:first+len(f_int)] return f_int
def explore_city_data(city_data): """Calculate the Boston housing statistics.""" # Get the labels and features from the housing data housing_prices = city_data.target housing_features = city_data.data ################################### ### Step 1. YOUR CODE GOES HERE ### ################################### # Please calculate the following values using the Numpy library print "Size of data (number of houses)" print np.size(housing_prices) print "Number of features" print np.size(housing_features, 1) print "Minimum price" print np.min(housing_prices) print "Maximum price" print np.max(housing_prices) print "Calculate mean price" print np.mean(housing_prices) print "Calculate median price" print np.median(housing_prices) print "Calculate standard deviation" print np.std(housing_prices)
def coverage_string(self): """Coverage of reader to be reported as string for debug output""" corners = self.xy2lonlat([self.xmin, self.xmin, self.xmax, self.xmax], [self.ymax, self.ymin, self.ymax, self.ymin]) return '%.2f-%.2fE, %.2f-%.2fN' % ( np.min(corners[0]), np.max(corners[0]), np.min(corners[1]), np.max(corners[1]))
def rootSpI(img, list_remove=[], sc=None, lut_range = False, verbose=False): """ case where the data is a spatialimage """ # -- cells are positionned inside a structure, the polydata, and assigned a scalar value. polydata,polydata2 = img2polydata_complexe(img, list_remove=list_remove, sc=sc, verbose=verbose) m = tvtk.PolyDataMapper(input=polydata.output) m2 = tvtk.PolyDataMapper(input=polydata2.output) # -- definition of the scalar range (default : min to max of the scalar value). if sc: ran=[sc[i] for i in sc.keys() if i not in list_remove] if (lut_range != None) and (lut_range != False): print lut_range m.scalar_range = lut_range[0],lut_range[1] else: m.scalar_range = np.min(ran), np.max(ran) else: m.scalar_range=np.min(img), np.max(img) # -- actor that manage changes of view if memory is short. a = tvtk.QuadricLODActor(mapper=m) a.property.point_size=8 a2 = tvtk.QuadricLODActor(mapper=m2) a2.property.point_size=8 #scalebar if lut_range != None: sc=tvtk.ScalarBarActor(orientation='vertical',lookup_table=m.lookup_table) return a, a2, sc, m, m2
def hausdorffnorm(A, B):
    '''
    Finds the Hausdorff norm between two matrices A and B.
    INPUTS:
    A: numpy array
    B: numpy array
    OUTPUTS:
    Hausdorff norm between matrices A and B
    '''
    # ensure matrices are 3 dimensional, and shaped conformably
    if len(A.shape) == 1:
        A = np.atleast_2d(A)
    if len(B.shape) == 1:
        B = np.atleast_2d(B)

    A = np.atleast_3d(A)
    B = np.atleast_3d(B)

    x, y, z = B.shape
    A = np.reshape(A, (z, x, y))
    B = np.reshape(B, (z, x, y))

    # find hausdorff norm: starting from A to B
    z, x, y = B.shape
    temp1 = np.tile(np.reshape(B.T, (y, z, x)), (max(A.shape), 1))
    temp2 = np.tile(np.reshape(A.T, (y, x, z)), (1, max(B.shape)))
    D1 = np.min(np.sqrt(np.sum((temp1-temp2)**2, 0)), axis=0)

    # starting from B to A
    temp1 = np.tile(np.reshape(A.T, (y, z, x)), (max(B.shape), 1))
    temp2 = np.tile(np.reshape(B.T, (y, x, z)), (1, max(A.shape)))
    D2 = np.min(np.sqrt(np.sum((temp1-temp2)**2, 0)), axis=0)

    return np.max([D1, D2])
def signmag_plot(a, b, z, ref): imdata1 = np.sign(ref) cmap1 = plt.cm.RdBu cmap1.set_bad('k', 1) imdata2 = np.log10(np.abs(ref)) cmap2 = plt.cm.YlOrRd cmap2.set_bad('k', 1) fig, axarr = plt.subplots(ncols=2, figsize=(12, 6)) axarr[0].pcolormesh(a, b, imdata1, cmap=cmap1, vmin=-1, vmax=1) im = axarr[1].pcolormesh(a, b, imdata2, cmap=cmap2, vmin=np.percentile(imdata2, 5), vmax=np.percentile(imdata2, 95)) for ax in axarr: ax.set_xlim((np.min(a), np.max(a))) ax.set_ylim((np.min(b), np.max(b))) ax.set_xlabel("a") ax.set_ylabel("b") ax.set(adjustable='box-forced', aspect='equal') fig.subplots_adjust(right=0.8) cbar_ax = fig.add_axes([0.85, 0.15, 0.03, 0.7]) fig.colorbar(im, cax=cbar_ax) axarr[0].set_title("Sign of hyp1f1") axarr[1].set_title("Magnitude of hyp1f1") plt.suptitle("z = {:.2e}".format(np.float64(z))) return fig
def _makewindows(self, indices, window): """ Make masks used by windowing functions Given a list of indices specifying window centers, and a window size, construct a list of index arrays, one per window, that index into the target array Parameters ---------- indices : array-like List of times specifying window centers window : int Window size """ div = divmod(window, 2) before = div[0] after = div[0] + div[1] index = asarray(self.index) indices = asarray(indices) if where(index == max(indices))[0][0] + after > len(index): raise ValueError("Maximum requested index %g, with window %g, exceeds length %g" % (max(indices), window, len(index))) if where(index == min(indices))[0][0] - before < 0: raise ValueError("Minimum requested index %g, with window %g, is less than 0" % (min(indices), window)) masks = [arange(where(index == i)[0][0]-before, where(index == i)[0][0]+after, dtype='int') for i in indices] return masks
def collect_statistics_from_sigma_bins(sigma, bins_start, bins_end, burnin=0, smooth=True, area_fraction=0.68, numpoints=1000): sigma = flatten_commander_chain(sigma, burnin) lmax = sigma.shape[2] - 1 means = [] stds = [] mls = [] uppers = [] lowers = [] for lstart, lend in zip(bins_start, bins_end): vars = [] sigmas = [] for l in range(lstart, lend+1): print l vars.append(np.var(sigma[:, 0, l])) print vars[-1] sigmas.append(sigma[:, 0, l] / vars[-1]) vars = np.array(vars) sigmas = np.array(sigmas) if lstart == lend: samps = sigmas samps = samps * vars else: samps = np.sum(sigmas, axis=0) samps = samps / np.sum(1 / vars) print np.min(samps) print np.max(samps) x = np.linspace(np.min(samps), np.max(samps), numpoints) mean, std, ml, upper, lower = collect_statistics(samps, x, area_fraction=area_fraction, smooth=smooth) means.append(mean) stds.append(std) mls.append(ml) uppers.append(upper) lowers.append(lower) return means, stds, mls, uppers, lowers
def extract_binary_masks_from_structural_channel(Y, min_area_size=30, min_hole_size=15, gSig=5, expand_method='closing', selem=np.ones((3, 3))): """Extract binary masks by using adaptive thresholding on a structural channel Inputs: ------ Y: caiman movie object movie of the structural channel (assumed motion corrected) min_area_size: int ignore components with smaller size min_hole_size: int fill in holes up to that size (donuts) gSig: int average radius of cell expand_method: string method to expand binary masks (morphological closing or dilation) selem: np.array morphological element with which to expand binary masks Output: ------- A: sparse column format matrix matrix of binary masks to be used for CNMF seeding mR: np.array mean image used to detect cell boundaries """ mR = Y.mean(axis=0) img = cv2.blur(mR, (gSig, gSig)) img = (img - np.min(img)) / (np.max(img) - np.min(img)) * 255. img = img.astype(np.uint8) th = cv2.adaptiveThreshold(img, np.max(img), cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, gSig, 0) th = remove_small_holes(th > 0, min_size=min_hole_size) th = remove_small_objects(th, min_size=min_area_size) areas = label(th) A = np.zeros((np.prod(th.shape), areas[1]), dtype=bool) for i in range(areas[1]): temp = (areas[0] == i + 1) if expand_method == 'dilation': temp = dilation(temp, selem=selem) elif expand_method == 'closing': temp = dilation(temp, selem=selem) A[:, i] = temp.flatten('F') return A, mR
input_dim=100, batch_size=20, learning_rate=0.01, epsilon=0.1, fully_output_shape=[4,4,256], deconv_stride=[2,2,2,2], deconv_channel=[128,64,32,3], ref_image=face_ref, loss_w=0.001, iterates=300) gnn.train() print 'image shape:', gnn.pic.shape print 'label shape:', gnn.prob.shape print 'learning target:', gnn.learning_target print 'first image:', gnn.pic[0].shape, np.min(gnn.pic[0]), np.max(gnn.pic[0]) if 'face' in gnn.model_url: for i in range(11): misc.imsave('figures/face_image'+str(i)+'.jpg', gnn.pic[i]) np.savetxt('figures/number_image.txt', gnn.prob[0:11,gnn.learning_target]) name = [s.strip() for s in open('names.txt').readlines()] for j in range(11): rank = np.argsort(gnn.prob[j])[::-1] print "image:", str(j) print "top 1:", name[rank[0]], gnn.prob[j][rank[0]] print "top 2:", name[rank[1]], gnn.prob[j][rank[1]] print "top 3:", name[rank[2]], gnn.prob[j][rank[2]] # print "top 4:", name[rank[3]], gnn.prob[0][rank[3]] # print "top 5:", name[rank[4]], gnn.prob[0][rank[4]] f = plt.figure()
# perform inferences in Fourier space dk = 2. / (4.1 * 365.25) # frequency resolution for testing testks = np.arange(-200.5, 6001., 1.) * dk + (truth.abks[0])[2] strt = time.time() amp2s = spg.superpgram(lcf.starts, lcf.stops, data, ivar, testks) print "computed super-resolution periodogram:", time.time() - strt # save output output = open(picklefn, "wb") pickle.dump((lcf, data, ivar, truth, testks, amp2s), output) output.close() # plot data plt.clf() plt.plot(lcf.centers, data, "k.", ms=0.75) plt.xlabel("time [day]") plt.ylabel("intensity") plt.savefig("foo.png") # plot fourier tests plt.clf() plt.step(testks, np.log10(amp2s), color="k", where="mid") # plt.plot(testks, np.log10(amp2s), "ko") for a, b, k in truth.abks: plt.axvline(k, alpha=0.5) plt.xlabel("wave number [rad per day]") plt.ylabel("log10 squared amplitude of best-fit sinusoid") plt.xlim(np.min(testks), np.max(testks)) plt.savefig("bar.png")
# plot.imsave('Test_gt_depth_{:05d}.png'.format(idx), input_gt_depth_image, cmap="viridis") # plot.imsave('Test_pred_depth_{:05d}.png'.format(idx), pred_depth_image, cmap="viridis") cv2.imwrite(save_path + "Test_gt_depth_" + str(idx) + ".png", input_gt_depth_image * 50) cv2.imwrite(save_path + "Test_pred_depth_" + str(idx) + ".png", pred_depth_image * 50) print('idx', idx, 'saved') check_min = copy.deepcopy(input_gt_depth_image) check_min[input_gt_depth_image == 0.] = np.nan mask = copy.deepcopy(input_gt_depth_image) mask[input_gt_depth_image < 0.82] = 0. sum = np.sum(mask) difference = np.sum(np.multiply(np.abs(np.subtract(input_gt_depth_image, pred_depth_image)), mask)) / sum mean_difference += difference print str(idx), ":", difference print np.max(input_gt_depth_image), np.max(pred_depth_image), np.nanmin(check_min), np.min(pred_depth_image) n = np.sum(input_gt_depth_image > 1e-3) idxs = (input_gt_depth_image <= 1e-3) pred_depth_image[idxs] = 1 input_gt_depth_image[idxs] = 1 pred_d_gt = pred_depth_image / input_gt_depth_image pred_d_gt[idxs] = 100 gt_d_pred = input_gt_depth_image / pred_depth_image gt_d_pred[idxs] = 100 Threshold_1_25 += np.sum(np.maximum(pred_d_gt, gt_d_pred) < 1.25) / n Threshold_1_25_2 += np.sum(np.maximum(pred_d_gt, gt_d_pred) < 1.25 * 1.25) / n Threshold_1_25_3 += np.sum(np.maximum(pred_d_gt, gt_d_pred) < 1.25 * 1.25 * 1.25) / n
def build_dihedral_angles(self): """ Function which determines the optimal dihedral angles by fitting them to a Fourrier Sum. This is useful because one of the bond functional forms in LAMMPS takes uses 3 (?) parameter Fourier Sum. Bit of a monster function, but works pretty well. Basically, I implemented an inteligent iterative approach to the Fourier sum fitting because we need the parameters to be within some limits, but by default scipy doesn't allow constrains to be placed on the fitting algrorithm, so I had to implement my own cost function to get around this. Doesn't give *exactly* the same answer as the MATLAB implementation, but does pretty well and is clearly close enough! """ ## ---------------------------------------------------------------------------- ## STAGE 1 - EXTRACT THE DIHEDRAL ANGLES FROM THE ALL ATOM TRAJECTORIES ## theta_by_res= [] self.STDMessage('Extracting dihedral angles from atomistic simulation replicas...', msgType='STATUS') # cycle through each residue extracting the appropriate theta vector on the # COM of groups of 4 residues for res in self.resVector[0:-3]: tmp_theta = np.array([]) sys.stdout.write('.') sys.stdout.flush() for replica in self.replica_vector: prot = replica.proteinTrajectoryList[0] i = res j = i + 1 k = j + 1 l = k + 1 # extract COM vector between the 4 residues # (so get 3 vectors) # x10 so we move into Angstroms now... b1 = prot.get_interResidueCOMVector(j,i)*10 b2 = prot.get_interResidueCOMVector(k,j)*10 b3 = prot.get_interResidueCOMVector(l,k)*10 n1_numerator = np.cross(b1,b2) n1_denominator = np.linalg.norm(np.cross(b1,b2),axis=1) n2_numerator = np.cross(b2,b3) n2_denominator = np.linalg.norm(np.cross(b2,b3),axis=1) n1 = n1_numerator / np.transpose([n1_denominator,n1_denominator,n1_denominator]) n2 = n2_numerator / np.transpose([n2_denominator,n2_denominator,n2_denominator]) b2n = b2 / np.transpose([np.linalg.norm(b2,axis=1),np.linalg.norm(b2,axis=1),np.linalg.norm(b2,axis=1)]) m1 = np.cross(n1,b2n) # avoids calculating the full dot product and then getting the diagonal - this is a # really efficient vectorized way to get this (seriously this shaves multiple seconds per replica) x = inner1d(n1,n2) y = inner1d(m1,n2) tmp_theta = np.concatenate((tmp_theta, 180/np.pi*np.arctan2(y,x))) # NOTE! # We multiply by -1 because this gives a dihedral form where the chirality of the final CG model # matches the all atom model (note that this is a relective operations around 0). This, in itself # is kind of interesting! theta_by_res.append(-1*tmp_theta) # this to add a new line print "" ## ---------------------------------------------------------------------------- ## STAGE 2 - Fit angle histograms to 3 term Fourier Series equation ## # For each residue we # 1) Histogram the data and generate an empyrical PDF from that histogram # ugh... 
bins_edges = np.arange(-180,181,1) bin_centers = np.arange(-179.5,180.5,1) n_angles = len(theta_by_res) print "" self.STDMessage('Fitting dihedral angles to Fourier Series function...', msgType='STATUS') params_by_res = [] for i in xrange(0, n_angles): self.STDMessage('Fitting angle along %i-%i-%i-%i vector.'%(i, i+1, i+2, i+3), msgType='STATUS') sys.stdout.write('.') sys.stdout.flush() # histogram the data (density means our histogram Y axis is now in # density units (0 < density < 1) rather than count) (vals,b)=np.histogram(theta_by_res[i],bins_edges,density=True) # Convert probability into energy and normalize onto some scale such # that the lowest minima is 0 vals_EN = -np.log(vals)*self.kB*self.TEMP - np.min(-np.log(vals)*self.kB*self.TEMP) # set any values which have become inft to the max observed energy vals_EN[ vals_EN == np.inf] = max(vals_EN[np.isfinite(vals_EN)]) # 2) Fit that data to a 3 term Fourier series # lower and upper bounds LB = [0.0, -360.0, 0.0, -360.0, 0.0, -540.0] UB = [10.0, 360.0, 10.0, 360.0, 10.0, 540.0] #>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> # Internal function which returns a custom optimizer function # def make_optimization_function(LB_setter, UB_setter, penalty_multiplier_setter): #>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> def funct(x, P1, P2, P3, P4, P5, P6): # define our 3 term Fourier Series function (T1/2/3 are the three terms) T1 = P1*(1 - np.cos(np.deg2rad(1*x - P2))) T2 = P3*(1 - np.cos(np.deg2rad(2*x - P4))) T3 = P5*(1 - np.cos(np.deg2rad(3*x - P6))) # when make_optimization_function returns 'funct' the following # variables have been defined BY the make_optimization_function # - functional programming FTW! LB = LB_setter UB = UB_setter penalty_multiplier = penalty_multiplier_setter # we have to define limits manually because SciPy does not allow # automatic parameter constraints. To do this we define a flat bottom # function for each parameter which = 0 within the range we care about but grows # rapidly outside that range. 
How rapidly depends on the penalty multiplier penalty = 0 # IDX sets the index into the LB and UB vector, which specifies # bounds for each parameter IDX=0 for P in [P1, P2, P3, P4, P5, P6]: if P > UB[IDX]: penalty=(P-UB[IDX])*penalty_multiplier + penalty if P < LB[IDX]: penalty=(LB[IDX]-P)*penalty_multiplier + penalty IDX=IDX+1 # sum with penality return T1 + T2 + T3 + penalty #>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> return funct #>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> # now we iterativly do this with a more and more painful penality_multiplier penalty_multiplier=10000 boundary_fail = True while boundary_fail: # construct a cost function with a defined penality value threeParamFunct = make_optimization_function(LB, UB, penalty_multiplier) # set initial goodness and past parameter set goodness = 1000000000000000 best_params = [-1,-1,-1,-1,-1,-1] # set initial guesses for parameters np.random.seed() guess = [0.5, 90, 0.5, 90, 0.5, 90] # try 20 different runs with random starting positions for the 0-10 parameters for run in xrange(0,20): try: # optimizie using our customized function with a set penality term, where we *only* optimize # the 6 parameters defined in guess (params, tmp) = scipy.optimize.curve_fit(threeParamFunct, bin_centers, vals_EN, p0=guess,maxfev=100000) # using parameters see how well we did by determinig the goodness of fit to the emprical histogram tmpgood = sum(abs((threeParamFunct(bin_centers, params[0],params[1],params[2],params[3],params[4],params[5])-vals_EN))) # if this fit was better than the previous one update the current gold standard (goodness) and the current best # parameter set (best_params) if tmpgood < goodness: goodness = tmpgood best_params = params # New random parameters to try again with # (while the coefficients *can* go 0 to 10 we find better results come from lower intial guesses (i.e constraining # the initial guess to the 0 to 3 interval) guess = [np.random.rand()*3, 90, np.random.rand()*3, 90, np.random.rand()*3, 90] except RuntimeError: self.STDMessage('Runtime error fitting Fourier series to angle starting on r %i'%i, msgType='WARNING') self.STDMessage("Don't worry - we'll try again with different initial parameters", msgType='WARNING') # set paramst to the best parameters you saw params = best_params # check ALL parameters lie inside the boundaries boundaries IDX = 0 old_pm = penalty_multiplier for P in params: if P > UB[IDX]: penalty_multiplier=penalty_multiplier*10 break if P < LB[IDX]: penalty_multiplier=penalty_multiplier*10 break IDX=IDX+1 # if we didn't update the penality multiplier then all parameters looked good if old_pm == penalty_multiplier: boundary_fail=False # if shit has hit the fan (can't imagine this happening) if best_params[0] == -1: raise SimulationsException("FUNDEMENTAL ERROR: Despite our best effors we could not fit the dihedral associated with the %i-%i-%i-%i stretch to a 3-term Fourier Series. This is either indicative of a bug in how we do the fitting, or a more fundemental issue. Note this doesn't mean we could't get a *good* fit, it means we literally couldn't fit it at all. Please contact [email protected] because this is bad news!") # save the parameters! params_by_res.append(params) # plot fit (note need to make this optional!) 
if self.PLOT_DIHEDRAL_HISTOGRAMS: plt.plot(bin_centers,threeParamFunct(bin_centers, params[0],params[1],params[2],params[3],params[4],params[5],)) plt.plot(bin_centers, vals_EN) plt.title("Theta angle fit (goodness = %4.2f" % goodness) plt.xlabel('Angle (degrees)') plt.ylabel('Probability') plt.savefig('plots/DIHEDRAL_res_%i.png'%(i),dpi=150) plt.close() ## ---------------------------------------------------------------------------- ## STAGE 3 - Write the derived parameters out to file ## # write summary histograms fitted to normal distribution for manual inspection self.STDMessage('Writing dihedral definition to [%s]...'%self.DIHEDRAL_DEFINITION_FILE,msgType='WRITING') with open(self.DIHEDRAL_DEFINITION_FILE,'w') as fh: for idx in self.idxVector[0:-3]: i=idx+1 j=idx+2 k=idx+3 l=idx+4 fh.write('@dihedral:Res%iRes%iRes%iRes%i @atom:Res%i @atom:Res%i @atom:Res%i @atom:Res%i @bond:Res%iRes%i @bond:Res%iRes%i @bond:Res%iRes%i\n' % (i,j,k,l,i,j,k,l,i,j,j,k,k,l)) self.STDMessage('Writing dihedral parameters to [%s]...'%self.DIHEDRAL_PARAMETER_FILE,msgType='WRITING') with open(self.DIHEDRAL_PARAMETER_FILE,'w') as fh: for idx in self.idxVector[0:-3]: i=idx+1 j=idx+2 k=idx+3 l=idx+4 fh.write('dihedral_coeff @dihedral:Res%iRes%iRes%iRes%i %4.2f %4.2f %4.2f %4.2f %4.2f %4.2f\n' % (i,j,k,l, params_by_res[idx][0],params_by_res[idx][1],params_by_res[idx][2],params_by_res[idx][3],params_by_res[idx][4],params_by_res[idx][5])) self.write_other_dihedral_angles(n_angles) self.STDMessage("Dihedral parameters generated", msgType='SUCCESS') return params_by_res
# Two groups were visually seen, so let's start by building a 2-cluster model (k = 2)
irisCluster = KMeans(n_clusters=2).fit(newIris)
irisCluster.labels_  # Label 0 and label 1
newIris_WithClusterlbl = newIris.copy()
newIris_WithClusterlbl['Cluster_Label'] = irisCluster.labels_

## Again plot points with color c = irisCluster.labels_
plt.scatter("Petal.Length", "Petal.Width", data=newIris_WithClusterlbl, c=irisCluster.labels_)
# Plot other data too

# Step 3 & 4: Evaluate clustering and optimize the number of clusters
cluster_centers = irisCluster.cluster_centers_
# Why np.min -> find the least distance from each point to the 2 cluster centers
cDist_output = np.min(cdist(newIris, irisCluster.cluster_centers_, 'euclidean'), axis=1)
# To draw the elbow point for k = 2
np.sum(cDist_output**2)
# Repeat with k = 3, k = 4, k = 5

# what happens inside cdist ->
centroid_0 = cluster_centers[0, :]
centroid_1 = cluster_centers[1, :]
newIris_1_Obs = newIris.iloc[0, :].as_matrix()
# euclidean distance between each centroid and 1 obs
c00 = ((centroid_0[0] - newIris_1_Obs[0])**2 + (centroid_0[1] - newIris_1_Obs[1])**2 +
       (centroid_0[2] - newIris_1_Obs[2])**2 + (centroid_0[3] - newIris_1_Obs[3])**2)**0.5
c01 = ((centroid_1[0] - newIris_1_Obs[0])**2 + (centroid_1[1] - newIris_1_Obs[1])**2 +
       (centroid_1[2] - newIris_1_Obs[2])**2 + (centroid_1[3] - newIris_1_Obs[3])**2)**0.5
def extract_binary_masks_blob(A, neuron_radius, dims, num_std_threshold=1, minCircularity=0.5, minInertiaRatio=0.2, minConvexity=.8): """ Function to extract masks from data. It will also perform a preliminary selectino of good masks based on criteria like shape and size Parameters: ---------- A: scipy.sparse matris contains the components as outputed from the CNMF algorithm neuron_radius: float neuronal radius employed in the CNMF settings (gSiz) num_std_threshold: int number of times above iqr/1.349 (std estimator) the median to be considered as threshold for the component minCircularity: float parameter from cv2.SimpleBlobDetector minInertiaRatio: float parameter from cv2.SimpleBlobDetector minConvexity: float parameter from cv2.SimpleBlobDetector Returns: -------- masks: np.array pos_examples: neg_examples: """ params = cv2.SimpleBlobDetector_Params() params.minCircularity = minCircularity params.minInertiaRatio = minInertiaRatio params.minConvexity = minConvexity # Change thresholds params.blobColor = 255 params.minThreshold = 0 params.maxThreshold = 255 params.thresholdStep = 3 params.minArea = np.pi * ((neuron_radius * .75)**2) params.filterByColor = True params.filterByArea = True params.filterByCircularity = True params.filterByConvexity = True params.filterByInertia = True detector = cv2.SimpleBlobDetector_create(params) masks_ws = [] pos_examples = [] neg_examples = [] for count, comp in enumerate(A.tocsc()[:].T): print(count) comp_d = np.array(comp.todense()) gray_image = np.reshape(comp_d, dims, order='F') gray_image = (gray_image - np.min(gray_image)) / \ (np.max(gray_image) - np.min(gray_image)) * 255 gray_image = gray_image.astype(np.uint8) # segment using watershed markers = np.zeros_like(gray_image) elevation_map = sobel(gray_image) thr_1 = np.percentile(gray_image[gray_image > 0], 50) iqr = np.diff(np.percentile(gray_image[gray_image > 0], (25, 75))) thr_2 = thr_1 + num_std_threshold * iqr / 1.35 markers[gray_image < thr_1] = 1 markers[gray_image > thr_2] = 2 edges = watershed(elevation_map, markers) - 1 # only keep largest object label_objects, _ = ndi.label(edges) sizes = np.bincount(label_objects.ravel()) if len(sizes) > 1: idx_largest = np.argmax(sizes[1:]) edges = (label_objects == (1 + idx_largest)) edges = ndi.binary_fill_holes(edges) else: print('empty component') edges = np.zeros_like(edges) masks_ws.append(edges) keypoints = detector.detect((edges * 200.).astype(np.uint8)) if len(keypoints) > 0: pos_examples.append(count) else: neg_examples.append(count) return np.array(masks_ws), np.array(pos_examples), np.array(neg_examples)
n_bars = len(mses_diabetes) xval = np.arange(n_bars) colors = ['r', 'g', 'b', 'orange', 'black'] # plot diabetes results plt.figure(figsize=(12, 6)) ax1 = plt.subplot(121) for j in xval: ax1.barh(j, mses_diabetes[j], xerr=stds_diabetes[j], color=colors[j], alpha=0.6, align='center') ax1.set_title('Imputation Techniques with Diabetes Data') ax1.set_xlim(left=np.min(mses_diabetes) * 0.9, right=np.max(mses_diabetes) * 1.1) ax1.set_yticks(xval) ax1.set_xlabel('MSE') ax1.invert_yaxis() ax1.set_yticklabels(x_labels) # plot california dataset results ax2 = plt.subplot(122) for j in xval: ax2.barh(j, mses_california[j], xerr=stds_california[j], color=colors[j], alpha=0.6, align='center') ax2.set_title('Imputation Techniques with California Data') ax2.set_yticks(xval) ax2.set_xlabel('MSE')
print(ids.groupby('Species').size())

# Split-out validation dataset; source https://machinelearningmastery.com/machine-learning-in-python-step-by-step/
# changes datatype to `ndarray` and separates the columns with numerical values (first 4 columns); idsv - iris data set values
idsv = ids.values[:, 0:4]
# print(type(idsv))  # confirming the data type - commented out for clarity

# separates the last column with the species names (50 setosas, 50 versicolors and 50 virginicas)
spec_names = ids.values[:, 4]
# print(type(spec_names))  # confirming the data type - commented out for clarity
# print(Y)

# Further separation - slicing by attribute and species - first attribute of the first 50 instances - sepal length of Iris Setosa
sepal_l_setosa = idsv[0:50, 0]  # the slice end is exclusive, so 0:50 covers the 50 setosa rows

# summary statistics of the subset, see next cell for results
sels_min = np.min(sepal_l_setosa)    # minimum value of the subset
sels_mean = np.mean(sepal_l_setosa)  # mean value of the subset
sels_max = np.max(sepal_l_setosa)    # maximum value of the subset
print(sels_min, sels_mean, sels_max)
# print(sepal_l_setosa)

# histogram
# pl.hist(sepal_l_setosa)
# pl.show

print(ids.columns)

# 2-D scatter plot; source: https://youtu.be/FLuqwQgSBDw?t=1069
# idsv.plot(kind="scatter", X="Sepal length, cm", Y="Sepal width, cm")
# pl.show()
def register_ROIs(A1, A2, dims, template1=None, template2=None, align_flag=True, D=None, thresh_cost=.7, max_dist=10, enclosed_thr=None, print_assignment=False, plot_results=False, Cn=None, cmap='viridis'): """ Register ROIs across different sessions using an intersection over union metric and the Hungarian algorithm for optimal matching Parameters: ----------- A1: ndarray or csc_matrix # pixels x # of components ROIs from session 1 A2: ndarray or csc_matrix # pixels x # of components ROIs from session 2 dims: list or tuple dimensionality of the FOV template1: ndarray dims template from session 1 template2: ndarray dims template from session 2 align_flag: bool align the templates before matching D: ndarray matrix of distances in the event they are pre-computed thresh_cost: scalar maximum distance considered max_dist: scalar max distance between centroids enclosed_thr: float if not None set distance to at most the specified value when ground truth is a subset of inferred print_assignment: bool print pairs of matched ROIs plot_results: bool create a plot of matches and mismatches Cn: ndarray background image for plotting purposes cmap: string colormap for background image Returns: -------- matched_ROIs1: list indeces of matched ROIs from session 1 matched_ROIs2: list indeces of matched ROIs from session 2 non_matched1: list indeces of non-matched ROIs from session 1 non_matched2: list indeces of non-matched ROIs from session 1 performance: list (precision, recall, accuracy, f_1 score) with A1 taken as ground truth """ if template1 is None or template2 is None: align_flag = False if align_flag: # first align ROIs from session 2 to the template from session 1 template2, shifts, _, xy_grid = tile_and_correct( template2, template1 - template1.min(), [int(dims[0] / 4), int(dims[1] / 4)], [16, 16], [10, 10], add_to_movie=template2.min(), shifts_opencv=True) A_2t = np.reshape(A2.toarray(), dims + (-1, ), order='F').transpose(2, 0, 1) dims_grid = tuple( np.max(np.stack(xy_grid, axis=0), axis=0) - np.min(np.stack(xy_grid, axis=0), axis=0) + 1) _sh_ = np.stack(shifts, axis=0) shifts_x = np.reshape(_sh_[:, 1], dims_grid, order='C').astype(np.float32) shifts_y = np.reshape(_sh_[:, 0], dims_grid, order='C').astype(np.float32) x_grid, y_grid = np.meshgrid( np.arange(0., dims[0]).astype(np.float32), np.arange(0., dims[1]).astype(np.float32)) x_remap = (-np.resize(shifts_x, dims) + x_grid).astype(np.float32) y_remap = (-np.resize(shifts_y, dims) + y_grid).astype(np.float32) A2 = np.stack([ cv2.remap(img.astype(np.float32), x_remap, y_remap, cv2.INTER_CUBIC) for img in A_2t ], axis=0) A2 = np.reshape(A2.transpose(1, 2, 0), (A1.shape[0], A_2t.shape[0]), order='F') if D is None: if 'csc_matrix' not in str(type(A1)): A1 = scipy.sparse.csc_matrix(A1) if 'csc_matrix' not in str(type(A2)): A2 = scipy.sparse.csc_matrix(A2) cm_1 = com(A1, dims[0], dims[1]) cm_2 = com(A2, dims[0], dims[1]) A1_tr = (A1 > 0).astype(float) A2_tr = (A2 > 0).astype(float) D = distance_masks([A1_tr, A2_tr], [cm_1, cm_2], max_dist, enclosed_thr=enclosed_thr) matches, costs = find_matches(D, print_assignment=print_assignment) matches = matches[0] costs = costs[0] #%% store indeces idx_tp = np.where(np.array(costs) < thresh_cost)[0] if len(idx_tp) > 0: matched_ROIs1 = matches[0][idx_tp] # ground truth matched_ROIs2 = matches[1][idx_tp] # algorithm - comp non_matched1 = np.setdiff1d(list(range(D[0].shape[0])), matches[0][idx_tp]) non_matched2 = np.setdiff1d(list(range(D[0].shape[1])), matches[1][idx_tp]) TP = np.sum(np.array(costs) < 
thresh_cost) * 1. else: TP = 0. plot_results = False matched_ROIs1 = [] matched_ROIs2 = [] non_matched1 = list(range(D[0].shape[0])) non_matched2 = list(range(D[0].shape[1])) #%% compute precision and recall FN = D[0].shape[0] - TP FP = D[0].shape[1] - TP TN = 0 performance = dict() performance['recall'] = old_div(TP, (TP + FN)) performance['precision'] = old_div(TP, (TP + FP)) performance['accuracy'] = old_div((TP + TN), (TP + FP + FN + TN)) performance['f1_score'] = 2 * TP / (2 * TP + FP + FN) print(performance) if plot_results: if Cn is None: if template1 is not None: Cn = template1 elif template2 is not None: Cn = template2 else: Cn = np.reshape(A1.sum(1) + A2.sum(1), dims, order='F') masks_1 = np.reshape(A1.toarray(), dims + (-1, ), order='F').transpose(2, 0, 1) masks_2 = np.reshape(A2.toarray(), dims + (-1, ), order='F').transpose(2, 0, 1) # try : #Plotting function level = 0.98 pl.rcParams['pdf.fonttype'] = 42 font = {'family': 'Myriad Pro', 'weight': 'regular', 'size': 10} pl.rc('font', **font) lp, hp = np.nanpercentile(Cn, [5, 95]) pl.subplot(1, 2, 1) pl.imshow(Cn, vmin=lp, vmax=hp, cmap=cmap) [ pl.contour(norm_nrg(mm), levels=[level], colors='w', linewidths=1) for mm in masks_1[matched_ROIs1] ] [ pl.contour(norm_nrg(mm), levels=[level], colors='r', linewidths=1) for mm in masks_2[matched_ROIs2] ] pl.title('Matches') pl.axis('off') pl.subplot(1, 2, 2) pl.imshow(Cn, vmin=lp, vmax=hp, cmap=cmap) [ pl.contour(norm_nrg(mm), levels=[level], colors='w', linewidths=1) for mm in masks_1[non_matched1] ] [ pl.contour(norm_nrg(mm), levels=[level], colors='r', linewidths=1) for mm in masks_2[non_matched2] ] pl.title('Mismatches') pl.axis('off') # except Exception as e: # print("not able to plot precision recall usually because we are on travis") # print(e) return matched_ROIs1, matched_ROIs2, non_matched1, non_matched2, performance
def _idx_from_zero(idx_tensor): return idx_tensor - np.min(idx_tensor)
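# Tiny example (assumes numpy as np): shift indices so the smallest becomes 0.
print(_idx_from_zero(np.array([3, 5, 4])))  # [0 2 1]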
with gzip.open( os.path.join(NME_nxgraphs, 'discrall_dict_allNMEs_10binsize.pklz'), 'rb') as fin: discrall_dict_allNMEs = pickle.load(fin) ######### # exclude rich club bcs differnet dimenstions delRC = discrall_dict_allNMEs.pop('discrallDEL_rich_club') mstRC = discrall_dict_allNMEs.pop('discrallMST_rich_club') delsC = discrall_dict_allNMEs.pop('discrallMST_scluster') mstsC = discrall_dict_allNMEs.pop('discrallDEL_scluster') ########## for nxGdiscfeatures.shape = (202, 420) ds = discrall_dict_allNMEs.pop('DEL_dassort') ms = discrall_dict_allNMEs.pop('MST_dassort') # normalize 0-1 x_min, x_max = np.min(ds, 0), np.max(ds, 0) ds = (ds - x_min) / (x_max - x_min) x_min, x_max = np.min(ms, 0), np.max(ms, 0) ms = (ms - x_min) / (x_max - x_min) ## concatenate dictionary items into a nd array ## normalize per x normgdiscf = [] for fname, fnxg in discrall_dict_allNMEs.iteritems(): print 'Normalizing.. {} \n min={}, \n max={} \n'.format( fname, np.min(fnxg, 0), np.max(fnxg, 0)) x_min, x_max = np.min(fnxg, 0), np.max(fnxg, 0) x_max[x_max == 0] = 1.0e-07 fnxg = (fnxg - x_min) / (x_max - x_min) normgdiscf.append(fnxg) print(np.min(fnxg, 0))
setting = '/your path/results/davis/' out_folder = '/your path/results/davis-crf/' for d in listdir(setting): vidDir = join(davis_path, d) resDir = join(out_folder, d) if not os.path.exists(resDir): os.makedirs(resDir) for f in listdir(vidDir): img = imread(join(vidDir, f)) segDir = join(setting, d) frameName = str.split(f, '.')[0] anno_rgb = imread(segDir + '/' + frameName + '.png').astype(np.uint32) min_val = np.min(anno_rgb.ravel()) max_val = np.max(anno_rgb.ravel()) out = (anno_rgb.astype('float') - min_val) / (max_val - min_val) labels = np.zeros((2, img.shape[0], img.shape[1])) labels[1, :, :] = out labels[0, :, :] = 1 - out colors = [0, 255] colorize = np.empty((len(colors), 1), np.uint8) colorize[:,0] = colors n_labels = 2 crf = dcrf.DenseCRF(img.shape[1] * img.shape[0], n_labels) U = unary_from_softmax(labels)
def main(): torch.set_num_threads(1) device = torch.device("cuda:0" if args.cuda else "cpu") if args.vis: from visdom import Visdom viz = Visdom(port=args.port) win = None envs = make_vec_envs(args.env_name, args.seed, 1, args.gamma, args.log_dir, args.add_timestep, device, False) # Determine the observation and action lengths for the robot and human, respectively obs = envs.reset() action = torch.tensor([envs.action_space.sample()]) _, _, _, info = envs.step(action) obs_robot_len = info[0]['obs_robot_len'] obs_human_len = info[0]['obs_human_len'] action_robot_len = info[0]['action_robot_len'] action_human_len = info[0]['action_human_len'] obs_robot = obs[:, :obs_robot_len] obs_human = obs[:, obs_robot_len:] if len(obs_robot[0]) != obs_robot_len or len( obs_human[0]) != obs_human_len: print('robot obs shape:', obs_robot.shape, 'obs space robot shape:', (obs_robot_len, )) print('human obs shape:', obs_human.shape, 'obs space human shape:', (obs_human_len, )) exit() envs = make_vec_envs(args.env_name, args.seed, args.num_processes, args.gamma, args.log_dir, args.add_timestep, device, False) # Reset environment obs = envs.reset() obs_robot = obs[:, :obs_robot_len] obs_human = obs[:, obs_robot_len:] action_space_robot = spaces.Box(low=np.array([-1.0] * action_robot_len), high=np.array([1.0] * action_robot_len), dtype=np.float32) action_space_human = spaces.Box(low=np.array([-1.0] * action_human_len), high=np.array([1.0] * action_human_len), dtype=np.float32) if args.load_policy is not None: actor_critic_robot, actor_critic_human, ob_rms = torch.load( args.load_policy) vec_norm = get_vec_normalize(envs) if vec_norm is not None: vec_norm.eval() vec_norm.ob_rms = ob_rms else: actor_critic_robot = Policy( [obs_robot_len], action_space_robot, base_kwargs={'recurrent': args.recurrent_policy}) actor_critic_human = Policy( [obs_human_len], action_space_human, base_kwargs={'recurrent': args.recurrent_policy}) actor_critic_robot.to(device) actor_critic_human.to(device) if args.algo == 'a2c': agent = algo.A2C_ACKTR(actor_critic, args.value_loss_coef, args.entropy_coef, lr=args.lr, eps=args.eps, alpha=args.alpha, max_grad_norm=args.max_grad_norm) elif args.algo == 'ppo': agent_robot = algo.PPO(actor_critic_robot, args.clip_param, args.ppo_epoch, args.num_mini_batch, args.value_loss_coef, args.entropy_coef, lr=args.lr, eps=args.eps, max_grad_norm=args.max_grad_norm) agent_human = algo.PPO(actor_critic_human, args.clip_param, args.ppo_epoch, args.num_mini_batch, args.value_loss_coef, args.entropy_coef, lr=args.lr, eps=args.eps, max_grad_norm=args.max_grad_norm) elif args.algo == 'acktr': agent = algo.A2C_ACKTR(actor_critic, args.value_loss_coef, args.entropy_coef, acktr=True) rollouts_robot = RolloutStorage( args.num_steps, args.num_processes, [obs_robot_len], action_space_robot, actor_critic_robot.recurrent_hidden_state_size) rollouts_human = RolloutStorage( args.num_steps, args.num_processes, [obs_human_len], action_space_human, actor_critic_human.recurrent_hidden_state_size) rollouts_robot.obs[0].copy_(obs_robot) rollouts_robot.to(device) rollouts_human.obs[0].copy_(obs_human) rollouts_human.to(device) episode_rewards = deque( maxlen=(args.num_processes if args.num_processes > 10 else 10)) start = time.time() for j in range(num_updates): if args.use_linear_lr_decay: # decrease learning rate linearly if args.algo == "acktr": # use optimizer's learning rate since it's hard-coded in kfac.py update_linear_schedule(agent.optimizer, j, num_updates, agent.optimizer.lr) else: 
update_linear_schedule(agent_robot.optimizer, j, num_updates, args.lr) update_linear_schedule(agent_human.optimizer, j, num_updates, args.lr) if args.algo == 'ppo' and args.use_linear_clip_decay: agent_robot.clip_param = args.clip_param * (1 - j / float(num_updates)) agent_human.clip_param = args.clip_param * (1 - j / float(num_updates)) for step in range(args.num_steps): # Sample actions with torch.no_grad(): value_robot, action_robot, action_log_prob_robot, recurrent_hidden_states_robot = actor_critic_robot.act( rollouts_robot.obs[step], rollouts_robot.recurrent_hidden_states[step], rollouts_robot.masks[step]) value_human, action_human, action_log_prob_human, recurrent_hidden_states_human = actor_critic_human.act( rollouts_human.obs[step], rollouts_human.recurrent_hidden_states[step], rollouts_human.masks[step]) # Obser reward and next obs action = torch.cat((action_robot, action_human), dim=-1) obs, reward, done, infos = envs.step(action) obs_robot = obs[:, :obs_robot_len] obs_human = obs[:, obs_robot_len:] for info in infos: if 'episode' in info.keys(): episode_rewards.append(info['episode']['r']) # If done then clean the history of observations. masks = torch.FloatTensor([[0.0] if done_ else [1.0] for done_ in done]) rollouts_robot.insert(obs_robot, recurrent_hidden_states_robot, action_robot, action_log_prob_robot, value_robot, reward, masks) rollouts_human.insert(obs_human, recurrent_hidden_states_human, action_human, action_log_prob_human, value_human, reward, masks) with torch.no_grad(): next_value_robot = actor_critic_robot.get_value( rollouts_robot.obs[-1], rollouts_robot.recurrent_hidden_states[-1], rollouts_robot.masks[-1]).detach() next_value_human = actor_critic_human.get_value( rollouts_human.obs[-1], rollouts_human.recurrent_hidden_states[-1], rollouts_human.masks[-1]).detach() rollouts_robot.compute_returns(next_value_robot, args.use_gae, args.gamma, args.tau) rollouts_human.compute_returns(next_value_human, args.use_gae, args.gamma, args.tau) value_loss_robot, action_loss_robot, dist_entropy_robot = agent_robot.update( rollouts_robot) value_loss_human, action_loss_human, dist_entropy_human = agent_human.update( rollouts_human) rollouts_robot.after_update() rollouts_human.after_update() # save for every interval-th episode or for the last epoch if (j % args.save_interval == 0 or j == num_updates - 1) and args.save_dir != "": save_path = os.path.join(args.save_dir, args.algo) try: os.makedirs(save_path) except OSError: pass # A really ugly way to save a model to CPU save_model_robot = actor_critic_robot save_model_human = actor_critic_human if args.cuda: save_model_robot = copy.deepcopy(actor_critic_robot).cpu() save_model_human = copy.deepcopy(actor_critic_human).cpu() save_model = [ save_model_robot, save_model_human, getattr(get_vec_normalize(envs), 'ob_rms', None) ] torch.save(save_model, os.path.join(save_path, args.env_name + ".pt")) total_num_steps = (j + 1) * args.num_processes * args.num_steps if j % args.log_interval == 0 and len(episode_rewards) > 1: end = time.time() print( "Robot/Human updates {}, num timesteps {}, FPS {} \n Last {} training episodes: mean/median reward {:.1f}/{:.1f}, min/max reward {:.1f}/{:.1f}\n" .format(j, total_num_steps, int(total_num_steps / (end - start)), len(episode_rewards), np.mean(episode_rewards), np.median(episode_rewards), np.min(episode_rewards), np.max(episode_rewards), dist_entropy_robot, value_loss_robot, action_loss_robot)) sys.stdout.flush() if (args.eval_interval is not None and len(episode_rewards) > 1 and j % 
args.eval_interval == 0): eval_envs = make_vec_envs(args.env_name, args.seed + args.num_processes, args.num_processes, args.gamma, eval_log_dir, args.add_timestep, device, True) vec_norm = get_vec_normalize(eval_envs) if vec_norm is not None: vec_norm.eval() vec_norm.ob_rms = get_vec_normalize(envs).ob_rms eval_episode_rewards = [] obs = eval_envs.reset() obs_robot = obs[:, :obs_robot_len] obs_human = obs[:, obs_robot_len:] eval_recurrent_hidden_states_robot = torch.zeros( args.num_processes, actor_critic_robot.recurrent_hidden_state_size, device=device) eval_recurrent_hidden_states_human = torch.zeros( args.num_processes, actor_critic_human.recurrent_hidden_state_size, device=device) eval_masks = torch.zeros(args.num_processes, 1, device=device) while len(eval_episode_rewards) < 10: with torch.no_grad(): _, action_robot, _, eval_recurrent_hidden_states_robot = actor_critic_robot.act( obs_robot, eval_recurrent_hidden_states_robot, eval_masks, deterministic=True) _, action_human, _, eval_recurrent_hidden_states_human = actor_critic_human.act( obs_human, eval_recurrent_hidden_states_human, eval_masks, deterministic=True) # Obser reward and next obs action = torch.cat((action_robot, action_human), dim=-1) obs, reward, done, infos = eval_envs.step(action) obs_robot = obs[:, :obs_robot_len] obs_human = obs[:, obs_robot_len:] eval_masks = torch.FloatTensor([[0.0] if done_ else [1.0] for done_ in done]) for info in infos: if 'episode' in info.keys(): eval_episode_rewards.append(info['episode']['r']) eval_envs.close() print(" Evaluation using {} episodes: mean reward {:.5f}\n".format( len(eval_episode_rewards), np.mean(eval_episode_rewards))) sys.stdout.flush() if args.vis and j % args.vis_interval == 0: try: # Sometimes monitor doesn't properly flush the outputs win = visdom_plot(viz, win, args.log_dir, args.env_name, args.algo, args.num_env_steps) except IOError: pass
for walker in range(Nwalkers):
    xrand = np.random.uniform(low=-2.0, high=2.0)
    yrand = np.random.uniform(low=-2.0, high=2.0)
    zrand = np.random.uniform(low=-2.0, high=2.0)
    # Creating the displacement vector
    displVec = np.array([xrand, yrand, zrand], dtype=float)
    # Normalizing it to unit length (divide by the Euclidean norm)
    displVec /= np.sqrt(np.sum(np.square(displVec)))
    # Adding the normalized displacement vector to our position coordinate
    r[walker, :] += displVec
# Stop once any walker leaves the box; note the z test uses zmax/zmin
if np.max(r[:, 0]) >= xmax or np.min(r[:, 0]) <= xmin or \
   np.max(r[:, 1]) >= ymax or np.min(r[:, 1]) <= ymin or \
   np.max(r[:, 2]) >= zmax or np.min(r[:, 2]) <= zmin:
    break
fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')
ax.scatter(xs=r[:, 0], ys=r[:, 1], zs=r[:, 2])
# Creating the orthogonal grid (axes through the origin)
ax.plot(xs=(xmax, 0, 0), ys=(0, 0, 0), zs=(0, 0, 0))
ax.plot(xs=(0, 0, 0), ys=(ymax, 0, 0), zs=(0, 0, 0))
ax.plot(xs=(0, 0, 0), ys=(0, 0, 0), zs=(zmax, 0, 0))
def _to_onehot(label_tensor): label_num = label_tensor.shape[0] assert np.min(label_tensor) == 0 one_hot_tensor = np.zeros((label_num, np.max(label_tensor) + 1)) one_hot_tensor[np.arange(label_num), label_tensor] = 1 return one_hot_tensor
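# A minimal usage sketch (illustrative, not part of the original source): _to_onehot maps
# a 1-D integer label vector whose smallest label is 0 to a (num_labels, num_classes)
# one-hot matrix with a single 1 per row. The variable names below are hypothetical.
example_labels = np.array([0, 2, 1, 2])
example_one_hot = _to_onehot(example_labels)
# example_one_hot (float array):
# [[1. 0. 0.]
#  [0. 0. 1.]
#  [0. 1. 0.]
#  [0. 0. 1.]]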
                    output=True)
os.system("clear")
data = wf.readframes(_chunk)
while len(data) > 0:
    stream.write(data)
    # np.fromstring is deprecated for binary buffers; np.frombuffer is the replacement
    data_visual = np.frombuffer(data, dtype=np.int16)
    dataL = data_visual[0::2]   # left channel samples
    dataR = data_visual[1::2]   # right channel samples
    data = wf.readframes(_chunk)
    if len(dataL) == 0 or len(dataR) == 0:
        break
    peakL = np.abs(np.max(dataL) - np.min(dataL)) / maxValue
    peakR = np.abs(np.max(dataR) - np.min(dataR)) / maxValue
    lString = "|" * int(peakL * bars) + " " * int(bars - peakL * bars)
    rString = "|" * int(peakR * bars) + " " * int(bars - peakR * bars)
    print("L=[%s] R=[%s]" % (lString, rString))
stream.stop_stream()
stream.close()
audio.terminate()
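# Illustrative sketch (not from the original source) of the level-meter math above: the
# peak-to-peak amplitude of each channel is divided by a full-scale constant (maxValue in
# the script) and rendered as a fixed-width bar of '|' characters. The helper name
# _level_bar and the constant 32768.0 (full scale for int16 audio) are assumptions.
import numpy as np

def _level_bar(samples, max_value=32768.0, bars=40):
    # peak-to-peak amplitude normalized by the full-scale constant, clamped to the bar width
    peak = np.abs(np.max(samples) - np.min(samples)) / max_value
    filled = min(int(peak * bars), bars)
    return "|" * filled + " " * (bars - filled)

demo_samples = (np.sin(np.linspace(0, 2 * np.pi, 1024)) * 8000).astype(np.int16)
print("L=[%s]" % _level_bar(demo_samples))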
def run_training(data_train, data_test, queries, query_summary, Tags, concepts,
                 concept_embeeding, model_save_dir, test_mode):
    if not os.path.exists(model_save_dir):
        os.makedirs(model_save_dir)
    max_f1 = MAX_F1

    with tf.Graph().as_default():
        global_step = tf.train.get_or_create_global_step()

        # placeholders
        features_holder = tf.placeholder(tf.float32, shape=(hp.bc * hp.gpu_num, hp.seq_len, D_FEATURE))
        labels_holder = tf.placeholder(tf.float32, shape=(hp.bc * hp.gpu_num, hp.seq_len, D_OUTPUT))
        scores_src_holder = tf.placeholder(tf.float32, shape=(hp.bc * hp.gpu_num, hp.seq_len + CONCEPT_NUM))
        scores_tgt_holder = tf.placeholder(tf.float32, shape=(hp.bc * hp.gpu_num, hp.seq_len))
        txt_emb_holder = tf.placeholder(tf.float32, shape=(hp.bc * hp.gpu_num, CONCEPT_NUM, D_TXT_EMB))
        img_emb_holder = tf.placeholder(tf.float32, shape=(hp.bc * hp.gpu_num, CONCEPT_NUM, D_IMG_EMB))
        dropout_holder = tf.placeholder(tf.float32, shape=())
        training_holder = tf.placeholder(tf.bool, shape=())

        # training operations
        lr = noam_scheme(hp.lr_noam, global_step, hp.warmup)
        opt_train = tf.train.AdamOptimizer(lr)

        # graph building
        tower_grads_train = []
        logits_list = []
        loss_list = []
        for gpu_index in range(hp.gpu_num):
            with tf.device('/gpu:%d' % gpu_index):
                features = features_holder[gpu_index * hp.bc: (gpu_index + 1) * hp.bc]
                labels = labels_holder[gpu_index * hp.bc: (gpu_index + 1) * hp.bc]
                scores_src = scores_src_holder[gpu_index * hp.bc: (gpu_index + 1) * hp.bc]
                scores_tgt = scores_tgt_holder[gpu_index * hp.bc: (gpu_index + 1) * hp.bc]
                txt_emb = txt_emb_holder[gpu_index * hp.bc: (gpu_index + 1) * hp.bc]
                img_emb = img_emb_holder[gpu_index * hp.bc: (gpu_index + 1) * hp.bc]
                # predict concept distribution: relevance of the input shots over all concepts
                logits = transformer(features, labels, scores_src, scores_tgt, txt_emb, img_emb,
                                     dropout_holder, training_holder, hp)
                logits_list.append(logits)
                loss = tower_loss(logits, labels)
                varlist = tf.trainable_variables()  # train all variables
                grads_train = opt_train.compute_gradients(loss, varlist)
                thresh = GRAD_THRESHOLD  # clip gradients to prevent explosion
                grads_train_cap = [(tf.clip_by_value(grad, -thresh, thresh), var) for grad, var in grads_train]
                tower_grads_train.append(grads_train_cap)
                loss_list.append(loss)
        grads_t = average_gradients(tower_grads_train)
        train_op = opt_train.apply_gradients(grads_t, global_step=global_step)
        if test_mode == 1:
            train_op = tf.no_op()

        # session
        config = tf.ConfigProto(allow_soft_placement=True)
        config.gpu_options.allow_growth = True
        sess = tf.Session(config=config)
        init = tf.global_variables_initializer()
        sess.run(init)

        # load model
        saver_overall = tf.train.Saver(max_to_keep=100)
        if LOAD_CKPT_MODEL:
            logging.info(' Ckpt Model Restoring: ' + CKPT_MODEL_PATH)
            saver_overall.restore(sess, CKPT_MODEL_PATH)
            logging.info(' Ckpt Model Restored !')

        # train & test preparation
        train_scheme = train_scheme_build(data_train, hp.seq_len)
        test_scheme, test_vids = test_scheme_build(data_test, hp.seq_len)
        epoch_step = math.ceil(len(train_scheme) / (hp.gpu_num * hp.bc))
        max_test_step = math.ceil(len(test_scheme) / (hp.gpu_num * hp.bc))

        # concept embedding processing
        txt_emb_b = []
        img_emb_b = []
        for c in concepts:
            txt_emb_b.append(concept_embeeding[c]['txt'])
            img_emb_b.append(concept_embeeding[c]['img'])
        txt_emb_b = np.array(txt_emb_b).reshape([1, CONCEPT_NUM, D_TXT_EMB])
        img_emb_b = np.array(img_emb_b).reshape([1, CONCEPT_NUM, D_IMG_EMB])
        txt_emb_b = np.tile(txt_emb_b, [hp.gpu_num * hp.bc, 1, 1])  # (bc*gpu_num)*48*d_txt
        img_emb_b = np.tile(img_emb_b, [hp.gpu_num * hp.bc, 1, 1])

        # begin training
        ob_loss = []
        timepoint = time.time()
        for step in range(hp.maxstep):
            features_b, labels_b, scores_b = get_batch_train(data_train, train_scheme, step,
                                                             hp.gpu_num, hp.bc, hp.seq_len)
            # open all concept nodes in the encoder
            scores_src_b = np.hstack((scores_b, np.ones((hp.gpu_num * hp.bc, CONCEPT_NUM))))
            scores_tgt_b = scores_b
            observe = sess.run([train_op] + loss_list + logits_list + [global_step, lr],
                               feed_dict={features_holder: features_b,
                                          labels_holder: labels_b,
                                          scores_src_holder: scores_src_b,
                                          scores_tgt_holder: scores_tgt_b,
                                          txt_emb_holder: txt_emb_b,
                                          img_emb_holder: img_emb_b,
                                          dropout_holder: hp.dropout,
                                          training_holder: True})
            # each GPU returns the loss of its own slice of the same batch; average them later
            loss_batch = np.array(observe[1:1 + hp.gpu_num])
            ob_loss.append(loss_batch)

            # save checkpoint & evaluate
            epoch = step / epoch_step
            if step % epoch_step == 0 or (step + 1) == hp.maxstep:
                if step == 0 and test_mode == 0:
                    continue
                duration = time.time() - timepoint
                timepoint = time.time()
                loss_array = np.array(ob_loss)
                ob_loss.clear()
                logging.info(' Step %d: %.3f sec' % (step, duration))
                logging.info(' Evaluate: ' + str(step) + ' Epoch: ' + str(epoch))
                logging.info(' Average Loss: ' + str(np.mean(loss_array)) + ' Min Loss: ' +
                             str(np.min(loss_array)) + ' Max Loss: ' + str(np.max(loss_array)))
                if not int(epoch) % hp.eval_epoch == 0:
                    continue  # evaluate less frequently
                # predict every segment of every test video in order; once all segments are
                # predicted, rank them within each video and compute the metrics
                pred_scores = []  # predicted scores returned for each batch
                for test_step in range(max_test_step):
                    features_b, labels_b, scores_b = get_batch_test(data_test, test_scheme, test_step,
                                                                    hp.gpu_num, hp.bc, hp.seq_len)
                    # open all concept nodes in the encoder
                    scores_src_b = np.hstack((scores_b, np.ones((hp.gpu_num * hp.bc, CONCEPT_NUM))))
                    scores_tgt_b = scores_b
                    logits_temp_list = sess.run(logits_list,
                                                feed_dict={features_holder: features_b,
                                                           labels_holder: labels_b,
                                                           scores_src_holder: scores_src_b,
                                                           scores_tgt_holder: scores_tgt_b,
                                                           txt_emb_holder: txt_emb_b,
                                                           img_emb_holder: img_emb_b,
                                                           dropout_holder: hp.dropout,
                                                           training_holder: False})
                    for preds in logits_temp_list:
                        pred_scores.append(preds.reshape((-1, D_OUTPUT)))
                p, r, f = evaluation(pred_scores, queries, query_summary, Tags, test_vids, concepts)
                logging.info('Precision: %.3f, Recall: %.3f, F1: %.3f' % (p, r, f))
                random.shuffle(train_scheme)
                if test_mode == 1:
                    return
                # save model
                if step > MIN_TRAIN_STEPS - PRESTEPS and f >= max_f1:
                    if f > max_f1:
                        max_f1 = f
                    model_path = model_save_dir + 'S%d-E%d-L%.6f-F%.3f' % (step, epoch, np.mean(loss_array), f)
                    saver_overall.save(sess, model_path)
                    logging.info('Model Saved: ' + model_path + '\n')
            if step % 3000 == 0 and step > 0:
                model_path = model_save_dir + 'S%d-E%d' % (step + PRESTEPS, epoch)
                # saver_overall.save(sess, model_path)
                logging.info('Model Saved: ' + str(step + PRESTEPS))

        # saving final model
        model_path = model_save_dir + 'S%d' % (hp.maxstep + PRESTEPS)
        # saver_overall.save(sess, model_path)
        logging.info('Model Saved: ' + str(hp.maxstep + PRESTEPS))
def _format_scatter_plot_axes(ax, space, ylabel, dim_labels=None, settings: PlotSettings = plot_settings): # Work out min, max of y axis for the diagonal so we can adjust # them all to the same value diagonal_ylim = (np.min([ ax[i, i].get_ylim()[0] for i in range(space.n_dims) ]), np.max([ax[i, i].get_ylim()[1] for i in range(space.n_dims)])) if dim_labels is None: dim_labels = [ "$X_{%i}$" % i if d.name is None else d.name for i, d in enumerate(space.dimensions) ] # Deal with formatting of the axes for i in range(space.n_dims): # rows for j in range(space.n_dims): # columns ax_ = ax[i, j] if j > i: ax_.axis("off") # off-diagonal axis if i != j: # plots on the diagonal are special, like Texas. They have # their own range so do not mess with them. ax_.set_ylim(*space.dimensions[i].bounds) ax_.set_xlim(*space.dimensions[j].bounds) if j > 0: ax_.set_yticklabels([]) else: ax_.set_ylabel(dim_labels[i], fontsize=settings.lbl_sz, color=settings.lbl_col) ax_.tick_params(axis='y', labelsize=settings.tk_sz, labelcolor=settings.tk_col) # for all rows except ... if i < space.n_dims - 1: ax_.set_xticklabels([]) # ... the bottom row else: [l.set_rotation(45) for l in ax_.get_xticklabels()] ax_.set_xlabel(dim_labels[j], fontsize=settings.lbl_sz, color=settings.lbl_col) ax_.tick_params(axis='x', labelsize=settings.tk_sz, labelcolor=settings.tk_col) # configure plot for linear vs log-scale priors = (space.dimensions[j].prior, space.dimensions[i].prior) scale_setters = (ax_.set_xscale, ax_.set_yscale) loc_setters = (ax_.xaxis.set_major_locator, ax_.yaxis.set_major_locator) for set_major_locator, set_scale, prior in zip( loc_setters, scale_setters, priors): if prior == 'log-uniform': set_scale('log') else: set_major_locator(MaxNLocator(6, prune='both')) else: ax_.set_ylim(*diagonal_ylim) ax_.yaxis.tick_right() ax_.yaxis.set_label_position('right') ax_.yaxis.set_ticks_position('both') ax_.set_ylabel(ylabel, fontsize=settings.lbl_sz, color=settings.lbl_col) ax_.tick_params(axis='y', labelsize=settings.tk_sz, labelcolor=settings.tk_col) ax_.xaxis.tick_top() ax_.xaxis.set_label_position('top') ax_.set_xlabel(dim_labels[j], fontsize=settings.lbl_sz, color=settings.lbl_col) ax_.tick_params(axis='x', labelsize=settings.tk_sz, labelcolor=settings.tk_col) if space.dimensions[i].prior == 'log-uniform': ax_.set_xscale('log') else: ax_.xaxis.set_major_locator(MaxNLocator(6, prune='both')) return ax
def init(mdlParams_): mdlParams = {} # Save summaries and model here local_path = '/isic2019/' # local_path = '\isic2019\\' mdlParams['saveDir'] = mdlParams_['pathBase']+'/' # Data is loaded from here mdlParams['dataDir'] = mdlParams_['pathBase']+local_path ### Model Selection ### mdlParams['model_type'] = 'Dense169' mdlParams['dataset_names'] = ['official'] # ,'sevenpoint_rez3_ll'] mdlParams['file_ending'] = '.jpg' mdlParams['exclude_inds'] = False mdlParams['same_sized_crops'] = True mdlParams['multiCropEval'] = 9 mdlParams['var_im_size'] = True mdlParams['orderedCrop'] = True mdlParams['voting_scheme'] = 'average' mdlParams['classification'] = True mdlParams['balance_classes'] = 9 mdlParams['extra_fac'] = 1.0 mdlParams['numClasses'] = 9 mdlParams['no_c9_eval'] = True mdlParams['numOut'] = mdlParams['numClasses'] mdlParams['numCV'] = 1 mdlParams['trans_norm_first'] = True # Scale up for b1-b7 mdlParams['input_size'] = [224, 224, 3] ### Training Parameters ### # Batch size mdlParams['batchSize'] = 20 # *len(mdlParams['numGPUs']) # Initial learning rate mdlParams['learning_rate'] = 0.000015 # *len(mdlParams['numGPUs']) # Lower learning rate after no improvement over 100 epochs mdlParams['lowerLRAfter'] = 25 # If there is no validation set, start lowering the LR after X steps mdlParams['lowerLRat'] = 50 # Divide learning rate by this value mdlParams['LRstep'] = 5 # Maximum number of training iterations mdlParams['training_steps'] = 100 # 250 # Display error every X steps mdlParams['display_step'] = 10 # Scale? mdlParams['scale_targets'] = False # Peak at test error during training? (generally, dont do this!) mdlParams['peak_at_testerr'] = False # Print trainerr mdlParams['print_trainerr'] = False # Subtract trainset mean? mdlParams['subtract_set_mean'] = False mdlParams['setMean'] = np.array([0.0, 0.0, 0.0]) mdlParams['setStd'] = np.array([1.0, 1.0, 1.0]) # Data AUG # mdlParams['full_color_distort'] = True mdlParams['autoaugment'] = True mdlParams['flip_lr_ud'] = False mdlParams['full_rot'] = 0 mdlParams['scale'] = (0.8, 1.2) mdlParams['shear'] = 10 mdlParams['cutout'] = 0 ### Data ### mdlParams['preload'] = False # Labels first # Targets, as dictionary, indexed by im file name mdlParams['labels_dict'] = {} path1 = mdlParams['dataDir'] + '/labels/' # path1 = mdlParams['dataDir'] + '\labels\\' # All sets allSets = glob(path1 + '*/') # allSets = glob(path1 + '*\\') # Go through all sets for i in range(len(allSets)): # Check if want to include this dataset foundSet = False for j in range(len(mdlParams['dataset_names'])): if mdlParams['dataset_names'][j] in allSets[i]: foundSet = True if not foundSet: continue # Find csv file files = sorted(glob(allSets[i] + '*')) for j in range(len(files)): if 'csv' in files[j]: break # Load csv file with open(files[j], newline='') as csvfile: labels_str = csv.reader(csvfile, delimiter=',', quotechar='|') for row in labels_str: if 'image' == row[0]: continue # if 'ISIC' in row[0] and '_downsampled' in row[0]: # print(row[0]) if row[0] + '_downsampled' in mdlParams['labels_dict']: print("removed", row[0] + '_downsampled') continue if mdlParams['numClasses'] == 7: mdlParams['labels_dict'][row[0]] = np.array( [int(float(row[1])), int(float(row[2])), int(float(row[3])), int(float(row[4])), int(float(row[5])), int(float(row[6])), int(float(row[7]))]) elif mdlParams['numClasses'] == 8: if len(row) < 9 or row[8] == '': class_8 = 0 else: class_8 = int(float(row[8])) mdlParams['labels_dict'][row[0]] = np.array( [int(float(row[1])), int(float(row[2])), 
int(float(row[3])), int(float(row[4])), int(float(row[5])), int(float(row[6])), int(float(row[7])), class_8]) elif mdlParams['numClasses'] == 9: if len(row) < 9 or row[8] == '': class_8 = 0 else: class_8 = int(float(row[8])) if len(row) < 10 or row[9] == '': class_9 = 0 else: class_9 = int(float(row[9])) mdlParams['labels_dict'][row[0]] = np.array( [int(float(row[1])), int(float(row[2])), int(float(row[3])), int(float(row[4])), int(float(row[5])), int(float(row[6])), int(float(row[7])), class_8, class_9]) # Save all im paths here mdlParams['im_paths'] = [] mdlParams['labels_list'] = [] # Define the sets path1 = mdlParams['dataDir'] + '/images/' # path1 = mdlParams['dataDir'] + '\images\\' # All sets allSets = sorted(glob(path1 + '*/')) # allSets = sorted(glob(path1 + '*\\')) # Ids which name the folders # Make official first dataset for i in range(len(allSets)): if mdlParams['dataset_names'][0] in allSets[i]: temp = allSets[i] allSets.remove(allSets[i]) allSets.insert(0, temp) print(allSets) # Set of keys, for marking old HAM10000 mdlParams['key_list'] = [] if mdlParams['exclude_inds']: with open(mdlParams['saveDir'] + 'indices_exclude.pkl', 'rb') as f: indices_exclude = pickle.load(f) exclude_list = [] for i in range(len(allSets)): # All files in that set files = sorted(glob(allSets[i] + '*')) # Check if there is something in there, if not, discard if len(files) == 0: continue # Check if want to include this dataset foundSet = False for j in range(len(mdlParams['dataset_names'])): if mdlParams['dataset_names'][j] in allSets[i]: foundSet = True if not foundSet: continue for j in range(len(files)): if '.jpg' in files[j] or '.jpeg' in files[j] or '.JPG' in files[j] or '.JPEG' in files[j] or '.png' in \ files[j] or '.PNG' in files[j]: # Add according label, find it first found_already = False for key in mdlParams['labels_dict']: if key + mdlParams['file_ending'] in files[j]: if found_already: print("Found already:", key, files[j]) mdlParams['key_list'].append(key) mdlParams['labels_list'].append(mdlParams['labels_dict'][key]) found_already = True if found_already: mdlParams['im_paths'].append(files[j]) if mdlParams['exclude_inds']: for key in indices_exclude: if key in files[j]: exclude_list.append(indices_exclude[key]) # Convert label list to array mdlParams['labels_array'] = np.array(mdlParams['labels_list']) print(np.mean(mdlParams['labels_array'], axis=0)) # Create indices list with HAM10000 only mdlParams['HAM10000_inds'] = [] HAM_START = 24306 HAM_END = 34320 for j in range(len(mdlParams['key_list'])): try: curr_id = [int(s) for s in re.findall(r'\d+', mdlParams['key_list'][j])][-1] except: continue if curr_id >= HAM_START and curr_id <= HAM_END: mdlParams['HAM10000_inds'].append(j) mdlParams['HAM10000_inds'] = np.array(mdlParams['HAM10000_inds']) print("Len ham", len(mdlParams['HAM10000_inds'])) # Perhaps preload images if mdlParams['preload']: mdlParams['images_array'] = np.zeros( [len(mdlParams['im_paths']), mdlParams['input_size_load'][0], mdlParams['input_size_load'][1], mdlParams['input_size_load'][2]], dtype=np.uint8) for i in range(len(mdlParams['im_paths'])): x = scipy.ndimage.imread(mdlParams['im_paths'][i]) # x = x.astype(np.float32) # Scale to 0-1 # min_x = np.min(x) # max_x = np.max(x) # x = (x-min_x)/(max_x-min_x) mdlParams['images_array'][i, :, :, :] = x if i % 1000 == 0: print(i + 1, "images loaded...") if mdlParams['subtract_set_mean']: mdlParams['images_means'] = np.zeros([len(mdlParams['im_paths']), 3]) for i in range(len(mdlParams['im_paths'])): x = 
scipy.ndimage.imread(mdlParams['im_paths'][i]) x = x.astype(np.float32) # Scale to 0-1 min_x = np.min(x) max_x = np.max(x) x = (x - min_x) / (max_x - min_x) mdlParams['images_means'][i, :] = np.mean(x, (0, 1)) if i % 1000 == 0: print(i + 1, "images processed for mean...") ### Define Indices ### with open(mdlParams['saveDir'] + 'indices_isic2019.pkl', 'rb') as f: indices = pickle.load(f) mdlParams['trainIndCV'] = indices['trainIndCV'] mdlParams['valIndCV'] = indices['valIndCV'] if mdlParams['exclude_inds']: exclude_list = np.array(exclude_list) all_inds = np.arange(len(mdlParams['im_paths'])) exclude_inds = all_inds[exclude_list.astype(bool)] for i in range(len(mdlParams['trainIndCV'])): mdlParams['trainIndCV'] = np.setdiff1d(mdlParams['trainIndCV'], exclude_inds) for i in range(len(mdlParams['valIndCV'])): mdlParams['valIndCV'] = np.setdiff1d(mdlParams['valIndCV'], exclude_inds) # Consider case with more than one set if len(mdlParams['dataset_names']) > 1: restInds = np.array(np.arange(25331, mdlParams['labels_array'].shape[0])) for i in range(mdlParams['numCV']): mdlParams['trainIndCV'] = np.concatenate((mdlParams['trainIndCV'], restInds)) print("Train") # for i in range(len(mdlParams['trainIndCV'])): # print(mdlParams['trainIndCV'][i].shape) # print("Val") # for i in range(len(mdlParams['valIndCV'])): # print(mdlParams['valIndCV'][i].shape) # Use this for ordered multi crops if mdlParams['orderedCrop']: # Crop positions, always choose multiCropEval to be 4, 9, 16, 25, etc. mdlParams['cropPositions'] = np.zeros([len(mdlParams['im_paths']), mdlParams['multiCropEval'], 2], dtype=np.int64) # mdlParams['imSizes'] = np.zeros([len(mdlParams['im_paths']),mdlParams['multiCropEval'],2],dtype=np.int64) for u in range(len(mdlParams['im_paths'])): height, width = imagesize.get(mdlParams['im_paths'][u]) if width < mdlParams['input_size'][0]: height = int(mdlParams['input_size'][0] / float(width)) * height width = mdlParams['input_size'][0] if height < mdlParams['input_size'][0]: width = int(mdlParams['input_size'][0] / float(height)) * width height = mdlParams['input_size'][0] ind = 0 for i in range(np.int32(np.sqrt(mdlParams['multiCropEval']))): for j in range(np.int32(np.sqrt(mdlParams['multiCropEval']))): mdlParams['cropPositions'][u, ind, 0] = mdlParams['input_size'][0] / 2 + i * ( (width - mdlParams['input_size'][1]) / (np.sqrt(mdlParams['multiCropEval']) - 1)) mdlParams['cropPositions'][u, ind, 1] = mdlParams['input_size'][1] / 2 + j * ( (height - mdlParams['input_size'][0]) / (np.sqrt(mdlParams['multiCropEval']) - 1)) # mdlParams['imSizes'][u,ind,0] = curr_im_size[0] ind += 1 # Sanity checks # print("Positions",mdlParams['cropPositions']) # Test image sizes height = mdlParams['input_size'][0] width = mdlParams['input_size'][1] for u in range(len(mdlParams['im_paths'])): height_test, width_test = imagesize.get(mdlParams['im_paths'][u]) if width_test < mdlParams['input_size'][0]: height_test = int(mdlParams['input_size'][0] / float(width_test)) * height_test width_test = mdlParams['input_size'][0] if height_test < mdlParams['input_size'][0]: width_test = int(mdlParams['input_size'][0] / float(height_test)) * width_test height_test = mdlParams['input_size'][0] test_im = np.zeros([width_test, height_test]) for i in range(mdlParams['multiCropEval']): im_crop = test_im[np.int32(mdlParams['cropPositions'][u, i, 0] - height / 2):np.int32( mdlParams['cropPositions'][u, i, 0] - height / 2) + height, np.int32(mdlParams['cropPositions'][u, i, 1] - width / 2):np.int32( mdlParams['cropPositions'][u, i, 1] - 
width / 2) + width] if im_crop.shape[0] != mdlParams['input_size'][0]: print("Wrong shape", im_crop.shape[0], mdlParams['im_paths'][u]) if im_crop.shape[1] != mdlParams['input_size'][1]: print("Wrong shape", im_crop.shape[1], mdlParams['im_paths'][u]) return mdlParams
def get_raw_examples(example_file_name, num_examples, example_dir_name, example_id_file_name): """Returns raw examples. The difference between `get_raw_examples` and `get_examples_for_inference` is that `get_raw_examples` returns examples in their raw form, *not* pre-processed to be fed through a model for inference. :param example_file_name: See doc for `get_examples_for_inference`. :param num_examples: Same. :param example_dir_name: Same. :param example_id_file_name: Same. :return: example_dict: See doc for `example_io.read_file`. """ error_checking.assert_is_string(example_file_name) use_specific_ids = example_file_name == '' if use_specific_ids: error_checking.assert_is_string(example_id_file_name) print('Reading desired example IDs from: "{0:s}"...'.format( example_id_file_name)) example_id_strings = read_example_ids_from_netcdf(example_id_file_name) valid_times_unix_sec = example_utils.parse_example_ids( example_id_strings)[example_utils.VALID_TIMES_KEY] example_file_names = example_io.find_many_files( directory_name=example_dir_name, first_time_unix_sec=numpy.min(valid_times_unix_sec), last_time_unix_sec=numpy.max(valid_times_unix_sec)) num_files = len(example_file_names) example_dicts = [dict()] * num_files for i in range(num_files): print('Reading data from: "{0:s}"...'.format( example_file_names[i])) example_dicts[i] = example_io.read_file(example_file_names[i]) example_dict = example_utils.concat_examples(example_dicts) good_indices = example_utils.find_examples( all_id_strings=example_dict[example_utils.EXAMPLE_IDS_KEY], desired_id_strings=example_id_strings, allow_missing=False) example_dict = example_utils.subset_by_index( example_dict=example_dict, desired_indices=good_indices) else: error_checking.assert_is_string(example_dir_name) error_checking.assert_is_integer(num_examples) error_checking.assert_is_greater(num_examples, 0) print('Reading data from: "{0:s}"...'.format(example_file_name)) example_dict = example_io.read_file(example_file_name) num_examples_total = len(example_dict[example_utils.VALID_TIMES_KEY]) desired_indices = numpy.linspace(0, num_examples_total - 1, num=num_examples_total, dtype=int) if num_examples < num_examples_total: desired_indices = numpy.random.choice(desired_indices, size=num_examples, replace=False) example_dict = example_utils.subset_by_index( example_dict=example_dict, desired_indices=desired_indices) return example_dict
def plot_convergence(*args, **kwargs): """Plot one or several convergence traces. Parameters ---------- * `args[i]` [`OptimizeResult`, list of `OptimizeResult`, or tuple]: The result(s) for which to plot the convergence trace. - if `OptimizeResult`, then draw the corresponding single trace; - if list of `OptimizeResult`, then draw the corresponding convergence traces in transparency, along with the average convergence trace; - if tuple, then `args[i][0]` should be a string label and `args[i][1]` an `OptimizeResult` or a list of `OptimizeResult`. * `ax` [`Axes`, optional]: The matplotlib axes on which to draw the plot, or `None` to create a new one. * `true_minimum` [float, optional]: The true minimum value of the function, if known. * `yscale` [None or string, optional]: The scale for the y-axis. Returns ------- * `ax`: [`Axes`]: The matplotlib axes. """ # <3 legacy python ax = kwargs.get("ax", None) true_minimum = kwargs.get("true_minimum", None) yscale = kwargs.get("yscale", None) if ax is None: ax = plt.gca() ax.set_title("Convergence plot") ax.set_xlabel("Number of calls $n$") ax.set_ylabel(r"$\min f(x)$ after $n$ calls") ax.grid() if yscale is not None: ax.set_yscale(yscale) colors = cm.viridis(np.linspace(0.25, 1.0, len(args))) for results, color in zip(args, colors): if isinstance(results, tuple): name, results = results else: name = None if isinstance(results, OptimizeResult): n_calls = len(results.x_iters) mins = [ np.min(results.func_vals[:i]) for i in range(1, n_calls + 1) ] ax.plot(range(1, n_calls + 1), mins, c=color, marker=".", markersize=12, lw=2, label=name) elif isinstance(results, list): n_calls = len(results[0].x_iters) iterations = range(1, n_calls + 1) mins = [[np.min(r.func_vals[:i]) for i in iterations] for r in results] for m in mins: ax.plot(iterations, m, c=color, alpha=0.2) ax.plot(iterations, np.mean(mins, axis=0), c=color, marker=".", markersize=12, lw=2, label=name) if true_minimum: ax.axhline(true_minimum, linestyle="--", color="r", lw=1, label="True minimum") if true_minimum or name: ax.legend(loc="best") return ax
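# A minimal usage sketch, assuming this module is used alongside scikit-optimize (skopt);
# `gp_minimize`, the toy objective, and `true_minimum=1.0` below are illustrative only.
if __name__ == "__main__":
    from skopt import gp_minimize
    result = gp_minimize(lambda x: (x[0] - 0.3) ** 2 + 1.0, [(-1.0, 1.0)],
                         n_calls=20, random_state=0)
    ax = plot_convergence(("single run", result), true_minimum=1.0, yscale="log")
    plt.show()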
def calc_best_font_size(self, x): diff_array = [(x - i) for i in self.text_widths] item_index = diff_array.index(np.min([n for n in diff_array if n > 0])) self.power_font = self.font_obj_list[item_index]
def _x_for_spln(x, nx, log_spacing):
    """
    Create vector of values to be used in constructing a spline.

    Parameters
    ----------
    x : num | num iterable
        Resulting values will span the range [min(x), max(x)]
    nx : int
        Length of returned vector.
    log_spacing : bool
        False - Create linearly spaced values.
        True - Create logarithmically spaced values.
            To extend to negative values, the spacing is done separately on the
            negative and positive range, and these are later combined.
            The number of points in the negative/positive range is proportional
            to their relative range in log space. i.e., for data in the range
            [-100, 1000] 2/5 of the resulting points will be in the negative range.

    Returns
    -------
    x_spln : array
    """
    x = asarray(x)
    xmin = min(x)
    xmax = max(x)

    if xmin == xmax:
        return asarray([xmin] * nx)

    if xmax <= 0:  # all values <= 0
        return -_x_for_spln(-x, nx, log_spacing)[::-1]

    if not log_spacing:
        return linspace(xmin, xmax, nx)

    # All code below is to handle log-spacing when x has potentially both negative
    # and positive values.
    if xmin > 0:
        return logspace(log10(xmin), log10(xmax), nx)
    else:
        lxmax = max([log10(xmax), 0])
        lxmin = max([log10(abs(xmin)), 0])

    # All the code below is for log-spacing, when xmin < 0 and xmax > 0
    if lxmax == 0 and lxmin == 0:
        return linspace(xmin, xmax, nx)  # Use linear spacing as fallback

    if xmin > 0:
        x_spln = logspace(lxmin, lxmax, nx)
    elif xmin == 0:
        x_spln = r_[0, logspace(-1, lxmax, nx - 1)]
    else:  # (xmin < 0)
        f = lxmin / (lxmin + lxmax)
        nx_neg = int(f * nx)
        nx_pos = nx - nx_neg
        if nx <= 1:  # If triggered, fix edge-case behavior
            raise AssertionError(u'nx should never be 0 or 1')
        # Work around various edge cases
        if nx_neg == 0:
            nx_neg = 1
            nx_pos = nx_pos - 1
        if nx_pos == 0:
            nx_pos = 1
            nx_neg = nx_neg - 1
        x_spln_pos = logspace(-1, lxmax, nx_pos)
        x_spln_neg = -logspace(lxmin, -1, nx_neg)
        x_spln = r_[x_spln_neg, x_spln_pos]
    return x_spln
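# Illustrative only (not from the original source): with log spacing and a range crossing
# zero, points are allocated to the negative/positive sides in proportion to their extent
# in log space, matching the [-100, 1000] example in the docstring above.
_example_spln = _x_for_spln([-100, 1000], 10, True)
# 4 of the 10 points are negative (2/5 of the log-space range) and 6 are positive.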
def run_optimizer(self, data, random_seed=None, search_pts=51, plot=False, figsize=(8, 6), groundtruth=None): if groundtruth is not None: sr_true = groundtruth[0] ss_true = groundtruth[1] else: sr_true = None ss_true = None ths = np.logspace(-5, -1, search_pts) ho_error = [] full_error = [] for th in ths: bool_msk = detect_sun(data, th) measured = rise_set_rough(bool_msk) sunrises = measured['sunrises'] sunsets = measured['sunsets'] np.random.seed(random_seed) use_set_sr = np.arange(len(sunrises))[~np.isnan(sunrises)] use_set_ss = np.arange(len(sunsets))[~np.isnan(sunsets)] if len(use_set_sr) / len(sunrises) > 0.6 and len(use_set_ss) / len( sunsets) > 0.6: run_ho_errors = [] num_trials = 1 # if > 1, average over multiple random selections for run in range(num_trials): np.random.shuffle(use_set_sr) np.random.shuffle(use_set_ss) split_at_sr = int(len(use_set_sr) * .8) # 80-20 train test split split_at_ss = int(len(use_set_ss) * .8) train_sr = use_set_sr[:split_at_sr] train_ss = use_set_ss[:split_at_ss] test_sr = use_set_sr[split_at_sr:] test_ss = use_set_ss[split_at_ss:] train_msk_sr = np.zeros_like(sunrises, dtype=np.bool) train_msk_ss = np.zeros_like(sunsets, dtype=np.bool) train_msk_sr[train_sr] = True train_msk_ss[train_ss] = True test_msk_sr = np.zeros_like(sunrises, dtype=np.bool) test_msk_ss = np.zeros_like(sunsets, dtype=np.bool) test_msk_sr[test_sr] = True test_msk_ss[test_ss] = True sr_smoothed = local_quantile_regression_with_seasonal( sunrises, train_msk_sr, tau=0.05, solver='MOSEK') ss_smoothed = local_quantile_regression_with_seasonal( sunsets, train_msk_ss, tau=0.95, solver='MOSEK') r1 = (sunrises - sr_smoothed)[test_msk_sr] r2 = (sunsets - ss_smoothed)[test_msk_ss] ho_resid = np.r_[r1, r2] #### TESTING # print(th) # plt.plot(ho_resid) # plt.show() ##### ### 7/30/20: # Some sites can have "consistent" fit (low holdout error) # that is not the correct estimate. We impose the restriction # that the range of sunrise times and sunset times must be # greater than 15 minutes. Any solution that is less than # that must be non-physical. 
(See: PVO ID# 30121) cond1 = np.max(sr_smoothed) - np.min(sr_smoothed) > 0.25 cond2 = np.max(ss_smoothed) - np.min(ss_smoothed) > 0.25 if cond1 and cond2: ### L1-loss instead of L2 # L1-loss is better proxy for goodness of fit when using # quantile loss function ### run_ho_errors.append(np.mean(np.abs(ho_resid))) else: run_ho_errors.append(1e2) ho_error.append(np.average(run_ho_errors)) if groundtruth is not None: full_fit = rise_set_smoothed(measured, sunrise_tau=0.05, sunset_tau=0.95) sr_full = full_fit['sunrises'] ss_full = full_fit['sunsets'] e1 = (sr_true - sr_full) e2 = (ss_true - ss_full) e_both = np.r_[e1, e2] full_error.append(np.sqrt(np.mean(e_both**2))) else: ho_error.append(1e2) full_error.append(1e2) ho_error = np.array(ho_error) min_val = np.min(ho_error) slct_vals = ho_error < 1.2 * min_val selected_th = np.min(ths[slct_vals]) bool_msk = detect_sun(data, selected_th) measured = rise_set_rough(bool_msk) smoothed = rise_set_smoothed(measured, sunrise_tau=.05, sunset_tau=.95) self.sunrise_estimates = smoothed['sunrises'] self.sunset_estimates = smoothed['sunsets'] self.sunrise_measurements = measured['sunrises'] self.sunset_measurements = measured['sunsets'] self.sunup_mask_measured = bool_msk data_sampling = int(24 * 60 / data.shape[0]) num_days = data.shape[1] mat = np.tile(np.arange(0, 24, data_sampling / 60), (num_days, 1)).T sr_broadcast = np.tile(self.sunrise_estimates, (data.shape[0], 1)) ss_broadcast = np.tile(self.sunset_estimates, (data.shape[0], 1)) self.sunup_mask_estimated = np.logical_and(mat >= sr_broadcast, mat < ss_broadcast) self.threshold = selected_th if groundtruth is not None: sr_residual = sr_true - self.sunrise_estimates ss_residual = ss_true - self.sunset_estimates total_rmse = np.sqrt(np.mean(np.r_[sr_residual, ss_residual]**2)) self.total_rmse = total_rmse else: self.total_rmse = None if plot: fig = plt.figure(figsize=figsize) plt.plot(ths, ho_error, marker='.', color='blue', label='HO error') plt.yscale('log') plt.xscale('log') plt.plot(ths[slct_vals], ho_error[slct_vals], marker='.', ls='none', color='red') plt.axvline(selected_th, color='blue', ls='--', label='optimized parameter') if groundtruth is not None: best_th = ths[np.argmin(full_error)] plt.plot(ths, full_error, marker='.', color='orange', label='true error') plt.axvline(best_th, color='orange', ls='--', label='best parameter') plt.legend() return fig else: return
def goodness_of_fit(self, X_test, Y_test, J_test=None, response=0, partial=0): assert X_test.shape[1] == Y_test.shape[1] assert Y_test.shape[0] == Y_test.shape[0] assert X_test.shape[0] == self.number_of_inputs assert Y_test.shape[0] == self.number_of_outputs if type(J_test) == np.ndarray: assert X_test.shape[1] == J_test.shape[2] assert X_test.shape[0] == J_test.shape[1] number_test_examples = Y_test.shape[1] Y_pred_test = self.evaluate(X_test) J_pred_test = self.gradient(X_test) X_train, Y_train, J_train = self.training_data Y_pred_train = self.evaluate(X_train) J_pred_train = self.gradient(X_train) if type(J_test) == np.ndarray: test = J_test[response, partial, :].reshape((1, number_test_examples)) test_pred = J_pred_test[response, partial, :].reshape((1, number_test_examples)) train = J_train[response, partial, :].reshape((1, self.number_training_examples)) train_pred = J_pred_train[response, partial, :].reshape((1, self.number_training_examples)) title = 'Goodness of fit for dY' + str(response) + '/dX' + str(partial) else: test = Y_test[response, :].reshape((1, number_test_examples)) test_pred = Y_pred_test[response, :].reshape((1, number_test_examples)) train = Y_train[response, :].reshape((1, self.number_training_examples)) train_pred = Y_pred_train[response, :].reshape((1, self.number_training_examples)) title = 'Goodness of fit for Y' + str(response) metrics = dict() metrics['R_squared'] = np.round(rsquare(test_pred, test), 2).squeeze() metrics['std_error'] = np.round(np.std(test_pred - test).reshape(1, 1), 2).squeeze() metrics['avg_error'] = np.round(np.mean(test_pred - test).reshape(1, 1), 2).squeeze() # Reference line y = np.linspace(min(np.min(test), np.min(train)), max(np.max(test), np.max(train)), 100) # Prepare to plot fig = plt.figure(figsize=(12, 6)) fig.suptitle(title, fontsize=16) spec = gridspec.GridSpec(ncols=2, nrows=1, wspace=0.25) # Plot ax1 = fig.add_subplot(spec[0, 0]) ax1.plot(y, y) ax1.scatter(test, test_pred, s=20, c='r') ax1.scatter(train, train_pred, s=100, c='k', marker="+") plt.legend(["perfect", "test", "train"]) plt.xlabel("actual") plt.ylabel("predicted") plt.title("RSquare = " + str(metrics['R_squared'])) ax2 = fig.add_subplot(spec[0, 1]) error = (test_pred - test).T weights = np.ones(error.shape) / test_pred.shape[1] ax2.hist(error, weights=weights, facecolor='g', alpha=0.75) plt.xlabel('Absolute Prediction Error') plt.ylabel('Probability') plt.title('$\mu$=' + str(metrics['avg_error']) + ', $\sigma=$' + str(metrics['std_error'])) plt.grid(True) plt.show() return metrics
def ppca(Y, d, dia): """ Implements probabilistic PCA for data with missing values, using a factorizing distribution over hidden states and hidden observations. Args: Y: (N by D ) input numpy ndarray of data vectors d: ( int ) dimension of latent space dia: (boolean) if True: print objective each step Returns: C: (D by d ) C*C' + I*ss is covariance model, C has scaled principal directions as cols ss: ( float ) isotropic variance outside subspace M: (D by 1 ) data mean X: (N by d ) expected states Ye: (N by D ) expected complete observations (differs from Y if data is missing) Based on MATLAB code from J.J. VerBeek, 2006. http://lear.inrialpes.fr/~verbeek """ N, D = shape( Y ) # N observations in D dimensions (i.e. D is number of features, N is samples) threshold = 1E-4 # minimal relative change in objective function to continue hidden = isnan(Y) missing = hidden.sum() if missing > 0: M = nanmean(Y, axis=0) else: M = average(Y, axis=0) Ye = Y - repmat(M, N, 1) if missing > 0: Ye[hidden] = 0 # initialize C = normal(loc=0.0, scale=1.0, size=(D, d)) CtC = C.T @ C X = Ye @ C @ inv(CtC) recon = X @ C.T recon[hidden] = 0 ss = np.sum((recon - Ye)**2) / (N * D - missing) count = 1 old = np.inf # EM Iterations while (count): Sx = inv(eye(d) + CtC / ss) # E-step, covariances ss_old = ss if missing > 0: proj = X @ C.T Ye[hidden] = proj[hidden] X = Ye @ C @ Sx / ss # E-step: expected values SumXtX = X.T @ X # M-step C = Ye.T @ X @ (SumXtX + N * Sx).T @ inv( ((SumXtX + N * Sx) @ (SumXtX + N * Sx).T)) CtC = C.T @ C ss = (np.sum((X @ C.T - Ye)**2) + N * np.sum(CtC * Sx) + missing * ss_old) / (N * D) # transform Sx determinant into numpy longdouble in order to deal with high dimensionality Sx_det = np.min(Sx).astype(np.longdouble)**shape(Sx)[0] * det( Sx / np.min(Sx)) objective = N * D + N * (D * log(ss) + tr(Sx) - log(Sx_det)) + tr( SumXtX) - missing * log(ss_old) rel_ch = np.abs(1 - objective / old) old = objective count = count + 1 if rel_ch < threshold and count > 5: count = 0 if dia: print(f"Objective: {objective:.2f}, Relative Change {rel_ch:.5f}") C = orth(C) covM = cov((Ye @ C).T) vals, vecs = eig(covM) ordr = np.argsort(vals)[::-1] vecs = vecs[:, ordr] C = C @ vecs X = Ye @ C # add data mean to expected complete data Ye = Ye + repmat(M, N, 1) return C, ss, M, X, Ye
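# A minimal usage sketch (illustrative, not from the original source): noisy low-rank data
# with ~5% of entries knocked out as NaN, recovered with a 2-dimensional latent space.
# The variable names below are hypothetical.
if __name__ == '__main__':
    rng = np.random.RandomState(0)
    Y_demo = rng.normal(size=(200, 2)) @ rng.normal(size=(2, 10)) + 0.1 * rng.normal(size=(200, 10))
    Y_demo[rng.rand(200, 10) < 0.05] = np.nan   # mark ~5% of entries as missing
    C_, ss_, M_, X_, Ye_ = ppca(Y_demo, d=2, dia=False)
    print(Ye_.shape)                            # (200, 10): completed observations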
    #get coordinate range and shift coordinates by half of the step size to make sure raster overlay is centered.
    #This is not really necessary and only matters for very small point clouds with edge effects or for very large step sizes:
    x_coords = np.arange(x_min, x_max, raster_m) + raster_m / 2
    #create combination of all coordinates (this is using lists and could be optimized)
    xy_coordinates = np.array([(x, y) for x in x_coords for y in y_coords])
else:
    #no GeoTiff file given, using min/max coordinates to generate equally-spaced grid
    ### Search KDTree with points on a regularly-spaced raster
    #generating equally-spaced raster overlay from input coordinates with stepsize rstep_size
    #This will be used to query the point cloud. Step_size should be small enough and likely 1/2 of the output file resolution.
    #Note that this uses a 2D raster overlay to slice a 3D point cloud.
    raster_m = args.raster_m
    [x_min, x_max] = np.min(pc_xyzg[:, 0]), np.max(pc_xyzg[:, 0])
    [y_min, y_max] = np.min(pc_xyzg[:, 1]), np.max(pc_xyzg[:, 1])
    [z_min, z_max] = np.min(pc_xyzg[:, 2]), np.max(pc_xyzg[:, 2])
    x_elements = len(np.arange(x_min.round(), x_max.round(), raster_m))
    y_elements = len(np.arange(y_min.round(), y_max.round(), raster_m))
    #get coordinate range and shift coordinates by half of the step size to make sure raster overlay is centered.
    #This is not really necessary and only matters for very small point clouds with edge effects or for very large step sizes:
    x_coords = np.arange(x_min.round(), x_max.round(), raster_m) + raster_m / 2
    y_coords = np.arange(y_min.round(), y_max.round(), raster_m) + raster_m / 2
    #create combination of all coordinates (this is using lists and could be optimized)
    xy_coordinates = np.array([(x, y) for x in x_coords for y in y_coords])

#using the 2D KDTree to find the points that are closest to the defined 2D raster overlay
[pc_xyg_pykdtree_distance, pc_xyg_pykdtree_id] = pc_xyg_pykdtree.query(xy_coordinates, k=1)
def __init__(self, opt): """Initialize this dataset class. Parameters: opt (Option class) -- stores all the experiment flags; needs to be a subclass of BaseOptions """ BaseDataset.__init__(self, opt) assert (opt.image_type == 'exr') assert (opt.output_nc == 1) assert (opt.input_nc == 1) #self.A = os.path.join(opt.dataroot, opt.phase + '_input') self.A1 = os.path.join(opt.dataroot, opt.phase + '_input_terraform') self.B = os.path.join(opt.dataroot, opt.phase + '_output') #self.A_paths = sorted(make_dataset(self.A, opt.max_dataset_size)) self.A1_paths = sorted(make_dataset(self.A1, opt.max_dataset_size)) self.B_paths = sorted(make_dataset(self.B, opt.max_dataset_size)) #self.A_size = len(self.A_paths) # get the size of dataset A self.A1_size = len(self.A1_paths) self.B_size = len(self.B_paths) # get the size of dataset B self.A1_test_paths = sorted( make_dataset(os.path.join(opt.dataroot, 'test_input_terraform'))) self.B_test_paths = sorted( make_dataset(os.path.join(opt.dataroot, 'test_output'))) self.A1_test_size = len(self.A1_test_paths) self.B_test_size = len(self.B_test_paths) self.input_names = np.array([ "RockDetailMask.RockDetailMask", "SoftDetailMask.SoftDetailMask", "cliffs.cliffs", "height.height", "mesa.mesa", "slope.slope", "slopex.slopex", "slopez.slopez" ]) self.output_names = np.array([ "RockDetailMask.RockDetailMask", "SoftDetailMask.SoftDetailMask", "bedrock.bedrock", "cliffs.cliffs", "flow.flow", "flowx.flowx", "flowz.flowz", "height.height", "mesa.mesa", "sediment.sediment", "water.water" ]) self.input_channels = np.array([3]) self.output_channels = np.array([7]) if not self.opt.compute_bounds: self.i_channels_min = np.array([[[-86]]]) #0 self.i_channels_max = np.array([[[910]]]) #824 self.o_channels_min = np.array([[[-86]]]) #-4 self.o_channels_max = np.array([[[910]]]) #819 return channels_min = np.array([2**16 for _ in self.input_channels]) channels_max = np.array([0 for _ in self.input_channels]) examples = 0 for A1_path in self.A1_paths: A1_img = exrlib.read_exr_float32( A1_path, list(self.input_names[self.input_channels]), 512, 512).transpose(2, 0, 1).reshape(len(self.input_channels), -1) channels_min = np.min( np.concatenate((np.expand_dims( channels_min, 1), np.expand_dims(np.min(A1_img, 1), 1)), 1), 1) channels_max = np.max( np.concatenate((np.expand_dims( channels_min, 1), np.expand_dims(np.max(A1_img, 1), 1)), 1), 1) examples += 1 if examples >= 1000: break print(channels_min) self.i_channels_min = np.expand_dims( np.expand_dims(np.array(channels_min), 1), 2) print(channels_max) self.i_channels_max = np.expand_dims( np.expand_dims(np.array(channels_max), 1), 2) channels_min = np.array([2**16 for _ in self.output_channels]) channels_max = np.array([0 for _ in self.output_channels]) examples = 0 for B_path in self.B_paths: B_img = exrlib.read_exr_float32( B_path, list(self.output_names[self.output_channels]), 512, 512).transpose(2, 0, 1).reshape(len(self.output_channels), -1) channels_min = np.min( np.concatenate((np.expand_dims( channels_min, 1), np.expand_dims(np.min(B_img, 1), 1)), 1), 1) channels_max = np.max( np.concatenate((np.expand_dims( channels_min, 1), np.expand_dims(np.max(B_img, 1), 1)), 1), 1) examples += 1 if examples >= 1000: break print(channels_min) self.o_channels_min = np.expand_dims(np.expand_dims(channels_min, 1), 2) print(channels_max) self.o_channels_max = np.expand_dims(np.expand_dims(channels_max, 1), 2)
def Normalize(dataSet):
    # per-dimension minimum/maximum, shape (3,) for an (N, 3) input
    min = np.min(dataSet, axis=0)
    max = dataSet.max(axis=0)  # the column-wise maximum can also be computed this way
    normData = np.zeros(dataSet.shape)
    # np.tile could expand min/max to the same N*3 shape as dataSet before the subtraction,
    # but since min and max are (3,) and dataSet is (N, 3), NumPy broadcasting handles
    # the operation directly.
    normData = (dataSet - min) / (max - min)
    return normData, min, max
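# A minimal usage sketch (illustrative): min-max scaling an (N, 3) array to [0, 1] per
# column, keeping min/max so new samples can be scaled consistently later. The variable
# names below are hypothetical.
demo_data = np.array([[1.0, 10.0, 100.0],
                      [2.0, 20.0, 200.0],
                      [3.0, 30.0, 300.0]])
demo_norm, demo_min, demo_max = Normalize(demo_data)
# Each column of demo_norm runs from 0.0 to 1.0; a new sample x is scaled with
# (x - demo_min) / (demo_max - demo_min).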