def generate_lookup_table(self):
    """
    Generate a lookup table between degree coordinates and linear coordinates.

    Returns two matrices:
        lookupI: i index in the linear matrix for this pixel after warping
        lookupJ: j index in the linear matrix for this pixel after warping
    """
    # length of one degree on the monitor at the gaze point
    degDis = np.tan(np.pi / 180) * self.dis

    # generate degree coordinates without warping
    degNoWarpCorX = self.lin_coord_x / degDis
    degNoWarpCorY = self.lin_coord_y / degDis

    # degree coordinates
    degCorX = self.deg_coord_x + self.center_coordinates[0]
    degCorY = self.deg_coord_y + self.center_coordinates[1]

    lookupI = np.zeros(degCorX.shape).astype(np.int32)
    lookupJ = np.zeros(degCorX.shape).astype(np.int32)

    for j in range(lookupI.shape[1]):
        currDegX = degCorX[0, j]
        diffDegX = degNoWarpCorX[0, :] - currDegX
        IndJ = np.argmin(np.abs(diffDegX))
        lookupJ[:, j] = IndJ

        for i in range(lookupI.shape[0]):
            currDegY = degCorY[i, j]
            diffDegY = degNoWarpCorY[:, IndJ] - currDegY
            indI = np.argmin(np.abs(diffDegY))
            lookupI[i, j] = indI

    return lookupI, lookupJ
def plotISVar(): plt.figure() plt.title('Variance minimization problem (call).\nVertical lines mark the minima.') for K in [0.6, 0.8, 1.0, 1.2]: theta = np.linspace(-0.6, 2) var = [BS.exactCallVar(K*s0, theta) for theta in theta] minth = theta[np.argmin(var)] line, = plt.plot(theta, var, label=str(K)) plt.axvline(minth, color=line.get_color()) plt.xlabel(r'$\theta$') plt.ylabel('call variance') plt.legend(title=r'$K/s_0$', loc='upper left') plt.autoscale(tight=True) plt.figure() plt.title('Variance minimization problem (put).\nVertical lines mark the minima.') for K in [0.8, 1.0, 1.2, 1.4]: theta = np.linspace(-2, 0.5) var = [BS.exactPutVar(K*s0, theta) for theta in theta] minth = theta[np.argmin(var)] line, = plt.plot(theta, var, label=str(K)) plt.axvline(minth, color=line.get_color()) plt.xlabel(r'$\theta$') plt.ylabel('put variance') plt.legend(title=r'$K/s_0$', loc='upper left') plt.autoscale(tight=True)
def select_minibatch(x_win, masks, extra, y, window_size, i, minibatch_size,
                     order=None, add_oov_noise=False, oov_noise_prob=0.0):
    n = len(masks)
    if order is None:
        order = range(n)
    ms = min(minibatch_size, n - i)
    if ms > 1:
        minibatch_mask = np.vstack([masks[j] for j in range(i, min(i + ms, n))])
        max_len = np.max(np.argmin(minibatch_mask, axis=1))
        if max_len == 0:
            max_len = len(masks[i])
        try:
            minibatch_mask = minibatch_mask[:, 0:max_len].reshape((ms, max_len))
        except Exception:
            e = sys.exc_info()[0]
            print(e)
            print(max_len)
            print(minibatch_mask)
        minibatch_x = x_win[0:max_len, order[i:min(i + ms, n)], :]
        minibatch_extra = np.vstack([extra[j] for j in range(i, min(i + ms, n))])
        minibatch_y = np.vstack([y[j] for j in range(i, min(i + ms, n))])
    else:
        max_len = np.argmin(masks[i])
        if max_len == 0:
            max_len = len(masks[i])
        minibatch_mask = np.array(masks[i][0:max_len]).reshape((1, max_len))
        minibatch_x = x_win[0:max_len, order[i], :].reshape((max_len, 1, window_size))
        minibatch_extra = np.array(extra[i]).reshape((1, len(extra[i])))
        minibatch_y = np.array(y[i]).reshape((1, len(y[i])))
    if add_oov_noise:
        draws = np.random.rand(max_len, ms, window_size)
        minibatch_x = np.array(minibatch_x * np.array(draws > oov_noise_prob, dtype='int32'),
                               dtype='int32')
    return minibatch_x, minibatch_mask, minibatch_extra, minibatch_y
def find_surface(cube):
    """
    Return the `cube` index for the surface layer of any model grid
    (rgrid, ugrid, sgrid) and any non-dimensional coordinate.

    TODO: Fold this into `find_layer()`
    """
    z = z_coord(cube)
    if not z:
        msg = "Cannot find the surface for cube {!r}".format
        raise ValueError(msg(cube))
    else:
        if np.argmin(z.shape) == 0 and z.ndim == 2:
            points = z[:, 0].points
        elif np.argmin(z.shape) == 1 and z.ndim == 2:
            points = z[0, :].points
        else:
            points = z.points
        positive = z.attributes.get('positive', None)
        if positive == 'up':
            idx = np.unique(points.argmax(axis=0))[0]
        else:
            idx = np.unique(points.argmin(axis=0))[0]
        return idx
def fitChiSq(self, sampleSpec, returnChiSq=False):
    if np.all(sampleSpec.wave == self.wave):
        newSampleSpec = sampleSpec
        grid = self.values
    else:
        print("error in function")
        newSampleSpec = sampleSpec.interpolate(self.wave)
        minIDx = self.wave.searchsorted(sampleSpec.wave[0])
        maxIDx = self.wave.searchsorted(sampleSpec.wave[-1])
        grid = self.values[:, minIDx:maxIDx]

    if newSampleSpec.var is not None:
        var = newSampleSpec.var
    else:
        var = 1.

    if newSampleSpec.dq is not None:
        dqMask = newSampleSpec.dq
    else:
        dqMask = np.ones(grid.shape[1]).astype(bool)

    chiSq = ((grid[:, dqMask] - newSampleSpec.flux[dqMask]) / var[dqMask]) ** 2
    nu = (np.ones(grid.shape[0]) * grid.shape[1]) - len(self.params) - 1
    redChiSq = np.sum(chiSq, axis=1) / nu

    if returnChiSq:
        return np.min(redChiSq), self.points[np.argmin(redChiSq)]
    else:
        return self.points[np.argmin(redChiSq)]
def GetSpectralIndex(E_min, E_max):
    """
    Returns the spectral index evaluated between the two endpoints E_min and
    E_max, based on the averaged P7REPv15 diffuse model.

    :param E_min: Min energy in MeV
    :param E_max: Max energy in MeV
    :return spectral index: The power-law index averaged over the given energy
        range (positive value).
    """
    E = np.array([58.473133087158203, 79.970359802246108, 109.37088726489363,
                  149.58030713742139, 204.57243095354417, 279.7820134691566,
                  382.64185792772452, 523.31738421248383, 715.71125569520109,
                  978.83734991844688, 1338.6998597146596, 1830.8632323330951,
                  2503.9669281982829, 3424.532355440329, 4683.5370393234753,
                  6405.4057377695362, 8760.3070758200847, 11980.97094929486,
                  16385.688725918291, 22409.769304923335, 30648.559770668966,
                  41916.282280063475, 57326.501908367274, 78402.17791960774,
                  107226.17459483664, 146647.10628359963, 200560.85990769751,
                  274295.61718814232, 375138.42752394517, 513055.37160154793])
    dnde = np.array([1.3259335, 0.94195729, 0.66580701, 0.46162829, 0.30296713,
                     0.18484889, 0.10698333, 0.059697378, 0.032260861,
                     0.01673951, 0.0082548652, 0.0039907703, 0.0018546022,
                     0.00082937587, 0.0003599966, 0.00015557533, 6.7215013e-05,
                     2.8863404e-05, 1.2341489e-05, 5.3399754e-06, 2.2966778e-06,
                     9.9477847e-07, 4.53333e-07, 2.1135656e-07, 9.9832157e-08,
                     4.6697188e-08, 2.1986754e-08, 1.0368451e-08, 5.0197251e-09,
                     2.4097735e-09])

    # Find bin and check bounds
    E_bin_min = int(np.argmin(np.abs(E - E_min)))
    E_bin_max = int(np.argmin(np.abs(E - E_max)))
    if E_bin_min == E_bin_max:
        E_bin_max = E_bin_min + 1
    if E_bin_max >= len(dnde):
        E_bin_min, E_bin_max = len(dnde) - 3, len(dnde) - 1

    return -np.log(dnde[E_bin_min] / dnde[E_bin_max]) / np.log(E[E_bin_min] / E[E_bin_max])
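# Hypothetical usage sketch (not part of the original source): assuming the
# GetSpectralIndex function above is in scope, the index between 1 GeV and
# 10 GeV can be queried directly; energies are given in MeV.
if __name__ == '__main__':
    index_1_to_10_gev = GetSpectralIndex(1.0e3, 1.0e4)
    print('Spectral index between 1 and 10 GeV: {:.3f}'.format(index_1_to_10_gev))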
def main():
    try:
        prog = sys.argv[0]
        # a = float(sys.argv[1])
        # b = float(sys.argv[2])
    except IndexError:
        # print('\nusage: ' + prog + ' a b (where a & b are numbers)\n')
        sys.exit(0)

    unoptFile = open('i.dat', 'r')
    gamma_data = []
    i_data = []
    for line in unoptFile:
        gamma_data.append(float(line.split()[0]))
        i_data.append(float(line.split()[1]))
    gamma_data, i_data = np.array(gamma_data), np.array(i_data)
    unopt_gamma = gamma_data[np.argmin(i_data)]
    unoptFile.close()

    optFile = open('omega_opt_ideriv/i.dat', 'r')
    gamma_data = []
    i_data = []
    for line in optFile:
        gamma_data.append(float(line.split()[0]))
        i_data.append(float(line.split()[1]))
    gamma_data, i_data = np.array(gamma_data), np.array(i_data)
    opt_gamma = gamma_data[np.argmin(i_data)]
    optFile.close()

    print(unopt_gamma - opt_gamma, ' ')
def kappa_profile_chi2(env, models, model0, frac='1sigma'): n_max,d_max=0,0 n_min,d_min=np.inf,np.inf ns, ds = [], [] for m in models: n,d = 0,0 for m1,m2 in izip(m['obj,data'], model0['obj,data']): obj,data = m1 obj0,data0 = m2 rs = [ abs(img.pos) for src in obj.sources for img in src.images] #rs = [ abs(img.pos) for src in obj.sources for img in src.images if img.parity_name != 'max'] rmin, rmax = np.amin(rs), np.amax(rs) if 0: b = 0 else: rmin = obj.basis.top_level_cell_size * 1.6 b = np.argmin(abs(data['R'] - rmin)) e = np.argmin(abs(data['R'] - rmax)) v0 = data0['kappa(R)'][b:e+1] v1 = data['kappa(R)'][b:e+1] n += np.sum((v1 - v0)**2) d += np.sum(v0**2) #d += len(v0) #np.sum(v0**2) ns.append(n) ds.append(d) nd = array(ns) / array(ds) return dist_range(nd, frac)
def get_rectangular_subset(lon, lat, data, view, xmin, xmax, ymin, ymax, buffer=0):
    imin = np.argmin(np.abs(lon - xmin))
    imax = np.argmin(np.abs(lon - xmax))
    jmin = np.argmin(np.abs(lat - ymin))
    jmax = np.argmin(np.abs(lat - ymax))
    return (lon[imin - buffer:imax + 1 + buffer],
            lat[jmin - buffer:jmax + 1 + buffer],
            grid_convert(grid_convert(data, view, 'x+y+')[imin - buffer:imax + 1 + buffer,
                                                          jmin - buffer:jmax + 1 + buffer],
                         'x+y+', view))
def ix(self):
    spec = self.spec
    start_freq_ix = np.argmin(np.abs(spec.freqs - self.start_freq))
    end_freq_ix = np.argmin(np.abs(spec.freqs - self.end_freq))
    start_time_ix = np.argmin(np.abs(spec.times - self.start_time))
    end_time_ix = np.argmin(np.abs(spec.times - self.end_time))
    return slice(start_freq_ix, end_freq_ix), slice(start_time_ix, end_time_ix)
def click(self,event): """ What to do, if a click on the figure happens: 1. Check which axis 2. Get data coord's. 3. Plot resulting data. 4. Update Figure """ if event.inaxes==self.overview: #Get nearest data xpos=np.argmin(np.abs(event.xdata-self.x)) ypos=np.argmin(np.abs(event.ydata-self.y)) #Check which mouse button: if event.button==1: #Plot it c,=self.y_subplot.plot(self.y, self.z[:,xpos],label=str(self.x[xpos])) self.overview.axvline(self.x[xpos],color=c.get_color(),lw=2) elif event.button==3: #Plot it c,=self.x_subplot.plot(self.x, self.z[ypos,:],label=str(self.y[ypos])) self.overview.axhline(self.y[ypos],color=c.get_color(),lw=2) if event.inaxes==self.y_subplot: ypos=np.argmin(np.abs(event.xdata-self.y)) c=self.x_subplot.plot(self.x, self.z[ypos,:],label=str(self.y[ypos])) self.overview.axhline(self.y[ypos],color=c.get_color(),lw=2) if event.inaxes==self.x_subplot: xpos=np.argmin(np.abs(event.xdata-self.x)) c,=self.y_subplot.plot(self.y, self.z[:,xpos],label=str(self.x[xpos])) self.overview.axvline(self.x[xpos],color=c.get_color(),lw=2) #Show it plt.draw()
def lattice(unit_cell_size, shape):
    """
    We should just have delta functions at 1/d; however, if the unit cell does
    not divide the detector shape evenly then these fall between pixels.
    """
    # generate the q-space coordinates
    qi = np.fft.fftfreq(shape[0])
    qj = np.fft.fftfreq(shape[1])
    qk = np.fft.fftfreq(shape[2])

    # generate the reciprocal lattice points from the unit cell size
    qs_unit = np.meshgrid(
        np.fft.fftfreq(unit_cell_size[0]),
        np.fft.fftfreq(unit_cell_size[1]),
        np.fft.fftfreq(unit_cell_size[2]),
        indexing="ij",
    )

    # now we want qs[qs_unit] = 1.
    lattice = np.zeros(shape, dtype=float)
    for ii, jj, kk in zip(qs_unit[0].ravel(), qs_unit[1].ravel(), qs_unit[2].ravel()):
        i = np.argmin(np.abs(ii - qi))
        j = np.argmin(np.abs(jj - qj))
        k = np.argmin(np.abs(kk - qk))
        lattice[i, j, k] = 1.0

    return lattice
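# Hypothetical usage sketch (not part of the original source): assuming numpy
# is imported as np, mark the reciprocal-lattice points of an 8x8x8 unit cell
# on a 64x64x64 grid; the sizes are illustrative only.
if __name__ == '__main__':
    lat = lattice(unit_cell_size=(8, 8, 8), shape=(64, 64, 64))
    print('number of lattice points set:', int(lat.sum()))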
def draw(self): self.spec_widg.on_draw(no_draw=True) # Add text? for kk,lls in enumerate(self.abssys_widg.all_abssys): # Label ipos = self.abssys_widg.all_items[kk].rfind('_') ilbl = self.abssys_widg.all_items[kk][ipos+1:] # Add text for wv,lbl in self.plt_wv: idx = np.argmin(np.abs(self.continuum.dispersion-wv*(1+lls.zabs))) self.spec_widg.ax.text(wv.value*(1+lls.zabs), self.continuum.flux[idx], '{:s}_{:s}'.format(ilbl,lbl), ha='center', color='blue', size='small', rotation=90.) # Ticks for selected LLS idxl = self.get_sngl_sel_sys() if idxl is not None: lls = self.abssys_widg.all_abssys[idxl] # Label ipos = self.abssys_widg.all_items[idxl].rfind('_') ilbl = self.abssys_widg.all_items[idxl][ipos+1:] for line in lls.lls_lines: if line.wrest < 915.*u.AA: continue idx = np.argmin(np.abs(self.continuum.dispersion- line.wrest*(1+lls.zabs))) self.spec_widg.ax.text(line.wrest.value*(1+lls.zabs), self.continuum.flux[idx], '-{:s}'.format(ilbl), ha='center', color='red', size='small', rotation=90.) # Draw self.spec_widg.canvas.draw()
def online_k_means(k, b, t, X_in):
    random_number = 11232015
    random_num = np.random.randint(X_in.shape[0], size=300)
    rng = np.random.RandomState(random_number)
    permutation1 = rng.permutation(len(random_num))
    random_num = random_num[permutation1]
    x_input = X_in[random_num]
    c, l = mykmeansplusplus(x_input, k, t)
    v = np.zeros((k))
    for i in range(t):
        random_num = np.random.randint(X_in.shape[0], size=b)
        rng = np.random.RandomState(random_number)
        permutation1 = rng.permutation(len(random_num))
        random_num = random_num[permutation1]
        M = X_in[random_num]
        Y = cdist(M, c, metric='euclidean')
        clust_index = np.argmin(Y, axis=1)
        # use a separate loop variable to avoid shadowing the outer counter
        for j in range(M.shape[0]):
            c_in = clust_index[j]
            v[c_in] += 1
            ita = 1 / v[c_in]
            c[c_in] = np.add(np.multiply((1 - ita), c[c_in]), np.multiply(ita, M[j]))
    Y_l = cdist(X_in, c, metric='euclidean')
    l = np.argmin(Y_l, axis=1)
    return c, l
def kmeans(xx, centroids, maxIters=20, minclust=30, maxDiff=2):
    # Cluster Assignment step
    ca = np.array([np.argmin([np.dot(x_i - y_k, x_i - y_k) for y_k in centroids])
                   for x_i in xx])
    # all clusters have at least minclust?
    (unique, counts) = np.unique(ca, return_counts=True)
    for cc in counts:
        if cc < minclust:
            return ("error: too few", np.array(centroids), ca)
    # Move centroids step
    centroids = np.array([xx[ca == k].mean(axis=0) for k in range(centroids.shape[0])])

    iter = 1
    while iter < maxIters:
        # Cluster Assignment step
        canew = np.array([np.argmin([np.dot(x_i - y_k, x_i - y_k) for y_k in centroids])
                          for x_i in xx])
        # all clusters have at least minclust?
        (unique, counts) = np.unique(canew, return_counts=True)
        for cc in counts:
            if cc < minclust:
                return ("error: too few", np.array(centroids), canew)
        numdiff = sum(ca != canew)
        if numdiff < maxDiff:
            return ("converged", np.array(centroids), canew)
        ca = canew
        # Move centroids step
        centroids = np.array([xx[ca == k].mean(axis=0) for k in range(centroids.shape[0])])
        iter += 1

    return ("error: not converged", np.array(centroids), ca)
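# Hypothetical usage sketch (not part of the original source): run the kmeans
# routine above on two synthetic 2-D blobs; the data, seed, and cluster count
# are made up for illustration and assume numpy is imported as np.
if __name__ == '__main__':
    rng = np.random.RandomState(0)
    xx = np.vstack([rng.randn(100, 2), rng.randn(100, 2) + 5.0])
    init_centroids = xx[rng.choice(len(xx), 2, replace=False)]
    status, centroids, assignments = kmeans(xx, init_centroids, minclust=30)
    print(status, centroids)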
def normalization(self):
    lmin = np.argmin(np.abs(self.energy - self.lowerBoundNorm))
    lmax = np.argmin(np.abs(self.energy - self.upperBoundNorm))
    sumRef = np.sum(self.display[:, np.amin([lmin, lmax]):np.amax([lmin, lmax])], axis=1)
    for idx in range(len(sumRef)):
        self.displaytemp[idx, :] = self.display[idx, :] / sumRef[idx]
def optimize_pa(self,fixed_pa=False,step=10,pa_init=0,pa_max=180): self.fixed_pa_best=False if fixed_pa: self.fixed_pa_best=True self.pa_best=fixed_pa else: self.pas=[] self.chi2=[] pa=pa_init while True: ## ## this could be made more efficient by only calling the sub-routine rotate_and_fft ## but then need to watch out for differential vs. absolute rotations i=img2vis(self.f_model, self.pxscale, self.lam, oifits=self.oifits, pa=pa, phot=self.phot) self.chi2.append(i.vis_chi2()) self.pas.append(pa) pa+=step if pa > pa_max: break ## chose global chi2 minimum here for the moment, and plot PA vs. chi2 ## so that we see if global minimum is bad. self.pa_best = self.pas[np.argmin(self.chi2)] ## ## write out best chi**2 and name of model with open("chi2_min.txt","a") as f: txt="{0:06.0f} -- {1:5.3f} -- {2:5.2f} -- {3}\n".format(self.chi2[np.argmin(self.chi2)], self.pxscale, self.f_p, self.f_model) f.write(txt) self.rotate_and_fft(self.pa_best)
def optimal_clustering(df, patch, method='kmeans', statistic='gap', max_K=5):
    if len(patch) == 1:
        return [patch]

    if statistic == 'db':
        if method == 'kmeans':
            if len(patch) <= 5:
                K_max = 2
            else:
                K_max = min(len(patch) // 2, max_K)
            clustering = {}
            db_index = []
            X = df.loc[patch, :]
            for k in range(2, K_max + 1):
                kmeans = cluster.KMeans(n_clusters=k).fit(X)
                clustering[k] = pd.DataFrame(kmeans.predict(X), index=patch)
                dist_mu = squareform(pdist(kmeans.cluster_centers_))
                sigma = []
                for i in range(k):
                    points_in_cluster = clustering[k][clustering[k][0] == i].index
                    sigma.append(sqrt(X.loc[points_in_cluster, :].var(axis=0).sum()))
                db_index.append(davies_bouldin(dist_mu, np.array(sigma)))
            db_index = np.array(db_index)
            k_optimal = np.argmin(db_index) + 2
            return [list(clustering[k_optimal][clustering[k_optimal][0] == i].index)
                    for i in range(k_optimal)]

        elif method == 'agglomerative':
            if len(patch) <= 5:
                K_max = 2
            else:
                K_max = min(len(patch) // 2, max_K)
            clustering = {}
            db_index = []
            X = df.loc[patch, :]
            for k in range(2, K_max + 1):
                agglomerative = cluster.AgglomerativeClustering(n_clusters=k, linkage='average').fit(X)
                clustering[k] = pd.DataFrame(agglomerative.fit_predict(X), index=patch)
                tmp = [list(clustering[k][clustering[k][0] == i].index) for i in range(k)]
                centers = np.array([np.mean(X.loc[c, :], axis=0) for c in tmp])
                dist_mu = squareform(pdist(centers))
                sigma = []
                for i in range(k):
                    points_in_cluster = clustering[k][clustering[k][0] == i].index
                    sigma.append(sqrt(X.loc[points_in_cluster, :].var(axis=0).sum()))
                db_index.append(davies_bouldin(dist_mu, np.array(sigma)))
            db_index = np.array(db_index)
            k_optimal = np.argmin(db_index) + 2
            return [list(clustering[k_optimal][clustering[k_optimal][0] == i].index)
                    for i in range(k_optimal)]

    elif statistic == 'gap':
        X = np.array(df.loc[patch, :])
        if method == 'kmeans':
            f = cluster.KMeans
        gaps = gap(X, ks=range(1, min(max_K, len(patch))), method=f)
        k_optimal = list(gaps).index(max(gaps)) + 1
        clustering = pd.DataFrame(f(n_clusters=k_optimal).fit_predict(X), index=patch)
        return [list(clustering[clustering[0] == i].index) for i in range(k_optimal)]

    else:
        raise ValueError('only db and gap statistics are supported')
def kMedoids(k, centres, data, error, distance_func = None): # centres (kx3) # data (Nx3) # error: epsilon m = centres[:] if(distance_func is None): distance_func = lambda single_point, set: np.sum((single_point - set) ** 2, axis = 1) while(True): sets = [[] for i in range(k)] for point in data: # Calculate distance dist_sq = distance_func(point, m) # Choose the nearest centre and add point into corresponding set sets[np.argmin(dist_sq)].append(point) temp_m = m[:] for i in range(len(sets)): if sets[i] != []: # Find a suitable point for next centre distances = [] for chosen_point in sets[i]: distances.append(distance_func(chosen_point, sets[i])) temp_m[i] = sets[i][np.argmin(distances)] temp_m = np.array(temp_m) changes = temp_m - m m = temp_m if((changes < error).all()): break return m
def _readDem(self):
    """
    Read coordinates defining DEM and create vectors of x, y, and z values.
    """
    # Load each coordinate as a numpy array.
    x, y, z = numpy.loadtxt(self.inputDem, dtype=numpy.float64, unpack=True)

    self.numZIn = len(z)
    if (y[0] == y[1]):
        # Ordered by rows.
        self.numXIn = max(numpy.argmax(x) + 1, numpy.argmin(x) + 1)
        self.xIn = x[0:self.numXIn]
        self.numYIn = self.numZIn // self.numXIn
        self.yIn = y[0:self.numZIn:self.numXIn]
        self.zIn = numpy.reshape(z, (self.numYIn, self.numXIn))
    else:
        # Ordered by columns.
        self.numYIn = max(numpy.argmax(y) + 1, numpy.argmin(y) + 1)
        self.yIn = y[0:self.numYIn]
        self.numXIn = self.numZIn // self.numYIn
        self.xIn = x[0:self.numZIn:self.numYIn]
        self.zIn = numpy.transpose(numpy.reshape(z, (self.numXIn, self.numYIn)))

    if (self.xIn[0] > self.xIn[1]):
        self.xIn = numpy.flipud(self.xIn)
        self.zIn = numpy.fliplr(self.zIn)

    if (self.yIn[0] > self.yIn[1]):
        self.yIn = numpy.flipud(self.yIn)
        self.zIn = numpy.flipud(self.zIn)

    return
def close_gripper(self, lr, step_viewer=1, max_vel=.02, close_dist_thresh=0.004, grab_dist_thresh=0.005): print 'CLOSING GRIPPER' # generate gripper finger trajectory joint_ind = self.robot.GetJoint("%s_gripper_l_finger_joint" % lr).GetDOFIndex() start_val = self.robot.GetDOFValues([joint_ind])[0] print 'start_val: ', start_val # execute gripper finger trajectory dyn_bt_objs = [bt_obj for sim_obj in self.dyn_sim_objs for bt_obj in sim_obj.get_bullet_objects()] next_val = start_val while next_val: flr2finger_pts_grid = self._get_finger_pts_grid(lr) ray_froms, ray_tos = flr2finger_pts_grid['l'], flr2finger_pts_grid['r'] # stop closing if any ray hits a dynamic object within a distance of close_dist_thresh from both sides next_vel = max_vel for bt_obj in dyn_bt_objs: from_to_ray_collisions = self.bt_env.RayTest(ray_froms, ray_tos, bt_obj) to_from_ray_collisions = self.bt_env.RayTest(ray_tos, ray_froms, bt_obj) rays_dists = np.inf * np.ones((len(ray_froms), 2)) for rc in from_to_ray_collisions: ray_id = np.argmin(np.apply_along_axis(np.linalg.norm, 1, ray_froms - rc.rayFrom)) rays_dists[ray_id, 0] = np.linalg.norm(rc.pt - rc.rayFrom) for rc in to_from_ray_collisions: ray_id = np.argmin(np.apply_along_axis(np.linalg.norm, 1, ray_tos - rc.rayFrom)) rays_dists[ray_id, 1] = np.linalg.norm(rc.pt - rc.rayFrom) colliding_rays_inds = np.logical_and(rays_dists[:, 0] != np.inf, rays_dists[:, 1] != np.inf) if np.any(colliding_rays_inds): rays_dists = rays_dists[colliding_rays_inds, :] if np.any(np.logical_and(rays_dists[:, 0] < close_dist_thresh, rays_dists[:, 1] < close_dist_thresh)): next_vel = 0 else: next_vel = np.minimum(next_vel, np.min(rays_dists.sum(axis=1))) if next_vel == 0: break next_val = np.maximum(next_val - next_vel, 0) self.robot.SetDOFValues([next_val], [joint_ind]) self.step() if self.viewer and step_viewer: self.viewer.Step() handles = [] # add constraints at the points where a ray hits a dynamic link within a distance of grab_dist_thresh for bt_obj in dyn_bt_objs: from_to_ray_collisions = self.bt_env.RayTest(ray_froms, ray_tos, bt_obj) to_from_ray_collisions = self.bt_env.RayTest(ray_tos, ray_froms, bt_obj) for i in range(ray_froms.shape[0]): self.viewer.Step() ray_collisions = [rc for rcs in [from_to_ray_collisions, to_from_ray_collisions] for rc in rcs] for rc in ray_collisions: if rc.link == bt_obj.GetKinBody().GetLink('rope_59'): self.viewer.Step() if np.linalg.norm(rc.pt - rc.rayFrom) < grab_dist_thresh: link_tf = rc.link.GetTransform() link_tf[:3, 3] = rc.pt self._add_constraints(lr, rc.link, link_tf) if self.viewer and step_viewer: self.viewer.Step()
def correct_shift_at_349nm(self):
    shift = (self.values[closest_wavelength_index(self.nm, 349)]
             - self.values[closest_wavelength_index(self.nm, 348)])
    min_ = min(np.argmin(self.nm), closest_wavelength_index(self.nm, 348))
    max_ = max(np.argmin(self.nm), closest_wavelength_index(self.nm, 348))
    for i in range(min_, max_):
        self.values[i] -= shift
    self.state += ' 349nm_shift_corrected'
def get_cmd_shape(color, mag): ''' gets the outline of a cmd. Guesses at a large polygon, and then add points that are outside of the polygon, ignores points within. then polar sorts the result. returns: N,2 array. ''' # make a guess at the polygon. left = (np.min(color), mag[np.argmin(color)]) right = (np.max(color), mag[np.argmax(color)]) up = (color[np.argmin(mag)], np.min(mag)) down = (color[np.argmax(mag)], np.max(mag)) verts = np.vstack((left, right, up, down)) points = np.column_stack((color, mag)) for point in points: if nxutils.pnpoly(point[0], point[1], verts) == 0.: # add point to verts col = verts[:, 0] m = verts[:, 1] col = np.append(col, point[0]) m = np.append(m, point[1]) # order the new points in a circle verts = polar_sort(zip(col, m)) verts = np.append(verts, [verts[0]], axis=0) # plt.plot(verts[:, 0], verts[:, 1], lw = 2) return verts
def _make_tuples(self, key): print('Populating', key) spikes = np.vstack([s.squeeze() for s in (preprocess.Spikes.RateTrace() & key).fetch('rate_trace')]) s = spikes.sum(axis=0) nans = np.isnan(s) key['leading_nans'] = int(nans[0]) key['trailing_nans'] = int(nans[1]) t = (preprocess.Sync() & key).fetch1('frame_times') # does not need to be unique flip_first = (vis.Trial() * preprocess.Sync().proj('psy_id', trial_idx='first_trial') & key).fetch1('flip_times') flip_last = (vis.Trial() * preprocess.Sync().proj('psy_id', trial_idx='last_trial') & key).fetch1('flip_times') # (vis.Trial() * preprocess.Sync() & 'trial_idx between first_trial and last_trial') fro = np.atleast_1d(flip_first.squeeze())[0] to = np.atleast_1d(flip_last.squeeze())[ -1] # not necessarily where the stimulus stopped, just last presentation idx_fro = np.argmin(np.abs(t - fro)) idx_to = np.argmin(np.abs(t - to)) + 1 key['stimulus_nans'] = int(np.any(nans[idx_fro:idx_to])) if np.any(nans): key['nan_idx'] = nans key['stimulus_start'] = idx_fro + 1 key['stimulus_end'] = idx_to self.insert1(key)
def find_boundaries(self, all=False): """Find the local minima on either side of each peak Arguments: (none) """ try: prevb = np.argmin(self.y[0:self._idx[0]]) except IndexError: prevb = 0 bounds = [] if not all: pos = self._idx[self._keep] else: pos = self._idx npks = len(pos) for i in range(npks): thismax = pos[i] if i < npks-1: nextmax = pos[i + 1] relb = np.argmin(self.y[thismax:nextmax]) nextb = relb + thismax else: nextmax = len(self.y)-1 nextb = len(self.y)-1 bounds.append([prevb, nextb]) prevb = nextb self._bounds = np.array(bounds)
def estimate_pk_parms_1d(x, f, pktype):
    """
    Gives an initial guess of parameters for an analytic fit of one-dimensional
    peak data.

    Required Arguments:
    x -- (n) ndarray of coordinate positions
    f -- (n) ndarray of intensity measurements at coordinate positions x
    pktype -- string, type of analytic function that will be used to fit the
        data; current options are "gaussian", "lorentzian", "pvoigt"
        (pseudo-Voigt), and "split_pvoigt" (split pseudo-Voigt)

    Outputs:
    p -- (m) ndarray containing initial guesses for parameters for the input
        peak type (see peak function help for what each parameter corresponds to)
    """
    data_max = np.max(f)
    # lbg = np.mean(f[:2])
    # rbg = np.mean(f[:2])

    if (f[0] > (0.25 * data_max)) and (f[-1] > (0.25 * data_max)):  # heuristic for wide peaks
        bg0 = 0.
    elif f[0] > (0.25 * data_max):   # peak cut off on the left
        bg0 = f[-1]
    elif f[-1] > (0.25 * data_max):  # peak cut off on the right
        bg0 = f[0]
    else:
        bg0 = (f[0] + f[-1]) / 2.

    # bg1 = (rbg - lbg) / (x[-1] - x[0])

    cen_index = np.argmax(f)
    x0 = x[cen_index]
    A = data_max - bg0  # -(bg0+bg1*x0)

    num_pts = len(f)

    # checks for peaks that are cut off
    if cen_index == (num_pts - 1):
        # peak cut off on the left
        FWHM = x[cen_index] - x[np.argmin(np.abs(f[:cen_index] - A / 2.))]
    elif cen_index == 0:
        # peak cut off on the right
        FWHM = x[cen_index + np.argmin(np.abs(f[cen_index + 1:] - A / 2.))] - x[0]
    else:
        FWHM = (x[cen_index + np.argmin(np.abs(f[cen_index + 1:] - A / 2.))]
                - x[np.argmin(np.abs(f[:cen_index] - A / 2.))])

    if FWHM <= 0:  # uh oh, something went bad
        FWHM = (x[-1] - x[0]) / 4.  # completely arbitrary, set peak width to 1/4 window size

    if pktype == 'gaussian' or pktype == 'lorentzian':
        p = [A, x0, FWHM, bg0, 0.]
    elif pktype == 'pvoigt':
        p = [A, x0, FWHM, 0.5, bg0, 0.]
    elif pktype == 'split_pvoigt':
        p = [A, x0, FWHM, FWHM, 0.5, 0.5, bg0, 0.]

    p = np.array(p)
    return p
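# Hypothetical usage sketch (not part of the original source): estimate starting
# parameters for a synthetic Gaussian peak; the data below are illustrative only
# and assume numpy is imported as np.
if __name__ == '__main__':
    x = np.linspace(-5.0, 5.0, 201)
    f = 10.0 * np.exp(-0.5 * (x / 0.8) ** 2) + 0.5
    p0 = estimate_pk_parms_1d(x, f, 'gaussian')
    print(p0)  # rough [amplitude, centre, FWHM, background, slope] guesses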
def _transform_indices(self, key): """ Transforms indices by snapping to the closest value if values are numeric, otherwise applies no transformation. """ ndims = self.ndims if all(not isinstance(el, slice) for el in key): dim_inds = [] for dim in self._cached_index_names: dim_type = self.get_dimension_type(dim) if isinstance(dim_type, type) and issubclass(dim_type, Number): dim_inds.append(self.get_dimension_index(dim)) str_keys = iter(key[i] for i in range(self.ndims) if i not in dim_inds) num_keys = [] if len(dim_inds): keys = list({tuple(k[i] if ndims > 1 else k for i in dim_inds) for k in self.keys()}) q = np.array([tuple(key[i] if ndims > 1 else key for i in dim_inds)]) idx = np.argmin([np.inner(q - np.array(x), q - np.array(x)) if len(dim_inds) == 2 else np.abs(q-x) for x in keys]) num_keys = iter(keys[idx]) key = tuple(next(num_keys) if i in dim_inds else next(str_keys) for i in range(self.ndims)) elif any(not isinstance(el, slice) for el in key): index_ind = [idx for idx, el in enumerate(key) if not isinstance(el, (slice, str))][0] dim_keys = np.array([k[index_ind] for k in self.keys()]) snapped_val = dim_keys[np.argmin(np.abs(dim_keys-key[index_ind]))] key = list(key) key[index_ind] = snapped_val key = tuple(key) return key
def _augmenting_row_reduction(self): """ Augmenting row reduction step from LAPJV algorithm """ unassigned = np.where(self._x == -1)[0] for i in unassigned: while True: #find smallest 2 values and indices temp = self.c[i] - self._v j1 = np.argmin(temp) u1 = temp[j1] temp[j1] = np.max(temp) + 1 j2 = np.argmin(temp) u2 = temp[j2] if u1 < u2: self._v[j1] -= u2 - u1 elif self._y[j1] != -1: j1 = j2 k = self._y[j1] if k != -1: self._x[k] = -1 self._x[i] = j1 self._y[j1] = i i = k if np.allclose(u1, u2) or k == -1: break
def gpu_nnc_predict(trX, trY, teX, metric='cosine', batch_size=4096): if metric == 'cosine': metric_fn = cosine_dist else: metric_fn = euclid_dist idxs = [] for i in range(0, len(teX), batch_size): mb_dists = [] mb_idxs = [] for j in range(0, len(trX), batch_size): dist = metric_fn(floatX(teX[i:i+batch_size]), floatX(trX[j:j+batch_size])) if metric == 'cosine': mb_dists.append(np.max(dist, axis=1)) mb_idxs.append(j+np.argmax(dist, axis=1)) else: mb_dists.append(np.min(dist, axis=1)) mb_idxs.append(j+np.argmin(dist, axis=1)) mb_idxs = np.asarray(mb_idxs) mb_dists = np.asarray(mb_dists) if metric == 'cosine': i = mb_idxs[np.argmax(mb_dists, axis=0), np.arange(mb_idxs.shape[1])] else: i = mb_idxs[np.argmin(mb_dists, axis=0), np.arange(mb_idxs.shape[1])] idxs.append(i) idxs = np.concatenate(idxs, axis=0) nearest = trY[idxs] return nearest
def rescale_frontoparallel(p_fp, box_fp, p_im):
    """
    The fronto-parallel image region is rescaled to bring it to approximately
    the same size as the target region.

    p_fp : nx2 coordinates of contour points in the fronto-parallel plane
    box  : 4x2 coordinates of the bounding box of p_fp
    p_im : nx2 coordinates of the contour in the image

    NOTE : p_fp and p are corresponding, i.e. : p_fp[i] ~ p[i]

    Returns the scale 's' to scale the fronto-parallel points by.
    """
    l1 = np.linalg.norm(box_fp[1, :] - box_fp[0, :])
    l2 = np.linalg.norm(box_fp[1, :] - box_fp[2, :])

    n0 = np.argmin(np.linalg.norm(p_fp - box_fp[0, :][None, :], axis=1))
    n1 = np.argmin(np.linalg.norm(p_fp - box_fp[1, :][None, :], axis=1))
    n2 = np.argmin(np.linalg.norm(p_fp - box_fp[2, :][None, :], axis=1))

    lt1 = np.linalg.norm(p_im[n1, :] - p_im[n0, :])
    lt2 = np.linalg.norm(p_im[n1, :] - p_im[n2, :])

    s = max(lt1 / l1, lt2 / l2)
    if not np.isfinite(s):
        s = 1.0
    return s
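# Hypothetical usage sketch (not part of the original source): a rectangle
# contour and an image contour that is simply three times larger, so the
# returned scale should be about 3; assumes numpy is imported as np.
if __name__ == '__main__':
    box_fp = np.array([[0., 0.], [2., 0.], [2., 1.], [0., 1.]])
    p_fp = np.vstack([box_fp, [[1.0, 0.5]]])
    p_im = 3.0 * p_fp
    print(rescale_frontoparallel(p_fp, box_fp, p_im))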
for i in range(0, ncol): cent[:,i] = sample(tuple(data[:,i]), k) cent_ls.append(cent) #Distances from observations to centroids dist_ls = [] clus = [] dist = np.zeros(shape=(nrow, k)) for i in range(0, (nrow)): for j in range(0, (ncol-1)): dist[i,j] = math.sqrt((sum(data[i,:] - cent[j,:]))**2) clus.append(np.argmin(dist[i,:])) dist_ls.append(dist) data_df = pd.DataFrame(data) clus_df = pd.DataFrame(clus) data_c = pd.concat([data_df.reset_index(drop=True), clus_df], axis=1) data_clus = data_c.values for h in range(1, niter): #Recompute centroids cent = np.zeros(shape=(k, ncol)) for i in range(0, ncol): cent[:,i] = sample(tuple(data[:,i]), k)
# convert to numpy arrays
xTrain = numpy.array(xTrainTemp)
yTrain = numpy.array(yListTrain)
xTest = numpy.array(xTestTemp)
yTest = numpy.array(yListTest)

# use sci-kit learn linear regression
wineQModel = linear_model.LinearRegression()
wineQModel.fit(xTrain, yTrain)

# use trained model to generate prediction and calculate rmsError
rmsError = numpy.linalg.norm((yTest - wineQModel.predict(xTest)), 2) / sqrt(len(yTest))
errorList.append(rmsError)
attTemp = []

iBest = numpy.argmin(errorList)
attributeList.append(attTry[iBest])
oosError.append(errorList[iBest])

print("Out of sample error versus attribute set size:")
print(oosError)
print("\n" + "Best attribute indices")
print(attributeList)
namesList = [names[i] for i in attributeList]
print("\n" + "Best attribute names")
print(namesList)

# Plot error versus number of attributes
x = range(len(oosError))
plt.plot(x, oosError, 'k')
plt.xlabel('Number of Attributes')
def rollImage(compared, rolled, scope=5):
    values = [compareImages(compared, np.roll(rolled, i, axis=0))
              for i in range(-scope, scope + 1)]
    return np.roll(rolled, np.argmin(values) - scope, axis=0)
        title=f'Channel: {ch}')
ax.legend(ncol=4)
ax.secondary_xaxis('top', functions=(lambda x: 1e7 / x, lambda x: 1e7 / x))

# %%
# Using the initial channel 63 clearly leads to an offset, indicating that the
# zero-order position was not correct. Using 58 as the center channel instead,
# we get good agreement. The peak at 1601 is isolated from water vapor lines,
# hence we will use it to calibrate the dispersion. For that we will look at
# three spectra at once: one where the peak is at the center channel and one for
# each side. We will try to find a suitable dispersion factor to get some
# reasonable overlap. As seen below, a factor of 7.7 nm/pixel gives a really
# good fit.

fig, ax = plt.subplots()
i = np.argmin(abs(cwl - 1e7 / 1601))
disp = 7.7
new_x = disp * (np.arange(128) - 58)
ax.plot(new_x + cwl[i], pr[i, :])
ax.plot(new_x + cwl[i - 130], 0.58 * pr[i - 130, :])
ax.plot(new_x + cwl[i + 130], 1.4 * pr[i + 130, :])
new_wl = disp * (np.arange(128) - 58)[:, None] + cwl[None, :]

# %%
# Using that factor we can extract the region around the peak for multiple
# spectra. In this region, we just look for the minimum.

fig, (ax, ax2) = plt.subplots(2, sharex=True, figsize=(3, 4))
mask = (abs(new_wl - 1e7 / 1601) < 80).T
ax.plot(np.arange(128) - 58, cwl[np.argmax(mask, 0)], lw=1, c='k', ls='--')
def _crop_eyes(facecrop, eye_cascade, Lexpect=(0.7, 0.4), Rexpect=(0.3, 0.4), maxdist=0.2, maxsize=0.3): """Attempts to find eyes in an image of a face, and crops the left and the right eye separately. When more than two potential eyes are detected, the eyes that are closest to the expected positions of the eyes will be selected. Arguments facecrop - A numpy.ndarray with unsigned, 8-bit integers that reflect the greyscale values of a face. Keyword Arguments Lexpect - A (x,y) tuple that indicates where the left eye is expected to be. Note that the coordinates are in relative space, where (0,0) is the top-left of the image, (0,1) is the bottom-left, and (1,1) is the bottom-right. Also note that the left eye is likely to be on the right side of the image, and the right eye is likely to be in the left part of the image. Default = (0.7,0.4) Rexpect - A (x,y) tuple that indicates where the right eye is expected to be. Note that the coordinates are in relative space, where (0,0) is the top-left of the image, (0,1) is the bottom-left, and (1,1) is the bottom-right. Also note that the left eye is likely to be on the right side of the image, and the right eye is likely to be in the left part of the image. Default = (0.3,0.4) maxdist - A float that indicates what the maximal allowable distance is between the expected eye position, and the position of detected potential eye. The maximal distance is defined as a proportion of the image height. It can also be set to None. Default = (0.2) maxsize - A float that indicates what the maximal allowable width is of the detected eyes. The maximal size is defined as a proportion of the image width. It can also be set to None. Default = (0.3) Returns success, [left, right] - success is a Boolean that indicates whether the eyes could be detected. left and right are both a numpy.ndarray with unsigned, 8-bit integers that reflect the greyscale values of what are assumed to be the left and the right eye. """ # Return straight away when facecrop==None if facecrop is None: return False, [None, None] # DETECT THE EYES eyes = eye_cascade.detectMultiScale(facecrop) # Return if no eyes could be detected. if len(eyes) == 0: return False, [None, None] # Remove all the potential eye rects that are too large. if maxsize != None: eyes = eyes[eyes[:, 3] < maxsize * facecrop.shape[0]] # Calculate the distances between each potential eye and the # expected locations. (NOTE: These are not the actual distances, # but the squared distances. They need to be compared to the # squared maximum distance.) cx = (eyes[:, 0] + eyes[:, 2] / 2) / float(facecrop.shape[1]) cy = (eyes[:, 1] + eyes[:, 3] / 2) / float(facecrop.shape[0]) dl = (cx - Lexpect[0]) ** 2 + (cy - Lexpect[1]) ** 2 dr = (cx - Rexpect[0]) ** 2 + (cy - Rexpect[1]) ** 2 # Exclude all potential eyes that are too far from expected eye # locations. if maxdist != None: good = numpy.min([dl, dr], axis=0) < maxdist ** 2 eyes = eyes[good] dl = dl[good] dr = dr[good] # COUNT THE EYES # If no eye was detected, there is no eye index. if len(eyes) == 0: li = None ri = None # If only one eye is detected, its index is 0. elif len(eyes) == 1: # Check whether the distance to the left eye is closer. If it # is, than only the left eye was recorded. If not, the right # eye was recorded. if dl[0] < dr[0]: li = 0 ri = None else: li = None ri = 0 # If two or more eyes were detected, choose the potential rects # that were closest to the expected eye positions. 
else: li = numpy.argmin(dl) ri = numpy.argmin(dr) # RETURN CROPPED EYES # If no eye was detected, return no success. success = True if (li is None) & (ri is None): success = False # If the left eye was detected, crop it from the face. if li is None: left = None else: x, y, w, h = eyes[li] x, y, w, h = int(x), int(y), int(w), int(h) y += h // 4 h = h // 2 left = facecrop[y:y + h, x:x + w] # If the right eye was detected, crop it from the face. if ri is None: right = None else: x, y, w, h = eyes[ri] x, y, w, h = int(x), int(y), int(w), int(h) y += h // 4 h = h // 2 right = facecrop[y:y + h, x:x + w] # In DEBUG mode, draw the original frame and the detected eyes. if _DEBUG: # Display the face in the bottom-left pane. _AX[1][0].imshow(facecrop, cmap='gray') _AX[1][0].set_title("potential eyes") for i in range(len(eyes)): r = eyes[i] if i == li or i == ri: _AX[1][0].add_patch(patches.Rectangle( (r[0], r[1]), r[2], r[3], fill=False, linewidth=3)) else: _AX[1][0].add_patch(patches.Rectangle( (r[0], r[1]), r[2], r[3], fill=False, linewidth=1)) return success, [left, right]
def _sample_logger(self, event, samplequeue): """Continuously monitors the Queue, and writes samples to the log file whenever over five are still in the Queue. """ # Create a list to keep track of samples. timestamps = [] samplelist = [] # Continuously run. while event.is_set(): # Only process samples if the tracker is recording. if self._recording: # Obtain a new sample if the Queue isn't empty, and lock # the Queue while we're using it. if not samplequeue.empty(): # if samplequeue.qsize() > 0: # Get the oldest sample in the Queue. sample = samplequeue.get() # Add the sample to the list. timestamps.append(sample[0]) samplelist.append(sample[1]) # Store the sample locally, but only if it's not # a message. if sample[1][0][0] != 'MSG': self._latest_sample = sample # Write the oldest samples from the list, but make sure # there are always at least five samples still in the # list. We do this, because the sampling happens # asynchronously in several parallel processes. This # might result in newer samples being processed and # becoming available before older samples. Obviously, # we want to log the samples in chronological order of # obtaining them, not of them becoming available. So # we keep a buffer of five samples, which should # hopefully be enough to allow slightly older samples # to come in. while len(timestamps) > 5: # Find the oldest timestamp. i = numpy.argmin(timestamps) t = timestamps.pop(i) LR = samplelist.pop(i) # Log the sample. self._log_sample([t, LR]) # If we're not recording anymore, but there are samples left # in the list, then we need to process those first. elif not self._recording and len(timestamps) > 0: # Empty out the sample buffer. while len(timestamps) > 0: # Find the oldest timestamp. i = numpy.argmin(timestamps) t = timestamps.pop(i) LR = samplelist.pop(i) # Log the sample. self._log_sample([t, LR]) # If the tracker is not recording, wait for a bit to avoid # wasting processing resources on continuously checking # whether the tracker is recording. else: # Pause for 10 milliseconds. time.sleep(0.01)
### with a window of the length of the chunk -- keep the high frequencies # window = tau # weights = np.repeat(1.0,window)/window # dsst_box = np.convolve(dsst_chunk, weights,'valid') ### power spectral density of the chunck time series low pass filtered # nn = 0 # for c in range(0,NumChunk-1): # f_box[c,:], pxx_box[c,:] = signal.periodogram(dsst_box[nn \ # :nn+tau]) # nn = nn + tau # inter chunk variance of the fast varying process # average periodograms in time -- estimate the spectrum for all the chunks pxx_jall = np.mean(pxx_j, axis=0) idx = np.argmin(np.abs(f_j[0][:] - (1 / tau))) # variance of the fast varying component PP['Var_F'][jcnt_id, icnt_id] = (1 / tau) * pxx_jall[idx] # PP['Var_F2'][jcnt_id,icnt_id] = np.nanmean(pxx_box[:][idx]/tau) jcnt_id = jcnt_id + 1 # elapsed_lat = time.time() - t_lat # print('elapsed time for each lat:', elapsed_lat) elapsed_lon = time.time() - t_lon print('elapsed time for each lon:', elapsed_lon) icnt_id = icnt_id + 1 # variance ratio PP['S_tau'][:, :] = PP['Var_interC'][:, :] / PP['Var_F'][:, :] #PP['S_tau2'][:,:] = PP['Var_interC'][:,:] / PP['Var_F2'][:,:]
angle = utils.copy_docstring( 'tf.math.angle', lambda input, name=None: np.angle(input)) argmax = utils.copy_docstring( 'tf.math.argmax', lambda input, axis=None, output_type=np.int64, name=None: ( # pylint: disable=g-long-lambda np.argmax(input, axis=0 if axis is None else int(axis)) .astype(utils.numpy_dtype(output_type)))) argmin = utils.copy_docstring( 'tf.math.argmin', lambda input, axis=None, output_type=np.int64, name=None: ( # pylint: disable=g-long-lambda np.argmin(_convert_to_tensor( input), axis=0 if axis is None else int(axis)) .astype(utils.numpy_dtype(output_type)))) asin = utils.copy_docstring( 'tf.math.asin', lambda x, name=None: np.arcsin(x)) asinh = utils.copy_docstring( 'tf.math.asinh', lambda x, name=None: np.arcsinh(x)) atan = utils.copy_docstring( 'tf.math.atan', lambda x, name=None: np.arctan(x)) atan2 = utils.copy_docstring(
plt.subplot(3, 1, j + 1) plt.plot([x for x in range(1, n) if x % 10 == 0], smooth, lw=1, color='blue', label='Smoothed') plt.title(titles[i], fontsize=12) # plt.plot([x for x in range(1, n) if x % 10 == 0],[np.mean(x) for x in np.array(trials).T], lw = 1, color = 'red', label = 'Actual') plt.show() plt.figure(figsize=(12, 6)) for i in range(n_dims): n = dims[i] trials = times[i] smooth = signal.savgol_filter([np.mean(x) for x in np.array(trials).T], 5, 3) plt.subplot(2, 2, i + 1) plt.plot([2**x + 1 for x in range(4, int(math.log2(n)) + 1)], smooth, lw=1, color='blue', label='Smoothed') plt.title(titles[i]) # plt.plot([x for x in range(1, n) if x % 10 == 0],[np.mean(x) for x in np.array(trials).T], lw = 1, color = 'red', label = 'Actual') min_vals = [np.argmin(x) for x in np.array(trials).T] plt.show()
def _box_sphere_collision(H_g0, half_extents0, p_g1, radius1): """ Get information on box/sphere collision. :param H_g0: pose of the center of the box relative to the ground :type H_g0: (4,4)-array :param half_extents0: half lengths of the box :type half_extents0: (3,)-array :param p_g1: position of the center of the sphere relative to the ground :type p_g1: (3,) array :param float radius1: radius of the sphere :return: a tuple (*sdist*, *H_gc0*, *H_gc1*) with: * *sdist*: the minimal distance between the box and the sphere * *H_gc0*: the pose from the ground to the closest contact point on box 0 (normal along z) * *H_gc1*: the pose from the ground to the closest contact point on sphere 1 (normal along z) .. image:: img/box_sphere_collision.svg :width: 300px **Tests:** >>> from numpy import array, eye >>> H_g0 = eye(4) >>> lengths0 = array([1., 2., 3.]) >>> r1 = 0.1 >>> p_g1 = array([0., 3., 1.]) >>> (sdist, H_gc0, H_gc1)=_box_sphere_collision(H_g0, lengths0/2, p_g1, r1) >>> print(sdist) 1.9 >>> print(H_gc0) [[ 0. 1. 0. 0.] [-0. 0. 1. 1.] [ 1. -0. 0. 1.] [ 0. 0. 0. 1.]] >>> print(H_gc1) [[ 0. 1. 0. 0. ] [-0. 0. 1. 2.9] [ 1. -0. 0. 1. ] [ 0. 0. 0. 1. ]] >>> p_g1 = array([0.55, 0., 0.]) >>> (sdist, H_gc0, H_gc1)=_box_sphere_collision(H_g0, lengths0/2, p_g1, r1) >>> print(sdist) -0.05 >>> print(H_gc0) [[-0. 0. 1. 0.5] [ 0. -1. 0. 0. ] [ 1. 0. 0. 0. ] [ 0. 0. 0. 1. ]] >>> print(H_gc1) [[-0. 0. 1. 0.45] [ 0. -1. 0. 0. ] [ 1. 0. 0. 0. ] [ 0. 0. 0. 1. ]] >>> p_g1 = array([0.45, 0., 0.]) >>> (sdist, H_gc0, H_gc1)=_box_sphere_collision(H_g0, lengths0/2, p_g1, r1) >>> print(sdist) -0.15 >>> print(H_gc0) [[-0. 0. 1. 0.5] [ 0. -1. 0. 0. ] [ 1. 0. 0. 0. ] [ 0. 0. 0. 1. ]] >>> print(H_gc1) [[-0. 0. 1. 0.35] [ 0. -1. 0. 0. ] [ 1. 0. 0. 0. ] [ 0. 0. 0. 1. ]] """ assert Hg.ishomogeneousmatrix(H_g0) p_01 = Hg.pdot(Hg.inv(H_g0), p_g1) if (abs(p_01) <= half_extents0).all(): # p_01 is inside the box, we need to find the nearest face near_face = zeros(6) near_face[0:3] = half_extents0 - p_01 near_face[3:6] = half_extents0 + p_01 i = argmin(near_face) f_0 = p_01.copy() normal = zeros(3) if i < 3: f_0[i] = half_extents0[i] normal[i] = 1 else: f_0[i - 3] = -half_extents0[i - 3] normal[i - 3] = -1 #TODO check this line is correct f_g = Hg.pdot(H_g0, f_0) sdist = -norm(f_g - p_g1) - radius1 else: # find the point x inside the box that is the nearest to # the sphere center: f_0 = zeros(3) for i in arange(3): f_0[i] = max(min(half_extents0[i], p_01[i]), -half_extents0[i]) f_g = Hg.pdot(H_g0, f_0) vec = p_g1 - f_g normal = vec / norm(vec) sdist = norm(vec) - radius1 H_gc0 = Hg.zaligned(normal) H_gc1 = H_gc0.copy() H_gc0[0:3, 3] = f_g H_gc1[0:3, 3] = p_g1 - radius1 * normal return (sdist, H_gc0, H_gc1)
def get_original_probsevere_ids( best_track_storm_object_table, probsevere_storm_object_table): """For each best-track storm object, returns the original probSevere ID. N = number of best-track storm objects Each input is a pandas DataFrame with columns documented in `storm_tracking_io.write_processed_file`. :param best_track_storm_object_table: N-row pandas DataFrame with storm objects *after* fixing duplicate probSevere IDs and running best-track. :param probsevere_storm_object_table: pandas DataFrame with storm objects *before* fixing duplicate probSevere IDs and running best-track. :return: orig_probsevere_ids: length-N list of original probSevere IDs (strings). :raises: ValueError: if any best-track object cannot be found in the original probSevere table. """ num_best_track_objects = len(best_track_storm_object_table.index) orig_probsevere_ids = [None] * num_best_track_objects for i in range(num_best_track_objects): these_time_indices = numpy.where( probsevere_storm_object_table[TIME_COLUMN].values == best_track_storm_object_table[TIME_COLUMN].values[i])[0] if not len(these_time_indices): this_time_string = time_conversion.unix_sec_to_string( best_track_storm_object_table[TIME_COLUMN].values[i], TIME_FORMAT_FOR_LOG_MESSAGES) error_string = ( 'Cannot find any probSevere objects at {0:s}, even though there' ' are best-track objects at this time.' ).format(this_time_string) raise ValueError(error_string) these_latitude_diffs_deg = numpy.absolute( probsevere_storm_object_table[CENTROID_LAT_COLUMN].values[ these_time_indices] - best_track_storm_object_table[CENTROID_LAT_COLUMN].values[i]) these_longitude_diffs_deg = numpy.absolute( probsevere_storm_object_table[CENTROID_LNG_COLUMN].values[ these_time_indices] - best_track_storm_object_table[CENTROID_LNG_COLUMN].values[i]) this_min_latlng_diff_deg = numpy.min( these_latitude_diffs_deg + these_longitude_diffs_deg) this_nearest_index = numpy.argmin( these_latitude_diffs_deg + these_longitude_diffs_deg) this_nearest_index = these_time_indices[this_nearest_index] if this_min_latlng_diff_deg > TOLERANCE: this_time_string = time_conversion.unix_sec_to_string( best_track_storm_object_table[TIME_COLUMN].values[i], TIME_FORMAT_FOR_LOG_MESSAGES) error_string = ( 'Cannot find original probSevere ID for best-track object ' '"{0:s}" at {1:s}, {2:.4f} deg N, and {3:.4f} deg E. Nearest ' 'probSevere object at {1:s} is at {4:.4f} deg N and {5:.4f} deg' ' E.' ).format(best_track_storm_object_table[STORM_ID_COLUMN].values[i], this_time_string, best_track_storm_object_table[CENTROID_LAT_COLUMN].values[ i], best_track_storm_object_table[CENTROID_LNG_COLUMN].values[ i], probsevere_storm_object_table[CENTROID_LAT_COLUMN].values[ this_nearest_index], probsevere_storm_object_table[CENTROID_LNG_COLUMN].values[ this_nearest_index]) raise ValueError(error_string) orig_probsevere_ids[i] = probsevere_storm_object_table[ STORM_ID_COLUMN].values[this_nearest_index] return orig_probsevere_ids
def find_onset_and_cessation(timeseries, start, end_year, window1, window2, year_length): """ This function finds the onset and cessation of the wet season The methodology is based on that of Liebmann et al. (2012) Journal of Climate The full methodology is described in Dunning et al. (2016) It uses the numpy library It does not work out the onset and end for first and last year For each year it starts searching 'window1' days prior to the start of the water year The index of the start of the water year is given by 'start' 'timeseries' should be one dimensional and be a multiple of year_length long window1 and window2 determines how far each side of the wet season days are searched for (start and end resp.) Leap days should have already been removed start should be in the range 0-364 The anomaly is calculated over all year first, then the relevant parts are selected and summed """ # Calculate p-alpha p_minus_alpha = timeseries - np.mean(timeseries) # Create storage arrays # Don't calculate for the first or last year years = timeseries.shape[0] / year_length - 1 onset = np.zeros(years - 1) end = np.zeros(years - 1) onset_success = [] end_success = [] # Loop through second to penultimate year #plt.figure() for year in np.arange(1, years): # Select the anomaly from start - window to end + window begin = int(year * year_length + start - window1) endd = int(year * year_length + end_year + window2) this_year = p_minus_alpha[begin:endd] # Calculate the cumulative precipitation for that period delta = this_year.copy() for day in np.arange(len(this_year)): delta[day] = np.sum(this_year[0:day + 1]) # Find the index of the absolute min and max # Modify so that the min is not at the end and max is after min min_index = np.argmin(delta) # Check that the min is not right at the end of the record (within last 8 days) # Iterate until it is not at the end length_6 = len(delta) - 8 iterr = 0 while min_index > length_6: iterr += 1 delta_con = delta[:-7 * iterr] min_index = np.argmin(delta_con) length_6 = length_6 - 7 if length_6 < 10: min_index = float('nan') break if math.isnan(min_index): max_index = float('nan') else: max_index = np.argmax(delta[min_index:]) + min_index # Plot to test #ax1 = plt.subplot(4,4,year) #ax1.plot(this_year, color='DodgerBlue') #ax2 = ax1.twinx() #ax2.plot(delta, 'g', linewidth=3) #ax2.plot([min_index+1, min_index+1], [min(delta)-10,max(delta)+10], 'm', linewidth=4) #ax2.plot([max_index+1, max_index+1], [min(delta)-10,max(delta)+10], 'm', linewidth=4) # Transform to 'day of the year' onset_day = min_index + start - window1 + 1 cessation_day = max_index + start - window1 # Store in onset and cessation arrays if onset_day > cessation_day: #Not successful print 'why did this happen? - onset>cesssation in find_onset_and_cessation function' onset[year - 1] = float('nan') end[year - 1] = float('nan') else: # If onset day and cessation day are nans then it will do this loop onset[year - 1] = onset_day end[year - 1] = cessation_day #plt.show() return onset, end
def minimize(fun, bounds, cons=None, method='L-BFGS-B', grad=None, prior=None, n_start_points=10, maxiter=1000, random_state=None): """Find the minimum of function 'fun'. Parameters ---------- fun : callable Function to minimize. bounds : list of tuples Bounds for each parameter. cons : dict Constraints. method : string Minimization method. grad : callable Gradient of fun or None. prior : scipy-like distribution object Used for sampling initialization points. If None, samples uniformly. n_start_points : int, optional Number of initialization points. maxiter : int, optional Maximum number of iterations. random_state : np.random.RandomState, optional Used only if no elfi.Priors given. Returns ------- tuple of the found coordinates of minimum and the corresponding value. """ ndim = len(bounds) start_points = np.empty((n_start_points, ndim)) if prior is None: # Sample initial points uniformly within bounds # TODO: combine with the the bo.acquisition.UniformAcquisition method? random_state = random_state or np.random for i in range(ndim): start_points[:, i] = random_state.uniform(*bounds[i], n_start_points) else: start_points = prior.rvs(n_start_points, random_state=random_state) if len(start_points.shape) == 1: # Add possibly missing dimension when ndim=1 start_points = start_points[:, None] for i in range(ndim): start_points[:, i] = np.clip(start_points[:, i], *bounds[i]) # Run the optimisation from each initialization point. locs = [] vals = np.empty(n_start_points) for i in range(n_start_points): result = scipy.optimize.minimize(fun, start_points[i, :], method=method, jac=grad, bounds=bounds, constraints=cons) locs.append(result['x']) vals[i] = result['fun'] # Return the optimal case. ind_min = np.argmin(vals) locs_out = locs[ind_min] for i in range(ndim): locs_out[i] = np.clip(locs_out[i], *bounds[i]) return locs[ind_min], vals[ind_min]
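# Hypothetical usage sketch (not part of the original source): minimize a simple
# quadratic with the helper above; the objective, bounds, and seed are
# illustrative only and assume numpy/scipy are imported as in the function.
if __name__ == '__main__':
    objective = lambda w: float(np.sum((w - 0.3) ** 2))
    w_min, f_min = minimize(objective, bounds=[(-1.0, 1.0), (-1.0, 1.0)],
                            n_start_points=5,
                            random_state=np.random.RandomState(1))
    print(w_min, f_min)  # expected to be close to [0.3, 0.3] and 0.0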
def plot_steps_with_surrogate(w_path): # make figure to update fig = plt.figure(figsize=(12, 5)) ax1 = fig.add_subplot(111) # make cost function path based on gradient descent steps (in w_path) g_path = [] for i in range(0, len(w_path)): w = w_path[i] g_path.append(calculate_cost_value(w)) # plot costs function make_function(ax1) display.clear_output(wait=True) display.display(plt.gcf()) # colors for points s = np.linspace(1 / len(g_path), 1, len(g_path)) s.shape = (len(s), 1) colorspec = np.concatenate((s, np.flipud(s)), 1) colorspec = np.concatenate((colorspec, np.zeros((len(s), 1))), 1) # plot initial point ax1.plot(w_path[0], g_path[0], 'o', markersize=12, color=colorspec[0, :], markerfacecolor=colorspec[0, :]) display.clear_output(wait=True) display.display(plt.gcf()) # plot a tracer on this first point just for visualization purposes t = np.linspace(-0.5, g_path[0], 100) s = w_path[0] * np.ones((100)) ax1.plot(s, t, '--k') display.clear_output(wait=True) display.display(plt.gcf()) time.sleep(2) # plot first quadratic surrogate s_range = 3 # range over which to show the linear surrogate s = np.linspace(w_path[0] - s_range, w_path[0] + s_range, 10000) t = surrogate(w_path[0], s) h, = ax1.plot(s, t, 'm', linewidth=2) display.clear_output(wait=True) display.display(plt.gcf()) time.sleep(1) # plot minimum of quadratic surrogate ind = np.argmin(t) x_mark, = ax1.plot(s[ind], t[ind], 'kx', markersize=12, markeredgewidth=3) display.clear_output(wait=True) display.display(plt.gcf()) # loop over the remaining iterations, showing # - the quadratic surrogate at the first few steps # - color changing from green (start) to red (end) of gradient descent run for i in range(1, len(g_path)): # with the first few points plot the surrogates as well for illustration if i <= 1: time.sleep(2.5) # plot cost function evaluated at next newton's method step ax1.plot(w_path[i], g_path[i], 'o', markersize=12, color=colorspec[i - 1, :], markerfacecolor=colorspec[i - 1, :]) display.clear_output(wait=True) display.display(plt.gcf()) time.sleep(1) # remove old quadratic and stationary pt from drawing h.remove() x_mark.remove() display.clear_output(wait=True) display.display(plt.gcf()) # draw new quadratic s_range = 3 s = np.linspace(w_path[i] - s_range, w_path[i] + s_range, 10000) t = surrogate(w_path[i], s) h, = ax1.plot(s, t, 'm', linewidth=2) display.clear_output(wait=True) display.display(plt.gcf()) time.sleep(1) # draw minimum / maximum of quadratic ind = np.argmin(t) x_mark, = ax1.plot(s[ind], t[ind], 'kx', markersize=12, markeredgewidth=3) display.clear_output(wait=True) display.display(plt.gcf()) # remove quadratic surrogate, point, etc., if i == 1: time.sleep(2.5) ax1.plot(w_path[i + 1], g_path[i + 1], 'o', markersize=12, color=colorspec[i, :], markerfacecolor=colorspec[i, :]) display.clear_output(wait=True) display.display(plt.gcf()) # remove quadratic and pt time.sleep(1) h.remove() x_mark.remove() display.clear_output(wait=True) display.display(plt.gcf()) # for later iterations just plot point so things don't get too visually cluttered if i >= 1: # just plot point so things don't get too cluttered time.sleep(0.01) ax1.plot(w_path[i], g_path[i], 'o', markersize=12, color=colorspec[i - 1, :], markerfacecolor=colorspec[i - 1, :]) display.clear_output(wait=True) display.display(plt.gcf()) # color the final point red just for visualization purposes if i == len(g_path) - 1: t = np.linspace(-0.5, g_path[i], 100) s = w_path[i] * np.ones((100)) ax1.plot(s, t, '--k') display.clear_output(wait=True) 
display.display(plt.gcf())
def fit(self, X_train, y_train, X_valid=None, y_valid=None, n_epochs=100, batch_size=10, max_epochs_from_best=10, keep_training=False): if self.gradient_steps < 1: seq_length = 1 else: seq_length = 2 * self.gradient_steps start_epoch = 0 best_epoch = 0 best_loss = np.inf best_params = self.get_params() validate = X_valid is not None and y_valid is not None train_loss_fn = os.path.join(self.out_dir, 'train_loss.txt') if not keep_training: delete_if_exists(train_loss_fn) if validate: valid_loss_fn = os.path.join(self.out_dir, 'valid_loss_reg.txt') valid_loss_2_fn = os.path.join(self.out_dir, 'valid_loss.txt') if not keep_training: delete_if_exists(valid_loss_fn) delete_if_exists(valid_loss_2_fn) else: if os.path.exists(valid_loss_fn): valid_loss_old = np.loadtxt(valid_loss_fn) best_loss_idx = np.argmin(valid_loss_old[:, 1]) best_loss = valid_loss_old[best_loss_idx, 1] best_epoch = valid_loss_old[best_loss_idx, 0] start_epoch = int(best_epoch) + 1 elif os.path.exists(train_loss_fn): train_loss_old = np.loadtxt(train_loss_fn) best_loss_idx = np.argmin(train_loss_old[:, 1]) best_loss = train_loss_old[best_loss_idx, 1] best_epoch = train_loss_old[best_loss_idx, 0] start_epoch = int(best_epoch) + 1 else: named_valid_results = () train_batch_provider = BatchProvider(theano.config.floatX) train_batch_provider.store_data(X_train, y_train) valid_batch_provider = BatchProvider(theano.config.floatX) valid_batch_provider.store_data(X_valid, y_valid) total_train_instances = np.sum(len(x) for x in X_train) n_train_batches_per_epoch = max( 1, 2 * total_train_instances / (batch_size * seq_length)) LOGGER.info('Batch size: {}; Batches per epoch: {}' .format(batch_size, n_train_batches_per_epoch)) # variables to hold data batches; reusing rather than recreating the # arrays saves (a little bit of) time X_t = train_batch_provider.make_X_batch_array(batch_size, seq_length) y_t = train_batch_provider.make_Y_batch_array(batch_size, seq_length) if isinstance(self.train_fun, (tuple, list)): train_mode_selector = ParameterUpdate(start_epoch, n_epochs) else: train_mode_selector = SimpleParameterUpdate() # Initialize valid loss (in case there is no validation set) valid_loss = np.array([0 for o in self.valid_fun.outputs]) try: for epoch in xrange(start_epoch, n_epochs): # train_results = [] mode = train_mode_selector.select_mode(epoch) LOGGER.info('Training {0} params'.format(mode)) train_loss, named_train_results = _train_loss( self, train_batch_provider, batch_size, seq_length, X_t, y_t, train_loss_fn, epoch, n_train_batches_per_epoch, mode=mode) if validate and ( np.mod(epoch - start_epoch, 5) == 0 or epoch == n_epochs - 1): valid_results = [] for i, (X_v, y_v) in enumerate( valid_batch_provider.iter_pieces()): valid_results.append(self.valid_fun(X_v, y_v)) valid_loss = np.nanmean(valid_results, axis=0) write_append( valid_loss_fn, epoch, valid_loss[0]) write_append( valid_loss_2_fn, epoch, valid_loss[-1]) named_valid_results = zip([o.variable.name for o in self.valid_fun.outputs], valid_loss) LOGGER.info( ("Epoch: {0}/{3}, " "train: {1}, " "validate: {2} ") .format(epoch, '; '.join( '{0} ={1: .3f}'.format(k, v) for k, v in named_train_results), '; '.join( '{0} ={1: .3f}'.format(k, v) for k, v in named_valid_results), n_epochs)) params = self.get_params() # Early stopping if validate: es_loss = valid_loss[0] else: es_loss = train_loss[0] if es_loss < best_loss: best_params = params best_loss = es_loss best_epoch = epoch # Make a backup every 100 epochs (Astrud is sometimes # unreliable) if np.mod(epoch - 
start_epoch, 100) == 0: LOGGER.info('Backing parameters up!') save_pyc_bz(best_params, os.path.join(self.out_dir, 'backup_params.pyc.bz')) early_stop = ( epoch > (best_epoch + max_epochs_from_best)) if early_stop: break except KeyboardInterrupt: print('Training interrupted') if best_loss < np.inf: print('Reloading best self (epoch = {0}, {2} loss = {1:.3f})' .format(best_epoch + 1, best_loss, 'validation' if validate else 'training')) self.set_params(best_params) return self.get_params()
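The early-stopping bookkeeping in fit() above boils down to the small pattern below; the loss values and names here are illustrative, not taken from the original code.

# Sketch of the early-stopping rule in fit(): keep the parameters from the
# epoch with the lowest monitored loss, stop after max_epochs_from_best
# epochs without improvement. Losses below are made up.
import numpy as np

losses = [0.9, 0.7, 0.65, 0.66, 0.68, 0.70, 0.71]
max_epochs_from_best = 3
best_loss, best_epoch = np.inf, 0
for epoch, loss in enumerate(losses):
    if loss < best_loss:
        best_loss, best_epoch = loss, epoch
    if epoch > best_epoch + max_epochs_from_best:
        break
print(best_epoch, best_loss)    # 2 0.65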
best_for_best_test_result = np.zeros(
    (th_test[0].eval().shape[0], dl_params_1['out_size']), dtype=config.floatX)

for tmp in range(len(xgbAccuracyByClass)):
    print('Acc by class for classifier ', tmp, ': ', xgbAccuracyByClass[tmp])

acc_by_class = np.asarray(xgbAccuracyByClass)

# For each training row: take the majority-vote class across classifiers,
# then use np.argmin over that class's column of acc_by_class to pick the
# classifier whose probabilities are kept for this row.
for row_i in range(best_for_best_result.shape[0]):
    class_for_row = []
    for proba in xgbProbas:
        class_for_row.append(np.argmax(proba[row_i]))
    counter = Counter(class_for_row)
    maj_vote = counter.most_common()[0][0]
    best_classif = np.argmin(acc_by_class[:, int(maj_vote)])
    best_probs = xgbProbas[best_classif][row_i]
    best_for_best_result[row_i] = best_probs

# Same selection applied to the test-set probabilities.
for row_i in range(best_for_best_test_result.shape[0]):
    class_for_row = []
    for proba in xgbTestProbas:
        class_for_row.append(np.argmax(proba[row_i]))
    counter = Counter(class_for_row)
    maj_vote = counter.most_common()[0][0]
    best_classif = np.argmin(acc_by_class[:, int(maj_vote)])
    best_test_probs = xgbTestProbas[best_classif][row_i]
    best_for_best_test_result[row_i] = best_test_probs

for r_i, result in enumerate(xgbProbas):
    test_result = xgbTestProbas[r_i]

def learning(self, actionPolicy, actionParam, eval_greedy=False): from scipy.special import digamma from scipy.stats import t as studentTdist state = self.obj.reset(self.np_random) deltaR = 0.1 deltaM = 0.2 deltaT = 0.02 epsilon = 0.0001 while(self.step <= self.obj.timeH): if actionPolicy == 'egreedy': if self.np_random.rand() < actionParam: action = self.np_random.choice(range(self.obj.anum)) else: action = np.argmax(self.ng_param[state,:,0]) elif actionPolicy == 'Q-sample': p = self.np_random.rand(self.obj.anum) xs = np.arange(min(self.ng_param[state,:,0])-10.0, max(self.ng_param[state,:,0])+10.0,0.01) p_table = [] for x in xs: p_table.append(studentTdist.cdf((x-self.ng_param[state,:,0])*np.sqrt(self.ng_param[state,:,1]*self.ng_param[state,:,2]/self.ng_param[state,:,3]), 2*self.ng_param[state,:,2])) ids = np.argmin(np.abs(p-np.asarray(p_table)),axis=0) action = np.argmax([xs[ids[i]] for i in range(self.obj.anum)]) else: raise ValueError('Not provided action policy.') cur_param = self.ng_param[state,action] reward, state_n, done = self.obj.observe(state,action,self.np_random) std = np.sqrt((cur_param[1]+1)*cur_param[3]/cur_param[1]/(cur_param[2]-1)) R_samples = np.arange(cur_param[0]-3*std, cur_param[0]+3*std, deltaR, dtype=np.float32) R_pdf = norm.pdf(R_samples, cur_param[0],std) M1 = reward + self.discount*R_samples M2 = M1**2 mu0_new = (cur_param[1]*cur_param[0] + 1*M1)/(cur_param[1]+1) l_new = (cur_param[1]+1)*np.ones(R_samples.shape) a_new = (cur_param[2]+0.5)*np.ones(R_samples.shape) b_new = cur_param[3]+0.5*(M2-M1**2)+0.5*cur_param[1]*(M1-cur_param[0])**2/l_new x_mu = np.arange(-20.0+cur_param[0],20.0+cur_param[0], deltaM, dtype=np.float32) x_tau = np.arange(0.01+cur_param[0],0.2+cur_param[0], deltaT, dtype=np.float32) e_mt, e_t, e_m2t, e_logt = 0.0, 0.0, 0.0, 0.0 tot_prob = 0.0 tmp_mu = [] for x_m in x_mu: tmp_tau = [] for x_t in x_tau: p_mix = deltaR*np.dot(util.normalGamma(x_m, x_t, mu0_new, l_new, a_new, b_new),R_pdf) tmp_tau.append(p_mix) tot_prob += p_mix e_mt += x_m*x_t*p_mix e_t += x_t*p_mix e_m2t += x_m*x_m*x_t*p_mix e_logt += np.log(x_t)*p_mix tmp_mu.append(tmp_tau) tot_prob *= deltaT*deltaM if abs(tot_prob-1.0) > 0.1: pdb.set_trace() e_mt *= deltaT*deltaM/tot_prob e_t *= deltaT*deltaM/tot_prob e_m2t *= deltaT*deltaM/tot_prob e_logt *= deltaT*deltaM/tot_prob if (np.log(e_t)-e_logt)<0: pdb.set_trace() # UPDATE self.ng_param[state,action,0] = e_mt/e_t self.ng_param[state,action,1] = 1.0/(e_m2t - e_t*self.ng_param[state,action,0]**2) self.ng_param[state,action,2] = max(1+epsilon, self.finverse(np.log(e_t)-e_logt, digamma)) self.ng_param[state,action,3] = self.ng_param[state,action,2]/e_t
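The 'Q-sample' branch above inverts Student-t CDFs numerically by tabulating them on a grid and taking an argmin over |p - table|. A simplified one-dimensional analogue of that trick, using a normal distribution purely for illustration:

# Numerically invert a CDF with a lookup table and np.argmin, then compare
# against the exact quantile. Grid and distribution are illustrative only.
import numpy as np
from scipy.stats import norm

xs = np.arange(-5.0, 5.0, 0.01)
cdf_table = norm.cdf(xs)                       # tabulated CDF on the grid
p = np.random.rand()                           # uniform draw
sample = xs[np.argmin(np.abs(p - cdf_table))]  # grid-based inverse CDF
print(sample, norm.ppf(p))                     # should be close to the exact quantile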
total_months.append(lines[0])
profit_loss.append(int(lines[1]))

# Month-over-month changes in profit/loss
monthly_change.append(np.diff(profit_loss))
# Collapse the list of diff arrays into a single array of monthly changes
monthly_change_list = sum(monthly_change)
# Summing the monthly change
total = sum(monthly_change_list)
average_change = total / (len(total_months) - 1)
formatted_avg_change = "{:.2f}".format(average_change)

# Indices of the greatest increase and greatest decrease in monthly change
max_num = np.argmax(monthly_change_list, axis=0)
min_num = np.argmin(monthly_change_list, axis=0)

print("Financial Analysis")
print("-------------------------")
print(f"Total Months: {len(total_months)}")
print(f"Total: ${sum(profit_loss)}")
print(f"Average Change: ${formatted_avg_change}")
# np.diff shortens the series by one, so the month index is shifted by +1
print(f"Greatest Increase in Profits: {total_months[max_num + 1]} $({monthly_change_list[max_num]})")
print(f"Greatest Decrease in Profits: {total_months[min_num + 1]} $({monthly_change_list[min_num]})")

output_file = os.path.join('Analysis', 'financial_analysis.txt')
with open(output_file, 'w') as txtfile:
    # Write the same report to the output text file
    print("Financial Analysis", file=txtfile)
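A tiny, self-contained illustration of the index bookkeeping above: np.diff shortens the series by one, so the month matching change k is total_months[k + 1]. The figures are made up.

import numpy as np

total_months = ["Jan", "Feb", "Mar", "Apr"]
profit_loss = [100, 150, 120, 200]
changes = np.diff(profit_loss)                               # [ 50, -30,  80]
print(total_months[np.argmax(changes) + 1], changes.max())  # Apr 80
print(total_months[np.argmin(changes) + 1], changes.min())  # Mar -30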
def minimum_bounding_rectangle(points):
    """
    Find the smallest bounding rectangle for a set of points.
    Returns a set of points representing the corners of the bounding box.

    :param points: an nx2 matrix of coordinates
    :rval: a 4x2 matrix of coordinates (the rectangle corners)
    """
    pi2 = np.pi / 2.

    # get the convex hull for the points
    hull_points = points[ConvexHull(points).vertices]

    # calculate edge angles
    edges = hull_points[1:] - hull_points[:-1]
    angles = np.arctan2(edges[:, 1], edges[:, 0])
    angles = np.abs(np.mod(angles, pi2))
    angles = np.unique(angles)

    # find rotation matrices
    # XXX both work
    rotations = np.vstack([
        np.cos(angles),
        np.cos(angles - pi2),
        np.cos(angles + pi2),
        np.cos(angles)]).T
    # rotations = np.vstack([
    #     np.cos(angles),
    #     -np.sin(angles),
    #     np.sin(angles),
    #     np.cos(angles)]).T
    rotations = rotations.reshape((-1, 2, 2))

    # apply rotations to the hull
    rot_points = np.dot(rotations, hull_points.T)

    # find the bounding points
    min_x = np.nanmin(rot_points[:, 0], axis=1)
    max_x = np.nanmax(rot_points[:, 0], axis=1)
    min_y = np.nanmin(rot_points[:, 1], axis=1)
    max_y = np.nanmax(rot_points[:, 1], axis=1)

    # find the box with the smallest area
    areas = (max_x - min_x) * (max_y - min_y)
    best_idx = np.argmin(areas)

    # return the best box
    x1 = max_x[best_idx]
    x2 = min_x[best_idx]
    y1 = max_y[best_idx]
    y2 = min_y[best_idx]
    r = rotations[best_idx]

    rval = np.zeros((4, 2))
    rval[0] = np.dot([x1, y2], r)
    rval[1] = np.dot([x2, y2], r)
    rval[2] = np.dot([x2, y1], r)
    rval[3] = np.dot([x1, y1], r)

    return rval
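A quick usage sketch for minimum_bounding_rectangle; the random point cloud is made up, and the imports reflect the assumption that the function relies on module-level np and ConvexHull.

import numpy as np
from scipy.spatial import ConvexHull  # the function above expects ConvexHull in scope

rng = np.random.default_rng(0)
pts = rng.normal(size=(50, 2))        # hypothetical point cloud
corners = minimum_bounding_rectangle(pts)
print(corners.shape)                  # (4, 2): the rectangle's corner coordinates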
def _fit_init(model, batch_provider, X_train, y_train, X_valid, y_valid, keep_training, em_train=False): start_epoch = 0 best_epoch = 0 best_loss = np.inf best_params = model.get_params() validate = X_valid is not None and y_valid is not None epoch_idx = 0 if em_train: start_cycle = 0 best_cycle = 0 epoch_idx = 1 cycle_idx = 0 train_loss_fn = os.path.join(model.out_dir, 'train_loss.txt') if not keep_training: delete_if_exists(train_loss_fn) if validate: valid_loss_fn = os.path.join(model.out_dir, 'valid_loss_reg.txt') valid_loss_2_fn = os.path.join(model.out_dir, 'valid_loss.txt') if not keep_training: delete_if_exists(valid_loss_fn) delete_if_exists(valid_loss_2_fn) else: if os.path.exists(valid_loss_fn): valid_loss_old = np.loadtxt(valid_loss_fn) best_loss_idx = np.argmin(valid_loss_old[:, -1]) best_loss = valid_loss_old[best_loss_idx, -1] best_epoch = valid_loss_old[best_loss_idx, epoch_idx] start_epoch = int(best_epoch) + 1 if em_train: best_cycle = valid_loss_old[best_loss_idx, cycle_idx] start_cycle = int(best_cycle) + 1 elif os.path.exists(train_loss_fn): train_loss_old = np.loadtxt(train_loss_fn) best_loss_idx = np.argmin(train_loss_old[:, -1]) best_loss = train_loss_old[best_loss_idx, -1] best_epoch = train_loss_old[best_loss_idx, epoch_idx] start_epoch = int(best_epoch) + 1 if em_train: best_cycle = train_loss_old[best_loss_idx, cycle_idx] start_cycle = int(best_cycle) + 1 train_batch_provider = batch_provider(theano.config.floatX) train_batch_provider.store_data(X_train, y_train) if validate: valid_batch_provider = batch_provider(theano.config.floatX) valid_batch_provider.store_data(X_valid, y_valid) else: valid_batch_provider = None total_train_instances = np.sum(len(x) for x in X_train) if em_train: return (train_batch_provider, valid_batch_provider, start_epoch, best_loss, best_params, total_train_instances, train_loss_fn, valid_loss_fn, valid_loss_2_fn, validate, best_epoch, start_cycle, best_cycle) else: return (train_batch_provider, valid_batch_provider, start_epoch, best_loss, best_params, total_train_instances, train_loss_fn, valid_loss_fn, valid_loss_2_fn, validate, best_epoch)
def train_and_save(args): seed = args.seed np.random.seed(seed) torch.manual_seed(seed) device = args.device num_workers = args.num_workers n_epoch = args.epochs lr = args.lr lr_scheduler = args.lr_scheduler save_per_epoch = args.save_per_epoch save_dir = args.save_dir weight_decay = args.weight_decay n_iterations = args.num_iterations read_level_info = args.read_level_info model_config = toml.load(args.model_config) train_config = toml.load(args.train_config) print("Saving training information to {}".format(save_dir)) if not os.path.exists(save_dir): os.makedirs(save_dir) train_info = dict() train_info["model_config"] = model_config train_info["train_config"] = train_config train_info["train_config"]["learning_rate"] = lr train_info["train_config"]["epochs"] = n_epoch train_info["train_config"]["save_per_epoch"] = save_per_epoch train_info["train_config"]["weight_decay"] = weight_decay train_info["train_config"]["number_of_validation_iterations"] = n_iterations train_info["train_config"]["lr_scheduler"] = lr_scheduler train_info["train_config"]["seed"] = seed with open(os.path.join(save_dir, "train_info.toml"), 'w') as f: toml.dump(train_info, f) model = MILModel(model_config).to(device) train_dl, test_dl, val_dl = build_dataloader(train_config, num_workers) optimizer = torch.optim.Adam(model.parameters(), lr=lr, weight_decay=weight_decay) train_criterion = build_train_loss_function(train_config['train_loss_function']) test_criterion = build_test_loss_function(train_config['test_loss_function']) train_results, test_results = train(model, train_dl, test_dl, optimizer, n_epoch, device, train_criterion, test_criterion, save_dir=save_dir, scheduler=None, save_per_epoch=save_per_epoch, n_iterations=n_iterations, read_level_info=read_level_info) joblib.dump(train_results, os.path.join(save_dir, "train_results.joblib")) joblib.dump(test_results, os.path.join(save_dir, "test_results.joblib")) test_results = joblib.load(os.path.join(save_dir, "test_results.joblib")) selection_criteria = ['avg_loss', 'roc_auc', 'pr_auc'] if read_level_info: selection_criteria = ['avg_loss', 'avg_loss_read', 'roc_auc', 'pr_auc', 'roc_auc_read', 'pr_auc_read'] for selection_criterion in selection_criteria: test_loss = [test_results[selection_criterion][i] for i in range (0, len(test_results[selection_criterion]), save_per_epoch)] if selection_criterion in ('avg_loss', 'avg_loss_read'): best_model = (np.argmin(test_loss) + 1) * save_per_epoch else: best_model = (np.argmax(test_loss) + 1) * save_per_epoch model.load_state_dict(torch.load(os.path.join(save_dir, "model_states", str(best_model), "model_states.pt"))) val_results = validate(model, val_dl, device, test_criterion, n_iterations, read_level_info=read_level_info) print("Criteria: {criteria} \t" "Compute time: {compute_time:.3f}".format(criteria=selection_criterion, compute_time=val_results["compute_time"])) print("Val Loss: {loss:.3f} \t" "Val Accuracy: {accuracy:.3f} \t " "Val ROC AUC: {roc_auc:.3f} \t " "Val PR AUC: {pr_auc:.3f}".format(loss=val_results["avg_loss"], accuracy=val_results["accuracy"], roc_auc=val_results["roc_auc"], pr_auc=val_results["pr_auc"])) if read_level_info: print("Val Loss read: {loss:.3f} \t" "Val Accuracy read: {accuracy:.3f} \t " "Val ROC AUC read: {roc_auc:.3f} \t " "Val PR AUC read: {pr_auc:.3f}".format(loss=val_results["avg_loss_read"], accuracy=val_results["accuracy_read"], roc_auc=val_results["roc_auc_read"], pr_auc=val_results["pr_auc_read"])) print("=====================================") joblib.dump(val_results, 
os.path.join(save_dir, "val_results_{}.joblib".format(selection_criterion)))
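The checkpoint-selection arithmetic in train_and_save reduces to the small computation below; the loss values and save_per_epoch are invented.

# The metric list holds one entry per saved checkpoint (every save_per_epoch
# epochs), so the argmin index is scaled back to an epoch number.
import numpy as np

save_per_epoch = 4
test_loss = [0.52, 0.47, 0.49, 0.55]                 # one entry per saved checkpoint
best_model = (np.argmin(test_loss) + 1) * save_per_epoch
print(best_model)                                    # checkpoint saved at epoch 8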
face_encodings = face_recognition.face_encodings(rgb_small_frame, face_locations)

face_names = []
for face_encoding in face_encodings:
    # See if the face is a match for the known face(s)
    matches = face_recognition.compare_faces(known_face_encodings, face_encoding)
    name = "Unknown"

    # # If a match was found in known_face_encodings, just use the first one.
    # if True in matches:
    #     first_match_index = matches.index(True)
    #     name = known_face_names[first_match_index]

    # Or instead, use the known face with the smallest distance to the new face
    face_distances = face_recognition.face_distance(known_face_encodings, face_encoding)
    best_match_index = np.argmin(face_distances)
    if matches[best_match_index]:
        name = known_face_names[best_match_index]

    face_names.append(name)

process_this_frame = not process_this_frame

# Display the results
for (top, right, bottom, left), name in zip(face_locations, face_names):
    # Scale back up face locations since the frame we detected in was scaled to 1/4 size
    top *= 4
    right *= 4
    bottom *= 4
    left *= 4
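The nearest-known-face rule above can be exercised without the face_recognition dependency; all arrays and names below are stand-ins, and the 0.6 threshold mirrors compare_faces' default tolerance.

import numpy as np

known_face_encodings = np.random.rand(3, 128)   # stand-ins for 128-d face encodings
known_face_names = ["alice", "bob", "carol"]
face_encoding = known_face_encodings[1] + 0.01 * np.random.rand(128)

face_distances = np.linalg.norm(known_face_encodings - face_encoding, axis=1)
best_match_index = np.argmin(face_distances)
# compare_faces in the snippet effectively applies a distance threshold (0.6 by default)
name = known_face_names[best_match_index] if face_distances[best_match_index] <= 0.6 else "Unknown"
print(name)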
import xarray
import netCDF4 as nc  # assumed to be imported earlier in the original script

# Reading in new netcdf file that contains both the observation and the forecast
fpath = 'CMEMS_ACCESS_out_20201211.nc'
ncd = nc.Dataset(fpath, 'r')
lon = ncd['lon'][:]
lat = ncd['lat'][:]
time = ncd['time'][:]

##### Subsampling the grid to show only tropical Pacific #####
latbounds = [-50, 50]
lonbounds = [90, 300]

# latitude lower and upper index (nearest grid point to each bound)
latli = np.argmin(np.abs(lat - latbounds[0]))
latui = np.argmin(np.abs(lat - latbounds[1]))

# longitude lower and upper index
lonli = np.argmin(np.abs(lon - lonbounds[0]))
lonui = np.argmin(np.abs(lon - lonbounds[1]))

# ssh = ncd['ssh'][:, :, :]      # sea surface height from CMEMS observation (months 1 to 7)
#                                # or ACCESS-s1 forecast (months 8 to 13)
# skill = ncd['skill'][:, :, :]  # defined only for the forecast months 8-13; months 1-7 are empty
# trend = ncd['trend'][:]        # Observed trend, static map (not time dependent)

lat = lat[latli:latui]
lon = lon[lonli:lonui]
# sea surface height from CMEMS observation (months 1 to 7) or ACCESS-s1 forecast (months 8 to 13)
ssh = ncd['ssh'][:, latli:latui, lonli:lonui]
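The four argmin calls above all compute "index of the grid point nearest a target value"; a small helper (hypothetical name) makes that intent explicit and is easy to test on a synthetic grid.

import numpy as np

def nearest_index(coords, value):
    """Return the index of the element of `coords` closest to `value`."""
    return int(np.argmin(np.abs(np.asarray(coords) - value)))

lat = np.arange(-90, 90.25, 0.25)       # illustrative regular grid
latli = nearest_index(lat, -50)
latui = nearest_index(lat, 50)
print(lat[latli], lat[latui])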
def pso(func, lb, ub, ieqcons=[], f_ieqcons=None, args=(), kwargs={}, swarmsize=10, w_max=0.9, w_min=.4, c1=0.5, c2=0.3, maxiter=100, minstep=1e-8, minfunc=1e-8, debug=False, processes=1, particle_output=False): """ Perform a particle swarm optimization (PSO) Parameters ========== func : function The function to be minimized lb : array The lower bounds of the design variable(s) ub : array The upper bounds of the design variable(s) Optional ======== ieqcons : list A list of functions of length n such that ieqcons[j](x,*args) >= 0.0 in a successfully optimized problem (Default: []) f_ieqcons : function Returns a 1-D array in which each element must be greater or equal to 0.0 in a successfully optimized problem. If f_ieqcons is specified, ieqcons is ignored (Default: None) args : tuple Additional arguments passed to objective and constraint functions (Default: empty tuple) kwargs : dict Additional keyword arguments passed to objective and constraint functions (Default: empty dict) swarmsize : int The number of particles in the swarm (Default: 100) w_max : scalar Maximum particle velocity scaling factor (Default: 0.9) w_min : scalar Minimum particle velocity scaling factor (Default: 0.4) c1 : scalar Scaling factor to search away from the particle's best known position (Default: 0.5) c2 : scalar Scaling factor to search away from the swarm's best known position (Default: 0.3) maxiter : int The maximum number of iterations for the swarm to search (Default: 100) minstep : scalar The minimum stepsize of swarm's best position before the search terminates (Default: 1e-8) minfunc : scalar The minimum change of swarm's best objective value before the search terminates (Default: 1e-8) debug : boolean If True, progress statements will be displayed every iteration (Default: False) processes : int The number of processes to use to evaluate objective function and constraints (default: 1) particle_output : boolean Whether to include the best per-particle position and the objective values at those. 
Returns ======= g : array The swarm's best known position (optimal design) fg : scalar The objective value at ``g`` fg_hist : array list of objective function value by iteration it : scalar number of iterations to convergence fnc_calls: scalar number of objective function calls to convergence """ assert len(lb)==len(ub), 'Lower- and upper-bounds must be the same length' assert hasattr(func, '__call__'), 'Invalid function handle' lb = np.array(lb) ub = np.array(ub) assert np.all(ub>lb), 'All upper-bound values must be greater than lower-bound values' vhigh = np.abs(ub - lb) vlow = -vhigh # Initialize objective function obj = partial(_obj_wrapper, func, args, kwargs) # Check for constraint function(s) ######################################### if f_ieqcons is None: if not len(ieqcons): if debug: print('No constraints given.') cons = _cons_none_wrapper else: if debug: print('Converting ieqcons to a single constraint function') cons = partial(_cons_ieqcons_wrapper, ieqcons, args, kwargs) else: if debug: print('Single constraint function given in f_ieqcons') cons = partial(_cons_f_ieqcons_wrapper, f_ieqcons, args, kwargs) is_feasible = partial(_is_feasible_wrapper, cons) # Initialize the multiprocessing module if necessary if processes > 1: import multiprocessing mp_pool = multiprocessing.Pool(processes) # Initialize the particle swarm ############################################ S = swarmsize D = len(lb) # the number of dimensions each particle has x = np.random.rand(S, D) # particle positions v = np.zeros_like(x) # particle velocities p = np.zeros_like(x) # best particle positions fx = np.zeros(S) # current particle function values fs = np.zeros(S, dtype=bool) # feasibility of each particle fp = np.ones(S)*np.inf # best particle function values g = [] # best swarm position fg = np.inf # best swarm position starting value fg_hist = [] # store best objective value at every iteration fnc_calls = 0 w = np.linspace(w_max,w_min,maxiter,endpoint=True) # linearly decreasing w # Initialize the particle's position x = lb + x*(ub - lb) # Calculate objective and constraints for each particle if processes > 1: fx = np.array(mp_pool.map(obj, x)) fs = np.array(mp_pool.map(is_feasible, x)) else: for i in range(S): fx[i] = obj(x[i, :]); fnc_calls += 1 fs[i] = is_feasible(x[i, :]) # Store particle's best position (if constraints are satisfied) i_update = np.logical_and((fx < fp), fs) p[i_update, :] = x[i_update, :].copy() fp[i_update] = fx[i_update] # Update swarm's best position i_min = np.argmin(fp) if fp[i_min] < fg: fg = fp[i_min] g = p[i_min, :].copy() else: # At the start, there may not be any feasible starting point, so just # give it a temporary "best" point since it's likely to change g = x[0, :].copy() # Initialize the particle's velocity v = vlow + np.random.rand(S, D)*(vhigh - vlow) # Iterate until termination criterion met ################################## for it in tqdm(range(1,maxiter+1)): rp = np.random.uniform(size=(S, D)) rg = np.random.uniform(size=(S, D)) # Update the particles velocities v = w[it-1]*v + c1*rp*(p - x) + c2*rg*(g - x) # Update the particles' positions x = x + v # Correct for bound violations maskl = x < lb masku = x > ub x = x*(~np.logical_or(maskl, masku)) + lb*maskl + ub*masku # Update objectives and constraints if processes > 1: fx = np.array(mp_pool.map(obj, x)) fs = np.array(mp_pool.map(is_feasible, x)) else: for i in range(S): fx[i] = obj(x[i, :]); fnc_calls+=1 fs[i] = is_feasible(x[i, :]) # Store particle's best position (if constraints are satisfied) i_update = 
np.logical_and((fx < fp), fs) p[i_update, :] = x[i_update, :].copy() fp[i_update] = fx[i_update] # Compare swarm's best position with global best position i_min = np.argmin(fp) if fp[i_min] < fg: if debug: print('New best for swarm at iteration {:}: {:} {:}'\ .format(it, p[i_min, :], fp[i_min])) p_min = p[i_min, :].copy() stepsize = np.sqrt(np.sum((g - p_min)**2)) if np.abs(fg - fp[i_min]) <= minfunc: print('Stopping search: Swarm best objective change less than {:}'\ .format(minfunc)) if particle_output: return p_min, fp[i_min], p, fp else: return p_min, fp[i_min], fg_hist, it, fnc_calls elif stepsize <= minstep: print('Stopping search: Swarm best position change less than {:}'\ .format(minstep)) if particle_output: return p_min, fp[i_min], p, fp else: return p_min, fp[i_min], fg_hist, it, fnc_calls else: g = p_min.copy() fg = fp[i_min] fg_hist.append(fg) if debug: print('Best after iteration {:}: {:} {:}'.format(it, g, fg)) it += 1 print('Stopping search: maximum iterations reached --> {:}'.format(maxiter)) if not is_feasible(g): print("However, the optimization couldn't find a feasible design. Sorry") if particle_output: return g, fg, p, fp else: return g, fg, fg_hist, it, fnc_calls
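A minimal way to exercise pso above on a toy sphere function, assuming the function and its module-level helpers (_obj_wrapper, the constraint wrappers, tqdm) are importable; the objective, bounds and settings are arbitrary.

import numpy as np

def sphere(x):
    # simple unconstrained objective with minimum at the origin
    return float(np.sum(x ** 2))

g, fg, fg_hist, n_it, n_calls = pso(sphere, lb=[-5.0, -5.0], ub=[5.0, 5.0],
                                    swarmsize=20, maxiter=50)
print("best position:", g, "best value:", fg, "iterations:", n_it)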
def CTMM(world: World): '''Run a continuous time markov model based on an initialized World object''' results = [] t = 0 # for _ in range(100): while world.I > 0: infect_vec = [ sample_from_exp(country.l1 * country.I * (country.S / country.N)) for country in world.countries ] t_i, i = min(infect_vec), np.argmin(infect_vec) recover_vec = [ sample_from_exp(country.l2 * country.I) for country in world.countries ] t_r, r = min(recover_vec), np.argmin(recover_vec) migrateS_vec = [ sample_from_exp(country.l3 * country.S) for country in world.countries ] t_m1, m1 = min(migrateS_vec), np.argmin(migrateS_vec) migrateI_vec = [ sample_from_exp(country.l3 * country.I) for country in world.countries ] t_m2, m2 = min(migrateI_vec), np.argmin(migrateI_vec) migrateR_vec = [ sample_from_exp(country.l3 * country.R) for country in world.countries ] t_m3, m3 = min(migrateR_vec), np.argmin(migrateR_vec) t_min_global = min(t_i, t_r, t_m1, t_m2, t_m3) # logger.info((t_i, t_r, t_m)) if not (t_i == float('inf') and t_r == float('inf') and t_m1 == float('inf') and t_m2 == float('inf') and t_m3 == float('inf')): if t_i == t_min_global: t += t_i world.countries[i].I += 1 world.countries[i].S -= 1 #including these staments in all conditionals in case there's a tie in the min (?) elif t_r == t_min_global: # world.new_recovery(country) t += t_r world.countries[r].I -= 1 world.countries[r].R += 1 elif t_m1 == t_min_global: #randomly select a destination country for migration... t += t_m1 j = np.random.choice(np.setdiff1d(range(world.m), i)) world.countries[m1].S -= 1 world.countries[j].S += 1 elif t_m2 == t_min_global: #randomly select a destination country for migration... t += t_m2 j = np.random.choice(np.setdiff1d(range(world.m), i)) world.countries[m2].I -= 1 world.countries[j].I += 1 elif t_m3 == t_min_global: t += t_m3 j = np.random.choice(np.setdiff1d(range(world.m), i)) world.countries[m3].R -= 1 world.countries[j].R += 1 for country in world.countries: results.append((country.l1, country.R / country.N)) return results
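The event selection in CTMM is the classic competing-exponential-clocks pattern: draw one waiting time per candidate event and fire the one with the argmin. A self-contained sketch, with a hypothetical stand-in for the sample_from_exp helper assumed by the original code:

import numpy as np

def sample_from_exp(rate, rng=np.random.default_rng()):
    # illustrative stand-in: exponential waiting time, impossible events get inf
    return rng.exponential(1.0 / rate) if rate > 0 else float('inf')

rates = [0.3, 0.0, 1.2]                       # per-event rates (made up)
times = [sample_from_exp(r) for r in rates]
t_min, which = min(times), int(np.argmin(times))
print(t_min, which)                           # time to next event and which event fires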
def main(): input_dimensions = 13 map_width = 7 map_height = 5 radius0 = max(map_width,map_height)/2 learning_rate0 = 0.1 epochs = 5000 radius=radius0 learning_rate = learning_rate0 BMU = np.zeros([2],dtype=np.int32) timestep=1 e=0.001 flag=0 epoch=0 patterns = [] classes = [] #carregando o arquivo vizinhos.txt file = open('vizinhos.txt','r') for line in file.readlines(): row = line.strip().split(',') patterns.append(row[1:14]) classes.append(row[0]) file.close patterns = np.asarray(patterns,dtype=np.float32) max_iterations = epochs*len(patterns) too_many_iterations = 10*max_iterations MAP = np.random.uniform(size=(map_height,map_width,input_dimensions)) prev_MAP = np.zeros((map_height,map_width,input_dimensions)) result_map = np.zeros([map_height,map_width,3],dtype=np.float32) coordinate_map = np.zeros([map_height,map_width,2],dtype=np.int32) for i in range(map_height): for j in range(map_width): coordinate_map[i][j] = [i,j] while (epoch <= epochs): shuffle = random.sample(list(np.arange(0,len(patterns),1,'int')),len(patterns)) for i in range(len(patterns)): J = np.sqrt(np.sum(np.sum((prev_MAP-MAP)**2,2))) if J<= e: flag=1 break else: if timestep == max_iterations and timestep != too_many_iterations: epochs += 1 max_iterations = epochs*len(patterns) pattern = patterns[shuffle[i]] Eucli_MAP = Eucli_dists(MAP,pattern) BMU[0] = np.argmin(np.amin(Eucli_MAP,1),0) BMU[1] = np.argmin(Eucli_MAP,1)[int(BMU[0])] Eucli_from_BMU = Eucli_dists(coordinate_map,BMU) prev_MAP = np.copy(MAP) for i in range(map_height): for j in range(map_width): distance = Eucli_from_BMU[i][j] if distance <= radius: theta = math.exp(-(distance**2)/(2*(radius**2))) MAP[i][j] = MAP[i][j] + theta*learning_rate*(pattern-MAP[i][j]) learning_rate = learning_rate0*math.exp(-(timestep)/max_iterations) time_constant = max_iterations/math.log(radius) radius = radius0*math.exp(-(timestep)/time_constant) timestep+=1 if flag==1: break epoch+=1 #visualizaĆ§Ć£o i=0 for pattern in patterns: Eucli_MAP = Eucli_dists(MAP,pattern) BMU[0] = np.argmin(np.amin(Eucli_MAP,1),0) BMU[1] = np.argmin(Eucli_MAP,1)[int(BMU[0])] x = BMU[0] y = BMU[1] if classes[i] == '1': if result_map[x][y][0] <= 0.5: result_map[x][y] += np.asarray([0.5,0,0]) elif classes[i] == '2': if result_map[x][y][1] <= 0.5: result_map[x][y] += np.asarray([0,0.5,0]) elif classes[i] == '3': if result_map[x][y][2] <= 0.5: result_map[x][y] += np.asarray([0,0,0.5]) i+=1 result_map = np.flip(result_map,0) print ("\nRed = Iris-Setosa") print ("Green = Iris-Virginica") print ("Blue = Iris-Versicolor\n") plt.imshow(toimage(result_map),interpolation='nearest')
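The two-step BMU search in main() can also be written with unravel_index, which is equivalent up to tie-breaking; Eucli_MAP below is a stand-in distance map.

import numpy as np

Eucli_MAP = np.random.rand(5, 7)              # hypothetical 5x7 node-distance map
bmu_row, bmu_col = np.unravel_index(np.argmin(Eucli_MAP), Eucli_MAP.shape)
# equivalent (up to ties) to:
#   BMU[0] = np.argmin(np.amin(Eucli_MAP, 1), 0)
#   BMU[1] = np.argmin(Eucli_MAP, 1)[int(BMU[0])]
print(bmu_row, bmu_col)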
def train(self, disp=True): """ Train SVR model Args: disp (bool): Display process or not. Default to True. Returns: None """ if self.svrinfo.nepsi == 0 and self.svrinfo.nc == 0 and self.svrinfo.nlens == 0: if disp: print("Construct SVR without tuning parameters.") self.svrinfo.optimizer = None xparamopt = [ self.svrinfo.theta, self.svrinfo.epsilon, self.svrinfo.c, self.svrinfo.wgk ] if self.svrinfo.errtype == 'L2': self.svrinfo.errloo, self.svrinfo.mu, self.svrinfo.alpha, self.svrinfo.epsilon, \ self.svrinfo.theta, self.svrinfo.c, self.svrinfo.wgk = l2svr(xparamopt, self.svrinfo, return_all=True) else: raise NotImplementedError( 'Other options are not yet available') else: xhyp0_norm = sobol_points(self.svrinfo.nrestart + 1, len(self.svrinfo.lbhyp)) xhyp0 = realval(np.array(self.svrinfo.lbhyp), np.array(self.svrinfo.ubhyp), xhyp0_norm[1:, :]) optimbound = np.transpose( np.vstack((self.svrinfo.lbhyp, self.svrinfo.ubhyp))) bestxcand = np.zeros(np.shape(xhyp0)) errloocand = np.zeros(shape=[self.svrinfo.nrestart]) for ii in range(self.svrinfo.nrestart): xhyp0_ii = xhyp0[ii, :] if self.svrinfo.optimizer == 'lbfgsb': res = minimize(l2svr, xhyp0_ii, method='L-BFGS-B', options={ 'eps': 1e-03, 'disp': False }, bounds=optimbound, args=(self.svrinfo, False)) bestxcand_ii = res.x errloocand_ii = res.fun elif self.svrinfo.optimizer == 'diff_evo': res = differential_evolution(l2svr, optimbound, args=(self.svrinfo, False)) bestxcand_ii = res.x errloocand_ii = res.fun else: raise NotImplementedError( 'Other optimizers are not yet implemented') bestxcand[ii, :] = bestxcand_ii errloocand[ii] = errloocand_ii I = np.argmin(errloocand) xparamopt = bestxcand[I, :] if disp: print("Train hyperparam finished.") print(f"Best hyperparameter is {xparamopt}") print(f"With Error LOO of {errloocand[I]}") self.svrinfo.errloo, self.svrinfo.mu, self.svrinfo.alpha, self.svrinfo.epsilon, \ self.svrinfo.theta, self.svrinfo.c, self.svrinfo.wgk = l2svr(xparamopt, self.svrinfo, return_all=True)
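The multi-restart loop in train() follows a common pattern: run a local optimizer from several starting points and keep the candidate with the argmin loss. A toy sketch with an arbitrary objective:

import numpy as np
from scipy.optimize import minimize

def objective(x):
    # multimodal 1-D toy function; the restarts guard against local minima
    return float((x[0] - 1.0) ** 2 + np.sin(3.0 * x[0]))

starts = np.linspace(-2.0, 2.0, 5)
results = [minimize(objective, [x0], method='L-BFGS-B') for x0 in starts]
losses = np.array([r.fun for r in results])
best = results[int(np.argmin(losses))]
print(best.x, best.fun)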
def _alignment_matrix( self, src: str, tar: str, backtrace: bool = True ) -> Union[np.ndarray, Tuple[np.ndarray, np.ndarray]]: """Return the Levenshtein alignment matrix. Parameters ---------- src : str Source string for comparison tar : str Target string for comparison backtrace : bool Return the backtrace matrix as well Returns ------- numpy.ndarray or tuple(numpy.ndarray, numpy.ndarray) The alignment matrix and (optionally) the backtrace matrix .. versionadded:: 0.4.1 """ src_len = len(src) tar_len = len(tar) if self._discount_from == 'coda': discount_from = [0, 0] src_voc = src.lower() for i in range(len(src_voc)): if src_voc[i] in self._vowels: discount_from[0] = i break for i in range(discount_from[0], len(src_voc)): if src_voc[i] not in self._vowels: discount_from[0] = i break else: discount_from[0] += 1 tar_voc = tar.lower() for i in range(len(tar_voc)): if tar_voc[i] in self._vowels: discount_from[1] = i break for i in range(discount_from[1], len(tar_voc)): if tar_voc[i] not in self._vowels: discount_from[1] = i break else: discount_from[1] += 1 elif isinstance(self._discount_from, int): discount_from = [self._discount_from, self._discount_from] else: discount_from = [1, 1] d_mat = np.zeros((src_len + 1, tar_len + 1), dtype=np.float_) if backtrace: trace_mat = np.zeros((src_len + 1, tar_len + 1), dtype=np.int8) for i in range(1, src_len + 1): d_mat[i, 0] = d_mat[i - 1, 0] + self._discount_func( max(0, i - discount_from[0]) ) if backtrace: trace_mat[i, 0] = 1 for j in range(1, tar_len + 1): d_mat[0, j] = d_mat[0, j - 1] + self._discount_func( max(0, j - discount_from[1]) ) if backtrace: trace_mat[0, j] = 0 for i in range(src_len): i_extend = self._discount_func(max(0, i - discount_from[0])) for j in range(tar_len): traces = ((i + 1, j), (i, j + 1), (i, j)) cost = min( i_extend, self._discount_func(max(0, j - discount_from[1])) ) opts = ( d_mat[traces[0]] + cost, # ins d_mat[traces[1]] + cost, # del d_mat[traces[2]] + (cost if src[i] != tar[j] else 0), # sub/== ) d_mat[i + 1, j + 1] = min(opts) if backtrace: trace_mat[i + 1, j + 1] = int(np.argmin(opts)) if self._mode == 'osa': if ( i + 1 > 1 and j + 1 > 1 and src[i] == tar[j - 1] and src[i - 1] == tar[j] ): # transposition d_mat[i + 1, j + 1] = min( d_mat[i + 1, j + 1], d_mat[i - 1, j - 1] + cost ) if backtrace: trace_mat[i + 1, j + 1] = 2 if backtrace: return d_mat, trace_mat return d_mat
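A stand-alone, unit-cost version of the opts/argmin step used in _alignment_matrix, without the phonetic discounting: the backtrace code is simply the argmin over (insertion, deletion, substitution/match), in that order.

import numpy as np

def levenshtein_with_backtrace(src, tar):
    d = np.zeros((len(src) + 1, len(tar) + 1), dtype=float)
    trace = np.zeros_like(d, dtype=np.int8)
    d[:, 0] = np.arange(len(src) + 1)
    d[0, :] = np.arange(len(tar) + 1)
    trace[1:, 0] = 1                      # first column reached by deletions
    for i in range(len(src)):
        for j in range(len(tar)):
            opts = (d[i + 1, j] + 1,                                # ins
                    d[i, j + 1] + 1,                                # del
                    d[i, j] + (0 if src[i] == tar[j] else 1))       # sub/==
            d[i + 1, j + 1] = min(opts)
            trace[i + 1, j + 1] = int(np.argmin(opts))
    return d, trace

d, trace = levenshtein_with_backtrace("kitten", "sitting")
print(d[-1, -1])   # 3.0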
def train(args): # load data print("[Loading data with batch size {}...]".format(args['batch_size'])) writer = SummaryWriter() if args['debug']: args['train_tok_file'] = args['eval_tok_file'] args['train_idx_file'] = args['eval_idx_file'] dataset = QuACDataset(os.path.join(args['data_dir'], args['train_tok_file']), os.path.join(args['data_dir'], args['train_idx_file']), os.path.join(args['data_dir'], args['train_question_freq_idx_file']), max_turns=args['max_turns']) train_batch = QuACDataLoader(dataset, num_workers=0, batch_sampler=QuACBatchSampler(dataset, batch_size=args['batch_size'])) with open(os.path.join(args['data_dir'], args['vocab_file']), 'rb') as f: vocab = pickle.load(f) args['vocab_size'] = len(vocab['word2id']) args['char_vocab_size'] = len(vocab['char2id']) dev_dataset = QuACDataset(os.path.join(args['data_dir'], args['eval_tok_file']), os.path.join(args['data_dir'], args['eval_idx_file']), os.path.join(args['data_dir'], args['train_question_freq_idx_file']), max_turns=args['max_turns']) dev_batch = QuACDataLoader(dev_dataset, batch_size=args['batch_size'], num_workers=0) pathlib.Path(args['model_dir']).mkdir(parents=True, exist_ok=True) model_file = '{}/{}'.format(args['model_dir'], args['model_file']) finetuned_model_file = '{}/{}'.format(args['model_dir'], args['finetuned_model_file']) teacher_model_file = '{}/{}'.format(args['model_dir'], args['teacher_model_file']) pprint(args) if args['mode'] == 'train': args['lambda2'] = 0 args['finetune'] = False elif args['mode'] == 'train_teacher': args['lambda2'] = 1 args['finetune'] = False elif args['mode'] == 'finetune': args['finetune'] = True # skip training if the language does not have training or dev data if len(train_batch) == 0 or len(dev_batch) == 0: print("[Skip training because no data available...]") sys.exit(0) # start training args['vocab'] = vocab finetuning = False if args['finetune']: print('Start finetuning...') finetuning = True trainer = Trainer(model_file=model_file, use_cuda=args['cuda'], vocab=vocab, teacher_model_file=teacher_model_file, args=args) lr_scheduler = optim.lr_scheduler.ReduceLROnPlateau(trainer.optimizer, factor=args['lr_decay'], patience=args['patience'], mode='max') trainer.teacher.eval() else: lambda_rl = args['lambda_reinforce'] args['lambda_reinforce'] = 0 trainer = Trainer(args=args, vocab=vocab, use_cuda=args['cuda'], emb_matrix=vocab['vecs']) lr_scheduler = optim.lr_scheduler.ReduceLROnPlateau(trainer.optimizer, factor=args['lr_decay'], patience=args['patience'], mode='min' if args['eval_ppl'] else 'max') print("[Training seq2seq-based question generator...]") global_step = 0 max_steps = len(train_batch) * args['num_epoch'] dev_score_history = [] best_dev_preds = [] current_lr = args['lr'] global_start_time = time.time() format_str = '{}: step {}/{} (epoch {}/{}), loss = {:.6f} ({:.3f} sec/batch), lr: {:.6f}, lambda: {:.6f}' references = [' '.join([x.lower() for x in tgt_text]) for i in range(len(dev_dataset)) for tgt_text in dev_dataset[i]['tgt_text']] if args['lambda2'] > 0: teacher_lr_scheduler = optim.lr_scheduler.ReduceLROnPlateau(trainer.teacher_optimizer, factor=args['lr_decay'], patience=args['patience'], mode='max') writer_tag = f"lambda1={args['lambda1']};lambda2={args['lambda2']}" if args['teacher_info_only']: writer_tag += '/informativeness_reward_only' elif args['teacher_spec_only']: writer_tag += '/specificity_reward_only' # eval on dev print("Evaluating on dev set...") dev_preds = [] dev_edits = [] dev_acc = (0, 0, 0) logppl = 0 total_count = 0 total_sent = 0 
total_reward = 0 for i, batch in enumerate(tqdm(dev_batch)): if args['eval_ppl']: loss, acc, reward = trainer.update(batch, eval=True, freeze_teacher=args['finetune']) count = (batch['tgt_out'] > 0).sum().item() logppl += loss * count total_count += count sent_count = sum(len(x) for x in batch['tgt_text']) total_sent += sent_count dev_acc = tuple(x * sent_count + y for x, y in zip(acc, dev_acc)) total_reward += reward * sent_count else: preds = trainer.predict(batch, args['beam_size']) dev_preds += preds dev_acc = tuple(x / total_sent for x in dev_acc) writer.add_scalars(writer_tag, {'spec_reward': dev_acc[0], 'novelty_reward': dev_acc[1], 'nll': dev_acc[2], 'weighted_sum': total_reward / total_sent}, 0) writer.flush() best_dev_acc = (0, 0, -1e10) patience = 0 target_lambda = args['lambda2'] finetune_start = 1 # start training for epoch in range(1, args['num_epoch']+1): if trainer.optimizer.param_groups[0]['lr'] < args['lr'] * 1e-2 and \ (args['lambda2'] == 0 or trainer.teacher_optimizer.param_groups[0]['lr'] < args['lr'] * 1e-2 or finetuning): if finetuning or lambda_rl == 0: break print('Start finetuning...') finetuning = True args['lambda_reinforce'] = lambda_rl trainer = Trainer(model_file=model_file, use_cuda=args['cuda'], vocab=vocab, teacher_model_file=teacher_model_file, args=args) lr_scheduler = optim.lr_scheduler.ReduceLROnPlateau(trainer.optimizer, factor=args['lr_decay'], patience=args['patience'], mode='max') finetune_start = epoch trainer.teacher.eval() dev_score_history = [] train_loss = 0 teacher_acc = (0, 0, 0) for i, batch in enumerate(train_batch): start_time = time.time() global_step += 1 loss, acc, reward = trainer.update(batch, eval=False, freeze_teacher=finetuning, i=global_step) # update step train_loss += loss teacher_acc = tuple(x+y for x, y in zip(acc, teacher_acc)) if global_step % args['log_step'] == 0: duration = time.time() - start_time print(format_str.format(datetime.now().strftime("%Y-%m-%d %H:%M:%S"), global_step,\ max_steps, epoch, args['num_epoch'], loss, duration, trainer.optimizer.param_groups[0]['lr'], trainer.args['lambda2'])) # eval on dev print("Evaluating on dev set...") dev_preds = [] dev_edits = [] dev_acc = (0, 0, 0) logppl = 0 total_count = 0 total_sent = 0 total_reward = 0 for i, batch in enumerate(tqdm(dev_batch)): if args['eval_ppl']: loss, acc, reward = trainer.update(batch, eval=True, freeze_teacher=finetuning) count = (batch['tgt_out'] > 0).sum().item() logppl += loss * count total_count += count sent_count = sum(len(x) for x in batch['tgt_text']) total_sent += sent_count dev_acc = tuple(x * sent_count + y for x, y in zip(acc, dev_acc)) total_reward += reward * sent_count else: preds = trainer.predict(batch, args['beam_size']) dev_preds += preds if args['eval_ppl']: if finetuning: dev_score = total_reward / total_sent else: dev_score = np.exp(logppl / total_count) else: dev_score = rouge_score(references, [' '.join(x) for x in dev_preds]) lr_scheduler.step(dev_score) train_loss = train_loss / len(train_batch) # avg loss per batch print("epoch {}: train_loss = {:.6f}, dev_score = {:.4f}".format(epoch, train_loss, dev_score)) if args['lambda2'] != 0: dev_acc = tuple(x / total_sent for x in dev_acc) teacher_acc = tuple(x / len(train_batch) for x in teacher_acc) writer.add_scalars(writer_tag, {'spec_reward': dev_acc[0], 'novelty_reward': dev_acc[1], 'nll': dev_acc[2], 'weighted_sum': total_reward / total_sent}, epoch) writer.flush() teacher_lr_scheduler.step(sum(dev_acc)) print("train_acc = {:s}, dev_acc = {:s}, teacher lr = 
{:.6f}".format(str(teacher_acc), str(dev_acc), trainer.teacher_optimizer.param_groups[0]['lr'])) # save best model compare = (lambda new, old: new > max(old)) if finetuning else (lambda new, old: new < min(old)) if args['lambda2'] != 1 and (len(dev_score_history) == 0 or compare(dev_score, dev_score_history)): if finetuning: trainer.save(finetuned_model_file) else: trainer.save(model_file) print("new best model saved.") best_dev_preds = dev_preds patience = 0 if not finetuning and args['lambda2'] != 0 and dev_acc[0] + dev_acc[1] + 0.1 * dev_acc[2] > best_dev_acc[0] + best_dev_acc[1] + 0.1 * best_dev_acc[2]: trainer.save_teacher(teacher_model_file) print("new best teacher model saved.") best_dev_acc = dev_acc patience = 0 dev_score_history += [dev_score] print("") if patience >= args['trainer_patience']: break patience += 1 print("Training ended with {} epochs.".format(epoch)) if finetuning: best_f, best_epoch = max(dev_score_history), np.argmax(dev_score_history)+1 print("Best dev score = {:.2f}, at epoch = {}".format(best_f, best_epoch)) else: best_f, best_epoch = min(dev_score_history), np.argmin(dev_score_history)+1 print("Best dev perplexity = {:.2f}, at epoch = {}".format(best_f, best_epoch))
def MinLowerBounds_varylambda(salmon_exps, lbs): ''' Assuming Salmon must be (1-lambda) proportion, the lower bounds becomes (1-lambda) * salmon + lambda * lb This function calculates the minimum lower bounds from multiple samples with varying lambda proportions. The min lower bounds of the samples is a piece-wise linear functions with respect to lambda. input: - salmon_exps: Salmon expression of transcripts from multiple samples - lbs: the G2 lower bounds of the transcript from multiple samples output: - region - interceptions: the interception of each linear functions in the piece-wise linear curves - slopes: the slope of each linear functions in the piece-wise linear curves ''' assert (len(salmon_exps) == len(lbs)) assert (np.all([lbs[i] <= salmon_exps[i] for i in range(len(salmon_exps))])) # variables to record which sample has the min lb, and the corresponding interception and slope of the lb line indexes = [] region_start = [] region_end = [] interceptions = [] slopes = [] # from lambda = 0, find the minimum salmon expression s = 0 region_start.append(s) i = np.argmin(salmon_exps) indexes.append(i) interceptions.append(salmon_exps[i]) slopes.append(lbs[i] - salmon_exps[i]) # record the possible index of lines that can cross with the current min set_possible_indexes = set(list(range(len(salmon_exps)))) - set([i]) while s < 1: # find the region_end: either 1 or the first crossing with other lines crossing = [] for j in set_possible_indexes: # parallel case if lbs[j] - salmon_exps[j] == lbs[i] - salmon_exps[i]: continue # find x axis of the crossing crossing.append( (j, 1.0 * (salmon_exps[j] - salmon_exps[i]) / (lbs[i] - salmon_exps[i] - lbs[j] + salmon_exps[j]))) crossing.sort(key=lambda x: x[1]) # remove the crossing that before s: these lines are not possible to be below the current line for (k, x) in crossing: if x < s: set_possible_indexes.remove(k) crossing = [(k, x) for (k, x) in crossing if x > s] # find the crossing if len(crossing) == 0 or crossing[0][1] >= 1: region_end.append(1) s = 1 else: region_end.append(crossing[0][1]) s = region_end[-1] i = crossing[0][0] region_start.append(s) indexes.append(i) interceptions.append(salmon_exps[i]) slopes.append(lbs[i] - salmon_exps[i]) set_possible_indexes.remove(i) assert (len(indexes) == len(region_start)) assert (len(region_start) == len(region_end)) assert (len(region_end) == len(interceptions)) assert (len(interceptions) == len(slopes)) # assertion about the monotonic decreasing of min lb as lambda increases for i in range(1, len(region_start)): minlb_start = interceptions[i - 1] + slopes[i - 1] * region_start[i - 1] minlb_end = interceptions[i] + slopes[i] * region_start[i] assert (minlb_start >= minlb_end - floatpoint_error) regions = [(region_start[i], region_end[i]) for i in range(len(region_start))] return regions, interceptions, slopes
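A hedged usage sketch for MinLowerBounds_varylambda; the inputs are invented, and floatpoint_error is assumed to be a module-level tolerance in the original code, so a stand-in value is defined here so the sketch can run.

floatpoint_error = 1e-6                  # stand-in for the module-level tolerance

salmon_exps = [10.0, 8.0, 12.0]          # per-sample Salmon expression of one transcript
lbs = [4.0, 6.0, 2.0]                    # corresponding G2 lower bounds (each <= salmon)
regions, interceptions, slopes = MinLowerBounds_varylambda(salmon_exps, lbs)

lam = 0.5
for (start, end), b, m in zip(regions, interceptions, slopes):
    if start <= lam <= end:
        # min over samples of (1 - lam) * salmon + lam * lb at this lambda
        print(b + m * lam)
        break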