def get_num_samples(self, idx):
    """
    Number of samples needed to estimate the population variance within the
    tolerance limit.

    The sample variance is approximately normally distributed
    (http://stats.stackexchange.com/a/105338/71884 ; see warning below):

        Var(s^2) \approx 1/n * (\mu_4 - \sigma^4)

    Adjust n as per the tolerance needed to estimate the sample variance.

    Warning: this does not work for some distributions, e.g. Bernoulli
    (https://stats.stackexchange.com/a/104911). Use min_samples to control
    the number of samples to be drawn explicitly.
    """
    if self.min_samples:
        return self.min_samples
    min_samples = 1000
    tol = 10.0
    required_precision = self.prec / tol
    if not self.scipy_dist:
        return min_samples
    args, kwargs = self.scipy_arg_fn(**self.get_dist_params(idx, wrap_tensor=False))
    try:
        fourth_moment = np.max(self.scipy_dist.moment(4, *args, **kwargs))
        var = np.max(self.scipy_dist.var(*args, **kwargs))
        min_computed_samples = int(math.ceil((fourth_moment - math.pow(var, 2))
                                             / required_precision))
    except (AttributeError, ValueError):
        return min_samples
    return max(min_samples, min_computed_samples)
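# A minimal, standalone sketch of the sample-size rule described in the docstring
# above, assuming a standard normal target and scipy available; the precision
# value below is hypothetical.
import math
import scipy.stats

required_precision = 0.01          # hypothetical tolerance on Var(s^2)
dist = scipy.stats.norm()
fourth_moment = dist.moment(4)     # mu_4 = 3.0 for the standard normal
var = dist.var()                   # sigma^2 = 1.0
# Var(s^2) ~ (mu_4 - sigma^4) / n  =>  n ~ (mu_4 - sigma^4) / precision
n = int(math.ceil((fourth_moment - var ** 2) / required_precision))
print(n)                           # 200 for these values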
def report_statistics(id_sub, stats): records = stats['records'] distance = records['distance'] delta = records['delta'] order = scale_score(distance) order = order / float(order.size) r = Report('stats-%s' % id_sub) r.data('records', records) f = r.figure() with f.plot('scatter') as pylab: pylab.scatter(delta, distance) pylab.xlabel('delta') pylab.ylabel('distance') pylab.axis((-1, np.max(delta) + 1, -0.05, np.max(distance))) with f.plot('with_stats', **dp_predstats_fig) as pylab: fancy_error_display(pylab, delta, distance, 'g') with f.plot('distance_order', **dp_predstats_fig) as pylab: fancy_error_display(pylab, delta, order, color='k') f = r.figure(cols=1) bins = np.linspace(0, np.max(distance), 100) for i, d in enumerate(set(delta)): with f.plot('conditional%d' % i) as pylab: which = delta == d pylab.hist(distance[which], bins) return r
def beta_limiter(r, cfl, theta=0.95, beta=0.66666666666666666):
    r"""
    Modification of CFL Superbee limiter with theta and beta parameters

    Additional Input:
     - *theta*
     - *beta*
    """
    a = np.empty((2, len(r)))
    b = np.zeros((2, len(r)))

    a[0, :] = 0.001
    a[1, :] = cfl
    cfmod1 = np.max(a, axis=0)
    a[0, :] = 0.999
    cfmod2 = np.min(a, axis=0)
    s1 = theta * 2.0 / cfmod1
    s2 = (1.0 + cfl) / 3.0
    phimax = theta * 2.0 / (1.0 - cfmod2)

    a[0, :] = s1 * r
    a[1, :] = phimax
    # element-wise reductions (axis=0), as in the other CFL-dependent limiters
    b[1, :] = np.min(a, axis=0)
    ultra = np.max(b, axis=0)

    a[0, :] = 1.0 + (s2 - beta / 2.0) * (r - 1.0)
    a[1, :] = 1.0 + (s2 + beta / 2.0) * (r - 1.0)
    b[0, :] = ultra
    b[1, :] = np.max(a, axis=0)
    a[0, :] = 0.0
    a[1, :] = np.min(b, axis=0)

    return np.max(a, axis=0)
def _get_initial_classes(self): images = map(lambda f: cv2.imread(path.join(self._root, f)), self._files) self._avg_pixels = np.array([], dtype=np.uint8) # extract parts from each image for all of our 6 categories for i in range(0, self._n_objects): rects = self._rects[:, i] # compute maximum rectangle rows = np.max(rects['f2'] - rects['f0']) cols = np.max(rects['f3'] - rects['f1']) # extract annotated rectangles im_rects = map(lambda (im, r): im[r[0]:r[2],r[1]:r[3],:], zip(images, rects)) # resize all rectangles to the max size & average all the rectangles im_rects = np.array(map(lambda im: cv2.resize(im, (cols, rows)), im_rects), dtype=np.float) avgs = np.around(np.average(im_rects, axis = 0)) # average the resulting rectangle to compute mn = np.around(np.array(cv2.mean(avgs), dtype='float'))[:-1].astype('uint8') if(self._avg_pixels.size == 0): self._avg_pixels = mn else: self._avg_pixels = np.vstack((self._avg_pixels, mn))
def cada_torrilhon_limiter(r, cfl, epsilon=1.0e-3):
    r"""
    Cada-Torrilhon modified

    Additional Input:
     - *epsilon* =
    """
    a = np.ones((2, len(r))) * 0.95
    b = np.empty((3, len(r)))

    # clamp cfl to [0.05, 0.95], element-wise (axis=0)
    a[0, :] = cfl
    cfl = np.min(a, axis=0)
    a[0, :] = cfl
    a[1, :] = 0.05
    cfl = np.max(a, axis=0)

    # Multiply all parts except b[0,:] by (1.0 - epsilon) as well
    b[0, :] = 1.0 + (1 + cfl) / 3.0 * (r - 1)
    b[1, :] = 2.0 * np.abs(r) / (cfl + epsilon)
    b[2, :] = (8.0 - 2.0 * cfl) / (np.abs(r) * (cfl - 1.0 - epsilon)**2)
    b[1, :] *= (1.0 - epsilon)
    b[2, :] *= (1.0 - epsilon)
    a[0, :] = np.min(b, axis=0)
    a[1, :] = (-2.0 * (cfl**2 - 3.0 * cfl + 8.0) * (1.0 - epsilon)
               / (np.abs(r) * (cfl**3 - cfl**2 - cfl + 1.0 + epsilon)))

    return np.max(a, axis=0)
def theta_limiter(r,cfl,theta=0.95): r""" Theta limiter Additional Input: - *theta* = """ a = np.empty((2,len(r))) b = np.empty((3,len(r))) a[0,:] = 0.001 a[1,:] = cfl cfmod1 = np.max(a,axis=0) a[0,:] = 0.999 cfmod2 = np.min(a,axis=0) s1 = 2.0 / cfmod1 s2 = (1.0 + cfl) / 3.0 phimax = 2.0 / (1.0 - cfmod2) a[0,:] = (1.0 - theta) * s1 a[1,:] = 1.0 + s2 * (r - 1.0) left = np.max(a,axis=0) a[0,:] = (1.0 - theta) * phimax * r a[1,:] = theta * s1 * r middle = np.max(a,axis=0) b[0,:] = left b[1,:] = middle b[2,:] = theta*phimax return np.min(b,axis=0)
def cfl_superbee_theta(r,cfl,theta=0.95): r""" CFL-Superbee (Roe's Ultrabee) with theta parameter """ a = np.empty((2,len(r))) b = np.zeros((2,len(r))) a[0,:] = 0.001 a[1,:] = cfl cfmod1 = np.max(a,axis=0) a[0,:] = 0.999 cfmod2 = np.min(a,axis=0) s1 = theta * 2.0 / cfmod1 phimax = theta * 2.0 / (1.0 - cfmod2) a[0,:] = s1*r a[1,:] = phimax b[1,:] = np.min(a,axis=0) ultra = np.max(b,axis=0) a[0,:] = ultra b[0,:] = 1.0 b[1,:] = r a[1,:] = np.max(b,axis=0) return np.min(a,axis=0)
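# A small usage sketch for the limiter functions above, assuming numpy is
# imported as np in the defining module as these functions expect; they take
# r and cfl as arrays of equal length and return an array of limiter values.
import numpy as np

r = np.array([-0.5, 0.0, 0.5, 1.0, 2.0])   # ratios of consecutive slopes
cfl = np.full_like(r, 0.8)                 # local CFL numbers
print(theta_limiter(r, cfl))
print(cfl_superbee_theta(r, cfl))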
def work(self, **kwargs): self.__dict__.update(kwargs) self.worked = True samples = LGMM1(rng=self.rng, size=(self.n_samples,), **self.LGMM1_kwargs) samples = np.sort(samples) edges = samples[::self.samples_per_bin] centers = .5 * edges[:-1] + .5 * edges[1:] print edges pdf = np.exp(LGMM1_lpdf(centers, **self.LGMM1_kwargs)) dx = edges[1:] - edges[:-1] y = 1 / dx / len(dx) if self.show: plt.scatter(centers, y) plt.plot(centers, pdf) plt.show() err = (pdf - y) ** 2 print np.max(err) print np.mean(err) print np.median(err) if not self.show: assert np.max(err) < .1 assert np.mean(err) < .01 assert np.median(err) < .01
def Haffine_from_points(fp, tp):
    '''Compute the homography H of an affine transformation such that tp is
    obtained from fp by that affine transformation.'''
    if fp.shape != tp.shape:
        raise RuntimeError('number of points do not match')

    # Condition (normalize) the points
    # -- source points --
    m = numpy.mean(fp[:2], axis=1)
    maxstd = numpy.max(numpy.std(fp[:2], axis=1)) + 1e-9
    C1 = numpy.diag([1/maxstd, 1/maxstd, 1])
    C1[0, 2] = -m[0] / maxstd
    C1[1, 2] = -m[1] / maxstd
    fp_cond = numpy.dot(C1, fp)

    # -- corresponding target points --
    m = numpy.mean(tp[:2], axis=1)
    maxstd = numpy.max(numpy.std(tp[:2], axis=1)) + 1e-9
    C2 = numpy.diag([1/maxstd, 1/maxstd, 1])
    C2[0, 2] = -m[0] / maxstd
    C2[1, 2] = -m[1] / maxstd
    tp_cond = numpy.dot(C2, tp)

    # After conditioning the points have zero mean, so the translation is zero
    A = numpy.concatenate((fp_cond[:2], tp_cond[:2]), axis=0)
    U, S, V = numpy.linalg.svd(A.T)

    # Build matrices B and C
    tmp = V[:2].T
    B = tmp[:2]
    C = tmp[2:4]

    tmp2 = numpy.concatenate((numpy.dot(C, numpy.linalg.pinv(B)),
                              numpy.zeros((2, 1))), axis=1)
    H = numpy.vstack((tmp2, [0, 0, 1]))

    # De-condition
    H = numpy.dot(numpy.linalg.inv(C2), numpy.dot(H, C1))

    return H / H[2, 2]  # normalize and return
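# A hypothetical usage sketch for Haffine_from_points: four correspondences in
# homogeneous coordinates (rows are x, y, w) related by a pure translation, so
# the recovered H should be close to that translation.
import numpy

fp = numpy.array([[0.0, 1.0, 1.0, 0.0],
                  [0.0, 0.0, 1.0, 1.0],
                  [1.0, 1.0, 1.0, 1.0]])   # unit square, homogeneous columns
tp = fp.copy()
tp[0] += 2.0                               # same square shifted by (2, 3)
tp[1] += 3.0
H = Haffine_from_points(fp, tp)
print(numpy.round(H, 3))                   # last column ~ (2, 3, 1)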
def set_hard_hard_constraints(self, tdata1, tdata2, seeds): tdata1[seeds==2] = np.max(tdata1) + 1 tdata2[seeds==1] = np.max(tdata2) + 1 tdata1[seeds==1] = 0 tdata2[seeds==2] = 0 return tdata1, tdata2
def work(self): self.worked = True kwargs = dict( weights=self.weights, mus=self.mus, sigmas=self.sigmas, low=self.low, high=self.high, q=self.q, ) samples = GMM1(rng=self.rng, size=(self.n_samples,), **kwargs) samples = np.sort(samples) edges = samples[::self.samples_per_bin] #print samples pdf = np.exp(GMM1_lpdf(edges[:-1], **kwargs)) dx = edges[1:] - edges[:-1] y = 1 / dx / len(dx) if self.show: plt.scatter(edges[:-1], y) plt.plot(edges[:-1], pdf) plt.show() err = (pdf - y) ** 2 print np.max(err) print np.mean(err) print np.median(err) if not self.show: assert np.max(err) < .1 assert np.mean(err) < .01 assert np.median(err) < .01
def draw_ohl_graph(ax, data): # sort data along args.x_column and make it np.array again all_data = sorted(data, key=itemgetter(args.x_column)) scores = list({e[0] for e in all_data}) scores.sort() print("scores=", scores) np_all_data = np.array(all_data) all_x = np_all_data[:, args.x_column] all_y = np_all_data[:, args.y_column] x_max = np.max(all_x) x_min = np.min(all_x) y_max = np.max(all_y) y_min = np.min(all_y) # print("ymax=", y_max, "ymin=", y_min) y_width = y_max - y_min if y_width == 0: if y_max == 0: y_width = 1.0 else: y_min = 0 y_width = y_max ax.set_xlim(xmax = x_max / args.scale) ax.set_xlim(xmin = 0) ax.set_ylim(ymax = y_max + y_width * 0.05) ax.set_ylim(ymin = y_min - y_width * 0.05) for score in scores: # print("score=", score) data = list(filter(lambda e: e[0] == score, all_data)) data = np.array(data) x = data[:, args.x_column] y = data[:, args.y_column] x = x / args.scale ans = args.ans if len(data) < 5: ax.plot(x, y, '.', label=str(score)) continue elif len(data) * 0.1 < args.ans: ans = int(len(data) * 0.1) if ans < 4: ans = 4 # print("ans=", ans) weight = np.ones(ans, dtype=np.float)/ans y_average = np.convolve(y, weight, 'valid') rim = ans - 1 rim_l = rim // 2 rim_r = rim - rim_l ax.plot(x[rim_l:-rim_r], y_average, label=str(score)) ax.legend(loc=2) ax.set_xlabel(args.xlabel) ax.set_ylabel(args.ylabel) ax.grid(linewidth=1, linestyle="-", alpha=0.1)
def _crinfo_from_specific_data(self, data, margin):
    # find the automatic crop region; np.nonzero gives the indices
    nzi = np.nonzero(data)

    x1 = np.min(nzi[0]) - margin[0]
    x2 = np.max(nzi[0]) + margin[0] + 1
    y1 = np.min(nzi[1]) - margin[0]
    y2 = np.max(nzi[1]) + margin[0] + 1
    z1 = np.min(nzi[2]) - margin[0]
    z2 = np.max(nzi[2]) + margin[0] + 1

    # clamp to the array bounds
    if x1 < 0:
        x1 = 0
    if y1 < 0:
        y1 = 0
    if z1 < 0:
        z1 = 0
    if x2 > data.shape[0]:
        x2 = data.shape[0] - 1
    if y2 > data.shape[1]:
        y2 = data.shape[1] - 1
    if z2 > data.shape[2]:
        z2 = data.shape[2] - 1

    # crop info
    crinfo = [[x1, x2], [y1, y2], [z1, z2]]
    #dataout = self._crop(data, crinfo)
    #dataout = data[x1:x2, y1:y2, z1:z2]
    return crinfo
def zplane(self, title="", fontsize=18): """ Display filter in the complex plane Parameters ---------- """ rb = self.z ra = self.p t = np.arange(0, 2 * np.pi + 0.1, 0.1) plt.plot(np.cos(t), np.sin(t), "k") plt.plot(np.real(ra), np.imag(ra), "x", color="r") plt.plot(np.real(rb), np.imag(rb), "o", color="b") M1 = -10000 M2 = -10000 if len(ra) > 0: M1 = np.max([np.abs(np.real(ra)), np.abs(np.imag(ra))]) if len(rb) > 0: M2 = np.max([np.abs(np.real(rb)), np.abs(np.imag(rb))]) M = 1.6 * max(1.2, M1, M2) plt.axis([-M, M, -0.7 * M, 0.7 * M]) plt.title(title, fontsize=fontsize) plt.show()
def viterbi_decode(score, transition_params):
    """Decode the highest scoring sequence of tags outside of TensorFlow.

    This should only be used at test time.

    Args:
        score: A [seq_len, num_tags] matrix of unary potentials.
        transition_params: A [num_tags, num_tags] matrix of binary potentials.

    Returns:
        viterbi: A [seq_len] list of integers containing the highest scoring
            tag indices.
        viterbi_score: A float containing the score for the Viterbi sequence.
    """
    trellis = np.zeros_like(score)
    backpointers = np.zeros_like(score, dtype=np.int32)
    trellis[0] = score[0]

    for t in range(1, score.shape[0]):
        v = np.expand_dims(trellis[t - 1], 1) + transition_params
        trellis[t] = score[t] + np.max(v, 0)
        backpointers[t] = np.argmax(v, 0)

    viterbi = [np.argmax(trellis[-1])]
    for bp in reversed(backpointers[1:]):
        viterbi.append(bp[viterbi[-1]])
    viterbi.reverse()

    viterbi_score = np.max(trellis[-1])
    return viterbi, viterbi_score
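# A tiny, hypothetical sanity check for viterbi_decode (2 tags, 3 time steps),
# assuming numpy is imported as np as the function expects.
import numpy as np

score = np.array([[1.0, 0.0],
                  [0.0, 1.0],
                  [1.0, 0.0]])              # unary potentials [seq_len, num_tags]
transition_params = np.array([[0.5, 0.0],
                              [0.0, 0.5]])  # binary potentials [num_tags, num_tags]
tags, best = viterbi_decode(score, transition_params)
print(tags, best)                           # [0, 0, 0] 3.0 for these inputs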
def scatter(x, y, equal=False, xlabel=None, ylabel=None, xinvert=False, yinvert=False): """ Plot a scatter with simple formatting options """ plt.scatter(x, y, 200, color=[0.3, 0.3, 0.3], edgecolors="white", linewidth=1, zorder=2) sns.despine() if xlabel: plt.xlabel(xlabel) if ylabel: plt.ylabel(ylabel) if equal: plt.axes().set_aspect("equal") plt.plot([0, max([x.max(), y.max()])], [0, max([x.max(), y.max()])], color=[0.6, 0.6, 0.6], zorder=1) bmin = min([x.min(), y.min()]) bmax = max([x.max(), y.max()]) rng = abs(bmax - bmin) plt.xlim([bmin - rng * 0.05, bmax + rng * 0.05]) plt.ylim([bmin - rng * 0.05, bmax + rng * 0.05]) else: xrng = abs(x.max() - x.min()) yrng = abs(y.max() - y.min()) plt.xlim([x.min() - xrng * 0.05, x.max() + xrng * 0.05]) plt.ylim([y.min() - yrng * 0.05, y.max() + yrng * 0.05]) if xinvert: plt.gca().invert_xaxis() if yinvert: plt.gca().invert_yaxis()
def plot3d(X, Y, Z, point, zlim=None, ax=None, fig=None, xylabelsize=33): # Plot from matplotlib import cm if fig is None: fig = plt.figure() if ax is None: ax = fig.add_subplot(111, projection='3d') z_min = np.min(Z) - np.max(Z)/2 ax.plot_surface(X, Y, Z, rstride=10, cstride=10, #vmin=Z.min(), vmax=Z.max(), cmap=cm.coolwarm, linewidth=1, antialiased=True) cset = ax.contourf(X, Y, Z, zdir='z', offset=z_min, #norm=colors.LogNorm(vmin=Z.min(), vmax=Z.max()), cmap=cm.coolwarm) argmin = X.ravel()[Z.argmin()], Y.ravel()[Z.argmin()] print("argmin", argmin) # add point and cross at defined point ax.plot([point[0]], [point[1]], 'wo', zs=[z_min], ms=20) ax.plot([X.min(), X.max()], [point[1], point[1]], '--w', zs=[z_min, z_min], linewidth=2.0) ax.plot([point[0], point[0]], [Y.min(), Y.max()], '--w', zs=[z_min, z_min], linewidth=2.0) # add point and cross at argmin ax.plot([argmin[0]], [argmin[1]], 'o', color='k', zs=[z_min], ms=20) ax.plot([X.min(), X.max()], [argmin[1], argmin[1]], '--', color='k', zs=[z_min, z_min], linewidth=2.0) ax.plot([argmin[0], argmin[0]], [Y.min(), Y.max()], '--', color='k', zs=[z_min, z_min], linewidth=2.0) #ax.text(argmin[0], argmin[1], z_min, ". (%.3f, %.3f)" % argmin) ax.set_xlabel(r'$\beta_1$', size=xylabelsize) ax.set_ylabel(r'$\beta_2$', size=xylabelsize) #ax.set_zlabel(r'Error', size=xylabelsize) ax.set_zlim(z_min, np.max(Z)) return ax, z_min, argmin
def test_zernike_get_opd(): zernike_optic = wfe.ZernikeWFE(coefficients=[NWAVES * WAVELENGTH,], radius=RADIUS) opd_map = zernike_optic.get_opd(WAVELENGTH, units='meters') assert np.max(opd_map) == NWAVES * WAVELENGTH opd_map_waves = zernike_optic.get_opd(WAVELENGTH, units='waves') assert np.max(opd_map_waves) == NWAVES
def testEncodeUnrelatedAreas(self): """ assert unrelated areas don"t share bits (outside of chance collisions) """ avgThreshold = 0.3 maxThreshold = 0.12 overlaps = overlapsForUnrelatedAreas(1499, 37, 5) self.assertLess(np.max(overlaps), maxThreshold) self.assertLess(np.average(overlaps), avgThreshold) maxThreshold = 0.12 overlaps = overlapsForUnrelatedAreas(1499, 37, 10) self.assertLess(np.max(overlaps), maxThreshold) self.assertLess(np.average(overlaps), avgThreshold) maxThreshold = 0.17 overlaps = overlapsForUnrelatedAreas(999, 25, 10) self.assertLess(np.max(overlaps), maxThreshold) self.assertLess(np.average(overlaps), avgThreshold) maxThreshold = 0.25 overlaps = overlapsForUnrelatedAreas(499, 13, 10) self.assertLess(np.max(overlaps), maxThreshold) self.assertLess(np.average(overlaps), avgThreshold)
def update_im_clim(self, val, im, slider): if np.mean(self.data[self.frame_slice]) < 0: self.im.set_clim(np.min(self.data[self.frame_slice]) * (self.sliders[-1]._slider.val / 100), np.max(self.data[self.frame_slice]) * (self.sliders[-2]._slider.val / 100)) else: self.im.set_clim(np.max(self.data[self.frame_slice]) * (self.sliders[-1]._slider.val / 100), np.max(self.data[self.frame_slice]) * (self.sliders[-2]._slider.val / 100))
def max(self, axis=None, out=None, keepdims=False): self._prepare_out(out=out) try: value = np.max(self.value, axis=axis, out=out, keepdims=keepdims) except: # numpy < 1.7 value = np.max(self.value, axis=axis, out=out) return self._new_view(value)
def quantify(self): """Quantify shape of the contours.""" four_pi = 4. * np.pi for edge in self.edges: # Positions x = edge['x'] y = edge['y'] A, perimeter, x_center, y_center, distances = \ self.get_shape_factor(x, y) # Set values. edge['area'] = A edge['perimeter'] = perimeter edge['x_center'] = x_center edge['y_center'] = y_center # Circle is 1. Rectangle is 0.78. Thread-like is close to zero. edge['shape_factor'] = four_pi * edge['area'] / \ edge['perimeter'] ** 2. # We assume that the radius of the edge # as the median value of the distances from the center. radius = np.median(distances) edge['radius_deviation'] = np.std(distances - radius) / radius edge['x_min'] = np.min(x) edge['x_max'] = np.max(x) edge['y_min'] = np.min(y) edge['y_max'] = np.max(y)
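# A quick numeric check of the shape-factor convention used above (4*pi*A/P^2):
# a circle gives exactly 1 and a square gives pi/4, which matches the ~0.78
# quoted for rectangles in the comment.
import numpy as np

r, side = 1.0, 1.0
circle = 4 * np.pi * (np.pi * r**2) / (2 * np.pi * r)**2
square = 4 * np.pi * side**2 / (4 * side)**2
print(circle, square)   # 1.0, ~0.785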
def get_batch(self, model, batch_size): len_memory = len(self.memory) num_actions = 6 encouraged_actions = np.zeros(num_actions, dtype=np.int) predicted_actions = np.zeros(num_actions, dtype=np.int) inputs = np.zeros((min(len_memory, batch_size), 4, 80, 74)) targets = np.zeros((inputs.shape[0], num_actions)) q_list = np.zeros(inputs.shape[0]) for i, idx in enumerate(np.random.randint(0, len_memory, size=inputs.shape[0])): input_t, action_t, reward_t, input_tp1 = self.memory[idx][0] terminal = self.memory[idx][1] inputs[i] = input_t targets[i] = model.predict(input_t.reshape(1, 4, 80, 74))[0] q_next = np.max(model.predict(input_tp1.reshape(1, 4, 80, 74))[0]) q_list[i] = np.max(targets[i]) predicted_actions[np.argmax(targets[i])] += 1 targets[i, action_t] = (1. - terminal) * self.discount * q_next + reward_t if reward_t > 0. or terminal: print "Action %d rewarded with %f (sample #%d)"%(action_t, targets[i, action_t], idx) encouraged_actions[np.argmax(targets[i])] += 1 return inputs, targets, encouraged_actions, predicted_actions, np.average(q_list)
def grid_xyz(xyz, n_x, n_y, **kwargs):
    """ Grid data as a list of X,Y,Z coords into a 2D array

    Parameters
    ----------
    xyz: np.array
        Numpy array of X,Y,Z values, with shape (n_points, 3)
    n_x: int
        Number of points in x direction (fastest varying!)
    n_y: int
        Number of points in y direction

    Returns
    -------
    gridded_data: np.array
        2D array of gridded data, with shape (n_y, n_x)

    Notes
    -----
    'x' is the inner dimension, i.e. image dimensions are (n_y, n_x). This is
    counterintuitive (to me at least) but in line with numpy definitions.
    """
    x, y, z = xyz[:, 0], xyz[:, 1], xyz[:, 2]
    x_ax = np.linspace(np.min(x), np.max(x), n_x)
    y_ax = np.linspace(np.min(y), np.max(y), n_y)
    xg, yg = np.meshgrid(x_ax, y_ax)

    data = griddata(xyz[:, :2], z, (xg, yg), **kwargs)
    return data
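# A hypothetical usage sketch, assuming the griddata referenced above is
# scipy.interpolate.griddata and numpy is imported as np.
import numpy as np
from scipy.interpolate import griddata

n_x, n_y = 4, 3
xg, yg = np.meshgrid(np.arange(n_x, dtype=float), np.arange(n_y, dtype=float))
z = xg + 10 * yg
xyz = np.column_stack([xg.ravel(), yg.ravel(), z.ravel()])   # (n_points, 3)
grid = grid_xyz(xyz, n_x, n_y, method='linear')
print(grid.shape)   # (3, 4), i.e. (n_y, n_x)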
def makeThresholdMap(image, findCars, scales=[1.5], percentOfHeapmapToToss=.5): print("scales:", scales, ", type:", type(scales), "image.shape:", image.shape, ", dtype:", image.dtype, ", percentOfHeapmapToToss:", percentOfHeapmapToToss) boundingBoxList=[] boundingBoxWeights=[] for scale in scales: listOfBoundingBoxes, listOfWeights = findCars(image, scale) boundingBoxList+=listOfBoundingBoxes boundingBoxWeights+=listOfWeights if USEBOUNDINGBOXWEIGHTS: unNormalizedHeatMap=addWeightedHeat(image.shape, boundingBoxList, boundingBoxWeights) else: unNormalizedHeatMap=addHeat(image.shape, boundingBoxList) if USESTACKOFHEATMAPS: unNormalizedHeatMap,_=totalHeatmapStack(unNormalizedHeatMap) unNormalizedHeatMapCounts=np.unique(unNormalizedHeatMap, return_counts=True) if TESTING: print("makeThresholdMap-unNormalizedHeatMapCounts:", unNormalizedHeatMapCounts, ", len(unNormalizedHeatMapCounts):", len(unNormalizedHeatMapCounts), ", len(unNormalizedHeatMapCounts[0]):", len(unNormalizedHeatMapCounts[0])) unNormalizedHeatMapMidpoint=unNormalizedHeatMapCounts[0][int(round(len(unNormalizedHeatMapCounts[0])*percentOfHeapmapToToss))] thresholdMap=applyThreshold(unNormalizedHeatMap, unNormalizedHeatMapMidpoint) print("makeThresholdMap-max(thresholdMap):", np.max(thresholdMap), ", min(thresholdMap):", np.min(thresholdMap)) if TESTING: print("makeThresholdMap-thresholdMap counts:", (np.unique(thresholdMap, return_counts=True)), ", len(thresholdMap):", len(thresholdMap), ", len(thresholdMap[0]):", len(thresholdMap[0])) normalizedMap=normalizeMap(thresholdMap) if TESTING: print("makeThresholdMap-normalizedMap counts:", (np.unique(normalizedMap, return_counts=True)), ", len(normalizedMap):", len(normalizedMap), ", len(normalizedMap[0]):", len(normalizedMap[0])) print("makeThresholdMap-max(normalizedMap):", np.max(normalizedMap), ", min(normalizedMap):", np.min(normalizedMap)) return normalizedMap, boundingBoxList, unNormalizedHeatMap, boundingBoxWeights
def gm_assign_to_cluster(X, center_list, cov_list, p_k): """Assigns each sample to one of the Gaussian clusters given. Returns an array with numbers, 0 corresponding to the first cluster in the cluster list. """ # Reused code from E-step, should be unified somehow: samples = X.shape[0] K = len(center_list) log_p_Xn_mat = np.zeros((samples, K)) for k in range(K): log_p_Xn_mat[:, k] = logmulnormpdf(X, center_list[k], cov_list[k]) + np.log(p_k[k]) pmax = np.max(log_p_Xn_mat, axis=1) log_p_Xn = pmax + np.log(np.sum(np.exp(log_p_Xn_mat.T - pmax), axis=0).T) logL = np.sum(log_p_Xn) log_p_nk = np.zeros((samples, K)) for k in range(K): # log_p_nk[:,k] = logmulnormpdf(X, center_list[k], cov_list[k]) + np.log(p_k[k]) - log_p_Xn log_p_nk[:, k] = log_p_Xn_mat[:, k] - log_p_Xn print log_p_nk # Assign to cluster: maxP_k = np.c_[np.max(log_p_nk, axis=1)] == log_p_nk # print np.max(log_p_nk, axis=1) maxP_k = maxP_k * (np.array(range(K)) + 1) return np.sum(maxP_k, axis=1) - 1
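# A standalone sketch of the log-sum-exp pattern used above: pmax is subtracted
# before exponentiating so that very negative log-densities do not underflow.
import numpy as np

log_p = np.array([[-1000.0, -1001.0],
                  [-2.0,    -1.0]])     # log p(x_n | k) + log p(k), shape (samples, K)
pmax = np.max(log_p, axis=1)
log_p_x = pmax + np.log(np.sum(np.exp(log_p.T - pmax), axis=0).T)
print(log_p_x)   # finite; the naive log(sum(exp(...))) gives -inf for the first row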
def diff_dist_matrix(self, res_range=None, scaled=False): if res_range != None: assert(len(res_range) == 2) dist_matrices = [] for pdb in self.get_next_pdb(): ca_xyz = pdb.get_ca_xyz_matrix() if res_range != None: ca_xyz = ca_xyz[res_range[0]-1:res_range[1], :] dist_matrix = calc_distance_matrix(ca_xyz) dist_matrices.append(dist_matrix) scaled_diff_dist_matrix = num.zeros(dist_matrices[0].shape, 'd') count = 0 for i in range(len(dist_matrices)): for j in range(i+1, len(dist_matrices)): diff_dist_matrix = num.abs(dist_matrices[i] - dist_matrices[j]) if scaled: scale = num.max(diff_dist_matrix) if scale == 0: continue diff_dist_matrix /= scale scaled_diff_dist_matrix += diff_dist_matrix count += 1 #print >> sys.stderr, count scaled_diff_dist_matrix /= count if scaled: scaled_diff_dist_matrix /= num.max(scaled_diff_dist_matrix) return scaled_diff_dist_matrix
def max(self, axis=None, out=None, keepdims=False): self._prepare_out(out=out) try: value = np.max(self.value, axis=axis, out=out, keepdims=keepdims) except: # numpy < 1.7 value = np.max(self.value, axis=axis, out=out) return self.__quantity_instance__(value, self.unit, copy=False)
def get_spherical_bounding_box(lons, lats): """ Given a collection of points find and return the bounding box, as a pair of longitudes and a pair of latitudes. Parameters define longitudes and latitudes of a point collection respectively in a form of lists or numpy arrays. :return: A tuple of four items. These items represent western, eastern, northern and southern borders of the bounding box respectively. Values are floats in decimal degrees. :raises ValueError: If points collection has the longitudinal extent of more than 180 degrees (it is impossible to define a single hemisphere bound to poles that would contain the whole collection). """ north, south = numpy.max(lats), numpy.min(lats) west, east = numpy.min(lons), numpy.max(lons) assert (-180 <= west <= 180) and (-180 <= east <= 180) if get_longitudinal_extent(west, east) < 0: # points are lying on both sides of the international date line # (meridian 180). the actual west longitude is the lowest positive # longitude and east one is the highest negative. west = min(lon for lon in lons if lon > 0) east = max(lon for lon in lons if lon < 0) if not all((get_longitudinal_extent(west, lon) >= 0 and get_longitudinal_extent(lon, east) >= 0) for lon in lons): raise ValueError('points collection has longitudinal extent ' 'wider than 180 deg') return west, east, north, south
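# A hypothetical usage sketch; the get_longitudinal_extent helper that the
# function relies on is not shown above, so a simple stand-in (signed eastward
# difference wrapped to [-180, 180)) is used here for illustration only.
import numpy

def get_longitudinal_extent(lon1, lon2):
    # stand-in for the real helper: eastward angular distance from lon1 to lon2
    return (lon2 - lon1 + 180.0) % 360.0 - 180.0

lons = numpy.array([10.0, 12.0, 15.0])
lats = numpy.array([40.0, 42.0, 41.0])
print(get_spherical_bounding_box(lons, lats))   # west=10, east=15, north=42, south=40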
def compare_objs(x, y): assert type(x) is type(y) if type(x) is dict: assert x.keys().sort() == y.keys().sort() for ky in x: compare_objs(x[ky], y[ky]) elif type(x) is list: assert len(x) == len(y) for ind in range(len(x)): compare_objs(x[ind], y[ind]) elif type(x) is np.ndarray: assert x.shape == y.shape if not np.allclose(x, y, atol=1.0e-5, rtol=0.0): x = x.reshape(x.size) y = y.reshape(y.size) dd = x - y worst_case = np.max(np.abs(dd)) print "worst case abs diff = %e" % worst_case ind = np.where((x != 0) | (y != 0)) rel_err = np.abs(np.divide(dd[ind], np.abs(x[ind]) + np.abs(y[ind]))) worst_case = np.max(rel_err) print "worst case rel diff = %e" % worst_case assert False else: assert x == y
def hmapsave(self, filename, polar=False): ''' This helper function saves a beam to a fits file. The default is to save a rectangular-coordinate beam map to a binary table extension. Args: filename -- file in which to save the data. polar -- Save in a FEBECOP-compatible polar-coordinate format. default is False. ''' # SAVE HYBRID BEAM MAP (self.beam) INTO FITS # FORMAT: # self.beam['SQUARE_X']: -hrmax, +hrmax in arcmin per # pixsize arcsec steps - 1D vector # self.beam['SQUARE_Y'] # self.beam['SQUARE']: associated beam values - 1D vector # self.beam['POLAR_X']: carthesian coordinates of spherical phi, theta # for phi in 0, hrmax per pixsize arcsec steps # and theta in 0, 359.5 per half a degree steps # self.beam['POLAR_Y'] # self.beam['POLAR']: associated beam values - 1D vector fits_keyword_data = {} if polar: # set up fits keywords fits_keyword_data['Mintheta'] = [ np.min(self.beam['POLAR_Y']) * np.pi / 180, 'Min polar angle [rad]'] fits_keyword_data['Maxtheta'] = [ np.max(self.beam['POLAR_Y']) * np.pi / 180, 'Max polar angle [rad]'] fits_keyword_data['Nphi'] = [np.size(self.beam['POLAR_X']), 'Number of points in azimuth angle.'] fits_keyword_data['Ntheta'] = [np.size(self.beam['POLAR_Y']), 'Number of points in polar angle.'] # Add a bunch of zeros to the polarized beam since it doesn't exist nulldata = np.zeros(np.shape(np.ravel(self.beam['POLAR']))) tbhdu = pyfits.BinTableHDU.from_columns( [pyfits.Column(name='BEAMDATA', unit='', format='E', array=np.ravel(self.beam['POLAR'])), pyfits.Column(name='BEAMDATAQ', unit='', format='E', array=nulldata), pyfits.Column(name='BEAMDATAU', unit='', format='E', array=nulldata), pyfits.Column(name='BEAMDATAV', unit='', format='E', array=nulldata)]) else: fits_keyword_data['NX'] = [np.size(self.beam['SQUARE_X']), 'Grid X size'] fits_keyword_data['NY'] = [np.size(self.beam['SQUARE_Y']), 'Grid Y size'] fits_keyword_data['XDELTA'] = [ (self.beam['SQUARE_X'][1] - self.beam['SQUARE_X'][0]) * np.pi / 180 / 60, 'Grid X step [radians]'] fits_keyword_data['YDELTA'] = [ (self.beam['SQUARE_Y'][1] - self.beam['SQUARE_Y'][0]) * np.pi / 180 / 60, 'Grid Y step [radians]'] fits_keyword_data['XCENTRE'] = [np.size(self.beam['SQUARE_X']) / 2, 'Center location (X index)'] fits_keyword_data['YCENTRE'] = [np.size(self.beam['SQUARE_Y']) / 2, 'Center location (Y index)'] tbhdu = pyfits.BinTableHDU.from_columns([ pyfits.Column(name='BEAMDATA', unit='', format='E', array=np.ravel(self.beam['SQUARE']))]) for kk in fits_keyword_data.keys(): tbhdu.header.set(kk, fits_keyword_data[kk][0], fits_keyword_data[kk][1]) tbhdu.writeto(filename) return
print("n comp: {}".format(env.state.board_config.sum())) s = State.init_state(config=cfg.vals) state_features = Features.featurize_state(s) y_hat = env._predict(state_features, n_samples=500) print(y_hat) print(y_hat.shape) agg_sales = y_hat.sum(axis=1) agg_mean = np.mean(agg_sales) agg_lower, agg_upper = np.quantile(agg_sales, q=[.05, .95]) x_lim = [np.min(agg_sales), np.max(np.max(agg_sales))] #plt.hist(agg_sales) plt.axvline(agg_mean,linestyle='--',c='blue') plt.axvline(agg_lower,linestyle='dotted',c='red') plt.axvline(agg_upper,linestyle='dotted',c='red') sns.distplot(agg_sales, hist=True, kde=True, color = 'blue', hist_kws={'edgecolor':'black'}, norm_hist=True) plt.xlabel("Revenue ($)") plt.savefig("figs/unseen_state-dist.pdf") plt.clf() plt.close() board_config = s.board_config
def train(self, X, y, learning_rate=1e-3, num_iters=100, batch_size=200, verbose=False): """ Train this linear classifier using stochastic gradient descent. Inputs: - X: A numpy array of shape (N, D) containing training data; there are N training samples each of dimension D. - y: A numpy array of shape (N,) containing training labels; y[i] = c means that X[i] has label 0 <= c < C for C classes. - learning_rate: (float) learning rate for optimization. - reg: (float) regularization strength. - num_iters: (integer) number of steps to take when optimizing - batch_size: (integer) number of training examples to use at each step. - verbose: (boolean) If true, print progress during optimization. Outputs: A list containing the value of the loss function at each training iteration. """ num_train, dim = X.shape num_classes = np.max(y) + 1 # assume y takes values 0...K-1 where K is number of classes if self.W is None: # lazily initialize W self.W = 0.001 * np.random.randn(dim, num_classes) # Run stochastic gradient descent to optimize W loss_history = [] for it in range(num_iters): ######################################################################### # TODO: # # Sample batch_size elements from the training data and their # # corresponding labels to use in this round of gradient descent. # # Store the data in X_batch and their corresponding labels in # # y_batch; after sampling X_batch should have shape (dim, batch_size) # # and y_batch should have shape (batch_size,) # # # # Hint: Use np.random.choice to generate indices. Sampling with # # replacement is faster than sampling without replacement. # ######################################################################### indices = np.random.choice(np.arange(num_train), size=batch_size) X_batch = X[indices,:] y_batch = y[indices] ######################################################################### # END OF YOUR CODE # ######################################################################### # evaluate loss and gradient loss, grad = self.loss(X_batch, y_batch) loss_history.append(loss) # perform parameter update ######################################################################### # TODO: # # Update the weights using the gradient and the learning rate. # ######################################################################### self.W -= learning_rate*grad ######################################################################### # END OF YOUR CODE # ######################################################################### if verbose and it % 100 == 0: print('iteration %d / %d: loss %f' % (it, num_iters, loss)) return loss_history
estimator = LogisticRegression(x_train, y_train, polynomial_degree, sinusoid_degree) cost = estimator.train(max_iteration) labels = estimator.unique_labels plt.plot(range(len(cost[0])), cost[0], label=labels[0]) plt.plot(range(len(cost[1])), cost[1], label=labels[1]) plt.legend() plt.show() y_train_predictions = estimator.predict(x_train) precision = np.sum(y_train_predictions == y_train) / y_train.shape[0] * 100 print(precision) x_min = np.min(x_train[:, 0]) x_max = np.max(x_train[:, 0]) y_min = np.min(x_train[:, 1]) y_max = np.max(x_train[:, 1]) X = np.linspace(x_min, x_max, num_examples) Y = np.linspace(y_min, y_max, num_examples) Z = np.zeros((num_examples, num_examples)) for x_index, x in enumerate(X): for y_index, y in enumerate(Y): data = np.array([[x, y]]) predictions = estimator.predict(data) Z[x_index][y_index] = estimator.predict(data)[0, 0] positives = (y_train == 1).flatten()
def softmax(x):
    """Compute softmax values for each set of scores in x."""
    e_x = np.exp(x - np.max(x))
    return e_x / e_x.sum()
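# A minimal usage sketch for the softmax above, assuming numpy is imported as
# np; subtracting np.max(x) keeps np.exp from overflowing on large inputs while
# leaving the result unchanged in exact arithmetic.
import numpy as np

scores = np.array([1000.0, 1001.0, 1002.0])
probs = softmax(scores)
print(probs)         # ~[0.090, 0.245, 0.665]
print(probs.sum())   # 1.0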
def save(self, trainer): """ Saves the results of trainer in a file Args ; trainer : """ # create results folders and files self.create() results_dict = {key: None for key in self.columns_list} # Launches a comparison before saving results trainer.comparator.launch_comparison(50) lead_times = [] for agent in trainer.agents: lead_times.append(agent.lead_time.display()) test_id = self.get_test_id() results_dict['ID'] = test_id #### GAME PARAMETERS results_dict['CLT_DEMAND'] = trainer.env.params['client_demand'].display() results_dict['CP_AGENT'] = trainer.params['comparison_agent'].label results_dict['AGENTS'] = trainer.get_agents_labels() results_dict['USE_BO'] = trainer.params['use_backorders'] results_dict['T'] = trainer.params['number_periods'] results_dict['LDT'] = str(lead_times) results_dict['HC'] = trainer.params['holding_cost'] results_dict['SC'] = trainer.params['shortage_cost'] results_dict['SR'] = trainer.params['TS'] results_dict['IIL'] = trainer.params['initial_inventory'] #### AI PARAMETERS results_dict['ACTIONS'] = str("(min = "+str(np.min(trainer.params['AI_possible_actions']))+" , max = "+str(np.max(trainer.params['AI_possible_actions']))+")") results_dict['m'] = trainer.params['m'] results_dict['AI_DN'] = str(trainer.params['AI_DN']) results_dict['N_ITER'] = trainer.train_iter results_dict["TIME_PERF"] = round(trainer.time_per_iteration * 100,2) #### RESULTS results_dict['AVG_SUM_DEMAND'] = trainer.comparator.AI_performance['sum_demand'] results_dict['AI_AVG_CUM_COSTS'] = trainer.comparator.AI_performance['costs'] results_dict['CP_AVG_CUM_COSTS'] = trainer.comparator.CP_performance['costs'] #### AI RESULTS results_dict['AI_AVG_CR'] = trainer.comparator.AI_performance['coverage_rate'] results_dict["AI_AVG_BR"] = trainer.comparator.AI_performance['breakdown_rate'] results_dict["AI_AVG_SR"] = trainer.comparator.AI_performance['service_rate'] #### CP RESULTS results_dict['CP_AVG_CR'] = trainer.comparator.CP_performance['coverage_rate'] results_dict["CP_AVG_BR"] = trainer.comparator.CP_performance['breakdown_rate'] results_dict["CP_AVG_SR"] = trainer.comparator.CP_performance['service_rate'] #### DATE results_dict["TEST_DATE"] = datetime.datetime.now().replace(second=0, microsecond=0) # Convert result dictionnary into a list results = list(results_dict.values()) # Load the excel workbook wb = load_workbook(Path(self.results_file_path)) ws = wb.active i = 1 while ws['A'+str(i)].value != None: i += 1 for j, result in enumerate(results): cell = ws.cell(column = j+1, row=i, value=result) cell.alignment = Alignment(horizontal='center') wb.save(Path(self.results_file_path))
def _plot_categorical(df, xlabel, ylabel, x_keys, y_keys, prefac, ax, cmap, s): """ Plot two categorical variables against each other in a bubble plot. Parameters ---------- df : pd.DataFrame A pandas DataFrame with the data xlabel : str The column name for the variable on the x-axis ylabel : str The column name for the variable on the y-axis x_keys : iterable A list containing the different categories in df[xlabel] y_keys: iterable A list containing the different categories in df[ylabel] prefac : float A pre-factor steering the shading of the bubbles ax : matplotlib.Axes object The matplotlib.Axes object to plot the bubble plot into cmap : matplotlib.cm.colormap A matplotlib colormap to use for shading the bubbles s : float A pre-factor changing the overall size of the bubbles Returns ------- ax : matplotlib.Axes object The same matplotlib.Axes object for further manipulation """ tuples, counts = [], [] for i in range(len(x_keys)): for j in range(len(y_keys)): tuples.append((i,j)) counts.append(len(df[(df[xlabel] == x_keys[i]) & (df[ylabel] == y_keys[j])])) x, y = list(zip(*tuples)) cmap = plt.cm.get_cmap(cmap) sizes = (np.array(counts)/np.sum(counts)) ax.scatter(x, y, s=s*1000*sizes, marker='o', linewidths=1, edgecolor='black', c=cmap(prefac*sizes/(np.max(sizes)-np.min(sizes))), alpha=0.7) ax.set_xticks(np.arange(len(x_keys))) ax.set_xticklabels(x_keys) ax.set_xlim(np.min(x)-1, np.max(x)+1) ax.set_xlabel(xlabel) ax.set_yticks(np.arange(len(y_keys))) ax.set_yticklabels(y_keys) ax.set_ylim(np.min(y)-1, np.max(y)+1) ax.set_ylabel(ylabel) return ax
y_acc = ["Time Series/Y/Original Record/Acceleration"] sa_gmroti50 = ims.gmrotipp(x_acc.value, x_acc.attrs["Time-step"], y_acc.value, y_acc.attrs["Time-step"], periods, 50.0) # Assumes Psuedo-spectral acceleration sa_gmroti50 = sa_gmroti50["PSA"] return sa_gmroti50 SPECTRA_FROM_FILE = {"Geometric": get_geometric_mean, "GMRotI50": get_gmroti50, "GMRotD50": get_gmrotd50} SCALAR_XY = {"Geometric": lambda x, y : np.sqrt(x * y), "Arithmetic": lambda x, y : (x + y) / 2., "Larger": lambda x, y: np.max(np.array([x, y])), "Vectorial": lambda x, y : np.sqrt(x ** 2. + y ** 2.)} def get_scalar(fle, i_m, component="Geometric"): """ Retrieves the scalar IM from the database :param fle: Instance of :class: h5py.File :param str i_m: Intensity measure :param str component: Horizontal component of IM """ if not "H" in fle["IMS"].keys(): x_im = fle["IMS/X/Scalar/" + i_m].value[0]
def run(self): ( results_dir, ligands_file, target_sel, flex_sel, box_sel, ph, exhaustiveness, num_modes, energy_range, cpu, seed, ) = self.args self.logEvent.emit("<h2>Preparation</h2>") # # Check if the output # if os.listdir(results_dir): self.logEvent.emit(f""" <br/> <font color="red"> <b>The output folder is not empty!</b> </font> """) # # Create ligand directory # ligands_dir = results_dir + "/ligands" try: os.mkdir(ligands_dir) except FileExistsError: shutil.rmtree(ligands_dir) os.mkdir(ligands_dir) # # Convert SMILES file into PDBQT # obabel = pymol.plugins.pref_get("DOCKING_OBABEL") command = (f'"{obabel}" -i smi "{ligands_file}"' f" -ph {ph} --gen3d -m" f' -O "{ligands_dir}/.pdbqt"') output, success = run(command) if success: self.logEvent.emit(f""" <br/> <br/><b>Ligands converted to PDBQT.</b> <br/><b>OpenBabel command:</b> {command} """) self.logCodeEvent.emit(output) else: self.logEvent.emit(f""" <br/> <br/><b>Ligands conversion to PDBQT failed.</b> <br/><b>OpenBabel command:</b> {command} """) self.logCodeEvent.emit(output) self.done.emit(False) return # # Rename PDBQT files accordingly to SMILES # Be aware that not every SMILES file has a name column # count = 0 lineno = 0 has_names = True with open(ligands_file) as smi: for line in smi: lineno += 1 # skip empty lines if line.strip() == "": continue count += 1 if has_names: try: # it really has names smiles, name = line.split() except: if count != 1 and has_names: self.logEvent.emit(f""" <br/> <br/><b>Inconsistent SMILES naming at molecule line #{lineno}.</b> <br/><b><i>Please check you SMILES file.</i></b> """) self.done.emit(False) return # first line don't have name # don't rename files has_names = False continue shutil.move(f"{ligands_dir}/{count}.pdbqt", f"{ligands_dir}/{name}.pdbqt") if len(glob(f"{ligands_dir}/*.pdbqt")) != count: # The number of generated ligands and SMILES differ self.logEvent.emit(f""" <br/> <br/><b>Number of generated PDBQT files and SMILES molecules differ.</b> <br/><b>Please check you SMILES file.</b> """) self.done.emit(False) return # # The number of dockings to do # n_ligands = count self.numSteps.emit(count) # # Prepare rigid target # target_pdb = f"{results_dir}/target.pdb" cmd.save(target_pdb, target_sel) with chdir(dirname(target_pdb)): adt_python = pymol.plugins.pref_get("DOCKING_ADT_PYTHON") prepare_target = pymol.plugins.pref_get("DOCKING_PREPARE_RECEPTOR") command = f'"{adt_python}"' f' "{prepare_target}" -r "{target_pdb}"' output, success = run(command) if success: self.logEvent.emit(f""" <br/> <br/><b>Rigid target prepared.</b> <br/><b>AutoDock command:</b> {command} """) self.logCodeEvent.emit(output) else: self.logEvent.emit(f""" <br/> <br/><b>Rigid target preparation failed.</b> <br/><b>AutoDock command:</b> {command} """) self.logCodeEvent.emit(output) self.done.emit(False) return # # Prepare flexible target # if flex_sel != "": # # Construct residues string # flex_residues = set() for atom in cmd.get_model(flex_sel).atom: flex_residues.add(f"{atom.chain}:{atom.resn}{atom.resi}") flex_residues = ",".join(f"target:{res}" for res in flex_residues) # # Run AutoDock command # target_pdbqt = f"{results_dir}/target.pdbqt" with chdir(dirname(target_pdb)): adt_python = pymol.plugins.pref_get("DOCKING_ADT_PYTHON") prepare_flexreceptor = pymol.plugins.pref_get( "DOCKING_PREPARE_FLEXRECEPTOR") command = (f'"{adt_python}"' f'"{prepare_flexreceptor}"' f' -r "{target_pdbqt}"' f" -s {flex_residues}") output, success = run(command) if success: self.logEvent.emit(f""" <br/> <br/><b>Flexible 
target prepared.</b> <br/><b>AutoDock command:</b> {command} """) self.logCodeEvent.emit(output) else: self.logEvent.emit(f""" <br/> <br/><b>Flexible target preparation failed.</b> <br/><b>AutoDock command:</b> {command} """) self.logCodeEvent.emit(output) self.done.emit(False) return # # Create Vina results directory # output_dir = f"{results_dir}/poses" try: os.mkdir(output_dir) except FileExistsError: pass # # Compute box variables # box_coords = cmd.get_coords(box_sel) max = np.max(box_coords, axis=0) min = np.min(box_coords, axis=0) half_size = (max - min) / 2 center = min + half_size size_x, size_y, size_z = half_size * 2 center_x, center_y, center_z = center size_x, size_y, size_z = ( round(float(size_x), 2), round(float(size_y), 2), round(float(size_z), 2), ) center_x, center_y, center_z = ( round(float(center_x), 2), round(float(center_y), 2), round(float(center_z), 2), ) # # Project data # project_file = results_dir + "/docking.json" project_data = {} project_data.update({ "program": "vina", "results_dir": results_dir, "ligands_dir": ligands_dir, "output_dir": output_dir, "size_x": size_x, "size_y": size_y, "size_z": size_z, "center_x": center_x, "center_y": center_y, "center_z": center_z, }) if flex_sel == "": project_data.update({ "flexible": False, "target_pdbqt": f"{results_dir}/target.pdbqt" }) else: project_data.update({ "flexible": True, "rigid_pdbqt": f"{results_dir}/target_rigid.pdbqt", "flex_pdbqt": f"{results_dir}/target_flex.pdbqt", }) # # Prompt for user confirmation # vina = pymol.plugins.pref_get("DOCKING_VINA") base_command = (f"{vina}" f" --center_x {center_x}" f" --center_y {center_y}" f" --center_z {center_z}" f" --size_x {size_x}" f" --size_y {size_y}" f" --size_z {size_z}" f" --cpu {cpu}" f" --seed {seed}" f" --exhaustiveness {exhaustiveness}" f" --num_modes {num_modes}" f" --energy_range {energy_range}") self.logEvent.emit(f""" <br/> <h2>Docking</h2> <br/> <b>Vina base command:</b> {base_command} """) fail_count = 0 for i, ligand_pdbqt in enumerate(glob(f"{ligands_dir}/*.pdbqt")): name, _ = splitext(basename(ligand_pdbqt)) output_pdbqt = f"{output_dir}/{name}.out.pdbqt" log_txt = f"{output_dir}/{name}.log" command = base_command + (f' --ligand "{ligand_pdbqt}"' f' --out "{output_pdbqt}"' f' --log "{log_txt}"') if project_data["flexible"]: rigid_pdbqt = project_data["rigid_pdbqt"] flex_pdbqt = project_data["flex_pdbqt"] command += f' --receptor "{rigid_pdbqt}"' f' --flex "{flex_pdbqt}"' else: target_pdbqt = project_data["target_pdbqt"] command += f' --receptor "{target_pdbqt}"' output, success = run(command) self.currentStep.emit(i + 1) if not success: fail_count += 1 if fail_count <= 10: self.logEvent.emit(f""" <br/> <font color="red"> <b>Vina command failed:</b> {command} <br/> <pre>{output}</pre> </font> """) elif fail_count == 11: self.logEvent.emit(f""" <br/> <h3> <font color="red"> Too many errors. Omitting output. </font> <h3>f """) done_ligands = len(glob(f"{output_dir}/*.out.pdbqt")) self.logEvent.emit("<br/><h2>Summary</h2>") summary = f""" <br/><b>Total expected runs:</b> {n_ligands} <br/><b>Total failures:</b> {fail_count} <br/><b>Total found PDBQT files:</b> {done_ligands} """ if done_ligands < n_ligands or fail_count > 0: self.logEvent.emit(f"<font color='red'>{summary}</font>") else: self.logEvent.emit(f"{summary}") with open(results_dir + f"/docking.json", "w") as docking_file: json.dump(project_data, docking_file, indent=4) self.done.emit(True)
def step(self, action): # Check action action = np.clip(action, self.low_action, self.high_action) assert self.action_space.contains(action), "%r (%s) invalid action" % (action, type(action)) # States before simulate xpos, ypos, xface, yface = self.state rotate = action[0] theta = np.arctan2(yface, xface) # Simulate # update facing theta = theta + self.rotate_scale*rotate xface = np.cos(theta) yface = np.sin(theta) # update position xpos = xpos + xface*self.speed_scale ypos = ypos + yface*self.speed_scale # States after simulate self.state = [xpos, ypos, xface, yface] self.state = np.clip(self.state, self.low_state, self.high_state) # Record Trajectory self.traj.append(self.state[:2]) # Define reward function # Define done done = False reward = 0 xpos, ypos, xface, yface = self.state # time penalty(distance) vec = np.array([xpos, ypos])-self.target_coord[self.task[0]] dist = np.linalg.norm(vec) reward += -dist #print('Distance Reward: {}'.format(reward)) # time penalty(task) reward += self.task_penalty if self.task_penalty > 0: #print('Task: {}'.format(self.task_penalty)) self.task_penalty = np.max((0, self.task_penalty-self.speed_scale/3)) done_status = '' # hit the target for i in range(self.num_targets): # skip finished target if self.finished_task.count(i) > 0: continue vec_i = np.array([xpos, ypos])-self.target_coord[i] dist_i = np.linalg.norm(vec_i) if dist_i < self.target_size: if i == self.task[0]: # hit right target if len(self.task) == 1: # finish all tasks done = True reward += 10 done_status = 'Finish Task' else: # finish subtask done_status = 'Right Target' # start task penalty self.task_penalty = np.linalg.norm(self.target_coord[self.task[0]]-self.target_coord[self.task[1]]) # pop task self.finished_task.append(self.task[0]) self.task = self.task[1:] else: # hit wrong target done = True done_status = 'Wrong Target' break # hit the wall if not done: if xpos == 1 or xpos == -1 or ypos == 1 or ypos == -1: done = True done_status = 'Hit the Wall' # times up self.timesteps += 1 if not done and self.timesteps >= self.max_timesteps: done = True done_status = 'Times Up' # record min_dist_cp = 0 min_dist_ft = 0 if done: self.traj = np.array(self.traj) # find dist closest to checkpoint ctcp = np.argmin(np.linalg.norm(self.traj-self.target_coord[self.fixed_task[0]], axis=1)) ctft = ctcp+np.argmin(np.linalg.norm(self.traj[ctcp:]-self.target_coord[self.fixed_task[1]], axis=1)) min_dist_cp = np.linalg.norm(self.traj[ctcp]-self.target_coord[self.fixed_task[0]]) min_dist_ft = np.linalg.norm(self.traj[ctft]-self.target_coord[self.fixed_task[1]]) # episode self.episode = (self.episode + 1) % 10 return self.get_obs(), reward, done, {'done_status': done_status, 'dist': dist, 'min_dist_cp': min_dist_cp, 'min_dist_ft': min_dist_ft}
def generate_quanser_summary(trajectory, residuals, detections): # # Print reprojection error statistics # print("summary") weights = detections[:, ::3] reprojection_errors = [] for i in range(trajectory.shape[0]): valid = np.reshape(residuals[i], [2,-1])[:, weights[i,:] == 1] reprojection_errors.extend(np.linalg.norm(valid, axis=0)) reprojection_errors = np.array(reprojection_errors) print('Reprojection error over whole image sequence:') print('- Maximum: %.04f pixels' % np.max(reprojection_errors)) print('- Average: %.04f pixels' % np.mean(reprojection_errors)) print('- Median: %.04f pixels' % np.median(reprojection_errors)) # # Figure: Reprojection error distribution # plt.figure(figsize=(8,3)) plt.hist(reprojection_errors, bins=80, color='k') plt.ylabel('Frequency') plt.xlabel('Reprojection error (pixels)') plt.title('Reprojection error distribution') plt.tight_layout() plt.savefig('out_histogram.png') # # Figure: Comparison between logged encoder values and vision estimates # logs = np.loadtxt('../data/logs.txt') enc_time = logs[:,0] enc_yaw = logs[:,1] enc_pitch = logs[:,2] enc_roll = logs[:,3] # Note: The logs have been time-synchronized with the image sequence, # but there will be an offset between the motor angles and the vision # estimates. That offset is automatically subtracted here. vis_yaw = trajectory[:,0] + enc_yaw[0] - trajectory[0,0] vis_pitch = trajectory[:,1] + enc_pitch[0] - trajectory[0,1] vis_roll = trajectory[:,2] + enc_roll[0] - trajectory[0,2] vis_fps = 16 enc_frame = enc_time*vis_fps vis_frame = np.arange(trajectory.shape[0]) fig,axes = plt.subplots(3, 1, figsize=[6,6], sharex='col') axes[0].plot(enc_frame, enc_yaw, 'k:', label='Encoder log') axes[0].plot(vis_frame, vis_yaw, 'k', label='Vision estimate') axes[0].legend() axes[0].set_xlim([0, vis_frame[-1]]) axes[0].set_ylim([-1, 1]) axes[0].set_ylabel('Yaw (radians)') axes[1].plot(enc_frame, enc_pitch, 'k:') axes[1].plot(vis_frame, vis_pitch, 'k') axes[1].set_xlim([0, vis_frame[-1]]) axes[1].set_ylim([0.0, 0.6]) axes[1].set_ylabel('Pitch (radians)') axes[2].plot(enc_frame, enc_roll, 'k:') axes[2].plot(vis_frame, vis_roll, 'k') axes[2].set_xlim([0, vis_frame[-1]]) axes[2].set_ylim([-0.6, 0.6]) axes[2].set_ylabel('Roll (radians)') axes[2].set_xlabel('Image number') plt.tight_layout() plt.savefig('out_trajectory.png')
def extract_structure_information(data, k, outlier_threshold, distance_measure, minkowski_p=None): """ Extract structure information from the dataset :param data: a numpy-matrix. each column represents an attribute; each row a data item :param k: the amount of neighbours to use for the initial knn graph :param outlier_threshold: the maximum density an outlier can have :param distance_measure: a str describing the distance measure: ‘braycurtis’, ‘canberra’, ‘chebyshev’, ‘cityblock’, ‘correlation’, ‘cosine’, ‘dice’, ‘euclidean’, ‘hamming’, ‘jaccard’, ‘kulsinski’, ‘mahalanobis’, ‘matching’, ‘minkowski’, ‘rogerstanimoto’, ‘russellrao’, ‘seuclidean’, ‘sokalmichener’, ‘sokalsneath’, ‘sqeuclidean’, ‘yule’ :param minkowski_p: The p-norm to apply. Mandatory for un/weighted Minkowski distance :param weighted_minkowski_weights: The weight vector. Mandatory for weighted Minkowski :return: a tuple of lists: (cluster supporting objects, cluster outliers, rest), where each list contains the indices of the objects in the data matrix * Cluster Supporting Object (CSO): object with density higher than all its neighbors * Cluster Outliers: object with density lower than all its neighbors, and lower than a predefined threshold * Rest Object: object not assigned to one of the previous groups * Distance matrix: a matrix of distances between data points * K-Nearest neighbours: a python list of numpy arrays of the nearest neighbours of each element """ # check that a p value exists if minkowski distance is used if distance_measure == 'minkowski' and minkowski_p is None: raise FlameError( "Minkowski distance requires a p value to be supplied!") item_count = data.shape[0] if k > item_count: raise FlameError("More cluster neighbours (" + str(k) + ") requested than data points available! 
(" + str(item_count) + ")") if k <= 0: raise FlameError("Requested cluster neighbour count is " + str(k) + "...") # get a distance matrix describing our data from scipy, square it so creating the knn graph is easy distance_matrix = squareform(pdist(data, distance_measure, p=minkowski_p)) # creates an adjacency list where each row contains the k nearest neighbours # neighbours after the k-nearest-neighbour are appended if they have the same distance as the k-nearest-neighbour knn_graph = [] for i in range(item_count): # get the distances and sort them distance_matrix_row = distance_matrix[i] knns = distance_matrix_row.argsort()[1:] # append neighbours with the same distance as the k-nearest-neighbour same_distance_k = k last_neighbour_distance = distance_matrix_row[knns[k - 1]] for j in range(k, item_count): if j >= len(knns) or distance_matrix_row[ knns[j]] > last_neighbour_distance: break else: same_distance_k += 1 knn_graph.append(knns[:same_distance_k]) # calculate the density for each item max_distance = numpy.max(distance_matrix) densities = numpy.empty((item_count, ), dtype=float) for i in range(item_count): distance_sum = (numpy.sum(distance_matrix[i].take(knn_graph[i])) / len(knn_graph[i])) if distance_sum > 0: densities[i] = max_distance / distance_sum else: densities[i] = sys.float_info.max # create item bins cluster_supporting_objects = [] outliers = [] rest = [] # sort items for i in range(densities.shape[0]): knn_densities = densities.take(knn_graph[i]) item_density = densities[i] if item_density <= outlier_threshold and item_density < knn_densities.min( ): outliers.append(i) elif item_density > knn_densities.max(): cluster_supporting_objects.append(i) else: rest.append(i) return cluster_supporting_objects, outliers, rest, distance_matrix, knn_graph
marker='o', linestyle='None', linewidth=2, color=col.maroon1) f3.set_xlabel('P(fitter)') f3.set_ylabel('P(equal fitness)') #f3.set_xlim([0,0.6]) #f3.set_ylim([0,0.3]) nbins = 20 # Replaces 0 in A[:,1], so that the log operation works. A[np.where(A[:, 1] == 0), 1] = 0.1 D = np.log(A[:, 1]) print('D min/max: {0}/{1}'.format(np.min(D), np.max(D))) bins = np.linspace(np.min(D), np.max(D), nbins) h, b = np.histogram(D, bins) f4 = F1.add_subplot(2, 2, 4) f4.plot(b[:-1], h, '.', color=col.darkviolet, marker='o', markersize=8) f4.set_xlabel('log (P(fitter))') f4.set_ylabel('Freq') F1.tight_layout() figpath = 'figures/prob_fitinc_pOn_' + str(pOn) + '.png' plt.savefig(figpath) plt.clf() print("See results in figures/prob_fitinc_pOn*.png")
def display_box_sel(name, sel): coords = cmd.get_coords(sel) max = np.max(coords, axis=0) min = np.min(coords, axis=0) display_box(name, max, min)
def main(filename, zmin=None, zmax=None): #Read the parameter file params = read_redm_param(filename) if zmin != None and zmax != None: params['zmin'] = zmin params['zmax'] = zmax print >> sys.stderr, "Parameter file read successful" #Read in the data print >> sys.stderr, "Reading clusters file..." if os.path.isfile(params['cluster_file']) == False: print >> sys.stderr, "ERROR: Cluster catalog file " + params[ 'cluster_file'] + " not found" sys.exit(1) cat = pyfits.open(params['cluster_file']) cat = cat[1].data print >> sys.stderr, len(cat), " clusters read in" print >> sys.stderr, np.max(cat['z']) print >> sys.stderr, "Reading members file..." if os.path.isfile(params['member_file']) == False: print >> sys.stderr, "ERROR: Member catalog file " + params[ 'member_file'] + " not found" sys.exit(1) mem = pyfits.open(params['member_file']) mem = mem[1].data print >> sys.stderr, len(mem), " galaxies read in" #If asked to use zred instead of z_lambda, set up for that if int(params['use_zred']) == 1: [cat, mem] = set_zred.set_zred(cat, mem) #If not asked to use lambda_errors in calculations, set them to zero if int(params['use_lambda_err']) == 0: cat['lambda_chisq_e'][:] = cat['lambda_chisq_e'][:] * 0. #Also read in data to abundance match to, if requested if int(params['ABM']) == 1: print >> sys.stderr, "Reading in clusters to use for abundance matching..." if os.path.isfile(params['abm_file']) == False: print >> sys.stderr, "ERROR: ABM catalog file " + params[ 'abm_file'] + " not found" sys.exit(1) abm = pyfits.open(params['abm_file']) abm = abm[1].data print >> sys.stderr, len(abm), " clusters for ABM" if params['stellarmass']: mag = mem['MSTAR_50'] else: #Read in amag data if it exists, otherwise run kcorrect if os.path.isfile(params['kcorr_file']) == False: #Running kcorrect -- note this requires running IDL #os.system('setenv CATFILE '+params['cluster_file']) os.environ['CATFILE'] = params['cluster_file'] os.environ['MEMFILE'] = params['member_file'] os.environ['KCORRFILE'] = params['kcorr_file'] os.environ['USE_DES'] = params['use_des'] os.environ['NO_UBAND'] = params['no_uband'] os.environ['LRG'] = params['LRG'] os.environ['BANDSHIFT'] = params['bandshift'] os.environ['USE_ZRED'] = params['use_zred'] os.system( '/afs/slac/g/ki/software/idl/idl70/bin/idl < /afs/slac.stanford.edu/u/ki/chto100/code/redmapper_clf/redmapper/rm_kcorr_wrapper.pro' ) if os.path.isfile(params['kcorr_file']) == False: print >> sys.stderr, "ERROR: Kcorrect failed, no file found" sys.exit(1) kcorr = pyfits.open(params['kcorr_file']) kcorr = kcorr[0].data print >> sys.stderr, len(kcorr), " galaxies kcorrected" if len(kcorr) != len(mem): print >> sys.stderr, "ERROR! 
Number of galaxies != number of kcorrect values" sys.exit(1) #Pick up the mags we want for all galaxies if (int(params['obs_clf']) == 1) & (int(params['use_lum']) == 0): mag = mem['imag'] else: if int(params['use_REFMAG']) == 1: if params['Analband'] == "r": print >> sys.stderr, "using r band" mag = mem['REFMAG'] + (kcorr[:, 1] - mem['model_mag'][:, 3]) elif params['Analband'] == "i": mag = mem['REFMAG'] + (kcorr[:, 2] - mem['model_mag'][:, 3]) else: print >> sys.stderr, "ERRPR: OOPS I don't know the band you specified (because I am too lazy), which is {0}".format( params['Analband']) else: print(params['use_des']) if len(mem['model_mag'][0]) == 4: mag = mem['imag'] + (kcorr[:, 2] - mem['model_mag'][:, 2]) else: mag = mem['imag'] + (kcorr[:, 2] - mem['model_mag'][:, 3]) #Pulling limits from the parameters structure lm_min = np.array(params['lm_min']) lm_max = np.array(params['lm_max']) zmin = np.array(params['zmin']) zmax = np.array(params['zmax']) lm_min = lm_min.astype(float) lm_max = lm_max.astype(float) zmin = zmin.astype(float) zmax = zmax.astype(float) #If requested, switch to ABM lambda limits if int(params['ABM']) == 1: print >> sys.stderr, "Running abundance matching..." lm_min = abm_limits(cat, abm, params['area'], params['abm_area'], zmin, zmax, lm_min) lm_max = abm_limits(cat, abm, params['area'], params['abm_area'], zmin, zmax, lm_max) print "abundance matching: lm_min", lm_min print "abundance matching: lm_max", lm_max else: my_nz = len(zmin) lm_min = np.repeat([lm_min], my_nz, axis=0) lm_max = np.repeat([lm_max], my_nz, axis=0) #Get central absolute mags #Will only recalcuate index if it doesn't exist; otherwise, read in the index #and get the magnitudes from the mag values already available #Updated version -- for redmapper v5.10 and greater -- uses indices supplied #in the catalog files for finding the necessary index c_names = cat.columns.names use_id_cent = False #Check to see if the central IDs are available for name in c_names: if name == 'ID_CENT': use_id_cent = True break ''' if use_id_cent: cengalindex = np.zeros_like(cat['id_cent']) cenmag = np.zeros_like(cengalindex).astype(float) #Hash table taking galaxy ID to index offset = np.min(mem['id']) g_index = np.zeros(np.max(mem['id'])-offset+1)-1 g_index[mem['id']-offset] = np.array(range(len(mem))) #modified by chto to handle that cat['id_cent']=0 for i in range(len(cengalindex[0])): index = np.where(cat['id_cent'][:,i]-offset>=0) cengalindex[index,i] = g_index[cat['id_cent'][index,i]-offset] cenmag[index,i] = mag[cengalindex[index,i]] if int(params['weight_cen'])==0: cenmag = cenmag[0] cengalindex = cengalindex[0] del g_index ''' ############# #Algorithm changed by Chto #The reason is that is np.zeros of a large number will cause mem error in Python ############# if use_id_cent: cengalindex = np.zeros_like(cat['id_cent']) cenmag = np.zeros_like(cengalindex).astype(float) #Hash table taking galaxy ID to index offset = np.min(mem['id']) g_index = dok_matrix((np.max(mem['id']) - offset + 1, 1), dtype=np.int) g_index[mem['id'] - offset] = np.array(range(len(mem)))[:, np.newaxis] if len(cenmag.shape) == 1: index = np.where(cat['id_cent'][:] - offset >= 0)[0] cengalindex[index] = g_index[cat['id_cent'][index] - offset].toarray().flatten() cenmag[index] = mag[cengalindex[index].astype(int)] cengalindex = cengalindex.astype(int) cengalindex = cengalindex.reshape(-1, 1) cenmag = cenmag.reshape(-1, 1) else: for i in range(len(cengalindex[0])): index = np.where(cat['id_cent'][:, i] - offset >= 0)[0] cengalindex[index, i] = 
g_index[cat['id_cent'][index, i] - offset].toarray().flatten() cenmag[index, i] = mag[cengalindex[index, i]] if int(params['weight_cen']) == 0: cenmag = cenmag[:, 0] cengalindex = cengalindex[:, 0] cengalindex = cengalindex.reshape(-1, 1) cenmag = cenmag.reshape(-1, 1) del g_index gc.collect() ######################### else: #No central IDs found -- doing it the hard way if os.path.isfile(params['cindex_file']) == False: print >> sys.stderr, "Getting central magnitudes..." [cenmag, cengalindex ] = get_central_mag(cat, mem, mag, weight_cen=int(params['weight_cen'])) cengalindex = cengalindex.astype(long) hdu = pyfits.PrimaryHDU(cengalindex) hdu.writeto(params['cindex_file']) else: print >> sys.stderr, "Reading in central magnitudes..." cengalindex = pyfits.open(params['cindex_file']) cengalindex = cengalindex[0].data cengalindex = cengalindex.astype(long) cenmag = np.zeros_like(cengalindex).astype(float) print len(cenmag) for i in range(len(cenmag)): cenmag[i] = mag[cengalindex[i]] #If available, get the limiting magnitude information #Currently set for the more generous cut use_limmag = False for name in c_names: if name == 'LIM_LIMMAG': use_limmag = True limmag = cat['LIM_LIMMAG'] break #Convert limiting magnitude to absolute if needed; based on central #galaxy's k-correction if use_limmag: if params['dohaloCat']: limmag = cat['LIM_LIMMAG_DERED'] else: if int(params['get_limmag_5sigma']) == 1: limmag = get_limmag_5sigma(limmag, cat['lim_exptime']) if int(params['use_des']) == 1: if params['Analband'] == "r": limmag = limmag - (cat['MODEL_MAG'][:, 1] - cenmag[:, 0]) elif params['Analband'] == "i": limmag = limmag - (cat['MODEL_MAG'][:, 2] - cenmag[:, 0]) else: print >> sys.stderr, "ERRPR: OOPS I don't know the band you specified (because I am too lazy), which is {0}".format( params['Analband']) else: limmag = limmag - (cat['imag'] - cenmag[:, 0]) else: limmag = [] if params['stellarmass']: limmag = [] #For the rest of the calculations, since these are predicated on the #given lambda/z thresholds, remove all the clusters we don't care about. #This should speed things up significantly/avoid memory issues. print >> sys.stderr, "Ultimate max z cut is at: ", float( params['zcut_max']) clist = np.where(cat['z_lambda'] < float(params['zcut_max']))[0] cmlist = np.where(mem['z'] < float(params['zcut_max']))[0] cat = cat[clist] cenmag = cenmag[clist] cengalindex = cengalindex[clist] if len(limmag) != 0: limmag = limmag[clist] #Note that we also must trim galaxies mem = mem[cmlist] if not params['stellarmass']: kcorr = kcorr[cmlist] idn_list = np.zeros(len(mag)) idn_list[cmlist] = np.array(range(len(cmlist))) mag = mag[cmlist] cengalindex = idn_list[cengalindex] cengalindex = cengalindex.astype(long) del cmlist, clist print "cmlist, clist: ", gc.collect() #### ##Try to know what is the magnitude. #WARNING: Cuts should be VERY generous, otherwise, may have issues with P(z) tails... 
#Make the main output directory os.system("mkdir -p " + params['outdir']) #Now that we have our magnitudes, add central corrections if necessary if int(params['use_dr8_cen_corr']) == 1: if int(params['weight_cen']) == 1: cenmag[:, 0] = cenmag[:, 0] + correct_dr8_cen([0.213, -0.08], cat['z_lambda']) cenmag[:, 1] = cenmag[:, 1] + correct_dr8_cen([0.104, -0.036], cat['z_lambda']) mag[cengalindex[:, 0]] = mag[cengalindex[:, 0]] + correct_dr8_cen( [0.213, -0.08], mem['z'][cengalindex[:, 0]]) corrlist = np.where(cat['ncent_good'] >= 2)[0] if len(corrlist) > 0: mag[cengalindex[ corrlist, 1]] = mag[cengalindex[corrlist, 1]] + correct_dr8_cen( [0.104, -0.036], mem['z'][cengalindex[corrlist, 1]]) else: cenmag = cenmag + correct_dr8_cen([0.213, -0.08], cat['z_lambda']) mag[cengalindex] = mag[cengalindex] + correct_dr8_cen( [0.213, -0.08], cat['z_lambda']) #Convert everything to log(L) if requested if int(params['use_lum']) == 1: #Value currently hard-coded to offset for z=0.3 bandshift in SDSS i-band abs_solar = float(params['abs_solar']) mag = np.log10( mag_to_Lsolar(mag, use_des=int(params['use_des']), abs_solar=abs_solar)) cenmag = np.log10( mag_to_Lsolar(cenmag, use_des=int(params['use_des']), abs_solar=abs_solar)) if use_limmag: limmag = np.log10( mag_to_Lsolar(limmag, use_des=int(params['use_des']), abs_solar=abs_solar)) np.save(params['outdir'] + "limmag.npy", limmag) np.save(params['outdir'] + "cengalindex.npy", cengalindex) print "I save the limiting magnitude.... HAAH" print >> sys.stderr, "Finished converting mags to log(Lsolar)" np.save(params['outdir'] + "cen_mag.npy", cenmag) # limmag=[] #so hacky :( this is not my style print "magnitude distribution: " print np.histogram(mag, bins=np.array(range(35)) * 0.08 + 9.3) np.save(params['outdir'] + "mag.npy", mag) #Fix the normaliztion of the p(z) so that triangular integration works okay in pz_utils ##Modified by chto@@ dz = cat['pzbins'][:, 1] - cat['pzbins'][:, 0] weight = np.sum(cat['pz'], axis=1) * dz cat['pz'] = cat['pz'] / weight[:, None] del dz, weight print "dz, weight: ", gc.collect() print >> sys.stderr, "Done renormalizing P(z)" print >> sys.stderr, "Max lambda: ", np.max(cat['lambda_chisq']) #Now produce the galaxy samples and a hash table that takes cluster ID to the first #of its listed galaxies #Create an array that gives p_cen for all member galaxies pcen_all = np.zeros(len(mem)) if int(params['weight_cen']) == 0: pcen_all[cengalindex] = 0 * cengalindex + 1. else: for i in range(len(cengalindex[0])): clist = np.where(cengalindex[:, i] != -1)[0] #print len(cengalindex),cengalindex[i][0],clist[0] pcen_all[cengalindex[ clist, i]] = pcen_all[cengalindex[clist, i]] + cat['p_cen'][clist, i] np.save(params['outdir'] + "pcen_all.py", pcen_all) #Reassign p with p_ext if requested if int(params['use_p_ext']) > 0: mem['p'][:] = pext_correct.pext_correct_full(cat, mem, pcen_all, int(params['use_p_ext']), int(params['ncolors'])) #Add a systematic probability offset if requested if float(params['p_offset']) != 0: mem['p'][:] = mem['p'][:] + float(params['p_offset']) #Fix any objects with p>1 or p<0 plist = np.where(mem['p'] > 1.)[0] if len(plist) > 0: mem['p'][plist] = 0. * plist + 1. plist = np.where(mem['p'] < 0.)[0] if len(plist) > 0: mem['p'][plist] = 0. 
* plist #Now make the satellite lists print >> sys.stderr, "PCEN: ", np.max(pcen_all), len(cat), len( np.where(pcen_all > 0)[0]), len( np.where(pcen_all > 0.9)[0]), np.sum(pcen_all) #Make necessary bootstrap samples #Includes redshifts taken from P(z) #Only want to do this once #Also allows covariance estimates between measuresments, which are #Not currently implemented #Number of bootstrap samples -- current hard-coded nboot = dodotune.njack bootlist = pz_utils.make_boot_samples_simple(nboot, cat) [match_index, gboot] = pz_utils.make_boot_samples_gal_full(bootlist, cat['mem_match_id'], mem['mem_match_id'], mem['p'] * (1 - pcen_all)) print >> sys.stderr, "Done setting up bootstrap samples" assert params['jackknife_error'] if float(params['jackknife_error']) != 0: print "doing jackknife CLF" njack = dodotune.njack jacklist = pz_utils.make_jack_samples_simple(njack, cat) match_index_jack = pz_utils.getjackgal(jacklist[0], cat['mem_match_id'], mem['mem_match_id']) assert (mem['mem_match_id'][match_index_jack[cat['mem_match_id']] [:, 0]] == cat['mem_match_id']).all() # [match_index_jack, gjack] = pz_utils.make_jack_samples_gal_full(jacklist,cat['mem_match_id'], # mem['mem_match_id'],mem['p']*(1-pcen_all)) print >> sys.stderr, "Done setting up jackknife samples" #If requested, calculate n(z) if int(params['do_nz']) == 1: print >> sys.stderr, "Calculating n(z)..." redm_nz_calc(cat, params['area'], params['outdir'], bootlist, descale=bool(int(params['nz_descale']))) print >> sys.stderr, "Done calculating n(z)" #If requested, calculate n(lambda) if int(params['do_nlambda']) == 1: print >> sys.stderr, "Calculating n(lambda)..." redm_nlambda_err(cat['lambda_chisq'], cat['lambda_chisq_e'], cat['z_lambda'], cat['pz'], cat['pzbins'], bootlist, params['outdir'], zmin, zmax, params['area']) print >> sys.stderr, "Done calculating n(lambda)" redm_bigcount(cat['lambda_chisq'], cat['z_lambda'], zmin, zmax, params['outdir']) print >> sys.stderr, "Done calculating bonus listing of massive clusters" print >> sys.stderr, "lm_min: ", lm_min, "lm_max: ", lm_max, "zmin: ", zmin, "zmax: ", zmax #Calculate the CLF if int(params['do_clf']) == 1: #np.save("mag.npy",mag) #np.save("cenmag.npy",cenmag) #np.save("cat.npy",cat) #np.save("mem.npy", mem) print >> sys.stderr, "Calculating CLF..." 
if params["troughNames"] is not None: trough_data = np.array([ hp.read_map(params['troughNames'].format(i)) for i in range(5) ]) nside = hp.npix2nside(trough_data.shape[1]) def getTroughProb(RaDec): ra, dec = RaDec return trough_data[:, DeclRaToIndex(ra, dec, nside)] positionArray = np.array([cat['RA'], cat['DEC']]).T positionArray_mem = np.array([mem['RA'], mem['DEC']]).T troughProb = np.array(map(getTroughProb, positionArray)) troughProb_mem = np.array(map(getTroughProb, positionArray_mem)) oldmemP = copy.deepcopy(mem['p']) oldcatpcen = copy.deepcopy(cat['p_cen']) oldcatpsat = copy.deepcopy(cat['p_sat']) for i in range(5): mem['p'] *= troughProb_mem[:, i] cat['p_cen'] *= troughProb[:, i].reshape(-1, 1) cat['p_sat'] *= troughProb[:, i].reshape(-1, 1) outdir = params['outdir'] + "trough_{0}/".format(i) os.system("mkdir -p " + outdir) redm_clf.redm_clf(cat, mem, mag, cenmag, cengalindex, lm_min, lm_max, zmin, zmax, pcen_all, bootlist, gboot, match_index, outdir, weight_cen=int(params['weight_cen']), obs_clf=int(params['obs_clf']), use_lum=int(params['use_lum']), limmag=limmag, trough=True, stellarMass=params['stellarmass']) mem['p'] = oldmemP cat['p_cen'] = oldcatpcen cat['p_sat'] = oldcatpsat else: #print limmag if float(params['jackknife_error']) != 0: print "doing jackknife CLF" redm_clf.redm_clf(cat, mem, mag, cenmag, cengalindex, lm_min, lm_max, zmin, zmax, pcen_all, jacklist, gboot=None, match_index=match_index_jack, outdir=params['outdir'], weight_cen=int(params['weight_cen']), obs_clf=int(params['obs_clf']), use_lum=int(params['use_lum']), limmag=limmag, stellarMass=params['stellarmass'], jackknifeerr=int(params['jackknife_error'])) else: print "doing bootstrap CLF" redm_clf.redm_clf(cat, mem, mag, cenmag, cengalindex, lm_min, lm_max, zmin, zmax, pcen_all, bootlist, gboot, match_index, params['outdir'], weight_cen=int(params['weight_cen']), obs_clf=int(params['obs_clf']), use_lum=int(params['use_lum']), limmag=limmag, stellarMass=params['stellarmass']) print >> sys.stderr, "Done calculating CLF" #PLot CLF: if int(params['plot_clf']) == 1: redm_clf.plot_clf(lm_min, lm_max, zmin, zmax, indir=params['outdir']) # assert False #Calculate the radial profiles if int(params['do_rpr']) == 1: print >> sys.stderr, "Calculating radial profiles..." #First, get necessary input parameter limits on magnitude rpr_minlum = np.array(params['rpr_minlum']) rpr_maxlum = np.array(params['rpr_maxlum']) rpr_minlum = rpr_minlum.astype(float) rpr_maxlum = rpr_maxlum.astype(float) #Basic weirdness checking for luminosity limits error_check = 0 elist = np.where(rpr_minlum > rpr_maxlum)[0] if len(elist) > 0: print >> sys.stderr, "ERROR: Require rpr_minlum < rpr_maxlum" error_check = 1 if int(params['obs_clf']) == 1 or int( params['use_lum']) == 1 and min(rpr_minlum) < 0: print >> sys.stderr, "ERROR: Radial profiles are using app mags or solar lum," print >> sys.stderr, " but rpr_minlum/rpr_maxlum < 0" error_check = 1 if int(params['obs_clf']) == 0 and int( params['use_lum']) == 0 and max(rpr_maxlum) > 0: print >> sys.stderr, "ERROR: Radial profiles are using abs mags," print >> sys.stderr, " but rpr_minlum/rpr_maxlum > 0" error_check = 1 if error_check == 0: redm_rpr.redm_rpr(cat, mem, mag, lm_min, lm_max, zmin, zmax, rpr_minlum, rpr_maxlum, bootlist, gboot, params['outdir']) else: print >> sys.stderr, "SKIPPING RADIAL PROFILES" #Calculate magnitude gaps if int(params['do_mgap']) == 1: print >> sys.stderr, "Calculating magnitude gaps..." 
redm_mgap.redm_mgap(cat, mem, cenmag, cengalindex, mag, zmin, zmax, lm_min, lm_max, bootlist, gboot, params['outdir'], use_lum=bool(int(params['use_lum'])), use_obs=bool(int(params['obs_clf'])), weight_cen=bool(int(params['weight_cen']))) #calculate the probability that the brightest galaxy is not the central galaxy if int(params['do_pbcg']) == 1: print >> sys.stderr, "Calculating P(BCG!=central)..." redm_pbcg.get_p_bcg_not_cen(cat, cenmag, cengalindex, cat['p_cen'], mem['mem_match_id'], mag, mem['p'], zmin, zmax, params['outdir'], use_lum=int(params['use_lum']), weight_cen=int(params['weight_cen'])) #Calculate the distribution of the brightest satellite galaxy if int(params['do_bsat']) == 1: print >> sys.stderr, "Calculating brightest satellite clf..." redm_bright_sat.get_brightest_satellite_all( cat, mem, mag, cengalindex, lm_min, lm_max, zmin, zmax, bootlist, gboot, match_index, params['outdir'], weight_cen=int(params['weight_cen']), use_lum=int(params['use_lum']), obs_clf=int(params['obs_clf'])) print >> sys.stderr, "Calculating joint brightest sat-central distribution..." count_arr = redm_bright_sat.get_bright_sat_cen_all( cat, mem, mag, cengalindex, cenmag, lm_min, lm_max, zmin, zmax, bootlist, gboot, match_index, params['outdir'], weight_cen=int(params['weight_cen']), use_lum=int(params['use_lum']), obs_clf=int(params['obs_clf'])) #output the params np.save(params['outdir'] + "params.npy", params)
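For reference, below is a minimal standalone sketch of the P(z) renormalization step performed near the end of the pipeline above (the dz/weight rescaling of cat['pz']). The toy arrays and their sizes are hypothetical; only the rescaling logic mirrors the code above, and it assumes uniformly spaced pzbins per cluster.

import numpy as np

# Toy stand-ins for cat['pzbins'] and cat['pz']: 5 clusters, 21 redshift bins each
# (hypothetical values; only the rescaling below mirrors the pipeline).
pzbins = np.tile(np.linspace(0.10, 0.40, 21), (5, 1))
pz = np.random.rand(5, 21)

dz = pzbins[:, 1] - pzbins[:, 0]      # per-cluster bin width
weight = np.sum(pz, axis=1) * dz      # current rectangle-rule integral of each P(z)
pz = pz / weight[:, None]             # rescale so each P(z) integrates to 1

print(np.sum(pz, axis=1) * dz)        # each entry should now be ~1.0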
import numpy as np


def beta_cont_frac_gsl(a, b, x):
    # This computes B_x(a,b) using a continued fraction approximation.
    # We do require a>0, but b can be negative.
    # Having b<0 and x very near 1 can cause a loss of precision (not enough iterations).
    # However, x near 1 only occurs in the far future for our cosmology application.
    # Require 0<=x<1.
    #
    # This python subroutine is adapted from the
    # GNU Scientific Library (GSL) specfunc/beta_inc.c code
    # by Daniel Eisenstein (July 2015).
    # Changes were generally to strip down to the case of interest, removing
    # the pre-factor from the complete beta function. Also vectorized.
    #
    # Original GSL header:
    # Copyright (C) 2007 Brian Gough
    # Copyright (C) 1996, 1997, 1998, 1999, 2000 Gerard Jungman
    #
    # This program is free software; you can redistribute it and/or modify
    # it under the terms of the GNU General Public License as published by
    # the Free Software Foundation; either version 3 of the License, or (at
    # your option) any later version.
    #
    # This program is distributed in the hope that it will be useful, but
    # WITHOUT ANY WARRANTY; without even the implied warranty of
    # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
    # General Public License for more details.
    #
    # You should have received a copy of the GNU General Public License
    # along with this program; if not, write to the Free Software
    # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
    #
    # Author: G. Jungman
    x = np.array(x, copy=False, ndmin=1)
    if (np.min(x) < 0 or np.max(x) >= 1):
        print("Illegal entry in beta_cont_frac_gsl()\n")
        import sys
        sys.exit()

    cutoff = 1e-30    # control the zero cutoff

    # standard initialization for continued fraction
    num_term = 1.0
    den_term = 1.0 - (a + b) * x / (a + 1.0)
    den_term[np.where(np.abs(den_term) < cutoff)] = cutoff
    den_term = 1.0 / den_term
    cf = den_term

    for k in range(1, 200):
        # first step
        coeff = k * (b - k) * x / (((a - 1.0) + 2 * k) * (a + 2 * k))
        den_term = 1.0 + coeff * den_term
        num_term = 1.0 + coeff / num_term
        den_term[np.where(np.abs(den_term) < cutoff)] = cutoff
        num_term[np.where(np.abs(num_term) < cutoff)] = cutoff
        den_term = 1.0 / den_term
        cf *= den_term * num_term

        # second step
        coeff = -(a + k) * (a + b + k) * x / ((a + 2 * k) * (a + 2 * k + 1.0))
        den_term = 1.0 + coeff * den_term
        num_term = 1.0 + coeff / num_term
        den_term[np.where(np.abs(den_term) < cutoff)] = cutoff
        num_term[np.where(np.abs(num_term) < cutoff)] = cutoff
        den_term = 1.0 / den_term
        cf *= den_term * num_term

        # Are we done?
        if (np.max(np.abs(den_term * num_term - 1)) < 1e-12):
            break
    # End k loop
    # If this ends, we're just accepting the answer even if we haven't converged

    # Include the prefactor
    # We need a>0 so that x=0 doesn't crash.
    cf *= np.power(x, a) * np.power(1 - x, b) / a

    if (len(cf) == 1):
        return cf[0]    # Get back to a scalar
    else:
        return cf
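A quick consistency check, not part of the original module: for a, b > 0 the function above should return the unregularized incomplete beta B_x(a,b), which can be recovered from SciPy as the regularized incomplete beta times the complete beta function. The values of a, b, and x below are arbitrary test inputs, and the sketch assumes beta_cont_frac_gsl is in scope.

import numpy as np
from scipy import special

# Cross-check against SciPy for a, b > 0 (the continued fraction above also
# handles b < 0, which this reference path does not cover).
a, b = 2.5, 1.5
x = np.array([0.1, 0.5, 0.9])

ours = beta_cont_frac_gsl(a, b, x)
# scipy.special.betainc is the *regularized* incomplete beta I_x(a, b),
# so multiply by the complete beta function to recover B_x(a, b).
reference = special.betainc(a, b, x) * special.beta(a, b)

print(np.allclose(ours, reference, rtol=1e-8))   # expected: True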
# -*- coding: utf-8 -*-
import pandas as pd
import numpy as np

#%% import data
data = pd.read_csv("data.csv")
data.drop(["id", "Unnamed: 32"], axis=1, inplace=True)

# %%
data.diagnosis = [1 if each == "M" else 0 for each in data.diagnosis]
y = data.diagnosis.values
x_data = data.drop(["diagnosis"], axis=1)

#%% normalization
x = (x_data - np.min(x_data)) / (np.max(x_data) - np.min(x_data))

# %% train test split
from sklearn.model_selection import train_test_split
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.15, random_state=42)

#%%
from sklearn.tree import DecisionTreeClassifier
dt = DecisionTreeClassifier()
dt.fit(x_train, y_train)
print("score: ", dt.score(x_test, y_test))
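An optional variant of the script above, not in the original: the same min-max scaling done with scikit-learn's MinMaxScaler, fit on the training split only so the test set does not influence the scaling constants. It assumes x_data and y have already been built as in the script.

# Fit the scaler on the training split only, then reuse it on the test split.
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.tree import DecisionTreeClassifier

x_train, x_test, y_train, y_test = train_test_split(x_data, y, test_size=0.15, random_state=42)

scaler = MinMaxScaler()
x_train_s = scaler.fit_transform(x_train)   # learn per-feature min/max from training data
x_test_s = scaler.transform(x_test)         # apply the same constants to the test data

dt = DecisionTreeClassifier(random_state=42)
dt.fit(x_train_s, y_train)
print("score:", dt.score(x_test_s, y_test))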
def prox(self, x, alpha):
    # Elementwise soft-thresholding: the proximal operator of
    # alpha * regcoef * ||x||_1, applied to a 1-D array x.
    return np.sign(x) * np.max([np.abs(x) - self.regcoef * alpha,
                                np.zeros(x.size)], axis=0)
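prox() above implements soft-thresholding, i.e. the closed-form proximal operator of t*||x||_1 with t = regcoef * alpha. A small standalone sketch of the same formula, independent of the surrounding class:

import numpy as np

# Soft-thresholding: prox_{t * ||.||_1}(x)_i = sign(x_i) * max(|x_i| - t, 0)
def soft_threshold(x, t):
    return np.sign(x) * np.maximum(np.abs(x) - t, 0.0)

x = np.array([-3.0, -0.2, 0.0, 0.5, 2.0])
print(soft_threshold(x, 1.0))   # roughly [-2., -0., 0., 0., 1.]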