def compute_cvm(predictions, masses, n_neighbours=200, step=50):
    """
    Computing Cramer-von Mises (cvm) metric on background events: take average of cvms calculated for each mass bin.
    In each mass bin global prediction's cdf is compared to prediction's cdf in mass bin.

    :param predictions: array-like, predictions
    :param masses: array-like, in case of Kaggle tau23mu this is reconstructed mass
    :param n_neighbours: count of neighbours for event to define mass bin
    :param step: step through sorted mass-array to define next center of bin
    :return: average cvm value
    """
    predictions = numpy.array(predictions)
    masses = numpy.array(masses)
    assert len(predictions) == len(masses)

    # First, reorder by masses
    predictions = predictions[numpy.argsort(masses)]

    # Second, replace probabilities with order of probability among other events
    predictions = numpy.argsort(numpy.argsort(predictions))

    # Now, each window forms a group, and we can compute contribution of each group to CvM
    cvms = []
    for window in __rolling_window(predictions, window_size=n_neighbours)[::step]:
        cvms.append(__cvm(subindices=window, total_events=len(predictions)))
    return numpy.mean(cvms)
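# Illustrative sketch (not part of the original function) of the double-argsort
# trick used above: numpy.argsort(numpy.argsort(x)) maps each value to its
# 0-based rank among all values.
import numpy

x = numpy.array([0.3, 0.1, 0.9, 0.5])
ranks = numpy.argsort(numpy.argsort(x))
print(ranks)  # [1 0 3 2]: 0.1 -> rank 0, 0.3 -> rank 1, 0.5 -> rank 2, 0.9 -> rank 3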
def test_template():
    size = 100
    # Float prefactors ensure that image range is between 0 and 1
    image = np.full((400, 400), 0.5)
    target = 0.1 * (np.tri(size) + np.tri(size)[::-1])
    target_positions = [(50, 50), (200, 200)]
    for x, y in target_positions:
        image[x:x + size, y:y + size] = target
    np.random.seed(1)
    image += 0.1 * np.random.uniform(size=(400, 400))

    result = match_template(image, target)
    delta = 5

    positions = peak_local_max(result, min_distance=delta)

    if len(positions) > 2:
        # Keep the two maximum peaks.
        intensities = result[tuple(positions.T)]
        i_maxsort = np.argsort(intensities)[::-1]
        positions = positions[i_maxsort][:2]

    # Sort so that order matches `target_positions`.
    positions = positions[np.argsort(positions[:, 0])]

    for xy_target, xy in zip(target_positions, positions):
        assert_almost_equal(xy, xy_target)
def fitting(d0, d1):
    idx_list = []
    pos_list = []
    for tp in ['beta', 'sw']:
        # use None as the "not yet initialized" sentinel instead of the
        # unreliable `e is 0` identity test
        e = None
        for net, sl in zip(['Net_0', 'Net_1'], [slice(2, 4), slice(0, 4)]):
            z = d0[tp][net]['mean_rates'][:, sl]
            target = d1[tp][net]['mean_rates'][sl]
            target = numpy.array([target] * z.shape[0])
            if e is None:
                e = z - target
            else:
                e = numpy.concatenate((z - target, e), axis=1)

        e **= 2
        e = numpy.sqrt(numpy.mean(e, axis=1))
        idx = numpy.argsort(e)

        # idx_list.append(idx)
        # l = []
        # for i, _id in enumerate(idx_list[-2]):
        #     j = list(idx_list[-1]).index(_id)
        #     l.append([i, j])
        # l = numpy.array(l)
        # pos_list.append(l)
        # e = numpy.mean(l, axis=1)

        idx = numpy.argsort(e)
        # pp(list(l[idx, :]))
        # print(idx)
        # print(e[idx])

        print(tp)
        for _id in idx[:100]:
            print(d0[tp]['Net_0']['ylabels'][_id],
                  d1[tp]['Net_0']['mean_rates'][:],
                  numpy.round(d0[tp]['Net_0']['mean_rates'][_id, :], 1),
                  e[_id])
            print(d0[tp]['Net_1']['ylabels'][_id],
                  d1[tp]['Net_1']['mean_rates'][:],
                  numpy.round(d0[tp]['Net_1']['mean_rates'][_id, :], 1))
def calcFreqs(X, timeStep, minFreq=0, maxFreq=np.inf):
    if X.ndim > 1:
        freqs = scipy.fftpack.fftfreq(X.shape[1], timeStep)
        idx1 = np.argsort(freqs)
        freqs = freqs[idx1]
        idx2 = np.where((freqs >= minFreq) & (freqs <= maxFreq))[0]
        freqs = freqs[idx2]
        return freqs, idx1, idx2, 0
    else:
        # sometimes not all the time steps are the same
        allFreqs, lengths = [], []
        idx1s, idx2s = [], []
        if isinstance(timeStep, float):
            timeStep = np.ones((len(X))) * timeStep
        for x, dt in zip(X, timeStep):
            freqs = scipy.fftpack.fftfreq(x.shape[0], dt)
            idx1 = np.argsort(freqs)
            freqs = freqs[idx1]
            idx2 = np.where((freqs > minFreq) & (freqs < maxFreq))[0]
            freqs = freqs[idx2]
            allFreqs.append(freqs)
            lengths.append(len(freqs))
            idx1s.append(idx1)
            idx2s.append(idx2)
        maxLenInd = np.argmax(lengths)
        return allFreqs, idx1s, idx2s, maxLenInd
def _test_corr(old_func, new_func, sel_item):
    from nose.tools import assert_equal, assert_raises
    n_obs = 20
    n_dims = 10
    np.random.seed(0)
    y = np.random.rand(n_obs) * n_obs
    X = np.tile(y, [n_dims, 1]).T + np.random.randn(n_obs, n_dims)
    rho_fast = new_func(X, y)
    # test dimensionality
    assert_equal(rho_fast.ndim, 1)
    assert_equal(rho_fast.shape[0], n_dims)
    # test data
    rho_slow = np.ones(n_dims)
    for dim in range(n_dims):
        rho_slow[dim] = np.array(old_func(X[:, dim], y)).item(sel_item)
    np.testing.assert_array_equal(rho_fast.shape, rho_slow.shape)
    np.testing.assert_array_almost_equal(rho_fast, rho_slow)
    # test errors
    new_func(np.squeeze(X[:, 0]), y)
    assert_raises(ValueError, new_func, y, X)
    assert_raises(ValueError, new_func, X, y[1:])
    # test dtype
    X = np.argsort(X, axis=0) * 2  # ensure no bug at normalization
    y = np.argsort(y, axis=0) * 2
    rho_fast = new_func(X, y, dtype=int)
    rho_slow = np.ones(n_dims)
    for dim in range(n_dims):
        rho_slow[dim] = np.array(old_func(X[:, dim], y)).item(sel_item)
    np.testing.assert_array_almost_equal(rho_fast, rho_slow)
def _sort_neurons(sort, gids, network):
    max_nest_gid = network.nest_gid.max() + 1
    sorting = np.zeros(max_nest_gid)
    if isinstance(sort, str):
        sorted_ids = None
        if "degree" in sort:
            deg_type = sort[:sort.find("-")]
            degrees = network.get_degrees(deg_type)
            sorted_ids = np.argsort(degrees)
        elif sort == "betweenness":
            betw = network.get_betweenness(btype="node")
            sorted_ids = np.argsort(betw)
        else:
            raise InvalidArgument(
                '''Unknown sorting parameter {}; choose among "in-degree",
                "out-degree", "total-degree" or "betweenness".'''.format(sort))
        num_sorted = 1
        _, sorted_groups = _sort_groups(network.population)
        for group in sorted_groups:
            gids = network.nest_gid[group.id_list]
            order = np.argsort(sorted_ids[group.id_list])
            sorting[gids] = num_sorted + order
            num_sorted += len(group.id_list)
    else:
        sorting[network.nest_gid[sort]] = sort
    return sorting
def carbonylorcarboxyl(allligand, index, bond_dist):
    allligandcoods = allligand.positions
    ocoods = np.zeros((1, 3), dtype=float)
    ocoods[0, :] = allligandcoods[index, :]
    ocoods = np.float32(ocoods)

    # nearest atom to the oxygen is its bonded partner
    tempdist = MDAnalysis.lib.distances.distance_array(ocoods, allligandcoods)
    A = np.argsort(tempdist)
    temp = int(A[0, 1])

    Omatecood = np.zeros((1, 3), dtype=float)
    Omatecood[0, :] = allligandcoods[temp, :]
    Omatecood = np.float32(Omatecood)

    tempdist2 = MDAnalysis.lib.distances.distance_array(Omatecood, allligandcoods)
    B = np.argsort(tempdist2)
    B = np.delete(B, 0, axis=1)

    for i in range(0, B.size):
        if B[0, i] == index:
            C = np.delete(B, i, axis=1)
            break

    base1 = int(C[0, 0])
    base2 = int(C[0, 1])
    type1 = allligand[base1].type
    type2 = allligand[base2].type

    if type1 == 'O' or type2 == 'O':
        atype = 'carboxyl'
    else:
        atype = 'carbonyl'

    return atype
def downsize(self, coefs, cut=None, verbose=True):
    """
    Given a set of coefs, sort the coefs and get rid of the bottom `cut`
    fraction of variables with the lowest absolute coefs. Return the new coefs.
    """
    downsized_coefs = np.squeeze(np.array(coefs))
    if cut is None:
        cut = self.cut
    n_trash = int(floor(cut * self.n_features))
    if verbose:
        print("Downsampling...")
        print("Current shape:", self.Xview.shape)
        print("Removing {} columns... ".format(n_trash))
    self.tail_start -= n_trash
    if self.tail_start <= 0:
        raise ValueError("Trying to downsize more variables than present")

    # get sorted order of coefs
    csort = np.squeeze(np.argsort(np.argsort(np.absolute(coefs))))
    keep_feature = np.squeeze(csort >= n_trash)
    tail_start = self.tail_start

    # columns in the tail we want to keep
    keep_idx = np.squeeze(
        np.where(keep_feature[tail_start:tail_start + n_trash]))
    keep_idx += tail_start
    # columns we want to move to the tail
    trash_idx = np.squeeze(np.where(keep_feature[0:tail_start] == False))
    if len(trash_idx) != len(keep_idx):
        raise ValueError("trash_idx and keep_idx not the same length")

    # swap the columns
    for trash, keep in zip(trash_idx, keep_idx):
        # print(keep, trash)
        keep_col = self.X[:, keep].copy()
        self.X[:, keep] = self.X[:, trash]
        self.X[:, trash] = keep_col
        self.orig_feature_index[trash], self.orig_feature_index[keep] = \
            self.orig_feature_index[keep], self.orig_feature_index[trash]
        downsized_coefs[trash], downsized_coefs[keep] = \
            downsized_coefs[keep], downsized_coefs[trash]
        if self.test_subj is not None:
            self.X_test[:, (trash, keep)] = self.X_test[:, (keep, trash)]

    self.n_features -= n_trash
    self.Xview = self.X.view()[:, :self.n_features]
    if self.test_subj is not None:
        self.X_testview = self.X_test.view()[:, :self.n_features]
    print("New Xview shape:", self.Xview.shape)
    return downsized_coefs[:-n_trash]
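# Isolated sketch (illustrative, not from the original class) of the selection
# rule above: the double argsort turns |coef| into 0-based ranks, and
# `rank >= n_trash` keeps everything except the n_trash weakest features.
import numpy as np

coefs = np.array([0.05, -0.8, 0.3, -0.01])
n_trash = 2
csort = np.argsort(np.argsort(np.absolute(coefs)))
print(csort)             # [1 3 2 0]: rank of each |coef|
print(csort >= n_trash)  # [False  True  True False] -> keep the two strongest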
def trustworthiness(X, X_embedded, n_neighbors=5, precomputed=False):
    r"""Expresses to what extent the local structure is retained.

    The trustworthiness is within [0, 1]. It is defined as

    .. math::

        T(k) = 1 - \frac{2}{nk (2n - 3k - 1)} \sum^n_{i=1}
            \sum_{j \in U^{(k)}_i} (r(i, j) - k)

    where :math:`r(i, j)` is the rank of the embedded datapoint j
    according to the pairwise distances between the embedded datapoints,
    :math:`U^{(k)}_i` is the set of points that are in the k nearest
    neighbors in the embedded space but not in the original space.

    * "Neighborhood Preservation in Nonlinear Projection Methods: An
      Experimental Study"
      J. Venna, S. Kaski
    * "Learning a Parametric Embedding by Preserving Local Structure"
      L.J.P. van der Maaten

    Parameters
    ----------
    X : array, shape (n_samples, n_features) or (n_samples, n_samples)
        If the metric is 'precomputed' X must be a square distance
        matrix. Otherwise it contains a sample per row.

    X_embedded : array, shape (n_samples, n_components)
        Embedding of the training data in low-dimensional space.

    n_neighbors : int, optional (default: 5)
        Number of neighbors k that will be considered.

    precomputed : bool, optional (default: False)
        Set this flag if X is a precomputed square distance matrix.

    Returns
    -------
    trustworthiness : float
        Trustworthiness of the low-dimensional embedding.
    """
    if precomputed:
        dist_X = X
    else:
        dist_X = pairwise_distances(X, squared=True)
    dist_X_embedded = pairwise_distances(X_embedded, squared=True)
    ind_X = np.argsort(dist_X, axis=1)
    ind_X_embedded = np.argsort(dist_X_embedded, axis=1)[:, 1:n_neighbors + 1]

    n_samples = X.shape[0]
    t = 0.0
    ranks = np.zeros(n_neighbors)
    for i in range(n_samples):
        for j in range(n_neighbors):
            ranks[j] = np.where(ind_X[i] == ind_X_embedded[i, j])[0][0]
        ranks -= n_neighbors
        t += np.sum(ranks[ranks > 0])
    t = 1.0 - t * (2.0 / (n_samples * n_neighbors *
                          (2.0 * n_samples - 3.0 * n_neighbors - 1.0)))
    return t
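# Minimal usage sketch (assumes numpy and sklearn's pairwise_distances are
# imported in this module, as the function body requires). An identity
# "embedding" keeps every neighbourhood intact, so trustworthiness is 1.
import numpy as np
from sklearn.metrics import pairwise_distances

X = np.random.RandomState(0).rand(50, 2)
print(trustworthiness(X, X.copy(), n_neighbors=5))  # 1.0 for a perfect embedding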
def generate_misclassifications(top_words):
    log("Generating artificial misclassification rate ..")
    from numpy.random import normal
    w = len(top_words)
    mis = np.zeros((w, w))
    for i in range(w):
        for j in range(i + 1):
            distance = edit_distance(top_words[i], top_words[j])
            mis[i][j] = max(0.0, normal(0.4 ** distance, 0.05))
            mis[j][i] = max(0.0, normal(0.4 ** distance, 0.05))
    normalize_matrix(mis)

    mostly_wrong = list(sorted([(mis[i][i], i) for i in range(w)]))
    log("Top 10 words likely to be wrong:")
    for prob, idx in mostly_wrong[:10]:
        log("  %s (%.3lf%%) => %s", top_words[idx], prob * 100.0,
            " ".join(["%s (%.3lf%%)" % (top_words[cand], mis[idx][cand] * 100.0)
                      for cand in reversed(np.argsort(mis[idx])[-4:])]))
    log("Top 10 words likely to be right:")
    for prob, idx in mostly_wrong[-10:]:
        log("  %s (%.3lf%%) => %s", top_words[idx], prob * 100.0,
            " ".join(["%s (%.3lf%%)" % (top_words[cand], mis[idx][cand] * 100.0)
                      for cand in reversed(np.argsort(mis[idx])[-4:])]))
    return mis
def rankImages(imdists, query_id, dist_type):
    # PRE [DO NOT TOUCH]
    ranking = []

    # WRITE YOUR CODE HERE
    related_img = imdists[query_id, :]

    # smaller, order asc
    if dist_type == 'euclidean':
        ranking = np.argsort(related_img)
    # larger, order desc
    elif dist_type == 'l2':
        ranking = np.argsort(-related_img)
    # larger, order desc
    elif dist_type == 'intersect' or dist_type == 'l1':
        ranking = np.argsort(-related_img)
    # smaller, order asc
    elif dist_type == 'chi2':
        ranking = np.argsort(related_img)
    # larger, order desc
    elif dist_type == 'hellinger':
        ranking = np.argsort(-related_img)

    # RETURN [DO NOT TOUCH]
    return ranking
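# Small check (illustrative, not from the original assignment): with a 3x3
# euclidean distance matrix, the query's row is sorted ascending, so the query
# itself (distance 0) ranks first.
import numpy as np

imdists = np.array([[0.0, 2.0, 1.0],
                    [2.0, 0.0, 3.0],
                    [1.0, 3.0, 0.0]])
print(rankImages(imdists, query_id=0, dist_type='euclidean'))  # [0 2 1]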
def predict_scores(self, test_data, N):
    dinx = np.array(list(self.train_drugs))
    DS = self.dsMat[:, dinx]    # print DS: drug-drug sim with 0 diagonal entries
    tinx = np.array(list(self.train_targets))
    TS = self.tsMat[:, tinx]    # print TS: target-target sim with 0 diagonal entries
    scores = []
    for d, t in test_data:
        if d in self.train_drugs:
            if t in self.train_targets:
                val = np.sum(self.U[d, :] * self.V[t, :])
            else:
                jj = np.argsort(TS[t, :])[::-1][:N]
                val = np.sum(self.U[d, :] * np.dot(TS[t, jj], self.V[tinx[jj], :])) / np.sum(TS[t, jj])
        else:
            if t in self.train_targets:
                ii = np.argsort(DS[d, :])[::-1][:N]
                val = np.sum(np.dot(DS[d, ii], self.U[dinx[ii], :]) * self.V[t, :]) / np.sum(DS[d, ii])
            else:
                ii = np.argsort(DS[d, :])[::-1][:N]
                jj = np.argsort(TS[t, :])[::-1][:N]
                v1 = DS[d, ii].dot(self.U[dinx[ii], :]) / np.sum(DS[d, ii])
                v2 = TS[t, jj].dot(self.V[tinx[jj], :]) / np.sum(TS[t, jj])
                val = np.sum(v1 * v2)
        if np.isnan(val):
            scores.append(0)
        else:
            scores.append(np.exp(val) / (1 + np.exp(val)))
    # print smat: whole prediction matrix
    return np.array(scores)  # from original code
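# Isolated illustration (not from the original class) of the neighbour-selection
# idiom used above: np.argsort(sim)[::-1][:N] picks the indices of the N most
# similar items, which then weight the latent factors.
import numpy as np

sim = np.array([0.1, 0.9, 0.4, 0.7])
top2 = np.argsort(sim)[::-1][:2]
print(top2, sim[top2])  # [1 3] [0.9 0.7]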
def evaluation(self, test_data, test_label):
    dinx = np.array(list(self.train_drugs))
    DS = self.dsMat[:, dinx]
    tinx = np.array(list(self.train_targets))
    TS = self.tsMat[:, tinx]
    scores = []
    if self.K2 > 0:
        for d, t in test_data:
            if d in self.train_drugs:
                if t in self.train_targets:
                    val = np.sum(self.U[d, :] * self.V[t, :])
                else:
                    jj = np.argsort(TS[t, :])[::-1][:self.K2]
                    val = np.sum(self.U[d, :] * np.dot(TS[t, jj], self.V[tinx[jj], :])) / np.sum(TS[t, jj])
            else:
                if t in self.train_targets:
                    ii = np.argsort(DS[d, :])[::-1][:self.K2]
                    val = np.sum(np.dot(DS[d, ii], self.U[dinx[ii], :]) * self.V[t, :]) / np.sum(DS[d, ii])
                else:
                    ii = np.argsort(DS[d, :])[::-1][:self.K2]
                    jj = np.argsort(TS[t, :])[::-1][:self.K2]
                    v1 = DS[d, ii].dot(self.U[dinx[ii], :]) / np.sum(DS[d, ii])
                    v2 = TS[t, jj].dot(self.V[tinx[jj], :]) / np.sum(TS[t, jj])
                    val = np.sum(v1 * v2)
            scores.append(np.exp(val) / (1 + np.exp(val)))
    elif self.K2 == 0:
        for d, t in test_data:
            val = np.sum(self.U[d, :] * self.V[t, :])
            scores.append(np.exp(val) / (1 + np.exp(val)))
    prec, rec, thr = precision_recall_curve(test_label, np.array(scores))
    aupr_val = auc(rec, prec)
    fpr, tpr, thr = roc_curve(test_label, np.array(scores))
    auc_val = auc(fpr, tpr)
    return aupr_val, auc_val
def get_heatmap(data_mat, name_for_saving_files, pp, stimulus_on_time,
                stimulus_off_time, delta_ff, f0_start, f0_end):
    # Plot heatmap for validation
    A1 = np.reshape(data_mat, (np.size(data_mat, 0) * np.size(data_mat, 1),
                               np.size(data_mat, 2)))
    if delta_ff == 1:
        delta_ff_A1 = np.zeros(np.shape(A1))
        for ii in range(0, np.size(A1, 0)):
            delta_ff_A1[ii, :] = (A1[ii, :] - np.mean(A1[ii, f0_start:f0_end])) / \
                (np.std(A1[ii, f0_start:f0_end]) + 0.1)
        B = np.argsort(np.mean(delta_ff_A1, axis=1))
        print(np.max(delta_ff_A1))
    else:
        B = np.argsort(np.mean(A1, axis=1))
        print(np.max(A1))

    with sns.axes_style("white"):
        C = A1[B, :][-2000:, :]
        fig2 = plt.imshow(C, aspect='auto', cmap='jet',
                          vmin=np.min(C), vmax=np.max(C))
        plot_vertical_lines_onset(stimulus_on_time)
        plot_vertical_lines_offset(stimulus_off_time)
        plt.title(name_for_saving_files)
        plt.colorbar()
        fig2 = plt.gcf()
        pp.savefig(fig2)
        plt.close()
def make_plot(self):
    # plot gets arguments
    dates, prices = self.cmod.arguments_plot(buyerField=self.argCH_plot())
    print(dates)
    print(prices)
    # creating plot
    dates = np.array(dates)    # converting list
    prices = np.array(prices)  # converting list
    fig, self.plotTK = plt.subplots()
    # one permutation sorts by date; indexing prices with the same permutation
    # keeps each price attached to its date (sorting the two arrays
    # independently would re-pair the values)
    s = np.argsort(dates)
    self.plotTK.plot_date(dates[s], prices[s], 'bo-')
    self.plotTK.xaxis.set_major_formatter(DateFormatter('%Y-%m-%d'))
    self.plotTK.fmt_xdata = DateFormatter('%Y-%m-%d %H:%M:%S')
    fig.autofmt_xdate()
    # merge plot and tkinter
    self.canvas = FigureCanvasTkAgg(fig, self.cview.frames[view.AboutPage].leftFrame)
    self.canvas.show()
    self.canvas.get_tk_widget().pack(side=tk.BOTTOM, fill=tk.BOTH, expand=True)
    # creating toolbar
    toolbar = NavigationToolbar2TkAgg(self.canvas, self.cview.frames[view.AboutPage].leftFrame)
    toolbar.update()
    # packing plot
    self.canvas._tkcanvas.pack(side=tk.TOP, fill=tk.BOTH, expand=True)
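# Minimal demonstration (illustrative) of why a single permutation must index
# both arrays when plotting paired data:
import numpy as np

dates = np.array([3, 1, 2])
prices = np.array([30.0, 10.0, 20.0])
s = np.argsort(dates)
print(dates[s], prices[s])  # [1 2 3] [10. 20. 30.] -- pairs preserved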
def get_informative_features(vectorizers, clf, class_labels, N):
    """
    Return text with features with the highest absolute coefficient
    values, per class.
    """
    feature_names = []
    for vec_name, vec in vectorizers:
        feature_names += ["%30s %s" % (vec_name, name)
                          for name in vec.get_feature_names()]
    features_by_class = []
    for i, class_label in enumerate(class_labels):
        topN = np.argsort(clf.coef_[i])[-N:]
        bottomN = np.argsort(clf.coef_[i])[:N]
        res = []

        for j in reversed(topN):
            coef = clf.coef_[i][j]
            if coef > 0:
                res.append("+%0.4f: %s" % (coef, feature_names[j]))

        if (len(topN) >= N) or (len(bottomN) >= N):
            res.append("   ...")

        for j in reversed(bottomN):
            coef = clf.coef_[i][j]
            if coef < 0:
                res.append("%0.4f: %s" % (coef, feature_names[j]))

        features_by_class.append((class_label, "\n".join(res)))
    return features_by_class
def argsort(x, topn=None, reverse=False):
    """Get indices of the `topn` smallest elements in array `x`.

    Parameters
    ----------
    x : array_like
        Array to sort.
    topn : int, optional
        Number of indices of the smallest (greatest) elements to be returned.
        If not given, indices of all elements will be returned in
        ascending (descending) order.
    reverse : bool, optional
        If True - return the `topn` greatest elements, in descending order.

    Returns
    -------
    numpy.ndarray
        Array of `topn` indices that sort the array in the required order.

    """
    x = np.asarray(x)  # unify code path for when `x` is not a np array (list, tuple...)
    if topn is None:
        topn = x.size
    if topn <= 0:
        return []
    if reverse:
        x = -x
    if topn >= x.size or not hasattr(np, 'argpartition'):
        return np.argsort(x)[:topn]
    # np >= 1.8 has a fast partial argsort, use that!
    most_extreme = np.argpartition(x, topn)[:topn]
    return most_extreme.take(np.argsort(x.take(most_extreme)))  # resort topn into order
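# Quick check (illustrative) of both code paths: the full sort and the
# argpartition shortcut give the same answer.
import numpy as np

vals = [5, 1, 4, 2, 3]
print(argsort(vals, topn=2))                # [1 3]: indices of the two smallest
print(argsort(vals, topn=2, reverse=True))  # [0 2]: indices of the two largest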
def get_filtered_intersections(self, sort_by, inters_size_bounds, inters_degree_bounds):
    """
    Filter the intersection data according to the user's directives and return it.

    :param sort_by: 'degree'|'size'. Whether to sort intersections by degree or size.
    :param inters_size_bounds: tuple. Specifies the size interval of the
        intersections that will be plotted.
    :param inters_degree_bounds: tuple. Specifies the degree interval of the
        intersections that will be plotted.
    :return: Array of int (sizes), array of tuples (sets included in intersection),
        array of tuples (sets excluded from intersection), all filtered and sorted.
    """
    inters_sizes = np.array([self.inters_df_dict[x].shape[0] for x in self.in_sets_list])
    inters_degrees = np.array(self.inters_degrees)

    size_clip = (inters_sizes <= inters_size_bounds[1]) \
        & (inters_sizes >= inters_size_bounds[0]) \
        & (inters_degrees >= inters_degree_bounds[0]) \
        & (inters_degrees <= inters_degree_bounds[1])

    in_sets_list = np.array(self.in_sets_list)[size_clip]
    out_sets_list = np.array(self.out_sets_list)[size_clip]
    inters_sizes = inters_sizes[size_clip]
    inters_degrees = inters_degrees[size_clip]

    # sort as requested
    if sort_by == 'size':
        order = np.argsort(inters_sizes)[::-1]
    elif sort_by == 'degree':
        order = np.argsort(inters_degrees)

    # store ordered data
    self.filtered_inters_sizes = inters_sizes[order]
    self.filtered_in_sets = in_sets_list[order]
    self.filtered_out_sets = out_sets_list[order]

    return self.filtered_inters_sizes, self.filtered_in_sets, self.filtered_out_sets
def sort_by_pause_length(file_name):
    pause_before, pause_after = [], []

    conn = sqlite3.connect('./alignment_data/{}.db'.format(file_name))
    cur = conn.cursor()
    cur.execute("SELECT word, word_index, count_before, count_after, "
                "avg_pause_before, avg_pause_after FROM words")
    vals = cur.fetchall()

    for val in vals:
        entry_before = (val[0], val[4], val[2])
        entry_after = (val[0], val[5], val[3])
        pause_before.append(entry_before)
        pause_after.append(entry_after)

    pause_before = np.asarray(pause_before)
    pause_after = np.asarray(pause_after)

    # the mixed tuples become a string array, so cast the numeric columns to
    # float before sorting; a lexicographic argsort would order '9.5' after '10.2'
    idx_before = np.argsort(pause_before[:, 1].astype(float))[::-1]
    idx_after = np.argsort(pause_after[:, 1].astype(float))[::-1]
    idx_count_before = np.argsort(pause_before[:, 2].astype(float))[::-1]
    idx_count_after = np.argsort(pause_after[:, 2].astype(float))[::-1]

    return (pause_before[idx_before], pause_after[idx_after],
            pause_before[idx_count_before], pause_after[idx_count_after])
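# Caveat illustration (not from the original script): a numpy array built from
# mixed tuples holds strings, so argsort on a "numeric" column is lexicographic
# unless it is cast to float first, as done above.
import numpy as np

a = np.asarray([('b', '9.5'), ('a', '10.2')])
print(np.argsort(a[:, 1]))                # [1 0] -- '10.2' < '9.5' as strings
print(np.argsort(a[:, 1].astype(float)))  # [0 1] -- 9.5 < 10.2 numerically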
def plotres(psr, deleted=False, group=None, **kwargs):
    """Plot residuals, compute unweighted rms residual."""

    res, t, errs = psr.residuals(), psr.toas(), psr.toaerrs

    if (not deleted) and N.any(psr.deleted != 0):
        res, t, errs = res[psr.deleted == 0], t[psr.deleted == 0], errs[psr.deleted == 0]
        print("Plotting {0}/{1} nondeleted points.".format(len(res), psr.nobs))

    meanres = math.sqrt(N.mean(res**2)) / 1e-6

    if group is None:
        i = N.argsort(t)
        P.errorbar(t[i], res[i] / 1e-6, yerr=errs[i], fmt='x', **kwargs)
    else:
        if (not deleted) and N.any(psr.deleted):
            flagmask = psr.flagvals(group)[~psr.deleted]
        else:
            flagmask = psr.flagvals(group)

        unique = list(set(flagmask))

        for flagval in unique:
            f = (flagmask == flagval)
            flagres, flagt, flagerrs = res[f], t[f], errs[f]
            i = N.argsort(flagt)
            P.errorbar(flagt[i], flagres[i] / 1e-6, yerr=flagerrs[i], fmt='x', **kwargs)

        P.legend(unique, numpoints=1, bbox_to_anchor=(1.1, 1.1))

    P.xlabel('MJD')
    P.ylabel('res [us]')
    P.title("{0} - rms res = {1:.2f} us".format(psr.name, meanres))
def spike_find(input_array, t, max_spike_width):
    """
    Find the spikes in the input_array.

    Inputs:
        input_array      : a numpy array (1-dimensional) holding floats.
        t                : threshold for spike detection
        max_spike_width  : crossings further apart than this will
                           disqualify the spike
    Returns:
        spikes           : a numpy array (1-dimensional) holding integers
                           (spike index values)
    """
    crossings = fast_thresh_detect(input_array, threshold=t)
    spikes = []
    if len(crossings) > 1:
        if t > 0.0:
            # find first positive crossing then pair up crossings
            first_p = numpy.argwhere(input_array[crossings] < t)[0][0]  # scalar index
            for p, n in zip(crossings[first_p::2], crossings[first_p + 1::2]):
                if abs(p - n) <= max_spike_width:
                    # peak is the maximum between the paired crossings
                    peak_index = numpy.argsort(input_array[p:n + 1])[-1] + p
                    spikes.append(peak_index)
        else:
            # find first negative crossing then pair up crossings
            first_n = numpy.argwhere(input_array[crossings] > t)[0][0]  # scalar index
            for n, p in zip(crossings[first_n::2], crossings[first_n + 1::2]):
                if abs(p - n) <= max_spike_width:
                    # trough is the minimum between the paired crossings
                    peak_index = numpy.argsort(input_array[n:p + 1])[0] + n
                    spikes.append(peak_index)
    return numpy.array(spikes)
def SNfunc(self, data, sig, significancefloor=0.5):
    D = data.ravel()
    S = sig.ravel()
    args = numpy.argsort(-D / S)
    D = numpy.take(D, args)
    S = numpy.take(S, args)
    Dsum = numpy.cumsum(D)
    Ssum = numpy.cumsum(S**2)**0.5
    SN = (Dsum / Ssum).max()

    # regional SN
    import scipy.ndimage as ndimage
    data[data / sig < significancefloor] = 0
    masks, multiplicity = ndimage.measurements.label(data)
    labels = numpy.arange(1, multiplicity + 1)
    SNs = numpy.zeros(multiplicity + 1)
    SNs[0] = SN
    for i in range(multiplicity):
        D = data[masks == i + 1].ravel()
        S = sig[masks == i + 1].ravel()
        args = numpy.argsort(-D / S)
        D = numpy.take(D, args)
        S = numpy.take(S, args)
        Dsum = numpy.cumsum(D)
        Ssum = numpy.cumsum(S**2)**0.5
        SNi = (Dsum / Ssum).max()
        SNs[i + 1] = SNi
    SNs = -numpy.sort(-SNs)
    return SNs
def scale_score(x, kind="quicksort", kind2="quicksort"):
    y = x.copy()
    order = np.argsort(x.flat, kind=kind)
    # Black magic ;-) Probably the smartest thing I came up with today.
    order_order = np.argsort(order, kind=kind2)
    y.flat[:] = order_order.astype(y.dtype)
    return y
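# Equivalence check (illustrative): for arrays with unique entries, scale_score
# gives the same 0-based ranks as scipy.stats.rankdata minus 1, while
# preserving the input's shape and dtype.
import numpy as np
from scipy.stats import rankdata

x = np.array([[0.3, 0.1], [0.9, 0.5]])
print(scale_score(x))                      # [[1. 0.] [3. 2.]]
print(rankdata(x).reshape(x.shape) - 1.0)  # same values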
def _get_sorted_theta(self):
    '''sorts the integral points by bond in descending order'''
    depsf_arr = np.array([])
    V_f_arr = np.array([])
    E_f_arr = np.array([])
    xi_arr = np.array([])
    stat_weights_arr = np.array([])
    nu_r_arr = np.array([])
    r_arr = np.array([])
    for reinf in self.cont_reinf_lst:
        n_int = len(np.hstack((np.array([]), reinf.depsf_arr)))
        depsf_arr = np.hstack((depsf_arr, reinf.depsf_arr))
        V_f_arr = np.hstack((V_f_arr, np.repeat(reinf.V_f, n_int)))
        E_f_arr = np.hstack((E_f_arr, np.repeat(reinf.E_f, n_int)))
        xi_arr = np.hstack((xi_arr, np.repeat(reinf.xi, n_int)))
        stat_weights_arr = np.hstack((stat_weights_arr, reinf.stat_weights))
        nu_r_arr = np.hstack((nu_r_arr, reinf.nu_r))
        r_arr = np.hstack((r_arr, reinf.r_arr))
    argsort = np.argsort(depsf_arr)[::-1]

    # sorting the masks for the evaluation of F
    idxs = np.array([])
    for i, reinf in enumerate(self.cont_reinf_lst):
        idxs = np.hstack((idxs, i * np.ones_like(reinf.depsf_arr)))
    masks = []
    for i, reinf in enumerate(self.cont_reinf_lst):
        masks.append((idxs == i)[argsort])
    max_depsf = [np.max(reinf.depsf_arr) for reinf in self.cont_reinf_lst]
    masks = [masks[i] for i in np.argsort(max_depsf)[::-1]]

    return depsf_arr[argsort], V_f_arr[argsort], E_f_arr[argsort], \
        xi_arr[argsort], stat_weights_arr[argsort], \
        nu_r_arr[argsort], masks, r_arr[argsort]
def rforests(trainx, trainy, test, n_estimators=100, k=5):
    trainy = np.ravel(trainy)

    forest = RandomForestClassifier(n_estimators)
    forest.fit(trainx, trainy)

    prob_train = forest.predict_proba(trainx)
    prob_test = forest.predict_proba(test)

    # Since the index is the number of the country that's been chosen,
    # we can use these with argsort to get the maximum 5; we will have to
    # do this for the entire matrix though.
    sort_train = np.argsort(prob_train)[:, -k:]
    sort_test = np.argsort(prob_test)[:, -k:]

    # Now we need to transform these back to countries, but to map I need
    # to have a dataframe.
    col_names = []
    for i in range(k):
        name = "country_destination_" + str(i + 1)
        col_names.append(name)

    pred_train = pd.DataFrame(sort_train, columns=col_names)
    pred_test = pd.DataFrame(sort_test, columns=col_names)

    for name in col_names:
        pred_train[name] = pred_train[name].map(dicts.country)
        pred_test[name] = pred_test[name].map(dicts.country)

    pred_train = np.fliplr(pred_train)
    pred_test = np.fliplr(pred_test)

    return forest, pred_train, pred_test
def regenerate_dim(x):
    """ assume x in ns since epoch from the current time """
    msg = None  # msg allows us to see which shot/diag was at fault
    diffs = np.diff(x)
    # bincount needs a positive input and needs an array with N elts
    # where N is the largest number input
    small = (diffs > 0) & (diffs < 1000000)
    sorted_diffs = np.sort(diffs[np.where(small)[0]])
    counts = np.bincount(sorted_diffs)
    bigcounts, bigvals = myhist(diffs[np.where(~small)[0]])

    if pyfusion.VERBOSE > 0:
        print('[[diff, count],....]')
        print('small:', [[argc, counts[argc]]
                         for argc in np.argsort(counts)[::-1][0:5]])
        print('big or negative:', [[bigvals[argc], bigcounts[argc]]
                                   for argc in np.argsort(bigcounts)[::-1][0:10]])

    dtns = 1 + np.argmax(counts[1:])  # skip the first position - it is 0
    # wgt0 = np.where(sorted_diffs > 0)[0]  # we are in ns, so no worry about rounding
    histo = plt.hist if pyfusion.DBG() > 1 else np.histogram
    cnts, vals = histo(x, bins=200)[0:2]
    # ignore the two end bins - hopefully there will be very few there
    wmin = np.where(cnts[1:-1] < np.max(cnts[1:-1]))[0]
    if len(wmin) > 0:
        print('**********\n*********** Gap in data > {p:.2f}%'
              .format(p=100 * len(wmin) / float(len(cnts))))

    x01111 = np.ones(len(x))  # x01111 will be all 1s except for the first elt.
    x01111[0] = 0
    errcnt = np.sum(bigcounts) + np.sum(np.sort(counts)[::-1][1:])
    if errcnt > 0 or (pyfusion.VERBOSE > 0):
        msg = str('** repaired length of {l:,}, dtns={dtns:,}, {e} erroneous utcs'
                  .format(l=len(x01111), dtns=dtns, e=errcnt))

    fixedx = np.cumsum(x01111) * dtns
    wbad = np.where((x - fixedx) > 1e8)[0]
    fixedx[wbad] = np.nan
    debug_(pyfusion.DEBUG, 3, key="repair",
           msg="repair of W7-X scrambled Langmuir timebase")
    return (fixedx, msg)
def show_heatmap(self, order_by=None, order_by_row=None, order_by_col=None):
    if order_by:
        title = 'Network ordered by node covariate\n"%s"' % order_by
        o = np.argsort(self.node_covariates[order_by][:])
    elif order_by_row:
        title = 'Network ordered by row covariate\n"%s"' % order_by_row
        o = np.argsort(self.row_covariates[order_by_row][:])
    elif order_by_col:
        title = 'Network ordered by column covariate\n"%s"' % order_by_col
        o = np.argsort(self.col_covariates[order_by_col][:])
    else:
        title, o = 'Unordered adjacency matrix', np.arange(self.N)

    f, (ax_im, ax_ord) = plt.subplots(2, sharex=True)
    f.set_figwidth(3)
    f.set_figheight(6)

    A = self.adjacency_matrix()
    ax_im.imshow(A[o][:, o]).set_cmap('binary')
    ax_im.set_ylim(0, self.N - 1)
    ax_im.set_xticks([])
    ax_im.set_yticks([])
    ax_im.set_title(title)
    # plt.setp([ax_im.get_xticklabels(), ax_im.get_yticklabels()],
    #          visible=False)

    if order_by:
        ax_ord.scatter(np.arange(self.N), self.node_covariates[order_by][o])
        ax_ord.set_xlim(0, self.N - 1)
        ax_ord.set_ylim(self.node_covariates[order_by][o[0]],
                        self.node_covariates[order_by][o[-1]])

    plt.show()
def rowwise_rank(array, mask=None):
    """
    Take a 2D array and return the 0-indexed sorted position of each element
    in the array for each row.

    Example
    -------
    In [5]: data
    Out[5]:
    array([[-0.141, -1.103, -1.0171,  0.7812,  0.07  ],
           [ 0.926,  0.235, -0.7698,  1.4552,  0.2061],
           [ 1.579,  0.929, -0.557 ,  0.7896, -1.6279],
           [-1.362, -2.411, -1.4604,  1.4468, -0.1885],
           [ 1.272,  1.199, -3.2312, -0.5511, -1.9794]])

    In [7]: argsort(argsort(data))
    Out[7]:
    array([[2, 0, 1, 4, 3],
           [3, 2, 0, 4, 1],
           [4, 3, 1, 2, 0],
           [2, 0, 1, 4, 3],
           [4, 3, 0, 2, 1]])
    """
    # note that unlike scipy.stats.rankdata, the output here is 0-indexed, not
    # 1-indexed.
    return argsort(argsort(array))
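# Spot check (illustrative), assuming `argsort` here is numpy's argsort, whose
# default axis=-1 makes the double argsort operate row by row:
import numpy as np

data = np.array([[0.3, 0.1, 0.2],
                 [3.0, 1.0, 2.0]])
print(np.argsort(np.argsort(data)))  # [[2 0 1]
                                     #  [2 0 1]]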
def target_neurons(self, nConnectPerInput, network, strCorr, bAntiCorr=False):
    numInput = self.dicProperties["IODim"]
    numNodesReservoir = self.dicProperties["ReservoirDim"]
    matTargetNeurons = np.zeros((numInput, nConnectPerInput))
    if strCorr == "Betweenness":
        self.lstBetweenness = betweenness_list(network)[0].a  # get edge betweenness array
        lstSortedNodes = np.argsort(self.lstBetweenness)
        if not bAntiCorr:
            lstSortedNodes = lstSortedNodes[::-1]
        for i in range(numInput):
            # characteristic exponential decay is a fifth of the reservoir's size
            lstRandIdx = rand_int_trunc_exp(0, numNodesReservoir, 0.2, nConnectPerInput)
            matTargetNeurons[i, :] = lstSortedNodes[lstRandIdx]
    elif "degree" in strCorr:
        # get the degree type
        idxDash = strCorr.find("-")
        strDegType = strCorr[:idxDash].lower()
        lstDegrees = degree_list(network, strDegType)
        # sort the nodes by their importance
        lstSortedNodes = np.argsort(lstDegrees)
        if not bAntiCorr:
            lstSortedNodes = lstSortedNodes[::-1]
        for i in range(numInput):
            # characteristic exponential decay is a fifth of the reservoir's size
            lstRandIdx = rand_int_trunc_exp(0, numNodesReservoir, 0.2, nConnectPerInput)
            matTargetNeurons[i, :] = lstSortedNodes[lstRandIdx]
    else:
        matTargetNeurons = np.random.randint(0, numNodesReservoir,
                                             (numInput, nConnectPerInput))
    return matTargetNeurons.astype(int)
def __call__(self, filt, mask=None):
    '''Provide the iterator over the levels.'''
    self._check_filter(filt, mask)
    # This cover method is only for one-dimensional filter functions.
    assert self.dim == 1
    # The interval length measures indices, not filter values, in this case.
    self.interval_length = 1. / \
        (self.intervals[0] - (self.intervals[0] - 1) * self.fract_overlap)
    self.step_size = self.interval_length * (1 - self.fract_overlap)
    if mask is None:
        self.n = len(self.filt)
        self.sortorder = np.argsort(np.ravel(self.filt))
    else:
        idx = np.flatnonzero(mask)
        self.n = len(idx)
        sortorder = np.argsort(np.ravel(self.filt[mask]))
        self.sortorder = idx[sortorder]
    assert len(self.sortorder) == self.n
    self.iter = range(self.intervals[0]).__iter__()
    return self
def shi_malik(st_mat, eig_thresh=0.95, cut=0, group=None):
    """
    Given a stochastic matrix describing the strength of the relationship
    between pairs of items, determines an aggregation of the items using the
    spectral approach of Shi and Malik.

    A column-stochastic matrix T will always have a leading eigenvalue of 1
    and a leading uniform right-eigenvector, u = (1, ..., 1), which is a fixed
    point of the map:

        T u = u

    If T has no disconnected components then u is the unique fixed point (up
    to a constant scaling) and the sub-leading eigenvalue is strictly less
    than one; otherwise, the eigenvalue 1 is degenerate. In the first case, if
    the sub-leading eigenvalue is close to 1, then the sub-leading
    right-eigenvector y may be used to partition the indices into two
    slowly-decaying communities.

    The Shi-Malik algorithm is recursive, taking the sub-leading eigenvector
    of T (as long as the corresponding eigenvalue is above a threshold), using
    it to bipartition the indices, and then repeating these steps on the
    partitions with a reweighted matrix.

    This implementation cuts the vector y by value, by default into components
    y > 0 and y <= 0, because of the orthogonality relationship

        <y>_pi = y . pi = 0

    which indicates that the mean value of y under the stationary distribution
    pi (left-eigenvector of T) must always be zero, making this a value of
    significance. The algorithm halts when no community has a sub-leading
    eigenvector above the threshold, and the final partitioning is returned as
    an Aggregation.

    Arguments
    ---------
    st_mat : A square stochastic matrix describing a Markov dynamic.

    Keyword Arguments
    -----------------
    eig_thresh : The smallest value the subleading eigenvalue may have to
        continue the recursion.
    cut : The value used to "cut" the subleading eigenvector into two clusters.
    group : The group which labels the indices of st_mat, and which will be
        the item set of the returned Aggregation.

    Output
    ------
    Aggregation of the indices of st_mat
    """
    if group is None:
        group = _Group(_np.arange(st_mat.shape[0]))
    num_items = group.size
    clusts = _Aggregation(group, _Group(_np.array([0])),
                          {0: _np.arange(len(group))})
    change = True
    while change:
        new_clusts = []
        change = False
        for k, c in clusts:
            if len(c) > 1:
                T = _utils.stoch(st_mat[_np.ix_(c.in_superset, c.in_superset)])
                eigs, evecs = _la.eig(T)
                einds = _np.flip(_np.argsort(_np.abs(eigs)))
                if eigs[einds[1]] > eig_thresh:
                    y = _np.real(evecs[:, einds[1]])
                    ind_agg = split_by_vals(y / _np.sum(y), group=c, cuts=cut)
                    if ind_agg.clusters.size > 1:
                        new_clusts.append(c.in_superset[ind_agg[0].in_superset])
                        new_clusts.append(c.in_superset[ind_agg[1].in_superset])
                    else:
                        ind_agg = split_by_gaps(y, group=c)
                        new_clusts.append(c.in_superset[ind_agg[0].in_superset])
                        new_clusts.append(c.in_superset[ind_agg[1].in_superset])
                    change = True
                else:
                    new_clusts.append(c.in_superset)
            else:
                new_clusts.append(c.in_superset)
        new_agg = {j: new_clusts[j] for j in range(len(new_clusts))}
        clusts = _Aggregation(group, _Group(_np.arange(len(new_clusts))), new_agg)
    return clusts
def classify_boxes(self, images, image_names, detection_json, classification):
    classification_graph = self.models[classification]
    class_names = self.class_names[classification]
    # json_with_classes = self.add_classification_categories(detection_json, class_names)
    classification_predictions = {}
    with classification_graph.as_default():
        with tf.Session(graph=classification_graph) as sess:
            # Get input and output tensors of classification model
            image_tensor = classification_graph.get_tensor_by_name('input:0')
            predictions_tensor = classification_graph.get_tensor_by_name('output:0')
            predictions_tensor = tf.squeeze(predictions_tensor, [0])

            # For each image
            n_images = len(images)
            for i_image in tqdm.tqdm(list(range(0, n_images))):
                images = [np.asarray(image, np.uint8) for image in images]
                image_data = images[i_image]
                # Scale pixel values to [0,1]
                image_data = image_data / 255
                image_height, image_width, _ = image_data.shape
                image_description = detection_json[image_names[i_image]]
                classification_predictions[image_names[i_image]] = list()

                # For each box
                n_detections = len(image_description)
                for i_box in range(n_detections):
                    cur_detection = image_description[i_box]

                    # Skip detections with low confidence
                    if cur_detection[4] < self.default_confidence_threshold:
                        continue
                    # Skip if detection category is not in whitelist
                    if not str(cur_detection[5]) in self.detection_category_whitelist:
                        continue

                    # box is [ymin, xmin, ymax, xmax]
                    x_min = cur_detection[1]
                    y_min = cur_detection[0]
                    width_of_box = cur_detection[1] + cur_detection[3]
                    height_of_box = cur_detection[0] + cur_detection[2]

                    # Get current box in relative coordinates and format
                    # [x_min, y_min, width_of_box, height_of_box]
                    box_orig = [x_min, y_min, width_of_box, height_of_box]
                    # Convert to [ymin, xmin, ymax, xmax] and store it as a 1x4
                    # numpy array so we can re-use the generic multi-box padding code
                    box_coords = np.array([[box_orig[1], box_orig[0],
                                            box_orig[1] + box_orig[3],
                                            box_orig[0] + box_orig[2]]])
                    # Convert normalized coordinates to pixel coordinates
                    box_coords_abs = (box_coords *
                                      np.tile([image_height, image_width], (1, 2)))

                    # Pad the detected animal to a square box and additionally by
                    # PADDING_FACTOR; the result will be in crop_boxes. However,
                    # we need to make sure that the box coordinates are still
                    # within the image.
                    bbox_sizes = np.vstack([box_coords_abs[:, 2] - box_coords_abs[:, 0],
                                            box_coords_abs[:, 3] - box_coords_abs[:, 1]]).T
                    offsets = (self.padding_factor *
                               np.max(bbox_sizes, axis=1, keepdims=True) - bbox_sizes) / 2
                    crop_boxes = box_coords_abs + np.hstack([-offsets, offsets])
                    crop_boxes = np.maximum(0, crop_boxes).astype(int)
                    # Get the first (and only) row as our bbox to classify
                    crop_box = crop_boxes[0]

                    # Get the image data for that box
                    cropped_img = image_data[crop_box[0]:crop_box[2],
                                             crop_box[1]:crop_box[3]]

                    # Run inference
                    predictions = sess.run(predictions_tensor,
                                           feed_dict={image_tensor: cropped_img})

                    current_predictions = []
                    # Add the *num_annotated_classes* top scoring classes
                    for class_idx in np.argsort(-predictions)[:self.num_annotated_classes]:
                        class_conf = self.truncate_float(predictions[class_idx].item())
                        for idx, name in enumerate(class_names):
                            if class_idx == idx:
                                current_predictions.append([f'{name}', class_conf])

                    classification_predictions[image_names[i_image]].append(
                        current_predictions)

    return classification_predictions
def convert(self, mol,  # type: ignore
            state_attributes: List = None,
            full_pair_matrix: bool = True) -> Dict:
    """
    Compute the representation for a molecule

    Args:
        mol (pybel.Molecule): Molecule to generate features for
        state_attributes (list): State attributes. Uses average mass and
            number of bonds per atom as default
        full_pair_matrix (bool): Whether to generate info for all atom
            pairs, not just bonded ones

    Returns:
        (dict): Dictionary of features
    """
    # Get the features for all atoms and bonds
    atom_features = []
    atom_pairs = []
    for idx, atom in enumerate(mol.atoms):
        f = self.get_atom_feature(mol, atom)
        atom_features.append(f)
    atom_features = sorted(atom_features, key=lambda x: x["coordid"])
    num_atoms = mol.OBMol.NumAtoms()
    for i, j in itertools.combinations(range(0, num_atoms), 2):
        bond_feature = self.get_pair_feature(mol, i, j, full_pair_matrix)
        if bond_feature:
            atom_pairs.append(bond_feature)
        else:
            continue

    # Compute the graph distance, if desired
    if 'graph_distance' in self.bond_features:
        graph_dist = self._dijkstra_distance(atom_pairs)
        for i in atom_pairs:
            i.update({'graph_distance': graph_dist[i['a_idx'], i['b_idx']]})

    # Generate the state attributes (that describe the whole network)
    state_attributes = state_attributes or [
        [mol.molwt / num_atoms,
         len([i for i in atom_pairs if i['bond_type'] > 0]) / num_atoms]
    ]

    # Get the atom features in the order they are requested by the user as a 2D array
    atoms = []
    for atom in atom_features:
        atoms.append(self._create_atom_feature_vector(atom))

    # Get the bond features in the order requested by the user
    bonds = []
    index1_temp = []
    index2_temp = []
    for bond in atom_pairs:
        # Store the index of each bond
        index1_temp.append(bond.pop('a_idx'))
        index2_temp.append(bond.pop('b_idx'))
        # Get the desired bond features
        bonds.append(self._create_pair_feature_vector(bond))

    # Given the bonds (i, j), make it so (i, j) == (j, i)
    index1 = index1_temp + index2_temp
    index2 = index2_temp + index1_temp
    bonds = bonds + bonds

    # Sort the arrays by the beginning index
    sorted_arg = np.argsort(index1)
    index1 = np.array(index1)[sorted_arg].tolist()
    index2 = np.array(index2)[sorted_arg].tolist()
    bonds = np.array(bonds)[sorted_arg].tolist()

    return {'atom': atoms,
            'bond': bonds,
            'state': state_attributes,
            'index1': index1,
            'index2': index2}
def accumulate(self, p=None):
    '''
    Accumulate per image evaluation results and store the result in self.eval

    :param p: input params for evaluation
    :return: None
    '''
    print('Accumulating evaluation results...')
    tic = time.time()
    if not self.evalImgs:
        print('Please run evaluate() first')
    # allows input customized parameters
    if p is None:
        p = self.params
    p.catIds = p.catIds if p.useCats == 1 else [-1]
    T = len(p.iouThrs)
    R = len(p.recThrs)
    K = len(p.catIds) if p.useCats else 1
    A = len(p.areaRng)
    M = len(p.maxDets)
    # -1 for the precision of absent categories
    precision = -np.ones((T, R, K, A, M))
    recall = -np.ones((T, K, A, M))
    scores = -np.ones((T, R, K, A, M))

    # create dictionary for future indexing
    _pe = self._paramsEval
    catIds = _pe.catIds if _pe.useCats else [-1]
    setK = set(catIds)
    setA = set(map(tuple, _pe.areaRng))
    setM = set(_pe.maxDets)
    setI = set(_pe.imgIds)
    # get inds to evaluate
    k_list = [n for n, k in enumerate(p.catIds) if k in setK]
    m_list = [m for n, m in enumerate(p.maxDets) if m in setM]
    a_list = [n for n, a in enumerate(map(lambda x: tuple(x), p.areaRng))
              if a in setA]
    i_list = [n for n, i in enumerate(p.imgIds) if i in setI]
    I0 = len(_pe.imgIds)
    A0 = len(_pe.areaRng)
    # retrieve E at each category, area range, and max number of detections
    for k, k0 in enumerate(k_list):
        Nk = k0 * A0 * I0
        for a, a0 in enumerate(a_list):
            Na = a0 * I0
            for m, maxDet in enumerate(m_list):
                E = [self.evalImgs[Nk + Na + i] for i in i_list]
                E = [e for e in E if e is not None]
                if len(E) == 0:
                    continue
                dtScores = np.concatenate(
                    [e['dtScores'][0:maxDet] for e in E])

                # different sorting method generates slightly different results;
                # mergesort is used to be consistent with the Matlab implementation.
                inds = np.argsort(-dtScores, kind='mergesort')
                dtScoresSorted = dtScores[inds]

                dtm = np.concatenate(
                    [e['dtMatches'][:, 0:maxDet] for e in E], axis=1)[:, inds]
                dtIg = np.concatenate(
                    [e['dtIgnore'][:, 0:maxDet] for e in E], axis=1)[:, inds]
                gtIg = np.concatenate([e['gtIgnore'] for e in E])
                npig = np.count_nonzero(gtIg == 0)
                if npig == 0:
                    continue
                tps = np.logical_and(dtm, np.logical_not(dtIg))
                fps = np.logical_and(np.logical_not(dtm), np.logical_not(dtIg))

                # use the builtin float dtype (np.float is a removed alias)
                tp_sum = np.cumsum(tps, axis=1).astype(dtype=float)
                fp_sum = np.cumsum(fps, axis=1).astype(dtype=float)
                for t, (tp, fp) in enumerate(zip(tp_sum, fp_sum)):
                    tp = np.array(tp)
                    fp = np.array(fp)
                    nd = len(tp)
                    rc = tp / npig
                    pr = tp / (fp + tp + np.spacing(1))
                    q = np.zeros((R, ))
                    ss = np.zeros((R, ))

                    if nd:
                        recall[t, k, a, m] = rc[-1]
                    else:
                        recall[t, k, a, m] = 0

                    # numpy is slow without cython optimization for accessing
                    # elements; using python lists gets significant speed improvement
                    pr = pr.tolist()
                    q = q.tolist()

                    for i in range(nd - 1, 0, -1):
                        if pr[i] > pr[i - 1]:
                            pr[i - 1] = pr[i]

                    inds = np.searchsorted(rc, p.recThrs, side='left')
                    try:
                        for ri, pi in enumerate(inds):
                            q[ri] = pr[pi]
                            ss[ri] = dtScoresSorted[pi]
                    except BaseException:
                        pass
                    precision[t, :, k, a, m] = np.array(q)
                    scores[t, :, k, a, m] = np.array(ss)
    self.eval = {
        'params': p,
        'counts': [T, R, K, A, M],
        'date': datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
        'precision': precision,
        'recall': recall,
        'scores': scores,
    }
    toc = time.time()
    print('DONE (t={:0.2f}s).'.format(toc - tic))
def evaluateImg(self, imgId, catId, aRng, maxDet):
    '''
    perform evaluation for single category and image

    :return: dict (single image results)
    '''
    p = self.params
    if p.useCats:
        gt = self._gts[imgId, catId]
        dt = self._dts[imgId, catId]
    else:
        gt = [_ for cId in p.catIds for _ in self._gts[imgId, cId]]
        dt = [_ for cId in p.catIds for _ in self._dts[imgId, cId]]
    if len(gt) == 0 and len(dt) == 0:
        return None

    for g in gt:
        if g['ignore'] or (g['area'] < aRng[0] or g['area'] > aRng[1]):
            g['_ignore'] = 1
        else:
            g['_ignore'] = 0

    # sort dt highest score first, sort gt ignore last
    gtind = np.argsort([g['_ignore'] for g in gt], kind='mergesort')
    gt = [gt[i] for i in gtind]
    dtind = np.argsort([-d['score'] for d in dt], kind='mergesort')
    dt = [dt[i] for i in dtind[0:maxDet]]
    iscrowd = [int(o['iscrowd']) for o in gt]
    # load computed ious
    ious = self.ious[imgId, catId][:, gtind] if len(
        self.ious[imgId, catId]) > 0 else self.ious[imgId, catId]

    T = len(p.iouThrs)
    G = len(gt)
    D = len(dt)
    gtm = np.zeros((T, G))
    dtm = np.zeros((T, D))
    gtIg = np.array([g['_ignore'] for g in gt])
    dtIg = np.zeros((T, D))
    if not len(ious) == 0:
        for tind, t in enumerate(p.iouThrs):
            for dind, d in enumerate(dt):
                # information about best match so far (m=-1 -> unmatched)
                iou = min([t, 1 - 1e-10])
                m = -1
                for gind, g in enumerate(gt):
                    # if this gt already matched, and not a crowd, continue
                    if gtm[tind, gind] > 0 and not iscrowd[gind]:
                        continue
                    # if dt matched to reg gt, and on ignore gt, stop
                    if m > -1 and gtIg[m] == 0 and gtIg[gind] == 1:
                        break
                    # continue to next gt unless better match made
                    if ious[dind, gind] < iou:
                        continue
                    # if match successful and best so far, store appropriately
                    iou = ious[dind, gind]
                    m = gind
                # if match made store id of match for both dt and gt
                if m == -1:
                    continue
                dtIg[tind, dind] = gtIg[m]
                dtm[tind, dind] = gt[m]['id']
                gtm[tind, m] = d['id']
    # set unmatched detections outside of area range to ignore
    a = np.array([d['area'] < aRng[0] or d['area'] > aRng[1]
                  for d in dt]).reshape((1, len(dt)))
    dtIg = np.logical_or(dtIg, np.logical_and(dtm == 0, np.repeat(a, T, 0)))
    # store results for given image and category
    return {
        'image_id': imgId,
        'category_id': catId,
        'aRng': aRng,
        'maxDet': maxDet,
        'dtIds': [d['id'] for d in dt],
        'gtIds': [g['id'] for g in gt],
        'dtMatches': dtm,
        'gtMatches': gtm,
        'dtScores': [d['score'] for d in dt],
        'gtIgnore': gtIg,
        'dtIgnore': dtIg,
    }
def show_dets_gt_boxes(im, dets, classes, gt_boxes, gt_classes, scale=1.0,
                       FS=22, LW=3.5, save_file_path='temp_det_gt.png'):
    import matplotlib.pyplot as plt
    import numpy as np
    from random import random as rand
    from random import randint

    fig = plt.figure(1)
    fig.set_size_inches((2 * 8.5, 1 * 11), forward=False)
    plt.subplot(121)
    plt.cla()
    plt.axis("off")
    plt.imshow(im)
    for cls_dets, cls_name in zip(dets, classes):
        scores = []
        if len(cls_dets) == 0:
            continue
        for det in cls_dets:
            scores += [det[-1]]
        order = np.argsort(scores)  # renamed from `ord` to avoid shadowing the builtin
        cls_dets = cls_dets[order]
        for det in cls_dets:
            bbox = det[:4] * scale
            color = (rand(), rand(), rand())
            rect = plt.Rectangle((bbox[0], bbox[1]),
                                 bbox[2] - bbox[0],
                                 bbox[3] - bbox[1],
                                 fill=False, edgecolor=color, linewidth=LW)
            plt.gca().add_patch(rect)
            score = det[-1]
            corner = randint(1, 2)
            if corner == 1:
                x0 = bbox[0]
                y0 = bbox[1]
            if corner == 2:
                x0 = bbox[0]
                y0 = bbox[3]
            # if corner == 3:
            #     x0 = bbox[2]; y0 = bbox[1]
            # if corner == 4:
            #     x0 = bbox[2]; y0 = bbox[3]
            plt.gca().text(x0, y0, '{:s} {:.3f}'.format(cls_name, score),
                           bbox=dict(facecolor=color, alpha=0.6),
                           fontsize=FS, color='white')

    plt.subplot(122)
    plt.cla()
    plt.axis("off")
    plt.imshow(im)
    for cls_idx, cls_name in enumerate(gt_classes):
        bbox = gt_boxes[cls_idx]
        color = (rand(), rand(), rand())
        rect = plt.Rectangle((bbox[0], bbox[1]),
                             bbox[2] - bbox[0],
                             bbox[3] - bbox[1],
                             fill=False, edgecolor=color, linewidth=LW)
        plt.gca().add_patch(rect)
        plt.gca().text(bbox[0], bbox[1], '{:s}'.format(cls_name),
                       bbox=dict(facecolor=color, alpha=0.6),
                       fontsize=FS, color='white')

    plt.gca().set_axis_off()
    plt.subplots_adjust(top=1, bottom=0, right=1, left=0, hspace=0, wspace=0)
    plt.margins(0, 0)
    fig.savefig(save_file_path, bbox_inches='tight', pad_inches=0)
    plt.close(fig)
def param_effects_fig(placeholder=True, supervised=True):
    sb.set_context('talk')
    # sb.set_context('poster')
    # sb.set_context('notebook')

    # fig, axes = plt.subplots(2, figsize=(6, 8))
    # fig, axes = plt.subplots(1, 2, figsize=(10, 6))
    if supervised:
        fig, axes = plt.subplots(1, 2, figsize=(10, 5.2))
    else:
        fig, axes = plt.subplots(1, 2, figsize=(10, 5.72))  # 10% taller

    KEEP_HOW_MANY = 10  # plotting too many makes fig hideous

    if supervised:
        df_fc = pd.read_csv(NET_SIZE_PATH_SUPERVISED)
        df_pool = pd.read_csv(POOL_SIZE_PATH_SUPERVISED)
    else:
        df_fc = pd.read_csv(NET_SIZE_PATH_UNSUPERVISED)
        df_pool = pd.read_csv(POOL_SIZE_PATH_UNSUPERVISED)

    # print("df fc:")
    # print(df_fc)
    # print("df_pool")
    # print(df_pool)
    # return

    # make sure both use the same datasets, because otherwise the
    # legend will break / be wrong
    dsets_fc = sorted(df_fc[DATASET_COL].unique())
    dsets_pool = sorted(df_pool[DATASET_COL].unique())
    assert np.array_equal(dsets_fc, dsets_pool)
    dsets = dsets_fc

    dset_names_lens = np.array([len(name) for name in dsets])
    sort_idxs = np.argsort(dset_names_lens)
    dsets = [dsets[i] for i in sort_idxs[:KEEP_HOW_MANY]]

    # print("param_effects_fig: using datasets: ", dsets)
    # return

    # ------------------------ top plot: fc layer size

    fc_params = (df_fc, NET_SIZE_COL, axes[0])
    pool_params = (df_pool, POOL_SIZE_COL, axes[1])
    for (df, xcol, ax) in (fc_params, pool_params):
        for dset in dsets:
            sub_df = df[df[DATASET_COL] == dset]
            sub_df = sub_df.sort_values(xcol)
            xvals, yvals = sub_df[xcol], sub_df[ACC_COL]
            yvals /= yvals.max()
            # name = dset.replace('_', ' ').replace('-', ' ').capitalize()
            name = dset.replace('_', ' ').replace('-', ' ')
            ax.plot(xvals, yvals, label=name)

    leg_lines, leg_labels = ax.get_legend_handles_labels()
    plt.figlegend(leg_lines, leg_labels, loc='lower center',
                  ncol=5, labelspacing=0)

    ax = axes[0]
    ax.set_title("Effect of Fully Connected Layer Size", y=1.03)
    if supervised:
        ax.set_xlabel("Neurons in Each Fully Connected Layer")
    else:
        ax.set_xlabel("Neurons in Each Fully Connected Layer\n"
                      "(Fraction of # of classes)")
    ax.set_ylabel("Normalized Accuracy")

    ax = axes[1]
    ax.set_title("Effect of Max Pool Size", y=1.03)
    ax.set_xlabel("Fraction of Mean Time Series Length")
    # ax.set_xlabel("Max Pool Size\n(Fraction of Mean Time Series Length)")
    ax.set_ylabel("Normalized Accuracy")

    # plt.tight_layout(w_pad=.02)
    # plt.tight_layout(h_pad=2.0)
    plt.tight_layout(h_pad=1.8)
    # plt.tight_layout()
    # plt.subplots_adjust(bottom=.32)  # this one with horz but 2 legend cols
    # plt.subplots_adjust(bottom=.23)  # this one for vertical subplots
    if supervised:
        plt.subplots_adjust(bottom=.25)
    else:
        plt.subplots_adjust(bottom=.27)
    # plt.show()

    figname = 'param_effects'
    if not supervised:
        figname += '_unsupervised'
    save_fig_png(figname)
def __init__(self, track, params=visualizerParams()):
    self.track = track
    self.visualizer_params = params

    plot_subplots = params.plot_subplots
    parking_spot_width = params.parking_spot_width
    num_parking_spots = params.num_parking_spots

    self.fsm_state_ids = list(state_num_dict.values())
    self.fsm_state_names = list(state_num_dict.keys())
    sort_idxs = np.argsort(self.fsm_state_ids)
    self.fsm_state_ids = [self.fsm_state_ids[i] for i in sort_idxs]
    self.fsm_state_names = [self.fsm_state_names[i] for i in sort_idxs]

    # Initialize figure
    figsize = (14, 7) if plot_subplots else (7, 7)
    self.fig = plt.figure(figsize=figsize)
    self.fig.suptitle("BARC OBCA Plotter", fontsize=16)
    plt.ion()

    self.axs = dict()

    if params.trajectory_file is not None:
        trajectory_scaling = params.trajectory_scaling
        trajectory_init = params.trajectory_init
        trajectory = load_vehicle_trajectory(params.trajectory_file)
        trajectory -= np.array([trajectory[0, 0], trajectory[0, 1], 0, 0])
        trajectory = np.multiply(trajectory,
                                 np.array([trajectory_scaling['x'],
                                           trajectory_scaling['y'],
                                           1,
                                           trajectory_scaling['v']]))
        trajectory += np.array([trajectory_init['x'], trajectory_init['y'], 0, 0])
        # waypoints, next_ref_start = get_trajectory_waypoints(trajectory, 20, 0.1)
        waypoints = np.array([])

    ################ Trajectory Subplot ################
    if plot_subplots:
        axtr = self.fig.add_subplot(3, 2, 1)
        axtr.set_title("Trajectories")
        axtr.set_xlabel("X")
        axtr.set_ylabel("Y")
    else:
        axtr = self.fig.add_subplot(3, 1, 1)

    if params.trajectory_file is not None:
        axtr.plot(trajectory[:, 0], trajectory[:, 1])
        if waypoints.size > 0:
            axtr.plot(waypoints[:, 0], waypoints[:, 1], 'ro')
            axtr.plot(trajectory[next_ref_start, 0],
                      trajectory[next_ref_start, 1], 'bx')

    # User defined map plotting
    parking_spot_length = 0.6
    track_length = self.track.track_length
    track_width = self.track.track_width

    # Plot lanes
    self.track.plot_map(axtr)

    # Plot parking spots
    axtr.plot([0, track_length],
              [track_width / 2 + parking_spot_length,
               track_width / 2 + parking_spot_length],
              color='#908E8E', linewidth=1.5)
    axtr.plot([0, track_length],
              [-track_width / 2 - parking_spot_length,
               -track_width / 2 - parking_spot_length],
              color='#908E8E', linewidth=1.5)
    axtr.plot([0, 0],
              [track_width / 2, track_width / 2 + parking_spot_length],
              color='#908E8E', linewidth=1.5)
    axtr.plot([0, 0],
              [-track_width / 2, -track_width / 2 - parking_spot_length],
              color='#908E8E', linewidth=1.5)
    for i in range(num_parking_spots):
        axtr.plot([(i + 1) * parking_spot_width, (i + 1) * parking_spot_width],
                  [track_width / 2, track_width / 2 + parking_spot_length],
                  color='#908E8E', linewidth=1.5)
        axtr.plot([(i + 1) * parking_spot_width, (i + 1) * parking_spot_width],
                  [-track_width / 2, -track_width / 2 - parking_spot_length],
                  color='#908E8E', linewidth=1.5)
    axtr.set_aspect('equal')
    self.axs['track'] = axtr

    if plot_subplots:
        ################ Speed Subplot ################
        axv = self.fig.add_subplot(4, 2, 2)
        axv.set_ylabel("vel")
        axv.yaxis.set_major_formatter(FormatStrFormatter('%.2f'))
        self.axs['vel'] = axv

        ################ PsiDot Subplot ################
        axpsiDot = self.fig.add_subplot(4, 2, 4)
        axpsiDot.set_ylabel("yaw rate")
        axpsiDot.yaxis.set_major_formatter(FormatStrFormatter('%.2f'))
        self.axs['yaw_rate'] = axpsiDot

        ################ u_a Subplot ################
        axua = self.fig.add_subplot(4, 2, 6)
        axua.set_ylabel("motor")
        axua.yaxis.set_major_formatter(FormatStrFormatter('%.2f'))
        self.axs['throttle'] = axua

        ################ u_df Subplot ################
        axudf = self.fig.add_subplot(4, 2, 8)
        axudf.set_ylabel("servo")
        axudf.yaxis.set_major_formatter(FormatStrFormatter('%.2f'))
        self.axs['steering'] = axudf

    plt.tight_layout()
    plt.show()

    self.plotters = []
# Feature importances of tree based classifiers
names_classifiers = [('RandomForest', best_rf_clf),
                     ('AdaBoost', best_ada_clf),
                     ('GradientBoosting', best_gb_clf),
                     ('ExtraTrees', best_extra_clf),
                     ('LogisticRegression', best_logreg)]

fig, ax = plt.subplots(2, 2, figsize=(15, 15))

# only the first four (tree-based) classifiers fit in the 2x2 grid;
# LogisticRegression has no feature_importances_ and is never reached
index = 0
for row in range(2):
    for col in range(2):
        name = names_classifiers[index][0]
        clf = names_classifiers[index][1]
        feature_importances = clf.feature_importances_
        indices = np.argsort(feature_importances)[::-1]  # most to least important
        graph = sns.barplot(y=X_train.columns[indices],
                            x=feature_importances[indices],
                            ax=ax[row][col])
        graph.set_xlabel('Relative Importance')
        graph.set_ylabel('Features')
        graph.set_title(name + ' feature importances')
        index += 1

# Title, sex, age, fare, and family size are the most important features.
# Might remove the series of cabin features and create a 'HasCabin' feature.


# In[171]:


test_survived_rf_clf = pd.Series(best_rf_clf.predict(test), name='Rf')
test_survived_ada_clf = pd.Series(best_ada_clf.predict(test), name='Ada')
# Fragment: the block below is the body of a per-detector loop; the enclosing
# `for detector in detectors:` header is reconstructed from the loop that
# follows it.
for detector in detectors:
    if detector not in trigfile:
        continue
    grp = trigfile[detector]

    # show gates
    if 'gates' in grp:
        for gate in grp['gates'][:]:
            plot_gate(ax[detector], gate)

    # show triggers
    if 'end_time' not in grp or len(grp['end_time']) == 0:
        continue
    ar_time.update(grp['end_time'])
    ar_dur.update(grp['template_duration'])
    # plot low-SNR triggers first so the loudest ones end up on top
    sorter = np.argsort(grp['snr'][:])
    sc = ax[detector].scatter(grp['end_time'][:][sorter],
                              grp['template_duration'][:][sorter],
                              c=grp['snr'][:][sorter],
                              cmap='plasma_r', vmin=4.5, vmax=10)

ax[detectors[-1]].set_xlabel('GPS time')

for detector in detectors:
    ax[detector].set_xlim(ar_time.low, ar_time.high)
    ax[detector].set_ylim(ar_dur.low, ar_dur.high)
    ax[detector].set_yscale('log')
    for ht in (args.highlight_times or []):
        ax[detector].axvline(ht, ls='--', color='green')
    for g in (args.gates or []):
        gate = g.split(',')
        if gate[0] != detector:
            continue
# Fragment: tail of a per-spectral-line loop. The branch above this `else`
# (cut off in this excerpt) takes the bootstrap error estimate.
        cosmean, coserr = bootstrap(cosamp)
        sinmean, sinerr = bootstrap(sinamp)
    else:
        cosmean = np.mean(cosamp)
        sinmean = np.mean(sinamp)
        coserr = np.std(cosamp) / np.sqrt(ngood)
        sinerr = np.std(sinamp) / np.sqrt(ngood)
    cosamps[iline] = cosmean
    sinamps[iline] = sinmean
    coserrs[iline] = coserr
    sinerrs[iline] = sinerr
    ffac = 2 * np.pi * fline * t
    template += cosmean * np.cos(ffac) + sinmean * np.sin(ffac)
    iline += 1

ind = np.argsort(linefreq)

if col == '':
    lw = 5
else:
    lw = 2

plt.subplot(3, 1, 1)
plt.errorbar(linefreq[ind], cosamps[ind] * scale, coserrs[ind] * scale,
             fmt='-o', label=det + col, lw=lw)
plt.subplot(3, 1, 2)
plt.errorbar(linefreq[ind], sinamps[ind] * scale, sinerrs[ind] * scale,
             fmt='-o', label=det + col, lw=lw)
plt.subplot(3, 1, 3)
plt.plot(t, template * scale, label=det + col, lw=lw)
def update(self, experiment):
    result = DataReader.get_episodes_success_counts(experiment)
    self.result_permutation = np.argsort(result)
    result = np.sort(result)
    self.episode_grid_heat_map.color = result.reshape(11, 51)
    self.color_result = result
def diffArea(nest, outlier = 0, data = 0, kinds = 'all', axis = 'probability', ROI = 20 , mu = 0, sigma = 1, weight = False, interpolator = 'linear', distribuition = 'normal',seed = None, plot = True): """ Return an error area between a analitic function and a estimated discretization from a distribuition. Parameters ---------- nest: int The number of estimation points. outlier: int, optional Is the point of an outlier event, e.g outlier = 50 will put an event in -50 and +50 if mu = 0. Defaut is 0 data: int, optional If data > 0, a randon data will be inserted insted analitcs data. Defaut is 0. kinds: str or array, optional specifies the kind of distribuition to analize. ('Linspace', 'CDFm', 'PDFm', 'iPDF1', 'iPDF2', 'all'). Defaut is 'all'. axis: str, optional specifies the x axis to analize ('probability', 'derivative', '2nd_derivative', 'X'). Defaut is 'probability'. ROI: int, optional Specifies the number of regions of interest. Defaut is 20. mu: int, optional Specifies the mean of distribuition. Defaut is 0. sigma: int, optional Specifies the standard desviation of a distribuition. Defaut is 1. weight: bool, optional if True, each ROI will have a diferent weight to analyze. Defaut is False interpolator: str, optional Specifies the kind of interpolation as a string ('linear', 'nearest', 'zero', 'slinear', 'quadratic', 'cubic' where 'zero', 'slinear', 'quadratic' and 'cubic' refer to a spline interpolation of zeroth, first, second or third order) or as an integer specifying the order of the spline interpolator to use. Default is 'linear'. distribuition: str, optional Select the distribuition to analyze. ('normal', 'lognormal') Defaut is 'normal' plot: bool, optional If True, a plot will be ploted with the analyzes Defaut is True Returns ------- a, [b,c]: float and float of ndarray. area,[probROIord,areaROIord] returns the sum of total error area and the 'x' and 'y' values. 
""" import numpy as np from scipy.stats import norm, lognorm from scipy.interpolate import interp1d from numpy import exp import matplotlib.pyplot as plt from statsmodels.distributions import ECDF from distAnalyze import pdf, dpdf, ddpdf, PDF, dPDF, ddPDF area = [] n = [] data = int(data) if distribuition == 'normal': outlier_inf = outlier_sup = outlier elif distribuition == 'lognormal': outlier_inf = 0 outlier_sup = outlier ngrid = int(1e6) truth = pdf if axis == 'probability': truth1 = pdf elif axis == 'derivative': truth1 = dpdf elif axis == '2nd_derivative': truth1 = ddpdf elif axis == 'X': truth1 = lambda x,mu,sigma,distribuition: x #else: return 'No valid axis' probROIord = {} areaROIord = {} div = {} if seed is not None: np.random.set_state(seed) if data: if distribuition == 'normal': d = np.random.normal(mu,sigma,data) elif distribuition == 'lognormal': d = np.random.lognormal(mu, sigma, data) if kinds == 'all': kinds = ['Linspace', 'CDFm', 'PDFm', 'iPDF1', 'iPDF2'] elif type(kinds) == str: kinds = [kinds] for kind in kinds: if distribuition == 'normal': inf, sup = norm.interval(0.9999, loc = mu, scale = sigma) elif distribuition == 'lognormal': inf, sup = lognorm.interval(0.9999, sigma, loc = 0, scale = exp(mu)) xgrid = np.linspace(inf,sup,ngrid) xgridROI = xgrid.reshape([ROI,ngrid//ROI]) dx = np.diff(xgrid)[0] if kind == 'Linspace': if not data: xest = np.linspace(inf-outlier_inf,sup+outlier_sup,nest) else: if distribuition == 'normal': #d = np.random.normal(loc = mu, scale = sigma, size = data) inf,sup = min(d),max(d) xest = np.linspace(inf-outlier_inf,sup+outlier_sup,nest) elif distribuition == 'lognormal': #d = np.random.lognormal(mean = mu, sigma = sigma, size = data) inf,sup = min(d),max(d) xest = np.linspace(inf-outlier_inf,sup+outlier_sup,nest) yest = pdf(xest,mu,sigma,distribuition) elif kind == 'CDFm': eps = 5e-5 yest = np.linspace(0+eps,1-eps,nest) if distribuition == 'normal': if not data: xest = norm.ppf(yest, loc = mu, scale = sigma) yest = pdf(xest,mu,sigma,distribuition) else: #d = np.random.normal(loc = mu, scale = sigma, size = data) ecdf = ECDF(d) inf,sup = min(d),max(d) xest = np.linspace(inf,sup,data) yest = ecdf(xest) interp = interp1d(yest,xest,fill_value = 'extrapolate', kind = 'nearest') yest = np.linspace(eps,1-eps,nest) xest = interp(yest) elif distribuition == 'lognormal': if not data: xest = lognorm.ppf(yest, sigma, loc = 0, scale = exp(mu)) yest = pdf(xest,mu,sigma,distribuition) else: #d = np.random.lognormal(mean = mu, sigma = sigma, size = data) ecdf = ECDF(d) inf,sup = min(d),max(d) xest = np.linspace(inf,sup,nest) yest = ecdf(xest) interp = interp1d(yest,xest,fill_value = 'extrapolate', kind = 'nearest') yest = np.linspace(eps,1-eps,nest) xest = interp(yest) elif kind == 'PDFm': xest, yest = PDF(nest,mu,sigma, distribuition, outlier, data, seed) elif kind == 'iPDF1': xest, yest = dPDF(nest,mu,sigma, distribuition, outlier, data, 10, seed) elif kind == 'iPDF2': xest, yest = ddPDF(nest,mu,sigma, distribuition, outlier, data, 10, seed) YY = pdf(xest,mu, sigma,distribuition) fest = interp1d(xest,YY,kind = interpolator, bounds_error = False, fill_value = (YY[0],YY[-1])) #fest = lambda x: np.concatenate([fest1(x)[fest1(x) != -1],np.ones(len(fest1(x)[fest1(x) == -1]))*fest1(x)[fest1(x) != -1][-1]]) yestGrid = [] ytruthGrid = [] ytruthGrid2 = [] divi = [] for i in range(ROI): yestGrid.append([fest(xgridROI[i])]) ytruthGrid.append([truth(xgridROI[i],mu,sigma,distribuition)]) ytruthGrid2.append([truth1(xgridROI[i],mu,sigma,distribuition)]) 
            divi.append(len(np.intersect1d(np.where(xest >= min(xgridROI[i]))[0],
                                           np.where(xest < max(xgridROI[i]))[0])))

        diff2 = np.concatenate(abs((np.array(yestGrid) - np.array(ytruthGrid)) * dx))
        #diff2[np.isnan(diff2)] = 0
        areaROI = np.sum(diff2, 1)

        divi = np.array(divi)
        divi[divi == 0] = 1

        try:
            probROI = np.mean(np.sum(ytruthGrid2, 1), 1)
        except:
            probROI = np.mean(ytruthGrid2, 1)

        probROIord[kind] = np.sort(probROI)
        index = np.argsort(probROI)
        areaROIord[kind] = areaROI[index]
        #deletes = ~np.isnan(areaROIord[kind])
        #areaROIord[kind] = areaROIord[kind][deletes]
        #probROIord[kind] = probROIord[kind][deletes]

        area = np.append(area, np.sum(areaROIord[kind]))
        n = np.append(n, len(probROIord[kind]))
        div[kind] = divi[index]

        if plot:
            if weight:
                # matplotlib has no plt.logy; semilogy gives the intended log y axis
                plt.semilogy(probROIord[kind], areaROIord[kind] * div[kind], '-o', label = kind, ms = 3)
            else:
                plt.plot(probROIord[kind], areaROIord[kind], '-o', label = kind, ms = 3)
            plt.yscale('log')
            plt.xlabel(axis)
            plt.ylabel('Error')
            plt.legend()
            #plt.title('%s - Points = %d, div = %s - %s' % (j, nest, divs, interpolator))

    return area, [probROIord, areaROIord]
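# A self-contained sketch (illustrative values, not from the source) of the
# 'CDFm' placement used above: estimation points are taken at equally spaced
# CDF values through the inverse CDF (ppf), which concentrates points where
# the probability mass is.
import numpy as np
from scipy.stats import norm

nest, eps = 100, 5e-5
yest = np.linspace(eps, 1 - eps, nest)
xest = norm.ppf(yest, loc=0, scale=1)   # dense near the mode, sparse in tails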
def slip_distribution_profile(ox, t_step, t_step_subseismic, t_step_seismic, slip_ref, depth=15e3): """ Helper routine to plot the snapshot data (slip contours) """ mask = np.isfinite(ox["x"]) x = ox["x"][mask].unique() x_order = np.argsort(x) t_vals = np.sort(ox["t"].unique()) z0 = 0 z_max = depth*1e-3 z = np.linspace(0, z_max, len(x)) + z0 Nx = len(x) Nt = len(t_vals) t_vals = t_vals[:-1] slip = ox["slip"][:Nx*(Nt-1)].values.reshape((Nt-1, Nx)) slip = slip.T[x_order].T v = ox["v"][:Nx*(Nt-1)].values.reshape((Nt-1, Nx)) v = v.T[x_order].T v_max = np.array([np.nanmax(v[i]) for i in range(Nt-1)]) v_subseismic = 1e-7 v_seismic = 1e-3 t_prev = 0 inds_seismic = (v_max >= v_seismic) inds_subseismic = (v_max >= v_subseismic) & (v_max < v_seismic) ref_ind = np.where(slip[:,0] > slip_ref)[0][0] slip_ref = slip[ref_ind,:] fig = plt.figure(figsize=(15,8), facecolor="white") colours = seaborn.color_palette("deep", 5) colours[0] = "b" colours[1] = "r" colours[2] = "b" ax = fig.add_subplot(111) for i in range(Nt-1): if inds_seismic[i]: if t_vals[i] > t_prev + t_step_seismic: plt.plot(slip[i]-slip_ref, z, ls="--", c=colours[1], lw=0.8) t_prev = t_vals[i] elif inds_subseismic[i]: if t_vals[i] > t_prev + t_step_subseismic: plt.plot(slip[i]-slip_ref, z, ls="-", c=colours[2], lw=1.0) t_prev = t_vals[i] else: if t_vals[i] > t_prev + t_step: plt.plot(slip[i]-slip_ref, z, ls="-", c=colours[0], lw=1.5) t_prev = t_vals[i] t_day = 24*3600.0 t_yr = 365*t_day plt.plot([np.nan]*2, [np.nan]*2, "-", c=colours[0], label="Interseismic (%.0f yr)" % (t_step/t_yr)) plt.plot([np.nan]*2, [np.nan]*2, "-", c=colours[2], label="Subseismic (%.1f day)" % (t_step_subseismic/t_day)) plt.plot([np.nan]*2, [np.nan]*2, "--", c=colours[1], label="Coseismic (%.1f sec)" % (t_step_seismic)) plt.legend(bbox_to_anchor=(0.0, 1.1, 1.0, .102), loc="center", ncol=3, borderaxespad=0.0) plt.ylim((np.min(z), np.max(z))) plt.xlim((0, np.max(slip)-np.max(slip_ref))) plt.ylabel("depth [km]") plt.xlabel("accumulated slip [m]") plt.gca().invert_yaxis() ax.xaxis.tick_top() ax.xaxis.set_label_position('top') plt.tight_layout() plt.subplots_adjust(top=0.85) plt.show()
def kllucb(self, anchors: list, init_stats: dict, epsilon: float, delta: float, batch_size: int, top_n: int, verbose: bool = False, verbose_every: int = 1) -> np.ndarray: """ Implements the KL-LUCB algorithm (Kaufmann and Kalyanakrishnan, 2013). Parameters ---------- anchors: A list of anchors from which two critical anchors are selected (see Kaufmann and Kalyanakrishnan, 2013). init_stats Dictionary with lists containing nb of samples used and where sample predictions equal the desired label. epsilon Precision bound tolerance for convergence. delta Used to compute beta. batch_size Number of samples. top_n Min of beam width size or number of candidate anchors. verbose Whether to print intermediate output. verbose_every Whether to print intermediate output every verbose_every steps. Returns ------- Indices of best result options. Number of indices equals min of beam width or nb of candidate anchors. """ # n_features equals to the nb of candidate anchors n_features = len(anchors) # arrays for total number of samples & positives (# samples where prediction equals desired label) n_samples, positives = init_stats['n_samples'], init_stats['positives'] anchors_to_sample, anchors_idx = [], [] for f in np.where(n_samples == 0)[0]: anchors_to_sample.append(anchors[f]) anchors_idx.append(f) if anchors_idx: pos, total = self.draw_samples(anchors_to_sample, 1) positives[anchors_idx] += pos n_samples[anchors_idx] += total if n_features == top_n: # return all options b/c of beam search width return np.arange(n_features) # update the upper and lower precision bounds until the difference between the best upper ... # ... precision bound of the low precision anchors and the worst lower precision bound of the high ... # ... precision anchors is smaller than eps means = positives / n_samples # fraction sample predictions equal to desired label ub, lb = np.zeros(n_samples.shape), np.zeros(n_samples.shape) t = 1 crit_a_idx = self.select_critical_arms(means, ub, lb, n_samples, delta, top_n, t) B = ub[crit_a_idx.ut] - lb[crit_a_idx.lt] verbose_count = 0 while B > epsilon: verbose_count += 1 if verbose and verbose_count % verbose_every == 0: ut, lt = crit_a_idx print('Best: %d (mean:%.10f, n: %d, lb:%.4f)' % (lt, means[lt], n_samples[lt], lb[lt]), end=' ') print('Worst: %d (mean:%.4f, n: %d, ub:%.4f)' % (ut, means[ut], n_samples[ut], ub[ut]), end=' ') print('B = %.2f' % B) # draw samples for each critical result, update anchors' mean, upper and lower # bound precision estimate selected_anchors = [anchors[idx] for idx in crit_a_idx] pos, total = self.draw_samples(selected_anchors, batch_size) idx = list(crit_a_idx) positives[idx] += pos n_samples[idx] += total means = positives / n_samples t += 1 crit_a_idx = self.select_critical_arms(means, ub, lb, n_samples, delta, top_n, t) B = ub[crit_a_idx.ut] - lb[crit_a_idx.lt] sorted_means = np.argsort(means) return sorted_means[-top_n:]
def fit(self, X, y, overwrite_X=False, overwrite_y=False, verbose=False): """ Fit an OASIS model. """ if not overwrite_X: X = X.copy() if not overwrite_y: y = y.copy() n_samples, n_features = X.shape self.init = np.random.RandomState(self.random_seed) # Parameter initialization self._weights = np.eye(n_features).flatten() # self._weights = np.random.randn(n_features,n_features).flatten() W = self._weights.view() W.shape = (n_features, n_features) ind = np.argsort(y) y = y[ind] X = X[ind, :] classes = np.unique(y) classes.sort() n_classes = len(classes) # Translate class labels to serial integers 0, 1, ... y_new = np.empty((n_samples, ), dtype='int') for ii in xrange(n_classes): y_new[y == classes[ii]] = ii y = y_new class_sizes = [None] * n_classes class_start = [None] * n_classes for ii in xrange(n_classes): class_sizes[ii] = np.sum(y == ii) # This finds the first occurrence of that class class_start[ii] = np.flatnonzero(y == ii)[0] loss_steps = np.empty((self.n_iter, ), dtype='bool') n_batches = int(np.ceil(self.n_iter / self.save_every)) steps_vec = np.ones((n_batches, ), dtype='int') * self.save_every steps_vec[-1] = self.n_iter - (n_batches - 1) * self.save_every if verbose: print 'n_batches = %d, total n_iter = %d' % (n_batches, self.n_iter) for bb in xrange(n_batches): if verbose: print 'run batch %d/%d, for %d steps ("." = 100 steps)\n' \ % (bb + 1, n_batches, self.save_every) W, loss_steps_batch = self._fit_batch(W, X, y, class_start, class_sizes, steps_vec[bb], verbose=verbose) # print "loss_steps_batch = %d" % sum(loss_steps_batch) loss_steps[bb * self.save_every:min( (bb + 1) * self.save_every, self.n_iter)] = loss_steps_batch if self.do_sym: if np.mod(bb + 1, self.sym_every) == 0 or bb == n_batches - 1: if verbose: print "Symmetrizing" symmetrize(W) if self.do_psd: if np.mod(bb + 1, self.psd_every) == 0 or bb == n_batches - 1: if verbose: print "PSD" make_psd(W) if self.save_path is not None: self._save(bb + 1) # back up model state return self
def _fit(self, x, y, sample_weight, check_input):
    time_init = time.perf_counter()

    if self.verbose:
        self._logger.info("Optimal binning started.")
        self._logger.info("Options: check parameters.")

    _check_parameters(**self.get_params())

    # Pre-processing
    if self.verbose:
        self._logger.info("Pre-processing started.")

    self._n_samples = len(x)

    if self.verbose:
        self._logger.info("Pre-processing: number of samples: {}"
                          .format(self._n_samples))

    time_preprocessing = time.perf_counter()

    [x_clean, y_clean, x_missing, y_missing, x_special, y_special,
     y_others, categories, cat_others, sw_clean, sw_missing, sw_special,
     sw_others] = split_data(
        self.dtype, x, y, self.special_codes, self.cat_cutoff,
        self.user_splits, check_input, self.outlier_detector,
        self.outlier_params, None, None, self.class_weight, sample_weight)

    self._time_preprocessing = time.perf_counter() - time_preprocessing

    if self.verbose:
        n_clean = len(x_clean)
        n_missing = len(x_missing)
        n_special = len(x_special)

        self._logger.info("Pre-processing: number of clean samples: {}"
                          .format(n_clean))
        self._logger.info("Pre-processing: number of missing samples: {}"
                          .format(n_missing))
        self._logger.info("Pre-processing: number of special samples: {}"
                          .format(n_special))

        if self.outlier_detector is not None:
            n_outlier = self._n_samples - (n_clean + n_missing + n_special)
            self._logger.info("Pre-processing: number of outlier samples: "
                              "{}".format(n_outlier))

        if self.dtype == "categorical":
            n_categories = len(categories)
            n_categories_others = len(cat_others)
            n_others = len(y_others)

            self._logger.info("Pre-processing: number of others samples: "
                              "{}".format(n_others))
            self._logger.info("Pre-processing: number of categories: {}"
                              .format(n_categories))
            self._logger.info("Pre-processing: number of categories "
                              "others: {}".format(n_categories_others))

        self._logger.info("Pre-processing terminated. Time: {:.4f}s"
                          .format(self._time_preprocessing))

    # Pre-binning
    if self.verbose:
        self._logger.info("Pre-binning started.")

    time_prebinning = time.perf_counter()

    if self.user_splits is not None:
        n_splits = len(self.user_splits)

        if self.verbose:
            self._logger.info("Pre-binning: user splits supplied: {}"
                              .format(n_splits))

        if not n_splits:
            splits = self.user_splits
            n_nonevent = np.array([])
            n_event = np.array([])
        else:
            if self.dtype == "numerical":
                user_splits = check_array(
                    self.user_splits, ensure_2d=False, dtype=None,
                    force_all_finite=True)

                if len(set(user_splits)) != len(user_splits):
                    raise ValueError("User splits are not unique.")

                sorted_idx = np.argsort(user_splits)
                user_splits = user_splits[sorted_idx]
            else:
                [categories, user_splits, x_clean, y_clean, y_others,
                 cat_others, sw_clean, sw_others, sorted_idx,
                 ] = preprocessing_user_splits_categorical(
                    self.user_splits, x_clean, y_clean, sw_clean)

            if self.user_splits_fixed is not None:
                self.user_splits_fixed = np.asarray(
                    self.user_splits_fixed)[sorted_idx]

            splits, n_nonevent, n_event = self._prebinning_refinement(
                user_splits, x_clean, y_clean, y_missing, y_special,
                y_others, sw_clean, sw_missing, sw_special, sw_others)
    else:
        splits, n_nonevent, n_event = self._fit_prebinning(
            x_clean, y_clean, y_missing, y_special, y_others,
            self.class_weight, sw_clean, sw_missing, sw_special, sw_others)

    self._n_prebins = len(n_nonevent)

    self._categories = categories
    self._cat_others = cat_others

    self._time_prebinning = time.perf_counter() - time_prebinning

    if self.verbose:
        self._logger.info("Pre-binning: number of prebins: {}"
                          .format(self._n_prebins))
        self._logger.info("Pre-binning: number of refinements: {}"
                          .format(self._n_refinements))
        self._logger.info("Pre-binning terminated. Time: {:.4f}s"
                          .format(self._time_prebinning))

    # Optimization
    self._fit_optimizer(splits, n_nonevent, n_event)

    # Post-processing
    if self.verbose:
        self._logger.info("Post-processing started.")
        self._logger.info("Post-processing: compute binning information.")

    time_postprocessing = time.perf_counter()

    if not len(splits):
        t_info = target_info_samples(y_clean, sw_clean)
        n_nonevent = np.array([t_info[0]])
        n_event = np.array([t_info[1]])

    self._n_nonevent, self._n_event = bin_info(
        self._solution, n_nonevent, n_event, self._n_nonevent_missing,
        self._n_event_missing, self._n_nonevent_special,
        self._n_event_special, self._n_nonevent_cat_others,
        self._n_event_cat_others, cat_others)

    if self.dtype == "numerical":
        min_x = x_clean.min()
        max_x = x_clean.max()
    else:
        min_x = None
        max_x = None

    self._binning_table = BinningTable(
        self.name, self.dtype, self._splits_optimal, self._n_nonevent,
        self._n_event, min_x, max_x, self._categories, self._cat_others,
        self.user_splits)

    self._time_postprocessing = time.perf_counter() - time_postprocessing

    if self.verbose:
        self._logger.info("Post-processing terminated. Time: {:.4f}s"
                          .format(self._time_postprocessing))

    self._time_total = time.perf_counter() - time_init

    if self.verbose:
        self._logger.info("Optimal binning terminated. Status: {}. "
                          "Time: {:.4f}s"
                          .format(self._status, self._time_total))

    # Completed successfully
    self._class_logger.close()

    self._is_fitted = True

    return self
def greengrass_classification_sample_run(): client.publish(topic=PARAM_TOPIC_NAME, payload="OpenVINO: Initializing...") model_bin = os.path.splitext(PARAM_MODEL_XML)[0] + ".bin" # Plugin initialization for specified device and load extensions library if specified plugin = IEPlugin(device=PARAM_DEVICE, plugin_dirs="") if "CPU" in PARAM_DEVICE: plugin.add_cpu_extension(PARAM_CPU_EXTENSION_PATH) # Read IR net = IENetwork.from_ir(model=PARAM_MODEL_XML, weights=model_bin) assert len( net.inputs.keys()) == 1, "Sample supports only single input topologies" assert len( net.outputs) == 1, "Sample supports only single output topologies" input_blob = next(iter(net.inputs)) out_blob = next(iter(net.outputs)) # Read and pre-process input image n, c, h, w = net.inputs[input_blob].shape cap = cv2.VideoCapture(PARAM_INPUT_SOURCE) exec_net = plugin.load(network=net) del net client.publish(topic=PARAM_TOPIC_NAME, payload="Starting inference on %s" % PARAM_INPUT_SOURCE) start_time = timeit.default_timer() inf_seconds = 0.0 frame_count = 0 res_json = [] labeldata = None if PARAM_LABELMAP_FILE is not None: with open(PARAM_LABELMAP_FILE) as labelmap_file: labeldata = json.load(labelmap_file) while (cap.isOpened()): ret, frame = cap.read() if not ret: break frameid = cap.get(cv2.CAP_PROP_POS_FRAMES) initial_w = cap.get(3) initial_h = cap.get(4) in_frame = cv2.resize(frame, (w, h)) in_frame = in_frame.transpose( (2, 0, 1)) # Change data layout from HWC to CHW in_frame = in_frame.reshape((n, c, h, w)) # Start synchronous inference inf_start_time = timeit.default_timer() res = exec_net.infer(inputs={input_blob: in_frame}) inf_seconds += timeit.default_timer() - inf_start_time top_ind = np.argsort(res[out_blob], axis=1)[0, -PARAM_NUM_TOP_RESULTS:][::-1] # Parse detection results of the current request res_json = OrderedDict() res_json["Candidates"] = OrderedDict() frame_timestamp = datetime.datetime.now() for i in top_ind: classlabel = labeldata[str(i)] if labeldata else str(i) res_json["Candidates"][classlabel] = round(res[out_blob][0, i], 2) frame_count += 1 # Measure elapsed seconds since the last report seconds_elapsed = timeit.default_timer() - start_time if seconds_elapsed >= reporting_interval: res_json["timestamp"] = frame_timestamp.isoformat() res_json["frame_id"] = int(frameid) res_json["inference_fps"] = frame_count / inf_seconds start_time = timeit.default_timer() report(res_json, frame) frame_count = 0 inf_seconds = 0.0 client.publish(topic=PARAM_TOPIC_NAME, payload="End of the input, exiting...") del exec_net del plugin
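# A minimal sketch (illustrative scores, not from the source) of the top-N
# selection used above: argsort ascending, keep the last N indices, then
# reverse so the best class comes first.
import numpy as np

scores = np.array([0.1, 0.7, 0.05, 0.15])
top_n = 2
top_ind = np.argsort(scores)[-top_n:][::-1]   # array([1, 3])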
''' for Toeplitz Matrices '''
# Adding +1 is to counter the bias in the corresponding layer
params = [0.01 * np.random.randn(numInput + numHidden1),
          0.01 * np.random.randn(numHidden1 + numHidden2),
          0.01 * np.random.randn(numHidden2 + numOutput)]
#params = [np.zeros(numInput+numHidden1), np.zeros(numHidden1+numHidden2), np.zeros(numHidden2+numOutput)]

reward_episode = []
for episode in range(num_episodes):
    print('episode : ', episode)
    seeds = np.random.randint(10000, size=num_workers)
    reward_workers, epsilon_wi, epsilon_wh, epsilon_wo = [list(x) for x in zip(*main(seeds, params))]
    reward_episode.append([np.mean(reward_workers), np.median(reward_workers)])
    index_sort = np.argsort(reward_workers)
    reward_workers = np.sort(reward_workers)
    fitness = fitness_shaping_paper(reward_workers)
    print("mean reward:")
    print(np.mean(reward_workers))
    print("median reward:")
    print(np.median(reward_workers))
    print("max reward:")
    print(np.max(reward_workers))
    print("min reward:")
    print(np.min(reward_workers))
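# A sketch (illustrative rewards, not from the source) of the rank view behind
# fitness shaping: one argsort orders workers from worst to best reward, and a
# second argsort turns that ordering into the rank held by each worker.
import numpy as np

rewards = np.array([5.0, -2.0, 7.0, 0.5])
order = np.argsort(rewards)   # [1, 3, 0, 2]: worker indices, worst to best
ranks = np.argsort(order)     # [2, 0, 3, 1]: rank held by each worker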
                  tick_label=order)

# Get 20CR data for the grid metadata
ic=twcr.load('prate',datetime.datetime(1969,3,12,6),
             version='2c')
ic=ic.extract(iris.Constraint(member=1))

# Get the autoencoder
model_save_file=("%s/Machine-Learning-experiments/"+
                 "simple_autoencoder_variables/prate/"+
                 "saved_models/Epoch_%04d") % (
                     os.getenv('SCRATCH'),100)
autoencoder=tf.keras.models.load_model(model_save_file)

# Get the order of the hidden weights - most to least important
order=numpy.argsort(numpy.abs(autoencoder.get_weights()[1]))[::-1]

# Make a comparison plot - Input, hidden, and output weights
fig=Figure(figsize=(10,12),  # Width, Height (inches)
           dpi=100,
           facecolor=(0.88,0.88,0.88,1),
           edgecolor=None,
           linewidth=0.0,
           frameon=False,
           subplotpars=None,
           tight_layout=None)
canvas=FigureCanvas(fig)

# Hidden layer
plot_hidden(autoencoder.get_weights()[1])
def power_diagram(face, uv, h=None, dh=None):
    if h is None:
        h = np.zeros((uv.shape[0], 1))
    if dh is None:
        dh = h * 0
    nf = face.shape[0]
    c = 1
    while True:
        h = h - c * dh
        pl = np.concatenate((uv, np.reshape(np.square(norm(uv, axis=1)), (-1, 1)) - h), axis=1)
        hull = ConvexHull(pl, qhull_options='Qt')
        face = hull.simplices
        # fix-ups for the convex hull, as the orientation may be inverted
        fn_from_hull = hull.equations[:, 2]
        fn = calculate_face_normal(face, pl)
        for i in range(face.shape[0]):
            if fn[i, 2] * fn_from_hull[i] < 0:  # orientation differs
                face[i, :] = face[i, [0, 2, 1]]
        for i in range(face.shape[0]):
            mif = np.argmin(face[i, :])
            face[i, :] = face[i, np.mod(np.arange(mif, mif + 3), 3)]
        face = face[np.argsort(face[:, 0] * np.max(face) + face[:, 1]), :]
        fn = calculate_face_normal(face, pl)
        ind = fn[:, 2] < 0
        if np.sum(ind) < nf:
            h = h + c * dh
            c = c / 2
        else:
            break
        if np.max(abs(dh)) == 0:
            break
    fn = calculate_face_normal(face, pl)
    ind = fn[:, 2] < 0
    face = face[ind, :]
    pd = dict()
    pd['face'] = face
    vr = compute_vertex_ring(face, uv, ordered=True)
    pd['uv'] = uv
    pd['dp'] = np.zeros((face.shape[0], 2))
    pd['cell'] = [[] for i in range(pl.shape[0])]
    for i in range(face.shape[0]):
        dp = face_dual_uv(pl[face[i, :], :])
        pd['dp'][i, :] = dp
    K = ConvexHull(uv, qhull_options='Qt').vertices
    ks = np.argmin(K)
    K = np.concatenate((K[ks::], K[0:ks]), axis=0)
    K = np.append(K, K[0])
    vb = np.zeros((K.shape[0] - 1, 2))
    mindp = np.min(pd["dp"], axis=0) - 1
    maxdp = np.max(pd["dp"], axis=0) + 1
    minx = mindp[0]
    miny = mindp[1]
    maxx = maxdp[0]
    maxy = maxdp[1]
    box = np.array([minx, miny, maxx, miny, maxx, maxy, minx, maxy, minx, miny]).reshape((-1, 2))
    for i in range(K.shape[0] - 1):
        i1 = K[i]
        i2 = K[i + 1]
        vec = uv[i2, :] - uv[i1, :]
        vec = np.array([vec[1], -vec[0]])
        mid = (uv[i2, :] + uv[i1, :]) / 2.0
        intersect = intersectRayPolygon(mid, vec, box)
        vb[i, :] = intersect
    pd["dpe"] = np.concatenate((pd["dp"], vb), axis=0)
    vvif, _, _ = compute_connectivity(face)
    for i in range(uv.shape[0]):
        vri = vr[i]
        pb = np.argwhere(K == i)
        if pb.size > 0:
            pb = pb[0][0]
            fr = np.zeros((len(vri) + 1,)).astype(int)
            fr[-1] = face.shape[0] + pb
            if pb == 0:
                fr[0] = face.shape[0] + K.shape[0] - 2
            else:
                fr[0] = face.shape[0] + pb - 1
            for j in range(len(vri) - 1):
                fr[j + 1] = vvif[i, vri[j]]
        else:
            fr = np.zeros((len(vri),)).astype(int)
            for j in range(len(vri)):
                fr[j] = vvif[i, vri[j]]
        pd["cell"][i] = np.flip(fr)
    return pd, h
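# A possible alternative to the face-ordering trick above (a sketch under
# illustrative data, not from the source): np.lexsort sorts rows by the first
# column and then the second column directly, without the np.max(face) scaling.
import numpy as np

face = np.array([[2, 0, 1], [0, 2, 1], [0, 1, 2]])
order = np.lexsort((face[:, 1], face[:, 0]))   # primary key goes last
face = face[order]                             # rows sorted by (col 0, col 1)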
def plda(
    self,
    stat_server=None,
    output_file_name=None,
    whiten=False,
    w_stat_server=None,
):
    """Trains a PLDA model with no within-class covariance matrix but a full
    residual covariance matrix. The rank of the between-class covariance
    matrix (rank_f), the number of EM iterations (nb_iter) and the statistics
    scaling factor (scaling_factor, between 0 and 1) are instance attributes.

    Arguments
    ---------
    stat_server : speechbrain.processing.PLDA_LDA.StatObject_SB
        Contains vectors and meta-information to perform PLDA.
    output_file_name : str
        Name of the output file where to store the PLDA model.
    whiten : bool
        If True, whiten stat_server with the mean and total covariance of
        w_stat_server before training.
    w_stat_server : speechbrain.processing.PLDA_LDA.StatObject_SB
        Statistics used for the optional whitening step.
    """
    # Dimension of the vector (x-vectors stored in stat1)
    vect_size = stat_server.stat1.shape[1]

    # Whitening (optional)
    if whiten is True:
        w_mean = w_stat_server.get_mean_stat1()
        w_Sigma = w_stat_server.get_total_covariance_stat1()
        stat_server.whiten_stat1(w_mean, w_Sigma)

    # Initialize mean and residual covariance from the training data
    self.mean = stat_server.get_mean_stat1()
    self.Sigma = stat_server.get_total_covariance_stat1()

    # Sum stat0 and stat1 for each speaker model
    model_shifted_stat, session_per_model = stat_server.sum_stat_per_model()

    # Number of speakers (classes) in training set
    class_nb = model_shifted_stat.modelset.shape[0]

    # Multiply statistics by scaling_factor
    model_shifted_stat.stat0 *= self.scaling_factor
    model_shifted_stat.stat1 *= self.scaling_factor
    session_per_model *= self.scaling_factor

    # Covariance for stat1
    sigma_obs = stat_server.get_total_covariance_stat1()
    evals, evecs = linalg.eigh(sigma_obs)

    # Initial F (eigenvoice matrix) from rank
    idx = numpy.argsort(evals)[::-1]
    evecs = evecs.real[:, idx[:self.rank_f]]
    self.F = evecs[:, :self.rank_f]

    # Estimate the PLDA model by iterating the EM algorithm
    for it in range(self.nb_iter):

        # E-step
        # print(
        #     f"E-step: Estimate between class covariance, it {it+1} / {nb_iter}"
        # )

        # Copy stats as they will be whitened with a different Sigma for each iteration
        local_stat = copy.deepcopy(model_shifted_stat)

        # Whiten statistics (with the new mean and Sigma)
        local_stat.whiten_stat1(self.mean, self.Sigma)

        # Whiten the eigenvoice matrix
        eigen_values, eigen_vectors = linalg.eigh(self.Sigma)
        ind = eigen_values.real.argsort()[::-1]
        eigen_values = eigen_values.real[ind]
        eigen_vectors = eigen_vectors.real[:, ind]
        sqr_inv_eval_sigma = 1 / numpy.sqrt(eigen_values.real)
        sqr_inv_sigma = numpy.dot(eigen_vectors, numpy.diag(sqr_inv_eval_sigma))
        self.F = sqr_inv_sigma.T.dot(self.F)

        # Replicate self.stat0
        index_map = numpy.zeros(vect_size, dtype=int)
        _stat0 = local_stat.stat0[:, index_map]

        e_h = numpy.zeros((class_nb, self.rank_f))
        e_hh = numpy.zeros((class_nb, self.rank_f, self.rank_f))

        # loop on model id's
        fa_model_loop(
            batch_start=0,
            mini_batch_indices=numpy.arange(class_nb),
            factor_analyser=self,
            stat0=_stat0,
            stat1=local_stat.stat1,
            e_h=e_h,
            e_hh=e_hh,
        )

        # Accumulate for minimum divergence step
        _R = numpy.sum(e_hh, axis=0) / session_per_model.shape[0]

        _C = e_h.T.dot(local_stat.stat1).dot(linalg.inv(sqr_inv_sigma))
        _A = numpy.einsum("ijk,i->jk", e_hh, local_stat.stat0.squeeze())

        # M-step
        # print("M-step")
        self.F = linalg.solve(_A, _C).T

        # Update the residual covariance
        self.Sigma = sigma_obs - self.F.dot(_C) / session_per_model.sum()

        # Minimum Divergence step
        self.F = self.F.dot(linalg.cholesky(_R))
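# A self-contained sketch (illustrative matrix, not from the source) of the
# descending eigendecomposition pattern used twice above: eigh returns
# eigenvalues in ascending order, so argsort()[::-1] reorders eigenvalues and
# eigenvector columns from largest to smallest.
import numpy
from scipy import linalg

S = numpy.array([[2.0, 0.5], [0.5, 1.0]])
evals, evecs = linalg.eigh(S)
idx = numpy.argsort(evals)[::-1]
evals, evecs = evals[idx], evecs[:, idx]   # columns follow their eigenvalues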
def __initasteroid(self):
    """Initialize the asteroid properties
    """
    # define the mass properties of the asteroid
    if self.name == 'castalia':
        self.M = 1.4091e12
        self.sigma = 2.1  # g/cm^3
        self.axes = np.array([1.6130, 0.9810, 0.8260]) / 2.0
        self.omega = 2 * np.pi / 4.07 / 3600
        # self.C20 = -7.275e-2
        # self.C22 = 2.984e-2
    elif self.name == 'itokawa':
        self.M = 3.51e10
        self.sigma = 1.9  # g/cm^3
        self.axes = np.array([535, 294, 209]) / 2 / 1.0e3  # size in meters
        self.omega = 2 * np.pi / 12.132 / 3600
    elif self.name == 'eros':
        self.M = 4.463e-4 / self.G
        self.sigma = 2.67  # g/cm^3
        self.axes = np.array([34.4, 11.7, 11.7])  # size in kilometers
        self.omega = 2 * np.pi / 5.27 / 3600
    elif self.name == 'cube':
        self.M = 1
        self.sigma = 1
        self.axes = np.array([0.9, 1.0, 1.1])
        self.omega = 1
    elif self.name == 'tetrahedron':
        self.M = 1
        self.sigma = 1
        self.axes = np.array([0.9, 1.0, 1.1])
        self.omega = 1
    else:
        self.logger.error('Unknown asteroid name : {}'.format(self.name))
        self.logger.error('Just assuming default values')
        self.M = 1
        self.sigma = 1
        self.axes = np.array([0.9, 1.0, 1.1])
        self.omega = 1

    self.mu = self.G * self.M
    self.sigma = self.sigma / 1000 * \
        (100 / 1)**3 * (1000 / 1)**3  # kg/km^3

    # Compute some inertia properties
    self.Ixx = self.M / 5 * (self.axes[1]**2 + self.axes[2]**2)
    self.Iyy = self.M / 5 * (self.axes[0]**2 + self.axes[2]**2)
    self.Izz = self.M / 5 * (self.axes[0]**2 + self.axes[1]**2)

    self.mass_param = (self.Iyy - self.Ixx) / (self.Izz - self.Ixx)
    self.res_radius = (self.mu / self.omega**2)**(1.0 / 3)
    self.dist_scale = self.res_radius
    self.time_scale = self.omega
    self.C20 = -1.0 / 2 * (self.Izz - self.Ixx) * \
        (2 - self.mass_param) / self.dist_scale**2 / self.M
    self.C22 = 1.0 / 4 * (self.Izz - self.Ixx) * \
        self.mass_param / self.dist_scale**2 / self.M

    # calculate the distance, longitude and latitude (in degrees)
    self.r = np.sqrt(self.V[:, 0]**2 + self.V[:, 1]**2 + self.V[:, 2]**2)
    self.long = np.arctan2(self.V[:, 1], self.V[:, 0]) * 180 / np.pi
    self.lat = np.arcsin(self.V[:, 2] / self.r) * 180 / np.pi

    # sort in order of increasing radius
    index = np.argsort(self.r)
    self.r = self.r[index]
    self.long = self.long[index]
    self.lat = self.lat[index]
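# A minimal sketch (illustrative values, not from the source) of the
# parallel-array sort at the end of __initasteroid: a single argsort index
# keeps radius, longitude and latitude aligned after the reordering.
import numpy as np

r = np.array([3.0, 1.0, 2.0])
lon = np.array([10.0, 20.0, 30.0])
lat = np.array([-5.0, 0.0, 5.0])
index = np.argsort(r)
r, lon, lat = r[index], lon[index], lat[index]   # all three stay in step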
                      col].set_title("Predicted label :{}\nTrue label :{}".format(
                          pred_errors[error], obs_errors[error]))
            n += 1

# Probabilities of the wrongly predicted numbers
Y_pred_errors_prob = np.max(Y_pred_errors, axis=1)

# Predicted probabilities of the true values in the error set
true_prob_errors = np.diagonal(np.take(Y_pred_errors, Y_true_errors, axis=1))

# Difference between the probability of the predicted label and the true label
delta_pred_true_errors = Y_pred_errors_prob - true_prob_errors

# Sorted list of the delta prob errors
sorted_delta_errors = np.argsort(delta_pred_true_errors)

# Top 6 errors
most_important_errors = sorted_delta_errors[-6:]

# Show the top 6 errors
display_errors(most_important_errors, X_val_errors, Y_pred_classes_errors, Y_true_errors)

plt.show()

results = model.predict(x_test)

# select the index with the maximum probability
results = np.argmax(results, axis=1)
####################################################### CREATE GIFS ########################################################
max_gif_steps = 200
delay = 20
for i, exp_folder in enumerate(onlyfolders):
    print('\nCreating gifs for experiment: ', exp_folder)
    result_paths = ''
    for subdir in [
            'dataset_plotter_data_only', 'dataset_plotter_data_real',
            'plot2D_dist', 'plot2D_dist_b_labeled', 'barplot', 'Train/Fixed/0'
    ]:
        subdir_path = all_experiments_dir + exp_folder + '/Visualization/' + subdir + '/*.png'
        files = glob.glob(subdir_path)
        try:
            # step number is the third-to-last underscore-separated field
            order = list(
                np.argsort(
                    [int(filename.split('_')[-3]) for filename in files]))
        except:
            # otherwise the step number is the last field before '.png'
            order = list(
                np.argsort([
                    int(filename.split('_')[-1][:-len('.png')])
                    for filename in files
                ]))
        ordered_files = [files[ind] for ind in order]
        ordered_files = ordered_files[:max_gif_steps]
        ordered_files_str = ''
        for f in ordered_files:
            ordered_files_str = ordered_files_str + ' ' + f
        print('Creating gif for', subdir,
              '(number of images: %d)' % len(ordered_files))
        # os.system('convert -resize 800x800 -delay '+str(delay)+' -loop 0 '+ordered_files_str+' '+all_experiments_dir+exp_folder+'/Visualization/'+subdir+'.gif')
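# A sketch of the numeric filename ordering above (hypothetical filenames):
# lexicographic sorting would put 'step_10' before 'step_2', so the step
# number is parsed out of each name and argsort-ed instead.
import numpy as np

files = ['plot_step_10.png', 'plot_step_2.png', 'plot_step_1.png']
steps = [int(f.split('_')[-1][:-len('.png')]) for f in files]
ordered = [files[i] for i in np.argsort(steps)]   # step 1, then 2, then 10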
verbose=1) with open('predicts.pkl', 'wb') as f: pickle.dump(predicts, f) with open('predicts.pkl', 'rb') as f: predicts = pickle.load(f) print('predicts', predicts.shape) # Loop through all images for p in tqdm(predicts): # Predict Top N Image Classes # print('p', p) topn_preds = np.argsort(p)[::-1][0:topn] # print(topn_preds) p0.append(topn_preds[0]) p1.append(topn_preds[1]) p2.append(topn_preds[2]) # Create dataframe for later usage topn_df = pd.DataFrame() topn_df['filename'] = np.array(all_images) topn_df['p0'] = np.array(p0) topn_df['p1'] = np.array(p1) topn_df['p2'] = np.array(p2) topn_df.to_csv('topn_class_numbers.csv', index = False) # Summary
subcode = 'sub-01'
for o in origdirs:
    for r in range(1, 3):
        rundata = []
        rundir = os.path.join(o, 'run%d' % r)
        infiles = glob.glob(os.path.join(rundir, '*txt'))
        cond = []
        for i in infiles:
            condition = os.path.basename(i).replace('.txt', '')
            for l in open(i).readlines():
                l_s = [float(j) for j in l.strip().split('\t')]
                rundata.append(l_s[:2])
                cond.append(condition)
        rundata_array = numpy.array(rundata)
        idx = numpy.argsort(rundata_array[:, 0])
        sesscode = os.path.basename(o).replace('sess', 'ses-')
        odir = os.path.join(outdir, subcode, sesscode, 'func')
        if not os.path.exists(odir):
            os.makedirs(odir)
        outfile = os.path.join(
            odir, '%s_%s_task-objects_run-%03d_events.tsv' %
            (subcode, sesscode, r))
        f = open(outfile, 'w')
        f.write('onset\tduration\tcondition\n')
        for i in idx:
            f.write('%s\t%s\n' % ('\t'.join(['%f' % j for j in rundata[i]]),
                                  cond[i]))
        f.close()
def local_(self): bin_var = self.parameters.y[0] control_variable = self.parameters.x[0] outcome_pos = self.parameters.outcome_pos outcome_neg = self.parameters.outcome_neg total_duration = self.parameters.total_duration data = self.data.db.read_longitudinal_data_from_db(self._args) data.replace("", np.nan, inplace=True) data = data.dropna() data = data[(data[bin_var] == outcome_pos) | (data[bin_var] == outcome_neg)] if len(data) < PRIVACY_MAGIC_NUMBER: raise PrivacyError("Query results in illegal number of datapoints.") levels = list(set(data[control_variable])) data_dict = {level: data[data[control_variable] == level] for level in levels} timelines_dict = { k: build_timelines(d, time_axis="subjectvisitdate", var=bin_var) for k, d in data_dict.items() } # Remove patients who tested positive on first visit for key, timelines in timelines_dict.items(): timelines = [tl for tl in timelines if tl[1][0] != outcome_pos] timelines_dict[key] = timelines durations_dict = {} events_dict = {} for k, tl in timelines_dict.items(): durations_dict[k], events_dict[k] = convert_timelines_to_events( total_duration, outcome_pos, tl ) grouped_durations_observed_dict = {} grouped_durations_non_observed_dict = {} for key, events in events_dict.items(): durations = durations_dict[key] # Sort events by ascending duration idx = np.argsort(durations) events = events[idx] durations = durations[idx] # Split events into observed and non_observed groups durations_observed = np.array( [d for d, e in zip(durations, events) if e == 1] ) durations_non_observed = np.array( [total_duration for e in events if e == 0] ) # Remove some observations at random to allow grouping (see below) n_rem_o = len(durations_observed) % PRIVACY_MAGIC_NUMBER if n_rem_o: idx_rem = np.random.permutation(len(durations_observed))[:n_rem_o] durations_observed = np.delete(durations_observed, idx_rem) n_rem_n = len(durations_non_observed) % PRIVACY_MAGIC_NUMBER if n_rem_n: idx_rem = np.random.permutation(len(durations_non_observed))[:n_rem_n] durations_non_observed = np.delete(durations_non_observed, idx_rem) # Group observations by multiples of PRIVACY_MAGIC_NUMBER grouped_durations_observed_dict[key] = [] for group in durations_observed.reshape(-1, PRIVACY_MAGIC_NUMBER): grouped_durations_observed_dict[key] += [group[-1]] grouped_durations_non_observed_dict[key] = [] for group in durations_non_observed.reshape(-1, PRIVACY_MAGIC_NUMBER): grouped_durations_non_observed_dict[key] += [group[-1]] if all(not val for val in grouped_durations_observed_dict.values()): msg = ( "There are not enough transitions from {neg} to {pos} in the data. " "Please try with different values".format( pos=outcome_pos, neg=outcome_neg ) ) raise ExaremeError(msg) self.push_and_concat( grouped_durations_observed_dict=grouped_durations_observed_dict ) self.push_and_concat( grouped_durations_non_observed_dict=grouped_durations_non_observed_dict ) self.push_and_agree(control_variable=control_variable)
print('kb_rel_basis_pred:', kb_rel_basis_pred.shape) print('kb_rel_mask:', kb_rel_mask.shape) # kb_rel_mask = (torch.arange(kb_rel_basis_pred.shape[1]) < args.n_basis_kb)\ # .to(dtype=kb_rel_basis_pred.dtype, device=kb_rel_basis_pred.device).view(1, -1, 1).expand(kb_rel_basis_pred.shape) kb_rel_basis_pred = kb_rel_basis_pred * kb_rel_mask kb_rel_basis_pred_norm = kb_rel_basis_pred / ( 1e-12 + kb_rel_basis_pred.norm(dim=2, keepdim=True)) kb_rel_output_emb_norm = kb_rel_output_emb / ( 1e-12 + kb_rel_output_emb.norm(dim=2, keepdim=True)) coeff_sum = coeff_pred.cpu().detach().numpy() coeff_sum_diff = coeff_pred[:, :, 0] - coeff_pred[:, :, 1] coeff_sum_diff_pos = coeff_sum_diff.clamp(min=0) coeff_sum_diff_cpu = coeff_sum_diff.cpu().detach().numpy() coeff_order = np.argsort(coeff_sum_diff_cpu, axis=1) coeff_order = np.flip(coeff_order, axis=1) log.info("Basis preds and relation embs obtained.") # Convert basis pred and emb to numpy ndarrays kb_rel_basis_pred_np = kb_rel_basis_pred_norm.cpu().detach().numpy() kb_rel_output_emb_np = kb_rel_output_emb_norm.cpu().detach().numpy() # Find nearest entity pairs for each dimension basis_norm_pred = kb_rel_basis_pred_norm.permute(0, 2, 1) top_values = [] top_indices = [] for basis_norm_pred_batch in basis_norm_pred: sim_pairwise = torch.matmul(target_norm_emb, basis_norm_pred_batch).unsqueeze(0) top_value, top_index = torch.topk(sim_pairwise,
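# A sketch (illustrative values, not from the source): argsort followed by
# np.flip, as above, is equivalent to argsort of the negated array when the
# values are distinct (ties break in a different order).
import numpy as np

x = np.array([[0.2, 0.9, 0.5]])
desc_a = np.flip(np.argsort(x, axis=1), axis=1)
desc_b = np.argsort(-x, axis=1)
assert (desc_a == desc_b).all()   # both give [[1, 2, 0]]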
def prioritise(self):
    index = np.argsort(self.order)
    self.order = self.order[index]
    self.queue = self.queue[index]
    for i, node in enumerate(self.queue):
        node.q_pos = i - 1