def test_reductions_2D_int(): x = np.arange(1, 122).reshape((11, 11)).astype('i4') a = da.from_array(x, chunks=(4, 4)) reduction_2d_test(da.sum, a, np.sum, x) reduction_2d_test(da.prod, a, np.prod, x) reduction_2d_test(da.mean, a, np.mean, x) reduction_2d_test(da.var, a, np.var, x, False) # Difference in dtype algo reduction_2d_test(da.std, a, np.std, x, False) # Difference in dtype algo reduction_2d_test(da.min, a, np.min, x, False) reduction_2d_test(da.max, a, np.max, x, False) reduction_2d_test(da.any, a, np.any, x, False) reduction_2d_test(da.all, a, np.all, x, False) reduction_2d_test(da.nansum, a, np.nansum, x) with ignoring(AttributeError): reduction_2d_test(da.nanprod, a, np.nanprod, x) reduction_2d_test(da.nanmean, a, np.mean, x) reduction_2d_test(da.nanvar, a, np.nanvar, x, False) # Difference in dtype algo reduction_2d_test(da.nanstd, a, np.nanstd, x, False) # Difference in dtype algo reduction_2d_test(da.nanmin, a, np.nanmin, x, False) reduction_2d_test(da.nanmax, a, np.nanmax, x, False) assert eq(da.argmax(a, axis=0), np.argmax(x, axis=0)) assert eq(da.argmin(a, axis=0), np.argmin(x, axis=0)) assert eq(da.nanargmax(a, axis=0), np.nanargmax(x, axis=0)) assert eq(da.nanargmin(a, axis=0), np.nanargmin(x, axis=0)) assert eq(da.argmax(a, axis=1), np.argmax(x, axis=1)) assert eq(da.argmin(a, axis=1), np.argmin(x, axis=1)) assert eq(da.nanargmax(a, axis=1), np.nanargmax(x, axis=1)) assert eq(da.nanargmin(a, axis=1), np.nanargmin(x, axis=1))
def dynamic(quality_matrix): size = quality_matrix.shape[0] optimal_score = np.empty(size) optimal_score.fill(-np.inf) optimal_score[0] = 0 previous_end = np.empty(size) previous_end.fill(-1) domain_defining = np.empty(size) np.set_printoptions(threshold=np.nan) for i in range(size): cand_nodomain = np.nanargmax(optimal_score) with_domain = optimal_score + quality_matrix[:, i] cand_domain = np.nanargmax(with_domain) if optimal_score[cand_nodomain] > with_domain[cand_domain]: domain_defining[i] = 0 previous_end[i] = cand_nodomain optimal_score[i] = optimal_score[cand_nodomain] else: domain_defining[i] = 1 previous_end[i] = cand_domain optimal_score[i] = with_domain[cand_domain] current_end = size - 2 result = [] while current_end > 0: if domain_defining[current_end] == 1: result.append(Domain(Bin(previous_end[current_end]), Bin(current_end), 0)) current_end = previous_end[current_end] return result[::-1]
def get_max_social_welfare(self, by_role=False): """Returns the maximum social welfare over the known profiles. If by_role is specified, then max social welfare applies to each role independently.""" if by_role: if self.num_profiles: welfares = self.role_reduce(self.profiles * self.payoffs) prof_inds = np.nanargmax(welfares, 0) return (welfares[prof_inds, np.arange(self.num_roles)], self.profiles[prof_inds]) else: welfares = np.empty(self.num_roles) welfares.fill(np.nan) profiles = np.empty(self.num_roles, dtype=object) profiles.fill(None) return welfares, profiles else: if self.num_profiles: welfares = np.sum(self.profiles * self.payoffs, 1) prof_ind = np.nanargmax(welfares) return welfares[prof_ind], self.profiles[prof_ind] else: return np.nan, None
def test_reductions_1D(dtype): x = np.arange(5).astype(dtype) a = da.from_array(x, chunks=(2,)) reduction_1d_test(da.sum, a, np.sum, x) reduction_1d_test(da.prod, a, np.prod, x) reduction_1d_test(da.mean, a, np.mean, x) reduction_1d_test(da.var, a, np.var, x) reduction_1d_test(da.std, a, np.std, x) reduction_1d_test(da.min, a, np.min, x, False) reduction_1d_test(da.max, a, np.max, x, False) reduction_1d_test(da.any, a, np.any, x, False) reduction_1d_test(da.all, a, np.all, x, False) reduction_1d_test(da.nansum, a, np.nansum, x) with ignoring(AttributeError): reduction_1d_test(da.nanprod, a, np.nanprod, x) reduction_1d_test(da.nanmean, a, np.mean, x) reduction_1d_test(da.nanvar, a, np.var, x) reduction_1d_test(da.nanstd, a, np.std, x) reduction_1d_test(da.nanmin, a, np.nanmin, x, False) reduction_1d_test(da.nanmax, a, np.nanmax, x, False) assert eq(da.argmax(a, axis=0), np.argmax(x, axis=0)) assert eq(da.argmin(a, axis=0), np.argmin(x, axis=0)) assert eq(da.nanargmax(a, axis=0), np.nanargmax(x, axis=0)) assert eq(da.nanargmin(a, axis=0), np.nanargmin(x, axis=0)) assert eq(da.argmax(a, axis=0, split_every=2), np.argmax(x, axis=0)) assert eq(da.argmin(a, axis=0, split_every=2), np.argmin(x, axis=0)) assert eq(da.nanargmax(a, axis=0, split_every=2), np.nanargmax(x, axis=0)) assert eq(da.nanargmin(a, axis=0, split_every=2), np.nanargmin(x, axis=0))
def predict_ana( model, a, a2, b, realb2 ): questWordIndices = [ model.word2id[x] for x in (a,a2,b) ] # b2 is effectively iterating through the vocab. The row is all the cosine values b2a2 = model.sim_row(a2) b2a = model.sim_row(a) b2b = model.sim_row(b) addsims = b2a2 - b2a + b2b addsims[questWordIndices] = -10000 iadd = np.nanargmax(addsims) b2add = model.vocab[iadd] # For debugging purposes ia = model.word2id[a] ia2 = model.word2id[a2] ib = model.word2id[b] ib2 = model.word2id[realb2] realaddsim = addsims[ib2] mulsims = ( b2a2 + 1 ) * ( b2b + 1 ) / ( b2a + 1.001 ) mulsims[questWordIndices] = -10000 imul = np.nanargmax(mulsims) b2mul = model.vocab[imul] return b2add, b2mul
def extract_stamp(im, xy, box_size): """ Extracts stamp centered on star/spot in image based on initial guess Args: image - a slice of the original data cube xy - initial xy coordinate guess to center of spot box_size - size of stamp to be extracted (actually, size of radial mask, box is 4 pixels bigger) Return: output - box cutout of spot with optimized center """ box_size = float(box_size) xguess = float(xy[0]) yguess = float(xy[1]) #Exctracts a 10px stamp centered on the guess and refines based on maximum pixel location for i in range(0, 2): x,y = gen_xy(10.0) x += (xguess-10/2.) y += (yguess-10/2.) output = pixel_map(im,x,y) xguess = x[np.unravel_index(np.nanargmax(output), np.shape(output))] yguess = y[np.unravel_index(np.nanargmax(output), np.shape(output))] #Fits location of star/spot xc,yc = return_pos(output, (xguess,yguess), x,y) #Extracts a box_size + 4 width stamp centered on exact position x,y = gen_xy(box_size+4) x += (xc-np.round((box_size+4)/2.)) y += (yc-np.round((box_size+4)/2.)) output = pixel_map(im,x,y) return output
def max_pure_social_welfare(game, *, by_role=False): """Returns the maximum social welfare over the known profiles. If by_role is specified, then max social welfare applies to each role independently. If there are no profiles with full payoff data for a role, an arbitrary profile will be returned.""" if by_role: # pylint: disable=no-else-return if game.num_profiles: # pylint: disable=no-else-return welfares = np.add.reduceat( game.profiles() * game.payoffs(), game.role_starts, 1) prof_inds = np.nanargmax(welfares, 0) return (welfares[prof_inds, np.arange(game.num_roles)], game.profiles()[prof_inds]) else: welfares = np.full(game.num_roles, np.nan) profiles = np.full(game.num_roles, None) return welfares, profiles else: if game.num_complete_profiles: # pylint: disable=no-else-return welfares = np.einsum('ij,ij->i', game.profiles(), game.payoffs()) prof_ind = np.nanargmax(welfares) return welfares[prof_ind], game.profiles()[prof_ind] else: return np.nan, None
def _single_node_deletion(self, chroms, genes, samples): """ The single node deletion routine of the algorithm. Parameters ---------- chroms : ndarray Contains 1 for a chromosome pair that belongs to the tricluster currently examined, 0 otherwise. genes : ndarray Contains 1 for a gene that belongs to the tricluster currently examined, 0 otherwise. samples : ndarray Contains 1 for a sample that belongs to the tricluster currently examined, 0 otherwise. Returns ------- chroms : ndarray Contains 1 for a chromosome pair that belongs to the tricluster examined, 0 otherwise. genes : ndarray Contains 1 for a gene that belongs to the tricluster examined, 0 otherwise. samples : ndarray Contains 1 for a sample that belongs to the tricluster examined, 0 otherwise. """ self._compute_MSR(chroms, genes, samples) while (self.MSR > self.delta): chrom_idx = np.nanargmax(self.MSR_chrom) gene_idx = np.nanargmax(self.MSR_gene) sample_idx = np.nanargmax(self.MSR_sample) with warnings.catch_warnings(): # We expect mean of NaNs here warnings.simplefilter("ignore", category=RuntimeWarning) if (self.MSR_chrom[chrom_idx] > self.MSR_gene[gene_idx]): if (self.MSR_chrom[chrom_idx] > self.MSR_sample[sample_idx]): # Delete chrom nonz_idx = chroms.nonzero()[0] chroms.put(nonz_idx[chrom_idx], 0) else: # Delete sample nonz_idx = samples.nonzero()[0] samples.put(nonz_idx[sample_idx], 0) else: if (self.MSR_gene[gene_idx] > self.MSR_sample[sample_idx]): # Delete gene nonz_idx = genes.nonzero()[0] genes.put(nonz_idx[gene_idx], 0) else: # Delete sample nonz_idx = samples.nonzero()[0] samples.put(nonz_idx[sample_idx], 0) self._compute_MSR(chroms, genes, samples) return chroms, genes, samples
def get_best_threshold(y_ref, y_pred_score, plot=True): """ Get threshold on scores that maximizes f1 score. Parameters ---------- y_ref : array Reference labels (binary). y_pred_score : array Predicted scores. plot : bool If true, plot ROC curve Returns ------- best_threshold : float threshold on score that maximized f1 score max_fscore : float f1 score achieved at best_threshold """ pos_weight = 1.0 - float(len(y_ref[y_ref == 1]))/float(len(y_ref)) neg_weight = 1.0 - float(len(y_ref[y_ref == 0]))/float(len(y_ref)) sample_weight = np.zeros(y_ref.shape) sample_weight[y_ref == 1] = pos_weight sample_weight[y_ref == 0] = neg_weight print "max prediction value = %s" % np.max(y_pred_score) print "min prediction value = %s" % np.min(y_pred_score) precision, recall, thresholds = \ metrics.precision_recall_curve(y_ref, y_pred_score, pos_label=1, sample_weight=sample_weight) beta = 1.0 btasq = beta**2.0 fbeta_scores = (1.0 + btasq)*(precision*recall)/((btasq*precision)+recall) max_fscore = fbeta_scores[np.nanargmax(fbeta_scores)] best_threshold = thresholds[np.nanargmax(fbeta_scores)] if plot: plt.figure(1) plt.subplot(1, 2, 1) plt.plot(recall, precision, '.b', label='PR curve') plt.xlim([0.0, 1.0]) plt.ylim([0.0, 1.0]) plt.xlabel('Recall') plt.ylabel('Precision') plt.title('Precision-Recall Curve') plt.legend(loc="lower right", frameon=True) plt.subplot(1, 2, 2) plt.plot(thresholds, fbeta_scores[:-1], '.r', label='f1-score') plt.xlabel('Probability Threshold') plt.ylabel('F1 score') plt.show() plot_data = (recall, precision, thresholds, fbeta_scores[:-1]) return best_threshold, max_fscore, plot_data
def mapmean(tempDF, meta, name = '', option = 0): import cartopy.crs as ccrs from cartopy.io.img_tiles import MapQuestOSM from mpl_toolkits.axes_grid1 import make_axes_locatable #fig = plt.figure(figsize=(30, 30)) x = meta['location:Longitude'].values y = meta['location:Latitude'].values c = tempDF[meta.index].mean() marker_size = 350 imagery = MapQuestOSM() fig = plt.figure(figsize=[15,15]) ax = plt.axes(projection=imagery.crs) ax.set_extent(( meta['location:Longitude'].min()-.005, meta['location:Longitude'].max()+.005 , meta['location:Latitude'].min()-.005, meta['location:Latitude'].max()+.005)) ax.add_image(imagery, 14) cmap = matplotlib.cm.OrRd bounds = np.linspace(round((c.mean()-3)),round((c.mean()+3)),13) norm = matplotlib.colors.BoundaryNorm(bounds, cmap.N) plotHandle = ax.scatter(x,y,c = c, s = marker_size, transform=ccrs.Geodetic(), cmap = cmap, norm = norm) if option ==0 : cbar1 = plt.colorbar(plotHandle, label = 'Temperature in $^\circ $C') else : cbar1 = plt.colorbar(plotHandle, label = option) lon = x[np.nanargmax(c)] lat = y[np.nanargmax(c)] at_x, at_y = ax.projection.transform_point(lon, lat, src_crs=ccrs.Geodetic()) plt.annotate( '%2.1f'%np.nanmax(c.values), xy=(at_x, at_y), #xytext=(30, 20), textcoords='offset points', color='black', backgroundcolor='none', size=22, ) lon = x[np.nanargmin(c)] lat = y[np.nanargmin(c)] at_x, at_y = ax.projection.transform_point(lon, lat, src_crs=ccrs.Geodetic()) plt.annotate( '%2.1f'%np.nanmin(c.values), xy=(at_x, at_y), #xytext=(30, 20), textcoords='offset points', color='black', size = 22, backgroundcolor='none') plt.annotate( '$\mu = $ %2.1f, $\sigma = $ %2.1f'%(np.nanmean(c.values), np.nanstd(c.values)), (0.01,0.01), xycoords ='axes fraction', #xytext=(30, 20), textcoords='offset points', color='black', size = 22, backgroundcolor='none') plt.title('Mean Temperature %s'%name) filename = './plots/meantempmap%s.eps'%name plt.savefig(filename, format = 'eps', dpi = 600)
def predict(self, X): """ Predict class """ n_frame = len(X) n_label = len(le.classes_) self.labels_predicted = np.empty(n_frame, dtype=int) #尤度保存用行列 matP = np.empty((n_frame, n_label)) #初期確率はクラス0が0.99, その他は当確率とする matP[0, 0] = 0.99 for i in xrange(1,n_label): matP[0, i] = (1 - 0.99) / (n_label - 1) #ラベル保存用行列 matL = np.empty((n_frame, n_label)) #ヴィタビ経路の計算 for j in xrange(1, n_frame): for yj in xrange(n_label): prob = np.empty(n_label) for yk in xrange(n_label): #出力確率または遷移確率が0の場合はNone if (self.emit_prob[X[j], yj] == 0.) or (self.trans_prob[yk, yj] == 0.): prob[yk] = None else: prob[yk] = self.emit_prob[X[j], yj] * self.trans_prob[yk, yj] * matP[j-1, yk] #logprobが全てnanの場合はnanを返す count = 0 for i in prob: if np.isnan(i) == True: count += 1 if count == len(prob): matP[j, yj] = None matL[j, yj] = None else: matP[j, yj] = np.nanmax(prob) matL[j, yj] = np.nanargmax(prob) #クラスごとの確率を足すと1になるように正規化 matP[j, :] = matP[j, :] / np.sum(matP[j, :]) self.likelihoods = matP #推定ラベル列の決定 self.labels_predicted[n_frame-1] = np.nanargmax(matP[n_frame-1, :]) for j in reversed(xrange(n_frame-1)): self.labels_predicted[j] = matL[j+1, self.labels_predicted[j+1]] return self.labels_predicted
def _ifws_peak_bins(self, ws): ''' Gives the bin indices of the first and last peaks (of spectra 0) in the IFWS @param ws :: input workspace return :: [xmin,xmax] ''' y = mtd[ws].readY(0) size = len(y) mid = int(size / 2) imin = np.nanargmax(y[0:mid]) imax = np.nanargmax(y[mid:size]) + mid return imin, imax
def _monitor_max_range(self, ws): """ Gives the bin indices of the first and last peaks in the monitor @param ws :: input workspace name return :: [xmin,xmax] """ y = mtd[ws].readY(0) size = len(y) mid = int(size / 2) imin = np.nanargmax(y[0:mid]) imax = np.nanargmax(y[mid:size]) + mid return imin, imax
def get3MaxDerivatives(eda,num_max=3): deriv, second_deriv = getDerivatives(eda) d = copy.deepcopy(deriv) d2 = copy.deepcopy(second_deriv) max_indices = [] for i in range(num_max): maxd_idx = np.nanargmax(abs(d)) max_indices.append(maxd_idx) d[maxd_idx] = 0 max2d_idx = np.nanargmax(abs(d2)) max_indices.append(max2d_idx) d2[max2d_idx] = 0 return max_indices, abs(deriv), abs(second_deriv)
def sim_print(self, input_word, corpus_word, sim_matrix, number=5): for input_sent, sim_vector in zip(input_word, sim_matrix): print("input=", input_sent) for count in range(0, number): # 上位n個を出す(n未満の配列には対応しないので注意) ans_sim = [np.nanmax(sim_vector), np.nanargmax(sim_vector)] print('配列番号:', np.nanargmax(sim_vector), 'No.', count, 'sim=', ans_sim[0]) print('output=', corpus_word[ans_sim[1]]) src_set = set(input_sent.split()) tag_set = set(corpus_word[ans_sim[1]].split()) print('共通部分', list(src_set & tag_set)) print() sim_vector[np.nanargmax(sim_vector)] = -1 print() return 0
def findpeaks(f, fft, f_cent, f_span, points, fig, ax, line1, line2): center = round(points/2.) region = round(points/8.) lc = center - region rc = center + region region1 = round(points/6.) l = lc - region1 r = rc + region1 mu1 = nanargmax(fft[l:lc]) + l mu2 = nanargmax(fft[lc:rc]) + lc mu3 = nanargmax(fft[rc:r]) + rc args = [mu1, mu2, mu3] line1[0].set_data(f[args], fft[args]) return args
def clustercoordsbymax1d(arr, pkind, critsepind):#results will be sorted. wherever there are peak indeces too close together. the peak index next to the peak index with highest arr value gets removed pkind.sort() indindslow=numpy.where((pkind[1:]-pkind[:-1])<critsepind)[0] indindshigh=indindslow+1 while indindslow.size>0: maxindindindlow=numpy.nanargmax(arr[pkind[(indindslow,)]]) maxindindindhigh=numpy.nanargmax(arr[pkind[(indindshigh,)]]) if arr[pkind[indindslow[maxindindindlow]]]>arr[pkind[indindshigh[maxindindindhigh]]]: pkind=numpy.delete(pkind, indindshigh[maxindindindlow]) else: pkind=numpy.delete(pkind, indindslow[maxindindindhigh]) indindslow=numpy.where((pkind[1:]-pkind[:-1])<critsepind)[0] indindshigh=indindslow+1 return pkind
def test_reductions_2D_nans(): # chunks are a mix of some/all/no NaNs x = np.full((4, 4), np.nan) x[:2, :2] = np.array([[1, 2], [3, 4]]) x[2, 2] = 5 x[3, 3] = 6 a = da.from_array(x, chunks=(2, 2)) reduction_2d_test(da.sum, a, np.sum, x, False, False) reduction_2d_test(da.prod, a, np.prod, x, False, False) reduction_2d_test(da.mean, a, np.mean, x, False, False) reduction_2d_test(da.var, a, np.var, x, False, False) reduction_2d_test(da.std, a, np.std, x, False, False) reduction_2d_test(da.min, a, np.min, x, False, False) reduction_2d_test(da.max, a, np.max, x, False, False) reduction_2d_test(da.any, a, np.any, x, False, False) reduction_2d_test(da.all, a, np.all, x, False, False) reduction_2d_test(da.nansum, a, np.nansum, x, False, False) reduction_2d_test(da.nanprod, a, nanprod, x, False, False) reduction_2d_test(da.nanmean, a, np.nanmean, x, False, False) with pytest.warns(None): # division by 0 warning reduction_2d_test(da.nanvar, a, np.nanvar, x, False, False) with pytest.warns(None): # division by 0 warning reduction_2d_test(da.nanstd, a, np.nanstd, x, False, False) with pytest.warns(None): # all NaN axis warning reduction_2d_test(da.nanmin, a, np.nanmin, x, False, False) with pytest.warns(None): # all NaN axis warning reduction_2d_test(da.nanmax, a, np.nanmax, x, False, False) assert_eq(da.argmax(a), np.argmax(x)) assert_eq(da.argmin(a), np.argmin(x)) with pytest.warns(None): # all NaN axis warning assert_eq(da.nanargmax(a), np.nanargmax(x)) with pytest.warns(None): # all NaN axis warning assert_eq(da.nanargmin(a), np.nanargmin(x)) assert_eq(da.argmax(a, axis=0), np.argmax(x, axis=0)) assert_eq(da.argmin(a, axis=0), np.argmin(x, axis=0)) with pytest.warns(None): # all NaN axis warning assert_eq(da.nanargmax(a, axis=0), np.nanargmax(x, axis=0)) with pytest.warns(None): # all NaN axis warning assert_eq(da.nanargmin(a, axis=0), np.nanargmin(x, axis=0)) assert_eq(da.argmax(a, axis=1), np.argmax(x, axis=1)) assert_eq(da.argmin(a, axis=1), np.argmin(x, axis=1)) with pytest.warns(None): # all NaN axis warning assert_eq(da.nanargmax(a, axis=1), np.nanargmax(x, axis=1)) with pytest.warns(None): # all NaN axis warning assert_eq(da.nanargmin(a, axis=1), np.nanargmin(x, axis=1))
def quick_analyze(sp, freq_name_mapping, minvelo, maxvelo): """ get peak of spectrum, subtract continuum, etc. """ argmax = np.nanargmax(sp.data) # can have empty spectra passed to this, apparently if not np.isfinite(sp.data[argmax]): return (np.nan, np.nan, np.nan, np.nan, np.nan, np.nan, False, '', np.nan) cont = np.nanpercentile(sp.data, 20) shift = (minvelo+maxvelo)/2. / constants.c sp.data -= cont sp.xarr.convert_to_unit(u.GHz) peak = np.nanmax(sp.data) peakfreq = sp.xarr[argmax] assert sp.data[argmax] == peak peakfreq_shifted = peakfreq * (1+shift) freqlist = list(freq_name_mapping.keys()) #reverse_freq_name_mapping = {v:k for k,v in freq_name_mapping.items()} bestmatch = np.argmin(np.abs(peakfreq_shifted - u.Quantity(freqlist))) closest_freq = freqlist[bestmatch] peakvelo = ((closest_freq-peakfreq)/closest_freq * constants.c).to(u.km/u.s) velo_OK = (minvelo < peakvelo) and (peakvelo < maxvelo) peakspecies = (freq_name_mapping[closest_freq] if velo_OK else 'none') return (cont, peak, peakfreq, peakfreq_shifted, bestmatch, peakvelo, velo_OK, peakspecies, argmax)
def _create_new_neuron(self): ''' create new neuron if t mod \lambda = 0 and |K| < \theta a. find neuron q with the greatest counter: q := arg max_{n \in K} e_n b. find neighbor f of q with f := arg max_{n \in N_q} e_n c. initialize new neuron l K := K \cup l w_l := 1/2 * (w_q + w_f) c_l := 1/2 * (c_q + c_f) e_l := \delta * (e_f + e_q) d. adapt connections: E := (E \ {(q, f)}) \cup {(q, n), (n, f)} e. decrease counter of q and f by the factor \delta e_q := (1 - \deta) * e_q e_f := (1 - \deta) * e_f ''' q = np.nanargmax(self.errors) N_q = None if q: N_q = self.model.neighbors(q) if N_q: f = max(N_q, key=lambda n: self.errors[n]) l = self._add_node(e=self.delta*(self.errors[q] + self.errors[f]), w=(self.weights[q] + self.weights[f]) / 2, c=(self.contexts[q] + self.contexts[f]) / 2) self.model.remove_edge(q, f) self._add_edge(q, l) self._add_edge(f, l) self.errors[q] *= (1 - self.delta) self.errors[f] *= (1 - self.delta) return l
def rank_worms(complete_df, a_variable, a_time, return_all = False, egg_mode = True): ''' Rank worms according to their measured value of a_variable at a_time. ''' if a_time != None: my_data = complete_df.mloc(measures = [a_variable], times = [a_time])[:, 0, 0] my_index = list(complete_df.worms) for i in range(0, len(my_index)): a_worm = my_index[i] my_time = closest_real_time(complete_df, a_worm, a_time, egg_mode = egg_mode) my_index[i] += ' ' + my_time my_data = pd.Series(my_data, index = my_index).dropna() my_data.sort() else: my_data = complete_df.mloc(measures = [a_variable])[:, 0, :].copy() flat_data = np.ndarray.flatten(my_data) true_max = np.nanargmax(flat_data) sorted_arguments = np.argsort(flat_data) sorted_arguments = sorted_arguments[:np.where(sorted_arguments == true_max)[0] + 1] sorted_indices = np.array(np.unravel_index(sorted_arguments, my_data.shape)).transpose() the_lowest = [complete_df.worms[sorted_indices[i][0]] + ' ' + closest_real_time(complete_df, complete_df.worms[sorted_indices[i][0]], complete_df.times[sorted_indices[i][1]]) for i in range(0, 20)] the_highest = [complete_df.worms[sorted_indices[-i][0]] + ' ' + closest_real_time(complete_df, complete_df.worms[sorted_indices[-i][0]], complete_df.times[sorted_indices[-i][1]]) for i in range(20, 0, -1)] together_list = list(the_lowest) together_list.extend(the_highest) together_data = np.concatenate((flat_data[sorted_arguments[:20]], flat_data[sorted_arguments[-20:]])) my_data = pd.Series(together_data, index = together_list) if return_all: the_full = [complete_df.worms[sorted_indices[i][0]] + ' ' + closest_real_time(complete_df, complete_df.worms[sorted_indices[i][0]], complete_df.times[sorted_indices[i][1]]) for i in range(0, len(sorted_indices))] return (my_data, the_full) return my_data
def lookup_max(self, region=None): """ Find position of maximum in a image. Parameters ---------- region : `~regions.SkyRegion` (optional) Limit lookup of maximum to that given sky region. Returns ------- (position, value): `~astropy.coordinates.SkyCoord`, float Position and value of the maximum. """ if region: region_pix = region.to_pixel(self.wcs) coords_pix = self.coordinates_pix() mask = region_pix.contains(coords_pix) else: mask = np.ones_like(self.data) idx = np.nanargmax(self.data * mask) y, x = np.unravel_index(idx, self.data.shape) pos = self.wcs_pixel_to_skycoord(xp=x, yp=y) return pos, self.data[y, x]
def plot(self): if self.y is not None: pp.subplot(2, 1, 1) pp.plot(self.var_grid, self.y) if hasattr(self.optimizer.chooser, 'ei'): pp.subplot(2, 1, 1) func_m = self.optimizer.chooser.func_m[self.sort_idx] func_s = np.sqrt(self.optimizer.chooser.func_v[self.sort_idx]) pp.plot(self.var_grid, func_m) pp.plot(self.var_grid, func_m + func_s) pp.plot(self.var_grid, func_m - func_s) pp.subplot(2, 1, 2) ei = self.optimizer.chooser.ei[self.sort_idx] pp.plot(self.var_grid, ei) best_idx = np.nanargmax(ei) pp.plot(self.var_grid[best_idx], ei[best_idx], '.', markersize=10) pp.show()
def scan_callback(self, scan): # Calculate angles. angles = scan.angle_min + np.arange(scan.ranges.shape[0]) * scan.angle_increment # Blur ranges. blur_width_angle = np.deg2rad(30) blur_width = blur_width_angle / scan.angle_increment use_ranges = scipy.ndimage.filters.gaussian_filter(scan.ranges, blur_width) # Nan angles which are out of steering range. # Angles go from -2.something to +2.something. min_angle = np.deg2rad(-60) max_angle = np.deg2rad(60) use_ranges[np.where((angles < min_angle) | (angles > max_angle))] = np.nan index = np.nanargmax(use_ranges) dist = scan.ranges[index] too_close = dist < self.close_thresh theta = angles[index] view_dist = 1. self.pub_point.publish( PointStamped( scan.header, Point( np.cos(theta) * view_dist, np.sin(theta) * view_dist, 0 ) ) ) self.pub_angle.publish(Float32(theta)) self.pub_blur.publish(Int32(blur_width)) self.pub_too_close.publish(Bool(too_close))
def decide_migration_migrationlikelihood_woi(self): migrate_me_maybe = (self.window_overload_index > self.relocation_thresholds)[0] if np.sum(migrate_me_maybe) > 0: indexes = np.array(np.where(migrate_me_maybe)).tolist()[0] # potential migration sources set_of_vms = list() for i in indexes: partial = (self.location[:, i] == 1).transpose() newly_found = np.array(np.where(partial)).tolist() set_of_vms += newly_found[0] set_of_vms = sorted(set_of_vms) pms = [x.get_pm() for x in self.vms] pm_volumes = np.array([x.get_volume() for x in self.pms]) vm_volumes = np.array([x.get_volume_actual() for x in self.vms]) vm_migrations = np.array([x.get_migrations() for x in self.vms]) available_volume_per_pm = pm_volumes - self.physical_volume_vector available_capacity = [available_volume_per_pm[x.get_pm()] for x in self.vms] plan_coefficients = np.array([x.plan.get_coefficient() for x in self.vms]) minimize_me = -1.0/plan_coefficients * (vm_volumes + available_capacity) + plan_coefficients * vm_migrations vm_migrate = np.nanargmin(minimize_me) pm_source = self.vms[vm_migrate].get_pm() # avoiding to select the source machine as destination by using nan available_volume_per_pm[pm_source] = np.nan pm_destination = np.nanargmax(available_volume_per_pm) self.migrate(vm_migrate, pm_source, pm_destination) self.integrated_overload_index[0,pm_source] = 0
def decode_location(likelihood, pos_centers, time_centers): """Finds the decoded location based on the centers of the position bins. Parameters ---------- likelihood : np.array With shape(n_timebins, n_positionbins) pos_centers : np.array time_centers : np.array Returns ------- decoded : nept.Position Estimate of decoded position. """ keep_idx = np.sum(np.isnan(likelihood), axis=1) < likelihood.shape[1] likelihood = likelihood[keep_idx] max_decoded_idx = np.nanargmax(likelihood, axis=1) decoded_data = pos_centers[max_decoded_idx] decoded_time = time_centers[keep_idx] return nept.Position(decoded_data, decoded_time)
def rngWorker(inputEdges, queue): """ Work done by each processor :param inputEdges: set of edges (p, q) :param queue: shared Queue to place results """ edges = set() for p, q in inputEdges: relationPQ = _globalRelationalMatrix[p, q] row = _globalRelationalMatrix[p] # maxJRow = getBestScore(relationMatrix[q]) # non-numerical distances/similarities will not be counted as edges if np.isnan(relationPQ): isEdge = False # if there is a numeric value else: isEdge = True # assume edge until proven wrong # loop through all columns in the ith row # relationPR is weight of edge p,r ***************************************************** (N^3)/2 for r, relationPR in enumerate(row): # skip rows p and q and any points for which there is no distance value if p != r != q and (not np.isnan(relationPR)) and (not np.isnan(_globalRelationalMatrix[q, r])): # for triangle prq, if pq is the longest distance, then p and q are not neighbors lengths = [relationPR, _globalRelationalMatrix[q, r]] if lengths[np.nanargmax(lengths)] < relationPQ: isEdge = False # not an edge! break # break to next q # if p and q are neighbors if isEdge: edges.add(frozenset((p, q))) # add (p,q) tuple to edges set queue.put(edges)
def original_ensemble_selection(predictions, labels, ensemble_size, task_type, metric, do_pruning=False): """Rich Caruana's ensemble selection method.""" ensemble = [] trajectory = [] order = [] if do_pruning: n_best = 20 indices = pruning(predictions, labels, n_best, task_type, metric) for idx in indices: ensemble.append(predictions[idx]) order.append(idx) ensemble_ = np.array(ensemble).mean(axis=0) ensemble_performance = calculate_score( labels, ensemble_, task_type, metric, ensemble_.shape[1]) trajectory.append(ensemble_performance) ensemble_size -= n_best for i in range(ensemble_size): scores = np.zeros([predictions.shape[0]]) for j, pred in enumerate(predictions): ensemble.append(pred) ensemble_prediction = np.mean(np.array(ensemble), axis=0) scores[j] = calculate_score(labels, ensemble_prediction, task_type, metric, ensemble_prediction.shape[1]) ensemble.pop() best = np.nanargmax(scores) ensemble.append(predictions[best]) trajectory.append(scores[best]) order.append(best) return np.array(order), np.array(trajectory)
def model_schreiben(datum,data_original,name): global fill_typ gemittelt,fill_typ=30,np.nan fill_zw=np.empty(data_original.shape[0]) fill_zw[:]=np.nan fill=mittelung(fill_zw[:],gemittelt) data=mittelung(datetime.datetime.strptime(datum,'%Y%m%d').timetuple().tm_yday+data_original[:,0]/(24*60*60),gemittelt) data=np.hstack((data,mittelung(data_original[:,0],gemittelt))) data=np.hstack((data,fill[:])) data=np.hstack((data,fill[:])) for i in range(len(data)): for j in range(2): a=data_original[i*gemittelt:(i+1)*gemittelt,45+j] if np.isnan(a[a.argsort()][0])==True: data[i,2+j]=fill_typ else: anzahl_non_nans= np.argmin(abs(np.nanargmax(a)-a.argsort()[:]))+1 pos_non_nans=a.argsort()[0:anzahl_non_nans] data[i,2+j]=a[pos_non_nans[np.argmin(abs(14-pos_non_nans[:]))]] index_list=[47,40,41,42,-99,-99,-99,-99,-99,-99,-99,-99,-99,-99,-99,-99,5,23,24,26,28,-99,-99,-99,-99] for i in range(len(index_list)): if index_list[i]==-99: data=np.hstack((data,fill[:])) else: data=np.hstack((data,mittelung( data_original[:,index_list[i]] ,gemittelt))) np.savetxt(name,data,fmt='%.5f',delimiter="\t") punkt=open(name,'r').read() komma=open(name,'w') komma.write(punkt.replace(".",",").replace("nan","-999,9")) komma.close()
def decide_migration_loadaware_woi(self): migrate_me_maybe = (self.window_overload_index > self.relocation_thresholds)[0] if np.sum(migrate_me_maybe) > 0: indexes = np.array(np.where(migrate_me_maybe)).tolist()[0] # potential migration sources pm_source = random.choice(indexes) set_of_vms = (self.location[:, pm_source] == 1).transpose() vm_set_migration = np.array(np.where(set_of_vms)).tolist()[0] volumes = np.array([x.get_volume() for x in self.pms]) available_volume_per_pm = volumes - self.physical_volume_vector aware_matrix = np.zeros((self.num_vms, self.num_pms)) for col in range(0,self.num_pms): aware_matrix[:, col] = available_volume_per_pm[col] for row in range(0,self.num_vms): if row in vm_set_migration: vol_to_remove = self.volumes[row] else: vol_to_remove = np.inf aware_matrix[row, :] = aware_matrix[row, :] - vol_to_remove aware_matrix[:, pm_source] = np.nan aware_matrix[aware_matrix<0] = np.nan if not np.isnan(aware_matrix).all(): argmaxidx = np.nanargmax(aware_matrix) coordinates = np.unravel_index(argmaxidx, (self.num_vms, self.num_pms)) vm_migrate = coordinates[0] pm_destination = coordinates[1] self.migrate(vm_migrate, pm_source, pm_destination) self.integrated_overload_index[0,pm_source] = 0
def compute_features(frameManager, featureExtractor, grasp_begin, grasp_end, pmax, max_matrix_1, max_matrix_5): # Values computed in "calibrate_impression_depth.py" max_val_matrix_1 = 3554.0 max_val_matrix_5 = 2493.0 impression_depth = 1.0 # Just an estimate of the maximal impression in [mm] impression_factor_1 = impression_depth / max_val_matrix_1 impression_factor_5 = impression_depth / max_val_matrix_5 # Determine more robust frames of interest (begin and end frame of the grasp) # by taking the objects diameter into account # head + tail <= thresh_sequence head_elem = 10 tail_elem = 10 miniballs = np.empty([grasp_end - grasp_begin + 1, 4]) miniballs.fill(None) #for i, frameID in enumerate(range(grasp_end-tail_elem+1, grasp_end+1)): for i, frameID in enumerate(range(grasp_begin, grasp_end + 1)): theta = frameManager.get_corresponding_jointangles(frameID) miniballs[ i] = featureExtractor.compute_minimal_bounding_sphere_centroid( frameID, theta) # Compensate for force dependent sensor matrix impression diameter = (2 * miniballs[:, 3] + max_matrix_1[grasp_begin:grasp_end + 1] * impression_factor_1 + max_matrix_5[grasp_begin:grasp_end + 1] * impression_factor_5) slice_tail = diameter[-tail_elem:] end_position = (grasp_end - tail_elem) + find_nearest_idx( slice_tail, np.median(slice_tail)) # Problem: # The object's initial size cannot be measured accurately enough if the grasp applies torque. # In that case, the contact surface between object and both sensor matrices is tilted leading to an # overestimation of the real diameter. This asymetry disappears when all forces reach an equilibrium state. # In order to get more robust object size features, the profile's centroids of the end position frame # is used to recalculate the diameter during each step of the grasp. 
centroid_matrix_1 = featureExtractor.compute_centroid(end_position, 1) centroid_matrix_5 = featureExtractor.compute_centroid(end_position, 5) points = np.array([[1.0, centroid_matrix_1[0], centroid_matrix_1[1]], [5.0, centroid_matrix_5[0], centroid_matrix_5[1]]], dtype=np.float64) miniballs_refined = np.empty([grasp_end - grasp_begin + 1, 4]) miniballs_refined.fill(None) for i, frameID in enumerate(range(grasp_begin, grasp_end + 1)): theta = frameManager.get_corresponding_jointangles(frameID) miniballs_refined[ i] = featureExtractor.compute_minimal_bounding_sphere_points( points, theta) # Compensate for force dependent sensor matrix impression diameter_refined = ( 2 * miniballs_refined[:, 3] + max_matrix_1[grasp_begin:grasp_end + 1] * impression_factor_1 + max_matrix_5[grasp_begin:grasp_end + 1] * impression_factor_5) # Initial position: max diameter of minimal bounding sphere slice_head = diameter_refined[0:head_elem] initial_position = grasp_begin + np.nanargmax(slice_head) # Local indices initial_position_grasp = initial_position - grasp_begin end_position_grasp = end_position - grasp_begin # Compute features #grasp_diameter = diameter_refined[initial_position] #grasp_diameter = np.median(diameter_refined) #grasp_diameter = stats.mode(diameter_refined)[0][0] grasp_diameter = stats.mode(diameter)[0][0] compressibility = diameter_refined[initial_position_grasp] - diameter_refined[ end_position_grasp] # Change of minimal bounding sphere's size during grasp std_dev_matrix_1 = featureExtractor.compute_standard_deviation( end_position, 1) # Standard deviation of intensity values (not 2D image moments) std_dev_matrix_5 = featureExtractor.compute_standard_deviation( end_position, 5) moments_matrix_1 = featureExtractor.compute_chebyshev_moments( end_position, 1, pmax).reshape(-1) # frameID, matrixID, pmax moments_matrix_5 = featureExtractor.compute_chebyshev_moments( end_position, 5, pmax).reshape(-1) return grasp_diameter, compressibility, std_dev_matrix_1, std_dev_matrix_5, moments_matrix_1, moments_matrix_5
def addImg(self, img, roi=None): ''' img - background, flat field, ste corrected image roi - [(x1,y1),...,(x4,y4)] - boundaries where points are ''' self.img = imread(img, 'gray') s0, s1 = self.img.shape if roi is None: roi = ((0, 0), (s0, 0), (s0, s1), (0, s1)) k = self.kernel_size hk = k // 2 # mask image img2 = self.img.copy() # .astype(int) mask = np.zeros(self.img.shape) cv2.fillConvexPoly(mask, np.asarray(roi, dtype=np.int32), color=1) mask = mask.astype(bool) im = img2[mask] bg = im.mean() # assume image average with in roi == background mask = ~mask img2[mask] = -1 # find points from local maxima: self.points = np.zeros(shape=(self.max_points, 2), dtype=int) thresh = 0.8 * bg + 0.2 * im.max() _findPoints(img2, thresh, self.min_dist, self.points) self.points = self.points[:np.argmin(self.points, axis=0)[0]] # correct point position, to that every point is over max value: for n, p in enumerate(self.points): sub = self.img[p[1] - hk:p[1] + hk + 1, p[0] - hk:p[0] + hk + 1] i, j = np.unravel_index(np.nanargmax(sub), sub.shape) self.points[n] += [j - hk, i - hk] # remove points that are too close to their neighbour or the border mask = maximum_filter(mask, hk) i = np.ones(self.points.shape[0], dtype=bool) for n, p in enumerate(self.points): if mask[p[1], p[0]]: # too close to border i[n] = False else: # too close to other points for pp in self.points[n + 1:]: if norm(p - pp) < hk + 1: i[n] = False isum = i.sum() ll = len(i) - isum print('found %s points' % isum) if ll: print('removed %s points (too close to border or other points)' % ll) self.points = self.points[i] # self.n_points += len(self.points) # for finding best peak position: # def fn(xy,cx,cy):#par # (x,y) = xy # return 1-(((x-cx)**2 + (y-cy)**2)*(1/8)).flatten() # x,y = np.mgrid[-2:3,-2:3] # x = x.flatten() # y = y.flatten() # for shifting peak: xx, yy = np.mgrid[0:k, 0:k] xx = xx.astype(float) yy = yy.astype(float) self.subs = [] # import pylab as plt # plt.figure(20) # img = self.drawPoints() # plt.imshow(img, interpolation='none') # # plt.figure(21) # # plt.imshow(sub2, interpolation='none') # plt.show() #thresh = 0.8*bg + 0.1*im.max() for i, p in enumerate(self.points): sub = self.img[p[1] - hk:p[1] + hk + 1, p[0] - hk:p[0] + hk + 1].astype(float) sub2 = sub.copy() mean = sub2.mean() mx = sub2.max() sub2[sub2 < 0.5 * (mean + mx)] = 0 # only select peak try: # SHIFT SUB ARRAY to align peak maximum exactly in middle: # only eval a 5x5 array in middle of sub: # peak = sub[hk-3:hk+4,hk-3:hk+4]#.copy() # peak -= peak.min() # peak/=peak.max() # peak = peak.flatten() # fit paraboloid to get shift in x,y: # p, _ = curve_fit(fn, (x,y), peak, (0,0)) c0, c1 = center_of_mass(sub2) # print (p,c0,c1,hk) #coords = np.array([xx+p[0],yy+p[1]]) coords = np.array([xx + (c0 - hk), yy + (c1 - hk)]) #print (c0,c1) #import pylab as plt #plt.imshow(sub2, interpolation='none') # shift array: sub = map_coordinates(sub, coords, mode='nearest').reshape(k, k) # plt.figure(2) #plt.imshow(sub, interpolation='none') # plt.show() #normalize: bg = 0.25 * (sub[0].mean() + sub[-1].mean() + sub[:, 0].mean() + sub[:, -1].mean()) sub -= bg sub /= sub.max() # import pylab as plt # plt.figure(20) # plt.imshow(sub, interpolation='none') # # plt.figure(21) # # plt.imshow(sub2, interpolation='none') # plt.show() self._psf += sub if self.calc_std: self.subs.append(sub) except ValueError: pass #sub.shape == (0,0)
def test_nanargmax(self): tgt = np.argmax(self.mat) for mat in self.integer_arrays(): assert_equal(np.nanargmax(mat), tgt)
def key_points(face, d_nose_x1=30, d_nose_x2=5, d_nose_y=5, d_lip_y1=25, d_lip_y2=70, d_lip_y3=4, d_lip_x1=50, d_chin_x=3, d_chin_y1=50, d_chin_y2=75, d_eye_x=2, d_eye_y=50): """ Rotate and zoom the face to create a full frame face. This is based on the fact that the nose is the highest point of the picture """ # We apply surfature to calculate the first and second derivates K, H, Pmax, Pmin = surfature(face) # Remove all key points face.key_points.clear() # # Nose # nose_x, nose_y = max_xy(face.Z) face.key_points["nose"] = (nose_x, nose_y) # # Nose left and right # nose_left = Pmin[(nose_y - d_nose_y):(nose_y + d_nose_y), (nose_x - d_nose_x1):(nose_x - d_nose_x2)] nose_right = Pmin[(nose_y - d_nose_y):(nose_y + d_nose_y), (nose_x + d_nose_x2):(nose_x + d_nose_x1)] nose_left_x, nose_left_y = min_xy(nose_left, offset_x=(nose_x - d_nose_x1), offset_y=(nose_y - d_nose_y)) nose_right_x, nose_right_y = min_xy(nose_right, offset_x=(nose_x + d_nose_x2), offset_y=(nose_y - d_nose_y)) face.key_points["nose_left"] = (nose_left_x, nose_left_y) face.key_points["nose_right"] = (nose_right_x, nose_right_y) # # Upper, lower, left right lip # lip_y = numpy.nanargmax(Pmax[(nose_y + d_lip_y1):(nose_y + d_lip_y2), nose_x]) + (nose_y + d_lip_y1) lip_left = Pmax[(lip_y - d_lip_y3):(lip_y + d_lip_y3), (nose_x - d_lip_x1):nose_x] lip_right = Pmax[(lip_y - d_lip_y3):(lip_y + d_lip_y3), nose_x:(nose_x + d_lip_x1)] lip_left_x = find_peak_start(numpy.sum(lip_left, axis=0)) + (nose_x - d_lip_x1) lip_left_y = numpy.nanargmax(Pmax[(lip_y - d_lip_y3):(lip_y + d_lip_y3), lip_left_x]) + (lip_y - d_lip_y3) lip_right_x = find_peak_stop(numpy.sum(lip_right, axis=0)) + nose_x lip_right_y = numpy.nanargmax(Pmax[(lip_y - d_lip_y3):(lip_y + d_lip_y3), lip_right_x]) + (lip_y - d_lip_y3) face.key_points['lip'] = (nose_x, lip_y) face.key_points['lip_left'] = (lip_left_x, lip_left_y) face.key_points['lip_right'] = (lip_right_x, lip_right_y) # # Chin # chin = numpy.gradient( signal.bspline(face.Z[(lip_y + d_chin_y1):, nose_x], 25)) chin_x, chin_y = nose_x, numpy.nanargmin(chin) + (lip_y + d_chin_y1) face.key_points["chin"] = (chin_x, chin_y) # # Eyes # eye_left = Pmax[d_eye_y:nose_left_y - d_eye_y, nose_left_x - d_eye_x:nose_left_x + d_eye_x] eye_right = Pmax[d_eye_y:nose_right_y - d_eye_y, nose_right_x - d_eye_x:nose_right_x + d_eye_x] eye_left_x, eye_left_y = max_xy(eye_left, nose_left_x - d_eye_x, d_eye_y) eye_right_x, eye_right_y = max_xy(eye_right, nose_right_x - d_eye_x, d_eye_y) face.key_points["eye_left"] = (eye_left_x, eye_left_y) face.key_points["eye_right"] = (eye_right_x, eye_right_y) # # Nose face border # nose_line = numpy.gradient(face.Z[nose_y, :]) border_nose_left_x, border_nose_left_y = numpy.nanargmax( nose_line[:lip_left_x - 10]), nose_y border_nose_right_x, border_nose_right_y = numpy.nanargmin( nose_line[lip_right_x + 10:]) + lip_right_x + 10, nose_y face.key_points["border_nose_left"] = (border_nose_left_x, border_nose_left_y) face.key_points["border_nose_right"] = (border_nose_right_x, border_nose_right_y) # # Lip face border # lip_line = numpy.gradient(face.Z[lip_y, :]) border_lip_left_x, border_lip_left_y = numpy.nanargmax( lip_line[:lip_left_x - 10]), lip_y border_lip_right_x, border_lip_right_y = numpy.nanargmin( lip_line[lip_right_x + 10:]) + lip_right_x + 10, lip_y face.key_points["border_lip_left"] = (border_lip_left_x, border_lip_left_y) face.key_points["border_lip_right"] = (border_lip_right_x, border_lip_right_y) # # Forehead border # forehead_line = numpy.gradient(face.Z[nose_y - (chin_y - nose_y), 
:]) border_forehead_left_x, border_forehead_left_y = numpy.nanargmax( forehead_line[:lip_left_x - 10]), nose_y - (chin_y - nose_y) border_forehead_right_x, border_forehead_right_y = numpy.nanargmin( forehead_line[lip_right_x + 10:]) + lip_right_x + 10, nose_y - (chin_y - nose_y) face.key_points["border_forehead_left"] = (border_forehead_left_x, border_forehead_left_y) face.key_points["border_forehead_right"] = (border_forehead_right_x, border_forehead_right_y)
def Strong_RRQR(A, k, f): # # Strong Rank Revealing QR with fixed rank 'k' # # # A(:, p) = Q * R = Q [R11, R12; ## 0, R22] ## where R11 and R12 satisfies that matrix (inv(R11) * R12) has entries ## bounded by a pre-specified constant which should be not less than 1. ## ## Input: ## A, matrix, target matrix that is appoximated. ## f, scalar, constant that bound the entries of calculated (inv(R11) * R12)# ## k, integer, dimension of R11. # # # Output: ## A(:, p) = [Q1, Q2] * [R11, R12; ## 0, R22] ## approx Q1 * [R11, R12]; ## Only truncated QR decomposition is returned as ## Q = Q1, ## R = [R11, R12]; ## where Q is a m * k matrix and R is a k * n matrix ## ## Reference: ## Gu, Ming, and Stanley C. Eisenstat. "Efficient algorithms for ## computing a strong rank-revealing QR factorization." SIAM Journal ## on Scientific Computing 17.4 (1996): 848-869. # # # Note: ## Algorithm 4 in the above ref. is implemented. # # dimension of the given matrix m, n = np.shape(A) Q, R, p = linalg.qr(A, mode="full", pivoting=True) print(p) #print(R[0,0]) print(p[0:10]) #print(np.shape(Q)) s_R = np.sign(np.diag(R)) #print(s_R[0]) for i in list(range(n)): R[i, i] = s_R[i] * R[i, i] Q[i, i] = s_R[i] * Q[i, i] # Initialization of A^{-1}B ( A refers to R11, B refers to R12) R11 = deepcopy(R[0:k, 0:k]) R12 = deepcopy(R[0:k, k:]) R22 = deepcopy(R[k:, k:]) AB = deepcopy(np.dot(np.linalg.inv(R11), R12)) #AB = solve_triangular(R11,R12) #print("AB") #print(np.amax(AB)) #print(np.shape(AB)) #print("ga11") #print(R[k-1,k-1]) # Initialization of gamma, i.e., norm of C's columns (C refers to R22) gamma = np.transpose(np.sqrt(np.diag(np.dot(np.transpose(R22), R22)))) #print("gamma") #print(np.shape(gamma)) # Initialization of omega, i.e., reciprocal of inv(A)'s row norm tmp = np.linalg.pinv(R11) omega = 1. / np.sqrt(np.diag(np.dot(tmp, np.transpose(tmp)))) #print("omega") #print(np.shape(omega)) #print(omega) ## "while" loop for interchanging the columns from first k columns and ## the remaining (n-k) columns. # counter = 0 while 1: tmp2 = np.power(np.outer(1. / omega, np.transpose(gamma)), 2) + np.power(AB, 2) #print("tmp2") #print(np.shape(tmp2)) #print(p[0:k]) i_, j_ = np.where(tmp2 > np.power(f, 2)) print("size") print(i_.size) ind = np.unravel_index(np.nanargmax(tmp2, axis=None), tmp2.shape) i = ind[0] j = ind[1] print("max tmp2") print(tmp2[i, j]) #if tmp2[i,j] <= np.power(f,2): # break #if i_.size>0 and j_.size>0: # print("yes") # i = i_[0] # j = j_[0] #else: # break counter = counter + 1 #print("counter") #print(counter) print("AB") print(np.amax(AB)) # Interchange the i th and (k+j) th column of target matrix A and # update QR decomposition (Q, R, p), AB, gamma, and omega. 
## First step : interchanging the k+1 and k+j th columns if j > 0: #AB[:, [0, j]] = AB[:, [j, 0]] AB_d_0, _ = np.shape(AB) perm_AB_0 = get_transposition_list(AB_d_0, 0, j) AB = AB[perm_AB_0] #gamma[[0, j]] = gamma[[j, 0]] gamma_tmp = gamma[0] gamma[0] = gamma[j] gamma[j] = gamma_tmp _, R_d_2 = np.shape(R) perm_R_1 = get_transposition_list(R_d_2, k, k + j - 1) R = R[:, perm_R_1] #print("ga22") #print(R[k-1,k-1]) #R[:, [k, k+j-1]] =R[:, [k+j-1, k]] #p[[k, k+j-1]] = p[[k+j-1, k]] p_tmp = p[k + j - 1] p[k + j - 1] = p[k] p[k] = p_tmp ## Second step : interchanging the i and k th columns if i < k: _, R_d_2 = np.shape(R) perm_R_2 = get_cyclic_permutation_list(R_d_2, i, k - 1) p = p[perm_R_2] R = R[:, perm_R_2] #print("ga33") #print(R[k-1,k-1]) omega_d_1 = np.shape(omega)[0] perm_omega_3 = get_cyclic_permutation_list(omega_d_1, i, k - 1) omega = omega[perm_omega_3] AB_d_1, _ = np.shape(AB) perm_AB_3 = get_transposition_list(AB_d_1, i, k - 1) #print(omega) AB = AB[perm_AB_3, :] #print(AB) # % givens rotation for the triangulation of R(1:k, 1:k) for ii in list(range(i, k)): G = givens_rotation_matrix_2(R[ii, ii], R[ii + 1, ii]) if np.dot(G[0, :], R[ii:ii + 2, ii]) < 0: G = -G # guarantee R(ii,ii) > 0 print("ok") R[ii:ii + 2, :] = np.dot(G, R[ii:ii + 2, :]) Q[:, ii:ii + 2] = np.dot(Q[:, ii:ii + 2], np.transpose(G)) if R[k - 1, k - 1] < 0: #print("ok") R[k - 1, :] = -R[k - 1, :] Q[:, k - 1] = -Q[:, k - 1] ## Third step : zeroing out the below-diag of k+1 th columns R_m, R_n = np.shape(R) if k < R_m: for ii in list(range(k + 1, R_m)): G = givens_rotation_matrix_2(R[k, k], R[ii, k]) R_vstack = np.transpose(np.asarray([R[k, k], R[ii, k]])) if np.dot(G[0, :], R_vstack) < 0: G = -G #% guarantee R(k+1,k+1) > 0 _, R_d_4 = np.shape(R) #perm_R_4 = get_transposition_list(R_d_4,k,ii) R[[k, ii], :] = np.dot(G, R[[k, ii], :]) Q[:, [k, ii]] = np.dot(Q[:, [k, ii]], np.transpose(G)) ## Fourth step : interchaing the k and k+1 th columns #p[[k-1,k]] = p[[k, k-1]] p_tmp = p[k - 1] p[k - 1] = p[k] p[k] = p_tmp ga = deepcopy(R[k - 1, k - 1]) mu = deepcopy(R[k - 1, k]) / ga if k < R_m: nu = deepcopy(R[k, k]) / ga else: nu = 0 rho = np.sqrt(mu * mu + nu * nu) ga_bar = ga * rho b1 = R[0:k - 1, k - 1] b2 = R[0:k - 1, k] c1T = R[k - 1, k + 1:] c2T = R[k, k + 1:] #print(R[0,0]) c1T_bar = (mu * c1T + nu * c2T) / rho c2T_bar = (nu * c1T - mu * c2T) / rho R[0:k - 1, k - 1] = b2 R[0:k - 1, k] = b1 R[k - 1, k - 1] = ga_bar R[k - 1, k] = np.dot(ga, mu) / rho R[k, k] = np.dot(ga, nu) / rho R[k - 1, k + 1:] = c1T_bar R[k, k + 1:] = c2T_bar R_submatrix_tmp = deepcopy(R[0:k - 1, 0:k - 1]) u = np.dot(np.linalg.pinv(R_submatrix_tmp), b1) u1 = AB[0:k - 1, 0] AB[0:k - 1, 0] = ((mu * mu) * u - mu * u1) / (rho * rho) AB[k - 1, 0] = mu / (rho * rho) AB[k - 1, 1:] = c1T_bar / ga_bar AB[0:k - 1, 1:] = AB[0:k - 1, 1:] + (nu * np.outer(u, c2T_bar) - np.outer(u1, c1T_bar)) / ga_bar gamma[0] = ga * nu / rho gamma[1:] = np.power( (np.power(gamma[1:], 2) + np.power(np.transpose(c2T_bar), 2) - np.power(np.transpose(c2T), 2)), 1 / 2) u_bar = u1 + mu * u omega[k - 1] = ga_bar #print(np.power(omega[0:k-1],(-2))) #print("ga_bar") #print(ga_bar) #print(mu) #print("mu") #print(np.shape(omega)) if counter == 0: omega[0:k - 1] = np.power( np.abs((np.power(omega[0:k - 1], (-2)) + np.power(u_bar, 2) / (ga_bar * ga_bar) - np.power(u, 2) / (ga * ga))), (-1 / 2)) else: omega[0:k - 1] = np.power( (np.power(omega[0:k - 1], (-2)) + np.power(u_bar, 2) / (ga_bar * ga_bar) - np.power(u, 2) / (ga * ga)), (-1 / 2)) #print("counter") #print(counter) print(p[0:20]) 
#Eliminate new R(k+1, k) by orthgonal transformation Gk = np.asarray([[mu / rho, nu / rho], [nu / rho, -mu / rho]]) #print(np.dot(Gk,np.transpose(Gk))) R_d_final, _ = np.shape(R) if k < R_d_final: Q[:, [k, k + 1]] = np.dot(Q[:, [k, k + 1]], np.transpose(Gk)) # return p[0:k]
def file_loop(f): print('Doing file: ' + f) dic = xr.open_dataset(f) edate = pd.Timestamp(dic.time.values) out = dictionary() res = [] outt = dic['tc_lag0'].values outp = dic['p'].values out['lon'] = dic['lon'].values out['lat'] = dic['lat'].values out['hour'] = dic['time.hour'].item() out['month'] = dic['time.month'].item() out['year'] = dic['time.year'].item() out['date'] = dic['time'].values if np.nanmin(dic['tc_lag0'].values) > -53: return #ipdb.set_trace() out['clat'] = np.min(out['lat'])+((np.max(out['lat'])-np.min(out['lat']))*0.5) out['clon'] = np.min(out['lon']) + ((np.max(out['lon']) - np.min(out['lon'])) * 0.5) if (out['clat']<9) | (out['clon']<-15) | (out['clon']>15): print('MCS out of box') return # if edate.hour < 17: # return try: era_pl = xr.open_dataset(cnst.ERA5_HOURLY_PL+'ERA5_'+str(dic['time.year'].values)+'_'+str(dic['time.month'].values).zfill(2)+'_pl.nc') except: print('ERA5 missing') return try: era_srfc = xr.open_dataset(cnst.ERA5_HOURLY_SRFC+'ERA5_'+str(dic['time.year'].values)+'_'+str(dic['time.month'].values).zfill(2)+'_srfc.nc') except: print('ERA5 srfc missing') return era_pl = uda.flip_lat(era_pl) era_srfc = uda.flip_lat(era_srfc) edate = edate.replace(hour=12, minute=0) era_pl_day = era_pl.sel(time=edate, longitude=slice(-16,17), latitude=slice(4,26)) era_srfc_day = era_srfc.sel(time=edate, longitude=slice(-16,17), latitude=slice(4,26)) tminpos = np.where(dic['tc_lag0'].values == np.nanmin(dic['tc_lag0'].values)) # era position close to min temp if len(tminpos[0])>1: ptmax = np.nanmax((dic['p'].values)[tminpos]) if ptmax > 0: prpos = np.where((dic['p'].values)[tminpos] == ptmax) tminpos = ((tminpos[0])[prpos], (tminpos[1])[prpos] ) else: tminpos = ((tminpos[0])[0], (tminpos[1])[0]) elon = dic['lon'].values[tminpos] elat = dic['lat'].values[tminpos] era_day = era_pl_day.sel(latitude=elat, longitude=elon , method='nearest') # take point of minimum T era_day_srfc = era_srfc_day.sel(latitude=elat, longitude=elon , method='nearest') # take point of minimum T del era_srfc_day e925 = era_day.sel(level=925).mean() e850 = era_pl_day['t'].sel(level=850) elow = era_day.sel(level=slice(925,850)).mean('level').mean() e650 = era_day.sel(level=650).mean() emid = era_day.sel(level=slice(600,700)).mean('level').mean() srfc = era_day_srfc.mean() t_thresh = -50 # -40C ~ 167 W m-2 mask = np.isfinite(outp) & (outt<=t_thresh) & np.isfinite(outt) mask_area = (outt<=t_thresh) & np.isfinite(outt) mask70 = (outt<=-70) & np.isfinite(outt) if np.sum(mask) < 3: return print(np.nanmax(outt[mask])) # can be bigger than cutout threshold because of interpolation to 5km grid after cutout out['area'] = np.sum(mask_area) out['area70'] = np.sum(mask70) out['tmin'] = np.min(outt[mask]) out['tmean'] = np.mean(outt[mask]) maxpos = np.unravel_index(np.nanargmax(outp), outp.shape) out['pmax'] = np.nanmean(ua.cut_kernel(outp,maxpos[1], maxpos[0],1)) #np.max(outp[mask]) out['pmean'] = np.mean(outp[mask]) dbox = e850.copy(deep=True) minlon = era_pl_day.sel(latitude=8, longitude=np.min(out['lon']), method='nearest') maxlon = era_pl_day.sel(latitude=8, longitude=np.max(out['lon']), method='nearest') del era_pl_day tgrad = dbox.sel(longitude=slice(minlon.longitude.values, maxlon.longitude.values)).mean('longitude') tmin = np.nanargmin(tgrad.values) tmax = np.nanargmax(tgrad.values) tgrad = tgrad.isel(latitude=slice(tmin, tmax)) lingress = uda.linear_trend_lingress(tgrad) out['tgrad'] = lingress['slope'].values tgrad2 = dbox.sel(longitude=slice(np.min(out['lon']), np.max(out['lon'])), latitude=slice(10, 
20)).mean( ['longitude', 'latitude']) - \ dbox.sel(longitude=slice(np.min(out['lon']), np.max(out['lon'])), latitude=slice(5, 7)).mean(['longitude', 'latitude']) out['tbox'] = tgrad2.values try: out['q925'] =float(e925['q']) except TypeError: return out['q650'] = float(e650['q']) out['v925'] = float(e925['v']) out['v650'] = float(e925['v']) out['u925'] = float(e925['u']) out['u650'] = float(e650['u']) out['w925'] = float(e925['w']) out['w650'] = float(e650['w']) out['rh925'] = float(e925['r']) out['rh650'] = float(e650['r']) out['t925'] = float(e925['t']) out['t650'] = float(e650['t']) out['pv925'] = float(e925['pv']) out['pv650'] = float(e650['pv']) out['div925'] = float(e925['d']) out['div650'] = float(e650['d']) out['q_low'] = float(elow['q']) out['q_mid'] = float(emid['q']) out['tcwv'] = float(srfc['tcwv']) out['shear'] = float(e650['u']-e925['u']) theta_down = u_met.theta_e(925,e925['t']-273.15, e925['q']) theta_up = u_met.theta_e(650,e650['t']-273.15, e650['q']) out['dtheta'] = (theta_down-theta_up).values out['thetaup'] = theta_up.values out['thetadown'] = theta_down.values out['pgt30'] = np.sum(outp[mask]>=30) out['isvalid'] = np.sum(mask) out['pgt01'] = np.sum(outp[mask]>=0.1) # out['p'] = outp[mask] out['t'] = outt[mask] #ipdb.set_trace() dic.close() return out
def x2dspec(x2dfile, traceloc='max', extrsize='stsci', bksize='stsci', bkoff='stsci', x1dfile=None, fitsout=None, overwrite=True, bkmask=0): """ Creates a spectrum from HST STIS (or maybe also COS?) data from HST using the x2d file provided by the default STScI pipeline. Parameters ---------- x2dfile : str Path of the x2d file. traceloc : {int|'max'|'lya'}, optional Location of the spectral trace. int : the midpoint pixel 'max' : use the mean y-location of the pixel with highest S/N extrsize, bksize, bkoff : {int|'stsci'}, optional The height of the signal extraction region, the height of the background extraction regions, and the offset above and below the spectral trace at which to center the background extraction regions. 'stsci' : use the value used by STScI in making the x1d (requires x1dfile) int : user specified value in pixels x1dfile : str, optional if 'stsci' is not specfied for any other keyword Path of the x1d file. fitsout : str, optional Path for saving a FITS file version of the spectrum. overwrite : {True|False}, optional Whether to overwrite the existing FITS file. bkmask : int, optional Data quality flags to mask the background. Background pixels that have at least one of these flags will be discarded. Returns ------- spectbl : astropy table The wavelength, flux, error, and data quality flag values of the extracted spectrum. Cautions -------- Using a non-stsci extraction size will cause a systematic error because a flux correction factor is applied that assumes the STScI extraction ribbon was used. This still isn't as good as an x1d, mainly because the wavelength dependency of the slit losses is not accounted for. """ x2d = _fits.open(x2dfile) # get the flux and error from the x2d f, e, q = x2d['sci'].data, x2d['err'].data, x2d['dq'].data inst = x2d[0].header['instrume'] if inst != 'STIS': raise NotImplementedError("This function cannot handle {} data at " "present.".format(inst)) # make sure x1d is available if 'stsci' is specified for anything if 'stsci' in [traceloc, extrsize, bksize, bkoff]: try: x1d = _fits.open(x1dfile) xd = x1d[1].data except: raise ValueError("An open x1d file is needed if 'stsci' is " "specified for any of the keywords.") # get the ribbon values if extrsize == 'stsci': extrsize = _np.mean(xd['extrsize']) if bksize == 'stsci': bksize = _np.mean([xd['bk1size'], xd['bk2size']]) if bkoff == 'stsci': bkoff = _np.mean(_np.abs([xd['bk1offst'], xd['bk2offst']])) # select the trace location if traceloc == 'max': sn = f / e sn[q > 0] = 0.0 sn[e <= 0.0] = 0.0 maxpixel = _np.nanargmax(sn) traceloc = _np.unravel_index(maxpixel, f.shape)[0] if traceloc == 'lya': xmx = _np.nanmedian(_np.argmax(f, 1)) redsum = _np.nansum(f[:, xmx+4:xmx+14], 1) smoothsum = data_structures._smooth_sum(redsum, extrsize) / float(extrsize) traceloc = _np.argmax(smoothsum) + extrsize/2 # convert everything to integers so we can make slices try: intrnd = lambda x: int(round(x)) traceloc, extrsize, bksize, bkoff = map(intrnd, [traceloc, extrsize, bksize, bkoff]) except ValueError: raise ValueError("Invalid input for either traceloc, extrsize, bksize, " "or bkoff. 
See docstring.") # convert intensity to flux fluxfac = x2d['sci'].header['diff2pt'] f, e = f * fluxfac, e * fluxfac # get slices for the ribbons sigslice = slice(traceloc - extrsize // 2, traceloc + extrsize // 2 + 1) bk0slice = slice(traceloc - bkoff - bksize // 2, traceloc - bkoff + bksize // 2 + 1) bk1slice = slice(traceloc + bkoff - bksize // 2, traceloc + bkoff + bksize // 2 + 1) slices = [sigslice, bk0slice, bk1slice] # mask bad values in background regions if bkmask: badpix = (q & bkmask) > 0 badpix[sigslice] = False # but don't modify the signal region f[badpix], e[badpix], q[badpix] = 0.0, 0.0, 0 # make a background area vector to account for masked pixels goodpix = ~badpix bkareas = [_np.sum(goodpix[slc, :], 0) for slc in slices[1:]] bkarea = sum(bkareas) else: bkarea = bksize * 2 # sum fluxes in each ribbon fsig, fbk0, fbk1 = [_np.sum(f[slc, :], 0) for slc in slices] # sum errors in each ribbon esig, ebk0, ebk1 = [_np.sqrt(_np.sum(e[slc, :]**2, 0)) for slc in slices] # condense dq flags in each ribbon bitor = lambda a: reduce(lambda x, y: x | y, a) qsig, qbk0, qbk1 = [bitor(q[slc, :]) for slc in slices] # subtract the background area_ratio = float(extrsize) / bkarea f1d = fsig - area_ratio * (fbk0 + fbk1) e1d = _np.sqrt(esig**2 + (area_ratio * ebk0)**2 + (area_ratio * ebk1)**2) # make sure no zero errors e1d[e1d == 0] = e1d.min() # propagate the data quality flags q1d = qsig | qbk0 | qbk1 # construct wavelength array wedges = _get_x2d_waveedges(x2d) w0, w1 = wedges[:-1], wedges[1:] # construct exposure time array expt = _np.ones(f.shape[0]) * x2d['sci'].header['exptime'] #region PUT INTO TABLE # make data columns colnames = ['w0', 'w1', 'w', 'flux', 'error', 'dq', 'exptime'] units = ['Angstrom'] * 3 + ['ergs/s/cm2/Angstrom'] * 2 + ['s'] descriptions = ['left (short,blue) edge of the wavelength bin', 'right (long,red) edge of the wavelength bin', 'midpoint of the wavelength bin', 'average flux over the bin', 'error on the flux', 'data quality flags', 'cumulative exposure time for the bin'] dataset = [w0, w1, (w0+w1)/2., f1d, e1d, q1d, expt] cols = [_tbl.Column(d, n, unit=u, description=dn) for d, n, u, dn in zip(dataset, colnames, units, descriptions)] # make metadata dictionary descriptions = {'rootname': 'STScI identifier for the dataset used to ' 'create this spectrum.'} meta = {'descriptions': descriptions, 'rootname': x2d[1].header['rootname'], 'traceloc': traceloc, 'extrsize': extrsize, 'bkoff': bkoff, 'bksize': bksize} # put into table tbl = _tbl.Table(cols, meta=meta) #endregion #region PUT INTO FITS if fitsout is not None: # spectrum hdu fmts = ['E'] * 5 + ['I', 'E'] cols = [_fits.Column(n, fm, u, array=d) for n, fm, u, d in zip(colnames, fmts, units, dataset)] del meta['descriptions'] spechdr = _fits.Header(meta.items()) spechdu = _fits.BinTableHDU.from_columns(cols, header=spechdr, name='spectrum') # make primary header prihdr = _fits.Header() prihdr['comment'] = ('Spectrum generated from an x2d file produced by ' 'STScI. The dataset is identified with the header ' 'keywrod rootname. All pixel locations refer to ' 'the x2d and are indexed from 0. ' 'Created with spectralPhoton software ' 'http://github.com/parkus/spectralPhoton') prihdr['date'] = _strftime('%c') prihdr['rootname'] = x2d[1].header['rootname'] prihdu = _fits.PrimaryHDU(header=prihdr) hdulist = _fits.HDUList([prihdu, spechdu]) hdulist.writeto(fitsout, clobber=overwrite) #endregion return tbl
def county_facility_x_correlation(facility, county, start_date, end_date, facility_name, county_pop): county_name = county.head(1)['county'].values[0] start_date = pd.to_datetime(start_date) end_date = pd.to_datetime(end_date) facility['Date'] = pd.to_datetime(facility['Date']) facility_mask = (facility['Date'] > start_date) & (facility['Date'] <= end_date) facility = facility.loc[facility_mask] county['date'] = pd.to_datetime(county['date']) county_mask = (county['date'] > start_date) & (county['date'] <= end_date) county = county.loc[county_mask] plt.plot(facility['Residents.Confirmed']) plt.xlabel('Days') plt.ylabel('Cumulative case count') plt.show() plt.plot(county['cases']) plt.xlabel('Days') plt.ylabel('Cumulative case count') plt.show() # plt.figure('facility') # facility['Rolling_diff'] = moving_average(np.array(facility['Residents.Confirmed'].diff(1))[1:]) # # plt.plot(facility['Date'], facility['Residents.Confirmed'].diff(1), color='blue') # plt.plot(facility['Date'], facility['Rolling_diff'], color='blue') # plt.xticks(rotation=45) # plt.xlabel('Days') # plt.ylabel('Cumulative case count') # plt.title(f'7 Day Rolling Avg - {facility_name}') # plt.show() # # plt.figure('county') # county['Rolling_diff'] = moving_average(np.array(county['cases'].diff(1))[1:]) # # plt.plot(county['date'], county['cases'].diff(1), color='orange') # plt.plot(county['date'], county['Rolling_diff'], color='orange') # plt.xticks(rotation=45) # plt.xlabel('Days') # plt.ylabel('Cumulative case count') # plt.title(f'7 Day Rolling Avg - County {county_name}') # plt.show() joined_df = county.join(facility.set_index('Date'), on='date', how='left') ## TODO before doing the correlation, need to join on the date column to get the same date values for NYT and ICE data ## basically need to build up some more of my data tools first # Compute rolling window synchrony d1 = joined_df['Residents.Active'].fillna( method='ffill').dropna()[1:] / 338 * 10000 d2 = joined_df['cases'].fillna(method='ffill').dropna()[1:] / 30000 * 10000 rs = np.array([ crosscorr(d1, d2, lag) for lag in range(-min(len(joined_df), 21), min(len(joined_df), 21)) ]) #21 days rs_not_nan = rs[~np.isnan(rs)] offset = np.floor(len(rs) / 2) - np.nanargmax(rs) f, ax = plt.subplots(figsize=(14, 5)) ax.plot(rs) ax.axvline(np.ceil(len(rs) / 2), color='k', linestyle='--', label='Center') ax.axvline(np.nanargmax(rs), color='r', linestyle='--', label='Peak synchrony') ax.set( title= f'{facility_name} cross correlation with {county_name} county \n Date Offset = {offset} frames', xlabel='Offset', ylabel='Pearson r') # ax.set_xticks(np.arange(len(joined_df))) # ax.set_xticklabels(joined_df['Date']) plt.legend() plt.figure('compare case rates') avg_difference_in_rates = np.average( joined_df['Residents.Active'].fillna(method='ffill')[1:] / facility.head(1)['Population.Feb20'].values[0] * 10000) / np.average( joined_df['cases'].diff(10).fillna(method='bfill') / county_pop * 10000) plt.ylabel('Active case rate per 10,000 people') plt.title( f'Active case rates for {facility_name} and surrounding county\n' f'Avg rate of detention facility is {np.round(avg_difference_in_rates,1)}X higher than county rate' ) plt.plot(joined_df['date'], joined_df['Residents.Active'].fillna(method='ffill') / facility.head(1)['Population.Feb20'].values[0] * 10000, label=f'{facility_name} Detainee Rate') plt.plot(joined_df['date'], joined_df['cases'].diff(10).fillna(method='bfill') / county_pop * 10000, label='County rate') plt.xticks(rotation=45) plt.ylim(1, 100000) plt.semilogy() 
plt.yticks([10, 100, 1000, 10000], labels=['10', '100', '1000', '10000']) plt.legend(loc='upper left') plt.show()
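# The crosscorr helper used above is not defined in this snippet; a common
# pandas-based implementation consistent with how it is called here (Pearson r
# between one series and a lag-shifted copy of the other) is sketched below.
import pandas as pd

def crosscorr(datax, datay, lag=0):
    """Lagged cross-correlation: Pearson r of datax against datay shifted by lag."""
    return datax.corr(datay.shift(lag))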
plt.figure(figsize=(12, 12)) fig = sns.heatmap(sq_dists, cmap=plt.get_cmap('viridis'), square=True, mask=mask) figx = fig.get_figure() figx.savefig('/home/u3749/result/matrix.png') # upper triangle of matrix set to np.nan sq_dists[np.triu_indices_from(mask)] = np.nan sq_dists[0, 0] = np.nan fig = plt.figure(figsize=(12, 8)) # maximally dissimilar image ax = fig.add_subplot(1, 3, 1) maximally_dissimilar_image_idx = np.nanargmax(np.nanmean(sq_dists, axis=1)) plt.imshow(all_images[maximally_dissimilar_image_idx]) plt.title('maximally dissimilar') # maximally similar image ax = fig.add_subplot(1, 3, 2) maximally_similar_image_idx = np.nanargmin(np.nanmean(sq_dists, axis=1)) plt.imshow(all_images[maximally_similar_image_idx]) plt.title('maximally similar') # now compute the mean image ax = fig.add_subplot(1, 3, 3) mean_img = gray_imgs_mat.mean(axis=0).reshape(rescaled_dim, rescaled_dim, 3) plt.imshow(cv2.normalize(mean_img, None, 0.0, 1.0, cv2.NORM_MINMAX)) plt.title('mean image')
def train_the_model(train_R_indices, train_R, cv_R_indices, cv_R, test_R_indices, test_R, BATCH_SIZE, NUM_EPOCHS, LAMBDA, lr, train_op, loss_op, y_pred_op, X, y, n_investors, n_input, threshold): start = time.time() n_batches = len(train_R) // BATCH_SIZE init = tf.global_variables_initializer() batch = Batch(train_R_indices, train_R, n_investors, n_input, BATCH_SIZE=BATCH_SIZE) epoch_loss_train, _reg = 0, 0 loss_cv_arr, loss_train_arr = [], [] best_save_score = -np.inf print('NUM_EPOCHS: {}\nLAMBDA: {}\nlr: {}\nn_batches: {}\nBATCH_SIZE: {}\nthreshold: {}'.format(\ NUM_EPOCHS, LAMBDA, lr, n_batches, BATCH_SIZE, threshold)) # Add ops to save and restore all the variables. saver = tf.train.Saver() print('start SGD iterations...', end='\r') with tf.Session() as sess: sess.run(init) epoch_end = time.time() while not (batch.epoch == NUM_EPOCHS and batch.last_batch == True): batch_X, batch_y = batch.next() if batch.i0 == 0: print('Epoch %d %s' % (batch.epoch, '_' * 62)) _, _batch_loss_train = sess.run([train_op, loss_op], feed_dict={ X: batch_X, y: batch_y }) epoch_loss_train += _batch_loss_train * (batch.i1 - batch.i0) print("batch_no:{}/{}, loss_train:{:6.4f}, t={:0.1f} sec".format( batch.batch_no, n_batches, epoch_loss_train / batch.i1, time.time() - epoch_end), end='\r') if batch.last_batch: # collect some statistics for printing the loss function etc # fetch the losses print('\nEvaluating loss_cv and preds_cv on cv set... ', end='\r') preds_cv, _loss_cv = evaluate_preds_and_loss( sess, cv_R, cv_R_indices, loss_op, y_pred_op, X, y, BATCH_SIZE, n_investors, n_input) # threshold ~ 0.7 <== an important parameter !!! print('Calculating ROC curve, threshold: {:3.1f} '. format(threshold), end='\r') #_, _, _precision_cv, _recall_cv, _f1_score_cv, _ = ROC_statistics(preds_cv, cv_R, threshold=threshold) _, _, _, _, f1_score_cv, _ = ROC_statistics(preds_cv, cv_R) # f1_score_cv is an array. idx = np.nanargmax(f1_score_cv) _f1_score_cv = f1_score_cv[idx] # retrieve the threshold value where f1_score_cv reaches a maximum. threshold = np.linspace(0, 1, len(f1_score_cv))[idx] loss_cv_arr.append(_loss_cv) loss_train_arr.append(epoch_loss_train / (batch.i1)) # resetting some iteration variables.... epoch_loss_train, _reg = 0, 0 epoch_end = time.time() # Save model if _f1_score_cv has reached a minimum. if (_f1_score_cv > best_save_score): best_save_score = _f1_score_cv save_path = saver.save( sess, 'saved_models/DL_models/best_model.ckpt') ckpt = ' !! CHECKPOINT!!' batch.ckpt_epoch = batch.epoch else: ckpt = '' # printing.... print( 'loss_train: {0:6.4f}, **loss_cv: {1:6.4f}**, f1_score_cv: {2:6.4f} @threshold:{3:1.2f} {4:3s}' .format(loss_train_arr[-1], loss_cv_arr[-1], _f1_score_cv, threshold, ckpt)) with open('saved_models/loss_train_and_cv.pkl', 'wb') as f: pickle.dump((loss_train_arr, loss_cv_arr, batch), f) print() return loss_cv_arr, loss_train_arr, batch
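# Hedged sketch of the threshold-selection step used in train_the_model above:
# given F1 scores evaluated on an evenly spaced grid of thresholds in [0, 1],
# np.nanargmax picks the index of the best score while ignoring NaN entries.
# The numbers are illustrative only.
import numpy as np

f1_score_cv = np.array([0.20, np.nan, 0.55, 0.61, 0.48])
idx = np.nanargmax(f1_score_cv)                        # -> 3
threshold = np.linspace(0, 1, len(f1_score_cv))[idx]   # -> 0.75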
def compare(self, predicted, matchingFunc, output_fn, error_file=None, binary=False): ''' Compare gold against predicted using a specified matching function. Outputs PR curve to output_fn ''' y_true = [] y_scores = [] errors = [] correct = 0 incorrect = 0 correctTotal = 0 unmatchedCount = 0 predicted = Benchmark.normalizeDict(predicted) gold = Benchmark.normalizeDict(self.gold) if binary: predicted = Benchmark.binarize(predicted) gold = Benchmark.binarize(gold) #gold = self.gold # taking all distinct values of confidences as thresholds confidence_thresholds = set() for sent in predicted: for predicted_ex in predicted[sent]: confidence_thresholds.add(predicted_ex.confidence) confidence_thresholds = sorted(list(confidence_thresholds)) num_conf = len(confidence_thresholds) results = {} p = np.zeros(num_conf) pl = np.zeros(num_conf) r = np.zeros(num_conf) rl = np.zeros(num_conf) for sent, goldExtractions in gold.items(): if sent in predicted: predictedExtractions = predicted[sent] else: predictedExtractions = [] scores = [[None for _ in predictedExtractions] for __ in goldExtractions] # print("***Gold Extractions***") # print("\n".join([goldExtractions[i].pred + ' ' + " ".join(goldExtractions[i].args) for i in range(len(goldExtractions))])) # print("***Predicted Extractions***") # print("\n".join([predictedExtractions[i].pred+ " ".join(predictedExtractions[i].args) for i in range(len(predictedExtractions))])) for i, goldEx in enumerate(goldExtractions): for j, predictedEx in enumerate(predictedExtractions): score = matchingFunc(goldEx, predictedEx, ignoreStopwords=True, ignoreCase=True) scores[i][j] = score # OPTIMISED GLOBAL MATCH sent_confidences = [ extraction.confidence for extraction in predictedExtractions ] sent_confidences.sort() prev_c = 0 for conf in sent_confidences: c = confidence_thresholds.index(conf) ext_indices = [] for ext_indx, extraction in enumerate(predictedExtractions): if extraction.confidence >= conf: ext_indices.append(ext_indx) recall_numerator = 0 for i, row in enumerate(scores): max_recall_row = max( [row[ext_indx][1] for ext_indx in ext_indices], default=0) recall_numerator += max_recall_row precision_numerator = 0 selected_rows = [] selected_cols = [] num_precision_matches = min(len(scores), len(ext_indices)) for t in range(num_precision_matches): matched_row = -1 matched_col = -1 matched_precision = -1 # initialised to <0 so that it updates whenever precision is 0 as well for i in range(len(scores)): if i in selected_rows: continue for ext_indx in ext_indices: if ext_indx in selected_cols: continue if scores[i][ext_indx][0] > matched_precision: matched_precision = scores[i][ext_indx][0] matched_row = i matched_col = ext_indx selected_rows.append(matched_row) selected_cols.append(matched_col) precision_numerator += scores[matched_row][matched_col][0] p[prev_c:c + 1] += precision_numerator pl[prev_c:c + 1] += len(ext_indices) r[prev_c:c + 1] += recall_numerator rl[prev_c:c + 1] += len(scores) prev_c = c + 1 # for indices beyond the maximum sentence confidence, len(scores) has to be added to the denominator of recall rl[prev_c:] += len(scores) prec_scores = [a / b if b > 0 else 1 for a, b in zip(p, pl)] rec_scores = [a / b if b > 0 else 0 for a, b in zip(r, rl)] f1s = [Benchmark.f1(p, r) for p, r in zip(prec_scores, rec_scores)] try: optimal_idx = np.nanargmax(f1s) optimal = (prec_scores[optimal_idx], rec_scores[optimal_idx], f1s[optimal_idx]) except ValueError: # When there is no prediction optimal = (0, 0, 0) # In order to calculate auc, we need to add the point 
corresponding to precision=1 , recall=0 to the PR-curve temp_rec_scores = rec_scores.copy() temp_prec_scores = prec_scores.copy() temp_rec_scores.append(0) temp_prec_scores.append(1) # print("AUC: {}\t Optimal (precision, recall, F1): {}".format( np.round(auc(temp_rec_scores, temp_prec_scores),3), np.round(optimal,3) )) with open(output_fn, 'w') as fout: fout.write('{0}\t{1}\t{2}\n'.format("Precision", "Recall", "Confidence")) for cur_p, cur_r, cur_conf in sorted(zip(prec_scores, rec_scores, confidence_thresholds), key=lambda cur: cur[1]): fout.write('{0}\t{1}\t{2}\n'.format(cur_p, cur_r, cur_conf)) if len(f1s) > 0: return np.round(auc(temp_rec_scores, temp_prec_scores), 3), np.round(optimal, 3) else: # When there is no prediction return 0, (0, 0, 0)
def getErrors(true_behavior, logpos, grid): decoded_behavior = grid[np.nanargmax(logpos, axis=1)].flatten() assert (len(true_behavior) == len(decoded_behavior)) errors = np.array( [np.linalg.norm(pred_i - true_behav_i) \ for pred_i, true_behav_i in zip(decoded_behavior, true_behavior)]) return errors
def addEvidence(self, dataX, dataY): """ @summary: Add training data to learner @param dataX: X values of data to add @param dataY: the Y training values """ if dataX.shape[0] == 1: self.tree = np.array([[-1, dataY, -1, -1]], dtype=float) return self.tree if np.isclose(dataY, dataY[0]).all(): self.tree = np.array([[-1, dataY[0], -1, -1]], dtype=float) return self.tree if dataX.shape[0] <= self.leaf_size: self.tree = np.array([[-1, np.mean(dataY), -1, -1]]) return self.tree else: corr = [] for i in range(0, dataX.shape[1]): c = np.corrcoef(dataX[:, i], dataY) corr.append(c[0, 1]) corrArray = np.array(corr) max = np.nanmax(corrArray) index = np.nanargmax(corrArray) while True: SplitVal = np.median(dataX[:, index], axis=0) if np.isclose(dataX[:, index], dataX[0, index]).all(): corrArray = corrArray[corrArray < max] max = np.nanmax(corrArray) index = np.nanargmax(corrArray) continue elif SplitVal >= np.nanmax(dataX[:, index]): SplitVal = (dataX[:, index].max() + dataX[:, index].min()) / 2 break else: break lefttree = np.array( self.addEvidence(dataX[dataX[:, index] <= SplitVal], dataY[dataX[:, index] <= SplitVal])) righttree = np.array( self.addEvidence(dataX[dataX[:, index] > SplitVal], dataY[dataX[:, index] > SplitVal])) root = np.array([[index, SplitVal, 1, lefttree.shape[0] + 1]], dtype=float) self.tree = np.vstack((root, lefttree)) self.tree = np.vstack((self.tree, righttree)) return self.tree
N_star = ts.tabu_active(sMem, sMemVal, N, f_curr[0], sol_values) logger.info("length of sub-nbrhd: {} \n".format(len(N_star))) #Selecting a candidate #if all the solutions are tabu: if len(N_star) == 0: all_val = [cf.evaluate(s)[0] for s in N] s = ts.aspiration_criteria(neighborhood=N, values=all_val) f_s = cf.evaluate(s) else: #otherwise - #Pick the solution with the best value #from non-tabu members even if they are non-improving s_values = [cf.evaluate(s)[0] for s in N_star] s = N_star[np.nanargmax(s_values)] f_s = cf.evaluate(s) logger.info("candidate solution: {} {} \n".format(f_s, s)) #Finding where the flip occurred tabu_ind = ts.tabu_criteria(s, x_curr) logger.info("tabooed element index: {} \n".format(tabu_ind)) #updating all variables sMem, sMemVal = ts.st_memory(update_ind=tabu_ind, mem=sMem, memValue=sMemVal, solution=s) logger.info("short term memory and value: {} {}".format(sMem, sMemVal))
if os.path.isfile(avepath): print('Averaged data found at: %s' % avepath) avedict = np.load( avepath, allow_pickle=True).item() # load here the above pickle else: sys.exit( 'ERROR: no averaged data found - run prep_plumes.py via submit_interp.sh first on Cedar!' ) #extract locations of max pm, w, temp ------------------------------ PMmax_profile = np.nanmax(avedict['pm25'], 1) #get max smoke profile top_threshold = max( PMmax_profile ) * 0.001 #variable threshold (based on near-surface high concentrations!!!!) PMmax_profile[PMmax_profile < top_threshold] = np.nan PMmax_idx = np.nanargmax(avedict['pm25'][np.isfinite(PMmax_profile)], 1) #get downwind location PMmax_meters = PMmax_idx * plume.dx wave_plume = avedict['w'].copy() wave_plume[avedict['pm25'] < top_threshold] = np.nan #mask where there is no plume wmax_profile = np.nanmax(wave_plume, 1) #get the profiles wmax_idx = np.nanargmax(wave_plume[np.isfinite(wmax_profile)], 1) #get downwind location (index) watPM_profile = np.array([ avedict['w'][ni, i] for ni, i in enumerate(PMmax_idx) ]) #get the profiles tmax_profile = np.nanmax(avedict['temp'], 1) tmax_profile[np.isnan(PMmax_profile)] = np.nan tmax_idx = np.nanargmax(avedict['temp'][np.isfinite(tmax_profile)], 1)
# Section 3.1: Difference, correlation and BIAS for scatter plot # Difference dif_pr = (prns - pr).round(2) # Pearson correlation coefficient pearson_pr = round(stats.pearsonr(prns, pr)[0], 2) # Bias = mean error bias_pr = round(dif_pr.mean(), 2) # Section 3.2: Maximum height storm top and max bin maxh = (round(np.nanmax(hst), 2)) maxbin = int(176 - (maxh / 125)) # Max Lon, Lat and beam lon_maxh = round(lon[np.nanargmax(hst)], 2) lat_maxh = round(lat[np.nanargmax(hst)], 2) beam_maxh = np.argmax(np.amax(ds_hst, axis=0)) # %% # Section 3.3: Prepare the reflectivity profile (cross track) # using “zFactorCorrected” variable. # through the maximun height: dbz_hmax = ds['NS']['SLV']['zFactorCorrected'][2555:2780, beam_maxh, :] dbz_hmax_c = np.where(dbz_hmax[:, :] <= -9999, np.nan, dbz_hmax[:, :]) lat_hmax = ds_lat[:, beam_maxh] lon_hmax = ds_lon[:, beam_maxh] # through the hurricane eye: dbz_eye = ds['NS']['SLV']['zFactorCorrected'][2555:2780, 48, :] dbz_eye_c = np.where(dbz_eye[:, :] <= -9999, np.nan, dbz_eye[:, :])
def resp_newton(self, response, responsef, iterations, ky, kx, use_sz): n_scale = response.shape[2] index_max_in_row = np.argmax(response, 0) max_resp_in_row = np.max(response, 0) index_max_in_col = np.argmax(max_resp_in_row, 0) init_max_response = np.max(max_resp_in_row, 0) col = index_max_in_col.flatten(order="F") max_row_perm = index_max_in_row row = max_row_perm[col, np.arange(n_scale)] trans_row = (row - 1 + np.floor((use_sz[1] - 1) / 2)) % use_sz[1] \ - np.floor((use_sz[1] - 1) / 2) + 1 trans_col = (col - 1 + np.floor((use_sz[0] - 1) / 2)) % use_sz[0] \ - np.floor((use_sz[0] - 1) / 2) + 1 init_pos_y = np.reshape(2 * np.pi * trans_row / use_sz[1], (1, 1, n_scale)) init_pos_x = np.reshape(2 * np.pi * trans_col / use_sz[0], (1, 1, n_scale)) max_pos_y = init_pos_y max_pos_x = init_pos_x # pre-compute complex exponential iky = 1j * ky exp_iky = np.tile(iky[np.newaxis, :, np.newaxis], (1, 1, n_scale)) * \ np.tile(max_pos_y, (1, ky.shape[0], 1)) exp_iky = np.exp(exp_iky) ikx = 1j * kx exp_ikx = np.tile(ikx[:, np.newaxis, np.newaxis], (1, 1, n_scale)) * \ np.tile(max_pos_x, (kx.shape[0], 1, 1)) exp_ikx = np.exp(exp_ikx) # gradient_step_size = gradient_step_size / prod(use_sz) ky2 = ky * ky kx2 = kx * kx iter = 1 while iter <= iterations: # Compute gradient ky_exp_ky = np.tile(ky[np.newaxis, :, np.newaxis], (1, 1, exp_iky.shape[2])) * exp_iky kx_exp_kx = np.tile(kx[:, np.newaxis, np.newaxis], (1, 1, exp_ikx.shape[2])) * exp_ikx y_resp = np.einsum('ilk,ljk->ijk', exp_iky, responsef) resp_x = np.einsum('ilk,ljk->ijk', responsef, exp_ikx) grad_y = -np.imag(np.einsum('ilk,ljk->ijk', ky_exp_ky, resp_x)) grad_x = -np.imag(np.einsum('ilk,ljk->ijk', y_resp, kx_exp_kx)) ival = 1j * np.einsum('ilk,ljk->ijk', exp_iky, resp_x) H_yy = np.tile(ky2[np.newaxis, :, np.newaxis], (1, 1, n_scale)) * exp_iky H_yy = np.real(-np.einsum('ilk,ljk->ijk', H_yy, resp_x) + ival) H_xx = np.tile(kx2[:, np.newaxis, np.newaxis], (1, 1, n_scale)) * exp_ikx H_xx = np.real(-np.einsum('ilk,ljk->ijk', y_resp, H_xx) + ival) H_xy = np.real( -np.einsum('ilk,ljk->ijk', ky_exp_ky, np.einsum('ilk,ljk->ijk', responsef, kx_exp_kx))) det_H = H_yy * H_xx - H_xy * H_xy # Compute new position using newtons method diff_y = (H_xx * grad_y - H_xy * grad_x) / det_H diff_x = (H_yy * grad_x - H_xy * grad_y) / det_H max_pos_y = max_pos_y - diff_y max_pos_x = max_pos_x - diff_x # Evaluate maximum exp_iky = np.tile(iky[np.newaxis, :, np.newaxis], (1, 1, n_scale)) * \ np.tile(max_pos_y, (1, ky.shape[0], 1)) exp_iky = np.exp(exp_iky) exp_ikx = np.tile(ikx[:, np.newaxis, np.newaxis], (1, 1, n_scale)) * \ np.tile(max_pos_x, (kx.shape[0], 1, 1)) exp_ikx = np.exp(exp_ikx) iter = iter + 1 max_response = 1 / np.prod(use_sz) * \ np.real(np.einsum('ilk,ljk->ijk', np.einsum('ilk,ljk->ijk', exp_iky, responsef), exp_ikx)) # check for scales that have not increased in score ind = max_response < init_max_response max_response[0, 0, ind.flatten()] = init_max_response[ind.flatten()] max_pos_y[0, 0, ind.flatten()] = init_pos_y[0, 0, ind.flatten()] max_pos_x[0, 0, ind.flatten()] = init_pos_x[0, 0, ind.flatten()] sind = int(np.nanargmax(max_response, 2)) disp_row = (np.mod(max_pos_y[0, 0, sind] + np.pi, 2 * np.pi) - np.pi) / (2 * np.pi) * use_sz[1] disp_col = (np.mod(max_pos_x[0, 0, sind] + np.pi, 2 * np.pi) - np.pi) / (2 * np.pi) * use_sz[0] return disp_row, disp_col, sind
def train_epochs(args, model, optimizer, params, dicts, struc_feats, struc_labels): """ Main loop. does train and test """ metrics_hist = defaultdict(lambda: []) metrics_hist_te = defaultdict(lambda: []) metrics_hist_tr = defaultdict(lambda: []) test_only = args.test_model is not None print("\n\ntest_only: " + str(test_only)) # Converting to csr sparse matrix form X = struc_feats.tocsr() print(X.shape[0]) # Splitting into train, val and test --> need idx values passed as args X_train = X[:args.len_train] y_train = struc_labels[:args.len_train] X_val = X[args.len_train:args.len_train + args.len_val] X_test = X[args.len_train + args.len_val:args.len_train + args.len_val + args.len_test] # Standardizing features scaler = MaxAbsScaler().fit(X_train) X_train_std = scaler.transform(X_train) X_val_std = scaler.transform(X_val) X_test_std = scaler.transform(X_test) ################################ opt_thresh = None # Placeholder, only needed when predicting on test set, updated below #train for n_epochs unless criterion metric does not improve for [patience] epochs for epoch in range(args.n_epochs): #only test on train/test set on very last epoch if epoch == 0 and not args.test_model: model_dir = os.path.join( MODEL_DIR, '_'.join([ args.model, args.desc, time.strftime('%b_%d_%H:%M', time.gmtime()) ])) os.mkdir(model_dir) elif args.test_model: model_dir = os.getcwd( ) #just save things to where this script was called start = time.time() metrics_all = one_epoch(model, optimizer, epoch, args.n_epochs, args.batch_size, args.data_path, test_only, dicts, model_dir, args.gpu, args.quiet, X_train_std, X_val_std, X_test_std, y_train, args.train_frac, args.test_frac, opt_thresh, args.struc_aux_loss_wt, args.conv_aux_loss_wt) end = time.time() print("\nEpoch Duration: " + str(end - start)) # DISTRIBUTING results from metrics_all to respective dicts for name in metrics_all[0].keys(): metrics_hist[name].append(metrics_all[0][name]) for name in metrics_all[1].keys(): metrics_hist_te[name].append(metrics_all[1][name]) for name in metrics_all[2].keys(): metrics_hist_tr[name].append(metrics_all[2][name]) metrics_hist_all = (metrics_hist, metrics_hist_te, metrics_hist_tr) #save metrics, model, params persistence.save_everything(args, metrics_hist_all, model, model_dir, params, args.criterion) if test_only: break if (epoch == args.n_epochs - 2): opt_thresh = metrics_hist["opt_f1_thresh_micro"][np.nanargmax( metrics_hist[args.criterion])] print("Optimal f1 threshold: " + str(opt_thresh)) if args.criterion in metrics_hist.keys(): if (early_stop(metrics_hist, args.criterion, args.patience)): #stop training, do tests on test and train sets, and then stop the script print( "%s hasn't improved in %d epochs, early stopping or just completed last epoch" % (args.criterion, args.patience)) test_only = True opt_thresh = metrics_hist["opt_f1_thresh_micro"][np.nanargmax( metrics_hist[args.criterion])] print("Optimal f1 threshold: " + str(opt_thresh)) model = torch.load( '%s/model_best_%s.pth' % (model_dir, args.criterion)) # LOADING BEST MODEL FOR FINAL TEST return epoch + 1
def significant_figures(n, unc=None, max_sf=20, rtol=1e-20): """ Iterative method to determine the number of significant digits for a given float, optionally providing an uncertainty. Parameters ---------- n : :class:`float` Number from which to ascertain the significance level. unc : :class:`float`, :code:`None` Uncertainty, which if provided is used to derive the number of significant digits. max_sf : :class:`int` An upper limit to the number of significant digits suggested. rtol : :class:`float` Relative tolerance to determine similarity of numbers, used in calculations. Returns ------- :class:`int` Number of significant digits. """ if not hasattr(n, "__len__"): if np.isfinite(n): if unc is not None: mag_n = np.floor(np.log10(np.abs(n))) mag_u = np.floor(np.log10(unc)) if not np.isfinite(mag_u) or not np.isfinite(mag_n): return np.nan sf = int(max(0, int(1.0 + mag_n - mag_u))) else: sf = min( [ ix for ix in range(max_sf) if np.isclose(round_sig(n, ix), n, rtol=rtol) ] ) return sf else: return 0 else: # this isn't working n = np.array(n) _n = n.copy() mask = np.isclose(n, 0.0) # can't process zeros _n[mask] = np.nan if unc is not None: mag_n = np.floor(np.log10(np.abs(_n))) mag_u = np.floor(np.log10(unc)) sfs = np.nanmax( np.vstack( [np.zeros(mag_n.shape), (1.0 + mag_n - mag_u).astype(np.int)] ), axis=0, ).astype(np.int) else: rounded = np.vstack([_n] * max_sf).reshape(max_sf, *_n.shape) indx = np.indices(rounded.shape)[0] # get the row indexes for no. sig figs rounded = round_sig(rounded, indx) sfs = np.nanargmax(np.isclose(rounded, _n, rtol=rtol), axis=0) sfs[np.isnan(sfs)] = 0 return sfs
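# A small worked example of the scalar branch of significant_figures above
# (numpy is assumed imported as np, as in the function itself): with
# n = 3.14159 and unc = 0.01, mag_n = 0 and mag_u = -2, so the suggested number
# of significant digits is 1 + 0 - (-2) = 3.
print(significant_figures(3.14159, unc=0.01))  # expected: 3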
def test_hull_construction(): # test case 1 vals = np.array([[50, 60], [20, 40], [-74, 50], [-95, +10], [20, 60]]) bh = BoundingConvexHull(vals) mask = bh.mask assert mask.shape == (np.max(vals[:, 1]) - np.min(vals[:, 1]) + 1, np.max(vals[:, 0]) - np.min(vals[:, 0]) + 1) assert np.abs(mask.sum() - bh.area) / bh.area < 0.05 # integral mask area needs to be close to true area normalized_normals = bh.rnormals / np.linalg.norm(bh.rnormals, axis=1)[:, None] # test case 2 for e, n in zip(bh.edges, normalized_normals): edge_vec = e[1] - e[0] assert np.all(np.abs(np.dot(edge_vec, n)) < 1.0e-8) # test case 3 valsextract = np.array([[-10, 120], [90, 268], [293, 110],[40, -30]]) bh_extract = BoundingConvexHull(valsextract) sinc_npx = 255 sinc = np.sinc(np.linspace(-7, 7, sinc_npx)) sinc2d = np.outer(sinc, sinc).reshape((1, 1, sinc_npx, sinc_npx)) extracted_data, extracted_window_extents = BoundingConvexHull.regional_data(bh_extract, sinc2d, oob_value=np.nan) assert extracted_window_extents == [-10, 293, -30, 268] sparse_mask = np.array(bh_extract.sparse_mask) lines = np.hstack([bh_extract.corners, np.roll(bh_extract.corners, -1, axis=0)]) minx = np.min(lines[:, 0:4:2]); maxx = np.max(lines[:, 0:4:2]) miny = np.min(lines[:, 1:4:2]); maxy = np.max(lines[:, 1:4:2]) sel = np.logical_and(np.logical_and(sparse_mask[:, 1] >= 0, sparse_mask[:, 1] < 255), np.logical_and(sparse_mask[:, 0] >= 0, sparse_mask[:, 0] < 255)) flat_index = (sparse_mask[sel][:, 0])*sinc_npx + (sparse_mask[sel][:, 1]) sinc_integral = np.sum(sinc2d.ravel()[flat_index]) assert np.abs(sinc_integral - np.nansum(extracted_data.ravel())) < 1.0e-8 v = np.nanargmax(extracted_data) vx = v % extracted_data.shape[3]; vy = v // extracted_data.shape[3] cextracted = (extracted_window_extents[0] + vx, extracted_window_extents[2] + vy) v = np.nanargmax(sinc2d) sincvx = v % sinc_npx; sincvy = v // sinc_npx csinc = tuple([sincvx, sincvy]) assert csinc == cextracted # test case 4 vals2 = np.array([[-20, -120], [0, 60], [40, -60]]) vals3 = np.array([[-20, 58], [-40, 80], [20, 100]]) bh2 = BoundingConvexHull(vals2) bh3 = BoundingConvexHull(vals3) assert bh.overlaps_with(bh2) assert not bh.overlaps_with(bh3) assert not bh2.overlaps_with(bh3) # test case 5 assert (-1000, -1000) not in bh assert (30, 0) not in bh assert (0, 0) not in bh assert (-40, 30) in bh # test case 6 bb = BoundingBox(-14, 20, 30, 49) assert bb.centre == [3, 39] assert bb.box_npx == (35, 20) assert bb.mask.shape == bb.box_npx[::-1] assert bb.area == 35 * 20 assert np.sum(bb.mask) == bb.area assert (-15, 35) not in bb assert (0, 35) in bb bb2 = BoundingBoxFactory.AxisAlignedBoundingBox(bb) #enforce odd assert bb2.box_npx == (35, 21) assert bb2.area == 35 * 21 assert (bb.sparse_mask == bb2.sparse_mask).all() assert (-15, 35) not in bb2 assert (0, 35) in bb2 bb3 = BoundingBoxFactory.AxisAlignedBoundingBox(bb, square=True) #enforce odd assert bb3.box_npx[0] == bb3.box_npx[1] assert bb3.box_npx[0] % 2 == 1 #enforce odd assert bb3.area == bb3.box_npx[0]**2 assert (bb.sparse_mask == bb3.sparse_mask).all() assert (-15, 35) not in bb2 assert (0, 35) in bb2 # test case 7 bb4s = BoundingBoxFactory.SplitBox(bb, nsubboxes=3) assert len(bb4s) == 9 xlims = [(np.min(c.corners[:, 0]), np.max(c.corners[:, 0])) for c in bb4s][0:3] ylims = [(np.min(c.corners[:, 1]), np.max(c.corners[:, 1])) for c in bb4s][0::3] assert np.all(xlims == np.array([(-14, -3), (-2, 9), (10, 20)])) assert np.all(ylims == np.array([(30, 36), (37, 43), (44, 49)])) assert np.sum([b.area for b in bb4s]) == bb.area for bb4 in bb4s: 
assert bb4.area == np.sum(bb4.mask) # test case 8 bb5 = BoundingBox(-14, 20, 30, 50) assert bb5.box_npx == (35, 21) bb6 = BoundingBoxFactory.PadBox(bb5, 41, 27) assert bb6.box_npx == (41, 27) assert bb5.centre == bb6.centre assert np.sum(bb5.mask) == np.sum(bb6.mask) bb7s = list(map(lambda x: BoundingBoxFactory.PadBox(x, 17, 11), bb4s)) assert all([b.box_npx == (17, 11) for b in bb7s]) assert np.sum([np.sum(b.mask) for b in bb7s]) == np.sum([np.sum(b.mask) for b in bb4s]) # test case 9 facet_regions = list(map(lambda f: BoundingBoxFactory.PadBox(f, 63, 63), BoundingBoxFactory.SplitBox(BoundingBoxFactory.AxisAlignedBoundingBox(bh_extract), nsubboxes=5))) facets = list(map(lambda pf: BoundingConvexHull.regional_data(pf, sinc2d, oob_value=np.nan), facet_regions)) stitched_image, stitched_region = BoundingBox.project_regions([f[0] for f in facets], facet_regions) assert np.abs(sinc_integral - np.nansum([np.nansum(f[0]) for f in facets])) < 1.0e-8 assert np.abs(sinc_integral - np.sum(stitched_image)) < 1.0e-8 v = np.argmax(stitched_image) vx = v % stitched_image.shape[3]; vy = v // stitched_image.shape[3] cstitched = (np.min(stitched_region.corners[:, 0]) + vx, np.min(stitched_region.corners[:, 1]) + vy) assert cstitched == csinc # test case 10 olap_box1 = BoundingBox(110, 138, 110, 135) olap_box2 = BoundingBox(115, 150, 109, 150) olap_box3 = BoundingBox(125, 130, 125, 130) BoundingConvexHull.normalize_masks([olap_box1, olap_box2, olap_box3]) ext1 = BoundingConvexHull.regional_data(olap_box1, sinc2d)[0] ext2 = BoundingConvexHull.regional_data(olap_box2, sinc2d)[0] ext3 = BoundingConvexHull.regional_data(olap_box3, sinc2d)[0] olaps_stitched_image, olaps_stitched_region = BoundingBox.project_regions([ext1, ext2, ext3], [olap_box1, olap_box2, olap_box3]) v = np.nanargmax(olaps_stitched_image) vx = v % olaps_stitched_image.shape[3]; vy = v // olaps_stitched_image.shape[3] cstitched_olap = (np.min(olaps_stitched_region.corners[:, 0]) + vx, np.min(olaps_stitched_region.corners[:, 1]) + vy) assert cstitched_olap == csinc assert np.abs(1.0 - np.nanmax(olaps_stitched_image)) < 1.0e-8 # visual inspection if DEBUG: from matplotlib import pyplot as plt plt.figure(figsize=(7, 2.5)) plt.title("Winding, normals and masking check") for h in [bh, bh2, bh3]: for ei, e in enumerate(h.edges): plt.plot(e[:, 0], e[:, 1], "r--") plt.text(e[0, 0], e[0, 1], str(ei)) plt.plot(bh.edge_midpoints[:, 0], bh.edge_midpoints[:, 1], "ko") for e, n in zip(bh.edge_midpoints, normalized_normals): p0 = e p = e + n*6 plt.plot([p0[0], p[0]], [p0[1], p[1]], "b--", lw=2) plt.scatter(vals[:, 0], vals[:, 1]) plt.imshow(mask, extent=[np.min(vals[:, 0]), np.max(vals[:, 0]), np.max(vals[:, 1]), np.min(vals[:, 1])]) plt.grid(True) plt.figure(figsize=(7, 2.5)) plt.title("Data extraction check (global)") for h in [bh_extract]: for ei, e in enumerate(h.edges): plt.plot(e[:, 0], e[:, 1], "r--") plt.imshow(sinc2d[0, 0, :, :], extent=[0, sinc_npx, sinc_npx, 0]) plt.grid(True) plt.figure(figsize=(7, 2.5)) plt.title("Data extraction check (local)") for h in [bh_extract]: for ei, e in enumerate(h.edges): plt.plot(e[:, 0], e[:, 1], "r--") plt.imshow(extracted_data[0, 0, :, :], extent=[extracted_window_extents[0], extracted_window_extents[1], extracted_window_extents[3], extracted_window_extents[2]]) plt.figure(figsize=(7, 2.5)) plt.title("Faceting check") for h in [bh_extract]: for ei, e in enumerate(h.edges): plt.plot(e[:, 0], e[:, 1], "r--") for f in facet_regions: for ei, e in enumerate(f.edges): plt.plot(e[:, 0], e[:, 1], "co--") 
plt.imshow(stitched_image[0, 0, :, :], extent=[np.min(stitched_region.corners[:, 0]), np.max(stitched_region.corners[:, 0]), np.max(stitched_region.corners[:, 1]), np.min(stitched_region.corners[:, 1])]) plt.figure(figsize=(7, 2.5)) plt.title("Overlapping faceting check") for f in [olap_box1, olap_box2, olap_box3]: for ei, e in enumerate(f.edges): plt.plot(e[:, 0], e[:, 1], "co--") plt.imshow(olaps_stitched_image[0, 0, :, :], extent=[np.min(olaps_stitched_region.corners[:, 0]), np.max(olaps_stitched_region.corners[:, 0]), np.max(olaps_stitched_region.corners[:, 1]), np.min(olaps_stitched_region.corners[:, 1])]) plt.xlim((np.min(olaps_stitched_region.corners[:, 0]) - 15, np.max(olaps_stitched_region.corners[:, 0]) + 15)) plt.ylim((np.min(olaps_stitched_region.corners[:, 1]) - 15, np.max(olaps_stitched_region.corners[:, 1]) + 15)) plt.show(block=True)
def ref_impl(a): return np.nanargmax(a)
def argf(self, *args, **kwargs): return np.nanargmax(*args, **kwargs) class Extremum(Ch):
def plot_projection(self, data, ax=None, index=None, sample_cov=0, **kwargs): if ax is None: fig, ax = plt.subplots() else: fig = ax.figure obs_dispersion, obs_flux, obs_ivar = self._slice_spectrum(data) # Apply masks. mask = _generate_mask(obs_dispersion, self.metadata["mask"]) \ * np.isfinite(obs_flux * obs_ivar) if 0 in (obs_dispersion.size, mask.sum()): raise ValueError("no overlapping spectra with finite flux/ivar") #obs_dispersion = obs_dispersion[mask] #obs_flux, obs_ivar = obs_flux[mask], obs_ivar[mask] _ = np.where(mask)[0] si, ei = _[0], _[-1] # Show uncertainties. obs_sigma = np.sqrt(1.0 / obs_ivar) fill_between_steps(ax, obs_dispersion[si:ei], obs_flux[si:ei] - obs_sigma[si:ei], obs_flux[si:ei] + obs_sigma[si:ei], facecolor="#AAAAAA", edgecolor="none", alpha=1) # Limit to the edge of what is OK. ax.plot(obs_dispersion[si:ei], obs_flux[si:ei], c="#444444", drawstyle="steps-mid") obs_flux[~mask] = np.nan ax.plot(obs_dispersion, obs_flux, c='k', drawstyle="steps-mid") # Get the MAP value. if index is None: index = np.nanargmax(self._inference_result[1]) op_theta = self._inference_result[3][index] model_disp, model_flux = utils.parse_spectrum(self.paths[index]).T y = self(obs_dispersion, model_disp, model_flux, *op_theta) y[~mask] = np.nan c = kwargs.pop("c", "r") ax.plot(obs_dispersion, y, c=c, **kwargs) # Get the covariance matrix? if sample_cov > 0: cov = self._inference_result[4][index] print(np.sqrt(np.diag(cov))) # Sample values from the cov matrix and project them. draws = np.random.multivariate_normal( self._inference_result[3][index], self._inference_result[4][index], size=sample_cov) for draw in draws: y_draw = self(obs_dispersion, model_disp, model_flux, *draw) y_draw[~mask] = np.nan ax.plot(obs_dispersion, y_draw, c=c, alpha=10.0 / sample_cov) # Draw fill_between in y? ax.set_title("Index {}: {}".format(index, self.stellar_parameters[index])) return fig
X[nan_locations] = np.take(means, nan_locations[1]) # Normalize X X = preprocessing.scale(X) regressor = linear_model.Ridge() alpha_range = np.logspace(-5, 3, 20) train_scores, valid_scores = validation_curve(regressor, X, y, "alpha", alpha_range, scoring="neg_mean_squared_error", n_jobs=-1) train_scores = [np.mean(s) for s in train_scores] valid_scores = [np.mean(s) for s in valid_scores] plt.plot(alpha_range, train_scores) plt.plot(alpha_range, valid_scores) plt.xscale("log") plt.show() # Take the alpha giving the highest validation score, and test it on test set best_alpha = alpha_range[np.nanargmax(valid_scores)] X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=0.8) regressor.set_params(alpha=best_alpha) regressor.fit(X_train, y_train) print("best alpha =", alpha_range[np.nanargmax(valid_scores)]) print("RMSLE =", root_mean_squared_log_error(y_test, regressor.predict(X_test)))
def train_epochs(args, model, optimizer, params, dicts): """ Main loop. does train and test """ metrics_hist = defaultdict(lambda: []) metrics_hist_te = defaultdict(lambda: []) metrics_hist_tr = defaultdict(lambda: []) test_only = args.test_model is not None print("\n\ntest_only: " + str(test_only)) opt_thresh = None # Placeholder, only needed when predicting on test set, updated below #train for n_epochs unless criterion metric does not improve for [patience] epochs for epoch in range(args.n_epochs): #only test on train/test set on very last epoch if epoch == 0 and not args.test_model: model_dir = os.path.join( MODEL_DIR, '_'.join([ args.model, args.desc, time.strftime('%b_%d_%H:%M', time.gmtime()) ])) os.mkdir(model_dir) elif args.test_model: model_dir = os.getcwd( ) #just save things to where this script was called start = time.time() metrics_all = one_epoch(model, optimizer, epoch, args.n_epochs, args.batch_size, args.data_path, test_only, dicts, model_dir, args.gpu, args.quiet, opt_thresh, args.obs_limit) end = time.time() print("\nEpoch Duration: " + str(end - start)) # DISTRIBUTING results from metrics_all to respective dicts for name in metrics_all[0].keys(): metrics_hist[name].append(metrics_all[0][name]) for name in metrics_all[1].keys(): metrics_hist_te[name].append(metrics_all[1][name]) for name in metrics_all[2].keys(): metrics_hist_tr[name].append(metrics_all[2][name]) metrics_hist_all = (metrics_hist, metrics_hist_te, metrics_hist_tr) #save metrics, model, params persistence.save_everything( args, metrics_hist_all, model, model_dir, params, args.criterion ) # SHOULD SAVE MODEL PARAMS AT EACH EPOCH, BELIEVE IS HAPPENING if test_only: break if (epoch == args.n_epochs - 2): opt_thresh = metrics_hist["opt_f1_thresh_micro"][np.nanargmax( metrics_hist[args.criterion])] print("Optimal f1 threshold: " + str(opt_thresh)) if (args.criterion in metrics_hist.keys()): if (early_stop(metrics_hist, args.criterion, args.patience)): #stop training, do tests on test and train sets, and then stop the script print( "%s hasn't improved in %d epochs, early stopping or just completed last epoch" % (args.criterion, args.patience)) test_only = True opt_thresh = metrics_hist["opt_f1_thresh_micro"][np.nanargmax( metrics_hist[args.criterion])] print("Optimal f1 threshold: " + str(opt_thresh)) model = torch.load( '%s/model_best_%s.pth' % (model_dir, args.criterion)) # LOADING BEST MODEL FOR FINAL TEST return epoch + 1
w=weights_k_dist) corr_pear_r[d, ] = wpearsonr(target_k, curr_train_k[:, d], w=weights_k_rank) corr_pear_n[d, ] = wpearsonr(target_k, curr_train_k[:, d])[0] corr_list = [ corr_dist_d, corr_dist_r, corr_dist_n, corr_pear_d, corr_pear_r, corr_pear_n ] for j in range(len(m_list)): corr_k = corr_list[j] # pick the best one best_clf_ind = np.nanargmax(corr_k) pred_scores_best[i, j] = test_scores_norm[i, best_clf_ind] # pick the p dynamically threshold = corr_k.max() - corr_k.std() * alpha p = (corr_k >= threshold).sum() if p == 0: # in case extreme cases [nan and all -1's] p = 1 pred_scores_ens[i, j] = np.max( test_scores_norm[i, argmaxp(corr_k, p)]) for m in range(len(m_list)): test_target_list.extend( [pred_scores_best[:, m], pred_scores_ens[:, m]]) method_list.extend( ['DCSO_a_' + m_list[m], 'DCSO_moa_' + m_list[m]])
def _get_ellipsoid_parameters_basic(self): np.seterr(all="ignore") # check if there are 4 particles to form an ellipsoid # neglecting to check if the 4 particles in the same plane, # that is almost certainly never to occur, # will deal with it later if it ever comes up if np.size(self["particle_position_x"]) < 4: mylog.warning("Too few particles for ellipsoid parameters.") return (0, 0, 0, 0, 0, 0, 0) # Calculate the parameters that describe the ellipsoid of # the particles that constitute the halo. This function returns # all the parameters except for the center of mass. com = self.center_of_mass() position = [ self["particle_position_x"], self["particle_position_y"], self["particle_position_z"], ] # Locate the furthest particle from com, its vector length and index DW = np.array([self.gridsize[0], self.gridsize[1], self.gridsize[2]]) position = [position[0] - com[0], position[1] - com[1], position[2] - com[2]] # different cases of particles being on other side of boundary for axis in range(np.size(DW)): cases = np.array( [position[axis], position[axis] + DW[axis], position[axis] - DW[axis]] ) # pick out the smallest absolute distance from com position[axis] = np.choose(np.abs(cases).argmin(axis=0), cases) # find the furthest particle's index r = np.sqrt(position[0] ** 2 + position[1] ** 2 + position[2] ** 2) A_index = r.argmax() mag_A = r.max() # designate the A vector A_vector = (position[0][A_index], position[1][A_index], position[2][A_index]) # designate the e0 unit vector e0_vector = A_vector / mag_A # locate the tB particle position by finding the max B e0_vector_copy = np.empty((np.size(position[0]), 3), dtype="float64") for i in range(3): e0_vector_copy[:, i] = e0_vector[i] rr = np.array( [position[0], position[1], position[2]] ).T # Similar to tB_vector in old code. tC_vector = np.cross(e0_vector_copy, rr) te2 = tC_vector.copy() for dim in range(3): te2[:, dim] *= np.sum(tC_vector**2.0, axis=1) ** (-0.5) te1 = np.cross(te2, e0_vector_copy) length = np.abs( -np.sum(rr * te1, axis=1) * (1.0 - np.sum(rr * e0_vector_copy, axis=1) ** 2.0 * mag_A**-2.0) ** (-0.5) ) # This problem apparently happens sometimes, that the NaNs are turned # into infs, which messes up the nanargmax below. length[length == np.inf] = 0.0 tB_index = np.nanargmax(length) # ignores NaNs created above. mag_B = length[tB_index] e1_vector = te1[tB_index] e2_vector = te2[tB_index] temp_e0 = rr.copy() temp_e1 = rr.copy() temp_e2 = rr.copy() for dim in range(3): temp_e0[:, dim] = e0_vector[dim] temp_e1[:, dim] = e1_vector[dim] temp_e2[:, dim] = e2_vector[dim] length = np.abs( np.sum(rr * temp_e2, axis=1) * ( 1 - np.sum(rr * temp_e0, axis=1) ** 2.0 * mag_A**-2.0 - np.sum(rr * temp_e1, axis=1) ** 2.0 * mag_B**-2.0 ) ** (-0.5) ) length[length == np.inf] = 0.0 tC_index = np.nanargmax(length) mag_C = length[tC_index] # tilt is calculated from the rotation about x axis # needed to align e1 vector with the y axis # after e0 is aligned with x axis # find the t1 angle needed to rotate about z axis to align e0 onto x-z plane t1 = np.arctan(-e0_vector[1] / e0_vector[0]) RZ = get_rotation_matrix(t1, (0, 0, 1)) r1 = np.dot(RZ, e0_vector) # find the t2 angle needed to rotate about y axis to align e0 to x t2 = np.arctan(r1[2] / r1[0]) RY = get_rotation_matrix(t2, (0, 1, 0)) r2 = np.dot(RY, np.dot(RZ, e1_vector)) # find the tilt angle needed to rotate about x axis to align e1 to y and e2 to z tilt = np.arctan(-r2[2] / r2[1]) return (mag_A, mag_B, mag_C, e0_vector[0], e0_vector[1], e0_vector[2], tilt)
n = inputData['n'] R = inputData['R'] epsilon = inputData['epsilon'] x0 = inputData['x'] y0 = inputData['y'] phi = data['phi'] phiCount = phi.size Tphi = data['Tphi'] frequencies = data['frequencies'] countf = frequencies.size Rk = data['Rk'] S = 2 * pi / phiCount * Rk * np.sum(Tphi, axis=1) print(np.nanargmax(S)) # = 840 print(np.nanargmin(S[100:]) + 100) print(np.nanargmax(S[900:]) + 900) mpl.rcParams['mathtext.fontset'] = 'stix' mpl.rcParams['font.family'] = 'STIXGeneral' mpl.rcParams['legend.fontsize'] = 'medium' mpl.rcParams['axes.labelsize'] = 'large' plt.figure(figsize=(7, 3)) plt.plot(frequencies, S) plt.ylim(top=10, bottom=5e-4) plt.yscale("log") plt.xlabel(r"$f$") plt.ylabel(r"$S$") plt.title(r"1 hexagon, $R=0.45$, $\epsilon = (1.1 \pm 0.1\mathrm{i})^2$")
def add_stat_annotation(ax, plot='boxplot', data=None, x=None, y=None, hue=None, units=None, order=None, hue_order=None, box_pairs=None, width=0.8, perform_stat_test=True, pvalues=None, test_short_name=None, test=None, text_format='star', pvalue_format_string=DEFAULT, text_annot_custom=None, loc='inside', show_test_name=True, pvalue_thresholds=DEFAULT, stats_params=dict(), comparisons_correction='bonferroni', use_fixed_offset=False, line_offset_to_box=None, line_offset=None, line_height=0.02, text_offset=1, color='0.2', linewidth=1.5, fontsize='medium', verbose=1): """ Optionally computes statistical test between pairs of data series, and add statistical annotation on top of the boxes/bars. The same exact arguments `data`, `x`, `y`, `hue`, `order`, `width`, `hue_order` (and `units`) as in the seaborn boxplot/barplot function must be passed to this function. This function works in one of the two following modes: a) `perform_stat_test` is True: statistical test as given by argument `test` is performed. b) `perform_stat_test` is False: no statistical test is performed, list of custom p-values `pvalues` are used for each pair of boxes. The `test_short_name` argument is then used as the name of the custom statistical test. :param plot: type of the plot, one of 'boxplot' or 'barplot'. :param line_height: in axes fraction coordinates :param text_offset: in points :param box_pairs: can be of either form: For non-grouped boxplot: `[(cat1, cat2), (cat3, cat4)]`. For boxplot grouped by hue: `[((cat1, hue1), (cat2, hue2)), ((cat3, hue3), (cat4, hue4))]` :param pvalue_format_string: defaults to `"{.3e}"` :param pvalue_thresholds: list of lists, or tuples. Default is: For "star" text_format: `[[1e-4, "****"], [1e-3, "***"], [1e-2, "**"], [0.05, "*"], [1, "ns"]]`. For "simple" text_format : `[[1e-5, "1e-5"], [1e-4, "1e-4"], [1e-3, "0.001"], [1e-2, "0.01"]]` :param pvalues: list or array of p-values for each box pair comparison. :param comparisons_correction: Method for multiple comparisons correction. `bonferroni` or None. """ def find_x_position_box(box_plotter, boxName): """ boxName can be either a name "cat" or a tuple ("cat", "hue") """ if box_plotter.plot_hues is None: cat = boxName hue_offset = 0 else: cat = boxName[0] hue = boxName[1] hue_offset = box_plotter.hue_offsets[box_plotter.hue_names.index( hue)] group_pos = box_plotter.group_names.index(cat) box_pos = group_pos + hue_offset return box_pos def get_box_data(box_plotter, boxName): """ boxName can be either a name "cat" or a tuple ("cat", "hue") Here we really have to duplicate seaborn code, because there is not direct access to the box_data in the BoxPlotter class. 
""" cat = box_plotter.plot_hues is None and boxName or boxName[0] index = box_plotter.group_names.index(cat) group_data = box_plotter.plot_data[index] if box_plotter.plot_hues is None: # Draw a single box or a set of boxes # with a single level of grouping box_data = remove_na(group_data) else: hue_level = boxName[1] hue_mask = box_plotter.plot_hues[index] == hue_level box_data = remove_na(group_data[hue_mask]) return box_data # Set default values if necessary if pvalue_format_string is DEFAULT: pvalue_format_string = '{:.3e}' simple_format_string = '{:.2f}' else: simple_format_string = pvalue_format_string if pvalue_thresholds is DEFAULT: if text_format == "star": pvalue_thresholds = [[1e-4, "****"], [1e-3, "***"], [1e-2, "**"], [0.05, "*"], [1, "ns"]] else: pvalue_thresholds = [[1e-5, "1e-5"], [1e-4, "1e-4"], [1e-3, "0.001"], [1e-2, "0.01"]] fig = plt.gcf() # Validate arguments if perform_stat_test: if test is None: raise ValueError( "If `perform_stat_test` is True, `test` must be specified.") if pvalues is not None or test_short_name is not None: raise ValueError( "If `perform_stat_test` is True, custom `pvalues` " "or `test_short_name` must be `None`.") valid_list = [ 't-test_ind', 't-test_welch', 't-test_paired', 'Mann-Whitney', 'Mann-Whitney-gt', 'Mann-Whitney-ls', 'Levene', 'Wilcoxon', 'Kruskal' ] if test not in valid_list: raise ValueError( "test value should be one of the following: {}.".format( ', '.join(valid_list))) else: if pvalues is None: raise ValueError( "If `perform_stat_test` is False, custom `pvalues` must be specified." ) if test is not None: raise ValueError( "If `perform_stat_test` is False, `test` must be None.") if len(pvalues) != len(box_pairs): raise ValueError( "`pvalues` should be of the same length as `box_pairs`.") if text_annot_custom is not None and len(text_annot_custom) != len( box_pairs): raise ValueError( "`text_annot_custom` should be of same length as `box_pairs`.") assert_is_in(loc, ['inside', 'outside'], label='argument `loc`') assert_is_in(text_format, ['full', 'simple', 'star', 'custom'], label='argument `text_format`') assert_is_in(comparisons_correction, ['bonferroni', None], label='argument `comparisons_correction`') if verbose >= 1 and text_format == 'star': print("p-value annotation legend:") pvalue_thresholds = pd.DataFrame(pvalue_thresholds).sort_values( by=0, ascending=False).values for i in range(0, len(pvalue_thresholds)): if i < len(pvalue_thresholds) - 1: print('{}: {:.2e} < p <= {:.2e}'.format( pvalue_thresholds[i][1], pvalue_thresholds[i + 1][0], pvalue_thresholds[i][0])) else: print('{}: p <= {:.2e}'.format(pvalue_thresholds[i][1], pvalue_thresholds[i][0])) print() ylim = ax.get_ylim() yrange = ylim[1] - ylim[0] if line_offset is None: if loc == 'inside': line_offset = 0.05 if line_offset_to_box is None: line_offset_to_box = 0.06 # 'outside', see valid_list else: line_offset = 0.03 if line_offset_to_box is None: line_offset_to_box = line_offset else: if loc == 'inside': if line_offset_to_box is None: line_offset_to_box = 0.06 elif loc == 'outside': line_offset_to_box = line_offset y_offset = line_offset * yrange y_offset_to_box = line_offset_to_box * yrange if plot == 'boxplot': # Create the same plotter object as seaborn's boxplot box_plotter = sns.categorical._BoxPlotter(x, y, hue, data, order, hue_order, orient=None, width=width, color=None, palette=None, saturation=.75, dodge=True, fliersize=5, linewidth=None) elif plot == 'barplot': # Create the same plotter object as seaborn's barplot box_plotter = 
sns.categorical._BarPlotter(x, y, hue, data, order, hue_order, estimator=np.mean, ci=95, n_boot=1000, units=None, orient=None, color=None, palette=None, saturation=.75, errcolor=".26", errwidth=None, capsize=None, dodge=True) # Build the list of box data structures with the x and ymax positions group_names = box_plotter.group_names hue_names = box_plotter.hue_names if box_plotter.plot_hues is None: box_names = group_names labels = box_names else: box_names = [(group_name, hue_name) for group_name in group_names for hue_name in hue_names] labels = [ '{}_{}'.format(group_name, hue_name) for (group_name, hue_name) in box_names ] box_structs = [{ 'box': box_names[i], 'label': labels[i], 'x': find_x_position_box(box_plotter, box_names[i]), 'box_data': get_box_data(box_plotter, box_names[i]), 'ymax': np.amax(get_box_data(box_plotter, box_names[i])) if len(get_box_data(box_plotter, box_names[i])) > 0 else np.nan } for i in range(len(box_names))] # Sort the box data structures by position along the x axis box_structs = sorted(box_structs, key=lambda x: x['x']) # Add the index position in the list of boxes along the x axis box_structs = [ dict(box_struct, xi=i) for i, box_struct in enumerate(box_structs) ] # Same data structure list with access key by box name box_structs_dic = { box_struct['box']: box_struct for box_struct in box_structs } # Build the list of box data structure pairs box_struct_pairs = [] for i_box_pair, (box1, box2) in enumerate(box_pairs): valid = box1 in box_names and box2 in box_names if not valid: raise ValueError("box_pairs contains an invalid box pair.") pass # i_box_pair will keep track of the original order of the box pairs. box_struct1 = dict(box_structs_dic[box1], i_box_pair=i_box_pair) box_struct2 = dict(box_structs_dic[box2], i_box_pair=i_box_pair) if box_struct1['x'] <= box_struct2['x']: pair = (box_struct1, box_struct2) else: pair = (box_struct2, box_struct1) box_struct_pairs.append(pair) # Draw first the annotations with the shortest between-boxes distance, in order to reduce # overlapping between annotations. box_struct_pairs = sorted(box_struct_pairs, key=lambda x: abs(x[1]['x'] - x[0]['x'])) # Build array that contains the x and y_max position of the highest annotation or box data at # a given x position, and also keeps track of the number of stacked annotations. # This array will be updated when a new annotation is drawn. 
y_stack_arr = np.array([[box_struct['x'] for box_struct in box_structs], [box_struct['ymax'] for box_struct in box_structs], [0 for i in range(len(box_structs))]]) if loc == 'outside': y_stack_arr[1, :] = ylim[1] ann_list = [] test_result_list = [] ymaxs = [] y_stack = [] for box_struct1, box_struct2 in box_struct_pairs: box1 = box_struct1['box'] box2 = box_struct2['box'] label1 = box_struct1['label'] label2 = box_struct2['label'] box_data1 = box_struct1['box_data'] box_data2 = box_struct2['box_data'] x1 = box_struct1['x'] x2 = box_struct2['x'] xi1 = box_struct1['xi'] xi2 = box_struct2['xi'] ymax1 = box_struct1['ymax'] ymax2 = box_struct2['ymax'] i_box_pair = box_struct1['i_box_pair'] # Find y maximum for all the y_stacks *in between* the box1 and the box2 i_ymax_in_range_x1_x2 = xi1 + np.nanargmax( y_stack_arr[1, np.where((x1 <= y_stack_arr[0, :]) & (y_stack_arr[0, :] <= x2))]) ymax_in_range_x1_x2 = y_stack_arr[1, i_ymax_in_range_x1_x2] if perform_stat_test: result = stat_test(box_data1, box_data2, test, comparisons_correction, len(box_struct_pairs), **stats_params) else: test_short_name = test_short_name if test_short_name is not None else '' result = StatResult('Custom statistical test', test_short_name, None, None, pvalues[i_box_pair]) result.box1 = box1 result.box2 = box2 test_result_list.append(result) if verbose >= 1: print("{} v.s. {}: {}".format(label1, label2, result.formatted_output)) if text_annot_custom is not None: text = text_annot_custom[i_box_pair] else: if text_format == 'full': text = "{} p = {}".format('{}', pvalue_format_string).format( result.test_short_name, result.pval) elif text_format is None: text = None elif text_format is 'star': text = pval_annotation_text(result.pval, pvalue_thresholds) elif text_format is 'simple': test_short_name = show_test_name and test_short_name or "" text = simple_text(result.pval, simple_format_string, pvalue_thresholds, test_short_name) elif text_format is 'custom': text = "%0.2f" % result.pval yref = ymax_in_range_x1_x2 yref2 = yref # Choose the best offset depending on wether there is an annotation below # at the x position in the range [x1, x2] where the stack is the highest if y_stack_arr[2, i_ymax_in_range_x1_x2] == 0: # there is only a box below offset = y_offset_to_box else: # there is an annotation below offset = y_offset y = yref2 + offset h = line_height * yrange line_x, line_y = [x1, x1, x2, x2], [y, y + h, y + h, y] if loc == 'inside': ax.plot(line_x, line_y, lw=linewidth, c=color) elif loc == 'outside': line = lines.Line2D(line_x, line_y, lw=linewidth, c=color, transform=ax.transData) line.set_clip_on(False) ax.add_line(line) # why should we change here the ylim if at the very end we set it to the correct range???? # ax.set_ylim((ylim[0], 1.1*(y + h))) if text is not None: ann = ax.annotate(text, xy=(np.mean([x1, x2]), y + h), xytext=(0, text_offset), textcoords='offset points', xycoords='data', ha='center', va='bottom', fontsize=fontsize, clip_on=False, annotation_clip=False) ann_list.append(ann) plt.draw() y_top_annot = None got_mpl_error = False if not use_fixed_offset: try: bbox = ann.get_window_extent() bbox_data = bbox.transformed(ax.transData.inverted()) y_top_annot = bbox_data.ymax except RuntimeError: got_mpl_error = True if use_fixed_offset or got_mpl_error: if verbose >= 1: print( "Warning: cannot get the text bounding box. Falling back to a fixed" " y offset. Layout may be not optimal.") # We will apply a fixed offset in points, # based on the font size of the annotation. 
fontsize_points = FontProperties( size='medium').get_size_in_points() offset_trans = mtransforms.offset_copy( ax.transData, fig=fig, x=0, y=1.0 * fontsize_points + text_offset, units='points') y_top_display = offset_trans.transform((0, y + h)) y_top_annot = ax.transData.inverted().transform( y_top_display)[1] else: y_top_annot = y + h y_stack.append( y_top_annot ) # remark: y_stack is not really necessary if we have the stack_array ymaxs.append(max(y_stack)) # Fill the highest y position of the annotation into the y_stack array # for all positions in the range x1 to x2 y_stack_arr[1, (x1 <= y_stack_arr[0, :]) & (y_stack_arr[0, :] <= x2)] = y_top_annot # Increment the counter of annotations in the y_stack array y_stack_arr[2, xi1:xi2 + 1] = y_stack_arr[2, xi1:xi2 + 1] + 1 y_stack_max = max(ymaxs) if loc == 'inside': ax.set_ylim((ylim[0], max(1.03 * y_stack_max, ylim[1]))) elif loc == 'outside': ax.set_ylim((ylim[0], ylim[1])) return ax, test_result_list
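# A minimal usage sketch for add_stat_annotation defined above; the DataFrame,
# column names and category labels are hypothetical, not from the original code.
import pandas as pd
import seaborn as sns

df = pd.DataFrame({'group': ['A'] * 20 + ['B'] * 20,
                   'value': list(range(20)) + list(range(5, 25))})
ax = sns.boxplot(data=df, x='group', y='value')
add_stat_annotation(ax, data=df, x='group', y='value',
                    box_pairs=[('A', 'B')],
                    test='Mann-Whitney', text_format='star', loc='inside')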
def ndimage_alg(self, img, opts): """Island detection using scipy.ndimage Use scipy.ndimage.label to detect islands of emission in the image. Island is defined as group of tightly connected (8-connectivity for 2D images) pixels with emission. The following cuts are applied: - pixel is considered to have emission if it is 'thresh_isl' times higher than RMS. - Island should have at least 'minsize' active pixels - There should be at lease 1 pixel in the island which is 'thresh_pix' times higher than noise (peak clip). Parameters: image, mask: arrays with image data and mask mean, rms: arrays with mean & rms maps thresh_isl: threshold for 'active pixels' thresh_pix: threshold for peak minsize: minimal acceptable island size Function returns a list of Island objects. """ ### islands detection mylog = mylogger.logging.getLogger("PyBDSM."+img.log+"Islands") image = img.ch0_arr mask = img.mask_arr rms = img.rms_arr mean = img.mean_arr thresh_isl = opts.thresh_isl thresh_pix = img.thresh_pix clipped_mean = img.clipped_mean saverank = opts.savefits_rankim # act_pixels is true if significant emission if img.masked: act_pixels = ~(mask.copy()) act_pixels[~mask] = (image[~mask]-mean[~mask])/thresh_isl >= rms[~mask] else: act_pixels = (image-mean)/thresh_isl >= rms # dimension of image rank = len(image.shape) # generates matrix for connectivity, in this case, 8-conn connectivity = nd.generate_binary_structure(rank, rank) # labels = matrix with value = (initial) island number labels, count = nd.label(act_pixels, connectivity) # slices has limits of bounding box of each such island slices = nd.find_objects(labels) img.island_labels = labels ### apply cuts on island size and peak value pyrank = N.zeros(image.shape, dtype=N.int32) res = [] islid = 0 for idx, s in enumerate(slices): idx += 1 # nd.labels indices are counted from 1 # number of pixels inside bounding box which are in island isl_size = (labels[s] == idx).sum() isl_peak = nd.maximum(image[s], labels[s], idx) isl_maxposn = tuple(N.array(N.unravel_index(N.nanargmax(image[s]), image[s].shape))+\ N.array((s[0].start, s[1].start))) if (isl_size >= img.minpix_isl) and (isl_size <= img.maxpix_isl) and (isl_peak - mean[isl_maxposn])/thresh_pix > rms[isl_maxposn]: isl = Island(image, mask, mean, rms, labels, s, idx, img.pixel_beamarea()) res.append(isl) pyrank[tuple(isl.bbox)] += N.invert(isl.mask_active)*idx // idx return res
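# A self-contained sketch of the island-detection core described in the
# ndimage_alg docstring above (thresholding, 8-connected labelling, bounding
# boxes); the toy image, threshold and minimum size here are hypothetical.
import numpy as np
import scipy.ndimage as nd

image = np.random.rand(64, 64)
act_pixels = image >= 0.95                          # pixels with "emission"
connectivity = nd.generate_binary_structure(2, 2)   # 8-connectivity for 2D
labels, count = nd.label(act_pixels, connectivity)  # label islands of emission
slices = nd.find_objects(labels)                    # bounding box of each island
islands = [s for idx, s in enumerate(slices, start=1)
           if (labels[s] == idx).sum() >= 4]        # apply a minimum-size cut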