def _get_effect_interval(counts, effect, power_alpha=0.05, ci_alpha=None):
    """Calculate the confidence interval around the effect"""
    if isinstance(effect, (int, float)):
        l_bar = effect
        ci = np.nan
    elif ci_alpha is None:
        ci = np.nanstd(effect)
        l_bar = np.nanmean(effect)
    else:
        ci = confidence_bound(effect, alpha=ci_alpha)
        l_bar = np.nanmean(effect)

    l_low = l_bar - ci
    l_hi = l_bar + ci

    power_bar = z_power(counts, l_bar, power_alpha)
    if not np.isnan(ci):
        power_low = z_power(counts, l_low, power_alpha)
        power_hi = z_power(counts, l_hi, power_alpha)
    else:
        power_low = np.nan * power_bar
        power_hi = np.nan * power_bar

    return power_bar, power_low, power_hi
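# Hedged standalone sketch of the interval bracketing above, using plain NumPy
# on a toy effect array; the module's confidence_bound and z_power helpers are
# not used here, so this only illustrates how the low/high bracket is formed.
import numpy as np

effect = np.array([0.8, 1.1, np.nan, 0.9, 1.0])
l_bar = np.nanmean(effect)      # central effect size
ci = np.nanstd(effect)          # spread used as the half-width
l_low, l_hi = l_bar - ci, l_bar + ci
print(l_low, l_bar, l_hi)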
def summary():
    # read sonde data
    for sites in [[0], [1], [2]]:
        slist, snames = read_diff_events(sites=sites)
        ecount = [len(s.einds) for s in slist]
        mintp = [np.nanmin(s.tp) for s in slist]
        meantp = [np.nanmean(s.tp) for s in slist]
        maxtp = [np.nanmax(s.tp) for s in slist]

        head = "%9s" % slist[0].name
        ecount = "events "
        meantp = "mean tph "
        minmax = "tph bound"
        for sonde, sname in zip(slist, snames):
            head = head + '| %16s' % sname
            ecount = ecount + '| %16d' % len(sonde.einds)
            meantp = meantp + '| %16.2f' % np.nanmean(sonde.tp)
            minmax = minmax + '| %7.2f,%7.2f ' % (np.nanmin(sonde.tp), np.nanmax(sonde.tp))
        print("")
        print(head)
        print(ecount)
        print(meantp)
        print(minmax)
def filted_diff(curve, period, threshold):
    nframe = len(curve)
    ncycle = nframe // period          # integer number of full cycles
    half = period // 2                 # integer half-period for slicing
    diff1 = np.full(ncycle, np.nan)
    diff2 = np.full(ncycle, np.nan)
    #Von = np.full(ncycle, np.nan)
    #Voff = np.full(ncycle, np.nan)
    k = 0
    l = 0
    F = []
    for i in range(ncycle):
        if threshold <= min(curve[i*period : (i+1)*period]):
            diff1[k] = sum(curve[i*period : i*period+half]) - sum(curve[i*period+half : (i+1)*period])
            F.append(curve[i*period : (i+1)*period])
            #Von[k] = sum(curve[i*period : i*period+half])
            #Voff[k] = sum(curve[i*period+half : (i+1)*period])
            k = k + 1
    for i in range(ncycle-1):
        if threshold <= min(curve[i*period+half : (i+1)*period+half]):
            diff2[l] = sum(curve[i*period+half : (i+1)*period]) - sum(curve[(i+1)*period : (i+1)*period+half])
            #Von[k] = sum(curve[i*period : i*period+half])
            #Voff[k] = sum(curve[i*period+half : (i+1)*period])
            l = l + 1
    F = np.array(F)
    Favg = np.mean(F)
    dff1 = diff1 / half / Favg
    dff2 = diff2 / half / Favg
    dff_avg = (np.nanmean(dff1) - np.nanmean(dff2)) / 2
    return diff1, diff2, dff_avg  #Von/half/np.nanmax(curve), Voff/half/np.nanmax(curve)
def plot_hist(ind, binwidth=5, incl_daystr=True, ax=None, pos=(0.05, 0.7), kw={'alpha' : 0.3, 'color' : 'k'}): """Plot histogram of onset days. """ if ax is None: ax = plt.gca() def daystr(day): day = round(day) mm, dd = atm.jday_to_mmdd(day) mon = atm.month_str(mm) return '%.0f (%s-%.0f)' % (day, mon, dd) if isinstance(ind, pd.Series) or isinstance(ind, xray.DataArray): ind = ind.values b1 = np.floor(np.nanmin(ind) / binwidth) * binwidth b2 = np.ceil(np.nanmax(ind) / binwidth) * binwidth bin_edges = np.arange(b1, b2 + 1, binwidth) n, bins, _ = ax.hist(ind, bin_edges, **kw) ax.set_xlabel('Day of Year') ax.set_ylabel('Num of Occurrences') if incl_daystr: dmean = daystr(np.nanmean(ind)) dmin = daystr(np.nanmin(ind)) dmax = daystr(np.nanmax(ind)) else: dmean = '%.0f' % np.nanmean(ind) dmin = '%.0f' % np.nanmin(ind) dmax = '%.0f' % np.nanmax(ind) s = 'Mean %s\n' % dmean + 'Std %.0f\n' % np.nanstd(ind) s = s + 'Min %s\n' % dmin + 'Max %s' % dmax x0, y0 = pos atm.text(s, (x0, y0), ax=ax, horizontalalignment='left')
def calc_norm_summary_tables(accuracy_tbl, time_tbl):
    """
    Calculate a normalized performance/ranking summary (as numpy matrices, for
    convenience), plus matrices of additional statistics (min, max, percentiles, etc.).

    Here "normalized" means relative to the best minimizer, which gets a 1; all
    others get the ratio obtained by dividing by the performance of the best.
    """
    # Min across all minimizers, i.e. for each fit problem what is the lowest
    # chi-squared and the lowest time
    min_sum_err_sq = np.nanmin(accuracy_tbl, 1)
    min_runtime = np.nanmin(time_tbl, 1)

    # create normalised tables
    norm_acc_rankings = accuracy_tbl / min_sum_err_sq[:, None]
    norm_runtimes = time_tbl / min_runtime[:, None]

    summary_cells_acc = np.array([np.nanmin(norm_acc_rankings, 0),
                                  np.nanmax(norm_acc_rankings, 0),
                                  nanmean(norm_acc_rankings, 0),
                                  nanmedian(norm_acc_rankings, 0)
                                  ])

    summary_cells_runtime = np.array([np.nanmin(norm_runtimes, 0),
                                      np.nanmax(norm_runtimes, 0),
                                      nanmean(norm_runtimes, 0),
                                      nanmedian(norm_runtimes, 0)
                                      ])

    return norm_acc_rankings, norm_runtimes, summary_cells_acc, summary_cells_runtime
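# Hedged usage sketch of the normalization step above: a toy 3x2 accuracy table
# (rows = fit problems, columns = minimizers); all values are made up.
import numpy as np

accuracy_tbl = np.array([[2.0, 1.0],
                         [3.0, 6.0],
                         [np.nan, 4.0]])
best_per_problem = np.nanmin(accuracy_tbl, 1)        # row-wise best chi-squared
norm = accuracy_tbl / best_per_problem[:, None]      # best minimizer -> 1.0
print(np.nanmean(norm, 0))                           # mean normalized ranking per minimizer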
def error(self, nCells=15): ''' calculate the standard deviation of all fitted images, averaged to a grid ''' s0, s1 = self.fits[0].shape aR = s0 / s1 if aR > 1: ss0 = int(nCells) ss1 = int(ss0 / aR) else: ss1 = int(nCells) ss0 = int(ss1 * aR) L = len(self.fits) arr = np.array(self.fits) arr[np.array(self._fit_masks)] = np.nan avg = np.tile(np.nanmean(arr, axis=0), (L, 1, 1)) arr = (arr - avg) / avg out = np.empty(shape=(L, ss0, ss1)) with warnings.catch_warnings(): warnings.simplefilter("ignore", category=RuntimeWarning) for n, f in enumerate(arr): out[n] = subCell2DFnArray(f, np.nanmean, (ss0, ss1)) return np.nanmean(out**2)**0.5
def evaluate(self): """Compute evaluation result. Returns: A named tuple with the following fields - average_precision: float numpy array of average precision for each class. mean_ap: mean average precision of all classes, float scalar precisions: List of precisions, each precision is a float numpy array recalls: List of recalls, each recall is a float numpy array corloc: numpy float array mean_corloc: Mean CorLoc score for each class, float scalar """ if (self.num_gt_instances_per_class == 0).any(): logging.warn( 'The following classes have no ground truth examples: %s', np.squeeze(np.argwhere(self.num_gt_instances_per_class == 0)) + self.label_id_offset) if self.use_weighted_mean_ap: all_scores = np.array([], dtype=float) all_tp_fp_labels = np.array([], dtype=bool) for class_index in range(self.num_class): if self.num_gt_instances_per_class[class_index] == 0: continue if not self.scores_per_class[class_index]: scores = np.array([], dtype=float) tp_fp_labels = np.array([], dtype=float) else: scores = np.concatenate(self.scores_per_class[class_index]) tp_fp_labels = np.concatenate(self.tp_fp_labels_per_class[class_index]) if self.use_weighted_mean_ap: all_scores = np.append(all_scores, scores) all_tp_fp_labels = np.append(all_tp_fp_labels, tp_fp_labels) logging.info('Scores and tpfp per class label: %d', class_index) logging.info(tp_fp_labels) logging.info(scores) precision, recall = metrics.compute_precision_recall( scores, tp_fp_labels, self.num_gt_instances_per_class[class_index]) self.precisions_per_class[class_index] = precision self.recalls_per_class[class_index] = recall average_precision = metrics.compute_average_precision(precision, recall) self.average_precision_per_class[class_index] = average_precision self.corloc_per_class = metrics.compute_cor_loc( self.num_gt_imgs_per_class, self.num_images_correctly_detected_per_class) if self.use_weighted_mean_ap: num_gt_instances = np.sum(self.num_gt_instances_per_class) precision, recall = metrics.compute_precision_recall( all_scores, all_tp_fp_labels, num_gt_instances) mean_ap = metrics.compute_average_precision(precision, recall) else: mean_ap = np.nanmean(self.average_precision_per_class) mean_corloc = np.nanmean(self.corloc_per_class) return ObjectDetectionEvalMetrics( self.average_precision_per_class, mean_ap, self.precisions_per_class, self.recalls_per_class, self.corloc_per_class, mean_corloc)
def _get_x_0_stats(self):
    x_diff = np.diff(self.x_arr_0, axis=1)
    mu_mm = np.nanmean(x_diff)
    std_mm = np.nanstd(x_diff)
    mu_px_mm = np.nanmean(x_diff / self.aramis_info.n_px_facet_step_x)
    std_px_mm = np.nanstd(x_diff / self.aramis_info.n_px_facet_step_x)
    return mu_mm, std_mm, mu_px_mm, std_px_mm
def go(x, y, x_denominators=1, y_denominators=1):
    # these next two lines are wrong, but they are bug-compatible with v0.6.13 !
    x = x / np.nanmean(x_denominators)
    y = y / np.nanmean(y_denominators)
    return group_sequential(x, y, spending_function, estimated_sample_size,
                            alpha, cap)
def _do_scale_fit(freqs, signal, model, w=None): """ Perform a round of fitting to deal with over or under-estimation. Scales curve on y-axis but preserves shape. Parameters ---------- freqs : array signal : array The signal that the model is being fit to model : array The model being scaled w : array weighting function Returns ------- scalefac : array of len(signal) the scaling factor for each transient scalemodel : array of model.shape the scaled model """ scalefac = np.empty(model.shape[0]) scalemodel = np.empty((model.shape[0], np.real(model).shape[1])) scalesignal = np.empty((signal.shape[0], np.real(signal).shape[1])) for ii, xx in enumerate(signal): # per transient scalesignal[ii] = np.real(xx) # ratio = np.empty(scalesignal[ii].shape[0]) # for ppm, trans in enumerate(scalesignal[ii]): # ratio[ppm] = trans/model[ii][ppm] # scalefac[ii] = np.mean(ratio,0) scalefac[ii] = np.nanmean(scalesignal[ii],0)/np.nanmean(model[ii],0) scalemodel[ii] = scalefac[ii] * model[ii] return scalefac, scalemodel
def __entrofy(X, k, w=None, q=None, pre_selects=None): '''See entrofy() for documentation''' n_participants, n_attributes = X.shape if w is None: w = np.ones(n_attributes) if q is None: q = 0.5 * np.ones(n_attributes) assert 0 < k <= n_participants assert not np.any(w < 0) assert np.all(q >= 0.0) and np.all(q <= 1.0) assert len(w) == n_attributes assert len(q) == n_attributes if k == n_participants: return np.arange(n_participants) # Initialization y = np.zeros(n_participants, dtype=bool) if pre_selects is None: # Select one at random pre_selects = np.random.choice(n_participants, size=1) y[pre_selects] = True # Where do we have missing data? Xn = np.isnan(X) while True: i = y.sum() if i >= k: break # Initialize the distribution vector p = np.nanmean(X[y], axis=0) p[np.isnan(p)] = 0.0 # Compute the candidate distributions p_new = (p * i + X) / (i + 1.0) # Wherever X is nan, propagate the old p since we have no new information p_new[Xn] = (Xn * p)[Xn] # Compute marginal gain for each candidate delta = obj(p_new, w, q) - obj(p, w, q) # Knock out the points we've already taken delta[y] = -np.inf # Select the top score. Break near-ties randomly. target_score = delta.max() target_score = target_score - 1e-3 * np.abs(target_score) new_idx = np.random.choice(np.flatnonzero(delta >= target_score)) y[new_idx] = True return obj(np.nanmean(X[y], axis=0), w, q), np.flatnonzero(y)
def get_loss_bb(gt,est): sf="/home/coskun/PycharmProjects/RNNPose21/daya/blanket.txt" batch_size=gt.shape[0] seq_length=gt.shape[1] loss=0 loss_list=[] seq_list=[] b_seq_list=[] with open(sf,"a") as f_handle: for b in range(batch_size): seq_los=[0]*seq_length for s in range(seq_length): diff_vec=np.abs(gt[b][s].reshape(14,3) - est[b][s].reshape(14,3))*2 #14,3 val=np.sqrt(np.sum(diff_vec**2,axis=1)) for i in range(14): f=val[i] f_handle.write("%f"%(f)) if(i<13): f_handle.write(";") f_handle.write('\n') b_l=np.nanmean(np.sqrt(np.sum(diff_vec**2,axis=1))) loss_list.append(b_l) seq_los[s]=b_l loss +=np.nanmean(np.sqrt(np.sum(diff_vec**2,axis=1))) b_seq_list.append(seq_los) seq_list=np.mean(b_seq_list,axis=0) loss/=(seq_length*batch_size) return (loss,loss_list,seq_list)
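# Hedged standalone sketch of the per-frame pose error used above: the mean
# Euclidean distance over 14 joints, NaN-safe (the scaling factor applied in
# get_loss_bb is omitted here); gt_pose/est_pose are toy arrays.
import numpy as np

gt_pose = np.random.rand(14, 3)
est_pose = gt_pose + 0.01 * np.random.rand(14, 3)
per_joint = np.sqrt(np.sum((gt_pose - est_pose) ** 2, axis=1))  # 14 joint distances
frame_error = np.nanmean(per_joint)
print(frame_error)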
def run(self, x1, x2):
    if isinstance(x1, np.ndarray):
        x1 = np.nanmean(x1)
    if isinstance(x2, np.ndarray):
        x2 = np.nanmean(x2)
    return x1 / (x1 + x2)
def pair_angle_op(angles, nmask=None, m=4, globl=False, locl=False):
    """calculate the pair-angle (bond angle) order parameter

    the parameter for particle i is defined as:
        psi_m_i = < exp(i m theta_ij) >
    averaged over neighbors j of particle i

    the global parameter is the mean over all particles i:
        Psi_m = < psi_m_i >

    Parameters
    angles: angles between neighboring pairs (from pair_angles)
    nmask:  neighbor mask, if invalid angles are not np.nan (None)
    m:      symmetry; angles will be considered modulo tau/m

    Returns
    mag:    the absolute value |psi|
    ang:    the phase of psi mod tau/m
    psims:  the local values of psi for each particle
    """
    if not (globl or locl):
        globl = locl = True
    if nmask is not None:
        angles[nmask] = np.nan
    psims = np.nanmean(np.exp(m*angles*1j), 1)
    if not globl:
        return np.abs(psims)
    psim = np.nanmean(psims)
    mag = abs(psim)
    ang = phase(psim)/m
    if locl:
        return mag, ang, psims
    return mag, ang
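# Hedged sketch of the local bond-orientational order parameter: for each
# particle, average exp(i*m*theta) over its neighbor angles (NaN = missing
# neighbor); toy angles, two particles.
import numpy as np

m = 4
angles = np.array([[0.0, np.pi/4, np.nan],        # particle 0: incompatible angles
                   [np.pi/7, np.pi/7, np.pi/7]])  # particle 1: aligned neighbors
psims = np.nanmean(np.exp(m * angles * 1j), axis=1)
print(np.abs(psims))   # particle 0 -> 0 (disordered), particle 1 -> 1 (ordered)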
def get_loss_pred(params,gt,est): fest="/home/coskun/PycharmProjects/RNNPoseV2/pred/3.6m/estimation.txt" fgt="/home/coskun/PycharmProjects/RNNPoseV2/pred/3.6m/ground_truth.txt" loss=0 loss_list=[] with open(fest,"a") as f_handle_est, open(fgt,"a") as f_handle_gt: for b in range(len(gt)): diff_vec=np.abs(gt[b].reshape(params['n_output']/3,3) - est[b].reshape(params['n_output']/3,3)) #14,3 for val in est[b]: f_handle_est.write("%f "%(val*1000)) for val in gt[b]: f_handle_gt.write("%f "%(val*1000)) # val=np.sqrt(np.sum(diff_vec**2,axis=1)) # # for i in range(14): # f=val[i] # f_handle.write("%f"%(f)) # if(i<13): # f_handle.write(";") f_handle_est.write('\n') f_handle_gt.write('\n') b_l=np.sqrt(np.sum(diff_vec**2,axis=1)) loss_list.append(b_l) loss +=np.nanmean(np.sqrt(np.sum(diff_vec**2,axis=1))) loss=np.nanmean(loss) return (loss,loss_list)
def sigmaclip(data, factor, replacement=None, median=False, maxiter=100):
    # use the NaN-aware std so clipping keeps working once outliers have been
    # replaced with np.nan
    std = np.nanstd(data)
    iteration = 0
    if median:
        center = np.nanmedian(data)
    else:
        center = np.nanmean(data)
    if not replacement:
        replacement = np.nan
    elif replacement == 'mean':
        replacement = center
    indx = (data > (center + std*factor)) + (data < (center - std*factor))
    while np.sum(indx) > 0 and iteration < maxiter:
        #print indx, np.sum(indx)
        #pl.plot(data)
        #pl.plot([0,len(data)],[center+std*factor,center+std*factor])
        #pl.plot([0,len(data)],[center-std*factor,center-std*factor])
        data[indx] = replacement
        std = np.nanstd(data)
        if median:
            center = np.nanmedian(data)
        else:
            center = np.nanmean(data)
        if not replacement:
            replacement = np.nan
        elif replacement == 'mean':
            replacement = center
        indx = (data > (center + std*factor)) + (data < (center - std*factor))
        #print indx, np.sum(indx)
        #pl.plot(data,'ko')
        #pl.show()
        iteration += 1
    return data
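# Hedged usage sketch: clip a single wild outlier at 2 sigma, replacing it with
# NaN (the default); assumes the sigmaclip defined above is in scope.
import numpy as np

x = np.array([1.0, 1.1, 0.9, 1.05, 25.0, 0.95])
cleaned = sigmaclip(x.copy(), factor=2.0)
print(cleaned)   # the 25.0 sample is replaced by np.nan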
def orient_op(orientations, m=4, positions=None, margin=0, ret_complex=True, do_err=False, globl=False, locl=False): """orient_op(orientations, m=4, positions=None, margin=0, ret_complex=True, do_err=False, globl=False, locl=False) calculate the global m-fold particle orientational order parameter 1 N i m theta Phi = --- SUM e j m N j=1 """ if not (globl or locl): globl = True locl = orientations.ndim == 2 np.mod(orientations, tau/m, orientations) if margin: if margin < ss: margin *= ss center = 0.5*(positions.max(0) + positions.min(0)) d = helpy.dist(positions, center) # distances to center orientations = orientations[d < d.max() - margin] phis = np.exp(m*orientations*1j) if locl: phis = np.nanmean(phis, 1) if do_err: err = np.nanstd(phis, ddof=1)/sqrt(np.count_nonzero(~np.isnan(phis))) if not globl: return (np.abs(phis), err) if do_err else np.abs(phis) phi = np.nanmean(phis) if ret_complex else np.abs(np.nanmean(phis)) if locl: return (np.abs(phis), phi, err) if do_err else (np.abs(phis), phi) return (phi, err) if do_err else phi
def Mplot(ax, x, ys, lss, xlabel, xunit, ylabel, yunit, labels, rescaleX=True, rescaleY=True): colors = getcolors(len(ys)) xprefix, mx = SId(nanmean(x)) yprefix, my = SId(nanmean(ys)) if rescaleX: if xunit != '': xunit = ' / ('+xprefix+xunit+')' elif xunit != '': xunit = ' / ('+xunit+')' if rescaleY: if yunit != '': yunit = ' / ('+yprefix+yunit+')' elif yunit != '': yunit = ' / ('+yunit+')' if labels == '': labels = ['' for i in range(len(ys))] if type(x) != type([]): xs = [x for i in range(len(ys))] else: xs = x for x, y, ls, lab, col in zip(xs, ys, lss, labels, colors): if rescaleY: y = y/my if rescaleX: x = x/mx # carefull! numpy.ndarrays are mutable!!! ax.plot(x, y, ls, color=col, label=lab) if xlabel != '': ax.set_xlabel(xlabel+xunit) if ylabel != '': ax.set_ylabel(ylabel+yunit) ax.set_xlim([min(x), max(x)]) return mx, my
def h__computeAvgAngles(self, x, y): """ Take average difference between successive x and y skeleton points, then compute the arc tangent from those averages. Parameters --------------------------------------- x : m x n float numpy array m is the number of skeleton points n is the number of frames y : m x n float numpy array (Same as x) Returns --------------------------------------- 1-d float numpy array of length n The angles Notes --------------------------------------- Simple helper for h__computeNoseBends """ # Suppress RuntimeWarning: Mean of empty slice with warnings.catch_warnings(): warnings.simplefilter('ignore', category=RuntimeWarning) avg_diff_x = np.nanmean(np.diff(x, n=1, axis=0), axis=0) avg_diff_y = np.nanmean(np.diff(y, n=1, axis=0), axis=0) angles = np.arctan2(avg_diff_y, avg_diff_x) return angles
def _msd_iter(pos, lagtimes):
    with warnings.catch_warnings():
        warnings.simplefilter("ignore", category=RuntimeWarning)
        for lt in lagtimes:
            diff = pos[lt:] - pos[:-lt]
            yield np.concatenate((np.nanmean(diff, axis=0),
                                  np.nanmean(diff**2, axis=0)))
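# Hedged usage sketch: mean displacement and mean squared displacement for a
# toy 1-D random walk, using the _msd_iter generator above.
import numpy as np

pos = np.cumsum(np.random.randn(100, 1), axis=0)     # shape (n_frames, n_dims)
lagtimes = np.arange(1, 11)
results = np.array(list(_msd_iter(pos, lagtimes)))   # columns: <dx>, <dx^2>
print(results.shape)                                 # (10, 2)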
def plot(ax, x, y, ls, xlabel, xunit, ylabel, yunit, label, col=False, setlim=True, rescaleX=True, rescaleY=True, xprefix=None, mx=None, yprefix=None, my=None): if xprefix is None: xprefix, mx = SId(nanmean(abs(x))) if yprefix is None: yprefix, my = SId(nanmean(abs(y))) if rescaleX: x = x/mx # carefull! numpy.ndarrays are mutable!!! if xunit != '': xunit = ' / ('+xprefix+xunit+')' elif xunit != '': xunit = ' / ('+xunit+')' if rescaleY: y = y/my if yunit != '': yunit = ' / ('+yprefix+yunit+')' elif yunit != '': yunit = ' / ('+yunit+')' if col is False: ax.plot(x, y, ls, label=label) else: ax.plot(x, y, ls, color=col, label=label) if xlabel != '': ax.set_xlabel(xlabel+xunit) if ylabel != '': ax.set_ylabel(ylabel+yunit) if setlim: epsy = (max(y)-min(y))*0.15 ax.set_xlim([min(x), max(x)]) ax.set_ylim([min(y)-epsy, max(y)+epsy]) return x, y, yprefix, my
def Avg_WS_Picks(draft_db, plot=False): x=draft_db[['Pk','WS/48']] #Find the range of picks (#1-?) pick_list=sorted(set(x['Pk'].tolist())) #set function gets unique elements of picks pick_means=[] for pick in pick_list: x1=x['WS/48'][x['Pk']==pick].tolist() #Get win shares for each pick pick_mean=np.nanmean(x1) pick_means.append(pick_mean) #Graph if plot: plt.scatter(pick_list,pick_means) #Plot with average WS for an NBA Player avg_WS=np.nanmean(x['WS/48'].tolist()) plt.plot(pick_list,np.ones(len(pick_list))*avg_WS) plt.xlabel('Pick') plt.ylabel('Average WS/48') plt.ylim([-.5,.5]) plt.xlim([0,200]) plt.xticks([0,10,25,40,50,100]) plt.savefig('Avg_WS_vs_pick') plt.show() else: return pd.DataFrame({ 'Pk': pick_list, 'Avg_WS/48' : pick_means })
def compute(self, today, assets, out, close): # get returns dataset returns = ((close - np.roll(close, 1, axis=0)) / np.roll(close, 1, axis=0))[1:] # get index of benchmark benchmark_index = np.where((assets == 8554) == True)[0][0] # get returns of benchmark benchmark_returns = returns[:, benchmark_index] # prepare X matrix (x_is - x_bar) X = benchmark_returns X_bar = np.nanmean(X) X_vector = X - X_bar X_matrix = np.tile(X_vector, (len(returns.T), 1)).T # prepare Y matrix (y_is - y_bar) Y_bar = np.nanmean(close, axis=0) Y_bars = np.tile(Y_bar, (len(returns), 1)) Y_matrix = returns - Y_bars # prepare variance of X X_var = np.nanvar(X) # multiply X matrix an Y matrix and sum (dot product) # then divide by variance of X # this gives the MLE of Beta out[:] = (np.sum((X_matrix * Y_matrix), axis=0) / X_var) / (len(returns))
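# Hedged standalone sketch of a covariance-over-variance beta estimate, which
# is the quantity the factor above approximates (its matrix bookkeeping and
# normalization details differ); bench/asset are synthetic return series.
import numpy as np

bench = np.random.randn(250) * 0.01
asset = 1.3 * bench + np.random.randn(250) * 0.005
cov = np.nanmean((bench - np.nanmean(bench)) * (asset - np.nanmean(asset)))
beta = cov / np.nanvar(bench)
print(beta)   # close to 1.3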
def autocorr(datain, endlag):
    '''
    autocorr(datain,endlag)

    Input:
       datain[0:N] is a data time series of size N
       endlag is the number of time steps to find autocorrelation

    Output:
       aut[0:endlag] is the autocorrelation of datain from lag 0 to time step endlag

    Steven Cavallo
    University of Oklahoma
    July 2016
    '''
    N = np.size(datain)
    aut = []
    for lag in range(0, endlag):
        data1 = datain[0:N-lag]
        data1m = data1 - np.nanmean(data1)
        data2 = datain[lag:]
        data2m = data2 - np.nanmean(data2)
        aut.append(np.sum(data1m*data2m) / np.sqrt(np.sum(data1m**2.0) * np.sum(data2m**2.0)))
    return aut
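# Hedged usage sketch: autocorrelation of a noisy sine wave with the autocorr
# function above; lag 0 is 1 by construction.
import numpy as np

t = np.arange(500)
series = np.sin(2 * np.pi * t / 50.0) + 0.1 * np.random.randn(500)
aut = autocorr(series, endlag=100)
print(aut[0])    # 1.0
print(aut[25])   # near -1: half a period out of phase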
def process_chunk(self, data):
    moment_data = numpy.log(data)
    moments = numpy.zeros(self.mmax - self.mmin, dtype=numpy.float32)
    mean = numpy.nanmean(moment_data)
    moment_data = moment_data - mean
    if self.mmin == 1:
        temp = numpy.ones(len(moment_data), dtype=numpy.float32)
    elif self.mmin == 2:
        temp = moment_data
    else:
        temp = numpy.power(moment_data, self.mmin - 1)
    for i in range(0, self.mmax - self.mmin):
        temp = temp * moment_data
        moments[i] = numpy.nanmean(temp)
    if self.mmin == 1:
        moments[0] = mean
    return moments
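# Hedged standalone sketch of the moment computation above: central moments of
# log-transformed data via np.nanmean, for the case mmin=1, mmax=4.
import numpy as np

data = np.random.lognormal(mean=0.0, sigma=0.5, size=1000)
logd = np.log(data)
mean = np.nanmean(logd)
centered = logd - mean
moments = [mean] + [np.nanmean(centered ** k) for k in (2, 3)]
print(moments)   # [mean, variance, third central moment]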
def trim_bad_edges(self, r, window_width = 128, min_snr = 5.): """ Find edge regions that contain no information and trim them. Parameters ---------- r : `int` order index window_width : `int` number of pixels to average over for local SNR min_snr : `float` SNR threshold below which we discard the data """ for n in range(self.N): n_pix = len(self.xs[0][n]) for window_start in range(n_pix - window_width): mean_snr = np.sqrt(np.nanmean(self.ivars[r][n,window_start:window_start+window_width])) if mean_snr > min_snr: self.ivars[r][n,:window_start] = 0. # trim everything to left of window break for window_start in reversed(range(n_pix - window_width)): mean_snr = np.sqrt(np.nanmean(self.ivars[r][n,window_start:window_start+window_width])) if mean_snr > min_snr: self.ivars[r][n,window_start+window_width:] = 0. # trim everything to right of window break
def imputedata(data, strategy='mean', missing=False):
    '''two impute strategies'''
    with warnings.catch_warnings():
        warnings.simplefilter("ignore", category=RuntimeWarning)
        mean = np.nanmean(data, axis=0)
        sd = np.sqrt(np.nanmean((data - mean)**2, axis=0))
        sign = np.sign(data - mean)
        is_out = is_outliers(data, m=2.5)
        data[is_out] = np.nan
        if strategy == '2sd':
            # impute as +-2sd m
            # reduce the change in distribution.
            for i in range(data.shape[1]):
                if missing:
                    sign[np.isnan(sign)] = 0  # missing data will be imputed as mean
                ind_nan = np.where(np.isnan(data[:, i]))
                data[ind_nan, i] = mean[i] + (sd[i] * 2 * sign[ind_nan, i])
        if strategy == 'mean':
            # impute as mean
            for i in range(data.shape[1]):
                ind_nan = np.where(np.isnan(data[:, i]))
                if missing:
                    # missing data will be imputed as mean
                    data[ind_nan, i] = mean[i]
                else:
                    # missing data will be left as nan
                    data[ind_nan, i] = mean[i] * abs(sign[ind_nan, i])
    return data
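# Hedged standalone sketch of column-wise mean imputation with np.nanmean (the
# imputedata above additionally NaN-masks outliers via is_outliers first).
import numpy as np

X = np.array([[1.0, 2.0],
              [np.nan, 4.0],
              [3.0, np.nan]])
col_means = np.nanmean(X, axis=0)
rows, cols = np.where(np.isnan(X))
X[rows, cols] = col_means[cols]
print(X)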
def nanmean(array):
    """Return the mean of an array ignoring nans.

    Args:
        array: array of values

    Returns:
        result: np.nanmean(array)
    """
    try:
        i = 0
        unc = 0
        if np.isnan(array.v).all() or len(array.v) == 0:
            return Measurement(np.nan, np.nan)
        val = np.nanmean(array.v)
        for u in np.nditer(array.u):
            if np.isfinite(u):
                unc += u ** 2
                i += 1
        return Measurement(val, np.sqrt(unc) / i)
    except AttributeError:
        if np.isnan(array).all() or len(array) == 0:
            return np.nan
        return np.nanmean(array)
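# Hedged sketch of why the all-NaN guard above exists: np.nanmean warns and
# returns nan on an all-NaN input, so checking first keeps the output well
# defined; safe_nanmean is a hypothetical minimal version without Measurement.
import numpy as np

def safe_nanmean(values):
    values = np.asarray(values, dtype=float)
    if values.size == 0 or np.isnan(values).all():
        return np.nan
    return np.nanmean(values)

print(safe_nanmean([1.0, np.nan, 3.0]))   # 2.0
print(safe_nanmean([np.nan, np.nan]))     # nan, without the RuntimeWarning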
def main():
    os.system('modprobe w1-gpio')
    os.system('modprobe w1-therm')
    print len(sys.argv)
    if len(sys.argv) == 1:
        number_of_meas = 7
    else:
        print sys.argv[1]
        number_of_meas = int(sys.argv[1])
    print "number_of_measurements = " + str(number_of_meas)

    print "getting device files and serials..."
    THEDICT = _get_w1_tree_and_serials()

    print "reading sensors " + str(number_of_meas) + " times ..."
    for step in range(int(number_of_meas)):
        for sensor_id in THEDICT:
            if sensor_id[0:2] == '28' or sensor_id[0:2] == '10':
                temp = read_sensor_ds18b20(sensor_id, THEDICT[sensor_id]["path"])
                volt = "n.a."
                THEDICT[sensor_id]["temp"].append(temp)
                THEDICT[sensor_id]["volt"].append(0.)
            if sensor_id[0:2] == '26':
                temp, volt = read_sensor_ds2438(sensor_id, THEDICT[sensor_id]["path"])
                THEDICT[sensor_id]["temp"].append(temp)
                THEDICT[sensor_id]["volt"].append(volt)
            print "step " + str(step) + " " + sensor_id + " " + str(temp) + " " + str(volt)

    print "calculating individual and total means:"
    MEAN_IND = {}
    for sensor_id in THEDICT:
        MEAN_IND[sensor_id] = [
            np.nanmean(np.array(THEDICT[sensor_id]["temp"])),
            np.nanmean(np.array(THEDICT[sensor_id]["volt"]))
        ]
    total_temp = []
    total_volt = []
    for sensor_id in MEAN_IND:
        if sensor_id[0:2] == '28' or sensor_id[0:2] == '10':
            total_temp.append(MEAN_IND[sensor_id][0])
        if sensor_id[0:2] == '26':
            total_volt.append(MEAN_IND[sensor_id][1])
    mean_temp = np.nanmean(np.array(total_temp))
    mean_volt = np.nanmean(np.array(total_volt))
    print "temp mean: " + str(mean_temp) + " +/- " + str(np.nanstd(np.array(total_temp)))
    print "volt mean: " + str(mean_volt) + " +/- " + str(np.nanstd(np.array(total_volt)))

    print "calculating offsets..."
    OFFSETS = {}
    for sensor_id in MEAN_IND:
        OFFSETS[sensor_id] = [
            MEAN_IND[sensor_id][0] - mean_temp,
            MEAN_IND[sensor_id][1] - mean_volt
        ]
    print OFFSETS

    print "writing offsets..."
    write_offset(OFFSETS)
def average_values(result): """average the results of bootstrapping""" result_avg = result[0] if isinstance(result_avg,dict): for key in result_avg: val = result_avg[key] if isinstance(val,dict): result_avg[key] = average_values([r[key] for r in result]) elif isinstance(val, np.float64): result_avg[key] = np.nanmean([r[key] for r in result], axis=0) elif key == 'twoway': for (x,y), res in np.ndenumerate(val): result_avg[key][x,y] = average_values([r[key][x,y] for r in result]) elif isinstance(val, np.ndarray) and not val.dtype == np.object: result_avg[key] = np.nanmean([r[key] for r in result], axis=0) elif key != 'target': result_avg[key] = [r[key] for r in result] #make sure we get a cell-array back in MATLAB if isinstance(val, str): result_avg[key] = np.array(result_avg[key],dtype=np.object) return result_avg
def calculate_agg_middle_features(df, velocity_median, velocity_mean, velocity_05, velocity_10, velocity_15, velocity_25, velocity_75, velocity_85, velocity_90, velocity_95, velocity_99): if df.shape[0] > 0: # This method calculates the aggregated feature and # saves them in the original df as well as an metadata df. v_ave = np.nanmean(df['velocity'].values) v_min = np.nanmin(df['velocity'].values) v_max = np.nanmax(df['velocity'].values) a_ave = np.nanmean(df['acceleration'].values) a_min = np.nanmin(df['acceleration'].values) a_max = np.nanmax(df['acceleration'].values) d_ave = np.nanmean(df['distance'].values) d_min = np.nanmin(df['distance'].values) d_max = np.nanmax(df['distance'].values) e_ave = np.nanmean(df['elevation'].values) e_min = np.nanmin(df['elevation'].values) e_max = np.nanmax(df['elevation'].values) lon_ave = np.nanmean(df['longitude'].values) lon_min = np.nanmin(df['longitude'].values) lon_max = np.nanmax(df['longitude'].values) lat_ave = np.nanmean(df['latitude'].values) lat_min = np.nanmin(df['latitude'].values) lat_max = np.nanmax(df['latitude'].values) az_ave = np.nanmean(df['azimus'].values) az_min = np.nanmin(df['azimus'].values) az_max = np.nanmax(df['azimus'].values) long_delta_ave = np.nanmean(df['long_delta'].values) long_delta_min = np.nanmin(df['long_delta'].values) long_delta_max = np.nanmax(df['long_delta'].values) latitude_delta_ave = np.nanmean(df['latitude_delta'].values) latitude_delta_min = np.nanmin(df['latitude_delta'].values) latitude_delta_max = np.nanmax(df['latitude_delta'].values) velocity_delta_ave = np.nanmean(df['velocity'].values) velocity_delta_min = np.nanmin(df['velocity'].values) velocity_delta_max = np.nanmax(df['velocity'].values) azimus_delta_ave = np.nanmean(df['azimus'].values) azimus_delta_min = np.nanmin(df['azimus'].values) azimus_delta_max = np.nanmax(df['azimus'].values) elevation_delta_ave = np.nanmean(df['elevation'].values) elevation_delta_min = np.nanmin(df['elevation'].values) elevation_delta_max = np.nanmax(df['elevation'].values) velocity_median_count = np.sum(df['velocity'] > velocity_median) velocity_mean_count = np.sum(df['velocity'] > velocity_mean) velocity_05_count = np.sum(df['velocity'] > velocity_05) velocity_10_count = np.sum(df['velocity'] > velocity_10) velocity_15_count = np.sum(df['velocity'] > velocity_15) velocity_25_count = np.sum(df['velocity'] > velocity_25) velocity_75_count = np.sum(df['velocity'] > velocity_75) velocity_85_count = np.sum(df['velocity'] > velocity_85) velocity_90_count = np.sum(df['velocity'] > velocity_90) velocity_95_count = np.sum(df['velocity'] > velocity_95) velocity_99_count = np.sum(df['velocity'] > velocity_99) middle_list = list(df['distance'].quantile([0, .05, .1, .2, .25, .3, .4, .5, .6, .7, .75, .8, .9, .95, 1])) + \ list(df['velocity'].quantile([0, .05, .1, .2, .25, .3, .4, .5, .6, .7, .75, .8, .9, .95, 1])) + \ list(df['acceleration'].quantile([0, .05, .1, .2, .25, .3, .4, .5, .6, .7, .75, .8, .9, .95, 1])) + \ list(df['elevation'].quantile([0, .05, .1, .2, .25, .3, .4, .5, .6, .7, .75, .8, .9, .95, 1])) + \ list(df['longitude'].quantile([0, .05, .1, .2, .25, .3, .4, .5, .6, .7, .75, .8, .9, .95, 1])) + \ list(df['latitude'].quantile([0, .05, .1, .2, .25, .3, .4, .5, .6, .7, .75, .8, .9, .95, 1])) + \ list(df['azimus'].quantile([0, .05, .1, .2, .25, .3, .4, .5, .6, .7, .75, .8, .9, .95, 1])) + \ list(df['long_delta'].quantile([0, .05, .1, .2, .25, .3, .4, .5, .6, .7, .75, .8, .9, .95, 1])) + \ list(df['latitude_delta'].quantile([0, .05, .1, .2, .25, .3, .4, .5, .6, .7, 
.75, .8, .9, .95, 1])) + \ list(df['velocity_delta'].quantile([0, .05, .1, .2, .25, .3, .4, .5, .6, .7, .75, .8, .9, .95, 1])) + \ list(df['azimus_delta'].quantile([0, .05, .1, .2, .25, .3, .4, .5, .6, .7, .75, .8, .9, .95, 1])) + \ list(df['elevation_delta'].quantile([0, .05, .1, .2, .25, .3, .4, .5, .6, .7, .75, .8, .9, .95, 1])) + \ [d_ave, d_min, d_max] + \ [v_ave, v_min, v_max] + \ [a_ave, a_min, a_max] + \ [e_ave, e_min, e_max] + \ [lon_ave, lon_min, lon_max] + \ [lat_ave, lat_min, lat_max] + \ [az_ave, az_min, az_max] + \ [long_delta_ave, long_delta_min, long_delta_max] + \ [latitude_delta_ave, latitude_delta_min, latitude_delta_max] + \ [velocity_delta_ave, velocity_delta_min, velocity_delta_max] + \ [azimus_delta_ave, azimus_delta_min, azimus_delta_max] + \ [elevation_delta_ave, elevation_delta_min, elevation_delta_max] + \ [velocity_median_count, velocity_mean_count, velocity_05_count, velocity_10_count, velocity_15_count, velocity_25_count, velocity_75_count, velocity_85_count, velocity_90_count, velocity_95_count, velocity_99_count] else: middle_list = [-1.0] * 227 return middle_list
def _fitgfunc_set(self, obj, g_func, p0=None,method_dqdt = None, method_fit = None, regression_function = None, uncertainty = True): if (method_fit == 'ols') or (method_fit == 'quantile'): if len(obj.dq_dt[method_dqdt])>0: x = np.log(np.array(obj.qh[method_dqdt])) y = np.log(obj.dq_dt[method_dqdt] * obj.corh[method_dqdt]) #Remove data when discharge <= 0 y = y[~np.isinf(x)] x = x[~np.isinf(x)] x = x[~np.isinf(y)] y = y[~np.isinf(y)] if len(x)>0: d = {'x': x, 'y': y} df = pd.DataFrame(data=d) if regression_function == 'ln(-dqdt) = a + b * ln(Q)': if method_fit == 'ols': res = smf.ols(formula='y ~ x', data=df).fit() elif method_fit == 'quantile': res = smf.quantreg('y ~ x', data=df).fit(q=.10) else: logger.error('Wrong fitting method') popt = [np.exp(res.params[0]), res.params[1] - 1] #modifying popt due to log and g(Q)*Q if uncertainty: ci = res.conf_int(alpha=0.05, cols=None) tmp = ufloat(np.log(popt[0]), ci[1][0] - np.log(popt[0])) tmp = uexp(tmp) std = tmp.std_dev popt_low = [popt[0] - std, ci[0][1] - 1] popt_high = [popt[0] + std,ci[1][1] - 1] else: popt_low, popt_high = None, None elif regression_function == 'ln(-dqdt) ~ a + b * ln(Q) + c * ln(Q)^2': if method_fit == 'ols': res = smf.ols(formula='y ~ x + np.power(x,2)', data=df).fit() elif method_fit == 'quantile': res = smf.quantreg('y ~ x + np.power(x,2)', data=df).fit(q=.10) else: logger.error('Wrong fitting method') logger.error(method_fit) popt = [res.params[0], res.params[1]-1 , res.params[2]] #modifying popt due to log and g(Q)*Q if uncertainty: ci = res.conf_int(alpha=0.05, cols=None) popt_low = [ci[0][0], ci[0][1] -1 , ci[0][2]] popt_high = [ci[1][0], ci[1][1] -1, ci[1][2]] else: popt_low, popt_high = None, None else: logger.error('Wrong regression function') else: if regression_function == 'ln(-dqdt) = a + b * ln(Q)': popt = [0.,0.0] popt_high = [0,0] popt_low = [0,0] elif regression_function == 'ln(-dqdt) ~ a + b * ln(Q) + c * ln(Q)^2': popt = [0.,0.0,0.] popt_high = [0,0,0.] popt_low = [0,0,0.] logger.debug('No data - fitgfunc') else: # if regression_function == 'ln(-dqdt) = a + b * ln(Q)': popt = [0.,0.0] popt_high = [0,0] popt_low = [0,0] elif regression_function == 'ln(-dqdt) ~ a + b * ln(Q) + c * ln(Q)^2': popt = [0.,0.0,0.] popt_high = [0,0,0.] popt_low = [0,0,0.] logger.debug('No data - fitgfunc') return lambda x: g_func(x, popt), popt, popt_low, popt_high, None, None, None,None, None elif method_fit == 'wls': x = np.array(obj.qh[method_dqdt]) y = obj.dq_dt[method_dqdt] * obj.corh[method_dqdt] y = y[x>0] #Remove data when discharge <=0 x = x[x>0] #sort y based on x and sort x as well. temp = x.argsort() temp = np.flipud(temp) y = y[temp] x = x[temp] xlog = np.log(x) binx = [] biny = [] binvar = [] binvarlog = [] bin_stderr_divQ = [] binnobs = [] bin_x_range = [np.nanmax(xlog)] xlog_min = np.nanmin(xlog) onepercent_range = (np.nanmax(xlog) - np.nanmin(xlog)) / 100. 
flag_cont = True idx_here = 0 while flag_cont: #Check if there is enough data std_err = 0.0 #First guess on the bin bin_upper = bin_x_range[idx_here] bin_lower = bin_x_range[idx_here] - onepercent_range if bin_lower > xlog_min: #adjust the range based on standard error flag_criteria = True bin_upper_idx = next(xx[0] for xx in enumerate(xlog) if xx[1] <= bin_upper) if idx_here>0: bin_upper_idx = bin_upper_idx + 1 bin_lower_idx = next(xx[0] for xx in enumerate(xlog) if xx[1] <= bin_lower) bin_lower = xlog[bin_lower_idx] while flag_criteria: if len(y[bin_upper_idx:bin_lower_idx]) > 1: std_err_y = sem(y[bin_upper_idx:bin_lower_idx]) half_mean = np.nanmean(y[bin_upper_idx:bin_lower_idx]) * 0.5 x_mean = np.nanmean(x[bin_upper_idx:bin_lower_idx]) else: std_err_y = np.inf half_mean = 0.0 if std_err_y <= half_mean: flag_criteria = False else: bin_lower_idx = bin_lower_idx + 1 if bin_lower_idx >= len(x): flag_criteria = False flag_cont = False x_mean = np.nan #add stats to the arrays if ~np.isnan([np.float64(x_mean), np.float64(half_mean * 2.0),np.power(np.float64(std_err_y),2.0)]).any(): #how is this possible? happen when bin_low_idx>=len(x) above? binx.append(np.float64(x_mean)) biny.append(np.float64(half_mean * 2.0)) binvar.append(np.power(np.float64(std_err_y),2.0)) binvarlog.append(np.power(np.float64(sem(np.log(y[bin_upper_idx:bin_lower_idx]))),2.0)) bin_stderr_divQ.append(np.float64(sem(np.array(y[bin_upper_idx:bin_lower_idx])/np.array(x[bin_upper_idx:bin_lower_idx])))) bin_x_range.append(bin_lower) binnobs.append(bin_lower_idx-bin_upper_idx) idx_here = idx_here + 1 else: # didnt include the last bin for now flag_cont = False if idx_here >= len(x): flag_cont = False d = {'x': np.log(binx), 'y': np.log(biny)} df = pd.DataFrame(data=d) if regression_function == 'ln(-dqdt) = a + b * ln(Q)': wls_res = smf.wls('y ~ x', data =df, weights = 1./np.array(binvarlog)).fit() #maybe I need the variance in the log space... popt = [np.exp(wls_res.params[0]), wls_res.params[1] - 1] #modifying popt due to log and g(Q)*Q ci = wls_res.conf_int(alpha=0.05, cols=None) tmp = ufloat(np.log(popt[0]), ci[1][0] - np.log(popt[0])) tmp = uexp(tmp) std = tmp.std_dev popt_low = [popt[0] - std, ci[0][1] - 1] popt_high = [popt[0] + std,ci[1][1] - 1] elif regression_function == 'ln(-dqdt) ~ a + b * ln(Q) + c * ln(Q)^2': wls_res = smf.wls('y ~ x + np.power(x,2)', data =df, weights = 1./np.array(binvarlog)).fit() #maybe I need the variance in the log space... popt = [wls_res.params[0], wls_res.params[1] - 1, wls_res.params[2]] #modifying popt due to log and g(Q)*Q ci = wls_res.conf_int(alpha=0.05, cols=None) popt_low = [ci[0][0], ci[0][1] - 1, ci[0][2]] popt_high = [ci[1][0], ci[1][1] - 1, ci[1][2]] else: logger.error('Wrong regression function') return lambda x: g_func(x, popt), popt, popt_low, popt_high, np.array(binx), np.array(biny) / np.array(binx), binnobs, np.sqrt(np.array(binvar)), bin_stderr_divQ else: logger.error('Wrong fittig method')
def MLD_temp_and_dens_criteria(dt,drho,time,depth,temp,salt,dens): MLD_temp_crit = np.empty(len(time)) MLD_temp_crit[:] = np.nan Tmean_temp_crit = np.empty(len(time)) Tmean_temp_crit[:] = np.nan Smean_temp_crit = np.empty(len(time)) Smean_temp_crit[:] = np.nan #Td_temp_crit = np.empty(len(time)) #Td_temp_crit[:] = np.nan MLD_dens_crit = np.empty(len(time)) MLD_dens_crit[:] = np.nan Tmean_dens_crit = np.empty(len(time)) Tmean_dens_crit[:] = np.nan Smean_dens_crit = np.empty(len(time)) Smean_dens_crit[:] = np.nan #Td_dens_crit = np.empty(len(time)) #Td_dens_crit[:] = np.nan for t,tt in enumerate(time): if depth.ndim == 1: d10 = np.where(depth >= 10)[0][0] if depth.ndim == 2: d10 = np.where(depth[:,t] >= -10)[0][-1] T10 = temp[d10,t] delta_T = T10 - temp[:,t] ok_mld_temp = np.where(delta_T <= dt)[0] rho10 = dens[d10,t] delta_rho = -(rho10 - dens[:,t]) ok_mld_rho = np.where(delta_rho <= drho)[0] if ok_mld_temp.size == 0: MLD_temp_crit[t] = np.nan #Td_temp_crit[t] = np.nan Tmean_temp_crit[t] = np.nan Smean_temp_crit[t] = np.nan else: if depth.ndim == 1: MLD_temp_crit[t] = depth[ok_mld_temp[-1]] #ok_mld_plus1m = np.where(depth >= depth[ok_mld_temp[-1]] + 1)[0][0] if depth.ndim == 2: MLD_temp_crit[t] = depth[ok_mld_temp[-1],t] #ok_mld_plus1m = np.where(depth >= depth[ok_mld_temp[-1],t] + 1)[0][0] #Td_temp_crit[t] = temp[ok_mld_plus1m,t] Tmean_temp_crit[t] = np.nanmean(temp[ok_mld_temp,t]) Smean_temp_crit[t] = np.nanmean(salt[ok_mld_temp,t]) if ok_mld_rho.size == 0: MLD_dens_crit[t] = np.nan #Td_dens_crit[t] = np.nan Tmean_dens_crit[t] = np.nan Smean_dens_crit[t] = np.nan else: if depth.ndim == 1: MLD_dens_crit[t] = depth[ok_mld_rho[-1]] #ok_mld_plus1m = np.where(depth >= depth[ok_mld_rho[-1]] + 1)[0][0] if depth.ndim == 2: MLD_dens_crit[t] = depth[ok_mld_rho[-1],t] #ok_mld_plus1m = np.where(depth >= depth[ok_mld_rho[-1],t] + 1)[0][0] #Td_dens_crit[t] = temp[ok_mld_plus1m,t] Tmean_dens_crit[t] = np.nanmean(temp[ok_mld_rho,t]) Smean_dens_crit[t] = np.nanmean(salt[ok_mld_rho,t]) return MLD_temp_crit,Tmean_temp_crit,Smean_temp_crit,\ MLD_dens_crit,Tmean_dens_crit,Smean_dens_crit
def get_offset(nBurst, Df_DC, coh_treshold=0.3): burst1 = 'burst_' + str(nBurst) + '/' burst2 = 'burst_' + str(nBurst + 1) + '/' # cpxint16 and cpxfloat32 dataFormat_s = 'complex64' line_start, line_length, first_pixel_this, first_pixel_next, pixel_length, this_nr_oflines, this_nr_ofpixels, next_nr_oflines, next_nr_ofpixels, PRF = get_coordinates( nBurst) ifgs_1 = freadbk(burst1 + 'cint.raw.old', line_start, first_pixel_this, line_length, pixel_length, dataFormat_s, this_nr_oflines, this_nr_ofpixels) ESD_coh_1 = freadbk(burst1 + 'coherence.raw', line_start, first_pixel_this, line_length, pixel_length, 'float32', this_nr_oflines, this_nr_ofpixels) ifgs_2 = freadbk(burst2 + 'cint.raw.old', 1, first_pixel_next, line_length, pixel_length, dataFormat_s, next_nr_oflines, next_nr_ofpixels) ESD_coh_2 = freadbk(burst2 + 'coherence.raw', 1, first_pixel_next, line_length, pixel_length, 'float32', next_nr_oflines, next_nr_ofpixels) ESD_coh = (ESD_coh_1 + ESD_coh_2) / 2 #ifgs_1_total = freadbk(burst1 + 'cint.raw.old', 1, 1, this_nr_oflines, this_nr_ofpixels, dataFormat_s, this_nr_oflines, this_nr_ofpixels) #ifgs_2_total = freadbk(burst2 + 'cint.raw.old', 1, 1, next_nr_oflines, next_nr_ofpixels, dataFormat_s, next_nr_oflines, next_nr_ofpixels) # Remove invalid data both in range and azimuth valid_range = [] valid_azimuth = [] for i in range(0, len(ifgs_1[0, :])): if np.nanmean(abs(ifgs_1[:, i])) != 0 and np.nanmean(abs( ifgs_2[:, i])) != 0: valid_range.append(i) for i in range(0, len(ifgs_1[:, 0])): if np.nanmean(abs(ifgs_1[i, :])) != 0 and np.nanmean(abs( ifgs_2[i, :])) != 0: valid_azimuth.append(i) if valid_range and valid_azimuth: ifgs_1 = ifgs_1[:, valid_range[:]] ifgs_2 = ifgs_2[:, valid_range[:]] ESD_coh = ESD_coh[:, valid_range[:]] ifgs_1 = ifgs_1[valid_azimuth[:], :] ifgs_2 = ifgs_2[valid_azimuth[:], :] ESD_coh = ESD_coh[valid_azimuth[:], :] Df_DC = Df_DC[:, valid_range[:]] Df_DC = Df_DC[valid_azimuth[:], :] # First downsample 2 * 10 Nra = 10 Naz = 2 new_ra = ESD_coh.shape[1] / 10 new_az = ESD_coh.shape[0] / 2 ESD_coh = ESD_coh[0:new_az * Naz - 1:Naz, 0:new_ra * Nra - 1:Nra] ifgs_1_multilook = ifgs_1[:new_az * 2, :new_ra * 10].reshape( [new_az, Naz, new_ra, Nra]).mean(3).mean(1) ifgs_2_multilook = ifgs_2[:new_az * 2, :new_ra * 10].reshape( [new_az, Naz, new_ra, Nra]).mean(3).mean(1) Df_DC_multilook = Df_DC[:new_az * 2, :new_ra * 10].reshape( [new_az, Naz, new_ra, Nra]).mean(3).mean(1) # Double difference and calculate weights according to Cramer Rao bound diffBursts = ifgs_1_multilook * ifgs_2_multilook.conj() weights = 2 * ESD_coh * ESD_coh / (1 - ESD_coh * ESD_coh) W = np.sum(weights[ESD_coh > coh_treshold]) angle = ( PRF / (2 * np.pi * np.nanmean(Df_DC_multilook[ESD_coh > coh_treshold] * weights[ESD_coh > coh_treshold] / np.mean(weights[ESD_coh > coh_treshold])))) offset = np.angle( np.sum(diffBursts[ESD_coh > coh_treshold] * weights[ESD_coh > coh_treshold]) / W) * angle angle_pixel = angle * (line_start - 1) return offset, W, angle_pixel
time_ax = da.DimArray(axes=[np.array(gmt.time)], dims=['time']) time_ax[:] = gmt.time # read HadCRUT4 dat = open('data/HadCRUT4_gmt.txt', 'r').read() had4 = [] year = [] for line in dat.split('\n')[::2]: year.append(line.split(' ')[1]) had4.append(float(line.split(' ')[-1])) # get HadCRUT4 for 1850-2016 had4_gmt_ = np.array(had4[:-1]) had4_gmt = da.DimArray(axes=[np.array(gmt.time)], dims=['time']) had4_gmt[1850:2016] = had4_gmt_ ref_ar5 = gmt.time[(gmt.time >= 1986) & (gmt.time < 2006)] had4_gmt[:] = had4_gmt[:] - np.nanmean(had4_gmt[ref_ar5]) + 0.61 #print np.nanmean(np.array(had4_gmt_-np.nanmean(had4_gmt_[0:240]))[136*12:145*12]) print 'hadcrut4', np.nanmean(had4_gmt[2010:2020]) print 'blend-mask', np.nanmean(gmt['rcp85', :, 'gmt_bm', 2010:2020]) - 0.93 print 'millar', np.nanmean(gmt['rcp85', :, 'gmt_millar', 2010:2020]) print 'blend-mask', np.nanmean(gmt['rcp85', :, 'gmt_bm', 2015:2016]) - 0.93 # FIG SI 1 plot_dict = { 'gmt_sat': { 'l_color': 'orange', 'color': 'darkorange', 'longname': '$\mathregular{GMT_{SAT}}$', 'pos': 0.65, 'lsty': '-'
gv = mask.get_data() == cl_values[c] # voxels showing the result # get voxels showing the result and also inside the network mask res_net = np.multiply(gv, nets.get_data()[:, :, :] > 0) nvoxels = np.sum(res_net) / float(np.sum(gv)) if not quiet: print 'Cluster %d overlap: %.2f' % (c, nvoxels) for sidx, s in enumerate(subjs): fname = '%s/dr_stage2_%s_Z.nii.gz' % (data_dir, s) img = nb.load(fname) subj_data = img.get_data()[gv, int(ic)] # # if we want to only plot results inside the network # subj_data = img.get_data()[res_net == 1, int(ic)] gidx = my_groups.index(groups[sidx]) cl_data[gidx].append(float(np.nanmean(subj_data))) sx[gidx].append(float(all_sx[sidx])) data.append(cl_data) nrows = nclusters ncols = 2 cnt = 1 fig = pl.figure(figsize=[10.25, nclusters * 5.9]) # for each cluster, make a scatterplot and a barplot for cl in range(nclusters): if res_fname.find('NV') > 0: x = [i for g in sx for i in g] y = [i for g in data[cl] for i in g] else: x = [
def make_gridded_dataset(data, res=0.25): """ Big ugly function to make a lat/lon gridded netcdf out L2 AMSR precip retrievals. In lieu of proper docstrings, because if you're reading this I forgot to polish this before sharing, I'll explain the gist of what's happening. Real simple, we take our data, smoosh it so that each obs falls at the nearest lat/lon point on our regular grid, group the data by which grid box it falls in, and calculate the relevant stats of the distribution of obs in each grid box. Stats are then returned as an xarray dataset. """ def round_nearest(arr, res): nans = np.isnan(arr) ret = (((arr+res/2)/res)//1)*res ret[nans] = np.nan return ret def reshape_incomplete_array(complete_idx, incomplete_idx, vals, shape): new_vals = np.full_like(complete_idx, fill_value=np.nan) for idx, val in zip(incomplete_idx, vals): new_vals[idx] = val return new_vals.reshape(shape) rain_stats_dict = {0: {'name': 'rain_prob', 'long_name': 'Probability of Rain', 'standard_name': 'rain_probability', 'units': '0-1'}, 1: {'name': 'rain_rate', 'long_name': 'Rain Rate', 'standard_name': 'rain_rate', 'units': 'mm hr^-1'}, 2: {'name': 'rain_rwr', 'long_name': 'Rain Rate While Raining', 'standard_name': 'conditional_rain_rate', 'units': 'mm hr^-1'}, 3: {'name': 'rain_max', 'long_name': 'Max Rain Rate', 'standard_name': 'max_rain_rate', 'units': 'mm hr^-1'}} func_dict = {'mean': np.nanmean, 'median': np.nanmedian, '25_pctile': lambda x: np.nanpercentile(x, 25), '75_pctile': lambda x: np.nanpercentile(x, 75), 'min': np.nanmin, 'max': np.nanmax} if not 1/res == int(1/res): raise ValueError("I haven't gone through to test whether this will work for any resolution that's not a unit fraction.") #setting up new grid and gridbox index grid_lats = np.arange(-90, 90, res) grid_lons = np.arange(0, 360, res) grid_coords = np.array(list(product(grid_lats, grid_lons))) full_grid_lats = grid_coords[:,0] full_grid_lons = grid_coords[:,1] grid_coords_lats_idx = (full_grid_lats+90)/res grid_coords_lons_idx = full_grid_lons/res grid_combined_idx = (360/res)*grid_coords_lats_idx + grid_coords_lons_idx assert(len(np.unique(grid_combined_idx)) == len(grid_combined_idx)) #setting up old data unique index old_lats = data.latitude.values.flatten() old_lons = data.longitude.values.flatten() good_filt = np.logical_and(~np.isnan(old_lats), ~np.isnan(old_lons)) old_lats, old_lons = old_lats[good_filt], old_lons[good_filt] lats_regrid = round_nearest(old_lats, res) lons_regrid = round_nearest(old_lons, res)%360 lats_regrid_idx = (lats_regrid+90)/res lons_regrid_idx = lons_regrid/res unique_combined_idx = (360/res)*lats_regrid_idx + lons_regrid_idx assert(set(unique_combined_idx).issubset(grid_combined_idx)) #grouping old data by box grouped = Groupby(unique_combined_idx.astype(int)) def new_reshape(vals): """Reshapes value from groupby operation to an unfilled lat/lon grid""" return reshape_incomplete_array(grid_combined_idx, grouped.keys, vals, shape=(len(grid_lats), len(grid_lons))) ds = xr.Dataset() ds['latitude'] = grid_lats ds['longitude'] = grid_lons ds.attrs['comments'] = "gridded netcdf created by [email protected], adapted from R Eastman AMSR 89 GHz retrievals. 
" +\ "https://doi.org/10.1175/JTECH-D-18-0185.1" ds.attrs['creation date'] = str(dt.datetime.utcnow()) ds.attrs['resolution'] = f'{str(res)} deg' ds['obs_count'] = (('latitude', 'longitude'), new_reshape(grouped.apply(len, np.empty_like(unique_combined_idx)))) ds['not_nan_count'] = (('latitude', 'longitude'), new_reshape(grouped.apply( lambda x: sum(~np.isnan(x)), np.empty_like(unique_combined_idx)))) ds['time'] = (('latitude', 'longitude'), new_reshape(grouped.apply( lambda x: np.nanmean(x.astype('int64')).astype('datetime64[ns]'), data['datetime'].values.flatten()[good_filt]))) for k, v in rain_stats_dict.items(): print('working on '+v['name']) sys.stdout.flush() old_data = data.rain_stats.isel(prob_rate_rwr_max=k).values.flatten()[good_filt] for func_name, func in func_dict.items(): new_vals = new_reshape(grouped.apply(func, old_data)) new_dict = {'long_name': f"{v['long_name']}_{func_name}", 'standard_name': f"{v['standard_name']}_{func_name}", 'units': v['units']} ds[f"{v['name']}_{func_name}"] = (('latitude', 'longitude'), new_vals, new_dict) # print(f"{v['name']}_{func_name}") sys.stdout.flush() print('finishing one') sys.stdout.flush() return ds
def train_nn(model, name, optimizer, scheduler, train_generator, test_generator, classification=False, n_epochs=10, outputs=[], use_wandb=False, plot_gradients=False, seed=0): np.random.seed(seed) torch.manual_seed(seed) if use_wandb: import wandb device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") print(device) print(classification) model = model.to(device) # for p in model.parameters(): # param_norm = p.grad.data.norm(2) # print(param_norm.item()) # print(p.grad.data) # total_norm += param_norm.item() ** 2 # total_norm = total_norm ** (1. / 2) # print('norm', total_norm) if use_wandb and plot_gradients: wandb.watch(model, log='all') # by default, reduction = mean when multiple outputs #criterion = nn.MSELoss() if classification: criterion = nn.BCELoss() else: criterion = nn.MSELoss(reduction="none") step = 0 best_loss_ts = None best_loss_tr = None losses_tr = [] losses_ts = [] dtype = torch.cuda.FloatTensor if torch.cuda.is_available( ) else torch.FloatTensor for epoch in range(n_epochs): print(epoch) train_aux = [] for x, y, lengths in train_generator: # print(x.shape) x, y, lengths = x.type(dtype).to(device), y.type(dtype).to( device), lengths.to(device) preds = model(x, lengths) preds = preds.reshape(x.shape[0], len(outputs)) assert preds.shape == y.shape, "{} {}".format(preds.shape, y.shape) loss_elements = criterion(preds, y) loss = loss_elements.mean() if np.isnan(loss.detach().cpu().numpy()): raise ValueError("Train loss is nan: ", loss) train_aux.append(loss.detach().cpu().numpy()) # TODO: maybe we don't want to log at every step if use_wandb: wandb.log({f"{name} train loss per step": loss}, step=step) if len(outputs) > 1: outputs_loss = loss_elements.mean(dim=0) # print(outputs) # print(outputs_loss) assert len(outputs) == len(outputs_loss) per_output_loss = outputs_loss if use_wandb: for i in range(len(outputs)): wandb.log({outputs[i]: per_output_loss[i]}, step=step) optimizer.zero_grad() loss.backward() optimizer.step() preds = model(x, lengths) step += 1 if step % 20 == 0: # losses_tr.append(per_output_loss.detach().cpu().numpy()) aux = [] accuracy = [] for x, y, lengths in test_generator: x, y, lengths = x.type(dtype).to(device), y.type(dtype).to( device), lengths.to(device) loss_elements = criterion(model(x, lengths), y) loss = loss_elements.mean() if np.isnan(loss.detach().cpu().numpy()): raise ValueError("Test loss is nan: ", loss) if classification: accuracy.append( accuracy_score( model(x, lengths).detach().cpu().numpy(), y.detach().cpu().numpy().astype(np.int8))) aux.append(loss.detach().cpu().numpy()) test_loss = np.nanmean(aux) if use_wandb: wandb.log({f"{name} test loss per step": test_loss}, step=step) if len(outputs) > 1: outputs_loss = loss_elements.mean(dim=0) assert len(outputs) == len(outputs_loss) per_output_loss = outputs_loss if use_wandb: for i in range(len(outputs)): wandb.log({outputs[i]: per_output_loss[i]}, step=step) train_loss = train_aux[-1] # train_loss = np.nanmean(train_aux) # print(train_aux) train_aux = [] losses_tr.append(train_loss) # print(train_loss) if not np.isnan(train_loss) and not best_loss_tr or ( train_loss < best_loss_tr): if use_wandb: wandb.run.summary["best_tr_loss"] = train_loss best_loss_tr = train_loss scheduler.step() if classification: print('Train loss: ' + str(train_loss) + ", test loss: " + str(test_loss) + 'test accuracy: ' + np.nanmean(accuracy)) else: print('Train loss: ' + str(train_loss) + ", test loss: " + str(test_loss)) # losses_ts.append(per_output_loss.detach().cpu().numpy()) 
losses_ts.append(test_loss) if not np.isnan(test_loss) and not best_loss_ts or ( test_loss < best_loss_ts): if use_wandb: wandb.run.summary["best_loss"] = test_loss best_loss_ts = test_loss #print(list(model.parameters())[4]) return model, best_loss_tr, best_loss_ts, losses_tr, losses_ts
def parent_function(): # TODO: set patients patients = ["TS057"] # set which patients you want to test # show_fig = 0 # if 1, figures show; if 0, figures save to current working directory # create paths to the data folder to_data = os.path.dirname(os.path.dirname(os.path.dirname(os.getcwd()))) data_path = os.path.join(to_data, 'data') save_path = os.path.join(to_data, 'DCEpy', 'Features', 'BurnsStudy') features_path = os.path.join(save_path, 'StoredFeatures', 'features') # setting model parameters # TODO: some patients also have 5 min windowing available. If you want to play with it, chase chunk_len to 300 and chunk_overlap to 270 chunk_len = 180 chunk_overlap = 150 # MI parameters mi_win_len = 0.25 # seconds mi_win_overlap = 0 # seconds f_s = float(1e3) # Hz # TODO: set frequency bands here. Mapping see function get_freq_bands(). Delta band is not available now! freq_bands = ["theta", "beta", "gamma"] persistence_time = 300/(chunk_len - chunk_overlap) + 1 # persistence_time = 3.33 * 60 # minutes times seconds, the amount of time after a seizure prediction for which no alarm is raised preictal_time = 5 * 60 # minutes times seconds, the amount of time prior to seizure onset defined as preictal postictal_time = 5 * 60 # minutes times seconds, the amount of time after seizure end defined as postictal include_awake = True include_asleep = True # TODO: set rbf kernel here. svm_kernel = 'linear' # evaluate each patient for patient_index, patient_id in enumerate(patients): print "\n---------------------------Analyzing patient ", patient_id, "----------------------------\n" # update paths to be specific to each patient # my_data_path = "/Volumes/Brain_cleaner/Seizure Data/data" my_data_path = "/Users/TianyiZhang/Desktop/PatientData" p_data_path = os.path.join(my_data_path, patient_id) print 'Retreiving stored raw data' all_files, data_filenames, file_type, seizure_times = analyze_patient_raw(p_data_path, f_s, include_awake, include_asleep, patient_id, chunk_len, chunk_overlap, calc_train_local=True) number_files = len(all_files) # intializing performance stats prediction_sensitivity = np.zeros(len(all_files)) detection_sensitivity = np.zeros(len(all_files)) latency = np.zeros(len(all_files)) fp = np.zeros(len(all_files)) times = np.zeros(len(all_files)) # beginning leave one out cross-validation for i in xrange(number_files): print '\nCross validations, k-fold %d of %d ...' 
% (i + 1, number_files) # defining which files are training files vs testing files for this fold testing_file = all_files[i] testing_file_idx = i cv_file_names = data_filenames[:i] + data_filenames[i + 1:] cv_file_idxs = range(i) + range(i+1,number_files) cv_train_files = all_files[:i] + all_files[i + 1:] cv_file_type = file_type[:i] + file_type[i + 1:] cv_seizure_times = seizure_times[:i] + seizure_times[i + 1:] print '\tEntering offline training' my_svm, mean_MI_matrix, sd_MI_matrix = offline_training(cv_file_type, cv_file_names, cv_file_idxs, cv_seizure_times, cv_train_files, chunk_len, chunk_overlap, mi_win_len, mi_win_overlap, f_s, i, patient_id, persistence_time, preictal_time, postictal_time, freq_bands, svm_kernel) print'\tEntering online testing' # computing prediction on testing file for this fold print '\tCalculating testing features locally' # determine how many samples, windows, and channels we have in this test file total_test_samples = testing_file.shape[0] chunk_len_samples = chunk_len * f_s chunk_ovlap_samples = chunk_overlap * f_s num_chunks = int(math.floor(float(total_test_samples) / float(chunk_len_samples - chunk_ovlap_samples))) num_channels = testing_file.shape[1] # load test file test_key = str(testing_file_idx) + "_" + data_filenames[testing_file_idx].split("/")[-1] test_MI = get_MI_features(patient_id, test_key, freq_bands = freq_bands, chunk_len = chunk_len) # transform (normalize) MI matrix transformed_MI_test = transform_coherency([test_MI], mean_MI_matrix, sd_MI_matrix) test_features = find_centrality_multibands(transformed_MI_test)[0] # should be list of (n_samples, 2, 6, 6) # for loop to process each window in the test file # initializing for computing performance metrics # full_file_decision = np.zeros(num_chunks) t_samples = test_features.shape[0] full_file_decision = np.zeros(t_samples) alarm_timer = 0 for index in np.arange(t_samples): # getting the single window of data for this iteration of the for loop feature = test_features[index].reshape(1, -1) decision, label_index, alarm_timer = online_testing(feature, f_s, testing_file_idx, persistence_time, index, alarm_timer, my_svm) # storing the outlier fraction and decision for calculating performance metrics and visualization full_file_decision[index] = decision # using outputs from test file to compute performance metrics print'\tCalculating performance stats' print "\tFile Type: ", file_type[i] print "\t Full File Decision: ", full_file_decision # convert from units of windows to units of samples test_decision_sample = window_to_samples(full_file_decision, chunk_len, chunk_overlap, f_s) # find performance metrics for this fold of cross validation prediction_sensitivity[i], detection_sensitivity[i], latency[i], fp[i], times[i] = performance_stats( test_decision_sample, seizure_times[i], f_s, preictal_time, chunk_len, chunk_overlap) # print the performance metrics and visualize the algorithm output on a graph print '\tPrediction sensitivity = ', prediction_sensitivity[i], 'Detection sensitivity = ', \ detection_sensitivity[i], 'Latency = ', latency[i], 'FP = ', fp[i], 'Time = ', times[i] # viz_single_outcome(test_decision_sample, test_outlier_frac_sample, testing_file[:,0], seizure_times[i], threshold, i, patient_id, f_s) # compute false positive rate fpr = float(np.nansum(fp)) / float(np.nansum(times)) # print mean and median performance metrics print '\nMean prediction sensitivity = ', np.nanmean( prediction_sensitivity), 'Mean detection sensitivity = ', np.nanmean( detection_sensitivity), 'Mean latency 
= ', np.nanmean(latency), 'Mean FPR = ', fpr print 'Median prediction sensitivity = ', np.nanmedian( prediction_sensitivity), 'Median detection sensitivity = ', np.nanmedian( detection_sensitivity), 'Median latency = ', np.nanmedian(latency)
def plot_conditions(epochs, conditions=OrderedDict(), ci=97.5, n_boot=1000, title='', palette=None, ylim=(-6, 6), diff_waveform=(1, 2)): """Plot ERP conditions. Args: epochs (mne.epochs): EEG epochs Keyword Args: conditions (OrderedDict): dictionary that contains the names of the conditions to plot as keys, and the list of corresponding marker numbers as value. E.g., conditions = {'Non-target': [0, 1], 'Target': [2, 3, 4]} ci (float): confidence interval in range [0, 100] n_boot (int): number of bootstrap samples title (str): title of the figure palette (list): color palette to use for conditions ylim (tuple): (ymin, ymax) diff_waveform (tuple or None): tuple of ints indicating which conditions to subtract for producing the difference waveform. If None, do not plot a difference waveform Returns: (matplotlib.figure.Figure): figure object (list of matplotlib.axes._subplots.AxesSubplot): list of axes """ if isinstance(conditions, dict): conditions = OrderedDict(conditions) if palette is None: palette = sns.color_palette("hls", len(conditions) + 1) X = epochs.get_data() * 1e6 times = epochs.times y = pd.Series(epochs.events[:, -1]) fig, axes = plt.subplots(2, 2, figsize=[12, 6], sharex=True, sharey=True) axes = [axes[1, 0], axes[0, 0], axes[0, 1], axes[1, 1]] for ch in range(4): for cond, color in zip(conditions.values(), palette): sns.tsplot(X[y.isin(cond), ch], time=times, color=color, n_boot=n_boot, ci=ci, ax=axes[ch]) if diff_waveform: diff = (np.nanmean(X[y == diff_waveform[1], ch], axis=0) - np.nanmean(X[y == diff_waveform[0], ch], axis=0)) axes[ch].plot(times, diff, color='k', lw=1) axes[ch].set_title(epochs.ch_names[ch]) axes[ch].set_ylim(ylim) axes[ch].axvline(x=0, ymin=ylim[0], ymax=ylim[1], color='k', lw=1, label='_nolegend_') axes[0].set_xlabel('Time (s)') axes[0].set_ylabel('Amplitude (uV)') axes[-1].set_xlabel('Time (s)') axes[1].set_ylabel('Amplitude (uV)') if diff_waveform: legend = (['{} - {}'.format(diff_waveform[1], diff_waveform[0])] + list(conditions.keys())) else: legend = conditions.keys() axes[-1].legend(legend) sns.despine() plt.tight_layout() if title: fig.suptitle(title, fontsize=20) return fig, axes
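# sns.tsplot, used above, has been removed from recent seaborn releases. Below
# is a minimal sketch of the same idea -- bootstrap the across-epoch mean and
# shade a percentile confidence band -- using only numpy/matplotlib. This is
# an assumed stand-in, not the call made by the original code.
import numpy as np
import matplotlib.pyplot as plt

def plot_mean_ci(x, times, ci=97.5, n_boot=1000, ax=None, color='C0', seed=None):
    """x: (n_epochs, n_times) array; plot the mean with a bootstrapped CI band."""
    rng = np.random.default_rng(seed)
    boots = np.empty((n_boot, x.shape[1]))
    for b in range(n_boot):
        idx = rng.integers(0, x.shape[0], size=x.shape[0])
        boots[b] = np.nanmean(x[idx], axis=0)
    tail = (100 - ci) / 2                     # e.g. ci=97.5 -> 1.25/98.75 percentiles
    lo, hi = np.nanpercentile(boots, [tail, 100 - tail], axis=0)
    if ax is None:
        ax = plt.gca()
    ax.plot(times, np.nanmean(x, axis=0), color=color)
    ax.fill_between(times, lo, hi, color=color, alpha=0.3)
    return ax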
au['Classification'].str.replace( ' ', '').values + au['Subclassification'].str.replace(' ', '').values qu['fname'] = pd.Series(np.arange(qu.shape[0])).astype(str).values + '-' + qu['Member'].values + '-Q-' + \ qu['Classification'].str.replace( ' ', '').values + qu['Subclassification'].str.replace(' ', '').values mu['fname'] = pd.Series(np.arange(mu.shape[0])).astype(str).values + '-' + mu['Member'].values + '-M-' + \ mu['Classification'].str.replace( ' ', '').values + mu['Subclassification'].str.replace(' ', '').values # Normalize feature values and we store the mean and standard deviation qu_stat = {} for fname in qu['fname'].values: if 'RealGDPGrowth' not in fname: data = qu.loc[(qu['fname'] == fname), '2000-Q1':'2020-Q4'].values mean, std = np.nanmean(data), np.nanstd(data) qu.loc[(qu['fname'] == fname), '2000-Q1':'2020-Q4'] = (qu.loc[(qu['fname'] == fname), '2000-Q1':'2020-Q4'] - mean) / std qu_stat[fname] = mean, std def deseasonalize(qu=None): """ Deseasonalize quarterly data. We will decompose it first and get trend, seasonality, and random error. We then remove seasonality and save it to a dictionary to be used later Args: qu - quarterly data return: deseasonalized qu
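# A minimal, generic sketch of the per-feature z-scoring applied above: each
# row is standardized with its own nan-aware mean/std and the statistics are
# kept so the transform can be inverted later. The column range and the
# 'RealGDPGrowth' skip rule are specific to the original dataframe and are
# not reproduced here.
import numpy as np

def zscore_rows(values):
    """values: (n_features, n_periods) array; returns a standardized copy plus stats."""
    out = np.asarray(values, dtype=float).copy()
    stats = {}
    for i in range(out.shape[0]):
        mean, std = np.nanmean(out[i]), np.nanstd(out[i])
        out[i] = (out[i] - mean) / std
        stats[i] = (mean, std)
    return out, stats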
for i in range(numtrans):
    x = model.forward_model(x, p, dt)  # integrate the "truth" simulation
# The result is stored in a numpy array "state" with dimension (numstep, 3).
yo = forward_operator(x) + np.random.multivariate_normal(np.array([0]), R)

# Initialize the cycle from the "climatological" mean of the system, i.e. we
# have no precise information about where the system is at the initial time.
statefens = np.zeros((nvars, EnsSize))
for iens in range(EnsSize):
    statefens[:, iens] = np.nanmean(x, 0) + dx0 + np.random.multivariate_normal(
        np.zeros(nvars), P0)

# Compute the optimal transport matrix.
#from emd import emd
from scipy.spatial.distance import cdist
import ot

# Compute the inverse of the observation covariance matrix.
Rinv = np.linalg.inv(R)

# Compute the weights from the likelihood of the observations
# given each particle.
w = np.zeros(EnsSize)
for iens in range(EnsSize):
    yf = forward_operator(statefens[:, iens])
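# A minimal sketch of the likelihood-weighting step set up above: each
# ensemble member (particle) is weighted by the Gaussian likelihood of the
# observation given its forecast, and the weights are normalized. The
# forward_operator, R, and ensemble layout are assumed to match the
# surrounding script.
import numpy as np

def likelihood_weights(yo, yf_ens, R):
    """yo: (n_obs,) observation; yf_ens: (n_obs, EnsSize) forecasts; R: obs covariance."""
    Rinv = np.linalg.inv(R)
    EnsSize = yf_ens.shape[1]
    w = np.zeros(EnsSize)
    for iens in range(EnsSize):
        innov = yo - yf_ens[:, iens]
        w[iens] = np.exp(-0.5 * innov @ Rinv @ innov)
    return w / np.sum(w)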
rhs = [] ssts = [] binned = [[] for i in range(4)] values = [] regions = [] for region in sorted(world_dict.keys()): xai = world_dict[region]['feature_imp'].tolist() #xai.append(np.nanmean(world_dict[region]['values'])) X.append(xai) omegas.append(world_dict[region]['feature_imp'][0]) eiss.append(world_dict[region]['feature_imp'][1]) rhs.append(world_dict[region]['feature_imp'][2]) ssts.append(world_dict[region]['feature_imp'][3]) values.append(np.nanmean(world_dict[region]['values'])) regions.append(region) ssts = np.array(ssts) omegas = np.array(omegas) eiss = np.array(eiss) rhs = np.array(rhs) values = np.array(values) X = np.array(X) kmeans = KMeans(n_clusters=n_bins, n_init=5, algorithm='full', tol=.000001, max_iter=60,
index_y1f = np.min(np.where(time>simu.y1f)) ''' array_to_plotH = np.nanmean(moc[:,:,0,index_y1h:index_y1h+10],axis=2) array_to_plotH[array_to_plotH==0] = np.nan array_to_plotH = np.ma.masked_invalid(array_to_plotH) array_to_plotF = np.nanmean(moc[:,:,0,index_y1f:index_y1f+10],axis=2) array_to_plotF[array_to_plotF==0] = np.nan array_to_plotF = np.ma.masked_invalid(array_to_plotF) print(np.nanmin(array_to_plotH),np.nanmax(array_to_plotF)) print(array_to_plotH) make_plot.plot_map_s(xr,yr,array_to_plotH,var,simu.y1h,'',simu.output_fileH) make_plot.plot_map_s(xr,yr,array_to_plotF,var,simu.y1f,'',simu.output_fileF) ''' colorbar='undefined' array_to_plotH = np.nanmean(moc[0,:,:,index_y1h:index_y1h+10],axis=2) array_to_plotF = np.nanmean(moc[0,:,:,index_y1f:index_y1f+10],axis=2) X = yr[0,:] minline=-0.1 maxline=0.1 array_to_plotF = np.ma.masked_invalid(array_to_plotF) array_to_plotH = np.ma.masked_invalid(array_to_plotH) #make_plot.one_sec(array_to_plotH.transpose(),var,X,-depth,simu.max_depth,simu.y1h,simu.output_fileH,minline,maxline,str(simu.y1h)+'-'+str(simu.y2h),colorbar) #make_plot.one_sec(array_to_plotF.transpose(),var,X,-depth,simu.max_depth,simu.y1f,simu.output_fileF,minline,maxline,str(simu.y1f)+'-'+str(simu.y2f),colorbar) make_plot.one_sec(array_to_plotH.transpose(),array_to_plotF.transpose(),var,X,-depth,simu.max_depth,simu.y1h,simu.output_fileC,minline,maxline,'current and future',colorbar)
gt_coords, lm_cnt=valid_data.lm_cnt, pck_threshold=params['pck_threshold'], scale=1) # Write the validation result to csv write_pred_dataframe(valid_data, pred_coords, folder=params['valid_result_dir'] + "grid_temp/", file_name=str(date.today()) + col_name, patches_coord=None, write_index=False) result_dict = params result_dict = build_result_dict(result_dict=params, pck=np.round(pck, 4), mean_pck=round(np.nanmean(pck), 4), pck_threshold=params['pck_threshold'], diff_per_pt=np.round(diff_per_pt, 4), mean_diff_per_pt=round( np.nanmean(diff_per_pt), 4)) final_grid_df = final_grid_df.append( pd.DataFrame(result_dict, index=[id_grid])) final_grid_df.to_csv(params['valid_result_dir'] + "{}grid_search.csv".format(str(date.today())), index=False) # lr_list = np.round(lr_list,4) # N=5 # print(lr_list)
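# A minimal sketch of the PCK (percentage of correct keypoints) metric logged
# above: a predicted landmark is counted as correct when its distance to the
# ground truth falls below the threshold. The (n_images, n_landmarks, 2)
# coordinate layout is an assumption for illustration.
import numpy as np

def pck_per_landmark(pred, gt, threshold):
    dist = np.linalg.norm(pred - gt, axis=-1)    # (n_images, n_landmarks)
    correct = (dist <= threshold).astype(float)
    correct[np.isnan(dist)] = np.nan             # ignore missing landmarks
    return np.nanmean(correct, axis=0)           # fraction correct per landmark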
def classification_algorithm(file, out_directory, diagnostic=False, xr_data=False): Path(out_directory).mkdir(parents=True, exist_ok=True) df = xr.open_dataset(file) df = df.where(df.range > 90, drop=True) df = preprocess.bleed_through(df) df['beta_raw'] = df['beta_raw'].where( df['co_signal'] > (1 + df.attrs['background_snr_sd'])) classifier = np.zeros(df['beta_raw'].shape, dtype=int) log_beta = np.log10(df['beta_raw']) if xr_data is True: with open('ref_XR2.npy', 'rb') as f: ref_XR = np.load(f) log_beta[:, :50] = log_beta[:, :50] - ref_XR # Aerosol aerosol = log_beta < -5.5 # Small size median filter to remove noise aerosol_smoothed = median_filter(aerosol, size=11) # Remove thin bridges, better for the clustering aerosol_smoothed = median_filter(aerosol_smoothed, size=(15, 1)) classifier[aerosol_smoothed] = 10 for var in ['beta_raw', 'v_raw', 'depo_bleed']: df[var] = df[var].where( df['co_signal'] > (1 + 3 * df.attrs['background_snr_sd'])) log_beta = np.log10(df['beta_raw']) if xr_data is True: log_beta[:, :50] = log_beta[:, :50] - ref_XR range_flat = np.tile(df['range'], df['beta_raw'].shape[0]) # Liquid liquid = log_beta > -5.5 # maximum filter to increase the size of liquid region liquid_max = maximum_filter(liquid, size=5) # Median filter to remove background noise liquid_smoothed = median_filter(liquid_max, size=13) classifier[liquid_smoothed] = 30 # updraft - indication of aerosol zone updraft = df['v_raw'] > 1 updraft_smooth = median_filter(updraft, size=3) updraft_max = maximum_filter(updraft_smooth, size=91) # Fill the gap in aerosol zone updraft_median = median_filter(updraft_max, size=31) # precipitation < -1 (center of precipitation) precipitation_1 = (log_beta > -7) & (df['v_raw'] < -1) precipitation_1_median = median_filter(precipitation_1, size=9) # Only select precipitation outside of aerosol zone precipitation_1_ne = precipitation_1_median * ~updraft_median precipitation_1_median_smooth = median_filter(precipitation_1_ne, size=3) precipitation = precipitation_1_median_smooth # precipitation < -0.5 (include all precipitation) precipitation_1_low = (log_beta > -7) & (df['v_raw'] < -0.5) # Avoid ebola infection surrounding updraft # Useful to contain error during ebola precipitation updraft_ebola = df['v_raw'] > 0.2 updraft_ebola_max = maximum_filter(updraft_ebola, size=3) # Ebola precipitation for _ in range(1500): prep_1_max = maximum_filter(precipitation, size=3) prep_1_max *= ~updraft_ebola_max # Avoid updraft area precipitation_ = precipitation_1_low * prep_1_max if np.sum(precipitation) == np.sum(precipitation_): break precipitation = precipitation_ classifier[precipitation] = 20 # Remove all aerosol above cloud or precipitation mask_aerosol0 = classifier == 10 for i in np.array([20, 30]): if i == 20: mask = classifier == i else: mask = log_beta > -5 mask = maximum_filter(mask, size=5) mask = median_filter(mask, size=13) mask_row = np.argwhere(mask.any(axis=1)).reshape(-1) mask_col = np.nanargmax(mask[mask_row, :], axis=1) for row, col in zip(mask_row, mask_col): mask[row, col:] = True mask_undefined = mask * mask_aerosol0 classifier[mask_undefined] = i if (classifier == 10).any(): classifier_ = classifier.ravel() time_dbscan = np.repeat(np.arange(df['time'].size), df['beta_raw'].shape[1]) height_dbscan = np.tile(np.arange(df['range'].size), df['beta_raw'].shape[0]) time_dbscan = time_dbscan[classifier_ == 10].reshape(-1, 1) height_dbscan = height_dbscan[classifier_ == 10].reshape(-1, 1) X = np.hstack([time_dbscan, height_dbscan]) db = DBSCAN(eps=3, min_samples=25, 
n_jobs=-1).fit(X) v_dbscan = df['v_raw'].values.ravel()[classifier_ == 10] range_dbscan = range_flat[classifier_ == 10] v_dict = {} r_dict = {} for i in np.unique(db.labels_): v_dict[i] = np.nanmean(v_dbscan[db.labels_ == i]) r_dict[i] = np.nanmin(range_dbscan[db.labels_ == i]) lab = db.labels_.copy() for key, val in v_dict.items(): if key == -1: lab[db.labels_ == key] = 40 elif (val < -0.5): lab[db.labels_ == key] = 20 elif r_dict[key] == min(df['range']): lab[db.labels_ == key] = 10 elif (val > -0.2): lab[db.labels_ == key] = 10 else: lab[db.labels_ == key] = 40 classifier[classifier == 10] = lab # Separate ground rain if (classifier == 20).any(): classifier_ = classifier.ravel() time_dbscan = np.repeat(np.arange(df['time'].size), df['beta_raw'].shape[1]) height_dbscan = np.tile(np.arange(df['range'].size), df['beta_raw'].shape[0]) time_dbscan = time_dbscan[classifier_ == 20].reshape(-1, 1) height_dbscan = height_dbscan[classifier_ == 20].reshape(-1, 1) X = np.hstack([time_dbscan, height_dbscan]) db = DBSCAN(eps=3, min_samples=1, n_jobs=-1).fit(X) range_dbscan = range_flat[classifier_ == 20] r_dict = {} for i in np.unique(db.labels_): r_dict[i] = np.nanmin(range_dbscan[db.labels_ == i]) lab = db.labels_.copy() for key, val in r_dict.items(): if r_dict[key] == min(df['range']): lab[db.labels_ == key] = 20 else: lab[db.labels_ == key] = 30 classifier[classifier == 20] = lab cmap = mpl.colors.ListedColormap( ['white', '#2ca02c', 'blue', 'red', 'gray']) boundaries = [0, 10, 20, 30, 40, 50] norm = mpl.colors.BoundaryNorm(boundaries, cmap.N, clip=True) # decimal_time = df['time'].dt.hour + \ # df['time'].dt.minute / 60 + df['time'].dt.second/3600 if diagnostic is True: fig, axes = plt.subplots(6, 2, sharex=True, sharey=True, figsize=(16, 9)) for val, ax, cmap_ in zip( [ aerosol, aerosol_smoothed, liquid_smoothed, precipitation_1_median, updraft_median, precipitation_1_median_smooth, precipitation_1_low, updraft_ebola_max, precipitation ], axes.flatten()[2:-1], [['white', '#2ca02c'], ['white', '#2ca02c'], ['white', 'red'], ['white', 'blue'], ['white', '#D2691E'], ['white', 'blue'], ['white', 'blue'], ['white', '#D2691E'], ['white', 'blue']]): ax.pcolormesh(df['time'], df['range'], val.T, cmap=mpl.colors.ListedColormap(cmap_)) axes.flatten()[-1].pcolormesh(df['time'], df['range'], classifier.T, cmap=cmap, norm=norm) axes[0, 0].pcolormesh(df['time'], df['range'], np.log10(df['beta_raw']).T, cmap='jet', vmin=-8, vmax=-4) axes[0, 1].pcolormesh(df['time'], df['range'], df['v_raw'].T, cmap='jet', vmin=-2, vmax=2) fig.tight_layout() fig.savefig(out_directory + '/' + df.attrs['file_name'] + '_diagnostic_plot.png', dpi=150, bbox_inches='tight') fig, ax = plt.subplots(4, 1, figsize=(6, 8), sharex=True) ax1, ax3, ax5, ax7 = ax.ravel() p1 = ax1.pcolormesh(df['time'], df['range'], np.log10(df['beta_raw']).T, cmap='jet', vmin=-8, vmax=-4) p2 = ax3.pcolormesh(df['time'], df['range'], df['v_raw'].T, cmap='jet', vmin=-2, vmax=2) p3 = ax5.pcolormesh(df['time'], df['range'], df['depo_bleed'].T, cmap='jet', vmin=0, vmax=0.5) p4 = ax7.pcolormesh(df['time'], df['range'], classifier.T, cmap=cmap, norm=norm) myFmt = DateFormatter("%H") for ax in [ax1, ax3, ax5, ax7]: ax.yaxis.set_major_formatter(preprocess.m_km_ticks()) ax.set_ylabel('Height [km, a.g.l]') cbar = fig.colorbar(p1, ax=ax1) cbar.ax.set_ylabel(r'$\beta\quad[Mm^{-1}]$', rotation=90) # cbar.ax.yaxis.set_label_position('left') cbar = fig.colorbar(p2, ax=ax3) cbar.ax.set_ylabel('w [' + units.get('v_raw', None) + ']', rotation=90) # 
cbar.ax.yaxis.set_label_position('left') cbar = fig.colorbar(p3, ax=ax5) cbar.ax.set_ylabel(r'$\delta$') # cbar.ax.yaxis.set_label_position('left') cbar = fig.colorbar(p4, ax=ax7, ticks=[5, 15, 25, 35, 45]) cbar.ax.set_yticklabels( ['Background', 'Aerosol', 'Precipitation', 'Clouds', 'Undefined']) ax7.set_xlim(left=pd.to_datetime(df.time[0].values).floor('D')) myFmt = DateFormatter("%H") ax7.xaxis.set_major_locator(mdates.HourLocator(byhour=[0, 6, 12, 18])) ax7.xaxis.set_major_formatter(myFmt) ax7.set_xlim(left=pd.to_datetime(df.time[0].values).floor('D')) ax7.set_xlabel('Time UTC [hour]') fig.tight_layout() fig.savefig(out_directory + '/' + df.attrs['file_name'] + '_classified.png', dpi=150, bbox_inches='tight') plt.close('all') df['classified'] = (['time', 'range'], classifier) df.attrs['classified'] = 'Clasification algorithm by Vietle \ at github.com/vietle94/halo-lidar' df.attrs['bleed_corrected'] = 'Bleed through corrected for \ depolarization ratio, see Vietle thesis' df['depo_bleed'].attrs = { 'units': ' ', 'long_name': 'Depolarization ratio \ (bleed through corrected)', 'comments': 'Bleed through corrected' } df['depo_bleed_sd'].attrs = { 'units': ' ', 'long_name': 'Standard deviation of depolarization \ ratio (bleed through corrected)', 'comments': 'Bleed through corrected' } df['classified'].attrs = { 'units': ' ', 'long_name': 'Classified mask', 'comments': '0: Background, 10: Aerosol, \ 20: Precipitation, 30: Clouds, 40: Undefined' } df.to_netcdf(out_directory + '/' + df.attrs['file_name'] + '_classified.nc', format='NETCDF3_CLASSIC')
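# A minimal sketch of the iterative "spreading" loop used in the precipitation
# step above: a seed mask is repeatedly dilated with a maximum filter, only
# allowed to grow into pixels that satisfy a weaker threshold and kept out of
# the updraft mask, until it stops changing. Filter size and iteration cap are
# assumptions; the original code applies the same operations to boolean masks.
import numpy as np
from scipy.ndimage import maximum_filter

def conditional_dilation(seed, allowed, forbidden, size=3, max_iter=1500):
    mask = seed.copy()
    for _ in range(max_iter):
        grown = maximum_filter(mask, size=size).astype(bool)
        grown &= ~forbidden            # never grow into forbidden pixels
        new_mask = allowed & grown     # keep only pixels passing the weak condition
        if np.sum(new_mask) == np.sum(mask):
            break
        mask = new_mask
    return mask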
def msob_fp_array_to_results(title: str, arrival_enum: ArrivalEnum, perform_param: PerformParameter, opt_method: OptMethod, mc_dist: MonteCarloDist, param_array: np.array, res_array: np.array, number_flows: int, number_servers: int, compare_metric: ChangeEnum) -> dict: """Writes the array values into a dictionary""" if res_array.shape[1] != 3: raise IllegalArgumentError(f"Array must have 3 columns," f"not {res_array.shape[1]}") np.seterr(all='warn') res_array_no_full_nan = remove_full_nan_rows(full_array=res_array) valid_iterations = res_array_no_full_nan.shape[0] if compare_metric == ChangeEnum.RATIO_REF_NEW: change_vec_server_bound = np.divide(res_array[:, 0], res_array[:, 1]) change_vec_pmoo_fp = np.divide(res_array[:, 0], res_array[:, 2]) elif compare_metric == ChangeEnum.RATIO_NEW_REF: change_vec_server_bound = np.divide(res_array[:, 1], res_array[:, 0]) change_vec_pmoo_fp = np.divide(res_array[:, 2], res_array[:, 0]) elif compare_metric == ChangeEnum.RELATIVE_CHANGE: abs_vec_server_bound = np.subtract(res_array[:, 0], res_array[:, 1]) change_vec_server_bound = np.divide(abs_vec_server_bound, res_array[:, 0]) abs_vec_pmoo_fp = np.subtract(res_array[:, 0], res_array[:, 2]) change_vec_pmoo_fp = np.divide(abs_vec_pmoo_fp, res_array[:, 0]) else: raise NotImplementedError( f"Metric={compare_metric.name} is not implemented") only_improved_server_bound = change_vec_server_bound[ res_array[:, 0] > res_array[:, 1]] only_improved_pmoo_fp = change_vec_pmoo_fp[res_array[:, 0] > res_array[:, 2]] row_max_msob = np.nanargmax(change_vec_server_bound) opt_msob = change_vec_server_bound[row_max_msob] mean_msob = np.nanmean(change_vec_server_bound) median_improved_server_bound = np.nanmedian(only_improved_server_bound) row_max_pmoo_fp = np.nanargmax(change_vec_pmoo_fp) opt_pmoo_fp = change_vec_pmoo_fp[row_max_pmoo_fp] mean_pmoo_fp = np.nanmean(change_vec_pmoo_fp) median_improved_pmoo_fp = np.nanmedian(only_improved_pmoo_fp) if (perform_param.perform_metric == PerformEnum.DELAY_PROB or perform_param.perform_metric == PerformEnum.BACKLOG_PROB): number_standard_bound_valid = np.nansum( res_array_no_full_nan[:, 0] < 1) number_server_bound_valid = np.nansum(res_array_no_full_nan[:, 1] < 1) number_pmoo_fp_valid = np.nansum(res_array_no_full_nan[:, 2] < 1) else: number_standard_bound_valid = np.nansum( res_array_no_full_nan[:, 0] < inf) number_server_bound_valid = np.nansum( res_array_no_full_nan[:, 1] < inf) number_pmoo_fp_valid = np.nansum(res_array_no_full_nan[:, 2] < inf) number_improved_server_bound = np.sum( res_array_no_full_nan[:, 0] > res_array_no_full_nan[:, 1]) number_improved_pmoo_fp = np.sum( res_array_no_full_nan[:, 0] > res_array_no_full_nan[:, 2]) best_approach = np.nanargmin(res_array_no_full_nan, axis=1) standard_best = np.count_nonzero(best_approach == 0) msob_best = np.count_nonzero(best_approach == 1) fp_best = np.count_nonzero(best_approach == 2) res_dict = { "Name": "Value", "topology": title, "arrival_distribution": arrival_enum.name } opt_dict = { "Name": "Value", "topology": title, "arrival_distribution": arrival_enum.name } for j in range(number_flows): if arrival_enum == ArrivalEnum.DM1: opt_dict[f"pmoo_fp_lamb{j + 1}"] = format( param_array[row_max_pmoo_fp, j], '.3f') opt_dict[f"server_bound_lamb{j + 1}"] = format( param_array[row_max_msob, j], '.3f') elif arrival_enum == ArrivalEnum.MD1: opt_dict[f"pmoo_fp_lamb{j + 1}"] = format( param_array[row_max_pmoo_fp, j], '.3f') opt_dict[f"ser_bound_lamb{j + 1}"] = format( param_array[row_max_msob, j], '.3f') elif arrival_enum == 
ArrivalEnum.MMOODisc: opt_dict[f"pmoo_fp_stay_on{j + 1}"] = format( param_array[row_max_pmoo_fp, j], '.3f') opt_dict[f"pmoo_fp_stay_off{j + 1}"] = format( param_array[row_max_pmoo_fp, number_flows + j], '.3f') opt_dict[f"pmoo_fp_burst{j + 1}"] = format( param_array[row_max_pmoo_fp, 2 * number_flows + j], '.3f') opt_dict[f"ser_bound_stay_on{j + 1}"] = format( param_array[row_max_msob, j], '.3f') opt_dict[f"ser_bound_stay_off{j + 1}"] = format( param_array[row_max_msob, number_flows + j], '.3f') opt_dict[f"ser_bound_burst{j + 1}"] = format( param_array[row_max_msob, 2 * number_flows + j], '.3f') elif arrival_enum == ArrivalEnum.MMOOFluid: opt_dict[f"pmoo_fp_mu{j + 1}"] = format( param_array[row_max_pmoo_fp, j], '.3f') opt_dict[f"pmoo_fp_lamb{j + 1}"] = format( param_array[row_max_pmoo_fp, number_flows + j], '.3f') opt_dict[f"pmoo_fp_burst{j + 1}"] = format( param_array[row_max_pmoo_fp, 2 * number_flows + j], '.3f') opt_dict[f"ser_bound_mu{j + 1}"] = format( param_array[row_max_msob, j], '.3f') opt_dict[f"ser_bound_lamb{j + 1}"] = format( param_array[row_max_msob, number_flows + j], '.3f') opt_dict[f"ser_bound_burst{j + 1}"] = format( param_array[row_max_msob, 2 * number_flows + j], '.3f') else: raise NotImplementedError( f"Arrival parameter={arrival_enum.name} is not implemented") for j in range(number_servers): opt_dict[f"pmoo_fp_rate{j + 1}"] = format( param_array[row_max_pmoo_fp, arrival_enum.number_parameters() * number_flows + j], '.3f') opt_dict[f"server_bound_rate{j + 1}"] = format( param_array[row_max_msob, arrival_enum.number_parameters() * number_flows + j], '.3f') opt_dict.update({ "opt_pmoo_fp": format(opt_pmoo_fp, '.3f'), "opt_msob": format(opt_msob, '.3f'), "valid iterations": res_array.shape[0], "PerformParamValue": perform_param.value, "optimization": opt_method.name, "compare_metric": compare_metric.name, "MCDistribution": mc_dist.to_name(), "MCParam": mc_dist.param_to_string() }) res_dict.update({ "mean_pmoo_fp": mean_pmoo_fp, "mean_msob": mean_msob, "median_improved_pmoo_fp": median_improved_pmoo_fp, "median_improved_server_bound": median_improved_server_bound, "number standard bound is valid": number_standard_bound_valid, "number server bound is valid": number_server_bound_valid, "number PMOO_FP bound is valid": number_pmoo_fp_valid, "number server bound is improvement": number_improved_server_bound, "number PMOO_FP is improvement": number_improved_pmoo_fp, "valid iterations": valid_iterations, "number standard bound is best": standard_best, "number server bound is best": msob_best, "number PMOO_FP bound is best": fp_best, }) filename = title filename += f"_optimal_{perform_param.to_name()}_{arrival_enum.name}_" \ f"MC{mc_dist.to_name()}_{opt_method.name}_" \ f"{compare_metric.name}" with open(filename + ".csv", 'w') as csv_file: writer = csv.writer(csv_file) for key, value in opt_dict.items(): writer.writerow([key, value]) return res_dict
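# A minimal sketch of the comparison metric computed above: column 0 holds the
# reference (standard) bound and a later column holds the new bound, so the
# per-row reference/new ratio measures improvement, and a nan-aware argmax
# picks the parameter row with the largest gain. The column layout follows the
# code above; the helper name is an assumption.
import numpy as np

def best_improvement(res_array, new_col=1):
    ratio = np.divide(res_array[:, 0], res_array[:, new_col])
    row_max = np.nanargmax(ratio)
    return row_max, ratio[row_max], np.nanmean(ratio)

# res = np.array([[3.0, 1.5, 2.0], [2.0, 4.0, 1.0]])
# best_improvement(res)   # (0, 2.0, 1.25)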
def generateAccCurveVote(functions): for time in times: # Do a run to test the accuracies when 'voters' vote # A vote will simply just be the average MAE over the number of voters for voters in range(1, NUM_VOTERS + 1): # A list to hold the details for the accuracy curve # This will be saved and printed out later. accCurveState = [] # How many data points are used for classification? # start at 5, count by 5s to 121 (121 is overkill) for GROUP_SIZE in range(5, MAX_GROUP_SIZE + 1, 5): # This is where we will store all the data to be output accMat = [] for task in tasks: for subject in subjects: for take in takes: # reporting print time, voters, GROUP_SIZE, ':\t', task, subject, take # Load the data # We're always applying it to the full data (all --- '') fileName = getFileName(task, subject, take, '') data = loadData(fileName) # Store a row (corresponding to a set of data) of accuracies # This will be added to the accMat accRow = np.zeros(NUM_FUNCTIONS) # We'll repeat the test 100 times (ti give us a percent) # Basically, apply some subset of data (GROUP_SIZE) to models 100 times. # This is for the stats. This makes sense... right? for i in range(100): # This list will store lists of errors # the length of this list depends on GROUP_SIZE # The lenfth of the lists inside this list will be equal to NUM_FUNCTIONS # Eventually we will get the column mean, to know how each model did. absErr_forAllData = [] # For each data point in a randomly selected set of data of size GROUP_SIZE # Apply this data point to each model, and keep track of the errors in a list # After applying this data point to every function, but that list inside ansErr_forAllData for d in data[np.random.permutation(len(data))[:GROUP_SIZE]]: absErr_forDatum = [] # For each model... # I really really wish I wrote this in a similar way to script 3 # had to rename variables here to not mess with outer loops for tsk in tasks: for sub in subjects: for tke in takes: # NEVER APPLY THE DATA TO THE MODEL IT WAS FIT TOOO!!! # If we have data from the model we're looking at, skip it (add a list of max value because that means it will never be the 'BEST') if task == tsk and subject == sub and tke == take: absErr_forDatum.append(sys.maxint) # Otherwise, just apply the data to the model and record the error value else: # Give each voter a shot voterScores = [] # This is the only line different from the TOP script # We just randomly pick some number of models for j in np.random.permutation(len(functions[time][tsk][sub][tke]))[:voters]: try: # Get the error by finding the difference between what we expect (l[-1] --- the last element in the row) # and what we got (applying all other data points to the model). err = d[-1] - functions[time][tsk][sub][tke][j](*d) except (ValueError, OverflowError, ZeroDivisionError): print 'Busted' # What's better... nan or maxint? # nan might be cheating because: np.nanmean([nan, nan, nan, 1]) = 1 # max int might be unfair as it could make things slightly off seem way worse? # I'll go with nan as maxvalue might throw off votes err = float('nan') #err = sys.maxint # Keep track of each voter's error voterScores.append(abs(err)) # The voters will just average out their error on the data point # Add the average error to the list keeping track of the data points error on all models absErr_forDatum.append(np.nanmean(voterScores)) # After applying the single row of data to all models # Add the error for each model to the list of errors # After doing this for all data points (GROUP_SIZE), we will get the column mean. 
absErr_forAllData.append(absErr_forDatum) # This line should not be necessary.... #if np.argmin(np.mean(abEs,axis=0)) > 0.000: # Find the index of the model with the smallest error # This will be the *winner* model # And mark it as the winner in the row's accuracy # Note that this is not an accuracy really, but just a record that it was selected # Accuracy is determined if the min model belonged to the same subject/task combo (take doesn't matter) accRow[np.argmin(np.mean(absErr_forAllData,axis=0))] += (1.) # Divide by 100 so we get a percent # Add the row's (data set's) values to the matrix # Remember, accuracy is really only measures after it's verified # Really, the matrix is just seeing what % of the time what model was the best. accRow = accRow/float(100) accMat.append(accRow) accMat = np.array(accMat) # Save the accuracy matrix here np.savetxt('./accuracyMatrices/2-AccMat-RAND_' + str(voters) + '_' + str(GROUP_SIZE) + '_' + time + '.csv', accMat, delimiter=',') # This part will now count the actual accuracies # Where we will store the compressed version of the mat # This will smush it over the takes accMatSmall = [] # For each row, we'll sum up the accuracies over all takes of the same subject/task # Will result in 150 rows, 30 cols. # Note, each row will add up to 1.00 (100%) # Or at least it should... it's possible two models tie, but this would be very very unusual considering floats for i in range(0, accMat.shape[0], 1): accMatRow = [] # For each group of 5 (take) # Add up the accuracies over the 5 for j in range(0, accMat.shape[0], 5): accMatRow.append(np.sum(accMat[i, j:j+5])) accMatSmall.append(accMatRow) accMatSmall = np.array(accMatSmall) # Save the small accuracy matrix here np.savetxt('./accuracyMatrices/2-AccMat-Small-RAND_' + str(voters) + '_' + str(GROUP_SIZE) + '_' + time + '.csv', accMat, delimiter=',') # Go through the diagonal (kinda, it's not really a diag, more like a staircase) # And record how often it was right diagValues = [] # This is ugly, but works... # 150 rows, 30 cols remember # So we need to look at 5 rows for each col. for i in range(30): for j in range(5): diagValues.append(accMatSmall[i*5+j,i]) # Store the mean, standard deviation, median, min, max, and number of functions (for CI calculations) accCurveState.append([np.mean(diagValues), np.std(diagValues), np.median(diagValues), np.min(diagValues), np.max(diagValues), NUM_FUNCTIONS]) # Save the output for each time np.savetxt('2-accCurveNoSameTake-RAND-' + str(voters) + '_' + time + '.csv', accCurveState, delimiter=',')
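# A minimal sketch of the selection rule applied above: each sampled data
# point yields one absolute error per candidate model, the errors are averaged
# over the sampled points (column mean), and the model with the smallest mean
# error is declared the winner. Shapes are assumptions for illustration.
import numpy as np

def pick_winner(abs_errors):
    """abs_errors: (n_points, n_models) array of absolute errors."""
    mean_per_model = np.nanmean(abs_errors, axis=0)   # nan-aware column mean
    return int(np.nanargmin(mean_per_model))

# errs = np.array([[0.2, 0.5], [0.1, np.nan], [0.3, 0.4]])
# pick_winner(errs)   # 0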
def plot_diagnostics(self, variable=0, lags=10, fig=None, figsize=None): """Plot an ARIMA's diagnostics. Diagnostic plots for standardized residuals of one endogenous variable Parameters ---------- variable : integer, optional Index of the endogenous variable for which the diagnostic plots should be created. Default is 0. lags : integer, optional Number of lags to include in the correlogram. Default is 10. fig : Matplotlib Figure instance, optional If given, subplots are created in this figure instead of in a new figure. Note that the 2x2 grid will be created in the provided figure using `fig.add_subplot()`. figsize : tuple, optional If a figure is created, this argument allows specifying a size. The tuple is (width, height). Notes ----- Produces a 2x2 plot grid with the following plots (ordered clockwise from top left): 1. Standardized residuals over time 2. Histogram plus estimated density of standardized residulas, along with a Normal(0,1) density plotted for reference. 3. Normal Q-Q plot, with Normal reference line. 4. Correlogram See Also -------- statsmodels.graphics.gofplots.qqplot pmdarima.utils.visualization.plot_acf References ---------- .. [1] https://www.statsmodels.org/dev/_modules/statsmodels/tsa/statespace/mlemodel.html#MLEResults.plot_diagnostics # noqa: E501 """ # implicitly checks whether installed, and does our backend magic: _get_plt() # We originally delegated down to SARIMAX model wrapper, but # statsmodels makes it difficult to trust their API, so we just re- # implemented a common method for all results wrappers. from statsmodels.graphics import utils as sm_graphics fig = sm_graphics.create_mpl_fig(fig, figsize) res_wpr = self.arima_res_ data = res_wpr.data # Eliminate residuals associated with burned or diffuse likelihoods. # The statsmodels code for the Kalman Filter takes the loglik_burn # as a parameter: # loglikelihood_burn : int, optional # The number of initial periods during which the loglikelihood is # not recorded. Default is 0. # If the class has it, it's a SARIMAX and we'll use it. Otherwise we # will just access the residuals as we normally would... if hasattr(res_wpr, 'loglikelihood_burn'): # This is introduced in the bleeding edge version, but is not # backwards compatible with 0.9.0 and less: d = res_wpr.loglikelihood_burn if hasattr(res_wpr, 'nobs_diffuse'): d = np.maximum(d, res_wpr.nobs_diffuse) resid = res_wpr.filter_results\ .standardized_forecasts_error[variable, d:] else: # This gets the residuals, but they need to be standardized d = 0 r = res_wpr.resid resid = (r - np.nanmean(r)) / np.nanstd(r) # Top-left: residuals vs time ax = fig.add_subplot(221) if hasattr(data, 'dates') and data.dates is not None: x = data.dates[d:]._mpl_repr() else: x = np.arange(len(resid)) ax.plot(x, resid) ax.hlines(0, x[0], x[-1], alpha=0.5) ax.set_xlim(x[0], x[-1]) ax.set_title('Standardized residual') # Top-right: histogram, Gaussian kernel density, Normal density # Can only do histogram and Gaussian kernel density on the non-null # elements resid_nonmissing = resid[~(np.isnan(resid))] ax = fig.add_subplot(222) # temporarily disable Deprecation warning, normed -> density # hist needs to use `density` in future when minimum matplotlib has it # 'normed' argument is no longer supported in matplotlib since # version 3.2.0. 
New function added for backwards compatibility with warnings.catch_warnings(record=True): ax.hist(resid_nonmissing, label='Hist', **mpl_compat.mpl_hist_arg()) kde = gaussian_kde(resid_nonmissing) xlim = (-1.96 * 2, 1.96 * 2) x = np.linspace(xlim[0], xlim[1]) ax.plot(x, kde(x), label='KDE') ax.plot(x, norm.pdf(x), label='N(0,1)') ax.set_xlim(xlim) ax.legend() ax.set_title('Histogram plus estimated density') # Bottom-left: QQ plot ax = fig.add_subplot(223) from statsmodels.graphics import gofplots gofplots.qqplot(resid_nonmissing, line='s', ax=ax) ax.set_title('Normal Q-Q') # Bottom-right: Correlogram ax = fig.add_subplot(224) from statsmodels.graphics import tsaplots tsaplots.plot_acf(resid, ax=ax, lags=lags) ax.set_title('Correlogram') ax.set_ylim(-1, 1) return fig
def calc_mse(y, y_hat):
    return np.nanmean((y - y_hat) ** 2)
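# Usage note: nan entries in either input propagate into the squared error and
# are then skipped by np.nanmean, so missing samples do not bias the MSE.
# y = np.array([1.0, 2.0, np.nan]); y_hat = np.array([1.5, 2.0, 3.0])
# calc_mse(y, y_hat)   # 0.125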
def estimate_phase_elevation_ratio(dem, ts_data, inps): """Estimate phase/elevation ratio for each acquisition of timeseries Parameters: dem : 2D array in size of ( length, width) ts_data : 3D array in size of (num_date, length, width) inps : Namespace Returns: X : 2D array in size of (poly_num+1, num_date) """ num_date = ts_data.shape[0] # prepare phase and elevation data print('reading mask from file: '+inps.mask_file) mask = readfile.read(inps.mask_file, datasetName='mask')[0] dem = mask_matrix(np.array(dem), mask) ts_data = mask_matrix(np.array(ts_data), mask) # display # 1. effect of multilooking --> narrow phase range --> better ratio estimation debug_mode = False if debug_mode: import matplotlib.pyplot as plt #d_index = np.argmax(topo_trop_corr) d_index = 47 data = ts_data[d_index, :, :] title = inps.date_list[d_index] fig = plt.figure() plt.plot(dem[~np.isnan(dem)], data[~np.isnan(dem)], '.', label='Number of Looks = 1') mli_dem = multilook_data(dem, 8, 8) mli_data = multilook_data(data, 8, 8) plt.plot(mli_dem[~np.isnan(mli_dem)], mli_data[~np.isnan(mli_dem)], '.', label='Number of Looks = 8') plt.legend() plt.xlabel('Elevation (m)') plt.ylabel('Range Change (m)') plt.title(title) out_file = 'phase_elevation_ratio_{}.png'.format(title) plt.savefig(out_file, bbox_inches='tight', transparent=True, dpi=300) print('save to {}'.format(out_file)) #plt.show() print('----------------------------------------------------------') print('Empirical tropospheric delay correction based on phase/elevation ratio (Doin et al., 2009)') print('polynomial order: {}'.format(inps.poly_order)) if inps.num_multilook > 1: print('number of multilook: {} (multilook data for estimation only)'.format(inps.num_multilook)) mask = multilook_data(mask, inps.num_multilook, inps.num_multilook) dem = multilook_data(dem, inps.num_multilook, inps.num_multilook) ts_data = multilook_data(ts_data, inps.num_multilook, inps.num_multilook) mask_nan = ~np.isnan(dem) dem = dem[mask_nan] ts_data = ts_data[:, mask_nan] # calculate correlation coefficient print('----------------------------------------------------------') print('calculate correlation of DEM with each acquisition') topo_trop_corr = np.zeros(num_date, np.float32) for i in range(num_date): phase = ts_data[i, :] cc = 0. if np.count_nonzero(phase) > 0: comp_data = np.vstack((dem, phase)) cc = np.corrcoef(comp_data)[0, 1] topo_trop_corr[i] = cc print('{}: {:>5.2f}'.format(inps.date_list[i], cc)) topo_trop_corr = np.abs(topo_trop_corr) print('average correlation magnitude: {:>5.2f}'.format(np.nanmean(topo_trop_corr))) # estimate ratio parameter print('----------------------------------------------------------') print('estimate phase/elevation ratio') A = design_matrix(dem=dem, poly_order=inps.poly_order) X = np.dot(np.linalg.pinv(A), ts_data.T) X = np.array(X, dtype=np.float32) X[:, topo_trop_corr < inps.threshold] = 0. return X
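# A minimal sketch of the ratio estimation above: build a polynomial design
# matrix from the masked, multilooked elevation and solve the least-squares
# fit to every acquisition at once with a pseudo-inverse. The exact
# design_matrix layout (constant term plus increasing powers of elevation)
# is an assumption.
import numpy as np

def fit_phase_elevation(dem, ts_data, poly_order=1):
    """dem: (n_pixels,), ts_data: (num_date, n_pixels) -> (poly_order+1, num_date)."""
    A = np.vander(dem, N=poly_order + 1, increasing=True)   # [1, h, h**2, ...]
    X = np.linalg.pinv(A) @ ts_data.T
    return np.asarray(X, dtype=np.float32)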
    elif subjectNum > 100:
        style = 1
    plt.plot(dmn_connectivity_change[s], madrs_change, marker='.', ms=20,
             color=colors[style], alpha=0.5)
plt.xlabel('DMN Connectivity Change 3 - 1')
plt.ylabel('MADRS Change 3 - 1')
plt.show()

fig = plt.figure(figsize=(10, 7))
# plot for each subject
for s in np.arange(nSub):
    if subjects[s] < 100:
        style = 0
        plt.plot(np.arange(nVisits), average_within_mat[row, col, s, :],
                 marker='.', ms=20, color=colors[style], alpha=0.5)
    else:
        style = 1
        plt.plot(np.arange(nVisits), average_within_mat[row, col, s, :],
                 marker='.', ms=20, color=colors[style], alpha=0.5)
plt.errorbar(np.arange(nVisits),
             np.nanmean(average_within_mat[row, col, HC_ind, :], axis=0),
             lw=5, color=colors[0],
             yerr=scipy.stats.sem(average_within_mat[row, col, HC_ind, :],
                                  axis=0, nan_policy='omit'),
             label='HC')
plt.errorbar(np.arange(nVisits),
             np.nanmean(average_within_mat[row, col, MDD_ind, :], axis=0),
             lw=5, color=colors[1],
             yerr=scipy.stats.sem(average_within_mat[row, col, MDD_ind, :],
                                  axis=0, nan_policy='omit'),
             label='MDD')
plt.xticks(np.arange(nVisits), ('Pre NF', 'Post NF'))
plt.xlabel('Visit')
plt.title('Row %i Col %i' % (row, col))
plt.title('%s Within-Network Connectivity' % systems_to_keep_abbrv[system])
plt.legend()
plt.show()

# now test significance
print('FIRST DAY')
print(scipy.stats.ttest_ind(average_within_mat[row, col, HC_ind, 0],
                            average_within_mat[row, col, MDD_ind, 0]))
print('LAST DAY')
print(scipy.stats.ttest_ind(average_within_mat[row, col, HC_ind, 1],
                            average_within_mat[row, col, MDD_ind, 1]))
arr_dem = rb_dem.ReadAsArray(difx_dem, dify_dem, cols_all, rows_all).astype(float)
arr_thp = rb_thp.ReadAsArray(difx_thp, dify_thp, cols_all, rows_all).astype(float)

## check which are uncommon (or NA) values
np.unique(arr_sl)
np.unique(arr_dem)
np.unique(arr_thp)

# replace NA sentinel values with np.nan
arr_sl[arr_sl < int(0)] = np.nan         # here negative values as NAs
arr_dem[arr_dem == int(65536)] = np.nan  # here 65535 as NAs
arr_thp[arr_thp == int(65535)] = np.nan

print("mean slope", round(np.nanmean(arr_sl), 2))
print("max slope", round(np.nanmax(arr_sl), 2))
print("min slope", round(np.nanmin(arr_sl), 2))
print("mean dem", round(np.nanmean(arr_dem), 2))
print("max dem", round(np.nanmax(arr_dem), 2))
print("min dem", round(np.nanmin(arr_dem), 2))

# task 2
# binary raster where elevation < 1000 m and slope < 30 deg
# slope < 30 deg
arr_sl_bin = np.nan_to_num(arr_sl)
arr_sl_bin[arr_sl_bin < float(30.00)] = 1
arr_sl_bin[arr_sl_bin >= float(30.00)] = 0
np.unique(arr_sl_bin)
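# A minimal sketch of "task 2" described in the comments above: a binary
# raster that is 1 where elevation is below 1000 m AND slope is below 30
# degrees, 0 elsewhere, with nan wherever either input is missing. The
# snippet above thresholds the slope only; combining both layers is an
# assumption about the intended result.
import numpy as np

def suitability_mask(arr_dem, arr_sl):
    mask = ((arr_dem < 1000.0) & (arr_sl < 30.0)).astype(float)
    mask[np.isnan(arr_dem) | np.isnan(arr_sl)] = np.nan
    return mask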
def main(args=None): start = time.time() a = time.asctime() b = a.replace(':', '') start_at = b.replace(' ', '_') mode = "train" loop_num_ = None test_batch_num = None max_to_keep = 2 TEST_THRESHHOLD = 0.75 SAVE_THRESHHOLD = 0 dropout_1 = 1.00 dropout_2 = 0.80 dropout_3 = 0.50 queue_len = 5000 #max_train=20000 if args != None: mode = args.mode loop_num_ = args.loop_number test_batch_num = args.test_batch_number max_to_keep = args.max_to_keep input_dir = args.in_directory model_name = args.model pretrained_dir = args.ckpt_file output_dir = args.out_directory else: try: options, args = getopt.getopt(sys.argv[1:], 'm:i:n:b:o:c:p:', [ 'mode=', 'in_dir=', 'loop_num=', 'test_batch_num=', 'out_dir=', 'network_constructor=', 'pretrained_model=' ]) except getopt.GetoptError as err: print(str(err)) sys.exit(2) if len(options) < 3: print('too few argument') sys.exit(0) for opt, arg in options: if opt in ('-m', '--mode'): mode = arg elif opt in ('-i', '--in_dir'): input_dir = arg elif opt in ('-n', '--loop_num'): loop_num_ = int(arg) elif opt in ('-b', '--test_batch_num'): test_batch_num = int(arg) elif opt in ('-o', '--out_dir'): output_dir = arg elif opt in ('-c', '--network_constructor'): model_name = arg elif opt in ('-p', '--pretrained_model'): pretrained_dir = arg if input_dir.endswith("/"): input_dir = str(input_dir) + "*.npz" elif input_dir.endswith("*") or input_dir.endswith(".npz"): pass else: input_dir = str(input_dir) + "/*.npz" f = glob.glob(input_dir) if len(f) == 0: print("can't open input files, no such a directory") sys.exit(0) f_srt = natsorted(f) if loop_num_ == None: loop_num_ = len(f_srt) - 5 if test_batch_num == None: test_batch_num = loop_num_ + 1 with np.load(str(f_srt[0])) as f: labels = f['labels'] _data = f['data_array'] batch_size, label_dim = labels.shape _, data_length, _2 = _data.shape print(batch_size, label_dim) config = tf.ConfigProto(device_count={'GPU': 2}) config.gpu_options.allow_growth = True #config.graph_options.optimizer_options.global_jit_level = tf.OptimizerOptions.ON_1 sess = tf.Session(config=config) x_image = tf.placeholder(tf.float32, shape=[None, data_length, 4, 1]) y_ = tf.placeholder(tf.float32, shape=[None, label_dim]) phase = tf.placeholder(tf.bool) keep_prob = tf.placeholder(tf.float32) keep_prob2 = tf.placeholder(tf.float32) keep_prob3 = tf.placeholder(tf.float32) nc = il.import_module("deepgmap.network_constructors." 
+ str(model_name)) print("running " + str(model_name)) model = nc.Model(image=x_image, label=y_, output_dir=output_dir, phase=phase, start_at=start_at, keep_prob=keep_prob, keep_prob2=keep_prob2, keep_prob3=keep_prob3, data_length=data_length, max_to_keep=max_to_keep) sess.run(tf.global_variables_initializer()) saver = model.saver if mode == 'retrain': saver.restore(sess, pretrained_dir) train_accuracy_record = [] loss_val_record = [] total_learing = [] loop_num = div_roundup(queue_len, len(f_srt)) BREAK = False prev_ac = None test_step = [] CHECK_TEST_FR = False for i in range(loop_num): if BREAK: print("breaking the train loop") break input_files = f_srt[i * queue_len:(i + 1) * queue_len] image_list, label_list = batch_queuing(input_files, batch_size, data_length) for k in range(len(image_list)): start_tmp = time.time() a = np.shape(image_list[k]) #print a if len(a) == 4: train_accuracy_, loss_val = sess.run( [model.error, model.cost], feed_dict={ x_image: image_list[k], y_: label_list[k], keep_prob: 1.0, keep_prob2: 1.0, keep_prob3: 1.0, phase: False }) else: batch = image_list[k][0], label_list[k][0], image_list[k][ 1], label_list[k][1] #print(len(batch)) #batch = next_batch(i,input_files, batch_size, data_length) train_accuracy_, loss_val = sess.run( [model.error, model.cost], feed_dict={ x_image: np.concatenate((batch[2], batch[0])), y_: np.concatenate((batch[3], batch[1])), keep_prob: 1.0, keep_prob2: 1.0, keep_prob3: 1.0, phase: False }) """train_accuracy_,loss_val= sess.run([model.error, model.cost], feed_dict={x_image:batch[2], y_: batch[3], keep_prob: 1.0, keep_prob2: 1.0, keep_prob3: 1.0, phase: False})""" FPR_list, TPR_list, PPV_list = train_accuracy_ #print np.nansum(PPV_list) curr_accu = float( np.round( np.nanmean( 2 * np.array(TPR_list) * np.array(PPV_list) / (0.0000001 + np.array(PPV_list) + np.array(TPR_list))), 4)) sys.stdout.write("\r" + "step " + str(i * queue_len + k) + ", cost: " + str(loss_val) + ", train_accuracy: " + str(list([curr_accu])) + ", " + str(time.time() - start_tmp)) sys.stdout.flush() #train_accuracy_record.append(TPR_list[0]-FPR_list[0]) train_accuracy_record.append(curr_accu) loss_val_record.append(loss_val) total_learing.append((i * queue_len + k) * batch_size / 1000.0) if i * queue_len + k >= 2: #temporal_accuracy=train_accuracy_record[i*queue_len+k]+train_accuracy_record[i*queue_len+k-1]+train_accuracy_record[i*queue_len+k-2] temporal_accuracy = np.round( (train_accuracy_record[i * queue_len + k] + train_accuracy_record[i * queue_len + k - 1] + train_accuracy_record[i * queue_len + k - 2]) / 3.0, 4) if len(test_step) > 1: CHECK_TEST_FR = ((i * queue_len + k - test_step[-1]) > 1000) CHECK_ACCU = (temporal_accuracy >= TEST_THRESHHOLD) if CHECK_ACCU or CHECK_TEST_FR: test_step.append(i * queue_len + k) if len(test_step) > 10: e, f = test_step[-1], test_step[-10] if e - f <= 40: TEST_THRESHHOLD += 0.10 print("\n" + str(TEST_THRESHHOLD)) if TEST_THRESHHOLD > 0.9800: TEST_THRESHHOLD = 0.9800 if CHECK_TEST_FR: TEST_THRESHHOLD -= 0.02 #TEST_THRESHHOLD=temporal_accuracy-0.005 t_batch = test_batch(input_dir, output_dir, test_batch_num, batch_size, data_length) f1_list = [] for o in range(3): ta = sess.run(model.error, feed_dict={ x_image: t_batch[o * 2], y_: t_batch[o * 2 + 1], keep_prob: 1.0, keep_prob2: 1.0, keep_prob3: 1.0, phase: False }) FPR_list, TPR_list, PPV_list = ta f1 = float( np.round( np.nanmean(2 * np.array(TPR_list) * np.array(PPV_list) / (0.0000001 + np.array(PPV_list) + np.array(TPR_list))), 4)) f1_list.append(f1) mean_ac = 
np.round(np.nanmean(f1_list), 4) to_print = ( "\nThis is tests for the model at the train step: " + str(i * queue_len + k) + "\n" + "mean accuracy : " + str(mean_ac) + "\n Total time " + str(time.time() - start)) print(to_print) if (prev_ac == None and mean_ac >= SAVE_THRESHHOLD) or ( prev_ac != None and mean_ac >= prev_ac): flog = open( str(output_dir) + str(start_at) + '.log', 'a') flog.write( "This is tests for the model at the train step: " + str(i * queue_len + k) + "\nThe average of TPR+PPV: " + str(mean_ac) + '\n') flog.close() saver.save(sess, str(output_dir) + str(model_name) + "_" + str(start_at) + '_step' + str(i * queue_len + k) + '.ckpt', global_step=i * queue_len + k) prev_ac = mean_ac if mean_ac >= 0.999: BREAK = True break #sess.run(model.optimize, feed_dict={x_image: np.concatenate((batch[2],batch[0])),y_: np.concatenate((batch[3],batch[1])), keep_prob: dropout_1, keep_prob2: dropout_2, keep_prob3: dropout_3,phase:True}) if len(a) == 4: sess.run(model.optimize, feed_dict={ x_image: image_list[k], y_: label_list[k], keep_prob: dropout_1, keep_prob2: dropout_2, keep_prob3: dropout_3, phase: True }) else: sess.run(model.optimize, feed_dict={ x_image: batch[2], y_: batch[3], keep_prob: dropout_1, keep_prob2: dropout_2, keep_prob3: dropout_3, phase: True }) sess.run(model.optimize, feed_dict={ x_image: batch[0], y_: batch[1], keep_prob: dropout_1, keep_prob2: dropout_2, keep_prob3: dropout_3, phase: True }) sess.run(model.optimize, feed_dict={ x_image: batch[2], y_: batch[3], keep_prob: dropout_1, keep_prob2: dropout_2, keep_prob3: dropout_3, phase: True }) sess.run(model.optimize, feed_dict={ x_image: batch[0], y_: batch[1], keep_prob: dropout_1, keep_prob2: dropout_2, keep_prob3: dropout_3, phase: True }) if (i * queue_len + k) == loop_num_: # or (i*queue_len+k) >= max_train: BREAK = True break saver.save(sess, str(output_dir) + str(model_name) + "_" + str(start_at) + ".ckpt", global_step=i * queue_len + k) t_batch = test_batch(input_dir, output_dir, test_batch_num, batch_size, data_length) f1_list = [] for o in range(3): ta = sess.run(model.error, feed_dict={ x_image: t_batch[o * 2], y_: t_batch[o * 2 + 1], keep_prob: 1.0, keep_prob2: 1.0, keep_prob3: 1.0, phase: False }) FPR_list, TPR_list, PPV_list = ta f1 = float( np.round( np.nanmean( 2 * np.array(TPR_list) * np.array(PPV_list) / (0.0000001 + np.array(PPV_list) + np.array(TPR_list))), 4)) print(f1) f1_list.append(f1) current_variable = {} all_tv = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES) for v in all_tv: value = sess.run(v) scope = v.name current_variable[scope] = value all_lv = tf.get_collection(tf.GraphKeys.LOCAL_VARIABLES) local_variable = {} for v in all_lv: value = sess.run(v) scope = v.name print(scope) local_variable[scope] = value all_ = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES) np.savez( str(output_dir) + str(model_name) + '_trained_variables_' + str(start_at) + '.npz', **current_variable) np.savez( str(output_dir) + str(model_name) + '_local_variables_' + str(start_at) + '.npz', **local_variable) mean_ac = np.round(np.nanmean(f1_list), 4) running_time = time.time() - start import datetime if args is not None: _args = args else: _args = sys.argv to_print = ("dropout parameters: " + str(dropout_1) + ", " + str(dropout_2) + ", " + str(dropout_3) + "\n" + "input directory: " + str(input_dir) + "\n" + "The average of TPR+PPV: " + str(np.round(mean_ac, 2)) + "\nTotal time " + str(datetime.timedelta(seconds=running_time)) + "\nThe model is " + str(model_name) + "\nArguments are " + str(sys.argv[1:]) 
+ "\nGlobal variables: " + str(all_)) sess.close() print(to_print) flog = open(str(output_dir) + str(start_at) + '.log', 'a') flog.write(to_print + '\n') flog.close() fit = np.polyfit(total_learing, train_accuracy_record, 1) fit_fn = np.poly1d(fit) plt.figure(1) ax1 = plt.subplot(211) plt.title('Train accuracy') plt.plot(total_learing, train_accuracy_record, 'c.', total_learing, fit_fn(total_learing), 'm-') ax1.grid(True) x1, x2, y1, y2 = plt.axis() plt.axis((x1, x2, y1, 1.0)) plt.figure(1) plt.subplot(212) plt.title('Cost') plt.plot(total_learing, loss_val_record, '-') x1, x2, y1, y2 = plt.axis() plt.axis((x1, x2, 0, 1.0)) plt.savefig(str(output_dir) + 'plot_' + str(start_at) + '.pdf', format='pdf') np.savez_compressed(str(output_dir) + str(model_name) + "_" + str(start_at) + '_train_rec', total_learing=total_learing, train_accuracy_record=train_accuracy_record, loss_val_record=loss_val_record) plt.show()
def main(): """Create the network and start the training.""" model_urls = {'CoarseSN': 'models/DR_CoarseSN/CoarseSN.pth', 'MaskCN': 'models/MaskCN/MaskCN.pth'} writer = SummaryWriter('models/' + NAME) cudnn.enabled = True ############# Create mask-guided classification network. MaskCN = Xception_dilation(num_classes=NUM_CLASSES_CLS, input_channel=INPUT_CHANNEL) MaskCN.cuda() if FP16 is True: MaskCN = amp.initialize(MaskCN, opt_level="O1") ############# Load pretrained weights pretrained_dict = torch.load(model_urls['MaskCN']) MaskCN.load_state_dict(pretrained_dict) MaskCN.eval() ############# Create enhanced segmentation network. EnhanceSN = deeplabv3plus_en(num_classes=NUM_CLASSES_SEG) optimizer = torch.optim.Adam(EnhanceSN.parameters(), lr=LEARNING_RATE) EnhanceSN.cuda() if FP16 is True: EnhanceSN, optimizer = amp.initialize(EnhanceSN, optimizer, opt_level="O1") EnhanceSN = torch.nn.DataParallel(EnhanceSN) ############# Load pretrained weights pretrained_dict = torch.load(model_urls['CoarseSN']) net_dict = EnhanceSN.state_dict() pretrained_dict = {k: v for k, v in pretrained_dict.items() if (k in net_dict) and (v.shape == net_dict[k].shape)} net_dict.update(pretrained_dict) EnhanceSN.load_state_dict(net_dict) EnhanceSN.train() EnhanceSN.float() print(len(net_dict)) print(len(pretrained_dict)) DR_loss = loss.Fusin_Dice_rank() cudnn.benchmark = True ############# Load training and validation data data_train_root = 'dataset/seg_data/Training_resize_seg/' data_train_root_mask = 'Coarse_masks/Training_EnhancedSN/' data_train_list = 'dataset/ISIC/Training_seg.txt' trainloader = data.DataLoader(MyDataSet_seg(data_train_root, data_train_list, root_path_coarsemask=data_train_root_mask, crop_size=(w, h)), batch_size=BATCH_SIZE, shuffle=True, num_workers=8, pin_memory=True) data_val_root = 'dataset/seg_data/ISIC-2017_Validation_Data/' data_val_root_mask = 'Coarse_masks/Validation_EnhancedSN/' data_val_list = 'dataset/ISIC/Validation_seg.txt' valloader = data.DataLoader(MyValDataSet_seg(data_val_root, data_val_list, root_path_coarsemask=data_val_root_mask), batch_size=1, shuffle=False, num_workers=8, pin_memory=True) ############# Generate CAM for validation data val_cams = val_mode_cam(valloader, MaskCN) path = 'models/' + NAME if not os.path.isdir(path): os.mkdir(path) f_path = path + 'outputxx.txt' val_jac = [] ############# Start the training for epoch in range(EPOCH): train_loss_D = [] train_loss_R = [] train_loss_total = [] train_jac = [] for i_iter, batch in tqdm(enumerate(trainloader)): # if i_iter > 50: # continue step = (TRAIN_NUM / BATCH_SIZE) * epoch + i_iter images, coarsemask, labels, name = batch images = images.cuda() coarsemask = coarsemask.unsqueeze(1).cuda() labels = labels.cuda().squeeze(1) with torch.no_grad(): input_cla = torch.cat((images, coarsemask), dim=1) cla_cam = cam(MaskCN, input_cla) cla_cam = torch.from_numpy(np.stack(cla_cam)).unsqueeze(1).cuda() optimizer.zero_grad() lr = adjust_learning_rate(optimizer, step) EnhanceSN.train() preds = EnhanceSN(images, cla_cam) loss_D, loss_R = DR_loss(preds, labels) term = loss_D + 0.05 * loss_R if FP16 is True: with amp.scale_loss(term, optimizer) as scaled_loss: scaled_loss.backward() else: term.backward() optimizer.step() writer.add_scalar('learning_rate', lr, step) writer.add_scalar('loss', term.cpu().data.numpy(), step) train_loss_D.append(loss_D.cpu().data.numpy()) train_loss_R.append(loss_R.cpu().data.numpy()) train_loss_total.append(term.cpu().data.numpy()) train_jac.append(Jaccard(preds, labels)) print("train_epoch%d: 
lossTotal=%f, lossDice=%f, lossRank=%f, Jaccard=%f \n" % ( epoch, np.nanmean(train_loss_total), np.nanmean(train_loss_D), np.nanmean(train_loss_R), np.nanmean(train_jac))) ############# Start the validation [vacc, vdice, vsen, vspe, vjac_score] = val_mode_seg(valloader, val_cams, EnhanceSN, path, epoch) line_val = "val%d: vacc=%f, vdice=%f, vsensitivity=%f, vspecifity=%f, vjac=%f \n" % \ (epoch, np.nanmean(vacc), np.nanmean(vdice), np.nanmean(vsen), np.nanmean(vspe), np.nanmean(vjac_score)) print(line_val) f = open(f_path, "a") f.write(line_val) val_jac.append(np.nanmean(vjac_score)) ############# Plot val curve plt.figure() plt.plot(val_jac, label='val jaccard', color='blue', linestyle='--') plt.legend(loc='best') plt.savefig(os.path.join(path, 'jaccard.png')) plt.clf() plt.close() plt.show() plt.close('all') writer.add_scalar('val_Jaccard', np.nanmean(vjac_score), epoch) ############# Save network torch.save(EnhanceSN.state_dict(), path + 'CoarseSN_e' + str(epoch) + '.pth')
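# A minimal sketch of the Jaccard (IoU) score tracked above, written for
# binary numpy masks. The Jaccard() helper used in the training loop operates
# on network outputs and labels (torch tensors); this version only illustrates
# the metric itself.
import numpy as np

def jaccard_index(pred, target, eps=1e-7):
    pred = np.asarray(pred, dtype=bool)
    target = np.asarray(target, dtype=bool)
    intersection = np.logical_and(pred, target).sum()
    union = np.logical_or(pred, target).sum()
    return (intersection + eps) / (union + eps)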
import numpy as np

arraynonnan = np.array([[np.nan, 4, np.nan, 6],
                        [4, 5, 6, 7],
                        [1, 2, 3, 4]])
nrows = len(arraynonnan)
ncols = len(arraynonnan[0])

# Count the nan cells once (the original while-loop re-counted them,
# doubling the total and breaking the all-nan check below).
counter = 0
for r in range(nrows):
    for c in range(ncols):
        if np.isnan(arraynonnan[r][c]):
            counter += 1

if counter != nrows * ncols:
    # At least one finite value exists: fill nans with the nan-aware mean.
    fill_value = np.nanmean(arraynonnan)
    for r in range(nrows):
        for c in range(ncols):
            if np.isnan(arraynonnan[r][c]):
                arraynonnan[r][c] = fill_value
else:
    # Every cell is nan: fall back to zeros.
    for r in range(nrows):
        for c in range(ncols):
            arraynonnan[r][c] = 0
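# The loop above can be collapsed into one vectorized step: replace nan cells
# with the array's nan-aware mean when at least one finite value exists,
# otherwise fall back to zeros.
import numpy as np

def fill_nan(arr):
    arr = np.asarray(arr, dtype=float)
    if np.all(np.isnan(arr)):
        return np.zeros_like(arr)
    return np.where(np.isnan(arr), np.nanmean(arr), arr)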
def ppg_simulate(duration=120, sampling_rate=1000, heart_rate=70,
                 frequency_modulation=0.3, ibi_randomness=0.1, drift=0,
                 motion_amplitude=0.1, powerline_amplitude=0.01,
                 burst_number=0, burst_amplitude=1, random_state=None,
                 show=False):
    """Simulate a photoplethysmogram (PPG) signal.

    Phenomenological approximation of PPG. The PPG wave is described with four
    landmarks: wave onset, location of the systolic peak, location of the
    dicrotic notch and location of the diastolic peak. These landmarks are
    defined as x and y coordinates (in a time series). These coordinates are
    then interpolated at the desired sampling rate to obtain the PPG signal.

    Parameters
    ----------
    duration : int
        Desired recording length in seconds. The default is 120.
    sampling_rate : int
        The desired sampling rate (in Hz, i.e., samples/second). The default
        is 1000.
    heart_rate : int
        Desired simulated heart rate (in beats per minute). The default is 70.
    frequency_modulation : float
        Float between 0 and 1. Determines how pronounced respiratory sinus
        arrhythmia (RSA) is (0 corresponds to absence of RSA). The default is
        0.3.
    ibi_randomness : float
        Float between 0 and 1. Determines how much random noise there is in
        the duration of each PPG wave (0 corresponds to absence of variation).
        The default is 0.1.
    drift : float
        Float between 0 and 1. Determines how pronounced the baseline drift
        (.05 Hz) is (0 corresponds to absence of baseline drift). The default
        is 0.
    motion_amplitude : float
        Float between 0 and 1. Determines how pronounced the motion artifact
        (0.5 Hz) is (0 corresponds to absence of motion artifact). The default
        is 0.1.
    powerline_amplitude : float
        Float between 0 and 1. Determines how pronounced the powerline
        artifact (50 Hz) is (0 corresponds to absence of powerline artifact).
        Note that powerline_amplitude > 0 is only possible if 'sampling_rate'
        is >= 500. The default is 0.01.
    burst_amplitude : float
        Float between 0 and 1. Determines how pronounced high frequency burst
        artifacts are (0 corresponds to absence of bursts). The default is 1.
    burst_number : int
        Determines how many high frequency burst artifacts occur. The default
        is 0.
    show : bool
        If True, returns a plot of the landmarks and interpolated PPG. Useful
        for debugging.
    random_state : int
        Seed for the random number generator. Keep it fixed for reproducible
        results.

    Returns
    -------
    ppg : array
        A vector containing the PPG.

    Examples
    --------
    >>> import neurokit2 as nk
    >>>
    >>> ppg = nk.ppg_simulate(duration=40, sampling_rate=500, heart_rate=75,
    ...                       random_state=42, show=True)

    See Also
    --------
    ecg_simulate, rsp_simulate, eda_simulate, emg_simulate
    """
    # At the requested sampling rate, how long is a period at the requested
    # heart-rate and how often does that period fit into the requested
    # duration?
    period = 60 / heart_rate  # in seconds
    n_period = int(np.floor(duration / period))
    periods = np.ones(n_period) * period

    # Seconds at which waves begin.
    x_onset = np.cumsum(periods)
    # Add respiratory sinus arrhythmia (frequency modulation).
    periods, x_onset = _frequency_modulation(
        x_onset, periods, modulation_frequency=.05,
        modulation_strength=frequency_modulation)
    # Randomly modulate duration of waves by subtracting a random value
    # between 0 and 20% of the wave duration (see function definition).
    x_onset = _random_x_offset(x_onset, np.diff(x_onset), ibi_randomness)
    # Corresponding signal amplitudes.
    y_onset = np.random.normal(0, .1, n_period)

    # Seconds at which the systolic peaks occur within the waves.
    x_sys = x_onset + 0.175 * periods
    # Corresponding signal amplitudes.
    y_sys = y_onset + np.random.normal(1.5, .25, n_period)

    # Seconds at which the dicrotic notches occur within the waves.
    x_notch = x_onset + 0.4 * periods
    # Corresponding signal amplitudes (percentage of systolic peak height).
    y_notch = y_sys * np.random.normal(.49, .01, n_period)

    # Seconds at which the diastolic peaks occur within the waves.
    x_dia = x_onset + 0.45 * periods
    # Corresponding signal amplitudes (percentage of systolic peak height).
    y_dia = y_sys * np.random.normal(.51, .01, n_period)

    x_all = np.concatenate((x_onset, x_sys, x_notch, x_dia))
    x_all.sort(kind="mergesort")
    x_all = np.rint(x_all * sampling_rate).astype(int)  # convert seconds to samples

    y_all = np.zeros(n_period * 4)
    y_all[0::4] = y_onset
    y_all[1::4] = y_sys
    y_all[2::4] = y_notch
    y_all[3::4] = y_dia

    if show:
        fig, (ax0, ax1) = plt.subplots(nrows=2, ncols=1, sharex=True)
        ax0.scatter(x_all, y_all, c="r")

    # Interpolate a continuous signal between the landmarks (i.e., Cartesian
    # coordinates).
    f = Akima1DInterpolator(x_all, y_all)
    samples = np.arange(0, int(np.ceil(duration * sampling_rate)))
    ppg = f(samples)
    # Remove NaN (values outside interpolation range, i.e., after last sample).
    ppg[np.isnan(ppg)] = np.nanmean(ppg)

    if show:
        ax0.plot(ppg)

    # Add baseline drift.
    if drift > 0:
        drift_freq = .05
        if drift_freq < (1 / duration) * 2:
            drift_freq = (1 / duration) * 2
        ppg = signal_distort(ppg, sampling_rate=sampling_rate,
                             noise_amplitude=drift,
                             noise_frequency=drift_freq,
                             random_state=random_state, silent=True)
    # Add motion artifacts.
    if motion_amplitude > 0:
        motion_freq = .5
        ppg = signal_distort(ppg, sampling_rate=sampling_rate,
                             noise_amplitude=motion_amplitude,
                             noise_frequency=motion_freq,
                             random_state=random_state, silent=True)
    # Add high frequency bursts.
    if burst_amplitude > 0:
        ppg = signal_distort(ppg, sampling_rate=sampling_rate,
                             artifacts_amplitude=burst_amplitude,
                             artifacts_frequency=100,
                             n_artifacts=burst_number,
                             random_state=random_state, silent=True)
    # Add powerline noise.
    if powerline_amplitude > 0:
        ppg = signal_distort(ppg, sampling_rate=sampling_rate,
                             powerline_amplitude=powerline_amplitude,
                             powerline_frequency=50,
                             random_state=random_state, silent=True)

    if show:
        ax1.plot(ppg)

    return ppg
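# A short usage sketch for ppg_simulate, assuming it is exposed through the
# neurokit2 namespace as in the docstring example and that numpy is available
# for inspecting the output; the printed values are only illustrative checks.
import numpy as np
import neurokit2 as nk  # assumed packaging of the function above

# 10 s of PPG at 500 Hz with a fixed seed for reproducibility.
ppg = nk.ppg_simulate(duration=10, sampling_rate=500, heart_rate=70,
                      random_state=42)

print(len(ppg))         # expected: 10 * 500 = 5000 samples
print(np.nanmean(ppg))  # rough baseline level of the simulated signal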
def calc_event_data(etdata, evt,
                    w={255: 1, 0: 1, 1: 50, 2: 1, 3: 1, 4: 1, 5: 1, 6: 1,
                       'vel': 18, 'etdq': 200}):
    """Calculates event parameters.

    Parameters:
        etdata -- an instance of ETData
        evt    -- compact event vector
        w      -- dictionary of context to take into account for each event
                  type; in ms

    Returns:
        posx_s    -- onset position, horizontal
        posx_e    -- offset position, horizontal
        posy_s    -- onset position, vertical
        posy_e    -- offset position, vertical
        posx_mean -- mean position, horizontal
        posy_mean -- mean position, vertical
        posx_med  -- median position, horizontal
        posy_med  -- median position, vertical
        pv        -- peak velocity
        pv_index  -- index for peak velocity
        rms       -- precision, 2D RMS
        std       -- precision, 2D STD
    """
    # init params
    data = etdata.data
    fs = etdata.fs
    e = {k: v for k, v in zip(['s', 'e', 'evt'], evt)}
    ws = w[e['evt']]
    ws = 1 if not (ws > 1) else round_up_to_odd(ws / 1000.0 * fs, min_val=3)
    ws_vel = round_up_to_odd(w['vel'] / 1000.0 * fs, min_val=3)
    w_etdq = int(w['etdq'] / 1000. * fs)

    # calculate velocity using Savitzky-Golay filter
    vel = np.hypot(sg.savgol_filter(data['x'], ws_vel, 2, 1),
                   sg.savgol_filter(data['y'], ws_vel, 2, 1)) * fs

    ind_s = e['s'] + ws
    ind_s = ind_s if ind_s < e['e'] else e['e']
    ind_e = e['e'] - ws
    ind_e = ind_e if ind_e > e['s'] else e['s']

    posx_s = np.nanmean(data[e['s']:ind_s]['x'])
    posy_s = np.nanmean(data[e['s']:ind_s]['y'])
    posx_e = np.nanmean(data[ind_e:e['e']]['x'])
    posy_e = np.nanmean(data[ind_e:e['e']]['y'])

    posx_mean = np.nanmean(data[e['s']:e['e']]['x'])
    posy_mean = np.nanmean(data[e['s']:e['e']]['y'])
    posx_med = np.nanmedian(data[e['s']:e['e']]['x'])
    posy_med = np.nanmedian(data[e['s']:e['e']]['y'])

    pv = np.max(vel[e['s']:e['e']])
    pv_index = e['s'] + np.argmax(vel[e['s']:e['e']])

    if e['e'] - e['s'] > w_etdq:
        x_ = rolling_window(data[e['s']:e['e']]['x'], w_etdq)
        y_ = rolling_window(data[e['s']:e['e']]['y'], w_etdq)

        std = np.median(np.hypot(np.std(x_, axis=1), np.std(y_, axis=1)))
        rms = np.median(np.hypot(np.sqrt(np.mean(np.diff(x_)**2, axis=1)),
                                 np.sqrt(np.mean(np.diff(y_)**2, axis=1))))
    else:
        std = 0
        rms = 0

    return (posx_s, posx_e, posy_s, posy_e,
            posx_mean, posy_mean, posx_med, posy_med,
            pv, pv_index, rms, std)
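# calc_event_data assumes numpy as np, scipy.signal imported as sg, and two
# helpers that are not shown in this collection: round_up_to_odd and
# rolling_window. A minimal sketch of what those helpers could look like,
# under the assumption that rolling_window returns overlapping windows via the
# usual numpy stride trick (illustrative only, not the project's actual
# implementations):
import numpy as np


def round_up_to_odd(f, min_val=3):
    """Round up to the nearest odd integer, but never below min_val."""
    w = int(np.ceil(f) // 2 * 2 + 1)
    return max(w, min_val)


def rolling_window(a, window):
    """Return a 2D view of overlapping length-`window` windows of a 1D array."""
    shape = a.shape[:-1] + (a.shape[-1] - window + 1, window)
    strides = a.strides + (a.strides[-1],)
    return np.lib.stride_tricks.as_strided(a, shape=shape, strides=strides)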
# save data in case program crashes -- remove this if it's causing any hold ups
if nBlock in [1, 2, 6]:
    trial_type = 1
elif nBlock in [2, 4, 5, 7]:
    trial_type = 2

# occasionally no key is pressed, so the reaction time is empty; store NaN
if not key_response.rt:
    key_response.rt = float('nan')

# add data to file
data_out.loc[len(data_out) + 1] = [nBlock, key_response.corr,
                                   key_response.rt, trial_type]
# 'data/%s_%s_%s' % (expInfo['participant'], expName, expInfo['date'])

# build adaptive rt design
n_corr = np.sum(acc_last_block)
acc_last_block = n_corr / len(acc_last_block)
mean_rt = np.nanmean(block_rts)
std_rt = np.nanstd(block_rts)
adapt_rt = mean_rt + std_rt
if (adapt_rt < .200 or acc_last_block < 0.75) or (nBlock == 6 or nBlock == 7):
    max_rt = 1.0
else:
    max_rt = adapt_rt

# completed 1 repeats of 'trials'

# feedback text component after block completion
text_4 = visual.TextStim(win=win, ori=0, name='text_2',
                         text='End of Block. You got %i trials correct out of 256. '
                              'Your mean response time was: %.2f.\n'
                              'Press any key to continue' % (n_corr, mean_rt),
                         font=u'Arial',
                         pos=[0, 0], height=0.1, wrapWidth=None,
                         color=u'white', colorSpace='rgb', opacity=1)
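# The adaptive deadline rule above can be isolated into a small function; a
# minimal sketch (the name adaptive_max_rt and the fixed_blocks argument are
# introduced here for illustration and are not part of the original script):
import numpy as np


def adaptive_max_rt(block_rts, acc_last_block, nBlock, fixed_blocks=(6, 7)):
    """Deadline for the next block: mean RT + 1 SD of the previous block,
    unless accuracy was low, the deadline would be implausibly short, or the
    block uses a fixed 1 s deadline."""
    accuracy = np.sum(acc_last_block) / len(acc_last_block)
    adapt_rt = np.nanmean(block_rts) + np.nanstd(block_rts)
    if adapt_rt < 0.200 or accuracy < 0.75 or nBlock in fixed_blocks:
        return 1.0
    return adapt_rt


# Slow but accurate block keeps the adaptive deadline (about 0.54 s here).
print(adaptive_max_rt(block_rts=[0.45, 0.50, 0.55],
                      acc_last_block=[1, 1, 1, 0], nBlock=3))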
def evaluate(args, model, criterions, dataloader):
    model.eval()
    epoch_loss = 0
    n_class = 12
    example_images = []

    with torch.no_grad():
        hist = np.zeros((n_class, n_class))
        miou_images = []
        for images, masks, _ in dataloader:
            images = torch.stack(images)       # (batch, channel, height, width)
            masks = torch.stack(masks).long()  # (batch, channel, height, width)
            images, masks = images.to(args.device), masks.to(args.device)

            outputs = model(images)

            flag = criterions[0]
            if flag == "+":
                loss = criterions[1](outputs, masks) + criterions[2](outputs, masks)
            elif flag == "-":
                loss = criterions[1](outputs, masks) - criterions[2](outputs, masks)
            else:
                loss = criterions[1](outputs, masks)
            epoch_loss += loss

            inputs_np = torch.clone(images).detach().cpu().permute(0, 2, 3, 1).numpy()
            inputs_np = denormalize_image(inputs_np,
                                          mean=(0.4611, 0.4403, 0.4193),
                                          std=(0.2107, 0.2074, 0.2157))

            example_images.append(
                wb_mask(
                    inputs_np[0],
                    pred_mask=outputs.argmax(1)[0].detach().cpu().numpy(),
                    true_mask=masks[0].detach().cpu().numpy(),
                ))

            outputs = torch.argmax(outputs.squeeze(), dim=1).detach().cpu().numpy()
            hist = add_hist(hist, masks.detach().cpu().numpy(), outputs,
                            n_class=n_class)

            # store per-image mIoU
            miou_list = get_miou(masks.detach().cpu().numpy(), outputs,
                                 n_class=n_class)
            miou_images.extend(miou_list)

        # metrics
        acc, acc_cls, miou, fwavacc = label_accuracy_score(hist)

        # leaderboard mIoU
        lb_miou = np.nanmean(miou_images)

        print(f"acc:{acc:.4f}, acc_cls:{acc_cls:.4f}, fwavacc:{fwavacc:.4f}")

        # log the row-normalized confusion histogram to wandb
        summa = hist.sum(1).reshape(-1, 1)
        percent = hist / summa
        plt.figure(figsize=(10, 10))
        sns.heatmap(percent, annot=True, fmt=".2%", annot_kws={"size": 8})
        wandb.log({"percent_hist": wandb.Image(plt)}, commit=False)

    return (epoch_loss / len(dataloader)), lb_miou, miou, example_images
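# add_hist, get_miou and label_accuracy_score are project helpers defined
# elsewhere. For reference, a minimal sketch of how per-class IoU and mIoU are
# typically derived from an accumulated confusion matrix like `hist` (the
# helper name iou_from_hist is illustrative, not the project's implementation):
import numpy as np


def iou_from_hist(hist):
    """Per-class IoU from an (n_class, n_class) confusion matrix, where
    hist[i, j] counts pixels of true class i predicted as class j."""
    intersection = np.diag(hist)
    union = hist.sum(axis=1) + hist.sum(axis=0) - intersection
    with np.errstate(divide="ignore", invalid="ignore"):
        iou = intersection / union  # NaN for classes absent from both
    return iou


# Example with 3 classes; np.nanmean ignores the class that never appears.
hist = np.array([[50, 5, 0],
                 [3, 42, 0],
                 [0, 0, 0]], dtype=float)
print(np.nanmean(iou_from_hist(hist)))  # ~0.85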