def pca_projection(self, batch_data, n_prof_x=20):
    """Project batch profile data onto the fitted PCA bases.

    Transforms each y-profile and then each X-profile from
    ``batch_data['profile_df']`` into its retained PCA coefficients and
    returns them side by side in a single DataFrame (y columns first).
    """
    profile_df = batch_data['profile_df']
    frames = []
    # Treat outputs and inputs uniformly: (profile names, PCA dict, n-keep dict).
    groups = (
        (self.y_profiles, self.y_pca, self.y_prof_n_keep_dict),
        (self.X_profiles, self.X_pca, self.X_prof_n_keep_dict),
    )
    for profiles, pca_by_prof, n_keep_by_prof in groups:
        for prof in profiles:
            raw = self.get_profile_from_df(profile_df, prof, n_prof_x=n_prof_x).T
            coeffs = pca_by_prof[prof].transform(raw)
            cols = gd.profile_column_names(prof, n_keep_by_prof[prof])
            frames.append(pandas.DataFrame(coeffs, columns=cols))
    return pandas.concat(frames, axis=1)
def batch_train_pca(self, batch_data, partial_fit=True):
    """Fit (or incrementally update) the per-profile PCA decompositions.

    With ``partial_fit=True`` each IncrementalPCA is updated in place with
    this batch; otherwise it is refit from scratch on the batch alone.
    Both y-profiles and X-profiles are trained on the first 20 grid-point
    columns of ``batch_data['profile_df']``.
    """
    pairs = ((self.y_profiles, self.y_pca), (self.X_profiles, self.X_pca))
    for profiles, pca_by_prof in pairs:
        for prof in profiles:
            batch = batch_data['profile_df'][gd.profile_column_names(prof, 20)]
            estimator = pca_by_prof[prof]
            if partial_fit:
                pca_by_prof[prof] = estimator.partial_fit(batch)
            else:
                pca_by_prof[prof] = estimator.fit(batch)
def y_to_data(self, y, n_prof_x=20):
    """Convert model output coefficients back into physical columns.

    ``y`` holds scalar outputs plus PCA coefficients (columns ordered as
    ``self.y_variables_wo_shot``). Each profile's coefficients are inverse-
    transformed back to ``n_prof_x`` grid points; the scalar outputs
    (minus 'Shot'/'ID') are prepended to the reconstructed profiles.
    """
    coeff_df = pandas.DataFrame(y, columns=self.y_variables_wo_shot)
    # TODO: automatically handle n_prof_x
    reconstructed = []
    for prof in self.y_profiles:
        coeff_cols = gd.profile_column_names(prof, self.y_prof_n_keep_dict[prof])
        prof_df = pandas.DataFrame(
            self.y_pca[prof].inverse_transform(coeff_df[coeff_cols]))
        prof_df.columns = gd.profile_column_names(prof, n_prof_x)
        reconstructed.append(prof_df)
    profiles_df = pandas.concat(reconstructed, axis=1)
    scalar_cols = [name for name in self.y_scalars if name not in ['Shot', 'ID']]
    return pandas.concat([coeff_df[scalar_cols], profiles_df], axis=1)
def update_main(shot_run, tag, dimension, t0, r0, batches, model, figure):
    """ Plot new slice data when inputs change. """
    logging.debug("Updating the slice graph...")
    # Bail out with an empty figure until both a shot/run and a tag are chosen.
    if shot_run is None or tag is None:
        return go.Figure()
    data_frames = load_data(batches, model)
    if data_frames is None:
        return go.Figure()
    # shot_run packs both ids into one integer: the run lives in the low
    # 12 bits (0x1000), the shot number in the high bits.
    shot, run = int(shot_run) // 0x1000, int(shot_run) % 0x1000
    logging.debug("User wants {} for shot {} run {}".format(tag, shot, run))
    data = {
        key: data_frames[key][(data_frames['exp']['Shot'] == shot)
                              & (data_frames['exp']['ID'] == run)]
        for key in data_frames
    }  # strip away the nonrelevant rows
    if tag[-1] == 'S':  # if it's a scalar variable,
        # Drop the trailing 'S' marker to recover the actual column name.
        tag = tag[:-1]
        return go.Figure(
            data=[
                go.Scatter(x=data['exp']['Time'],
                           y=data[key][tag],
                           name=name,
                           showlegend=showlegend,
                           fill=fill,
                           line=line)
                for key, name, showlegend, fill, line in PLOT_SERIES
                if key in data
            ],
            layout=go.Layout(
                title=u"{} vs. Time for Shot #{}, Run {}".format(
                    tag.capitalize(), *format_run(shot, run).split()),
                # NOTE(review): go.XAxis / go.YAxis are legacy plotly aliases,
                # removed in plotly 4+ — confirm the pinned plotly version
                # before upgrading.
                xaxis=go.XAxis(title="Time [s]"),
                yaxis=go.YAxis(title="{}".format(tag.capitalize())),
            ),
        )
    else:  # if it's a profile
        tag = tag[:-1]
        times = data['exp']['Time']
        # NOTE(review): 20 grid points hard-coded here; presumably this should
        # match get_data.NUM_GRID_POINTS used just below — confirm.
        radii = np.linspace(0, 1, 20)
        # check the other inputs
        for key in data:
            data[key] = data[key].loc[:, get_data.profile_column_names(
                tag, get_data.NUM_GRID_POINTS
            )]  #strip away the nonrelevant columns
        if dimension == 'time':  # constant time, plot against radius
            # Pick the stored time sample closest to the requested t0.
            idx = (times - t0).abs().idxmin()
            return go.Figure(
                data=[
                    go.Scatter(x=radii,
                               y=data[key].loc[idx, :],
                               name=name,
                               showlegend=showlegend,
                               fill=fill,
                               line=line)
                    for key, name, showlegend, fill, line in PLOT_SERIES
                    if key in data
                ],
                layout=go.Layout(
                    title=u"{0} vs. Radius for Shot #{2}, Run {3}, t={1:.2f}s".
                    format(tag.capitalize(), t0, *format_run(shot, run).split()),
                    xaxis=go.XAxis(title="Normalized flux surface"),
                    yaxis=go.YAxis(title="{}".format(tag.capitalize())),
                ),
            )
        else:  # constant radius, plot against time
            # Map the normalized radius r0 in [0, 1] onto a two-digit grid
            # column suffix (01-20), e.g. 'TE07' — assumes a 20-point grid,
            # consistent with the hard-coded linspace above; TODO confirm.
            tag = tag + '{:02.0f}'.format(r0 * 19 + 1)
            return go.Figure(
                data=[
                    go.Scatter(x=times,
                               y=data[key].loc[:, tag],
                               name=name,
                               showlegend=showlegend,
                               fill=fill,
                               line=line)
                    for key, name, showlegend, fill, line in PLOT_SERIES
                    if key in data
                ],
                layout=go.Layout(
                    title=u"{0} vs. Time for Shot #{2}, Run {3}, ρ={1}".format(
                        tag.capitalize(), r0, *format_run(shot, run).split()),
                    xaxis=go.XAxis(title="Time [s]"),
                    yaxis=go.YAxis(title="{}".format(tag.capitalize())),
                ),
            )
def get_profile_from_df(df, profile, n_prof_x=20):
    """Return the profile's grid-point columns as an array of shape
    (n_prof_x, n_samples).

    ``.values`` replaces ``DataFrame.as_matrix()``, which was deprecated in
    pandas 0.23 and removed in 0.25; this also matches the method-form
    implementation elsewhere in this file.
    """
    return df[gd.profile_column_names(profile, n_prof_x)].values.T
def get_profile_from_df(self, df, profile, n_prof_x=20):
    """Extract one profile's grid-point columns from ``df``, transposed to
    shape (n_prof_x, n_samples)."""
    columns = gd.profile_column_names(profile, n_prof_x)
    return df[columns].values.T
def __init__(self, dataset,
             X_scalars=None, X_profiles=None, X_n_keep=None,
             pinj_taus=None,
             y_scalars=None, y_profiles=None, y_n_keep=None,
             n_nn=10, ensemble_exclude_fraction=0.1,
             hidden_layer_sizes=(10,), alpha=0.000001,
             learning_rate='adaptive', learning_rate_init=0.001,
             early_stopping=True):
    """Set up per-profile PCA compressors, normalizers, and an ensemble of
    MLP regressors.

    Parameters
    ----------
    dataset : object
        Handle to the training data source; stored as-is.
    X_scalars, y_scalars : list of str, optional
        Scalar input / output column names. 'Shot' and 'ID' are carried for
        bookkeeping and stripped from the ``*_wo_shot`` variable lists.
    X_profiles, y_profiles : list of str, optional
        Profile names whose radial shapes are compressed with PCA.
    X_n_keep, y_n_keep : list of int, optional
        PCA components kept per profile (paired positionally with
        X_profiles / y_profiles).
    pinj_taus : list of float, optional
        Time constants for the low-pass-filtered 'pinj*' power inputs; one
        extra X variable is generated per (pinj scalar, tau) pair.
    n_nn : int
        Number of ensemble members.
    ensemble_exclude_fraction : float
        Fraction of shots excluded from each member's training set.
    hidden_layer_sizes, alpha, learning_rate, learning_rate_init,
    early_stopping :
        Passed through to each MLPRegressor.
    """
    # Mutable defaults are materialized per call (never as argument
    # defaults) so instances cannot share -- and silently mutate -- one list.
    if X_scalars is None:
        X_scalars = ['Shot', 'ID', 'ZEFFC', 'DN0OUT', 'R0', 'ELONG', 'PCURC',
                     'AMIN', 'BZXR', 'TRIANGU', 'TRIANGL'] + \
                    ['pinj0' + str(i) for i in range(1, 7)]
    if X_profiles is None:
        X_profiles = ['TE', 'NE', 'Q', 'DIFB']
    if X_n_keep is None:
        X_n_keep = [4, 4, 4, 4]
    if pinj_taus is None:
        pinj_taus = [0.02, 0.05, 0.1]
    if y_scalars is None:
        y_scalars = ['Shot', 'ID', 'NEUTT', 'BPSHI', 'BPLIM', 'BPCXX', 'BPCX0']
    if y_profiles is None:
        y_profiles = ['PFI', 'CURB', 'PBE', 'PBI', 'TQBE', 'TQBI', 'CURBS',
                      'BDENS', 'ETA_SNC']
    if y_n_keep is None:
        y_n_keep = [3, 4, 4, 4, 10, 10, 10, 3, 3]

    self.dataset = dataset
    self.X_prof_n_keep_dict = dict(zip(X_profiles, X_n_keep))
    self.y_prof_n_keep_dict = dict(zip(y_profiles, y_n_keep))
    self.X_scalars = X_scalars
    self.y_scalars = y_scalars
    self.X_profiles = X_profiles
    self.y_profiles = y_profiles
    self.X_n_keep = X_n_keep
    self.y_n_keep = y_n_keep
    self.pinj_taus = pinj_taus
    self.X_variables = X_scalars

    # One IncrementalPCA per profile so training batches can be streamed.
    self.y_pca = {prof: IncrementalPCA(n_components=self.y_prof_n_keep_dict[prof])
                  for prof in self.y_profiles}
    self.X_pca = {prof: IncrementalPCA(n_components=self.X_prof_n_keep_dict[prof])
                  for prof in self.X_profiles}
    self.X_normalization = StandardScaler()
    self.y_normalization = StandardScaler()

    self.n_nn = n_nn
    self.learning_rate = learning_rate
    self.learning_rate_init = learning_rate_init
    self.hidden_layer_sizes = hidden_layer_sizes
    self.alpha = alpha
    self.early_stopping = early_stopping
    # Early stopping requires a validation-aware solver; otherwise plain SGD.
    self.solver = 'sgd'
    if early_stopping:
        self.solver = 'adam'
    self.regressors = {key: MLPRegressor(hidden_layer_sizes=self.hidden_layer_sizes,
                                         alpha=self.alpha,
                                         solver=self.solver,
                                         early_stopping=self.early_stopping,
                                         warm_start=True,
                                         learning_rate=learning_rate,
                                         learning_rate_init=learning_rate_init)
                       for key in np.arange(n_nn)}
    # Seed best_loss_ so warm-started incremental training has a baseline
    # to compare against before the first full fit.
    for regressor in self.regressors.values():
        regressor.best_loss_ = 1.0e3
    self.excluded_shots = {key: [] for key in np.arange(n_nn)}
    self.scores = {key: [] for key in np.arange(n_nn)}
    self.training_shots = []
    self.ensemble_exclude_fraction = ensemble_exclude_fraction

    # add names for low pass filtered power values
    for x in [x for x in X_scalars if x.startswith('pinj')]:
        for i in np.arange(len(pinj_taus)):
            self.X_variables = self.X_variables + [x + '_lpf_' + str(i + 1)]
    for prof in X_profiles:
        self.X_variables = self.X_variables + gd.profile_column_names(
            prof, self.X_prof_n_keep_dict[prof])
    self.y_variables = y_scalars
    for prof in y_profiles:
        self.y_variables = self.y_variables + gd.profile_column_names(
            prof, self.y_prof_n_keep_dict[prof])
    self.y_variables_wo_shot = [variable for variable in self.y_variables
                                if variable not in ['Shot', 'ID']]
    self.X_variables_wo_shot = [variable for variable in self.X_variables
                                if variable not in ['Shot', 'ID']]