def plot_graphs(classifier_name):
    fit_result = None
    checkpoint_dir = os.path.join('.', 'checkpoints')
    checkpoint_file = os.path.join(checkpoint_dir, classifier_name)
    if os.path.isfile(checkpoint_file):
        # load the checkpoint if a model was already trained with the same hyperparameters
        fit_result = torch.load(checkpoint_file, map_location=torch.device('cpu'))
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    # only plot when a checkpoint was actually found; plot_fit(None, ...) would fail
    if fit_result is not None and device.type == 'cpu':
        plot_fit(fit_result, 'RNN classifier graph', legend='total')
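# A minimal sketch of the save side that plot_graphs() above assumes: the
# checkpoint is the fit_result object itself, written with torch.save() under
# ./checkpoints/<classifier_name>. The training code that produces fit_result
# is not shown here, so the save_checkpoint() name is illustrative only.
def save_checkpoint(classifier_name, fit_result):
    checkpoint_dir = os.path.join('.', 'checkpoints')
    os.makedirs(checkpoint_dir, exist_ok=True)
    torch.save(fit_result, os.path.join(checkpoint_dir, classifier_name))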
def test_gp(plot=False, method='full'):
    """Compares model prediction with an exact GP (without optimisation)."""
    # Note: this test fails without latent noise in the case of a full Gaussian.
    np.random.seed(111)
    num_input_samples = 10
    num_samples = 10000
    gaussian_sigma = .2
    X, Y, kernel = DataSource.normal_generate_samples(num_input_samples, gaussian_sigma, 1)
    kernel = [GPy.kern.RBF(1, variance=1., lengthscale=np.array((1.,)))]
    if method == 'full':
        m = SAVIGP_SingleComponent(X, Y, num_input_samples,
                                   UnivariateGaussian(np.array(gaussian_sigma)),
                                   kernel, num_samples, None, 0.001, True, True)
    if method == 'diag':
        m = SAVIGP_Diag(X, Y, num_input_samples, 1,
                        UnivariateGaussian(np.array(gaussian_sigma)),
                        kernel, num_samples, None, 0.001, True, True)
    # update model using optimal parameters
    # gp = SAVIGP_Test.gpy_prediction(X, Y, gaussian_sigma, kernel[0])
    # gp_mean, gp_var = gp.predict(X, full_cov=True)
    # m.MoG.m[0, 0] = gp_mean[:, 0]
    # m.MoG.update_covariance(0, gp_var - gaussian_sigma * np.eye(10))
    try:
        folder_name = 'test' + '_' + ModelLearn.get_ID()
        logger = ModelLearn.get_logger(folder_name, logging.DEBUG)
        Optimizer.optimize_model(m, 10000, logger, ['mog'])
    except KeyboardInterrupt:
        pass
    sa_mean, sa_var = m.predict(X)
    gp = SAVIGP_Test.gpy_prediction(X, Y, gaussian_sigma, deepcopy(kernel[0]))
    gp_mean, gp_var = gp.predict(X)
    mean_error = (np.abs(sa_mean - gp_mean)).sum() / sa_mean.shape[0]
    var_error = (np.abs(sa_var - gp_var)).sum() / gp_var.T.shape[0]
    if mean_error < 0.1:
        print(bcolors.OKBLUE, "passed: mean gp prediction ", mean_error)
    else:
        print(bcolors.WARNING, "failed: mean gp prediction ", mean_error)
    print(bcolors.ENDC)
    if var_error < 0.1:
        print(bcolors.OKBLUE, "passed: var gp prediction ", var_error)
    else:
        print(bcolors.WARNING, "failed: var gp prediction ", var_error)
    print(bcolors.ENDC)
    if plot:
        plot_fit(m)
        gp.plot()
        show(block=True)
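# Hypothetical driver for the test above (an assumption, not part of the
# original module): exercises both the full and diagonal posterior
# approximations that test_gp() supports.
if __name__ == '__main__':
    test_gp(plot=False, method='full')
    test_gp(plot=False, method='diag')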
def print_with_plot(fit_result: FitResult):
    print(fit_result)
    plot_fit(fit_result)
# bgdsignal = ydata[bgd] / np.array([reference[bgd]] * len(ydata.T)).T
# print(bgdsignal)
bgdsignal = np.average(ydata[bgd] / np.array([reference[bgd]] * len(ydata.T)).T / 53 * 1000, axis=0)
# plt.plot(xdata, bgdsignal, '-')
# plt.plot(xdata, data.T[1], '-')
# print(len(ydata.T))
# plt.show()
# plt.close()
# print(bgdsignal)
# signal = np.arange(9, 44)
signal = ds.signalstack()
sig = ydata[signal] / np.array([reference[signal]] * len(ydata.T)).T / 53 * 1000
# sig_bgd_pair.shape == (2, 24)
# print(sig_bgd_pair)
binsize = xdata[3] - xdata[2]
print("#Num kobs(1/ms) height_err m1 c1 popt.a popt.b popt.c perr.a perr.b perr.c residual")
for i in range(0, len(sig)):
    # Sig = (data.T[sig_bgd_pair[i][0] + 1] / reference[sig_bgd_pair[i][0]]
    #        - data.T[sig_bgd_pair[i][1] + 1] / reference[sig_bgd_pair[i][1]]) / 53 * 1000
    # Time = data.T[0]
    fit_data = np.vstack((xdata, sig[i] - bgdsignal))
    raw_data = np.copy(fit_data)
    # print(fit_data)
    param_opt = fitting(fit_data.T, binsize)
    kobs = param_opt.popt[1]
    height_err = param_opt.popt[0] + param_opt.popt[2] - param_opt.trigger_h
    print(signal[i], kobs, height_err, param_opt.m1, param_opt.c1,
          " ".join(map(str, param_opt.popt)),
          " ".join(map(str, param_opt.perr)),
          param_opt.residual)
    plot_fit(raw_data.T, param_opt, str(signal[i]) + "-fig", binsize)
def print_fit_res(model_name: str):
    checkpoint_filename = f'{model_name}.pt'
    saved_state = torch.load(checkpoint_filename, map_location='cpu')
    fit_res = saved_state['fit_result']
    plot_fit(fit_res)
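# Sketch of the checkpoint layout print_fit_res() above expects: a dict with a
# 'fit_result' key saved to <model_name>.pt. The surrounding training code is
# not shown, so the save_state() name (and any extra keys) are assumptions.
def save_state(model_name: str, fit_res):
    torch.save({'fit_result': fit_res}, f'{model_name}.pt')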
def main():
    '''Produces results for report.'''
    # simulations flag
    simulations = 0
    # if analysing simulations
    if simulations == 1:
        detector = 'BGO'
        (mus, mu_errs, Rs, R_errs, e_fs, e_f_errs, e_ps, e_p_errs,
         Gs, m, dEs, d_errs, sigmas, s_errs) = sims()
    else:
        # name detector
        detector = 'CdTe'
        # empty arrays for plotting
        Rs = []
        R_errs = []
        ang_Rs = []
        ang_R_errs = []
        e_fs = []
        e_f_errs = []
        ang_e_fs = []
        ang_e_f_errs = []
        e_ps = []
        e_p_errs = []
        ang_e_ps = []
        ang_e_p_errs = []
        angles = []
        Gs = []
        ang_Gs = []
        mu_errs = []
        s_errs = []
        dEs = []
        d_errs = []
        sigmas = []
        Cs = []
        C_errs = []
        C_thetas = []
        C_theta_errs = []
        # get clock drift
        drift = get_detector_drift(detector)
        # list of sample files
        if detector == 'NaI':
            # NaI has a different file format
            samples = ['241Am.txt', '133Ba.txt', '137Cs.txt', '60Co.txt']
            m, c, mus, ns = find_calibration(detector, samples)
        elif detector == 'CdTe':
            # different file format; calibrate CdTe with 241Am
            samples = ['241Am.mca', '133Ba.mca']
            m, c, mus, ns = find_calibration(detector, ['241Am.mca'])
        else:
            # HPGe, BGO
            samples = ['241Am.spe', '133Ba.spe', '137Cs.spe', '60Co.spe']
            m, c, mus, ns = find_calibration(detector, samples)
        # for each sample listed
        for sample in samples:
            source = str(sample.split('.', 1)[0])  # find sample name
            # get list of angles at which off-axis measurements have been made
            angles = get_angles(detector, source)
            # get on-axis angle
            on_axis = int(get_on_axis_angle(detector, source))
            # for each angle
            for angle in angles:
                # create filename
                filename = angle + '_' + sample
                # get live time from file
                t_l = get_live_time(filename)
                # for each combination of source and detector,
                # get number of peaks of interest in sample spectrum
                no_peaks = get_no_peaks(detector, source)
                # calculate time since initial activity measurement
                lifetime = get_lifetime(detector, filename, drift)
                # calculate current source activity
                activity, a_err = calculate_activity(source, lifetime)
                # for each peak
                for no_peak in no_peaks:
                    # fit it
                    params, popt, perrs = fit_peak(detector, source, filename,
                                                   no_peak, angle)
                    # get gaussian parameters
                    mu, sigma, amp = popt[0], popt[1], popt[2]
                    # get actual energy n and decay fraction f
                    n, n_err, f = get_decay_fraction(source, no_peak)
                    # calculate resolution for peak centred on given energy n
                    R, R_err, dE, d_error = calculate_resolution(
                        sigma, perrs[1], n, n_err, m)
                    # get region of interest
                    channel_min, channel_max = get_roi(detector, source, no_peak)
                    # get greatest amplitude of gaussian curve, i.e. remove background
                    amplitude = remove_background(channel_min, channel_max,
                                                  mu, sigma, amp)
                    amp_err = perrs[2] / popt[2] * amplitude
                    # calculate FEP efficiency
                    e_f, e_f_err, C, C_err = calculate_e_f(
                        sigma * m, perrs[1] * m, amplitude, amp_err,
                        activity, a_err, t_l, f)
                    # calculate intrinsic peak efficiency
                    e_p, e_p_err, G = calculate_e_p_G(e_f, e_f_err, detector, angle)
                    # add to on-axis arrays if angle is on-axis
                    if angle == str(on_axis):
                        Rs.append(R * 100)
                        R_errs.append(R_err * 100)
                        e_fs.append(e_f * 100)
                        e_f_errs.append(e_f_err * 100)
                        e_ps.append(e_p * 100)
                        e_p_errs.append(e_p_err * 100)
                        Gs.append(G)
                        mu_errs.append(perrs[0])
                        sigmas.append(popt[1])
                        s_errs.append(perrs[1])
                        dEs.append(dE)
                        d_errs.append(d_error)
                        # for calculating ratio
                        C_on_axis = C
                        # plot fit on-axis
                        plot_fit(channel_min, channel_max, detector, source,
                                 no_peak, mu, sigma, amp, m, c)
                    if len(angles) > 6:  # if off-axis
                        # if value not already in array, add it and its errors
                        if float(e_f) not in ang_e_fs:
                            ang_e_fs.append(e_f * 100)
                            ang_e_f_errs.append(e_f_err * 100)
                        if float(R) not in ang_Rs:
                            ang_Rs.append(R * 100)
                            ang_R_errs.append(R_err * 100)
                        if e_p not in ang_e_ps:
                            ang_e_ps.append(e_p * 100)
                            ang_e_p_errs.append(e_p_err * 100)
                            ang_Gs.append(G)
                        if C not in Cs:
                            Cs.append(C)
                            C_errs.append(C_err)
                            C_thetas.append(C / C_on_axis)
                            C_theta_errs.append(C_err / C_on_axis)
            # convert list of angles to ints
            int_angles = list(map(int, angles))
            # for off-axis case, create plots vs angle
            if len(angles) > 6:
                write_count_errors(detector, source, int_angles, Cs, C_errs,
                                   C_thetas, C_theta_errs)
                # for more than one peak, i.e. 2,
                # separate values for the two peaks for plotting
                if len(int_angles) != len(ang_Rs):
                    # create resolution vs angle plots
                    plot_angle_vs_resolution(detector, source, int_angles, ang_Rs[::2], 0)
                    plot_angle_vs_resolution(detector, source, int_angles, ang_Rs[1::2], 1)
                    # create FEP efficiency vs angle plots
                    plot_angle_vs_fep(detector, source, int_angles, ang_e_fs[::2], 0)
                    plot_angle_vs_fep(detector, source, int_angles, ang_e_fs[1::2], 1)
                    # create geometry vs angle plots
                    plot_angle_vs_geometry(detector, source, int_angles, ang_Gs[::2], 0)
                    plot_angle_vs_geometry(detector, source, int_angles, ang_Gs[1::2], 1)
                    # create intrinsic peak efficiency vs angle plots
                    plot_angle_vs_ipe(detector, source, int_angles, ang_e_ps[::2], 0)
                    plot_angle_vs_ipe(detector, source, int_angles, ang_e_ps[1::2], 1)
                else:
                    # create resolution vs angle plot
                    plot_angle_vs_resolution(detector, source, int_angles, ang_Rs, 0)
                    # create FEP efficiency vs angle plot
                    plot_angle_vs_fep(detector, source, int_angles, ang_e_fs, 0)
                    # create geometry vs angle plot
                    plot_angle_vs_geometry(detector, source, int_angles, ang_Gs, 0)
                    # create intrinsic peak efficiency vs angle plot
                    plot_angle_vs_ipe(detector, source, int_angles, ang_e_ps, 0)
            # clear arrays for use with next source
            ang_Rs.clear()
            ang_e_fs.clear()
            ang_Gs.clear()
            ang_e_ps.clear()
    # for simulated detector
    if simulations == 1:
        detector = 'simulated ' + detector
    # plot resolution curve for detector
    plot_resolution_curve(detector, mus, Rs)
    # plot FEP efficiency curve for detector
    plot_fp_efficiency_curve(detector, mus, e_fs)
    # plot geometry curve for detector
    plot_geometry_curve(detector, mus, Gs)
    # plot intrinsic efficiency curve for detector
    plot_ip_efficiency_curve(detector, mus, e_ps)
    # for writing to file, uncalibrate to get raw channel values
    # (rebinding the loop variable would not modify the lists, so rebuild them)
    mus = [mu / m for mu in mus]
    mu_errs = [mu_err / m for mu_err in mu_errs]
    # write to file for tables
    write_resolution_errors(detector, mus, mu_errs, dEs, d_errs,
                            sigmas, s_errs, Rs, R_errs)
    write_efficiency_errors(detector, mus, mu_errs, e_fs, e_f_errs,
                            e_ps, e_p_errs)
def main():
    # downloading the data
    print("Downloading data...\n")
    download.download_data(out_path, TCELL_CSV_FILENAME, TCELL_DOWNLOAD_URL)
    downloaded_filename = TCELL_CSV_FILENAME

    # parsing the downloaded data
    print("Organizing data, checking for duplicates (it might take a while...)\n")
    parser.make_samples(out_path, downloaded_filename, PARSED_SAMPLES_FOLDER_NAME,
                        BATCH_FILE_SIZE, BATCH_REQUEST_SIZE)
    print("Finished parsing data\n")

    # pre-processing the data
    print("Clustering data for train-test independence\n")
    parsed_samples_paths = get_parsed_samples_paths(out_path, PARSED_SAMPLES_FOLDER_NAME)
    run_processing.main(['-i', *parsed_samples_paths])
    print("Done clustering\n")
    parser.Clean_id_lines_from_samples(out_path, processed_folder_name,
                                       CLEAN_PROCESSED_SAMPLES)

    print("Loading the data to memory and partitioning to train and test groups\n")
    # create dataset of sequences
    char_to_idx, idx_to_char = lstm_model.char_maps()
    vocab_len = len(char_to_idx)
    train_samples, train_labels, test_samples, test_labels = make_labelled_samples(
        out_path, CLEAN_PROCESSED_SAMPLES, char_to_idx, idx_to_char,
        config["train_test_ratio"])

    # ====================== MODEL AND TRAINING ======================
    # create DataLoaders returning batches of samples
    dl_train, dl_test, _, ds_test = get_dataloaders(
        train_samples, train_labels, test_samples, test_labels)
    # get random subset text from test dataset
    subset_text = get_subset_text(ds_test, idx_to_char)

    # initialize a model and try to train it
    in_local_maximum = True
    while in_local_maximum:
        try:
            in_local_maximum = False
            model_text = ''
            model = None
            print("\nInitializing a random model with a random enough "
                  "capitalization before training\n")
            while not is_random(model_text):
                # init model
                model = lstm_model.LSTMTagger(
                    hidden_dim=config["hidden_dim"],
                    input_dim=vocab_len,
                    tagset_size=TAGSET_SIZE,
                    n_layers=config["n_layers"],
                    bidirectional=(config["bidirectional"] == 1),
                    drop_prob=config["dropout"],
                    device=device)
                model.to(device)
                model_text = get_capitalized_model_text(
                    model, subset_text.lower(), (char_to_idx, idx_to_char))

            # see how the model behaves before any training
            print("Model capitalization before training:\n")
            print(model_text)

            # train the model
            fit_res = train_model(model, subset_text, (char_to_idx, idx_to_char),
                                  dl_train, dl_test)
            print("\nFinished training\n")
        except LocalMaximumError:
            print("Stuck in local maximum of all non-epitopes! Retrying...")
            checkpoint_file = config['checkpoint_file']
            checkpoint_filename = f'{checkpoint_file}.pt'
            if os.path.isfile(checkpoint_filename):
                pathlib.Path(checkpoint_filename).unlink()
            in_local_maximum = True

    # plot the training results
    training_plot_name = config['training_plot_name']
    print(f"Saving training plot to: {training_plot_name}\n")
    fig, _ = plot_fit(fit_res)
    fig.savefig(training_plot_name)