Example 1
def plot_graphs(classifier_name):
    fit_result = None
    checkpoint_dir = os.path.join('.', 'checkpoints')
    checkpoint_file = os.path.join(checkpoint_dir, classifier_name)
    # Load the checkpoint if a model was already trained with the same hyperparameters
    if os.path.isfile(checkpoint_file):
        fit_result = torch.load(checkpoint_file,
                                map_location=torch.device('cpu'))
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    if device.type == 'cpu':
        plot_fit(fit_result, 'RNN classifier graph', legend='total')
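
This snippet only reads checkpoints; the save side is not shown. A minimal sketch of what producing a compatible checkpoint could look like (the structure of the fit result is an assumption here, since plot_fit's expected input is not part of the snippet):

import os
import torch

# Stand-in fit result; in practice this would come from training.
fit_result = {'num_epochs': 2, 'train_loss': [1.0, 0.5]}

checkpoint_dir = os.path.join('.', 'checkpoints')
os.makedirs(checkpoint_dir, exist_ok=True)

# plot_graphs('rnn') can later locate and load this file on any device.
torch.save(fit_result, os.path.join(checkpoint_dir, 'rnn'))
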
Example 2
    def test_gp(plot=False, method='full'):
        """
        Compares model prediction with an exact GP (without optimisation)
        """
        # note that this test fails without latent noise in the case of full Gaussian
        np.random.seed(111)
        num_input_samples = 10
        num_samples = 10000
        gaussian_sigma = .2
        X, Y, kernel = DataSource.normal_generate_samples(num_input_samples, gaussian_sigma, 1)
        # replace the generated kernel with a fixed RBF kernel
        kernel = [GPy.kern.RBF(1, variance=1., lengthscale=np.array((1.,)))]

        if method == 'full':
            m = SAVIGP_SingleComponent(X, Y, num_input_samples,
                                       UnivariateGaussian(np.array(gaussian_sigma)),
                                       kernel, num_samples, None, 0.001, True, True)
        elif method == 'diag':
            m = SAVIGP_Diag(X, Y, num_input_samples, 1,
                            UnivariateGaussian(np.array(gaussian_sigma)),
                            kernel, num_samples, None, 0.001, True, True)

        # update model using optimal parameters
        # gp = SAVIGP_Test.gpy_prediction(X, Y, gaussian_sigma, kernel[0])
        # gp_mean, gp_var = gp.predict(X, full_cov=True)
        # m.MoG.m[0,0] = gp_mean[:,0]
        # m.MoG.update_covariance(0, gp_var - gaussian_sigma * np.eye(10))

        try:
            folder_name = 'test' + '_' + ModelLearn.get_ID()
            logger = ModelLearn.get_logger(folder_name, logging.DEBUG)

            Optimizer.optimize_model(m, 10000, logger, ['mog'])
        except KeyboardInterrupt:
            pass
        sa_mean, sa_var = m.predict(X)
        gp = SAVIGP_Test.gpy_prediction(X, Y, gaussian_sigma, deepcopy(kernel[0]))
        gp_mean, gp_var = gp.predict(X)
        mean_error = (np.abs(sa_mean - gp_mean)).sum() / sa_mean.shape[0]
        var_error = (np.abs(sa_var - gp_var)).sum() / gp_var.T.shape[0]
        if mean_error < 0.1:
            print(bcolors.OKBLUE, "passed: mean gp prediction ", mean_error)
        else:
            print(bcolors.WARNING, "failed: mean gp prediction ", mean_error)
        print(bcolors.ENDC)
        if var_error < 0.1:
            print(bcolors.OKBLUE, "passed: var gp prediction ", var_error)
        else:
            print(bcolors.WARNING, "failed: var gp prediction ", var_error)
        print(bcolors.ENDC)
        if plot:
            plot_fit(m)
            gp.plot()
            show(block=True)
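
The pass/fail thresholds above are mean absolute errors between the variational and exact GP predictions, averaged over test points. A self-contained sketch of the same metric (the arrays are invented placeholders):

import numpy as np

sa_mean = np.array([[0.10], [0.22], [0.31]])  # variational predictions
gp_mean = np.array([[0.12], [0.20], [0.30]])  # exact GP predictions

# Mean absolute error per test point, as in the check above.
mean_error = np.abs(sa_mean - gp_mean).sum() / sa_mean.shape[0]
print(mean_error < 0.1)  # True -> "passed"
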
Example 3
def print_with_plot(fit_result: FitResult):
    print(fit_result)
    plot_fit(fit_result)
# bgdsignal=ydata[bgd]/np.array([reference[bgd]]*len(ydata.T)).T
# print bgdsignal
bgdsignal = np.average(ydata[bgd] / np.array([reference[bgd]] * len(ydata.T)).T / 53 * 1000, axis=0)
# plt.plot(xdata,bgdsignal,'-')
# plt.plot(xdata,data.T[1],'-')
# print len(ydata.T)
# plt.show()
# plt.close()
# print bgdsignal
# signal=np.arange(9,44)
signal = ds.signalstack()
sig = ydata[signal] / np.array([reference[signal]] * len(ydata.T)).T / 53 * 1000
# sig_bgd_pair.shape(2,24)
# print sig_bgd_pair
binsize = xdata[3] - xdata[2]

print "#Num kobs(1/ms) height_err m1 c1 popt.a popt.b popt.c perr.a perr.b perr.c residual"
for i in range(len(sig)):
    # Sig=(data.T[sig_bgd_pair[i][0]+1]/reference[sig_bgd_pair[i][0]]-data.T[sig_bgd_pair[i][1]+1]/reference[sig_bgd_pair[i][1]])/53*1000
    # Time=data.T[0]
    fit_data = np.vstack((xdata, sig[i] - bgdsignal))
    raw_data = np.copy(fit_data)
    # print fit_data
    param_opt = fitting(fit_data.T, binsize)
    kobs = param_opt.popt[1]
    height_err = param_opt.popt[0] + param_opt.popt[2] - param_opt.trigger_h
    print(signal[i], kobs, height_err, param_opt.m1, param_opt.c1,
          " ".join(map(str, param_opt.popt)),
          " ".join(map(str, param_opt.perr)), param_opt.residual)
    plot_fit(raw_data.T, param_opt, str(signal[i]) + "-fig", binsize)
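
Each iteration stacks the time axis with a background-corrected trace into a 2×N array and passes its transpose to the fitter. A small sketch of that layout (all values invented):

import numpy as np

xdata = np.linspace(0.0, 1.0, 5)              # time axis
trace = np.array([0.9, 0.7, 0.5, 0.4, 0.3])   # one signal trace
bgdsignal = np.full(5, 0.1)                   # averaged background

# Rows are (time, corrected signal) -> shape (2, N); fitting() receives (N, 2).
fit_data = np.vstack((xdata, trace - bgdsignal))
print(fit_data.T.shape)  # (5, 2)
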
Example 5
def print_fit_res(model_name: str):
    checkpoint_filename = f'{model_name}.pt'
    saved_state = torch.load(checkpoint_filename, 'cpu')
    fit_res = saved_state['fit_result']
    plot_fit(fit_res)
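
Here plot_fit receives one entry of a saved-state dict rather than the whole checkpoint. A sketch of the matching save call (the placeholder values and any keys besides 'fit_result' are assumptions):

import torch

model_name = 'lstm'  # hypothetical model name
saved_state = {
    'fit_result': {'num_epochs': 1, 'train_loss': [0.9]},  # placeholder
    # ... other training state could live alongside it ...
}
torch.save(saved_state, f'{model_name}.pt')  # read back by print_fit_res
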
Example 6
def main():
    '''Produces results for report.'''

    # simulations flag
    simulations = 0

    # if analysing simulations
    if simulations == 1:
        detector = 'BGO'
        mus, mu_errs, Rs, R_errs, e_fs, e_f_errs, e_ps, e_p_errs, Gs, m, dEs, d_errs, sigmas, s_errs = sims()
    else:

        # name detector
        detector = 'CdTe'

        # empty arrays for plotting
        Rs = []
        R_errs = []
        ang_Rs = []
        ang_R_errs = []
        e_fs = []
        e_f_errs = []
        ang_e_fs = []
        ang_e_f_errs = []
        e_ps = []
        e_p_errs = []
        ang_e_ps = []
        ang_e_p_errs = []
        angles = []
        Gs = []
        ang_Gs = []
        mu_errs = []
        s_errs = []
        dEs = []
        d_errs = []
        sigmas = []
        Cs = []
        C_errs = []
        C_thetas = []
        C_theta_errs = []

        # get clock drift
        drift = get_detector_drift(detector)

        # list of sample files
        if detector == 'NaI':  # NaI has different file format
            samples = ['241Am.txt', '133Ba.txt', '137Cs.txt', '60Co.txt']
            m, c, mus, ns = find_calibration(detector, samples)
        elif detector == 'CdTe':  # different file format, calibrate CdTe with 241Am
            samples = ['241Am.mca', '133Ba.mca']
            m, c, mus, ns = find_calibration(detector, ['241Am.mca'])
        else:  # HPGe, BGO
            samples = ['241Am.spe', '133Ba.spe', '137Cs.spe', '60Co.spe']
            m, c, mus, ns = find_calibration(detector, samples)

        # for each sample listed
        for sample in samples:
            source = str(sample.split('.', 1)[0])  # find sample name

            # get list of angles at which off-axis measurements have been made
            angles = get_angles(detector, source)

            # get on-axis angle
            on_axis = int(get_on_axis_angle(detector, source))

            # for each angle
            for angle in angles:
                # create filename
                filename = angle + '_' + sample

                # get live time from file
                t_l = get_live_time(filename)

                # for each combination of source and detector
                # get number of peaks of interest in sample spectrum
                no_peaks = get_no_peaks(detector, source)

                # calculate time since initial activity measurement
                lifetime = get_lifetime(detector, filename, drift)
                # calculate current source activity
                activity, a_err = calculate_activity(source, lifetime)

                # for each peak
                for no_peak in no_peaks:
                    # fit it
                    params, popt, perrs = fit_peak(detector, source, filename,
                                                   no_peak, angle)
                    # get gaussian parameters
                    mu, sigma, amp = popt[0], popt[1], popt[2]
                    # get actual energy n and decay fraction f
                    n, n_err, f = get_decay_fraction(
                        source, no_peak)  # gets actual energy

                    # calculate resolution for peak centered on given energy n
                    R, R_err, dE, d_error = calculate_resolution(
                        sigma, perrs[1], n, n_err, m)

                    channel_min, channel_max = get_roi(
                        detector, source, no_peak)  # get region of interest
                    # get greatest amplitude of gaussian curve, i.e. remove background
                    amplitude = remove_background(channel_min, channel_max, mu,
                                                  sigma, amp)
                    amp_err = perrs[2] / popt[2] * amplitude

                    # calculate FEP efficiency
                    e_f, e_f_err, C, C_err = calculate_e_f(
                        sigma * m, perrs[1] * m, amplitude, amp_err, activity,
                        a_err, t_l, f)

                    # calculate intrinsic peak efficiency
                    e_p, e_p_err, G = calculate_e_p_G(e_f, e_f_err, detector,
                                                      angle)

                    # add to on-axis arrays if angle is on-axis
                    if angle == str(on_axis):

                        Rs.append(R * 100)
                        R_errs.append(R_err * 100)

                        e_fs.append(e_f * 100)
                        e_f_errs.append(e_f_err * 100)
                        e_ps.append(e_p * 100)
                        e_p_errs.append(e_p_err * 100)

                        Gs.append(G)

                        mu_errs.append(perrs[0])
                        sigmas.append(popt[1])
                        s_errs.append(perrs[1])

                        dEs.append(dE)
                        d_errs.append(d_error)

                        # for calculating ratio
                        C_on_axis = C

                        # plot fit on-axis
                        plot_fit(channel_min, channel_max, detector, source,
                                 no_peak, mu, sigma, amp, m, c)

                    if len(angles) > 6:  # if off-axis
                        # if value not already in array, add it and its errors
                        if float(e_f) not in ang_e_fs:
                            ang_e_fs.append(e_f * 100)
                            ang_e_f_errs.append(e_f_err * 100)

                        if float(R) not in ang_Rs:
                            ang_Rs.append(R * 100)
                            ang_R_errs.append(R_err * 100)

                        if e_p not in ang_e_ps:
                            ang_e_ps.append(e_p * 100)
                            ang_e_p_errs.append(e_p_err * 100)
                            ang_Gs.append(G)

                        if C not in Cs:
                            Cs.append(C)
                            C_errs.append(C_err)
                            C_thetas.append(C / C_on_axis)
                            C_theta_errs.append(C_err / C_on_axis)

            # convert list of angles to ints
            int_angles = list(map(int, angles))

            # for off-axis case, create plots vs angle
            if len(angles) > 6:

                write_count_errors(detector, source, int_angles, Cs, C_errs,
                                   C_thetas, C_theta_errs)

                # more than one peak per angle, i.e. two interleaved values
                if len(int_angles) != len(ang_Rs):

                    # separate values for two peaks for plotting
                    # create resolution vs angle plot
                    plot_angle_vs_resolution(detector, source, int_angles,
                                             ang_Rs[::2], 0)
                    plot_angle_vs_resolution(detector, source, int_angles,
                                             ang_Rs[1::2], 1)

                    # create FEP efficiency vs angle plot
                    plot_angle_vs_fep(detector, source, int_angles,
                                      ang_e_fs[::2], 0)
                    plot_angle_vs_fep(detector, source, int_angles,
                                      ang_e_fs[1::2], 1)

                    # create geometry vs angle plot
                    plot_angle_vs_geometry(detector, source, int_angles,
                                           ang_Gs[::2], 0)
                    plot_angle_vs_geometry(detector, source, int_angles,
                                           ang_Gs[1::2], 1)

                    # create intrinsic peak efficiency vs angle plot
                    plot_angle_vs_ipe(detector, source, int_angles,
                                      ang_e_ps[::2], 0)
                    plot_angle_vs_ipe(detector, source, int_angles,
                                      ang_e_ps[1::2], 1)

                else:
                    # create resolution vs angle plot
                    plot_angle_vs_resolution(detector, source, int_angles,
                                             ang_Rs, 0)

                    # create FEP efficiency vs angle plot
                    plot_angle_vs_fep(detector, source, int_angles, ang_e_fs,
                                      0)

                    # create geometry vs angle plot
                    plot_angle_vs_geometry(detector, source, int_angles,
                                           ang_Gs, 0)

                    # create intrinsic peak efficiency vs angle plot
                    plot_angle_vs_ipe(detector, source, int_angles, ang_e_ps,
                                      0)

                # clear array for use with next source
                ang_Rs.clear()
                ang_e_fs.clear()
                ang_Gs.clear()
                ang_e_ps.clear()

    # for simulated detector
    if simulations == 1:
        detector = 'simulated ' + detector

    # plot resolution curve for detector
    plot_resolution_curve(detector, mus, Rs)

    # plot FEP efficiency curve for detector
    plot_fp_efficiency_curve(detector, mus, e_fs)

    # plot geometry curve for detector
    plot_geometry_curve(detector, mus, Gs)

    # plot intrinsic efficiency curve for detector
    plot_ip_efficiency_curve(detector, mus, e_ps)

    # for writing to file, uncalibrate to get raw data
    # (reassigning the loop variable would have no effect, so rebuild the lists)
    mus = [mu / m for mu in mus]
    mu_errs = [mu_err / m for mu_err in mu_errs]

    # write to file for tables
    write_resolution_errors(detector, mus, mu_errs, dEs, d_errs, sigmas,
                            s_errs, Rs, R_errs)
    write_efficiency_errors(detector, mus, mu_errs, e_fs, e_f_errs, e_ps,
                            e_p_errs)
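
When a source contributes two peaks per angle, the per-angle arrays in the example above interleave the two peaks, and the [::2]/[1::2] slices split them apart for plotting. A tiny illustration (numbers invented):

# Values appended as peak0, peak1, peak0, peak1, ... for each angle.
ang_Rs = [7.1, 9.4, 7.3, 9.8, 7.6, 10.1]
print(ang_Rs[::2])   # peak 0: [7.1, 7.3, 7.6]
print(ang_Rs[1::2])  # peak 1: [9.4, 9.8, 10.1]
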
def main():
    # downloading the data
    print("Downloading data...\n")
    download.download_data(out_path, TCELL_CSV_FILENAME, TCELL_DOWNLOAD_URL)
    downloaded_filename = TCELL_CSV_FILENAME

    # parsing the downloaded data
    print("Organizing data, checking for duplicates (it might take a while...)\n")
    parser.make_samples(out_path, downloaded_filename,
                        PARSED_SAMPLES_FOLDER_NAME, BATCH_FILE_SIZE, BATCH_REQUEST_SIZE)
    print("Finished parsing data\n")
    # pre-processing the data
    print("Clustering data for train-test independence\n")
    parsed_samples_paths = get_parsed_samples_paths(
        out_path, PARSED_SAMPLES_FOLDER_NAME)
    run_processing.main(['-i', *parsed_samples_paths])
    print("Done clustering\n")
    parser.Clean_id_lines_from_samples(
        out_path, processed_folder_name, CLEAN_PROCESSED_SAMPLES)
    print("Loading the data to memory and partitioning to train and test groups\n")

    # Create dataset of sequences
    char_to_idx, idx_to_char = lstm_model.char_maps()
    vocab_len = len(char_to_idx)

    train_samples, train_labels, test_samples, test_labels = make_labelled_samples(
        out_path, CLEAN_PROCESSED_SAMPLES, char_to_idx, idx_to_char, config["train_test_ratio"])

    # ====================== MODEL AND TRAINING ======================

    # Create DataLoader returning batches of samples.
    dl_train, dl_test, _, ds_test = get_dataloaders(
        train_samples, train_labels, test_samples, test_labels)
    # get random subset text from test dataset
    subset_text = get_subset_text(ds_test, idx_to_char)
    # initialize a model and try to train it
    in_local_maximum = True
    while in_local_maximum:
        try:
            in_local_maximum = False
            model_text = ''
            model = None
            print(
                "\nInitializing a random model with a random enough capitalization before training\n")
            while not is_random(model_text):
                # init model
                model = lstm_model.LSTMTagger(hidden_dim=config["hidden_dim"], input_dim=vocab_len, tagset_size=TAGSET_SIZE,
                                              n_layers=config["n_layers"], bidirectional=(config["bidirectional"] == 1), drop_prob=config["dropout"], device=device)
                model.to(device)
                model_text = get_capitalized_model_text(
                    model, subset_text.lower(), (char_to_idx, idx_to_char))

            # see how model works before training at all
            print("Model capitalization before training:\n")
            print(model_text)
            # train the model
            fit_res = train_model(model, subset_text, (char_to_idx,
                                                       idx_to_char), dl_train, dl_test)
            print("\nFinished training\n")
        except LocalMaximumError:
            print("Stuck in local maximum of all non-epitopes! Retrying...")
            checkpoint_file = config['checkpoint_file']
            checkpoint_filename = f'{checkpoint_file}.pt'
            if os.path.isfile(checkpoint_filename):
                pathlib.Path(checkpoint_filename).unlink()
            in_local_maximum = True
    # plot the training results
    training_plot_name = config['training_plot_name']
    print(f"Saving training plot to: {training_plot_name}\n")
    fig, _ = plot_fit(fit_res)
    fig.savefig(training_plot_name)
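
The training loop above retries from a fresh model whenever it detects collapse into the all-non-epitope local maximum. A stripped-down, runnable sketch of the same retry pattern (only the LocalMaximumError name is taken from the snippet; everything else is placeholder):

class LocalMaximumError(Exception):
    """Raised when training collapses to a degenerate solution."""

def train_once(attempt):
    # Placeholder trainer: fail on the first attempt, succeed after.
    if attempt == 0:
        raise LocalMaximumError
    return 'fit_result'

stuck, attempt = True, 0
while stuck:
    try:
        stuck = False
        result = train_once(attempt)
    except LocalMaximumError:
        attempt += 1
        stuck = True  # discard any stale checkpoint and retry
print(result)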