def set_train_dists(self, train_fnames):
        """Build the per-class training histograms and PDFs.

        For every class in ``preferences.CLASSES`` and every observable in
        ``preferences.OBSERVABLES`` the histogram is reset, accumulated over
        all training files of that class, and then turned into a PDF with
        ``generic_tools.compute_pdf``.

        ``train_fnames`` is indexed by class; each entry is iterated and
        every item is passed to ``np.load``.

        Results are stored in ``self.train_histograms[c][o]`` and
        ``self.train_pdfs[c][o]``; nothing is returned.
        """
        classes = preferences.CLASSES
        observables = preferences.OBSERVABLES

        # Reset on every call: the accumulation below uses ``+=`` and would
        # otherwise add on top of the counts of a previous training run.
        for cls in classes:
            self.train_histograms[cls] = {
                obs: generic_tools.initialize_histogram(obs)
                for obs in observables
            }
            self.train_pdfs[cls] = {}

        # Accumulate one histogram per (class, observable) over the
        # training files of that class.
        for cls in classes:
            class_hists = self.train_histograms[cls]
            for fname in train_fnames[cls]:
                samples_a, samples_b = generic_tools.extract_individual_data(
                    np.load(fname))
                observed = generic_tools.compute_observables(
                    samples_a, samples_b)
                for obs in observables:
                    class_hists[obs] += generic_tools.compute_histogram_1D(
                        obs, observed[obs])

        # Convert the accumulated histograms into PDFs.
        for cls in classes:
            class_hists = self.train_histograms[cls]
            class_pdfs = self.train_pdfs[cls]
            for obs in observables:
                class_pdfs[obs] = generic_tools.compute_pdf(
                    obs, class_hists[obs])
Example #2
0
    def train(self, train_fnames):
        """Compute the 1D training PDFs for every observable and class.

        Histograms are kept in a local dictionary and accumulated over the
        files listed in ``train_fnames[c]`` (each one read with
        ``np.load``). Only the resulting PDFs are stored, in
        ``self.train_pdfs1D[o][c]``; nothing is returned.
        """
        classes = preferences.CLASSES
        observables = preferences.OBSERVABLES

        # Fresh histogram per (observable, class); also reset the PDF
        # container of each observable.
        hists = {}
        for obs in observables:
            hists[obs] = {
                cls: generic_tools.initialize_histogram(obs)
                for cls in classes
            }
            self.train_pdfs1D[obs] = {}

        # Accumulate the histograms of each class over its training files.
        for cls in classes:
            for path in train_fnames[cls]:
                samples_a, samples_b = generic_tools.extract_individual_data(
                    np.load(path))
                observed = generic_tools.compute_observables(
                    samples_a, samples_b)
                for obs in observables:
                    hists[obs][cls] += generic_tools.compute_histogram_1D(
                        obs, observed[obs])

        # Turn every accumulated histogram into a PDF.
        for obs in observables:
            obs_pdfs = self.train_pdfs1D[obs]
            obs_hists = hists[obs]
            for cls in classes:
                obs_pdfs[cls] = generic_tools.compute_pdf(obs, obs_hists[cls])
    def set_test_dists(self, test_fnames):
        """Build one histogram and one PDF per test file and observable.

        Unlike the training distributions, nothing is accumulated across
        files: every entry of ``test_fnames[c]`` gets its own histogram and
        PDF for each observable in ``preferences.OBSERVABLES``.

        Results are stored in ``self.test_histograms[c][fname][o]`` and
        ``self.test_pdfs[c][fname][o]``; nothing is returned.
        """
        classes = preferences.CLASSES
        observables = preferences.OBSERVABLES

        # Reset the containers on every call so that no entry of a previous
        # test set survives.
        for cls in classes:
            hists_by_file = {}
            pdfs_by_file = {}
            for fname in test_fnames[cls]:
                hists_by_file[fname] = {
                    obs: generic_tools.initialize_histogram(obs)
                    for obs in observables
                }
                pdfs_by_file[fname] = {obs: [] for obs in observables}
            self.test_histograms[cls] = hists_by_file
            self.test_pdfs[cls] = pdfs_by_file

        # One histogram per test file (assigned, not accumulated).
        for cls in classes:
            for fname in test_fnames[cls]:
                samples_a, samples_b = generic_tools.extract_individual_data(
                    np.load(fname))
                observed = generic_tools.compute_observables(
                    samples_a, samples_b)
                file_hists = self.test_histograms[cls][fname]
                for obs in observables:
                    file_hists[obs] = generic_tools.compute_histogram_1D(
                        obs, observed[obs])

        # Convert each per-file histogram into a PDF.
        for cls in classes:
            for fname in test_fnames[cls]:
                file_hists = self.test_histograms[cls][fname]
                file_pdfs = self.test_pdfs[cls][fname]
                for obs in observables:
                    file_pdfs[obs] = generic_tools.compute_pdf(
                        obs, file_hists[obs])
Example #4
0
    # File names under 'data/classes/'; indexed by class below.
    data_fnames = generic_tools.get_data_fnames('data/classes/')

    for c in preferences.CLASSES:
        for file_path in data_fnames[c]:
            data = np.load(file_path)
            data_A, data_B = generic_tools.extract_individual_data(data)
            obs_data = generic_tools.compute_observables(data_A, data_B)

            for o in preferences.OBSERVABLES:
                edges = get_edges(o)

                # Histogram and PDF of this single file for observable o.
                temp_hist = generic_tools.compute_histogram_1D(o, obs_data[o])
                temp_pdf = generic_tools.compute_pdf(o, temp_hist)

                histograms1D[o][c].append(temp_hist)
                pdf1D[o][c].append(temp_pdf)

                # PDF-weighted average of the edges returned by get_edges,
                # i.e. one summary value per file.
                # NOTE(review): presumably `edges` holds one position per
                # PDF bin -- np.average requires matching lengths; confirm.
                mean_pdfs[o][c].append(np.average(edges, weights=temp_pdf))

    # NOTE(review): get_fval presumably returns an F statistic and its two
    # degrees of freedom, judging by the column header -- confirm.
    print('Obs\tF_d\tv1\tv2')
    print('-------------------------------')
    for o in preferences.OBSERVABLES:
        F_d, v1, v2 = get_fval(mean_pdfs[o])
        print('{}\t{:2.3f}\t{}\t{}'.format(o, F_d, v1, v2))

    print('\n**Check this page for computing p-value from f-statistics**')
    # The scheme separator '//' was missing, which made the printed link
    # unusable when copied into a browser.
    print('http://stattrek.com/online-calculator/f-distribution.aspx ')
    elapsed_time = time.time() - start_time