def set_test_dists(self, test_fnames):
        """
        Rebuild the per-file test histograms and pdfs from scratch.

        One histogram and one pdf is stored for every (class, test file,
        observable) triple in self.test_histograms and self.test_pdfs, so
        both containers are re-created on every call.

        test_fnames maps each class in preferences.CLASSES to the file names
        of its test set; every file is read with np.load.
        """
        classes = preferences.CLASSES
        observables = preferences.OBSERVABLES

        # Pass 1: create the empty containers for every class / file.
        for cls in classes:
            self.test_histograms[cls] = {}
            self.test_pdfs[cls] = {}
            for fname in test_fnames[cls]:
                self.test_histograms[cls][fname] = {
                    obs: generic_tools.initialize_histogram(obs)
                    for obs in observables
                }
                self.test_pdfs[cls][fname] = {obs: [] for obs in observables}

        # Pass 2: replace each placeholder by the histogram of that file.
        for cls in classes:
            for fname in test_fnames[cls]:
                data_A, data_B = generic_tools.extract_individual_data(
                    np.load(fname))
                obs_data = generic_tools.compute_observables(data_A, data_B)

                file_hists = self.test_histograms[cls][fname]
                for obs in observables:
                    file_hists[obs] = generic_tools.compute_histogram_1D(
                        obs, obs_data[obs])

        # Pass 3: turn every histogram into a pdf.
        for cls in classes:
            for fname in test_fnames[cls]:
                file_hists = self.test_histograms[cls][fname]
                file_pdfs = self.test_pdfs[cls][fname]
                for obs in observables:
                    file_pdfs[obs] = generic_tools.compute_pdf(
                        obs, file_hists[obs])
    def set_train_dists(self, train_fnames):
        """
        Rebuild the per-class training histograms and pdfs from scratch.

        For every class, the histograms of all training files are summed per
        observable into self.train_histograms, and the summed histogram is
        then converted into self.train_pdfs. Because the histograms are
        accumulated, they are re-initialized on every call.

        train_fnames maps each class in preferences.CLASSES to the file names
        of its training set; every file is read with np.load.
        """
        classes = preferences.CLASSES
        observables = preferences.OBSERVABLES

        # Start from empty accumulators.
        for cls in classes:
            self.train_histograms[cls] = {}
            self.train_pdfs[cls] = {}
            for obs in observables:
                self.train_histograms[cls][obs] = \
                    generic_tools.initialize_histogram(obs)

        # Accumulate the histogram of every training file of every class.
        for cls in classes:
            class_hists = self.train_histograms[cls]
            for fname in train_fnames[cls]:
                data_A, data_B = generic_tools.extract_individual_data(
                    np.load(fname))
                obs_data = generic_tools.compute_observables(data_A, data_B)

                for obs in observables:
                    class_hists[obs] += generic_tools.compute_histogram_1D(
                        obs, obs_data[obs])

        # Convert the accumulated histograms into pdfs.
        for cls in classes:
            class_hists = self.train_histograms[cls]
            class_pdfs = self.train_pdfs[cls]
            for obs in observables:
                class_pdfs[obs] = generic_tools.compute_pdf(
                    obs, class_hists[obs])
Beispiel #3
0
    def train(self, train_fnames):
        """
        Learn one 1D pdf per (observable, class) pair from the training set.

        The histograms of all training files of a class are summed per
        observable; the sums are then converted to pdfs and stored in
        self.train_pdfs1D[observable][class]. The histograms themselves are
        local and discarded when the method returns.

        train_fnames maps each class in preferences.CLASSES to the file names
        of its training set; every file is read with np.load.
        """
        classes = preferences.CLASSES
        observables = preferences.OBSERVABLES

        # Empty accumulators, indexed as [observable][class].
        accumulated = {}
        for obs in observables:
            self.train_pdfs1D[obs] = {}
            accumulated[obs] = {
                cls: generic_tools.initialize_histogram(obs)
                for cls in classes
            }

        # Add up the histograms of every file of every class.
        for cls in classes:
            for fname in train_fnames[cls]:
                data_A, data_B = generic_tools.extract_individual_data(
                    np.load(fname))
                obs_data = generic_tools.compute_observables(data_A, data_B)
                for obs in observables:
                    accumulated[obs][cls] += \
                        generic_tools.compute_histogram_1D(obs, obs_data[obs])

        # Normalise the accumulated histograms into pdfs.
        for obs in observables:
            pdfs_of_obs = self.train_pdfs1D[obs]
            for cls in classes:
                pdfs_of_obs[cls] = generic_tools.compute_pdf(
                    obs, accumulated[obs][cls])
    def trainKDE(self, train_fnames):
        """
        Fit one Gaussian kernel density estimator per class.

        For every class, all observations of all training files are stacked
        into one sample matrix (one row per observation, one column per
        observable in preferences.OBSERVABLES order). The kernel bandwidth is
        chosen by a cross-validated grid search over
        np.linspace(preferences.BW0, preferences.BWF, preferences.NBINS_BW)
        with preferences.NCV_BW folds, and the final estimator is stored in
        self.kernels[class].

        The selected bandwidth of every class is appended as a
        "<class>\\t<bandwidth>" line to a time-stamped text file under
        results/bw_est_stability/. The directory is created if missing.

        train_fnames maps each class in preferences.CLASSES to the file names
        of its training set; every file is read with np.load.
        """
        # Local import so that this method does not depend on the file's
        # import block.
        import os

        # Create the output directory up front: otherwise a missing directory
        # is only detected after the (expensive) first grid search.
        out_dir = 'results/bw_est_stability'
        os.makedirs(out_dir, exist_ok=True)
        out_fname_bw_ests = (out_dir + '/' + time.strftime('%Y_%m_%d_%H_%M')
                             + '_KDE_BW_estimation_stability.txt')

        for c in preferences.CLASSES:
            # Reset first, so a kernel from a previous training never
            # survives a failed fit.
            self.kernels[c] = []
            values = []

            for file_path in train_fnames[c]:
                data = np.load(file_path)
                data_A, data_B = generic_tools.extract_individual_data(data)
                obs_data = generic_tools.compute_observables(data_A, data_B)

                # len(data_A) and len(data_B) are the same
                for j in range(len(data_A)):
                    # one data point = the value of every observable at j
                    values.append(
                        [obs_data[o][j] for o in preferences.OBSERVABLES])

            # Convert once; the same matrix feeds the grid search and the
            # final fit.
            samples = np.array(values)

            # optimizing kernel bandwidth with sklearn grid search
            params = {
                'bandwidth': np.linspace(preferences.BW0, preferences.BWF,
                                         preferences.NBINS_BW)
            }
            grid = GridSearchCV(KernelDensity(kernel='gaussian'), params,
                                cv=preferences.NCV_BW)

            # NOTE: with scikit-learn 0.21.dev0 the fit below emits
            # "DeprecationWarning: The default of the `iid` parameter will
            # change from True to False in version 0.22 and will be removed
            # in 0.24. This will change numeric results when test-set sizes
            # are unequal."
            grid.fit(samples)

            bw = grid.best_estimator_.bandwidth

            with open(out_fname_bw_ests, "a") as myfile:
                myfile.write('{}\t{}\n'.format(c, bw))

            self.kernels[c] = KernelDensity(bandwidth=bw, kernel='gaussian',
                                            algorithm='ball_tree')
            self.kernels[c].fit(samples)
 def train(self, train_fnames):
     """
     Learn one N-dimensional pdf per class from the training set.

     The N-dimensional histograms of all training files of a class are
     summed, and the sum is converted into self.train_pdfs_ND[class].

     train_fnames maps each class in preferences.CLASSES to the file names
     of its training set; every file is read with np.load.
     """
     # one empty accumulator per class
     accumulated = {
         cls: generic_tools.initialize_histogram_ND()
         for cls in preferences.CLASSES
     }

     for cls in preferences.CLASSES:
         # add up the histograms of every training file of this class
         for fname in train_fnames[cls]:
             data_A, data_B = generic_tools.extract_individual_data(
                 np.load(fname))
             obs_data = generic_tools.compute_observables(data_A, data_B)

             # the bin edges are returned as well but are not needed here
             file_hist, _edges = generic_tools.compute_histogram_ND(obs_data)
             accumulated[cls] += file_hist

         # normalise once all files of the class are accumulated
         self.train_pdfs_ND[cls] = generic_tools.compute_pdf_ND(
             accumulated[cls])
 def trainMA(self, train_fnames, sizeMA):
     """
     Learn one N-dimensional pdf per class and smooth it with a moving
     average (uniform) filter.

     The N-dimensional histograms of all training files of a class are
     summed, converted to a pdf, and the pdf is passed through
     ndimage.uniform_filter before being stored in
     self.train_pdfs_ND[class].

     train_fnames maps each class in preferences.CLASSES to the file names
     of its training set; every file is read with np.load.

     sizeMA is the size of the uniform filter, forwarded as the `size`
     argument of ndimage.uniform_filter. Passing None falls back to
     preferences.SIZE_MA, which is the value that used to be applied
     regardless of this argument.
     """
     # Honour the caller's filter size; previously the argument was ignored
     # and preferences.SIZE_MA was always used.
     filter_size = preferences.SIZE_MA if sizeMA is None else sizeMA

     train_histograms_ND = {}
     for c in preferences.CLASSES:
         train_histograms_ND[c] = generic_tools.initialize_histogram_ND()

     # compute histograms for each class
     for c in preferences.CLASSES:
         for file_path in train_fnames[c]:
             data = np.load(file_path)
             data_A, data_B = generic_tools.extract_individual_data(data)
             obs_data = generic_tools.compute_observables(data_A, data_B)

             # the bin edges are returned as well but are not needed here
             file_hist, _edges = generic_tools.compute_histogram_ND(obs_data)

             train_histograms_ND[c] += file_hist

         raw_pdf = generic_tools.compute_pdf_ND(train_histograms_ND[c])
         self.train_pdfs_ND[c] = ndimage.uniform_filter(raw_pdf,
                                                        size=filter_size)
Beispiel #7
0
    def estimate(self, alpha, test_fnames):
        """
        Classify every test file and accumulate the results in self.conf_mat.

        Parameters:
            alpha: forwarded unchanged to self.compute_probabilities.
            test_fnames: maps each ground-truth class in preferences.CLASSES
                to the file names of its test set; every file is read with
                np.load.

        Raises:
            ZeroDivisionError: if a test file yields no observations (the
                votes cannot be scaled to probabilities).

        Performance is evaluated in various ways.

        -----------------------------------------------------------------------

        event-based: treats each point on trajectory as an event. For each event,
        we make an instantaneous decision (koibito, yujin, etc). For instance, we
        have post probabilities as follows:

            time = t [K    D    Y    Kz]
            time = 0 [0.45 0.20 0.10 0.25]
            time = 1 [0.20 0.10 0.45 0.25]
            time = 2 [0.45 0.20 0.10 0.25]
            time = 3 [0.20 0.45 0.10 0.25]
            time = 4 [0.45 0.20 0.10 0.25]
            time = 5 [0.25 0.20 0.10 0.45]
            time = 6 [0.45 0.20 0.10 0.25]
            time = 7 [0.20 0.45 0.10 0.25]

        Each vector involves (post) probabilities for koibito (K), doryo (D),
        yujin (Y), kazoku (Kz), respectively.

        Then the instantaneous decisions will be:
                [K Y K D K Kz K D]
        -----------------------------------------------------------------------
        event-based + voting: picks the class with highest number of votes among
        all events. So eventually the dyad has a single label (discrete output)
        For the above case, the votes are as follows:
            K = 4
            D = 2
            Y = 1
            Kz= 1

        So the output will be K. If this decision is correct it will give a
        1, otherwise a 0. Actually in the confusion matrix, I also store the
        exact mistakes (off-diagonal).

        -----------------------------------------------------------------------

        event-based + empirical probability: the instantaneous decisions are
        expressed as empirical probabilities.

        For instance, for the above example, the empirical probabilities are:
            [4/8 2/8 1/8 1/8]
        for koibito (K), doryo (D), yujin (Y), kazoku (Kz), respectively.

        I use a confusion matrix to see the off-diagonal.
        -----------------------------------------------------------------------
        trajectory-based: treats a trajectory as a single entity.

        See below for details of :
            trajectory-based + prob
            trajectory-based + binary
            trajectory-based + confidence

        -----------------------------------------------------------------------
        trajectory-based + prob: returns the probabilities of each possible
        outcome as an average of probabilities at each time instant.

        For the above case, we compute cumulative probabilities as an average of
        probabilities at each time instant as follows:

            K = mean([.45, .20, .45, .20, .45, .25, .45, .20]) = 0.33125
            D = mean([.20, .10, .20, .45, .20, .20, .20, .45]) = 0.25
            Y = mean([.10, .45, .10, .10, .10, .10, .10, .10]) = 0.14375
            Kz= mean([.25, .25, .25, .25, .25, .45, .25, .25]) = 0.275

        -----------------------------------------------------------------------
        trajectory-based + binary: returns the class with highest probability as
        the output (decision) class.

        For the above case, the decision will be K.

            K = 0.33125
            D = 0.25
            Y = 0.14375
            Kz= 0.275

            argmax([K, D, Y, Kz]) = K

        -----------------------------------------------------------------------
        trajectory-based + confidence: returns a confidence metric which is
        defined as below:
            conf = 1 - (p_max - p_gt)

        Here p_max is the highest probability (among the probabilities associated
        with each possible outcome (ie class)). On the other hand, p_gt is the
        probability that is associated with the gt class. Since p_max >= p_gt,
        the difference is never negative.

        For the above case, assuming that the gt class is D, conf will be:
            conf = 1 - (0.33125 - 0.25)
                 = 0.91875

        This value is 1 when the highest probability is associated with the gt
        class. When another class other than the gt class has a higher
        probability, it gives the extent of the difference.

        Values close to 1 indicate that there is a mistake but not that big.

        -----------------------------------------------------------------------
        collective: treats all observations from each gt class equally. Namely,
        it boils down to four long trajectories for koibito, doryo, yujin and
        kazoku.

        See below for details of :
            collective + confidence
            collective + binary

        -----------------------------------------------------------------------
        collective + confidence: I compute confidence at each single observation
        point (ie trajectory point)
        I do not store all these values. Instead, I store only the variables to
        compute statistics. Namely:
            the number of observations
            the sum of confidence values
            the sum of squares of confidence values

        -----------------------------------------------------------------------
        collective + binary: At each observation point, I make a binary decision
        and store the number of success and fails.

        The keys in the dictionary are:
            n_suc
            n_fail

        """
        by_probability = operator.itemgetter(1)

        for class_gt in preferences.CLASSES:

            for test_fname in test_fnames[class_gt]:

                data = np.load(test_fname)
                data_A, data_B = generic_tools.extract_individual_data(data)
                N_observations = len(data_A)  # len(data_B) is the same
                obs_data = generic_tools.compute_observables(data_A, data_B)

                bins = {
                    o: generic_tools.find_bins(o, obs_data[o])
                    for o in preferences.OBSERVABLES
                }
                p_posts = self.compute_probabilities(bins, alpha)

                ###############################################################
                #
                # single pass over the observations
                #
                # The event-based votes and the collective confidence sums
                # both need the instantaneous winner, so they are gathered in
                # the same loop.
                #
                n_votes = {class_temp: 0 for class_temp in preferences.CLASSES}
                cum_confidence = 0
                cum_confidence_sq = 0

                for i in range(N_observations):
                    # instantaneous probabilities of every class
                    p_inst = {
                        class_temp: p_posts[class_temp][i]
                        for class_temp in preferences.CLASSES
                    }

                    # the vote goes to the class with highest prob; p_est is
                    # that highest probability
                    class_est, p_est = max(p_inst.items(), key=by_probability)
                    n_votes[class_est] += 1

                    # instantaneous confidence
                    conf_inst = 1 - (p_est - p_inst[class_gt])
                    cum_confidence += conf_inst
                    cum_confidence_sq += conf_inst * conf_inst

                ###############################################################
                #
                # event based
                #
                class_est_voting_winner = max(n_votes.items(),
                                              key=by_probability)[0]
                self.conf_mat['event_based']['voting'][class_gt][
                    class_est_voting_winner] += 1

                # scale the votes to 1, such that they represent probabilities
                factor = 1.0 / sum(n_votes.values())

                for class_est in preferences.CLASSES:
                    # class_est is not really the 'output decision'
                    # here I only keep the probability associated with every
                    # possible outcome
                    self.conf_mat['event_based']['emp_probs'][class_gt][
                        class_est] += n_votes[class_est] * factor

                ###############################################################
                #
                # trajectory-based
                #
                p_mean = {}
                for class_est in preferences.CLASSES:
                    # class_est is not really the 'output decision'
                    p_mean[class_est] = np.mean(p_posts[class_est])
                    self.conf_mat['trajectory_based']['prob'][class_gt][
                        class_est].append(p_mean[class_est])

                c_out, p_max = max(p_mean.items(), key=by_probability)
                self.conf_mat['trajectory_based']['binary'][class_gt][
                    c_out] += 1

                confidence = 1 - (p_max - p_mean[class_gt])
                self.conf_mat['trajectory_based']['confidence'][
                    class_gt].append(confidence)

                ###############################################################
                #
                # collectively, ie dumping all observations from each class in
                # one set, as if it is one long trajectory
                #
                # Labels are compared by value (!=), not by identity.
                n_fail = sum(n_votes[class_est]
                             for class_est in preferences.CLASSES
                             if class_est != class_gt)

                self.conf_mat['collective']['binary'][class_gt][
                    'n_suc'] += n_votes[class_gt]
                self.conf_mat['collective']['binary'][class_gt][
                    'n_fail'] += n_fail

                ###############################################################
                #
                # collective + confidence
                #
                self.conf_mat['collective']['confidence'][class_gt][
                    'cum_n_observations'] += N_observations
                self.conf_mat['collective']['confidence'][class_gt][
                    'cum_confidence'] += cum_confidence
                self.conf_mat['collective']['confidence'][class_gt][
                    'cum_confidence_sq'] += cum_confidence_sq
    def estimate(self, alpha, filtering, test_fnames):
        """
        Classify every test file with the N-dimensional model and accumulate
        the results in self.conf_mat.

        Parameters:
            alpha: forwarded unchanged to the probability computation.
            filtering: selects how posterior probabilities are computed:
                'none' or 'MA' -> self.compute_probabilities_ND_woKDE on the
                                  bins found by generic_tools.find_bins_ND
                'KDE'          -> self.compute_probabilities_ND_wKDE
                ('none' and 'MA' take the same path here; presumably the
                moving average is already applied to the trained pdfs --
                TODO confirm.)
            test_fnames: maps each ground-truth class in preferences.CLASSES
                to the file names of its test set; every file is read with
                np.load.

        Raises:
            ValueError: if filtering is none of the values above.
            ZeroDivisionError: if a test file yields no observations (the
                votes cannot be scaled to probabilities).

        Updated entries of self.conf_mat, for ground-truth class gt:
            ['event_based']['voting'][gt][winner]       += 1
            ['event_based']['emp_probs'][gt][c]         += share of votes of c
            ['trajectory_based']['prob'][gt][c]         .append(mean prob of c)
            ['trajectory_based']['binary'][gt][c_out]   += 1
            ['trajectory_based']['confidence'][gt]      .append(1 - (p_max - p_gt))
            ['collective']['binary'][gt]['n_suc'/'n_fail']
            ['collective']['confidence'][gt]['cum_n_observations',
                'cum_confidence', 'cum_confidence_sq']
        """
        by_probability = operator.itemgetter(1)

        for class_gt in preferences.CLASSES:

            for test_fname in test_fnames[class_gt]:

                data = np.load(test_fname)
                data_A, data_B = generic_tools.extract_individual_data(data)
                N_observations = len(data_A)  # len(data_B) is the same
                obs_data = generic_tools.compute_observables(data_A, data_B)

                # Strings are compared by value: identity ('is') only works
                # when both strings happen to be the same interned object.
                if filtering in ('none', 'MA'):
                    bins = generic_tools.find_bins_ND(obs_data)
                    p_posts = self.compute_probabilities_ND_woKDE(bins, alpha)

                elif filtering == 'KDE':
                    p_posts = self.compute_probabilities_ND_wKDE(
                        N_observations, obs_data, alpha)

                else:
                    # Fail here with a clear message instead of printing and
                    # crashing later on the undefined p_posts.
                    raise ValueError(
                        "undefined filtering {!r}: expected 'none', 'KDE' "
                        "or 'MA'".format(filtering))

                ###############################################################
                #
                # single pass over the observations
                #
                # The event-based votes and the collective confidence sums
                # both need the instantaneous winner, so they are gathered in
                # the same loop.
                #
                n_votes = {class_temp: 0 for class_temp in preferences.CLASSES}
                cum_confidence = 0
                cum_confidence_sq = 0

                for i in range(N_observations):
                    # instantaneous probabilities of every class
                    p_inst = {
                        class_temp: p_posts[class_temp][i]
                        for class_temp in preferences.CLASSES
                    }

                    # the vote goes to the class with highest prob; p_est is
                    # that highest probability
                    class_est, p_est = max(p_inst.items(), key=by_probability)
                    n_votes[class_est] += 1

                    # instantaneous confidence
                    conf_inst = 1 - (p_est - p_inst[class_gt])
                    cum_confidence += conf_inst
                    cum_confidence_sq += conf_inst * conf_inst

                ###############################################################
                #
                # event based
                #
                class_est_voting_winner = max(n_votes.items(),
                                              key=by_probability)[0]
                self.conf_mat['event_based']['voting'][class_gt][
                    class_est_voting_winner] += 1

                # scale the votes to 1, such that they represent probabilities
                factor = 1.0 / sum(n_votes.values())

                for class_est in preferences.CLASSES:
                    # class_est is not really the 'output decision'
                    # here I only keep the probability associated with every
                    # possible outcome
                    self.conf_mat['event_based']['emp_probs'][class_gt][
                        class_est] += n_votes[class_est] * factor

                ###############################################################
                #
                # trajectory-based
                #
                p_mean = {}
                for class_est in preferences.CLASSES:
                    # class_est is not really the 'output decision'
                    p_mean[class_est] = np.mean(p_posts[class_est])
                    self.conf_mat['trajectory_based']['prob'][class_gt][
                        class_est].append(p_mean[class_est])

                c_out, p_max = max(p_mean.items(), key=by_probability)
                self.conf_mat['trajectory_based']['binary'][class_gt][
                    c_out] += 1

                confidence = 1 - (p_max - p_mean[class_gt])
                self.conf_mat['trajectory_based']['confidence'][
                    class_gt].append(confidence)

                ###############################################################
                #
                # collectively, ie dumping all observations from each class in
                # one set, as if it is one long trajectory
                #
                # Labels are compared by value (!=), not by identity.
                n_fail = sum(n_votes[class_est]
                             for class_est in preferences.CLASSES
                             if class_est != class_gt)

                self.conf_mat['collective']['binary'][class_gt][
                    'n_suc'] += n_votes[class_gt]
                self.conf_mat['collective']['binary'][class_gt][
                    'n_fail'] += n_fail

                ###############################################################
                #
                # collective + confidence
                #
                self.conf_mat['collective']['confidence'][class_gt][
                    'cum_n_observations'] += N_observations
                self.conf_mat['collective']['confidence'][class_gt][
                    'cum_confidence'] += cum_confidence
                self.conf_mat['collective']['confidence'][class_gt][
                    'cum_confidence_sq'] += cum_confidence_sq
Beispiel #9
0
    # Collect, per observable and class, the histogram, the pdf and the
    # pdf-weighted average of the edges of every data file.
    # NOTE(review): histograms1D, pdf1D and mean_pdfs must already exist as
    # dicts at this point; their creation is not visible in this fragment.
    for o in preferences.OBSERVABLES:
        histograms1D[o], pdf1D[o], mean_pdfs[o] = {}, {}, {}
        for c in preferences.CLASSES:
            # one list entry per data file will be appended below
            histograms1D[o][c] = []
            pdf1D[o][c] = []
            mean_pdfs[o][c] = []
                    
                   
    data_fnames = generic_tools.get_data_fnames('data/classes/')

    for c in preferences.CLASSES: 
        
        for file_path in data_fnames[c]:
            
            data = np.load(file_path)
            data_A, data_B = generic_tools.extract_individual_data(data)
            obs_data = generic_tools.compute_observables(data_A, data_B)
            
            for o in preferences.OBSERVABLES:
                
                # get_edges is defined outside this fragment; presumably it
                # returns one value per histogram bin -- TODO confirm
                edges = get_edges(o)
                
                # histogram and pdf of this single file (not accumulated)
                temp_hist = generic_tools.compute_histogram_1D(o, obs_data[o])
                temp_pdf = generic_tools.compute_pdf(o, temp_hist)
                
                histograms1D[o][c].append(temp_hist )
                pdf1D[o][c].append( temp_pdf )
                
                # average of the edges weighted by the pdf of this file
                mean_pdfs[o][c].append( np.average(edges, weights=temp_pdf) )
            
    # header of a tab-separated table; the rows are not part of this fragment
    print('Obs\tF_d\tv1\tv2')