Example #1
import numpy as np

# CLT and MIXTURE_CLT are classes defined elsewhere in the project.
def load_mt(in_dir, data_name):
    """Reload a saved mixture of Chow-Liu trees from an .npz file."""
    infile = in_dir + data_name + '.npz'
    # allow_pickle is required to restore the array of component dicts
    # (NumPy >= 1.16.3 defaults it to False).
    reload_dict = np.load(infile, allow_pickle=True)

    reload_mix_clt = MIXTURE_CLT()
    reload_mix_clt.mixture_weight = reload_dict['weights']
    reload_mix_clt.n_components = reload_mix_clt.mixture_weight.shape[0]

    reload_clt_component = reload_dict['clt_component']

    # Rebuild each Chow-Liu tree component from its saved fields.
    for i in range(reload_mix_clt.n_components):
        clt_c = CLT()
        curr_component = reload_clt_component[i]
        clt_c.xyprob = curr_component['xyprob']
        clt_c.xprob = curr_component['xprob']
        clt_c.topo_order = curr_component['topo_order']
        clt_c.parents = curr_component['parents']
        clt_c.log_cond_cpt = curr_component['log_cond_cpt']
        clt_c.cond_cpt = np.exp(clt_c.log_cond_cpt)  # fresh array, not a view

        reload_mix_clt.clt_list.append(clt_c)

    return reload_mix_clt
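
A minimal usage sketch; the directory and dataset name below are placeholders for wherever the matching save routine wrote its .npz file:

# Hypothetical call: '../module/' and 'nltcs' are placeholder arguments.
mix_clt = load_mt('../module/', 'nltcs')
print(mix_clt.n_components)    # number of Chow-Liu tree components
print(mix_clt.mixture_weight)  # one mixture weight per component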
Example #2
    def learn_structure_weight(self, dataset, weights, ids, smooth):
        """Recursively learn an OR tree over binary variables: condition on
        the highest-scoring variable, split the data on its value, and
        recurse; learn a Chow-Liu tree once a leaf condition is met."""
        curr_depth = self.nvariables - dataset.shape[1]

        # Leaf: too few records or variables, or maximum depth reached.
        if (dataset.shape[0] < self.min_rec or dataset.shape[1] < self.min_var
                or curr_depth >= self.depth):
            clt = CLT()
            clt.learnStructure(dataset)
            clt.xyprob = np.zeros((1, 1, 2, 2))  # release the pairwise table
            clt.xprob = np.zeros((1, 2))
            return clt

        # Weighted, smoothed counts and pairwise mutual-information scores.
        self.xycounts = Util.compute_weighted_xycounts(dataset, weights) + smooth
        self.xcounts = Util.compute_weighted_xcounts(dataset, weights) + 2.0 * smooth
        edgemat = Util.compute_edge_weights(self.xycounts, self.xcounts)
        np.fill_diagonal(edgemat, 0)

        # Condition on the variable with the largest total score.
        scores = np.sum(edgemat, axis=0)
        variable = np.argmax(scores)

        index1 = np.where(dataset[:, variable] == 1)[0]
        index0 = np.where(dataset[:, variable] == 0)[0]

        # Drop the chosen column and split the records on its value.
        new_dataset = np.delete(dataset, variable, axis=1)

        new_dataset1 = new_dataset[index1]
        new_weights1 = weights[index1]
        p1 = np.sum(new_weights1) + smooth

        new_dataset0 = new_dataset[index0]
        new_weights0 = weights[index0]
        p0 = np.sum(new_weights0) + smooth

        # Normalize the branch probabilities.
        p0 = p0 / (p0 + p1)
        p1 = 1.0 - p0

        new_ids = np.delete(ids, variable, 0)

        # OR node: [column index, original id, P(x=0), P(x=1), child0, child1].
        return [
            variable, ids[variable], p0, p1,
            self.learn_structure_weight(new_dataset0, new_weights0, new_ids, smooth),
            self.learn_structure_weight(new_dataset1, new_weights1, new_ids, smooth),
        ]
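
A hedged driver sketch; `learner` stands for an instance of the surrounding class, the attribute names are exactly the ones the method reads, and the data and thresholds are made up:

import numpy as np

dataset = np.random.randint(2, size=(1000, 16))  # binary data, 16 variables
weights = np.ones(dataset.shape[0])              # uniform record weights
ids = np.arange(dataset.shape[1])                # original column ids

learner.nvariables = dataset.shape[1]
learner.min_rec, learner.min_var, learner.depth = 10, 5, 3
tree = learner.learn_structure_weight(dataset, weights, ids, smooth=1.0)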
Example #3
    def learnStructureHelper(self, tum, dataset, ids, lamda, beta_function,
                             evid_list, data_ind, next_id=-1,
                             next_weights=np.zeros(2)):
        """Recursively build the structure, blending pairwise statistics from
        the underlying model (`tum`) with statistics from the data; `alpha`
        controls the blend."""
        curr_depth = self.nvariables - ids.shape[0]

        if len(evid_list) == 0:  # first call: use the full dataset
            sub_dataset = dataset
        else:
            if data_ind.shape[0] == 0:
                sub_dataset = np.array([])
            else:
                sub_dataset = dataset[data_ind, :][:, ids]

        alpha = utilM.updata_coef(sub_dataset.shape[0], dataset.shape[0],
                                  lamda, beta_function)

        if next_id == -1:
            # Pairwise and single-variable marginals from the model part.
            p_xy, p_x = tum.inference_jt(evid_list, ids)

            if alpha > 0:
                # Data part, with Laplace correction, blended in by alpha.
                xycounts = Util.compute_xycounts(sub_dataset) + 1
                xcounts = Util.compute_xcounts(sub_dataset) + 2
                p_xy_d = Util.normalize2d(xycounts)
                p_x_d = Util.normalize1d(xcounts)

                p_xy = alpha * p_xy_d + (1 - alpha) * p_xy
                p_x = alpha * p_x_d + (1 - alpha) * p_x

            # Mutual-information scores for all pairs of variables; zero the
            # diagonal so self-information never wins.
            edgemat = Util.compute_MI_prob(p_xy, p_x)
            np.fill_diagonal(edgemat, 0)

            # Condition on the variable with the largest total score.
            scores = np.sum(edgemat, axis=0)
            variable = np.argmax(scores)
            variable_id = ids[variable]  # index in the original file

            p1 = p_x[variable, 1]
            p0 = p_x[variable, 0]

            # -1 means the evidence value is not determined yet.
            evid_list.append(np.array([variable_id, -1]))

            if curr_depth >= self.depth:
                # Leaf: learn a Chow-Liu tree directly from the MI matrix.
                clt_leaf = CLT()
                clt_leaf.learnStructure_MI(edgemat)
                clt_leaf.xyprob = p_xy
                clt_leaf.xprob = p_x
                clt_leaf.get_log_cond_cpt()
                clt_leaf.xyprob = np.zeros((1, 1, 2, 2))  # save memory

                # Record everything needed to resume expanding this leaf.
                save_info = {}
                save_info['ids'] = ids
                save_info['next_id'] = variable_id
                save_info['next_weights'] = np.array([p0, p1])
                save_info['evid_list'] = evid_list
                save_info['data_ind'] = data_ind

                clt_leaf.save_info = save_info
                return clt_leaf

        else:
            # Resuming a saved leaf: the split variable and branch weights
            # were already computed.
            variable_id = next_id
            p0 = next_weights[0]
            p1 = next_weights[1]
            variable = np.where(ids == variable_id)[0][0]

        # Fix the evidence value of the chosen variable for each branch
        # (requires the standard-library `copy` module).
        evid_list_0 = copy.deepcopy(evid_list)
        evid_list_1 = copy.deepcopy(evid_list)
        evid_list_0[-1][1] = 0
        evid_list_1[-1][1] = 1

        # Partition the record indices by the value of the chosen variable.
        if alpha > 0:
            new_data_ind0 = data_ind[np.where(sub_dataset[:, variable] == 0)[0]]
            new_data_ind1 = data_ind[np.where(sub_dataset[:, variable] == 1)[0]]
        else:
            new_data_ind0 = np.array([])
            new_data_ind1 = np.array([])

        new_ids = np.delete(ids, variable)

        return [variable, variable_id, p0, p1,
                self.learnStructureHelper(tum, dataset, new_ids, lamda,
                                          beta_function, evid_list_0,
                                          new_data_ind0),
                self.learnStructureHelper(tum, dataset, new_ids, lamda,
                                          beta_function, evid_list_1,
                                          new_data_ind1)]
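
Both recursive learners return the same nested-list node layout, so callers can walk the result generically. A minimal sketch, assuming leaves are `CLT` instances; `tree_depth` is a hypothetical helper, not part of the source:

# Internal nodes look like [var, var_id, p0, p1, child0, child1];
# a CLT instance marks a leaf.
def tree_depth(node):
    if isinstance(node, CLT):
        return 0
    return 1 + max(tree_depth(node[4]), tree_depth(node[5]))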