Example #1
File: JT.py Project: vibhavg1/CNxD
import sys
import time

import numpy as np

# CLT, JunctionTree and get_marginal_JT come from the CNxD project's own
# modules and are assumed to be importable alongside this script.

def main_jt():
    dataset_dir = sys.argv[2]
    data_name = sys.argv[4]

    train_name = dataset_dir + data_name + '.ts.data'
    valid_name = dataset_dir + data_name + '.valid.data'
    test_name = dataset_dir + data_name + '.test.data'
    data_train = np.loadtxt(train_name, delimiter=',', dtype=np.uint32)
    data_valid = np.loadtxt(valid_name, delimiter=',', dtype=np.uint32)
    data_test = np.loadtxt(test_name, delimiter=',', dtype=np.uint32)

    # Learn a Chow-Liu tree on the training data and report its
    # per-instance test-set log-likelihood.
    clt = CLT()
    clt.learnStructure(data_train)
    print('clt test-set log-likelihood score:',
          clt.computeLL(data_test) / data_test.shape[0])

    n_variable = data_train.shape[1]
    clt.get_log_cond_cpt()

    # Compile the tree into a junction tree for exact inference.
    jt = JunctionTree()
    jt.learn_structure(clt.topo_order, clt.parents, clt.cond_cpt)

    # Query the marginal of every variable, with no evidence.
    evid_list = []
    query_var = np.arange(n_variable)

    start = time.time()
    marginal = get_marginal_JT(jt, evid_list, query_var)

    print('------Marginals------')
    for i in range(query_var.shape[0]):
        print(marginal[i])
    print('running time for new:', time.time() - start)
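main_jt reads its inputs from sys.argv[2] and sys.argv[4], so the script evidently expects flag-style arguments in positions 1 and 3. A minimal sketch of an invocation, assuming hypothetical '-dir'/'-data' flag names and a sample dataset name (only the argument positions are implied by the code above):

import sys

# Hypothetical flag names: only positions 2 and 4 are implied by main_jt;
# '-dir' and '-data' are illustrative placeholders, and 'nltcs' is a sample
# dataset name, so datasets/nltcs.ts.data etc. must exist.
sys.argv = ['JT.py', '-dir', 'datasets/', '-data', 'nltcs']
main_jt()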
Example #2
    # Excerpt of a class method; at module level it relies on numpy as np,
    # copy, and the project's Util and utilM helpers.
    def learnStructureHelper(self, tum, dataset, ids, lamda, beta_function,
                             evid_list, data_ind, next_id=-1,
                             next_weights=np.zeros(2)):

        curr_depth = self.nvariables - ids.shape[0]

        if len(evid_list) == 0:    # the first run: use the full dataset
            sub_dataset = dataset
        else:
            if data_ind.shape[0] == 0:
                # no data rows are consistent with the evidence
                sub_dataset = np.array([])
            else:
                sub_dataset = dataset[data_ind, :][:, ids]

        alpha = utilM.updata_coef(sub_dataset.shape[0], dataset.shape[0],
                                  lamda, beta_function)

        if next_id == -1:
            # distribution from the underlying model (tum) via junction-tree inference
            p_xy, p_x = tum.inference_jt(evid_list, ids)
            
            if alpha > 0:
                # empirical distribution from the data, with Laplace correction
                xycounts = Util.compute_xycounts(sub_dataset) + 1
                xcounts = Util.compute_xcounts(sub_dataset) + 2
                p_xy_d = Util.normalize2d(xycounts)
                p_x_d = Util.normalize1d(xcounts)

                # blend the empirical and model distributions
                p_xy = alpha * p_xy_d + (1 - alpha) * p_xy
                p_x = alpha * p_x_d + (1 - alpha) * p_x

            # compute the mutual-information score for all pairs of variables
            edgemat = Util.compute_MI_prob(p_xy, p_x)

            # zero out each variable's mutual information with itself
            np.fill_diagonal(edgemat, 0)

            # split on the variable with the highest total mutual information
            scores = np.sum(edgemat, axis=0)
            variable = np.argmax(scores)
            variable_id = ids[variable]  # the index in the original file

            p1 = p_x[variable, 1]
            p0 = p_x[variable, 0]

            evid_list.append(np.array([variable_id, -1]))  # -1 means not determined yet
        
            if curr_depth >= self.depth:
                # depth limit reached: learn a Chow-Liu tree leaf from the MI matrix
                clt_leaf = CLT()
                clt_leaf.learnStructure_MI(edgemat)
                clt_leaf.xyprob = p_xy
                clt_leaf.xprob = p_x
                clt_leaf.get_log_cond_cpt()
                # drop the pairwise table to save memory
                clt_leaf.xyprob = np.zeros((1, 1, 2, 2))

                # store the split information with the leaf
                save_info = {}
                save_info['ids'] = ids
                save_info['next_id'] = variable_id
                save_info['next_weights'] = np.array([p0, p1])
                save_info['evid_list'] = evid_list
                save_info['data_ind'] = data_ind

                clt_leaf.save_info = save_info
                return clt_leaf
        
        else:
            # the split variable and its weights were chosen by a previous call
            variable_id = next_id
            p0 = next_weights[0]
            p1 = next_weights[1]
            variable = np.where(ids == variable_id)[0][0]

        # condition on the chosen variable: one evidence list per value
        evid_list_0 = copy.deepcopy(evid_list)
        evid_list_1 = copy.deepcopy(evid_list)
        evid_list_0[-1][1] = 0
        evid_list_1[-1][1] = 1
        new_ids = np.delete(ids, variable)

        if alpha > 0:
            # partition the data indices by the value of the split variable
            new_data_ind0 = data_ind[np.where(sub_dataset[:, variable] == 0)[0]]
            new_data_ind1 = data_ind[np.where(sub_dataset[:, variable] == 1)[0]]
        else:
            new_data_ind0 = np.array([])
            new_data_ind1 = np.array([])

        # recurse on both branches of the split
        return [variable, variable_id, p0, p1,
                self.learnStructureHelper(tum, dataset, new_ids, lamda,
                                          beta_function, evid_list_0,
                                          new_data_ind0),
                self.learnStructureHelper(tum, dataset, new_ids, lamda,
                                          beta_function, evid_list_1,
                                          new_data_ind1)]
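The heart of this helper is the convex blend p = alpha * p_data + (1 - alpha) * p_model between Laplace-smoothed empirical probabilities and probabilities inferred from the existing model (tum). A minimal self-contained sketch of that blending step on a toy binary dataset, using plain NumPy in place of the project's Util helpers (empirical_px below is illustrative, not the CNxD API):

import numpy as np

def empirical_px(data):
    # Laplace-smoothed marginals P(x=0), P(x=1) for each column of a 0/1 matrix.
    counts = np.stack([(data == 0).sum(axis=0), (data == 1).sum(axis=0)], axis=1)
    counts = counts + 1  # Laplace correction, as in the helper above
    return counts / counts.sum(axis=1, keepdims=True)

rng = np.random.default_rng(0)
data = (rng.random((100, 3)) < 0.7).astype(np.uint32)  # toy binary dataset

p_x_model = np.full((3, 2), 0.5)  # stand-in for tum's inferred marginals
p_x_data = empirical_px(data)     # empirical marginals from the data

alpha = 0.8  # weight on the data; updata_coef computes this in the helper
p_x = alpha * p_x_data + (1 - alpha) * p_x_model
print(p_x)   # each row still sums to 1

With alpha close to 1 the blend follows the data; as the conditioned subset shrinks relative to the full dataset, updata_coef presumably drives alpha toward 0 so that the model's estimates dominate.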