Example #1
    def learnStructureHelper(self, dataset, ids):
        curr_depth = self.nvariables - dataset.shape[1]
        if dataset.shape[0] < self.min_rec or dataset.shape[1] < self.min_var or curr_depth >= self.depth:
            # Base case: too few records or variables, or maximum depth
            # reached; fit a Chow-Liu tree on the remaining data.
            clt = CLT()
            clt.learnStructure(dataset)
            return clt
        xycounts = Util.compute_xycounts(dataset) + 1  # Laplace correction
        xcounts = Util.compute_xcounts(dataset) + 2  # Laplace correction
        # Mutual information scores for all pairs of variables.
        edgemat = Util.compute_edge_weights(xycounts, xcounts)
        np.fill_diagonal(edgemat, 0)

        # Condition on the variable with the highest total mutual information.
        scores = np.sum(edgemat, axis=0)
        variable = np.argmax(scores)

        new_dataset1 = np.delete(dataset[dataset[:, variable] == 1], variable, 1)
        p1 = float(new_dataset1.shape[0]) + 1.0
        new_ids = np.delete(ids, variable, 0)

        new_dataset0 = np.delete(dataset[dataset[:, variable] == 0], variable, 1)
        p0 = float(new_dataset0.shape[0]) + 1.0

        return [variable, ids[variable], p0, p1,
                self.learnStructureHelper(new_dataset0, new_ids),
                self.learnStructureHelper(new_dataset1, new_ids)]
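Each internal node is returned as a plain list: [column index, original variable id, smoothed 0-branch count, smoothed 1-branch count, subtree for value 0, subtree for value 1]. A minimal sketch that normalizes the stored counts into split probabilities (the split_prob helper is hypothetical, not part of the snippet):

def split_prob(node):
    # Node layout from the return statement above:
    # [col, var_id, p0, p1, child0, child1]
    p0, p1 = node[2], node[3]
    total = p0 + p1
    return p0 / total, p1 / total

print(split_prob([3, 7, 41.0, 61.0, None, None]))  # -> (0.4019..., 0.5980...)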
Example #2
def main_jt():
    dataset_dir = sys.argv[2]
    data_name = sys.argv[4]

    train_name = dataset_dir + data_name + '.ts.data'
    valid_name = dataset_dir + data_name + '.valid.data'
    test_name = dataset_dir + data_name + '.test.data'
    data_train = np.loadtxt(train_name, delimiter=',', dtype=np.uint32)
    data_valid = np.loadtxt(valid_name, delimiter=',', dtype=np.uint32)
    data_test = np.loadtxt(test_name, delimiter=',', dtype=np.uint32)

    clt = CLT()
    clt.learnStructure(data_train)
    print('clt test-set log-likelihood score:',
          clt.computeLL(data_test) / data_test.shape[0])

    n_variable = data_train.shape[1]
    clt.get_log_cond_cpt()

    jt = JunctionTree()
    jt.learn_structure(clt.topo_order, clt.parents, clt.cond_cpt)

    evid_list = []
    query_var = np.arange(n_variable)

    start = time.time()
    marginal = get_marginal_JT(jt, evid_list, query_var)

    print('------Marginals------')
    for i in range(query_var.shape[0]):
        print(marginal[i])
    print('running time for new:', time.time() - start)
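Since main_jt reads sys.argv[2] and sys.argv[4] directly, the expected command line is flag/value pairs; the '-dir' and '-data' flag names below are assumptions for illustration only:

import sys

# Hypothetical invocation; only the positions of argv[2] and argv[4] matter.
sys.argv = ['main.py', '-dir', 'datasets/', '-data', 'nltcs']
# main_jt() would then load datasets/nltcs.ts.data, .valid.data and .test.data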
Example #3
    def learn_structure_weight(self, dataset, weights, ids, smooth):
        curr_depth = self.nvariables - dataset.shape[1]

        if dataset.shape[0] < self.min_rec or dataset.shape[
                1] < self.min_var or curr_depth >= self.depth:
            # Base case: fit a Chow-Liu tree on the remaining slice.
            clt = CLT()
            clt.learnStructure(dataset)
            clt.xyprob = np.zeros((1, 1, 2, 2))
            clt.xprob = np.zeros((1, 2))
            return clt

        # Weighted sufficient statistics with additive smoothing.
        self.xycounts = Util.compute_weighted_xycounts(dataset,
                                                       weights) + smooth
        self.xcounts = Util.compute_weighted_xcounts(dataset,
                                                     weights) + 2.0 * smooth
        edgemat = Util.compute_edge_weights(self.xycounts, self.xcounts)
        np.fill_diagonal(edgemat, 0)

        # Split on the variable with the highest total mutual information.
        scores = np.sum(edgemat, axis=0)
        variable = np.argmax(scores)

        index1 = np.where(dataset[:, variable] == 1)[0]
        index0 = np.where(dataset[:, variable] == 0)[0]

        new_dataset = np.delete(dataset, variable, axis=1)

        new_dataset1 = new_dataset[index1]
        new_weights1 = weights[index1]
        p1 = np.sum(new_weights1) + smooth

        new_dataset0 = new_dataset[index0]
        new_weights0 = weights[index0]
        p0 = np.sum(new_weights0) + smooth

        # Normalize the smoothed branch weights.
        p0 = p0 / (p0 + p1)
        p1 = 1.0 - p0

        new_ids = np.delete(ids, variable, 0)

        return [
            variable, ids[variable], p0, p1,
            self.learn_structure_weight(new_dataset0, new_weights0, new_ids,
                                        smooth),
            self.learn_structure_weight(new_dataset1, new_weights1, new_ids,
                                        smooth)
        ]
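The weighted counts generalize the Laplace-corrected counts of Example #1; with unit weights the two coincide. A standalone sketch of what compute_weighted_xcounts plausibly computes (this helper is an assumption, not the project's code):

import numpy as np

def weighted_xcounts(dataset, weights):
    # counts[i, v] = total weight of the records where variable i equals v
    w1 = weights @ dataset         # weight mass on value 1, per column
    w0 = weights.sum() - w1        # remaining mass goes to value 0
    return np.stack([w0, w1], axis=1)

data = np.array([[0, 1], [1, 1], [1, 0]], dtype=np.uint32)
w = np.array([0.5, 0.25, 0.25])
print(weighted_xcounts(data, w))   # [[0.5  0.5 ] [0.25 0.75]]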
Example #4
    def learnStructureP_Helper(self, dataset, ids, portion):
        curr_depth = self.nvariables - dataset.shape[1]
        if dataset.shape[0] < self.min_rec or dataset.shape[
                1] < self.min_var or curr_depth >= self.depth:
            # Base case: fit a Chow-Liu tree on the remaining slice.
            clt = CLT()
            clt.learnStructure(dataset)
            return clt

        xycounts = Util.compute_xycounts(dataset) + 1  # Laplace correction
        xcounts = Util.compute_xcounts(dataset) + 2  # Laplace correction
        # Mutual information scores for all pairs of variables.
        edgemat = Util.compute_edge_weights(xycounts, xcounts)
        np.fill_diagonal(edgemat, 0)

        scores = np.sum(edgemat, axis=0)
        # Sample a random portion of the candidate variables and split on
        # the best-scoring variable within that subset.
        ind_portion = np.random.choice(ids.shape[0],
                                       int(ids.shape[0] * portion),
                                       replace=False)
        scores_portion = scores[ind_portion]
        variable = ind_portion[np.argmax(scores_portion)]

        new_dataset1 = np.delete(dataset[dataset[:, variable] == 1], variable,
                                 1)
        p1 = float(new_dataset1.shape[0]) + 1.0
        new_ids = np.delete(ids, variable, 0)

        new_dataset0 = np.delete(dataset[dataset[:, variable] == 0], variable,
                                 1)
        p0 = float(new_dataset0.shape[0]) + 1.0

        return [
            variable, ids[variable], p0, p1,
            self.learnStructureP_Helper(new_dataset0, new_ids, portion),
            self.learnStructureP_Helper(new_dataset1, new_ids, portion)
        ]
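The only difference from Example #1 is the split selection: the argmax is restricted to a random subset of the candidate variables. That restriction can be demonstrated in isolation (the scores here are made up):

import numpy as np

# Restrict the argmax to a random portion of the candidate columns,
# mirroring the ind_portion logic above.
scores = np.array([0.1, 0.9, 0.4, 0.7, 0.2])
portion = 0.6
subset = np.random.choice(scores.shape[0],
                          int(scores.shape[0] * portion),
                          replace=False)
chosen = subset[np.argmax(scores[subset])]
print('candidate columns:', subset, '-> split on column', chosen)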
Example #5
    def learnStructure(self, dataset, n_components):
        # Mixture of Chow-Liu trees: shuffle the data, then fit one tree
        # per (roughly) equal-sized slice with uniform mixture weights.
        self.n_components = n_components
        self.mixture_weight = np.full(n_components, 1.0 / n_components)
        data_shuffle = np.copy(dataset)
        np.random.shuffle(data_shuffle)
        n_data = data_shuffle.shape[0] // self.n_components

        for c in range(self.n_components):
            if c == self.n_components - 1:  # the last slice takes the remainder
                data_slice = data_shuffle[c * n_data:, :]
            else:
                data_slice = data_shuffle[c * n_data:((c + 1) * n_data), :]

            clt = CLT()
            clt.learnStructure(data_slice)

            self.clt_list.append(clt)
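The slicing assigns n_data rows to every component and lets the last one absorb the remainder; a quick standalone check of the index arithmetic:

# Slice boundaries as used above, for 10 rows and 3 components.
rows, n_components = 10, 3
n_data = rows // n_components
for c in range(n_components):
    lo = c * n_data
    hi = rows if c == n_components - 1 else (c + 1) * n_data
    print('component', c, 'gets rows', lo, 'to', hi - 1)  # 0-2, 3-5, 6-9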