Example #1
import copy
import sys

import numpy as np

# CNET_deep and utilM are project-local modules from the surrounding
# repository; their import paths are not shown in this snippet.


def main_rcn_structure():

    dataset_dir = sys.argv[4]
    data_name = sys.argv[6]
    min_depth = int(sys.argv[8])
    max_depth = int(sys.argv[10])
    
    train_filename = dataset_dir + data_name + '.ts.data'
    train_dataset = np.loadtxt(train_filename, dtype=int, delimiter=',')
    
    n_variables = train_dataset.shape[1]

    max_depth = min(n_variables - 2, max_depth)  # need at least 2 variables in a leaf to build the Chow-Liu tree
    
    print("-----Learning the structure Randomly----")
    tree = []
    output_cnet = '../rcn_output/'
       
    
    
    for i in range(min_depth, max_depth+1):
        cnet  = CNET_deep(tree, depth=i)       
        #cnet.learnStructure(reload_mix_clt, train_dataset, lamda, beta_function)   
        cnet.learnStructure(n_variables)
        tree = copy.deepcopy(cnet.tree)

        main_dict = {}
        utilM.save_cutset(main_dict, cnet.tree, np.arange(n_variables), ccpt_flag=True)
        np.savez_compressed(output_cnet + data_name + '_structure_' + str(i), module=main_dict)
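For reference, a minimal sketch of reloading one of the structures saved above, assuming only numpy: np.savez_compressed stores the dict under the keyword 'module', so np.load needs allow_pickle=True and .item() to recover it. The dataset name and depth in the file name are placeholders.

import numpy as np

# hypothetical file name, following the output_cnet + data_name +
# '_structure_' + str(i) pattern used above
archive = np.load('../rcn_output/nltcs_structure_3.npz', allow_pickle=True)
main_dict = archive['module'].item()  # the dict built by utilM.save_cutset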
Example #2
def main_rcn_structure(parms_dict):

    print('------------------------------------------------------------------')
    print('Learning the structure of Deep Random Cutset Network')
    print('------------------------------------------------------------------')

    dataset_dir = parms_dict['dir']
    data_name = parms_dict['dn']
    min_depth = int(parms_dict['min_depth'])
    max_depth = int(parms_dict['max_depth'])
    output_dir = parms_dict['output_dir']

    train_filename = dataset_dir + data_name + '.ts.data'
    train_dataset = np.loadtxt(train_filename, dtype=int, delimiter=',')

    n_variables = train_dataset.shape[1]

    max_depth = min(n_variables - 2, max_depth)  # need at least 2 variables in a leaf to build the Chow-Liu tree

    tree = []

    for i in range(min_depth, max_depth + 1):
        cnet = CNET_deep(tree, depth=i)
        cnet.learnStructure(n_variables)
        tree = copy.deepcopy(cnet.tree)

        main_dict = {}
        utilM.save_cutset(main_dict,
                          cnet.tree,
                          np.arange(n_variables),
                          ccpt_flag=True)
        np.savez_compressed(output_dir + data_name + '_structure_' + str(i),
                            module=main_dict)
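A hedged usage sketch for this parms_dict variant: the key names are exactly those read by main_rcn_structure above, while every value is a placeholder.

parms_dict = {
    'dir': '../dataset/',        # placeholder dataset directory
    'dn': 'nltcs',               # placeholder dataset name
    'min_depth': '1',            # parsed with int() above
    'max_depth': '10',
    'output_dir': '../rcn_output/',
}
main_rcn_structure(parms_dict)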
Example #3
    def structure_redefine(self, load_info):

        self.weights = load_info.mixture_weight
        self.n_components = load_info.n_components
        for cn in load_info.cnet_list:
            main_dict = {}
            utilM.save_cutset(main_dict,
                              cn.tree,
                              np.arange(self.n_variable),
                              ccpt_flag=True)
            cnet_component = CNET_dfs(main_dict, self.n_variable)
            self.cnet_dict_list.append(cnet_component)

            internal_list, leaf_list = cnet_component.get_node_list()
            self.internal_list.append(internal_list)
            self.leaf_list.append(leaf_list)
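structure_redefine only reads three attributes from load_info, so for experimentation a duck-typed stand-in is enough. This is a sketch under assumptions: 'mixture' is the object owning the method, and 'weights' and 'cnet_list' come from a previously trained model whose components expose a .tree compatible with utilM.save_cutset.

from types import SimpleNamespace

# hypothetical stand-in carrying only the attributes the method reads
load_info = SimpleNamespace(mixture_weight=weights,
                            n_components=len(cnet_list),
                            cnet_list=cnet_list)
mixture.structure_redefine(load_info)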
Example #4
def main_cutset_opt():

    dataset_dir = sys.argv[2]
    data_name = sys.argv[4]
    max_depth = int(sys.argv[6])

    train_filename = dataset_dir + data_name + '.ts.data'
    test_filename = dataset_dir + data_name + '.test.data'
    valid_filename = dataset_dir + data_name + '.valid.data'

    train_dataset = np.loadtxt(train_filename, dtype=int, delimiter=',')
    valid_dataset = np.loadtxt(valid_filename, dtype=int, delimiter=',')
    test_dataset = np.loadtxt(test_filename, dtype=int, delimiter=',')

    print("Learning Cutset Networks only using data.....")
    train_ll = np.zeros(max_depth)
    valid_ll = np.zeros(max_depth)
    test_ll = np.zeros(max_depth)
    best_valid = -np.inf
    best_module = None
    for i in range(1, max_depth + 1):
        cnet = CNET(depth=i)
        cnet.learnStructure(train_dataset)
        train_ll[i - 1] = np.sum(cnet.getWeights(train_dataset)) / train_dataset.shape[0]
        valid_ll[i - 1] = np.sum(cnet.getWeights(valid_dataset)) / valid_dataset.shape[0]
        test_ll[i - 1] = np.sum(cnet.getWeights(test_dataset)) / test_dataset.shape[0]

        if best_valid < valid_ll[i - 1]:
            best_valid = valid_ll[i - 1]
            best_module = copy.deepcopy(cnet)

    print('Train set cnet LL scores')
    for l in range(max_depth):
        print(train_ll[l], l + 1)
    print()

    print('Valid set cnet LL scores')
    for l in range(max_depth):
        print(valid_ll[l], l + 1)
    print()

    print('Test set cnet LL scores')
    for l in range(max_depth):
        print(test_ll[l], l + 1)

    best_ind = np.argmax(valid_ll)

    print()
    print('Best validation LL score achieved at depth:', best_ind + 1)
    print('train: ', np.sum(best_module.getWeights(train_dataset)) / train_dataset.shape[0])
    print('valid: ', np.sum(best_module.getWeights(valid_dataset)) / valid_dataset.shape[0])
    print('test : ', np.sum(best_module.getWeights(test_dataset)) / test_dataset.shape[0])

    main_dict = {}
    utilM.save_cutset(main_dict,
                      best_module.tree,
                      np.arange(train_dataset.shape[1]),
                      ccpt_flag=True)
    np.savez_compressed('../cn_output/' + data_name, module=main_dict)
Example #5
def main_cnxd():

    dataset_dir = sys.argv[2]
    data_name = sys.argv[4]
    lamda = float(sys.argv[6])  # selected using the validation dataset
    beta_function = sys.argv[8]  # 'linear', 'square', or 'root' (square root)
    min_depth = int(sys.argv[10])
    max_depth = int(sys.argv[12])
    tum_module = sys.argv[14]

    print('------------------------------------------------------------------')
    print('Learning CNxD using Data and TUM')
    print('------------------------------------------------------------------')

    train_filename = dataset_dir + data_name + '.ts.data'
    test_filename = dataset_dir + data_name + '.test.data'
    valid_filename = dataset_dir + data_name + '.valid.data'

    train_dataset = np.loadtxt(train_filename, dtype=int, delimiter=',')
    valid_dataset = np.loadtxt(valid_filename, dtype=int, delimiter=',')
    test_dataset = np.loadtxt(test_filename, dtype=int, delimiter=',')

    n_variables = train_dataset.shape[1]

    ### Load the trained mixture of Chow-Liu trees
    print('Start reloading MT...')
    mt_dir = '../mt_output/'
    reload_mix_clt = load_mt(mt_dir, tum_module)

    # Set information for MT
    for t in reload_mix_clt.clt_list:
        t.nvariables = n_variables
        # learn the junction tree for each CLT
        jt = JT.JunctionTree()
        jt.learn_structure(t.topo_order, t.parents, t.cond_cpt)
        reload_mix_clt.jt_list.append(jt)

    print("Learning Cutset Networks by inference.....")
    print("Current lambda: ", lamda)
    print("Current function: ", beta_function)

    tree = []

    module_dir = '../cnxd_output/' + data_name + '/'

    train_ll_score = np.zeros(max_depth)
    valid_ll_score = np.zeros(max_depth)
    test_ll_score = np.zeros(max_depth)
    learning_time = np.zeros(max_depth)
    for i in range(min_depth, max_depth + 1):
        start = time.time()
        cnet = CNXD(tree, depth=i)
        cnet.learnStructure(reload_mix_clt, train_dataset, lamda, beta_function)
        learning_time[i - 1] = time.time() - start
        tree = copy.deepcopy(cnet.tree)

        # compute the per-instance average LL scores
        train_ll_score[i - 1] = cnet.computeLL(train_dataset) / train_dataset.shape[0]
        valid_ll_score[i - 1] = cnet.computeLL(valid_dataset) / valid_dataset.shape[0]
        test_ll_score[i - 1] = cnet.computeLL(test_dataset) / test_dataset.shape[0]

        main_dict = {}
        utilM.save_cutset(main_dict, cnet.tree, np.arange(n_variables), ccpt_flag=True)
        np.savez_compressed(module_dir + data_name + '_' + str(lamda) + '_' + beta_function + '_' + str(i),
                            module=main_dict)

    print('CNxD train set LL scores')
    for l in range(max_depth):
        print(train_ll_score[l], l + 1)
    print()

    print('CNxD valid set LL scores')
    for l in range(max_depth):
        print(valid_ll_score[l], l + 1)
    print()

    print('CNxD test set LL scores')
    for l in range(max_depth):
        print(test_ll_score[l], l + 1)
    print()

    print('CNxD learning times:')
    for l in range(max_depth):
        print(np.sum(learning_time[0:l + 1]), l + 1)
    print()
Example #6
def main_cutset_opt(parms_dict):
    
    print ("----------------------------------------------------")
    print ("Learning Cutset Networks on original data           ")
    print ("----------------------------------------------------")
    
    
    dataset_dir = parms_dict['dir']
    data_name = parms_dict['dn']
    max_depth = int(parms_dict['max_depth']) 
    out_dir = parms_dict['output_dir']

    

    train_filename = dataset_dir + data_name + '.ts.data'
    test_filename = dataset_dir + data_name +'.test.data'
    valid_filename = dataset_dir + data_name + '.valid.data'
    

    train_dataset = np.loadtxt(train_filename, dtype=int, delimiter=',')
    valid_dataset = np.loadtxt(valid_filename, dtype=int, delimiter=',')
    test_dataset = np.loadtxt(test_filename, dtype=int, delimiter=',')
    
    

    train_ll = np.zeros(max_depth)
    valid_ll = np.zeros(max_depth)
    test_ll = np.zeros(max_depth)
    
    best_valid = -np.inf
    best_module = None
    for i in range(1, max_depth + 1):
        cnet = CNET(depth=i)
        cnet.learnStructure(train_dataset)
        train_ll[i - 1] = np.sum(cnet.getWeights(train_dataset)) / train_dataset.shape[0]
        valid_ll[i - 1] = np.sum(cnet.getWeights(valid_dataset)) / valid_dataset.shape[0]
        test_ll[i - 1] = np.sum(cnet.getWeights(test_dataset)) / test_dataset.shape[0]

        if best_valid < valid_ll[i - 1]:
            best_valid = valid_ll[i - 1]
            best_module = copy.deepcopy(cnet)

    print('Train set cnet LL scores')
    for l in range(max_depth):
        print(train_ll[l], l + 1)
    print()

    print('Valid set cnet LL scores')
    for l in range(max_depth):
        print(valid_ll[l], l + 1)
    print()

    print('Test set cnet LL scores')
    for l in range(max_depth):
        print(test_ll[l], l + 1)

    best_ind = np.argmax(valid_ll)

    print()
    print('Best validation LL score achieved at depth:', best_ind + 1)
    print('Train set LL score:', np.sum(best_module.getWeights(train_dataset)) / train_dataset.shape[0])
    print('Valid set LL score:', np.sum(best_module.getWeights(valid_dataset)) / valid_dataset.shape[0])
    print('Test set LL score:', np.sum(best_module.getWeights(test_dataset)) / test_dataset.shape[0])

    main_dict = {}
    utilM.save_cutset(main_dict, best_module.tree, np.arange(train_dataset.shape[1]), ccpt_flag=True)
    np.savez_compressed(out_dir + data_name, module=main_dict)
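As with the other parms_dict entry points, a hedged call sketch: key names mirror the lookups above, values are placeholders.

parms_dict = {
    'dir': '../dataset/',      # placeholder dataset directory
    'dn': 'nltcs',             # placeholder dataset name
    'max_depth': '10',         # parsed with int() above
    'output_dir': '../cn_output/',
}
main_cutset_opt(parms_dict)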
Example #7
def main_cnxd(parms_dict):

    dataset_dir = parms_dict['dir']
    data_name = parms_dict['dn']
    lamda = float(parms_dict['a'])
    beta_function = parms_dict['f']
    min_depth = int(parms_dict['min_depth'])
    max_depth = int(parms_dict['max_depth'])
    mt_dir = parms_dict['input_dir']
    tum_module = parms_dict['input_module']
    module_dir = parms_dict['output_dir']

    print('------------------------------------------------------------------')
    print('Learning CNxD using Data and MAP Intractable Model')
    print('------------------------------------------------------------------')

    train_filename = dataset_dir + data_name + '.ts.data'
    test_filename = dataset_dir + data_name + '.test.data'
    valid_filename = dataset_dir + data_name + '.valid.data'

    train_dataset = np.loadtxt(train_filename, dtype=int, delimiter=',')
    valid_dataset = np.loadtxt(valid_filename, dtype=int, delimiter=',')
    test_dataset = np.loadtxt(test_filename, dtype=int, delimiter=',')

    n_variables = train_dataset.shape[1]

    ### Load the trained mixture of clt
    print('Start reloading MT...')
    reload_mix_clt = load_mt(mt_dir, tum_module)

    # Set information for MT
    for t in reload_mix_clt.clt_list:
        t.nvariables = n_variables
        # learn the junction tree for each CLT
        jt = JT.JunctionTree()
        jt.learn_structure(t.topo_order, t.parents, t.cond_cpt)
        reload_mix_clt.jt_list.append(jt)

    print("Current Alpha: ", lamda)
    print("Current Function: ", beta_function)

    tree = []

    train_ll_score = np.zeros(max_depth)
    valid_ll_score = np.zeros(max_depth)
    test_ll_score = np.zeros(max_depth)
    learning_time = np.zeros(max_depth)
    for i in range(min_depth, max_depth + 1):
        start = time.time()
        cnet = CNXD(tree, depth=i)
        cnet.learnStructure(reload_mix_clt, train_dataset, lamda, beta_function)
        learning_time[i - 1] = time.time() - start
        tree = copy.deepcopy(cnet.tree)

        # compute the per-instance average LL scores
        train_ll_score[i - 1] = cnet.computeLL(train_dataset) / train_dataset.shape[0]
        valid_ll_score[i - 1] = cnet.computeLL(valid_dataset) / valid_dataset.shape[0]
        test_ll_score[i - 1] = cnet.computeLL(test_dataset) / test_dataset.shape[0]

        main_dict = {}
        utilM.save_cutset(main_dict,
                          cnet.tree,
                          np.arange(n_variables),
                          ccpt_flag=True)
        np.savez_compressed(module_dir + data_name + '_' + str(lamda) + '_' +
                            beta_function + '_' + str(i),
                            module=main_dict)

    print('CNxD train set LL scores')
    for l in range(max_depth):
        print(train_ll_score[l], l + 1)
    print()

    print('CNxD valid set LL scores')
    for l in range(max_depth):
        print(valid_ll_score[l], l + 1)
    print()

    print('CNxD test set LL scores')
    for l in range(max_depth):
        print(test_ll_score[l], l + 1)
    print()

    print('CNxD learning times:')
    for l in range(max_depth):
        print(np.sum(learning_time[0:l + 1]), l + 1)
    print()
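A call sketch for main_cnxd, assuming an MT module was saved earlier so that load_mt can find it under input_dir. The key names come from the lookups at the top of the function; all values are placeholders.

parms_dict = {
    'dir': '../dataset/',           # placeholder dataset directory
    'dn': 'nltcs',                  # placeholder dataset name
    'a': '0.5',                     # read into lamda, printed as "Current Alpha"
    'f': 'root',                    # 'linear', 'square', or 'root'
    'min_depth': '1',
    'max_depth': '10',
    'input_dir': '../mt_output/',   # where load_mt looks for the module
    'input_module': 'nltcs_mt',     # placeholder module name
    'output_dir': '../cnxd_output/nltcs/',
}
main_cnxd(parms_dict)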
Example #8
def main_bag_cnet(parms_dict):

    print("----------------------------------------------------")
    print("Learning Bags of Cutset Networks                    ")
    print("----------------------------------------------------")

    dataset_dir = parms_dict['dir']
    data_name = parms_dict['dn']
    n_components = int(parms_dict['ncomp'])
    max_depth = int(parms_dict['max_depth'])
    sel_option = int(parms_dict['sp'])
    depth_option = int(parms_dict['dp'])
    output_dir = parms_dict['output_dir']

    train_name = dataset_dir + data_name + '.ts.data'
    valid_name = dataset_dir + data_name + '.valid.data'
    test_name = dataset_dir + data_name + '.test.data'
    data_train = np.loadtxt(train_name, delimiter=',', dtype=np.uint32)
    data_valid = np.loadtxt(valid_name, delimiter=',', dtype=np.uint32)
    data_test = np.loadtxt(test_name, delimiter=',', dtype=np.uint32)

    bag_cnet = BAG_CNET()
    bag_cnet.learnStructure(data_train,
                            n_components,
                            max_depth,
                            node_sel_option=sel_option,
                            depth_sel_option=depth_option)

    # compute the per-instance average LL scores
    train_ll = bag_cnet.computeLL(data_train) / data_train.shape[0]
    valid_ll = bag_cnet.computeLL(data_valid) / data_valid.shape[0]
    test_ll = bag_cnet.computeLL(data_test) / data_test.shape[0]
    ll_score = np.zeros(3)
    ll_score[0] = train_ll
    ll_score[1] = valid_ll
    ll_score[2] = test_ll

    print('Train set LL scores')
    print(train_ll)

    print('Valid set LL scores')
    print(valid_ll)

    print('Test set LL scores')
    print(test_ll)

    for i in range(n_components):
        main_dict = {}
        utilM.save_cutset(main_dict,
                          bag_cnet.cnet_list[i].tree,
                          np.arange(data_train.shape[1]),
                          ccpt_flag=True)
        np.savez_compressed(output_dir + data_name + '_' + str(i),
                            module=main_dict)

    # save the component weights
    np.savetxt(output_dir + data_name + '_component_weights.txt',
               bag_cnet.mixture_weight,
               delimiter=',')
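A hedged call sketch for the bagging entry point; the key names match the lookups in main_bag_cnet, the values are placeholders.

parms_dict = {
    'dir': '../dataset/',     # placeholder dataset directory
    'dn': 'nltcs',            # placeholder dataset name
    'ncomp': '5',             # number of bagged components
    'max_depth': '10',
    'sp': '0',                # node_sel_option (placeholder value)
    'dp': '0',                # depth_sel_option (placeholder value)
    'output_dir': '../bcnet_output/',
}
main_bag_cnet(parms_dict)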
Example #9
def main_bag_cnet():

    dataset_dir = sys.argv[2]
    data_name = sys.argv[4]
    n_components = int(sys.argv[6])
    max_depth = int(sys.argv[8])
    sel_option = int(sys.argv[10])
    depth_option = int(sys.argv[12])

    train_name = dataset_dir + data_name + '.ts.data'
    valid_name = dataset_dir + data_name + '.valid.data'
    test_name = dataset_dir + data_name + '.test.data'
    data_train = np.loadtxt(train_name, delimiter=',', dtype=np.uint32)
    data_valid = np.loadtxt(valid_name, delimiter=',', dtype=np.uint32)
    data_test = np.loadtxt(test_name, delimiter=',', dtype=np.uint32)

    print("Learning Bags of Cutset Network on original data ......")
    bag_cnet = BAG_CNET()
    bag_cnet.learnStructure(data_train,
                            n_components,
                            max_depth,
                            node_sel_option=sel_option,
                            depth_sel_option=depth_option)

    # compute the per-instance average LL scores
    train_ll = bag_cnet.computeLL(data_train) / data_train.shape[0]
    valid_ll = bag_cnet.computeLL(data_valid) / data_valid.shape[0]
    test_ll = bag_cnet.computeLL(data_test) / data_test.shape[0]
    ll_score = np.zeros(3)
    ll_score[0] = train_ll
    ll_score[1] = valid_ll
    ll_score[2] = test_ll

    print('Train set LL scores')
    print(train_ll)

    print('Valid set LL scores')
    print(valid_ll)

    print('Test set LL scores')
    print(test_ll)

    output_dir = '../bcnet_output/'

    for i in range(n_components):
        main_dict = {}
        utilM.save_cutset(main_dict,
                          bag_cnet.cnet_list[i].tree,
                          np.arange(data_train.shape[1]),
                          ccpt_flag=True)
        np.savez_compressed(output_dir + data_name + '_' + str(i),
                            module=main_dict)

    # save the component weights
    np.savetxt(output_dir + data_name + '_component_weights.txt',
               bag_cnet.mixture_weight,
               delimiter=',')
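A minimal sketch, assuming only numpy, of reading back what the two bagging examples write: one .npz per component plus a text file of mixture weights. The dataset name and component count are placeholders and must match the values used at training time.

import numpy as np

output_dir = '../bcnet_output/'
data_name = 'nltcs'   # placeholder dataset name
n_components = 5      # must match the training run

mixture_weight = np.loadtxt(output_dir + data_name + '_component_weights.txt',
                            delimiter=',')
components = []
for i in range(n_components):
    archive = np.load(output_dir + data_name + '_' + str(i) + '.npz',
                      allow_pickle=True)
    components.append(archive['module'].item())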