Example #1
def rcn_tune_paramter(train_dataset, valid_dataset, test_dataset, data_name,
                      depth, module_dir):

    print('----- Tuning Parameters -----')

    # path to the saved cnet structure for this depth
    input_module = module_dir + data_name + '_parm_' + str(depth) + '.npz'

    # reload the structure (stored as a pickled dict inside the .npz archive)
    cnet_load = np.load(input_module, allow_pickle=True)['module'].item()

    functions = ['linear', 'square', 'root']  # currently supported
    lamda = np.arange(11) / 10.0  # lambda grid: 0.0, 0.1, ..., 1.0

    # keep the module with the highest validation LL score
    best_func_val = ''
    best_lam_val = 0.0
    best_module_val = None
    best_ll_val = -np.inf
    ll_results = np.zeros(3)

    for func in functions:
        for lam in lamda:
            cnet_module = copy.deepcopy(cnet_load)
            getExactParm(cnet_module, train_dataset.shape[0], lam, func)

            # Get LL score
            train_ll = np.sum(
                utilM.computeLL_reload(cnet_module,
                                       train_dataset)) / train_dataset.shape[0]
            valid_ll = np.sum(
                utilM.computeLL_reload(cnet_module,
                                       valid_dataset)) / valid_dataset.shape[0]
            test_ll = np.sum(utilM.computeLL_reload(
                cnet_module, test_dataset)) / test_dataset.shape[0]

            if valid_ll > best_ll_val:
                best_ll_val = valid_ll
                best_func_val = func
                best_lam_val = lam
                best_module_val = copy.deepcopy(cnet_module)
                ll_results[0] = train_ll
                ll_results[1] = valid_ll
                ll_results[2] = test_ll

    # print('Best function: ', best_func_val)
    # print('Best lambda: ', best_lam_val)
    print('Train LL score for RCN: ', ll_results[0])
    print('Valid LL score for RCN: ', ll_results[1])
    print('Test LL score for RCN : ', ll_results[2])

    # save the best module under the same 'module' key used when loading
    np.savez_compressed(module_dir + data_name + '_' + str(depth),
                        module=best_module_val)
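
A minimal usage sketch, assuming Libra-style '.ts.data' / '.valid.data' / '.test.data' file naming and the directory layout implied by the paths above; the dataset name, depth, and directories are illustrative, not taken from the source:

import numpy as np

# hypothetical paths and dataset name
data_name = 'nltcs'
dataset_dir = '../dataset/'
module_dir = '../rcn_output/'

train_dataset = np.loadtxt(dataset_dir + data_name + '.ts.data', dtype=int, delimiter=',')
valid_dataset = np.loadtxt(dataset_dir + data_name + '.valid.data', dtype=int, delimiter=',')
test_dataset = np.loadtxt(dataset_dir + data_name + '.test.data', dtype=int, delimiter=',')

# searches the 3 x 11 grid (function x lambda) for the depth-5 structure
# saved earlier as ../rcn_output/nltcs_parm_5.npz
rcn_tune_paramter(train_dataset, valid_dataset, test_dataset, data_name,
                  depth=5, module_dir=module_dir)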
Example #2
def main_cnet():

    # only the even argv positions are read; the flags in between are ignored
    module_type = sys.argv[2]
    dataset_dir = sys.argv[4]
    data_name = sys.argv[6]
    max_depth = int(sys.argv[8])
    e_percent = float(sys.argv[10])  # evidence fraction: #evidence / #total variables
    seq = int(sys.argv[12])  # row index into the pre-generated evidence file

    test_filename = dataset_dir + data_name + '.test.data'
    test_dataset = np.loadtxt(test_filename, dtype=int, delimiter=',')

    n_variables = test_dataset.shape[1]

    output_dir = '../infer_output/'
    e_file = output_dir + 'evid_file/' + data_name + '_evid_' + str(
        int(e_percent * 100)) + '.txt'
    e_var_arr = np.loadtxt(e_file, dtype=int, delimiter=',')

    e_var = e_var_arr[seq]
    x_var = np.setdiff1d(np.arange(n_variables), e_var)

    ### Load the trained cutset network
    print('Start reloading cutset network ...')
    if module_type == 'cnxd':
        cnet_file = '../cnxd_output/' + data_name + '_' + str(max_depth) + '.npz'
    elif module_type == 'rcn':
        cnet_file = '../rcn_output/' + data_name + '_' + str(max_depth) + '.npz'
    elif module_type == 'cn':
        cnet_file = '../cn_output/' + data_name + '.npz'
    else:
        raise ValueError('unknown module type: ' + module_type)

    print('Getting the MAP tuple...')
    cnet_module = np.load(cnet_file, allow_pickle=True)['module'].item()
    xmax_prob_cnet, map_dataset_cnet = compute_xmax_cnet(
        cnet_module, max_depth, test_dataset, x_var, e_var)

    # save the max tuple
    if module_type == 'cn':
        new_data_file = output_dir + module_type + '/' + data_name + '_' + str(
            int(e_percent * 100)) + '_' + str(seq) + '.txt'
    else:
        new_data_file = output_dir + module_type + '/' + data_name + '_' + str(
            int(e_percent *
                100)) + '_' + str(max_depth) + '_' + str(seq) + '.txt'

    map_dataset_cnet = np.asarray(map_dataset_cnet).astype(int)
    np.savetxt(new_data_file, map_dataset_cnet, fmt='%i', delimiter=',')

    ll_score = np.sum(utilM.computeLL_reload(
        cnet_module, map_dataset_cnet)) / map_dataset_cnet.shape[0]

    print('MAP dataset LL score: ', ll_score)
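
main_cnet reads only the even sys.argv positions, so it expects a flag/value command line in which the flag names themselves are never checked. A hypothetical invocation consistent with that indexing (the script and flag names are placeholders; only the values at positions 2, 4, 6, 8, 10, and 12 matter):

python map_infer.py -t cnxd -dir ../dataset/ -dn nltcs -depth 5 -e 0.2 -seq 0

Here e_percent = 0.2 selects the evidence file nltcs_evid_20.txt, and seq = 0 picks the first evidence row in that file.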
Example #3
def main_map_cnet(parms_dict):

    print('------------------------------------------------------------------')
    print('MAP inference of Cutset Network')
    print('------------------------------------------------------------------')

    dataset_dir = parms_dict['dir']
    data_name = parms_dict['dn']
    depth = int(parms_dict['depth'])
    e_file = parms_dict['efile']
    e_percent = float(parms_dict['e'])
    output_dir = parms_dict['output_dir']
    module_type = parms_dict['t']
    seq = int(parms_dict['seq'])

    input_dir = parms_dict['input_dir']
    input_module = parms_dict['input_module']

    test_filename = dataset_dir + data_name + '.test.data'
    test_dataset = np.loadtxt(test_filename, dtype=int, delimiter=',')

    n_variables = test_dataset.shape[1]
    e_var_arr = np.loadtxt(e_file, dtype=int, delimiter=',')

    e_var = e_var_arr[seq]
    x_var = np.setdiff1d(np.arange(n_variables), e_var)

    ### Load the trained cutset network
    print('Start reloading cutset network ...')

    cnet_file = input_dir + input_module + '.npz'
    print('Getting the MAP tuple...')
    cnet_module = np.load(cnet_file, allow_pickle=True)['module'].item()
    xmax_prob_cnet, map_dataset_cnet = compute_xmax_cnet(
        cnet_module, depth, test_dataset, x_var, e_var)

    # save the max tuple
    if module_type == 'cn':
        new_data_file = output_dir + data_name + '_' + module_type + '_' + str(
            int(e_percent * 100)) + '_' + str(seq) + '.txt'
    else:
        new_data_file = output_dir + data_name + '_' + module_type + '_' + str(
            int(e_percent * 100)) + '_' + str(depth) + '_' + str(seq) + '.txt'

    map_dataset_cnet = np.asarray(map_dataset_cnet).astype(int)
    np.savetxt(new_data_file, map_dataset_cnet, fmt='%i', delimiter=',')

    ll_score = np.sum(utilM.computeLL_reload(
        cnet_module, map_dataset_cnet)) / map_dataset_cnet.shape[0]

    print('MAP dataset LL score: ', ll_score)
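
A minimal sketch of driving main_map_cnet directly, using the same dictionary keys the function reads above; every value is illustrative, and the numeric entries are strings because the function casts them with int()/float():

parms_dict = {
    'dir': '../dataset/',        # dataset directory
    'dn': 'nltcs',               # dataset name
    'depth': '5',                # cutset network depth
    'efile': '../infer_output/evid_file/nltcs_evid_20.txt',
    'e': '0.2',                  # evidence fraction
    'output_dir': '../infer_output/',
    't': 'cnxd',                 # module type
    'seq': '0',                  # evidence row index
    'input_dir': '../cnxd_output/',
    'input_module': 'nltcs_5',   # resolves to ../cnxd_output/nltcs_5.npz
}
main_map_cnet(parms_dict)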
Example #4
def compute_ll_from_disk(reload_bcn, dataset):
    n_components = reload_bcn['n_components']

    cnet_weights_list = np.zeros((n_components, dataset.shape[0]))

    log_cm_weights = np.log(reload_bcn['cm_weights'])

    for c in range(n_components):
        # per-sample log-likelihood of component c, weighted by its mixture weight
        cnet_weights_list[c] = utilM.computeLL_reload(
            reload_bcn['cnet_list'][c], dataset) + log_cm_weights[c]

    cnet_weights_list, ll_score = Util.m_step_trick(cnet_weights_list)

    return ll_score
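
Util.m_step_trick is not shown here; from how it is used, it apparently log-sum-exps the per-component scores into a mixture log-likelihood in a numerically stable way. A rough equivalent for the final score alone, under that reading (scipy's logsumexp is a stand-in for the repository helper, and utilM.computeLL_reload is assumed to return per-sample log-likelihoods, as the other examples imply):

import numpy as np
from scipy.special import logsumexp

def mixture_ll(reload_bcn, dataset):
    # stack log p(x | c) + log w_c per sample, shape (n_components, n_samples)
    scores = np.array([
        utilM.computeLL_reload(cnet, dataset) + np.log(w)
        for cnet, w in zip(reload_bcn['cnet_list'], reload_bcn['cm_weights'])
    ])
    # log-sum-exp over components, then average over samples
    # (the original may sum rather than average)
    return np.mean(logsumexp(scores, axis=0))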