Example No. 1
    def buildData(self, srcBatch, goldBatch, svo_batch):
        srcData = []
        tgtData = [] if goldBatch else None
        svoData = []
        tgt_extend_vocab = [] if goldBatch else None
        src_extend_vocab = []
        src_oovs_list = []
        for i, (srcWords, svo_list) in enumerate(zip(srcBatch, svo_batch)):
            srcData += [
                self.src_dict.convertToIdx(srcWords, Constants.UNK_WORD)
            ]
            svoData += [[
                self.src_dict.convertToIdx(one_svo, Constants.UNK_WORD)
                for one_svo in svo_list
            ]]

            if goldBatch:
                tgtData += [
                    self.tgt_dict.convertToIdx(goldBatch[i],
                                               Constants.UNK_WORD,
                                               Constants.BOS_WORD,
                                               Constants.EOS_WORD)
                ]

            if self.opt.pointer_gen:
                # store the temporary OOV vocabulary
                enc_input_extend_vocab, article_oovs = self.article2ids(
                    srcWords, self.src_dict)
                src_extend_vocab += [enc_input_extend_vocab]
                src_oovs_list += [article_oovs]
                if goldBatch:
                    abs_ids_extend_vocab = self.abstract2ids(
                        goldBatch[i], self.tgt_dict, article_oovs)
                    # overwrite the target ids so they use the temporary vocabulary
                    vec = []
                    vec += [self.src_dict.lookup(Constants.BOS_WORD)]
                    vec += abs_ids_extend_vocab
                    vec += [self.src_dict.lookup(Constants.EOS_WORD)]
                    tgt_extend_vocab.append(torch.LongTensor(vec))

        if goldBatch:
            train = {
                'src': (srcData, svoData),
                'tgt': tgtData,
                'src_extend_vocab': src_extend_vocab,
                'tgt_extend_vocab': tgt_extend_vocab,
                'src_oovs_list': src_oovs_list,
            }
        else:
            train = {
                'src': (srcData, svoData),
                'src_extend_vocab': src_extend_vocab,
                'src_oovs_list': src_oovs_list,
            }
        return Dataset(train,
                       self.opt.batch_size,
                       self.opt.cuda,
                       volatile=True,
                       pointer_gen=self.opt.pointer_gen,
                       is_coverage=self.opt.is_coverage)
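For reference, the temporary OOV vocabulary used when opt.pointer_gen is set follows the usual pointer-generator convention: source words missing from the fixed vocabulary are assigned temporary ids starting at the vocabulary size, and the same per-article mapping is reused to re-encode the target. A minimal sketch of that mapping, assuming hypothetical word2id/size accessors and an explicit unk_id rather than this repo's actual Dict API:

def article2ids_sketch(article_words, vocab, unk_id):
    # Map source words to ids; words unknown to the fixed vocabulary get
    # temporary, article-specific ids in the range [vocab.size(), ...).
    ids, oovs = [], []
    for w in article_words:
        i = vocab.word2id(w)          # assumed accessor
        if i == unk_id:
            if w not in oovs:
                oovs.append(w)
            ids.append(vocab.size() + oovs.index(w))  # temporary id
        else:
            ids.append(i)
    return ids, oovs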
Example No. 2
 def __init__(self, dir_model, dataset_options, feature_columns, mode, balanced_datasets=True, resample_datasets=False):
     self.dir_model = dir_model
     self.dataset_options = dataset_options
     self.dataset = Dataset(self.dataset_options)
     self.feature_columns = feature_columns
     self.mode = mode
     self.balanced_datasets = balanced_datasets
     self.resample_datasets = resample_datasets
     return
Example No. 3
def load_train_data():
    onlinePreprocess.seq_length = opt.max_sent_length_source  # source truncation length for training
    onlinePreprocess.shuffle = 1 if opt.process_shuffle else 0
    train_data, vocab_dicts = prepare_data_online(opt)
    trainData = Dataset(train_data, opt.batch_size, opt.gpus, pointer_gen=opt.pointer_gen, is_coverage=opt.is_coverage)
    logger.info(' * vocabulary size. source = %d; target = %d' %
                (vocab_dicts['src'].size(), vocab_dicts['tgt'].size()))
    logger.info(' * number of training sentences. %d' %
                len(train_data['src']))
    return trainData, vocab_dicts
Example No. 4
 def __init__(self,
              mode,
              dir_model,
              dataset_options,
              balanced_datasets=True):
     self.dir_model = dir_model
     self.mode = mode
     self.dataset_options = dataset_options
     self.dataset = Dataset(self.dataset_options)
     self.balanced_datasets = balanced_datasets
     return
Example No. 5
def learn(
        env,
        policy_fn,
        *,
        timesteps_per_actorbatch,  # timesteps per actor per update
        optim_stepsize,
        optim_batchsize,  # optimization hypers
        gamma,
        lam,  # advantage estimation
        entcoeff=0.0,
        max_episodes=0,
        max_iters=0,
        max_seconds=0,  # time constraint
        callback=None,  # you can do anything in the callback, since it takes locals(), globals()
        adam_epsilon=1e-5,
        schedule='constant',  # annealing for stepsize parameters (epsilon and adam)
        args):
    # Setup losses and stuff
    # ----------------------------------------
    ob_space = env.observation_space
    ac_space = env.action_space
    pi = policy_fn("pi", ob_space,
                   ac_space)  # Construct network for new policy
    oldpi = policy_fn("oldpi", ob_space, ac_space)  # Network for old policy

    # Ops to reassign params from new to old
    assign_old_eq_new = U.function(
        [], [],
        updates=[
            tf.assign(oldv, newv)
            for (oldv,
                 newv) in zipsame(oldpi.get_variables(), pi.get_variables())
        ])

    atarg = tf.placeholder(
        dtype=tf.float32,
        shape=[None])  # Target advantage function (if applicable)
    ret = tf.placeholder(dtype=tf.float32, shape=[None])  # Empirical return

    lrmult = tf.placeholder(
        name='lrmult', dtype=tf.float32,
        shape=[])  # learning rate multiplier, updated with schedule

    ob = U.get_placeholder_cached(name="ob")
    ac = pi.pdtype.sample_placeholder([None])

    kloldnew = oldpi.pd.kl(pi.pd)
    ent = pi.pd.entropy()
    meankl = tf.reduce_mean(kloldnew)
    meanent = tf.reduce_mean(ent)
    pol_entpen = (-entcoeff) * meanent

    newprob = tf.exp(pi.pd.logp(ac))
    oldprob = tf.exp(oldpi.pd.logp(ac))

    ratio = newprob / oldprob

    kl = pi.pd.kl(oldpi.pd)
    mean_kl = tf.reduce_mean(kl)
    get_kl = U.function([ob, ac], kl)
    get_mean_kl = U.function([ob, ac], mean_kl)

    threshold = kl < args.kl_threshold
    threshold = tf.cast(threshold, tf.float32)

    pol_surr = (kl - ratio * atarg / args.sepg_lam) * threshold

    pol_surr = tf.reduce_mean(pol_surr)

    vf_loss = tf.reduce_mean(tf.square(pi.vpred - ret))
    total_loss = pol_surr + pol_entpen + vf_loss
    losses = [pol_surr, pol_entpen, vf_loss, meankl, meanent]
    loss_names = ["pol_surr", "pol_entpen", "vf_loss", "kl", "ent"]

    var_list = pi.get_trainable_variables()
    lossandgrad = U.function([ob, ac, atarg, ret, lrmult],
                             losses + [U.flatgrad(total_loss, var_list)])

    adam = MpiAdam(var_list, epsilon=adam_epsilon)

    compute_losses = U.function([ob, ac, atarg, ret, lrmult], losses)

    U.initialize()
    adam.sync()

    # Prepare for rollouts
    # ----------------------------------------
    seg_gen = traj_segment_generator(pi,
                                     env,
                                     timesteps_per_actorbatch,
                                     stochastic=True)

    episodes_so_far = 0
    timesteps_so_far = 0
    iters_so_far = 0
    tstart = time.time()
    lenbuffer = deque(maxlen=100)  # rolling buffer for episode lengths
    rewbuffer = deque(maxlen=100)  # rolling buffer for episode rewards

    running_scores = []

    assert sum([
        max_iters > 0, args.num_timesteps > 0, max_episodes > 0,
        max_seconds > 0
    ]) == 1, "Only one time constraint permitted"

    while True:
        if callback: callback(locals(), globals())
        if args.num_timesteps and timesteps_so_far >= args.num_timesteps:
            break
        elif max_episodes and episodes_so_far >= max_episodes:
            break
        elif max_iters and iters_so_far >= max_iters:
            break
        elif max_seconds and time.time() - tstart >= max_seconds:
            break

        if schedule == 'constant':
            cur_lrmult = 1.0
        elif schedule == 'linear':
            cur_lrmult = max(
                1.0 - float(timesteps_so_far) / args.num_timesteps, 0)
        else:
            raise NotImplementedError

        if MPI.COMM_WORLD.Get_rank() == 0:
            logger.log("********** Iteration %i ************" % iters_so_far)

        seg = seg_gen.__next__()
        add_vtarg_and_adv(seg, gamma, lam)

        # ob, ac, atarg, ret, td1ret = map(np.concatenate, (obs, acs, atargs, rets, td1rets))
        ob, ac, atarg, tdlamret = seg["ob"], seg["ac"], seg["adv"], seg[
            "tdlamret"]
        vpredbefore = seg["vpred"]  # predicted value function before update
        atarg = (atarg - atarg.mean()) / (
            atarg.std() + 1e-8)  # standardized advantage function estimate

        optim_batchsize = optim_batchsize or ob.shape[0]

        if hasattr(pi, "ob_rms"):
            pi.ob_rms.update(ob)  # update running mean/std for policy

        assign_old_eq_new()  # set old parameter values to new parameter values

        d = Dataset(dict(ob=ob, ac=ac, atarg=atarg, vtarg=tdlamret),
                    shuffle=not pi.recurrent)

        # Here we do a bunch of optimization epochs over the data
        for num_epoch in count():
            losses = []  # list of tuples, each of which gives the loss for a minibatch
            for batch in d.iterate_once(optim_batchsize):
                *newlosses, g = lossandgrad(batch["ob"], batch["ac"],
                                            batch["atarg"], batch["vtarg"],
                                            cur_lrmult)
                g = np.nan_to_num(g)
                adam.update(g, optim_stepsize * cur_lrmult)
                losses.append(newlosses)

            agg_mean_kl = get_mean_kl(ob, ac)

            if agg_mean_kl > args.agg_kl_threshold or num_epoch == args.optim_epochs:
                break

        lrlocal = (seg["ep_lens"], seg["ep_rets"])  # local values
        listoflrpairs = MPI.COMM_WORLD.allgather(lrlocal)  # list of tuples
        lens, rews = map(flatten_lists, zip(*listoflrpairs))

        rewbuffer.extend(rews)

        mean_score = None

        if rewbuffer:
            mean_score = np.mean(rewbuffer)
            running_scores.append((timesteps_so_far, mean_score))

        episodes_so_far += len(lens)
        timesteps_so_far += sum(lens)
        iters_so_far += 1
        if MPI.COMM_WORLD.Get_rank() == 0:
            logger.record_tabular("EpRewMean", mean_score)
            logger.record_tabular("EpThisIter", len(lens))
            logger.record_tabular("EpisodesSoFar", episodes_so_far)
            logger.record_tabular("TimestepsSoFar", timesteps_so_far)
            logger.record_tabular("TimeElapsed", time.time() - tstart)
            logger.record_tabular("NumEpoch", num_epoch)

            logger.dump_tabular()

    return running_scores
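For orientation, this learn variant is invoked like the stock baselines PPO1 entry point. The sketch below is illustrative only: the environment name, hyperparameter values and the args fields (num_timesteps, kl_threshold, agg_kl_threshold, sepg_lam, optim_epochs) are assumptions inferred from how they are read above.

from types import SimpleNamespace

import gym
from baselines.ppo1.mlp_policy import MlpPolicy


def policy_fn(name, ob_space, ac_space):
    # Small MLP policy, as in the stock PPO1 runner scripts.
    return MlpPolicy(name=name, ob_space=ob_space, ac_space=ac_space,
                     hid_size=64, num_hid_layers=2)


args = SimpleNamespace(num_timesteps=1000000, kl_threshold=0.05,
                       agg_kl_threshold=0.02, sepg_lam=0.1, optim_epochs=10)
running_scores = learn(gym.make("Hopper-v2"), policy_fn,
                       timesteps_per_actorbatch=2048,
                       optim_stepsize=3e-4, optim_batchsize=64,
                       gamma=0.99, lam=0.95, args=args)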
Example No. 6
def encode(flags_obj):
    """Run Wide-Deep training and eval loop.
    Args:
    flags_obj: An object containing parsed flag values.
    """
    dict_data_training = {
        'dir_data': DIRPROJECT + 'data/',
        'data_prefix': 'nz',
        'dataset': '20012016',
        'encoding': 'embedding',
        'newfeatures': None,
        'featurereduction': {
            'method': 'FUSION'
        },
        'grouping': 'verylightgrouping'
    }
    dataset_options_training = DatasetOptions(dict_data_training)

    dict_data_encoding = {
        'dir_data': DIRPROJECT + 'data/',
        'data_prefix': 'nz',
        'dataset': '2017',
        'encoding': 'embedding',
        'newfeatures': None,
        'featurereduction': {
            'method': 'FUSION'
        },
        'grouping': 'verylightgrouping'
    }
    dataset_options_encoding = DatasetOptions(dict_data_encoding)

    feature_columns = FeatureColumnsAutoEncoderNZ(
        dataset_options=dataset_options_encoding)

    dict_dataset_options = {
        'train': dataset_options_training,
        'eval': None,
        'test': dataset_options_encoding
    }

    nn = AutoEncoderModel('test', dict_dataset_options, feature_columns,
                          flags_obj)
    diag_encodings = nn.encode()
    print('diag_encodings --> main diag: ' + str(diag_encodings[0].shape))
    print('diag_encodings --> secondary diags: ' +
          str(diag_encodings[1].shape))

    main_diag_encodings = diag_encodings[0]
    sec_diag_encodings = diag_encodings[1]

    dataset_encoding = Dataset(dataset_options_encoding)
    df_encoding = dataset_encoding.getDf()
    print('df_encoding: ' + str(df_encoding.shape))
    num_encoded_dim = main_diag_encodings.shape[1]

    dir_data = dataset_options_encoding.getDirData()
    dataset = dataset_options_encoding.getDatasetName()
    data_prefix = dataset_options_encoding.getDataPrefix()
    demographic_featurename = dataset_options_encoding.getFilenameOptionDemographicFeatures()
    featureset_str = dataset_options_encoding.getFeatureSetStr()
    encoding = dataset_options_encoding.getEncodingScheme()
    name_event_column = dataset_options_encoding.getEventColumnName()

    name_main_diag = dataset_options_encoding.getNameMainDiag()
    name_sec_diag = dataset_options_encoding.getNameSecDiag()
    df_encoding_sec_diag = df_encoding[name_event_column].to_frame()
    df_encoding_main_diag = df_encoding[name_event_column].to_frame()

    num_encoded_dim = sec_diag_encodings.shape[1]
    for k in range(0, num_encoded_dim):
        new_col_secdiag = name_sec_diag + '_dim_' + str(k)
        df_encoding_sec_diag[new_col_secdiag] = sec_diag_encodings[:, k]

        new_col_maindiag = name_main_diag + '_dim_' + str(k)
        df_encoding_main_diag[new_col_maindiag] = main_diag_encodings[:, k]

    print('df_encoding_main_diag: ' + str(df_encoding_main_diag.shape))
    print('df_encoding_sec_diag: ' + str(df_encoding_sec_diag.shape))

    filename_sec_diag_encoding = dir_data + 'data_' + data_prefix + '_' + dataset + '_' + name_sec_diag + '_' + str(
        num_encoded_dim) + 'dim.csv'
    filename_main_diag_encoding = dir_data + 'data_' + data_prefix + '_' + dataset + '_' + name_main_diag + '_' + str(
        num_encoded_dim) + 'dim.csv'

    list_df = [
        df_encoding_sec_diag[i:i + 10000]
        for i in range(0, df_encoding_sec_diag.shape[0], 10000)
    ]
    list_df[0].to_csv(filename_sec_diag_encoding,
                      index=False,
                      line_terminator='\n')
    for l in list_df[1:]:
        l.to_csv(filename_sec_diag_encoding,
                 index=False,
                 line_terminator='\n',
                 header=False,
                 mode='a')

    list_df = [
        df_encoding_main_diag[i:i + 10000]
        for i in range(0, df_encoding_main_diag.shape[0], 10000)
    ]
    list_df[0].to_csv(filename_main_diag_encoding,
                      index=False,
                      line_terminator='\n')
    for l in list_df[1:]:
        l.to_csv(filename_main_diag_encoding,
                 index=False,
                 line_terminator='\n',
                 header=False,
                 mode='a')
Example No. 7
import torch
import torch.nn as nn
from torch.autograd import Variable
from torch.utils.data import DataLoader
from config.test_config import TestConfig
import os
import numpy as np
from PIL import Image

opt = TestConfig().parse()
model = CycleGAN(opt)
model.load_state_dict(
    torch.load('log/snapshot/' + opt.name + '_snapshot_' + str(opt.epoch) +
               '.pkl'))
model.eval()
model.cuda()
dataset = Dataset(opt)
data_loader = DataLoader(dataset,
                         batch_size=1,
                         shuffle=opt.shuffle,
                         num_workers=4)
pic_dir = opt.pic_dir

for iteration, input in enumerate(data_loader):
    model.deal_with_input(input)
    model.test()
    g_A = model.generated_A.cpu().numpy()
    g_B = model.generated_B.cpu().numpy()
    c_A = model.cycled_A.cpu().numpy()
    c_B = model.cycled_B.cpu().numpy()
    #g_A = Image.fromarray(((g_A+1.)/2.*255).astype(np.uint8).transpose(1,2,0))
    #g_A.save(os.path.join(pic_dir, 'generated_A_'+str(opt.epoch)+'.png'))
Example No. 8
File: BAG.py Project: wujindou/BAG
     logger.info('Cannot find preprocess data %s, program will shut down.',
                 '{}.preprocessed.pickle'.format(train_file_name_prefix))
     sys.exit()
 dev_file_name_prefix, fileExist = checkPreprocessFile(
     dev_file, add_query_node)
 if not fileExist:
     logger.info('Cannot find preprocess data %s, program will shut down.',
                 '{}.preprocessed.pickle'.format(dev_file_name_prefix))
     sys.exit()
 if not evaluation_mode:
     logger.info('Loading preprocessed training data file %s',
                 '{}.preprocessed.pickle'.format(train_file_name_prefix))
     dataset = Dataset(train_file_name_prefix,
                       use_elmo,
                       use_glove,
                       use_extra_feature,
                       max_nodes=500,
                       max_query_size=25,
                       max_candidates=80,
                       max_candidates_len=10)
     logger.info('Loading preprocessed development data file %s',
                 '{}.preprocessed.pickle'.format(dev_file_name_prefix))
     dev_dataset = Dataset(dev_file_name_prefix,
                           use_elmo,
                           use_glove,
                           use_extra_feature,
                           max_nodes=500,
                           max_query_size=25,
                           max_candidates=80,
                           max_candidates_len=10)
 else:
     logger.info('Loading preprocessed evaluation data file %s',
Example No. 9
parser.add_argument('--gallery_feature_dir', type=str)
parser.add_argument('--query_feature_dir', type=str)
parser.add_argument('--useCAM', action='store_true')

args = parser.parse_args()

data_transforms = transforms.Compose([
    transforms.Resize((args.img_h, args.img_w)),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])

# image_datasets = {x: datasets.ImageFolder(os.path.join(args.test_dir, x) ,data_transforms) for x in ['gallery','query']}
image_datasets = {
    x: Dataset(os.path.join(args.test_dir, x),
               data_transforms,
               CAM=args.useCAM)
    for x in ['gallery', 'query']
}
# labelsloader = {x: iter(image_datasets[x].imgs) for x in ['gallery', 'query']}
dataloaders = {
    x: torch.utils.data.DataLoader(image_datasets[x],
                                   batch_size=args.batch_size,
                                   shuffle=False,
                                   num_workers=4)
    for x in ['gallery', 'query']
}


def load_network(network):
    save_path = os.path.join(args.model_save_dir,
Example No. 10
import os
import numpy as np
from utils.Dataset import Dataset
from model import model_multi_view
from utils.cluster import cluster
import csv
os.environ["CUDA_VISIBLE_DEVICES"] = "1"
print(os.environ['CUDA_VISIBLE_DEVICES'])
'''
each net has its own learning_rate(lr_xx), activation_function(act_xx), nodes_of_layers(dims_xx)
the AE nets need pretraining before the whole optimization
'''
if __name__ == '__main__':

    num = 30
    data = Dataset('coil_2views')
    x1, x2, gt = data.load_data()
    X = dict()
    X[str(0)], X[str(1)] = x1, x2
    acc_H_all = np.zeros(num)
    nmi_H_all = np.zeros(num)
    RI_H_all = np.zeros(num)
    f1_H_all = np.zeros(num)

    para_lambda = 1
    batch_size = X['0'].shape[0]
    lr_pre = 1.0e-3
    lr_ae = 1.0e-3
    lr_dg = 1.0e-3
    lr_h = 1.0e-2
    epochs_pre = 300
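As the docstring above notes, every sub-network keeps its own learning rate, activation and layer sizes, and the autoencoders are pretrained before the joint objective is optimized. A rough sketch of that schedule; pretrain_ae/update_ae/update_dg/update_h are hypothetical method names, not model_multi_view's actual API:

def train_schedule(model, X, epochs_pre, epochs_total, lr_pre, lr_ae, lr_dg, lr_h):
    # 1) pretrain each view-specific autoencoder on its reconstruction loss only
    for view, x in X.items():
        model.pretrain_ae(view, x, epochs=epochs_pre, lr=lr_pre)  # hypothetical
    # 2) joint optimization: alternate updates of the AE nets, the degradation
    #    nets and the shared latent representation H
    for epoch in range(epochs_total):
        model.update_ae(X, lr=lr_ae)  # hypothetical
        model.update_dg(X, lr=lr_dg)  # hypothetical
        model.update_h(X, lr=lr_h)    # hypothetical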
Example No. 11
def test_item_file(end_test_file, embedding_file_path, vocab_file_path,
                   use_gpu):
    embed = torch.Tensor(np.load(embedding_file_path)['arr_0'])
    with open(vocab_file_path) as f:
        word2id = json.load(f)
    vocab = Vocab(embed, word2id)
    #with open(end_test_file) as f:
    #    examples = [json.loads(line) for line in f]
    with open(end_test_file) as f:
        examples = list()
        for line in f:
            if line and not line.isspace():
                examples.append(json.loads(line))
    #print(examples[0])
    test_dataset = Dataset(examples)

    test_iter = DataLoader(dataset=test_dataset,
                           batch_size=args.batch_size,
                           shuffle=False)
    load_dir = os.path.join(args.input, 'model_files', 'CNN_RNN.pt')
    if use_gpu:
        checkpoint = torch.load(load_dir)
    else:
        checkpoint = torch.load(load_dir,
                                map_location=lambda storage, loc: storage)
    if not use_gpu:
        checkpoint['args'].device = None
    net = getattr(models, checkpoint['args'].model)(checkpoint['args'])
    net.load_state_dict(checkpoint['model'])
    if use_gpu:
        net.cuda()
    net.eval()
    doc_num = len(test_dataset)

    all_targets = []
    all_results = []
    all_probs = []
    all_acc = []
    all_p = []
    all_r = []
    all_f1 = []
    all_sum = []
    for batch in tqdm(test_iter):
        features, targets, summaries, doc_lens = vocab.make_features(batch)
        if use_gpu:
            probs = net(Variable(features).cuda(), doc_lens)
        else:
            probs = net(Variable(features), doc_lens)
        start = 0
        for doc_id, doc_len in enumerate(doc_lens):
            doc = batch['doc'][doc_id].split('\n')[:doc_len]
            stop = start + doc_len
            prob = probs[start:stop]
            hyp = []
            for _p, _d in zip(prob, doc):
                print(_p)
                print(_d)
                if _p > 0.5:
                    hyp.append(_d)
            if len(hyp) > 0:
                print(hyp)
                all_sum.append("###".join(hyp))
            else:
                all_sum.append('')
            all_targets.append(targets[start:stop])
            all_probs.append(prob)
            start = stop
    file_path_elems = end_test_file.split('/')
    file_name = 'TR-' + file_path_elems[len(file_path_elems) - 1]
    with open(os.path.join(args.output, file_name), mode='w',
              encoding='utf-8') as f:
        for text in all_sum:
            f.write(text.strip() + '\n')
    for item in all_probs:
        all_results.append([1 if tmp > 0.5 else 0 for tmp in item.tolist()])
    print(len(all_results))
    print(len(all_targets))
    print(len(all_probs))
    for _1, _2, _3 in zip(all_results, all_targets, all_probs):
        _2 = _2.tolist()
        _3 = _3.tolist()
        print("*" * 3)
        print('probs : ', _3)
        print('results : ', _1)
        print('targets : ', _2)
        tmp_acc = accuracy_score(_1, _2)
        tmp_p = precision_score(_1, _2)
        tmp_r = recall_score(_1, _2)
        tmp_f1 = f1_score(_1, _2)
        print('acc : ', tmp_acc)
        print('p : ', tmp_p)
        print('r : ', tmp_r)
        print('f1 : ', tmp_f1)
        all_acc.append(tmp_acc)
        all_p.append(tmp_p)
        all_r.append(tmp_r)
        all_f1.append(tmp_f1)
    print('all dataset acc : ', np.mean(all_acc))
    print('all dataset p : ', np.mean(all_p))
    print('all dataset r : ', np.mean(all_r))
    print('all dataset f1 : ', np.mean(all_f1))
    print('all results length : ', len(all_results))
Example No. 12
import os
import numpy as np
from utils.Dataset import Dataset
from model import model_multi_view
from utils.cluster import cluster
import csv
os.environ["CUDA_VISIBLE_DEVICES"] = "1"
print(os.environ['CUDA_VISIBLE_DEVICES'])
'''
each net has its own learning_rate(lr_xx), activation_function(act_xx), nodes_of_layers(dims_xx)
the AE nets need pretraining before the whole optimization
'''
if __name__ == '__main__':

    num = 30
    data = Dataset('ORL_2views')
    x1, x2, gt = data.load_data()
    X = dict()
    X[str(0)], X[str(1)] = x1, x2
    acc_H_all = np.zeros(num)
    nmi_H_all = np.zeros(num)
    RI_H_all = np.zeros(num)
    f1_H_all = np.zeros(num)

    para_lambda = 1
    batch_size = x1.shape[0]
    lr_pre = 1.0e-3
    lr_ae = 1.0e-3
    lr_dg = 1.0e-3
    lr_h = 1.0e-2
    epochs_pre = 50
Example No. 13
import os
import numpy as np
from utils.Dataset import Dataset
from model import model_multi_view
from utils.cluster import cluster
import csv
os.environ["CUDA_VISIBLE_DEVICES"] = "1"
print(os.environ['CUDA_VISIBLE_DEVICES'])
'''
each net has its own learning_rate(lr_xx), activation_function(act_xx), nodes_of_layers(dims_xx)
the AE nets need pretraining before the whole optimization
'''
if __name__ == '__main__':

    num = 30
    data = Dataset('ORL_3views')
    X, gt = data.load_data()

    acc_H_all = np.zeros(num)
    nmi_H_all = np.zeros(num)
    RI_H_all = np.zeros(num)
    f1_H_all = np.zeros(num)

    para_lambda = 1
    batch_size = X['0'].shape[0]
    lr_pre = 1.0e-3
    lr_ae = 1.0e-3
    lr_dg = 1.0e-3
    lr_h = 1.0e-2
    epochs_pre = 50
    epochs_total = 200
Example No. 14
#Email:		[email protected]
#Date:		Sun 13 Dec 2020 02:50:08  WIB

from model.nn import NNModel
from cf.DiCE import DiCE
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from utils.Dataset import Dataset
from utils.adult_dataset import load_adult_income

if __name__ == "__main__":

    income_df = load_adult_income("data/adult/adult.csv")
    d = Dataset(dataframe=income_df,
                continuous_features=[
                    'age', 'education', 'educational-num', 'capital-gain',
                    'capital-loss', 'hours-per-week', 'native-country'
                ],
                outcome_name='income',
                scaler=MinMaxScaler())
    clf = NNModel(model_path='weights/adult.pth')
    cf = DiCE(d, clf)
    test_instance = {
        'age': 57,
        'workclass': 'Self-Employed',
        'education': 2,
        'educational-num': 10,
        'marital-status': 'Married',
        'occupation': 'Service',
        'relationship': 'Husband',
        'race': 'White',
        'gender': 'Male',
Example No. 15
FilePath: /Signal-1/AE2-Nets-master/test_Caltech.py
'''
from utils.Dataset import Dataset
from AE_BinAE_revise import MaeAEModel
from model import model
from utils.print_result import print_result
import os
from collections import Counter

os.environ["CUDA_VISIBLE_DEVICES"] = "0"
'''
each net has its own learning_rate(lr_xx), activation_function(act_xx), nodes_of_layers(dims_xx)
the AE nets need pretraining before the whole optimization
'''
if __name__ == '__main__':
    data = Dataset('Caltech101_7_2views')
    x1, x2, gt = data.load_data()
    x1 = data.normalize(x1, 0)
    x2 = data.normalize(x2, 0)
    n_clusters = len(set(gt))
    print(x1.shape)
    print(x2.shape)
    print(n_clusters)
    #act_ae1, act_ae2, act_dg1, act_dg2 = 'sigmoid', 'sigmoid', 'sigmoid', 'sigmoid'
    v1_aedims_ = [[x1.shape[1], 1024, 512, 256], [256, 512, 1024, x1.shape[1]]]

    v2_aedims_ = [[x2.shape[1], 256, 128], [128, 256, x2.shape[1]]]
    # original
    mae_dims_ = [[256, 256], [128, 128, 64], [256, 256], [64, 128, 128]]
    # currently used
    #dims_dg1 = [64, 100]
Example No. 16
Description: Nothing
FilePath: /Signal-1/AE2-Nets-master/test_CUB.py
'''
from utils.Dataset import Dataset
from AE_BinAE_revise import MaeAEModel
from model import model
from utils.print_result import print_result
import os

os.environ["CUDA_VISIBLE_DEVICES"] = "0"
'''
each net has its own learning_rate(lr_xx), activation_function(act_xx), nodes_of_layers(dims_xx)
the AE nets need pretraining before the whole optimization
'''
if __name__ == '__main__':
    data = Dataset('CUB_c10_2views')
    x1, x2, gt = data.load_data()
    x1 = data.normalize(x1, 0)
    x2 = data.normalize(x2, 0)
    n_clusters = len(set(gt))
    print(x1.shape)
    print(x2.shape)
    print(gt.shape)
    #act_ae1, act_ae2, act_dg1, act_dg2 = 'sigmoid', 'sigmoid', 'sigmoid', 'sigmoid'
    v1_aedims_ = [[x1.shape[1], 512, 256], [256, 512, x1.shape[1]]]

    v2_aedims_ = [[x2.shape[1], 256, 128], [128, 256, x2.shape[1]]]
    # original
    mae_dims_ = [[256, 128, 64], [128, 128, 64], [64, 128, 256],
                 [64, 128, 128]]
    # currently used
Example No. 17
G = AEI_Net(512).to(device)
D = MultiscaleDiscriminator(input_nc=3,
                            ndf=64,
                            n_layers=6,
                            norm_layer=torch.nn.InstanceNorm2d).to(device)
G.train()
D.train()

arcface = Backbone(50, 0.6, 'ir_se').to(device)
arcface.eval()
arcface.load_state_dict(torch.load("./model_weights/model_ir_se50.pth"))

opt_G = optim.Adam(G.parameters(), lr=lr_G, betas=(0, 0.999))
opt_D = optim.Adam(D.parameters(), lr=lr_D, betas=(0, 0.999))

dataset = Dataset("./dataset/celeb/", same_prob=0.2)

dataloader = DataLoader(dataset,
                        batch_size=batch_size,
                        shuffle=True,
                        num_workers=0,
                        drop_last=True)

MSE = torch.nn.MSELoss()
L1 = torch.nn.L1Loss()


def hinge_loss(X, positive=True):
    if positive:
        return torch.relu(1 - X).mean()
    return torch.relu(X).mean()
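hinge_loss above applies a margin of 1 to samples that should score high and a plain ReLU to samples that should score low. A sketch of how the adversarial terms are typically assembled from it; this is illustrative and assumes D returns a single score tensor, whereas the MultiscaleDiscriminator in this script returns per-scale outputs that would each be reduced the same way:

def adversarial_losses(D, real_images, fake_images):
    # Discriminator: push real scores above the margin, fake scores below zero.
    loss_D = hinge_loss(D(real_images), positive=True) + \
             hinge_loss(D(fake_images.detach()), positive=False)
    # Generator: raise the discriminator score of the generated images.
    loss_G_adv = -D(fake_images).mean()
    return loss_D, loss_G_adv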
Example No. 18
def train():
    print("*"*100)
    print("train begin")
    # use gpu
    use_gpu = args.device is not None
    if torch.cuda.is_available() and not use_gpu:
        print("WARNING: You have a CUDA device, should run with -device 0")
    if use_gpu:
        # set cuda device and seed
        torch.cuda.set_device(args.device)
    torch.cuda.manual_seed(args.seed)
    torch.manual_seed(args.seed)
    random.seed(args.seed)
    numpy.random.seed(args.seed)
    os.environ["CUDA_VISIBLE_DEVICES"] = str(args.gpu_id)

    # prepare the file paths
    embedding_file_path = os.path.join(args.project, "embedding.npz")
    vocab_file_path = os.path.join(args.project, "word2id.json")
    end_train_file = os.path.join(args.input, "train_files", "train.txt")
    train_files_dir = os.path.join(args.input, "train_files")

    # merge the text files with the same suffix
    merge_same_suf_text_file(train_files_dir, end_train_file, '.txt')

    print('Loading vocab, train and val dataset. Wait a second, please')
    embed = torch.Tensor(np.load(embedding_file_path)['arr_0'])  # embed = torch.Tensor(list(np.load(args.embedding)))
    with open(vocab_file_path) as f:
        word2id = json.load(f)
    vocab = Vocab(embed, word2id)
    with open(end_train_file) as f:
        examples = list()
        for line in tqdm(f):
            if line and not line.isspace():
                examples.append(json.loads(line))
    train_dataset = Dataset(examples)
    print(train_dataset[:1])

    args.embed_num = embed.size(0)  # 从embeding中读取维度
    args.embed_dim = embed.size(1)  #
    args.kernel_sizes = [int(ks) for ks in args.kernel_sizes.split(',')]
    net = getattr(models, args.model)(args, embed)
    if use_gpu:
        net.cuda()
    train_iter = DataLoader(dataset=train_dataset, batch_size=args.batch_size, shuffle=False)
    criterion = nn.BCELoss()
    params = sum(p.numel() for p in list(net.parameters())) / 1e6
    print('#Params: %.1fM' % (params))

    min_loss = float('inf')
    optimizer = torch.optim.Adam(net.parameters(), lr=args.learning_rate)
    net.train()

    t1 = time()
    for epoch in range(1, args.max_epoch + 1):
        print("*"*10, 'epoch ', str(epoch), '*'*50)
        for i, batch in enumerate(train_iter):
            print("*"*10, 'batch', i, '*'*10)
            features, targets, _, doc_lens = vocab.make_features(batch, args.seq_trunc)
            features, targets = Variable(features), Variable(targets.float())
            if use_gpu:
                features = features.cuda()
                targets = targets.cuda()
            probs = net(features, doc_lens)
            loss = criterion(probs, targets)
            optimizer.zero_grad()
            loss.backward()
            clip_grad_norm(net.parameters(), args.max_norm)
            optimizer.step()
            net.save()
            print('Epoch: %2d Loss: %f' % (epoch, loss))
    t2 = time()
    print('Total Cost:%f h' % ((t2 - t1) / 3600))
    print("模型配置文件保存至输出文件夹")
Example No. 19
import os
import numpy as np
from utils.Dataset import Dataset
from model import model_multi_view
from utils.cluster import cluster
import csv
os.environ["CUDA_VISIBLE_DEVICES"] = "0"
print(os.environ['CUDA_VISIBLE_DEVICES'])
'''
each net has its own learning_rate(lr_xx), activation_function(act_xx), nodes_of_layers(dims_xx)
the AE nets need pretraining before the whole optimization
'''
if __name__ == '__main__':

    num = 30
    data = Dataset('handwritten_6views')
    X, gt = data.load_data()

    acc_H_all = np.zeros(num)
    nmi_H_all = np.zeros(num)
    RI_H_all = np.zeros(num)
    f1_H_all = np.zeros(num)

    para_lambda = 1
    batch_size = 2000
    lr_pre = 1.0e-3
    lr_ae = 1.0e-3
    lr_dg = 1.0e-3
    lr_h = 1.0e-1
    epochs_pre = 10
    epochs_total = 20
Example No. 20
import os

from utils.DatasetFilter import DatasetFilter
from utils.Dataset import Dataset
from utils.DatasetOptions import DatasetOptions

import helpers.constants as constants
import helpers.constantsNZ as constantsNZ

dirProject = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) + '/'
dirData = dirProject + 'data/'
dirPlotsBase = dirProject + 'plots/feature_comparison_wiederkehrer_normal/'

dict_options_analyzing = {
    'dir_data': dirData,
    'data_prefix': 'patrec',
    'dataset': '20122015',
    'grouping': 'verylightgrouping',
    'encoding': 'categorical',
    'newfeatures': {
        'names': constants.NEW_FEATURES
    },
    'featurereduction': None,
    'filter_options': 'chronic_lung'
}

options = DatasetOptions(dict_options_analyzing)
dataset = Dataset(options)

datafilter = DatasetFilter(options)
datafilter.filterDataDisease()
Example No. 21
 def __init__(self, dataset_options, dir_plots):
     self.dataset_options = dataset_options
     self.dataset = Dataset(dataset_options=dataset_options)
     self.dir_plots = dir_plots
     return
Example No. 22
device = torch.device("cuda")

G = AEI_Net(512).to(device)
D = MultiscaleDiscriminator(input_nc=3,
                            ndf=64,
                            n_layers=6,
                            norm_layer=torch.nn.InstanceNorm2d).to(device)
G.train()
D.train()

arcface = Backbone(50, 0.6, 'ir_se').to(device)
arcface.eval()
arcface.load_state_dict(torch.load("./model_weights/model_ir_se50.pth"))

dataset = Dataset("./inputs/processed")

dataloader = DataLoader(dataset, batch_size=batch_size, num_workers=0)

MSE = torch.nn.MSELoss()
L1 = torch.nn.L1Loss()


def hinge_loss(X, positive=True):
    if positive:
        return torch.relu(1 - X).mean()
    return torch.relu(X).mean()


def get_grid_image(X):
    X = X[:8]
Example No. 23
import os
import numpy as np
from utils.Dataset import Dataset
from model import model_multi_view
from utils.cluster import cluster
import csv

os.environ["CUDA_VISIBLE_DEVICES"] = "0"
print(os.environ['CUDA_VISIBLE_DEVICES'])
'''
each net has its own learning_rate(lr_xx), activation_function(act_xx), nodes_of_layers(dims_xx)
the AE nets need pretraining before the whole optimization
'''
if __name__ == '__main__':

    num = 10
    data = Dataset('COIL20_3views')
    X, gt = data.load_data()

    acc_H_all = np.zeros(num)
    nmi_H_all = np.zeros(num)
    RI_H_all = np.zeros(num)
    f1_H_all = np.zeros(num)

    para_lambda = 1
    batch_size = X['0'].shape[0]
    lr_pre = 1.0e-3
    lr_ae = 1.0e-3
    lr_dg = 1.0e-3
    lr_h = 1.0e-2
    epochs_pre = 300
    epochs_total = 100
Example No. 24
parser.add_argument('--data_dir', type=str, default='./data')
parser.add_argument('--save_dir', type=str, default='./saves')
parser.add_argument('--conf_dir', type=str, default='./conf')
parser.add_argument('--seed', type=int, default=225)

conf = parser.parse_args()
model_conf = Params(os.path.join(conf.conf_dir, conf.model.lower() + '.json'))

np.random.seed(conf.seed)
torch.random.manual_seed(conf.seed)

device = torch.device('cuda') if torch.cuda.is_available() else torch.device(
    'cpu')

dataset = Dataset(data_dir=conf.data_dir,
                  data_name=model_conf.data_name,
                  train_ratio=model_conf.train_ratio,
                  device=device)

log_dir = os.path.join('saves', conf.model)
logger = Logger(log_dir)
model_conf.save(os.path.join(logger.log_dir, 'config.json'))

eval_pos, eval_target = dataset.eval_data()
item_popularity = dataset.item_popularity
evaluator = Evaluator(eval_pos, eval_target, item_popularity, model_conf.top_k)

model_base = getattr(models, conf.model)
model = model_base(model_conf, dataset.num_users, dataset.num_items, device)

logger.info(model_conf)
logger.info(dataset)
Example No. 25
        dirModelsBase,
        options_training.getFilenameOptions(filteroptions=True),
        options_clf=dict_opt_sgd)
    clf_sgd = ClassifierSGD(options_sgd)

    dict_options_dataset_training = {
        'dir_data': dirData,
        'data_prefix': 'nz',
        'dataset': '2016',
        'newfeatures': {
            'names': constantsNZ.NEW_FEATURES
        },
        'featurereduction': None
    }
    options_testing = DatasetOptions(dict_options_dataset_training)
    dataset_testing = Dataset(dataset_options=options_testing)

    years = [2012, 2013, 2014, 2015]
    for year in years:
        dict_options_dataset_training = {
            'dir_data': dirData,
            'data_prefix': 'nz',
            'dataset': str(year),
            'newfeatures': {
                'names': constantsNZ.NEW_FEATURES
            },
            'featurereduction': None
        }

        options_training = DatasetOptions(dict_options_dataset_training)
        dataset_training = Dataset(dataset_options=options_training)
Example No. 26
import tensorflow as tf
import numpy as np
import scipy.io as scio
from utils.Net_ae import Net_ae
from utils.Net_dg import Net_dg
from utils.next_batch import next_batch
import math
from sklearn.utils import shuffle
import timeit
from keras.layers import *
from utils.print_result import print_result
from keras.models import Model
from utils.Dataset import Dataset
data = Dataset('handwritten_2views')
x1, x2, gt = data.load_data()
x1 = data.normalize(x1, 0)
x2 = data.normalize(x2, 0)
n_clusters = len(set(gt))

def xavier_init(fan_in, fan_out, constant=1):
    low = -constant * np.sqrt(6.0 / (fan_in + fan_out))
    high = constant * np.sqrt(6.0 / (fan_in + fan_out))
    return tf.random_uniform((fan_in, fan_out),
                            minval=low, maxval=high,
                            dtype=tf.float32)
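# Illustrative use of xavier_init (not from the original file): build the
# weight/bias variables for a hypothetical 784 -> 256 fully connected layer.
W1 = tf.Variable(xavier_init(784, 256))
b1 = tf.Variable(tf.zeros([256], dtype=tf.float32))
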
class dualModel:
    def __init__(self, epochs):
        self.epochs = epochs

    def train_model(self, X1, X2, gt, para_lambda, dims, act, lr, epochs, batch_size):
        err_total = list()
        start = timeit.default_timer()
    for year in years:
        print('year: ' + str(year))
        dict_options_dataset = {
            'dir_data': dirData,
            'data_prefix': 'nz',
            'dataset': str(year),
            'encoding': 'embedding',
            'grouping': 'verylightgrouping',
            'newfeatures': None,
            'featurereduction': {
                'method': 'FUSION'
            }
        }

        options_dataset_year = DatasetOptions(dict_options_dataset)
        dataset_year = Dataset(options_dataset_year)
        if balanced:
            df_year = dataset_year.getBalancedSubSet()
        else:
            df_year = dataset_year.getDf()

        #df_year['main_diag'] = df_year['main_diag'].apply(convertDiagToInd)
        print(df_year.shape)
        df_all_years = df_all_years.append(df_year)

    print('df balanced all years: ' + str(df_all_years.shape))

    encoding = options_dataset_year.getEncodingScheme()
    grouping = options_dataset_year.getGroupingName()
    featureset = options_dataset_year.getFeatureSetStr()
    filename_data_years = dirData + 'data_nz_' + str(min(years)) + str(