def buildData(self, srcBatch, goldBatch, svo_batch):
    """Convert tokenized batches into index tensors and wrap them in a Dataset.

    Args:
        srcBatch: list of tokenized source sentences (each a list of words).
        goldBatch: list of tokenized target sentences; falsy (empty/None) at
            inference time, in which case no target fields are produced.
        svo_batch: per-sentence lists of SVO (subject/verb/object) token
            sequences, aligned index-by-index with srcBatch.

    Returns:
        A Dataset built in evaluation mode (volatile=True), carrying the
        indexed source/SVO data, optional targets, and — when pointer_gen is
        enabled — the per-article extended-vocabulary ids and OOV lists.
    """
    srcData = []
    # Target-side containers only exist when gold targets are supplied.
    tgtData = [] if goldBatch else None
    svoData = []
    tgt_extend_vocab = [] if goldBatch else None
    src_extend_vocab = []
    src_oovs_list = []
    for i, (srcWords, svo_list) in enumerate(zip(srcBatch, svo_batch)):
        # Map source words to vocabulary ids; unknown words become UNK.
        srcData += [
            self.src_dict.convertToIdx(srcWords, Constants.UNK_WORD)
        ]
        # Each sentence carries a list of SVO sequences, indexed with the
        # source dictionary as well.
        svoData += [[
            self.src_dict.convertToIdx(one_svo, Constants.UNK_WORD)
            for one_svo in svo_list
        ]]
        if goldBatch:
            # Targets get BOS/EOS sentinels in addition to UNK handling.
            tgtData += [
                self.tgt_dict.convertToIdx(goldBatch[i],
                                           Constants.UNK_WORD,
                                           Constants.BOS_WORD,
                                           Constants.EOS_WORD)
            ]
        if self.opt.pointer_gen:
            # Build the temporary per-article OOV vocabulary so the pointer
            # network can copy source words that are outside the fixed vocab.
            enc_input_extend_vocab, article_oovs = self.article2ids(
                srcWords, self.src_dict)
            src_extend_vocab += [enc_input_extend_vocab]
            src_oovs_list += [article_oovs]
            if goldBatch:
                abs_ids_extend_vocab = self.abstract2ids(
                    goldBatch[i], self.tgt_dict, article_oovs)
                # Re-encode the target using the temporary (extended) vocab.
                vec = []
                # NOTE(review): BOS/EOS are looked up in src_dict rather than
                # tgt_dict — confirm the two dictionaries share these ids.
                vec += [self.src_dict.lookup(Constants.BOS_WORD)]
                vec += abs_ids_extend_vocab
                vec += [self.src_dict.lookup(Constants.EOS_WORD)]
                tgt_extend_vocab.append(torch.LongTensor(vec))
    if goldBatch:
        train = {
            'src': (srcData, svoData),
            'tgt': tgtData,
            'src_extend_vocab': src_extend_vocab,
            'tgt_extend_vocab': tgt_extend_vocab,
            'src_oovs_list': src_oovs_list,
        }
    else:
        train = {
            'src': (srcData, svoData),
            'src_extend_vocab': src_extend_vocab,
            'src_oovs_list': src_oovs_list,
        }
    return Dataset(train,
                   self.opt.batch_size,
                   self.opt.cuda,
                   volatile=True,
                   pointer_gen=self.opt.pointer_gen,
                   is_coverage=self.opt.is_coverage)
def __init__(self, dir_model, dataset_options, feature_columns, mode,
             balanced_datasets=True, resample_datasets=False):
    """Store model directory, dataset configuration and feature columns.

    A Dataset is created eagerly from the supplied options; the remaining
    arguments are kept as plain attributes for later use.
    """
    self.mode = mode
    self.dir_model = dir_model
    self.feature_columns = feature_columns
    self.dataset_options = dataset_options
    self.dataset = Dataset(self.dataset_options)
    self.balanced_datasets = balanced_datasets
    self.resample_datasets = resample_datasets
def load_train_data():
    """Prepare the online-preprocessed training data.

    Configures the preprocessing module from the global options, builds the
    training Dataset, logs vocabulary/corpus statistics, and returns the
    dataset together with the vocabulary dictionaries.
    """
    # Truncation length used while preprocessing the training source side.
    onlinePreprocess.seq_length = opt.max_sent_length_source
    onlinePreprocess.shuffle = int(bool(opt.process_shuffle))
    train_data, vocab_dicts = prepare_data_online(opt)
    dataset = Dataset(train_data,
                      opt.batch_size,
                      opt.gpus,
                      pointer_gen=opt.pointer_gen,
                      is_coverage=opt.is_coverage)
    src_size = vocab_dicts['src'].size()
    tgt_size = vocab_dicts['tgt'].size()
    logger.info(' * vocabulary size. source = %d; target = %d' %
                (src_size, tgt_size))
    logger.info(' * number of training sentences. %d' %
                len(train_data['src']))
    return dataset, vocab_dicts
def __init__(self, mode, dir_model, dataset_options, balanced_datasets=True):
    """Keep run configuration and instantiate the Dataset for the options."""
    self.dir_model = dir_model
    self.mode = mode
    self.balanced_datasets = balanced_datasets
    self.dataset_options = dataset_options
    self.dataset = Dataset(self.dataset_options)
def learn(
        env,
        policy_fn,
        *,
        timesteps_per_actorbatch,  # timesteps per actor per update
        optim_stepsize,
        optim_batchsize,  # optimization hypers
        gamma,
        lam,  # advantage estimation
        entcoeff=0.0,
        max_episodes=0,
        max_iters=0,
        max_seconds=0,  # time constraint
        callback=None,  # you can do anything in the callback, since it takes locals(), globals()
        adam_epsilon=1e-5,
        schedule='constant',  # annealing for stepsize parameters (epsilon and adam)
        args):
    """Run a PPO-style policy-gradient training loop on *env*.

    A KL-thresholded surrogate loss (see pol_surr below) is optimized with
    MPI-synchronized Adam. Exactly one of the time constraints
    (args.num_timesteps, max_episodes, max_iters, max_seconds) must be set.

    Returns:
        running_scores: list of (timesteps_so_far, mean_episode_reward)
        tuples recorded over training.
    """
    # Setup losses and stuff
    # ----------------------------------------
    ob_space = env.observation_space
    ac_space = env.action_space
    pi = policy_fn("pi", ob_space, ac_space)  # Construct network for new policy
    oldpi = policy_fn("oldpi", ob_space, ac_space)  # Network for old policy
    # Ops to reassign params from new to old
    assign_old_eq_new = U.function(
        [], [],
        updates=[
            tf.assign(oldv, newv)
            for (oldv, newv) in zipsame(oldpi.get_variables(),
                                        pi.get_variables())
        ])
    atarg = tf.placeholder(
        dtype=tf.float32,
        shape=[None])  # Target advantage function (if applicable)
    ret = tf.placeholder(dtype=tf.float32, shape=[None])  # Empirical return
    lrmult = tf.placeholder(
        name='lrmult', dtype=tf.float32,
        shape=[])  # learning rate multiplier, updated with schedule
    ob = U.get_placeholder_cached(name="ob")
    ac = pi.pdtype.sample_placeholder([None])
    kloldnew = oldpi.pd.kl(pi.pd)
    ent = pi.pd.entropy()
    meankl = tf.reduce_mean(kloldnew)
    meanent = tf.reduce_mean(ent)
    pol_entpen = (-entcoeff) * meanent
    # Importance-sampling ratio between new and old policies.
    newprob = tf.exp(pi.pd.logp(ac))
    oldprob = tf.exp(oldpi.pd.logp(ac))
    ratio = newprob / oldprob
    kl = pi.pd.kl(oldpi.pd)
    mean_kl = tf.reduce_mean(kl)
    get_kl = U.function([ob, ac], kl)
    get_mean_kl = U.function([ob, ac], mean_kl)
    # Mask out samples whose per-sample KL already exceeds the threshold.
    threshold = kl < args.kl_threshold
    threshold = tf.cast(threshold, tf.float32)
    pol_surr = (kl - ratio * atarg / args.sepg_lam) * threshold
    pol_surr = tf.reduce_mean(pol_surr)
    vf_loss = tf.reduce_mean(tf.square(pi.vpred - ret))
    total_loss = pol_surr + pol_entpen + vf_loss
    losses = [pol_surr, pol_entpen, vf_loss, meankl, meanent]
    loss_names = ["pol_surr", "pol_entpen", "vf_loss", "kl", "ent"]
    var_list = pi.get_trainable_variables()
    lossandgrad = U.function([ob, ac, atarg, ret, lrmult],
                             losses + [U.flatgrad(total_loss, var_list)])
    adam = MpiAdam(var_list, epsilon=adam_epsilon)
    compute_losses = U.function([ob, ac, atarg, ret, lrmult], losses)
    U.initialize()
    adam.sync()
    # Prepare for rollouts
    # ----------------------------------------
    seg_gen = traj_segment_generator(pi,
                                     env,
                                     timesteps_per_actorbatch,
                                     stochastic=True)
    episodes_so_far = 0
    timesteps_so_far = 0
    iters_so_far = 0
    tstart = time.time()
    lenbuffer = deque(maxlen=100)  # rolling buffer for episode lengths
    rewbuffer = deque(maxlen=100)  # rolling buffer for episode rewards
    running_scores = []
    assert sum([
        max_iters > 0, args.num_timesteps > 0, max_episodes > 0,
        max_seconds > 0
    ]) == 1, "Only one time constraint permitted"
    while True:
        if callback:
            callback(locals(), globals())
        # Stop on whichever single time constraint is active.
        if args.num_timesteps and timesteps_so_far >= args.num_timesteps:
            break
        elif max_episodes and episodes_so_far >= max_episodes:
            break
        elif max_iters and iters_so_far >= max_iters:
            break
        elif max_seconds and time.time() - tstart >= max_seconds:
            break
        if schedule == 'constant':
            cur_lrmult = 1.0
        elif schedule == 'linear':
            # Linearly anneal the learning-rate multiplier to zero.
            cur_lrmult = max(
                1.0 - float(timesteps_so_far) / args.num_timesteps, 0)
        else:
            raise NotImplementedError
        if MPI.COMM_WORLD.Get_rank() == 0:
            logger.log("********** Iteration %i ************" % iters_so_far)
        seg = seg_gen.__next__()
        add_vtarg_and_adv(seg, gamma, lam)
        # ob, ac, atarg, ret, td1ret = map(np.concatenate, (obs, acs, atargs, rets, td1rets))
        ob, ac, atarg, tdlamret = seg["ob"], seg["ac"], seg["adv"], seg[
            "tdlamret"]
        vpredbefore = seg["vpred"]  # predicted value function before udpate
        atarg = (atarg - atarg.mean()) / (
            atarg.std() + 1e-8)  # standardized advantage function estimate
        optim_batchsize = optim_batchsize or ob.shape[0]
        if hasattr(pi, "ob_rms"):
            pi.ob_rms.update(ob)  # update running mean/std for policy
        assign_old_eq_new()  # set old parameter values to new parameter values
        d = Dataset(dict(ob=ob, ac=ac, atarg=atarg, vtarg=tdlamret),
                    shuffle=not pi.recurrent)
        # Here we do a bunch of optimization epochs over the data
        for num_epoch in count():
            losses = [
            ]  # list of tuples, each of which gives the loss for a minibatch
            for batch in d.iterate_once(optim_batchsize):
                *newlosses, g = lossandgrad(batch["ob"], batch["ac"],
                                            batch["atarg"], batch["vtarg"],
                                            cur_lrmult)
                # Guard against NaN/inf gradients before the Adam step.
                g = np.nan_to_num(g)
                adam.update(g, optim_stepsize * cur_lrmult)
                losses.append(newlosses)
            # Stop early once the aggregate KL to the old policy grows too
            # large, or after the configured number of optimization epochs.
            agg_mean_kl = get_mean_kl(ob, ac)
            if agg_mean_kl > args.agg_kl_threshold or num_epoch == args.optim_epochs:
                break
        lrlocal = (seg["ep_lens"], seg["ep_rets"])  # local values
        listoflrpairs = MPI.COMM_WORLD.allgather(lrlocal)  # list of tuples
        lens, rews = map(flatten_lists, zip(*listoflrpairs))
        rewbuffer.extend(rews)
        mean_score = None
        if rewbuffer:
            mean_score = np.mean(rewbuffer)
            running_scores.append((timesteps_so_far, mean_score))
        episodes_so_far += len(lens)
        timesteps_so_far += sum(lens)
        iters_so_far += 1
        if MPI.COMM_WORLD.Get_rank() == 0:
            logger.record_tabular("EpRewMean", mean_score)
            logger.record_tabular("EpThisIter", len(lens))
            logger.record_tabular("EpisodesSoFar", episodes_so_far)
            logger.record_tabular("TimestepsSoFar", timesteps_so_far)
            logger.record_tabular("TimeElapsed", time.time() - tstart)
            logger.record_tabular("NumEpoch", num_epoch)
            logger.dump_tabular()
    return running_scores
def encode(flags_obj):
    """Encode diagnosis features with an autoencoder and export them as CSV.

    Trains/loads an AutoEncoderModel on the 2001-2016 training options, runs
    it on the 2017 encoding dataset, then writes one CSV per diagnosis kind
    (main / secondary) containing the event column plus one column per
    encoded dimension.

    Args:
        flags_obj: An object containing parsed flag values.
    """
    dict_data_training = {
        'dir_data': DIRPROJECT + 'data/',
        'data_prefix': 'nz',
        'dataset': '20012016',
        'encoding': 'embedding',
        'newfeatures': None,
        'featurereduction': {
            'method': 'FUSION'
        },
        'grouping': 'verylightgrouping'
    }
    dataset_options_training = DatasetOptions(dict_data_training)
    dict_data_encoding = {
        'dir_data': DIRPROJECT + 'data/',
        'data_prefix': 'nz',
        'dataset': '2017',
        'encoding': 'embedding',
        'newfeatures': None,
        'featurereduction': {
            'method': 'FUSION'
        },
        'grouping': 'verylightgrouping'
    }
    dataset_options_encoding = DatasetOptions(dict_data_encoding)
    feature_columns = FeatureColumnsAutoEncoderNZ(
        dataset_options=dataset_options_encoding)
    dict_dataset_options = {
        'train': dataset_options_training,
        'eval': None,
        'test': dataset_options_encoding
    }
    nn = AutoEncoderModel('test', dict_dataset_options, feature_columns,
                          flags_obj)
    # encode() yields a pair: (main-diagnosis encodings, secondary encodings).
    diag_encodings = nn.encode()
    print('diag_encodings --> main diag: ' + str(diag_encodings[0].shape))
    print('diag_encodings --> secondary diags: ' +
          str(diag_encodings[1].shape))
    main_diag_encodings = diag_encodings[0]
    sec_diag_encodings = diag_encodings[1]
    dataset_encoding = Dataset(dataset_options_encoding)
    df_encoding = dataset_encoding.getDf()
    print('df_encoding: ' + str(df_encoding.shape))
    num_encoded_dim = main_diag_encodings.shape[1]
    dir_data = dataset_options_encoding.getDirData()
    dataset = dataset_options_encoding.getDatasetName()
    data_prefix = dataset_options_encoding.getDataPrefix()
    demographic_featurename = dataset_options_encoding.getFilenameOptionDemographicFeatures(
    )
    featureset_str = dataset_options_encoding.getFeatureSetStr()
    encoding = dataset_options_encoding.getEncodingScheme()
    name_event_column = dataset_options_encoding.getEventColumnName()
    name_main_diag = dataset_options_encoding.getNameMainDiag()
    name_sec_diag = dataset_options_encoding.getNameSecDiag()
    # Start each output frame from the event column only; encoded dimensions
    # are appended one column at a time below.
    df_encoding_sec_diag = df_encoding[name_event_column].to_frame()
    df_encoding_main_diag = df_encoding[name_event_column].to_frame()
    # NOTE(review): num_encoded_dim is re-derived from the secondary
    # encodings and assumed equal to the main-diag dimensionality — confirm.
    num_encoded_dim = sec_diag_encodings.shape[1]
    for k in range(0, num_encoded_dim):
        new_col_secdiag = name_sec_diag + '_dim_' + str(k)
        df_encoding_sec_diag[new_col_secdiag] = sec_diag_encodings[:, k]
        new_col_maindiag = name_main_diag + '_dim_' + str(k)
        df_encoding_main_diag[new_col_maindiag] = main_diag_encodings[:, k]
    print('df_encoding_main_diag: ' + str(df_encoding_main_diag.shape))
    print('df_encoding_sec_diag: ' + str(df_encoding_sec_diag.shape))
    filename_sec_diag_encoding = dir_data + 'data_' + data_prefix + '_' + dataset + '_' + name_sec_diag + '_' + str(
        num_encoded_dim) + 'dim.csv'
    filename_main_diag_encoding = dir_data + 'data_' + data_prefix + '_' + dataset + '_' + name_main_diag + '_' + str(
        num_encoded_dim) + 'dim.csv'
    # Write the CSVs in 10000-row chunks: first chunk creates the file with a
    # header, the rest are appended without one.
    list_df = [
        df_encoding_sec_diag[i:i + 10000]
        for i in range(0, df_encoding_sec_diag.shape[0], 10000)
    ]
    list_df[0].to_csv(filename_sec_diag_encoding,
                      index=False,
                      line_terminator='\n')
    for l in list_df[1:]:
        l.to_csv(filename_sec_diag_encoding,
                 index=False,
                 line_terminator='\n',
                 header=False,
                 mode='a')
    list_df = [
        df_encoding_main_diag[i:i + 10000]
        for i in range(0, df_encoding_main_diag.shape[0], 10000)
    ]
    list_df[0].to_csv(filename_main_diag_encoding,
                      index=False,
                      line_terminator='\n')
    for l in list_df[1:]:
        l.to_csv(filename_main_diag_encoding,
                 index=False,
                 line_terminator='\n',
                 header=False,
                 mode='a')
import torch.nn as nn
from torch.autograd import Variable
from torch.utils.data import DataLoader
from config.test_config import TestConfig
import os
import numpy as np
from PIL import Image

# Test-time driver: load a CycleGAN snapshot and run it over the dataset.
opt = TestConfig().parse()
model = CycleGAN(opt)
# Snapshot path is derived from the run name and epoch chosen in the config.
model.load_state_dict(
    torch.load('log/snapshot/' + opt.name + '_snapshot_' + str(opt.epoch) +
               '.pkl'))
model.eval()
model.cuda()
dataset = Dataset(opt)
# batch_size=1: images are processed one at a time at test time.
data_loader = DataLoader(dataset,
                         batch_size=1,
                         shuffle=opt.shuffle,
                         num_workers=4)
pic_dir = opt.pic_dir
for iteration, input in enumerate(data_loader):
    model.deal_with_input(input)
    model.test()
    # Pull generated and cycle-reconstructed images back to host memory.
    g_A = model.generated_A.cpu().numpy()
    g_B = model.generated_B.cpu().numpy()
    c_A = model.cycled_A.cpu().numpy()
    c_B = model.cycled_B.cpu().numpy()
    #g_A = Image.fromarray(((g_A+1.)/2.*255).astype(np.uint8).transpose(1,2,0))
    #g_A.save(os.path.join(pic_dir, 'generated_A_'+str(opt.epoch)+'.png'))
logger.info('Cannot find preprocess data %s, program will shut down.', '{}.preprocessed.pickle'.format(train_file_name_prefix)) sys.exit() dev_file_name_prefix, fileExist = checkPreprocessFile( dev_file, add_query_node) if not fileExist: logger.info('Cannot find preprocess data %s, program will shut down.', '{}.preprocessed.pickle'.format(dev_file_name_prefix)) sys.exit() if not evaluation_mode: logger.info('Loading preprocessed training data file %s', '{}.preprocessed.pickle'.format(train_file_name_prefix)) dataset = Dataset(train_file_name_prefix, use_elmo, use_glove, use_extra_feature, max_nodes=500, max_query_size=25, max_candidates=80, max_candidates_len=10) logger.info('Loading preprocessed development data file %s', '{}.preprocessed.pickle'.format(dev_file_name_prefix)) dev_dataset = Dataset(dev_file_name_prefix, use_elmo, use_glove, use_extra_feature, max_nodes=500, max_query_size=25, max_candidates=80, max_candidates_len=10) else: logger.info('Loading preprocessed evaluation data file %s',
parser.add_argument('--gallery_feature_dir', type=str) parser.add_argument('--query_feature_dir', type=str) parser.add_argument('--useCAM', action='store_true') args = parser.parse_args() data_transforms = transforms.Compose([ transforms.Resize((args.img_h, args.img_w)), transforms.ToTensor(), transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]) ]) # image_datasets = {x: datasets.ImageFolder(os.path.join(args.test_dir, x) ,data_transforms) for x in ['gallery','query']} image_datasets = { x: Dataset(os.path.join(args.test_dir, x), data_transforms, CAM=args.useCAM) for x in ['gallery', 'query'] } # labelsloader = {x: iter(image_datasets[x].imgs) for x in ['gallery', 'query']} dataloaders = { x: torch.utils.data.DataLoader(image_datasets[x], batch_size=args.batch_size, shuffle=False, num_workers=4) for x in ['gallery', 'query'] } def load_network(network): save_path = os.path.join(args.model_save_dir,
import os import numpy as np from utils.Dataset import Dataset from model import model_multi_view from utils.cluster import cluster import csv os.environ["CUDA_VISIBLE_DEVICES"] = "1" print(os.environ['CUDA_VISIBLE_DEVICES']) ''' each net has its own learning_rate(lr_xx), activation_function(act_xx), nodes_of_layers(dims_xx) ae net need pretraining before the whole optimizatoin ''' if __name__ == '__main__': num = 30 data = Dataset('coil_2views') x1, x2, gt = data.load_data() X = dict() X[str(0)], X[str(1)] = x1, x2 acc_H_all = np.zeros(num) nmi_H_all = np.zeros(num) RI_H_all = np.zeros(num) f1_H_all = np.zeros(num) para_lambda = 1 batch_size = X['0'].shape[0] lr_pre = 1.0e-3 lr_ae = 1.0e-3 lr_dg = 1.0e-3 lr_h = 1.0e-2 epochs_pre = 300
def test_item_file(end_test_file, embedding_file_path, vocab_file_path,
                   use_gpu):
    """Evaluate a saved summarization model on a JSON-lines test file.

    Loads embeddings/vocabulary, restores the CNN_RNN checkpoint, scores each
    sentence of every document, writes the extracted summaries ('###'-joined
    sentences with probability > 0.5) to the output folder, and prints
    per-document accuracy/precision/recall/F1.

    Args:
        end_test_file: path to the JSON-lines test set (one doc per line).
        embedding_file_path: .npz file whose 'arr_0' holds the embeddings.
        vocab_file_path: JSON mapping word -> id.
        use_gpu: run the network on CUDA when True.
    """
    embed = torch.Tensor(np.load(embedding_file_path)['arr_0'])
    with open(vocab_file_path) as f:
        word2id = json.load(f)
    vocab = Vocab(embed, word2id)
    #with open(end_test_file) as f:
    #    examples = [json.loads(line) for line in f]
    with open(end_test_file) as f:
        examples = list()
        for line in f:
            # Skip blank lines instead of failing on json.loads.
            if line and not line.isspace():
                examples.append(json.loads(line))
    #print(examples[0])
    test_dataset = Dataset(examples)
    test_iter = DataLoader(dataset=test_dataset,
                           batch_size=args.batch_size,
                           shuffle=False)
    load_dir = os.path.join(args.input, 'model_files', 'CNN_RNN.pt')
    if use_gpu:
        checkpoint = torch.load(load_dir)
    else:
        # Remap CUDA tensors onto the CPU when no GPU is used.
        checkpoint = torch.load(load_dir,
                                map_location=lambda storage, loc: storage)
    if not use_gpu:
        checkpoint['args'].device = None
    # Rebuild the model class recorded in the checkpoint and restore weights.
    net = getattr(models, checkpoint['args'].model)(checkpoint['args'])
    net.load_state_dict(checkpoint['model'])
    if use_gpu:
        net.cuda()
    net.eval()
    doc_num = len(test_dataset)
    all_targets = []
    all_results = []
    all_probs = []
    all_acc = []
    all_p = []
    all_r = []
    all_f1 = []
    all_sum = []
    for batch in tqdm(test_iter):
        features, targets, summaries, doc_lens = vocab.make_features(batch)
        if use_gpu:
            probs = net(Variable(features).cuda(), doc_lens)
        else:
            probs = net(Variable(features), doc_lens)
        # probs is flat over all sentences in the batch; slice it back into
        # per-document spans using doc_lens.
        start = 0
        for doc_id, doc_len in enumerate(doc_lens):
            doc = batch['doc'][doc_id].split('\n')[:doc_len]
            stop = start + doc_len
            prob = probs[start:stop]
            hyp = []
            for _p, _d in zip(prob, doc):
                print(_p)
                print(_d)
                # Sentences scoring above 0.5 are selected for the summary.
                if _p > 0.5:
                    hyp.append(_d)
            if len(hyp) > 0:
                print(hyp)
                all_sum.append("###".join(hyp))
            else:
                all_sum.append('')
            all_targets.append(targets[start:stop])
            all_probs.append(prob)
            start = stop
    # Output file name: 'TR-' + basename of the test file.
    file_path_elems = end_test_file.split('/')
    file_name = 'TR-' + file_path_elems[len(file_path_elems) - 1]
    with open(os.path.join(args.output, file_name),
              mode='w',
              encoding='utf-8') as f:
        for text in all_sum:
            f.write(text.strip() + '\n')
    # Binarize probabilities with the same 0.5 threshold used above.
    for item in all_probs:
        all_results.append([1 if tmp > 0.5 else 0 for tmp in item.tolist()])
    print(len(all_results))
    print(len(all_targets))
    print(len(all_probs))
    for _1, _2, _3 in zip(all_results, all_targets, all_probs):
        _2 = _2.tolist()
        _3 = _3.tolist()
        print("*" * 3)
        print('probs : ', _3)
        print('results : ', _1)
        print('targets : ', _2)
        # NOTE(review): sklearn metrics are called as metric(y_pred, y_true)
        # here — argument order is reversed from the usual (y_true, y_pred);
        # accuracy is unaffected, precision/recall swap roles. Confirm intent.
        tmp_acc = accuracy_score(_1, _2)
        tmp_p = precision_score(_1, _2)
        tmp_r = recall_score(_1, _2)
        tmp_f1 = f1_score(_1, _2)
        print('acc : ', tmp_acc)
        print('p : ', tmp_p)
        print('r : ', tmp_r)
        print('f1 : ', tmp_f1)
        all_acc.append(tmp_acc)
        all_p.append(tmp_p)
        all_r.append(tmp_r)
        all_f1.append(tmp_f1)
    print('all dataset acc : ', np.mean(all_acc))
    print('all dataset p : ', np.mean(all_p))
    print('all dataset r : ', np.mean(all_r))
    print('all dataset f1 : ', np.mean(all_f1))
    print('all results length : ', len(all_results))
import os import numpy as np from utils.Dataset import Dataset from model import model_multi_view from utils.cluster import cluster import csv os.environ["CUDA_VISIBLE_DEVICES"] = "1" print(os.environ['CUDA_VISIBLE_DEVICES']) ''' each net has its own learning_rate(lr_xx), activation_function(act_xx), nodes_of_layers(dims_xx) ae net need pretraining before the whole optimizatoin ''' if __name__ == '__main__': num = 30 data = Dataset('ORL_2views') x1, x2, gt = data.load_data() X = dict() X[str(0)], X[str(1)] = x1, x2 acc_H_all = np.zeros(num) nmi_H_all = np.zeros(num) RI_H_all = np.zeros(num) f1_H_all = np.zeros(num) para_lambda = 1 batch_size = x1.shape[0] lr_pre = 1.0e-3 lr_ae = 1.0e-3 lr_dg = 1.0e-3 lr_h = 1.0e-2 epochs_pre = 50
import os import numpy as np from utils.Dataset import Dataset from model import model_multi_view from utils.cluster import cluster import csv os.environ["CUDA_VISIBLE_DEVICES"] = "1" print(os.environ['CUDA_VISIBLE_DEVICES']) ''' each net has its own learning_rate(lr_xx), activation_function(act_xx), nodes_of_layers(dims_xx) ae net need pretraining before the whole optimizatoin ''' if __name__ == '__main__': num = 30 data = Dataset('ORL_3views') X, gt = data.load_data() acc_H_all = np.zeros(num) nmi_H_all = np.zeros(num) RI_H_all = np.zeros(num) f1_H_all = np.zeros(num) para_lambda = 1 batch_size = X['0'].shape[0] lr_pre = 1.0e-3 lr_ae = 1.0e-3 lr_dg = 1.0e-3 lr_h = 1.0e-2 epochs_pre = 50 epochs_total = 200
#Email: [email protected] #Date: Min 13 Des 2020 02:50:08 WIB from model.nn import NNModel from cf.DiCE import DiCE from sklearn.preprocessing import MinMaxScaler, StandardScaler from utils.Dataset import Dataset from utils.adult_dataset import load_adult_income if __name__ == "__main__": income_df = load_adult_income("data/adult/adult.csv") d = Dataset(dataframe=income_df, continuous_features=[ 'age', 'education', 'educational-num', 'capital-gain', 'capital-loss', 'hours-per-week', 'native-country' ], outcome_name='income', scaler=MinMaxScaler()) clf = NNModel(model_path='weights/adult.pth') cf = DiCE(d, clf) test_instance = { 'age': 57, 'workclass': 'Self-Employed', 'education': 2, 'educational-num': 10, 'marital-status': 'Married', 'occupation': 'Service', 'relationship': 'Husband', 'race': 'White', 'gender': 'Male',
FilePath: /Signal-1/AE2-Nets-master/test_Caltech.py ''' from utils.Dataset import Dataset from AE_BinAE_revise import MaeAEModel from model import model from utils.print_result import print_result import os from collections import Counter os.environ["CUDA_VISIBLE_DEVICES"] = "0" ''' each net has its own learning_rate(lr_xx), activation_function(act_xx), nodes_of_layers(dims_xx) ae net need pretraining before the whole optimization ''' if __name__ == '__main__': data = Dataset('Caltech101_7_2views') x1, x2, gt = data.load_data() x1 = data.normalize(x1, 0) x2 = data.normalize(x2, 0) n_clusters = len(set(gt)) print(x1.shape) print(x2.shape) print(n_clusters) #act_ae1, act_ae2, act_dg1, act_dg2 = 'sigmoid', 'sigmoid', 'sigmoid', 'sigmoid' v1_aedims_ = [[x1.shape[1], 1024, 512, 256], [256, 512, 1024, x1.shape[1]]] v2_aedims_ = [[x2.shape[1], 256, 128], [128, 256, x2.shape[1]]] #原来的 mae_dims_ = [[256, 256], [128, 128, 64], [256, 256], [64, 128, 128]] #现在用的 #dims_dg1 = [64, 100]
Description: Nothing FilePath: /Signal-1/AE2-Nets-master/test_CUB.py ''' from utils.Dataset import Dataset from AE_BinAE_revise import MaeAEModel from model import model from utils.print_result import print_result import os os.environ["CUDA_VISIBLE_DEVICES"] = "0" ''' each net has its own learning_rate(lr_xx), activation_function(act_xx), nodes_of_layers(dims_xx) ae net need pretraining before the whole optimization ''' if __name__ == '__main__': data = Dataset('CUB_c10_2views') x1, x2, gt = data.load_data() x1 = data.normalize(x1, 0) x2 = data.normalize(x2, 0) n_clusters = len(set(gt)) print(x1.shape) print(x2.shape) print(gt.shape) #act_ae1, act_ae2, act_dg1, act_dg2 = 'sigmoid', 'sigmoid', 'sigmoid', 'sigmoid' v1_aedims_ = [[x1.shape[1], 512, 256], [256, 512, x1.shape[1]]] v2_aedims_ = [[x2.shape[1], 256, 128], [128, 256, x2.shape[1]]] #原来的 mae_dims_ = [[256, 128, 64], [128, 128, 64], [64, 128, 256], [64, 128, 128]] #现在用的
G = AEI_Net(512).to(device) D = MultiscaleDiscriminator(input_nc=3, ndf=64, n_layers=6, norm_layer=torch.nn.InstanceNorm2d).to(device) G.train() D.train() arcface = Backbone(50, 0.6, 'ir_se').to(device) arcface.eval() arcface.load_state_dict(torch.load("./model_weights/model_ir_se50.pth")) opt_G = optim.Adam(G.parameters(), lr=lr_G, betas=(0, 0.999)) opt_D = optim.Adam(D.parameters(), lr=lr_D, betas=(0, 0.999)) dataset = Dataset("./dataset/celeb/", same_prob=0.2) dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True, num_workers=0, drop_last=True) MSE = torch.nn.MSELoss() L1 = torch.nn.L1Loss() def hinge_loss(X, positive=True): if positive: return torch.relu(1 - X).mean() return torch.relu(X).mean()
def train():
    """Train the extractive summarization model on the merged train files.

    Reads the embedding matrix and vocabulary from the project folder, merges
    all .txt train files into one JSON-lines file, builds the model named by
    args.model, and runs args.max_epoch epochs of BCE-loss training with
    gradient clipping, saving the network each epoch.
    """
    print("*" * 100)
    print("train begin")
    # use gpu
    use_gpu = args.device is not None
    if torch.cuda.is_available() and not use_gpu:
        print("WARNING: You have a CUDA device, should run with -device 0")
    if use_gpu:
        # set cuda device and seed
        torch.cuda.set_device(args.device)
        torch.cuda.manual_seed(args.seed)
    # Seed every RNG in play so runs are reproducible.
    torch.manual_seed(args.seed)
    random.seed(args.seed)
    numpy.random.seed(args.seed)
    os.environ["CUDA_VISIBLE_DEVICES"] = str(args.gpu_id)
    # Prepare input/output paths.
    embedding_file_path = os.path.join(args.project, "embedding.npz")
    vocab_file_path = os.path.join(args.project, "word2id.json")
    end_train_file = os.path.join(args.input, "train_files", "train.txt")
    train_files_dir = os.path.join(args.input, "train_files")
    # Merge all text files with the same suffix into one training file.
    merge_same_suf_text_file(train_files_dir, end_train_file, '.txt')
    print('Loading vocab,train and val dataset.Wait a second,please')
    embed = torch.Tensor(np.load(embedding_file_path)['arr_0'])
    # embed = torch.Tensor(list(np.load(args.embedding)))
    with open(vocab_file_path) as f:
        word2id = json.load(f)
    vocab = Vocab(embed, word2id)
    with open(end_train_file) as f:
        examples = list()
        for line in tqdm(f):
            # Skip blank lines; every other line is a JSON document.
            if line and not line.isspace():
                examples.append(json.loads(line))
    train_dataset = Dataset(examples)
    print(train_dataset[:1])
    # Read the vocabulary size and embedding dimension from the matrix.
    args.embed_num = embed.size(0)
    args.embed_dim = embed.size(1)
    # args.kernel_sizes = [int(ks) for ks in args.kernel_sizes.split(',')]
    net = getattr(models, args.model)(args, embed)
    if use_gpu:
        net.cuda()
    train_iter = DataLoader(dataset=train_dataset,
                            batch_size=args.batch_size,
                            shuffle=False)
    criterion = nn.BCELoss()
    params = sum(p.numel() for p in list(net.parameters())) / 1e6
    print('#Params: %.1fM' % (params))
    min_loss = float('inf')
    optimizer = torch.optim.Adam(net.parameters(), lr=args.learning_rate)
    net.train()
    t1 = time()
    for epoch in range(1, args.max_epoch + 1):
        print("*" * 10, 'epoch ', str(epoch), '*' * 50)
        for i, batch in enumerate(train_iter):
            print("*" * 10, 'batch', i, '*' * 10)
            features, targets, _, doc_lens = vocab.make_features(
                batch, args.seq_trunc)
            features, targets = Variable(features), Variable(targets.float())
            if use_gpu:
                features = features.cuda()
                targets = targets.cuda()
            probs = net(features, doc_lens)
            loss = criterion(probs, targets)
            optimizer.zero_grad()
            loss.backward()
            # Clip gradients to args.max_norm to avoid exploding gradients.
            clip_grad_norm(net.parameters(), args.max_norm)
            optimizer.step()
        # NOTE(review): reconstructed at epoch level from mangled source —
        # the model is saved once per epoch and the last batch loss printed.
        net.save()
        print('Epoch: %2d Loss: %f' % (epoch, loss))
    t2 = time()
    print('Total Cost:%f h' % ((t2 - t1) / 3600))
    print("模型配置文件保存至输出文件夹")
import os import numpy as np from utils.Dataset import Dataset from model import model_multi_view from utils.cluster import cluster import csv os.environ["CUDA_VISIBLE_DEVICES"] = "0" print(os.environ['CUDA_VISIBLE_DEVICES']) ''' each net has its own learning_rate(lr_xx), activation_function(act_xx), nodes_of_layers(dims_xx) ae net need pretraining before the whole optimizatoin ''' if __name__ == '__main__': num = 30 data = Dataset('handwritten_6views') X, gt = data.load_data() acc_H_all = np.zeros(num) nmi_H_all = np.zeros(num) RI_H_all = np.zeros(num) f1_H_all = np.zeros(num) para_lambda = 1 batch_size = 2000 lr_pre = 1.0e-3 lr_ae = 1.0e-3 lr_dg = 1.0e-3 lr_h = 1.0e-1 epochs_pre = 10 epochs_total = 20
from utils.DatasetFilter import DatasetFilter
from utils.Dataset import Dataset
from utils.DatasetOptions import DatasetOptions
import helpers.constants as constants
import helpers.constantsNZ as constantsNZ

# Project layout: this script lives two levels below the project root.
dirProject = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) + '/'
dirData = dirProject + 'data/'
dirPlotsBase = dirProject + 'plots/feature_comparison_wiederkehrer_normal/'

# Options describing the 2012-2015 'patrec' dataset and the chronic-lung
# disease filter to apply to it.
dict_options_analyzing = {
    'dir_data': dirData,
    'data_prefix': 'patrec',
    'dataset': '20122015',
    'grouping': 'verylightgrouping',
    'encoding': 'categorical',
    'newfeatures': {
        'names': constants.NEW_FEATURES
    },
    'featurereduction': None,
    'filter_options': 'chronic_lung'
}

options = DatasetOptions(dict_options_analyzing)
# NOTE(review): this Dataset instance is never used below — presumably kept
# for its construction side effects; confirm whether it can be dropped.
dataset = Dataset(options)
datafilter = DatasetFilter(options)
datafilter.filterDataDisease()
def __init__(self, dataset_options, dir_plots):
    """Remember the plot directory and build the Dataset for the options."""
    self.dir_plots = dir_plots
    self.dataset_options = dataset_options
    self.dataset = Dataset(dataset_options=dataset_options)
device = torch.device("cuda") G = AEI_Net(512).to(device) D = MultiscaleDiscriminator(input_nc=3, ndf=64, n_layers=6, norm_layer=torch.nn.InstanceNorm2d).to(device) G.train() D.train() arcface = Backbone(50, 0.6, 'ir_se').to(device) arcface.eval() arcface.load_state_dict(torch.load("./model_weights/model_ir_se50.pth")) dataset = Dataset("./inputs/processed") dataloader = DataLoader(dataset, batch_size=batch_size, num_workers=0) MSE = torch.nn.MSELoss() L1 = torch.nn.L1Loss() def hinge_loss(X, positive=True): if positive: return torch.relu(1 - X).mean() return torch.relu(X).mean() def get_grid_image(X): X = X[:8]
import numpy as np from utils.Dataset import Dataset from model import model_multi_view from utils.cluster import cluster import csv os.environ["CUDA_VISIBLE_DEVICES"] = "0" print(os.environ['CUDA_VISIBLE_DEVICES']) ''' each net has its own learning_rate(lr_xx), activation_function(act_xx), nodes_of_layers(dims_xx) ae net need pretraining before the whole optimizatoin ''' if __name__ == '__main__': num = 10 data = Dataset('COIL20_3views') X, gt = data.load_data() acc_H_all = np.zeros(num) nmi_H_all = np.zeros(num) RI_H_all = np.zeros(num) f1_H_all = np.zeros(num) para_lambda = 1 batch_size = X['0'].shape[0] lr_pre = 1.0e-3 lr_ae = 1.0e-3 lr_dg = 1.0e-3 lr_h = 1.0e-2 epochs_pre = 300 epochs_total = 100
parser.add_argument('--data_dir', type=str, default='./data') parser.add_argument('--save_dir', type=str, default='./saves') parser.add_argument('--conf_dir', type=str, default='./conf') parser.add_argument('--seed', type=int, default=225) conf = parser.parse_args() model_conf = Params(os.path.join(conf.conf_dir, conf.model.lower() + '.json')) np.random.seed(conf.seed) torch.random.manual_seed(conf.seed) device = torch.device('cuda') if torch.cuda.is_available() else torch.device( 'cpu') dataset = Dataset(data_dir=conf.data_dir, data_name=model_conf.data_name, train_ratio=model_conf.train_ratio, device=device) log_dir = os.path.join('saves', conf.model) logger = Logger(log_dir) model_conf.save(os.path.join(logger.log_dir, 'config.json')) eval_pos, eval_target = dataset.eval_data() item_popularity = dataset.item_popularity evaluator = Evaluator(eval_pos, eval_target, item_popularity, model_conf.top_k) model_base = getattr(models, conf.model) model = model_base(model_conf, dataset.num_users, dataset.num_items, device) logger.info(model_conf) logger.info(dataset)
dirModelsBase, options_training.getFilenameOptions(filteroptions=True), options_clf=dict_opt_sgd) clf_sgd = ClassifierSGD(options_sgd) dict_options_dataset_training = { 'dir_data': dirData, 'data_prefix': 'nz', 'dataset': '2016', 'newfeatures': { 'names': constantsNZ.NEW_FEATURES }, 'featurereduction': None } options_testing = DatasetOptions(dict_options_dataset_training) dataset_testing = Dataset(dataset_options=options_testing) years = [2012, 2013, 2014, 2015] for year in years: dict_options_dataset_training = { 'dir_data': dirData, 'data_prefix': 'nz', 'dataset': str(year), 'newfeatures': { 'names': constantsNZ.NEW_FEATURES }, 'featurereduction': None } options_training = DatasetOptions(dict_options_dataset_training) dataset_training = Dataset(dataset_options=options_training)
import tensorflow as tf import numpy as np import scipy.io as scio from utils.Net_ae import Net_ae from utils.Net_dg import Net_dg from utils.next_batch import next_batch import math from sklearn.utils import shuffle import timeit from keras.layers import * from utils.print_result import print_result from keras.models import Model from utils.Dataset import Dataset data = Dataset('handwritten_2views') x1, x2, gt = data.load_data() x1 = data.normalize(x1, 0) x2 = data.normalize(x2, 0) n_clusters = len(set(gt)) def xavier_init(fan_in, fan_out, constant=1): low = -constant * np.sqrt(6.0 / (fan_in + fan_out)) high = constant * np.sqrt(6.0 / (fan_in + fan_out)) return tf.random_uniform((fan_in, fan_out), minval=low, maxval=high, dtype=tf.float32) class dualModel: def __init__(self,epochs): self.epochs=epochs def train_model(self,X1, X2, gt, para_lambda, dims, act, lr, epochs, batch_size): err_total = list() start = timeit.default_timer()
for year in years: print('year: ' + str(year)) dict_options_dataset = { 'dir_data': dirData, 'data_prefix': 'nz', 'dataset': str(year), 'encoding': 'embedding', 'grouping': 'verylightgrouping', 'newfeatures': None, 'featurereduction': { 'method': 'FUSION' } } options_dataset_year = DatasetOptions(dict_options_dataset) dataset_year = Dataset(options_dataset_year) if balanced: df_year = dataset_year.getBalancedSubSet() else: df_year = dataset_year.getDf() #df_year['main_diag'] = df_year['main_diag'].apply(convertDiagToInd) print(df_year.shape) df_all_years = df_all_years.append(df_year) print('df balanced all years: ' + str(df_all_years.shape)) encoding = options_dataset_year.getEncodingScheme() grouping = options_dataset_year.getGroupingName() featureset = options_dataset_year.getFeatureSetStr() filename_data_years = dirData + 'data_nz_' + str(min(years)) + str(