def main(eval_args):
    """Evaluate a trained AutoEncoder checkpoint or sample images from it.

    In 'evaluate' mode, computes NELBO / negative log p (and their
    bits-per-dim equivalents) on the chosen split. Otherwise, repeatedly
    draws samples, pickles the raw sample tensors, and saves tiled
    generation images.

    Fix: ``output_img.deatach().numpy()`` was a typo (``detach``) and the
    tensor lives on the GPU, so ``.numpy()`` would raise even after fixing
    the name — now ``.detach().cpu().numpy()``.
    """
    # ensures that weight initializations are all the same
    logging = utils.Logger(eval_args.local_rank, eval_args.save)

    # load a checkpoint
    logging.info('loading the model at:')
    logging.info(eval_args.checkpoint)
    checkpoint = torch.load(eval_args.checkpoint, map_location='cpu')
    args = checkpoint['args']
    logging.info('loaded the model at epoch %d', checkpoint['epoch'])
    arch_instance = utils.get_arch_cells(args.arch_instance)
    model = AutoEncoder(args, None, arch_instance)
    model.load_state_dict(checkpoint['state_dict'])
    model = model.cuda()

    logging.info('args = %s', args)
    logging.info('num conv layers: %d', len(model.all_conv_layers))
    logging.info('param size = %fM ', utils.count_parameters_in_M(model))

    if eval_args.eval_mode == 'evaluate':
        # load train valid queue; the checkpoint's args drive the loaders,
        # only the data root is taken from the eval arguments.
        args.data = eval_args.data
        train_queue, valid_queue, num_classes, test_queue = datasets.get_loaders(args)

        if eval_args.eval_on_train:
            logging.info('Using the training data for eval.')
            valid_queue = train_queue
        if eval_args.eval_on_test:
            logging.info('Using the test data for eval.')
            valid_queue = test_queue

        # get number of bits: converts nats/sample to bits per dimension.
        num_output = utils.num_output(args.dataset, args)
        bpd_coeff = 1. / np.log(2.) / num_output

        valid_neg_log_p, valid_nelbo = test(valid_queue, model,
                                            num_samples=eval_args.num_iw_samples,
                                            args=args, logging=logging)
        logging.info('final valid nelbo %f', valid_nelbo)
        logging.info('final valid neg log p %f', valid_neg_log_p)
        logging.info('final valid nelbo in bpd %f', valid_nelbo * bpd_coeff)
        logging.info('final valid neg log p in bpd %f', valid_neg_log_p * bpd_coeff)
    else:
        # Sampling mode: optionally re-estimate BatchNorm statistics at the
        # sampling temperature before drawing images.
        bn_eval_mode = not eval_args.readjust_bn
        num_samples = 16
        with torch.no_grad():
            n = int(np.floor(np.sqrt(num_samples)))  # tile side length
            set_bn(model, bn_eval_mode, num_samples=36, t=eval_args.temp, iter=500)
            for ind in range(eval_args.repetition):  # sampling is repeated.
                # synchronize so the wall-clock timing below is accurate
                torch.cuda.synchronize()
                start = time()
                with autocast():
                    logits = model.sample(num_samples, eval_args.temp)

                output = model.decoder_output(logits)
                # Bernoulli decoders use the mean image; others draw a sample.
                output_img = output.mean if isinstance(output, torch.distributions.bernoulli.Bernoulli) \
                    else output.sample()
                torch.cuda.synchronize()
                end = time()

                # save raw samples to file (move to CPU before .numpy())
                total_name = "{}/data_to_save_{}_{}.pickle".format(
                    eval_args.save, eval_args.name_to_save, ind)
                with open(total_name, 'wb') as handle:
                    pickle.dump(output_img.detach().cpu().numpy(), handle,
                                protocol=pickle.HIGHEST_PROTOCOL)

                output_tiled = utils.tile_image(output_img, n).cpu().numpy().transpose(1, 2, 0)
                logging.info('sampling time per batch: %0.3f sec', (end - start))
                output_tiled = np.asarray(output_tiled * 255, dtype=np.uint8)
                output_tiled = np.squeeze(output_tiled)

                plt.imshow(output_tiled)
                plt.savefig("{}/generation_{}_{}".format(
                    eval_args.save, eval_args.name_to_save, ind))
batch_size, shuffle=False, num_workers=4, collate_fn=trim_collate) else: train_loader = DataLoader(train_dset, batch_size, shuffle=True, num_workers=32, collate_fn=trim_collate) eval_loader = DataLoader(val_dset, batch_size, shuffle=False, num_workers=32, collate_fn=trim_collate) output_meta_folder = join(args.output, "regat_%s" % args.relation_type) utils.create_dir(output_meta_folder) args.output = output_meta_folder + "/%s_%s_%s_%d" % ( fusion_methods, args.relation_type, args.dataset, args.seed) if exists(args.output) and os.listdir(args.output): raise ValueError("Output directory ({}) already exists and is not " "empty.".format(args.output)) utils.create_dir(args.output) with open(join(args.output, 'hps.json'), 'w') as writer: json.dump(vars(args), writer, indent=4) logger = utils.Logger(join(args.output, 'log.txt')) train(model, train_loader, eval_loader, args, device)
def train(model, train_loader, eval_loader, num_epochs, output, model_fn):
    """Train a VQA(+explanation) model and checkpoint it every epoch.

    Runs an initial evaluation, then for each epoch trains on the VQA
    answer loss only (the explanation loss is currently commented out),
    logs periodic progress, re-evaluates, and saves the state dict to
    ``output/model_fn``.

    Fix: removed a stray ``pdb.set_trace()`` left after the initial
    ``evaluate`` call — it unconditionally dropped into the debugger and
    blocked any unattended run.

    NOTE(review): this uses legacy PyTorch idioms (``Variable``,
    ``.data[0]``, ``clip_grad_norm``); left intact to avoid changing
    behavior on the pinned framework version.
    """
    #utils.create_dir(output)
    optim = build_optimizer(model)
    #optim = torch.optim.Adamax(model.parameters(), lr=2e-3)
    # criterion kept for the commented-out explanation (VQE) loss path
    criterion = nn.CrossEntropyLoss(reduce=True).cuda()
    logger = utils.Logger(os.path.join(output, 'log.txt'))
    best_eval_score = 0
    print_freq = 10
    image_dir = '/data1/coco_raw_images/train2014'
    # sanity-check evaluation before training starts
    evaluate(model, eval_loader, output)
    idx2word = train_loader.dataset.explanation_dictionary.idx2word
    for epoch in range(num_epochs):
        total_vqa_loss = 0
        total_vqe_loss = 0
        train_score = 0
        t = time.time()
        for i, (v, q, a) in enumerate(train_loader):
            v = Variable(v).cuda()
            q = Variable(q).cuda()
            a = Variable(a).cuda()
            ans_pred, exp_pred, alphas = model(v, q)
            #if i % 100 == 0:
            if epoch > 10 and i % 100 == 0:
                # Print a sample question / predicted explanation
                # (hard-coded batch element 50 — assumes batch size > 50).
                q_sent = ' '.join([train_loader.dataset.question_dictionary.idx2word[w.data[0]]
                                   for w in q[50] if w.data[0] > 0])
                pred_tokens = [idx2word[w.data[0]] for id, w in enumerate(exp_pred[50].max(1)[1])]
                print('Question: %s?' % q_sent)
                print('Explain Pred: %s' % ('<start> ' + ' '.join(pred_tokens)))
                #visualize_attention('train', iid[50], q_sent, gt_tokens, pred_tokens, s[50], alphas[50], output, epoch)
            #exp_pred = pack_padded_sequence(exp_pred, [j-1 for j in l], batch_first=True)[0]
            #c = pack_padded_sequence(c[:, 1:], [j-1 for j in l], batch_first=True)[0]
            vqa_loss = instance_bce_with_logits(ans_pred, a)
            #vqe_loss = criterion(exp_pred.cuda(), c)
            #vqe_loss.backward()
            vqa_loss.backward()
            nn.utils.clip_grad_norm(model.parameters(), 0.25)
            optim.step()
            optim.zero_grad()
            batch_score = compute_score_with_logits(ans_pred, a.data).sum()
            total_vqa_loss += vqa_loss.data[0] * v.size(0)
            #total_vqe_loss += vqe_loss.data[0] * v.size(0)
            train_score += batch_score
            if i % print_freq == 0:
                print('Epoch: [{0}][{1}/{2}]\t'
                      #'VQE Loss {vqe_loss:.4f}\t'
                      'Batch Time {batch_time:.3f}\t'
                      'VQA Loss {vqa_loss:.4f}\t'
                      'Acc@1 {acc1:.2f}\t'
                      .format(epoch, i, len(train_loader),
                              vqa_loss=vqa_loss.data[0],
                              acc1=batch_score / v.size(0) * 100,
                              #vqe_loss=vqe_loss.data[0],
                              batch_time=time.time() - t))
                t = time.time()
        total_vqa_loss /= len(train_loader.dataset)
        #total_vqe_loss /= len(train_loader.dataset)
        train_score = 100 * train_score / len(train_loader.dataset)
        model.train(False)
        eval_score, bound = evaluate(model, eval_loader, output)
        model.train(True)
        logger.write('epoch %d, time: %.2f' % (epoch, time.time() - t))
        logger.write('\ttrain_vqe_loss: %.2f' % (total_vqe_loss))
        logger.write('\teval score: vqa = %.2f (%.2f)' % (100 * eval_score, 100 * bound))
        #if eval_score > best_eval_score:
        # checkpoint every epoch (best-only saving is disabled above)
        model_path = os.path.join(output, model_fn)
        torch.save(model.state_dict(), model_path)
dictionary, args.relation_type, adaptive=args.adaptive, pos_emb_dim=args.imp_pos_emb_dim, dataroot=args.data_folder) train_dset = VQAFeatureDataset('train', dictionary, args.relation_type, adaptive=args.adaptive, pos_emb_dim=args.imp_pos_emb_dim, dataroot=args.data_folder) # 5. Initialize ReGAT_all print("[LOG] 5. Initializing ReGAT_all...") model = build_regat_all(val_dset, args).to(device) logger = utils.Logger(os.path.join(args.output, 'model_all_log.txt')) utils.print_model(model, logger) # 6. tfidf # Takes around 4 minutes print("[LOG] 6. tfidf_from_questions...") tfidf = None weights = None if args.tfidf: tfidf, weights = tfidf_from_questions(['train', 'val', 'test2015'], dictionary) # 7. Initialize word embeddings print("[LOG] 7. Initializing word embeddings...") model.w_emb.init_embedding( join(args.data_folder, 'glove/glove6b_init_300d.npy'), tfidf, weights)
def train(num_epochs, batch_size):
    """Fine-tune MyModel on GQA from a saved checkpoint.

    Loads the train/val splits, restores weights from
    ``checkpoint/exp_3/checkpoint_1.pth``, trains with Adam + cross
    entropy, logs running loss/accuracy, validates after each epoch, and
    saves checkpoints along the way.

    Fixes: the bare ``except:`` around the per-epoch checkpoint save also
    swallowed ``KeyboardInterrupt``/``SystemExit`` — narrowed to
    ``except Exception``. Removed unused locals (``vocab_size`` and a
    never-consumed ``iter``/``tqdm`` pair).
    """
    current_time = time.strftime('%Y_%m_%d_%H_%M', time.localtime(time.time()))
    logger = utils.Logger(os.path.join('out', 'log_' + current_time + '.txt'))
    # logger.write(f'{current_time}logger_batch_size{batch_size}')
    answer_size = ge.answer_size
    train_set = GQA(root='data', split='train')
    val_set = GQA(root='data', split='val')
    train_loader = DataLoader(train_set, batch_size=batch_size, shuffle=True,
                              collate_fn=collate_data, drop_last=True)
    val_loader = DataLoader(val_set, batch_size=batch_size, shuffle=True,
                            collate_fn=collate_data, drop_last=True)

    net = MyModel(out_features=answer_size).to(device)
    # warm-start from a previous experiment's checkpoint
    with open('checkpoint/exp_3/checkpoint_1.pth', 'rb') as f:
        net.load_state_dict(torch.load(f, map_location=device))
    logger.write('load model successfully!')
    optimizer = torch.optim.Adam(net.parameters(), lr=1e-4)
    loss = nn.CrossEntropyLoss()
    # nn.utils.clip_grad_norm(net.parameters(), 0.25)

    def _validation(val_loader):
        # Top-1 accuracy over the whole validation loader.
        acc_sum, n = 0.0, 0
        for img, q, loq, a in val_loader:
            img, q, a = (
                img.to(device),
                q.to(device),
                a.to(device),
            )
            a_hat = net(img, q)
            acc_sum += (a_hat.detach().argmax(dim=1) == a).sum().item()
            n += a.shape[0]
        if n <= 0:
            print('validation loader does not work')
            return 0.0
        return acc_sum / n

    for i in range(num_epochs):
        loss_sum = 0.0
        acc_sum = 0.0
        n = 0  # running count of examples seen this epoch
        start = time.time()
        for img, q, loq, a in train_loader:
            img, q, a = (
                img.to(device),
                q.to(device),
                a.to(device),
            )
            net.train()
            net.zero_grad()
            a_hat = net(img, q)
            l = loss(a_hat, a)
            l.backward()
            optimizer.step()
            loss_sum += l.item()
            acc_sum += (a_hat.detach().argmax(dim=1) == a).sum().item()
            n += a.shape[0]
            # if n < 36:
            #     logger.write('time for one batch:{} s'.format(time.time()-start))
            # NOTE(review): these triggers fire only when n lands exactly on
            # the multiple, which assumes batch_size divides 32000/160000.
            if n % 32000 == 0:
                acc = acc_sum / n
                logger.write('%d-loss/n:%.3f;acc:%.4f;time:%.2f'
                             % (n, loss_sum / n, acc, (time.time() - start) / 60))
            if n % 160000 == 0:
                x = n // 160000
                torch.save(net.state_dict(), 'checkpoint/attention_{}.pth'.format(x))
        net.eval()
        logger.write('validating...')
        with torch.no_grad():
            vali_acc = _validation(val_loader)
        logger.write('Epoch #%d, Loss:%.3f, Train Acc: %.4f, Validation Acc:%.4f'
                     % (i, loss_sum / n, acc_sum / n, vali_acc))
        print('Epoch #%d, Loss:%.3f, Train Acc: %.4f, Validation Acc:%.4f'
              % (i, loss_sum / n, acc_sum / n, vali_acc))
        try:
            torch.save(net.state_dict(), 'checkpoint/epochattention{}.pth'.format(i + 2))
        except Exception:
            # best-effort save: a full disk / bad path shouldn't kill training,
            # but Ctrl-C and SystemExit must still propagate.
            print('can not save checkpoint.')
    torch.save(net.state_dict(), 'checkpoint/attmodel_3.pth')
def pretrain(model, train_loader, eval_loader, num_epoch, output):
    """Jointly pretrain a VQA-with-explanation model.

    Each batch is split by explanation length: items with a ground-truth
    explanation (the first ``L`` after sorting by length, descending) are
    decoded with teacher forcing, the rest are free-generated. Both paths
    feed a shared classifier; the total loss is answer BCE + explanation
    cross entropy. Saves the best checkpoint (by eval VQA score) to
    ``output/pretrained_vqae_enc_gt.pth``.

    NOTE(review): legacy PyTorch 0.3 idioms throughout (``Variable``,
    ``.data[0]``, deprecated ``reduce=``/``clip_grad_norm``) — this code
    assumes that framework version.
    """
    optim = torch.optim.Adamax(model.parameters(), lr=2e-3)
    # explanation-token cross entropy (used as vqe_loss below)
    gCrit = nn.CrossEntropyLoss(reduce=True).cuda()
    logger = utils.Logger(os.path.join(output, 'pretrain_log.txt'))
    best_score = 0
    for epoch in range(num_epoch):
        total_vqa_loss = 0
        total_vqe_loss = 0
        total_vqa_score = 0
        labeled_cnt = 0  # running count of samples that have an explanation
        print_freq = 20
        t = time.time()
        for i, E in enumerate(train_loader):
            bs = E['features'].size(0)
            # Sort batch by explanation length so padded sequences can be
            # packed; l holds sorted lengths, sort_ind the permutation.
            l, sort_ind = E['exp_len'].sort(dim=0, descending=True)
            ml = l[0]  # max explanation length in this batch
            L = (l > 0).sum()  # number of samples WITH a ground-truth explanation
            labeled_cnt += L
            v = Variable(E['features'][sort_ind]).cuda()
            q = Variable(E['question'][sort_ind]).cuda()
            c = Variable(E['explain'][sort_ind]).cuda()
            a = Variable(E['target'][sort_ind]).cuda()
            # Unlabeled tail: free-run generation; labeled head: teacher forcing.
            vqe_logits_gen, _, joint_vq_gen = model.generate(v[L:], q[L:])
            vqe_logits_gt, _, joint_vq_gt = model.generate(
                v[:L], q[:L], c[:L], [j-1 for j in l[:L]], ml, tf=True)
            # Re-encode ground-truth vs generated explanations, then gate the
            # joint question-vision feature with the explanation encoding.
            gt_emb = model.dec_enc(None, c[:L])
            gen_emb = model.dec_enc(vqe_logits_gen)
            gt_enc = model.e_net(gt_emb)
            gen_enc = model.e_net(gen_emb)
            enc = torch.cat([gt_enc, gen_enc], 0)
            joint_vq = torch.cat([joint_vq_gt, joint_vq_gen], 0)
            joint_vqe = joint_vq * enc
            ans_pred = model.classifier(joint_vqe)
            # Pack predictions/targets to drop padding; target skips the
            # <start> token (c[:, 1:]) and lengths are shifted by one.
            exp_pred = pack_padded_sequence(
                vqe_logits_gt, [j-1 for j in l[:L]], batch_first=True)[0]
            p_c = pack_padded_sequence(
                c[:L, 1:], [j-1 for j in l[:L]], batch_first=True)[0]
            vqa_loss = instance_bce_with_logits(ans_pred, a)
            vqe_loss = gCrit(exp_pred.cuda(), p_c)
            VQA_score = compute_score_with_logits(ans_pred, a.data).sum()
            total_vqa_loss += vqa_loss.data[0] * bs
            total_vqa_score += VQA_score
            # vqe loss is averaged over labeled samples only
            total_vqe_loss += vqe_loss.data[0] * L
            if i % print_freq == 0:
                print('Pretrain Epoch: [{0}][{1}/{2}] - Batch Time {batch_time:.3f}'
                      '\n\tVQA Loss {vqa_loss:.4f}\t'
                      'Acc {acc:.2f}\n\t'
                      'VQE Loss {vqe_loss:.4f}'
                      .format(epoch, i, len(train_loader),
                              batch_time=time.time()-t,
                              vqa_loss=vqa_loss.data[0],
                              acc=VQA_score/bs*100,
                              vqe_loss=vqe_loss.data[0]))
                t = time.time()
            loss = vqa_loss + vqe_loss
            loss.backward()
            nn.utils.clip_grad_norm(model.parameters(), 0.25)
            optim.step()
            optim.zero_grad()
        total_vqa_loss /= len(train_loader.dataset)
        total_vqe_loss /= labeled_cnt
        total_vqa_score = 100 * total_vqa_score / len(train_loader.dataset)
        model.train(False)
        eval_score, upper_bound = eval_ans(model, eval_loader)
        model.train(True)
        logger.write('Pretrain epoch %d, time: %.2f' % (epoch, time.time()-t))
        logger.write('\ttrain vqa loss %.4f' % total_vqa_loss)
        logger.write('\ttrain vqa score %.2f' % total_vqa_score)
        logger.write('\ttrain vqe loss: %.4f' % total_vqe_loss)
        logger.write('\teval_vqa_score: %.2f (%.2f)' % (100 * eval_score, 100 * upper_bound))
        # keep only the best checkpoint by eval VQA score
        if best_score < eval_score:
            model_path = os.path.join(output, 'pretrained_vqae_enc_gt.pth')
            torch.save(model.state_dict(), model_path)
            best_score = eval_score
# Script setup: parse CLI flags, pin training hyperparameters, and build the
# train/val data pipelines. (`parser` is created earlier in the file.)
parser.add_argument('--no-cuda', action='store_true', default=False,
                    help='without CUDA training')
args = parser.parse_args()
# args.cuda=False
# args.no_cuda=True
# args.batch_size=50
# args.epochs=1
# args.lr=1e-4
# Hard-coded schedule: LR is multiplied by the matching gamma at each
# epoch listed in `schedule`.
args.momentum = 0.9
args.decay = 4e-4
args.schedule = [4, 8, 12]
args.gammas = [0.2, 0.2, 0.2]
# args.dataset_path="/Users/nomanshafqat/Desktop/newdata"
logger = utils.Logger("../", "pens").get_logger()
# CUDA only when available and not explicitly disabled
args.cuda = not args.no_cuda and torch.cuda.is_available()
# NOTE(review): this rebinding shadows the `dataset` module with a
# Trendage instance — any later use of the module by that name will break.
dataset = dataset.Trendage(args.dataset_path)
train_loader = dataloader.HDDLoader(dataset, dataset.train_data, dataset.bbox,
                                    dataset.train_labels, dataset.transform)
val_loader = dataloader.HDDLoader(dataset, dataset.val_data, dataset.bbox,
                                  dataset.val_labels, dataset.transform)
# pin_memory/workers only make sense with CUDA
kwargs = {'num_workers': 1, 'pin_memory': True} if args.cuda else {}
train_iterator = torch.utils.data.DataLoader(train_loader,
                                             batch_size=args.batch_size,
                                             shuffle=True, **kwargs)
val_iterator = torch.utils.data.DataLoader(val_loader,
                                           batch_size=args.batch_size,
                                           shuffle=True, **kwargs)
def test_ok(self):
    """getContext returns the identical object stored via setContext."""
    handle = kea.CalloutHandle(kea.CalloutManager())
    stored = utils.Logger()
    handle.setContext('foo', stored)
    fetched = handle.getContext('foo')
    self.assertIs(stored, fetched)
def train(model, train_loader, eval_loader, num_epochs, output, opt=None, s_epoch=0):
    """Train a VQA model with an auxiliary gender-prediction head.

    First 12 epochs optimize the VQA answer loss alone; afterwards a
    weighted gender cross-entropy is added. Backbone sub-modules are
    frozen each epoch. Tracks per-group (woman/man/other) prediction
    accuracy and saves a checkpoint every epoch.

    NOTE(review): ``eval_score``/``bound`` are only assigned when
    ``eval_loader`` is not None, yet logged unconditionally — a run with
    ``eval_loader=None`` raises NameError at the logging line.
    """
    lr_default = 1e-3 if eval_loader is not None else 7e-4
    lr_decay_step = 2
    lr_decay_rate = 0.01
    lr_decay_epochs = range(16, 50, lr_decay_step) if eval_loader is not None else range(10, 20, lr_decay_step)
    # linear warmup over the first 4 epochs: 0.5x -> 2x base LR
    gradual_warmup_steps = [0.5 * lr_default, 1.0 * lr_default, 1.5 * lr_default, 2.0 * lr_default]
    saving_epoch = 3
    grad_clip = .25
    utils.create_dir(output)
    optim = torch.optim.Adamax(filter(lambda p: p.requires_grad, model.parameters()), lr=lr_default) \
        if opt is None else opt
    logger = utils.Logger(os.path.join(output, 'log.txt'))
    best_eval_score = 0
    utils.print_model(model, logger)
    logger.write('optim: adamax lr=%.4f, decay_step=%d, decay_rate=%.2f, grad_clip=%.2f' % \
        (lr_default, lr_decay_step, lr_decay_rate, grad_clip))
    for epoch in range(s_epoch, num_epochs):
        total_loss = 0  # NOTE(review): never accumulated (line commented out below), so logs 0
        train_score = 0
        train_score_vqa = 0
        total_norm = 0
        count_norm = 0
        total_fair_loss = 0
        total_dis_loss = 0
        # per-group counters: *_o counts correct gender predictions
        woman = 0
        woman_o = 0
        man = 0
        man_o = 0
        other = 0
        other_o = 0
        t = time.time()
        N = len(train_loader.dataset)
        print(N)
        # LR schedule: warmup, then step decay at the configured epochs
        if epoch < len(gradual_warmup_steps):
            optim.param_groups[0]['lr'] = gradual_warmup_steps[epoch]
            logger.write('gradual warmup lr: %.4f' % optim.param_groups[0]['lr'])
        elif epoch in lr_decay_epochs:
            optim.param_groups[0]['lr'] = optim.param_groups[0]['lr'] * lr_decay_rate
            logger.write('decreased lr: %.4f' % optim.param_groups[0]['lr'])
        else:
            logger.write('lr: %.4f' % optim.param_groups[0]['lr'])
        # Freeze the VQA backbone so only the remaining heads train
        for name, subnet in model.named_children():
            if name == 'w_emb' or name == 'q_emb' or name == 'q_att' or name == 'v_att' or name == 'v_net' or name == 'q_net' or name == 'classifier2':
                print(name)
                for param in subnet.parameters():
                    param.requires_grad = False
        for i, (v, b, q, a, ques, im, g, gender) in enumerate(train_loader):
            v = v.cuda()
            b = b.cuda()
            q = q.cuda()
            a = a.cuda()
            visual_pred, vqa_pred, att = model(v, b, q, a)
            #import pdb;pdb.set_trace()
            gender = gender.squeeze(1)
            # class weights for the gender head: woman / man / other
            weights = torch.Tensor([2.0, 1.0, 0.001]).cuda()
            vqa_loss = instance_bce_with_logits(vqa_pred, a)
            loss = nn.CrossEntropyLoss(weights)
            loss = loss(visual_pred, gender.cuda())
            #dis_loss=torch.abs(visual_pred[:,0]-visual_pred[:,1]).mean()
            #dis_loss=dis_loss.cuda()
            # gender loss only kicks in after epoch 12
            if epoch < 12:
                t_loss = vqa_loss
            else:
                t_loss = loss + vqa_loss
            t_loss.backward()
            #import pdb;pdb.set_trace()
            #vp=visual_pred[:,:2].cuda()
            #g=g[:,:2]
            #crossloss=instance_bce_with_logits(vp,g.cuda())
            #mseloss=torch.nn.functional.mse_loss(vp.softmax(1),g.cuda())
            #g_swap=g[:,[1,0]].cuda()
            #swap_loss=(vp.softmax(1)*g_swap).sum(1)
            #swap_loss=swap_loss.sum()
            # tally per-group gender accuracy (0=woman, 1=man, else other)
            for j in range(len(v)):
                if gender[j] == 0:
                    woman = woman + 1
                    #if visual_pred[j].argmax()==0 or visual_pred[j].argmax()==1:
                    if visual_pred[j].argmax() == gender[j].cuda():
                        woman_o = woman_o + 1
                elif gender[j] == 1:
                    #if visual_pred[j].argmax()==0 or visual_pred[j].argmax()==1:
                    man = man + 1
                    if visual_pred[j].argmax() == gender[j].cuda():
                        man_o = man_o + 1
                else:
                    other = other + 1
                    if visual_pred[j].argmax() == gender[j].cuda():
                        other_o = other_o + 1
            total_norm += nn.utils.clip_grad_norm_(model.parameters(), grad_clip)
            count_norm += 1
            optim.step()
            optim.zero_grad()
            #total_fair_loss+=soft_fair_loss
            #total_dis_loss+=dis_loss
            batch_score = torch.eq(visual_pred.argmax(1), gender.cuda()).sum()
            batch_score_vqa = compute_score_with_logits(vqa_pred, a.data).sum()
            #batch_score = compute_score_with_logits(visual_pred, g.cuda()).sum()
            #total_loss += loss.item() * v.size(0)
            train_score += batch_score.item()
            train_score_vqa += batch_score_vqa.item()
            #train_score+=batch_score
            if i == 0:
                print(loss)
                #print(10*soft_fair_loss)
                print("\n\n")
        total_loss /= N
        train_score = 100 * train_score / N
        train_score_vqa = 100 * train_score_vqa / N
        print("epoch", epoch)
        # NOTE(review): divides by per-group counts — raises ZeroDivisionError
        # if a group never appears in the epoch.
        woman_score = float(woman_o) / woman
        man_score = float(man_o) / man
        other_score = float(other_o) / other
        print("woman", woman)
        print("man", man)
        print("other", other)
        print("train_woman_score", woman_score * 100)
        print("train_man_score", man_score * 100)
        print("train_other_score", other_score * 100)
        print("vqa", train_score_vqa)
        if None != eval_loader:
            model.train(False)
            eval_score, bound, _ = evaluate(model, eval_loader)
            model.train(True)
        #print("total_fair_loss",total_fair_loss)
        #print("totla_dis_loss",total_dis_loss)
        logger.write('epoch %d, time: %.2f' % (epoch, time.time() - t))
        logger.write('\ttrain_loss: %.2f, norm: %.4f, score: %.2f' % (total_loss, total_norm / count_norm, train_score))
        #logger.write('\total_fair_loss: %.2f, norm: %.4f, score: %.2f' % (total_loss, total_norm/count_norm, total_fair_loss))
        logger.write('\teval score: %.2f (%.2f)' % (100 * eval_score, 100 * bound))
        # checkpoint every epoch
        model_path = os.path.join(output, 'model_epoch%d.pth' % epoch)
        utils.save_model(model_path, model, epoch, optim)
def train(args):
    """Train the TCN arc-fault classifier (TensorFlow 1.x graph mode).

    Builds the graph (network, metrics, combined center/cross-entropy/L2
    loss, Adam optimizer gated on the center update op), then trains for
    ``args.max_epochs`` epochs, validating after each and checkpointing on
    best accuracy (ties broken by lower FPR).
    """
    # per-run output dirs keyed by timestamp
    sub_dir = datetime.strftime(datetime.now(), '%Y%m%d-%H%M%S')
    log_dir = os.path.join(args.log_dir, sub_dir)
    model_dir = os.path.join(args.model_dir, sub_dir)
    if not os.path.exists(log_dir):
        os.makedirs(log_dir)
    if not os.path.exists(model_dir):
        os.makedirs(model_dir)
    train_logger = utils.Logger('train', file_name=os.path.join(log_dir, 'train.log'), control_log=False)
    test_logger = utils.Logger('test', file_name=os.path.join(log_dir, 'test.log'))
    utils.save_arguments(args, os.path.join(log_dir, 'arguments.txt'))

    # data
    #split_dataset(args)
    base_dir = os.path.dirname(args.dataset_path)
    train_dataset = load_data(os.path.join(base_dir, 'train.txt'))
    test_dataset = load_data(os.path.join(base_dir, 'test.txt'))
    dataset_size = train_dataset.num_examples
    train_logger.info('dataset size: %s' % dataset_size)
    tf.reset_default_graph()
    with tf.Graph().as_default():
        tf.set_random_seed(10)
        # inputs: (batch, timesteps, 1) waveform window and integer label
        x = tf.placeholder(tf.float32, shape=[None, args.timesteps, 1], name='input')
        y = tf.placeholder(tf.int64, shape=[None], name='label')
        one_hot_y = tf.one_hot(y, depth=args.num_classes, dtype=tf.int64)
        is_training = tf.placeholder(tf.bool, name='training')
        net = Network(args.tcn_num_channels, args.tcn_kernel_size,
                      args.tcn_dropout, args.embedding_size,
                      args.weight_decay, args.num_classes)
        prelogits, logits, embeddings = net(x, is_training)
        # metrics
        with tf.variable_scope('metrics'):
            tpr_op, fpr_op, g_mean_op, accuracy_op, f1_op = calc_accuracy(
                logits, y)
        # loss: weighted center loss + cross entropy + L2 regularization
        # (focal loss is computed and summarized but not part of loss_op)
        with tf.variable_scope('loss'):
            focal_loss_op = utils.focal_loss(y, logits, 5.0)
            cross_entropy_op = tf.reduce_mean(
                tf.nn.softmax_cross_entropy_with_logits_v2(logits=logits,
                                                           labels=one_hot_y),
                name='cross_entropy')
            center_loss_op, centers, centers_update_op = utils.center_loss(
                prelogits, y, args.num_classes, args.center_loss_alpha)
            regularization_loss_op = tf.reduce_sum(tf.get_collection(
                tf.GraphKeys.REGULARIZATION_LOSSES), name='l2_loss')
            loss_op = center_loss_op * args.center_loss_factor + \
                cross_entropy_op + regularization_loss_op
        # optimizer: control dependency forces the class-center update to run
        # before every training step
        with tf.variable_scope('optimizer'), tf.control_dependencies(
                [centers_update_op]):
            global_step = tf.Variable(0, trainable=False, name='global_step')
            # optimizer_op = tf.train.MomentumOptimizer(learning_rate_op, 0.9, name='optimizer')
            optimizer = tf.train.AdamOptimizer(args.lr_values, name='optimizer')
            train_op = optimizer.minimize(loss_op, global_step)
        # summary
        tf.summary.scalar('total_loss', loss_op)
        tf.summary.scalar('l2_loss', regularization_loss_op)
        tf.summary.scalar('cross_entropy', cross_entropy_op)
        tf.summary.scalar('focal_loss', focal_loss_op)
        tf.summary.scalar('center_loss', center_loss_op)
        tf.summary.scalar('accuracy', accuracy_op)
        tf.summary.scalar('tpr', tpr_op)
        tf.summary.scalar('fpr', fpr_op)
        tf.summary.scalar('g_mean', g_mean_op)
        tf.summary.scalar('f1', f1_op)
        # NOTE(review): both merge_all calls collect the same scalar set, so
        # train and val summaries are identical ops.
        train_summary_op = tf.summary.merge_all()
        val_summary_op = tf.summary.merge_all()
        saver = tf.train.Saver(max_to_keep=100)
        config = tf.ConfigProto(allow_soft_placement=True,
                                gpu_options=tf.GPUOptions(allow_growth=True))
        with tf.Session(config=config) as sess:
            train_writer = tf.summary.FileWriter(log_dir, sess.graph,
                                                 filename_suffix='train')
            val_writer = tf.summary.FileWriter(log_dir, filename_suffix='val')
            tf.global_variables_initializer().run()
            if args.pretrained_model:
                ckpt = tf.train.get_checkpoint_state(args.pretrained_model)
                saver.restore(sess, ckpt.model_checkpoint_path)
            steps_per_epoch = np.ceil(dataset_size / args.batch_size).astype(int)
            batch_num_seq = range(steps_per_epoch)
            best_test_accuracy = 0.0
            best_test_fpr = 1.0
            try:
                for epoch in range(1, args.max_epochs + 1):
                    # NOTE(review): rewraps the previous epoch's tqdm each
                    # iteration, nesting progress bars.
                    batch_num_seq = tqdm(batch_num_seq,
                                         desc='Epoch: {:d}'.format(epoch),
                                         ascii=True)
                    for step in batch_num_seq:
                        feature, label = train_dataset.next_batch(
                            args.batch_size)
                        feature = np.reshape(
                            feature, (args.batch_size, args.timesteps, 1))
                        # every `display` steps also fetch/log the metrics
                        if step % args.display == 0:
                            tensor_list = [
                                train_op, train_summary_op, global_step,
                                accuracy_op, tpr_op, fpr_op, g_mean_op,
                                cross_entropy_op, center_loss_op, focal_loss_op
                            ]
                            _, summary, train_step, accuracy, tpr, fpr, g_mean, cross_entropy, center_loss, focal_loss = sess.run(
                                tensor_list,
                                feed_dict={
                                    x: feature,
                                    y: label,
                                    is_training: True
                                })
                            train_logger.info(
                                'Train Step: %d, accuracy: %.3f%%, tpr: %.3f%%, fpr: %.3f%%, g_mean: %.3f%%, cross_entropy: %.4f, center_loss: %.4f, focal_loss: %.4f'
                                % (train_step, accuracy * 100, tpr * 100,
                                   fpr * 100, g_mean * 100, cross_entropy,
                                   center_loss, focal_loss))
                        else:
                            _, summary, train_step = sess.run(
                                [train_op, train_summary_op, global_step],
                                feed_dict={
                                    x: feature,
                                    y: label,
                                    is_training: True
                                })
                        train_writer.add_summary(summary, global_step=train_step)
                        train_writer.flush()
                    # evaluate on the held-out set after each epoch
                    num_batches = int(
                        np.ceil(test_dataset.num_examples / args.batch_size))
                    accuracy_array = np.zeros((num_batches, ), np.float32)
                    tpr_array = np.zeros((num_batches, ), np.float32)
                    fpr_array = np.zeros((num_batches, ), np.float32)
                    g_mean_array = np.zeros((num_batches, ), np.float32)
                    for i in range(num_batches):
                        feature, label = test_dataset.next_batch(
                            args.batch_size)
                        feature = np.reshape(
                            feature, (args.batch_size, args.timesteps, 1))
                        tensor_list = [
                            accuracy_op, tpr_op, fpr_op, g_mean_op,
                            val_summary_op
                        ]
                        feed_dict = {x: feature, y: label, is_training: False}
                        accuracy_array[i], tpr_array[i], fpr_array[
                            i], g_mean_array[i], summary = sess.run(
                                tensor_list, feed_dict=feed_dict)
                        val_writer.add_summary(summary)
                        val_writer.flush()
                    test_logger.info(
                        'Validation Epoch: %d, train_step: %d, accuracy: %.3f%%, tpr: %.3f%%, fpr: %.3f%%, g_mean: %.3f%%'
                        % (epoch, train_step, np.mean(accuracy_array) * 100,
                           np.mean(tpr_array) * 100, np.mean(fpr_array) * 100,
                           np.mean(g_mean_array) * 100))
                    test_accuracy = np.mean(accuracy_array)
                    test_fpr = np.mean(fpr_array)
                    # checkpoint on best accuracy; on a tie, on lower FPR
                    if test_accuracy > best_test_accuracy:
                        best_test_accuracy = test_accuracy
                        saver.save(sess,
                                   os.path.join(model_dir, 'arc_fault'),
                                   global_step=train_step)
                    elif test_accuracy == best_test_accuracy and test_fpr < best_test_fpr:
                        best_test_fpr = test_fpr
                        saver.save(sess,
                                   os.path.join(model_dir, 'arc_fault'),
                                   global_step=train_step)
            except Exception as e:
                # best-effort: log the failure, still close the writers below
                train_logger.error(e)
            train_writer.close()
            val_writer.close()
def test_ok(self):
    """setContext succeeds (returns None) and holds one extra reference.

    Expected refcount 3 = the local ``foo`` + the reference kept by the
    callout handle's context + the temporary argument reference inside
    ``sys.getrefcount`` itself. CPython-specific by nature.
    """
    h = kea.CalloutHandle(kea.CalloutManager())
    foo = utils.Logger()
    self.assertIsNone(h.setContext('foo', foo))
    self.assertEqual(3, sys.getrefcount(foo))
def main(params):
    """Run late-fusion emotion-regression training across several seeds.

    Builds data loaders from the per-model prediction dirs under
    ``base_dir/source``, trains one FusionModel per seed, then reports the
    metrics of the seed with the best mean validation CCC.
    """
    # register logger: encode the key hyperparameters into the log filename
    log_file_name = '{}_[FUSION]_[{}]_[{}_{}_{}_{}]_[{}_{}_{}_{}].txt'.format(
        datetime.now(tz=tz.gettz('Asia/Shanghai')).strftime("%Y-%m-%d-%H-%M"),
        '_'.join(params.emo_dim_set), params.rnn, params.d_model,
        params.n_layers, params.rnn_bi, params.lr, params.batch_size,
        params.dr, params.out_dr)
    params.log_file_name = log_file_name
    params.log_dir = os.path.join(params.base_dir, 'log')
    if not os.path.exists(params.log_dir):
        os.mkdir(params.log_dir)
    params.log_file = os.path.join(params.log_dir, params.log_file_name)
    if params.log:
        # redirect stdout so every print below is captured in the log file
        sys.stdout = utils.Logger(params.log_file)
    print(' '.join(sys.argv))
    print(f'Parameters: {params}')

    # check params: default loss weights when not given explicitly
    if params.loss_weights is None:
        if len(params.emo_dim_set) == 2:
            params.loss_weights = [0.5, 0.5]  # default: 0.5 * arousal + 0.5 * valence
        else:
            params.loss_weights = [1]
    assert len(params.emo_dim_set) == len(params.loss_weights)

    # load data
    pred_dirs = glob.glob(os.path.join(params.base_dir, 'source/*'))
    data = utils.load_fusion_data(pred_dirs, params.emo_dim_set,
                                  params.segment_type)
    print('Constructing dataset and data loader ...')
    data_loader = {}
    for partition in data.keys():
        set_ = MyDataset(data, partition)
        print(f'Samples in "{partition}" set: {len(set_)}')
        # only the train split is batched/shuffled; eval runs one sample at a time
        batch_size = params.batch_size if partition == 'train' else 1
        shuffle = True if partition == 'train' else False
        data_loader[partition] = torch.utils.data.DataLoader(
            set_, batch_size=batch_size, shuffle=shuffle, num_workers=4)

    # additional params derived from the data
    params.d_in = data_loader['train'].dataset.get_feature_dim()
    print(f'Input feature dim: {params.d_in}.')
    params.d_out = len(params.emo_dim_set)

    # seed setting: one full training run per seed
    seeds = range(params.seed, params.seed + params.n_seeds)
    val_losses, val_cccs, val_pccs, val_rmses = [], [], [], []
    for seed in seeds:
        params.current_seed = seed
        torch.manual_seed(seed)
        if params.gpu is not None and torch.cuda.is_available():
            torch.cuda.manual_seed(seed)
        print('*' * 100)
        print(f'Using seed "{seed}"')
        print('*' * 100)
        # construct model
        model = FusionModel(params)
        print(model)
        # train model
        print('Training model...')
        val_loss, val_ccc, val_pcc, val_rmse = \
            train_model(model, data_loader, params)
        val_losses.append(val_loss)
        val_cccs.append(val_ccc)
        val_pccs.append(val_pcc)
        val_rmses.append(val_rmse)
        print('*' * 100)
        print(f'Seed "{params.current_seed}" over!')
        print('*' * 100)

    # pick the seed with the best mean validation CCC and report its metrics
    mean_val_cccs = [np.mean(val_ccc) for val_ccc in val_cccs]
    best_idx = mean_val_cccs.index(max(mean_val_cccs))
    best_val_ccc, best_mean_val_ccc = val_cccs[best_idx], mean_val_cccs[
        best_idx]
    best_val_pcc, best_mean_val_pcc = val_pccs[best_idx], np.mean(
        val_pccs[best_idx])
    best_val_rmse, best_mean_val_rmse = val_rmses[best_idx], np.mean(
        val_rmses[best_idx])
    best_val_loss = val_losses[best_idx]
    print(
        f'Best [Val CCC] when seed "{seeds[best_idx]}":{best_mean_val_ccc:7.4f} {[format(x, "7.4f") for x in best_val_ccc]}'
    )
    print(
        f'Val PCC when get best CCC: {best_mean_val_pcc:>.4f} {[format(x, ".4f") for x in best_val_pcc]}'
    )
    print(
        f'Val RMSE when get best CCC: {best_mean_val_rmse:>.4f} {[format(x, ".4f") for x in best_val_rmse]}'
    )
    print(f'Val Loss when get best CCC: {best_val_loss:>.4f}')
def _announce(task_name, unknown_data):
    """Print the banner shown before each known-vs-unknown evaluation section."""
    print('')
    print(task_name)
    print('known data: CIFAR40')
    print('unknown data: %s' % unknown_data)
    print('')


def _eval_unknown(net, train_loader, in_openmax, metric_logger,
                  ood_key, logger_title, csv_title, plot_data, task):
    """Score known-vs-unknown separation against one unknown dataset.

    Runs `test` on the unknown loader, computes F1 / OOD metrics against the
    precomputed in-distribution openmax scores, logs a table row, draws the F1
    plot, and appends a section to base-scores.csv.

    Args:
        net: trained classifier.
        train_loader: in-distribution training loader (needed by `test`).
        in_openmax: openmax scores of the in-distribution test split.
        metric_logger: utils.Logger for the tab-separated metric log.
        ood_key: dataset key passed to dataloader.out_dist_loader.
        logger_title: first cell of the metric-log header row.
        csv_title: section title row written to base-scores.csv.
        plot_data: dataset label passed to plot.draw_f1.
        task: 'OsR' or 'OoD' (forwarded to plot.draw_f1).
    """
    ood_test_loader = dataloader.out_dist_loader(args.data_root, ood_key,
                                                 args.batch_size, 'test')
    out_softmax, out_openmax, out_softlogit, out_openlogit, \
        _, _, _ = test(net, train_loader, ood_test_loader)
    # 1 - openmax flips the score so "known" is the positive direction (pos_label=0).
    f1, li_f1, li_thresholds, \
        li_precision, li_recall = metrics.f1_score(1 - np.array(in_openmax),
                                                   1 - np.array(out_openmax),
                                                   pos_label=0)
    ood_scores = metrics.ood_metrics(1 - np.array(in_openmax),
                                     1 - np.array(out_openmax))
    metric_logger.write([logger_title, '\t', 'FPR@95%TPR', '\t', 'DET ERR', '\t',
                         'AUROC', '\t\t', 'AUPR-IN', '\t', 'AUPR-OUT', '\t',
                         'F1 SCORE', '\t', ''])
    metric_logger.write(['', '\t\t\t', 100 * ood_scores['FPR95'], '\t',
                         100 * ood_scores['DTERR'], '\t',
                         100 * ood_scores['AUROC'], '\t',
                         100 * ood_scores['AUIN'], '\t',
                         100 * ood_scores['AUOUT'], '\t', f1, '\t', ''])
    plot.draw_f1(args.save_path, f1, li_f1, li_thresholds,
                 data=plot_data, mode='test', task=task)
    # 'a': the misclassification section already created the file with mode 'w'.
    with open(f'{args.save_path}/base-scores.csv', 'a', newline='') as f:
        writer = csv.writer(f)
        writer.writerow([csv_title])
        writer.writerow(["", "FPR@95%TPR", "DET ERR", "AUROC", "AUPR-IN",
                         "AUPR-OUT", "F1 SCORE"])
        writer.writerow(['', 100 * ood_scores['FPR95'], 100 * ood_scores['DTERR'],
                         100 * ood_scores['AUROC'], 100 * ood_scores['AUIN'],
                         100 * ood_scores['AUOUT'], f1])
        writer.writerow([''])
        # note: the explicit f.close() of the original was redundant inside `with`.


def main():
    """Run misclassification detection, open-set recognition and OOD detection
    for a CIFAR40 classifier, writing metrics to a log file and base-scores.csv.

    The four duplicated unknown-data evaluations of the original are factored
    into _eval_unknown; behavior and all emitted strings are unchanged.
    """
    os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu_id
    cudnn.benchmark = True

    # Build the 40-class backbone and restore trained weights.
    if args.model == 'res18':
        net = resnet.ResNet18(num_classes=40).cuda()
    elif args.model == 'resnext':
        net = resnext.ResNeXt(cardinality=args.cardinality, depth=args.depth,
                              nlabels=40, base_width=args.base_width,
                              widen_factor=args.widen_factor).cuda()
    elif args.model == 'res_cifar':
        net = resnet_cifar.resnet20(num_classes=40).cuda()
    else:
        # fix: an unrecognized --model previously fell through and crashed
        # later with UnboundLocalError on `net`.
        raise ValueError('unsupported model: %s' % args.model)
    state_dict = torch.load(f'{args.model_path}/model_200.pth')
    net.load_state_dict(state_dict)
    # note: the unused CrossEntropyLoss criterion of the original was removed.

    metric_logger = utils.Logger(os.path.join(args.save_path, 'test_metric.log'))

    ''' Misclassification Detection '''
    print('')
    print('Misclassification Detection')
    print('data: CIFAR40')
    print('')
    train_loader = dataloader.train_loader(args.data_root, args.data,
                                           args.batch_size)
    test_loader, test_targets = dataloader.test_loader(args.data_root,
                                                       args.in_data,
                                                       args.batch_size,
                                                       mode='test')
    in_softmax, in_openmax, in_softlogit, in_openlogit, in_open_pred, \
        correct, labels = test(net, train_loader, test_loader)
    acc, auroc, aurc, eaurc, \
        fpr, aupr, ece, li_acc, li_count = metrics.md_metrics_om(
            in_openlogit, in_openmax, correct, labels)
    plot.draw_reliability_diagrams(args.save_path, li_acc, li_count, ece)
    metric_logger.write(['Miscls Detect', '\t\t', 'ACCURACY', '\t', 'AUROC',
                         '\t\t', 'AURC', '\t\t', 'E-AURC', '\t\t', 'AUPR',
                         '\t\t', 'FPR@95%TPR', '\t', 'ECE'])
    metric_logger.write(['\t', '\t\t', acc * 100, '\t', auroc * 100, '\t',
                         aurc * 1000, '\t', eaurc * 1000, '\t', aupr * 100,
                         '\t', fpr * 100, '\t', ece * 100])
    with open(f'{args.save_path}/base-scores.csv', 'w', newline='') as f:
        writer = csv.writer(f)
        writer.writerow(['* Misclassification Detection'])
        writer.writerow(["", "ACC", "AUROC", "AURC", "E-AURC", "AUPR",
                         "FPR@95%TPR", "ECE"])
        writer.writerow(['', acc * 100, auroc * 100, aurc * 1000, eaurc * 1000,
                         aupr * 100, fpr * 100, ece * 100])
        writer.writerow([''])

    ''' Open Set Recognition: CIFAR40 (known) vs CIFAR60 (unknown) '''
    _announce('Open Set Recognition-Test', 'CIFAR60')
    in_test_loader = dataloader.in_dist_loader(args.data_root, args.in_data,
                                               args.batch_size, 'test')
    # Recompute in-distribution scores on the proper in-dist test loader;
    # these are reused by every unknown-data evaluation below.
    in_softmax, in_openmax, in_softlogit, in_openlogit, \
        _, _, _ = test(net, train_loader, in_test_loader)
    _eval_unknown(net, train_loader, in_openmax, metric_logger, 'cifar60',
                  'TEST CIFAR40-CIFAR60', '* Open Set Recognition Test-CIFAR60',
                  'CIFAR60', 'OsR')

    ''' Out of Distribution Detection '''
    _announce('Out of Distribution Detection-Test', 'SVHN')
    _eval_unknown(net, train_loader, in_openmax, metric_logger, 'svhn',
                  'TEST CIFAR40-SVHN',
                  '* Out of Distribution Detection Test-SVHN', 'SVHN', 'OoD')

    _announce('Out of Distribution Detection-Test', 'LSUN-FIX')
    _eval_unknown(net, train_loader, in_openmax, metric_logger, 'lsun-fix',
                  'TEST CIFAR40-LSUNFIX',
                  '* Out of Distribution Detection Test-LSUN-FIX', 'LSUN-FIX',
                  'OoD')

    _announce('Out of Distribution Detection-Test', 'new-TinyImageNet158')
    _eval_unknown(net, train_loader, in_openmax, metric_logger,
                  'new-tinyimagenet158', 'TEST CIFAR40-Tiny158',
                  '* Out of Distribution Detection Test-new-TinyImageNet158',
                  'new-TinyImageNet158', 'OoD')
batch_size = opt.batch_size model = models.DDPG(n_states=tconfig['num_states'], n_actions=tconfig['num_actions'], opt=ddpg_opt, supervised=True) if not os.path.exists('log'): os.mkdir('log') if opt.phase == 'train': if not os.path.exists('sl_model_params'): os.mkdir('sl_model_params') expr_name = 'sl_train_ddpg_{}'.format(str(utils.get_timestamp())) logger = utils.Logger( name='ddpg', log_file='log/{}.log'.format(expr_name) ) assert len(opt.sa_path) != 0, "SA_PATH should be specified when training DDPG Actor" with open(opt.sa_path, 'rb') as f: data = pickle.load(f) for epoch in xrange(opt.epoches): random.shuffle(data) num_samples = len(data) print(num_samples) n_train_samples = int(num_samples * 0.8) n_test_samples = num_samples - n_train_samples train_data = data[:n_train_samples]
def train(model, train_loader, eval_loader, num_epochs, output, eval_each_epoch): utils.create_dir(output) optim = torch.optim.Adamax(model.parameters()) logger = utils.Logger(os.path.join(output, 'log.txt')) all_results = [] total_step = 0 for epoch in range(num_epochs): total_loss = 0 train_score = 0 t = time.time() for i, (v, q, a, b, qid) in tqdm(enumerate(train_loader), desc="Epoch %d" % (epoch), total=len(train_loader)): total_step += 1 v = Variable(v).cuda() q = Variable(q).cuda() a = Variable(a).cuda() b = Variable(b).cuda() pred, loss = model(v, None, q, a, b) if (loss != loss).any(): raise ValueError("NaN loss") loss.backward() nn.utils.clip_grad_norm(model.parameters(), 0.25) optim.step() optim.zero_grad() batch_score = compute_score_with_logits(pred, a.data).sum().item() total_loss += loss.item() * v.size(0) train_score += batch_score wandb.log({"train_loss_batch": total_loss}) total_loss /= len(train_loader.dataset) train_score = 100 * train_score / len(train_loader.dataset) run_eval = eval_each_epoch or (epoch == num_epochs - 1) if run_eval: model.train(False) results, predictions = evaluate(model, eval_loader) results["epoch"] = epoch results["step"] = total_step results["train_loss"] = total_loss results["train_score"] = train_score all_results.append(results) with open(join(output, "results.json"), "w") as f: json.dump(all_results, f, indent=2) with open(join(output, f"predictions-{epoch}.json"), "w") as f: json.dump(predictions, f, indent=2) model.train(True) wandb.log(results) eval_score = results["score"] bound = results["upper_bound"] logger.write('epoch %d, time: %.2f' % (epoch, time.time() - t)) logger.write('\ttrain_loss: %.2f, score: %.2f' % (total_loss, train_score)) if run_eval: logger.write('\teval score: %.2f (%.2f)' % (100 * eval_score, 100 * bound)) model_path = os.path.join(output, 'model.pth') torch.save(model.state_dict(), model_path)
parser.add_argument('--adam', action='store_true', help='Whether to use adam (default is rmsprop)') parser.add_argument('--adadelta', action='store_true', help='Whether to use adadelta (default is rmsprop)') parser.add_argument('--keep_ratio', action='store_true', help='whether to keep ratio for image resize') parser.add_argument('--manualSeed', type=int, default=1234, help='reproduce experiemnt') parser.add_argument('--random_sample', action='store_true',default=True, help='whether to sample the dataset with random sampler') opt = parser.parse_args() print(opt) debug = True train_num, val_num = 192023, 20000 # log config if not os.path.exists("./logs/"): os.mkdir("./logs/") log_file = "./logs/"+str(opt.expr_dir) log = utils.Logger() if not os.path.exists(log_file): os.mkdir(log_file) log.open(log_file+"/log_train.txt", mode="a") # gpu devices device = torch.device("cuda" if torch.cuda.is_available() else "cpu") if not os.path.exists(opt.expr_dir): os.makedirs(opt.expr_dir) # set seed random.seed(opt.manualSeed) np.random.seed(opt.manualSeed) torch.manual_seed(opt.manualSeed) torch.cuda.manual_seed_all(opt.manualSeed)
def main(args):
    """Full training loop for the hierarchical AutoEncoder (NVAE-style).

    Sets seeds, builds data loaders/model/optimizer, optionally resumes from
    `<args.save>/checkpoint.pt`, then alternates training epochs with periodic
    sampling + validation, checkpointing on rank 0, and a final 1000-sample
    validation pass.
    """
    # ensures that weight initializations are all the same
    torch.manual_seed(args.seed)
    np.random.seed(args.seed)
    torch.cuda.manual_seed(args.seed)
    torch.cuda.manual_seed_all(args.seed)

    logging = utils.Logger(args.global_rank, args.save)
    writer = utils.Writer(args.global_rank, args.save)

    # Get data loaders.
    train_queue, valid_queue, num_classes = datasets.get_loaders(args)
    args.num_total_iter = len(train_queue) * args.epochs
    warmup_iters = len(train_queue) * args.warmup_epochs
    # NOTE(review): swa_start is computed but never used in this function.
    swa_start = len(train_queue) * (args.epochs - 1)

    arch_instance = utils.get_arch_cells(args.arch_instance)
    model = AutoEncoder(args, writer, arch_instance)
    model = model.cuda()

    logging.info('args = %s', args)
    logging.info('param size = %fM ', utils.count_parameters_in_M(model))
    logging.info('groups per scale: %s, total_groups: %d',
                 model.groups_per_scale, sum(model.groups_per_scale))

    if args.fast_adamax:
        # Fast adamax has the same functionality as torch.optim.Adamax, except it is faster.
        cnn_optimizer = Adamax(model.parameters(), args.learning_rate,
                               weight_decay=args.weight_decay, eps=1e-3)
    else:
        cnn_optimizer = torch.optim.Adamax(model.parameters(), args.learning_rate,
                                           weight_decay=args.weight_decay, eps=1e-3)

    # Cosine decay starts only after warmup (see the epoch loop below).
    cnn_scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
        cnn_optimizer, float(args.epochs - args.warmup_epochs - 1), eta_min=args.learning_rate_min)
    # Mixed-precision loss scaling; 2**10 is the initial scale.
    grad_scalar = GradScaler(2**10)

    num_output = utils.num_output(args.dataset)
    # nats -> bits-per-dimension conversion factor
    bpd_coeff = 1. / np.log(2.) / num_output

    # if load: resume model/optimizer/scaler/scheduler state from checkpoint
    checkpoint_file = os.path.join(args.save, 'checkpoint.pt')
    if args.cont_training:
        logging.info('loading the model.')
        checkpoint = torch.load(checkpoint_file, map_location='cpu')
        init_epoch = checkpoint['epoch']
        model.load_state_dict(checkpoint['state_dict'])
        model = model.cuda()
        cnn_optimizer.load_state_dict(checkpoint['optimizer'])
        grad_scalar.load_state_dict(checkpoint['grad_scalar'])
        cnn_scheduler.load_state_dict(checkpoint['scheduler'])
        global_step = checkpoint['global_step']
    else:
        global_step, init_epoch = 0, 0

    for epoch in range(init_epoch, args.epochs):
        # update lrs.
        if args.distributed:
            # reshuffle training shards differently every epoch, keep valid fixed
            train_queue.sampler.set_epoch(global_step + args.seed)
            valid_queue.sampler.set_epoch(0)

        if epoch > args.warmup_epochs:
            cnn_scheduler.step()

        # Logging.
        logging.info('epoch %d', epoch)

        # Training.
        train_nelbo, global_step = train(train_queue, model, cnn_optimizer,
                                         grad_scalar, global_step, warmup_iters,
                                         writer, logging)
        logging.info('train_nelbo %f', train_nelbo)
        writer.add_scalar('train/nelbo', train_nelbo, global_step)

        model.eval()
        # generate samples less frequently
        eval_freq = 1 if args.epochs <= 50 else 20
        if epoch % eval_freq == 0 or epoch == (args.epochs - 1):
            with torch.no_grad():
                num_samples = 16
                n = int(np.floor(np.sqrt(num_samples)))
                # sample a 4x4 grid at several temperatures
                for t in [0.7, 0.8, 0.9, 1.0]:
                    logits = model.sample(num_samples, t)
                    output = model.decoder_output(logits)
                    # Bernoulli decoders: use the mean; others: draw a sample.
                    output_img = output.mean if isinstance(output, torch.distributions.bernoulli.Bernoulli) else output.sample(t)
                    output_tiled = utils.tile_image(output_img, n)
                    writer.add_image('generated_%0.1f' % t, output_tiled, global_step)

            valid_neg_log_p, valid_nelbo = test(valid_queue, model, num_samples=10,
                                                args=args, logging=logging)
            logging.info('valid_nelbo %f', valid_nelbo)
            logging.info('valid neg log p %f', valid_neg_log_p)
            logging.info('valid bpd elbo %f', valid_nelbo * bpd_coeff)
            logging.info('valid bpd log p %f', valid_neg_log_p * bpd_coeff)
            writer.add_scalar('val/neg_log_p', valid_neg_log_p, epoch)
            writer.add_scalar('val/nelbo', valid_nelbo, epoch)
            writer.add_scalar('val/bpd_log_p', valid_neg_log_p * bpd_coeff, epoch)
            writer.add_scalar('val/bpd_elbo', valid_nelbo * bpd_coeff, epoch)

        # checkpoint roughly 100 times over the run, rank 0 only
        save_freq = int(np.ceil(args.epochs / 100))
        if epoch % save_freq == 0 or epoch == (args.epochs - 1):
            if args.global_rank == 0:
                logging.info('saving the model.')
                torch.save({'epoch': epoch + 1, 'state_dict': model.state_dict(),
                            'optimizer': cnn_optimizer.state_dict(),
                            'global_step': global_step,
                            'args': args, 'arch_instance': arch_instance,
                            'scheduler': cnn_scheduler.state_dict(),
                            'grad_scalar': grad_scalar.state_dict()}, checkpoint_file)

    # Final validation with many more importance samples for a tighter bound.
    valid_neg_log_p, valid_nelbo = test(valid_queue, model, num_samples=1000,
                                        args=args, logging=logging)
    logging.info('final valid nelbo %f', valid_nelbo)
    logging.info('final valid neg log p %f', valid_neg_log_p)
    writer.add_scalar('val/neg_log_p', valid_neg_log_p, epoch + 1)
    writer.add_scalar('val/nelbo', valid_nelbo, epoch + 1)
    writer.add_scalar('val/bpd_log_p', valid_neg_log_p * bpd_coeff, epoch + 1)
    writer.add_scalar('val/bpd_elbo', valid_nelbo * bpd_coeff, epoch + 1)
    writer.close()
    def run(self, rm_save_folder=False):
        """Smoke-test `self.model` against every compatible dataset/run config.

        For each matching model config, each dataset config accepted by
        `check_cfg`, and the first run config: builds random sample batches
        shaped from the dataset config, drives one train/test cycle through
        the model's hook API, then saves and reloads the model.

        Args:
            rm_save_folder: if True, delete each config's save folder after
                the save/load round-trip.
        """
        for model_cfg in models.functional.common.allcfgs():
            if hasattr(model_cfg, 'name') and model_cfg.name == self.model.__name__:
                model_name = os.path.splitext(
                    os.path.split(model_cfg._path)[1])[0]
                logger = utils.Logger(
                    os.path.join(os.path.dirname(__file__), 'test', model_name),
                    model_name)
                logger.info('Testing model: ' + model_name + ' ...')
                for data_cfg in datasets.functional.common.allcfgs():
                    if not self.model.check_cfg(data_cfg, model_cfg):
                        # print("\tDataset '" + data_cfg.name + "' not support")
                        continue
                    data_name = os.path.splitext(
                        os.path.split(data_cfg._path)[1])[0]
                    logger.info('\tTesting dataset: ' + data_name + ' ...')
                    data_cfg.index_cross = 1
                    test_batchsize = configs.env.ci.batchsize

                    # Build random train/test tensors whose shapes mirror the
                    # dataset config's source/target declarations.
                    sample_dict, test_sample_dict = dict(), dict()
                    for name, value in vars(data_cfg).items():
                        if name.startswith('source') or name.startswith('target'):
                            kernel = getattr(
                                data_cfg,
                                'kernel' if name.startswith('source') else 'out_kernel',
                                None)
                            if kernel is not None:
                                sample_shape = (kernel.kT, kernel.kW, kernel.kH)
                                sample_dict[name] = torch.randn(
                                    configs.env.ci.batchsize, *sample_shape)
                            else:
                                if hasattr(value, 'patch'):
                                    sample_shape = (value.patch, value.time,
                                                    value.width, value.height)
                                elif hasattr(value, 'time'):
                                    sample_shape = (value.time, value.width,
                                                    value.height)
                                else:
                                    sample_shape = (value.elements, )
                                # TODO re-ID target class in [-1, 1] not in [0, 1]
                                # scalar entries are treated as class labels
                                sample_dict[name] = torch.randint(value.classes, (configs.env.ci.batchsize, 1)).long() \
                                    if len(sample_shape) == 1 and sample_shape[0] == 1 \
                                    else torch.randn(configs.env.ci.batchsize, *sample_shape)
                            logger.info("\t-- " + name + " size: " +
                                        str(sample_dict[name].size()))
                        elif name.startswith('test_source') or name.startswith('test_target'):
                            test_kernel = getattr(
                                data_cfg,
                                'test_kernel' if name.startswith('test_source') else 'test_out_kernel',
                                None)
                            if test_kernel is not None:
                                test_sample_shape = (test_kernel.kT,
                                                     test_kernel.kW,
                                                     test_kernel.kH)
                                # name[5:] strips the 'test_' prefix
                                test_sample_dict[name[5:]] = torch.randn(
                                    test_batchsize, *test_sample_shape)
                            else:
                                if hasattr(value, 'patch'):
                                    test_sample_shape = (value.patch, value.time,
                                                         value.width, value.height)
                                elif hasattr(value, 'time'):
                                    test_sample_shape = (value.time, value.width,
                                                         value.height)
                                else:
                                    test_sample_shape = (value.elements, )
                                test_sample_dict[name[5:]] = \
                                    torch.randint(value.classes, (test_batchsize, 1)).long() \
                                    if len(test_sample_shape) == 1 and test_sample_shape[0] == 1 \
                                    else torch.randn(test_batchsize, *test_sample_shape)
                            logger.info("\t-- " + name + " size: " +
                                        str(test_sample_dict[name[5:]].size()))
                    # Any train-only key is reused (truncated) as its test twin.
                    for name, value in sample_dict.items():
                        if name not in test_sample_dict.keys():
                            test_sample_dict[name] = value[0:test_batchsize]
                    sample_loader = DataLoader(
                        datasets.SampleDataset(sample_dict), pin_memory=True)
                    test_sample_loader = DataLoader(
                        datasets.SampleDataset(test_sample_dict), pin_memory=True)

                    for run_cfg in configs.Run.all():
                        run_name = os.path.splitext(
                            os.path.split(run_cfg._path)[1])[0]
                        logger.info('\t\tTesting config: ' + run_name + ' ...')
                        save_folder = os.path.join(os.path.dirname(__file__),
                                                   'test', model_name,
                                                   data_name + '-' + run_name)
                        summary = utils.Summary(save_folder,
                                                dataset=datasets.SampleDataset(
                                                    dict()))
                        summary.dataset.logger = logger
                        main_msg = dict(ci='ci')
                        main_msg.update(
                            dict(index_cross=data_cfg.index_cross,
                                 while_idx=1,
                                 while_flag=True))
                        model = self.model(model_cfg, data_cfg, run_cfg,
                                           summary=summary, main_msg=main_msg)
                        # Count trainable parameters per sub-module.
                        params, params_all = dict(), 0
                        for name, value in model.modules().items():
                            params[name] = sum(p.numel()
                                               for p in value.parameters()
                                               if p.requires_grad)
                            params_all += params[name]
                        logger.info("\t\t-- parameter(s): " + str(params))
                        logger.info("\t\t-- all parameters: " + str(params_all))

                        # One-batch train/test cycle; the model's msg hooks are
                        # expected to clear while_flag/test_flag to end the loops.
                        while main_msg['while_flag']:
                            model.process_pre_hook()
                            model.main_msg = main_msg
                            sample_loader = model.train_loader_hook(
                                sample_loader)
                            epoch_info = {
                                'epoch': 1,
                                'batch_idx': 0,
                                'index': torch.arange(configs.env.ci.batchsize),
                                'batch_count': configs.env.ci.batchsize,
                                'batch_per_epoch': 1,
                                'count_data': configs.env.ci.batchsize
                            }
                            summary.update_epochinfo(epoch_info)
                            model.train_epoch_pre_hook(epoch_info, sample_loader)
                            loss_all = dict()
                            loss_dict = model.train_process(
                                epoch_info, sample_dict)
                            loss_dict.update(
                                dict(_count=[configs.env.ci.batchsize]))
                            utils.common.merge_dict(loss_all, loss_dict)
                            model.train_epoch_hook(epoch_info, sample_loader)
                            loss_all = model.train_return_hook(
                                epoch_info, loss_all)
                            logger.info("\t\t-- loss(es) " +
                                        str(main_msg['while_idx']) + ": " +
                                        str(loss_all))
                            model.main_msg.update(
                                dict(test_idx=1, test_flag=True, only_test=False))
                            while model.main_msg['test_flag']:
                                torch.cuda.empty_cache()
                                with torch.no_grad():
                                    test_sample_loader = model.test_loader_hook(
                                        test_sample_loader)
                                    epoch_info.update(
                                        dict(
                                            index=torch.arange(test_batchsize),
                                            batch_count=test_batchsize,
                                            count_data=test_batchsize))
                                    model.test_epoch_pre_hook(
                                        epoch_info, test_sample_loader)
                                    result_dict = model.test_process(
                                        epoch_info, test_sample_dict)
                                    model.test_epoch_hook(
                                        epoch_info, test_sample_loader)
                                    # Tensors are moved to numpy before the return hook.
                                    result_dict = model.test_return_hook(
                                        epoch_info, {
                                            k: v.detach().cpu().numpy()
                                            for k, v in result_dict.items()
                                            if isinstance(v, torch.Tensor)
                                        } if isinstance(result_dict, dict) else result_dict)
                                    # The hook may return (results,), (results, add_data_msgs)
                                    # or (results, add_data_msgs, msgs).
                                    add_data_msgs, msgs = None, None
                                    if isinstance(result_dict, tuple):
                                        if len(result_dict) == 2:
                                            add_data_msgs = result_dict[1]
                                            result_dict = result_dict[0]
                                        elif len(result_dict) == 3:
                                            msgs = result_dict[2]
                                            add_data_msgs = result_dict[1]
                                            result_dict = result_dict[0]
                                    # Only shapes are logged, not the values.
                                    for name, value in result_dict.items():
                                        result_dict[name] = value.shape
                                    logger.info(
                                        "\t\t-- result(s) " +
                                        str(model.main_msg['test_idx']) +
                                        " size: " + str(result_dict))
                                    if msgs is not None:
                                        logger.info("\t\t-- msg(s): " + str(msgs))
                                    model.process_test_msg_hook(model.main_msg)
                            model.process_hook()
                            model.process_msg_hook(main_msg)
                        logger.info("\t\t-- save folder: " + str(
                            utils.path.get_path(model_cfg, data_cfg, run_cfg)))
                        # Verify the save/load round-trip works.
                        model.save(epoch=1, path=save_folder)
                        model.load(path=save_folder)
                        if rm_save_folder:
                            shutil.rmtree(save_folder)
                        logger.info('\t\tTesting config: ' + run_name + ' completed.')
                        break  # only the first run config is exercised
                    logger.info('\tTesting dataset: ' + data_name + ' completed.')
                    # break
                logger.info('Testing model: ' + model_name + ' completed.')
if not os.path.exists('save_memory'): os.mkdir('save_memory') if not os.path.exists('save_knobs'): os.mkdir('save_knobs') if not os.path.exists('save_state_actions'): os.mkdir('save_state_actions') if not os.path.exists('model_params'): os.mkdir('model_params') expr_name = 'train_{}_{}'.format(opt.method, str(utils.get_timestamp())) logger = utils.Logger( name=opt.method, log_file='log/{}.log'.format(expr_name) ) if opt.other_knob != 0: logger.warn('USE Other Knobs') current_knob = environment.get_init_knobs() # OUProcess origin_sigma = 0.20 sigma = origin_sigma # decay rate sigma_decay_rate = 0.99 step_counter = 0 train_step = 0 if opt.method == 'ddpg':
def eval_user_adaptation(opt):
    """Measure how translation quality improves as a seq2seq model adapts to a user.

    For every user's (src, trg) file pair: split off a test set, adapt the
    model on the first n_train sentence pairs for each n_train in
    [opt.min_n_train, opt.max_n_train), translate the test split, and finally
    report BLEU (with bootstrap resampling) per n_train, aggregated over all
    users. Writes translations/gold to temp files and the BLEU curve to
    `bleu_scores.txt`.
    """
    log = utils.Logger(opt.verbose)
    # NOTE(review): timer is created but not used in this function.
    timer = utils.Timer()
    # Read vocabs
    lexicon = helpers.get_lexicon(opt)
    # Read data
    filepairs = load_user_filepairs(opt.usr_file_list)
    # Get target language model
    lang_model = None
    # Load model
    s2s = helpers.build_model(opt, lexicon, lang_model, test=True)
    #if not opt.full_training:
    #    s2s.freeze_parameters()
    # Trainer
    trainer = helpers.get_trainer(opt, s2s)
    # print config
    if opt.verbose:
        options.print_config(opt, src_dict_size=len(lexicon.w2ids),
                             trg_dict_size=len(lexicon.w2idt))
    # This will store translations and gold sentences
    # (translations[n_train] accumulates outputs across ALL users)
    translations = dict([(i, []) for i in range(opt.min_n_train, opt.max_n_train)])
    gold = []
    # Run training
    for usr_id, (src_file, trg_file) in enumerate(filepairs):
        log.info('Evaluating on files %s' % os.path.basename(src_file).split()[0])
        # Load file pair
        src_data = data.read_corpus(src_file, lexicon.w2ids, raw=True)
        trg_data = data.read_corpus(trg_file, lexicon.w2idt, raw=True)
        # split train/test
        train_src, test_src, train_trg, test_trg, order = split_user_data(
            src_data, trg_data, n_test=opt.n_test)
        # Convert train data to indices
        train_src = lexicon.sents_to_ids(train_src)
        train_trg = lexicon.sents_to_ids(train_trg, trg=True)
        # Save test data
        for s in test_trg:
            gold.append(' '.join(s))
        # Start loop
        for n_train in range(opt.min_n_train, opt.max_n_train):
            log.info('Training on %d sentence pairs' % n_train)
            # Train on n_train first sentences
            X, Y = train_src[:n_train], train_trg[:n_train]
            temp_out = utils.exp_temp_filename(opt, str(n_train) + 'out.txt')
            if opt.full_training:
                # reset to the base model before each adaptation round
                s2s.load()
            if opt.log_unigram_bias:
                # adapt only the unigram bias vector instead of full training
                if opt.use_trg_unigrams:
                    unigrams = lexicon.compute_unigrams(Y, lang='trg')
                else:
                    unigrams = lexicon.estimate_unigrams(X)
                log_unigrams = np.log(unigrams + opt.log_unigrams_eps)
                s2s.reset_usr_vec(log_unigrams)
            elif n_train > 0:
                adapt(s2s, trainer, X, Y, opt.num_epochs,
                      opt.check_train_error_every)
            log.info('Translating test file')
            s2s.set_test_mode()
            # Test on test split
            for x in test_src:
                y_hat = s2s.translate(x, 0, beam_size=opt.beam_size)
                translations[n_train].append(y_hat)
    # Temp files
    temp_gold = utils.exp_temp_filename(opt, 'gold.txt')
    np.savetxt(temp_gold, gold, fmt='%s')
    # Results
    test_bleus = np.zeros(opt.max_n_train - opt.min_n_train)
    for n_train in range(opt.min_n_train, opt.max_n_train):
        log.info('Evaluation for %d sentence pairs' % n_train)
        temp_out = utils.exp_temp_filename(opt, str(n_train) + 'out.txt')
        temp_bootstrap_out = utils.exp_temp_filename(
            opt, str(n_train) + '_bootstrap_out.txt')
        temp_bootstrap_ref = utils.exp_temp_filename(
            opt, str(n_train) + '_bootstrap_ref.txt')
        np.savetxt(temp_out, translations[n_train], fmt='%s')
        bleu, details = evaluation.bleu_score(temp_gold, temp_out)
        log.info('BLEU score: %.2f' % bleu)
        bleus = evaluation.bootstrap_resampling(temp_gold, temp_out,
                                                opt.bootstrap_num_samples,
                                                opt.bootstrap_sample_size,
                                                temp_bootstrap_ref,
                                                temp_bootstrap_out)
        evaluation.print_stats(bleus)
        test_bleus[n_train - opt.min_n_train] = bleu
    np.savetxt(utils.exp_filename(opt, 'bleu_scores.txt'), test_bleus, fmt='%.3f')
args = parse_args() torch.manual_seed(args.seed) torch.cuda.manual_seed_all(args.seed) torch.backends.cudnn.benchmark = True if args.task == 'vqa': from gender_base_genderloss_train import train dict_path = 'data/dictionary.pkl' dictionary = Dictionary.load_from_file(dict_path) train_dset = VQAFeatureDataset('train', dictionary, adaptive=True) val_dset = VQAFeatureDataset('train', dictionary, adaptive=True) w_emb_path = 'data/glove6b_init_300d.npy' utils.create_dir(args.output) logger = utils.Logger(os.path.join(args.output, 'args.txt')) logger.write(args.__repr__()) batch_size = args.batch_size constructor = 'build_%s' % args.model model = getattr(gender_base_genderloss, constructor)(train_dset, args.num_hid, args.op, args.gamma, args.task).cuda() tfidf = None weights = None #import pdb;pdb.set_trace() model.w_emb.init_embedding(w_emb_path, tfidf, weights) model = model.cuda()
def train(model, train_loader, eval_loader, num_epochs, output, dataset):
    """Train a VQA model with an auxiliary relevance loss between T_vq and T_e.

    Loss = BCE(answer logits) + lam * mean pairwise distance(T_vq, T_e); the
    explanation (VQE) loss is currently disabled. Saves the best-eval model to
    `<output>/model.pth` and appends metrics to `<output>/log.txt`.

    NOTE(review): this code targets old PyTorch (`Variable`, `.data[0]`,
    `reduce=True`, non-in-place clip_grad_norm) — it will not run unchanged on
    PyTorch >= 0.5.
    """
    #utils.create_dir(output)
    #optim = build_optimizer(model)
    optim = torch.optim.Adamax(model.parameters(), lr=2e-3)
    #optim = torch.optim.Adamax(model.T_vq.parameters(), lr=2e-3)
    #optim = torch.optim.Adamax([
    #    {'params': model.v_att_2.parameters()},
    #    {'params': model.q_net_2.parameters()},
    #    {'params': model.v_net_2.parameters()},
    #    {'params': model.T_vq.parameters()}
    #], lr=2e-3)
    relevance_loss = nn.PairwiseDistance(p=2).cuda()
    # NOTE(review): coherence_loss is constructed but unused (VQE loss disabled).
    coherence_loss = nn.CrossEntropyLoss(reduce=True).cuda()
    logger = utils.Logger(os.path.join(output, 'log.txt'))
    best_eval_score = 0
    print_freq = 10
    lam = 0.1  # weight of the relevance loss term
    # NOTE(review): this initial evaluation's results (A, B) are never used —
    # presumably a warm-up/sanity check.
    A, B = evaluate(model, eval_loader, dataset)
    for epoch in range(num_epochs):
        total_vqa_loss = 0
        total_vqe_loss = 0
        total_rel_loss = 0
        train_score = 0
        t = time.time()
        for i, (v, q, a, c, l, f) in enumerate(train_loader):
            bs = v.size(0)
            # sort the batch by caption length (descending) for packing
            l, sort_ind = l.sort(dim=0, descending=True)
            ml = l[0]
            v = Variable(v[sort_ind]).cuda()
            q = Variable(q[sort_ind]).cuda()
            a = Variable(a[sort_ind]).cuda()
            c = Variable(c[sort_ind]).cuda()
            # c[:, :-1]: captions without the last token (teacher forcing input)
            ans_pred, exp_pred, T_vq, T_e = model(v, q, c[:, :-1],
                                                  [j - 1 for j in l], ml)
            # disabled debug dump of ground-truth vs predicted explanation
            # for sample index 50
            if False:  #i % 100 == 0:
                print('Explain GT: %s' % (' '.join([
                    train_loader.dataset.explanation_dictionary.idx2word[
                        w.data[0]] for id, w in enumerate(c[50]) if id < l[50]
                ])))
                print('Explain Pred: %s' % ('<start> ' + ' '.join([
                    train_loader.dataset.explanation_dictionary.idx2word[
                        w.data[0]] for id, w in enumerate(exp_pred[50].max(1)[1])
                    if id < (l[50] - 1)
                ])))
            #exp_pred = pack_padded_sequence(exp_pred, [j-1 for j in l], batch_first=True)[0]
            #c = pack_padded_sequence(c[:, 1:], [j-1 for j in l], batch_first=True)[0]
            vqa_loss = instance_bce_with_logits(ans_pred, a)
            #vqe_loss = coherence_loss(exp_pred.cuda(), c)
            rel_loss = torch.mean(relevance_loss(T_vq, T_e))
            loss = vqa_loss + (lam * rel_loss)  #+ (lam * vqe_loss)
            #loss = vqa_loss + rel_loss
            #loss = vqa_loss
            loss.backward()
            nn.utils.clip_grad_norm(model.parameters(), 0.25)
            optim.step()
            optim.zero_grad()

            batch_score = compute_score_with_logits(ans_pred, a.data).sum()
            total_vqa_loss += vqa_loss.data[0] * v.size(0)
            #total_vqe_loss += vqe_loss.data[0] * v.size(0)
            total_rel_loss += rel_loss.data[0] * v.size(0)
            train_score += batch_score
            if i % print_freq == 0:
                print('Epoch: [{0}][{1}/{2}]\t'
                      #'VQE Loss {vqe_loss:.4f}\t'
                      'Batch Time {batch_time:.3f}\t'
                      'VQA Loss {vqa_loss:.4f}\t'
                      'Acc@1 {acc1:.2f}\t'.format(
                          epoch, i, len(train_loader),
                          vqa_loss=vqa_loss.data[0],
                          #vqe_loss=vqe_loss.data[0],
                          acc1=batch_score / v.size(0) * 100,
                          batch_time=time.time() - t))
                # NOTE(review): t is reset here, so the per-epoch 'time:' log
                # below measures time since the last print, not the full epoch.
                t = time.time()

        total_vqa_loss /= len(train_loader.dataset)
        #total_vqe_loss /= len(train_loader.dataset)
        total_rel_loss /= len(train_loader.dataset)
        train_score = 100 * train_score / len(train_loader.dataset)

        model.train(False)
        eval_score, bound = evaluate(model, eval_loader, dataset)
        model.train(True)

        logger.write('epoch %d, time: %.2f' % (epoch, time.time() - t))
        logger.write('\ttrain_vqa_loss: %.2f, score: %.2f' %
                     (total_vqa_loss, train_score))
        #logger.write('\ttrain_vqe_loss: %.4f' % (total_vqe_loss))
        logger.write('\ttrain_rel_loss: %.4f' % (total_rel_loss))
        logger.write('\teval score: %.2f (%.2f)' %
                     (100 * eval_score, 100 * bound))

        # keep only the best-scoring checkpoint
        if eval_score > best_eval_score:
            model_path = os.path.join(output, 'model.pth')
            torch.save(model.state_dict(), model_path)
            best_eval_score = eval_score
def log_reg_plot_and_CI(scores: list, dep_vars: list, sample_size: int,
                        repeat: int, log_reg_date: str, bootstrap_id: int,
                        max_model_size: int, max_expr_len: int,
                        language_name: str, lang_gen_date: str):
    '''Make and store distplots and 95% CI of regression coefficients.

    All `print` output in the body is captured into a per-bootstrap text
    file by temporarily replacing ``sys.stdout`` with a ``utils.Logger``.

    Args:
        scores: A list of strings. The names of the complexity measures:
            the independent variables.
        dep_vars: A list of strings. The names of the quantifier props:
            the dependent variables.
        sample_size: An int. The size of the data samples that were used
            for the regressions.
        repeat: An int. The number of samples that were taken, i.e. the
            number of regressions.
        log_reg_date: A string. The date on which the regression data was
            made.
        bootstrap_id: An int. Used for loading csv data with logistic
            regression data. Identifies the bootstrap series for a given
            date. Multiple regression sessions were done on the same data
            to check for convergence.
        max_model_size: An int. Used for loading and storing csv data.
            The maximum model size over which the meaning of quantifiers
            was computed in the language data.
        max_expr_len: An int. Used for loading and storing csv data.
            The maximum expression length of the quantifier expressions
            in the language data.
        language_name: A string. Used for loading and storing csv data.
            Identifies the collection of operators used for generating
            the language data.
        lang_gen_date: A string. Used for loading and storing csv data.
            The date on which the language data was generated.
    '''
    old_stdout = sys.stdout
    log_reg_plot_fileloc = utils.make_log_reg_plot_path(
        max_model_size, language_name, lang_gen_date, log_reg_date)
    sys.stdout = utils.Logger(
        log_reg_plot_fileloc /
        f"mean_and_CI-{bootstrap_id}-{log_reg_date}.txt")
    # BUG FIX: the original restored sys.stdout only on the success path.
    # If distplot_log_reg_from_csv / mean_and_CI_log_reg raised, stdout
    # stayed redirected for the rest of the process. try/finally guarantees
    # restoration.
    try:
        print("-" * 30)
        print("language \t\t", language_name)
        print("max_expr_len \t", max_expr_len)
        print("max_model_size \t", max_model_size)
        print("lang_gen_date \t", lang_gen_date)
        print("log_reg_date \t", log_reg_date)
        print("repeat \t\t\t", repeat)
        print("sample_size \t", sample_size)
        print("bootstrap_id \t\t\t\t", bootstrap_id)
        print("-" * 30)
        print()
        for score in scores:
            ind_var1 = f"{score}_zscore"        # complexity (normalized)
            ind_var2 = f"{score}_shuff_zscore"  # complexity random baseline
            distplot_log_reg_from_csv(ind_var1, ind_var2, dep_vars,
                                      sample_size, repeat, log_reg_date,
                                      bootstrap_id, max_model_size,
                                      max_expr_len, language_name,
                                      lang_gen_date)
            mean_and_CI_log_reg(ind_var1, ind_var2, dep_vars, sample_size,
                                repeat, log_reg_date, bootstrap_id,
                                max_model_size, max_expr_len, language_name,
                                lang_gen_date, orig=False, rand=False,
                                diff=True, verbose=False)
            print()
    finally:
        sys.stdout = old_stdout
best_top_5 = 0 # set up optimizer for training optimizer = torch.optim.Adam(model.parameters(), lr=1e-5, weight_decay=1e-5) # optimizer = torch.optim.SGD(model.parameters(), args.lr, # momentum = args.momentum, # nesterov = True, # weight_decay = args.weight_decay) print('==> optimizer loaded') # set up experiment path exp_path = os.path.join('exp', args.exp) utils.shell.mkdir(exp_path, clean=args.clean) logger = utils.Logger(exp_path) print('==> save logs to {0}'.format(exp_path)) # load snapshot of model and optimizer if args.resume is not None: if os.path.isfile(args.resume): snapshot = torch.load(args.resume) epoch = snapshot['epoch'] model.load_state_dict(snapshot['model']) # If this doesn't work, can use optimizer.load_state_dict optimizer.load_state_dict(snapshot['optimizer']) print('==> snapshot "{0}" loaded (epoch {1})'.format( args.resume, epoch)) else: raise FileNotFoundError('no snapshot found at "{0}"'.format( args.resume))
])  # closes the transform_train Compose([...]) opened above this chunk

transform_test = transforms.Compose([
    transforms.ToTensor(),
    # CIFAR-10 per-channel mean/std
    transforms.Normalize((0.4914, 0.4822, 0.4465),
                         (0.2023, 0.1994, 0.2010)),
])

# CIFAR-10 loaders (download=False: dataset must already exist in dataDir)
trainset = torchvision.datasets.CIFAR10(dataDir, train=True, download=False,
                                        transform=transform_train)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=128,
                                          shuffle=True, num_workers=2)
testset = torchvision.datasets.CIFAR10(dataDir, train=False, download=False,
                                       transform=transform_test)
testloader = torch.utils.data.DataLoader(testset, batch_size=128,
                                         shuffle=False, num_workers=2)

# dataset shape sanity logging (N, H, W, C layout of the raw numpy data)
nTrainSamples, width, height, channel = trainset.data.shape
nTestSamples, width, height, channel = testset.data.shape
print(f'# train samples: {nTrainSamples} | # test samples:{nTestSamples}')
print(f'per image size: {width}*{height} | per image channel:{channel}')

net = ResNet18()
# name encodes the attack config: PGD, eps=0.3, alpha=0.01, 25 iterations
netname=f'cifar10-resnet18-adv-pgd-0.3-0.01-25'

# choose optimizer
optimizer = optim.SGD(net.parameters(), lr=0.01, momentum=0.9,
                      weight_decay=5e-4, nesterov=True)

logFilePath= f'{logDir}/{netname}'
logger = utils.Logger(logFilePath)
criterion = torch.nn.CrossEntropyLoss()
checkpointPath = f'{modelDir}/{netname}-checkpoint.pth.tar'

# adversarial training driver: PGD attacker parameters must match netname above
netclf = AdvTrainAndTest(net, trainloader, testloader, criterion, optimizer,
                         netname=netname)
attacker = ATTACKER(type='PGD', epsilon=0.3, alpha=1e-2, num_iter=25)
netclf.build(start_epoch=0, total_epochs=200, attacker=attacker,
             checkpointPath=checkpointPath, logger=logger, modelDir=modelDir)
import platform
import sys
import tensorflow as tf
import numpy as np
import pickle

# silence TensorFlow C++ and Python-side logging (TF1-style API:
# tf.logging was removed in TF2 — this file targets TF1)
os.environ['TF_CPP_MIN_LOG_LEVEL'] = "3"
tf.logging.set_verbosity(tf.logging.ERROR)

from model import Model
from load_embedding import load_embedding
import utils
import tf_utils

logger = utils.Logger("./logs/")
timer = utils.Timer()

# !pip install tensorboardcolab
# from tensorboardcolab import *
# tbc = TensorBoardColab()

# record the runtime environment at startup for reproducibility
logger.append("SYSTEM", platform.system())
logger.append("MACHINE", platform.machine())
logger.append("PLATFORM", platform.platform())
logger.append("UNAME", platform.uname(), "\n")
logger.append("PYTHON", sys.version.split('\n'))
logger.append("TF VERSION", tf.__version__, "\n")

# -------------------------------------------------------------------------------------------------------------------- #
# Tail of the training driver: run prediction with the best model, append the
# result line to results.csv, and optionally delete the checkpoint.
predict(best_model, v, params)
with open(
        os.path.join(config.PREDICTION_FOLDER, params.save_dir,
                     "results.csv"), "a") as file:
    file.write(f"{params.preds_path},{best_mean_val_ccc:7.4f}\n")
print('...done.')
if not params.save:
    # free disk space: drop the best checkpoint unless --save was requested
    utils.delete_model(best_model_files[best_idx])


if __name__ == '__main__':
    # parse parameters
    params = parse_params()
    # register logger: the log file name encodes the full experiment config
    # (timestamp, features, emotion dims, model and attention hyper-params)
    log_file_name = '{}_[{}]_[{}]_[{}]_[{}_{}_{}_{}]_[{}_{}_{}]_[{}_{}_{}_{}_{}]_{}.txt'.format(
        datetime.now(tz=tz.gettz('Asia/Shanghai')).strftime("%Y-%m-%d-%H-%M"),
        '_'.join(params.feature_set), '_'.join(params.emo_dim_set),
        'NOSEG' if not params.add_seg_id else 'SEG', params.rnn,
        params.d_rnn, params.rnn_n_layers, params.rnn_bi, params.attn,
        params.n_layers, params.n_heads, params.lr, params.batch_size,
        params.rnn_dr, params.attn_dr, params.out_dr, params.annotator)
    params.log_file_name = log_file_name
    if params.log:
        if not os.path.exists(config.LOG_FOLDER):
            os.makedirs(config.LOG_FOLDER)
        # tee all stdout into the log file for the rest of the run
        sys.stdout = utils.Logger(
            os.path.join(config.LOG_FOLDER, log_file_name))
    print(' '.join(sys.argv))
    main(params)
def main():
    """Train a CIFAR/SVHN classifier with a flooding/CRL-style objective and
    log calibration metrics (ECE, NLL, E99, over-conf99, ...) per epoch.

    Side effects: creates `file_name` directory, writes several CSV/txt logs,
    saves six matplotlib figures, and saves the final model state dict.
    Relies on module-level `args` and on project modules (`dataset`, `train`,
    `metrics`, `crl_utils`, `losses`, model definitions).
    """
    file_name = "./flood_graph/150_250/128/500/ji_sort/1_conf/sample-wised/default/{}/".format(
        args.b)
    start = time.time()

    # set GPU ID
    os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu
    cudnn.benchmark = True

    # check save path
    save_path = file_name
    # save_path = args.save_path
    if not os.path.exists(save_path):
        os.makedirs(save_path)

    # make dataloader
    if args.valid == True:
        train_loader, valid_loader, test_loader, test_onehot, test_label = dataset.get_valid_loader(
            args.data, args.data_path, args.batch_size)
    else:
        train_loader, train_onehot, train_label, test_loader, test_onehot, test_label = dataset.get_loader(
            args.data, args.data_path, args.batch_size)

    # set num_class
    if args.data == 'cifar100':
        num_class = 100
    else:
        num_class = 10

    # set num_classes
    model_dict = {
        "num_classes": num_class,
    }

    # set model
    if args.model == 'res':
        model = resnet.resnet110(**model_dict).cuda()
    elif args.model == 'dense':
        model = densenet_BC.DenseNet3(depth=100,
                                      num_classes=num_class,
                                      growth_rate=12,
                                      reduction=0.5,
                                      bottleneck=True,
                                      dropRate=0.0).cuda()
    elif args.model == 'vgg':
        model = vgg.vgg16(**model_dict).cuda()

    # set criterion
    if args.loss == 'MS':
        cls_criterion = losses.MultiSimilarityLoss().cuda()
    elif args.loss == 'Contrastive':
        cls_criterion = losses.ContrastiveLoss().cuda()
    elif args.loss == 'Triplet':
        cls_criterion = losses.TripletLoss().cuda()
    elif args.loss == 'NPair':
        cls_criterion = losses.NPairLoss().cuda()
    elif args.loss == 'Focal':
        cls_criterion = losses.FocalLoss(gamma=3.0).cuda()
    else:
        # mode != 0 keeps per-sample losses (reduction="none"), presumably
        # for the flooding / sample-wise reweighting path — TODO confirm.
        if args.mode == 0:
            cls_criterion = nn.CrossEntropyLoss().cuda()
        else:
            cls_criterion = nn.CrossEntropyLoss(reduction="none").cuda()
    ranking_criterion = nn.MarginRankingLoss(margin=0.0).cuda()

    # set optimizer (default:sgd)
    optimizer = optim.SGD(
        model.parameters(),
        lr=0.1,
        momentum=0.9,
        weight_decay=5e-4,
        # weight_decay=0.0001,
        nesterov=False)
    # optimizer = optim.SGD(model.parameters(),
    #                       lr=float(args.lr),
    #                       momentum=0.9,
    #                       weight_decay=args.weight_decay,
    #                       nesterov=False)

    # set scheduler
    # scheduler = MultiStepLR(optimizer,
    #                         milestones=[500, 750],
    #                         gamma=0.1)
    scheduler = MultiStepLR(optimizer, milestones=[150, 250], gamma=0.1)
    # scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=args.lr_decay_step, gamma=args.lr_decay_gamma)

    # make logger
    train_logger = utils.Logger(os.path.join(save_path, 'train.log'))
    result_logger = utils.Logger(os.path.join(save_path, 'result.log'))

    # make History Class
    correctness_history = crl_utils.History(len(train_loader.dataset))

    ## define matrix
    # NOTE(review): 50000 = CIFAR train-set size, 73257 = SVHN train-set
    # size; the branch keys on 'cifar' so cifar100 also gets 50000.
    if args.data == 'cifar':
        matrix_idx_confidence = [[_] for _ in range(50000)]
        matrix_idx_iscorrect = [[_] for _ in range(50000)]
    else:
        matrix_idx_confidence = [[_] for _ in range(73257)]
        matrix_idx_iscorrect = [[_] for _ in range(73257)]

    # write csv
    #'''
    import csv
    f = open('{}/logs_{}_{}.txt'.format(file_name, args.b, args.epochs),
             'w', newline='')
    f.write("location = {}\n\n".format(file_name) + str(args))
    f0 = open('{}/Test_confidence_{}_{}.csv'.format(file_name, args.b,
                                                    args.epochs),
              'w', newline='')
    # f0 = open('./baseline_graph/150_250/128/500/Test_confidence_{}_{}.csv'.format(args.b, args.epochs), 'w', newline='')
    # f0 = open('./CRL_graph/150_250/Test_confidence_{}_{}.csv'.format(args.b, args.epochs), 'w', newline='')
    wr_conf_test = csv.writer(f0)
    header = [_ for _ in range(args.epochs + 1)]
    header[0] = 'Epoch'
    wr_conf_test.writerows([header])

    f1 = open('{}/Train_confidence_{}_{}.csv'.format(file_name, args.b,
                                                     args.epochs),
              'w', newline='')
    # f1 = open('./baseline_graph/150_250/128/500/Train_confidence_{}_{}.csv'.format(args.b, args.epochs), 'w', newline='')
    # f1 = open('./CRL_graph/150_250/Train_confidence_{}_{}.csv'.format(args.b, args.epochs), 'w', newline='')
    wr = csv.writer(f1)
    header = [_ for _ in range(args.epochs + 1)]
    header[0] = 'Epoch'
    wr.writerows([header])

    f2 = open('{}/Train_Flood_{}_{}_{}.csv'.format(file_name, args.data,
                                                   args.b, args.epochs),
              'w', newline='')
    # f2 = open('./baseline_graph/150_250/128/500/Train_Base_{}_{}_{}.csv'.format(args.data, args.b, args.epochs), 'w', newline='')
    # f2 = open('./CRL_graph/150_250/Train_Flood_{}_{}_{}.csv'.format(args.data, args.b, args.epochs), 'w', newline='')
    wr_train = csv.writer(f2)
    header = [_ for _ in range(args.epochs + 1)]
    header[0] = 'Epoch'
    wr_train.writerows([header])

    f3 = open('{}/Test_Flood_{}_{}_{}.csv'.format(file_name, args.data,
                                                  args.b, args.epochs),
              'w', newline='')
    # f3 = open('./baseline_graph/150_250/128/500/Test_Base_{}_{}_{}.csv'.format(args.data, args.b, args.epochs), 'w', newline='')
    # f3 = open('./CRL_graph/150_250/Test_Flood_{}_{}_{}.csv'.format(args.data, args.b, args.epochs), 'w', newline='')
    wr_test = csv.writer(f3)
    header = [_ for _ in range(args.epochs + 1)]
    header[0] = 'Epoch'
    wr_test.writerows([header])
    #'''

    # start Train
    best_valid_acc = 0
    test_ece_report = []
    test_acc_report = []
    test_nll_report = []
    test_over_con99_report = []
    test_e99_report = []
    test_cls_loss_report = []
    train_ece_report = []
    train_acc_report = []
    train_nll_report = []
    train_over_con99_report = []
    train_e99_report = []
    train_cls_loss_report = []
    train_rank_loss_report = []
    train_total_loss_report = []
    for epoch in range(1, args.epochs + 1):
        # NOTE(review): scheduler.step() before optimizer.step() is the old
        # (pre-1.1) PyTorch calling convention — matches this file's era.
        scheduler.step()
        matrix_idx_confidence, matrix_idx_iscorrect, idx, iscorrect, confidence, target, cls_loss_tr, rank_loss_tr, batch_correctness, total_confidence, total_correctness = \
            train.train(matrix_idx_confidence, matrix_idx_iscorrect,
                        train_loader, model, wr, cls_criterion,
                        ranking_criterion, optimizer, epoch,
                        correctness_history, train_logger, args)
        if args.rank_weight != 0.0:
            print("RANK ", rank_loss_tr)
            total_loss_tr = cls_loss_tr + rank_loss_tr

        if args.valid == True:
            idx, iscorrect, confidence, target, cls_loss_val, acc = train.valid(
                valid_loader, model, cls_criterion, ranking_criterion,
                optimizer, epoch, correctness_history, train_logger, args)
            if acc > best_valid_acc:
                best_valid_acc = acc
                print("*** Update Best Acc ***")

        # save model (only at the final epoch)
        if epoch == args.epochs:
            torch.save(model.state_dict(),
                       os.path.join(save_path, 'model.pth'))

        print("########### Train ###########")
        acc_tr, aurc_tr, eaurc_tr, aupr_tr, fpr_tr, ece_tr, nll_tr, brier_tr, E99_tr, over_99_tr, cls_loss_tr = metrics.calc_metrics(
            train_loader, train_label, train_onehot, model, cls_criterion,
            args)
        # NOTE(review): hard-coded epoch 260 — the sorted loader swap only
        # ever fires for runs longer than 260 epochs; confirm intended.
        if args.sort == True and epoch == 260:
            #if args.sort == True:
            train_loader = dataset.sort_get_loader(
                args.data, args.data_path, args.batch_size, idx,
                np.array(target), iscorrect, batch_correctness,
                total_confidence, total_correctness, np.array(confidence),
                epoch, args)
        train_acc_report.append(acc_tr)
        train_nll_report.append(nll_tr * 10)
        train_ece_report.append(ece_tr)
        train_over_con99_report.append(over_99_tr)
        train_e99_report.append(E99_tr)
        train_cls_loss_report.append(cls_loss_tr)
        if args.rank_weight != 0.0:
            train_total_loss_report.append(total_loss_tr)
            train_rank_loss_report.append(rank_loss_tr)
        print("CLS ", cls_loss_tr)

        # finish train
        print("########### Test ###########")
        # calc measure
        acc_te, aurc_te, eaurc_te, aupr_te, fpr_te, ece_te, nll_te, brier_te, E99_te, over_99_te, cls_loss_te = metrics.calc_metrics(
            test_loader, test_label, test_onehot, model, cls_criterion,
            args)
        test_ece_report.append(ece_te)
        test_acc_report.append(acc_te)
        test_nll_report.append(nll_te * 10)
        test_over_con99_report.append(over_99_te)
        test_e99_report.append(E99_te)
        test_cls_loss_report.append(cls_loss_te)
        print("CLS ", cls_loss_te)
        print("############################")

    # for idx in matrix_idx_confidence:
    #     wr.writerow(idx)
    #'''
    # draw graph: ECE per epoch (test vs train)
    df = pd.DataFrame()
    df['epoch'] = [i for i in range(1, args.epochs + 1)]
    df['test_ece'] = test_ece_report
    df['train_ece'] = train_ece_report
    fig_loss = plt.figure(figsize=(35, 35))
    fig_loss.set_facecolor('white')
    ax = fig_loss.add_subplot()
    ax.plot(df['epoch'], df['test_ece'], df['epoch'], df['train_ece'],
            linewidth=10)
    ax.legend(['Test', 'Train'], loc=2, prop={'size': 60})
    plt.title('[FL] ECE per epoch', fontsize=80)
    # plt.title('[BASE] ECE per epoch', fontsize=80)
    # plt.title('[CRL] ECE per epoch', fontsize=80)
    plt.xlabel('Epoch', fontsize=70)
    plt.ylabel('ECE', fontsize=70)
    plt.ylim([0, 1])
    plt.setp(ax.get_xticklabels(), fontsize=30)
    plt.setp(ax.get_yticklabels(), fontsize=30)
    plt.savefig('{}/{}_{}_ECE_lr_{}.png'.format(file_name, args.model,
                                                args.b, args.epochs))
    # plt.savefig('./baseline_graph/150_250/128/500/{}_{}_ECE_lr_{}.png'.format(args.model, args.b, args.epochs))
    # plt.savefig('./CRL_graph/150_250/{}_{}_ECE_lr_{}.png'.format(args.model, args.b, args.epochs))

    # accuracy per epoch
    df2 = pd.DataFrame()
    df2['epoch'] = [i for i in range(1, args.epochs + 1)]
    df2['test_acc'] = test_acc_report
    df2['train_acc'] = train_acc_report
    fig_acc = plt.figure(figsize=(35, 35))
    fig_acc.set_facecolor('white')
    ax = fig_acc.add_subplot()
    ax.plot(df2['epoch'], df2['test_acc'], df2['epoch'], df2['train_acc'],
            linewidth=10)
    ax.legend(['Test', 'Train'], loc=2, prop={'size': 60})
    plt.title('[FL] Accuracy per epoch', fontsize=80)
    # plt.title('[BASE] Accuracy per epoch', fontsize=80)
    # plt.title('[CRL] Accuracy per epoch', fontsize=80)
    plt.xlabel('Epoch', fontsize=70)
    plt.ylabel('Accuracy', fontsize=70)
    plt.ylim([0, 100])
    plt.setp(ax.get_xticklabels(), fontsize=30)
    plt.setp(ax.get_yticklabels(), fontsize=30)
    plt.savefig('{}/{}_{}_acc_lr_{}.png'.format(file_name, args.model,
                                                args.b, args.epochs))
    # plt.savefig('./baseline_graph/150_250/128/500/{}_{}_acc_lr_{}.png'.format(args.model, args.b, args.epochs))
    # plt.savefig('./CRL_graph/150_250/{}_{}_acc_lr_{}.png'.format(args.model, args.b, args.epochs))

    # NLL per epoch (values were stored pre-scaled by 10 above)
    df3 = pd.DataFrame()
    df3['epoch'] = [i for i in range(1, args.epochs + 1)]
    df3['test_nll'] = test_nll_report
    df3['train_nll'] = train_nll_report
    fig_acc = plt.figure(figsize=(35, 35))
    fig_acc.set_facecolor('white')
    ax = fig_acc.add_subplot()
    ax.plot(df3['epoch'], df3['test_nll'], df3['epoch'], df3['train_nll'],
            linewidth=10)
    ax.legend(['Test', 'Train'], loc=2, prop={'size': 60})
    plt.title('[FL] NLL per epoch', fontsize=80)
    # plt.title('[BASE] NLL per epoch', fontsize=80)
    # plt.title('[CRL] NLL per epoch', fontsize=80)
    plt.xlabel('Epoch', fontsize=70)
    plt.ylabel('NLL', fontsize=70)
    plt.ylim([0, 45])
    plt.setp(ax.get_xticklabels(), fontsize=30)
    plt.setp(ax.get_yticklabels(), fontsize=30)
    plt.savefig('{}/{}_{}_nll_lr_{}.png'.format(file_name, args.model,
                                                args.b, args.epochs))
    # plt.savefig('./baseline_graph/150_250/128/500/{}_{}_nll_lr_{}.png'.format(args.model, args.b, args.epochs))
    # plt.savefig('./CRL_graph/150_250/{}_{}_nll_lr_{}.png'.format(args.model, args.b, args.epochs))

    # count of samples with confidence > 0.99 per epoch
    df4 = pd.DataFrame()
    df4['epoch'] = [i for i in range(1, args.epochs + 1)]
    df4['test_over_con99'] = test_over_con99_report
    df4['train_over_con99'] = train_over_con99_report
    fig_acc = plt.figure(figsize=(35, 35))
    fig_acc.set_facecolor('white')
    ax = fig_acc.add_subplot()
    ax.plot(df4['epoch'], df4['test_over_con99'], df4['epoch'],
            df4['train_over_con99'], linewidth=10)
    ax.legend(['Test', 'Train'], loc=2, prop={'size': 60})
    plt.title('[FL] Over conf99 per epoch', fontsize=80)
    # plt.title('[BASE] Over conf99 per epoch', fontsize=80)
    # plt.title('[CRL] Over conf99 per epoch', fontsize=80)
    plt.xlabel('Epoch', fontsize=70)
    plt.ylabel('Over con99', fontsize=70)
    if args.data == 'cifar10' or args.data == 'cifar100':
        plt.ylim([0, 50000])
    else:
        plt.ylim([0, 73257])
    plt.setp(ax.get_xticklabels(), fontsize=30)
    plt.setp(ax.get_yticklabels(), fontsize=30)
    plt.savefig('{}/{}_{}_over_conf99_lr_{}.png'.format(
        file_name, args.model, args.b, args.epochs))
    # plt.savefig('./baseline_graph/150_250/128/500/{}_{}_over_conf99_lr_{}.png'.format(args.model, args.b, args.epochs))
    # plt.savefig('./CRL_graph/150_250/{}_{}_over_conf99_lr_{}.png'.format(args.model, args.b, args.epochs))

    # E99 per epoch
    df5 = pd.DataFrame()
    df5['epoch'] = [i for i in range(1, args.epochs + 1)]
    df5['test_e99'] = test_e99_report
    df5['train_e99'] = train_e99_report
    fig_acc = plt.figure(figsize=(35, 35))
    fig_acc.set_facecolor('white')
    ax = fig_acc.add_subplot()
    ax.plot(df5['epoch'], df5['test_e99'], df5['epoch'], df5['train_e99'],
            linewidth=10)
    ax.legend(['Test', 'Train'], loc=2, prop={'size': 60})
    plt.title('[FL] E99 per epoch', fontsize=80)
    # plt.title('[BASE] E99 per epoch', fontsize=80)
    # plt.title('[CRL] E99 per epoch', fontsize=80)
    plt.xlabel('Epoch', fontsize=70)
    plt.ylabel('E99', fontsize=70)
    plt.ylim([0, 0.2])
    plt.setp(ax.get_xticklabels(), fontsize=30)
    plt.setp(ax.get_yticklabels(), fontsize=30)
    plt.savefig('{}/{}_{}_E99_flood_lr_{}.png'.format(file_name, args.model,
                                                      args.b, args.epochs))
    # plt.savefig('./baseline_graph/150_250/128/500/{}_{}_E99_flood_lr_{}.png'.format(args.model, args.b, args.epochs))
    # plt.savefig('./CRL_graph/150_250/{}_{}_E99_flood_lr_{}.png'.format(args.model, args.b, args.epochs))

    # classification loss per epoch (df5 is intentionally reused here)
    df5 = pd.DataFrame()
    df5['epoch'] = [i for i in range(1, args.epochs + 1)]
    df5['test_cls_loss'] = test_cls_loss_report
    df5['train_cls_loss'] = train_cls_loss_report
    fig_acc = plt.figure(figsize=(35, 35))
    fig_acc.set_facecolor('white')
    ax = fig_acc.add_subplot()
    ax.plot(df5['epoch'], df5['test_cls_loss'], df5['epoch'],
            df5['train_cls_loss'], linewidth=10)
    ax.legend(['Test', 'Train'], loc=2, prop={'size': 60})
    plt.title('[FL] CLS_loss per epoch', fontsize=80)
    # plt.title('[BASE] CLS_loss per epoch', fontsize=80)
    # plt.title('[CRL] CLS_loss per epoch', fontsize=80)
    plt.xlabel('Epoch', fontsize=70)
    plt.ylabel('Loss', fontsize=70)
    plt.ylim([0, 5])
    plt.setp(ax.get_xticklabels(), fontsize=30)
    plt.setp(ax.get_yticklabels(), fontsize=30)
    plt.savefig('{}/{}_{}_cls_loss_flood_lr_{}.png'.format(
        file_name, args.model, args.b, args.epochs))
    # plt.savefig('./baseline_graph/150_250/128/500/{}_{}_cls_loss_flood_lr_{}.png'.format(args.model, args.b, args.epochs))
    # plt.savefig('./CRL_graph/150_250/{}_{}_cls_loss_flood_lr_{}.png'.format(args.model, args.b, args.epochs))

    # CLS/Rank/Total losses (only when the ranking term is active)
    if args.rank_weight != 0.0:
        df6 = pd.DataFrame()
        df6['epoch'] = [i for i in range(1, args.epochs + 1)]
        df6['train_cls_loss'] = train_cls_loss_report
        df6['train_rank_loss'] = train_rank_loss_report
        df6['train_total_loss'] = train_total_loss_report
        fig_acc = plt.figure(figsize=(35, 35))
        fig_acc.set_facecolor('white')
        ax = fig_acc.add_subplot()
        ax.plot(df6['epoch'], df6['train_cls_loss'], df6['epoch'],
                df6['train_rank_loss'], df6['epoch'],
                df6['train_total_loss'], linewidth=10)
        ax.legend(['CLS', 'Rank', 'Total'], loc=2, prop={'size': 60})
        plt.title('[FL] CLS_loss per epoch', fontsize=80)
        plt.xlabel('Epoch', fontsize=70)
        plt.ylabel('Loss', fontsize=70)
        # plt.ylim([0, 5])
        plt.setp(ax.get_xticklabels(), fontsize=30)
        plt.setp(ax.get_yticklabels(), fontsize=30)
        # NOTE(review): unlike the other figures this one saves to the
        # hard-coded CRL_graph path, not file_name — confirm intended.
        plt.savefig(
            './CRL_graph/150_250/{}_{}_cls_loss_flood_lr_{}.png'.format(
                args.model, args.b, args.epochs))

    # prepend row labels and dump the per-epoch reports to the CSVs
    test_acc_report.insert(0, 'ACC')
    test_ece_report.insert(0, 'ECE')
    test_nll_report.insert(0, 'NLL')
    test_over_con99_report.insert(0, 'Over_conf99')
    test_e99_report.insert(0, 'E99')
    test_cls_loss_report.insert(0, 'CLS')
    wr_test.writerow(test_acc_report)
    wr_test.writerow(test_ece_report)
    wr_test.writerow(test_nll_report)
    wr_test.writerow(test_over_con99_report)
    wr_test.writerow(test_e99_report)
    wr_test.writerow(test_cls_loss_report)

    train_acc_report.insert(0, 'ACC')
    train_ece_report.insert(0, 'ECE')
    train_nll_report.insert(0, 'NLL')
    train_over_con99_report.insert(0, 'Over_conf99')
    train_e99_report.insert(0, 'E99')
    train_cls_loss_report.insert(0, 'CLS')
    wr_train.writerow(train_acc_report)
    wr_train.writerow(train_ece_report)
    wr_train.writerow(train_nll_report)
    wr_train.writerow(train_over_con99_report)
    wr_train.writerow(train_e99_report)
    wr_train.writerow(train_cls_loss_report)
    if args.rank_weight != 0.0:
        train_rank_loss_report.insert(0, 'Rank')
        train_total_loss_report.insert(0, 'Total')
        wr_train.writerow(train_rank_loss_report)
        wr_train.writerow(train_total_loss_report)
    #'''

    # result write (final-epoch test metrics, scaled for readability)
    result_logger.write([
        acc_te, aurc_te * 1000, eaurc_te * 1000, aupr_te * 100,
        fpr_te * 100, ece_te * 100, nll_te * 10, brier_te * 100,
        E99_te * 100
    ])
    if args.valid == True:
        # NOTE(review): prints the LAST epoch's `acc`, not `best_valid_acc`,
        # despite the label — looks like a bug; confirm before relying on it.
        print("Best Valid Acc : {}".format(acc))
    print("Flood Level: {}".format(args.b))
    print("Sort : {}".format(args.sort))
    print("Sort Mode : {}".format(args.sort_mode))
    print("TIME : ", time.time() - start)
def train(model, train_loader, val_loader, output_dir, cfg, ):
    """Train `model` with Nadam + multi-step LR decay, OHEM-style loss
    (hardest 90% of each batch), optional alternating mixup, and early
    stopping on the validation score.

    Args:
        model: the network to train (moved to CUDA by the caller).
        train_loader / val_loader: torch DataLoaders; labels are one-hot.
        output_dir: directory for 'log.txt' and 'model_best.pth'.
        cfg: dict with keys 'epoches', 'baselr', 'early_stop_n',
            'with_mixup', 'mixup_max_n', 'val_after_epoch'.
    """
    num_epochs = cfg['epoches']
    base_lr = cfg['baselr']
    # BUG FIX: the original assignment ended with a trailing comma
    # (`early_stop_n = cfg['early_stop_n'],`), turning early_stop_n into a
    # 1-tuple; `early_stop_counter > early_stop_n` then raises TypeError
    # ('>' not supported between int and tuple) the first time early
    # stopping is checked in Python 3.
    early_stop_n = cfg['early_stop_n']
    utils.create_dir(output_dir)
    optim = Nadam(model.parameters(), lr=base_lr)
    scheduler = lr_scheduler.MultiStepLR(optim, [6, 12, 18], gamma=0.5)
    logger = utils.Logger(os.path.join(output_dir, 'log.txt'))
    best_eval_score_avg = 0.0
    print('Begin training......')
    early_stop_counter = 0
    for epoch in range(num_epochs):
        scheduler.step(epoch)
        total_loss = 0
        rightN = 0
        # toggle mixup on/off every epoch while epoch < mixup_max_n
        # (alternating augmentation schedule)
        if cfg['with_mixup'] and epoch < cfg['mixup_max_n']:
            if train_loader.dataset.with_mixup == True:
                train_loader.dataset.with_mixup = False
                print('Close mixup..')
            else:
                train_loader.dataset.with_mixup = True
                print('Open mixup..')
        for data, label in tqdm.tqdm(train_loader):
            optim.zero_grad()
            data = data.cuda()
            pred = model(data)
            label = label.cuda()
            loss = F.binary_cross_entropy_with_logits(pred, label,
                                                      reduction='none')
            loss = loss.sum(1)
            # online hard example mining: keep the hardest 90% of the batch
            loss, _ = loss.topk(k=int(loss.size(0) * 0.9))
            loss = loss.mean()
            # training accuracy via argmax over logits vs one-hot labels
            pred = np.argmax(pred.data.cpu().numpy(), 1)
            gt = label.cpu().numpy()
            gt = np.argmax(gt, 1)
            rightN += (pred == gt).sum()
            loss.backward()
            optim.step()
            total_loss += loss.item() * label.size(0)
        NN = len(train_loader.dataset)
        total_loss /= NN
        logger.write('epoch %d, \ttrain_loss: %.3f,'
                     ' train_score: %.3f' % (epoch, total_loss, rightN / NN))
        train_loader.dataset.on_epoch_finish()
        if epoch >= cfg['val_after_epoch']:
            model.train(False)
            eval_score = eval(model, val_loader)
            model.train(True)
            if eval_score >= best_eval_score_avg:
                # new best: reset the early-stop counter and checkpoint
                early_stop_counter = 0
                model_path = os.path.join(output_dir, 'model_best.pth')
                torch.save(model.state_dict(), model_path)
                best_eval_score_avg = eval_score
            else:
                early_stop_counter += 1
            logger.write('epoch %d,' % (epoch) +
                         '\teval score: %.2f ' % (100 * eval_score) +
                         '( best: %.2f)' % (100 * best_eval_score_avg))
            if early_stop_counter > early_stop_n:
                break
    print('**************************************************')
def train(model, train_loader, eval_loader, num_epochs, output, eval_each_epoch, tiny_train=False):
    """Train a MUTANT-style VQA model on (mutant, original) sample pairs.

    The total loss combines: the model's own loss plus an answer-type NLL
    term for both the mutant and the original sample, and a pairwise
    consistency term matching predicted-vs-ground-truth answer-embedding
    cosine similarities. Results are appended to `<output>/results.json`
    each evaluation; the final model is saved to `<output>/model.pth`.

    NOTE(review): uses PyTorch 0.3-era APIs (`Variable`, `.data[0]`,
    `clip_grad_norm` without underscore) — keep the pinned torch version.
    Fix applied: the evaluation print said "UPPER BOUNG"; corrected to
    "UPPER BOUND".

    Args:
        model: VQA model returning (gen_embs, pred, type_logit, loss, all_ans_embs).
        train_loader / eval_loader: paired mutant/original data loaders.
        num_epochs: number of training epochs.
        output: directory for logs, results.json, and model.pth.
        eval_each_epoch: evaluate every epoch (else only after the last).
        tiny_train: debug mode — only 10 batches per epoch.
    """
    utils.create_dir(output)
    optim = torch.optim.Adamax(model.parameters())
    logger = utils.Logger(os.path.join(output, 'log.txt'))
    all_results = []
    total_step = 0

    # precomputed answer embeddings (epsilon added before normalization to
    # avoid dividing by a zero vector)
    ans_embed = np.load("./data/mutant_only_vqacp_v2/answer_embs.npy") + 1e-8
    ans_embed = torch.from_numpy(ans_embed).cuda()
    ans_embed = torch.nn.functional.normalize(ans_embed, dim=1)

    for epoch in range(num_epochs):
        total_loss = 0
        train_score = 0
        t = time.time()
        model.train(True)
        cos = nn.CosineSimilarity()
        type_loss = nn.NLLLoss()
        for i, (v, q, typetarget, a, b, answertypefeats, top_ans_emb,
                orig_v, orig_q, orig_a,
                orig_top_ans_emb) in tqdm(enumerate(train_loader),
                                          ncols=100,
                                          desc="Epoch %d" % (epoch + 1),
                                          total=len(train_loader)):
            total_step += 1
            if tiny_train and i == 10:
                break
            v, orig_v = Variable(v).cuda(), Variable(orig_v).cuda()
            q, orig_q = Variable(q).cuda(), Variable(orig_q).cuda()
            typetarget = Variable(typetarget).cuda()
            a, orig_a = Variable(a).cuda(), Variable(orig_a).cuda()
            b = Variable(b).cuda()
            answertypefeats = Variable(answertypefeats).cuda()
            top_ans_emb, orig_top_ans_emb = Variable(
                top_ans_emb).cuda(), Variable(orig_top_ans_emb).cuda()

            ### MUTANT (mutated sample forward pass)
            gen_embs, pred, type_logit, loss, all_ans_embs = model(
                v, None, q, a, b)
            all_ans_embs = torch.stack([all_ans_embs] * gen_embs.shape[0])
            # NaN guard: x != x is true only for NaN
            if (loss != loss).any():
                raise ValueError("NaN loss")
            # ## NCE LOSS (currently disabled — only positive_dist is kept)
            positive_dist = cos(gen_embs, top_ans_emb)  # shape b,k;b,k-> b
            gen_embs = torch.cat([gen_embs.unsqueeze(1)] *
                                 all_ans_embs.shape[1], dim=1)
            # d_logit = cos(gen_embs,all_ans_embs)
            # num = torch.exp(positive_dist).squeeze(-1)
            # den = torch.exp(d_logit).sum(-1)
            # loss_nce = -1 *torch.log(num/den)
            # loss_nce = loss_nce.mean() * d_logit.size(1)
            ## TYPE LOSS
            logit = nn.functional.sigmoid(pred)
            type_logit_soft = nn.functional.softmax(type_logit)
            type_logit = nn.functional.log_softmax(type_logit)
            logit = calculatelogits(logit, answertypefeats, "train")
            loss_type = type_loss(type_logit, typetarget)
            loss_type = loss_type  #* type_logit.size(1)
            mutant_loss = loss + loss_type

            ### MUTANT (original sample forward pass — same pipeline)
            orig_gen_embs, orig_pred, orig_type_logit, orig_loss, orig_all_ans_embs = model(
                orig_v, None, orig_q, orig_a, b)
            orig_all_ans_embs = torch.stack([orig_all_ans_embs] *
                                            orig_gen_embs.shape[0])
            if (orig_loss != orig_loss).any():
                raise ValueError("NaN loss")
            # ## NCE LOSS
            orig_positive_dist = cos(orig_gen_embs,
                                     orig_top_ans_emb)  # shape b,k;b,k-> b
            orig_gen_embs = torch.cat([orig_gen_embs.unsqueeze(1)] *
                                      orig_all_ans_embs.shape[1], dim=1)
            # orig_d_logit = cos(orig_gen_embs,orig_all_ans_embs)
            # orig_num = torch.exp(orig_positive_dist).squeeze(-1)
            # orig_den = torch.exp(orig_d_logit).sum(-1)
            # orig_loss_nce = -1 *torch.log(orig_num/orig_den)
            # orig_loss_nce = orig_loss_nce.mean() * orig_d_logit.size(1)
            ## TYPE LOSS
            orig_logit = nn.functional.sigmoid(orig_pred)
            orig_type_logit_soft = nn.functional.softmax(orig_type_logit)
            orig_type_logit = nn.functional.log_softmax(orig_type_logit)
            orig_logit = calculatelogits(orig_logit, answertypefeats,
                                         "train")
            orig_loss_type = type_loss(orig_type_logit, typetarget)
            orig_loss_type = orig_loss_type  #* orig_type_logit.size(1)
            orig_loss = orig_loss + orig_loss_type

            ## PW LOSS: predicted pairwise similarity should match the
            ## ground-truth pairwise similarity between mutant and original
            top_gen_embs, _ = gen_embs.max(1)
            top_orig_gen_embs, _ = orig_gen_embs.max(1)
            cos_gt = cos(top_ans_emb, orig_top_ans_emb)
            cos_pred = cos(top_gen_embs, top_orig_gen_embs)
            loss_pw = torch.abs(cos_gt - cos_pred).mean()  #* logit.size(1)

            loss = orig_loss + mutant_loss + loss_pw
            loss.backward()
            nn.utils.clip_grad_norm(model.parameters(), 0.25)
            optim.step()
            optim.zero_grad()
            batch_score = compute_score_with_logits(pred, a.data).sum()
            total_loss += loss.data[0] * v.size(0)
            train_score += batch_score

        # in tiny mode, normalize by steps actually taken, not dataset size
        if tiny_train:
            L = total_step
        else:
            L = len(train_loader.dataset)
        total_loss /= L
        train_score = 100 * train_score / L
        run_eval = eval_each_epoch or (epoch == num_epochs - 1)
        if run_eval:
            model.train(False)
            results = evaluate(model, eval_loader, tiny_train)
            results["epoch"] = epoch + 1
            results["step"] = total_step
            results["train_loss"] = total_loss
            results["train_score"] = train_score
            eval_score = results["score"]
            bound = results["upper_bound"]
            print("RANDOM SCORE BEFORE TRAINING", eval_score)
            # BUG FIX: original printed "UPPER BOUNG"
            print("UPPER BOUND", bound)
            # print("TYPEWISE SCORE", results["type_acc"])
            all_results.append(results)
            with open(join(output, "results.json"), "w") as f:
                json.dump(all_results, f, indent=2)
            model.train(True)

        logger.write('epoch %d, time: %.2f' % (epoch + 1, time.time() - t))
        logger.write('\ttrain_loss: %.2f, score: %.2f' %
                     (total_loss, train_score))
        if run_eval:
            logger.write('\teval score: %.2f (%.2f)' %
                         (100 * eval_score, 100 * bound))

    # save the final model once training completes
    model_path = os.path.join(output, 'model.pth')
    torch.save(model.state_dict(), model_path)