def train(self, dataset: myDataset) -> list:
    """Fit the user/movie factor matrices on *dataset* with per-sample SGD.

    Returns the mean loss of each epoch as a list.
    """
    history = []
    for _ in range(self.epoch):
        epoch_preds = []
        for raw_user, raw_movie, rating in dataset:
            u, m = int(raw_user), int(raw_movie)
            # mark both ids as seen during training
            self.userFlag[u] = 1
            self.movieFlag[m] = 1
            pred = self.predict(u, m)
            epoch_preds.append(pred)
            # both gradients are computed from the pre-update factors,
            # then applied together
            err = rating - pred
            user_step = 2 * self.lr * err * self.movieW[m, :]
            movie_step = 2 * self.lr * err * self.userW[u, :]
            self.userW[u, :] += user_step
            self.movieW[m, :] += movie_step
        history.append(
            calc_loss(dataset.getY().flatten(),
                      np.array(epoch_preds)).sum() / len(dataset))
    # column-wise averages of the factor matrices
    self.userAvg = np.sum(self.userW, axis=0) / self.userW.shape[0]
    self.movieAvg = np.sum(self.movieW, axis=0) / self.movieW.shape[0]
    return history
def forward(self, embeddings):
    """Compute the similarity-matrix loss for a batch of embeddings.

    Fix: the original called ``torch.clamp(self.w, 1e-6)`` and discarded the
    result — ``torch.clamp`` is not in-place, so ``w`` was never actually
    constrained.  Clamp the parameter data in place so the scale stays
    positive without breaking autograd on a leaf tensor.
    """
    self.w.data.clamp_(min=1e-6)  # keep the similarity scale positive
    centroids = get_centroids(embeddings)
    cossim = get_cossim(embeddings, centroids)
    sim_matrix = self.w * cossim.to(self.device) + self.b
    loss, _ = calc_loss(sim_matrix)
    return loss
def forward(self, embeddings, y=None):
    """Compute the similarity-matrix loss for a batch of embeddings.

    ``y`` is accepted for interface compatibility but unused here.
    Fix: ``torch.clamp(self.w, 1e-6)`` returned a clamped copy that was
    discarded (clamp is not in-place), so ``w`` was never constrained.
    """
    self.w.data.clamp_(min=1e-6)  # keep the similarity scale positive
    centroids = get_centroids(embeddings)
    cossim = get_cossim(embeddings, centroids)
    sim_matrix = self.w * cossim + self.b
    loss, _ = calc_loss(sim_matrix)
    return loss
def evaluate(self, dataset: myDataset) -> float: """ input: test dataset output: loss avg on current test set """ loss_sum = 0 for userInd, movieInd, rating in dataset: userInd, movieInd = int(userInd), int(movieInd) pred = self.predict(userInd, movieInd) loss_sum += calc_loss(rating, pred) return loss_sum / len(dataset)
def forward(self, embeddings, embedder_net, lamb):
    """Similarity loss plus an L2 penalty on the embedder's LSTM weights.

    Returns ``(loss, per_loss, norm_loss)``.
    Fix: ``torch.clamp(self.w, 1e-6)`` discarded its result (clamp is not
    in-place), so ``w`` was never constrained; clamp the data in place.
    NOTE(review): the penalty is built from ``weights[i][j].data`` inside a
    fresh ``torch.Tensor``, so no gradient flows through ``norm_loss`` —
    preserved as-is, but confirm that is intended.
    """
    self.w.data.clamp_(min=1e-6)  # keep the similarity scale positive
    centroids = get_centroids(embeddings)
    cossim = get_cossim(embeddings, centroids)
    sim_matrix = self.w * cossim.to(self.device) + self.b
    per_loss, _ = calc_loss(sim_matrix)
    weights = embedder_net.LSTM_stack.all_weights
    norm_loss = lamb * torch.sum(
        torch.Tensor([
            torch.norm(weights[i][j].data.to(self.device), 2)
            for i in range(hp.model.num_layer) for j in range(4)
        ]))
    loss = per_loss + norm_loss
    return loss, per_loss, norm_loss
def test(data_loader, model):
    """Evaluate on *data_loader*, accumulating per-video dissimilarity stats.

    Updates the module-level ``test_dissimilarity_score``,
    ``test_number_ofile_number_of_chunks`` and ``test_target`` dicts and
    returns the average loss.
    """
    losses = AverageMeter()
    model.eval()
    with torch.no_grad():
        for _, (video_seq, audio_seq, target, audiopath) in tqdm(
                enumerate(data_loader), total=len(data_loader)):
            video_seq = video_seq.to(cuda)
            audio_seq = audio_seq.to(cuda)
            target = target.to(cuda)
            batch_size = video_seq.size(0)
            vid_out = model.module.forward_lip(video_seq)
            aud_out = model.module.forward_aud(audio_seq)
            vid_class = model.module.final_classification_lip(vid_out)
            aud_class = model.module.final_classification_aud(aud_out)
            # free the large input tensors as early as possible
            del video_seq
            del audio_seq
            contrastive = calc_loss(vid_out, aud_out, target, args.hyper_param)
            vid_ce = criterion(vid_class, target.view(-1))
            aud_ce = criterion(aud_class, target.view(-1))
            loss = contrastive + vid_ce + aud_ce
            losses.update(loss.item(), batch_size)
            # per-video running dissimilarity of the first item in the batch
            dist = torch.dist(vid_out[0, :].view(-1), aud_out[0, :].view(-1), 2)
            tar = target[0, :].view(-1).item()
            vid_name = audiopath[0].split('\\')[-2]
            print(vid_name)
            if test_dissimilarity_score.get(vid_name):
                test_dissimilarity_score[vid_name] += dist
                test_number_ofile_number_of_chunks[vid_name] += 1
            else:
                test_dissimilarity_score[vid_name] = dist
                test_number_ofile_number_of_chunks[vid_name] = 1
            if not test_target.get(vid_name):
                test_target[vid_name] = tar
    print('Loss {loss.avg:.4f}\t'.format(loss=losses))
    write_log(content='Loss {loss.avg:.4f}\t'.format(loss=losses, args=args),
              epoch=num_epoch,
              filename=os.path.join(os.path.dirname(args.test), 'test_log.md'))
    return losses.avg
def test_seg_model(model, args):
    """Run the segmentation model over the test split and print its metrics."""
    img_dir = os.path.join(args.data_dir + args.simu_type, "test_imgs")
    dset = ChromosomeDataset(
        img_dir, transform=transforms.Compose([transforms.ToTensor(), ]))
    loader = DataLoader(dset, batch_size=args.batch_size, shuffle=False,
                        num_workers=0)
    model.eval()  # evaluation mode: no dropout/BN updates
    metrics = defaultdict(float)
    sample_count = 0
    for inputs, labels in loader:
        inputs = inputs.cuda()
        labels = labels.cuda()
        with torch.no_grad():
            outputs = model(inputs)
            # calc_loss also accumulates into `metrics` in place
            calc_loss(outputs, labels, metrics)
        sample_count += inputs.size(0)
    print_metrics(metrics, sample_count, "test")
def test(test_set, classifier):
    """Evaluate *classifier* on *test_set*; print accuracy and mean loss."""
    classifier.eval()
    iterator = data.BucketIterator(
        dataset=test_set,
        batch_size=args.batch_size,
        sort_key=lambda x: len(x.text),
        device=args.device_id if args.cuda else -1,
        train=False)
    total_loss = 0
    correct = 0
    for batch in iterator:
        output = classifier(batch.text)
        preds = predict(output)
        total_loss += calc_loss(output, batch.label)
        correct += preds.eq(batch.label.data.view_as(preds)).cpu().sum()
    loss = total_loss / len(test_set)
    accuracy = correct / len(test_set)
    print(f'test set |',
          f'accuracy: {accuracy * 100:6.2f}% |',
          f'loss: {loss:6.4f} |')
# hidden layers
h, edges = layer(args.layer_type, (h, edges), 64, training, args,
                 activation=tf.nn.elu)
# classification layer
logits, _ = layer(args.layer_type, (h, edges), nC, training, args,
                  multi_edge_aggregation='mean')
Yhat = tf.one_hot(tf.argmax(logits, axis=-1), nC)
# all three splits share the same logits tensor; only the index set differs
loss_train = utils.calc_loss(Y, logits, idx_train, W=W)
loss_val = utils.calc_loss(Y, logits, idx_val)
loss_test = utils.calc_loss(Y, logits, idx_test)
# L2 weight decay over trainable kernels (biases and BN gammas excluded)
vars = tf.trainable_variables()
lossL2 = tf.add_n([
    tf.nn.l2_loss(v) for v in vars
    if 'bias' not in v.name and 'gamma' not in v.name
]) * args.weight_decay
optimizer = tf.train.AdamOptimizer(learning_rate=args.lr)
train_op = optimizer.minimize(loss_train + lossL2)
init_op = tf.group(tf.global_variables_initializer(),
                   tf.local_variables_initializer())
# ************************************************************
def train(args, model, optimizer):
    """Train a flow model over a schedule of noise deltas (200 iterations).

    Writes per-iteration train/test losses to text files, saves samples every
    iteration, and every 10 iterations checkpoints the model and dumps
    per-example log-likelihoods for val and train-val data.

    Fix: in the training loop and in both log-likelihood dump loops,
    ``noisy_image`` was used with ``+=`` before ever being assigned
    (NameError on first use / stale value afterwards).  Each loop now starts
    with ``noisy_image = image`` — matching the evaluation loop, which
    already did this.
    """
    if args.dataset == "mnist":
        dataset_f = memory_mnist
    elif args.dataset == "fashion_mnist":
        dataset_f = memory_fashion
    repr_args = string_args(args)
    n_bins = 2.0 ** args.n_bits
    # fixed latent samples so the saved images are comparable across epochs
    z_sample = []
    z_shapes = calc_z_shapes(args.n_channels, args.img_size, args.n_flow,
                             args.n_block)
    for z in z_shapes:
        z_new = torch.randn(args.n_sample, *z) * args.temp
        z_sample.append(z_new.to(device))
    deltas = create_deltas_sequence(0.1, 0.005)
    args.delta = deltas[0]
    epoch_losses = []
    f_train_loss = open(f"losses/seq_losses_train_{repr_args}_.txt", "w",
                        buffering=1)
    f_test_loss = open(f"losses/seq_losses_test_{repr_args}_.txt", "w",
                       buffering=1)
    with tqdm(range(200)) as pbar:
        for i in pbar:
            args.delta = deltas[i]
            repr_args = string_args(args)
            train_loader, val_loader, train_val_loader = dataset_f(
                args.batch, args.img_size, args.n_channels)
            train_losses = []
            for image in train_loader:
                optimizer.zero_grad()
                image = image.to(device)
                noisy_image = image  # FIX: was referenced before assignment
                if args.tr_dq:
                    noisy_image += torch.rand_like(image) / n_bins
                noisy_image += torch.randn_like(image) * args.delta
                log_p, logdet, _ = model(noisy_image)
                logdet = logdet.mean()
                loss, log_p, log_det = calc_loss(log_p, logdet, args.img_size,
                                                 n_bins, args.n_channels)
                loss.backward()
                optimizer.step()
                train_losses.append(loss.item())
            current_train_loss = np.mean(train_losses)
            print(f"{current_train_loss},{args.delta},{i + 1}",
                  file=f_train_loss)
            with torch.no_grad():
                utils.save_image(
                    model.reverse(z_sample).cpu().data,
                    f"sample/seq_sample_{repr_args}_{str(i + 1).zfill(6)}.png",
                    normalize=True,
                    nrow=10,
                    range=(-0.5, 0.5),
                )
                losses = []
                logdets = []
                logps = []
                for image in val_loader:
                    image = image.to(device)
                    noisy_image = image
                    if args.te_dq:
                        noisy_image += torch.rand_like(image) / n_bins
                    if args.te_noise:
                        noisy_image += torch.randn_like(image) * args.delta
                    log_p, logdet, _ = model(noisy_image)
                    logdet = logdet.mean()
                    loss, log_p, log_det = calc_loss(log_p, logdet,
                                                     args.img_size, n_bins,
                                                     args.n_channels)
                    losses.append(loss.item())
                    logdets.append(log_det.item())
                    logps.append(log_p.item())
                pbar.set_description(
                    f"Loss: {np.mean(losses):.5f}; logP: {np.mean(logps):.5f}; logdet: {np.mean(logdets):.5f}; delta: {args.delta:.5f}"
                )
                current_loss = np.mean(losses)
                print(f"{current_loss},{args.delta},{i + 1}", file=f_test_loss)
                epoch_losses.append(current_loss)
                if (i + 1) % 10 == 0:
                    torch.save(
                        model.state_dict(),
                        f"checkpoint/seq_model_{repr_args}_{i + 1}_.pt",
                    )
                    f_ll = open(f"ll/seq_ll_{repr_args}_{i + 1}.txt", "w")
                    train_loader, val_loader, train_val_loader = dataset_f(
                        args.batch, args.img_size, args.n_channels)
                    train_val_loader = iter(train_val_loader)
                    for image_val in val_loader:
                        image = image_val
                        image = image.to(device)
                        noisy_image = image  # FIX: was referenced before assignment
                        if args.te_dq:
                            noisy_image += torch.rand_like(image) / n_bins
                        if args.te_noise:
                            noisy_image += torch.randn_like(image) * args.delta
                        log_p_val, logdet_val, _ = model(noisy_image)
                        image = next(train_val_loader)
                        image = image.to(device)
                        noisy_image = image  # FIX: was referenced before assignment
                        if args.te_dq:
                            noisy_image += torch.rand_like(image) / n_bins
                        if args.te_noise:
                            noisy_image += torch.randn_like(image) * args.delta
                        log_p_train_val, logdet_train_val, _ = model(noisy_image)
                        for (lpv, ldv, lptv, ldtv) in zip(
                                log_p_val, logdet_val, log_p_train_val,
                                logdet_train_val):
                            print(
                                args.delta,
                                lpv.item(),
                                ldv.item(),
                                lptv.item(),
                                ldtv.item(),
                                file=f_ll,
                            )
                    f_ll.close()
    f_train_loss.close()
    f_test_loss.close()
def train(args, model, optimizer):
    """Train a flow model, resuming from the last checkpoint when present.

    Per epoch: one pass over the training data with optional dequantization
    noise, sample-image dump, evaluation pass, early stopping (no improvement
    over the best of the last 19 epochs once 20 are recorded), checkpointing,
    and a per-example log-likelihood dump for val and train-val data.

    Improvement: the long if/elif dataset chain is replaced by a dispatch
    dict (same names, same ``ValueError`` for unknown datasets), and the
    commented-out periodic-save block was removed.
    """
    dataset_loaders = {
        "mnist": memory_mnist,
        "fashion_mnist": memory_fashion,
        "celeba": celeba,
        "ffhq_gan_32": ffhq_gan_32,
        "cifar_horses_40": cifar_horses_40,
        "ffhq_50": ffhq_50,
        "cifar_horses_20": cifar_horses_20,
        "cifar_horses_80": cifar_horses_80,
        "mnist_30": mnist_30,
        "mnist_gan_all": mnist_gan_all,
        "mnist_pad": mnist_pad,
        "cifar_horses_20_top": cifar_horses_20_top,
        "cifar_horses_40_top": cifar_horses_40_top,
        "cifar_horses_20_top_small_lr": cifar_horses_20_top_small_lr,
        "cifar_horses_40_top_small_lr": cifar_horses_40_top_small_lr,
        "arrows_small": arrows_small,
        "arrows_big": arrows_big,
        "cifar_20_picked_inds_2": cifar_20_picked_inds_2,
        "cifar_40_picked_inds_2": cifar_40_picked_inds_2,
        "cifar_40_picked_inds_3": cifar_40_picked_inds_3,
        "cifar_20_picked_inds_3": cifar_20_picked_inds_3,
    }
    try:
        dataset_f = dataset_loaders[args.dataset]
    except KeyError:
        raise ValueError("Unknown dataset:", args.dataset)
    repr_args = string_args(args)
    n_bins = 2.0 ** args.n_bits
    # fixed latent samples so saved images are comparable across epochs
    z_sample = []
    z_shapes = calc_z_shapes(args.n_channels, args.img_size, args.n_flow,
                             args.n_block)
    for z in z_shapes:
        z_new = torch.randn(args.n_sample, *z) * args.temp
        z_sample.append(z_new.to(device))
    epoch_losses = []
    f_train_loss = open(f"losses/losses_train_{repr_args}_.txt", "a",
                        buffering=1)
    f_test_loss = open(f"losses/losses_test_{repr_args}_.txt", "a",
                       buffering=1)
    last_model_path = f"checkpoint/model_{repr_args}_last_.pt"
    # resume from the last checkpoint if one exists
    try:
        model.load_state_dict(torch.load(last_model_path))
        model.eval()
        f_epoch = open(f"checkpoint/last_epoch_{repr_args}.txt", "r",
                       buffering=1)
        epoch_n = int(f_epoch.readline().strip())
        f_epoch.close()
    except FileNotFoundError:
        print("Training the model from scratch.")
        epoch_n = 0
    with tqdm(range(epoch_n, args.epochs + epoch_n)) as pbar:
        for i in pbar:
            repr_args = string_args(args)
            train_loader, val_loader, train_val_loader = dataset_f(
                args.batch, args.img_size, args.n_channels)
            train_losses = []
            for image in train_loader:
                if isinstance(image, list):
                    image = image[0]
                optimizer.zero_grad()
                image = image.to(device)
                noisy_image = image
                if args.tr_dq:
                    noisy_image += torch.rand_like(image) / n_bins
                noisy_image += torch.randn_like(image) * args.delta
                log_p, logdet, _ = model(noisy_image)
                logdet = logdet.mean()
                loss, log_p, log_det = calc_loss(log_p, logdet, args.img_size,
                                                 n_bins, args.n_channels)
                loss.backward()
                optimizer.step()
                train_losses.append(loss.item())
            current_train_loss = np.mean(train_losses)
            print(f"{current_train_loss},{args.delta},{i + 1}",
                  file=f_train_loss)
            with torch.no_grad():
                utils.save_image(
                    model.reverse(z_sample).cpu().data,
                    f"sample/sample_{repr_args}_{str(i + 1).zfill(6)}.png",
                    normalize=True,
                    nrow=10,
                    range=(-0.5, 0.5),
                )
                losses = []
                logdets = []
                logps = []
                for image in val_loader:
                    if isinstance(image, list):
                        image = image[0]
                    image = image.to(device)
                    log_p, logdet, _ = model(image)
                    logdet = logdet.mean()
                    loss, log_p, log_det = calc_loss(log_p, logdet,
                                                     args.img_size, n_bins,
                                                     args.n_channels)
                    losses.append(loss.item())
                    logdets.append(log_det.item())
                    logps.append(log_p.item())
                pbar.set_description(
                    f"Loss: {np.mean(losses):.5f}; logP: {np.mean(logps):.5f}; logdet: {np.mean(logdets):.5f}; delta: {args.delta:.5f}"
                )
                current_loss = np.mean(losses)
                print(f"{current_loss},{args.delta},{i + 1}", file=f_test_loss)
                epoch_losses.append(current_loss)
                # early stopping: stop when the loss 20 epochs ago beats the
                # best of the 19 epochs since
                if len(epoch_losses) >= 20 and epoch_losses[-20] < min(
                        epoch_losses[-19:]):
                    break
                torch.save(model.state_dict(), last_model_path)
                f_epoch = open(f"checkpoint/last_epoch_{repr_args}.txt", "w",
                               buffering=1)
                f_epoch.write(str(i + 1))
                f_epoch.close()
                f_ll = open(f"ll/ll_{repr_args}_{i + 1}.txt", "w")
                train_loader, val_loader, train_val_loader = dataset_f(
                    args.batch, args.img_size, args.n_channels)
                train_val_loader = iter(train_val_loader)
                for image_val in val_loader:
                    image = image_val
                    if isinstance(image, list):
                        image = image[0]
                    image = image.to(device)
                    log_p_val, logdet_val, _ = model(image)
                    image = next(train_val_loader)
                    if isinstance(image, list):
                        image = image[0]
                    image = image.to(device)
                    log_p_train_val, logdet_train_val, _ = model(image)
                    for (lpv, ldv, lptv, ldtv) in zip(
                            log_p_val, logdet_val, log_p_train_val,
                            logdet_train_val):
                        print(
                            args.delta,
                            lpv.item(),
                            ldv.item(),
                            lptv.item(),
                            ldtv.item(),
                            file=f_ll,
                        )
                f_ll.close()
    f_train_loss.close()
    f_test_loss.close()
def train_model(model, args):
    """Train *model*, track the best validation loss, and save those weights."""
    dataloaders = gen_dataloader(args)
    optimizer = optim.Adam(
        filter(lambda p: p.requires_grad, model.parameters()), lr=args.lr)
    scheduler = lr_scheduler.StepLR(optimizer, step_size=args.decay_epoch,
                                    gamma=0.1)
    best_loss = 1e10
    for epoch in np.arange(1, args.maxepoch + 1):
        print('Epoch {}/{}'.format(epoch, args.maxepoch))
        print('-' * 10)
        since = time.time()
        # alternate a training pass and a validation pass every epoch
        for phase in ['train', 'val']:
            if phase == 'train':
                scheduler.step()
                for param_group in optimizer.param_groups:
                    print("LR", param_group['lr'])
                model.train()
            else:
                model.eval()
            metrics = defaultdict(float)
            epoch_samples = 0
            for inputs, labels in dataloaders[phase]:
                inputs = inputs.cuda()
                labels = labels.cuda()
                optimizer.zero_grad()
                # history tracked only while training
                with torch.set_grad_enabled(phase == 'train'):
                    outputs = model(inputs)
                    loss = calc_loss(outputs, labels, metrics)
                    if phase == 'train':
                        loss.backward()
                        optimizer.step()
                epoch_samples += inputs.size(0)
            print_metrics(metrics, epoch_samples, phase)
            epoch_loss = metrics['loss'] / epoch_samples
            # keep a deep copy of the best-validation weights
            if phase == 'val' and epoch_loss < best_loss:
                best_loss = epoch_loss
                best_model = copy.deepcopy(model.state_dict())
        time_elapsed = time.time() - since
        print('Epoch {:2d} takes {:.0f}m {:.0f}s'.format(
            epoch, time_elapsed // 60, time_elapsed % 60))
        print(
            "================================================================================"
        )
    print("Training finished...")
    best_loss_str = '{:.4f}'.format(best_loss)
    print('Best val loss: ' + best_loss_str)
    # save the best model under a loss-stamped filename
    best_model_dir = os.path.join(args.model_dir, "SegModels",
                                  args.simu_type + args.network, args.session)
    if not os.path.exists(best_model_dir):
        os.makedirs(best_model_dir)
    best_model_name = args.network.lower() + "-" + str(best_loss_str) + ".pth"
    best_model_path = os.path.join(best_model_dir, best_model_name)
    torch.save(best_model, best_model_path)
def train_model(net, train_data, val_data, num_epochs, device, train_id):
    """Train the decomposition net; validate and checkpoint every 20 epochs."""
    torch.backends.cudnn.enabled = True
    torch.backends.cudnn.benchmark = True
    net.train()
    base_lr = 1e-4
    model_folder = "./model/" + train_id
    optimizer = optim.Adam(net.parameters(), lr=base_lr, betas=(0.9, 0.999))
    for epoch in range(num_epochs):
        print('Epoch {}/{}'.format(epoch, num_epochs - 1))
        for batch_idx, sample_batched in enumerate(train_data):
            in_hsv = sample_batched["image"]
            # train on the V channel of the HSV input
            _, _, in_v = torch.split(in_hsv, 1, dim=1)
            S = in_v.to(device)
            S_1 = utils.disturbance(in_v).to(device)
            optimizer.zero_grad()
            R, I = net(S)
            R_1, I_1 = net(S_1)
            loss = utils.calc_loss(R, R_1, I, I_1, S, device)
            loss.backward()
            optimizer.step()
            if (batch_idx + 1) % 50 == 0:
                print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                    epoch, batch_idx * len(sample_batched["image"]),
                    len(train_data.dataset),
                    100. * batch_idx / len(train_data), loss.item()))
        with torch.no_grad():
            if epoch % 20 == 0:
                net.eval()
                out_path = ("./data/result/" + train_id + "/" + str(epoch)
                            + "/img/")
                if not os.path.exists(out_path):
                    os.makedirs(out_path)
                print("========validating========")
                for _, test_batched in enumerate(val_data):
                    index = test_batched["index"][0]
                    in_hsv = test_batched["image"].to(device)
                    h, s, in_v = torch.split(in_hsv, 1, dim=1)
                    out_v, _ = net(in_v)
                    # recombine with the untouched H and S channels
                    out_hsv = torch.cat((h, s, out_v), dim=1)
                    out_rgb = kornia.color.hsv_to_rgb(out_hsv)
                    img_path = out_path + str(index) + ".JPG"
                    torchvision.utils.save_image(out_rgb, img_path)
                PSNR = psnr.calc_averger_psnr(out_path)
                # checkpoint name carries the epoch and validation PSNR
                model_dir = (model_folder + "/" + str(epoch) + "("
                             + str(PSNR) + ").pth")
                torch.save(net.state_dict(), model_dir)
                net.train()
    print('-' * 10)
    print("Training is over.")
    return net
def train(self, dataset_path, num_classes, batch_size, lr_base, lr_decay,
          step_size, max_iteration, pretrained_model=None):
    '''
    Build the VGG-Net16 graph, train it, write summaries, and save checkpoints.

    Params:
        - dataset_path: directory containing train_list.txt / valid_list.txt
        - num_classes: number of output classes
        - batch_size: samples fed to the network per training step
        - lr_base: initial learning rate
        - lr_decay: learning-rate decay factor
        - step_size: decay interval; lr = lr_base * lr_decay ^ (global_step / step_size)
        - max_iteration: maximum number of training iterations
        - pretrained_model: optional path to a pre-trained checkpoint
    Return: None
    '''
    train_file_name = dataset_path + 'train_list.txt'
    valid_file_name = dataset_path + 'valid_list.txt'
    log_dir = './log/vgg'
    model_dir = './model/vgg'
    vgg = VGG(weight_decay=0.0005, keep_prob=0.5, num_classes=num_classes)
    train_summary_list = []
    valid_summary_list = []
    with tf.Graph().as_default(), tf.device('/gpu:0'):
        with tf.name_scope('input'):
            # queue-based input pipelines for training and validation batches
            train_image, train_label = get_batch(
                train_file_name, self._image_H, self._image_W, batch_size)
            valid_image, valid_label = get_batch(
                valid_file_name, self._image_H, self._image_W, 250,
                is_train=False)
            x = tf.placeholder(
                tf.float32,
                [None, self._image_H, self._image_W, self._image_channels],
                name='x')
            y = tf.placeholder(tf.int64, [None], name='y')
        # loss, accuracy, train_op
        logits, _ = vgg.vgg16(x)
        loss = utils.calc_loss(logits, y)
        accuracy = utils.calc_accuracy(logits, y)
        train_op, learning_rate, global_step = utils.optimizer(
            lr_base, step_size, lr_decay, loss)
        # summaries (train and valid share the same loss/accuracy tensors)
        train_summary_list.append(tf.summary.scalar('train_loss', loss))
        valid_summary_list.append(tf.summary.scalar('valid_loss', loss))
        train_summary_list.append(
            tf.summary.scalar('train_accuracy', accuracy))
        valid_summary_list.append(
            tf.summary.scalar('test_accuracy', accuracy))
        train_summary_list.append(
            tf.summary.scalar('learning rate', learning_rate))
        valid_summary_list.append(
            tf.summary.scalar('learning rate', learning_rate))
        for var in tf.trainable_variables():
            valid_summary_list.append(tf.summary.histogram(var.name, var))
        train_summary = tf.summary.merge(train_summary_list)
        valid_summary = tf.summary.merge(valid_summary_list)
        # session
        saver = tf.train.Saver(max_to_keep=50)
        with tf.Session(config=tf.ConfigProto(
                allow_soft_placement=True,
                log_device_placement=True)) as sess:
            train_writer = tf.summary.FileWriter(log_dir + 'train', sess.graph)
            test_writer = tf.summary.FileWriter(log_dir + 'valid')
            tf.global_variables_initializer().run()
            tf.local_variables_initializer().run()
            # start the input queue runner threads
            coord = tf.train.Coordinator()
            threads = tf.train.start_queue_runners(sess=sess, coord=coord)
            # optionally restore a pre-trained model
            if pretrained_model != None:
                ckpt = tf.train.get_checkpoint_state(pretrained_model)
                print('Restoring pretrained model: %s' %
                      ckpt.model_checkpoint_path)
                saver.restore(sess, ckpt.model_checkpoint_path)
            train_time = 0
            for step in range(max_iteration):
                start_time = time.time()
                image, label = sess.run([train_image, train_label])
                _, train_loss, summary_str, train_step = sess.run(
                    [train_op, loss, train_summary, global_step],
                    feed_dict={x: image, y: label})
                train_writer.add_summary(summary_str, global_step=train_step)
                train_writer.flush()
                duration = time.time() - start_time
                train_time += duration
                # validate and save the model periodically
                if step % 1000 == 0 or (step + 1) == max_iteration:
                    image, label = sess.run([valid_image, valid_label])
                    lr, summary_str, valid_loss, validation_accuracy, \
                        train_step = sess.run(
                            [learning_rate, valid_summary, loss, accuracy,
                             global_step],
                            feed_dict={x: image, y: label})
                    test_writer.add_summary(summary_str,
                                            global_step=train_step)
                    test_writer.flush()
                    print('Step %d: train loss = %.3f, valid loss = %.3f,valid accuracy = %.3f%%, lr = %.6f (%.3f sec)' %
                          (train_step, train_loss, valid_loss,
                           validation_accuracy, lr, train_time))
                    saver.save(sess, model_dir + 'model.ckpt',
                               global_step=train_step)
                    with open(log_dir + 'valid_result.txt',
                              'at') as file_writer:
                        file_writer.write('%d\t%.3f%%\t%.5f\t%d\r\n' %
                                          (train_step, validation_accuracy,
                                           lr, train_time))
            # shut down the queue runner threads
            coord.request_stop()
            coord.join(threads)
epoch_loss = 0 # train_loss = [] # val_loss = [] # epoch_train_loss = [] # epoch_val_loss = [] epoch_samples = 0 for batch_idx, batch in enumerate(train_loader): model.train() img = batch[0].to(device) mask = batch[1].to(device) print("Shapes", img.shape, mask.shape) u1, c1 = numpy.unique(mask.cpu().numpy(), return_counts=True) print("UC", u1, c1) epoch_samples += img.size(0) pred_mask = model(img) loss = calc_loss(pred_mask, mask, metrics) epoch_loss += loss.item() optimizer.zero_grad() loss.backward() optimizer.step() print("EPOCH:{0} || BATCH NO:{1} || LOSS:{2}".format( epoch, batch_idx, loss.item())) if batch_idx % 3000 == 0: torch.save( model.module.state_dict(), "../outputs5/checkpoints/ckpt_{}_{}.pth".format( batch_idx, epoch)) metrics["batch_idx"] = batch_idx metrics["epoch"] = epoch metrics["epoch_samples"] = epoch_samples a = open(
def test_model(model, dloader, save_pic=False):
    """Separate overlapping chromosomes in each test image and save drawings.

    Images whose prediction has more than one connected group, an unexpected
    region count, or no candidate assignments are skipped.
    """
    start_time = time.time()
    print("Start separating")
    desc = ZernikeMoments(radius=21)
    lda_paras = pickle.load(
        open(os.path.join("../data/Models/LDA", args.lda_model_path), "rb"))

    def lda_pred(fea):
        # project the feature onto the LDA space, return nearest class + dist
        projected = np.matmul(fea, lda_paras['ProjectMat'])
        class_num = lda_paras['ClassMean'].shape[0]
        tiled = np.matmul(np.ones((class_num, 1)),
                          np.expand_dims(projected, axis=0))
        dist = np.sum((tiled - lda_paras['ClassMean']) ** 2, axis=1)
        return np.argmin(dist), min(dist)

    # testing on each image
    metrics = defaultdict(float)
    for ind, (inputs, labels) in enumerate(dloader):
        filename = os.path.splitext(dloader.dataset.cur_img_name)[0]
        save_path = os.path.join(args.data_dir, "Predictions",
                                 filename + ".png")
        img_ori = inputs[0].cpu().numpy().transpose((1, 2, 0))
        mask = labels[0].cpu().numpy().transpose((1, 2, 0))
        mask = (mask[..., 0] * 1 + mask[..., 1] * 255).astype(np.uint8)
        mask_c = mask2color(mask)
        inputs = inputs.cuda()
        labels = labels.cuda()
        outputs = model(inputs)
        loss = calc_loss(outputs, labels, metrics)
        preds = torch.sigmoid(outputs).data.cpu().numpy()
        pred = preds[0].transpose((1, 2, 0))
        # threshold the two channels; overlap overrides single
        single = pred[..., 0] > 0.5
        overlap = pred[..., 1] > 0.5
        combine1 = np.zeros_like(single, dtype=np.uint8)
        combine1[single == True] = 1
        combine1[overlap == True] = 255
        pred_c1 = mask2color(combine1)
        single = combine1 == True
        combine = refine_pred(single, overlap)
        pred_c = mask2color(combine)
        # accept only masks with a single connected group and 3-5 regions
        group_num = len(np.unique(measure.label(combine > 0))) - 1
        if group_num > 1:
            continue
        region_num = len(np.unique(combine)) - 1
        if region_num < 3 or region_num > 5:
            continue
        assignments = assign_combine(combine)
        if len(assignments) == 0:
            continue
        # score every candidate assignment by its total LDA distance
        min_dists = []
        for assignment in assignments:
            fea1, fea2 = cal_assignment_fea(assignment, desc)
            _, dist1 = lda_pred(fea1)
            _, dist2 = lda_pred(fea2)
            min_dists.append(dist1 + dist2)
        best_assign = assignments[np.argmin(min_dists)]
        # draw the two chosen contours on the original image and save
        final_img = np.copy(img_ori)
        final_img = np.ascontiguousarray(final_img * 255, dtype=np.uint8)
        cv2.drawContours(final_img, best_assign, 0, [255, 0, 0], 2)
        cv2.drawContours(final_img, best_assign, 1, [0, 255, 0], 2)
        io.imsave(save_path, final_img)
    elapsed_time = time.time() - start_time
    print("Takes {} seconds on {} images".format(elapsed_time,
                                                 len(dloader.dataset)))
def main(_config, _run):
    """Train a Rainbow DQN agent with a prioritized replay buffer.

    Fix: the original called ``load_ckp(MODEL_PATH_FINAL, net, optimizer)``
    *before* ``optimizer`` was created, raising a NameError whenever
    ``LOAD_SAVED_MODEL`` was set.  The optimizer is now constructed right
    after the network so the checkpoint restore can update it.
    """
    logger = _run
    SAVE_NAME = _config['SAVE_NAME']
    LOAD_SAVED_MODEL = _config['LOAD_SAVED_MODEL']
    MODEL_PATH_FINAL = _config['MODEL_PATH_FINAL']
    total_steps = 1000000
    params = common.HYPERPARAMS['gamePlay2']
    params['epsilon_frames'] *= 2
    parser = argparse.ArgumentParser()
    parser.add_argument("--cuda", default=False, action="store_true",
                        help="Enable cuda")
    args = parser.parse_args()
    env = gym.make(params['env_name'], glob_conf=_config, logger=logger)
    writer = SummaryWriter(comment="-" + params['run_name']
                           + "-rainbow-beta200")
    net = RainbowDQN(env.observation_space.shape, env.action_space.n).to(device)
    # FIX: create the optimizer before it is handed to load_ckp below
    optimizer = optim.Adam(net.parameters(), lr=params['learning_rate'])
    name_load = current_path + "/models" + MODEL_PATH_FINAL
    if _config['LOAD_SAVED_MODEL']:
        mdl, opt, lss = load_ckp(MODEL_PATH_FINAL, net, optimizer)
        net = mdl
        optimizer = opt
    tgt_net = ptan.agent.TargetNet(net)
    agent = ptan.agent.DQNAgent(lambda x: net.qvals(x),
                                ptan.actions.ArgmaxActionSelector(),
                                device=device)
    # change the step_counts to change multi step prediction
    exp_source = ptan.experience.ExperienceSourceFirstLast(
        env, agent, gamma=params['gamma'], steps_count=REWARD_STEPS)
    buffer = ptan.experience.PrioritizedReplayBuffer(
        exp_source, params['replay_size'], PRIO_REPLAY_ALPHA)
    # timestamped results folder for checkpoints
    today = datetime.datetime.now()
    todays_date_full = (str(today.year) + "_" + str(today.month) + "_"
                        + str(today.day) + "_")
    todays_date_full += (str(today.hour) + "_" + str(today.minute) + "_"
                         + str(today.second))
    folder_name = todays_date_full + "_" + experiment_name
    results_dir = current_path + "/results/" + folder_name
    results_dir_weights = results_dir + "/weights"
    os.makedirs(results_dir)
    os.makedirs(results_dir_weights)
    frame_idx = 0
    beta = BETA_START
    best_mean_reward = 0.0
    eval_states = None
    with common.RewardTracker(writer, params['stop_reward']) as reward_tracker:
        while frame_idx < total_steps:
            frame_idx += 1
            buffer.populate(1)
            # anneal the importance-sampling exponent toward 1.0
            beta = min(1.0, BETA_START + frame_idx * (1.0 - BETA_START)
                       / BETA_FRAMES)
            new_rewards = exp_source.pop_total_rewards()
            if new_rewards:
                # start saving the model after actual training begins
                if frame_idx > 100:
                    if (best_mean_reward is None
                            or best_mean_reward < reward_tracker.mean_reward):
                        torch.save(net.state_dict(), SAVE_NAME + "-best.dat")
                        if best_mean_reward is not None:
                            print("Best mean reward updated %.3f -> %.3f, model saved" %
                                  (best_mean_reward,
                                   reward_tracker.mean_reward))
                        if not reward_tracker.mean_reward == 0:
                            best_mean_reward = reward_tracker.mean_reward
                if reward_tracker.reward(new_rewards[0], frame_idx):
                    break
            if len(buffer) < params['replay_initial']:
                continue
            if eval_states is None:
                eval_states, _, _ = buffer.sample(STATES_TO_EVALUATE, beta)
                eval_states = [np.array(transition.state, copy=False)
                               for transition in eval_states]
                eval_states = np.array(eval_states, copy=False)
            optimizer.zero_grad()
            batch, batch_indices, batch_weights = buffer.sample(
                params['batch_size'], beta)
            loss_v, sample_prios_v = calc_loss(
                batch, batch_weights, net, tgt_net.target_model,
                params['gamma'] ** REWARD_STEPS, device=device)
            if frame_idx % 5000 == 0:
                checkpoint = ({
                    'model': net.state_dict(),
                    'optimizer': optimizer.state_dict(),
                    'loss': loss_v,
                    'num_step': frame_idx
                })
                torch.save(checkpoint, results_dir_weights + "/rainbow"
                           + str(frame_idx) + "step.dat")
                # save network parameters as histograms
                for name, param in net.named_parameters():
                    writer.add_histogram(name,
                                         param.clone().cpu().data.numpy(),
                                         frame_idx)
            loss_v.backward()
            optimizer.step()
            buffer.update_priorities(batch_indices,
                                     sample_prios_v.data.cpu().numpy())
            if frame_idx % params['target_net_sync'] == 0:
                tgt_net.sync()
            if logger:
                logger.log_scalar("loss", loss_v.item())
                logger.log_scalar("mean_reward", reward_tracker.mean_reward)
def test_model(model, dloader, save_pic=True):
    """Evaluate segmentation on each image; optionally save comparison plots.

    Prints the average single/overlap dice ratios accumulated by calc_loss.
    """
    start_time = time.time()
    desc = ZernikeMoments(radius=21)
    lda_paras = pickle.load(
        open(os.path.join("../data/Models/LDA", args.lda_model_path), "rb"))

    def lda_pred(fea):
        # project the feature onto the LDA space, return nearest class + dist
        projected = np.matmul(fea, lda_paras['ProjectMat'])
        class_num = lda_paras['ClassMean'].shape[0]
        tiled = np.matmul(np.ones((class_num, 1)),
                          np.expand_dims(projected, axis=0))
        dist = np.sum((tiled - lda_paras['ClassMean']) ** 2, axis=1)
        return np.argmin(dist), min(dist)

    # testing on each image
    metrics = defaultdict(float)
    for ind, (inputs, labels) in enumerate(dloader):
        filename = os.path.splitext(dloader.dataset.cur_img_name)[0]
        save_path = os.path.join(args.data_dir, "Predictions",
                                 filename + ".png")
        img_ori = inputs[0].cpu().numpy().transpose((1, 2, 0))
        mask = labels[0].cpu().numpy().transpose((1, 2, 0))
        mask = (mask[..., 0] * 1 + mask[..., 1] * 255).astype(np.uint8)
        mask_c = mask2color(mask)
        inputs = inputs.cuda()
        labels = labels.cuda()
        outputs = model(inputs)
        loss = calc_loss(outputs, labels, metrics)
        preds = torch.sigmoid(outputs).data.cpu().numpy()
        pred = preds[0].transpose((1, 2, 0))
        # threshold the two channels; overlap overrides single
        single = pred[..., 0] > 0.5
        overlap = pred[..., 1] > 0.5
        combine1 = np.zeros_like(single, dtype=np.uint8)
        combine1[single == True] = 1
        combine1[overlap == True] = 255
        pred_c1 = mask2color(combine1)
        single = combine1 == True
        combine = refine_pred(single, overlap)
        pred_c = mask2color(combine)
        if (ind + 1) % 20 == 0:
            print("Processing {}/{}".format(ind + 1, len(dloader)))
        if save_pic == True:
            # side-by-side figure: input / ground truth / prediction
            fig = plt.figure(figsize=(10, 3))
            fig.add_subplot(1, 3, 1)
            plt.imshow(img_ori)
            plt.title("Input image")
            plt.axis('off')
            fig.add_subplot(1, 3, 2)
            plt.imshow(mask_c)
            plt.title("Ground-truth")
            plt.axis('off')
            fig.add_subplot(1, 3, 3)
            plt.imshow(pred_c)
            plt.title("Prediction")
            plt.axis('off')
            plt.savefig(save_path)
            plt.close()
    avg_dice_single = metrics["dice_single"] / len(dloader)
    avg_dice_overlap = metrics["dice_overlap"] / len(dloader)
    elapsed_time = time.time() - start_time
    print("Takes {} seconds on {} images".format(elapsed_time,
                                                 len(dloader.dataset)))
    print(
        "Average single chromosome dice ratio is: {}".format(avg_dice_single))
    print("Average overlap chromosome dice ratio is: {}".format(
        avg_dice_overlap))
with torch.no_grad(): for j, (input_images, label_images) in enumerate(loader): netG.eval() label_images, input_images = label_images.to( device), input_images.to(device) pred_label_images = netG(input_images) pred_label_images = torch.abs(pred_label_images) pred_label_images = torch.where( pred_label_images > 1.0, torch.tensor(1.0).float().cuda(), pred_label_images) pred_label_images_cpu = pred_label_images.data.cpu().numpy() label_images_cpu = label_images.data.cpu().numpy() input_images_cpu = input_images.cpu().numpy() rec_losses.append( utils.calc_loss(pred_label_images, label_images, ssim_loss).item()) for idx in range(label_images_cpu.shape[0]): color = label_images_cpu[idx] pred_color = pred_label_images_cpu[idx] input_img = input_images_cpu[idx] color = np.moveaxis(color, source=[0, 1, 2], destination=[2, 0, 1]) pred_color = np.moveaxis(pred_color, source=[0, 1, 2], destination=[2, 0, 1]) input_img = np.moveaxis(input_img, source=[0, 1, 2], destination=[2, 0, 1]) color = cv2.cvtColor(np.uint8(255 * color),
def train(data_loader, model, optimizer, epoch):
    """Run one training epoch.

    Returns ``(avg_loss, avg_score_real, avg_score_fake)`` where the scores
    come from the per-video dissimilarity accumulated over the epoch.
    """
    losses = AverageMeter()
    model.train()
    global iteration
    dissimilarity_score_dict = {}
    target_dict = {}
    number_of_chunks_dict = {}
    for idx, (video_seq, audio_seq, target, audiopath) in enumerate(data_loader):
        tic = time.time()
        video_seq = video_seq.to(cuda)
        audio_seq = audio_seq.to(cuda)
        target = target.to(cuda)
        B = video_seq.size(0)
        vid_out = model.module.forward_lip(video_seq)
        aud_out = model.module.forward_aud(audio_seq)
        vid_class = model.module.final_classification_lip(vid_out)
        aud_class = model.module.final_classification_aud(aud_out)
        # free the large input tensors as early as possible
        del video_seq
        del audio_seq
        contrastive = calc_loss(vid_out, aud_out, target, args.hyper_param)
        vid_ce = criterion(vid_class, target.view(-1))
        aud_ce = criterion(aud_class, target.view(-1))
        acc = calc_accuracy(vid_out, aud_out, target, args.threshold)
        loss = contrastive + vid_ce + aud_ce
        losses.update(loss.item(), B)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        # accumulate per-video dissimilarity over every item in the batch
        for b in range(B):
            vid_name = audiopath[b].split('/')[-2]
            dist = torch.dist(vid_out[b, :].view(-1), aud_out[b, :].view(-1), 2)
            tar = target[b, :].view(-1).item()
            if dissimilarity_score_dict.get(vid_name):
                dissimilarity_score_dict[vid_name] += dist
                number_of_chunks_dict[vid_name] += 1
            else:
                dissimilarity_score_dict[vid_name] = dist
                number_of_chunks_dict[vid_name] = 1
            if not target_dict.get(vid_name):
                target_dict[vid_name] = tar
        if idx % args.print_freq == 0:
            print('Epoch: [{0}][{1}/{2}]\t'
                  'Loss {loss.val:.4f} ({loss.local_avg:.4f})\t'.format(
                      epoch, idx, len(data_loader),
                      time.time() - tic, loss=losses))
            # report the L2 norm of trainable weights as a decay diagnostic
            total_weight = 0.0
            decay_weight = 0.0
            for m in model.parameters():
                if m.requires_grad:
                    decay_weight += m.norm(2).data
                total_weight += m.norm(2).data
            print('Decay weight / Total weight: %.3f/%.3f' %
                  (decay_weight, total_weight))
            writer_train.add_scalar('local/loss', losses.val, iteration)
            iteration += 1
    avg_score_real, avg_score_fake = get_scores(dissimilarity_score_dict,
                                                number_of_chunks_dict,
                                                target_dict)
    return losses.local_avg, avg_score_real, avg_score_fake