def training_epoch(cb, opt, model, train_loader, optimizer):
    """Run one training epoch.

    Args:
        cb: callback object; ``on_train_batch_end`` is invoked after every batch.
        opt: options namespace; only ``opt.device`` is read here.
        model: network to train (switched to train mode).
        train_loader: iterable of dict batches with 'input' and 'target' keys.
        optimizer: torch optimizer stepping ``model``'s parameters.
    """
    model.train()
    for batch_idx, batch in enumerate(train_loader):
        # move every tensor in the batch dict to the target device
        # (FIX: iterate the dict directly instead of redundant .keys())
        for key in batch:
            batch[key] = batch[key].to(opt.device)
        optimizer.zero_grad()

        # training step
        # (FIX: renamed local 'input' -> 'inputs' to stop shadowing the builtin)
        inputs, target = batch['input'], batch['target']
        output = model(inputs)
        loss, _l_ship, _l_bbox = compute_loss(output, target)
        loss = loss.mean()
        loss.backward()
        optimizer.step()

        # required info for the logging callback
        cb.on_train_batch_end(opt=opt, batch_idx=batch_idx, batch=batch,
                              dataloader=train_loader, output=loss.item(),
                              l_ship=_l_ship.mean().item(),
                              l_bbox=_l_bbox.mean().item())

        # drop references before forcing a collection
        # NOTE(review): gc.collect() every batch is expensive; kept for parity
        del loss
        del batch
        gc.collect()
def train_step(inputs):
    """Perform a single optimization step on one batch.

    Args:
        inputs (tuple): ``(images, labels)`` tensors.

    Relies on module-level globals: ``model``, ``class_weight``, ``optimizer``
    and the keras metrics ``train_loss``, ``train_accuracy``, ``train_iou``.

    Returns:
        The batch loss tensor.
    """
    images, labels = inputs
    with tf.GradientTape() as tape:
        predictions = model(images)
        loss = compute_loss(labels, predictions, class_weight)
    grads = tape.gradient(loss, model.trainable_variables)
    optimizer.apply_gradients(zip(grads, model.trainable_variables))

    # accumulate the running epoch metrics
    train_loss(loss)
    train_accuracy(labels, predictions)
    class_ids = tf.math.argmax(predictions, 3)
    train_iou.update_state(labels, class_ids)
    return loss
def validation_step(self, batch, batch_idx): """ Lightning calls this inside the validation loop with the data from the validation dataloader passed in as `batch`. """ # batch_size = self.hparams.train.batch_size num_hierarchy_levels = self.hparams.train.num_hierarchy_levels truncation = self.hparams.train.truncation use_loss_masking = self.hparams.train.use_loss_masking logweight_target_sdf = self.hparams.model.logweight_target_sdf weight_missing_geo = self.hparams.train.weight_missing_geo sample = batch sdfs = sample['sdf'] # TODO: fix it # if sdfs.shape[0] < batch_size: # continue # maintain same batch size for training inputs = sample['input'] known = sample['known'] hierarchy = sample['hierarchy'] for h in range(len(hierarchy)): hierarchy[h] = hierarchy[h].cuda() if use_loss_masking: known = known.cuda() inputs[0] = inputs[0].cuda() inputs[1] = inputs[1].cuda() target_for_sdf, target_for_occs, target_for_hier = loss_util.compute_targets( sdfs.cuda(), hierarchy, num_hierarchy_levels, truncation, use_loss_masking, known) # update loss weights _iter = self._iter_counter loss_weights = get_loss_weights( _iter, self.hparams.train.num_hierarchy_levels, self.hparams.train.num_iters_per_level, self.hparams.train.weight_sdf_loss) output_sdf, output_occs = self(inputs, loss_weights) loss, losses = loss_util.compute_loss(output_sdf, output_occs, target_for_sdf, target_for_occs, target_for_hier, loss_weights, truncation, logweight_target_sdf, weight_missing_geo, inputs[0], use_loss_masking, known) output = OrderedDict({ 'val_loss': loss, }) losses_dict = dict([(f'val_loss_{i}', l) for (i, l) in enumerate(losses)]) output.update(losses_dict) return output
def test_step(sample):
    """Evaluate the model on a single batch without training.

    Args:
        sample: tuple ``(var, ref_aa, alt_aa, feature, label, padding_mask)``.

    Returns:
        ``(var, label, pred, loss)`` for downstream aggregation.
    """
    var, ref_aa, alt_aa, feature, label, padding_mask = sample
    # training=False for the inference-mode forward pass
    logit = model((ref_aa, alt_aa, feature), False, padding_mask)
    batch_loss = compute_loss(label, logit)
    prediction = model.predict_from_logit(logit)
    return var, label, prediction, batch_loss
def validate(epoch, writer, log_valid, args):
    """Run one validation epoch over `val_loader`.

    Logs averaged scalars to TensorBoard via `writer`, appends one JSON line
    per epoch to `log_valid`, and prints a summary.

    Uses module-level globals: model, val_loader, device, stats, global_step.

    Returns:
        (loss_all_avg, MAE_avg) averaged over the validation loader.
    """
    global global_step
    loss_all_avg = loss_main_avg = MSE_avg = MAE_avg = SBP_avg = MAP_avg = DBP_avg = 0.
    model.eval()
    # NOTE(review): no torch.no_grad() wrapper here, so autograd state is
    # still tracked during validation — confirm whether this is intended.
    for i, (x, y) in enumerate(val_loader):
        x, y = x.to(device), y.to(device)
        out = model(x)
        pred = out[:, :1, :]  # first output channel is the point prediction
        # NOTE(review): loss type is hard-coded to 'evi' here while the train
        # loop uses args.loss — verify this asymmetry is deliberate.
        loss_main = compute_loss(y, out, 'evi', args.zeta)
        loss_aux = compute_auxiliary_loss(y, pred, args.loss_aux)
        loss_all = loss_main + args.eta * loss_aux
        MAE, MSE, SBP, MAP, DBP = performance_check(pred, y, stats)
        # accumulate per-batch scalars; averaged after the loop
        loss_all_avg += loss_all.item()
        loss_main_avg += loss_main.item()
        MAE_avg += MAE.item()
        MSE_avg += MSE.item()
        SBP_avg += SBP.item()
        MAP_avg += MAP.item()
        DBP_avg += DBP.item()
    # convert accumulated sums into per-batch averages
    loss_all_avg = loss_all_avg / len(val_loader)
    loss_main_avg = loss_main_avg / len(val_loader)
    MAE_avg = MAE_avg / len(val_loader)
    MSE_avg = MSE_avg / len(val_loader)
    SBP_avg = SBP_avg / len(val_loader)
    MAP_avg = MAP_avg / len(val_loader)
    DBP_avg = DBP_avg / len(val_loader)
    writer.add_scalar('Valid/evi_loss_main', loss_main_avg, global_step)
    writer.add_scalar('Valid/evi_loss_all', loss_all_avg, global_step)
    writer.add_scalar('Valid/MAE', MAE_avg, global_step)
    writer.add_scalar('Valid/MSE', MSE_avg, global_step)
    writer.add_scalar('Valid/SBP', SBP_avg, global_step)
    writer.add_scalar('Valid/MAP', MAP_avg, global_step)
    writer.add_scalar('Valid/DBP', DBP_avg, global_step)
    # epoch summary written as one JSON line
    state = {}
    state['Epoch'] = epoch
    state['Global step'] = global_step
    state['evi_loss_all'] = loss_all_avg
    state['evi_loss_main'] = loss_main_avg
    state['MAE'] = MAE_avg
    state['MSE'] = MSE_avg
    state['SBP'] = SBP_avg
    state['MAP'] = MAP_avg
    state['DBP'] = DBP_avg
    log_valid.write('%s\n' % json.dumps(state))
    log_valid.flush()
    print(
        '[Valid] Epoch: {}, Itr:{}, Loss: {:0.4f}, Loss-main: {:0.4f}, MAE: {:0.4f}, MSE: {:0.4f} SBP: {:0.4f}, MAP: {:0.4f}, DBP: {:0.4f}'
        .format(epoch, global_step, loss_all_avg, loss_main_avg, MAE_avg,
                MSE_avg, SBP_avg, MAP_avg, DBP_avg))
    return loss_all_avg, MAE_avg
def validation_epoch(cb, opt, model, val_loader):
    """Run one validation epoch.

    Args:
        cb: callback; `on_validation_end` is invoked with aggregated results.
        opt: options namespace; only `opt.device` is read.
        model: network to evaluate (switched to eval mode).
        val_loader: iterable of dict batches with 'input' and 'target' keys.

    Returns:
        The mean validation loss (scalar tensor).
    """
    model.eval()
    # metrics to return
    losses = []
    prec = []
    rec = []
    f1 = []
    ap = []
    iou = []
    l_ship = []
    l_bbox = []
    with torch.no_grad():
        for batch_idx, batch in enumerate(val_loader):
            # move every tensor in the batch to the target device
            for key in batch.keys():
                batch[key] = batch[key].to(opt.device)
            # validation step
            input, target = batch['input'], batch['target']
            output = model(input)
            loss, _l_ship, _l_bbox = compute_loss(output, target)
            _prec, _rec, _f1, _ap, _iou = compute_metrics(output, target)
            # append incase analysis of distribution is of interest
            losses.append(loss)
            l_ship.append(_l_ship)
            l_bbox.append(_l_bbox)
            prec.append(_prec)
            rec.append(_rec)
            f1.append(_f1)
            ap.append(_ap)
            iou.append(_iou)
    # concatenate per-batch loss tensors and reduce to scalar means
    loss_avg = torch.mean(torch.cat(losses))
    l_ship = torch.mean(torch.cat(l_ship))
    l_bbox = torch.mean(torch.cat(l_bbox))
    metrics = {}
    for k, m in zip(["prec", "rec", "f1", "ap", "iou"],
                    [prec, rec, f1, ap, iou]):
        m = sum(m) / len(m)  # plain average over batches
        metrics[k] = m
    cb.on_validation_end(opt=opt, output=loss_avg, metrics=metrics,
                         l_ship=l_ship, l_bbox=l_bbox)
    return loss_avg
def train_step(model, opt, x, y_true, mask=None):
    """Apply one gradient update to `model` from a single trial.

    Args:
        model: Keras model.
        opt: optimizer used to apply the gradients.
        x: (T, B, n_input) input sequence.
        y_true: (T, B, n_pol) target sequence.
        mask: optional loss mask forwarded to compute_loss.

    Returns:
        (loss, acc) as produced by compute_loss.
    """
    with tf.GradientTape() as tape:
        logits_seq, h_seq = do_trial(model, x)
        loss, acc = compute_loss(y_true, logits_seq, h=h_seq, mask=mask)
    trainable = model.trainable_variables
    gradients = tape.gradient(loss, trainable)
    opt.apply_gradients(zip(gradients, trainable))
    return loss, acc
def train():
    """Train YOLOv1 (darknet19 backbone) on Pascal VOC.

    Builds the model/optimizer/dataloader from module-level hyper-parameters
    (init_lr, momentum, weight_decay, num_epochs, print_freq, batch_size)
    and runs the full training loop, periodically printing the loss and
    saving checkpoints.
    """
    dark_net = darknet19(True).cuda()
    yolo = YOLOv1(dark_net)
    yolo.cuda()
    optimizer = optim.SGD(yolo.parameters(), lr=init_lr, momentum=momentum,
                          weight_decay=weight_decay)

    # load dataset
    VOC_ROOT = "D:/pyworks/FasterRCNN/data/VOCdevkit/"
    data_set = VOCDetection(VOC_ROOT,
                            transform=SSDAugmentation(
                                [448, 448],
                                mean=(0.406, 0.456, 0.485),
                                std=(0.225, 0.224, 0.229)))
    data_loader = data.DataLoader(data_set, batch_size=2, shuffle=True,
                                  collate_fn=detection_collate,
                                  pin_memory=True)

    #start_time = time.time()
    for epoch in range(num_epochs):
        loss_per_batch = 0
        for i, (images, targets) in enumerate(data_loader):
            update_lr(optimizer, epoch, float(i) / float(len(data_loader) - 1))
            lr = get_lr(optimizer)
            # NOTE(review): model is on CUDA but `images` is used as loaded —
            # confirm whether the loader already moves tensors to the GPU.
            predicted_tensor = yolo(images)
            target_tensor = []
            for k in range(images.shape[0]):
                # FIX: index the k-th image's annotations (was `targets[:, :4]`,
                # which ignored k and cannot work on the per-image list that
                # detection_collate produces)
                bboxes = targets[k][:, :4]
                labels = targets[k][:, 4]
                target_tensor.append(encode(bboxes, labels))
            loss = compute_loss(predicted_tensor, target_tensor)
            # FIX: accumulate a detached float, not the graph-attached tensor
            loss_per_batch += loss.item()
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
        if epoch % print_freq == 0:
            print('Epoch [%d/%d], Loss: %.4f' %
                  (epoch, num_epochs, loss_per_batch / batch_size))
        # FIX: was `if epoch % 10 and epoch >= 10`, which saved on every epoch
        # NOT divisible by 10; save every 10th epoch instead
        if epoch % 10 == 0 and epoch >= 10:
            save(yolo)
def train_step(sample):
    """Run one optimization step of the variant-effect model.

    Args:
        sample: tuple ``(var, ref_aa, alt_aa, feature, label, padding_mask)``.

    Returns:
        The batch loss tensor.
    """
    var, ref_aa, alt_aa, feature, label, padding_mask = sample
    with tf.GradientTape() as grad_tape:
        # training=True for the forward pass
        logit = model((ref_aa, alt_aa, feature), True, padding_mask)
        loss = compute_loss(label, logit)
    grads = grad_tape.gradient(loss, model.trainable_variables)
    optimizer.apply_gradients(zip(grads, model.trainable_variables))
    metric_train_loss.update_state(loss)
    # periodic gradient-norm summaries, currently disabled:
    #if optimizer.iterations % 512 == 0:
    #    _update_gradient_norm_summary(model.trainable_variables, grads)
    return loss
def train(epoch, writer, log_train, args):
    """Run one training epoch over `tr_loader`.

    Logs per-step batch scalars to TensorBoard and a per-epoch averaged JSON
    line to `log_train`. Uses module-level globals: model, tr_loader,
    optimizer, device, stats, global_step.

    NOTE(review): the original indentation was lost; per-step writer calls are
    assumed to sit inside the batch loop and the `state` summary after it —
    confirm against the original source.
    """
    global global_step
    loss_all_avg = loss_main_avg = MSE_avg = MAE_avg = SBP_avg = MAP_avg = DBP_avg = 0.
    model.train()
    for i, (x, y) in enumerate(tr_loader):
        global_step += 1
        x, y = x.to(device), y.to(device)
        out = model(x)
        pred = out[:, :1, :]  # first output channel is the point prediction
        loss_main = compute_loss(y, out, args.loss, args.zeta)
        loss_aux = compute_auxiliary_loss(y, pred, args.loss_aux)
        loss_all = loss_main + args.eta * loss_aux
        optimizer.zero_grad()
        loss_all.backward()
        # clip gradient norm to 1.0 to stabilize training
        nn.utils.clip_grad_norm_(model.parameters(), 1.)
        optimizer.step()
        with torch.no_grad():
            MAE, MSE, SBP, MAP, DBP = performance_check(pred, y, stats)
            # each batch contributes 1/len(tr_loader) to the epoch average
            loss_all_avg += loss_all.item() / len(tr_loader)
            loss_main_avg += loss_main.item() / len(tr_loader)
            MAE_avg += MAE.item() / len(tr_loader)
            MSE_avg += MSE.item() / len(tr_loader)
            SBP_avg += SBP.item() / len(tr_loader)
            MAP_avg += MAP.item() / len(tr_loader)
            DBP_avg += DBP.item() / len(tr_loader)
        # per-step scalars (current batch values, not epoch averages)
        writer.add_scalar('Train/{}_loss_all'.format(args.loss), loss_all.item(), global_step)
        writer.add_scalar('Train/{}_loss_main'.format(args.loss), loss_main.item(), global_step)
        writer.add_scalar('Train/{}_loss_aux'.format(args.loss), loss_aux.item(), global_step)
        writer.add_scalar('Train/MAE', MAE.item(), global_step)
        writer.add_scalar('Train/MSE', MSE.item(), global_step)
        writer.add_scalar('Train/SBP', SBP.item(), global_step)
        writer.add_scalar('Train/MAP', MAP.item(), global_step)
        writer.add_scalar('Train/DBP', DBP.item(), global_step)
    # per-epoch summary written once as a JSON line
    state = {}
    state['Epoch'] = epoch
    state['Global step'] = global_step
    state['{}_loss_all'.format(args.loss)] = loss_all_avg
    state['{}_loss_main'.format(args.loss)] = loss_main_avg
    state['MAE'] = MAE_avg
    state['MSE'] = MSE_avg
    state['SBP'] = SBP_avg
    state['MAP'] = MAP_avg
    state['DBP'] = DBP_avg
    log_train.write('%s\n' % json.dumps(state))
    log_train.flush()
    print('[Train] Epoch: {}, Itr:{}, Loss: {:0.4f}, Loss-main: {:0.4f}, MAE: {:0.4f}, MSE: {:0.4f} SBP: {:0.4f}, MAP: {:0.4f}, DBP: {:0.4f}'.format(
        epoch, global_step,
        loss_all_avg, loss_main_avg, MAE_avg, MSE_avg, SBP_avg, MAP_avg, DBP_avg))
def train_loop(dataloader, model, optimizer, device):
    """Train `model` for one epoch over `dataloader`.

    Args:
        dataloader: iterable yielding (imgs, targets) batches.
        model: network to train; also passed to compute_loss.
        optimizer: torch optimizer stepping the model's parameters.
        device: device the batch tensors are moved to.

    Logs the per-batch loss to Weights & Biases.
    """
    # FIX: set train mode once, not on every batch iteration
    model.train()
    for batch_idx, (imgs, targets) in enumerate(tqdm(dataloader, desc="Training")):
        # Forward Pass
        imgs = imgs.to(device, non_blocking=True)
        targets = targets.to(device)
        outputs = model(imgs)

        # Calculate the loss (per-component breakdown is unused here)
        loss, _loss_components = compute_loss(outputs, targets, model)

        # Backpropogation
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Logging (FIX: removed unused `current` local)
        wandb.log({"Train/loss": loss.item()})
def evaluate(net, loader, thres=0.5, max_aabbs=None):
    """Evaluate the word-detector `net` over all batches of `loader`.

    For each image: run the net, decode bounding boxes from the output map,
    clip them to the image, cluster them, and (when ground truth is present)
    accumulate binary-classification metrics.

    Returns:
        EvaluateRes(images, clustered boxes per image, mean loss, metrics).
    """
    batch_imgs = []
    batch_aabbs = []
    loss = 0

    for i in range(len(loader)):
        # get batch
        loader_item = loader[i]
        with torch.no_grad():
            y = net(loader_item.batch_imgs, apply_softmax=True)
            y_np = y.to('cpu').numpy()
        if loader_item.batch_gt_maps is not None:
            loss += compute_loss(
                y, loader_item.batch_gt_maps).to('cpu').numpy()

        # factor mapping output-map coordinates back to input-image pixels
        scale_up = 1 / compute_scale_down(WordDetectorNet.input_size,
                                          WordDetectorNet.output_size)
        # NOTE(review): metrics are re-initialized on every batch, so the
        # returned metrics only reflect the LAST batch — confirm intent.
        metrics = BinaryClassificationMetrics(0, 0, 0)
        for i in range(len(y_np)):  # NOTE: shadows the outer loop variable `i`
            img_np = loader_item.batch_imgs[i, 0].to('cpu').numpy()
            pred_map = y_np[i]

            aabbs = decode(pred_map, comp_fg=fg_by_cc(thres, max_aabbs),
                           f=scale_up)
            h, w = img_np.shape
            aabbs = [aabb.clip(AABB(0, w - 1, 0, h - 1))
                     for aabb in aabbs]  # bounding box must be inside img
            clustered_aabbs = cluster_aabbs(aabbs)

            if loader_item.batch_aabbs is not None:
                curr_metrics = binary_classification_metrics(
                    loader_item.batch_aabbs[i], clustered_aabbs)
                metrics = metrics.accumulate(curr_metrics)

            batch_imgs.append(img_np)
            batch_aabbs.append(clustered_aabbs)

    return EvaluateRes(batch_imgs, batch_aabbs, loss / len(loader), metrics)
def train(net, optimizer, loader, writer):
    """Train `net` for one epoch over a shuffled `loader`.

    Prints per-batch progress and logs the loss scalar to `writer`,
    incrementing the module-level `global_step` after each batch.
    """
    global global_step
    net.train()
    loader.reset()
    loader.random()  # shuffle the loader before this epoch
    num_items = len(loader)
    for i in range(num_items):
        item = loader[i]

        # forward pass
        optimizer.zero_grad()
        y = net(item.batch_imgs)
        loss = compute_loss(y, item.batch_gt_maps)

        # backward pass and parameter update
        loss.backward()
        optimizer.step()

        # progress + TensorBoard logging
        print(f'{i + 1}/{len(loader)}: {loss}')
        writer.add_scalar('loss', loss, global_step)
        global_step += 1
def main(net_config, ckpt_for_init):
    """Evaluate a trained network on the combined train+val data and dump a CSV.

    Args:
        net_config: name of the network configuration to load.
        ckpt_for_init: checkpoint path used to initialize the model weights.
    """
    ## load the config
    config = configs.Config(net_config)

    ## set the logger
    test_dir = os.path.join(config.log_dir, "test")
    log_dir = helper.make_dir([test_dir], re_create_dir = True)
    log_file = os.path.join(log_dir, config.net_config + '_test.txt')
    csv_file = os.path.join(log_dir, config.net_config + '_test.csv')
    logger = helper.Logger(log_file)
    logger.add(config.config_str, do_print=True)

    ## load the dasets from the csv file (train, val, feat_len)
    data = input_data.load_datasets(config.input_csv)
    # data has train.next_batch(xx) test.images. test.labels
    feat_len = data.feat_len

    ## set the input placeholders
    layer = 'input'
    with tf.name_scope(layer) as scope:
        x = tf.placeholder(tf.float32, [None, feat_len], name='input')
        y = tf.placeholder(tf.float32, [None, 1], name = 'output')
        # dropout disabled (keep everything) at test time
        keep_prob = tf.constant(1.0, name = 'keep_prob')

    ## call inference and compute the output
    y_ = deepnets.inference(config, input_tensors = {"x": x, "keep_prob": keep_prob})

    ## set the global step
    global_step = tf_utils.get_global_step()

    ## tensors to compute the validatoin loss
    with tf.name_scope('validation') as scope:
        val_loss = loss.compute_loss(est=y_, gt=y, loss_func= config.test_loss)
        val_summary = tf.summary.scalar('val_loss', val_loss)

    init_op = tf.initialize_all_variables()
    sess = tf.Session()
    sess.run(init_op)

    ## saving and restoring operations
    restore_variables = tf_utils.get_model_varaibles() +\
        tf.get_collection("GLOBAL_STEP")+\
        tf.get_collection('BN_VARIABLES')
    saver = tf.train.Saver(restore_variables)
    step_init = tf_utils.restore_model(config, sess, restore_variables, ckpt_for_init, logger)

    summary_writer = tf.summary.FileWriter(log_dir, sess.graph)

    # do the validation over the concatenation of the train and val splits
    features = np.concatenate((data.train.features, data.val.features), axis=0)
    output= np.concatenate((data.train.output, data.val.output), axis=0)
    feed = {x:features, y: output}
    est, v_loss, v_summary = sess.run([y_, val_loss, val_summary], feed_dict=feed)

    # input_headers = [x.encode('latin1') for x in data.input_header]
    headers = ','.join(data.input_header) + ", gt-y, est-y"
    vals = np.concatenate((features, output, est), axis=1)
    # append dataset default mu and sigma for estimated values
    mu = np.append(data.mu, data.mu[-1])
    sigma = np.append(data.sigma, data.sigma[-1])
    # reverse the standardization operation to the vals
    vals = np.add(vals * sigma, mu)
    np.savetxt(csv_file, vals, header= headers, delimiter=",")
    summary_writer.add_summary(v_summary, step_init)
    logger.add('val_loss {:f}'.format(v_loss), do_print=True)
    logger.save()
def compute_grads(cfg):
    """Compute gradients of the total loss w.r.t. the optimized image.

    Args:
        cfg: keyword-argument dict for compute_loss; must contain
            'init_image', the tensor being optimized.

    Returns:
        (gradients of the total loss w.r.t. cfg['init_image'], all_loss).
    """
    with tf.GradientTape() as tape:
        all_loss = compute_loss(**cfg)
    # first element of all_loss is the total loss
    total_loss = all_loss[0]
    return tape.gradient(total_loss, cfg['init_image']), all_loss
def train(epoch, iter, dataloader, log_file, output_save):
    """Train the sparse-SDF completion model for one epoch.

    NOTE: the parameter `iter` shadows the builtin; it is the running global
    iteration counter and is returned updated.

    Returns:
        (train_losses, train_l1preds, train_l1tgts, train_ious, iter,
        loss_weights) where train_losses[0] is the total loss history,
        slots 1..num_hierarchy_levels are per-level losses and the last slot
        is the sdf loss history.
    """
    train_losses = [[] for i in range(args.num_hierarchy_levels + 2)]
    train_l1preds = []
    train_l1tgts = []
    train_ious = [[] for i in range(args.num_hierarchy_levels)]
    model.train()
    start = time.time()

    # per-epoch LR schedule when no per-iteration step size is configured
    if args.scheduler_step_size == 0:
        scheduler.step()

    num_batches = len(dataloader)
    for t, sample in enumerate(dataloader):
        # loss-weight schedule depends on the global iteration counter
        loss_weights = get_loss_weights(iter, args.num_hierarchy_levels,
                                        args.num_iters_per_level,
                                        args.weight_sdf_loss)
        if epoch == args.start_epoch and t == 0:
            print('[iter %d/epoch %d] loss_weights' % (iter, epoch), loss_weights)

        sdfs = sample['sdf']
        if sdfs.shape[0] < args.batch_size:
            continue  # maintain same batch size for training
        inputs = sample['input']
        known = sample['known']
        hierarchy = sample['hierarchy']
        for h in range(len(hierarchy)):
            hierarchy[h] = hierarchy[h].cuda()
        if args.use_loss_masking:
            known = known.cuda()
        inputs[0] = inputs[0].cuda()
        inputs[1] = inputs[1].cuda()
        # dense sdf/occupancy/hierarchy targets from the ground truth
        target_for_sdf, target_for_occs, target_for_hier = loss_util.compute_targets(
            sdfs.cuda(), hierarchy, args.num_hierarchy_levels, args.truncation,
            args.use_loss_masking, known)

        optimizer.zero_grad()
        output_sdf, output_occs = model(inputs, loss_weights)
        loss, losses = loss_util.compute_loss(
            output_sdf, output_occs, target_for_sdf, target_for_occs,
            target_for_hier, loss_weights, args.truncation,
            args.logweight_target_sdf, args.weight_missing_geo, inputs[0],
            args.use_loss_masking, known)
        loss.backward()
        optimizer.step()

        # decode predicted occupancies only when needed (periodic logging
        # or when this batch will be visualized)
        output_visual = output_save and t + 2 == num_batches
        compute_pred_occs = (iter % 20 == 0) or output_visual
        if compute_pred_occs:
            pred_occs = [None] * args.num_hierarchy_levels
            for h in range(args.num_hierarchy_levels):
                factor = 2**(args.num_hierarchy_levels - h - 1)
                pred_occs[h] = [None] * args.batch_size
                if len(output_occs[h][0]) == 0:
                    continue
                # binarize occupancy logits in place
                output_occs[h][1] = torch.nn.Sigmoid()(
                    output_occs[h][1][:, 0].detach()) > 0.5
                for b in range(args.batch_size):
                    # last column of the sparse locations is the batch index
                    batchmask = output_occs[h][0][:, -1] == b
                    locs = output_occs[h][0][batchmask][:, :-1]
                    vals = output_occs[h][1][batchmask]
                    pred_occs[h][b] = locs[vals.view(-1)]

        train_losses[0].append(loss.item())
        for h in range(args.num_hierarchy_levels):
            train_losses[h + 1].append(losses[h])
            target = target_for_occs[h].byte()
            if compute_pred_occs:
                iou = loss_util.compute_iou_sparse_dense(
                    pred_occs[h], target, args.use_loss_masking)
                train_ious[h].append(iou)
        train_losses[args.num_hierarchy_levels + 1].append(losses[-1])

        if len(output_sdf[0]) > 0:
            output_sdf = [output_sdf[0].detach(), output_sdf[1].detach()]
        # periodic surface L1 statistics, only when the sdf loss is active
        if loss_weights[-1] > 0 and iter % 20 == 0:
            train_l1preds.append(
                loss_util.compute_l1_predsurf_sparse_dense(
                    output_sdf[0], output_sdf[1], target_for_sdf, None, False,
                    args.use_loss_masking, known).item())
            train_l1tgts.append(
                loss_util.compute_l1_tgtsurf_sparse_dense(
                    output_sdf[0], output_sdf[1], target_for_sdf,
                    args.truncation, args.use_loss_masking, known))

        iter += 1
        # per-iteration LR schedule
        if args.scheduler_step_size > 0 and iter % args.scheduler_step_size == 0:
            scheduler.step()
        if iter % 20 == 0:
            took = time.time() - start
            print_log(log_file, epoch, iter, train_losses, train_l1preds,
                      train_l1tgts, train_ious, None, None, None, None, took)
        # periodic checkpointing
        if iter % 2000 == 0:
            torch.save(
                {
                    'epoch': epoch,
                    'state_dict': model.state_dict(),
                    'optimizer': optimizer.state_dict()
                },
                os.path.join(args.save,
                             'model-iter%s-epoch%s.pth' % (iter, epoch)))
        if output_visual:
            # gather per-sample sparse sdf predictions for visualization
            vis_pred_sdf = [None] * args.batch_size
            if len(output_sdf[0]) > 0:
                for b in range(args.batch_size):
                    mask = output_sdf[0][:, -1] == b
                    if len(mask) > 0:
                        vis_pred_sdf[b] = [
                            output_sdf[0][mask].cpu().numpy(),
                            output_sdf[1][mask].squeeze().cpu().numpy()
                        ]
            inputs = [inputs[0].cpu().numpy(), inputs[1].cpu().numpy()]
            for h in range(args.num_hierarchy_levels):
                for b in range(args.batch_size):
                    if pred_occs[h][b] is not None:
                        pred_occs[h][b] = pred_occs[h][b].cpu().numpy()
            data_util.save_predictions(
                os.path.join(args.save, 'iter%d-epoch%d' % (iter, epoch),
                             'train'), sample['name'], inputs,
                target_for_sdf.cpu().numpy(),
                [x.cpu().numpy() for x in target_for_occs], vis_pred_sdf,
                pred_occs, sample['world2grid'].numpy(), args.vis_dfs,
                args.truncation)
    return train_losses, train_l1preds, train_l1tgts, train_ious, iter, loss_weights
def test(epoch, iter, loss_weights, dataloader, log_file, output_save):
    """Run one validation pass of the sparse-SDF completion model.

    NOTE: the parameters `iter` shadows the builtin; it is only used for
    naming the prediction-output directory.

    Returns:
        (val_losses, val_l1preds, val_l1tgts, val_ious) with the same layout
        as the training counterparts.
    """
    val_losses = [[] for i in range(args.num_hierarchy_levels + 2)]
    val_l1preds = []
    val_l1tgts = []
    val_ious = [[] for i in range(args.num_hierarchy_levels)]
    model.eval()
    #start = time.time()

    num_batches = len(dataloader)
    with torch.no_grad():
        for t, sample in enumerate(dataloader):
            sdfs = sample['sdf']
            if sdfs.shape[0] < args.batch_size:
                continue  # maintain same batch size
            inputs = sample['input']
            known = sample['known']
            hierarchy = sample['hierarchy']
            for h in range(len(hierarchy)):
                hierarchy[h] = hierarchy[h].cuda()
            if args.use_loss_masking:
                known = known.cuda()
            inputs[0] = inputs[0].cuda()
            inputs[1] = inputs[1].cuda()
            # dense sdf/occupancy/hierarchy targets from the ground truth
            target_for_sdf, target_for_occs, target_for_hier = loss_util.compute_targets(
                sdfs.cuda(), hierarchy, args.num_hierarchy_levels,
                args.truncation, args.use_loss_masking, known)
            output_sdf, output_occs = model(inputs, loss_weights)
            loss, losses = loss_util.compute_loss(
                output_sdf, output_occs, target_for_sdf, target_for_occs,
                target_for_hier, loss_weights, args.truncation,
                args.logweight_target_sdf, args.weight_missing_geo, inputs[0],
                args.use_loss_masking, known)

            # decode predicted occupancies only for periodic logging or the
            # batch selected for visualization
            output_visual = output_save and t + 2 == num_batches
            compute_pred_occs = (t % 20 == 0) or output_visual
            if compute_pred_occs:
                pred_occs = [None] * args.num_hierarchy_levels
                for h in range(args.num_hierarchy_levels):
                    factor = 2**(args.num_hierarchy_levels - h - 1)
                    pred_occs[h] = [None] * args.batch_size
                    if len(output_occs[h][0]) == 0:
                        continue
                    for b in range(args.batch_size):
                        # last column of the sparse locations is the batch index
                        batchmask = output_occs[h][0][:, -1] == b
                        locs = output_occs[h][0][batchmask][:, :-1]
                        vals = torch.nn.Sigmoid()(
                            output_occs[h][1][:, 0].detach()[batchmask]) > 0.5
                        pred_occs[h][b] = locs[vals.view(-1)]

            val_losses[0].append(loss.item())
            for h in range(args.num_hierarchy_levels):
                val_losses[h + 1].append(losses[h])
                target = target_for_occs[h].byte()
                if compute_pred_occs:
                    iou = loss_util.compute_iou_sparse_dense(
                        pred_occs[h], target, args.use_loss_masking)
                    val_ious[h].append(iou)
            val_losses[args.num_hierarchy_levels + 1].append(losses[-1])

            if len(output_sdf[0]) > 0:
                output_sdf = [output_sdf[0].detach(), output_sdf[1].detach()]
            # periodic surface L1 statistics, only when the sdf loss is active
            if loss_weights[-1] > 0 and t % 20 == 0:
                val_l1preds.append(
                    loss_util.compute_l1_predsurf_sparse_dense(
                        output_sdf[0], output_sdf[1], target_for_sdf, None,
                        False, args.use_loss_masking, known).item())
                val_l1tgts.append(
                    loss_util.compute_l1_tgtsurf_sparse_dense(
                        output_sdf[0], output_sdf[1], target_for_sdf,
                        args.truncation, args.use_loss_masking, known))

            if output_visual:
                # gather per-sample sparse sdf predictions for visualization
                vis_pred_sdf = [None] * args.batch_size
                if len(output_sdf[0]) > 0:
                    for b in range(args.batch_size):
                        mask = output_sdf[0][:, -1] == b
                        if len(mask) > 0:
                            vis_pred_sdf[b] = [
                                output_sdf[0][mask].cpu().numpy(),
                                output_sdf[1][mask].squeeze().cpu().numpy()
                            ]
                inputs = [inputs[0].cpu().numpy(), inputs[1].cpu().numpy()]
                for h in range(args.num_hierarchy_levels):
                    for b in range(args.batch_size):
                        if pred_occs[h][b] is not None:
                            pred_occs[h][b] = pred_occs[h][b].cpu().numpy()
                data_util.save_predictions(
                    os.path.join(args.save, 'iter%d-epoch%d' % (iter, epoch),
                                 'val'), sample['name'], inputs,
                    target_for_sdf.cpu().numpy(),
                    [x.cpu().numpy() for x in target_for_occs], vis_pred_sdf,
                    pred_occs, sample['world2grid'], args.vis_dfs,
                    args.truncation)
    #took = time.time() - start
    return val_losses, val_l1preds, val_l1tgts, val_ious
# ---- top-level script: build YOLO training targets and run a TF1 session ----
# Load ground-truth boxes and convert them into per-cell training targets.
with open("./boxes.json",'r') as load_f:
    target = json.load(load_f)
result = target_handle(target,H,W,B,C)

# split the combined target tensor into coordinate / confidence / class parts
targets = {'coords':[],'confs':[],'probs':[]}
targets['coords'] = result[:,:,:,0:4]
targets['confs'] = np.reshape(result[:,:,:,4:5],[-1,H*W,B])
targets['probs'] = result[:,:,:,5:5+C]

# output grid size: the network downsamples the input by a factor of 32
output_sizes = input_size[0]//32, input_size[1]//32

# per-term loss scales: class probs, object conf, no-object conf, coordinates
sprob=np.ones([1,1,1,80])
sconf=np.ones([1,1,1,1])
snoob=np.ones([1,1,1,1])
scoor=np.ones([1,1,1,4])

train_op,loss,preds,confs_loss = compute_loss(model_output,targets,anchors,(sprob,sconf,snoob,scoor),num_classes=80)

with tf.Session() as sess:
    init = tf.global_variables_initializer()
    sess.run(init)
    # NOTE(review): each iteration runs the graph twice (once for the loss
    # readout, once for the update), so the printed loss is pre-update.
    for i in range(Itle):
        print_loss = sess.run(loss,{tf_image:image_cp})
        sess.run(train_op,{tf_image:image_cp})
        print(print_loss)
    # decode the final predictions — assumed to run once after the training
    # loop (original indentation was lost; confirm against the source)
    a,b,c = decode(model_output=preds,output_sizes=output_sizes, num_class=C,anchors=anchors)
    a1 = sess.run(a,{tf_image:image_cp})
    b1 = sess.run(b,{tf_image:image_cp})
    c1 = sess.run(c,{tf_image:image_cp})
    d = sess.run(confs_loss,{tf_image:image_cp})
    print(d)
    #print(result)
def evaluation(model, dataset, device, save_mask=True, plot_roc=True, print_metric=True):
    """
    Function to perform an evaluation of a trained model. We compute different
    metrics show in the dictionary to_plot_metrics and plot the ROC over
    different thresholds.
    :param model: a trained model
    :param dataset: dataset of images
    :param device: GPU or CPU. Used to transfer the dataset to the right device.
    :param save_mask: Boolean to call or not saveMask to plot the mask predicted by the model
    :param plot_roc: Boolean to plot and save the ROC computer over the different thresholds
    :param print_metric: Boolean to plot or not the different metrics computed over the thresholds
    :return: the dictionary containing the metrics
    """
    # Set model modules to eval
    model.eval()
    loss = 0
    last_masks = [None] * len(dataset)
    last_truths = [None] * len(dataset)

    # thresholds for the probabilities defined in the feature maps to classifiy the pixels
    thesholds = [
        0, 0.0000001, 0.000001, 0.000005, 0.00001, 0.000025, 0.00005, 0.0001,
        0.00025, 0.0005, 0.001, 0.005, 0.01, 0.025, 0.05, 0.075, 0.1, 0.2,
        0.4, 0.6, 0.8, 1
    ]
    n_thesholds = len(thesholds)
    #All metrics and measures to be computed for each threshold
    to_plot_metrics = dict([("F1", np.zeros(n_thesholds)),
                            ("Recall", np.zeros(n_thesholds)),
                            ("Precision", np.zeros(n_thesholds)),
                            ("TP", np.zeros(n_thesholds)),
                            ("TN", np.zeros(n_thesholds)),
                            ("FP", np.zeros(n_thesholds)),
                            ("FN", np.zeros(n_thesholds)), ("AUC", 0),
                            ("TPR", np.zeros(n_thesholds)),
                            ("FPR", np.zeros(n_thesholds))])

    with tqdm(desc=f'Validation', unit='img') as progress_bar:
        for i, (image, ground_truth) in enumerate(dataset):
            # drop the leading batch dimension
            image = image[0, ...]
            ground_truth = ground_truth[0, ...]
            last_truths[i] = ground_truth
            image = image.to(device)
            ground_truth = ground_truth.to(device)
            with torch.no_grad():
                mask_predicted = model(image)
            last_masks[i] = mask_predicted

            progress_bar.set_postfix(**{'loss': loss})
            # class weights for the weighted BCE part of the loss
            bce_weight = torch.Tensor([1, 8]).to(device)
            loss += compute_loss(mask_predicted, ground_truth,
                                 bce_weight=bce_weight)
            # accumulate per-threshold metrics in-place
            get_metrics(mask_predicted[0, 0], ground_truth[0],
                        to_plot_metrics, thesholds)
            progress_bar.update()

    if save_mask:
        # visualize masks at the threshold that maximizes F1
        save_masks(last_masks, last_truths, str(device), max_img=50,
                   shuffle=False, color="red",
                   filename="mask_predicted_test.png",
                   threshold=thesholds[np.argmax(to_plot_metrics["F1"])])
    if print_metric:
        print_metrics(to_plot_metrics, len(dataset), "test set")

    # AVERAGING THE METRICS
    nb_images = len(dataset)
    for (k, v) in to_plot_metrics.items():
        to_plot_metrics[k] = v / nb_images

    # ROC
    if plot_roc:
        plt.title('Receiver Operating Characteristic')
        plt.plot(to_plot_metrics["FPR"], to_plot_metrics["TPR"], 'b',
                 label='AUC = %0.2f' % to_plot_metrics["AUC"])
        plt.legend(loc='lower right')
        plt.plot([0, 1], [0, 1], 'r--')
        plt.xlim([0, 1])
        plt.ylim([0, 1])
        plt.ylabel('True Positive Rate')
        plt.xlabel('False Positive Rate')
        #plt.show()
        plt.savefig("ROC.png")
        plt.show()
        plt.close("ROC.png")

    loss /= len(dataset)
    to_plot_metrics["loss"] = loss
    to_plot_metrics["best_threshold"] = thesholds[np.argmax(
        to_plot_metrics["F1"])]
    return to_plot_metrics
# ---- top-level script: reconstruction training loop over corrupted inputs ----
num_epochs = 200
train_loss = []
test_accuracy = []
for epoch in range(num_epochs):
    print(epoch)
    base_model.train(True)
    for i, data in enumerate(trainloader):
        y, _ = data
        # corrupt the clean image; the model learns to reconstruct y from x
        x = screwed_transform(y)
        x, y = x.to(device), y.to(device)
        loss = compute_loss(x, y, base_model)
        loss.backward()
        opt.step()
        opt.zero_grad()
        train_loss.append(loss.to("cpu").data.numpy())
        if i % 10 == 0:
            # print the mean of the losses since the last report, then reset
            print(np.mean(train_loss), end=" ")
            train_loss = []
    # evaluation pass with gradients disabled
    with torch.no_grad():
        base_model.train(False)
        for i, data in enumerate(testloader):
            y, _ = data
            h, w = y.shape[2], y.shape[3]
            # NOTE(review): the evaluation loop body appears truncated here in
            # the available source — the rest of the loop is missing.
def training_step(self, batch, batch_idx): """ Lightning calls this inside the training loop with the data from the training dataloader passed in as `batch`. """ ## forward pass #x, y = batch #x = x.view(x.size(0), -1) #y_hat = self(x) ## calculate loss #loss_val = self.loss(y, y_hat) #tqdm_dict = {'train_loss': loss_val} #output = OrderedDict({ #'loss': loss_val, #'progress_bar': tqdm_dict, #'log': tqdm_dict #}) ## can also return just a scalar instead of a dict (return loss_val) #return output batch_size = self.hparams.batch_size num_hierarchy_levels = self.hparams.num_hierarchy_levels truncation = self.hparams.truncation use_loss_masking = self.hparams.use_loss_masking logweight_target_sdf = self.hparams.logweight_target_sdf weight_missing_geo = self.hparams.weight_missing_geo sample = batch sdfs = sample['sdf'] # TODO: fix it #if sdfs.shape[0] < batch_size: # continue # maintain same batch size for training inputs = sample['input'] known = sample['known'] hierarchy = sample['hierarchy'] for h in range(len(hierarchy)): hierarchy[h] = hierarchy[h].cuda() if use_loss_masking: known = known.cuda() inputs[0] = inputs[0].cuda() inputs[1] = inputs[1].cuda() target_for_sdf, target_for_occs, target_for_hier = loss_util.compute_targets(sdfs.cuda(), hierarchy, num_hierarchy_levels, truncation, use_loss_masking, known) # TODO: update #loss_weights = self.model._loss_weights _iter = self._iter_counter loss_weights = get_loss_weights(_iter, self.hparams.num_hierarchy_levels, self.hparams.num_iters_per_level, self.hparams.weight_sdf_loss) output_sdf, output_occs = self(inputs, loss_weights) loss, losses = loss_util.compute_loss(output_sdf, output_occs, target_for_sdf, target_for_occs, target_for_hier, loss_weights, truncation, logweight_target_sdf, weight_missing_geo, inputs[0], use_loss_masking, known) tqdm_dict = {'train_loss': loss} output = OrderedDict({ 'loss': loss, 'progress_bar': tqdm_dict, 'log': tqdm_dict }) self._iter_counter += 1 return output
def main(net_config, ckpt_for_init):
    """Train the network described by `net_config`, optionally warm-starting
    from `ckpt_for_init`; checkpoints are saved only when the validation loss
    improves on the best seen so far.
    """
    ## load the config
    config = configs.Config(net_config)

    ## set the logger
    # only recreate the log directory when training from scratch
    re_create_dir = False
    if ckpt_for_init == "":
        re_create_dir = True
    log_dir = helper.make_dir([config.log_dir], re_create_dir = re_create_dir)
    log_file = os.path.join(log_dir, config.net_config + '.info')
    logger = helper.Logger(log_file)
    logger.add(config.config_str, do_print=True)

    ## load the dasets from the csv file (train, val, feat_len)
    data = input_data.load_datasets(config.input_csv)
    # data has train.next_batch(xx) test.images. test.labels
    feat_len = data.feat_len

    ## set the input placeholders
    layer = 'input'
    with tf.name_scope(layer) as scope:
        x = tf.placeholder(tf.float32, [None, feat_len], name='input')
        y = tf.placeholder(tf.float32, [None, 1], name = 'output')
        # dropout keep-probability is fed at run time (train vs. validation)
        keep_prob = tf.placeholder(tf.float32, name = 'keep_prob')

    ## call inference and compute the output
    y_ = deepnets.inference(config, input_tensors = {"x": x, "keep_prob": keep_prob})

    ## set the global step
    global_step = tf_utils.get_global_step()

    ## do training
    with tf.name_scope('training') as scope:
        train_loss = loss.compute_loss(est=y_, gt=y, loss_func= config.train_loss)
        train_summary = tf.summary.scalar('train_loss', train_loss)
        # train_step = tf.train.GradientDescentOptimizer(LEARNING_RATE).minimize(train_cost)
        train_step = tf.train.AdamOptimizer(config.learning_rate).minimize(train_loss, global_step=global_step)

    ## tensors to compute the validatoin loss
    with tf.name_scope('validation') as scope:
        val_loss = loss.compute_loss(est=y_, gt=y, loss_func= config.test_loss)
        val_summary = tf.summary.scalar('val_loss', val_loss)

    init_op = tf.initialize_all_variables()
    sess = tf.Session()
    sess.run(init_op)

    ## saving and restoring operations
    restore_variables = tf_utils.get_model_varaibles() +\
        tf.get_collection("GLOBAL_STEP")+\
        tf.get_collection('BN_VARIABLES')
    saver = tf.train.Saver(restore_variables)
    step_init = tf_utils.restore_model(config, sess, restore_variables, ckpt_for_init, logger)

    # write the graph (both txt and binary)
    tf.train.write_graph(sess.graph_def, log_dir, config.net_config + '_graph.pb', as_text=False)
    tf.train.write_graph(sess.graph_def, log_dir, config.net_config + '_graph.txt', as_text=True)
    summary_writer = tf.summary.FileWriter(log_dir, sess.graph)

    # only saving the checkpoints if the loss is better than the previous one
    last_saved_loss = 100.0
    for step in range(step_init, config.max_steps):
        # do the optimisation
        batch_x, batch_y = data.train.next_batch(config.batch_size)
        feed = {x: batch_x, y:batch_y, keep_prob: 0.6}  # keep 60% of units while training
        _, t_loss, t_summary = sess.run([train_step, train_loss, train_summary], feed_dict=feed)
        summary_writer.add_summary(t_summary, step)

        # do the validataion for every 10th step
        if(step%10 ==0):
            feed = {x:data.val.features, y: data.val.output, keep_prob: 1.0}
            v_loss, v_summary = sess.run([val_loss, val_summary], feed_dict=feed)
            summary_writer.add_summary(v_summary, step)

        #save the model for every 500th step
        if(step%500 ==0):
            logger.add('step {:05d} | train_loss {:f} | val_loss {:f}'.format(step, t_loss, v_loss), do_print=True)
            if v_loss < last_saved_loss:
                checkpoint_path = os.path.join(log_dir, config.net_config + '.ckpt')
                saver.save(sess, checkpoint_path, global_step=step)
                logger.save()
                last_saved_loss = v_loss
def main(args):
    """Train a UNet to predict optical flow from event-camera voxel grids.

    Args:
        args: namespace with at least `h5_file_path` (HDF5 event dataset path).
    """
    # dataloader configuration: random windows of 60k events, 16 temporal bins
    dataset_kwargs = {'transforms': {},
                      'max_length': None,
                      'sensor_resolution': None,
                      'preload_events': False,
                      'num_bins': 16,
                      'voxel_method': {'method': 'random_k_events',
                                       'k': 60000,
                                       't': 0.5,
                                       'sliding_window_w': 500,
                                       'sliding_window_t': 0.1}
                      }
    unet_kwargs = {
        'base_num_channels': 32,  # written as '64' in EVFlowNet tf code
        'num_encoders': 4,
        'num_residual_blocks': 2,  # transition
        'num_output_channels': 2,  # (x, y) displacement
        'skip_type': 'concat',
        'norm': None,
        'use_upsample_conv': True,
        'kernel_size': 3,
        'channel_multiplier': 2,
        'num_bins': 16
    }

    torch.autograd.set_detect_anomaly(True)
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    ev_loader = EventDataLoader(args.h5_file_path,
                                batch_size=1,
                                num_workers=6,
                                shuffle=True,
                                pin_memory=True,
                                dataset_kwargs=dataset_kwargs)

    H, W = ev_loader.H, ev_loader.W

    model = UNet(unet_kwargs)
    model = model.to(device)
    model.train()

    # pad the sensor resolution so it is compatible with the 4 encoder stages
    crop = CropParameters(W, H, 4)

    print("=== Let's use", torch.cuda.device_count(), "GPUs!")
    if torch.cuda.device_count() > 1:
        model = nn.DataParallel(model)

    optimizer = torch.optim.Adam(model.parameters(), lr=1e-5, betas=(0.9, 0.999))
    # optimizer = torch.optim.Adam(model.parameters(), lr=1e-4, betas=(0.9, 0.999), weight_decay=0.01)

    # raise
    # tmp_voxel = crop.pad(torch.randn(1, 9, H, W).to(device))
    # F, P = profile(model, inputs=(tmp_voxel, ))

    for idx in range(10):  # epochs over the dataset
        # for i, item in enumerate(tqdm(ev_loader)):
        for i, item in enumerate(ev_loader):
            events = item['events']  # presumably (xs, ys, ts, ps) tuples — see unpack below
            voxel = item['voxel'].to(device)
            voxel = crop.pad(voxel)

            model.zero_grad()
            optimizer.zero_grad()

            # scale network output by 10, then clamp the flow to +/-40 pixels
            flow = model(voxel) * 10
            flow = torch.clamp(flow, min=-40, max=40)

            loss = compute_loss(events, flow)
            loss.backward()
            # cvshow_voxel_grid(voxel.squeeze()[0:2].cpu().numpy())
            # raise
            optimizer.step()

            # every 10 iterations: log scalars and render a visual sanity check
            if i % 10 == 0:
                print(idx, i, '\t',
                      "{0:.2f}".format(loss.data.item()),
                      "{0:.2f}".format(torch.max(flow[0, 0]).item()),
                      "{0:.2f}".format(torch.min(flow[0, 0]).item()),
                      "{0:.2f}".format(torch.max(flow[0, 1]).item()),
                      "{0:.2f}".format(torch.min(flow[0, 1]).item()),
                      )
                xs, ys, ts, ps = events
                print_voxel = voxel[0].sum(axis=0).cpu().numpy()
                print_flow = flow[0].clone().detach().cpu().numpy()
                # warp only positive-polarity events (ps == 1) with the predicted flow
                print_co = warp_events_with_flow_torch((xs[0][ps[0] == 1], ys[0][ps[0] == 1],
                                                        ts[0][ps[0] == 1], ps[0][ps[0] == 1]),
                                                       flow[0].clone().detach(),
                                                       sensor_size=(H, W))
                print_co = crop.pad(print_co)
                print_co = print_co.cpu().numpy()
                cvshow_all(idx=idx * 10000 + i, voxel=print_voxel,
                           flow=flow[0].clone().detach().cpu().numpy(),
                           frame=None, compensated=print_co)
def train_model(model, num_epochs, batch_size, learning_rate, device, n_augmentation, train_dataset,
                test_dataset, reload, save_model):
    """Train `model`, evaluate after every epoch, and persist weights/metrics.

    Args:
        model: torch.nn.Module to train; must expose a `.name` attribute.
        num_epochs: number of epochs to run in this call.
        batch_size: batch size used when rebuilding the augmented train set.
        learning_rate: initial Adam learning rate (halved adaptively).
        device: torch.device to train on.
        n_augmentation: number of random augmentations per image.
        train_dataset: initial training dataloader (rebuilt each epoch).
        test_dataset: dataloader used by `evaluation` after each epoch.
        reload: if True, resume from 'Weights/last.pth' / 'Loss/last.pth'.
        save_model: if True, save weights and loss/metric curves at the end.
    """
    logging.info(f'''Starting training :
        Type : {model.name}
        Epochs: {num_epochs}
        Batch size: {batch_size}
        Data Augmentation: {n_augmentation}
        Learning rate: {learning_rate}
        Device: {device.type}
        Reloading model : {reload}
        Saving model : {save_model}''')

    # Variables initialization
    if reload:
        model.load_state_dict(torch.load('Weights/last.pth', map_location=torch.device(device)))
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
    last_masks = [None] * len(train_dataset)
    last_truths = [None] * len(train_dataset)
    prev_epochs = 0
    losses_train = []
    losses_test = []
    losses_test_19 = []
    losses_test_91 = []
    # seed the metric curves with a "chance level" starting point
    metrics_idx = [0]
    auc = [0.5]
    f1_score = [0]

    # Reloading previous runs
    if reload:
        try:
            prev_loss = np.loadtxt('Loss/last.pth')
            losses_train = list(prev_loss[:, 0])
            losses_test = list(prev_loss[:, 1])
            losses_test_19 = list(prev_loss[:, 2])
            losses_test_91 = list(prev_loss[:, 3])
            prev_epochs = len(losses_train)
            prev_metrics = np.loadtxt('Loss/last_metrics.pth')
            metrics_idx = list(prev_metrics[:, 0])
            auc = list(prev_metrics[:, 1])
            f1_score = list(prev_metrics[:, 2])
        # FIX: a bare `except:` also swallowed KeyboardInterrupt/SystemExit;
        # catch only the failures loadtxt/indexing can actually raise.
        except (OSError, ValueError, IndexError):
            print("Failed to load previous loss values")

    changed = 10  # cool-down counter between learning-rate halvings

    # EPOCH MAIN LOOP
    for epochs in range(0, num_epochs):
        # New dataset with random augmentation at each epoch
        train_dataset = load_dataset(IMAGE_NUM[0:22], n_augmentation, batch_size=batch_size)

        # Adaptive learning rate
        logging.info(f'Epoch {epochs}')
        if len(losses_train) > 100:
            # BUGFIX: `losses_train[-1:-4]` is always an empty slice, so the norm
            # was always 0.0 and the condition fired on every pass. Use the last
            # few recorded losses instead.
            if np.linalg.norm(losses_train[-4:-1]) < 0.01 and changed < 1:
                changed = 10
                learning_rate /= 2
                logging.info(f'Learning rate going to {learning_rate}')
                # BUGFIX: assigning `optimizer.lr` has no effect on torch
                # optimizers — the learning rate lives in the param groups.
                for param_group in optimizer.param_groups:
                    param_group['lr'] = learning_rate
            else:
                changed -= 1
        torch.autograd.set_detect_anomaly(True)

        loss_train = 0
        loss_test = 0
        loss_test_19 = 0
        loss_test_91 = 0

        # Every epoch has a training and validation phase
        # TRAIN
        with tqdm(desc=f'Epoch {epochs}', unit='img') as progress_bar:
            model.train()
            for i, (images, ground_truth) in enumerate(train_dataset):
                # Get the correct data from the dataloader (drop leading dim)
                images = images[0, ...]
                ground_truth = ground_truth[0, ...]

                # Upload the images to the device
                images = images.to(device)
                last_truths[i] = ground_truth  # keep track to save the masks as images
                ground_truth = ground_truth.to(device)

                # Forward propagation
                mask_predicted = model(images)
                last_masks[i] = mask_predicted  # keep track to save the masks as images

                # Compute loss (class 1 weighted 8x against class 0 in the BCE term)
                bce_weight = torch.Tensor([1, 8]).to(device)
                loss = compute_loss(mask_predicted, ground_truth, bce_weight=bce_weight)
                loss_train += loss.item() / len(train_dataset)
                progress_bar.set_postfix(**{'loss': loss.item()})

                # Zero the gradient and back propagation
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()
                progress_bar.update(1)

        # TEST
        test_metrics = evaluation(model, test_dataset, device, save_mask=False,
                                  plot_roc=False, print_metric=False)
        loss_test = test_metrics["loss"]

        # Metrics bookkeeping
        logging.info(f'Train loss {loss_train}')
        logging.info(f'Test loss {loss_test}')
        losses_train.append(loss_train)
        losses_test.append(loss_test)
        losses_test_19.append(loss_test_19)
        losses_test_91.append(loss_test_91)
        metrics_idx.append(prev_epochs + epochs)
        auc.append(test_metrics["AUC"])
        f1_score.append(np.max(test_metrics["F1"]))
    # END OF EPOCH MAIN LOOP

    # Save the predicted masks in an image
    save_masks(last_masks, last_truths, str(device), max_img=50, shuffle=False,
               threshold=test_metrics["best_threshold"])
    logging.info(f'Best threshold {test_metrics["best_threshold"]}')

    # Save model weights and metrics
    current_datetime = datetime.datetime.now().strftime('%Y-%m-%d_%H-%M-%S')
    if save_model:
        placeholder_file('Weights/last.pth')
        torch.save(model.state_dict(), 'Weights/last.pth')
        placeholder_file('Weights/' + current_datetime + "-" + str(prev_epochs + num_epochs) + '.pth')
        torch.save(model.state_dict(),
                   'Weights/' + current_datetime + "-" + str(prev_epochs + num_epochs) + '.pth')
        logging.info(f'Model saved')

        # Save losses (one timestamped copy plus the 'last' copy used by reload)
        loss_to_save = np.stack([np.asarray(losses_train), np.asarray(losses_test),
                                 np.asarray(losses_test_19), np.asarray(losses_test_91)], axis=1)
        placeholder_file('Loss/' + 'learning_' + str(learning_rate) + '_epoch_' + str(num_epochs)
                         + '_time_' + current_datetime + '.pth')
        np.savetxt('Loss/' + 'learning_' + str(learning_rate) + '_epoch_' + str(num_epochs)
                   + '_time_' + current_datetime + '.pth', loss_to_save)
        placeholder_file('Loss/last.pth')
        np.savetxt('Loss/last.pth', loss_to_save)

        # Save other metrics
        metrics_to_save = np.stack([np.asarray(metrics_idx), np.asarray(auc),
                                    np.asarray(f1_score)], axis=1)
        placeholder_file('Loss/last_metrics.pth')
        np.savetxt('Loss/last_metrics.pth', metrics_to_save)

    # Plot train and test losses and metrics
    plt.plot([i for i in range(0, len(losses_train))], losses_train,
             label='Train Loss = ' + str(round(losses_train[-1], 3)))
    plt.plot([i for i in range(0, len(losses_test))], losses_test,
             label='Test Loss = ' + str(round(losses_test[-1].item(), 3)))
    plt.plot(metrics_idx, [1 - auc_ for auc_ in auc],
             label='1 - AUC (AUC = ' + str(round(float(auc[-1]), 3)) + ')')
    plt.plot(metrics_idx, [1 - f1 for f1 in f1_score],
             label='1 - F1 (F1 = ' + str(round(float(f1_score[-1]), 3)) + ')')
    plt.legend()
    plt.ylim(bottom=0, top=1)
    plt.xlabel("Epochs")
    plt.ylabel("Metric")
    plt.savefig("Loss.png")
    plt.show()
    # NOTE(review): plt.close takes a figure or figure label; "Loss.png" only
    # matches a figure literally labelled that — likely a no-op. Kept as-is.
    plt.close("Loss.png")
yield left_images.astype(np.float32), right_images.astype(np.float32) epochs = 50 model = MonodepthNetwork() optimizer = tf.keras.optimizers.Adam() image_paths_txt = "/media/yang/e2053f1d-2479-4407-a2f3-7d7c3bfd5f9c/kitti_raw/kitti_train_files.txt" writer = tf.summary.create_file_writer("./log") trainset = DataGenerator(image_paths_txt, 4) for epoch in range(epochs): for step in range(5000): with tf.GradientTape() as tape: left_images, right_images = next(trainset) lr_disp = model(left_images, training=True) image_loss, disp_gradient_loss, lr_loss = compute_loss(left_images, right_images, lr_disp) total_loss = image_loss + disp_gradient_loss + lr_loss gradients = tape.gradient(total_loss, model.trainable_variables) optimizer.apply_gradients(zip(gradients, model.trainable_variables)) print("EPOCH %2d STEP %3d total_loss %.6f image_loss %.6f disp_gradient_loss %.6f lr_loss %.6f" %( epoch, step, total_loss.numpy(), image_loss.numpy(), disp_gradient_loss.numpy(), lr_loss.numpy())) # writing summary data global_steps = step + epoch * 5000 with writer.as_default(): tf.summary.scalar("loss/total_loss", total_loss, step=global_steps) tf.summary.scalar("loss/image_loss", image_loss, step=global_steps) tf.summary.scalar("loss/gradient_loss", disp_gradient_loss, step=global_steps) tf.summary.scalar("loss/lr_loss", lr_loss, step=global_steps) writer.flush()
def compute_gradients(model, x, y, ae_type): with tf.GradientTape() as tape: loss = compute_loss(model, x, y, ae_type) return tape.gradient(loss, model.trainable_variables), loss
def create_model(dataset, aug_dataset, args):
    """Build the neural-texture rendering graph, its losses, train ops and summaries.

    Args:
        dataset: main dataset tensors (uv, index, basis, color, mask).
        aug_dataset: augmented-view dataset tensors; only used when args.with_aug.
        args: configuration namespace (loss type, lr, LDR flag, texture sizes, ...).

    Returns:
        A Model namedtuple-like object bundling train_op, aug_train_op,
        summary_op, loss, vars and the reconstructed output image.
    """
    # define network; the augmented branch below reuses these variables
    with tf.compat.v1.variable_scope("OffsetNetwork"):
        with tf.compat.v1.variable_scope("create_neural_texture"):
            neural_texture = create_neural_texture(args)
        sampled_texture, reduced_basis, multiplied_texture, output, reconstruct = neural_render(
            neural_texture, dataset.uv, dataset.index, dataset.basis, args)
    if args.with_aug:
        # second render pass on augmented views, sharing weights via reuse=True
        with tf.compat.v1.variable_scope("OffsetNetwork", reuse=True):
            aug_sampled_texture, aug_reduced_basis, aug_multiplied_texture, aug_output, aug_reconstruct = neural_render(
                neural_texture, aug_dataset.uv, aug_dataset.index, aug_dataset.basis, args)

    # loss and train_op
    loss = tf.zeros(shape=(), dtype=tf.float32)
    loss_aug = tf.zeros(shape=(), dtype=tf.float32)
    if args.LDR:
        # quantize the masked target when training against LDR images
        target_image = quantize(dataset.color * dataset.mask, args.keep_max_val)
    else:
        target_image = dataset.color * dataset.mask
    target = args.mapper.map_input(target_image)
    loss += compute_loss(output, target, args.loss)
    if args.with_aug:
        aug_target = args.mapper.map_input(aug_dataset.color * aug_dataset.mask)
        loss_aug += compute_loss(aug_output, aug_target, args.loss)

    tf_vars = tf.trainable_variables()
    print("[info] Pameters: #%d, Variables: #%d" %
          (compute_number_of_parameters(tf_vars), len(tf_vars)))
    # Adam-style hyper-parameters (lr, beta1=0.9, beta2=0.999) — see create_train_op
    train_op = create_train_op(args.lr, 0.9, 0.999, loss, tf_vars, "all", args)
    if args.with_aug:
        aug_train_op = create_train_op(args.lr, 0.9, 0.999, loss_aug, tf_vars, "aug_all", args)
    else:
        aug_train_op = None

    # visualize
    summary_writer = GDSummaryWriter(args.batch_size)
    with tf.name_scope("tensorboard_visualize"):
        # scalar
        summary_writer.add_scalar("loss", loss)
        if args.with_aug:
            summary_writer.add_scalar("loss(aug)", loss_aug)
        # image
        summary_writer.add_image("image(GT)", dataset.color * dataset.mask,
                                 rescale_factor=args.rescale_output)
        summary_writer.add_image("image(recon)", reconstruct, rescale_factor=args.rescale_output)
        summary_writer.add_image("sampled_texture", sampled_texture, channels=args.texture_channels)
        summary_writer.add_image("basis", reduced_basis, channels=args.texture_channels)
        summary_writer.add_image("multiplied_texture", multiplied_texture, channels=args.texture_channels)
        if args.with_aug:
            summary_writer.add_image("aug_image(GT)", aug_dataset.color * aug_dataset.mask,
                                     rescale_factor=args.rescale_output)
            summary_writer.add_image("aug_image(recon)", aug_reconstruct,
                                     rescale_factor=args.rescale_output)
            summary_writer.add_image("aug_sampled_texture", aug_sampled_texture,
                                     channels=args.texture_channels)
            summary_writer.add_image("aug_basis", aug_reduced_basis, channels=args.texture_channels)
            summary_writer.add_image("aug_multiplied_texture", aug_multiplied_texture,
                                     channels=args.texture_channels)
        # first three channels of every texture pyramid level, clipped to [0, 1]
        for i in range(args.texture_levels):
            summary_writer.add_image('neural_texture_level_%d(0-2)' % i,
                                     tf.clip_by_value(
                                         neural_texture[i][..., :3][tf.newaxis, ...], 0, 1),
                                     channels=3, batch=1)
    summary_op = tf.summary.merge(summary_writer.lists)

    return Model(train_op=train_op, aug_train_op=aug_train_op, summary_op=summary_op,
                 loss=loss, vars=tf_vars, output=reconstruct)
batch_size = 10 train_generator = train.generate(batch_size=batch_size, ssd_box_encoder=encoder, train=True) model_name = 'ssd300' epochs = 1 for epoch in range(epochs): for steps in range(int(np.ceil(n_train_samples / batch_size))): data = next(train_generator) x_batch = data[0] y_true = data[1] with tf.GradientTape() as tape: y_pred = model(x_batch) loss = compute_loss(y_true, y_pred) print('Epoch : {} , Step : {} , Loss : {} '.format( epoch, steps, loss)) grads = tape.gradient(loss, model.variables) optimizer.apply_gradients(zip(grads, model.variables)) if steps % 25 == 0: model.save_weights('weights/{}_step_{}_weights.h5'.format( model_name, steps))
def train(args):
    """Train the CNNVocoder on mel-spectrogram / waveform pairs.

    Args:
        args: parsed CLI namespace with out_dir, log_dir, checkpoint_dir, and
            optional checkpoint_path / warm_start for resuming.
    """
    print(hparams_debug_string())
    # prepare logging, checkpoint directories
    prepare_directories(args.out_dir, args.log_dir, args.checkpoint_dir)
    # create model
    model = CNNVocoder(n_heads=hparams.n_heads,
                       layer_channels=hparams.layer_channels,
                       pre_conv_channels=hparams.pre_conv_channels,
                       pre_residuals=hparams.pre_residuals,
                       up_residuals=hparams.up_residuals,
                       post_residuals=hparams.post_residuals)
    model.apply(weights_init)
    model = model.cuda()
    # create optimizer
    lr = hparams.lr
    optimizer = optim.Adam(model.parameters(), lr=lr,
                           weight_decay=hparams.weight_decay)
    dataloader = DataLoader(MelDataset(hparams.file_list),
                            batch_size=hparams.batch_size,
                            shuffle=True, num_workers=hparams.n_workers)
    steps = 0
    checkpoint_dir = os.path.join(args.out_dir, args.checkpoint_dir)
    log_dir = os.path.join(args.out_dir, args.log_dir)
    writer = SummaryWriter(log_dir)
    # load model from checkpoint (also restores lr and the global step counter)
    if args.checkpoint_path:
        model, optimizer, lr, steps = load_checkpoint(
            args.checkpoint_path, model, optimizer, warm_start=args.warm_start)
    for i in range(hparams.epochs):
        print('Epoch: {}'.format(i))
        for idx, batch in enumerate(dataloader):
            steps += 1
            wav, spec = batch[0].cuda(), batch[1].cuda()
            optimizer.zero_grad()
            # NOTE(review): pre_predict is never used below — presumably meant to
            # feed a pre-net loss term; confirm against compute_loss.
            pre_predict, predict = model(spec)
            post_loss, l1, l2, l3 = compute_loss(predict, wav)
            loss = post_loss
            # NOTE(review): this log passes post_loss twice, so the "pre_loss"
            # field actually prints post_loss again.
            print(
                'Step: {:8d}, Loss = {:8.4f}, post_loss = {:8.4f}, pre_loss = {:8.4f}'
                .format(steps, loss, post_loss, post_loss))
            # abort the whole training run on a NaN loss
            if torch.isnan(loss).item() != 0:
                print('nan loss, ignore')
                return
            loss.backward()
            # clip grad norm
            grad_norm = clip_grad_norm_(model.parameters(),
                                        hparams.grad_clip_thresh)
            optimizer.step()
            # log training
            # add_log(writer, loss, p_loss, low_p_loss, phrase_loss, p1, grad_norm, steps)
            add_log(writer, loss, l1, l2, l3, steps)
            if steps > 0 and steps % hparams.checkpoint_interval == 0:
                checkpoint_path = '{}/checkpoint_{}'.format(
                    checkpoint_dir, steps)
                save_checkpoint(checkpoint_path, lr, steps, model, optimizer)
                # saving example: one random item from the current batch
                # NOTE(review): this `idx` reassignment shadows the dataloader
                # loop variable `idx` for the rest of the iteration.
                idx = np.random.randint(wav.shape[0])
                t1 = wav[idx].data.cpu().numpy()
                t2 = predict[idx].data.cpu().numpy()
                audio.save_wav(
                    t2, '{}/generated_{}.wav'.format(checkpoint_dir, steps))
                audio.save_wav(
                    t1, '{}/target_{}.wav'.format(checkpoint_dir, steps))
def validation_step(self, batch, batch_idx): """ Lightning calls this inside the validation loop with the data from the validation dataloader passed in as `batch`. """ #x, y = batch #x = x.view(x.size(0), -1) #y_hat = self(x) #loss_val = self.loss(y, y_hat) ## acc #labels_hat = torch.argmax(y_hat, dim=1) #val_acc = torch.sum(y == labels_hat).item() / (len(y) * 1.0) #val_acc = torch.tensor(val_acc) #if self.on_gpu: #val_acc = val_acc.cuda(loss_val.device.index) #output = OrderedDict({ #'val_loss': loss_val, #'val_acc': val_acc, #}) ## can also return just a scalar instead of a dict (return loss_val) #return output batch_size = self.hparams.batch_size num_hierarchy_levels = self.hparams.num_hierarchy_levels truncation = self.hparams.truncation use_loss_masking = self.hparams.use_loss_masking logweight_target_sdf = self.hparams.logweight_target_sdf weight_missing_geo = self.hparams.weight_missing_geo sample = batch sdfs = sample['sdf'] # TODO: fix it #if sdfs.shape[0] < batch_size: # continue # maintain same batch size for training inputs = sample['input'] known = sample['known'] hierarchy = sample['hierarchy'] for h in range(len(hierarchy)): hierarchy[h] = hierarchy[h].cuda() if use_loss_masking: known = known.cuda() inputs[0] = inputs[0].cuda() inputs[1] = inputs[1].cuda() target_for_sdf, target_for_occs, target_for_hier = loss_util.compute_targets(sdfs.cuda(), hierarchy, num_hierarchy_levels, truncation, use_loss_masking, known) # TODO: update _iter = self._iter_counter loss_weights = get_loss_weights(_iter, self.hparams.num_hierarchy_levels, self.hparams.num_iters_per_level, self.hparams.weight_sdf_loss) output_sdf, output_occs = self(inputs, loss_weights) loss, losses = loss_util.compute_loss(output_sdf, output_occs, target_for_sdf, target_for_occs, target_for_hier, loss_weights, truncation, logweight_target_sdf, weight_missing_geo, inputs[0], use_loss_masking, known) output = OrderedDict({ 'val_loss': loss, }) return output