def main():
    """Entry point: run the pipeline stages selected via command-line flags.

    Stages execute in the order they are declared in ``arguments``.
    """
    parser = argparse.ArgumentParser()
    # (flag name, handler, help text) — declaration order defines execution order.
    arguments = [
        ("preprocess", preprocess_dataset,
         "Preprocess samples - cleaning/filtering of invalid data."),
        ("split", split_dataset,
         "Split dataset in separate folds for training/validation/testing."),
        ("pretrain", prepare_embeddings,
         "Precompute input representations from unlabeled/training data."),
        ("prepare_input", prepare_input,
         "Convert raw inputs to numpy compatible data types."),
        ("train", train, "Train currently selected model."),
        ("test", test, "Run available model on evaluation data.")
        # ("analyse", analyse_dataset),  # WIP
        # ("extract_embeddings", extract_embeddings),  # WIP
    ]
    for arg, _, description in arguments:
        parser.add_argument('--{}'.format(arg), action='store_true', help=description)
    params = parser.parse_args()
    args = parse_config("config.json")
    setup_logging(args)
    set_random_seed(args)
    for arg, fun, _ in arguments:
        # getattr with a default replaces the hasattr/getattr double lookup.
        if getattr(params, arg, False):
            logging.info("Performing {} operation..".format(arg))
            fun(args)
def main(cfg: DictConfig, train_id: str) -> None:
    """Evaluate the best checkpoint of training run ``train_id``.

    Loads ``<cwd>/history/<train_id>/<model>_best.pth`` and writes predictions
    into ``<cwd>/result/<train_id>``. Returns silently when no checkpoint exists.
    """
    cwd = Path.cwd()
    myutil.print_config(cfg)
    # Fixed seed for reproducible evaluation.
    myutil.set_random_seed(0)
    model_file_name = "{}_best.pth".format(cfg.model.name)
    # Guard clause: nothing to evaluate without a trained checkpoint.
    history_dir = cwd / 'history' / train_id
    if not (history_dir / model_file_name).exists():
        return
    # Single idempotent mkdir replaces the original two-step exists/mkdir dance
    # (parents=True also creates the intermediate 'result' directory).
    # NOTE(review): existing files in result_dir are kept, as in the original.
    result_dir = cwd / 'result' / train_id
    result_dir.mkdir(parents=True, exist_ok=True)
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    # Testing
    testset = get_dataset(cfg)
    net = myutil.get_model(cfg)
    # Model was saved wrapped in DataParallel, hence ``net.module``.
    net.module.load_state_dict(torch.load(str(history_dir / model_file_name)))
    predict(result_dir, testset, net, device)
def train(self, device):
    """Train the multi-channel GNN aligner; return (best Hit@1, best epoch)."""
    set_random_seed()
    self.loaded_data.negative_sample()
    # Build the graph network and move it onto the requested device.
    net = GNNChannel(self.sr_ent_num, self.tg_ent_num, self.dim,
                     self.layer_num, self.drop_out, self.channels)
    self.gnn_channel = net
    net.to(device)
    net.train()
    # Optimize only trainable parameters; weight_decay applies L2 regularization.
    trainable = filter(lambda p: p.requires_grad, net.parameters())
    optimizer = Adagrad(trainable, lr=self.learning_rate,
                        weight_decay=self.l2_regularization)
    criterion = AlignLoss(self.margin_gamma)
    best_hit_at_1, best_epoch_num = 0, 0
    for epoch_num in range(1, self.epoch_num + 1):
        net.train()
        optimizer.zero_grad()
        sr_seed_hid, tg_seed_hid, _, _ = net.forward(
            self.loaded_data.train_sr_ent_seeds,
            self.loaded_data.train_tg_ent_seeds)
        loss = criterion(sr_seed_hid, tg_seed_hid)
        loss.backward()
        optimizer.step()
        # Every nega_sample_freq epochs: refresh negatives and check validation.
        # DWY100k resamples on the dataset object, other sets on the trainer.
        if epoch_num % self.nega_sample_freq == 0:
            if 'DWY100k' in str(self.directory):
                self.loaded_data.negative_sample()
            else:
                self.negative_sample()
            hit_at_1 = self.evaluate(epoch_num, net, print_info=False,
                                     device=device)
            if hit_at_1 > best_hit_at_1:
                best_hit_at_1, best_epoch_num = hit_at_1, epoch_num
    print('Model best Hit@1 on valid set is %.2f at %d epoch.'
          % (best_hit_at_1, best_epoch_num))
    return best_hit_at_1, best_epoch_num
def prepare(self):
    """Seed the RNGs, build train/val loaders, and instantiate the ESRGAN model."""
    util.set_random_seed(random.randint(1, 10000))
    torch.backends.cudnn.benchmark = True
    for ds_cfg in self.config['datasets'].values():
        loader = Dataset(ds_cfg).createLoader()
        if loader.phase == 'train':
            iters_per_epoch = int(math.ceil(len(loader) / ds_cfg['minibatch']))
            self.log('Number of train images: {:,d}, iters: {:,d}'.format(
                len(loader), iters_per_epoch))
            self.total_iters = int(self.config['total_iters'])
            # Epoch count derived from the iteration budget.
            self.total_epochs = int(math.ceil(self.total_iters / iters_per_epoch))
            self.log('Total epochs needed: {:d} for iters {:,d}'.format(
                self.total_epochs, self.total_iters))
            self.datasets.append(loader)
        elif loader.phase == 'val':
            self.val_datasets.append(loader)
            self.log('Number of val images in [{:s}]: {:d}'.format(
                ds_cfg['name'], len(loader)))
    self.model = ESRGAN(self.config)
def main():
    """Train SimpleCNN on Fashion-MNIST with a static (graph-mode) input pipeline.

    Uses placeholders + an initializable iterator so the arrays are not baked
    into the graph; logs loss/accuracy curves and saves them as PNGs.
    """
    parser = argparse.ArgumentParser(
        description='TensorFlow Fashion MNIST Example')
    parser.add_argument('--batch-size', type=int, default=100,
                        help='input batch size for training')
    parser.add_argument('--epochs', type=int, default=10,
                        help='number of epochs to train')
    parser.add_argument('--lr', type=float, default=0.001,
                        help='learning rate')
    parser.add_argument('--seed', type=int, default=1,
                        help='random seed')
    parser.add_argument('--log-interval', type=int, default=5,
                        help='how many batches to wait before'
                             ' logging training status')
    parser.add_argument('--eval-interval', type=int, default=100,
                        help='how many batches to wait before'
                             ' evaluate the model')
    args = parser.parse_args()

    start_time = time.time()
    util.set_random_seed(args.seed)
    sess = util.set_session()
    fashion_mnist = keras.datasets.fashion_mnist
    class_names = [
        'T-shirt/top', 'Trouser', 'Pullover', 'Dress', 'Coat', 'Sandal',
        'Shirt', 'Sneaker', 'Bag', 'Ankle boot'
    ]
    (train_images, train_labels), (test_images,
                                   test_labels) = fashion_mnist.load_data()
    train_images, train_labels = preprocess_data(train_images, train_labels,
                                                 num_classes=len(class_names))
    test_images, test_labels = preprocess_data(test_images, test_labels,
                                               num_classes=len(class_names))

    # Feed the data via placeholders so the large arrays are not embedded in
    # the graph as constants.
    features_placeholder = tf.placeholder(train_images.dtype, train_images.shape)
    labels_placeholder = tf.placeholder(train_labels.dtype, train_labels.shape)
    train_dataset = tf.data.Dataset.from_tensor_slices(
        (features_placeholder, labels_placeholder))
    train_dataset = train_dataset.shuffle(10000).batch(args.batch_size)
    iterator = train_dataset.make_initializable_iterator()
    next_element = iterator.get_next()

    model = SimpleCNN(num_classes=len(class_names))
    model.compile(optimizer=keras.optimizers.Adam(lr=args.lr),
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])

    step = 0  # renamed from `iter`, which shadowed the builtin
    train_log = {'iter': [], 'loss': [], 'accuracy': []}
    test_log = {'iter': [], 'loss': [], 'accuracy': []}
    for ep in range(args.epochs):
        # Re-initialize the iterator every epoch so the data is re-shuffled.
        sess.run(iterator.initializer,
                 feed_dict={
                     features_placeholder: train_images,
                     labels_placeholder: train_labels
                 })
        try:
            while True:
                step += 1
                images, labels = sess.run(next_element)
                train_loss, train_acc = model.train_on_batch(images, labels)
                if step % args.log_interval == 0:
                    print(
                        'Epoch: {0:d}/{1:d} Iteration:{2:d} Training Loss:{3:.4f} '
                        'Training Accuracy:{4:.4f}'.format(
                            ep, args.epochs, step, train_loss, train_acc))
                    train_log['iter'].append(step)
                    train_log['loss'].append(train_loss)
                    train_log['accuracy'].append(train_acc)
                if step % args.eval_interval == 0:
                    test_loss, test_acc = model.evaluate(test_images, test_labels)
                    test_log['iter'].append(step)
                    test_log['loss'].append(test_loss)
                    test_log['accuracy'].append(test_acc)
        except tf.errors.OutOfRangeError:
            # Dataset exhausted: end of this epoch.
            pass
    model.summary()

    end_time = time.time()
    print('Elapsed time: {0:.3f}s'.format(end_time - start_time))

    # Save loss and accuracy curves (files first, interactive window last).
    fig = plt.figure()
    plt.plot(train_log['iter'], train_log['loss'], 'r', label='Training')
    plt.plot(test_log['iter'], test_log['loss'], 'b', label='Testing')
    plt.title('Loss')
    plt.legend()
    plt.savefig('static_loss_10.png')
    fig = plt.figure()
    plt.plot(train_log['iter'], train_log['accuracy'], 'r', label='Training')
    plt.plot(test_log['iter'], test_log['accuracy'], 'b', label='Testing')
    plt.title('Accuracy')
    plt.legend()
    plt.savefig('static_accuracy_10.png')
    plt.show()
def main():
    """Nearest-neighbor retrieval in pool5/fc7 feature space of a trained CNN.

    Restores a checkpoint, extracts pool5 and fc7 features for the PASCAL test
    split, queries 5 nearest neighbors (KD-tree, Euclidean) for a fixed set of
    query images, and saves the neighbor images as figures.
    """
    parser = argparse.ArgumentParser(description='TensorFlow Pascal Example')
    parser.add_argument('--batch-size', type=int, default=20,
                        help='input batch size for training')
    parser.add_argument('--epochs', type=int, default=5,
                        help='number of epochs to train')
    parser.add_argument('--lr', type=float, default=0.001,
                        help='learning rate')
    parser.add_argument('--seed', type=int, default=1,
                        help='random seed')
    parser.add_argument('--log-interval', type=int, default=10,
                        help='how many batches to wait before'
                             ' logging training status')
    parser.add_argument('--eval-interval', type=int, default=20,
                        help='how many batches to wait before'
                             ' evaluate the model')
    parser.add_argument('--log-dir', type=str, default='tb',
                        help='path for logging directory')
    parser.add_argument('--data-dir', type=str, default='./VOCdevkit/VOC2007',
                        help='Path to PASCAL data storage')
    args = parser.parse_args()
    util.set_random_seed(args.seed)
    sess = util.set_session()

    test_images, test_labels, test_weights = util.load_pascal(
        args.data_dir, class_names=CLASS_NAMES, split='test')

    model = SimpleCNN(num_classes=len(CLASS_NAMES))
    test_dataset = tf.data.Dataset.from_tensor_slices(
        (test_images, test_labels, test_weights))
    test_dataset = test_dataset.batch(args.batch_size)

    # Restore the trained weights from a fixed checkpoint.
    global_step = tf.train.get_or_create_global_step()
    learning_rate = tf.train.exponential_decay(args.lr, global_step, 5000, 0.5,
                                               staircase=True)
    optimizer = tf.train.MomentumOptimizer(learning_rate=learning_rate,
                                           momentum=0.9)
    checkpoint = tf.train.Checkpoint(optimizer=optimizer, model=model)
    model.build((args.batch_size, 224, 224, 3))
    ckpt_path = "./tb/2019-02-25_10-45-32/"
    status = checkpoint.restore(os.path.join(ckpt_path, "ckpt-60"))
    status.assert_consumed()

    # Indices of the query images within the test split (hoisted to ndarray
    # once instead of converting at every KD-tree query).
    query_ind = np.array([0, 1, 2, 3, 6, 7, 10, 20, 22, 25])

    # Extract pool5 (flattened) and fc7 features batch by batch.
    total_pool5_out = []
    total_fc7_out = []
    for batch, (images, labels, weights) in enumerate(test_dataset):
        pool5_out, fc7_out = model.call_fc7_pool5(images)
        pool5_out = pool5_out.numpy()
        # Flatten the conv feature map to one vector per image.
        pool5_out = pool5_out.reshape((pool5_out.shape[0], -1))
        fc7_out = fc7_out.numpy()
        for i in range(pool5_out.shape[0]):
            total_pool5_out.append(pool5_out[i, :])
            total_fc7_out.append(fc7_out[i, :])
    total_pool5_out = np.array(total_pool5_out)
    total_fc7_out = np.array(total_fc7_out)

    # k=5 neighbors; neighbor 0 is (typically) the query image itself.
    kdt = KDTree(total_pool5_out, metric='euclidean')
    pool5_inds = kdt.query(total_pool5_out[query_ind], k=5,
                           return_distance=False)
    print(pool5_inds)
    kdt = KDTree(total_fc7_out, metric='euclidean')
    fc7_inds = kdt.query(total_fc7_out[query_ind], k=5, return_distance=False)
    print(fc7_inds)

    # Save neighbors 1..4 (skipping the query at index 0) for each query image.
    for i in range(len(query_ind)):
        _save_neighbor_images(test_images, pool5_inds[i, :],
                              "./hw1/figures/caffe_pool5_" + str(i))
        _save_neighbor_images(test_images, fc7_inds[i, :],
                              "./hw1/figures/caffe_fc7_" + str(i))


def _save_neighbor_images(test_images, neighbor_ids, name_prefix):
    """Save neighbor images 1..4 of `neighbor_ids` as '<prefix>_<j>.jpg'."""
    for j in range(1, 5):
        img_id = neighbor_ids[j]
        save_name = name_prefix + "_" + str(j) + ".jpg"
        img = test_images[img_id, :, :, :].astype(np.uint8)
        plt.imshow(img)
        plt.savefig(save_name)
def main():
    """t-SNE visualization of fc7 features for a random 1000-image test subset.

    Restores a trained SimpleCNN checkpoint, extracts fc7 features, embeds
    them in 2-D with t-SNE, and saves/shows a class-colored scatter plot.
    """
    parser = argparse.ArgumentParser(description='TensorFlow Pascal Example')
    parser.add_argument('--batch-size', type=int, default=20,
                        help='input batch size for training')
    parser.add_argument('--epochs', type=int, default=5,
                        help='number of epochs to train')
    parser.add_argument('--lr', type=float, default=0.001,
                        help='learning rate')
    parser.add_argument('--seed', type=int, default=1,
                        help='random seed')
    parser.add_argument('--log-interval', type=int, default=10,
                        help='how many batches to wait before'
                             ' logging training status')
    parser.add_argument('--eval-interval', type=int, default=20,
                        help='how many batches to wait before'
                             ' evaluate the model')
    parser.add_argument('--log-dir', type=str, default='tb',
                        help='path for logging directory')
    parser.add_argument('--data-dir', type=str, default='./VOCdevkit/VOC2007',
                        help='Path to PASCAL data storage')
    args = parser.parse_args()
    util.set_random_seed(args.seed)
    sess = util.set_session()

    test_images, test_labels, test_weights = util.load_pascal(
        args.data_dir, class_names=CLASS_NAMES, split='test')

    # Subsample 1000 random test images to keep t-SNE tractable.
    random_ind = np.random.randint(test_images.shape[0], size=1000)
    test_images_sub = test_images[random_ind, :, :, :]
    test_labels_sub = test_labels[random_ind, :]
    test_weights_sub = test_weights[random_ind, :]

    model = SimpleCNN(num_classes=len(CLASS_NAMES))
    test_dataset = tf.data.Dataset.from_tensor_slices(
        (test_images_sub, test_labels_sub, test_weights_sub))
    test_dataset = test_dataset.batch(args.batch_size)

    # Restore the trained weights from a fixed checkpoint.
    global_step = tf.train.get_or_create_global_step()
    learning_rate = tf.train.exponential_decay(args.lr, global_step, 5000, 0.5,
                                               staircase=True)
    optimizer = tf.train.MomentumOptimizer(learning_rate=learning_rate,
                                           momentum=0.9)
    checkpoint = tf.train.Checkpoint(optimizer=optimizer, model=model)
    model.build((args.batch_size, 224, 224, 3))
    ckpt_path = "./tb/2019-02-25_10-45-32/"
    status = checkpoint.restore(os.path.join(ckpt_path, "ckpt-60"))
    status.assert_consumed()

    # Collect fc7 features batch by batch.
    total_fc7_out = []
    for batch, (images, labels, weights) in enumerate(test_dataset):
        fc7_out = model.call_fc7(images).numpy()
        for i in range(fc7_out.shape[0]):
            total_fc7_out.append(fc7_out[i, :])
    total_fc7_out = np.array(total_fc7_out)

    fc7_out_tsne = TSNE(n_components=2).fit_transform(total_fc7_out)
    print(fc7_out_tsne.shape)
    norm_labels = map_class(test_labels_sub)

    fig = plt.figure(figsize=(8, 8))
    ax = plt.subplot(aspect='equal')
    draw = ax.scatter(fc7_out_tsne[:, 0], fc7_out_tsne[:, 1], c=norm_labels)
    fig.colorbar(draw, ax=ax)
    ax.axis('off')
    # BUGFIX: savefig must come before show() — show() blocks and leaves a
    # cleared canvas, so the original savefig-after-show wrote a blank image.
    plt.savefig("./hw1/figures/tsne.jpg")
    plt.show()
def main():
    """Distributed-capable super-resolution training entry point.

    Parses the YAML option file, sets up (optional) distributed training,
    loggers/tensorboard, dataloaders and the model, then runs the training
    loop with periodic validation (PSNR/SSIM/LPIPS) and checkpointing.
    """
    #### options
    parser = argparse.ArgumentParser()
    parser.add_argument('-opt', type=str, help='Path to option YMAL file.')
    parser.add_argument('--launcher', choices=['none', 'pytorch'], default='none',
                        help='job launcher')
    parser.add_argument('--local_rank', type=int, default=0)
    args = parser.parse_args()
    opt = option.parse(args.opt, is_train=True)

    #### distributed training settings
    if args.launcher == 'none':  # disabled distributed training
        opt['dist'] = False
        rank = -1
        print('Disabled distributed training.')
    else:
        opt['dist'] = True
        init_dist()
        world_size = torch.distributed.get_world_size()
        rank = torch.distributed.get_rank()

    #### loading resume state if exists
    if opt['path'].get('resume_state', None):
        # distributed resuming: all load into default GPU
        device_id = torch.cuda.current_device()
        resume_state = torch.load(
            opt['path']['resume_state'],
            map_location=lambda storage, loc: storage.cuda(device_id))
        option.check_resume(opt, resume_state['iter'])  # check resume options
    else:
        resume_state = None

    #### mkdir and loggers
    if rank <= 0:  # normal training (rank -1) OR distributed training (rank 0)
        if resume_state is None:
            print(opt['path'])
            util.mkdir_and_rename(
                opt['path']['experiments_root'])  # rename experiment folder if exists
            util.mkdirs((path for key, path in opt['path'].items()
                         if not key == 'experiments_root'
                         and 'pretrain_model' not in key
                         and 'resume' not in key and path is not None))
        # config loggers. Before it, the log will not work
        util.setup_logger('base', opt['path']['log'], 'train_' + opt['name'],
                          level=logging.INFO, screen=True, tofile=True)
        util.setup_logger('val', opt['path']['log'], 'val_' + opt['name'],
                          level=logging.INFO, screen=True, tofile=True)
        logger = logging.getLogger('base')
        logger.info(option.dict2str(opt))
        # tensorboard logger
        if opt['use_tb_logger'] and 'debug' not in opt['name']:
            version = float(torch.__version__[0:3])
            if version >= 1.1:  # PyTorch 1.1
                from torch.utils.tensorboard import SummaryWriter
            else:
                logger.info(
                    'You are using PyTorch {}. Tensorboard will use [tensorboardX]'.format(version))
                from tensorboardX import SummaryWriter
            # Pick the first unused trial index for this experiment's log dir.
            trial = 0
            while os.path.isdir('../Loggers/' + opt['name'] + '/' + str(trial)):
                trial += 1
            tb_logger = SummaryWriter(log_dir='../Loggers/' + opt['name'] + '/' + str(trial))
    else:
        util.setup_logger('base', opt['path']['log'], 'train',
                          level=logging.INFO, screen=True)
        logger = logging.getLogger('base')

    # convert to NoneDict, which returns None for missing keys
    opt = option.dict_to_nonedict(opt)

    # -------------------------------------------- ADDED --------------------------------------------
    l1_loss = torch.nn.L1Loss()
    mse_loss = torch.nn.MSELoss()
    calc_lpips = PerceptualLossLPIPS()
    if torch.cuda.is_available():
        l1_loss = l1_loss.cuda()
        mse_loss = mse_loss.cuda()
    # -----------------------------------------------------------------------------------------------

    #### random seed
    seed = opt['train']['manual_seed']
    if seed is None:
        seed = random.randint(1, 10000)
    if rank <= 0:
        logger.info('Random seed: {}'.format(seed))
    util.set_random_seed(seed)

    # BUGFIX: was `torch.backends.cudnn.benckmark` (typo) — it silently set a
    # meaningless attribute and never enabled cuDNN autotuning.
    torch.backends.cudnn.benchmark = True
    # torch.backends.cudnn.deterministic = True

    #### create train and val dataloader
    dataset_ratio = 200  # enlarge the size of each epoch
    for phase, dataset_opt in opt['datasets'].items():
        if phase == 'train':
            train_set = create_dataset(dataset_opt)
            train_size = int(math.ceil(len(train_set) / dataset_opt['batch_size']))
            total_iters = int(opt['train']['niter'])
            total_epochs = int(math.ceil(total_iters / train_size))
            if opt['dist']:
                train_sampler = DistIterSampler(train_set, world_size, rank, dataset_ratio)
                total_epochs = int(math.ceil(total_iters / (train_size * dataset_ratio)))
            else:
                train_sampler = None
            train_loader = create_dataloader(train_set, dataset_opt, opt, train_sampler)
            if rank <= 0:
                logger.info('Number of train images: {:,d}, iters: {:,d}'.format(
                    len(train_set), train_size))
                logger.info('Total epochs needed: {:d} for iters {:,d}'.format(
                    total_epochs, total_iters))
        elif phase == 'val':
            val_set = create_dataset(dataset_opt)
            val_loader = create_dataloader(val_set, dataset_opt, opt, None)
            if rank <= 0:
                logger.info('Number of val images in [{:s}]: {:d}'.format(
                    dataset_opt['name'], len(val_set)))
        else:
            raise NotImplementedError('Phase [{:s}] is not recognized.'.format(phase))
    assert train_loader is not None

    #### create model
    model = Model(opt)

    #### resume training
    if resume_state:
        logger.info('Resuming training from epoch: {}, iter: {}.'.format(
            resume_state['epoch'], resume_state['iter']))
        start_epoch = resume_state['epoch']
        current_step = resume_state['iter']
        model.resume_training(resume_state)  # handle optimizers and schedulers
    else:
        current_step = 0
        start_epoch = 0

    #### training
    logger.info('Start training from epoch: {:d}, iter: {:d}'.format(
        start_epoch, current_step))
    for epoch in range(start_epoch, total_epochs + 1):
        if opt['dist']:
            train_sampler.set_epoch(epoch)
        train_bar = tqdm(train_loader, desc='[%d/%d]' % (epoch, total_epochs))
        for bus, train_data in enumerate(train_bar):
            # validation: first batch of every val_freq-th epoch, master rank only
            if epoch % opt['train']['val_freq'] == 0 and bus == 0 and rank <= 0:
                avg_ssim = avg_psnr = avg_lpips = val_pix_err_f = val_pix_err_nf = val_mean_color_err = 0.0
                print("into validation!")
                idx = 0
                val_bar = tqdm(val_loader, desc='[%d/%d]' % (epoch, total_epochs))
                for val_data in val_bar:
                    idx += 1
                    img_name = os.path.splitext(os.path.basename(val_data['LQ_path'][0]))[0]
                    img_dir = os.path.join(opt['path']['val_images'], img_name)
                    util.mkdir(img_dir)

                    model.feed_data(val_data)
                    model.test()

                    visuals = model.get_current_visuals()
                    sr_img = util.tensor2img(visuals['SR'])  # uint8
                    gt_img = util.tensor2img(visuals['GT'])  # uint8
                    lq_img = util.tensor2img(visuals['LQ'])  # uint8

                    # Save SR images for reference
                    save_sr_img_path = os.path.join(
                        img_dir, '{:s}_{:d}_sr.png'.format(img_name, current_step))
                    save_nr_img_path = os.path.join(
                        img_dir, '{:s}_{:d}_lq.png'.format(img_name, current_step))
                    util.save_img(sr_img, save_sr_img_path)
                    util.save_img(lq_img, save_nr_img_path)

                    # calculate PSNR (metrics operate on the 0-255 range)
                    gt_img = gt_img / 255.
                    sr_img = sr_img / 255.
                    lq_img = lq_img / 255.
                    avg_psnr += util.calculate_psnr(sr_img * 255, gt_img * 255)
                    avg_ssim += util.calculate_ssim(sr_img * 255, gt_img * 255)
                    avg_lpips += calc_lpips(visuals['SR'], visuals['GT'])

                    # ----------------------------------------- ADDED -----------------------------------------
                    val_pix_err_nf += l1_loss(visuals['SR'], visuals['GT'])
                    val_mean_color_err += mse_loss(visuals['SR'].mean(2).mean(1),
                                                   visuals['GT'].mean(2).mean(1))
                    # -----------------------------------------------------------------------------------------

                avg_psnr = avg_psnr / idx
                avg_ssim = avg_ssim / idx
                avg_lpips = avg_lpips / idx
                val_pix_err_f /= idx
                val_pix_err_nf /= idx
                val_mean_color_err /= idx

                # log
                logger.info('# Validation # PSNR: {:.4e},'.format(avg_psnr))
                logger.info('# Validation # SSIM: {:.4e},'.format(avg_ssim))
                logger.info('# Validation # LPIPS: {:.4e},'.format(avg_lpips))
                logger_val = logging.getLogger('val')  # validation logger
                logger_val.info(
                    '<epoch:{:3d}, iter:{:8,d}> psnr: {:.4e} ssim: {:.4e} lpips: {:.4e}'.format(
                        epoch, current_step, avg_psnr, avg_ssim, avg_lpips))
                # tensorboard logger
                if opt['use_tb_logger'] and 'debug' not in opt['name']:
                    tb_logger.add_scalar('val_psnr', avg_psnr, current_step)
                    tb_logger.add_scalar('val_ssim', avg_ssim, current_step)
                    tb_logger.add_scalar('val_lpips', avg_lpips, current_step)
                    tb_logger.add_scalar('val_pix_err_nf', val_pix_err_nf, current_step)
                    tb_logger.add_scalar('val_mean_color_err', val_mean_color_err, current_step)

            current_step += 1
            if current_step > total_iters:
                break

            #### update learning rate
            model.update_learning_rate(current_step,
                                       warmup_iter=opt['train']['warmup_iter'])

            #### training
            model.feed_data(train_data)
            model.optimize_parameters(current_step)
            model.clear_data()

            #### tb_logger
            if current_step % opt['logger']['tb_freq'] == 0:
                logs = model.get_current_log()
                if opt['use_tb_logger'] and 'debug' not in opt['name']:
                    for k, v in logs.items():
                        if rank <= 0:
                            tb_logger.add_scalar(k, v, current_step)

            #### logger
            if epoch % opt['logger']['print_freq'] == 0 and epoch != 0 and bus == 0:
                logs = model.get_current_log()
                message = '<epoch:{:3d}, iter:{:8,d}, lr:{:.3e}> '.format(
                    epoch, current_step, model.get_current_learning_rate())
                for k, v in logs.items():
                    message += '{:s}: {:.4e} '.format(k, v)
                if rank <= 0:
                    logger.info(message)

            #### save models and training states
            if epoch % opt['logger']['save_checkpoint_freq'] == 0 and epoch != 0 and bus == 0:
                if rank <= 0:
                    logger.info('Saving models and training states.')
                    model.save(current_step)
                    model.save_training_state(epoch, current_step)

    if rank <= 0:
        logger.info('Saving the final model.')
        model.save('latest')
        logger.info('End of training.')
def main():
    """Train SimpleCNN on Fashion-MNIST (eager mode) with mixup augmentation.

    Two independently shuffled copies of the training data are zipped and
    blended per-batch with Beta(2, 2) mixing coefficients.
    """
    parser = argparse.ArgumentParser(
        description='TensorFlow Fashion MNIST Example')
    parser.add_argument('--batch-size', type=int, default=100,
                        help='input batch size for training')
    parser.add_argument('--epochs', type=int, default=10,
                        help='number of epochs to train')
    parser.add_argument('--lr', type=float, default=0.001,
                        help='learning rate')
    parser.add_argument('--seed', type=int, default=1,
                        help='random seed')
    parser.add_argument('--log-interval', type=int, default=5,
                        help='how many batches to wait before'
                             ' logging training status')
    parser.add_argument('--eval-interval', type=int, default=100,
                        help='how many batches to wait before'
                             ' evaluate the model')
    parser.add_argument('--log-dir', type=str, default='tb',
                        help='path for logging directory')
    parser.add_argument('--ckpt-dir', type=str, default='ckpt',
                        help='path for saving model')
    args = parser.parse_args()

    start_time = time.time()
    util.set_random_seed(args.seed)
    sess = util.set_session()
    fashion_mnist = keras.datasets.fashion_mnist
    class_names = [
        'T-shirt/top', 'Trouser', 'Pullover', 'Dress', 'Coat', 'Sandal',
        'Shirt', 'Sneaker', 'Bag', 'Ankle boot'
    ]
    (train_images, train_labels), (test_images,
                                   test_labels) = fashion_mnist.load_data()
    train_images, train_labels = preprocess_data(train_images, train_labels)
    test_images, test_labels = preprocess_data(test_images, test_labels)

    train_dataset = tf.data.Dataset.from_tensor_slices(
        (train_images, train_labels))
    train_dataset = train_dataset.shuffle(10000).batch(args.batch_size)
    test_dataset = tf.data.Dataset.from_tensor_slices(
        (test_images, test_labels))
    test_dataset = test_dataset.shuffle(10000).batch(args.batch_size)
    # Second, independently shuffled stream of the training data for mixup.
    train_dataset_mix = tf.data.Dataset.from_tensor_slices(
        (train_images, train_labels))
    # BUGFIX: was `train_dataset.shuffle(...)`, which re-batched the already
    # batched training stream instead of batching the mixup stream.
    train_dataset_mix = train_dataset_mix.shuffle(10000).batch(args.batch_size)

    model = SimpleCNN(num_classes=len(class_names))
    global_step = tf.train.get_or_create_global_step()
    optimizer = tf.train.AdamOptimizer(learning_rate=args.lr)
    train_log = {'iter': [], 'loss': [], 'accuracy': []}
    test_log = {'iter': [], 'loss': [], 'accuracy': []}
    for ep in range(args.epochs):
        epoch_loss_avg = tfe.metrics.Mean()
        epoch_accuracy = tfe.metrics.Accuracy()
        for batch, ((images, labels), (images_mix, labels_mix)) in enumerate(
                zip(train_dataset, train_dataset_mix)):
            # mixup: convex combination of two randomly paired batches.
            lamb = np.random.beta(2., 2., args.batch_size)
            images = images * lamb[:, np.newaxis, np.newaxis, np.newaxis] + \
                images_mix * (1 - lamb)[:, np.newaxis, np.newaxis, np.newaxis]
            labels = labels * lamb + labels_mix * (1. - lamb)
            # NOTE(review): sparse_softmax_cross_entropy expects integer class
            # ids, but mixup labels are fractional — confirm the loss choice.
            loss_value, grads = util.cal_grad(
                model,
                loss_func=tf.losses.sparse_softmax_cross_entropy,
                inputs=images,
                targets=labels)
            optimizer.apply_gradients(zip(grads, model.trainable_variables),
                                      global_step)
            epoch_loss_avg(loss_value)
            epoch_accuracy(
                tf.argmax(model(images), axis=1, output_type=tf.int32), labels)
            if global_step.numpy() % args.log_interval == 0:
                print(
                    'Epoch: {0:d}/{1:d} Iteration:{2:d} Training Loss:{3:.4f} '
                    'Training Accuracy:{4:.4f}'.format(ep, args.epochs,
                                                       global_step.numpy(),
                                                       epoch_loss_avg.result(),
                                                       epoch_accuracy.result()))
                train_log['iter'].append(global_step.numpy())
                train_log['loss'].append(epoch_loss_avg.result())
                train_log['accuracy'].append(epoch_accuracy.result())
            if global_step.numpy() % args.eval_interval == 0:
                test_loss, test_acc = test(model, test_dataset)
                test_log['iter'].append(global_step.numpy())
                test_log['loss'].append(test_loss)
                test_log['accuracy'].append(test_acc)
    model.summary()

    end_time = time.time()
    print('Elapsed time: {0:.3f}s'.format(end_time - start_time))
    predict(model, test_images[:5], class_names)

    # Save loss and accuracy curves.
    fig = plt.figure()
    plt.plot(train_log['iter'], train_log['loss'], 'r', label='Training')
    plt.plot(test_log['iter'], test_log['loss'], 'b', label='Testing')
    plt.title('Loss')
    plt.legend()
    plt.savefig('dynamic_loss_10.png')
    fig = plt.figure()
    plt.plot(train_log['iter'], train_log['accuracy'], 'r', label='Training')
    plt.plot(test_log['iter'], test_log['accuracy'], 'b', label='Testing')
    plt.title('Accuracy')
    plt.legend()
    plt.savefig('dynamic_accuracy_10.png')
    plt.show()
def main():
    """Train SimpleCNN for multi-label PASCAL VOC classification with augmentation.

    Training data is doubled with random horizontal flips; test data is doubled
    with a central-crop-and-resize view. Logs to tensorboard summaries and
    reports per-class AP / mAP at the end.
    """
    parser = argparse.ArgumentParser(description='TensorFlow Pascal Example')
    parser.add_argument('--batch-size', type=int, default=20,
                        help='input batch size for training')
    parser.add_argument('--epochs', type=int, default=5,
                        help='number of epochs to train')
    parser.add_argument('--lr', type=float, default=0.001,
                        help='learning rate')
    parser.add_argument('--seed', type=int, default=1,
                        help='random seed')
    parser.add_argument('--log-interval', type=int, default=10,
                        help='how many batches to wait before'
                             ' logging training status')
    parser.add_argument('--eval-interval', type=int, default=20,
                        help='how many batches to wait before'
                             ' evaluate the model')
    parser.add_argument('--log-dir', type=str, default='tb',
                        help='path for logging directory')
    parser.add_argument('--data-dir', type=str, default='./VOCdevkit/VOC2007',
                        help='Path to PASCAL data storage')
    args = parser.parse_args()
    util.set_random_seed(args.seed)
    sess = util.set_session()
    img_save_interval = 200

    train_images, train_labels, train_weights = util.load_pascal(
        args.data_dir, class_names=CLASS_NAMES, split='trainval')
    test_images, test_labels, test_weights = util.load_pascal(
        args.data_dir, class_names=CLASS_NAMES, split='test')

    ori_h = train_images.shape[1]
    ori_w = train_images.shape[2]
    central_fraction = 0.7

    train_dataset = tf.data.Dataset.from_tensor_slices(
        (train_images, train_labels, train_weights))
    test_dataset = tf.data.Dataset.from_tensor_slices(
        (test_images, test_labels, test_weights))
    # Augmentation: random horizontal flips for training.
    train_dataset_aug_flip = train_dataset.map(
        lambda img, l, w: (tf.image.random_flip_left_right(img), l, w))
    # BUGFIX: Dataset.concatenate returns a NEW dataset; the original discarded
    # the result, so the augmented samples were never used. (A random-crop
    # dataset was also built but never wired in; it is dropped here because its
    # 224x224 shape cannot be concatenated with the full-size images anyway.)
    train_dataset = train_dataset.concatenate(train_dataset_aug_flip)
    # Test-time augmentation: central crop resized back to the original size.
    test_dataset_aug = test_dataset.map(
        lambda img, l, w: (tf.image.central_crop(img, central_fraction), l, w))
    test_dataset_aug = test_dataset_aug.map(
        lambda img, l, w: (tf.image.resize_images(img, (ori_h, ori_w)), l, w))
    test_dataset = test_dataset.concatenate(test_dataset_aug)

    # Mean subtraction is applied after augmentation, to every sample.
    train_dataset = train_dataset.map(
        lambda img, l, w: (img_mean_substract(img), l, w))
    test_dataset = test_dataset.map(
        lambda img, l, w: (img_mean_substract(img), l, w))
    train_dataset = train_dataset.shuffle(10000).batch(args.batch_size)
    test_dataset = test_dataset.batch(args.batch_size)

    model = SimpleCNN(num_classes=len(CLASS_NAMES))

    logdir = os.path.join(args.log_dir,
                          datetime.now().strftime('%Y-%m-%d_%H-%M-%S'))
    checkpoint_dir = os.path.join(logdir, "ckpt")
    if os.path.exists(logdir):
        shutil.rmtree(logdir)
    os.makedirs(logdir)
    writer = tf.contrib.summary.create_file_writer(logdir)
    writer.set_as_default()

    global_step = tf.train.get_or_create_global_step()
    learning_rate = tf.train.exponential_decay(args.lr, global_step, 5000, 0.5,
                                               staircase=True)
    optimizer = tf.train.MomentumOptimizer(learning_rate=learning_rate,
                                           momentum=0.9)
    checkpoint = tf.train.Checkpoint(optimizer=optimizer, model=model)

    train_log = {'iter': [], 'loss': [], 'accuracy': []}
    test_log = {'iter': [], 'loss': [], 'accuracy': []}
    for ep in range(args.epochs):
        epoch_loss_avg = tfe.metrics.Mean()
        for batch, (images, labels, weights) in enumerate(train_dataset):
            loss_value, grads = util.cal_grad(
                model,
                loss_func=tf.losses.sigmoid_cross_entropy,
                inputs=images,
                weights=weights,
                targets=labels)
            optimizer.apply_gradients(zip(grads, model.trainable_variables),
                                      global_step)
            epoch_loss_avg(loss_value)
            if global_step.numpy() % args.log_interval == 0:
                print(
                    'Epoch: {0:d}/{1:d} Iteration:{2:d} Training Loss:{3:.4f} '
                    .format(ep, args.epochs, global_step.numpy(),
                            epoch_loss_avg.result()))
                train_log['iter'].append(global_step.numpy())
                train_log['loss'].append(epoch_loss_avg.result())
                # Tensorboard Visualization
                with tf.contrib.summary.always_record_summaries():
                    tf.contrib.summary.scalar('training_loss',
                                              epoch_loss_avg.result())
            if global_step.numpy() % args.eval_interval == 0:
                with tf.contrib.summary.always_record_summaries():
                    test_AP, test_mAP = util.eval_dataset_map(
                        model, test_dataset)
                    tf.contrib.summary.scalar('test_map', test_mAP)
        # Save checkpoints once per epoch.
        checkpoint.save(file_prefix=checkpoint_dir)

    AP, mAP = util.eval_dataset_map(model, test_dataset)
    # Baselines for comparison: random scores and ground-truth scores.
    rand_AP = util.compute_ap(test_labels,
                              np.random.random(test_labels.shape),
                              test_weights, average=None)
    print('Random AP: {} mAP'.format(np.mean(rand_AP)))
    gt_AP = util.compute_ap(test_labels, test_labels, test_weights,
                            average=None)
    print('GT AP: {} mAP'.format(np.mean(gt_AP)))
    print('Obtained {} mAP'.format(mAP))
    print('Per class:')
    for cid, cname in enumerate(CLASS_NAMES):
        print('{}: {}'.format(cname, util.get_el(AP, cid)))
def main():
    """Train a CaffeNet-style SimpleCNN on PASCAL VOC multi-label classification.

    Loads cached .npz image/label/weight arrays (or builds them from the VOC
    directory), applies mean subtraction plus flip/crop augmentation, trains
    with SGD+momentum under TF eager execution, logs scalars/histograms to
    TensorBoard, periodically evaluates test mAP, resumes from the latest
    checkpoint when one exists, and prints final per-class AP numbers.
    """
    parser = argparse.ArgumentParser(description='TensorFlow Pascal Example')
    parser.add_argument('--batch-size', type=int, default=20,
                        help='input batch size for training')
    parser.add_argument('--epochs', type=int, default=30,
                        help='number of epochs to train')
    parser.add_argument('--lr', type=float, default=0.001,
                        help='learning rate')
    parser.add_argument('--seed', type=int, default=1,
                        help='random seed')
    parser.add_argument('--log-interval', type=int, default=10,
                        help='how many batches to wait before'
                             ' logging training status')
    parser.add_argument('--eval-interval', type=int, default=250,
                        help='how many batches to wait before'
                             ' evaluate the model')
    parser.add_argument('--log-dir', type=str, default='pascal_caffenet_tb',
                        help='path for logging directory')
    parser.add_argument('--data-dir', type=str, default='./VOCdevkit/VOC2007',
                        help='Path to PASCAL data storage')
    args = parser.parse_args()

    util.set_random_seed(args.seed)
    sess = util.set_session()

    # Cache the (slow) PASCAL loading step as .npz files next to the script;
    # subsequent runs reload the arrays directly.
    splt = "trainval"
    trainval_npz = splt + '.npz'
    test_npz = 'test.npz'
    if (os.path.isfile(trainval_npz)):
        print("\nFound trainval npz file\n")
        with np.load(trainval_npz) as tr_npzfile:
            train_images = tr_npzfile['imgs']
            train_labels = tr_npzfile['labels']
            train_weights = tr_npzfile['weights']
    else:
        train_images, train_labels, train_weights = util.load_pascal(
            args.data_dir, class_names=CLASS_NAMES, split=splt)
        np.savez(trainval_npz, imgs=train_images, labels=train_labels,
                 weights=train_weights)
    ##TEST##
    if (os.path.isfile(test_npz)):
        print("\nFound test npz file\n")
        # npzfile = np.load(test_npz)
        with np.load(test_npz) as test_npzfile:
            test_images = test_npzfile['imgs']
            test_labels = test_npzfile['labels']
            test_weights = test_npzfile['weights']
    else:
        test_images, test_labels, test_weights = util.load_pascal(
            args.data_dir, class_names=CLASS_NAMES, split='test')
        np.savez(test_npz, imgs=test_images, labels=test_labels,
                 weights=test_weights)

    ## TODO modify the following code to apply data augmentation here
    # Per-channel mean subtraction.
    # NOTE(review): mean is divided by 256, which implies images were loaded
    # scaled into [0, 1] — confirm against util.load_pascal.
    rgb_mean = np.array([123.68, 116.78, 103.94], dtype=np.float32) / 256.0
    train_images = (train_images - rgb_mean).astype(np.float32)
    test_images = (test_images - rgb_mean).astype(np.float32)

    # Augmentation: horizontal flips + random crops for train,
    # deterministic center crop for test.
    flip_fn = lambda img, lbl, wts: flip(img, lbl, wts)
    crop_fn = lambda img, lbl, wts: crop(img, lbl, wts)
    ccrop_fn = lambda img, lbl, wts: center_crop(img, lbl, wts)
    train_dataset = tf.data.Dataset.from_tensor_slices(
        (train_images, train_labels, train_weights))
    flipped_train = train_dataset.map(flip_fn, num_parallel_calls=4)
    # Doubles the epoch: originals followed by their flipped copies.
    train_dataset = train_dataset.concatenate(flipped_train)
    train_dataset = train_dataset.map(crop_fn, num_parallel_calls=4)
    train_dataset = train_dataset.shuffle(10000).batch(args.batch_size)
    test_dataset = tf.data.Dataset.from_tensor_slices(
        (test_images, test_labels, test_weights))
    test_dataset = test_dataset.map(ccrop_fn, num_parallel_calls=4)
    test_dataset = test_dataset.batch(args.batch_size)

    model = SimpleCNN(num_classes=len(CLASS_NAMES))

    # Fresh, timestamped TensorBoard run directory per launch.
    logdir = os.path.join(args.log_dir,
                          datetime.now().strftime('%Y-%m-%d_%H-%M-%S'))
    if os.path.exists(logdir):
        shutil.rmtree(logdir)
    os.makedirs(logdir)
    writer = tf.contrib.summary.create_file_writer(logdir)
    writer.set_as_default()
    tf.contrib.summary.initialize()

    global_step = tf.train.get_or_create_global_step()
    # optimizer = tf.train.AdamOptimizer(learning_rate=args.lr)
    ##decay lr using callback
    # Learning rate lives in a tf.Variable so it can be decayed in place
    # (see the learning_rate.assign(...) after each epoch below).
    learning_rate = tf.Variable(args.lr)
    decay_interval = 5000
    # decay_op = tf.train.exponential_decay(args.lr,global_step,decay_interval,0.5)
    ##optimizer : sgd , momentum, 0.9
    optimizer = tf.train.MomentumOptimizer(learning_rate=learning_rate,
                                           momentum=0.9)

    train_log = {'iter': [], 'loss': []}
    test_log = {'iter': [], 'mAP': []}

    # Resume from the latest checkpoint when one exists.
    checkpoint_directory = "./03_pascal_caffenet/"
    if not os.path.exists(checkpoint_directory):
        os.makedirs(checkpoint_directory)
    checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt")
    checkpoint = tf.train.Checkpoint(optimizer=optimizer, model=model)
    # pdb.set_trace()
    latest = tf.train.latest_checkpoint(checkpoint_directory)
    load_flag = 0
    if (latest is not None):
        print("Loading checkpoint ", latest)
        status = checkpoint.restore(
            tf.train.latest_checkpoint(checkpoint_directory))
        load_flag = 1
    print("\nUsing eval interval: ", args.eval_interval)
    print("\nUsing batch size: ", args.batch_size)

    for ep in range(args.epochs):
        epoch_loss_avg = tfe.metrics.Mean()
        # for batch, (images, labels,weights) in enumerate(train_dataset):
        for (images, labels, weights) in tfe.Iterator(train_dataset):
            # pdb.set_trace()
            # loss_value, grads = util.cal_grad(model,
            #                                   loss_func=tf.losses.sigmoid_cross_entropy,
            #                                   inputs=images,
            #                                   targets=labels,
            #                                   weights=weights)
            # Multi-label task: per-class sigmoid cross-entropy, with the
            # per-sample weights masking ambiguous/difficult labels.
            with tf.GradientTape() as tape:
                logits = model(images, training=True)
                loss_value = tf.losses.sigmoid_cross_entropy(
                    labels, logits, weights)
            grads = tape.gradient(loss_value, model.trainable_variables)
            # print("Loss and gradient calculation, done \n")
            # pdb.set_trace()
            optimizer.apply_gradients(zip(grads, model.trainable_variables),
                                      global_step)
            epoch_loss_avg(loss_value)
            if global_step.numpy() % args.log_interval == 0:
                # pdb.set_trace()
                print(
                    'Epoch: {0:d}/{1:d} Iteration:{2:d} Training Loss:{3:.4f} '
                    .format(ep, args.epochs, global_step.numpy(),
                            epoch_loss_avg.result()))
                train_log['iter'].append(global_step.numpy())
                train_log['loss'].append(epoch_loss_avg.result())
                with tf.contrib.summary.always_record_summaries():
                    tf.contrib.summary.scalar('Training loss', loss_value)
                    tf.contrib.summary.scalar('Learning rate', learning_rate)
                    for i, variable in enumerate(model.trainable_variables):
                        tf.contrib.summary.histogram("grad_" + variable.name,
                                                     grads[i])
            if global_step.numpy() % args.eval_interval == 0:
                print("\n **** Running Eval *****\n")
                test_AP, test_mAP = util.eval_dataset_map(model, test_dataset)
                print("Eval finsished with test mAP : ", test_mAP)
                test_log['iter'].append(global_step.numpy())
                test_log['mAP'].append(test_mAP)
                with tf.contrib.summary.always_record_summaries():
                    tf.contrib.summary.scalar('Testing mAP', test_mAP)
        # End of epoch: recompute the decayed rate from the current global
        # step and write it into the optimizer's variable, then checkpoint.
        learning_rate.assign(
            tf.train.exponential_decay(args.lr, global_step, decay_interval,
                                       0.5)())
        print("Learning rate:", learning_rate)
        checkpoint.save(checkpoint_prefix)

    ## TODO write the training and testing code for multi-label classification
    # Final report: model mAP vs. random-prediction and ground-truth bounds.
    AP, mAP = util.eval_dataset_map(model, test_dataset)
    rand_AP = util.compute_ap(test_labels, np.random.random(test_labels.shape),
                              test_weights, average=None)
    print('Random AP: {} mAP'.format(np.mean(rand_AP)))
    gt_AP = util.compute_ap(test_labels, test_labels, test_weights,
                            average=None)
    print('GT AP: {} mAP'.format(np.mean(gt_AP)))
    print('Obtained {} mAP'.format(mAP))
    print('Per class:')
    for cid, cname in enumerate(CLASS_NAMES):
        print('{}: {}'.format(cname, util.get_el(AP, cid)))
def init(self, directory, device):
    """Load the aligned-KG dataset and assemble per-channel graph inputs.

    Populates ``self.loaded_data``, the entity/attribute counts, and
    ``self.channels`` — a dict mapping channel name ('name', 'structure',
    'attribute') to the edge lists and embeddings its GNN consumes.

    Args:
        directory: dataset root; converted to a Path.
        device: torch device handed through to LoadData.
    """
    set_random_seed()
    self.directory = Path(directory)
    self.loaded_data = LoadData(
        self.train_seeds_ratio, self.directory, self.nega_sample_num,
        name_channel=self.name_channel,
        attribute_channel=self.attribute_value_channel,
        digit_literal_channel=self.digit_attribute_channel
        or self.literal_attribute_channel,
        load_new_seed_split=self.load_new_seed_split, device=device)
    self.sr_ent_num = self.loaded_data.sr_ent_num
    self.tg_ent_num = self.loaded_data.tg_ent_num
    self.att_num = self.loaded_data.att_num

    # Init graph adjacent matrix
    print_time_info('Begin preprocessing adjacent matrix')
    self.channels = {}
    # Keep only the (head, tail) columns of each relation triple and
    # de-duplicate the resulting edge lists.
    edges_sr = torch.tensor(self.loaded_data.triples_sr)[:, :2]
    edges_tg = torch.tensor(self.loaded_data.triples_tg)[:, :2]
    edges_sr = torch.unique(edges_sr, dim=0)
    edges_tg = torch.unique(edges_tg, dim=0)
    if self.name_channel:
        # Name channel additionally carries pretrained entity embeddings.
        self.channels['name'] = {
            'edges_sr': edges_sr,
            'edges_tg': edges_tg,
            'sr_ent_embed': self.loaded_data.sr_embed,
            'tg_ent_embed': self.loaded_data.tg_embed,
        }
    if self.structure_channel:
        self.channels['structure'] = {
            'edges_sr': edges_sr,
            'edges_tg': edges_tg
        }
    # NOTE(review): the three attribute variants below all write the same
    # 'attribute' key, so the last truthy flag wins — they are effectively
    # mutually exclusive. Confirm callers enable at most one of them.
    if self.attribute_value_channel:
        self.channels['attribute'] = {
            'edges_sr': edges_sr,
            'edges_tg': edges_tg,
            'att_num': self.loaded_data.att_num,
            'attribute_triples_sr': self.loaded_data.attribute_triples_sr,
            'attribute_triples_tg': self.loaded_data.attribute_triples_tg,
            'value_embedding': self.loaded_data.value_embedding
        }
    if self.literal_attribute_channel:
        self.channels['attribute'] = {
            'edges_sr': edges_sr,
            'edges_tg': edges_tg,
            'att_num': self.loaded_data.literal_att_num,
            'attribute_triples_sr': self.loaded_data.literal_triples_sr,
            'attribute_triples_tg': self.loaded_data.literal_triples_tg,
            'value_embedding': self.loaded_data.literal_value_embedding
        }
    if self.digit_attribute_channel:
        self.channels['attribute'] = {
            'edges_sr': edges_sr,
            'edges_tg': edges_tg,
            'att_num': self.loaded_data.digit_att_num,
            'attribute_triples_sr': self.loaded_data.digital_triples_sr,
            'attribute_triples_tg': self.loaded_data.digital_triples_tg,
            'value_embedding': self.loaded_data.digit_value_embedding
        }
    print_time_info('Finished preprocesssing adjacent matrix')
def main():
    """Train a Transformer or RecoverSAT NMT model.

    Pipeline: configure logging -> apply dataset-specific hyper-parameters ->
    seed RNGs -> load vocab(s) and parallel data -> build the model (optionally
    warm-starting embeddings/encoder) -> train with gradient accumulation,
    periodically evaluating validation BLEU and keeping only the best
    checkpoint per beam size.
    """
    logging.basicConfig(
        format='%(asctime)s - %(levelname)s - %(name)s - %(message)s',
        datefmt='%m/%d/%Y %H:%M:%S', level=logging.INFO)
    logger = logging.getLogger(__name__)

    args = parse_args()
    # A plain Transformer decodes the whole sentence as a single segment.
    if args.model_name == 'Transformer':
        args.segment_num = 1
    # Dataset-specific hyper-parameters override the CLI values.
    if args.dataset == 'IWSLT16':
        hparams = {'enc_layers': 5, 'dec_layers': 5, 'hidden_size': 278,
                   'ffn_size': 507, 'head_num': 2, 'lr_schedule': 'linear',
                   'learning_rate': 3e-4, 'total_steps': 250000,
                   'max_token_num': 2048}
    elif args.dataset == 'WMT14':
        hparams = {'enc_layers': 6, 'dec_layers': 6, 'hidden_size': 512,
                   'ffn_size': 512, 'head_num': 8, 'lr_schedule': 'warmup',
                   'learning_rate': 1, 'total_steps': 200000,
                   'max_token_num': 60000}
    else:
        hparams = {'enc_layers': 6, 'dec_layers': 6, 'hidden_size': 512,
                   'ffn_size': 512, 'head_num': 8, 'lr_schedule': 'warmup',
                   'learning_rate': 1, 'total_steps': 200000,
                   'max_token_num': 30000}
    args.__dict__.update(hparams)

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    n_gpu = torch.cuda.device_count()
    use_gpu = n_gpu > 0
    logger.info('n_gpu=%d' % n_gpu)

    # Draw a random seed when none was given; it is logged so the run is
    # still reproducible from the log.
    if args.seed is None:
        seed = random.Random(None).randint(1, 100000)
    else:
        seed = args.seed
    set_random_seed(seed, use_gpu)
    logger.info("seed: {} ".format(seed))

    save_path = args.save_path
    if not os.path.exists(save_path):
        os.makedirs(save_path)

    #----------------- Data Preparation ----------------#
    if args.share_vocab:
        # One vocabulary / embedding table shared by source and target
        # (tgt_emb_conf=None signals sharing to the model).
        vocab = load_vocab(args.vocab_path)
        src_vocab = vocab
        tgt_vocab = vocab
        vocab_size = len(vocab)
        src_emb_conf = {
            'vocab_size': vocab_size,
            'emb_size': args.hidden_size,
            'padding_idx': vocab['<pad>']
        }
        tgt_emb_conf = None
        logger.info("Load vocabulary from %s, vocabulary size: %d"
                    % (args.vocab_path, vocab_size))
    else:
        src_vocab = load_vocab(args.src_vocab_path)
        tgt_vocab = load_vocab(args.tgt_vocab_path)
        src_vocab_size, tgt_vocab_size = len(src_vocab), len(tgt_vocab)
        logger.info("Load src vocabulary from %s, vocabulary size: %d"
                    % (args.src_vocab_path, src_vocab_size))
        logger.info("Load tgt vocabulary from %s, vocabulary size: %d"
                    % (args.tgt_vocab_path, tgt_vocab_size))
        src_emb_conf = {
            'vocab_size': src_vocab_size,
            'emb_size': args.hidden_size,
            'padding_idx': src_vocab['<pad>']
        }
        tgt_emb_conf = {
            'vocab_size': tgt_vocab_size,
            'emb_size': args.hidden_size,
            'padding_idx': tgt_vocab['<pad>']
        }
    tgt_padding_idx = tgt_vocab['<pad>']

    train_dataset = list(load_data(args.train_src_file, src_vocab,
                                   args.train_tgt_file, tgt_vocab))
    logger.info("Load training set with %d samples" % len(train_dataset))

    def split_str(string, sep=' '):
        # Tokenize on `sep`, dropping empty fragments.
        return [substr for substr in string.split(sep) if substr]

    do_eval = False
    if args.valid_src_file is not None:
        do_eval = True
        valid_tgt_file_list = [args.valid_tgt_file]
        if args.valid_tgt_file_list:
            # NOTE(review): eval() on a CLI-provided string — acceptable for
            # trusted local use, but ast.literal_eval would be safer.
            valid_tgt_file_list = eval(args.valid_tgt_file_list)
        valid_tgt_ref_list = []
        for valid_tgt_file in valid_tgt_file_list:
            with open(valid_tgt_file, encoding='utf8') as f:
                valid_tgt_sent = f.readlines()
            # Undo BPE ('@@ ' joiners) before building BLEU references.
            valid_tgt_ref = [split_str(sent.strip().replace('@@ ', ''))
                             for sent in valid_tgt_sent]
            valid_tgt_ref_list.append(valid_tgt_ref)
        # Regroup so each source sentence maps to its list of references.
        valid_tgt_ref = list(zip(*valid_tgt_ref_list))
        valid_tgt_ref = [list(ref) for ref in valid_tgt_ref]
        valid_dataset = list(load_data(args.valid_src_file, src_vocab))
        logger.info("Load validation set with %d samples" % len(valid_dataset))

    #--------------- Model Initialization ---------------#
    if args.model_name == 'Transformer':
        model = Transformer(
            enc_layers=args.enc_layers, dec_layers=args.dec_layers,
            hidden_size=args.hidden_size, head_num=args.head_num,
            ffn_size=args.ffn_size, src_emb_conf=src_emb_conf,
            tgt_emb_conf=tgt_emb_conf, dropout=args.dropout,
            use_label_smoothing=args.use_label_smoothing,
            smooth_rate=args.smooth_rate)
    elif args.model_name == 'RecoverSAT':
        model = RecoverSAT(
            enc_layers=args.enc_layers, dec_layers=args.dec_layers,
            hidden_size=args.hidden_size, head_num=args.head_num,
            ffn_size=args.ffn_size, src_emb_conf=src_emb_conf,
            tgt_emb_conf=tgt_emb_conf, eos_id=tgt_vocab['<eos>'],
            delete_id=tgt_vocab['<delete>'], segment_num=args.segment_num,
            dropout=args.dropout,
            use_label_smoothing=args.use_label_smoothing,
            smooth_rate=args.smooth_rate)
    else:
        assert False
    if args.init_encoder_path:
        # Warm-start embeddings and encoder from a pretrained model.
        init_model = torch.load(args.init_encoder_path)
        model.src_embedding.load_state_dict(init_model.src_embedding.state_dict())
        model.tgt_embedding.load_state_dict(init_model.tgt_embedding.state_dict())
        model.encoder.load_state_dict(init_model.encoder.state_dict())
        logger.info("Load pretrained embedding and encoder parameters from %s"
                    % args.init_encoder_path)

    #--------------- Training Preparation --------------#
    model.to(device)
    if n_gpu > 1:
        model = torch.nn.DataParallel(model)
    trainable_params = list(filter(lambda p: p.requires_grad,
                                   model.parameters()))
    optimizer = optim.Adam(
        params=trainable_params, lr=args.learning_rate,
        weight_decay=args.weight_decay, betas=(0.9, 0.98), eps=1e-9)
    total_steps = args.total_steps
    if args.lr_schedule == 'warmup':
        # Inverse-sqrt schedule from "Attention Is All You Need".
        lr_scheduler = optim.lr_scheduler.LambdaLR(
            optimizer=optimizer,
            lr_lambda=Transformer_LR_Schedule(
                model_size=args.hidden_size, warmup_steps=args.warmup_steps))
    elif args.lr_schedule == 'linear':
        lr_scheduler = optim.lr_scheduler.LambdaLR(
            optimizer=optimizer,
            lr_lambda=Linear_LR_Schedule(
                initial_lr=args.learning_rate, final_lr=1e-5,
                total_steps=total_steps))
    else:
        assert False, "Unrecognized learning rate schedule: %s" % args.lr_schedule

    #-------------------- Training ---------------------#
    if args.model_name == 'Transformer':
        beam_size_list = [4]
    else:
        beam_size_list = [1]
    # Track best BLEU and its checkpoint identity per beam size.
    best_bleu, best_ckp = dict(), dict()
    for beam_size in beam_size_list:
        key = 'b%d' % beam_size
        best_bleu[key] = 0
        best_ckp[key] = {'epoch': -1, 'batch': -1}
    if do_eval:
        # Evaluate on the unwrapped model when DataParallel is in use.
        eval_model = model.module if n_gpu > 1 else model
    assert args.grad_accumulate_steps >= 1
    # Each optimizer step sees max_token_num tokens spread over
    # grad_accumulate_steps mini-batches.
    token_num_per_batch = args.max_token_num // args.grad_accumulate_steps
    real_loss = 0.
    global_step, mini_step = 0, 0
    for epoch in range(args.epoch_num):
        for step, batch in enumerate(
                token_number_batcher(train_dataset, token_num_per_batch,
                                     parallel_data_len)):
            # Linearly anneal the random-dividing probability to 0 over
            # the course of training (RecoverSAT segment splitting).
            cur_rand_dividing_prob = args.rand_dividing_prob
            if args.anneal_rand_dividing_prob:
                cur_rand_dividing_prob = args.rand_dividing_prob * max(
                    1. - global_step / total_steps, 0.)
            batch_tensor = convert_to_tensor(
                batch, src_vocab, tgt_vocab, seg_num=args.segment_num,
                rand_dividing_prob=cur_rand_dividing_prob,
                redundant_prob=args.redundant_prob, device=device,
                is_training=True)
            src_seq, src_lens, tgt_seq, label = batch_tensor[:4]
            if args.model_name == 'RecoverSAT':
                seg_id, tgt_pos, seg_lens = batch_tensor[-3:]
            if args.model_name == 'Transformer':
                loss = model(src_seq, tgt_seq, src_lens, label)
                # Per-token average; clamp guards against division by zero
                # on a padding-only batch.
                token_num = torch.clamp(
                    torch.sum(label != tgt_padding_idx).float(), min=1.)
                loss = loss.sum() / token_num
            elif args.model_name == 'RecoverSAT':
                loss = model(src_seq, tgt_seq, src_lens, label, seg_id=seg_id,
                             tgt_pos=tgt_pos, seg_lens=seg_lens)
                token_num = torch.clamp(
                    torch.sum(label != tgt_padding_idx).float(), min=1.)
                loss = loss.sum() / token_num
            # Scale so the accumulated gradient matches a full-batch step.
            loss = loss / args.grad_accumulate_steps
            loss.backward()
            real_loss = real_loss + loss.item()
            mini_step += 1
            if mini_step % args.grad_accumulate_steps == 0:
                lr_scheduler.step()
                optimizer.step()
                optimizer.zero_grad()
                global_step += 1
                if global_step % args.log_period == 0:
                    log_info = "Epoch=%-3d batch=%-4d step=%-6d loss=%f" % (
                        epoch, step + 1, global_step, real_loss)
                    logger.info(log_info)
                if do_eval and global_step % args.save_period == 0:
                    model.eval()
                    for beam_size in beam_size_list:
                        bleu = evaluation(eval_model, src_vocab, tgt_vocab,
                                          valid_dataset, valid_tgt_ref,
                                          beam_size=beam_size, args=args,
                                          device=device)
                        log_info = "Evaluation: beam_size=%d Epoch=%-3d batch=%-4d step=%-6d bleu=%.2f" \
                            % (beam_size, epoch, step + 1, global_step, bleu)
                        logger.info(log_info)
                        key = 'b%d' % beam_size
                        if bleu > best_bleu[key]:
                            # Keep only the best checkpoint per beam size:
                            # delete the previous one before saving.
                            prev_model_file = os.path.join(
                                save_path, 'b%d-epoch-%d-batch-%d.ckp'
                                % (beam_size, best_ckp[key]['epoch'],
                                   best_ckp[key]['batch']))
                            if os.path.exists(prev_model_file):
                                os.remove(prev_model_file)
                            best_ckp[key]['epoch'] = epoch
                            best_ckp[key]['batch'] = step + 1
                            best_bleu[key] = bleu
                            model_file = os.path.join(
                                save_path, 'b%d-epoch-%d-batch-%d.ckp'
                                % (beam_size, epoch, step + 1))
                            model_to_save = model.module if hasattr(
                                model, 'module') else model
                            torch.save(model_to_save, model_file)
                        log_info = "Evaluation: beam_size=%d BEST_BLEU:%.2f BEST_CKP:epoch-%d-batch-%d" \
                            % (beam_size, best_bleu[key],
                               best_ckp[key]['epoch'], best_ckp[key]['batch'])
                        logger.info(log_info)
                    model.train()
                real_loss = 0
            # NOTE(review): this only breaks the inner loop; remaining
            # epochs still start and process one batch each — confirm
            # whether an outer break was intended.
            if global_step >= total_steps:
                break
    if do_eval:
        for beam_size in beam_size_list:
            key = 'b%d' % beam_size
            log_info = "Final: DEV beam_size=%d BEST_BLEU:%.2f BEST_CKP:epoch-%d-batch-%d" \
                % (beam_size, best_bleu[key], best_ckp[key]['epoch'],
                   best_ckp[key]['batch'])
            logger.info(log_info)
def main():
    """Visualize first-layer conv filters across saved training checkpoints.

    Restores checkpoints ckpt-1, ckpt-6, ..., ckpt-56 from a fixed run
    directory, pulls the conv1 kernel tensor, normalizes a few selected
    filters to a displayable range, and writes each as a grayscale image
    under ./hw1/figures/.
    """
    parser = argparse.ArgumentParser(description='TensorFlow Pascal Example')
    parser.add_argument('--batch-size', type=int, default=20,
                        help='input batch size for training')
    parser.add_argument('--epochs', type=int, default=5,
                        help='number of epochs to train')
    parser.add_argument('--lr', type=float, default=0.001,
                        help='learning rate')
    parser.add_argument('--seed', type=int, default=1,
                        help='random seed')
    parser.add_argument('--log-interval', type=int, default=10,
                        help='how many batches to wait before'
                             ' logging training status')
    parser.add_argument('--eval-interval', type=int, default=20,
                        help='how many batches to wait before'
                             ' evaluate the model')
    parser.add_argument('--log-dir', type=str, default='tb',
                        help='path for logging directory')
    parser.add_argument('--data-dir', type=str, default='./VOCdevkit/VOC2007',
                        help='Path to PASCAL data storage')
    args = parser.parse_args()

    util.set_random_seed(args.seed)
    sess = util.set_session()

    model = SimpleCNN(num_classes=len(CLASS_NAMES))

    # The checkpoints pair the model with the optimizer it was trained with,
    # so rebuild the same optimizer/global-step objects before restoring.
    global_step = tf.train.get_or_create_global_step()
    learning_rate = tf.train.exponential_decay(args.lr, global_step, 5000,
                                               0.5, staircase=True)
    optimizer = tf.train.MomentumOptimizer(learning_rate=learning_rate,
                                           momentum=0.9)
    checkpoint = tf.train.Checkpoint(optimizer=optimizer, model=model)
    # Create the model variables up front so restore() can match them eagerly.
    model.build((args.batch_size, 224, 224, 3))

    ckpt_path = "./tb/2019-02-25_10-45-32/"
    for cp_ind in range(1, 61, 5):
        status = checkpoint.restore(
            os.path.join(ckpt_path, "ckpt-" + str(cp_ind)))
        # Fail fast if the checkpoint does not fully match model + optimizer.
        # (The unused `weights = model.get_weights()` and
        # `norm = colors.Normalize(0, 1)` locals were removed.)
        status.assert_consumed()
        conv_weights = model.get_conv_weights()
        kernel_weights = conv_weights[0].numpy()  # 11 11 3 96
        visualize_idx = [0, 10, 20]
        for i in visualize_idx:
            kernel_weight = kernel_weights[:, :, :, i]
            norm_weight = kernel_normalize(kernel_weight)
            plt.imshow(norm_weight, cmap='gray')
            img_name = "./hw1/figures/ckpt-" + str(cp_ind) + "_conv1_f" + str(
                i) + ".jpg"
            plt.savefig(img_name)
def main(cfg: DictConfig) -> None:
    """Train an HDR image-to-image regression model from a Hydra config.

    Creates (or reuses) a per-config history directory — refusing a directory
    that was produced by a different config — seeds RNGs, dumps one
    input/label sample pair for a sanity check, trains with L1 loss via
    RegressorTrainer, and saves best and final weights under the history dir.
    """
    cwd = Path(hydra.utils.get_original_cwd())
    myutil.print_config(cfg)

    # Setting history directory
    # All outputs will be written into (p / "history" / train_id).
    train_id = tid.generate_train_id(cfg)
    history_dir = cwd / "history" / train_id
    if not history_dir.exists():
        history_dir.mkdir(parents=True, exist_ok=True)

    # A history dir may only be reused by the exact same config.
    cfg_path = history_dir / "config.yaml"
    if cfg_path.exists():
        existing_cfg = OmegaConf.load(str(history_dir / "config.yaml"))
        if not myutil.is_same_config(cfg, existing_cfg):
            raise ValueError(
                "Train ID {} already exists, but config is different".format(
                    train_id))
    # Saving cfg
    OmegaConf.save(cfg, str(history_dir / "config.yaml"))

    # Setting seed
    if cfg.seed is not None:
        myutil.set_random_seed(cfg.seed)

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Training
    trainloaders, valloaders = get_data_loaders(cfg)

    # Confirming dataset: dump the first (input, label) pair as images.
    # NOTE(review): assumes tensors are CHW and in a range hdrpy.io.write
    # accepts — confirm against the data loader.
    dataiter = iter(trainloaders)
    inputs, labels = next(dataiter)
    hdrpy.io.write(
        history_dir / 'input_sample.jpg',
        inputs[0].clone().detach().numpy().transpose((1, 2, 0)))
    hdrpy.io.write(
        history_dir / 'label_sample.jpg',
        labels[0].clone().detach().numpy().transpose((1, 2, 0)))

    net = myutil.get_model(cfg)

    # Checking initial DNN
    # outputs = net(inputs.to(device)).to('cpu').clone().detach()
    # hdrpy.io.write(
    #     history_dir / 'initial_output_sample.jpg',
    #     outputs[0].clone().detach().numpy().transpose((1, 2, 0)))

    criterion = nn.L1Loss()
    optimizer = get_optimizer(net.parameters(), cfg)
    scheduler = get_scheduler(optimizer, cfg)
    # Extensions: keep the weights with the lowest validation loss, and
    # persist the metric history every epoch.
    extensions = [ModelSaver(directory=history_dir,
                             name=lambda x: cfg.model.name+"_best.pth",
                             trigger=MinValueTrigger(mode="validation",
                                                     key="loss")),
                  HistorySaver(directory=history_dir,
                               name=lambda x: cfg.model.name+"_history.pth",
                               trigger=IntervalTrigger(period=1))]
    trainer = RegressorTrainer(net, optimizer, criterion, trainloaders,
                               scheduler=scheduler, extensions=extensions,
                               init_epoch=0, device=device)
    trainer.train(cfg.epoch, valloaders)

    # Checking trained DNN
    # outputs = net(inputs.to(device)).to('cpu').clone().detach()
    # hdrpy.io.write(
    #     history_dir / 'output_sample.jpg',
    #     outputs[0].clone().detach().numpy().transpose((1, 2, 0)))
    # print(outputs[0])
    save_model(net, str(history_dir / "{}.pth".format(cfg.model.name)))
def __init__(self, conf):
    """Set up a training run: run directory, data, model, optimizer, metrics.

    Builds the run directory (new or named by conf.runid), dumps the config,
    seeds RNGs, optionally loads a BERT encoder, loads the dataset, and wires
    up scorers/early stopping appropriate to the task (CoNLL F1 for NER-style
    tagging, accuracy otherwise; per-language dicts for multilingual data).

    Args:
        conf: parsed run configuration (argparse/attr-style namespace).
    """
    self.conf = conf
    self.device = torch.device(f"cuda:{conf.gpu_id}")
    self.log = get_logger()
    torch.set_printoptions(precision=8)
    # Resolve the run directory: explicit runid wins, otherwise allocate
    # the next free numbered directory.
    if conf.runid:
        conf.rundir = mkdir(conf.outdir / conf.runid)
    if not conf.rundir:
        conf.rundir = next_rundir(conf.outdir, log=self.log)
    self.rundir = conf.rundir
    dump_args(conf, conf.rundir / "conf.json")
    set_random_seed(conf.random_seed)
    if self.conf.use_bert:
        assert self.conf.lang in Bert.supported_langs, self.conf.lang
        self.bert = Bert(self.conf.bert_model_name, device=self.device)
    else:
        self.bert = None
    self.data = load_dataset(conf, conf.lang, bert=self.bert)
    _data = [self.data]
    for d in _data:
        self.log.info(
            f"{len(d.train_loader)} batches | bs {conf.batch_size}")
    self.model = self.get_model()
    self.optimizer = get_optim(conf, self.model)
    optimum = "min"
    # NOTE(review): no lr_scheduler attribute is set when conf.lr_scheduler
    # is falsy — downstream code presumably guards on its presence; confirm.
    if conf.lr_scheduler == "plateau":
        self.lr_scheduler = ReduceLROnPlateau(self.optimizer, factor=0.1,
                                              patience=2, mode=optimum,
                                              verbose=True)
    elif conf.lr_scheduler:
        raise ValueError("Unknown lr_scheduler: " + conf.lr_scheduler)
    self.losses = LossTrackers.from_names("loss", log=self.log)
    # Sequence-tagging tasks are scored with CoNLL F1; everything else with
    # plain accuracy. Multilingual datasets get one scorer per language plus
    # an aggregate average scorer.
    if (self.main_lang_data.tag == "ner"
            or self.conf.dataset.startswith("sr3de")):
        if self.data.is_multilingual:
            self.sentence_texts = {
                split_name: self.main_lang_data.token_texts(split_name)
                for split_name in ["dev", "test"]
            }
            self.conll_score = {
                lang: ConllScore(tag_enc=self.main_lang_data.tag_enc)
                for lang in self.data.dev
            }
            self.score = {
                lang: Score("f1", save_model=False, log=self.log,
                            score_func=self.conll_score[lang],
                            add_mode="append")
                for lang in self.data.dev
            }
            self.avg_score = Score("avg_f1", log=self.log,
                                   score_func="dummy", add_mode="append")
        else:
            # Cap the number of evaluated instances in the monolingual case.
            self.sentence_texts = {
                split_name: self.main_lang_data.token_texts(split_name)
                [:conf.max_eval_inst]
                for split_name in ["dev", "test"]
            }
            self.conll_score = ConllScore(
                tag_enc=self.main_lang_data.tag_enc)
            self.score = Score("f1", log=self.log,
                               score_func=self.conll_score,
                               add_mode="append")
    else:
        if self.data.is_multilingual:
            self.score = {
                lang: Score("acc", log=self.log) for lang in self.data.dev
            }
            self.avg_score = Score("avg_acc", log=self.log,
                                   score_func="dummy", add_mode="append")
        else:
            self.score = Score("acc", log=self.log)
    if conf.early_stop > 0:
        # Multilingual / wikiann runs track an averaged score that should be
        # maximized; otherwise defer to the scorer's own optimum direction.
        score_optimum = ("max" if
                         (self.conf.dataset.startswith("wikiannmulti")
                          or self.data.is_multilingual)
                         else self.score.optimum)
        self.early_stop = EarlyStopping(
            score_optimum, min_delta=conf.early_stop_min_delta,
            patience=conf.early_stop)
    else:
        self.early_stop = None
    self.epoch = 0
def main():
    """Set up the PASCAL VOC pipeline and report mAP baselines.

    Parses CLI options, seeds RNGs, loads the trainval/test splits, builds
    batched tf.data pipelines, instantiates SimpleCNN, prepares a fresh
    TensorBoard log directory, then prints random-prediction,
    ground-truth-bound, and model mAP plus per-class AP on the test split.
    """
    parser = argparse.ArgumentParser(description='TensorFlow Pascal Example')
    cli_options = [
        ('--batch-size', int, 10, 'input batch size for training'),
        ('--epochs', int, 5, 'number of epochs to train'),
        ('--lr', float, 0.001, 'learning rate'),
        ('--seed', int, 1, 'random seed'),
        ('--log-interval', int, 10,
         'how many batches to wait before logging training status'),
        ('--eval-interval', int, 20,
         'how many batches to wait before evaluate the model'),
        ('--log-dir', str, 'tb', 'path for logging directory'),
        ('--data-dir', str, './VOCdevkit/VOC2007',
         'Path to PASCAL data storage'),
    ]
    for flag, flag_type, flag_default, flag_help in cli_options:
        parser.add_argument(flag, type=flag_type, default=flag_default,
                            help=flag_help)
    args = parser.parse_args()

    util.set_random_seed(args.seed)
    sess = util.set_session()

    imgs_tr, labels_tr, wts_tr = util.load_pascal(
        args.data_dir, class_names=CLASS_NAMES, split='trainval')
    imgs_te, labels_te, wts_te = util.load_pascal(
        args.data_dir, class_names=CLASS_NAMES, split='test')

    ## TODO modify the following code to apply data augmentation here
    train_dataset = tf.data.Dataset.from_tensor_slices(
        (imgs_tr, labels_tr, wts_tr)).shuffle(10000).batch(args.batch_size)
    test_dataset = tf.data.Dataset.from_tensor_slices(
        (imgs_te, labels_te, wts_te)).batch(args.batch_size)

    model = SimpleCNN(num_classes=len(CLASS_NAMES))

    # Start every run with an empty, timestamped log directory.
    logdir = os.path.join(args.log_dir,
                          datetime.now().strftime('%Y-%m-%d_%H-%M-%S'))
    if os.path.exists(logdir):
        shutil.rmtree(logdir)
    os.makedirs(logdir)
    writer = tf.contrib.summary.create_file_writer(logdir)
    writer.set_as_default()

    ## TODO write the training and testing code for multi-label classification
    AP, mAP = util.eval_dataset_map(model, test_dataset)
    rand_AP = util.compute_ap(labels_te, np.random.random(labels_te.shape),
                              wts_te, average=None)
    print('Random AP: {} mAP'.format(np.mean(rand_AP)))
    gt_AP = util.compute_ap(labels_te, labels_te, wts_te, average=None)
    print('GT AP: {} mAP'.format(np.mean(gt_AP)))
    print('Obtained {} mAP'.format(mAP))
    print('Per class:')
    for cid, cname in enumerate(CLASS_NAMES):
        print('{}: {}'.format(cname, util.get_el(AP, cid)))
# NOTE: `argparse` is used below but was never imported, which raised a
# NameError at startup — importing it here is the fix.
import argparse
import importlib

import yaml

import util

if __name__ == '__main__':
    # CLI: a yaml settings file naming the agent implementation, plus a seed.
    parser = argparse.ArgumentParser()
    parser.add_argument('-yfname', '-f', type=str,
                        help='the main yaml file that sets the settings')
    # Help text fixed: it previously duplicated the yaml-file description.
    parser.add_argument('--seed', '-s', type=int, default=10,
                        help='random seed')
    args = parser.parse_args()

    util.set_random_seed(args.seed)

    # safe_load avoids arbitrary Python object construction from the config
    # (yaml.load without an explicit Loader is deprecated and unsafe).
    with open(args.yfname, 'r') as f:
        setting = yaml.safe_load(f)

    # The settings file names the agent module/class; import, instantiate
    # with the settings path, and hand over control.
    agent_module = importlib.import_module(setting['agent_module_name'])
    agent_cls = getattr(agent_module, setting['agent_class_name'])
    agent = agent_cls(args.yfname)
    agent.main()
def main():
    """Train SimpleCNN on PASCAL VOC with mixup augmentation.

    Loads trainval/test splits, zips two independently shuffled copies of the
    training set and blends each pair of batches with Beta(0.2, 0.2) mixup
    coefficients (images, labels, and weights alike), applies normalization /
    random crop / random flip, trains with SGD+momentum under TF eager
    execution while logging to TensorBoard, periodically evaluates on the
    test set, saves final weights, and prints per-class AP numbers.
    """
    parser = argparse.ArgumentParser(description='TensorFlow Pascal Example')
    parser.add_argument('--batch-size', type=int, default=20,
                        help='input batch size for training')
    parser.add_argument('--epochs', type=int, default=10,
                        help='number of epochs to train')
    parser.add_argument('--lr', type=float, default=0.0001,
                        help='learning rate')
    parser.add_argument('--seed', type=int, default=1,
                        help='random seed')
    parser.add_argument('--log-interval', type=int, default=10,
                        help='how many batches to wait before'
                             ' logging training status')
    parser.add_argument('--eval-interval', type=int, default=60,
                        help='how many batches to wait before'
                             ' evaluate the model')
    parser.add_argument('--log-dir', type=str, default='tb/05',
                        help='path for logging directory')
    parser.add_argument('--data-dir', type=str, default='./VOCdevkit/VOC2007',
                        help='Path to PASCAL data storage')
    parser.add_argument('--checkpoint-dir', type=str,
                        default='./checkpoints/06',
                        help='Path to checkpoints storage')
    parser.add_argument(
        '--save-interval', type=int, default=2,
        help='How many batch to wait before storing checkpoints')
    parser.add_argument(
        '--pretrain-dir', type=str,
        default=
        './pre_trained_model/vgg16_weights_tf_dim_ordering_tf_kernels.h5',
        help='path the pretrained model')
    parser.add_argument('--scratch-dir', type=str,
                        default='./checkpoints/04/ckpt.h5',
                        help='path the scratched model')
    args = parser.parse_args()

    util.set_random_seed(args.seed)
    sess = util.set_session()

    model = SimpleCNN(pretrain_dir=args.pretrain_dir,
                      scratch_dir=args.scratch_dir,
                      num_classes=len(CLASS_NAMES))

    train_images, train_labels, train_weights = util.load_pascal(
        args.data_dir, class_names=CLASS_NAMES, split='trainval')
    test_images, test_labels, test_weights = util.load_pascal(
        args.data_dir, class_names=CLASS_NAMES, split='test')

    # Earlier whole-array mixup experiment, superseded by the per-batch
    # mixup inside the training loop below.
    # np.random.seed(1)
    # images_mix = train_images
    # np.random.shuffle(images_mix)
    # np.random.seed(1)
    # labels_mix = train_labels
    # np.random.shuffle(labels_mix)
    # np.random.seed(1)
    # weights_mix = train_weights
    # np.random.shuffle(weights_mix)
    # lamb = np.random.beta(2., 2.)
    # train_images=train_images * lamb + images_mix * (1-lamb)
    # train_labels=train_labels * lamb + labels_mix * (1-lamb)
    # train_weights=train_weights * lamb + weights_mix * (1-lamb)

    ## TODO modify the following code to apply data augmentation here
    print('start_loading!')
    train_dataset = tf.data.Dataset.from_tensor_slices(
        (train_images, train_labels, train_weights))
    train_dataset = train_dataset.shuffle(10000).batch(args.batch_size)
    test_dataset = tf.data.Dataset.from_tensor_slices(
        (test_images, test_labels, test_weights))
    test_dataset = test_dataset.batch(50)
    # Second, independently shuffled pass over the training data: its
    # batches are the mixup partners for the primary dataset's batches.
    train_dataset_mix = tf.data.Dataset.from_tensor_slices(
        (train_images, train_labels, train_weights))
    train_dataset_mix = train_dataset_mix.shuffle(10000).batch(args.batch_size)

    # Fresh, timestamped TensorBoard run directory per launch.
    logdir = os.path.join(args.log_dir,
                          datetime.now().strftime('%Y-%m-%d_%H-%M-%S'))
    if os.path.exists(logdir):
        shutil.rmtree(logdir)
    os.makedirs(logdir)
    writer = tf.contrib.summary.create_file_writer(logdir)
    writer.set_as_default()

    ## TODO write the training and testing code for multi-label classification
    global_step = tf.train.get_or_create_global_step()
    learning_rate_decay = tf.train.exponential_decay(args.lr, global_step,
                                                     1000, 0.5)
    optimizer = tf.train.MomentumOptimizer(learning_rate=learning_rate_decay,
                                           momentum=0.9)
    train_log = {'iter': [], 'loss': []}
    test_log = {'iter': [], 'loss': [], 'accuracy': []}
    print('start training!')
    for ep in range(args.epochs):
        epoch_loss_avg = tfe.metrics.Mean()
        # epoch_accuracy = tfe.metrics.Accuracy()
        for batch, ((images, labels, weights),
                    (images_mix, labels_mix, weights_mix)) in enumerate(
                        zip(train_dataset, train_dataset_mix)):
            # print(labels - labels_mix)
            labels = tf.cast(labels, tf.float32)
            labels_mix = tf.cast(labels_mix, tf.float32)
            weights = tf.cast(weights, tf.float32)
            weights_mix = tf.cast(weights_mix, tf.float32)
            # One mixup coefficient per sample, drawn from Beta(0.2, 0.2);
            # the same lamb blends images, weights, and labels.
            lamb_size = images.shape[0]
            lamb = np.random.beta(0.2, 0.2, lamb_size)
            # print(lamb)
            images = images * lamb[:, np.newaxis, np.newaxis,
                                   np.newaxis] + images_mix * (
                                       1 - lamb)[:, np.newaxis, np.newaxis,
                                                 np.newaxis]
            # print(images.shape)
            weights = weights * lamb[:, np.newaxis] + weights_mix * (
                1. - lamb)[:, np.newaxis]
            labels = labels * lamb[:, np.newaxis] + labels_mix * (
                1. - lamb)[:, np.newaxis]
            # print(labels * lamb[:, np.newaxis])
            # print(labels.dtype)
            # Standard augmentation on the mixed batch.
            images, labels, weights = mean_normalization(
                images, labels, weights)
            images, labels, weights = randomly_crop(images, labels, weights)
            images, labels, weights = randomly_flip(images, labels, weights)
            # print(images[0])
            # print(labels)
            # print(weights.shape)
            with tf.contrib.summary.record_summaries_every_n_global_steps(100):
                tf.contrib.summary.image("sample_image", images, max_images=3)
            loss_value, grads = util.cal_grad(
                model,
                loss_func=tf.losses.sigmoid_cross_entropy,
                inputs=images,
                targets=labels,
                weights=weights)
            optimizer.apply_gradients(zip(grads, model.trainable_variables),
                                      global_step)
            # Recreate the decay closure so the logged rate reflects the
            # just-incremented global step.
            learning_rate_decay = tf.train.exponential_decay(
                args.lr, global_step, 1000, 0.5)
            with tf.contrib.summary.record_summaries_every_n_global_steps(1):
                tf.contrib.summary.scalar('learning_rate',
                                          learning_rate_decay())
            with tf.contrib.summary.record_summaries_every_n_global_steps(10):
                for grad, var in zip(grads, model.trainable_variables):
                    tf.contrib.summary.histogram(
                        "{}/grad_histogram".format(var.name), grad)
            with tf.contrib.summary.record_summaries_every_n_global_steps(1):
                tf.contrib.summary.scalar('training_loss', loss_value)
            epoch_loss_avg(loss_value)
            if global_step.numpy() % args.log_interval == 0:
                print(
                    'Epoch: {0:d}/{1:d} Iteration:{2:d} Training Loss:{3:.4f}'
                    .format(ep, args.epochs, global_step.numpy(),
                            epoch_loss_avg.result()))
                train_log['iter'].append(global_step.numpy())
                train_log['loss'].append(epoch_loss_avg.result())
                # tf.contrib.summary.scalar('training_loss', epoch_loss_avg.result())
                # train_log['accuracy'].append(epoch_accuracy.result())
            if global_step.numpy() % args.eval_interval == 0:
                test_loss, test_acc = test(model, test_dataset)
                with tf.contrib.summary.record_summaries_every_n_global_steps(
                        args.eval_interval):
                    tf.contrib.summary.scalar('testing_acc', test_acc)
                test_log['iter'].append(global_step.numpy())
                test_log['loss'].append(test_loss)
                test_log['accuracy'].append(test_acc)
                # tf.contrib.summary.scalar('testing_loss', test_loss)
                # tf.contrib.summary.scalar('testing_loss', test_acc)
                print(
                    'Epoch: {0:d}/{1:d} Iteration:{2:d} Testing Loss:{3:.4f} Testing Accuracy:{4:.4f}'
                    .format(ep, args.epochs, global_step.numpy(), test_loss,
                            test_acc))
            # if global_step.numpy() % args.save_epoch == 0:
            #     checkpoint = tfe.Checkpoint(optimizer=optimizer,
            #                                 model=model,
            #                                 optimizer_step=tf.train.get_or_create_global_step())
            #     checkpoint_prefix = os.path.join(args.checkpoint_dir, "ckpt")
            #     checkpoint.save(file_prefix=checkpoint_prefix)

    # Final evaluation plus random/ground-truth baselines, then save weights.
    AP, mAP = util.eval_dataset_map(model, test_dataset)
    rand_AP = util.compute_ap(test_labels,
                              np.random.random(test_labels.shape),
                              test_weights, average=None)
    # checkpoint = tfe.Checkpoint(optimizer=optimizer,
    #                             model=model,
    #                             optimizer_step=tf.train.get_or_create_global_step())
    # checkpoint_prefix = os.path.join(args.checkpoint_dir, "ckpt")
    # checkpoint.save(file_prefix=checkpoint_prefix)
    checkpoint_prefix = os.path.join(args.checkpoint_dir, "ckpt.h5")
    model.save_weights(checkpoint_prefix)
    print('Random AP: {} mAP'.format(np.mean(rand_AP)))
    gt_AP = util.compute_ap(test_labels, test_labels, test_weights,
                            average=None)
    print('GT AP: {} mAP'.format(np.mean(gt_AP)))
    print('Obtained {} mAP'.format(mAP))
    print('Per class:')
    for cid, cname in enumerate(CLASS_NAMES):
        print('{}: {}'.format(cname, util.get_el(AP, cid)))
    writer.close()
def main(cfg: DictConfig) -> None:
    """Train a classifier under a hydra config and archive everything under a train ID.

    All outputs (saved config, best/last model weights, training history) are
    written into ``<original cwd>/history/<train_id>``, where the train ID is
    derived deterministically from the config. Re-running with the same train
    ID but a *different* config raises, so a history directory can never mix
    results from incompatible configurations.

    Args:
        cfg: Full hydra/omegaconf experiment configuration.

    Raises:
        ValueError: if the train ID already exists with a different config.
    """
    # hydra changes the process cwd; resolve paths against the launch directory.
    cwd = Path(hydra.utils.get_original_cwd())
    myutil.print_config(cfg)

    # History directory for this run: cwd / "history" / train_id.
    train_id = tid.generate_train_id(cfg)
    history_dir = cwd / "history" / train_id
    # mkdir with exist_ok=True is a no-op when the directory exists,
    # so no separate exists() check is needed.
    history_dir.mkdir(parents=True, exist_ok=True)

    # Guard against reusing a train ID with a different configuration.
    cfg_path = history_dir / "config.yaml"
    if cfg_path.exists():
        existing_cfg = OmegaConf.load(str(cfg_path))
        if not myutil.is_same_config(cfg, existing_cfg):
            raise ValueError(
                "Train ID {} already exists, but config is different".format(
                    train_id))
    # Persist the config so the run is reproducible from the history dir alone.
    OmegaConf.save(cfg, str(cfg_path))

    # Seeding is optional; cfg.seed = None means "leave RNGs unseeded".
    if cfg.seed is not None:
        myutil.set_random_seed(cfg.seed)

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Training setup.
    trainloaders, valloaders, classes = get_data_loaders(cfg)
    net = myutil.get_model(classes, cfg)
    if device.type == "cuda":
        # DataParallel only when a GPU is present; on CPU it would just add overhead.
        net = torch.nn.DataParallel(net)
    net = net.to(device)
    # NOTE(review): assumes 1x32x32 inputs (e.g. grayscale 32x32 images) — confirm
    # this matches what get_data_loaders produces.
    summary(net, input_size=(1, 32, 32))
    criterion = nn.CrossEntropyLoss()
    optimizer = get_optimizer(net.parameters(), cfg)
    scheduler = get_scheduler(optimizer, cfg)

    extensions = [
        # Keep the weights that achieve the best validation "total acc".
        ModelSaver(directory=history_dir,
                   name=lambda x: cfg.model.name + "_best.pth",
                   trigger=MaxValueTrigger(mode="validation", key="total acc")),
        # Dump the training history every epoch.
        HistorySaver(directory=history_dir,
                     name=lambda x: cfg.model.name + "_history.pth",
                     trigger=IntervalTrigger(period=1))
    ]
    trainer = ClassifierTrainer(net,
                                optimizer,
                                criterion,
                                trainloaders,
                                scheduler=scheduler,
                                extensions=extensions,
                                init_epoch=0,
                                device=device)
    trainer.train(cfg.epoch, valloaders, classes)
    # Final (last-epoch) weights, saved alongside the "_best" checkpoint.
    save_model(net, str(history_dir / "{}.pth".format(cfg.model.name)))
summary_writer.add_image("Mask_GT", dataset.mask, channels=1) summary_writer.add_image("Mask_recon", reconstruct_mask, channels=1) summary_writer.add_image("UV", dataset.uv, channels=2) summary_op = tf.summary.merge(summary_writer.lists) return Model(train_op=train_op, summary_op=summary_op, loss=loss, vars=tf_vars, output=reconstruct_mask) if __name__ == '__main__': set_random_seed(args) # setting up logging logger = initial_logger(args) args.logger = logger args.ngf = 32 args.resnet_conv_count = 2 args.resnet_res_count = 9 args.resnet_padding = 'SYMMETRIC' # load data and preprocess dataset = data_mask.load_data(args) # build network
def main():
    """Entry point: train SimpleCNN on PASCAL VOC multi-label classification.

    Parses CLI flags, builds train/test tf.data pipelines, trains with Adam
    while logging loss/mAP summaries, and finally reports per-class AP plus
    random and ground-truth baselines.
    """
    cli = argparse.ArgumentParser(description='TensorFlow Pascal Example')
    cli.add_argument('--batch-size', type=int, default=20,
                     help='input batch size for training')
    cli.add_argument('--epochs', type=int, default=5,
                     help='number of epochs to train')
    cli.add_argument('--lr', type=float, default=0.001,
                     help='learning rate')
    cli.add_argument('--seed', type=int, default=1,
                     help='random seed')
    cli.add_argument('--log-interval', type=int, default=10,
                     help='how many batches to wait before logging training status')
    cli.add_argument('--eval-interval', type=int, default=50,
                     help='how many batches to wait before evaluate the model')
    cli.add_argument('--log-dir', type=str, default='tb',
                     help='path for logging directory')
    cli.add_argument('--data-dir', type=str, default='./data/VOCdevkit/VOC2007',
                     help='Path to PASCAL data storage')
    opts = cli.parse_args()

    util.set_random_seed(opts.seed)
    session = util.set_session()

    # Load images, multi-hot labels and per-class "difficult" weights.
    imgs_train, lbls_train, wts_train = util.load_pascal(
        opts.data_dir, class_names=CLASS_NAMES, split='trainval')
    imgs_test, lbls_test, wts_test = util.load_pascal(
        opts.data_dir, class_names=CLASS_NAMES, split='test')

    # Training pipeline: augmentation, shuffling, batching.
    ds_train = tf.data.Dataset.from_tensor_slices(
        (imgs_train, lbls_train, wts_train))
    ds_train = ds_train.map(augment_train_data)
    ds_train = ds_train.shuffle(10000).batch(opts.batch_size)

    # Evaluation pipeline: deterministic center crop only.
    ds_test = tf.data.Dataset.from_tensor_slices(
        (imgs_test, lbls_test, wts_test))
    ds_test = ds_test.map(center_crop_test_data)
    ds_test = ds_test.batch(opts.batch_size)

    net = SimpleCNN(num_classes=len(CLASS_NAMES))

    # Fresh, timestamped TensorBoard directory per run.
    log_path = os.path.join(opts.log_dir,
                            datetime.now().strftime('%Y-%m-%d_%H-%M-%S'))
    if os.path.exists(log_path):
        shutil.rmtree(log_path)
    os.makedirs(log_path)
    summary_writer = tf.contrib.summary.create_file_writer(log_path)
    summary_writer.set_as_default()
    tf.contrib.summary.initialize()

    step_counter = tf.train.get_or_create_global_step()
    adam = tf.train.AdamOptimizer(learning_rate=opts.lr)

    history_train = {'iter': [], 'loss': [], 'accuracy': []}
    history_test = {'iter': [], 'loss': [], 'accuracy': []}

    for epoch in range(opts.epochs):
        loss_metric = tfe.metrics.Mean()
        for batch_idx, (imgs, lbls, wts) in enumerate(ds_train):
            # Weighted multi-label sigmoid cross-entropy + gradients.
            batch_loss, gradients = util.cal_grad(
                net,
                loss_func=tf.losses.sigmoid_cross_entropy,
                inputs=imgs,
                targets=lbls,
                weights=wts)
            adam.apply_gradients(zip(gradients, net.trainable_variables),
                                 step_counter)
            loss_metric(batch_loss)
            with tf.contrib.summary.always_record_summaries():
                tf.contrib.summary.scalar('Training Loss', batch_loss)
            if step_counter.numpy() % opts.log_interval == 0:
                print(
                    'Epoch: {0:d}/{1:d} Iteration:{2:d} Training Loss:{3:.4f}'
                    .format(epoch, opts.epochs, step_counter.numpy(),
                            loss_metric.result()))
                history_train['iter'].append(step_counter.numpy())
                history_train['loss'].append(loss_metric.result())
            if step_counter.numpy() % opts.eval_interval == 0:
                # Periodic mAP on the held-out test split.
                eval_ap, eval_map = util.eval_dataset_map(net, ds_test)
                print("mAP: ", eval_map)
                with tf.contrib.summary.always_record_summaries():
                    tf.contrib.summary.scalar('Test mAP', eval_map)

    net.summary()

    # Final evaluation plus two sanity baselines: random scores and the
    # ground-truth labels scored against themselves.
    final_ap, final_map = util.eval_dataset_map(net, ds_test)
    random_ap = util.compute_ap(lbls_test,
                                np.random.random(lbls_test.shape),
                                wts_test,
                                average=None)
    print('Random AP: {} mAP'.format(np.mean(random_ap)))
    oracle_ap = util.compute_ap(lbls_test, lbls_test, wts_test, average=None)
    print('GT AP: {} mAP'.format(np.mean(oracle_ap)))
    print('Obtained {} mAP'.format(final_map))
    print('Per class:')
    for class_idx, class_name in enumerate(CLASS_NAMES):
        print('{}: {}'.format(class_name, util.get_el(final_ap, class_idx)))
def main():
    """Fine-tune an ImageNet-pretrained VGG16 on PASCAL VOC multi-label data.

    Parses CLI flags, builds train/test tf.data pipelines, restores VGG16
    convolutional weights from an H5 file, trains with SGD+momentum under an
    exponentially decaying learning rate, checkpoints every other epoch, and
    reports final per-class AP with random / ground-truth baselines.

    Bug fix vs. the previous revision: the optimizer was built with
    ``learning_rate=decayed_lr()``, which (in eager mode) evaluates the decay
    closure once and freezes the LR at its initial value — only the summary
    scalar ever showed the decay. Passing the callable ``decayed_lr`` makes
    the optimizer re-evaluate the schedule at every update.
    """
    parser = argparse.ArgumentParser(description='VGG Fine Tune')
    parser.add_argument('--batch-size', type=int, default=20,
                        help='input batch size for training')
    parser.add_argument('--epochs', type=int, default=10,
                        help='number of epochs to train')
    parser.add_argument('--lr', type=float, default=0.0001,
                        help='learning rate')
    parser.add_argument('--seed', type=int, default=1,
                        help='random seed')
    parser.add_argument('--log-interval', type=int, default=60,
                        help='how many batches to wait before'
                             ' logging training status')
    parser.add_argument('--eval-interval', type=int, default=60,
                        help='how many batches to wait before'
                             ' evaluate the model')
    parser.add_argument('--log-dir', type=str, default='tb',
                        help='path for logging directory')
    parser.add_argument('--data-dir', type=str,
                        default='./data/VOCdevkit/VOC2007',
                        help='Path to PASCAL data storage')
    args = parser.parse_args()

    util.set_random_seed(args.seed)
    sess = util.set_session()

    # Images, multi-hot labels and per-class weights for both splits.
    train_images, train_labels, train_weights = util.load_pascal(
        args.data_dir, class_names=CLASS_NAMES, split='trainval')
    test_images, test_labels, test_weights = util.load_pascal(
        args.data_dir, class_names=CLASS_NAMES, split='test')

    # Train pipeline: augmentation, shuffle, batch; test: center-crop, batch.
    train_dataset = tf.data.Dataset.from_tensor_slices(
        (train_images, train_labels, train_weights))
    train_dataset = train_dataset.map(augment_train_data)
    train_dataset = train_dataset.shuffle(10000).batch(args.batch_size)
    test_dataset = tf.data.Dataset.from_tensor_slices(
        (test_images, test_labels, test_weights))
    test_dataset = test_dataset.map(center_crop_test_data)
    test_dataset = test_dataset.batch(args.batch_size)

    model = VGG(num_classes=len(CLASS_NAMES))

    # Fresh, timestamped TensorBoard directory per run.
    logdir = os.path.join(args.log_dir,
                          datetime.now().strftime('%Y-%m-%d_%H-%M-%S'))
    if os.path.exists(logdir):
        shutil.rmtree(logdir)
    os.makedirs(logdir)
    writer = tf.contrib.summary.create_file_writer(logdir)
    writer.set_as_default()
    tf.contrib.summary.initialize()
    global_step = tf.train.get_or_create_global_step()

    train_log = {'iter': [], 'loss': [], 'accuracy': []}
    test_log = {'iter': [], 'loss': [], 'accuracy': []}

    ckpt_dir = 'pascal_vgg_ft'
    ckpt_prefix = os.path.join(ckpt_dir, 'ckpt')
    if not os.path.exists(ckpt_dir):
        os.makedirs(ckpt_dir)

    # Build the model first so the pretrained weights can be loaded by name.
    input_shape = tf.TensorShape([None, 224, 224, 3])
    model.build(input_shape)
    model.load_weights('vgg16_weights_tf_dim_ordering_tf_kernels.h5',
                       by_name=True)

    # Halve the LR every 1000 steps (staircase). In eager mode
    # tf.train.exponential_decay returns a closure; pass the *callable* to the
    # optimizer so the decay actually applies to the updates (see docstring).
    decayed_lr = tf.train.exponential_decay(args.lr, global_step, 1000, 0.5,
                                            staircase=True)
    optimizer = tf.train.MomentumOptimizer(learning_rate=decayed_lr,
                                           momentum=0.9)
    root = tf.train.Checkpoint(optimizer=optimizer, model=model)

    for ep in range(args.epochs):
        epoch_loss_avg = tfe.metrics.Mean()
        for batch, (images, labels, weights) in enumerate(train_dataset):
            # Weighted multi-label sigmoid cross-entropy + gradients.
            loss_value, grads = util.cal_grad(
                model,
                loss_func=tf.losses.sigmoid_cross_entropy,
                inputs=images,
                targets=labels,
                weights=weights)
            grads_and_vars = zip(grads, model.trainable_variables)
            optimizer.apply_gradients(grads_and_vars, global_step)
            epoch_loss_avg(loss_value)
            if global_step.numpy() % args.log_interval == 0:
                print(
                    'Epoch: {0:d}/{1:d} Iteration:{2:d} Training Loss:{3:.4f}'
                    .format(ep, args.epochs, global_step.numpy(),
                            epoch_loss_avg.result()))
                train_log['iter'].append(global_step.numpy())
                train_log['loss'].append(epoch_loss_avg.result())
                with tf.contrib.summary.always_record_summaries():
                    tf.contrib.summary.scalar('Training Loss', loss_value)
                    tf.contrib.summary.image('RGB', images)
                    tf.contrib.summary.scalar('LR', decayed_lr())
                    # Per-variable gradient histograms for debugging.
                    for i, variable in enumerate(model.trainable_variables):
                        tf.contrib.summary.histogram("grad_" + variable.name,
                                                     grads[i])
            if global_step.numpy() % args.eval_interval == 0:
                # Periodic held-out evaluation: mAP and test loss.
                test_AP, test_mAP = util.eval_dataset_map(model, test_dataset)
                test_loss = test(model, test_dataset)
                print("mAP: ", test_mAP)
                print("Test Loss: ", test_loss)
                with tf.contrib.summary.always_record_summaries():
                    tf.contrib.summary.scalar('Test mAP', test_mAP)
                    tf.contrib.summary.scalar('Test Loss', test_loss)
        # Checkpoint every other epoch.
        if ep % 2 == 0:
            root.save(ckpt_prefix)

    # Final checkpoint after training completes.
    root.save(ckpt_prefix)
    model.summary()

    # Final evaluation plus sanity baselines: random scores and ground truth
    # scored against itself.
    AP, mAP = util.eval_dataset_map(model, test_dataset)
    rand_AP = util.compute_ap(test_labels,
                              np.random.random(test_labels.shape),
                              test_weights,
                              average=None)
    print('Random AP: {} mAP'.format(np.mean(rand_AP)))
    gt_AP = util.compute_ap(test_labels, test_labels, test_weights,
                            average=None)
    print('GT AP: {} mAP'.format(np.mean(gt_AP)))
    print('Obtained {} mAP'.format(mAP))
    print('Per class:')
    for cid, cname in enumerate(CLASS_NAMES):
        print('{}: {}'.format(cname, util.get_el(AP, cid)))
def ensemble_sims_with_svm(train_sims, valid_sims, test_sims, device, avg=False):
    """Combine several entity-similarity matrices into one ranking.

    Each element of the three lists is one channel's pairwise similarity
    matrix (assumed square: the weighting step reshapes to
    (test_size, test_size, 1)). Channels are z-normalized with statistics
    from the training matrices, then either simply averaged (avg=True) or
    combined with per-channel weights learned by a linear SVM, with the
    regularization constant C selected on the validation matrices. Final
    rankings are reported via get_hits(); nothing is returned.
    """
    set_random_seed()

    # Standardize a matrix and return the statistics used, so the same
    # transform can be replayed on the valid/test matrices below.
    def sim_standardization2(sim):
        mean = np.mean(sim)
        std = np.std(sim)
        sim = (sim - mean) / std
        return sim, mean, std

    # Apply a previously computed (mean, std) transform.
    def sim_standardization3(sim, mean, std):
        return (sim - mean) / std

    # Fit standardization on train sims; replay per-channel on valid/test.
    train_sims2 = []
    mean_list = []
    std_list = []
    for sim in train_sims:
        sim, mean, std = sim_standardization2(sim)
        train_sims2.append(sim)
        mean_list.append(mean)
        std_list.append(std)
    train_sims = train_sims2
    valid_sims = [
        sim_standardization3(sim, mean_list[i], std_list[i])
        for i, sim in enumerate(valid_sims)
    ]
    test_sims = [
        sim_standardization3(sim, mean_list[i], std_list[i])
        for i, sim in enumerate(test_sims)
    ]
    if avg:
        # Unweighted ensemble: just sum the standardized channels and score.
        get_hits(sum(test_sims), device=device)
        return
    # Build SVM training/test sets from the similarity matrices.
    # NOTE(review): ratio presumably controls negative sampling — confirm
    # against generate_data.
    train_data, train_label = generate_data(train_sims, ratio=len(test_sims) * 4)
    test_data, test_label = generate_data(test_sims, ratio=1)

    # Weighted ensemble: stack channels on the last axis and project onto the
    # learned weight vector. The result is negated — get_hits apparently ranks
    # by ascending value (distance-like); verify against its implementation.
    def ensemble_sims_with_weight(test_sims, weight):
        ## test performance
        test_size = test_sims[0].shape[0]
        test_sims = [sim.reshape(test_size, test_size, 1) for sim in test_sims]
        test_sims = np.concatenate(test_sims, axis=-1)
        test_sims = np.dot(test_sims, weight)
        test_sims = np.squeeze(test_sims, axis=-1)
        return -test_sims

    # Train a linear SVM for one value of C and score the given sims.
    # NOTE(review): the printed classification accuracy is computed on
    # test_data/test_label captured from the enclosing scope even when
    # `test_sims` holds the *validation* matrices (see the C loop below) —
    # it is a diagnostic only, but looks worth confirming as intended.
    def performance_svc(train_data, train_label, test_sims, C):
        clf = SVC(kernel='linear', C=C, gamma='auto')
        clf.fit(train_data, train_label)
        prediction = clf.predict(test_data)
        print_time_info('Classification accuracy: %f.'
                        % (np.sum(prediction == test_label) / len(test_label)))
        weight = clf.coef_.reshape(-1, 1)  # shape = [sim_num, 1]
        test_sims = ensemble_sims_with_weight(test_sims, weight)
        top_lr, top_rl, mr_lr, mr_rl, mrr_lr, mrr_rl = get_hits(
            test_sims, print_info=False, device=device)
        # Model selection criterion: mean of Hit@1 in both directions.
        top1 = (top_lr[0] + top_rl[0]) / 2
        return top1, weight

    # Grid-search C on the validation matrices, keep the best weight vector.
    C_range = [1e-6, 1e-5]  #1e-4, 1e-3, 1e-2, 1e-1]# 1, 10, 100, 1000]
    best_top1 = 0
    best_C = 0
    best_weight = None
    for C in C_range:
        top1, weight = performance_svc(train_data, train_label, valid_sims, C)
        if top1 > best_top1:
            best_top1 = top1
            best_C = C
            best_weight = weight
    # Final scoring on the test matrices with the selected weights.
    test_sims = ensemble_sims_with_weight(test_sims, best_weight)
    print('Best C=%f.' % best_C)
    print('Weight', best_weight.reshape(-1))
    get_hits(test_sims, device=device)