def train(self):
    train_dataset = Dataset(self.data_train, self.batch_size,
                            self.vocab_size, self.pad_idx)
    valid_dataset = Dataset(self.data_valid, self.batch_size,
                            self.vocab_size, self.pad_idx)
    train_generator = BatchGenerator(train_dataset)
    valid_generator = BatchGenerator(valid_dataset)

    if self.cfg.early_stopping:
        valid_metric_history = []

    dropout_prob = self.cfg.d_dropout_prob
    pbar = tqdm(total=self.cfg.max_step)
    step = self.sess.run(self.global_step)
    if step > 1:
        pbar.update(step)

    while not self.sv.should_stop():
        pbar.update(1)
        feed = train_generator.get_d_batch()
        ops = [self.train_op, self.global_step]
        result = self.D.run(self.sess, ops, feed, dropout_prob)
        step = result[1]

        # update learning rate (optional)
        if step % self.cfg.lr_update_step == self.cfg.lr_update_step - 1:
            self.sess.run(self.d_lr_update)

        if step % self.cfg.log_step != 0:
            continue

        ops = [self.D.loss, self.D.accuracy]
        train_loss, train_acc = self.D.run(self.sess, ops, feed, dropout_prob)
        print_msg = '[{}/{}] D_loss: {:.6f} D_accuracy: {:.6f}'.format(
            step, self.cfg.max_step, train_loss, train_acc)

        if self.cfg.validation:
            feed = valid_generator.get_d_batch()
            ops = [self.D.loss, self.D.accuracy]
            valid_loss, valid_acc = self.D.run(self.sess, ops, feed, 1)
            add_msg = " | [Valid] D_loss: {:.6f} D_accuracy: {:.6f} ".format(
                valid_loss, valid_acc)
            print_msg = print_msg + add_msg

            if self.cfg.early_stopping and step >= 100:
                if self.cfg.early_stopping_metric == 'loss':
                    valid_metric_history.append(valid_loss)
                elif self.cfg.early_stopping_metric == 'accuracy':
                    valid_metric_history.append(valid_acc)
                if step >= 1000:
                    add_msg = self.validation_monitor(valid_metric_history)
                    print_msg = print_msg + add_msg

        print(print_msg)
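# --- Hedged example (not from the original sources) ---
# The `validation_monitor` helper called above is not shown here. A minimal
# sketch of what such a monitor might do, assuming a loss-style metric
# (lower is better) and an assumed window size; purely illustrative.
def validation_monitor_sketch(history, window=10):
    """Return a status message; the caller may decide to stop on 'no improvement'."""
    if len(history) <= window:
        return ' | [EarlyStop] warming up'
    recent_best = min(history[-window:])
    earlier_best = min(history[:-window])
    if recent_best >= earlier_best:
        return ' | [EarlyStop] no improvement in last {} evals'.format(window)
    return ' | [EarlyStop] improving'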
def evaluate(model, config, checkpoint_manager, checkpoint, ckpt_path,
             model_name_or_path, tokenizer_class, tokenizer_cache_dir):
    if ckpt_path is None:
        ckpt_path = checkpoint_manager.latest_checkpoint
    tf.get_logger().info("Evaluating model %s", ckpt_path)
    checkpoint.restore(ckpt_path)
    validation_dataset = Dataset(config.get("validation_file_path", None),
                                 os.path.join(config.get("model_dir"), "data"),
                                 config.get("seq_size"),
                                 config.get("max_sents"),
                                 config.get("do_shuffle"),
                                 config.get("do_skip_empty"),
                                 procedure="dev",
                                 model_name_or_path=model_name_or_path,
                                 tokenizer_class=tokenizer_class,
                                 tokenizer_cache_dir=tokenizer_cache_dir)
    iterator = iter(validation_dataset.create_one_epoch(do_shuffle=False, mode="p"))

    @tf.function
    def encode_next():
        src, tgt = next(iterator)
        padding_mask = build_mask(src["input_ids"], src["lengths"])
        src_sentence_embedding = model.encode(src, padding_mask)
        padding_mask = build_mask(tgt["input_ids"], tgt["lengths"])
        tgt_sentence_embedding = model.encode(tgt, padding_mask)
        return src_sentence_embedding, tgt_sentence_embedding

    # Iterate over the dataset.
    src_sentence_embedding_list = []
    tgt_sentence_embedding_list = []
    while True:
        try:
            src_sentence_embedding_, tgt_sentence_embedding_ = encode_next()
            src_sentence_embedding_list.append(src_sentence_embedding_.numpy())
            tgt_sentence_embedding_list.append(tgt_sentence_embedding_.numpy())
        except tf.errors.OutOfRangeError:
            break

    src_sentences = np.concatenate(src_sentence_embedding_list, axis=0)
    tgt_sentences = np.concatenate(tgt_sentence_embedding_list, axis=0)
    print("src_sentences", src_sentences.shape)
    print("tgt_sentences", tgt_sentences.shape)

    d = src_sentences.shape[-1]
    index = faiss.IndexFlatIP(d)  # build an inner-product index
    print("faiss state: ", index.is_trained)
    index.add(src_sentences)  # add source vectors to the index
    print("number of sentences: %d" % index.ntotal)
    k = 1
    D, I = index.search(tgt_sentences, k)  # tgt -> src nearest-neighbour search
    # Each target sentence should retrieve the source sentence at the same row.
    print(sklearn.metrics.accuracy_score(np.arange(index.ntotal), I))
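# --- Hedged example (not from the original sources) ---
# Self-contained toy illustration of the retrieval-accuracy check above:
# index one set of vectors with faiss and verify that each query retrieves
# its own row. The data here is random and purely illustrative.
import numpy as np
import faiss

rng = np.random.default_rng(0)
vecs = rng.standard_normal((100, 16)).astype("float32")
vecs /= np.linalg.norm(vecs, axis=1, keepdims=True)  # unit norm: cosine == inner product

index = faiss.IndexFlatIP(16)
index.add(vecs)
_, neighbors = index.search(vecs, 1)  # each vector queries the index
accuracy = (neighbors[:, 0] == np.arange(len(vecs))).mean()
print(accuracy)  # 1.0: every vector is its own nearest neighbour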
def load_image_features(args):
    # Image preprocessing and normalization for the pretrained b7.
    transform = transforms.Compose([
        transforms.Resize(args.crop_size),
        transforms.ToTensor(),
        transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
    ])

    # train split
    fts_file = os.path.join(args.save,
                            'b7_v2.{}.{}.th'.format(args.data_set, 'train'))
    if os.path.isfile(fts_file):
        print('[INFO] loading image features: {}'.format(fts_file))
        fts = torch.load(fts_file, map_location='cpu')
    else:
        print('[INFO] computing image features: {}'.format(fts_file))
        data_loader = Dataset(args.image_folder.format(args.data_set),
                              args.data_split_file.format(args.data_set, 'train'),
                              transform,
                              num_workers=args.num_workers)
        attr_file = args.attribute_file.format(args.data_set)
        fts = utils.extract_features(data_loader, attr_file,
                                     args.attr2idx_file, device,
                                     args.image_model)
        torch.save(fts, fts_file)

    # dev split
    fts_file_dev = os.path.join(args.save,
                                'b7_v2.{}.{}.th'.format(args.data_set, 'val'))
    if os.path.isfile(fts_file_dev):
        print('[INFO] loading image features: {}'.format(fts_file_dev))
        fts_dev = torch.load(fts_file_dev, map_location='cpu')
    else:
        print('[INFO] computing image features: {}'.format(fts_file_dev))
        data_loader_dev = Dataset(args.image_folder.format(args.data_set),
                                  args.data_split_file.format(args.data_set, 'val'),
                                  transform,
                                  num_workers=args.num_workers)
        attr_file_dev = args.attribute_file.format(args.data_set)
        fts_dev = utils.extract_features(data_loader_dev, attr_file_dev,
                                         args.attr2idx_file, device,
                                         args.image_model)
        torch.save(fts_dev, fts_file_dev)

    return fts, fts_dev
def train(self):
    from data_loader import Dataset
    from data_loader import BatchGenerator

    train_dataset = Dataset(self.data_train, self.cfg.batch_size,
                            self.vocab_size, self.pad_idx)
    train_generator = BatchGenerator(train_dataset)
    self.label_real = train_generator.get_binary_label_batch(True)
    self.label_fake = train_generator.get_binary_label_batch(False)
    dropout_prob = self.cfg.d_dropout_prob
    pbar = tqdm(total=self.cfg.max_step)
    step = self.sess.run(self.global_step)
    # z_test = np.random.uniform(-1, 1,
    #                            [self.cfg.batch_size, self.cfg.z_dim])
    if step > 1:
        pbar.update(step)

    while not self.sv.should_stop():
        pbar.update(1)
        que_real, ans_real = train_generator.get_gan_data_batch()

        # G train
        z = np.random.uniform(-1, 1, [self.cfg.batch_size, self.cfg.z_dim])
        feed = [que_real, ans_real, z, 1]
        ops = [self.global_step, self.g_loss, self.g_train_op]
        step, g_loss, _ = self.run_gan(self.sess, ops, feed)

        # D train (once every g_per_d_train generator steps)
        if step % self.cfg.g_per_d_train == 0:
            z = np.random.uniform(-1, 1, [self.cfg.batch_size, self.cfg.z_dim])
            feed = [que_real, ans_real, z, dropout_prob]
            ops = [self.d_loss, self.summary_op, self.d_train_op]
            d_loss, summary, _ = self.run_gan(self.sess, ops, feed)

        # summary & print message
        if step % (self.cfg.g_per_d_train * 10) == 0:
            print_msg = "[{}/{}] G_loss: {:.6f} D_loss: {:.6f} ".format(
                step, self.cfg.max_step, g_loss, d_loss)
            print(print_msg)
            self.writer.add_summary(summary, step)

            # print generated samples
            feed = [que_real, ans_real, z, 1]
            # ans_real.fill(40607)
            # feed = [que_real[:self.cfg.num_samples],
            #         ans_real[:self.cfg.num_samples],
            #         z[:self.cfg.num_samples],
            #         1]
            if self.cfg.dataset == 'nugu':
                self._print_nugu_samples(feed)
            elif self.cfg.dataset == 'simque':
                self._print_simque_samples(feed)
            else:
                raise Exception('Unsupported dataset:', self.cfg.dataset)
def main(params):
    """Starting point of the application."""
    backends = [StdOutBackend(Verbosity.VERBOSE)]
    if params.log_dir is not None:
        os.makedirs(params.log_dir, exist_ok=True)
        logfile = os.path.join(params.log_dir, "log.json")
        backends.append(JSONStreamBackend(Verbosity.VERBOSE, logfile))
    logger = Logger(backends)

    # Optimization flags
    os.environ['CUDA_CACHE_DISABLE'] = '0'
    os.environ['HOROVOD_GPU_ALLREDUCE'] = 'NCCL'
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
    os.environ['TF_GPU_THREAD_MODE'] = 'gpu_private'
    os.environ['TF_SYNC_ON_FINISH'] = '0'
    os.environ['TF_AUTOTUNE_THRESHOLD'] = '2'

    hvd.init()  # init horovod

    # Set GPU configuration.
    if params.use_xla:
        tf.config.optimizer.set_jit(True)

    gpus = tf.config.experimental.list_physical_devices('GPU')
    for gpu in gpus:
        tf.config.experimental.set_memory_growth(gpu, True)
    if gpus:
        tf.config.experimental.set_visible_devices(gpus[hvd.local_rank()], 'GPU')

    if params.use_amp:
        tf.keras.mixed_precision.experimental.set_policy('mixed_float16')
    else:
        os.environ['TF_ENABLE_AUTO_MIXED_PRECISION'] = '0'

    # Get the dataset from the tf.data API.
    dataset = Dataset(batch_size=params.batch_size,
                      gpu_id=hvd.rank(),
                      num_gpus=hvd.size())

    # Build the model.
    input_shape = (1024, 2048, 3)
    model = custom_unet(
        input_shape,
        num_classes=8,
        use_batch_norm=False,
        upsample_mode='deconv',  # 'deconv' or 'simple' ('decov' in the original was a typo)
        use_dropout_on_upsampling=True,
        dropout=0.3,
        dropout_change_per_layer=0.0,
        filters=7,
        num_layers=4,
        output_activation='softmax')

    # Do not use model.compile(), as training uses tf.GradientTape.
    # Start training.
    train(params, model, dataset, logger)
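# --- Hedged example (not from the original sources) ---
# A minimal sketch of the tf.GradientTape training step that the comment above
# alludes to, using Horovod's documented DistributedGradientTape and
# broadcast_variables. The loss choice and `first_batch` handling are
# assumptions, not the project's actual `train()` implementation.
import tensorflow as tf
import horovod.tensorflow as hvd

loss_fn = tf.keras.losses.SparseCategoricalCrossentropy()

@tf.function
def train_step(model, optimizer, images, labels, first_batch):
    with tf.GradientTape() as tape:
        probs = model(images, training=True)  # softmax output, per the model above
        loss = loss_fn(labels, probs)
    tape = hvd.DistributedGradientTape(tape)  # all-reduce gradients across workers
    grads = tape.gradient(loss, model.trainable_variables)
    optimizer.apply_gradients(zip(grads, model.trainable_variables))
    if first_batch:
        # After the first step, sync weights and optimizer state from rank 0.
        hvd.broadcast_variables(model.variables, root_rank=0)
        hvd.broadcast_variables(optimizer.variables(), root_rank=0)
    return loss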
def get_dataloaders(args):
    if args.dataset == 'miniImageNet':
        from data_loader import MiniImageNet as Dataset
    elif args.dataset == 'CUB':
        from data_loader import CUB as Dataset
    elif args.dataset == 'tieredImageNet':
        from data_loader import TieredImageNet as Dataset

    train_set = Dataset(root=args.data_root, dataset=args.dataset,
                        mode='train', cnn=args.cnn)
    train_sampler = CategoriesSampler(train_set.label, args.iters_per_epoch,
                                      args.way, args.shot + args.query)
    train_loader = DataLoader(dataset=train_set,
                              batch_sampler=train_sampler,
                              num_workers=8,
                              pin_memory=True)

    val_set = Dataset(root=args.data_root, dataset=args.dataset,
                      mode='val', cnn=args.cnn)
    val_sampler = CategoriesSampler(val_set.label, args.val_episodes,
                                    args.val_way, args.val_shot + args.val_query)
    val_loader = DataLoader(dataset=val_set,
                            batch_sampler=val_sampler,
                            num_workers=8,
                            pin_memory=True)

    return train_loader, val_loader
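# --- Hedged example (not from the original sources) ---
# A minimal sketch of what an episodic batch sampler like CategoriesSampler
# typically does: each episode picks `way` classes and `shot + query` examples
# per class. This is an assumption about its behaviour, shown for illustration.
import torch

class EpisodicSamplerSketch(torch.utils.data.Sampler):
    def __init__(self, labels, n_episodes, way, samples_per_class):
        self.n_episodes = n_episodes
        self.way = way
        self.k = samples_per_class
        labels = torch.tensor(labels)
        self.class_indices = [torch.nonzero(labels == c).flatten()
                              for c in torch.unique(labels)]

    def __len__(self):
        return self.n_episodes

    def __iter__(self):
        for _ in range(self.n_episodes):
            classes = torch.randperm(len(self.class_indices))[:self.way]
            batch = []
            for c in classes:
                idx = self.class_indices[c]
                batch.append(idx[torch.randperm(len(idx))[:self.k]])
            yield torch.cat(batch).tolist()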
def test(CV, val_csv_file):
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = select_model(device)
    model.load_state_dict(torch.load(args.load_model))
    model.to(device)
    model.eval()

    val_transform = transforms.Compose([
        transforms.Resize(256),
        transforms.RandomCrop(224),
        transforms.ToTensor()
    ])
    testset = Dataset(csv_file=val_csv_file, root_dir=img_path,
                      transform=val_transform)
    test_loader = torch.utils.data.DataLoader(testset,
                                              batch_size=args.test_batch_size,
                                              shuffle=False,
                                              num_workers=args.num_workers)

    print('***CV_{}***'.format(CV))
    for batch_idx, data in enumerate(test_loader):
        print('image count: {}'.format((batch_idx + 1) * args.test_batch_size),
              end='\r')
        image = data['image'].to(device)
        labels = data['annotations'].to(device).float()
        with torch.no_grad():
            outputs = model(image)
        outputs = outputs.view(-1, 5, 1)

        # Mean and variance of the predicted / target 5-bin score distributions.
        predicted_mean, predicted_std = 0.0, 0.0
        target_mean, target_std = 0.0, 0.0
        for i in range(5):
            predicted_mean += i * outputs[:, i].cpu()
            target_mean += i * labels[:, i].cpu()
        for i in range(5):
            predicted_std += outputs[:, i].cpu() * (i - predicted_mean) ** 2
            target_std += labels[:, i].cpu() * (i - target_mean) ** 2

        if batch_idx == 0:
            predicted = predicted_mean
            target = target_mean
        else:
            predicted = torch.cat((predicted, predicted_mean), 0)
            target = torch.cat((target, target_mean), 0)

        if args.plot:
            output_score = predicted_mean.numpy().flatten().tolist()[0]
            target_score = target_mean.numpy().flatten().tolist()[0]
            output_score_std = predicted_std.numpy().flatten().tolist()[0]
            target_score_std = target_std.numpy().flatten().tolist()[0]
            img = data['image'].cpu().squeeze(0).permute(1, 2, 0).numpy()
            print('beauty score: {:.2f}±{:.2f} ({:.2f}±{:.2f})'.format(
                output_score, output_score_std, target_score, target_score_std))
            plt.imshow(img)
            plt.show()

    print('EMD LOSS: {:.4f}'.format(emd_loss(target, predicted)))
    print('PC: {:.4f}'.format(pearsonr_loss(target, predicted)))
    print('MAE LOSS: {:.4f}'.format(MAE_loss(target, predicted)))
    print('RMSE LOSS: {:.4f}'.format(RMSE_loss(target, predicted)))
    print('\n')
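# --- Hedged example (not from the original sources) ---
# Self-contained illustration of the statistic computed above: the mean and
# standard deviation of a discrete score distribution over bins 0..4.
import torch

probs = torch.tensor([0.05, 0.15, 0.40, 0.30, 0.10])  # toy 5-bin distribution
bins = torch.arange(5, dtype=torch.float32)
mean = (bins * probs).sum()               # expected score
var = (probs * (bins - mean) ** 2).sum()  # variance around the mean
print(mean.item(), var.sqrt().item())     # ≈ 2.25 and ≈ 0.99 (std)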
def main():
    # +++++++++++++++++ 1) load and prepare the data
    file_path = r"C:\Users\jojo\Documents\Uni\Second Semester\Machine Learning\Project\ml_project1_data.xlsx"
    ds = Dataset(file_path)

    # +++++++++++++++++ 2) split into train and unseen
    seed = 0
    DF_train, DF_unseen = train_test_split(ds.rm_df.copy(),
                                           test_size=0.2,
                                           stratify=ds.rm_df["Response"],
                                           random_state=seed)

    # +++++++++++++++++ 3) preprocess, based on train
    pr = Processor(DF_train, DF_unseen)

    # +++++++++++++++++ 4) feature engineering
    fe = FeatureEngineer(pr.training, pr.unseen)

    # apply Box-Cox transformations
    # num_features = fe.training._get_numeric_data().drop(['Response'], axis=1).columns
    # fe.box_cox_transformations(num_features, target="Response")

    # rank input features according to Chi-Squared
    # continuous_flist = fe.box_cox_features
    # categorical_flist = ['DT_MS_Divorced', 'DT_MS_Widow', 'DT_E_Master',
    #                      'DT_R_5', 'DT_R_6', "Gender"]
    # fe.rank_features_chi_square(continuous_flist, categorical_flist)
    # print("Ranked input features:\n", fe._rank)

    # get top n features
    # criteria, n_top = "chisq", 9
    # DF_train_top, DF_unseen_top = fe.get_top(criteria="chisq", n_top=n_top)

    # +++++++++++++++++ 5) modelling
    # mlp_param_grid = {'mlpc__hidden_layer_sizes': [(3), (6), (3, 3), (5, 5)],
    #                   'mlpc__learning_rate_init': [0.001, 0.01]}
    #
    # mlp_gscv = grid_search_MLP(DF_train_top, mlp_param_grid, seed)
    # print("Best parameter set: ", mlp_gscv.best_params_)
    # pd.DataFrame.from_dict(mlp_gscv.cv_results_).to_excel("D:\\PipeLines\\project_directory\\data\\mlp_gscv.xlsx")

    # +++++++++++++++++ 6) retraining & assessment of generalization ability
    # auprc = assess_generalization_auprc(mlp_gscv.best_estimator_, DF_unseen_top)
    # print("AUPRC: {:.2f}".format(auprc))

    # +++++++++++++++++ X, y split
    y_test = fe.unseen['Response']
    y_train = fe.training['Response']
    X_test = fe.unseen.loc[:, fe.unseen.columns != 'Response']
    X_train = fe.training.loc[:, fe.training.columns != 'Response']
def __init__(self, config):
    self.config = config
    self.z_num = config['z_num']
    self.hidden_num = config['conv_hidden_num']
    self.height = 256
    self.width = 256
    self.channels = 3
    self.repeat_num = int(np.log2(self.height)) - 2
    self.noise_dim = 0
    self.model_dir = config['results_dir']
    self.num_epochs = config['num_epochs']
    self.batch_size = config['batch_size']
    self.logger = Logger(config['log_dir'])
    self.pretrained_path = config['pretrained_path']
    self.log_every = config['log_every']
    self.save_every = config['save_every']
    self.print_every = config['print_every']
    self.is_wgan = config['is_wgan']

    if not os.path.exists(self.model_dir):
        os.makedirs(self.model_dir)

    self.data_loader_params = {'batch_size': self.batch_size,
                               'shuffle': True,
                               'num_workers': 6}
    if config['use_cuda']:
        self.device = torch.device('cuda')
    else:
        self.device = torch.device('cpu')

    if config['train']:
        self.dataset = Dataset(**get_split('train'))
        self.generator = data.DataLoader(self.dataset, **self.data_loader_params)
        self.n_samples = get_split('train')['total_data']
    else:
        self.dataset = Dataset(**get_split('test'))
        self.generator = data.DataLoader(self.dataset, **self.data_loader_params)
        # The original queried the 'train' split here, which looks like a
        # copy-paste slip; the sample count should match the test split above.
        self.n_samples = get_split('test')['total_data']
        'req_id': req_id,
        'num_tags': len(tags)
    }]
    print(raw_data)

    data_cleaner = Data_cleaner(config, 'predict')
    data_cleaner.raw_data = copy.deepcopy(raw_data)
    data_cleaner.num_article = len(data_cleaner.raw_data)
    data_cleaner.tokenize()

    data_manager = Data_manager(config, data_cleaner.raw_data)
    tokens_tensor, segments_tensors, label = data_manager.get_fitting_features_labels()
    dataset = Dataset(config, tokens_tensor, segments_tensors, label)
    generator = data.DataLoader(dataset,
                                batch_size=config['val_batch_size'],
                                num_workers=multiprocessing.cpu_count(),
                                pin_memory=True)

    # BERT init
    bert_model = BertModel.from_pretrained(config['bert_model'])
    BERT_LAYERS = config['BERT_LAYERS']
    BERT_INTER_LAYER = config['BERT_INTER_LAYER']

    # check devices
    if torch.cuda.is_available():
        device = torch.device('cuda')
        # move model to GPU
        model.cuda()
def train(CV, train_csv_file, val_csv_file, file_name):
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = select_model(device)
    loss_fn = nn.MultiLabelSoftMarginLoss()
    metrics = torch.nn.Sigmoid()

    train_transform = transforms.Compose([
        transforms.Resize(256),
        transforms.RandomCrop(224),
        transforms.RandomHorizontalFlip(),
        transforms.RandomRotation(10, resample=PIL.Image.BILINEAR),
        transforms.ToTensor()
    ])
    val_transform = transforms.Compose([
        transforms.Resize(256),
        transforms.RandomCrop(224),
        transforms.ToTensor()
    ])

    os.makedirs(os.path.join(ckpt_path, file_name), exist_ok=True)
    save_path = os.path.join(ckpt_path, file_name)
    logger = Logger(save_path)

    trainset = Dataset(csv_file=train_csv_file, root_dir=img_path,
                       transform=train_transform)
    valset = Dataset(csv_file=val_csv_file, root_dir=img_path,
                     transform=val_transform)
    train_loader = torch.utils.data.DataLoader(trainset,
                                               batch_size=args.train_batch_size,
                                               shuffle=True,
                                               num_workers=args.num_workers)
    val_loader = torch.utils.data.DataLoader(valset,
                                             batch_size=args.val_batch_size,
                                             shuffle=False,
                                             num_workers=args.num_workers)

    optimizer = optim.Adam([
        {'params': model.features.parameters(), 'lr': args.CONV_LR},
        {'params': model.classifier.parameters(), 'lr': args.DENSE_LR}
    ])

    # send hyperparams
    info = ({
        'train_batch_size': args.train_batch_size,
        'val_batch_size': args.val_batch_size,
        'conv_base_lr': args.CONV_LR,
        'dense_lr': args.DENSE_LR,
    })
    for tag, value in info.items():
        logger.scalar_summary(tag, value, 0)

    param_num = 0
    for param in model.parameters():
        param_num += int(np.prod(param.shape))
    print('Trainable params: %.2f million' % (param_num / 1e6))

    # loss_fn = softCrossEntropy()
    best_val_loss = float('inf')
    train_losses, val_losses = [], []
    train_pc, val_pc = [], []
    train_mae, val_mae = [], []
    train_rmse, val_rmse = [], []

    for epoch in range(0, args.EPOCHS):
        start = time.time()
        for batch_idx, data in enumerate(train_loader):
            images = data['image'].to(device)
            labels = data['annotations'].to(device).float()
            model.train()
            outputs = model(images)
            outputs = outputs.view(-1, 5, 1)
            optimizer.zero_grad()
            outprobs = metrics(outputs)

            predicted_mean, target_mean = 0.0, 0.0
            for i in range(5):
                predicted_mean += i * outprobs[:, i].cpu()
                target_mean += i * labels[:, i].cpu()

            if batch_idx == 0:
                predicted = predicted_mean
                target = target_mean
                predicted_prob = outputs.cpu()
                target_prob = labels.cpu()
            else:
                predicted = torch.cat((predicted, predicted_mean), 0)
                target = torch.cat((target, target_mean), 0)
                predicted_prob = torch.cat((predicted_prob, outputs.cpu()), 0)
                target_prob = torch.cat((target_prob, labels.cpu()), 0)

            PC = pearsonr_loss(target_mean, predicted_mean)
            MAE = MAE_loss(target_mean, predicted_mean)
            RMSE = RMSE_loss(target_mean, predicted_mean)

            loss = loss_fn(outputs, labels)
            loss.backward()
            optimizer.step()

            if batch_idx > 0:
                print('\rCV{} Epoch: {}/{} | CE: {:.4f} | PC: {:.4f} | MAE: {:.4f} | RMSE: {:.4f} | [{}/{} ({:.0f}%)] | Time: {} '
                      .format(CV, epoch + 1, args.EPOCHS, loss, PC, MAE, RMSE,
                              batch_idx * args.train_batch_size,
                              len(train_loader.dataset),
                              100. * batch_idx * args.train_batch_size / len(train_loader.dataset),
                              timeSince(start, batch_idx * args.train_batch_size / len(train_loader.dataset))),
                      end='')

        train_losses.append(loss_fn(predicted_prob, target_prob))
        train_pc.append(pearsonr_loss(target, predicted))
        train_mae.append(MAE_loss(target, predicted))
        train_rmse.append(RMSE_loss(target, predicted))

        # do validation after each epoch
        for batch_idx, data in enumerate(val_loader):
            images = data['image'].to(device)
            labels = data['annotations'].to(device).float()
            with torch.no_grad():
                model.eval()
                outputs = model(images)
                outputs = outputs.view(-1, 5, 1)
                outprobs = metrics(outputs)

            predicted_mean, target_mean = 0.0, 0.0
            for i in range(5):
                predicted_mean += i * outprobs[:, i].cpu()
                target_mean += i * labels[:, i].cpu()

            if batch_idx == 0:
                predicted = predicted_mean
                target = target_mean
                predicted_prob = outputs.cpu()
                target_prob = labels.cpu()
            else:
                predicted = torch.cat((predicted, predicted_mean), 0)
                target = torch.cat((target, target_mean), 0)
                predicted_prob = torch.cat((predicted_prob, outputs.cpu()), 0)
                target_prob = torch.cat((target_prob, labels.cpu()), 0)

        val_losses.append(loss_fn(predicted_prob, target_prob))
        val_pc.append(pearsonr_loss(target, predicted))
        val_mae.append(MAE_loss(target, predicted))
        val_rmse.append(RMSE_loss(target, predicted))

        info = {
            'conv_base_lr': args.CONV_LR,
            'dense_lr': args.DENSE_LR,
            'train CE loss': train_losses[-1],
            'train mae loss': train_mae[-1],
            'train rmse loss': train_rmse[-1],
            'train pc': train_pc[-1],
            'val CE loss': val_losses[-1],
            'val mae loss': val_mae[-1],
            'val rmse loss': val_rmse[-1],
            'val pc': val_pc[-1]
        }
        for tag, value in info.items():
            logger.scalar_summary(tag, value, epoch + 1)

        print('\ntrain CE %.4f | train PC %.4f | train MAE %.4f | train RMSE: %.4f'
              % (train_losses[-1], train_pc[-1], train_mae[-1], train_rmse[-1]))
        print('valid CE %.4f | valid PC %.4f | valid MAE %.4f | valid RMSE: %.4f'
              % (val_losses[-1], val_pc[-1], val_mae[-1], val_rmse[-1]))

        # Use early stopping to monitor training
        if val_losses[-1] < best_val_loss:
            best_val_loss = val_losses[-1]
            # save model weights if val loss decreases
            torch.save(model.state_dict(),
                       os.path.join(save_path, 'BCE-%f.pkl' % (best_val_loss)))
            print('Save Improved Model(BCE_loss = %.6f)...' % (best_val_loss))
            # reset stop_count

        if args.save_fig and (epoch + 1) % 100 == 0:
            epochs = range(1, epoch + 2)
            plt.plot(epochs, train_losses, 'b-', label='train CE')
            plt.plot(epochs, val_losses, 'g-', label='val CE')
            plt.plot(epochs, train_pc, 'r-', label='train pc')
            plt.plot(epochs, val_pc, 'y', label='val pc')
            plt.title('CE loss')
            plt.legend()
            # The original concatenated the directory and file name directly;
            # joining them keeps the figure inside save_path.
            plt.savefig(os.path.join(save_path, 'loss.png'))
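# --- Hedged example (not from the original sources) ---
# The evaluation code earlier reports an `emd_loss`. A common definition for
# score distributions (as in NIMA) is the RMS difference between the two
# cumulative distributions; this sketch assumes inputs of shape (batch, bins)
# that already sum to 1 along the bin axis.
import torch

def emd_loss_sketch(p, q):
    cdf_p = torch.cumsum(p, dim=-1)
    cdf_q = torch.cumsum(q, dim=-1)
    return torch.sqrt(torch.mean((cdf_p - cdf_q) ** 2))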
class LogPrinter:
    def __init__(self, debug, f):
        self.log = sys.stdout if debug else open(f, "a+")

    def __call__(self, sth):
        print(str(sth), file=self.log, flush=True)


log = os.path.join(args.outf, "log_%s_%s.txt" % (args.log_mark, args.mode))
logprinter = LogPrinter(args.debug, log)
logprinter(args._get_kwargs())

# Set up data
contentPath = args.UHD_contentPath if args.UHD else args.contentPath
stylePath = args.UHD_stylePath if args.UHD else args.stylePath
dataset = Dataset(contentPath, stylePath, args.texturePath,
                  args.content_size, args.style_size,
                  args.picked_content_mark, args.picked_style_mark,
                  args.synthesis, args.debug)
loader = torch.utils.data.DataLoader(dataset=dataset,
                                     batch_size=1,
                                     shuffle=False)

# Set up model and transform
wct = WCT(args).cuda()

@torch.no_grad()
def styleTransfer(encoder, decoder, contentImg, styleImg, csF):
    sF = encoder(styleImg)
    torch.cuda.empty_cache()  # empty cache to save memory
    cF = encoder(contentImg)
    torch.cuda.empty_cache()
        loss = capsule_net.loss(data, output, target, reconstructions)
        test_loss += loss.item()  # was loss.data[0] in pre-0.4 PyTorch
        correct += sum(
            np.argmax(masked.data.cpu().numpy(), 1) ==
            np.argmax(target.data.cpu().numpy(), 1))

    tqdm.write("Epoch: [{}/{}], test accuracy: {:.6f}, loss: {:.6f}".format(
        epoch, N_EPOCHS, correct / len(test_loader.dataset),
        test_loss / len(test_loader)))


if __name__ == '__main__':
    torch.manual_seed(1)
    dataset = 'cifar10'
    # dataset = 'mnist'
    config = Config(dataset)
    mnist = Dataset(dataset, BATCH_SIZE)

    capsule_net = CapsNet(config)
    capsule_net = torch.nn.DataParallel(capsule_net)
    if USE_CUDA:
        capsule_net = capsule_net.cuda()
    capsule_net = capsule_net.module
    optimizer = torch.optim.Adam(capsule_net.parameters())

    for e in range(1, N_EPOCHS + 1):
        train(capsule_net, optimizer, mnist.train_loader, e)
        test(capsule_net, mnist.test_loader, e)
def main():
    cfg, args = _parse_args()
    torch.manual_seed(args.seed)

    output_base = cfg.OUTPUT_DIR if len(cfg.OUTPUT_DIR) > 0 else './output'
    exp_name = '-'.join([
        datetime.now().strftime("%Y%m%d-%H%M%S"),
        cfg.MODEL.ARCHITECTURE,
        str(cfg.INPUT.IMG_SIZE)
    ])
    output_dir = get_outdir(output_base, exp_name)
    with open(os.path.join(output_dir, 'config.yaml'), 'w',
              encoding='utf-8') as file_writer:
        # cfg.dump(stream=file_writer, default_flow_style=False, indent=2, allow_unicode=True)
        file_writer.write(pyaml.dump(cfg))
    logger = setup_logger(file_name=os.path.join(output_dir, 'train.log'),
                          control_log=False,
                          log_level='INFO')

    # create model
    model = create_model(cfg.MODEL.ARCHITECTURE,
                         num_classes=cfg.MODEL.NUM_CLASSES,
                         pretrained=True,
                         in_chans=cfg.INPUT.IN_CHANNELS,
                         drop_rate=cfg.MODEL.DROP_RATE,
                         drop_connect_rate=cfg.MODEL.DROP_CONNECT,
                         global_pool=cfg.MODEL.GLOBAL_POOL)

    os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu
    gpu_list = list(map(int, args.gpu.split(',')))
    device = 'cuda'
    if len(gpu_list) == 1:
        model.cuda()
        torch.backends.cudnn.benchmark = True
    elif len(gpu_list) > 1:
        model = nn.DataParallel(model, device_ids=gpu_list)
        model = convert_model(model).cuda()
        torch.backends.cudnn.benchmark = True
    else:
        device = 'cpu'
    logger.info('device: {}, gpu_list: {}'.format(device, gpu_list))

    optimizer = create_optimizer(cfg, model)

    # optionally initialize from a checkpoint
    if args.initial_checkpoint and os.path.isfile(args.initial_checkpoint):
        load_checkpoint(model, args.initial_checkpoint)

    # optionally resume from a checkpoint
    resume_state = None
    resume_epoch = None
    if args.resume and os.path.isfile(args.resume):
        resume_state, resume_epoch = resume_checkpoint(model, args.resume)
    if resume_state and not args.no_resume_opt:
        if 'optimizer' in resume_state:
            optimizer.load_state_dict(resume_state['optimizer'])
            logger.info('Restoring optimizer state from [{}]'.format(args.resume))

    start_epoch = 0
    if args.start_epoch is not None:
        start_epoch = args.start_epoch
    elif resume_epoch is not None:
        start_epoch = resume_epoch

    model_ema = None
    if cfg.SOLVER.EMA:
        # Important to create EMA model after cuda()
        model_ema = ModelEma(model,
                             decay=cfg.SOLVER.EMA_DECAY,
                             device=device,
                             resume=args.resume)

    lr_scheduler, num_epochs = create_scheduler(cfg, optimizer)
    if lr_scheduler is not None and start_epoch > 0:
        lr_scheduler.step(start_epoch)

    # summary
    print('=' * 60)
    print(cfg)
    print('=' * 60)
    print(model)
    print('=' * 60)
    summary(model, (3, cfg.INPUT.IMG_SIZE, cfg.INPUT.IMG_SIZE))

    # dataset
    dataset_train = Dataset(cfg.DATASETS.TRAIN)
    dataset_valid = Dataset(cfg.DATASETS.TEST)
    train_loader = create_loader(dataset_train, cfg, is_training=True)
    valid_loader = create_loader(dataset_valid, cfg, is_training=False)

    # loss function
    if cfg.SOLVER.LABEL_SMOOTHING > 0:
        train_loss_fn = LabelSmoothingCrossEntropy(
            smoothing=cfg.SOLVER.LABEL_SMOOTHING).to(device)
        validate_loss_fn = nn.CrossEntropyLoss().to(device)
    else:
        train_loss_fn = nn.CrossEntropyLoss().to(device)
        validate_loss_fn = train_loss_fn

    eval_metric = cfg.SOLVER.EVAL_METRIC
    best_metric = None
    best_epoch = None
    saver = CheckpointSaver(checkpoint_dir=output_dir,
                            recovery_dir=output_dir,
                            decreasing=True if eval_metric == 'loss' else False)
    try:
        for epoch in range(start_epoch, num_epochs):
            train_metrics = train_epoch(epoch, model, train_loader, optimizer,
                                        train_loss_fn, cfg, logger,
                                        lr_scheduler=lr_scheduler,
                                        saver=saver,
                                        device=device,
                                        model_ema=model_ema)

            eval_metrics = validate(epoch, model, valid_loader,
                                    validate_loss_fn, cfg, logger)
            if model_ema is not None:
                ema_eval_metrics = validate(epoch, model_ema.ema, valid_loader,
                                            validate_loss_fn, cfg, logger)
                eval_metrics = ema_eval_metrics

            if lr_scheduler is not None:
                # step LR for next epoch
                lr_scheduler.step(epoch + 1, eval_metrics[eval_metric])

            update_summary(epoch, train_metrics, eval_metrics,
                           os.path.join(output_dir, 'summary.csv'),
                           write_header=best_metric is None)

            if saver is not None:
                # save proper checkpoint with eval metric
                save_metric = eval_metrics[eval_metric]
                best_metric, best_epoch = saver.save_checkpoint(
                    model, optimizer, cfg,
                    epoch=epoch,
                    model_ema=model_ema,
                    metric=save_metric)
    except KeyboardInterrupt:
        pass

    if best_metric is not None:
        logger.info('*** Best metric: {0} (epoch {1})'.format(best_metric,
                                                              best_epoch))
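# --- Hedged example (not from the original sources) ---
# A minimal sketch of the exponential-moving-average idea behind ModelEma:
# keep a shadow copy of the weights and blend it toward the live weights after
# every update. Illustrative only; the real helper also handles checkpoint
# resume and device placement.
import copy
import torch

class SimpleEmaSketch:
    def __init__(self, model, decay=0.9998):
        self.ema = copy.deepcopy(model).eval()
        self.decay = decay
        for p in self.ema.parameters():
            p.requires_grad_(False)

    @torch.no_grad()
    def update(self, model):
        # ema <- decay * ema + (1 - decay) * current
        for ema_p, p in zip(self.ema.parameters(), model.parameters()):
            ema_p.mul_(self.decay).add_(p, alpha=1.0 - self.decay)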
def align(lang, checkpoint_path, dataset_path, config, config_class,
          model_class, tokenizer_class, output="output"):
    #####
    print("encoding %s in lang %d using ckpt %s" %
          (dataset_path, lang, checkpoint_path))
    #####
    model_name_or_path = config.get("model_name_or_path", "xlm-mlm-enfr-1024")
    config_cache_dir = config.get("pretrained_config_cache_dir")
    model_cache_dir = config.get("pretrained_model_cache_dir")
    tokenizer_cache_dir = config.get("pretrained_tokenizer_cache_dir")
    model_name_or_path_ = config.get("model_name_or_path_", "xlm-mlm-enfr-1024")
    #####
    dataset = Dataset(dataset_path,
                      config.get("training_data_save_path"),
                      config.get("seq_size"),
                      config.get("max_sents"),
                      config.get("do_shuffle"),
                      config.get("do_skip_empty"),
                      procedure="align",
                      model_name_or_path=model_name_or_path,
                      tokenizer_class=tokenizer_class,
                      tokenizer_cache_dir=tokenizer_cache_dir)
    pretrained_config = config_class.from_pretrained(
        model_name_or_path,
        cache_dir=config_cache_dir if config_cache_dir else None)
    model = model_class.from_pretrained(
        model_name_or_path_,
        config=pretrained_config,
        cache_dir=model_cache_dir if model_cache_dir else None)
    checkpoint = tf.train.Checkpoint(model=model)
    checkpoint_manager = tf.train.CheckpointManager(checkpoint,
                                                    config["model_dir"],
                                                    max_to_keep=5)
    if checkpoint_manager.latest_checkpoint is not None:
        if checkpoint_path is None:
            checkpoint_path = checkpoint_manager.latest_checkpoint
        tf.get_logger().info("Restoring parameters from %s", checkpoint_path)
        checkpoint.restore(checkpoint_path)

    iterator = iter(dataset.create_one_epoch(mode="p", lang=lang))

    @tf.function
    def encode_next():
        src, tgt = next(iterator)
        src_padding_mask = build_mask(src["input_ids"], src["lengths"])
        tgt_padding_mask = build_mask(tgt["input_ids"], tgt["lengths"])
        sign = -1.0
        align, _, _, _, _ = model((src, tgt),
                                  sign_src=sign,
                                  sign_tgt=sign,
                                  src_padding_mask=src_padding_mask,
                                  tgt_padding_mask=tgt_padding_mask,
                                  training=False)
        tf.print(align, summarize=1000)
        return align

    import matplotlib.pyplot as plt
    import seaborn as sns

    align_ = None
    while True:
        try:
            align = encode_next()
            align_ = tf.squeeze(align).numpy()
        except tf.errors.OutOfRangeError:
            break

    fig, ax = plt.subplots(figsize=(6, 6))
    ax = sns.heatmap(align_, linewidths=.5, ax=ax, cbar=False)
    fig.savefig('heatmap_align.pgf')
    return True
def train(model_name="LSTM", params=None, embedding="Random"):
    # Parameters to tune
    print(params)
    batch_size = params["batch_size"]
    num_epochs = params["num_epochs"]
    oversample = params["oversample"]
    soft_labels = params["soft_labels"]
    if model_name == "LSTM":
        learning_rate = params["learning_rate"]
        hidden_dim = params["hidden_dim"]
        num_layers = params["num_layers"]
        dropout = params["dropout"]
    combine = embedding == "Both"
    embedding_dim = 300
    if combine:
        embedding = "Random"
    if model_name == "Bert":
        learning_rate = params["learning_rate"]
        num_warmup_steps = params["num_warmup_steps"]
        num_total_steps = params["num_total_steps"]
        embedding = "None"

    # Constants
    test_percentage = 0.1
    val_percentage = 0.2

    # Load data
    torch.manual_seed(42)
    dataset = Dataset("../data/cleaned_tweets_orig.csv",
                      use_embedding=embedding,
                      embedd_dim=embedding_dim,
                      for_bert=(model_name == "Bert"),
                      combine=combine)
    train_data, val_test_data = split_dataset(dataset,
                                              test_percentage + val_percentage)
    val_data, test_data = split_dataset(
        val_test_data, test_percentage / (test_percentage + val_percentage))
    train_loader, val_loader, weights = load_data(oversample, train_data,
                                                  val_data, batch_size)

    # Define model
    if model_name == "CNN":
        vocab_size = len(dataset.vocab)
        model = CNN(vocab_size,
                    embedding_dim=embedding_dim,
                    combine=params["combine"],
                    n_filters=params["filters"])
    elif model_name == "LSTM":
        vocab_size = len(dataset.vocab)
        model = LSTM(vocab_size, embedding_dim,
                     batch_size=batch_size,
                     hidden_dim=hidden_dim,
                     lstm_num_layers=num_layers,
                     combine=combine,
                     dropout=dropout)
    elif model_name == "Bert":
        model = BertForSequenceClassification.from_pretrained(
            "bert-base-uncased", num_labels=3)
        train_loader, val_loader, weights = load_data(oversample, train_data,
                                                      val_data, batch_size,
                                                      collate_fn=bert_collate)

    if not model_name == "Bert":
        model.embedding.weight.data.copy_(dataset.vocab.vectors)
        if combine:
            model.embedding_glove.weight.data.copy_(dataset.glove.vectors)

    # cuda
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)

    # optimiser
    scheduler = None
    optimizer = optim.Adam(model.parameters(), lr=params["learning_rate"])
    if model_name == "Bert":
        optimizer = AdamW(model.parameters(),
                          lr=learning_rate,
                          correct_bias=False)
        # Linear scheduler for adaptive lr
        scheduler = WarmupLinearSchedule(optimizer,
                                         warmup_steps=num_warmup_steps,
                                         t_total=num_total_steps)

    # weighted cross entropy loss, by class counts of the other classes
    weights = torch.tensor([0.9414, 0.2242, 0.8344], device=device)
    if soft_labels:
        criterion = weighted_soft_cross_entropy
    else:
        criterion = nn.CrossEntropyLoss(weight=weights)
    eval_criterion = nn.CrossEntropyLoss(weight=weights)

    for epoch in range(num_epochs):
        # train
        epoch_loss, epoch_acc = train_epoch(model, train_loader, optimizer,
                                            criterion, device,
                                            scheduler=scheduler,
                                            weights=weights)
        # realtime feel
        print(f'Epoch: {epoch+1}')
        print(f'\tTrain Loss: {epoch_loss:.5f} | Train Acc: {epoch_acc*100:.2f}%')

    # Compute F1 score on the validation set - this is what we optimise during tuning
    loss, acc, predictions, ground_truth = evaluate_epoch(model, val_loader,
                                                          eval_criterion,
                                                          device,
                                                          is_final=True)
    val_f1 = f1_score(y_true=ground_truth, y_pred=predictions, average="macro")
    print("Done")
    return val_f1
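# --- Hedged example (not from the original sources) ---
# WarmupLinearSchedule (from the early pytorch-transformers library used above)
# increases the LR linearly for `warmup_steps`, then decays it linearly to
# zero at `t_total`. A minimal reimplementation with torch's LambdaLR, shown
# for illustration; the helper name is hypothetical.
import torch

def warmup_linear_lambda(warmup_steps, t_total):
    def fn(step):
        if step < warmup_steps:
            return step / max(1, warmup_steps)
        return max(0.0, (t_total - step) / max(1, t_total - warmup_steps))
    return fn

# usage (with a hypothetical `optimizer`):
# scheduler = torch.optim.lr_scheduler.LambdaLR(
#     optimizer, lr_lambda=warmup_linear_lambda(100, 1000))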
def resume_train_gan(meta_file, checkpoint_path, train_data_dir, val_data_dir,
                     output_dir, num_epochs, batch_size=16, eval_val=True,
                     save_eval_img=True, num_eval_img=100, device='/gpu:0',
                     img_dim=256):
    # Set up output directories
    val_dir = output_dir + 'val_results/'
    val_img_dir = val_dir + 'imgs/'
    train_dir = output_dir + 'train_results/'
    trained_sess_dir = output_dir + 'trained_sess/'
    if not os.path.exists(val_dir):
        os.makedirs(val_dir)
    if not os.path.exists(val_img_dir):
        os.makedirs(val_img_dir)
    if not os.path.exists(train_dir):
        os.makedirs(train_dir)
    if not os.path.exists(trained_sess_dir):
        os.makedirs(trained_sess_dir)

    # Get the trained model configuration
    configs = checkpoint_path.split('/')[-1]
    pre_epoch = int(configs.split('_')[-1])
    params_str = configs.split('_')[:-2]
    params_str = '_'.join(params_str)

    # Output file paths
    train_log_file = train_dir + 'train_log_{}.txt'.format(params_str)
    train_img_file = train_dir + 'train_gen_examples_epoch_'
    val_log_file = val_dir + 'val_log_{}.txt'.format(params_str)
    val_csv_file = val_dir + 'val_metrics_{}'.format(params_str)

    # Initialize the log files
    start_msg = local_clock() + ' Resumed training model with {} and {} epochs\n'.format(
        params_str, pre_epoch)
    print(start_msg)
    with open(train_log_file, 'w') as handle:
        handle.write(start_msg)
        handle.write('device={}\n'.format(device))
    with open(val_log_file, 'w') as handle:
        handle.write(start_msg)
        handle.write('device={}\n'.format(device))

    # Get the data set
    train_gray_dir = train_data_dir + 'gray/'
    train_color_dir = train_data_dir + 'color/'
    val_gray_dir = val_data_dir + 'gray/'
    val_color_dir = val_data_dir + 'color/'
    train_data = Dataset(train_gray_dir, train_color_dir, batch_size, img_dim,
                         shuffle=True)
    train_example_data = Dataset(train_gray_dir, train_color_dir, batch_size,
                                 img_dim, shuffle=False)
    val_data = Dataset(val_gray_dir, val_color_dir, batch_size, img_dim,
                       shuffle=False)

    # Restore the trained session and continue training
    with tf.Session() as sess:
        new_saver = tf.train.import_meta_graph(meta_file)
        new_saver.restore(sess, checkpoint_path)

        # Restore the variables
        is_training = tf.get_collection('is_training')[0]
        gray_img = tf.get_collection('gray_img')[0]
        color_img = tf.get_collection('color_img')[0]
        G_sample = tf.get_collection('G_sample')[0]
        D_loss = tf.get_collection('D_loss')[0]
        G_loss = tf.get_collection('G_loss')[0]
        img_loss = tf.get_collection('img_loss')[0]
        mse = tf.get_collection('mse')[0]
        D_train_op = tf.get_collection('D_train_op')[0]
        G_train_op = tf.get_collection('G_train_op')[0]

        for epoch in range(pre_epoch + 1, pre_epoch + 1 + num_epochs):
            print(local_clock() + ' Started epoch %d' % (epoch))
            for t, (gray_img_np, color_img_np) in enumerate(train_data):
                gray_processed_np = preprocess(gray_img_np)
                color_processed_np = preprocess(color_img_np)
                feed_dict = {
                    gray_img: gray_processed_np,
                    color_img: color_processed_np,
                    is_training: True
                }
                _, D_loss_np = sess.run([D_train_op, D_loss],
                                        feed_dict=feed_dict)
                _, G_loss_np, img_loss_np = sess.run(
                    [G_train_op, G_loss, img_loss], feed_dict=feed_dict)
                mse_np = sess.run(mse, feed_dict=feed_dict)

            # Save the results to the train log file
            epoch_train_time = local_clock() + '\n'
            epoch_train_msg = 'Epoch %d D loss: %0.4f G loss: %0.4f img loss: %0.4f MSE: %0.4f' % (
                epoch, D_loss_np, G_loss_np, img_loss_np, mse_np)
            print(local_clock() + ' ' + epoch_train_msg)
            epoch_train_msg += '\n'
            with open(train_log_file, 'a') as handle:
                handle.write('\n')
                handle.write(epoch_train_time)
                handle.write(epoch_train_msg)

            # Save examples of generated images
            for j, (gray_example_np,
                    color_example_np) in enumerate(train_example_data):
                gray_example_processed_np = preprocess(gray_example_np)
                color_example_processed_np = preprocess(color_example_np)
                break  # only load the first batch as examples
            example_feed_dict = {
                gray_img: gray_example_processed_np,
                color_img: color_example_processed_np,
                is_training: False
            }
            gen_example_np = sess.run(G_sample, feed_dict=example_feed_dict)
            gen_example_np = postprocess(gen_example_np)
            show_images(gen_example_np, post_process=False, save=True,
                        filepath=train_img_file + str(epoch) + '.png')

            # If true, evaluate on the validation data set
            if eval_val:
                val_log_note = 'Epoch ' + str(epoch)
                epoch_val_img_dir = val_img_dir + 'epoch' + str(epoch) + '/'
                if not os.path.exists(epoch_val_img_dir):
                    os.makedirs(epoch_val_img_dir)
                epoch_val_csv = val_csv_file + '_epoch' + str(epoch) + '.csv'
                evaluate_model(sess=sess,
                               graph_gray=gray_img,
                               graph_color=color_img,
                               graph_training=is_training,
                               graph_D_loss=D_loss,
                               graph_G_loss=G_loss,
                               graph_img_loss=img_loss,
                               graph_G_sample=G_sample,
                               dataset=val_data,
                               log_filename=val_log_file,
                               log_note=val_log_note,
                               csv_filename=epoch_val_csv,
                               output_imgs=save_eval_img,
                               img_dir=epoch_val_img_dir,
                               num_eval_img=num_eval_img)

            # Save the session when the epoch is done
            saver = tf.train.Saver()
            sess_name = params_str + '_epoch_' + str(epoch)
            sess_file = trained_sess_dir + sess_name
            saver.save(sess, sess_file)
            print(local_clock() + ' Finished epoch %d' % (epoch))
            print('')
    return
def evaluate_trained_gan(meta_file, checkpoint_path, eval_data_dir, output_dir,
                         num_eval_img=100, batch_size=16, img_dim=256):
    # Set up output directories
    eval_dir = output_dir + 'eval_results/'
    eval_img_dir = eval_dir + 'imgs/'
    if not os.path.exists(eval_dir):
        os.makedirs(eval_dir)
    if not os.path.exists(eval_img_dir):
        os.makedirs(eval_img_dir)

    # Output file paths
    eval_log_file = eval_dir + 'eval_log.txt'
    eval_csv_file = eval_dir + 'eval_metrics.csv'

    # Initialize the log file
    start_msg = local_clock() + ' Started evaluating model.'
    with open(eval_log_file, 'w') as handle:
        handle.write(start_msg)
        handle.write('meta file: ' + meta_file + '\n')
        handle.write('checkpoint path: ' + checkpoint_path + '\n')
        handle.write('eval data directory: ' + eval_data_dir + '\n')

    # Get the data set
    eval_gray_dir = eval_data_dir + 'gray/'
    eval_color_dir = eval_data_dir + 'color/'
    eval_data = Dataset(eval_gray_dir, eval_color_dir, batch_size, img_dim,
                        shuffle=False)

    # Restore the trained session and evaluate on the evaluation dataset
    with tf.Session() as sess:
        new_saver = tf.train.import_meta_graph(meta_file)
        new_saver.restore(sess, checkpoint_path)

        # Restore the variables
        is_training = tf.get_collection('is_training')[0]
        gray_img = tf.get_collection('gray_img')[0]
        color_img = tf.get_collection('color_img')[0]
        G_sample = tf.get_collection('G_sample')[0]
        D_loss = tf.get_collection('D_loss')[0]
        G_loss = tf.get_collection('G_loss')[0]
        img_loss = tf.get_collection('img_loss')[0]
        mse = tf.get_collection('mse')[0]
        D_train_op = tf.get_collection('D_train_op')[0]
        G_train_op = tf.get_collection('G_train_op')[0]

        evaluate_model(sess=sess,
                       graph_gray=gray_img,
                       graph_color=color_img,
                       graph_training=is_training,
                       graph_D_loss=D_loss,
                       graph_G_loss=G_loss,
                       graph_img_loss=img_loss,
                       graph_G_sample=G_sample,
                       dataset=eval_data,
                       log_filename=eval_log_file,
                       log_note='Finished evaluating.',
                       csv_filename=eval_csv_file,
                       output_imgs=True,
                       img_dir=eval_img_dir,
                       num_eval_img=num_eval_img)
    return
def train_gan(train_data_dir, val_data_dir, output_dir, D_lr, G_lr, beta1, reg,
              num_epochs, loss='l2', batch_size=16, eval_val=True,
              save_eval_img=True, num_eval_img=100, device='/gpu:0',
              img_dim=256):
    # Set up the image loss function
    if loss == 'l2':
        loss_method = l2_loss
    elif loss == 'l1':
        loss_method = l1_loss

    # Set up output directories
    val_dir = output_dir + 'val_results/'
    val_img_dir = val_dir + 'imgs/'
    train_dir = output_dir + 'train_results/'
    trained_sess_dir = output_dir + 'trained_sess/'
    if not os.path.exists(val_dir):
        os.makedirs(val_dir)
    if not os.path.exists(val_img_dir):
        os.makedirs(val_img_dir)
    if not os.path.exists(train_dir):
        os.makedirs(train_dir)
    if not os.path.exists(trained_sess_dir):
        os.makedirs(trained_sess_dir)

    # Output file paths
    train_log_file = train_dir + 'train_log_Dlr={}_Glr={}_beta1={}_reg={}_loss={}.txt'.format(
        D_lr, G_lr, beta1, reg, loss)
    train_img_file = train_dir + 'train_gen_examples_epoch_'
    val_log_file = val_dir + 'val_log_Dlr={}_Glr={}_beta1={}_reg={}_loss={}.txt'.format(
        D_lr, G_lr, beta1, reg, loss)
    val_csv_file = val_dir + 'val_metrics_Dlr={}_Glr={}_beta1={}_reg={}_loss={}'.format(
        D_lr, G_lr, beta1, reg, loss)

    # Initialize the log files
    start_msg = local_clock() + ' Started training model with D_lr={}, G_lr={}, beta1={}, reg={}\n'.format(
        D_lr, G_lr, beta1, reg)
    print(start_msg)
    with open(train_log_file, 'w') as handle:
        handle.write(start_msg)
        handle.write('device={}\n'.format(device))
    with open(val_log_file, 'w') as handle:
        handle.write(start_msg)
        handle.write('device={}\n'.format(device))

    # Get the data set
    train_gray_dir = train_data_dir + 'gray/'
    train_color_dir = train_data_dir + 'color/'
    val_gray_dir = val_data_dir + 'gray/'
    val_color_dir = val_data_dir + 'color/'
    train_data = Dataset(train_gray_dir, train_color_dir, batch_size, img_dim,
                         shuffle=True)
    train_example_data = Dataset(train_gray_dir, train_color_dir, batch_size,
                                 img_dim, shuffle=False)
    val_data = Dataset(val_gray_dir, val_color_dir, batch_size, img_dim,
                       shuffle=False)

    # Construct computational graph
    tf.reset_default_graph()  # reset the graph
    with tf.device(device):
        is_training = tf.placeholder(tf.bool, name='is_training')
        gray_img = tf.placeholder(tf.float32, [None, img_dim, img_dim, 1])
        color_img = tf.placeholder(tf.float32, [None, img_dim, img_dim, 4])
        pair_real = tf.concat([gray_img, color_img], axis=3)
        G_sample = generator(gray_img, is_training)
        pair_fake = tf.concat([gray_img, G_sample], axis=3)

        with tf.variable_scope('') as scope:
            logits_real = discriminator(pair_real, is_training)
            scope.reuse_variables()
            logits_fake = discriminator(pair_fake, is_training)

        # Get the list of trainable variables for the discriminator and generator
        D_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                                   'discriminator')
        G_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                                   'generator')

        # Get solvers
        D_solver, G_solver = get_solvers(D_lr=D_lr, G_lr=G_lr, beta1=beta1)

        # Compute the losses
        D_loss, G_loss = gan_loss(logits_real, logits_fake)
        img_loss = loss_method(G_sample, color_img, reg=reg)

        # Calculate the MSE between generated images and original color images
        mse = calculate_mse(G_sample, color_img)

        # Set up the training operations
        D_update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS,
                                         'discriminator')
        with tf.control_dependencies(D_update_ops):
            D_train_op = D_solver.minimize(D_loss, var_list=D_vars)
        G_update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS, 'generator')
        with tf.control_dependencies(G_update_ops):
            G_train_op = G_solver.minimize(G_loss + img_loss, var_list=G_vars)

    # Remember the nodes we want to run in the future
    tf.add_to_collection('is_training', is_training)
    tf.add_to_collection('gray_img', gray_img)
    tf.add_to_collection('color_img', color_img)
    tf.add_to_collection('G_sample', G_sample)
    tf.add_to_collection('D_loss', D_loss)
    tf.add_to_collection('G_loss', G_loss)
    tf.add_to_collection('img_loss', img_loss)
    tf.add_to_collection('mse', mse)
    tf.add_to_collection('D_train_op', D_train_op)
    tf.add_to_collection('G_train_op', G_train_op)

    # Training loop
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        for epoch in range(num_epochs):
            print(local_clock() + ' Started epoch %d' % (epoch))
            for t, (gray_img_np, color_img_np) in enumerate(train_data):
                gray_processed_np = preprocess(gray_img_np)
                color_processed_np = preprocess(color_img_np)
                feed_dict = {
                    gray_img: gray_processed_np,
                    color_img: color_processed_np,
                    is_training: True
                }
                _, D_loss_np = sess.run([D_train_op, D_loss],
                                        feed_dict=feed_dict)
                _, G_loss_np, img_loss_np = sess.run(
                    [G_train_op, G_loss, img_loss], feed_dict=feed_dict)
                mse_np = sess.run(mse, feed_dict=feed_dict)

            # Save the results to the train log file
            epoch_train_time = local_clock() + '\n'
            epoch_train_msg = 'Epoch %d D loss: %0.4f G loss: %0.4f img loss: %0.4f MSE: %0.4f' % (
                epoch, D_loss_np, G_loss_np, img_loss_np, mse_np)
            print(local_clock() + ' ' + epoch_train_msg)
            epoch_train_msg += '\n'
            with open(train_log_file, 'a') as handle:
                handle.write('\n')
                handle.write(epoch_train_time)
                handle.write(epoch_train_msg)

            # Save examples of generated images
            for j, (gray_example_np,
                    color_example_np) in enumerate(train_example_data):
                gray_example_processed_np = preprocess(gray_example_np)
                color_example_processed_np = preprocess(color_example_np)
                break  # only load the first batch as examples
            example_feed_dict = {
                gray_img: gray_example_processed_np,
                color_img: color_example_processed_np,
                is_training: False
            }
            gen_example_np = sess.run(G_sample, feed_dict=example_feed_dict)
            gen_example_np = postprocess(gen_example_np)
            show_images(gen_example_np, post_process=False, save=True,
                        filepath=train_img_file + str(epoch) + '.png')

            # If true, evaluate on the validation data set
            if eval_val:
                val_log_note = 'Epoch ' + str(epoch)
                epoch_val_img_dir = val_img_dir + 'epoch' + str(epoch) + '/'
                if not os.path.exists(epoch_val_img_dir):
                    os.makedirs(epoch_val_img_dir)
                epoch_val_csv = val_csv_file + '_epoch' + str(epoch) + '.csv'
                evaluate_model(sess=sess,
                               graph_gray=gray_img,
                               graph_color=color_img,
                               graph_training=is_training,
                               graph_D_loss=D_loss,
                               graph_G_loss=G_loss,
                               graph_img_loss=img_loss,
                               graph_G_sample=G_sample,
                               dataset=val_data,
                               log_filename=val_log_file,
                               log_note=val_log_note,
                               csv_filename=epoch_val_csv,
                               output_imgs=save_eval_img,
                               img_dir=epoch_val_img_dir,
                               num_eval_img=num_eval_img)

            # Save the session when the epoch is done
            saver = tf.train.Saver()
            sess_name = 'Dlr={}_Glr={}_beta1={}_reg={}_loss={}_epoch_{}'.format(
                D_lr, G_lr, beta1, reg, loss, epoch)
            sess_file = trained_sess_dir + sess_name
            saver.save(sess, sess_file)
            print(local_clock() + ' Finished epoch %d' % (epoch))
            print('')
    return
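# --- Hedged example (not from the original sources) ---
# `gan_loss` is not shown in these files. A standard TF1-style definition for
# a discriminator/generator pair over logits, shown as an assumption about
# what it computes rather than the project's actual implementation:
def gan_loss_sketch(logits_real, logits_fake):
    # Discriminator: real pairs labelled 1, fake pairs labelled 0.
    d_loss_real = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(
        labels=tf.ones_like(logits_real), logits=logits_real))
    d_loss_fake = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(
        labels=tf.zeros_like(logits_fake), logits=logits_fake))
    # Generator: tries to make fake pairs be classified as real.
    g_loss = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(
        labels=tf.ones_like(logits_fake), logits=logits_fake))
    return d_loss_real + d_loss_fake, g_loss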
    prediction_labels = np.argmax(preds, axis=1)
    test_labels = np.argmax(dataset.test_labels, axis=1)
    test_features = dataset.test_features
    # CSV header (translated from Lithuanian: 'Paveikslėlis', 'Nuspėta klasė', 'Tikroji klasė')
    csv_writer.append_to_file(['#', 'Image', 'Predicted class', 'True class'])
    for index in range(30):
        csv_writer.append_to_file([
            index + 1, '',
            LABELS[prediction_labels[index]],
            LABELS[test_labels[index]]
        ])
        image_saver.plt.imshow(test_features[index])
        image_saver.save_image(index)


if __name__ == '__main__':
    dataset = Dataset(data_folder='./data')
    dataset.load_data(data_parts=[0.7, 0.2, 0.1])
    print(dataset.get_data_summary())

    l_rate, momentum, n_epoch, batch_size, verbose, optimizer, loss_func = load_P1_options()
    model = Model(l_rate=l_rate,
                  momentum=momentum,
                  optimizer=optimizer,
                  loss=loss_func)
    # train_scenario()
    load_from_file_scenario()
    loss, accuracy, predictions = model.evaluate(
# Add all arguments to parser
args = parser.parse_args()

##############
# Cuda Flags #
##############
if args.cuda:
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
else:
    device = torch.device("cpu")

###############################
# Creating the dataset object #
###############################
# Create training data object
trainset = Dataset(phase='train',
                   max_input_length=10,
                   auto_encoder=args.auto_encoder)

# Extract the languages' attributes
input_lang, output_lang = trainset.langs()

# The trainloader for parallel processing
trainloader = torch.utils.data.DataLoader(trainset,
                                          batch_size=args.batch_size,
                                          shuffle=True,
                                          num_workers=args.num_workers,
                                          pin_memory=False,
                                          drop_last=True)

# iterate through training
dataiter = iter(trainloader)

# Create testing data object
testset = Dataset(phase='test',
                  max_input_length=10,
                  auto_encoder=args.auto_encoder)
testloader = torch.utils.data.DataLoader(testset,
                                         batch_size=1,
                                         shuffle=True,
                                         num_workers=1,
                                         pin_memory=False,
                                         drop_last=True)
def train(network, dataset_path, real_path, mesh_path, mesh_info, object_id,
          epochs, batch_size=1, sample_interval=50):
    dalo = Dataset('train', dataset_path, object_id, real_path, mesh_path,
                   mesh_info, batch_size)
    optimizer = Adam(lr=1e-5, clipnorm=0.001)
    network.compile(loss='mse', optimizer=optimizer)
    multiproc = Pool(1)

    for epoch in range(epochs):
        for batch_i in range(dalo.n_batches):
            start_t = time.time()
            batch = dalo.__get_batch__(batch_i)
            img_list = dalo.__img_list__()
            ann_list = copy.deepcopy(dalo.__anno_list__())
            ia = copy.deepcopy(dalo.__augmenter__())
            intri = copy.deepcopy(dalo.__get_intrinsics__())
            diameter = copy.deepcopy(dalo.__model_diameter__())
            img_res = copy.deepcopy(dalo.__image_shape__())
            parallel_loaded = multiproc.map(partial(load_data_sample,
                                                    img_list=img_list,
                                                    anno_list=ann_list,
                                                    augmenter=ia,
                                                    intrinsics=intri,
                                                    img_res=img_res,
                                                    model_dia=diameter),
                                            batch)
            imgs_obsv = []
            imgs_rend = []
            targets = []
            ren_Rot = []
            ren_Tra = []
            bboxes = []  # temp for separate rendering
            for sample in parallel_loaded:
                imgs_obsv.append(sample[0])
                # imgs_rend.append(sample[1])
                targets.append(sample[1])
                ren_Rot.append(sample[2])
                ren_Tra.append(sample[3])
                bboxes.append(sample[4])

            # looping over render
            # for idx, pose in enumerate(extrinsics):
            #     imgs_rend.append(render_crop(obsv_pose=pose, bbox=bboxes[idx],
            #                                  renderer=bop_render, intrinsics=intri,
            #                                  obj_id=object_id, img_res=img_res))

            # multiproc render and cropping
            # triple_list = []
            # for idx, rot in enumerate(ren_Rot):
            #     triple_list.append([rot, ren_Tra[idx], bboxes[idx]])
            # parallel_rendered = multiproc.map(partial(render_crop,
            #                                           renderer=bop_render,
            #                                           intrinsics=intri,
            #                                           obj_id=object_id,
            #                                           img_res=img_res),
            #                                   triple_list)
            '''
            # multiproc only rendering
            double_list = []
            for idx, rot in enumerate(ren_Rot):
                double_list.append([rot, ren_Tra[idx]])
            light_pose = [np.random.rand() * 2000.0 - 1000.0,
                          np.random.rand() * 2000.0 - 1000.0,
                          0.0]
            # light_color = [np.random.rand() * 0.1 + 0.9,
            #                np.random.rand() * 0.1 + 0.9,
            #                np.random.rand() * 0.1 + 0.9]
            light_color = [1.0, 1.0, 1.0]
            light_ambient_weight = np.random.rand()
            light_diffuse_weight = 0.75 + np.random.rand() * 0.25
            light_spec_weight = 0.25 + np.random.rand() * 0.25
            light_spec_shine = np.random.rand() * 3.0
            # time negligible
            bop_render.set_light(light_pose, light_color, light_ambient_weight,
                                 light_diffuse_weight, light_spec_weight,
                                 light_spec_shine)
            # render + get < 23 ms i5-6600k
            # bop_renderer.render_object(obj_id, R_list, t_list,
            #                            intri[0], intri[1], intri[2], intri[3])
            parallel_rendered = multiproc.map(partial(render_top_level,
                                                      ren=bop_render,
                                                      intrinsics=intri,
                                                      obj_id=object_id),
                                              double_list)
            '''
            quat_list = []
            img_sizes = []
            for idx, rot in enumerate(ren_Rot):
                quat_list.append([rot, ren_Tra[idx], intri, int(object_id)])
                img_sizes.append(img_res)
            print('start rendering')
            full_renderings = multiproc.map(render_top_level, quat_list)
            print('rendering done')
            for img in full_renderings:
                print(img.shape)
            parallel_cropping = multiproc.map(partial(crop_rendering,
                                                      bbox=bboxes,
                                                      img_res=img_res),
                                              full_renderings)

            imgs_obsv = np.array(imgs_obsv, dtype=np.float32)
            imgs_rend = np.array(parallel_cropping, dtype=np.float32)
            targets = np.array(targets, dtype=np.float32)
            # Scale images from [0, 255] to [-1, 1].
            imgs_obsv = imgs_obsv / 127.5 - 1.
            imgs_rend = imgs_rend / 127.5 - 1.
            print('T data preparation: ', time.time() - start_t)

            network.fit(x=[imgs_obsv, imgs_rend],
                        y=targets,
                        batch_size=batch_size,
                        verbose=1,
                        steps_per_epoch=1,
                        # steps_per_epoch=data_generator.__len__(),
                        epochs=1)

            # elapsed_time = datetime.datetime.now() - start_time
            # Plot the progress
            # print("Epoch %d/%d Iteration: %d/%d Loss: %f || pose: %f da: %f" % (
            #     epoch, epochs, batch_i, data_loader.n_batches,
            #     g_loss[0] + g_loss[1], g_loss[0], g_loss[1]))
            # print("Epoch %d/%d Iteration: %d/%d Loss: %f" % (
            #     epoch, epochs, batch_i, data_loader.n_batches, g_loss))

    snapshot_path = './models'
    try:
        os.makedirs(snapshot_path)
    except OSError:
        if not os.path.isdir(snapshot_path):
            raise
    # The original passed the directory and file name as two arguments;
    # Model.save() expects a single path, so join them.
    network.save(os.path.join(
        snapshot_path, 'linemod_{oi}_{{epoch:02d}}.h5'.format(oi=object_id)))
    print("Training finished!")
def main():
    torch.manual_seed(42)

    # Random embedding hyperparameters
    #params = {'batch_size': 32, 'dropout': 0, 'hidden_dim': 128, 'learning_rate': 0.01, 'num_epochs': 5, 'num_layers': 2, 'oversample': False, 'soft_labels': False}
    # Glove embedding hyperparameters
    params = {
        'batch_size': 32,
        'dropout': 0,
        'hidden_dim': 128,
        'learning_rate': 0.001,
        'num_epochs': 5,
        'num_layers': 2,
        'oversample': False,
        'soft_labels': False
    }
    # Random embedding hyperparameters (larger model)
    #params = {'batch_size': 32, 'dropout': 0, 'hidden_dim': 256, 'learning_rate': 0.0001, 'num_epochs': 5, 'num_layers': 3, 'oversample': False, 'soft_labels': False}

    # General settings
    experiment_number = 1
    test_percentage = 0.1
    val_percentage = 0.2
    batch_size = params["batch_size"]
    num_epochs = 5  # params["num_epochs"]
    dropout = params["dropout"]
    embedding_dim = 300
    model_name = "CNN"  # "Bert" / "CNN" / "LSTM"
    unsupervised = True
    embedding = "Glove"  # "Random" / "Glove" / "Both"
    soft_labels = False
    combine = embedding == "Both"

    # LSTM parameters
    if model_name == "LSTM":
        hidden_dim = params["hidden_dim"]
        num_layers = params["num_layers"]

    # Bert parameters
    num_warmup_steps = 100
    num_total_steps = 1000
    if model_name == "Bert":
        embedding = "None"

    if embedding == "Both":
        combine = True
        embedding = "Random"
    else:
        combine = False

    learning_rate = params["learning_rate"]  # 5e-5, 3e-5, 2e-5
    oversample_bool = False
    weighted_loss = True

    # Load data
    dataset = Dataset("../data/cleaned_tweets_orig.csv",
                      use_embedding=embedding,
                      embedd_dim=embedding_dim,
                      combine=combine,
                      for_bert=(model_name == "Bert"))
    #dataset.oversample()
    train_data, val_test_data = split_dataset(
        dataset, test_percentage + val_percentage)
    val_data, test_data = split_dataset(
        val_test_data, test_percentage / (test_percentage + val_percentage))
    #save_data(train_data, 'train')
    #save_data(test_data, 'test')

    # Define loaders
    if oversample_bool:
        weights, targets = get_loss_weights(train_data, return_targets=True)
        # Per-class sample counts used to derive inverse-frequency
        # oversampling weights for the WeightedRandomSampler.
        class_sample_count = [1024 / 20, 13426, 2898 / 2]
        oversample_weights = 1 / torch.Tensor(class_sample_count)
        oversample_weights = oversample_weights[targets]
        #oversample_weights = torch.tensor([0.9414, 0.2242, 0.8344])
        sampler = torch.utils.data.sampler.WeightedRandomSampler(
            oversample_weights, len(oversample_weights))
        train_loader = torch.utils.data.DataLoader(train_data,
                                                   batch_size=batch_size,
                                                   collate_fn=my_collate,
                                                   sampler=sampler)
    else:
        train_loader = torch.utils.data.DataLoader(train_data,
                                                   batch_size=batch_size,
                                                   collate_fn=my_collate)
    val_loader = torch.utils.data.DataLoader(val_data,
                                             batch_size=batch_size,
                                             collate_fn=my_collate)

    # Define model
    if model_name == "CNN":
        vocab_size = len(dataset.vocab)
        model = CNN(vocab_size, embedding_dim, combine=combine)
    elif model_name == "LSTM":
        vocab_size = len(dataset.vocab)
        model = LSTM(vocab_size,
                     embedding_dim,
                     batch_size=batch_size,
                     hidden_dim=hidden_dim,
                     lstm_num_layers=num_layers,
                     combine=combine,
                     dropout=dropout)
    elif model_name == "Bert":
        model = BertForSequenceClassification.from_pretrained(
            "bert-base-uncased", num_labels=3)
        train_loader = torch.utils.data.DataLoader(train_data,
                                                   batch_size=batch_size,
                                                   collate_fn=bert_collate)
        val_loader = torch.utils.data.DataLoader(val_data,
                                                 batch_size=batch_size,
                                                 collate_fn=bert_collate)

    # Device
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Loss: cross entropy, optionally weighted by the class counts of the
    # other classes.
    if weighted_loss:
        weights = torch.tensor([0.9414, 0.2242, 0.8344], device=device)
    else:
        weights = torch.ones(3, device=device)
    criterion = nn.CrossEntropyLoss(weight=weights)
    if soft_labels:
        criterion = weighted_soft_cross_entropy

    # Latent rationale model
    if unsupervised:
        vocab_size = len(dataset.vocab)
        criterion = nn.CrossEntropyLoss(weight=weights, reduction='none')
        model = Rationalisation_model(vocab_size,
                                      embedding_dim=embedding_dim,
                                      model=model_name,
                                      batch_size=batch_size,
                                      combine=combine,
                                      criterion=criterion)

    # Initialise embeddings from the pretrained vectors
    if not model_name == "Bert":
        model.embedding.weight.data.copy_(dataset.vocab.vectors)
        if combine:
            model.embedding_glove.weight.data.copy_(dataset.glove.vectors)

    # Model to device
    model.to(device)

    # Optimiser
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)
    if model_name == "Bert":
        optimizer = AdamW(model.parameters(),
                          lr=learning_rate,
                          correct_bias=False)
        # Linear scheduler with warmup for adaptive lr
        scheduler = WarmupLinearSchedule(optimizer,
                                         warmup_steps=num_warmup_steps,
                                         t_total=num_total_steps)
    else:
        scheduler = None

    plot_log = defaultdict(list)
    for epoch in range(num_epochs):
        # Train and validate
        epoch_loss, epoch_acc = train_epoch(model,
                                            train_loader,
                                            optimizer,
                                            criterion,
                                            device,
                                            soft_labels=soft_labels,
                                            weights=weights,
                                            scheduler=scheduler,
                                            unsupervised=unsupervised)
        val_loss, val_acc = evaluate_epoch(model,
                                           val_loader,
                                           criterion,
                                           device,
                                           soft_labels=soft_labels,
                                           weights=weights,
                                           unsupervised=unsupervised)
        # Save for plotting: append so each metric accumulates one point per
        # epoch instead of being overwritten.
        for name, point in zip(
                ["train_loss", "train_accuracy", "val_loss", "val_accuracy"],
                [epoch_loss, epoch_acc, val_loss, val_acc]):
            plot_log[name].append(point)
        # Real-time feedback
        print(f'Epoch: {epoch+1}')
        print(f'\tTrain Loss: {epoch_loss:.5f} | Train Acc: {epoch_acc*100:.2f}%')
        print(f'\t Val. Loss: {val_loss:.5f} | Val. Acc: {val_acc*100:.2f}%')
        sample_sentences_and_z(model, train_loader, device, dataset.vocab)

    # Save plots
    results_directory = f'plots/{experiment_number}'
    os.makedirs(results_directory, exist_ok=True)
    for name, data in plot_log.items():
        save_plot(data, name, results_directory)

    # Save model
    torch.save(model, os.path.join(results_directory, 'model_cnn.pth'))

    # Confusion matrix and classification report
    loss, acc, predictions, ground_truth = evaluate_epoch(
        model,
        val_loader,
        criterion,
        device,
        is_final=True,
        soft_labels=soft_labels,
        weights=weights,
        unsupervised=unsupervised)
    conf_matrix = confusion_matrix(ground_truth, predictions)
    class_report = classification_report(ground_truth, predictions)
    print('\nFinal Loss and Accuracy\n----------------\n')
    print(f'\t Val. Loss: {loss:.5f} | Val. Acc: {acc*100:.2f}%')
    print('\nCONFUSION MATRIX\n----------------\n')
    print(conf_matrix)
    print('\nCLASSIFICATION REPORT\n----------------------\n')
    print(class_report)
    plot_confusion_matrix(ground_truth,
                          predictions,
                          classes=["Hate speech", "Offensive", "Neither"],
                          normalize=False,
                          title='Confusion matrix')
    plt.show()
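# The soft-label branch above swaps nn.CrossEntropyLoss for a
# weighted_soft_cross_entropy function defined elsewhere in the project.
# A minimal sketch of what such a criterion could look like, assuming the
# soft targets are per-class probability distributions; the project's actual
# implementation may differ.
import torch
import torch.nn.functional as F

def weighted_soft_cross_entropy_sketch(logits, soft_targets, weights):
    # logits: (batch, num_classes); soft_targets: (batch, num_classes);
    # weights: (num_classes,) class weights, broadcast over the batch.
    log_probs = F.log_softmax(logits, dim=-1)
    # Weight each class term, sum over classes, average over the batch.
    per_example = -(weights * soft_targets * log_probs).sum(dim=-1)
    return per_example.mean()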
def evaluate_with_beam_search(args):
    transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
    ])
    dataset = Dataset({
        'data_dir': args['data_dir'],
        'exp_dir': args['exp_dir'],
        'raw_data_dir': args['raw_data_dir'],
        'transform': transform,
        'mode': 'test'
    })
    args['vocab_size'] = len(dataset.vocab)

    encoder = EncoderCNN(args).eval()
    decoder = DecoderRNN(args).eval()
    encoder = encoder.to(device)
    decoder = decoder.to(device)
    encoder.load_state_dict(
        torch.load(os.path.join(args['model_path'], 'encoder.pt')))
    decoder.load_state_dict(
        torch.load(os.path.join(args['model_path'], 'decoder.pt')))

    generated_captions = []
    image_ids = []
    target_captions = []
    for idx in range(len(dataset.ids)):
        image_id, image, captions = dataset.get_test_item(idx)
        image = image.to(device)
        print(idx)
        features = encoder(image)
        generated_sents = decoder.decode_with_beam_search(features)
        sents = []
        for sent_id in generated_sents:
            words = []
            for word_id in sent_id[0]:
                if dataset.vocab.idx2word[word_id] == '<start>':
                    continue
                elif dataset.vocab.idx2word[word_id] != '<end>':
                    words.append(dataset.vocab.idx2word[word_id])
                else:
                    break
            # Length-normalise the beam score so longer hypotheses are not
            # penalised simply for containing more tokens.
            sents.append((' '.join(words), sent_id[1] / len(sent_id[0])))
        sents = sorted(sents, key=lambda x: x[1], reverse=True)
        generated_captions.append(sents)
        image_ids.append(image_id)
        target_captions.append(captions)

    image_captions = [{
        'image_id': image_ids[idx],
        'caption': generated_captions[idx][0][0]
    } for idx in range(len(image_ids))]

    captions_path = os.path.join(args['exp_dir'], args['model_dir'],
                                 args['caption_file'])
    image_caption_path = os.path.join(args['exp_dir'], args['model_dir'],
                                      args['evaluation_file'])
    with open(captions_path, 'w') as f:
        for idx in range(len(generated_captions)):
            f.write('*' * 50 + '\n')
            f.write('-' * 20 + 'generated_captions' + '-' * 20 + '\n')
            for sent in generated_captions[idx]:
                f.write(sent[0] + '\n')
            f.write('-' * 20 + 'target_captions' + '-' * 20 + '\n')
            for words in target_captions[idx]:
                f.write(' '.join(words) + '\n')
            f.write('*' * 50 + '\n')
            f.write('\n')
    with open(image_caption_path, 'w') as f:
        json.dump(image_captions, f)
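# decode_with_beam_search is expected above to return, per image, a list of
# (token_id_list, total_log_prob) pairs, one per beam. A tiny worked example
# of the length-normalised re-ranking applied to such pairs; the values here
# are made up for illustration.
beams = [([1, 5, 7, 2], -4.2), ([1, 5, 9, 11, 2], -4.5)]
ranked = sorted(((ids, score / len(ids)) for ids, score in beams),
                key=lambda x: x[1], reverse=True)
# The longer beam wins here (-0.9 vs -1.05) once scores are length-normalised.
print(ranked[0])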
def train(strategy, optimizer, learning_rate, config, config_class,
          model_class, tokenizer_class, on_top=False):
    ##### Pretrained model configuration
    model_name_or_path = config.get("model_name_or_path", "xlm-mlm-enfr-1024")
    config_cache_dir = config.get("pretrained_config_cache_dir")
    model_cache_dir = config.get("pretrained_model_cache_dir")
    tokenizer_cache_dir = config.get("pretrained_tokenizer_cache_dir")
    model_name_or_path_ = config.get("model_name_or_path_",
                                     "xlm-mlm-enfr-1024")
    #####
    train_dataset = Dataset(config.get("filepath", None),
                            config.get("training_data_save_path"),
                            config.get("seq_size"),
                            config.get("max_sents"),
                            config.get("do_shuffle"),
                            config.get("do_skip_empty"),
                            model_name_or_path=model_name_or_path,
                            tokenizer_class=tokenizer_class,
                            tokenizer_cache_dir=tokenizer_cache_dir)
    pretrained_config = config_class.from_pretrained(
        model_name_or_path,
        cache_dir=config_cache_dir if config_cache_dir else None)
    with strategy.scope():
        model = model_class.from_pretrained(
            model_name_or_path_,
            config=pretrained_config,
            cache_dir=model_cache_dir if model_cache_dir else None)
        checkpoint = tf.train.Checkpoint(model=model, optimizer=optimizer)
        checkpoint_manager = tf.train.CheckpointManager(checkpoint,
                                                        config["model_dir"],
                                                        max_to_keep=5)
        if checkpoint_manager.latest_checkpoint is not None:
            tf.get_logger().info("Restoring parameters from %s",
                                 checkpoint_manager.latest_checkpoint)
            checkpoint_path = checkpoint_manager.latest_checkpoint
            checkpoint.restore(checkpoint_path)

    ##### Training functions
    with strategy.scope():
        gradient_accumulator = GradientAccumulator()

    def _accumulate_gradients(src, tgt, sign):
        src_padding_mask = build_mask(src["input_ids"], src["lengths"])
        tgt_padding_mask = build_mask(tgt["input_ids"], tgt["lengths"])
        align, aggregation_src, aggregation_tgt, loss, similarity_loss = model(
            (src, tgt),
            sign_src=sign,
            sign_tgt=sign,
            src_padding_mask=src_padding_mask,
            tgt_padding_mask=tgt_padding_mask,
            training=True)
        loss = loss + similarity_loss * 0.1
        if on_top:
            # Only train the layers stacked on top of the pretrained encoder.
            variables = [
                var for var in model.trainable_variables
                if "bidirectional" in var.name
            ]
        else:
            variables = model.trainable_variables
        print("var numb: ", len(variables))
        for var in variables:
            print(var.name)
        gradients = optimizer.get_gradients(loss, variables)
        #gradients = [tf.clip_by_norm(grad, 0.1) for grad in gradients]
        gradient_accumulator(gradients)
        num_examples = tf.shape(src["input_ids"])[0]
        return loss, num_examples

    def _apply_gradients():
        if on_top:
            variables = [
                var for var in model.trainable_variables
                if "bidirectional" in var.name
            ]
        else:
            variables = model.trainable_variables
        grads_and_vars = []
        for gradient, variable in zip(gradient_accumulator.gradients,
                                      variables):
            # Two batches (one "u", one "p") are accumulated per step, so
            # average the summed gradients.
            scaled_gradient = gradient / 2.0
            grads_and_vars.append((scaled_gradient, variable))
        optimizer.apply_gradients(grads_and_vars)
        gradient_accumulator.reset()

    u_epoch_dataset = train_dataset.create_one_epoch(mode="u")
    p_epoch_dataset = train_dataset.create_one_epoch(mode="p")

    @function_on_next(u_epoch_dataset)
    def _u_train_forward(next_fn):
        with strategy.scope():
            per_replica_source, per_replica_target = next_fn()
            per_replica_loss, per_replica_num_examples = strategy.experimental_run_v2(
                _accumulate_gradients,
                args=(per_replica_source, per_replica_target, 1.0))
            loss = strategy.reduce(tf.distribute.ReduceOp.MEAN,
                                   per_replica_loss, None)
            num_examples = strategy.reduce(tf.distribute.ReduceOp.SUM,
                                           per_replica_num_examples, None)
        return loss, num_examples

    @function_on_next(p_epoch_dataset)
    def _p_train_forward(next_fn):
        with strategy.scope():
            per_replica_source, per_replica_target = next_fn()
            per_replica_loss, per_replica_num_examples = strategy.experimental_run_v2(
                _accumulate_gradients,
                args=(per_replica_source, per_replica_target, -1.0))
            loss = strategy.reduce(tf.distribute.ReduceOp.MEAN,
                                   per_replica_loss, None)
            num_examples = strategy.reduce(tf.distribute.ReduceOp.SUM,
                                           per_replica_num_examples, None)
        return loss, num_examples

    @tf.function
    def _step():
        with strategy.scope():
            strategy.experimental_run_v2(_apply_gradients)

    #### Training loop
    import time
    _summary_writer = tf.summary.create_file_writer(config["model_dir"])
    report_every = config.get("report_every", 100)
    save_every = config.get("save_every", 1000)
    eval_every = config.get("eval_every", 1000)
    train_steps = config.get("train_steps", 100000)
    u_training_flow = iter(_u_train_forward())
    p_training_flow = iter(_p_train_forward())
    p_losses = []
    u_losses = []
    _number_examples = []
    start = time.time()
    with _summary_writer.as_default():
        while True:
            try:
                u_loss, u_examples_num = next(u_training_flow)
                p_loss, p_examples_num = next(p_training_flow)
                _step()
                p_losses.append(p_loss)
                u_losses.append(u_loss)
                _number_examples.extend([u_examples_num, p_examples_num])
                step = optimizer.iterations.numpy()
                if step % report_every == 0:
                    elapsed = time.time() - start
                    tf.get_logger().info(
                        "Step = %d ; Learning rate = %f ; u_loss = %f; "
                        "p_loss = %f, number_examples = %d, after %f seconds",
                        step, learning_rate(step), np.mean(u_losses),
                        np.mean(p_losses), np.sum(_number_examples), elapsed)
                    start = time.time()
                    u_losses = []
                    p_losses = []
                    _number_examples = []
                if step % save_every == 0:
                    tf.get_logger().info("Saving checkpoint for step %d",
                                         step)
                    checkpoint_manager.save(checkpoint_number=step)
                if step % eval_every == 0:
                    ckpt_path = None
                    evaluate(model, config, checkpoint_manager, checkpoint,
                             ckpt_path, model_name_or_path, tokenizer_class,
                             tokenizer_cache_dir)
                tf.summary.flush()
                if step > train_steps:
                    break
            except StopIteration:
                # One of the epoch datasets is exhausted: rebuild both epochs
                # and re-create the training flows before continuing.
                print("next epoch")
                u_epoch_dataset = train_dataset.create_one_epoch(mode="u")
                p_epoch_dataset = train_dataset.create_one_epoch(mode="p")

                @function_on_next(u_epoch_dataset)
                def _u_train_forward(next_fn):
                    with strategy.scope():
                        per_replica_source, per_replica_target = next_fn()
                        per_replica_loss, per_replica_num_examples = strategy.experimental_run_v2(
                            _accumulate_gradients,
                            args=(per_replica_source, per_replica_target,
                                  1.0))
                        loss = strategy.reduce(tf.distribute.ReduceOp.MEAN,
                                               per_replica_loss, None)
                        num_examples = strategy.reduce(
                            tf.distribute.ReduceOp.SUM,
                            per_replica_num_examples, None)
                    return loss, num_examples

                @function_on_next(p_epoch_dataset)
                def _p_train_forward(next_fn):
                    with strategy.scope():
                        per_replica_source, per_replica_target = next_fn()
                        per_replica_loss, per_replica_num_examples = strategy.experimental_run_v2(
                            _accumulate_gradients,
                            args=(per_replica_source, per_replica_target,
                                  -1.0))
                        loss = strategy.reduce(tf.distribute.ReduceOp.MEAN,
                                               per_replica_loss, None)
                        num_examples = strategy.reduce(
                            tf.distribute.ReduceOp.SUM,
                            per_replica_num_examples, None)
                    return loss, num_examples

                u_training_flow = iter(_u_train_forward())
                p_training_flow = iter(_p_train_forward())
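# GradientAccumulator is imported from elsewhere (it appears to match the
# helper popularised by OpenNMT-tf). A minimal sketch of the interface used
# above: callable to add gradients, a .gradients property, and .reset();
# assuming it simply sums gradients between applications. A full
# implementation handles distribution and dtypes more carefully.
import tensorflow as tf

class GradientAccumulatorSketch:
    def __init__(self):
        self._gradients = []

    @property
    def gradients(self):
        return self._gradients

    def __call__(self, gradients):
        # Lazily create one accumulator variable per gradient tensor.
        if not self._gradients:
            self._gradients = [
                tf.Variable(tf.zeros_like(g), trainable=False)
                for g in gradients
            ]
        for accum, grad in zip(self._gradients, gradients):
            accum.assign_add(grad)

    def reset(self):
        for accum in self._gradients:
            accum.assign(tf.zeros_like(accum))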
# Set up log dirs
TimeID, ExpID, rec_img_path, weights_path, log = set_up_dir(
    args.project_name, args.resume, args.debug)
logprint = LogPrint(log, ExpID, args.screen)
args.ExpID = ExpID
args.CodeID = get_CodeID()
loghub = LogHub()

# Set up model, data, optimizer
if args.mode == "wct_se":
    args.BE = "trained_models/original_wct_models/vgg_normalised_conv%d_1.t7" % args.stage
    args.BD = "trained_models/our_BD/%dBD_E30S0.pth" % args.stage
    if args.pretrained_init:
        args.SE = "trained_models/small16x_ae_base/e%d_base.pth" % args.stage
    net = TrainSE_With_WCTDecoder(args).cuda()
    dataset = Dataset(args.content_train, args.shorter_side)
    train_loader = torch.utils.data.DataLoader(dataset=dataset,
                                               batch_size=args.batch_size,
                                               shuffle=True)
elif args.mode == "wct_sd":
    args.BE = "trained_models/original_wct_models/vgg_normalised_conv%d_1.t7" % args.stage
    if args.pretrained_init:
        args.SD = "trained_models/small16x_ae_base/d%d_base.pth" % args.stage
    net = TrainSD_With_WCTSE(args).cuda()
    SE_path = check_path(args.SE)
    net.SE.load_state_dict(torch.load(SE_path)["model"])
    dataset = Dataset(args.content_train, args.shorter_side)
    train_loader = torch.utils.data.DataLoader(dataset=dataset,
                                               batch_size=args.batch_size,
                                               shuffle=True)

optimizer = torch.optim.Adam(net.parameters(), lr=args.lr)

# Train
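# check_path (used above to locate the SE checkpoint) is defined elsewhere in
# the project; a plausible minimal sketch, assuming it only verifies that the
# file exists before torch.load is called. The real helper may do more.
import os

def check_path_sketch(path):
    assert path and os.path.exists(path), "checkpoint not found: %s" % path
    return path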
def encode(lang, checkpoint_path, dataset_path, config, config_class,
           model_class, tokenizer_class, output="output"):
    #####
    print("encoding %s in lang %d using ckpt %s" %
          (dataset_path, lang, checkpoint_path))
    ##### Pretrained model configuration
    model_name_or_path = config.get("model_name_or_path", "xlm-mlm-enfr-1024")
    config_cache_dir = config.get("pretrained_config_cache_dir")
    model_cache_dir = config.get("pretrained_model_cache_dir")
    tokenizer_cache_dir = config.get("pretrained_tokenizer_cache_dir")
    model_name_or_path_ = config.get("model_name_or_path_",
                                     "xlm-mlm-enfr-1024")
    #####
    dataset = Dataset(dataset_path,
                      config.get("training_data_save_path"),
                      config.get("seq_size"),
                      config.get("max_sents"),
                      config.get("do_shuffle"),
                      config.get("do_skip_empty"),
                      procedure="encode",
                      model_name_or_path=model_name_or_path,
                      tokenizer_class=tokenizer_class,
                      tokenizer_cache_dir=tokenizer_cache_dir)
    pretrained_config = config_class.from_pretrained(
        model_name_or_path,
        cache_dir=config_cache_dir if config_cache_dir else None)
    model = model_class.from_pretrained(
        model_name_or_path_,
        config=pretrained_config,
        cache_dir=model_cache_dir if model_cache_dir else None)
    checkpoint = tf.train.Checkpoint(model=model)
    checkpoint_manager = tf.train.CheckpointManager(checkpoint,
                                                    config["model_dir"],
                                                    max_to_keep=5)
    if checkpoint_manager.latest_checkpoint is not None:
        if checkpoint_path is None:
            checkpoint_path = checkpoint_manager.latest_checkpoint
        tf.get_logger().info("Restoring parameters from %s", checkpoint_path)
        checkpoint.restore(checkpoint_path)

    iterator = iter(dataset.create_one_epoch(mode="e", lang=lang))

    @tf.function
    def encode_next():
        src = next(iterator)
        padding_mask = build_mask(src["input_ids"], src["lengths"])
        src_sentence_embedding = model.encode(src, padding_mask)
        return src_sentence_embedding

    # Stream embeddings and flush a shard to disk every maxcount sentences.
    src_sentence_embedding_list = []
    maxcount = 1000000
    count = 0
    index = 0
    while True:
        try:
            src_sentence_embedding_ = encode_next()
            src_sentence_embedding__ = src_sentence_embedding_.numpy()
            src_sentence_embedding_list.append(src_sentence_embedding__)
            count += src_sentence_embedding__.shape[0]
            print(count)
            if count > maxcount:
                src_sentences = np.concatenate(src_sentence_embedding_list,
                                               axis=0)
                np.savez(output + str(index),
                         sentence_embeddings=src_sentences)
                count = 0
                src_sentence_embedding_list = []
                index += 1
        except tf.errors.OutOfRangeError:
            break
    if len(src_sentence_embedding_list) > 0:
        src_sentences = np.concatenate(src_sentence_embedding_list, axis=0)
        np.savez(output + str(index), sentence_embeddings=src_sentences)
    return True
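# Example of reading back the shards written by encode() above; file names
# follow the output + str(index) pattern, which np.savez suffixes with .npz.
import numpy as np

shard = np.load("output0.npz")
embeddings = shard["sentence_embeddings"]
print(embeddings.shape)  # (num_sentences_in_shard, hidden_dim)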
import sys
import os

import yaml
import torch

from unet import UNet
from discriminator import Discriminator
from data_loader import Dataset
from predictor import Predictor

yml_path = sys.argv[1]
with open(yml_path) as f:
    # safe_load: yaml.load without an explicit Loader is deprecated and
    # rejected by recent PyYAML versions.
    config = yaml.safe_load(f)

if config['use_gpu']:
    torch.set_default_tensor_type('torch.cuda.FloatTensor')

discriminator = Discriminator(**config['discriminator_params'])
unet = UNet(**config['unet_params'])
dl = Dataset(**config['dataset_params']).flow_from_directory(
    **config['test_dataloader_params'])

unet_path = os.path.join(config['fit_params']['logdir'],
                         'unet_%d.pth' % config['test_epoch'])
unet.load_state_dict(torch.load(unet_path))
discriminator_path = os.path.join(
    config['fit_params']['logdir'],
    'discriminator_%d.pth' % config['test_epoch'])
discriminator.load_state_dict(torch.load(discriminator_path))

p = Predictor(unet, discriminator)
p(dl, os.path.join(config['fit_params']['logdir'], 'predicted'))
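# For reference, the shape of the YAML config the script above expects,
# expressed as the equivalent Python dict; the key names come from the reads
# above, while the values here are illustrative placeholders.
example_config = {
    'use_gpu': True,
    'test_epoch': 100,
    'unet_params': {},             # forwarded to UNet(**...)
    'discriminator_params': {},    # forwarded to Discriminator(**...)
    'dataset_params': {},          # forwarded to Dataset(**...)
    'test_dataloader_params': {},  # forwarded to flow_from_directory(**...)
    'fit_params': {'logdir': 'logs'},
}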
device = torch.device(device_type)

# Limit CPU thread usage to a quarter of the available cores.
import psutil
n_cpu = psutil.cpu_count()
n_cpu_to_use = n_cpu // 4
torch.set_num_threads(n_cpu_to_use)
os.environ['MKL_NUM_THREADS'] = str(n_cpu_to_use)
os.environ['KMP_AFFINITY'] = 'compact'

verbose = args.mode == 'test'
if verbose:
    print("%s dataset running on %s mode with %s device" %
          (args.dataset.upper(), args.mode.upper(), device_type.upper()))

dset = Dataset(args.dataset, args.data_dir, args.mode)
x_s_train = FN(dset.x_s_train).to(device)
y_s_train = FN(dset.y_s_train).to(device)
y_s_train_ix = FN(index_labels(dset.y_s_train, dset.s_class)).to(device)
x_s_test = FN(dset.x_s_test).to(device)
y_s_test = FN(dset.y_s_test).to(device)
x_u_test = FN(dset.x_u_test).to(device)
y_u_test = FN(dset.y_u_test).to(device)
y_u_test_ix = FN(index_labels(dset.y_u_test, dset.u_class)).to(device)
attr = FN(dset.attr).to(device)
s_attr = FN(dset.s_attr).to(device)
u_attr = FN(dset.u_attr).to(device)
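# index_labels is imported from elsewhere; a minimal sketch of the behaviour
# assumed above, mapping original class ids to contiguous 0..K-1 indices
# within the seen (s_class) or unseen (u_class) label set.
import numpy as np

def index_labels_sketch(labels, classes):
    mapping = {c: i for i, c in enumerate(classes)}
    return np.array([mapping[l] for l in labels])

# e.g. index_labels_sketch([7, 3, 7], [3, 7]) -> array([1, 0, 1])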
def evaluate(args):
    transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
    ])
    dataset = Dataset({
        'data_dir': args['data_dir'],
        'exp_dir': args['exp_dir'],
        'raw_data_dir': args['raw_data_dir'],
        'transform': transform,
        'mode': 'test'
    })
    args['vocab_size'] = len(dataset.vocab)

    encoder = EncoderCNN(args).eval()
    decoder = DecoderRNN(args).eval()
    encoder = encoder.to(device)
    decoder = decoder.to(device)
    encoder.load_state_dict(
        torch.load(os.path.join(args['model_path'], 'encoder.pt')))
    decoder.load_state_dict(
        torch.load(os.path.join(args['model_path'], 'decoder.pt')))

    generated_captions = []
    image_ids = []
    target_captions = []
    for idx in range(len(dataset.ids)):
        image_id, image, captions = dataset.get_test_item(idx)
        image = image.to(device)
        print(idx)
        features = encoder(image)
        word_ids = decoder.sample(features)
        word_ids = word_ids[0].cpu().tolist()
        words = []
        for word_id in word_ids:
            if dataset.vocab.idx2word[word_id] == '<start>':
                continue
            if dataset.vocab.idx2word[word_id] != '<end>':
                words.append(dataset.vocab.idx2word[word_id])
            else:
                break
        image_ids.append(image_id)
        generated_captions.append(words)
        target_captions.append(captions)
        print(words)

    image_captions = [{
        'image_id': image_ids[idx],
        'caption': ' '.join(generated_captions[idx])
    } for idx in range(len(image_ids))]

    captions_path = os.path.join(args['exp_dir'], args['caption_file'])
    image_caption_path = os.path.join(args['exp_dir'],
                                      args['evaluation_file'])
    with open(captions_path, 'w') as f:
        for idx in range(len(generated_captions)):
            f.write('*' * 50 + '\n')
            f.write('-' * 20 + 'generated_captions' + '-' * 20 + '\n')
            f.write(' '.join(generated_captions[idx]) + '\n')
            f.write('-' * 20 + 'target_captions' + '-' * 20 + '\n')
            for words in target_captions[idx]:
                f.write(' '.join(words) + '\n')
            f.write('*' * 50 + '\n')
            f.write('\n')
    # NOTE: bleu_score_path and BLEU_score are not defined in this function;
    # they are assumed to come from the surrounding module (a hedged sketch
    # follows below).
    with open(bleu_score_path, 'w') as f:
        f.write('BLEU_score: {}'.format(str(BLEU_score)))
    with open(image_caption_path, 'w') as f:
        json.dump(image_captions, f)
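# A hedged sketch of how BLEU_score and bleu_score_path used above could be
# produced; the original scoring code is not included in this file, so the
# metric variant and the output file name below are assumptions.
from nltk.translate.bleu_score import corpus_bleu

def compute_bleu_sketch(target_captions, generated_captions):
    # target_captions: per image, a list of reference captions (word lists);
    # generated_captions: per image, one hypothesis word list. This matches
    # corpus_bleu's (list_of_references, hypotheses) signature.
    return corpus_bleu(target_captions, generated_captions)

bleu_score_path = 'bleu_score.txt'  # hypothetical output path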