def run(dataset='mnist', batch_size=64, n_features=200, n_layers=6, n_bins=4,
        optimizer='adam', learnrate=1e-4, dropout=0.9, exp_name='pixelCNN',
        exp_dir='~/experiments/conditional-pixelcnn/', cuda=True, resume=False):
    exp_name += '_%s_%ifeat_%ilayers_%ibins' % (
        dataset, n_features, n_layers, n_bins)
    exp_dir = os.path.join(os.path.expanduser(exp_dir), exp_name)
    if not os.path.isdir(exp_dir):
        os.makedirs(exp_dir)

    # Data loaders
    train_loader, val_loader, onehot_fcn, n_classes = data.loader(dataset, batch_size)

    if not resume:
        # Store experiment params in params.json
        params = {'batch_size': batch_size, 'n_features': n_features,
                  'n_layers': n_layers, 'n_bins': n_bins, 'optimizer': optimizer,
                  'learnrate': learnrate, 'dropout': dropout, 'cuda': cuda}
        with open(os.path.join(exp_dir, 'params.json'), 'w') as f:
            json.dump(params, f)

        # Model
        net = model.PixelCNN(1, n_classes, n_features, n_layers, n_bins, dropout)
    else:
        # if resuming, need to have params, stats and checkpoint files
        if not (os.path.isfile(os.path.join(exp_dir, 'params.json'))
                and os.path.isfile(os.path.join(exp_dir, 'stats.json'))
                and os.path.isfile(os.path.join(exp_dir, 'last_checkpoint'))):
            raise Exception('Missing param, stats or checkpoint file on resume')
        net = torch.load(os.path.join(exp_dir, 'last_checkpoint'))

    # Define loss fcn, incl. label formatting from input
    def input2label(x):
        return torch.squeeze(
            torch.round((n_bins - 1) * x).type(torch.LongTensor), 1)

    loss_fcn = torch.nn.NLLLoss2d()

    # Train
    train.fit(train_loader, val_loader, net, exp_dir, input2label, loss_fcn,
              onehot_fcn, n_classes, optimizer, learnrate=learnrate, cuda=cuda,
              resume=resume)

    # Generate some between-class examples
    generate_between_classes(net, [28, 28], [1, 7],
                             os.path.join(exp_dir, '1-7.jpeg'), n_classes, cuda)
    generate_between_classes(net, [28, 28], [3, 8],
                             os.path.join(exp_dir, '3-8.jpeg'), n_classes, cuda)
    generate_between_classes(net, [28, 28], [4, 9],
                             os.path.join(exp_dir, '4-9.jpeg'), n_classes, cuda)
    generate_between_classes(net, [28, 28], [5, 6],
                             os.path.join(exp_dir, '5-6.jpeg'), n_classes, cuda)
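# Illustrative note: with the default n_bins=4, input2label maps a normalized
# pixel value such as 0.67 to round(3 * 0.67) = 2, i.e. inputs in [0, 1] become
# integer bin labels in {0, 1, 2, 3} for the NLL loss.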
def model_pipeline(train_dataset, test_dataset, batch_size, num_epochs,
                   optimizer, weights=None):
    train_loader = DataLoader(train_dataset, batch_size=batch_size,
                              shuffle=True, num_workers=4)
    test_loader = DataLoader(test_dataset, batch_size=batch_size,
                             shuffle=False, num_workers=4)

    net = model.MaskDetector(train_dataset.df)
    if weights is not None:
        net.load_state_dict(
            torch.load(weights, map_location=lambda storage, loc: storage))
    print(summary(net, torch.zeros((1, 3, 100, 100)),
                  show_input=False, show_hierarchical=True))

    model_net = train.fit(net, train_loader, test_loader, num_epochs, optimizer,
                          plot=True, save=True, checkpoint=True)
    net.visualize_conv2d_features('convLayer1', 'convLayer1')
    return model_net
def run_experiments(finetune, kernel_sizes, filters, lr, pooling, weight_decay,
                    other_params):
    global embeddings_matrix, training_set, validation_set
    other_params['commit_hash'] = commit_hash
    (vocab_size, dimensions) = embeddings_matrix.shape

    net = models.Net(dimensions=dimensions, finetune=finetune,
                     vocab_size=vocab_size, kernel_sizes=kernel_sizes,
                     filters=filters, dropout_rate=0.5, pooling=pooling,
                     lr=lr, weight_decay=weight_decay,
                     embeddings_matrix=embeddings_matrix)
    hyperparams = util.fill_dict(net.hyperparameters, other_params)
    logger.info('experiment with hyperparameters: {}'.format(
        json.dumps(hyperparams, sort_keys=True, indent=None)))

    with get_archiver(datadir='data/models', suffix="_" + commit_hash[:6]) as a1, \
            get_archiver(datadir='data/results', suffix="_" + commit_hash[:6]) as a:
        save_model(hyperparams, net, a.getFilePath)

        early_stopping = train.EarlyStopping(c.monitor, c.patience,
                                             c.monitor_objective)
        model_checkpoint = train.ModelCheckpoint(a1.getFilePath('checkpoint'))
        csv_logger = train.CSVLogger(a.getFilePath('logger.csv'))

        adam_config = train.AdamConfig(
            lr=net.hyperparameters['lr'],
            beta_1=net.hyperparameters['beta_1'],
            beta_2=net.hyperparameters['beta_2'],
            epsilon=net.hyperparameters['epsilon'],
            weight_decay=net.hyperparameters['weight_decay'])

        history = train.fit(net, training_set, validation_set,
                            batch_size=c.batch_size, epochs=c.epochs,
                            validation_split=0.2,
                            callbacks=[early_stopping, model_checkpoint, csv_logger],
                            optimizer=adam_config)

        save_history(history, a.getDirPath())
    return
def cross_validation_loop(X, Y, leave_out=5):
    assert 1 <= leave_out <= 5 and type(leave_out) == int
    n_features = X.shape[1]
    n_targets = Y.shape[1]
    assert len(X) == len(Y)

    averaged_scores = {
        'MSE': np.zeros((n_targets, )),
        'feature_grad': np.zeros((n_targets, n_features)),
        'feature_grad_abs': np.zeros((n_targets, n_features))
    }

    cross_validation_times = len(X) // leave_out
    for i in range(cross_validation_times):
        print("Leave %d out [%d/%d]" % (leave_out, i + 1, cross_validation_times))
        test_indices = [m for m in range(i * leave_out, (i + 1) * leave_out)]
        train_indices = [j for j in range(len(X)) if j not in test_indices]
        X_train = X[train_indices, :]
        Y_train = Y[train_indices, :]
        X_test = X[test_indices, :]
        Y_test = Y[test_indices, :]

        net = model.FCModel(n_in_features=X.shape[1], n_out_features=Y.shape[1])
        device = 'cuda:0' if torch.cuda.is_available() else "cpu"
        net = net.to(device)
        params = {
            'n_epoch': 200,
            'lr': 1e-2,
            'beta': 0,
            'batch_size': 8,
        }
        train.fit(net, X_train, Y_train, params, verbose=False)
        scores = train.score(net, X_test, Y_test)
        for k, v in scores.items():
            averaged_scores[k] += scores[k] / cross_validation_times

    return averaged_scores
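# Illustrative usage sketch (assumes the repo's `model` and `train` modules are
# importable, exactly as used above); array shapes below are hypothetical.
# rng = np.random.default_rng(0)
# X_demo = rng.normal(size=(50, 10)).astype('float32')  # 50 samples, 10 features
# Y_demo = rng.normal(size=(50, 3)).astype('float32')   # 3 regression targets
# avg = cross_validation_loop(X_demo, Y_demo, leave_out=5)  # 50 // 5 = 10 folds
# print(avg['MSE'])  # per-target MSE averaged over the folds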
def permutation_loop(X, Y, permutation_times=10):
    n_features = X.shape[1]
    n_targets = Y.shape[1]
    assert len(X) == len(Y)

    permutation_scores = {'performance_gain': np.zeros((n_targets, ))}

    for i in range(permutation_times):
        print("Permutation [%d/%d]" % (i + 1, permutation_times))
        X_train, X_test, Y_train, Y_test = sklearn.model_selection.train_test_split(
            X, Y, test_size=0.25)

        net = model.FCModel(n_in_features=X.shape[1], n_out_features=Y.shape[1])
        device = 'cuda:0' if torch.cuda.is_available() else "cpu"
        net = net.to(device)
        params = {
            'n_epoch': 200,
            'lr': 1e-2,
            'beta': 0,
            'batch_size': 8,
        }
        train.fit(net, X_train, Y_train, params, verbose=False)
        scores_on_original = train.score(net, X_test, Y_test)

        perm_indices = np.arange(len(Y))
        np.random.shuffle(perm_indices)
        Y_shuffled = Y[perm_indices].copy()
        X_train, X_test, Y_shuffled_train, Y_shuffled_test = sklearn.model_selection.train_test_split(
            X, Y_shuffled, test_size=0.25)
        train.fit(net, X_train, Y_shuffled_train, params, verbose=False)
        scores_on_shuffled = train.score(net, X_test, Y_shuffled_test)

        permutation_scores["performance_gain"] += (
            scores_on_shuffled["MSE"] - scores_on_original["MSE"]) * (1 / permutation_times)

    return permutation_scores
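# Interpretation note: permutation_scores['performance_gain'] averages
# (MSE on shuffled targets) - (MSE on original targets) over the permutations,
# so a clearly positive value suggests the model is exploiting real X-Y
# structure rather than fitting noise.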
def start():
    # produce_data()
    model = Bert_CRF()

    print('create_iter')
    train_iter, num_train_steps = create_batch_iter("train")
    eval_iter = create_batch_iter("valid")
    print('create_iter finished')

    epoch_size = (num_train_steps * args.train_batch_size *
                  args.gradient_accumulation_steps / args.num_train_epochs)
    pbar = ProgressBar(epoch_size=epoch_size, batch_size=args.train_batch_size)

    # for name, param in model.named_parameters():
    #     if param.requires_grad:
    #         print(name)

    print('fit')
    fit(model=model,
        training_iter=train_iter,
        eval_iter=eval_iter,
        num_epoch=args.num_train_epochs,
        pbar=pbar,
        num_train_steps=num_train_steps,
        verbose=1)
percentages = [(1.0 / 7), (2.0 / 7), (3.0 / 7), (4.0 / 7), (5.0 / 7), (6.0 / 7), 1]
train_losses = []
test_losses = []
train_accs = []
test_accs = []
metrics = []
for p in percentages:
    tidx = train_idx[np.random.permutation(int(p * train_idx.shape[0]))]
    best = fit(args.model_type, dataset, tidx, val_idx, device, save, args,
               stopping=8)
    print(best)
    metrics.append(best)
print(metrics)

# n = len(train_losses)
# xs = np.arange(n)
# # plot losses
# fig, ax = plt.subplots()
# ax.plot(xs, train_losses, '--', linewidth=2, label='train')
# ax.plot(xs, test_losses, '-', linewidth=2, label='validation')
# ax.set_xlabel("Epoch")
# ax.set_ylabel("Training Loss")
# Test Dataset & Loader
validset = Dataset(config.validdata_dir)
validloader = create_loader(dataset=validset,
                            input_size=(3, 224, 224),
                            batch_size=config.batch_size,
                            interpolation="bicubic",
                            mean=(0.485, 0.456, 0.406),
                            std=(0.229, 0.224, 0.225),
                            num_workers=2,
                            crop_pct=1.0)
# validloader = torch.utils.data.DataLoader(validset, batch_size=32, shuffle=False, num_workers=2)

print("Loaded %d Train Images, %d Validation images" % (len(trainset), len(validset)))

# # Train Dataset & Loader
# trainset = Dataset(traindata_dir, transform=transform)
# trainloader = torch.utils.data.DataLoader(trainset, batch_size=config.batch_size, shuffle=True, num_workers=2, drop_last=True)

# # Test Dataset & Loader
# validset = Dataset(validdata_dir, transform=transform)
# validloader = torch.utils.data.DataLoader(validset, batch_size=config.batch_size, shuffle=False, num_workers=2)

# Tensorboard
train_writer = SummaryWriter('./checkpoint/logs/')

# Train, Validate
print("Start Training")
# fit(config.save_dir, train_writer, trainloader, validloader, model, model2, loss_fn, optimizer, optimizer_smoothing, scheduler, n_epochs, cuda, log_interval)
fit(config.save_dir, train_writer, trainloader, validloader, model, loss_fn,
    optimizer, scheduler, n_epochs, cuda, log_interval)
def create_run_ensemble(model_state_list, n_layers, grad_clip_value=5, seed=0,
                        num_epochs=20, learning_rate=0.001,
                        init_channels=get('init_channels'),
                        batch_size=get('batch_size'),
                        genotype_class='PCDARTS',
                        data_augmentations=None):
    # `config`, `data_dir` and `save_model_str` are expected to be module-level
    # globals here
    if not torch.cuda.is_available():
        logging.info('no gpu device available')
        sys.exit(1)

    gpu = 'cuda:0'
    np.random.seed(seed)
    torch.cuda.set_device(gpu)
    cudnn.benchmark = True
    torch.manual_seed(seed)
    cudnn.enabled = True
    torch.cuda.manual_seed(seed)
    logging.info('gpu device = %s' % gpu)
    logging.info("config = %s", config)

    if data_augmentations is None:
        # You can add any preprocessing/data augmentation you want here
        data_augmentations = transforms.ToTensor()
    elif isinstance(data_augmentations, list):
        data_augmentations = transforms.Compose(data_augmentations)
    elif not isinstance(data_augmentations, transforms.Compose):
        raise NotImplementedError

    train_dataset = K49(data_dir, True, data_augmentations)
    test_dataset = K49(data_dir, False, data_augmentations)
    # train_dataset = KMNIST(data_dir, True, data_augmentations)
    # test_dataset = KMNIST(data_dir, False, data_augmentations)

    # Make data batch iterable
    # Could modify the sampler to not uniformly random sample
    train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)
    test_loader = DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=False)

    genotype = eval("genotypes.%s" % genotype_class)

    # Extract frozen features from every pre-trained model; labels are only
    # collected once (they are the same for each model)
    dataset = {'trn_features': [], 'test_features': []}
    dims = []
    for i, model_state in enumerate(model_state_list):
        model = Network(init_channels, train_dataset.n_classes, n_layers, genotype)
        model.load_state_dict(torch.load(model_state))
        model.cuda()
        for p in model.parameters():
            p.requires_grad = False

        trn_labels, trn_features = [], []
        test_labels, test_features = [], []
        for d, la in train_loader:
            o = model(Variable(d.cuda()))
            o = o.view(o.size(0), -1)
            if i == 0:
                trn_labels.extend(la)
            trn_features.extend(o.cpu().data)
        for d, la in test_loader:
            o = model(Variable(d.cuda()))
            o = o.view(o.size(0), -1)
            if i == 0:
                test_labels.extend(la)
            test_features.extend(o.cpu().data)

        if i == 0:
            dataset['trn_labels'] = trn_labels
            dataset['test_labels'] = test_labels
        dataset['trn_features'].append(trn_features)
        dataset['test_features'].append(test_features)
        dims.append(trn_features[0].size(0))

    trn_feat_dset = FeaturesDataset(dataset['trn_features'][0],
                                    dataset['trn_features'][1],
                                    dataset['trn_features'][2],
                                    dataset['trn_labels'])
    test_feat_dset = FeaturesDataset(dataset['test_features'][0],
                                     dataset['test_features'][1],
                                     dataset['test_features'][2],
                                     dataset['test_labels'])
    trn_feat_loader = DataLoader(trn_feat_dset, batch_size=64, shuffle=True)
    test_feat_loader = DataLoader(test_feat_dset, batch_size=64)

    model = EnsembleModel(dims, out_size=train_dataset.n_classes)
    criterion = torch.nn.CrossEntropyLoss()
    criterion = criterion.cuda()
    optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate, momentum=0.9)

    for epoch in range(num_epochs):
        epoch_loss, epoch_accuracy = fit(epoch, model, trn_feat_loader,
                                         criterion, training=True)
        val_epoch_loss, val_epoch_accuracy = fit(epoch, model, test_feat_loader,
                                                 criterion, training=False)

    if save_model_str:
        # Save the model checkpoint, can be restored via
        # "model = torch.load(save_model_str)"
        if not os.path.exists(save_model_str):
            os.mkdir(save_model_str)
        torch.save(model.state_dict(),
                   os.path.join(save_model_str, time.ctime()))
def grid_search_motifs(min_motifs, max_motifs, device=torch.device('cpu'),
                       num_epochs=30):
    total_motifs = (max_motifs - min_motifs + 1) * 5
    Seq_train, Seq_test, fam_train, fam_test, class_weights, family_set, family_counts = load_data()

    parent_directory = 'Grid-Search'
    if not os.path.exists(parent_directory):
        os.mkdir(parent_directory)

    for min_len in range(min_motifs, max_motifs + 2):
        for max_len in range(min_len + 1, max_motifs + 2):
            motif_lengths = [*range(min_len, max_len)]
            motifs_per_length, remainder = divmod(total_motifs, len(motif_lengths))
            num_motifs_of_length = [
                motifs_per_length + (0 if i < (len(motif_lengths) - remainder) else 1)
                for i, _ in enumerate(motif_lengths)
            ]
            print("Training for motif lengths: {} - {}".format(min_len, max_len - 1))

            net = ProtClassifier(len(alphabet), num_motifs_of_length,
                                 motif_lengths, len(family_set))
            results = fit(net, (Seq_train, fam_train), (Seq_test, fam_test),
                          num_epochs=num_epochs, class_weights=class_weights,
                          device=device, parent_directory=parent_directory)
            loss_history, accuracy_history, precision_history, \
                recall_history, f1_score_history, roc_auc_history, \
                mcc_history = results

            save_file_name = '{}/{}-{}'.format(parent_directory, min_len, max_len - 1)
            if not os.path.exists(save_file_name):
                os.mkdir(save_file_name)
            save_file_name = save_file_name + '/Metrics'
            with open(save_file_name + '.txt', 'w') as f:
                f.write("Loss\n")
                f.write(str(loss_history.tolist()))
                f.write("\nAccuracy\n")
                f.write(str(accuracy_history.tolist()))
                f.write("\nPrecision\n")
                f.write(str(precision_history.tolist()))
                f.write("\nRecall\n")
                f.write(str(recall_history.tolist()))
                f.write("\nF1-Score\n")
                f.write(str(f1_score_history.tolist()))
                f.write("\nROC-AUC\n")
                f.write(str(roc_auc_history.tolist()))
                f.write("\nMCC\n")
                f.write(str(mcc_history.tolist()))
            np.save(save_file_name,
                    np.stack((loss_history, accuracy_history, precision_history,
                              recall_history, f1_score_history, roc_auc_history,
                              mcc_history)))
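# Worked example of the motif-count split above (illustrative numbers): with
# min_motifs=3 and max_motifs=6, total_motifs = (6 - 3 + 1) * 5 = 20; for
# motif_lengths [3, 4, 5], divmod(20, 3) gives (6, 2), so num_motifs_of_length
# becomes [6, 7, 7] -- the remainder goes to the longest lengths and the counts
# still sum to total_motifs.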
def main():
    device = "cuda" if torch.cuda.is_available() else "cpu"

    parser = argparse.ArgumentParser()
    parser.add_argument("--K", type=int, default=5, help="Number of topics")
    parser.add_argument("--model", choices=["slda", "pfslda"], default="pfslda",
                        help="Specify which model to train")
    parser.add_argument("--p", type=float, default=0.15,
                        help="Value for the switch prior for pf-sLDA")
    parser.add_argument("--alpha", type=bool, default=True,
                        help="Specify if alpha is fixed")
    parser.add_argument("--path", type=str, default=None,
                        help="Path to saved model to load before training")
    parser.add_argument("--lr", type=float, default=0.025,
                        help="Initial learning rate")
    parser.add_argument("--lambd", type=float, default=0,
                        help="Supervised task regularizer weight")
    parser.add_argument("--num_epochs", type=int, default=500,
                        help="Number of epochs to train")
    parser.add_argument("--check", type=int, default=10,
                        help="Number of epochs per stats check (print/save)")
    parser.add_argument("--batch_size", type=int, default=100)
    parser.add_argument("--y_thresh", type=float, default=None,
                        help="Threshold for yscore (RMSE or AUC) to save model.")
    parser.add_argument("--c_thresh", type=float, default=None,
                        help="Threshold for topic coherence to save model.")
    args = parser.parse_args()

    # make sure args valid
    if args.K < 1:
        raise ValueError("Invalid number of topics.")
    p = args.p
    if p > 1 or p < 0:
        raise ValueError("Invalid switch prior p.")
    # store the switch prior as a logit
    p = torch.tensor(p).to(device)
    p = torch.log(p / (1 - p))

    # load dataset and specify target type
    d = load_Pang_Lee()
    W = d["W"]
    W_val = d["W_val"]
    y = d["y"]
    y_val = d["y_val"]
    W_test = d["W_test"]
    y_test = d["y_test"]
    vocab = d["vocab"]
    version = "real"
    V = W.shape[1]
    M = W.shape[0]
    M_val = W_val.shape[0]

    # instantiate model
    if args.model == "slda":
        model = sLDA(args.K, V, M, M_val, args.alpha, device)
    elif args.model == "pfslda":
        model = pfsLDA(args.K, V, M, M_val, p, args.alpha, device)
    model.to(device)

    # load saved model if path specified
    if args.path:
        state_dict = torch.load(args.path, map_location=device)
        model.load_state_dict(state_dict)

    kwargs = {
        "W": W,
        "y": y,
        "lr": args.lr,
        "lambd": args.lambd,
        "num_epochs": args.num_epochs,
        "check": args.check,
        "batch_size": args.batch_size,
        "version": version,
        "W_val": W_val,
        "y_val": y_val,
        "device": device,
        "y_thresh": args.y_thresh,
        "c_thresh": args.c_thresh,
    }
    fit(model, **kwargs)
    print_topics(model, 10, vocab)
import os
import json

import torch
import wandb

from train import fit, init_experiment

if __name__ == "__main__":
    os.environ['PYTHONWARNINGS'] = 'ignore:semaphore_tracker:UserWarning'
    config = json.load(open("config/local_config.json", "r"))
    init_experiment(config)
    print("------------------")
    print(config)
    print("------------------")
    config["device"] = torch.device(config["device"] if torch.cuda.is_available() else 'cpu')
    if config["wandb_enable"]:
        wandb.init(project=config["wandb_project"],
                   entity=config["wandb_entity"],
                   name=config["name_run"])
    fit(config)
    # real_image = tf.divide(real_image, 255.0)
    return input_image, real_image


# train_dataset = tf.data.Dataset.list_files(PATH+'train/*/*.jpg')
train_dataset = tf.data.Dataset.list_files(
    settings.config['paths']['train_dataset'])
train_dataset = train_dataset.shuffle(BUFFER_SIZE)
train_dataset = train_dataset.map(
    load, num_parallel_calls=tf.data.experimental.AUTOTUNE)
train_dataset = train_dataset.batch(
    settings.config.getint('training', 'batch_size'))

test_dataset = tf.data.Dataset.list_files(
    settings.config['paths']['test_dataset'])
# test_dataset = test_dataset.shuffle(BUFFER_SIZE)
test_dataset = test_dataset.map(
    load, num_parallel_calls=tf.data.experimental.AUTOTUNE)
test_dataset = test_dataset.batch(
    settings.config.getint('training', 'batch_size'))

logdir = os.path.join(settings.config.get('paths', 'tb_logs'),
                      settings.config.get('paths', 'log_tag'))
writer = tf.summary.create_file_writer(logdir)
writer.set_as_default()

pr = cProfile.Profile()
train.fit(train_dataset, test_dataset,
          settings.config.getint('training', 'epochs'))
def run(batch_size=128, n_features=64, n_layers=6, n_scales=1, n_bins=16,
        exp_name='pixelCNN', exp_dir='/home/jason/experiments/pytorch_pixelcnn/',
        optimizer='adam', learnrate=1e-4, dropout=0.5, cuda=True, resume=False):
    exp_name += '_%ifeat_%iscales_%ilayers_%ibins' % (n_features, n_scales,
                                                      n_layers, n_bins)
    exp_dir = os.path.join(exp_dir, exp_name)
    if not os.path.isdir(exp_dir):
        os.makedirs(exp_dir)

    if not resume:
        # Store experiment params in params.json
        params = {
            'batch_size': batch_size,
            'n_features': n_features,
            'n_layers': n_layers,
            'n_scales': n_scales,
            'n_bins': n_bins,
            'optimizer': optimizer,
            'learnrate': learnrate,
            'dropout': dropout,
            'cuda': cuda
        }
        with open(os.path.join(exp_dir, 'params.json'), 'w') as f:
            json.dump(params, f)

        # Model
        net = model.PixelCNN(1, n_features, n_layers, n_scales, n_bins, dropout)
    else:
        # if resuming, need to have params, stats and checkpoint files
        if not (os.path.isfile(os.path.join(exp_dir, 'params.json'))
                and os.path.isfile(os.path.join(exp_dir, 'stats.json'))
                and os.path.isfile(os.path.join(exp_dir, 'last_checkpoint'))):
            raise Exception(
                'Missing param, stats or checkpoint file on resume')
        net = torch.load(os.path.join(exp_dir, 'last_checkpoint'))

    # Data loaders
    train_loader, val_loader = data.mnist(batch_size)

    # Up-weight 1s (~8x rarer) to balance loss, interpolate intermediate values
    weight = torch.from_numpy(np.linspace(1, 8, n_bins, dtype='float32'))
    if cuda:
        weight = weight.cuda()

    # Define loss fcn, incl. label formatting from input
    def input2label(x):
        return torch.squeeze(
            torch.round((n_bins - 1) * x).type(torch.LongTensor), 1)

    loss_fcn = torch.nn.NLLLoss2d(torch.autograd.Variable(weight))

    # Train
    train.fit(train_loader, val_loader, net, exp_dir, input2label, loss_fcn,
              optimizer, learnrate=learnrate, cuda=cuda, resume=resume)
dataset = pd.read_csv(
    "/home/singh/PycharmProjects/MachineLearning_from_Scratch/Datasets/linear_regression"
    + "/one_var/train.csv")
print("Shape of dataset: " + str(dataset.shape))

data_x = dataset['x'][:50]
data_y = dataset['y'][:50]
data_x = data_x.to_numpy()
data_y = data_y.to_numpy()
data_x = data_x.reshape(len(data_x), 1)
data_y = data_y.reshape(len(data_y), 1)

train.fit()


# hypothesis function of the form y = c1 + c2*x
def h(c1, c2, xh):
    size = xh.shape[0]
    a1 = np.ones((size, 1), dtype=float)
    a1 = np.concatenate((a1, xh), axis=1)
    a2 = np.array(([c1], [c2]))
    yh = a1.dot(a2)
    return yh


def plot_all(para1, para2, x_data, y_data):
    y_pred = h(para1, para2, x_data)
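# Not the repo's train.fit: a minimal, self-contained sketch of batch gradient
# descent for the hypothesis y = c1 + c2*x defined by h() above. The name
# gd_fit and the hyperparameters are hypothetical.
def gd_fit(x_data, y_data, lr=1e-4, n_iter=1000):
    c1, c2 = 0.0, 0.0
    m = len(x_data)
    for _ in range(n_iter):
        residual = h(c1, c2, x_data) - y_data       # shape (m, 1)
        c1 -= lr * residual.sum() / m               # gradient step for intercept
        c2 -= lr * (residual * x_data).sum() / m    # gradient step for slope
    return c1, c2

# Example: c1_hat, c2_hat = gd_fit(data_x, data_y)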
def run(dataset='mnist', n_samples=50000, n_bins=4, n_features=200, batch_size=64,
        n_layers=6, loss='standard', optimizer='adam', learnrate=1e-4, dropout=0.9,
        max_epochs=35, cuda=True, resume=False, exp_dir='out', note=''):
    # Data
    if dataset == 'mnistog':
        train_data, val_data, onehot_fcn, n_classes = data.get_loaders(
            'mnist', batch_size)
    if dataset == 'mnist':
        train_data, val_data, onehot_fcn, n_classes = data.get_sorted_data(
            'mnist', batch_size)

    if not resume:
        # Make dir
        exp_name = datetime.datetime.now().strftime("%m_%d_%y-%H_%M_%S")
        exp_name += '_{}_{}samples_{}_{}'.format(dataset, n_samples, loss, note)
        print("Out directory: " + exp_name)
        exp_dir = os.path.join(os.path.expanduser(exp_dir), exp_name)
        if not os.path.isdir(exp_dir):
            os.makedirs(exp_dir)

        # Store experiment params in params.json
        params = {
            'data': dataset,
            'n_samples': n_samples,
            'loss': loss,
            'batch_size': batch_size,
            'n_features': n_features,
            'n_layers': n_layers,
            'n_bins': n_bins,
            'optimizer': optimizer,
            'learnrate': learnrate,
            'dropout': dropout,
            'cuda': cuda,
            'note': note
        }
        print("Params: " + str(params.items()))
        with open(os.path.join(exp_dir, 'params.json'), 'w') as f:
            json.dump(params, f)

        net = model.PixelCNN(1, n_classes, n_features, n_layers, n_bins, dropout)
    else:
        # if resuming, need to have params, stats and checkpoint files
        if not (os.path.isfile(os.path.join(exp_dir, 'params.json'))
                and os.path.isfile(os.path.join(exp_dir, 'stats.json'))
                and os.path.isfile(os.path.join(exp_dir, 'last_checkpoint'))):
            raise Exception(
                'Missing param, stats or checkpoint file on resume')
        net = torch.load(os.path.join(exp_dir, 'last_checkpoint'))

    # Define loss fcn, incl. label formatting from input
    def input2label(x):
        return torch.squeeze(
            torch.round((n_bins - 1) * x).type(torch.LongTensor), 1)

    loss_fcns = {
        'official': losses.official_loss_function,
        'standard': losses.standard_loss_function,
        'sum': losses.sum_loss_function,
        'min': losses.min_loss_function,
        'debug': torch.nn.NLLLoss()
    }
    loss_fcn = loss_fcns[loss]

    # Train
    train.fit(train_data, val_data, n_samples, net, exp_dir, input2label,
              loss_fcn, onehot_fcn, n_classes, optimizer, learnrate=learnrate,
              cuda=cuda, max_epochs=max_epochs, resume=resume)
def run(pixelcnn_ckpt, vgg_ckpt=None, adversarial_range=0.2,
        train_dataset='mnist', test_dataset='emnist', img_size=28,
        vgg_params={
            'batch_size': 16, 'base_f': 16, 'n_layers': 9, 'dropout': 0.8,
            'optimizer': 'adam', 'learnrate': 1e-4
        },
        exp_name='domain-prior', exp_dir='~/experiments/domain-prior/',
        cuda=True, resume=False):
    # Set up experiment directory
    exp_name += '_%s-to-%s_vgg%i-%i_adv%.2f' % (
        train_dataset, test_dataset, vgg_params['n_layers'],
        vgg_params['base_f'], adversarial_range)
    exp_dir = os.path.join(os.path.expanduser(exp_dir), exp_name)
    if not os.path.isdir(exp_dir):
        os.makedirs(exp_dir)

    # Train a VGG classifier if not already done
    if vgg_ckpt is None:
        train_loader, val_loader, n_classes = data.loader(
            train_dataset, vgg_params['batch_size'])
        if not resume:
            with open(os.path.join(exp_dir, 'vgg_params.json'), 'w') as f:
                json.dump(vgg_params, f)
            vgg = model.VGG(img_size, 1, vgg_params['base_f'],
                            vgg_params['n_layers'], n_classes,
                            vgg_params['dropout'])
        else:
            vgg = torch.load(os.path.join(exp_dir, 'best_checkpoint'))
        train.fit(train_loader, val_loader, vgg, exp_dir,
                  torch.nn.CrossEntropyLoss(), vgg_params['optimizer'],
                  vgg_params['learnrate'], cuda, resume=resume)
    else:
        vgg = torch.load(vgg_ckpt)

    pixelcnn = torch.load(pixelcnn_ckpt)
    pixelcnn_params = os.path.join(os.path.dirname(pixelcnn_ckpt), 'params.json')
    with open(pixelcnn_params, 'r') as f:
        pixelcnn_params = json.load(f)
    n_bins = pixelcnn_params['n_bins']

    if cuda:
        vgg = vgg.cuda()
        pixelcnn = pixelcnn.cuda()

    # Run the datasets through the networks and calculate 3 pixelcnn losses:
    # 1. Average: mean across the image
    # 2. High-pass filtered: weight by difference to upper- and left- neighbors
    # 3. Saliency: weight by pixel saliency (vgg backprop-to-input)
    _, loader, _ = data.loader(train_dataset, 1)
    print('Calculating losses for ' + train_dataset)
    dom_avg, dom_hp, dom_sw, dom_sal, dom_var = calc_losses(
        vgg, pixelcnn, loader, n_bins, cuda)
    print('Calculating losses for adversarial images')
    adv_avg, adv_hp, adv_sw, adv_sal, adv_var = adversarial(
        vgg, pixelcnn, loader, n_bins, adversarial_range, cuda)
    _, loader, _ = data.loader(test_dataset, 1)
    print('Calculating losses for ' + test_dataset)
    ext_avg, ext_hp, ext_sw, ext_sal, ext_var = calc_losses(
        vgg, pixelcnn, loader, n_bins, cuda)

    # Loss histograms
    n_bins = 100
    all_losses = np.concatenate((dom_avg, adv_avg, ext_avg, dom_hp, adv_hp,
                                 ext_hp, dom_sw, adv_sw, ext_sw))
    edges = np.linspace(0, np.percentile(all_losses, 95), n_bins + 1)
    # average loss
    vis.histogram(dom_avg, edges, train_dataset + ' average loss', exp_dir)
    vis.histogram(adv_avg, edges, 'adversarial average loss', exp_dir)
    vis.histogram(ext_avg, edges, test_dataset + ' average loss', exp_dir)
    # high-pass weighted loss
    vis.histogram(dom_hp, edges, train_dataset + ' highpass loss', exp_dir)
    vis.histogram(adv_hp, edges, 'adversarial highpass loss', exp_dir)
    vis.histogram(ext_hp, edges, test_dataset + ' highpass loss', exp_dir)
    # saliency weighted loss
    vis.histogram(dom_sw, edges, train_dataset + ' saliency loss', exp_dir)
    vis.histogram(adv_sw, edges, 'adversarial saliency loss', exp_dir)
    vis.histogram(ext_sw, edges, test_dataset + ' saliency loss', exp_dir)
    # loss variances
    loss_variances = np.concatenate((dom_var, adv_var, ext_var))
    edges = np.linspace(0, np.percentile(loss_variances, 95), n_bins + 1)
    vis.histogram(dom_var, edges, train_dataset + ' loss variance', exp_dir)
    vis.histogram(adv_var, edges, 'adversarial loss variance', exp_dir)
    vis.histogram(ext_var, edges, test_dataset + ' loss variance', exp_dir)

    # Calculate epistemic uncertainties for each dataset for each model
    _, loader, _ = data.loader(train_dataset, 1)
    dom_class_epi = epistemic(vgg, loader, cuda)
    adv_class_epi = epistemic_adversarial(vgg, adversarial_range, loader, cuda)
    _, loader, _ = data.loader(test_dataset, 1)
    ext_class_epi = epistemic(vgg, loader, cuda)

    # Classifier uncertainty histograms
    n_bins = 100
    all_class_epi = dom_class_epi + adv_class_epi + ext_class_epi
    edges = np.linspace(0, np.percentile(all_class_epi, 95), n_bins + 1)
    vis.histogram(dom_class_epi, edges,
                  train_dataset + ' classifier uncertainty', exp_dir)
    vis.histogram(adv_class_epi, edges,
                  'adversarial classifier uncertainty', exp_dir)
    vis.histogram(ext_class_epi, edges,
                  test_dataset + ' classifier uncertainty', exp_dir)

    # ROC curves
    vis.roc(dom_avg, ext_avg, 'out-of-domain: average loss', exp_dir)
    vis.roc(dom_hp, ext_hp, 'out-of-domain: high-pass filtered loss', exp_dir)
    vis.roc(dom_sw, ext_sw, 'out-of-domain: saliency-weighted loss', exp_dir)
    vis.roc(dom_class_epi, ext_class_epi,
            'out-of-domain: epistemic uncertainty', exp_dir)
    vis.roc(dom_avg, adv_avg, 'adversarial: average loss', exp_dir)
    vis.roc(dom_hp, adv_hp, 'adversarial: high-pass filtered loss', exp_dir)
    vis.roc(dom_sw, adv_sw, 'adversarial: saliency-weighted loss', exp_dir)
    vis.roc(dom_class_epi, adv_class_epi,
            'adversarial: epistemic uncertainty', exp_dir)
experiment.set_name(args.namestr)
args.experiment = experiment

# Because we all like reproducibility (...and also know where we keep our towels)
# ------------------------------------------------------------------------------
np.random.seed(42)
torch.manual_seed(42)
torch.cuda.manual_seed_all(42)

# Obtain and train our model here:
# ------------------------------------------------------------------------------
model, optim = get_model()
if use_cuda:
    model.cuda()

training_loader, validation_loader = _dataloader(args)

# load trained model if necessary
if args.load_dir is not None:
    model, optim, start_epoch = load_session(model, optim, args)
else:
    start_epoch = 0

fit(model, training_loader, validation_loader, optim, start_epoch, args)
args.experiment.end()

# ------------------------------------------------------------------------------
# So Long, and Thanks for All the Fish!   >< ((('>   >< ((('>   >< ((('>
# ------------------------------------------------------------------------------
def main():
    parser = argparse.ArgumentParser()

    # Required parameters
    parser.add_argument("--data_dir", default=None, type=str, required=True,
                        help="The input data dir. Should contain the .tsv files "
                             "(or other data files) for the task.")
    parser.add_argument("--bert_model", default=None, type=str, required=True,
                        help="Bert pre-trained model selected in the list: bert-base-uncased, "
                             "bert-large-uncased, bert-base-cased, bert-large-cased, "
                             "bert-base-multilingual-uncased, bert-base-multilingual-cased, "
                             "bert-base-chinese.")
    parser.add_argument("--task_name", default=None, type=str, required=True,
                        help="The name of the task to train.")
    parser.add_argument("--output_dir", default=None, type=str, required=True,
                        help="The output directory where the model predictions "
                             "and checkpoints will be written.")

    # Other parameters
    parser.add_argument("--cache_dir", default="", type=str,
                        help="Where do you want to store the pre-trained models "
                             "downloaded from s3")
    parser.add_argument("--max_seq_length", default=128, type=int,
                        help="The maximum total input sequence length after WordPiece tokenization. \n"
                             "Sequences longer than this will be truncated, and sequences shorter \n"
                             "than this will be padded.")
    parser.add_argument("--do_train", action='store_true',
                        help="Whether to run training.")
    parser.add_argument("--do_eval", action='store_true',
                        help="Whether to run eval or not.")
    parser.add_argument("--eval_on", default="dev",
                        help="Whether to run eval on the dev set or test set.")
    parser.add_argument("--do_lower_case", action='store_true',
                        help="Set this flag if you are using an uncased model.")
    parser.add_argument("--train_batch_size", default=32, type=int,
                        help="Total batch size for training.")
    parser.add_argument("--eval_batch_size", default=8, type=int,
                        help="Total batch size for eval.")
    parser.add_argument("--learning_rate", default=5e-5, type=float,
                        help="The initial learning rate for Adam.")
    parser.add_argument("--num_train_epochs", default=3.0, type=float,
                        help="Total number of training epochs to perform.")
    parser.add_argument("--overwrite_output_dir", action="store_true",
                        help="Overwrite the content of the output directory")
    parser.add_argument("--warmup_proportion", default=0.1, type=float,
                        help="Proportion of training to perform linear learning rate warmup for. "
                             "E.g., 0.1 = 10%% of training.")
    parser.add_argument("--weight_decay", default=0.01, type=float,
                        help="Weight decay if we apply some.")
    parser.add_argument("--adam_epsilon", default=1e-8, type=float,
                        help="Epsilon for Adam optimizer.")
    parser.add_argument("--max_grad_norm", default=1.0, type=float,
                        help="Max gradient norm.")
    parser.add_argument("--no_cuda", action='store_true',
                        help="Whether not to use CUDA when available")
    parser.add_argument("--local_rank", type=int, default=-1,
                        help="local_rank for distributed training on gpus")
    parser.add_argument('--seed', type=int, default=42,
                        help="random seed for initialization")
    parser.add_argument('--gradient_accumulation_steps', type=int, default=1,
                        help="Number of updates steps to accumulate before "
                             "performing a backward/update pass.")
    parser.add_argument('--fp16', action='store_true',
                        help="Whether to use 16-bit float precision instead of 32-bit")
    parser.add_argument('--fp16_opt_level', type=str, default='O1',
                        help="For fp16: Apex AMP optimization level selected in "
                             "['O0', 'O1', 'O2', and 'O3']. "
                             "See details at https://nvidia.github.io/apex/amp.html")
    parser.add_argument('--loss_scale', type=float, default=0,
                        help="Loss scaling to improve fp16 numeric stability. "
                             "Only used when fp16 set to True.\n"
                             "0 (default value): dynamic loss scaling.\n"
                             "Positive power of 2: static loss scaling value.\n")
    parser.add_argument('--server_ip', type=str, default='',
                        help="Can be used for distant debugging.")
    parser.add_argument('--server_port', type=str, default='',
                        help="Can be used for distant debugging.")
    args = parser.parse_args()

    # ------------------Parameter Valid Check-------------------------------------------
    if args.gradient_accumulation_steps < 1:
        raise ValueError(
            "Invalid gradient_accumulation_steps parameter: {}, should be >= 1"
            .format(args.gradient_accumulation_steps))
    if not args.do_train and not args.do_eval:
        raise ValueError(
            "At least one of `do_train` or `do_eval` must be True.")
    if os.path.exists(args.output_dir) and os.listdir(args.output_dir) \
            and args.do_train and not args.overwrite_output_dir:
        raise ValueError(
            "Output directory ({}) not empty. Use --overwrite_output_dir to overcome."
            .format(args.output_dir))
    if not os.path.exists(args.output_dir):
        os.makedirs(args.output_dir)

    processors = {"CoNLL2003_NER": CoNLL2003NerProcessor}

    # ------------------Prepare Data-------------------------------------------
    task_name = args.task_name
    if task_name not in processors:
        raise ValueError("Task not found: %s" % task_name)
    data_processor = processors[task_name]()
    label_list = data_processor.get_labels()
    num_labels = len(label_list)

    tokenizer = BertTokenizer.from_pretrained(args.bert_model,
                                              do_lower_case=args.do_lower_case)

    args.train_batch_size = args.train_batch_size // args.gradient_accumulation_steps
    train_iter = None
    num_train_optimization_steps = 0
    if args.do_train:
        train_examples = data_processor.get_train_examples(args.data_dir)
        num_train_optimization_steps = int(
            len(train_examples) / args.train_batch_size /
            args.gradient_accumulation_steps) * args.num_train_epochs
        if args.local_rank != -1:
            num_train_optimization_steps = (num_train_optimization_steps //
                                            torch.distributed.get_world_size())
        train_features = convert_examples_to_features(train_examples, label_list,
                                                      args.max_seq_length, tokenizer)
        train_iter = prepare_data_loader(train_features, args, 'train')

    eval_examples = data_processor.get_dev_examples(args.data_dir)
    eval_features = convert_examples_to_features(eval_examples, label_list,
                                                 args.max_seq_length, tokenizer)
    eval_iter = prepare_data_loader(eval_features, args, 'eval')

    # Prepare model
    if args.do_train:
        config = BertConfig.from_pretrained(args.bert_model,
                                            num_labels=num_labels,
                                            finetuning_task=args.task_name,
                                            output_hidden_states=True)
        model = BertNer.from_pretrained(args.bert_model, config=config)
        fit(model, train_iter, eval_iter, num_train_optimization_steps,
            label_list, args)
    else:
        # Load a trained model and vocabulary that you have fine-tuned
        model = BertNer.from_pretrained(args.output_dir)
        fit(model, train_iter, eval_iter, num_train_optimization_steps,
            label_list, args)
# In[6]:

# for name, param in model.named_parameters():
#     param.requires_grad = True
# opt = optim.Adam(model.parameters())

# Train the model. We automatically save the model with the lowest val_loss.
# If you want to continue the training and keep the loss history, just pass it
# as an additional argument as shown below.

# In[7]:

# !export CUDA_LAUNCH_BLOCKING = 1;

# In[8]:

model, val_hist = fit(10, model, custom_loss, opt, train_dl, valid_dl)

# In[9]:

# model, val_hist = fit(1, model, custom_loss, opt, train_dl, valid_dl, val_hist=val_hist)

# In[10]:

val_hist

# In[11]:

plt.plot(val_hist)

# #### evaluate the model
    # For songs sampling
    "TEMPERATURE": 1,
    "TAKE_MAX_PROBABLE": False,
    "LIMIT_LEN": 300
}

print(config)

# model = VanillaRNN(config["VOCAB_SIZE"], config["HIDDEN"], config["VOCAB_SIZE"]).to(get_device())
model = LSTMSimple(config["VOCAB_SIZE"], config["HIDDEN"],
                   config["VOCAB_SIZE"]).to(get_device())

criterion = CrossEntropyLoss()

# Fit Model
fit(model, train_encoded, val_encoded, config)

# Report NLL for validation and test
nll_val = negative_log_likelihood(model, val_encoded, criterion, config)
nll_test = negative_log_likelihood(model, test_encoded, criterion, config)
print("NLL Validation: {}".format(nll_val))
print("NLL Test: {}".format(nll_test))

# Save error plot to file
save_loss_graph(model)

# Save model to file
print("Saving model...")
now = datetime.now().strftime('%Y-%m-%d-%H-%M')
torch.save(model.state_dict(), "model" + now + ".pth")
print("Saved!")
        device=device,
        order=args.order,
    ),
    ModelCheckpoint(filepath=PATH + f'/models/maml/{param_str}.pth',
                    monitor=f'val_{args.n}-shot_{args.k}-way_acc'),
    ReduceLROnPlateau(patience=10, factor=0.5, monitor=f'val_loss'),
    CSVLogger(PATH + f'/logs/maml/{param_str}.csv'),
]

fit(
    meta_model,
    meta_optimiser,
    loss_fn,
    epochs=args.epochs,
    dataloader=background_taskloader,
    prepare_batch=prepare_meta_batch(args.n, args.k, args.q, args.meta_batch_size),
    callbacks=callbacks,
    metrics=['categorical_accuracy'],
    fit_function=meta_gradient_step,
    fit_function_kwargs={
        'n_shot': args.n,
        'k_way': args.k,
        'q_queries': args.q,
        'train': True,
        'order': args.order,
        'device': device,
        'inner_train_steps': args.inner_train_steps,
        'inner_lr': args.inner_lr
    },
)
def train_model(exp_name, train_tfrecord, val_tfrecord, dictionary_file,
                n_hidden, learn_rate, batch_size, decouple_split=200,
                patience=10, max_epochs=200, sample_length=16, resume=False):
    """
    Train a GRU on some text data

    :param exp_name: experiment name (saved to ~/experiments/story-gen/exp_name)
    :param train_tfrecord: path to tfrecord of training set
    :param val_tfrecord: path to tfrecord of validation set
    :param dictionary_file: path to dictionary json file
    :param n_hidden: number of hidden units in GRU
    :param learn_rate: learning rate
    :param batch_size: batch size
    :param decouple_split: subsequence length between decoupled neural
        interfaces, or None to not use decoupled neural interfaces
    :param patience: early stopping limit
    :param max_epochs: maximum number of epochs to run
    :param sample_length: length of sample to generate after each epoch
    :param resume: resume from previous run
    :return:
    """
    exp_dir = os.path.join(os.path.expanduser('~/experiments/story-gen/'), exp_name)
    if not os.path.isdir(exp_dir):
        os.makedirs(exp_dir)

    with open(dictionary_file, 'r') as f:
        reverse_dict = json.load(f)  # word -> int
    reverse_dict = {v + 1: k for k, v in reverse_dict.items()}  # int -> word
    # note: sequences are padded with zero, add to dict_size (for embedding)
    reverse_dict[0] = '_END_'  # this should be removed from sampled output
    dict_size = max(reverse_dict.keys()) + 1

    if not resume:
        max_sequence = 20000 if decouple_split is not None else 100
        pipeline = Vector_Pipeline(train_tfrecord, val_tfrecord, batch_size,
                                   max_sequence=max_sequence)
        init_train, init_val = pipeline.init_train, pipeline.init_val
        model_input = tf.placeholder_with_default(pipeline.output[:, :-1],
                                                  [None, None], 'input')

        # Embedding
        embedding = orthogonal([dict_size, n_hidden], 'embedding')
        embedded_input = tf.nn.embedding_lookup(embedding, model_input)
        int_label = pipeline.output[:, 1:]

        # Decoupled neural interface (optional)
        decoupled = decouple_split is not None
        if decoupled:
            # Split subsequences, reshape to [slow_time, batch, fast_time, feat]
            seq_len = tf.shape(embedded_input)[1]
            # pad so sequence length is divisible by subsequence length
            pad_len = decouple_split - tf.mod(seq_len, tf.constant(decouple_split))
            embedded_input = tf.pad(embedded_input, [[0, 0], [0, pad_len], [0, 0]],
                                    mode='CONSTANT', constant_values=0)
            int_label = tf.pad(int_label, [[0, 0], [0, pad_len]])
            # batch x features x time
            dni_input = tf.transpose(embedded_input, [0, 2, 1])
            # batch x features x slow_time x fast_time
            dni_input = tf.reshape(
                dni_input,
                [-1, n_hidden, (seq_len + pad_len) // decouple_split, decouple_split])
            # fast_time x features x batch x slow_time
            dni_input = tf.transpose(dni_input, [3, 1, 0, 2])
            # fast_time x features x (batch x slow_time)
            dni_input = tf.reshape(dni_input, [decouple_split, n_hidden, -1])
            # (batch x slow_time) x fast_time x features
            dni_input = tf.transpose(dni_input, [2, 0, 1])
            # (batch x slow_time) x (fast_time x features)
            dni_input = tf.reshape(dni_input, [tf.shape(dni_input)[0], -1])

            # Decoupled neural interface: simplify to single dense layer
            dni = Dense(dni_input, n_hidden, tf.nn.relu, name='dni',
                        init='uniform', n_in=n_hidden * decouple_split)

            # Reshape DNI out & embedded_input to new_batch x fast_time for GRU
            gru_hidden = tf.reshape(dni.output, [-1, n_hidden])
            embedded_input = tf.reshape(embedded_input,
                                        [-1, decouple_split, n_hidden])
            int_label = tf.reshape(int_label, [-1, decouple_split])
        else:
            gru_hidden = None

        # model part2: GRU
        # transpose: tf.scan needs time x batch x features
        embedded_input = tf.transpose(embedded_input, [1, 0, 2])
        training_toggle = tf.placeholder(tf.int32, name='training_toggle')
        gru = GRU(embedded_input, n_hidden, training_toggle, h0=gru_hidden,
                  name='gru')
        gru_h0 = gru.h0
        gru_output = gru.output

        # model part3: dropout and dense layer
        dropout_rate = tf.placeholder(tf.float32, name='dropout_rate')
        dropped = tf.nn.dropout(gru_output, 1 - dropout_rate)
        dense = Dense(dropped, dict_size)
        model_output = tf.identity(dense.output, 'output')

        # cross-entropy loss
        # note: sequences padded with -1, mask these entries
        mask = tf.not_equal(int_label, -1)
        # swap -1's to avoid error in loss fcn, even though we're ignoring these
        int_label = tf.where(mask, int_label, tf.zeros_like(int_label))
        # mean over entries with mask==1
        mask = tf.cast(mask, dtype=tf.float32)
        loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
            labels=int_label, logits=model_output)
        loss = tf.reduce_sum(mask * loss) / tf.reduce_sum(mask)

        if decoupled:
            # decoupled neural interface loss
            dni_label = tf.stop_gradient(gru.output)
            dni_loss = tf.reduce_mean(tf.square(dni_label - dni.output),
                                      name='dni_loss')
        else:
            dni_loss = tf.constant(0., dtype=tf.float32)

        train_step = tf.train.AdamOptimizer(learn_rate).minimize(
            loss + dni_loss, name='train_step')
    else:
        (model_input, training_toggle, dropout_rate, train_step, init_train,
         init_val, loss, dni_loss, gru_output, gru_h0, model_output
         ) = reload_graph(exp_dir)

    n_examples = tf.shape(model_input)[0]
    sampled_out = tf.multinomial(model_output[0, :1, :], num_samples=1)

    def epoch_callback(sess):
        # TODO: not sure how to initialize this since it's usually from the DNI
        h0 = np.random.rand(1, n_hidden)
        sampled_text = [np.random.randint(0, dict_size, size=(1, 1))]
        for i in range(sample_length + 1):
            out, h0 = sess.run([sampled_out, gru_output],
                               feed_dict={gru_h0: h0,
                                          model_input: sampled_text[i],
                                          dropout_rate: 0,
                                          training_toggle: 0})
            h0 = h0[0]
            sampled_text.append(out)
        sampled_text = sampled_text[1:]
        # temp bugfix: screwed up the reverse dictionary, missing keys
        if any([int(o) not in reverse_dict.keys() for o in sampled_text]):
            sampled_text = [
                o if int(o) in reverse_dict.keys()
                else int(np.random.choice(list(reverse_dict.keys())))
                for o in sampled_text]
        print(' '.join([reverse_dict[int(o)] for o in sampled_text]))
        print('')

    fit(training_toggle, dropout_rate, train_step, init_train, init_val, loss,
        dni_loss, n_examples, patience, max_epochs, exp_dir, epoch_callback,
        resume)
train_dataloader = DataLoader(train_dataset, batch_size=args['batch_size'],
                              shuffle=True, num_workers=12, pin_memory=True,
                              drop_last=True)
val_dataloader = DataLoader(val_dataset, batch_size=args['batch_size'],
                            num_workers=12, pin_memory=True)

V = len(dataset.vocab.keys())
P = len(dataset.pos_set.keys())

model, criterion, optimizer = prepare_model(
    V, P, args['embed'], args['hidden'], args['layers'], args['nhead'],
    dropout=args['dropout'], smoothing=args['label_smoothing'],
    lr=args['lr'], device=device)

best_loss, best_jacc = fit(model, train_dataloader, val_dataloader, criterion,
                           optimizer, device, args['epoch'],
                           model_prefix + '_' + str(i))
fold_stats.append([best_loss, best_jacc])
print('Fold {} - Best Loss: {}, Best Jacc: {}'.format(i, best_loss, best_jacc))

fold_stats = np.array(fold_stats)
mean = np.mean(fold_stats, axis=0)
std = np.std(fold_stats, axis=0)
print(mean)
print(std)
def test_build(self):
    fit("fake")
    assert True
def fit(self, input_path, model):
    data = train.prepare(input_path)
    self.func = train.fit(data, model)
    return self.func