def main():
    """Launch annotator training through the shared engine."""
    train_options = {
        'steps': 10000000,
        'no_healthy': False,
        'model_dir': 'summary/summary_annotator',
    }
    engine.train(model_annotator, 'annotation', **train_options)
def run(df):
    """Train the tweet classifier on the full dataframe and save its weights."""
    # Separate the binary target from the feature matrix; drop bookkeeping columns.
    labels = df["target"].values
    features = df.drop(["target", "kfold"], axis=1).values

    loader = torch.utils.data.DataLoader(
        dataset.TweetDataset(tweets=features, targets=labels),
        batch_size=config.TRAIN_BATCH_SIZE,
        num_workers=2,
    )

    device = "cuda" if torch.cuda.is_available() else "cpu"
    print("Using {} device".format(device))

    net = neural_net.NeuralNetwork().to(device)
    opt = torch.optim.Adam(net.parameters(), lr=1e-3)
    criterion = torch.nn.BCELoss()

    print("Training Model...")
    for epoch in range(config.EPOCHS):
        print(f"Epoch {epoch+1}\n--------------------")
        engine.train(loader, net, opt, criterion, device)

    # Persist the final weights after all epochs.
    torch.save(net.state_dict(), f"{config.MODEL_PATH}/{config.MODEL_NAME}.pth")
def run(df, fold):
    """Train and validate the LSTM sentiment model on one k-fold split.

    :param df: dataframe with ``review``, ``sentiment`` and ``kfold`` columns
    :param fold: fold index held out for validation
    """
    train_df = df[df.kfold != fold].reset_index(drop=True)
    valid_df = df[df.kfold == fold].reset_index(drop=True)

    # Fit the tokenizer on the full corpus so train/valid share one vocabulary.
    tokenizer = tf.keras.preprocessing.text.Tokenizer()
    tokenizer.fit_on_texts(df.review.values.tolist())

    xtrain = tokenizer.texts_to_sequences(train_df.review.values)
    xtest = tokenizer.texts_to_sequences(valid_df.review.values)

    # Zero-pad all sequences to a fixed length.
    xtrain = tf.keras.preprocessing.sequence.pad_sequences(
        xtrain, maxlen=config.MAX_LEN)
    xtest = tf.keras.preprocessing.sequence.pad_sequences(
        xtest, maxlen=config.MAX_LEN)

    train_dataset = dataset.IMDBDataset(reviews=xtrain,
                                        targets=train_df.sentiment.values)
    train_data_loader = torch.utils.data.DataLoader(
        train_dataset, batch_size=config.TRAIN_BATCH_SIZE, num_workers=2)

    valid_dataset = dataset.IMDBDataset(reviews=xtest,
                                        targets=valid_df.sentiment.values)
    valid_data_loader = torch.utils.data.DataLoader(
        valid_dataset, batch_size=config.VALID_BATCH_SIZE, num_workers=2)

    print('Loading Embeddings')
    embedding_dict = load_vectors('./crawl-300d-2M.vec')
    print('Embeddings Loaded')
    embedding_matrix = create_embedding_matrix(tokenizer.word_index,
                                               embedding_dict)

    device = torch.device('cuda')
    model = lstm.LSTM(embedding_matrix)
    model.to(device)
    # NOTE(review): lr=0.1 is unusually high for Adam — confirm it is intentional.
    optimizer = torch.optim.Adam(model.parameters(), lr=0.1)

    print('Training model')
    best_accuracy = 0
    early_stopping_counter = 0
    for epoch in range(config.EPOCHS):
        engine.train(train_data_loader, model, optimizer, device)
        outputs, targets = engine.evaluate(valid_data_loader, model, device)
        # Threshold the sigmoid outputs at 0.5 for accuracy computation.
        outputs = np.array(outputs) >= 0.5
        accuracy = metrics.accuracy_score(targets, outputs)
        print('Fold: ', fold, ' EPOCH: ', epoch, ' Accuracy Score: ', accuracy)
        if accuracy > best_accuracy:
            best_accuracy = accuracy
        else:
            early_stopping_counter += 1
        # Bug fix: the counter was incremented but never checked, so early
        # stopping never actually fired; stop after 3 epochs without
        # improvement (mirrors the other fold-training routine in this file).
        if early_stopping_counter > 2:
            break
def run(args):
    """Dispatch to training or testing based on the requested action."""
    # Anything other than 'train' falls through to testing.
    if args['action'] == 'train':
        engine.train(args)
        return
    engine.test(args)
def main(config):
    """Build the CMV corpus feeds, then train and/or run inference for DTDMN."""
    prepare_dirs_loggers(config, os.path.basename(__file__))

    corpus_client = corpora.CMVCorpus(config)
    # corpus_client = corpora.CourtCorpus(config)
    conv_corpus = corpus_client.get_corpus()

    train_conv = conv_corpus['train']
    test_conv = conv_corpus['test']
    vocab_size = conv_corpus['vocab_size']

    # Data loaders feeding the deep model.
    train_feed = data_loaders.CMVDataLoader("train", train_conv, vocab_size, config)
    test_feed = data_loaders.CMVDataLoader("test", test_conv, vocab_size, config)

    model = dtdmn.DTDMN(corpus_client, config)

    if config.forward_only:
        # Inference-only run reuses a previously saved session directory.
        test_file = os.path.join(
            config.log_dir, config.load_sess,
            "{}-test-{}.txt".format(get_time(), config.gen_type))
        model_file = os.path.join(config.log_dir, config.load_sess, "model")
    else:
        test_file = os.path.join(
            config.session_dir,
            "{}-test-{}.txt".format(get_time(), config.gen_type))
        model_file = os.path.join(config.session_dir, "model")
    # The dump paths are identical in both modes, so they are built once here.
    dump_file_train = os.path.join(config.session_dir,
                                   "{}-train.pkl".format(get_time()))
    dump_file_test = os.path.join(config.session_dir,
                                  "{}-test.pkl".format(get_time()))
    dump_file_valid = os.path.join(config.session_dir,
                                   "{}-valid.pkl".format(get_time()))

    if config.use_gpu:
        model.cuda()

    if config.forward_only is False:
        try:
            engine.train(model, train_feed, test_feed, config)
        except KeyboardInterrupt:
            print("Training stopped by keyboard.")

    # Reload the best saved weights before inference.
    model.load_state_dict(torch.load(model_file))
    engine.inference(model, test_feed, config, num_batch=None)

    if config.output_vis:
        # Dump activations for the train and test feeds for later visualization.
        with open(dump_file_train, "wb") as gen_f:
            gen_utils.generate_with_act(model, train_feed, config,
                                        num_batch=None, dest_f=gen_f)
        with open(dump_file_test, "wb") as gen_f:
            gen_utils.generate_with_act(model, test_feed, config,
                                        num_batch=None, dest_f=gen_f)
def run(args):
    """Route the parsed arguments to the matching engine entry point."""
    import engine

    # Dispatch table; unknown actions default to testing.
    handlers = {'train': engine.train, 'continue': engine.cont}
    handlers.get(args['action'], engine.test)(args)
def main():
    """Kick off cheat-classifier training over both targets."""
    options = dict(
        steps=10000000,
        no_healthy=True,
        model_dir='summary/summary_classifier_cheat_bn_warp',
    )
    engine.train(model_classifier_cheat, 'both', **options)
def run():
    """Fine-tune a SentenceTransformer pair classifier on the configured TSV file."""
    dfs = pd.read_csv(config.TRAINING_FILE,
                      sep="\t",
                      names=['idx', 'sent1', 'sent2', 'label'])
    dfs['label'] = pd.to_numeric(dfs["label"], downcast='float')

    # Stratified 90/10 train/validation split.
    df_train, df_valid = model_selection.train_test_split(
        dfs,
        test_size=0.1,
        random_state=42,
        stratify=dfs.label.values,
    )
    df_train = df_train.reset_index(drop=True)
    df_valid = df_valid.reset_index(drop=True)

    dataset_reader = dataset.Dataset()
    train_dataset = dataset_reader.read(df_train, return_pt=True)
    valid_sentence1, valid_sentence2, valid_labels = dataset_reader.read(
        df_valid)

    train_dataloader = DataLoader(train_dataset,
                                  shuffle=True,
                                  batch_size=config.TRAIN_BATCH_SIZE)

    # evaluator = evaluation.EmbeddingSimilarityEvaluator(valid_sentence1, valid_sentence2, valid_labels)
    evaluator = evaluation.BinaryClassificationEvaluator(
        valid_sentence1,
        valid_sentence2,
        valid_labels,
        batch_size=config.VALID_BATCH_SIZE,
        show_progress_bar=False)

    # Model stack: transformer encoder -> pooling -> dense projection.
    word_embedding_model = models.Transformer(config.BERT_PATH,
                                              max_seq_length=config.MAX_LEN)
    pooling_model = models.Pooling(
        word_embedding_model.get_word_embedding_dimension())
    dense_model = models.Dense(
        in_features=pooling_model.get_sentence_embedding_dimension(),
        out_features=config.MAX_LEN,
        activation_function=nn.Tanh())
    model = SentenceTransformer(
        modules=[word_embedding_model, pooling_model, dense_model])

    train_loss = losses.CosineSimilarityLoss(model)
    engine.train(train_dataloader, model, train_loss, evaluator)
def main(config):
    """Build the language-model dataloader and run training."""
    # NOTE(review): 'log_setp' looks like a typo of 'log_step', but the helper
    # is defined elsewhere — keep the call name as-is.
    log_setp(logger, config)
    dataloader = DataLoader(config)
    vocab = dataloader.word_map
    print('number of tokens:', len(vocab))
    print(
        f'create model: input size {config.input_size}, hidden size {config.hidden_size}, layer number {config.layer_num}'
    )
    model = LM(len(vocab), config.input_size, config.hidden_size,
               config.layer_num)
    print(f'start training of {config.iter_num} iterations')
    train(model, dataloader, config)
def run():
    """Train the bibtex autoencoder, evaluate it, and dump low-dimensional embeddings."""
    train_dataset = dataset.autoencoderDataset("bibtex_train.embeddings")
    test_dataset = dataset.autoencoderDataset("bibtex_test.embeddings")

    train_dataloader = torch.utils.data.DataLoader(train_dataset,
                                                   batch_size=1,
                                                   shuffle=True,
                                                   num_workers=4)
    test_dataloader = torch.utils.data.DataLoader(test_dataset,
                                                  batch_size=1,
                                                  num_workers=1)

    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    autoencoder_model = model.autoEncoder(100)  # 100 is the input dimension
    autoencoder_model.to(device)
    optimizer = torch.optim.Adam(autoencoder_model.parameters(), lr=1e-3)

    for epoch in range(10):
        epoch_loss = engine.train(train_dataloader, autoencoder_model,
                                  optimizer, device)
        print("Epoch: {} Loss: {}".format(epoch + 1, epoch_loss))

    # Held-out evaluation after training.
    engine.eval(test_dataloader, autoencoder_model, device)

    # Unshuffled passes so embeddings are emitted in dataset order.
    ordered_train = torch.utils.data.DataLoader(train_dataset,
                                                batch_size=1,
                                                shuffle=False)
    ordered_test = torch.utils.data.DataLoader(test_dataset,
                                               batch_size=1,
                                               shuffle=False)
    embed_list = engine.generate_low_dimensional_embeddings(
        ordered_train, autoencoder_model, device)
    embed_list.extend(
        engine.generate_low_dimensional_embeddings(ordered_test,
                                                   autoencoder_model, device))

    # One embedding vector per line, space-separated.
    with open("bibtex_low_dimension.embeddings", mode='w+') as file:
        for each_elem in embed_list:
            file.write(" ".join(str(v) for v in each_elem[0]) + '\n')
def main(config):
    """Train the TDM topic model on the Twitter corpus and optionally dump samples."""
    prepare_dirs_loggers(config, os.path.basename(__file__))

    corpus_client = corpora.TwitterCorpus(config)
    conv_corpus = corpus_client.get_corpus_bow()
    train_conv = conv_corpus['train']
    valid_conv = conv_corpus['valid']
    test_conv = conv_corpus['test']
    vocab_size = conv_corpus['vocab_size']

    # Bag-of-words feeds used for model training.
    train_feed = data_loaders.TCDataLoader("Train", train_conv, vocab_size, config)
    valid_feed = data_loaders.TCDataLoader("Valid", valid_conv, vocab_size, config)
    test_feed = data_loaders.TCDataLoader("Test", test_conv, vocab_size, config)

    # Sequence form of the corpus, used for generation.
    conv_corpus_seq = corpus_client.get_corpus_seq()
    train_conv_seq = conv_corpus_seq['train']
    valid_conv_seq = conv_corpus_seq['valid']
    test_conv_seq = conv_corpus_seq['test']

    model = conv_models.TDM(corpus_client, config)
    if config.use_gpu:
        model.cuda()

    engine.train(model, train_feed, valid_feed, test_feed, config)

    # Fresh feeds for the output/generation pass.
    train_feed_output = data_loaders.TCDataLoader("Train_Output", train_conv,
                                                  vocab_size, config)
    test_feed_output = data_loaders.TCDataLoader("Test_Output", test_conv,
                                                 vocab_size, config)
    valid_feed_output = data_loaders.TCDataLoader("Valid_Output", valid_conv,
                                                  vocab_size, config)

    if config.output_vis:
        with open(os.path.join(config.session_dir, "gen_samples.txt"),
                  "w") as gen_f:
            gen_utils.generate(model, valid_feed_output, valid_conv_seq,
                               config, num_batch=2, dest_f=gen_f)
def main():
    """Validate CLI options, then run either training or stylization."""
    args = get_args_parser()

    # Bail out early on unusable argument combinations.
    error = None
    if args.subcommand is None:
        error = "ERROR: specify either train or eval"
    elif args.cuda and not torch.cuda.is_available():
        error = "ERROR: cuda is not available, try running on CPU"
    if error is not None:
        print(error)
        sys.exit(1)

    if args.subcommand == "train":
        print('Starting train...')
        utils.check_paths(args)
        train(args)
    else:
        print('Starting stylization...')
        utils.check_paths(args, train=False)
        stylize(args)
def run():
    """Fine-tune the BERT sentiment model on the configured training file.

    Reads the CSV, binarizes sentiment ("positive" -> 1), splits the data,
    builds loaders, and trains on CPU for ``config.EPOCHS`` epochs, saving
    the weights after every epoch.
    """
    df = pd.read_csv(config.TRAIN_FILE).fillna("none")
    # Map sentiment strings to binary labels.
    df.sentiment = df.sentiment.apply(lambda x: 1 if x == "positive" else 0)
    # NOTE(review): test_size=0.99 keeps only 1% of the rows for training —
    # presumably a quick CPU smoke-test setting; confirm before real runs.
    train_df, test_df = train_test_split(df,
                                         test_size=0.99,
                                         stratify=df.sentiment.values,
                                         random_state=42)
    train_df = train_df.reset_index(drop=True)
    test_df = test_df.reset_index(drop=True)
    train_dataset = dataset.BertDataset(reviews=train_df.review.values,
                                        targets=train_df.sentiment.values)
    test_dataset = dataset.BertDataset(reviews=test_df.review.values,
                                       targets=test_df.sentiment.values)
    train_loader = torch.utils.data.DataLoader(
        train_dataset, batch_size=config.TRAIN_BATCH_SIZE
        #num_workers=1
    )
    test_loader = torch.utils.data.DataLoader(
        test_dataset, batch_size=config.TEST_BATCH_SIZE)
    device = torch.device("cpu")
    model = BertBaseUncase()
    model.to(device)
    # Total optimizer steps, used by the linear warmup schedule.
    num_training_steps = int(
        len(train_df) // config.TRAIN_BATCH_SIZE * config.EPOCHS)
    optimizer = AdamW(model.parameters(), lr=3e-5)
    scheduler = get_linear_schedule_with_warmup(
        optimizer=optimizer,
        num_warmup_steps=0,
        num_training_steps=num_training_steps)
    best_accuracy = 0  # NOTE(review): never updated in the visible code
    for epoch in range(config.EPOCHS):
        engine.train(train_loader, model, optimizer, device, scheduler)
        # Checkpoint after every epoch (overwrites the same path).
        torch.save(model.state_dict(), config.MODEL_PATH)
# The triple quote below opens a commented-out block that continues past
# this chunk; do not remove it without checking for its closing delimiter.
"""
def run_training():
    """Train the captcha model and print decoded validation predictions.

    Loads ``*.png`` images from ``config.DATA_DIR`` whose file names encode
    the target characters, trains for ``config.EPOCHS`` epochs, and prints
    a sample of (target, prediction) pairs each epoch.
    """
    image_files = glob.glob(os.path.join(config.DATA_DIR, '*.png'))
    # Bug fix: derive the label from the file name portably instead of
    # splitting on '/', which breaks on Windows-style path separators.
    targets_orig = [
        os.path.splitext(os.path.basename(x))[0] for x in image_files
    ]
    # One label per character in the file name.
    targets = [[c for c in x] for x in targets_orig]
    targets_flat = [c for clist in targets for c in clist]

    label_enc = preprocessing.LabelEncoder()
    label_enc.fit(targets_flat)
    # Shift labels by one so class 0 stays reserved (e.g. for the CTC blank).
    targets_enc = [label_enc.transform(x) for x in targets]
    targets_enc = np.array(targets_enc) + 1

    (train_imgs, test_imgs, train_targets, test_targets, train_orig_targets,
     test_orig_targets) = model_selection.train_test_split(image_files,
                                                           targets_enc,
                                                           targets_orig,
                                                           test_size=0.1,
                                                           random_state=42)

    train_dataset = dataset.Classification(image_paths=train_imgs,
                                           targets=train_targets,
                                           resize=(config.IMAGE_HEIGHT,
                                                   config.IMAGE_WIDTH))
    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=config.BATCH_SIZE,
                                               shuffle=True)
    test_dataset = dataset.Classification(image_paths=test_imgs,
                                          targets=test_targets,
                                          resize=(config.IMAGE_HEIGHT,
                                                  config.IMAGE_WIDTH))
    test_loader = torch.utils.data.DataLoader(test_dataset,
                                              batch_size=config.BATCH_SIZE,
                                              shuffle=False)

    model = CaptchaModel(num_chars=len(label_enc.classes_))
    model = model.to(config.DEVICE)

    optimizer = torch.optim.Adam(model.parameters(), lr=3e-4)
    # NOTE(review): the scheduler is created but never stepped in this
    # function — confirm whether engine.train steps it or it is dead code.
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                           factor=0.8,
                                                           patience=5,
                                                           verbose=True)

    for epoch in range(config.EPOCHS):
        train_loss = engine.train(model, train_loader, optimizer)
        val_preds, valid_loss = engine.eval(model, test_loader)
        print(
            f"Epoch: {epoch}: Train loss: {train_loss}, Valid loss: {valid_loss}"
        )
        # Decode per-batch predictions back into captcha strings.
        valid_cap_preds = []
        for vp in val_preds:
            current_preds = decode_predictions(vp, label_enc)
            valid_cap_preds.extend(current_preds)
        print(list(zip(test_orig_targets, valid_cap_preds))[6:11])
def main():
    """CLI entry point: load the config, build dataset/model, and train."""
    parser = argparse.ArgumentParser(
        description="Deep Neural Networks for 3D Anaglyph Image Generation")
    parser.add_argument("--config-file", default="", metavar="file",
                        help="path to config file", type=str)
    parser.add_argument("--mode", default="test", metavar="mode",
                        help="'train' or 'test'", type=str)
    parser.add_argument("opts",
                        help="Modify config options using the command-line",
                        default=None, nargs=argparse.REMAINDER)
    args = parser.parse_args()

    # Layered config: defaults < config file < command-line opts, then freeze.
    cfg = get_cfg_defaults()
    cfg.merge_from_file(args.config_file)
    cfg.merge_from_list(args.opts)
    cfg.freeze()

    dataset = build_dataset(cfg)
    model, optimizer = build_model(cfg)
    train(cfg, optimizer, dataset)
# torch dataloader creates batches of data
# from the classification dataset utils
train_loader = torch.utils.data.DataLoader(
    train_dataset,
    batch_size=16,
    shuffle=True,
    num_workers=4
)

# validation dataset
# NOTE(review): 'ClassicationDataset' spelling matches the project class
# name referenced here; do not "fix" it without renaming the class itself.
valid_dataset = dataset.ClassicationDataset(
    image_paths=valid_images,
    targets=valid_targets,
    resize=(227, 227),
    augmentations=aug
)
# Bug fix: validation data should not be shuffled — evaluation order stays
# deterministic (ROC AUC itself is permutation-invariant, so scores match).
valid_loader = torch.utils.data.DataLoader(
    valid_dataset,
    batch_size=16,
    shuffle=False,
    num_workers=4
)

# adam optimizer
optimizer = torch.optim.Adam(model.parameters(), lr=5e-4)

# train and print auc score for all epochs
for epoch in range(epochs):
    engine.train(train_loader, model, optimizer, device=device)
    predictions, valid_targets = engine.evaluate(
        valid_loader, model, device=device
    )
    roc_auc = metrics.roc_auc_score(valid_targets, predictions)
    # Bug fix: corrected the "Epcoh" typo in the log message.
    print(f"Epoch={epoch}, Valid ROC AUC={roc_auc}")
# Build two identical CNNs — looks like a shadow/target pair for a
# membership-inference style experiment; NOTE(review): confirm against the
# surrounding project code.
net_shadow, net_target = models.SimpleCNN(n_classes), models.SimpleCNN(
    n_classes)
# Separate SGD optimizers with shared hyper-parameters for both nets.
opt_shadow = torch.optim.SGD(net_shadow.parameters(), lr=lr, momentum=momentum)
opt_target = torch.optim.SGD(net_target.parameters(), lr=lr, momentum=momentum)
# Wrap for multi-GPU execution and move to CUDA.
net_target = nn.DataParallel(net_target).cuda()
net_shadow = nn.DataParallel(net_shadow).cuda()
loss = nn.CrossEntropyLoss().cuda()
'''
best_model = train(net_target, D_t_loader, test_loader, opt_target, loss, n_epochs)
torch.save({'state_dict':best_model.state_dict()}, os.path.join(save_path, "SimpleCNN_{}_target.tar".format(model_v)))
best_model = train(net_shadow, D_s_t_loader, test_loader, opt_shadow, loss, n_epochs)
torch.save({'state_dict':best_model.state_dict()}, os.path.join(save_path, "SimpleCNN_{}_shadow.tar".format(model_v)))
'''
# Only the target network is trained here (shadow training is commented out);
# calibration error (ECE) is reported each epoch.
for i in range(n_epochs):
    target_acc = train(net_target, D_t_loader, test_loader, opt_target, loss, i)
    #shadow_acc = train(net_shadow, D_s_t_loader, test_loader, opt_shadow, loss, i, beta=beta)
    train_ece, test_ece = eval_ece(net_target, D_t_loader), eval_ece(
        net_target, test_loader)
    print("Train ECE:{:.2f}\tTest ECE:{:.2f}".format(train_ece, test_ece))
    '''
    if n_epochs - i <= 170:
        print("Target Acc:{:.2f}".format(target_acc))
    attack_acc = work(net_target, net_shadow)
    '''
# Stochastic gradient descent over the joint CNN+FNN parameter list.
optimizer = optim.SGD(params_cnn_fnn,
                      lr=args.lr,
                      momentum=0.6,
                      weight_decay=0.0005)
# Cyclic learning-rate schedule oscillating between 1e-3 and 6e-3.
lr_scheduler = torch.optim.lr_scheduler.CyclicLR(optimizer,
                                                 base_lr=1e-3,
                                                 max_lr=6e-3)
# lr_scheduler = CosineAnnealingLR(optimizer, 50, eta_min=1e-3, last_epoch=-1)
model_save_path = args.save_final_model_path
print("Start training")
start_time = time.time()
for epoch in range(args.epochs):
    print("Executing Epoch: " + str(epoch))
    engine.train(model_cnn, model_fnn, optimizer, train_loader, device,
                 epoch, save=model_save_path)
    # Advance the cyclic LR schedule once per epoch.
    lr_scheduler.step()
    # Periodic evaluation every 10 epochs (including epoch 0).
    if epoch % 10 == 0:
        print("Testing Epoch: " + str(epoch))
        engine.test(model_cnn, model_fnn, test_loader, device, epoch)
# Report total wall-clock training time as H:MM:SS.
total_time = time.time() - start_time
total_time_str = str(datetime.timedelta(seconds=int(total_time)))
print('Training time {}'.format(total_time_str))
def main():
    """Entry point: build multilingual LM datasets, train/evaluate the RNN,
    then optionally compute an informed prior and refine on held-out
    (target) languages with few-shot data.
    """
    args = get_args()
    log.info(f'Parsed arguments: \n{pformat(args.__dict__)}')
    # Conditioning type must be one of the supported schemes.
    assert args.cond_type.lower() in ['none', 'platanios', 'oestling']

    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
    log.info('Using device {}.'.format(device))

    # Mixed precision via Nvidia Apex, only when CUDA and --fp16 are set.
    use_apex = False
    if torch.cuda.is_available() and args.fp16:
        log.info('Loading Nvidia Apex and using AMP')
        from apex import amp, optimizers
        use_apex = True
    else:
        log.info('Using FP32')
        amp = None

    log.info(f'Using time stamp {timestamp} to save models and logs.')

    if not args.no_seed:
        log.info(f'Setting random seed to {args.seed} for reproducibility.')
        torch.manual_seed(args.seed)
        random.seed(args.seed)

    data = Corpus(args.datadir)

    # Split spec: train excludes dev+target languages (invert_include=True),
    # valid covers the dev languages, test covers the held-out targets.
    data_splits = [
        {
            'split': 'train',
            'languages': args.dev_langs + args.target_langs,
            'invert_include': True,
        },
        {
            'split': 'valid',
            'languages': args.dev_langs,
        },
        {
            'split': 'test',
            'languages': args.target_langs,
        },
    ]
    if args.refine:
        # Small per-language split used for few-shot refinement.
        data_splits.append({
            'split': 'train_100',
            'languages': args.target_langs,
            'ignore_missing': True,
        })
    data_splits = data.make_datasets(data_splits, force_rebuild=args.rebuild)
    train_set, val_set, test_set = data_splits['train'], data_splits[
        'valid'], data_splits['test']
    dictionary = data_splits['dictionary']

    # Language sampling probabilities for the training feed.
    train_language_distr = get_sampling_probabilities(train_set, 1.0)
    train_set = Dataset(train_set,
                        batchsize=args.batchsize,
                        bptt=args.bptt,
                        reset_on_iter=True,
                        language_probabilities=train_language_distr)
    val_set = Dataset(val_set,
                      make_config=True,
                      batchsize=args.valid_batchsize,
                      bptt=args.bptt,
                      eval=True)
    test_set = Dataset(test_set,
                       make_config=True,
                       batchsize=args.test_batchsize,
                       bptt=args.bptt,
                       eval=True)

    train_loader = DataLoader(train_set, num_workers=args.workers)
    val_loader = DataLoader(val_set, num_workers=args.workers)
    test_loader = DataLoader(test_set, num_workers=args.workers)

    if args.refine:
        # One refinement dataset per target language.
        refine_set = dict()
        for lang, lang_d in data_splits['train_100'].items():
            refine_set[lang] = Dataset({lang: lang_d},
                                       batchsize=args.valid_batchsize,
                                       bptt=args.bptt,
                                       make_config=True)

    n_token = len(dictionary.idx2tkn)

    # Load and preprocess matrix of typological features
    # TODO: implement this, the OEST
    # prior_matrix = load_prior(args.prior, corpus.dictionary.lang2idx)
    # n_components = min(50, *prior_matrix.shape)
    # pca = PCA(n_components=n_components, whiten=True)
    # prior_matrix = pca.fit_transform(prior_matrix)
    prior = None

    model = RNN(args.cond_type,
                prior,
                n_token,
                n_input=args.emsize,
                n_hidden=args.nhidden,
                n_layers=args.nlayers,
                dropout=args.dropouto,
                dropoute=args.dropoute,
                dropouth=args.dropouth,
                dropouti=args.dropouti,
                wdrop=args.wdrop,
                wdrop_layers=[0, 1, 2],
                tie_weights=True).to(device)

    if args.opt_level != 'O2':
        loss_function = SplitCrossEntropyLoss(args.emsize, splits=[]).to(device)
    else:
        loss_function = CrossEntropyLoss().to(
            device)  # Should be ok to use with a vocabulary of this small size

    if use_apex:
        optimizer = optimizers.FusedAdam(model.parameters(),
                                         lr=args.lr,
                                         weight_decay=args.wdecay)
    else:
        # Optimize the trainable model parameters plus the loss head's own.
        params = list(filter(lambda p: p.requires_grad,
                             model.parameters())) + list(
                                 loss_function.parameters())
        optimizer = Adam(params, lr=args.lr, weight_decay=args.wdecay)

    if use_apex:
        model, optimizer = amp.initialize(model,
                                          optimizer,
                                          opt_level=args.opt_level)

    # Bundle everything the train/evaluate/refine helpers need.
    parameters = {
        'model': model,
        'optimizer': optimizer,
        'loss_function': loss_function,
        'use_apex': use_apex,
        'amp': amp if use_apex else None,
        'clip': args.clip,
        'alpha': args.alpha,
        'beta': args.beta,
        'bptt': args.bptt,
        'device': device,
        'prior': args.prior,
    }

    # Add backward hook for gradient clipping
    if args.clip:
        if use_apex:
            for p in amp.master_params(optimizer):
                p.register_hook(
                    lambda grad: torch.clamp(grad, -args.clip, args.clip))
        else:
            for p in model.parameters():
                p.register_hook(
                    lambda grad: torch.clamp(grad, -args.clip, args.clip))

    if args.prior == 'vi':
        # Variational prior: resample weights before every forward pass.
        prior = VIPrior(model, device=device)
        parameters['prior'] = prior

        def sample_weights(module: torch.nn.Module, input: torch.Tensor):
            prior.sample_weights(module)

        sample_weights_hook = model.register_forward_pre_hook(sample_weights)

    # Load model checkpoint if available
    start_epoch = 1
    if args.resume:
        if args.checkpoint is None:
            log.error(
                'No checkpoint passed. Specify it using the --checkpoint flag')
            checkpoint = None
        else:
            log.info('Loading the checkpoint at {}'.format(args.checkpoint))
            checkpoint = load_model(args.checkpoint, **parameters)
            start_epoch = checkpoint['epoch']
        # NOTE(review): the nesting of this wdrop re-application was ambiguous
        # in the reviewed copy; it is kept under the resume branch.
        if args.wdrop:
            for rnn in model.rnns:
                if isinstance(rnn, WeightDrop):
                    rnn.dropout = args.wdrop
                elif rnn.zoneout > 0:
                    rnn.zoneout = args.wdrop

    saved_models = list()

    result_str = '| Language {} | test loss {:5.2f} | test ppl {:8.2f} | test bpc {:8.3f}'

    def test():
        # Zero-shot evaluation over the held-out target languages.
        log.info('=' * 89)
        log.info('Running test set (zero-shot results)...')
        test_loss, avg_loss = evaluate(test_loader, **parameters)
        log.info('Test set finished | test loss {} | test bpc {}'.format(
            test_loss, test_loss / math.log(2)))
        for lang, avg_l_loss in avg_loss.items():
            langstr = dictionary.idx2lang[lang]
            log.info(
                result_str.format(langstr, avg_l_loss, math.exp(avg_l_loss),
                                  avg_l_loss / math.log(2)))
        log.info('=' * 89)

    if args.train:
        f = 1.
        stored_loss = 1e32
        epochs_no_improve = 0
        val_losses = list()

        # calculate specific language lr
        data_spec_count = sum([len(ds) for l, ds in train_set.data.items()])
        data_spec_avg = data_spec_count / len(train_set.data.items())
        data_spec_lrweights = dict([(l, data_spec_avg / len(ds))
                                    for l, ds in train_set.data.items()])

        # estimate total number of steps
        total_steps = sum([
            len(ds) // args.bptt for l, ds in train_set.data.items()
        ]) * args.no_epochs
        steps = 0

        try:
            pbar = tqdm.trange(start_epoch,
                               args.no_epochs + 1,
                               position=1,
                               dynamic_ncols=True)
            for epoch in pbar:
                steps = train(train_loader,
                              lr_weights=data_spec_lrweights,
                              **parameters,
                              total_steps=total_steps,
                              steps=steps,
                              scaling=args.scaling,
                              n_samples=args.n_samples,
                              tb_writer=tb_writer)
                val_loss, _ = evaluate(val_loader, **parameters)
                pbar.set_description('Epoch {} | Val loss {}'.format(
                    epoch, val_loss))

                # Save model — the VI sampling hook must not fire while
                # checkpointing, so it is removed and re-registered after.
                if args.prior == 'vi':
                    sample_weights_hook.remove()
                filename = path.join(
                    args.checkpoint_dir, '{}_epoch{}{}_{}.pth'.format(
                        timestamp, epoch, '_with_apex' if use_apex else '',
                        args.prior))
                torch.save(make_checkpoint(epoch + 1, **parameters), filename)
                saved_models.append(filename)
                if args.prior == 'vi':
                    sample_weights_hook = model.register_forward_pre_hook(
                        sample_weights)

                # Early stopping
                if val_loss < stored_loss:
                    epochs_no_improve = 0
                    stored_loss = val_loss
                else:
                    epochs_no_improve += 1
                if epochs_no_improve == args.patience:
                    log.info('Early stopping at epoch {}'.format(epoch))
                    break
                val_losses.append(val_loss)

                # Reduce lr every 1/3 total epochs
                if epoch - 1 > f / 3 * args.no_epochs:
                    log.info('Epoch {}/{}. Dividing LR by 10'.format(
                        epoch, args.no_epochs))
                    for g in optimizer.param_groups:
                        g['lr'] = g['lr'] / 10
                    f += 1.
            test()
        except KeyboardInterrupt:
            # Graceful interrupt: persist the latest state before returning.
            log.info('Registered KeyboardInterrupt. Stopping training.')
            log.info('Saving last model to disk')
            if args.prior == 'vi':
                sample_weights_hook.remove()
            torch.save(
                make_checkpoint(epoch, **parameters),
                path.join(
                    args.checkpoint_dir, '{}_epoch{}{}_{}.pth'.format(
                        timestamp, epoch, '_with_apex' if use_apex else '',
                        args.prior)))
            return
    elif args.test:
        test()

    # Only test on existing languages if there are no held out languages
    if not args.target_langs:
        exit(0)

    importance = 1e-5
    # If use UNIV, calculate informed prior, else use boring prior
    if args.prior == 'laplace':
        if not isinstance(
                prior,
                LaplacePrior):  # only calculate matrix if it is not supplied.
            log.info('Creating laplace approximation dataset')
            laplace_set = Dataset(data_splits['train'],
                                  batchsize=args.batchsize,
                                  bptt=100,
                                  reset_on_iter=True)
            laplace_loader = DataLoader(laplace_set, num_workers=args.workers)
            log.info('Creating Laplacian prior')
            prior = LaplacePrior(model,
                                 loss_function,
                                 laplace_loader,
                                 use_apex=use_apex,
                                 amp=amp,
                                 device=device)
            parameters['prior'] = prior
            torch.save(
                make_checkpoint('fisher_matrix', **parameters),
                path.join(
                    args.checkpoint_dir, '{}_fishers_matrix{}_{}.pth'.format(
                        timestamp, '_with_apex' if use_apex else '',
                        args.prior)))
        importance = 1e5
    elif args.prior == 'ninf':
        log.info('Creating non-informative Gaussian prior')
        parameters['prior'] = GaussianPrior()
    elif args.prior == 'vi':
        importance = 1e-5
    elif args.prior == 'hmc':
        raise NotImplementedError
    else:
        raise ValueError(
            f'Passed prior {args.prior} is not an implemented inference technique.'
        )

    # Fall back to the provided checkpoint when nothing was saved this run.
    best_model = saved_models[-1] if not len(
        saved_models) == 0 else args.checkpoint

    # Remove sampling hook from model
    if args.prior == 'vi':
        sample_weights_hook.remove()

    # Refine on 100 samples on each target
    if args.refine:
        # reset learning rate
        optimizer.param_groups[0]['lr'] = args.lr
        loss = 0
        results = dict()

        # Create individual tests sets
        test_sets = dict()
        for lang, lang_d in data_splits['test'].items():
            test_sets[lang] = DataLoader(Dataset({lang: lang_d},
                                                 make_config=True,
                                                 batchsize=args.test_batchsize,
                                                 bptt=args.bptt,
                                                 eval=True),
                                         num_workers=args.workers)

        for lang, lang_data in tqdm.tqdm(refine_set.items()):
            final_loss = False
            refine_dataloader = DataLoader(lang_data, num_workers=args.workers)
            # Every language starts from the same best checkpoint.
            load_model(best_model, **parameters)

            log.info(f'Refining for language {dictionary.idx2lang[lang]}')
            for epoch in range(1, args.refine_epochs + 1):
                refine(refine_dataloader, **parameters, importance=importance)
                # Evaluate every 5 refinement epochs.
                if epoch % 5 == 0:
                    final_loss = True
                    loss, avg_loss = evaluate(test_sets[lang],
                                              model,
                                              loss_function,
                                              only_l=lang,
                                              report_all=True,
                                              device=device)
                    for lang, avg_l_loss in avg_loss.items():
                        langstr = dictionary.idx2lang[lang]
                        log.debug(
                            result_str.format(langstr, avg_l_loss,
                                              math.exp(avg_l_loss),
                                              avg_l_loss / math.log(2)))

            # Ensure a final evaluation even when refine_epochs % 5 != 0.
            if not final_loss:
                loss, avg_loss = evaluate(test_sets[lang],
                                          model,
                                          loss_function,
                                          only_l=lang,
                                          report_all=True,
                                          device=device)
            for lang, avg_l_loss in avg_loss.items():
                langstr = dictionary.idx2lang[lang]
                log.info(
                    result_str.format(langstr, avg_l_loss,
                                      math.exp(avg_l_loss),
                                      avg_l_loss / math.log(2)))
                results[lang] = avg_l_loss

        log.info('=' * 89)
        log.info('FINAL FEW SHOT RESULTS: ')
        log.info('=' * 89)
        for lang, avg_l_loss in results.items():
            langstr = dictionary.idx2lang[lang]
            log.info(
                result_str.format(langstr, avg_l_loss, math.exp(avg_l_loss),
                                  avg_l_loss / math.log(2)))
        log.info('=' * 89)
def run(df, fold):
    """Train and validate the LSTM question classifier on one fold.

    :param df: dataframe with kold column
    :param fold: current fold, int
    """
    # Split rows by fold membership.
    train_df = df[df.kfold != fold].reset_index(drop=True)
    valid_df = df[df.kfold == fold].reset_index(drop=True)

    print("Fitting tokenizer")
    # Fit on the whole corpus so both splits share one vocabulary.
    tokenizer = tf.keras.preprocessing.text.Tokenizer(filters='!~\t\n')
    tokenizer.fit_on_texts(df.question.values.tolist())

    # Text -> integer sequences, zero-padded to MAX_LEN.
    seq_train = tf.keras.preprocessing.sequence.pad_sequences(
        tokenizer.texts_to_sequences(train_df.question.values),
        maxlen=config.MAX_LEN)
    seq_valid = tf.keras.preprocessing.sequence.pad_sequences(
        tokenizer.texts_to_sequences(valid_df.question.values),
        maxlen=config.MAX_LEN)

    train_data_loader = torch.utils.data.DataLoader(
        dataset.QUORADataset(question=seq_train,
                             OpenStatus=train_df.OpenStatus.values),
        batch_size=config.TRAIN_BATCH_SIZE,
        num_workers=2)
    valid_data_loader = torch.utils.data.DataLoader(
        dataset.QUORADataset(question=seq_valid,
                             OpenStatus=valid_df.OpenStatus.values),
        batch_size=config.VALID_BATCH_SIZE,
        num_workers=1)

    print("loading embeddings")
    embedding_dict = load_vectors("../input/embeddings/crawl-300d-2M.vec")
    embedding_matrix = create_embedding_matrix(tokenizer.word_index,
                                               embedding_dict)

    device = torch.device("cuda")
    model = lstm.LSTM(embedding_matrix)
    model.to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=5e-3)

    print("Training model")
    best_accuracy = 0
    early_stopping_counter = 0
    for epoch in range(config.EPOCHS):
        engine.train(train_data_loader, model, optimizer, device)
        outputs, targets = engine.evaluate(valid_data_loader, model, device)
        print(outputs[:10])

        # Keep raw scores for ROC AUC, then threshold at 0.5 for accuracy.
        outputs1 = outputs
        outputs = np.array(outputs) >= 0.5

        accuracy = metrics.accuracy_score(targets, outputs)
        conf_m = confusion_matrix(targets, outputs)
        print(conf_m)
        roc_score = roc_auc_score(targets, outputs1)
        print('ROC AUC score\n', roc_score)
        print(f"Fold:{fold}, Epoch:{epoch}, Accuracy_score ={accuracy}")
        print("---")

        # Early stopping: bail out after five epochs without improvement.
        if accuracy > best_accuracy:
            best_accuracy = accuracy
        else:
            early_stopping_counter += 1
        if early_stopping_counter > 4:
            break
def main():
    """
    Train (or evaluate) the captcha OCR model.

    When config.MODE == "train": fits the model, stepping an LR scheduler on
    validation loss and checkpointing the best model to "model.pt".
    Otherwise: loads a checkpoint from "./models/model.pt" and only evaluates.
    Returns (valid_loader, captcha_preds, valid_targets).
    """
    # collect captcha image paths; the filename (minus ".png") is the label
    image_paths = glob.glob(os.path.join(config.PATH, "*.png"))
    # normalize Windows path separators so the split below works
    image_paths = [path.replace("\\", "/") for path in image_paths]
    # target string = filename without the 4-char ".png" extension
    targets = [path.split("/")[-1][:-4] for path in image_paths]
    # split each target string into its characters
    targets_listed = [[char for char in target] for target in targets]
    # flat character list used to fit the label encoder
    targets_flattened = [char for target in targets_listed for char in target]
    label_enc = preprocessing.LabelEncoder()
    label_enc.fit(targets_flattened)
    # encode each target's characters to integer class ids
    targets_encoded = np.array(
        [label_enc.transform(target) for target in targets_listed])
    targets_encoded += 1  # to keep the "0" class for UNK chars
    (
        train_imgs,
        valid_imgs,
        train_enc_targets,
        valid_enc_targets,
        _,  # raw train target strings are not needed
        valid_targets,
    ) = model_selection.train_test_split(image_paths,
                                         targets_encoded,
                                         targets,
                                         test_size=0.1,
                                         random_state=42)
    # build train/valid data loaders, resizing images to the configured size
    train_loader = make_loader(
        mode="train",
        image_paths=train_imgs,
        targets=train_enc_targets,
        size=(config.HEIGHT, config.WIDTH),
        resize=True,
    )
    valid_loader = make_loader(
        mode="valid",
        image_paths=valid_imgs,
        targets=valid_enc_targets,
        size=(config.HEIGHT, config.WIDTH),
        resize=True,
    )
    model = OCRModel(num_classes=len(label_enc.classes_),
                     dropout=config.DROPOUT).to(config.DEVICE)
    optimizer = torch.optim.Adam(model.parameters(), lr=config.LR)
    # reduce LR when validation loss plateaus
    lr_scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
        optimizer,
        mode="min",
        patience=config.PATIENCE,
        factor=config.FACTOR,
        verbose=True,
    )
    if config.MODE == "train":
        best_loss = float("inf")
        for epoch in range(config.EPOCHS):
            model.train()
            _ = engine.train(model, train_loader, optimizer)
            model.eval()
            with torch.no_grad():
                valid_preds, valid_loss = engine.eval(model, valid_loader)
            # decode per-batch predictions into captcha strings
            captcha_preds = []
            for preds in valid_preds:
                preds_ = decode_predictions(preds, label_enc)
                captcha_preds.extend(preds_)
            print(f"Epoch: {epoch}")
            # show a small sample of (true, predicted) pairs
            pprint(list(zip(valid_targets, captcha_preds))[:10])
            lr_scheduler.step(valid_loss.avg)
            # checkpoint the best model by validation loss
            # NOTE(review): saved to "model.pt" but the eval branch loads
            # "./models/model.pt" — confirm the paths are meant to differ
            if valid_loss.avg < best_loss:
                best_loss = valid_loss.avg
                torch.save(model.state_dict(), "model.pt")
    else:
        # evaluation-only mode: restore a previously trained checkpoint
        model.load_state_dict(
            torch.load("./models/model.pt", map_location=config.DEVICE))
        model.eval()
        with torch.no_grad():
            valid_preds, valid_loss = engine.eval(model, valid_loader)
        captcha_preds = []
        for preds in valid_preds:
            preds_ = decode_predictions(preds, label_enc)
            captcha_preds.extend(preds_)
        pprint(list(zip(valid_targets, captcha_preds))[:10])
    return valid_loader, captcha_preds, valid_targets
vil_swith_mode = cfg.MODEL.VIT.MSVIT.VIL_MODE_SWITCH * cfg.OPTIM.EPOCHS if cfg.MODEL.VIT.MSVIT.MODE > 0 and epoch >= vil_swith_mode: # only reset random sample mode to full mode if hasattr(net, 'module'): net.module.reset_vil_mode(mode=0) else: net.reset_vil_mode(mode=0) # train for one epoch with torch.autograd.set_detect_anomaly(cfg.SOLVER.DETECT_ANOMALY): train(trainloader, net, criterion, optimizer, scheduler, epoch, cfg, train_meters, global_step=global_step, device=device, mixup_fn=mixup_fn, scaler=scaler) # evaluate on validation set global_step = (epoch + 1) * len(trainloader) accs = [] for task_name, testloader, test_meter in zip(task_names, testloaders, test_meters): logging.info("Evaluating dataset: {}".format(task_name)) acc = validate(testloader, net, criterion_eval,
def main(): import cifar.network BATCH_SIZE = 200 full_datastream = cifar.data.load_datastream(BATCH_SIZE) small_datastream = cifar.data.load_datastream(BATCH_SIZE, training_set_size=4000, validation_set_size=1000) input_var = T.tensor4('inputs') targets_var = T.ivector('targets') print('Building the network...') network, layers = cifar.network.build_cnn_network2(input_var, batch_size=BATCH_SIZE) print('Network:') for l in layers: print ' ', l['type'], '\n ', l['args'] print('Learning schedule:') schedule = [ {'num_epochs': 10, 'hyperparams': { 'learning_rate': 0.01, 'momentum': 0.95, 'weight_decay': 0.0001 }}, {'num_epochs': 30, 'hyperparams': { 'learning_rate': 0.005, 'momentum': 0.9, 'weight_decay': 0.0001 }}, {'num_epochs': 100, 'hyperparams': { 'learning_rate': 0.001, 'momentum': 0.9, 'weight_decay': 0.0001 }} ] for s in schedule: print ' ', s print('Starting training...') for s in schedule: _, test_acc = \ engine.train( input_var=input_var, targets_var=targets_var, data=datastream, network=network, verbose=2, patience=10, **s) print('Training finished') t = datetime.datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d__%H-%M-%S') save_results(layers, params=lasagne.layers.get_all_param_values(network), training_schedule=schedule, filename='saved_nets/{:.2f}accuracy_{}.params'.format(test_acc, t))
[p for n, p in param_optimizer if any(nd in n for nd in no_decay)], "weight_decay": 0.0, }, ] # Optimizer and Scheduler optimizer = torch.optim.AdamW(optimizer_parameters, lr=3e-5) num_training_steps = int( len(train_dataset) / config.TRAIN_BATCH_SIZE * config.EPOCHS) scheduler = get_linear_schedule_with_warmup( optimizer, num_warmup_steps=0, num_training_steps=num_training_steps) # Training loop best_micro = 0 for epoch in range(config.EPOCHS): engine.train(epoch, model, training_loader, device, optimizer, scheduler) outputs, targets = engine.validation(epoch, model, testing_loader, device) outputs = np.array(outputs) >= 0.5 accuracy = metrics.accuracy_score(targets, outputs) f1_score_micro = metrics.f1_score(targets, outputs, average='micro') f1_score_macro = metrics.f1_score(targets, outputs, average='macro') print(f"Accuracy Score = {accuracy}") print(f"F1 Score (Micro) = {f1_score_micro}") print(f"F1 Score (Macro) = {f1_score_macro}") if f1_score_micro > best_micro: torch.save(model.state_dict(), config.MODEL_PATH) best_micro = f1_score_micro
def run(df, fold):
    """
    Train and validate the wine-quality classifier for one fold, then save
    the model, its input columns, and the fitted scaler to the configured
    paths.

    :param df: dataframe with feature columns plus "quality", "kfold",
        "wclass" and "wclass_num"
    :param fold: current fold, int
    """
    # select features (everything except target and fold bookkeeping columns)
    features = [
        f for f in df.columns
        if f not in ("quality", "kfold", "wclass", "wclass_num")
    ]
    # Normalize inputs
    scaler = preprocessing.StandardScaler()
    # get training data using folds
    train_df = df[df.kfold != fold].reset_index(drop=True)
    xtrain = scaler.fit_transform(train_df[features])
    # shift class labels so the smallest class becomes 0
    ytrain = (train_df["wclass_num"] -
              train_df["wclass_num"].min()).astype('category')
    # get validation data using folds
    valid_df = df[df.kfold == fold].reset_index(drop=True)
    # transform the validation data with the scaler fitted on train only
    xvalid = scaler.transform(valid_df[features])
    yvalid = (valid_df["wclass_num"] -
              valid_df["wclass_num"].min()).astype('category')
    # initialize dataset class for training
    train_dataset = dataset.WineDataset(features=xtrain, target=ytrain)
    # torch dataloader serves the dataset in batches of batch_size
    train_data_loader = torch.utils.data.DataLoader(
        dataset=train_dataset,
        batch_size=config.TRAIN_BATCH_SIZE,
        num_workers=0,
        shuffle=True)
    # initialize dataset class for validation
    valid_dataset = dataset.WineDataset(features=xvalid, target=yvalid)
    # validation dataloader (no shuffling needed)
    valid_data_loader = torch.utils.data.DataLoader(
        dataset=valid_dataset,
        batch_size=config.VALID_BATCH_SIZE,
        num_workers=0)
    # create torch device (CPU training)
    device = torch.device("cpu")
    # get the model
    model = network.Network()
    # initialize the optimizer
    # NOTE(review): weight_decay=10e-3 is 0.01 — confirm 1e-3 wasn't intended
    learning_rate = 0.005
    optimizer = optim.Adam(model.parameters(),
                           lr=learning_rate,
                           weight_decay=10e-3)
    for epoch in range(config.EPOCHS):
        total_correct = 0
        # train one epoch
        loss, train_correct = engine.train(train_data_loader, model,
                                           optimizer, device)
        # calculate total correct on validation data
        total_correct = engine.evaluate(valid_data_loader, model, device)
        # only report metrics for the last few epochs
        if epoch > (config.EPOCHS - 5):
            print("epoch", epoch)
            print(
                "loss: %.2f" % loss,
                "valid_correct:", total_correct,
                "valid_correct_pcntg: %.4f" % (total_correct / xvalid.shape[0]),
                "train_correct_pcntg: %.4f" % (train_correct / xtrain.shape[0]),
                "train_correct", train_correct)
    # save the trained model
    torch.save(model.state_dict(), config.MODEL_PATH)
    print("Model dumped")
    # Save the data columns from training
    # NOTE(review): this saves train_df.columns, which still includes the
    # target/fold columns excluded from `features` — confirm downstream
    # consumers expect that rather than `features`
    model_columns = list(train_df.columns)
    joblib.dump(model_columns, config.MODEL_COLUMN)
    print("model columns dumped")
    # Save the scaler from training
    joblib.dump(scaler, config.MODEL_TRANSFORMS)
    print("scaler dumped")
def run():
    """
    Train (optionally) and evaluate a BERT-based citation classifier.

    Builds data loaders, instantiates the model selected by config.modelName,
    trains when config.dotrain is set (saving the checkpoint), then reloads
    the checkpoint, evaluates on the test set, pickles predictions/targets to
    config.PREDICTIONS_PATH and reports metrics via utils.metric.

    :raises ValueError: if config.modelName matches no known model
    """
    trainDataset, testDataset, labelGenerator = utils.loadDataset()

    # Making DataLoaders
    trainDataLoader = torch.utils.data.DataLoader(
        trainDataset,
        batch_size=config.TRAIN_BATCH_SIZE,
        shuffle=True,
        num_workers=4,
        pin_memory=True)
    testDataLoader = torch.utils.data.DataLoader(
        testDataset, batch_size=config.TEST_BATCH_SIZE, num_workers=1)

    totalNOsOfLabels = len(labelGenerator.classes_)
    device = torch.device(config.DEVICE)

    # Defining Model
    print("Making model:- ", config.modelName)
    # BUG FIX: was `citeModel = None` (wrong case) while every later use is
    # `citemodel`, so the None-guard could never work and an unknown model
    # name crashed with an opaque AttributeError on `None.to(device)`
    citemodel = None
    if config.modelName == "BertBase":
        citemodel = model.BERTBaseUncased(numOfLabels=totalNOsOfLabels,
                                          dropout=config.DROPOUT)
    elif config.modelName == "SciBert":
        citemodel = model.SciBertUncased(numOfLabels=totalNOsOfLabels,
                                         dropout=config.DROPOUT)
    if citemodel is None:
        # fail fast with a clear message instead of AttributeError on None
        raise ValueError("Unknown model name: {}".format(config.modelName))
    citemodel.to(device)

    param_optimizer = list(citemodel.named_parameters())
    # Per the Google TF BERT implementation, L2 weight decay is skipped for
    # biases and all LayerNorm parameters.
    no_decay = ["bias", "LayerNorm.bias", "LayerNorm.weight"]
    optimizer_parameters = [
        {
            "params": [
                p for n, p in param_optimizer
                if not any(nd in n for nd in no_decay)
            ],
            "weight_decay": 0.01,
        },
        {
            "params":
            [p for n, p in param_optimizer if any(nd in n for nd in no_decay)],
            "weight_decay": 0.0,
        }
    ]

    # linear warmup/decay schedule over the full training run
    num_train_steps = int(len(trainDataLoader) * config.EPOCHS)
    optimizer = AdamW(optimizer_parameters, lr=config.LEARNING_RATE)
    scheduler = get_linear_schedule_with_warmup(
        optimizer,
        num_warmup_steps=num_train_steps * config.WARMUP_PROPORTION,
        num_training_steps=num_train_steps)

    if config.dotrain:
        print('In Training')
        for epoch in range(config.EPOCHS):
            trainingLoss = engine.train(trainDataLoader, citemodel, optimizer,
                                        device, scheduler)
            print("Epoch: ", epoch, " Loss: ", trainingLoss, '\n')

        # Saving the model
        os.makedirs(os.path.dirname(
            config.MODEL_SAVED.format(config.modelName)), exist_ok=True)
        torch.save(citemodel.state_dict(),
                   config.MODEL_SAVED.format(config.modelName))
        print('Model is saved at: ',
              config.MODEL_SAVED.format(config.modelName))

    # Evaluating the model (always loads the saved checkpoint)
    print("Loading the model")
    citemodel.load_state_dict(
        torch.load(config.MODEL_SAVED.format(config.modelName)))
    outputs, targets = engine.eval(testDataLoader, citemodel, device)

    # Saving the results with corresponding targets
    os.makedirs(os.path.dirname(
        config.PREDICTIONS_PATH.format(config.modelName)), exist_ok=True)
    with open(config.PREDICTIONS_PATH.format(config.modelName), 'wb') as f:
        pickle.dump(outputs, f)  # First saved the predicted outputs
        pickle.dump(targets, f)  # Then saved the corresponding targets

    print('Starting Evaluation...')
    utils.metric(outputs, targets)
criterion = nn.BCELoss(reduction='sum') # initialize the transform transform = transform() # prepare the training and validation data loaders train_data, valid_data = prepare_dataset(root_path='../input/catsNdogs/') trainset = LFWDataset(train_data, transform=transform) trainloader = DataLoader(trainset, batch_size=batch_size, shuffle=True) validset = LFWDataset(valid_data, transform=transform) validloader = DataLoader(validset, batch_size=batch_size) train_loss = [] valid_loss = [] for epoch in range(epochs): print(f"Epoch {epoch+1} of {epochs}") train_epoch_loss = train(model, trainloader, trainset, device, optimizer, criterion) valid_epoch_loss, recon_images = validate(model, validloader, validset, device, criterion) train_loss.append(train_epoch_loss) valid_loss.append(valid_epoch_loss) # save the reconstructed images from the validation loop save_reconstructed_images(recon_images, epoch + 1) # convert the reconstructed images to PyTorch image grid format image_grid = make_grid(recon_images.detach().cpu()) grid_images.append(image_grid) print(f"Train Loss: {train_epoch_loss:.4f}") print(f"Val Loss: {valid_epoch_loss:.4f}") # save the reconstructions as a .gif file image_to_vid(grid_images) # save the loss plots to disk
def run_cv(df, fold): train_df = df[df["kfold"] != fold].reset_index(drop=True) valid_df = df[df["kfold"] == fold].reset_index(drop=True) #y_train = pd.get_dummies(train_df["target"], dtype="int64").values y_train = train_df["target"].values X_train = train_df.drop(["target", "kfold"], axis=1).values #y_valid = pd.get_dummies(valid_df["target"], dtype="int64").values y_valid = valid_df["target"].values X_valid = valid_df.drop(["target", "kfold"], axis=1).values train_dataset = dataset.TweetDataset( tweets=X_train, targets=y_train ) valid_dataset = dataset.TweetDataset( tweets=X_valid, targets=y_valid ) train_dataloader = torch.utils.data.DataLoader( train_dataset, batch_size=config.TRAIN_BATCH_SIZE, num_workers=2 ) valid_dataloader = torch.utils.data.DataLoader( valid_dataset, batch_size=config.TRAIN_BATCH_SIZE, num_workers=1 ) device = "cuda" if torch.cuda.is_available() else "cpu" print("Using {} device".format(device)) model = neural_net.NeuralNetwork().to(device) optimizer = torch.optim.Adam(model.parameters(), lr=1e-3) loss_fn = torch.nn.BCELoss() print("Training Model...") #early_stopping_counter = 0 for epoch in range(config.EPOCHS): print(f"Epoch {epoch+1}\n--------------------") engine.train( train_dataloader, model, optimizer, loss_fn, device ) outputs, targets = engine.evaluate( valid_dataloader, model, loss_fn, device ) outputs = np.array(outputs).reshape(-1,) outputs = list(map(lambda pred: 1 if pred>0.5 else 0, outputs)) valid_score = metrics.f1_score(targets, outputs) print(f" F1 Score: {valid_score}\n")
net.to(DEVICE) else: # model model_name = 'seg-net' net = model_factory(model_name) net.to(DEVICE) optimizer = torch.optim.Adam(net.parameters(), lr=1e-3) criterion = nn.CrossEntropyLoss() optimizer.zero_grad() train( train_loader=train_loader, val_loader=val_loader, net=net, epochs=EPOCHS, criterion=criterion, optimizer=optimizer, device=DEVICE, batch_size=BATCH_SIZE ) if TEST: net = restore_net(PATH_TO_TEST_MODEL) net.eval() net.to(DEVICE) # after the training run function for train/val/test loader loader = test_loader ecgs, y_true, y_pred = test( net=net,
def run(df, fold):
    """
    Train and validate the LSTM sentiment model on one CV fold.

    :param df: pandas dataframe with review/sentiment data and a kfold column
    :param fold: fold index held out for validation, int
    """
    # split into training rows and the held-out fold
    fold_train = df[df.kfold != fold].reset_index(drop=True)
    fold_valid = df[df.kfold == fold].reset_index(drop=True)

    print("Fitting tokenizer")
    # fit the keras tokenizer on ALL reviews so both splits share one
    # word index; any tokenizer could be substituted here
    tokenizer = tf.keras.preprocessing.text.Tokenizer()
    tokenizer.fit_on_texts(df.review.values.tolist())

    # map each review to a sequence of word indices (e.g. "bad movie" ->
    # [24, 27]), then left-pad/left-truncate to exactly config.MAX_LEN
    seq_train = tf.keras.preprocessing.sequence.pad_sequences(
        tokenizer.texts_to_sequences(fold_train.review.values),
        maxlen=config.MAX_LEN)
    seq_valid = tf.keras.preprocessing.sequence.pad_sequences(
        tokenizer.texts_to_sequences(fold_valid.review.values),
        maxlen=config.MAX_LEN)

    # wrap the arrays in the project dataset class and build torch loaders,
    # which serve batches of the configured sizes
    train_loader = torch.utils.data.DataLoader(
        dataset.IMDBDataset(reviews=seq_train,
                            targets=fold_train.sentiment.values),
        batch_size=config.TRAIN_BATCH_SIZE,
        num_workers=2)
    valid_loader = torch.utils.data.DataLoader(
        dataset.IMDBDataset(reviews=seq_valid,
                            targets=fold_valid.sentiment.values),
        batch_size=config.VALID_BATCH_SIZE,
        num_workers=1)

    print("Loading embeddings")
    # pretrained fastText vectors -> embedding matrix aligned to word index
    embedding_dict = load_vectors("../input/crawl-300d-2M.vec")
    embedding_matrix = create_embedding_matrix(tokenizer.word_index,
                                               embedding_dict)

    # model runs on the GPU
    device = torch.device("cuda")
    model = lstm.LSTM(embedding_matrix)
    model.to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)

    print("Training Model")
    best_accuracy = 0
    early_stopping_counter = 0
    for epoch in range(config.EPOCHS):
        # one training pass, then score the held-out fold
        engine.train(train_loader, model, optimizer, device)
        outputs, targets = engine.evaluate(valid_loader, model, device)
        # the model has a plain linear output (no sigmoid), so the 0.5
        # threshold is applied directly to the returned scores
        outputs = np.array(outputs) >= 0.5
        accuracy = metrics.accuracy_score(targets, outputs)
        print(f"FOLD:{fold}, Epoch: {epoch}, Accuracy Score = {accuracy}")
        # simple early stopping: quit after more than two epochs without a
        # new best accuracy
        if accuracy > best_accuracy:
            best_accuracy = accuracy
        else:
            early_stopping_counter += 1
        if early_stopping_counter > 2:
            break
EPOCHS = 10 BATCH_SIZE = 32 LR = 1e-3 MAX_LOSS = 9999 device = torch.device("cuda" if torch.cuda.is_available() else "cpu") transform = transforms.Compose( [transforms.ToTensor(), transforms.Normalize((0.1307, ), (0.3081, ))]) train_set, test_set = dataset.create_dataset(transform) train_loader = torch.utils.data.DataLoader(train_set, batch_size=BATCH_SIZE, shuffle=True) test_loader = torch.utils.data.DataLoader(test_set, batch_size=BATCH_SIZE, shuffle=False) model = model.Net().to(device) optimizer = optim.Adam(model.parameters(), lr=LR) scheduler = StepLR(optimizer, step_size=1) for epoch in range(1, EPOCHS + 1): train_loss = engine.train(model, device, train_loader, optimizer, epoch) test_loss = engine.test(model, device, test_loader) scheduler.step() if test_loss < MAX_LOSS: torch.save(model.state_dict(), "mnist_cnn.pt")