def main(): """Main training program.""" print('Generate Samples') # Disable CuDNN. torch.backends.cudnn.enabled = False # Timer. _ = Timers() # Arguments. args = get_args() # Pytorch distributed. initialize_distributed(args) # Random seeds for reproducability. set_random_seed(args.seed) # get the tokenizer tokenizer = prepare_tokenizer(args) # Model, optimizer, and learning rate. model = setup_model(args) # setting default batch size to 1 args.batch_size = 1 # generate samples generate_samples(model, tokenizer, args)
def main():
    """Export the tokenizer (and instantiate the model) for HuggingFace."""

    # Disable CuDNN.
    torch.backends.cudnn.enabled = False

    # Arguments.
    args = get_args()

    # Pytorch distributed.
    initialize_distributed(args)

    # Get the tokenizer and save it in the HuggingFace format.
    tokenizer = prepare_tokenizer(args)
    tokenizer.save_pretrained(args.export_huggingface)

    # Model.
    _ = setup_model(args)
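# `prepare_tokenizer` is project code not shown here. If it returns a
# HuggingFace `transformers` tokenizer (an assumption), the export above
# boils down to `save_pretrained`, which writes the vocab and config files
# that `from_pretrained` can later reload. A minimal standalone sketch:
from transformers import GPT2Tokenizer

tokenizer = GPT2Tokenizer.from_pretrained('gpt2')   # any pretrained id works
tokenizer.save_pretrained('exported_tokenizer')     # writes vocab.json, merges.txt, ...

# Round-trip check: the exported directory can be loaded back directly.
reloaded = GPT2Tokenizer.from_pretrained('exported_tokenizer')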
import os
import os.path as osp
import pickle
import random
import sys
import timeit

import matplotlib.pyplot as plt
from PIL import Image

from dataset.bdd_dataset import BDD_Train_DataSet, BDD_Valid_DataSet, BDD_Test_DataSet
from models.enet_model import ENetModel
from src.arguments import get_args
from src.test import TestNetwork
from src.train import TrainNetwork
from src.utils import enet_weighing

args = get_args()

# Select the GPU to run the network on.
os.environ["CUDA_VISIBLE_DEVICES"] = args.gpu_select


def train_main():
    h, w = map(int, args.input_size.split(','))
    input_size = (h, w)
    crop_size = (h, w)

    # Create the directory to store the checkpoints and verbose text files.
    directory = 'saved_models/' + args.run_name + '/'
    if not os.path.exists(directory):
        os.makedirs(directory)
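# `enet_weighing` (imported from src.utils above) is not shown in this
# snippet. The ENet paper computes class weights as w = 1 / ln(c + p),
# where p is a class's pixel frequency and c ~= 1.02. A minimal sketch of
# that formula, assuming label batches of integer class ids (the function
# name, signature, and shapes are assumptions, not the project's code):
import numpy as np


def enet_weighing_sketch(label_batches, num_classes, c=1.02):
    """Estimate ENet-style class weights from pixel frequency (sketch)."""
    counts = np.zeros(num_classes, dtype=np.int64)
    for labels in label_batches:             # labels: integer array of class ids
        flat = np.asarray(labels).ravel()
        counts += np.bincount(flat, minlength=num_classes)[:num_classes]
    freq = counts / max(counts.sum(), 1)     # per-class pixel frequency p
    return 1.0 / np.log(c + freq)            # w = 1 / ln(c + p)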
def main(): """Main training program.""" # Disable CuDNN. torch.backends.cudnn.enabled = False # Timer. timers = Timers() # Arguments. args = get_args() # if args.load_huggingface: # args.make_vocab_size_divisible_by = 1 # Pytorch distributed. initialize_distributed(args) if torch.distributed.get_rank() == 0: print('Pretrain GPT3 model') print_args(args) # Random seeds for reproducability. set_random_seed(args.seed) # Data stuff. train_data, val_data, test_data, args.vocab_size, args.eod_token, tokenizer = get_train_val_test_data(args) # Model, optimizer, and learning rate. model, optimizer, lr_scheduler = setup_model_and_optimizer(args) # Resume data loader if necessary. if args.resume_dataloader: if train_data is not None: train_data.batch_sampler.start_iter = args.iteration % len(train_data) print_rank_0(f"Resume train set from iteration {train_data.batch_sampler.start_iter}") if val_data is not None: start_iter_val = (args.train_iters // args.save_interval) * args.eval_interval val_data.batch_sampler.start_iter = start_iter_val % len(val_data) if train_data is not None: train_data_iterator = iter(train_data) else: train_data_iterator = None iteration = 0 if args.train_iters > 0: if args.do_train: iteration, skipped = train(model, optimizer, lr_scheduler, train_data_iterator, val_data, timers, args, tokenizer) if args.do_valid: prefix = 'the end of training for val data' # val_loss, val_ppl _ = evaluate_and_print_results(prefix, iter(val_data) if val_data else None, model, args, timers, False) if args.save and iteration != 0: save_checkpoint(iteration, model, optimizer, lr_scheduler, args, deepspeed=DEEPSPEED_WRAP and args.deepspeed) if args.do_test: # Run on test data. prefix = 'the end of training for test data' evaluate_and_print_results(prefix, iter(test_data) if test_data else None, model, args, timers, True)