def test_net(args):
    print("Init...")
    _, _, val_loader, _ = lib.build_dataloader(args)
    model = lib.build_model(args)
    load_model(model, args)

    args.cuda = not args.no_cuda and torch.cuda.is_available()
    if args.cuda:
        model.cuda()

    if args.label_smoothing:
        criterion = cross_entropy_with_label_smoothing
    else:
        criterion = nn.CrossEntropyLoss()

    print("Start testing...")
    val(model, val_loader, criterion, args.test_model, args)
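# cross_entropy_with_label_smoothing is referenced above and in train_net but
# not defined in this excerpt. A minimal sketch of such a helper is below,
# assuming logits of shape (N, C) and integer class targets; the parameter
# name `smoothing` and its default are assumptions, not the repo's signature.
def cross_entropy_with_label_smoothing(logits, targets, smoothing=0.1):
    import torch.nn.functional as F
    # Blend the one-hot target distribution with a uniform distribution,
    # then take the expected negative log-likelihood under the model.
    log_probs = F.log_softmax(logits, dim=-1)
    nll = -log_probs.gather(dim=-1, index=targets.unsqueeze(-1)).squeeze(-1)
    uniform = -log_probs.mean(dim=-1)
    return ((1.0 - smoothing) * nll + smoothing * uniform).mean()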
def train_net(args):
    print("Init...")
    log_writer = tensorboardX.SummaryWriter(args.log_dir)

    # train_loader and val_loader are used below, so the dataloader must be built.
    train_loader, _, val_loader, _ = lib.build_dataloader(args)
    model = lib.build_model(args)
    # Sanity-check the forward pass on a dummy 224x224 RGB batch.
    print(model(torch.ones(1, 3, 224, 224)).shape)
    # print('Parameters:', sum([np.prod(p.size()) for p in model.parameters()]))
    model = torch.nn.DataParallel(model)
    optimizer = lib.build_optimizer(args, model)

    epoch = 0
    if args.resume:
        epoch = resume_model(model, optimizer, args)

    args.cuda = not args.no_cuda and torch.cuda.is_available()
    cudnn.benchmark = True

    if args.label_smoothing:
        criterion = cross_entropy_with_label_smoothing
    else:
        criterion = nn.CrossEntropyLoss()
        # criterion = nn.MSELoss()

    if args.cuda:
        model.cuda()

    print("Start training...")
    while epoch < args.epochs:
        train(model, train_loader, optimizer, criterion, epoch, log_writer, args)
        if (epoch + 1) % args.test_epochs == 0:
            val(model, val_loader, criterion, epoch, args, log_writer)
        if (epoch + 1) % args.save_epochs == 0:
            save_model(model, optimizer, epoch, args)
        epoch += 1
    save_model(model, optimizer, epoch - 1, args)
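# save_model/resume_model are called above but not shown in this excerpt.
# A minimal checkpoint sketch follows; the path args.model_dir, the use of
# args.resume as a file path, and the stored keys are assumptions about this
# repo's layout, not confirmed behavior.
import os
import torch

def save_model(model, optimizer, epoch, args):
    # Persist weights plus optimizer state so training can resume exactly.
    state = {
        'epoch': epoch,
        'state_dict': model.state_dict(),
        'optimizer': optimizer.state_dict(),
    }
    torch.save(state, os.path.join(args.model_dir, 'checkpoint_%d.pth' % epoch))

def resume_model(model, optimizer, args):
    # Restore a checkpoint and return the next epoch to run.
    state = torch.load(args.resume, map_location='cpu')
    model.load_state_dict(state['state_dict'])
    optimizer.load_state_dict(state['optimizer'])
    return state['epoch'] + 1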
import tensorflow as tf

from lib import Data_Set, build_model, generate_text

ModelCheckpoint = tf.keras.callbacks.ModelCheckpoint

# EPOCHS = 0 makes model.fit() return immediately; raise it to actually train.
EPOCHS = 0

data_set = Data_Set(file_path='./texts/websters.txt', verbose=True)
file_name = "./models/websters_model.hdf5"
model = build_model(data_set, file_name=file_name, verbose=True)
checkpoint = ModelCheckpoint(file_name, monitor='loss', verbose=1,
                             save_best_only=True, mode='min')


def predict():
    # Pass the built model, not the build_model factory function.
    print(generate_text(model, data_set, start_string=u'CRUSTACEAN\n'))


callbacks = [checkpoint]
history = model.fit(data_set.training_data, epochs=EPOCHS, callbacks=callbacks)

temperature = 0.8
num_generate = 500
generated_text = ' '
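# The trailing temperature/num_generate/generated_text variables suggest a
# character-by-character sampling loop once existed here. A hypothetical
# sketch following the standard Keras character-RNN recipe is below; it
# assumes a stateful model (hidden state carries across calls) and assumes
# data_set exposes char2idx/idx2char lookup tables, neither of which is
# confirmed by this excerpt.
def sample_text(model, data_set, start_string, num_generate=500, temperature=0.8):
    input_ids = tf.expand_dims([data_set.char2idx[c] for c in start_string], 0)
    generated = []
    for _ in range(num_generate):
        predictions = model(input_ids)                # (1, seq_len, vocab_size)
        logits = predictions[:, -1, :] / temperature  # scale logits before sampling
        next_id = tf.random.categorical(logits, num_samples=1)[0, 0].numpy()
        input_ids = tf.expand_dims([next_id], 0)      # feed the sampled char back in
        generated.append(data_set.idx2char[next_id])
    return start_string + ''.join(generated)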
def train_net(gpu, ngpus_per_node, args):
    args.gpu = gpu

    if args.gpu is not None:
        print("Use GPU: {} for training".format(args.gpu))

    if args.distributed:
        if args.dist_url == "env://" and args.rank == -1:
            args.rank = int(os.environ["RANK"])
        if args.multiprocessing_distributed:
            # For multiprocessing distributed training, rank needs to be the
            # global rank among all the processes
            args.rank = args.rank * ngpus_per_node + gpu
        dist.init_process_group(backend=args.dist_backend,
                                init_method=args.dist_url,
                                world_size=args.world_size,
                                rank=args.rank)

    verbose = args.multiprocessing_distributed and args.rank % ngpus_per_node == 0
    if verbose and not args.test:
        log_writer = tensorboardX.SummaryWriter(args.log_dir)
    else:
        log_writer = None

    model = lib.build_model(args)
    print('Parameters:', sum([np.prod(p.size()) for p in model.parameters()]))
    optimizer = lib.build_optimizer(args, model)

    if args.distributed:
        torch.cuda.set_device(args.gpu)
        model.cuda(args.gpu)
        args.batch_size = int(args.batch_size / ngpus_per_node)
        args.val_batch_size = int(args.val_batch_size / ngpus_per_node)
        args.num_workers = int(
            (args.num_workers + ngpus_per_node - 1) / ngpus_per_node)
        model = torch.nn.parallel.DistributedDataParallel(
            model, device_ids=[args.gpu])

    epoch = 0
    if args.resume:
        epoch = resume_model(model, optimizer, args)

    if args.label_smoothing:
        criterion = cross_entropy_with_label_smoothing
    else:
        criterion = nn.CrossEntropyLoss()

    cudnn.benchmark = True

    train_loader, train_sampler, val_loader, val_sampler = lib.build_dataloader(
        args, args.distributed)

    if args.test:
        load_model(model, args)
        val(model, val_loader, val_sampler, criterion, epoch,
            verbose=verbose, args=args)
        return

    if verbose:
        print("Start training...")
    while epoch < args.epochs:
        train(model, train_sampler, train_loader, optimizer, criterion,
              epoch, log_writer, args, verbose=verbose)
        if (epoch + 1) % args.save_epochs == 0:
            dist_save_model(model, optimizer, epoch, ngpus_per_node, args)
        if (epoch + 1) % args.test_epochs == 0:
            val(model, val_loader, val_sampler, criterion, epoch, args,
                log_writer, verbose=verbose)
        epoch += 1
    dist_save_model(model, optimizer, epoch - 1, ngpus_per_node, args)
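# train_net's (gpu, ngpus_per_node, args) signature matches the calling
# convention of torch.multiprocessing.spawn, which invokes fn(i, *args) once
# per process. A hypothetical launcher sketch is below; the surrounding
# main() and its argument handling are assumptions, not the repo's entry point.
import torch
import torch.multiprocessing as mp

def main(args):
    ngpus_per_node = torch.cuda.device_count()
    if args.multiprocessing_distributed:
        # One process per GPU; world_size becomes the total process count
        # across all nodes, matching the rank arithmetic in train_net above.
        args.world_size = ngpus_per_node * args.world_size
        mp.spawn(train_net, nprocs=ngpus_per_node, args=(ngpus_per_node, args))
    else:
        train_net(args.gpu, ngpus_per_node, args)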