def dataset_iterator(args):
    if args.dataset == 'mnist':
        train_gen, dev_gen, test_gen = mnist.load(args.batch_size, args.batch_size)
    elif args.dataset == 'cifar10':
        data_dir = '../../../images/cifar-10-batches-py/'
        train_gen, dev_gen = cifar10.load(args.batch_size, data_dir)
        test_gen = None
    return (train_gen, dev_gen, test_gen)
def dataset_iterator(args):
    transform = get_transform(args)
    if args.dataset == 'mnist':
        train_gen, dev_gen, test_gen = mnist.load(args.batch_size, args.batch_size)
    elif args.dataset == 'cifar10':
        data_dir = '/data0/images/cifar-10-batches-py/'
        train_gen, dev_gen = cifar10.load(args.batch_size, data_dir)
        test_gen = None
    elif args.dataset == 'celeba':
        data_dir = '/data0/images/celeba'
        data = datasets.ImageFolder(data_dir, transform=transform)
        data_loader = torch.utils.data.DataLoader(data,
                                                  batch_size=args.batch_size,
                                                  shuffle=True,
                                                  drop_last=True,
                                                  num_workers=4)
        return data_loader
        #train_gen = celeba.load(args.batch_size, data_dir+'/train/')
        #dev_gen = celeba.load(args.batch_size, data_dir+'/test/')
        #test_gen = None
    return (train_gen, dev_gen, test_gen)
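# --- Usage sketch (added for illustration; not part of the original source). ---
# dataset_iterator only reads args.dataset and args.batch_size, so a bare
# argparse.Namespace is enough to drive it; the field values below are
# assumptions chosen for the example. Note the asymmetry: 'celeba' returns a
# single torch DataLoader, while the other datasets return a generator triple.
import argparse

_args = argparse.Namespace(dataset='cifar10', batch_size=64)
train_gen, dev_gen, test_gen = dataset_iterator(_args)  # test_gen is None for cifar10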
def run(global_rank,
        world_size,
        local_rank,
        max_epoch,
        batch_size,
        model,
        data,
        sgd,
        graph,
        verbosity,
        dist_option='fp32',
        spars=None):
    dev = device.create_cuda_gpu_on(local_rank)
    dev.SetRandSeed(0)
    np.random.seed(0)

    if data == 'cifar10':
        from data import cifar10
        train_x, train_y, val_x, val_y = cifar10.load()
    elif data == 'cifar100':
        from data import cifar100
        train_x, train_y, val_x, val_y = cifar100.load()
    elif data == 'mnist':
        from data import mnist
        train_x, train_y, val_x, val_y = mnist.load()

    num_channels = train_x.shape[1]
    image_size = train_x.shape[2]
    data_size = np.prod(train_x.shape[1:train_x.ndim]).item()
    num_classes = (np.max(train_y) + 1).item()
    #print(num_classes)

    if model == 'resnet':
        from model import resnet
        model = resnet.resnet50(num_channels=num_channels,
                                num_classes=num_classes)
    elif model == 'xceptionnet':
        from model import xceptionnet
        model = xceptionnet.create_model(num_channels=num_channels,
                                         num_classes=num_classes)
    elif model == 'cnn':
        from model import cnn
        model = cnn.create_model(num_channels=num_channels,
                                 num_classes=num_classes)
    elif model == 'alexnet':
        from model import alexnet
        model = alexnet.create_model(num_channels=num_channels,
                                     num_classes=num_classes)
    elif model == 'mlp':
        import os, sys, inspect
        current = os.path.dirname(
            os.path.abspath(inspect.getfile(inspect.currentframe())))
        parent = os.path.dirname(current)
        sys.path.insert(0, parent)
        from mlp import module
        model = module.create_model(data_size=data_size,
                                    num_classes=num_classes)

    # For distributed training, sequential gives better performance
    if hasattr(sgd, "communicator"):
        DIST = True
        sequential = True
    else:
        DIST = False
        sequential = False

    if DIST:
        train_x, train_y, val_x, val_y = partition(global_rank, world_size,
                                                   train_x, train_y, val_x,
                                                   val_y)
    '''
    # check dataset shape correctness
    if global_rank == 0:
        print("Check the shape of dataset:")
        print(train_x.shape)
        print(train_y.shape)
    '''

    if model.dimension == 4:
        tx = tensor.Tensor(
            (batch_size, num_channels, model.input_size, model.input_size),
            dev, tensor.float32)
    elif model.dimension == 2:
        tx = tensor.Tensor((batch_size, data_size), dev, tensor.float32)
        # np.reshape returns a new array; assign it back so the flattening
        # actually takes effect (the original discarded the result).
        train_x = np.reshape(train_x, (train_x.shape[0], -1))
        val_x = np.reshape(val_x, (val_x.shape[0], -1))
    ty = tensor.Tensor((batch_size, ), dev, tensor.int32)

    num_train_batch = train_x.shape[0] // batch_size
    num_val_batch = val_x.shape[0] // batch_size
    idx = np.arange(train_x.shape[0], dtype=np.int32)

    # Attach the model to the graph
    model.set_optimizer(sgd)
    model.compile([tx], is_train=True, use_graph=graph, sequential=sequential)
    dev.SetVerbosity(verbosity)

    # Training and Evaluation Loop
    for epoch in range(max_epoch):
        start_time = time.time()
        np.random.shuffle(idx)

        if global_rank == 0:
            print('Starting Epoch %d:' % (epoch))

        # Training Phase
        train_correct = np.zeros(shape=[1], dtype=np.float32)
        test_correct = np.zeros(shape=[1], dtype=np.float32)
        train_loss = np.zeros(shape=[1], dtype=np.float32)
        model.train()
        for b in range(num_train_batch):
            # Generate the batch data for this iteration
            x = train_x[idx[b * batch_size:(b + 1) * batch_size]]
            if model.dimension == 4:
                x = augmentation(x, batch_size)
                if (image_size != model.input_size):
                    x = resize_dataset(x, model.input_size)
            y = train_y[idx[b * batch_size:(b + 1) * batch_size]]

            # Copy the batch data into the input tensors
            tx.copy_from_numpy(x)
            ty.copy_from_numpy(y)

            # Train the model
            out, loss = model(tx, ty, dist_option, spars)
            train_correct += accuracy(tensor.to_numpy(out), y)
            train_loss += tensor.to_numpy(loss)[0]

        if DIST:
            # Reduce the evaluation accuracy and loss from multiple devices
            reducer = tensor.Tensor((1, ), dev, tensor.float32)
            train_correct = reduce_variable(train_correct, sgd, reducer)
            train_loss = reduce_variable(train_loss, sgd, reducer)

        if global_rank == 0:
            print('Training loss = %f, training accuracy = %f' %
                  (train_loss, train_correct /
                   (num_train_batch * batch_size * world_size)),
                  flush=True)

        # Evaluation Phase
        model.eval()
        for b in range(num_val_batch):
            x = val_x[b * batch_size:(b + 1) * batch_size]
            if model.dimension == 4:
                if (image_size != model.input_size):
                    x = resize_dataset(x, model.input_size)
            y = val_y[b * batch_size:(b + 1) * batch_size]
            tx.copy_from_numpy(x)
            ty.copy_from_numpy(y)
            out_test = model(tx)
            test_correct += accuracy(tensor.to_numpy(out_test), y)

        if DIST:
            # Reduce the evaluation accuracy from multiple devices
            test_correct = reduce_variable(test_correct, sgd, reducer)

        # Output the evaluation accuracy
        if global_rank == 0:
            print('Evaluation accuracy = %f, Elapsed Time = %fs' %
                  (test_correct / (num_val_batch * batch_size * world_size),
                   time.time() - start_time),
                  flush=True)

    dev.PrintTimeProfiling()
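# --- Launcher sketch (an assumption for illustration; the original SINGA
# example builds these values from argparse). A plain opt.SGD carries no
# "communicator" attribute, so global_rank=0 / world_size=1 exercises the
# single-GPU, non-distributed path of run(). Hyperparameter values here are
# placeholders, not the example's defaults.
from singa import opt

if __name__ == '__main__':
    sgd = opt.SGD(lr=0.005, momentum=0.9, weight_decay=1e-5)
    run(global_rank=0,
        world_size=1,
        local_rank=0,
        max_epoch=10,
        batch_size=64,
        model='cnn',
        data='mnist',
        sgd=sgd,
        graph=True,
        verbosity=0)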
def dataset_iterator(args):
    data_dir = './images/cifar-10-batches-py'
    train_gen, dev_gen = cifar10.load(args.batch_size, data_dir)
    test_gen = None
    return (train_gen, dev_gen, test_gen)