# Imports required by this script. The helpers partition(), augmentation(),
# resize_dataset(), accuracy() and reduce_variable() used below are assumed
# to be defined elsewhere in this script.
import time

import numpy as np
from singa import device
from singa import tensor


def run(global_rank,
        world_size,
        local_rank,
        max_epoch,
        batch_size,
        model,
        data,
        sgd,
        graph,
        verbosity,
        dist_option='fp32',
        spars=None):
    dev = device.create_cuda_gpu_on(local_rank)
    dev.SetRandSeed(0)
    np.random.seed(0)

    # Load the requested dataset
    if data == 'cifar10':
        from data import cifar10
        train_x, train_y, val_x, val_y = cifar10.load()
    elif data == 'cifar100':
        from data import cifar100
        train_x, train_y, val_x, val_y = cifar100.load()
    elif data == 'mnist':
        from data import mnist
        train_x, train_y, val_x, val_y = mnist.load()

    num_channels = train_x.shape[1]
    image_size = train_x.shape[2]
    data_size = np.prod(train_x.shape[1:train_x.ndim]).item()
    num_classes = (np.max(train_y) + 1).item()
    #print(num_classes)

    # Build the requested model
    if model == 'resnet':
        from model import resnet
        model = resnet.resnet50(num_channels=num_channels,
                                num_classes=num_classes)
    elif model == 'xceptionnet':
        from model import xceptionnet
        model = xceptionnet.create_model(num_channels=num_channels,
                                         num_classes=num_classes)
    elif model == 'cnn':
        from model import cnn
        model = cnn.create_model(num_channels=num_channels,
                                 num_classes=num_classes)
    elif model == 'alexnet':
        from model import alexnet
        model = alexnet.create_model(num_channels=num_channels,
                                     num_classes=num_classes)
    elif model == 'mlp':
        import os, sys, inspect
        current = os.path.dirname(
            os.path.abspath(inspect.getfile(inspect.currentframe())))
        parent = os.path.dirname(current)
        sys.path.insert(0, parent)
        from mlp import module
        model = module.create_model(data_size=data_size,
                                    num_classes=num_classes)

    # For distributed training, sequential mode gives better performance
    if hasattr(sgd, "communicator"):
        DIST = True
        sequential = True
    else:
        DIST = False
        sequential = False

    if DIST:
        train_x, train_y, val_x, val_y = partition(global_rank, world_size,
                                                   train_x, train_y, val_x,
                                                   val_y)
    '''
    # check dataset shape correctness
    if global_rank == 0:
        print("Check the shape of dataset:")
        print(train_x.shape)
        print(train_y.shape)
    '''

    if model.dimension == 4:
        tx = tensor.Tensor(
            (batch_size, num_channels, model.input_size, model.input_size),
            dev, tensor.float32)
    elif model.dimension == 2:
        tx = tensor.Tensor((batch_size, data_size), dev, tensor.float32)
        # np.reshape returns a new array; assign the result back so the
        # flattened data is actually used
        train_x = np.reshape(train_x, (train_x.shape[0], -1))
        val_x = np.reshape(val_x, (val_x.shape[0], -1))

    ty = tensor.Tensor((batch_size,), dev, tensor.int32)
    num_train_batch = train_x.shape[0] // batch_size
    num_val_batch = val_x.shape[0] // batch_size
    idx = np.arange(train_x.shape[0], dtype=np.int32)

    # Attach the model to the computational graph
    model.set_optimizer(sgd)
    model.compile([tx], is_train=True, use_graph=graph, sequential=sequential)
    dev.SetVerbosity(verbosity)

    # Training and evaluation loop
    for epoch in range(max_epoch):
        start_time = time.time()
        np.random.shuffle(idx)

        if global_rank == 0:
            print('Starting Epoch %d:' % (epoch))

        # Training phase
        train_correct = np.zeros(shape=[1], dtype=np.float32)
        test_correct = np.zeros(shape=[1], dtype=np.float32)
        train_loss = np.zeros(shape=[1], dtype=np.float32)
        model.train()
        for b in range(num_train_batch):
            # Generate the batch data for this iteration
            x = train_x[idx[b * batch_size:(b + 1) * batch_size]]
            if model.dimension == 4:
                x = augmentation(x, batch_size)
                if image_size != model.input_size:
                    x = resize_dataset(x, model.input_size)
            y = train_y[idx[b * batch_size:(b + 1) * batch_size]]

            # Copy the batch data into the input tensors
            tx.copy_from_numpy(x)
            ty.copy_from_numpy(y)

            # Train the model
            out, loss = model(tx, ty, dist_option, spars)
            train_correct += accuracy(tensor.to_numpy(out), y)
            train_loss += tensor.to_numpy(loss)[0]

        if DIST:
            # Reduce the training accuracy and loss from multiple devices
            reducer = tensor.Tensor((1,), dev, tensor.float32)
            train_correct = reduce_variable(train_correct, sgd, reducer)
            train_loss = reduce_variable(train_loss, sgd, reducer)

        if global_rank == 0:
            print('Training loss = %f, training accuracy = %f' %
                  (train_loss, train_correct /
                   (num_train_batch * batch_size * world_size)),
                  flush=True)

        # Evaluation phase
        model.eval()
        for b in range(num_val_batch):
            x = val_x[b * batch_size:(b + 1) * batch_size]
            if model.dimension == 4:
                if image_size != model.input_size:
                    x = resize_dataset(x, model.input_size)
            y = val_y[b * batch_size:(b + 1) * batch_size]
            tx.copy_from_numpy(x)
            ty.copy_from_numpy(y)
            out_test = model(tx)
            test_correct += accuracy(tensor.to_numpy(out_test), y)

        if DIST:
            # Reduce the evaluation accuracy from multiple devices
            test_correct = reduce_variable(test_correct, sgd, reducer)

        # Output the evaluation accuracy
        if global_rank == 0:
            print('Evaluation accuracy = %f, Elapsed Time = %fs' %
                  (test_correct / (num_val_batch * batch_size * world_size),
                   time.time() - start_time),
                  flush=True)

    dev.PrintTimeProfiling()
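
# A minimal sketch of how run() might be invoked for single-GPU training:
# one process with global rank 0 in a world of size 1, on local GPU 0.
# The argparse flag names and default hyperparameters below are assumptions
# for illustration, not taken from this file; singa.opt.SGD is the standard
# SINGA optimizer.
if __name__ == '__main__':
    import argparse
    from singa import opt

    parser = argparse.ArgumentParser()
    parser.add_argument('--model', default='cnn',
                        choices=['cnn', 'resnet', 'xceptionnet', 'alexnet', 'mlp'])
    parser.add_argument('--data', default='mnist',
                        choices=['mnist', 'cifar10', 'cifar100'])
    parser.add_argument('--max-epoch', type=int, default=10)
    parser.add_argument('--batch-size', type=int, default=64)
    parser.add_argument('--lr', type=float, default=0.005)
    parser.add_argument('--graph', action='store_true')
    parser.add_argument('--verbosity', type=int, default=0)
    args = parser.parse_args()

    sgd = opt.SGD(lr=args.lr, momentum=0.9, weight_decay=1e-5)
    run(0, 1, 0, args.max_epoch, args.batch_size, args.model, args.data,
        sgd, args.graph, args.verbosity)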
# create_model and init_model are used below, so they are imported here;
# they are assumed to live in the same module as save_model.
from model.cnn import create_model, init_model, save_model
from keras.utils import plot_model
from contextlib import redirect_stdout
#from keras.models import model_from_json

TEXT_FILE = '..\\model\\model.txt'
IMAGE_FILE = '..\\model\\model.png'


def to_text(model):
    # Redirect model.summary()'s stdout output into the text file
    with open(TEXT_FILE, 'w') as my_file:
        with redirect_stdout(my_file):
            model.summary()
    print("Exported to {0} successfully".format(TEXT_FILE))


def to_image(model):
    # Render the architecture diagram with layer names and tensor shapes
    plot_model(model,
               to_file=IMAGE_FILE,
               show_layer_names=True,
               show_shapes=True)
    print("Exported to {0} successfully".format(IMAGE_FILE))


cnn_model = create_model()
cnn_model = init_model(cnn_model)
to_text(cnn_model)
to_image(cnn_model)
save_model(cnn_model)
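
# A sketch of reloading the saved model, kept commented out like the
# model_from_json import above. It assumes save_model() wrote the
# architecture as JSON plus a separate HDF5 weights file; both paths are
# hypothetical. model_from_json() and load_weights() are standard Keras calls.
#
# from keras.models import model_from_json
#
# with open('..\\model\\model.json') as json_file:
#     loaded = model_from_json(json_file.read())
# loaded.load_weights('..\\model\\model.h5')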