def Data_Processing(batch_size):

    '''In this GAN tutorial, we don't need the label data.'''
    (train_lbl_one_hot, train_lbl, train_img) = dd.read_data_from_file('train-labels-idx1-ubyte.gz', 'train-images-idx3-ubyte.gz')
    (test_lbl_one_hot, test_lbl, test_img) = dd.read_data_from_file('t10k-labels-idx1-ubyte.gz', 't10k-images-idx3-ubyte.gz')

    '''data loading referenced by Data Loading API'''
    train_iter = mx.io.NDArrayIter(data={'data': to2d(train_img)}, batch_size=batch_size, shuffle=True)  # training data

    return train_iter, len(train_img)
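# The helpers to2d/to4d used throughout these snippets are assumed to be defined alongside
# the data module; a minimal sketch is given below, assuming the MNIST images arrive as
# uint8 arrays of shape (N, 28, 28) that should be scaled to [0, 1]. The repository's
# actual helpers may differ.
import numpy as np

def to2d(img):
    # flatten each 28x28 image into a 784-dimensional float vector in [0, 1]
    return img.reshape(img.shape[0], 784).astype(np.float32) / 255.0

def to4d(img):
    # add a channel axis: (N, 28, 28) -> (N, 1, 28, 28), scaled to [0, 1]
    return img.reshape(img.shape[0], 1, 28, 28).astype(np.float32) / 255.0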
def NeuralNet(epoch, batch_size, save_period):

    time_step = 28
    hidden_unit_number1 = 100
    hidden_unit_number2 = 100
    fc_number = 100
    class_number = 10
    use_cudnn = True

    '''
    load_data

    1. SoftmaxOutput must be

    train_iter = mx.io.NDArrayIter(data={'data': to4d(train_img)}, label={'label': train_lbl}, batch_size=batch_size, shuffle=True)  # training data
    test_iter = mx.io.NDArrayIter(data={'data': to4d(test_img)}, label={'label': test_lbl}, batch_size=batch_size)  # test data

    or

    train_iter = mx.io.NDArrayIter(data={'data': to4d(train_img)}, label={'label': train_lbl_one_hot}, batch_size=batch_size, shuffle=True)  # training data
    test_iter = mx.io.NDArrayIter(data={'data': to4d(test_img)}, label={'label': test_lbl_one_hot}, batch_size=batch_size)  # test data

    2. LogisticRegressionOutput, LinearRegressionOutput, MakeLoss and so on must be

    train_iter = mx.io.NDArrayIter(data={'data': to4d(train_img)}, label={'label': train_lbl_one_hot}, batch_size=batch_size, shuffle=True)  # training data
    test_iter = mx.io.NDArrayIter(data={'data': to4d(test_img)}, label={'label': test_lbl_one_hot}, batch_size=batch_size)  # test data
    '''
    (train_lbl_one_hot, train_lbl, train_img) = dd.read_data_from_file('train-labels-idx1-ubyte.gz', 'train-images-idx3-ubyte.gz')
    (test_lbl_one_hot, test_lbl, test_img) = dd.read_data_from_file('t10k-labels-idx1-ubyte.gz', 't10k-images-idx3-ubyte.gz')

    '''data loading referenced by Data Loading API'''
    train_iter = mx.io.NDArrayIter(data={'data': train_img}, label={'label': train_lbl_one_hot}, batch_size=batch_size, shuffle=True)  # training data
    test_iter = mx.io.NDArrayIter(data={'data': test_img}, label={'label': test_lbl_one_hot})  # test data

    ####################################################-Network-################################################################
    data = mx.sym.Variable('data')
    label = mx.sym.Variable('label')
    data = mx.sym.transpose(data, axes=(1, 0, 2))  # (time, batch, column)

    '''1. RNN cell declaration'''
    '''
    FusedRNNCell fuses the RNN layers across time steps into one kernel.
    It improves speed but is less flexible, and is currently supported only with cuDNN on GPU.
    '''
    if use_cudnn:  # faster
        rnn1 = mx.rnn.FusedRNNCell(num_hidden=hidden_unit_number1, mode="rnn_tanh", prefix="rnn1_", get_next_state=True)
        rnn2 = mx.rnn.FusedRNNCell(num_hidden=hidden_unit_number2, mode="rnn_tanh", prefix="rnn2_", get_next_state=True)
    else:
        rnn1 = mx.rnn.RNNCell(num_hidden=hidden_unit_number1, activation='tanh', prefix='rnn1_')
        rnn2 = mx.rnn.RNNCell(num_hidden=hidden_unit_number2, activation='tanh', prefix='rnn2_')

    '''2. Unroll the RNN cell on the time axis.'''
    '''
    unroll's return values
    outputs : list of Symbol - the output symbols
    states  : Symbol or nested list of Symbol - has the same structure as begin_state()
    '''
    layer1, state1 = rnn1.unroll(length=time_step, inputs=data, merge_outputs=True, layout='TNC')
    layer1 = mx.sym.Dropout(layer1, p=0.3)
    layer2, state2 = rnn2.unroll(length=time_step, inputs=layer1, merge_outputs=True, layout="TNC")
    rnn_output = mx.sym.Reshape(state2[-1], shape=(-1, hidden_unit_number2))  # the last state has hidden_unit_number2 units

    '''FullyConnected layer'''
    affine1 = mx.sym.FullyConnected(data=rnn_output, num_hidden=fc_number, name='affine1')
    act1 = mx.sym.Activation(data=affine1, act_type='sigmoid', name='sigmoid1')
    affine2 = mx.sym.FullyConnected(data=act1, num_hidden=class_number, name='affine2')
    output = mx.sym.SoftmaxOutput(data=affine2, label=label, name='softmax')

    # We visualize the network structure with output sizes (the batch_size is ignored).
    shape = {"data": (time_step, batch_size, 28)}
    mx.viz.plot_network(symbol=output, shape=shape)  # The diagram can be found in the Jupyter notebook.

    print output.list_arguments()

    # training mod
    mod = mx.module.Module(symbol=output, data_names=['data'], label_names=['label'], context=mx.gpu(0))
    # test mod
    test = mx.module.Module(symbol=output, data_names=['data'], label_names=['label'], context=mx.gpu(0))

    # Network information
    print mod.data_names
    print mod.label_names
    print train_iter.provide_data
    print train_iter.provide_label

    '''If the binding below is already done by mod.fit, we don't have to write it,
    but when you load saved weights you must bind explicitly.'''
    mod.bind(data_shapes=train_iter.provide_data, label_shapes=train_iter.provide_label)

    # weights save
    model_name = 'weights/Neural_Net'
    checkpoint = mx.callback.do_checkpoint(model_name, period=save_period)

    # weights load - comment out the two lines below unless a checkpoint for epoch 100 exists.
    symbol, arg_params, aux_params = mx.model.load_checkpoint(model_name, 100)
    # set_params needs mod.bind first; if arg_params and aux_params are passed to mod.fit instead,
    # neither set_params nor the explicit bind above is needed.
    mod.set_params(arg_params, aux_params)

    mod.fit(train_iter,
            initializer=mx.initializer.Xavier(rnd_type='gaussian', factor_type="avg", magnitude=1),
            optimizer='adam',
            optimizer_params={'learning_rate': 0.001},
            eval_metric=mx.metric.MSE(),
            # Once the loaded parameters are passed here, you need neither mod.set_params nor mod.bind.
            num_epoch=epoch,
            arg_params=None,
            aux_params=None,
            epoch_end_callback=checkpoint)

    # Network information
    print mod.data_shapes
    print mod.label_shapes
    print mod.output_shapes
    print mod.get_params()
    print mod.get_outputs()
    print "training_data : {}".format(mod.score(train_iter, ['mse', 'acc']))
    print "Optimization complete."

    #################################TEST####################################

    '''load method 1 - load the saved parameters'''
    #symbol, arg_params, aux_params = mx.model.load_checkpoint(model_name, 100)

    '''load method 2 - take the trained mod.get_params() directly'''
    #arg_params, aux_params = mod.get_params()

    '''load method 3 - using the shared_module'''
    """
    Parameters
    shared_module : Module
        Default is `None`. This is used in bucketing. When not `None`, the shared module
        essentially corresponds to a different bucket -- a module with a different symbol
        but with the same sets of parameters (e.g. unrolled RNNs with different lengths).
    """
    test.bind(data_shapes=test_iter.provide_data, label_shapes=test_iter.provide_label, shared_module=mod, for_training=False)

    '''Uncomment the line below only when testing with 'load method 1' or 'load method 2'.'''
    #test.set_params(arg_params, aux_params)

    # batch-by-batch accuracy
    # To use the code below, the number of test samples divided by batch_size must be an integer.
    '''
    for preds, i_batch, eval_batch in mod.iter_predict(test_iter):
        pred_label = preds[0].asnumpy().argmax(axis=1)
        label = eval_batch.label[0].asnumpy().argmax(axis=1)
        print('batch %d, accuracy %f' % (i_batch, float(sum(pred_label == label)) / len(label)))
    '''

    '''test'''
    result = test.predict(test_iter).asnumpy().argmax(axis=1)
    print 'Final accuracy : {}%'.format(float(sum(test_lbl == result)) / len(result) * 100.0)
def NeuralNet(epoch, batch_size, save_period):

    '''
    load_data

    1. SoftmaxOutput must be

    train_iter = mx.io.NDArrayIter(data={'data': to4d(train_img)}, label={'label': train_lbl}, batch_size=batch_size, shuffle=True)  # training data
    test_iter = mx.io.NDArrayIter(data={'data': to4d(test_img)}, label={'label': test_lbl}, batch_size=batch_size)  # test data

    2. LogisticRegressionOutput, LinearRegressionOutput, MakeLoss and so on must be

    train_iter = mx.io.NDArrayIter(data={'data': to4d(train_img)}, label={'label': train_lbl_one_hot}, batch_size=batch_size, shuffle=True)  # training data
    test_iter = mx.io.NDArrayIter(data={'data': to4d(test_img)}, label={'label': test_lbl_one_hot}, batch_size=batch_size)  # test data
    '''
    (train_lbl_one_hot, train_lbl, train_img) = dd.read_data_from_file('train-labels-idx1-ubyte.gz', 'train-images-idx3-ubyte.gz')
    (test_lbl_one_hot, test_lbl, test_img) = dd.read_data_from_file('t10k-labels-idx1-ubyte.gz', 't10k-images-idx3-ubyte.gz')

    '''data loading referenced by Data Loading API'''
    train_iter = mx.io.NDArrayIter(data={'data': to4d(train_img)}, label={'label': train_lbl_one_hot}, batch_size=batch_size, shuffle=True)  # training data
    test_iter = mx.io.NDArrayIter(data={'data': to4d(test_img)}, label={'label': test_lbl_one_hot})  # test data

    '''neural network'''
    data = mx.sym.Variable('data')
    label = mx.sym.Variable('label')

    # first convolution layer
    conv1 = mx.sym.Convolution(data=data, kernel=(5, 5), num_filter=30)
    conv1 = mx.sym.BatchNorm(data=conv1, fix_gamma=False, use_global_stats=True)
    relu1 = mx.sym.Activation(data=conv1, name='relu_c1', act_type="relu")  # -> size : (batch_size,30,24,24)
    pool1 = mx.sym.Pooling(data=relu1, pool_type="max", kernel=(2, 2), stride=(2, 2))  # -> size : (batch_size,30,12,12)

    # second convolution layer
    conv2 = mx.sym.Convolution(data=pool1, kernel=(5, 5), num_filter=60)
    conv2 = mx.sym.BatchNorm(data=conv2, fix_gamma=False, use_global_stats=True)
    relu2 = mx.sym.Activation(data=conv2, name='relu_c2', act_type="relu")  # -> size : (batch_size,60,8,8)
    pool2 = mx.sym.Pooling(data=relu2, pool_type="max", kernel=(2, 2), stride=(2, 2))  # -> size : (batch_size,60,4,4)

    # flatten the data
    flatten = mx.sym.Flatten(data=pool2)

    # first fully connected layer
    affine1 = mx.sym.FullyConnected(data=flatten, name='fc1', num_hidden=50)
    affine1 = mx.sym.BatchNorm(data=affine1, fix_gamma=False, use_global_stats=True)
    hidden1 = mx.sym.Activation(data=affine1, name='relu_f1', act_type="relu")

    # second fully connected layer
    affine2 = mx.sym.FullyConnected(data=hidden1, name='fc2', num_hidden=50)
    affine2 = mx.sym.BatchNorm(data=affine2, fix_gamma=False, use_global_stats=True)
    hidden2 = mx.sym.Activation(data=affine2, name='relu_f2', act_type="relu")

    output_affine = mx.sym.FullyConnected(data=hidden2, name='fc3', num_hidden=10)
    output = mx.sym.SoftmaxOutput(data=output_affine, label=label)

    # We visualize the network structure with output sizes (the batch_size is ignored).
    shape = {"data": (batch_size, 1, 28, 28)}
    mx.viz.plot_network(symbol=output, shape=shape)  # The diagram can be found in the Jupyter notebook.

    print output.list_arguments()

    # First optimization method
    # weights save
    model_name = 'weights/Neural_Net'
    checkpoint = mx.callback.do_checkpoint(model_name, period=save_period)

    # training mod
    mod = mx.mod.Module(symbol=output, data_names=['data'], label_names=['label'], context=mx.gpu(0))
    # test mod
    test = mx.mod.Module(symbol=output, data_names=['data'], label_names=['label'], context=mx.gpu(0))

    # Network information
    print mod.data_names
    print mod.label_names
    print train_iter.provide_data
    print train_iter.provide_label

    '''If the binding below is already done by mod.fit, we don't have to write it,
    but when you load saved weights you must bind explicitly.'''
    mod.bind(data_shapes=train_iter.provide_data, label_shapes=train_iter.provide_label)

    # weights load - comment out the two lines below unless a checkpoint for epoch 100 exists.
    symbol, arg_params, aux_params = mx.model.load_checkpoint(model_name, 100)
    # set_params needs mod.bind first; if arg_params and aux_params are passed to mod.fit instead,
    # neither set_params nor the explicit bind above is needed.
    mod.set_params(arg_params, aux_params)

    mod.fit(train_iter,
            initializer=mx.initializer.Xavier(rnd_type='gaussian', factor_type="avg", magnitude=1),
            optimizer='adam',
            optimizer_params={'learning_rate': 0.001},
            eval_metric=mx.metric.MSE(),
            # Once the loaded parameters are passed here, you need neither mod.set_params nor mod.bind.
            num_epoch=epoch,
            arg_params=None,
            aux_params=None,
            epoch_end_callback=checkpoint)

    # Network information
    print mod.data_shapes
    print mod.label_shapes
    print mod.output_shapes
    print mod.get_params()
    print mod.get_outputs()
    print "training_data : {}".format(mod.score(train_iter, ['mse', 'acc']))
    print "Optimization complete."

    #################################TEST####################################

    '''load method 1 - load the saved parameters'''
    #symbol, arg_params, aux_params = mx.model.load_checkpoint(model_name, 100)

    '''load method 2 - take the trained mod.get_params() directly'''
    #arg_params, aux_params = mod.get_params()

    '''load method 3 - using the shared_module'''
    """
    Parameters
    shared_module : Module
        Default is `None`. This is used in bucketing. When not `None`, the shared module
        essentially corresponds to a different bucket -- a module with a different symbol
        but with the same sets of parameters (e.g. unrolled RNNs with different lengths).
    """
    test.bind(data_shapes=test_iter.provide_data, label_shapes=test_iter.provide_label, shared_module=mod, for_training=False)

    '''Uncomment the line below only when testing with 'load method 1' or 'load method 2'.'''
    #test.set_params(arg_params, aux_params)

    # batch-by-batch accuracy
    # To use the code below, the number of test samples divided by batch_size must be an integer.
    '''
    for preds, i_batch, eval_batch in mod.iter_predict(test_iter):
        pred_label = preds[0].asnumpy().argmax(axis=1)
        label = eval_batch.label[0].asnumpy().argmax(axis=1)
        print('batch %d, accuracy %f' % (i_batch, float(sum(pred_label == label)) / len(label)))
    '''

    '''test'''
    result = test.predict(test_iter).asnumpy().argmax(axis=1)
    print 'Final accuracy : {}%'.format(float(sum(test_lbl == result)) / len(result) * 100.0)
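# As the comments above note, mod.fit can take the loaded parameters directly, which makes
# the explicit mod.bind / mod.set_params pair unnecessary. A sketch of that variant,
# assuming a checkpoint for epoch 100 exists (fit_from_checkpoint is an illustrative
# helper name, not part of the repository):
def fit_from_checkpoint(mod, train_iter, model_name, checkpoint, load_epoch, num_epoch):
    # load_checkpoint returns (symbol, arg_params, aux_params) for the given epoch
    symbol, arg_params, aux_params = mx.model.load_checkpoint(model_name, load_epoch)
    mod.fit(train_iter,
            optimizer='adam',
            optimizer_params={'learning_rate': 0.001},
            eval_metric=mx.metric.MSE(),
            arg_params=arg_params,   # loaded weights go here instead of mod.set_params
            aux_params=aux_params,
            begin_epoch=load_epoch,  # continue epoch numbering from the checkpoint
            num_epoch=num_epoch,
            epoch_end_callback=checkpoint)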
def CapsNet(reconstruction, epoch, batch_size, save_period, load_period, ctx=mx.gpu(0), graphviz=False):

    (train_lbl_one_hot, train_lbl, train_img) = dd.read_data_from_file('train-labels-idx1-ubyte.gz', 'train-images-idx3-ubyte.gz')
    (test_lbl_one_hot, test_lbl, test_img) = dd.read_data_from_file('t10k-labels-idx1-ubyte.gz', 't10k-images-idx3-ubyte.gz')

    '''
    In the paper: 'Training is performed on 28x28 MNIST images that have been shifted
    by up to 2 pixels in each direction with zero padding.'
    In this implementation, however, the original data is not transformed that way.
    '''

    '''data loading referenced by Data Loading API'''
    train_iter = mx.io.NDArrayIter(data={'data': to4d(train_img)}, label={'label': train_lbl}, batch_size=batch_size, shuffle=True, last_batch_handle='roll_over')  # training data
    test_iter = mx.io.NDArrayIter(data={'data': to4d(test_img)}, label={'label': test_lbl}, batch_size=batch_size, shuffle=False, last_batch_handle='roll_over')  # test data

    '''
    reconstruction=True
        output_list[0] -> total_loss = margin_loss + reconstruction_loss
        output_list[1] -> capsule_output
        output_list[2] -> reconstruction_output

    reconstruction=False
        output_list[0] -> margin_loss
        output_list[1] -> capsule_output
    '''
    output_list = capsule(reconstruction=reconstruction, routing_iteration=1, batch_size=batch_size)

    # (1) Get the names of the arguments
    arg_names = output_list[0].list_arguments()

    # caution: here we also need the label's shape
    arg_shapes, output_shapes, aux_shapes = output_list[0].infer_shape(data=(batch_size, 1, 28, 28), label=(batch_size,))

    # (2) Make space for the arguments - mutable; declared like this, they are kept in memory.
    arg_dict = dict(zip(arg_names, [mx.nd.random.normal(loc=0, scale=0.01, shape=shape, ctx=ctx) for shape in arg_shapes]))
    grad_dict = dict(zip(arg_names[1:-1], [mx.nd.zeros(shape, ctx=ctx) for shape in arg_shapes[1:-1]]))  # exclude the input and the label
    aux_args = [mx.nd.zeros(shape=shape, ctx=ctx) for shape in aux_shapes]

    if epoch == 0 and graphviz == True:
        if reconstruction:
            total_loss = mx.viz.plot_network(symbol=output_list[0], shape={"data": (batch_size, 1, 28, 28), "label": (batch_size,)})
            total_loss.view("total_loss")
        else:
            margin_loss = mx.viz.plot_network(symbol=output_list[0], shape={"data": (batch_size, 1, 28, 28), "label": (batch_size,)})
            margin_loss.view("margin_loss")

    if reconstruction:  # reconstruction=True
        if os.path.exists("weights/MNIST_Reconstruction_weights-{}.param".format(load_period)):
            print("MNIST_Reconstruction_weights-{}.param exists".format(load_period))
            pretrained = mx.nd.load("weights/MNIST_Reconstruction_weights-{}.param".format(load_period))
            for name in arg_names:
                if name == "data" or name == "label":
                    continue
                else:
                    arg_dict[name] = pretrained[name]
        else:
            print("weight initialization")
    else:  # reconstruction=False
        if os.path.exists("weights/MNIST_weights-{}.param".format(load_period)):
            print("MNIST_weights-{}.param exists".format(load_period))
            pretrained = mx.nd.load("weights/MNIST_weights-{}.param".format(load_period))
            for name in arg_names:
                if name == "data" or name == "label":
                    continue
                else:
                    arg_dict[name] = pretrained[name]
        else:
            print("weight initialization")

    network = output_list[0].bind(ctx=ctx, args=arg_dict, args_grad=grad_dict, grad_req='write', aux_states=aux_args)

    if reconstruction:
        capsule_output = output_list[1].bind(ctx=ctx, args=arg_dict, args_grad=grad_dict, grad_req='null', aux_states=aux_args, shared_exec=network)
        reconstruction_output = output_list[2].bind(ctx=ctx, args=arg_dict, args_grad=grad_dict, grad_req='null', aux_states=aux_args, shared_exec=network)
    else:
        capsule_output = output_list[1].bind(ctx=ctx, args=arg_dict, args_grad=grad_dict, grad_req='null', aux_states=aux_args, shared_exec=network)

    # optimizer
    state = []
    optimizer = mx.optimizer.Adam(learning_rate=0.001)
    for shape in arg_shapes[1:-1]:
        state.append(optimizer.create_state(0, mx.nd.zeros(shape=shape, ctx=ctx)))

    if not os.path.exists("weights"):
        os.makedirs("weights")

    # learning
    for i in tqdm(range(1, epoch + 1, 1)):
        '''
        The paper uses an exponentially decaying learning rate.
        In this implementation, the learning rate is instead multiplied by 0.99 every 10 steps.
        '''
        if i % 10 == 0:
            optimizer.set_learning_rate(0.001 * pow(0.99, i))

        train_iter.reset()
        for batch in train_iter:
            '''
            <very important>
            "[:]" sets the contents of the existing array instead of rebinding the variable
            to a new value, so the bound executor keeps seeing the new data.
            For more information, see the reference.
            '''
            arg_dict["data"][:] = batch.data[0]
            arg_dict["label"][:] = batch.label[0]
            out = network.forward()
            network.backward(out)

            for j, name in enumerate(arg_names[1:-1]):
                optimizer.update(0, arg_dict[name], grad_dict[name], state[j])

        if reconstruction:
            print("epoch : {}, last total loss : {}".format(i, mx.nd.mean(network.outputs[0]).asscalar()))
            if i % save_period == 0:
                mx.nd.save("weights/MNIST_Reconstruction_weights-{}.param".format(i), arg_dict)
        else:
            print("epoch : {}, last margin loss : {}".format(i, mx.nd.mean(network.outputs[0]).asscalar()))
            if i % save_period == 0:
                mx.nd.save("weights/MNIST_weights-{}.param".format(i), arg_dict)

        test_accuracy = evaluate_accuracy(test_iter, capsule_output)
        print("Test_acc : {0:0.3f}%".format(test_accuracy * 100))

    print("#Optimization complete\n")

    test_accuracy = evaluate_accuracy(test_iter, capsule_output)
    print("Test_acc : {0:0.3f}%".format(test_accuracy * 100))

    if reconstruction:
        generate_image(test_iter, reconstruction_output)
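# The "[:]" remark in the training loop above is the crux of this low-level version:
# arg_dict["data"][:] = batch.data[0] writes into the NDArray the executor was bound to,
# whereas arg_dict["data"] = batch.data[0] would only rebind the Python name and the
# executor would keep reading the stale buffer. A tiny self-contained illustration:
a = mx.nd.zeros((2, 2))
b = a                        # b aliases the same NDArray as a
a[:] = 1.0                   # in-place write: b sees the ones too
print(b.asnumpy())           # [[1. 1.] [1. 1.]]
a = mx.nd.ones((2, 2)) * 5   # rebinding: b is untouched and still holds ones
print(b.asnumpy())           # [[1. 1.] [1. 1.]]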
def NeuralNet(epoch, batch_size, save_period, load_weights):

    '''
    load_data

    1. SoftmaxOutput must be

    train_iter = mx.io.NDArrayIter(data={'data': to4d(train_img)}, label={'label': train_lbl}, batch_size=batch_size, shuffle=True)  # training data
    test_iter = mx.io.NDArrayIter(data={'data': to4d(test_img)}, label={'label': test_lbl}, batch_size=batch_size)  # test data

    2. LogisticRegressionOutput, LinearRegressionOutput, MakeLoss and so on must be

    train_iter = mx.io.NDArrayIter(data={'data': to4d(train_img)}, label={'label': train_lbl_one_hot}, batch_size=batch_size, shuffle=True)  # training data
    test_iter = mx.io.NDArrayIter(data={'data': to4d(test_img)}, label={'label': test_lbl_one_hot}, batch_size=batch_size)  # test data
    '''
    (train_lbl_one_hot, train_lbl, train_img) = dd.read_data_from_file('train-labels-idx1-ubyte.gz', 'train-images-idx3-ubyte.gz')
    (test_lbl_one_hot, test_lbl, test_img) = dd.read_data_from_file('t10k-labels-idx1-ubyte.gz', 't10k-images-idx3-ubyte.gz')

    '''data loading referenced by Data Loading API'''
    train_iter = mx.io.NDArrayIter(data={'data': to2d(train_img)}, label={'one_hot_label': train_lbl_one_hot}, batch_size=batch_size, shuffle=True)  # training data
    test_iter = mx.io.NDArrayIter(data={'data': to2d(test_img)}, label={'one_hot_label': test_lbl_one_hot})  # test data

    '''neural network'''
    data = mx.sym.Variable('data')
    label = mx.sym.Variable('one_hot_label')

    # first hidden layer
    affine1 = mx.sym.FullyConnected(data=data, name='fc1', num_hidden=50)
    hidden1 = mx.sym.Activation(data=affine1, name='sigmoid1', act_type="sigmoid")

    # second hidden layer
    affine2 = mx.sym.FullyConnected(data=hidden1, name='fc2', num_hidden=50)
    hidden2 = mx.sym.Activation(data=affine2, name='sigmoid2', act_type="sigmoid")

    # output layer
    output_affine = mx.sym.FullyConnected(data=hidden2, name='fc3', num_hidden=10)

    '''
    Apply a custom operator implemented in a frontend language.

    A custom operator should override required methods like forward and backward,
    and it must be registered with mx.operator.register before it can be used;
    please check the tutorial.

    Parameters:
        data (NDArray[]) - input data for the custom operator
        op_type (string) - name of the custom operator; this is the name passed to
                           mx.operator.register when registering the operator
        out (NDArray, optional) - the output NDArray to hold the result

    Returns:
        out - NDArray or list of NDArrays
    '''
    # custom layer -> I just recommend using MakeLoss.
    # The label cannot be passed the way the MXNet API page shows;
    # I do not know why, but it seems it has to be written as follows.
    output = mx.sym.Custom(data=output_affine, label=label, grad_scale=1, name="SoftmaxOutput", op_type='SoftmaxOutput')

    # We visualize the network structure with output sizes (the batch_size is ignored).
    shape = {"data": (batch_size, 784)}
    graph = mx.viz.plot_network(symbol=output, shape=shape)  # The diagram can be found in the Jupyter notebook.
    if epoch == 1:
        graph.view()

    print(output.list_arguments())
    print(output.list_outputs())

    # training mod
    mod = mx.mod.Module(symbol=output, data_names=['data'], label_names=['one_hot_label'], context=mx.gpu(0))
    mod.bind(data_shapes=train_iter.provide_data, label_shapes=train_iter.provide_label)

    # load the saved mod data
    weights_path = "weights/mod-{}.params".format(load_weights)
    if os.path.exists(weights_path):
        print("Load weights")
        mod.load_params(weights_path)
    else:
        mod.init_params(initializer=mx.initializer.Xavier(rnd_type='uniform', factor_type='avg', magnitude=1))

    mod.init_optimizer(optimizer='adam', optimizer_params={'learning_rate': 0.001})

    # test mod
    test = mx.mod.Module(symbol=output, data_names=['data'], label_names=['one_hot_label'], context=mx.gpu(0))

    '''load method 1 - using the shared_module'''
    """
    Parameters
    shared_module : Module
        Default is `None`. This is used in bucketing. When not `None`, the shared module
        essentially corresponds to a different bucket -- a module with a different symbol
        but with the same sets of parameters (e.g. unrolled RNNs with different lengths).
    """
    test.bind(data_shapes=test_iter.provide_data, label_shapes=test_iter.provide_label, shared_module=mod, for_training=False)

    # Network information
    print(mod.data_names)
    print(mod.label_names)
    print(train_iter.provide_data)
    print(train_iter.provide_label)

    '''############ Although not required, the following code should be declared. #################'''

    '''make evaluation method 1 - using existing metrics:
    metrics = {
        'acc': Accuracy, 'accuracy': Accuracy, 'ce': CrossEntropy, 'f1': F1,
        'mae': MAE, 'mse': MSE, 'rmse': RMSE, 'top_k_accuracy': TopKAccuracy
    }'''
    metric = mx.metric.create(['acc', 'mse'])

    '''make evaluation method 2 - making a new one.'''
    '''
    Custom evaluation metric that takes an NDArray function.
    Parameters:
        feval (callable(label, pred)) - customized evaluation function
        name (str, optional) - the name of the metric
        allow_extra_outputs (bool) - if true, the prediction outputs can have extra outputs;
                                     useful in RNNs, where states are also produced in outputs.
    '''
    def zero(label, pred):
        return 0

    null = mx.metric.CustomMetric(zero)

    for epoch in range(1, epoch + 1, 1):
        print("epoch : {}".format(epoch))
        train_iter.reset()
        #total_batch_number = np.ceil(len(train_img) / (batch_size * 1.0))
        #temp = 0
        for batch in train_iter:
            mod.forward(batch, is_train=True)
            mod.backward()
            mod.update()

            # cost
            #temp += (mod.get_outputs()[0].asnumpy() - batch.label[0].asnumpy())
            #cost = (0.5 * np.square(temp) / (total_batch_number * 1.0)).mean()

        result = test.predict(test_iter).asnumpy().argmax(axis=1)
        print("training_data : {}".format(mod.score(train_iter, ['mse', 'acc'])))
        print('accuracy during learning. : {}%'.format(float(sum(test_lbl == result)) / len(result) * 100.0))
        #print "cost value : {}".format(cost)

        if not os.path.exists("weights"):
            os.makedirs("weights")

        # Save the data
        if epoch % save_period == 0:
            print('Saving weights')
            mod.save_params("weights/mod-{}.params".format(epoch))

    # Network information
    print(mod.data_shapes)
    print(mod.label_shapes)
    print(mod.output_shapes)
    print(mod.get_params())
    print(mod.get_outputs())
    print("Optimization complete.")

    #################################TEST####################################

    '''load method 2 - take the trained mod.get_params() directly'''
    #arg_params, aux_params = mod.get_params()

    '''Uncomment the line below only when testing with 'load method 2'.'''
    #test.set_params(arg_params, aux_params)

    # batch-by-batch accuracy
    # To use the code below, the number of test samples divided by batch_size must be an integer.
    '''
    for preds, i_batch, eval_batch in mod.iter_predict(test_iter):
        pred_label = preds[0].asnumpy().argmax(axis=1)
        label = eval_batch.label[0].asnumpy().argmax(axis=1)
        print('batch %d, accuracy %f' % (i_batch, float(sum(pred_label == label)) / len(label)))
    '''

    '''test'''
    result = test.predict(test_iter).asnumpy().argmax(axis=1)
    print('Final accuracy : {}%'.format(float(sum(test_lbl == result)) / len(result) * 100.0))
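# mx.sym.Custom above requires that a Python operator registered under the name
# 'SoftmaxOutput' already exists; the registration itself is not shown in this snippet.
# Below is a minimal sketch of what it could look like (softmax forward, cross-entropy
# gradient, one-hot labels). The class names and details are illustrative, not the
# repository's exact code:
class SoftmaxOutputOp(mx.operator.CustomOp):
    def __init__(self, grad_scale):
        super(SoftmaxOutputOp, self).__init__()
        self.grad_scale = grad_scale

    def forward(self, is_train, req, in_data, out_data, aux):
        y = mx.nd.softmax(in_data[0], axis=1)  # in_data[0] : logits
        self.assign(out_data[0], req[0], y)

    def backward(self, req, out_grad, in_data, out_data, in_grad, aux):
        y = out_data[0]
        label = in_data[1]                     # one-hot label, same shape as y
        # gradient of cross-entropy w.r.t. the logits, scaled by grad_scale
        self.assign(in_grad[0], req[0], (y - label) * self.grad_scale)

@mx.operator.register("SoftmaxOutput")
class SoftmaxOutputProp(mx.operator.CustomOpProp):
    def __init__(self, grad_scale=1.0):
        # keyword arguments arrive as strings from mx.sym.Custom
        super(SoftmaxOutputProp, self).__init__(need_top_grad=False)
        self.grad_scale = float(grad_scale)

    def list_arguments(self):
        return ['data', 'label']

    def list_outputs(self):
        return ['output']

    def infer_shape(self, in_shape):
        # data: (batch, 10), label: (batch, 10), output: (batch, 10)
        return [in_shape[0], in_shape[0]], [in_shape[0]], []

    def create_operator(self, ctx, shapes, dtypes):
        return SoftmaxOutputOp(self.grad_scale)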
def NeuralNet(epoch, batch_size, save_period, tensorboard):

    '''
    load_data

    1. SoftmaxOutput must be

    train_iter = mx.io.NDArrayIter(data={'data': to4d(train_img)}, label={'label': train_lbl}, batch_size=batch_size, shuffle=True)  # training data
    test_iter = mx.io.NDArrayIter(data={'data': to4d(test_img)}, label={'label': test_lbl}, batch_size=batch_size)  # test data

    2. LogisticRegressionOutput, LinearRegressionOutput, MakeLoss and so on must be

    train_iter = mx.io.NDArrayIter(data={'data': to4d(train_img)}, label={'label': train_lbl_one_hot}, batch_size=batch_size, shuffle=True)  # training data
    test_iter = mx.io.NDArrayIter(data={'data': to4d(test_img)}, label={'label': test_lbl_one_hot}, batch_size=batch_size)  # test data
    '''
    (train_lbl_one_hot, train_lbl, train_img) = dd.read_data_from_file('train-labels-idx1-ubyte.gz', 'train-images-idx3-ubyte.gz')
    (test_lbl_one_hot, test_lbl, test_img) = dd.read_data_from_file('t10k-labels-idx1-ubyte.gz', 't10k-images-idx3-ubyte.gz')

    '''data loading referenced by Data Loading API'''
    train_iter = mx.io.NDArrayIter(data={'data': to4d(train_img)}, label={'label': train_lbl_one_hot}, batch_size=batch_size, shuffle=True)  # training data
    test_iter = mx.io.NDArrayIter(data={'data': to4d(test_img)}, label={'label': test_lbl_one_hot})  # test data

    '''neural network'''
    data = mx.sym.Variable('data')
    label = mx.sym.Variable('label')

    # first convolution layer
    conv1 = mx.sym.Convolution(data=data, kernel=(5, 5), num_filter=30)
    conv1 = mx.sym.BatchNorm(data=conv1, fix_gamma=False, use_global_stats=True)
    relu1 = mx.sym.Activation(data=conv1, name='relu_c1', act_type="relu")  # -> size : (batch_size,30,24,24)
    pool1 = mx.sym.Pooling(data=relu1, pool_type="max", kernel=(2, 2), stride=(2, 2))  # -> size : (batch_size,30,12,12)

    # second convolution layer
    conv2 = mx.sym.Convolution(data=pool1, kernel=(5, 5), num_filter=60)
    conv2 = mx.sym.BatchNorm(data=conv2, fix_gamma=False, use_global_stats=True)
    relu2 = mx.sym.Activation(data=conv2, name='relu_c2', act_type="relu")  # -> size : (batch_size,60,8,8)
    pool2 = mx.sym.Pooling(data=relu2, pool_type="max", kernel=(2, 2), stride=(2, 2))  # -> size : (batch_size,60,4,4)

    # flatten the data
    flatten = mx.sym.Flatten(data=pool2)

    # first fully connected layer
    affine1 = mx.sym.FullyConnected(data=flatten, name='fc1', num_hidden=100)
    affine1 = mx.sym.BatchNorm(data=affine1, fix_gamma=False, use_global_stats=True)
    hidden1 = mx.sym.Activation(data=affine1, name='relu_f1', act_type="relu")

    # second fully connected layer
    affine2 = mx.sym.FullyConnected(data=hidden1, name='fc2', num_hidden=100)
    affine2 = mx.sym.BatchNorm(data=affine2, fix_gamma=False, use_global_stats=True)
    hidden2 = mx.sym.Activation(data=affine2, name='relu_f2', act_type="relu")

    output_affine = mx.sym.FullyConnected(data=hidden2, name='fc3', num_hidden=10)
    output = mx.sym.SoftmaxOutput(data=output_affine, label=label)

    # We visualize the network structure with output sizes (the batch_size is ignored).
    shape = {"data": (batch_size, 1, 28, 28)}
    mx.viz.plot_network(symbol=output, shape=shape)  # The diagram can be found in the Jupyter notebook.

    print output.list_arguments()

    # training mod
    mod = mx.mod.Module(symbol=output, data_names=['data'], label_names=['label'], context=mx.gpu(0))
    mod.bind(data_shapes=train_iter.provide_data, label_shapes=train_iter.provide_label)

    # load the saved mod data - comment this out unless weights/mod-100.params exists.
    mod.load_params("weights/mod-100.params")
    # init_params keeps already-initialized values by default (force_init=False).
    mod.init_params(initializer=mx.initializer.Xavier(rnd_type='gaussian', factor_type='avg', magnitude=1))
    mod.init_optimizer(optimizer='adam', optimizer_params={'learning_rate': 0.001})

    # test mod
    test = mx.mod.Module(symbol=output, data_names=['data'], label_names=['label'], context=mx.gpu(0))

    '''load method 1 - using the shared_module'''
    """
    Parameters
    shared_module : Module
        Default is `None`. This is used in bucketing. When not `None`, the shared module
        essentially corresponds to a different bucket -- a module with a different symbol
        but with the same sets of parameters (e.g. unrolled RNNs with different lengths).
    """
    test.bind(data_shapes=test_iter.provide_data, label_shapes=test_iter.provide_label, shared_module=mod, for_training=False)

    # Network information
    print mod.data_names
    print mod.label_names
    print train_iter.provide_data
    print train_iter.provide_label

    '''############ Although not required, the following code should be declared. #################'''

    '''make evaluation method 1 - using existing metrics:
    metrics = {
        'acc': Accuracy, 'accuracy': Accuracy, 'ce': CrossEntropy, 'f1': F1,
        'mae': MAE, 'mse': MSE, 'rmse': RMSE, 'top_k_accuracy': TopKAccuracy
    }'''
    metric = mx.metric.create(['acc', 'mse'])

    '''make evaluation method 2 - making a new one.'''
    '''
    Custom evaluation metric that takes an NDArray function.
    Parameters:
        feval (callable(label, pred)) - customized evaluation function
        name (str, optional) - the name of the metric
        allow_extra_outputs (bool) - if true, the prediction outputs can have extra outputs;
                                     useful in RNNs, where states are also produced in outputs.
    '''
    def zero(label, pred):
        return 0

    null = mx.metric.CustomMetric(zero)

    for epoch in xrange(1, epoch + 1, 1):
        print "epoch : {}".format(epoch)
        train_iter.reset()
        total_batch_number = np.ceil(len(train_img) / (batch_size * 1.0))
        temp = 0
        for batch in train_iter:
            mod.forward(batch, is_train=True)
            mod.backward()
            mod.update()

            '''tensorboard part - accumulate the per-batch output error'''
            temp += (mod.get_outputs()[0].asnumpy() - batch.label[0].asnumpy())

        cost = (0.5 * np.square(temp) / (total_batch_number * 1.0)).mean()
        print "MSE_cost value : {}".format(cost)

        result = test.predict(test_iter).asnumpy().argmax(axis=1)
        print "training_data : {}".format(mod.score(train_iter, ['mse', 'acc']))
        print 'accuracy during learning. : {}%'.format(float(sum(test_lbl == result)) / len(result) * 100.0)

        '''
        class SummaryWriter(object):
            """Writes `Summary` directly to event files.

            The `SummaryWriter` class provides a high-level API to create an event file in
            a given directory and add summaries and events to it. The class updates the
            file contents asynchronously, which allows a training program to call methods
            to add data to the file directly from the training loop without slowing down
            training.
            """
            def __init__(self, log_dir):
                self.file_writer = FileWriter(logdir=log_dir)

            def add_scalar(self, name, scalar_value, global_step=None):
                self.file_writer.add_summary(scalar(name, scalar_value), global_step)

            def add_histogram(self, name, values):
                self.file_writer.add_summary(histogram(name, values))

            def add_image(self, tag, img_tensor):
                self.file_writer.add_summary(image(tag, img_tensor))

            def close(self):
                self.file_writer.flush()
                self.file_writer.close()

            def __del__(self):
                if self.file_writer is not None:
                    self.file_writer.close()
        '''

        '''tensorboard_part'''
        if (epoch % tensorboard) == 0:
            arg_params, aux_params = mod.get_params()

            # write scalar values
            summary_writer.add_scalar(name="MSE_cost", scalar_value=cost, global_step=epoch)

            for arg_key, arg_value, aux_key, aux_value in zip(arg_params.keys(), arg_params.values(), aux_params.keys(), aux_params.values()):
                # write matrix values
                summary_writer.add_histogram(name=arg_key, values=arg_value.asnumpy().ravel())
                #summary_writer.add_histogram(name=aux_key, values=aux_value.asnumpy().ravel())
                '''or'''
                #summary_writer.add_histogram(name=arg_key, values=arg_value.asnumpy().flatten())
                #summary_writer.add_histogram(name=aux_key, values=aux_value.asnumpy().flatten())

        # Save the data
        if (epoch % save_period) == 0:
            print('Saving weights')
            mod.save_params("weights/mod-{}.params".format(epoch))

    '''tensorboard_part'''
    summary_writer.close()

    # Network information
    print mod.data_shapes
    print mod.label_shapes
    print mod.output_shapes
    print mod.get_params()
    print mod.get_outputs()
    print "Optimization complete."

    #################################TEST####################################

    '''load method 2 - take the trained mod.get_params() directly'''
    #arg_params, aux_params = mod.get_params()

    '''Uncomment the line below only when testing with 'load method 2'.'''
    #test.set_params(arg_params, aux_params)

    # batch-by-batch accuracy
    # To use the code below, the number of test samples divided by batch_size must be an integer.
    '''
    for preds, i_batch, eval_batch in mod.iter_predict(test_iter):
        pred_label = preds[0].asnumpy().argmax(axis=1)
        label = eval_batch.label[0].asnumpy().argmax(axis=1)
        print('batch %d, accuracy %f' % (i_batch, float(sum(pred_label == label)) / len(label)))
    '''

    '''test'''
    result = test.predict(test_iter).asnumpy().argmax(axis=1)
    print 'Final accuracy : {}%'.format(float(sum(test_lbl == result)) / len(result) * 100.0)
def CapsNet(reconstruction, epoch, batch_size, save_period, load_period, ctx=mx.gpu(0), graphviz=False):

    (train_lbl_one_hot, train_lbl, train_img) = dd.read_data_from_file('train-labels-idx1-ubyte.gz', 'train-images-idx3-ubyte.gz')
    (test_lbl_one_hot, test_lbl, test_img) = dd.read_data_from_file('t10k-labels-idx1-ubyte.gz', 't10k-images-idx3-ubyte.gz')

    '''
    In the paper: 'Training is performed on 28x28 MNIST images that have been shifted
    by up to 2 pixels in each direction with zero padding.'
    In this implementation, however, the original data is not transformed that way.
    '''

    '''data loading referenced by Data Loading API'''
    train_iter = mx.io.NDArrayIter(data={'data': to4d(train_img)}, label={'label': train_lbl}, batch_size=batch_size, shuffle=True, last_batch_handle='roll_over')  # training data
    test_iter = mx.io.NDArrayIter(data={'data': to4d(test_img)}, label={'label': test_lbl}, batch_size=batch_size, shuffle=False, last_batch_handle='roll_over')  # test data

    '''
    reconstruction=True
        output_list[0] -> total_loss = margin_loss + reconstruction_loss
        output_list[1] -> capsule_output
        output_list[2] -> reconstruction_output

    reconstruction=False
        output_list[0] -> margin_loss
        output_list[1] -> capsule_output
    '''
    output_list = capsule(reconstruction=reconstruction, routing_iteration=1, batch_size=batch_size)

    # training mod
    network = mx.mod.Module(symbol=output_list[0], data_names=['data'], label_names=['label'], context=ctx)
    network.bind(data_shapes=train_iter.provide_data, label_shapes=train_iter.provide_label, for_training=True)

    if epoch == 0 and graphviz == True:
        if reconstruction:
            total_loss = mx.viz.plot_network(symbol=output_list[0], shape={"data": (batch_size, 1, 28, 28), "label": (batch_size,)})
            total_loss.view("total_loss")
        else:
            margin_loss = mx.viz.plot_network(symbol=output_list[0], shape={"data": (batch_size, 1, 28, 28), "label": (batch_size,)})
            margin_loss.view("margin_loss")

    if reconstruction:  # reconstruction=True
        if os.path.exists("weights/MNIST_Reconstruction_weights-{}.param".format(load_period)):
            print("MNIST_Reconstruction_weights-{}.param exists".format(load_period))
            network.load_params("weights/MNIST_Reconstruction_weights-{}.param".format(load_period))
        else:
            print("weight initialization")
            network.init_params(initializer=mx.initializer.Normal(sigma=0.1))
    else:  # reconstruction=False
        if os.path.exists("weights/MNIST_weights-{}.param".format(load_period)):
            print("MNIST_weights-{}.param exists".format(load_period))
            network.load_params("weights/MNIST_weights-{}.param".format(load_period))
        else:
            print("weight initialization")
            network.init_params(initializer=mx.initializer.Normal(sigma=0.1))

    if reconstruction:
        capsule_output = mx.mod.Module(symbol=output_list[1], data_names=['data'], label_names=None, context=ctx)
        reconstruction_output = mx.mod.Module(symbol=output_list[2], data_names=['data'], label_names=['label'], context=ctx)
        capsule_output.bind(data_shapes=test_iter.provide_data, label_shapes=None, for_training=False, shared_module=network, grad_req='null')
        reconstruction_output.bind(data_shapes=test_iter.provide_data, label_shapes=test_iter.provide_label, for_training=False, shared_module=network, grad_req='null')
    else:
        capsule_output = mx.mod.Module(symbol=output_list[1], data_names=['data'], label_names=None, context=ctx)
        capsule_output.bind(data_shapes=test_iter.provide_data, label_shapes=None, for_training=False, shared_module=network, grad_req='null')

    lr_sch = mx.lr_scheduler.FactorScheduler(step=5000, factor=0.99)
    network.init_optimizer(optimizer='adam', optimizer_params={'learning_rate': 0.001, 'lr_scheduler': lr_sch})

    if not os.path.exists("weights"):
        os.makedirs("weights")

    # learning
    for i in tqdm(range(1, epoch + 1, 1)):
        train_iter.reset()
        for batch in train_iter:
            network.forward(batch)
            out_grads = network.get_outputs()
            network.backward(out_grads=out_grads)
            network.update()

        if reconstruction:
            print("epoch : {}, last total loss : {}".format(i, mx.nd.mean(network.get_outputs()[0]).asscalar()))
            if i % save_period == 0:
                print('Saving weights')
                network.save_params("weights/MNIST_Reconstruction_weights-{}.param".format(i))
        else:
            print("epoch : {}, last margin loss : {}".format(i, mx.nd.mean(network.get_outputs()[0]).asscalar()))
            if i % save_period == 0:
                print('Saving weights')
                network.save_params("weights/MNIST_weights-{}.param".format(i))

        test_accuracy = evaluate_accuracy(test_iter, capsule_output)
        print("Test_acc : {0:0.3f}%".format(test_accuracy * 100))

    print("Optimization complete\n")

    test_accuracy = evaluate_accuracy(test_iter, capsule_output)
    print("Test_acc : {0:0.3f}%".format(test_accuracy * 100))

    if reconstruction:
        generate_image(test_iter, reconstruction_output)
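# evaluate_accuracy and generate_image are helper functions defined elsewhere in the
# repository. A plausible sketch of evaluate_accuracy, assuming the capsule_output module
# produces per-class scores (e.g. capsule lengths) of shape (batch, 10) as its first
# output and the iterator yields integer class labels:
def evaluate_accuracy(data_iterator, net):
    data_iterator.reset()
    numerator = 0
    denominator = 0
    for batch in data_iterator:
        net.forward(batch)
        output = net.get_outputs()[0]                        # (batch, 10) class scores
        prediction = mx.nd.argmax(output, axis=1).asnumpy()
        label = batch.label[0].asnumpy()
        numerator += int((prediction == label).sum())
        denominator += label.shape[0]
    return float(numerator) / denominator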
def NeuralNet(epoch, batch_size, save_period):

    '''
    load_data

    1. SoftmaxOutput must be

    train_iter = mx.io.NDArrayIter(data={'data': to4d(train_img)}, label={'label': train_lbl}, batch_size=batch_size, shuffle=True)  # training data
    test_iter = mx.io.NDArrayIter(data={'data': to4d(test_img)}, label={'label': test_lbl}, batch_size=batch_size)  # test data

    2. LogisticRegressionOutput, LinearRegressionOutput, MakeLoss and so on must be

    train_iter = mx.io.NDArrayIter(data={'data': to4d(train_img)}, label={'label': train_lbl_one_hot}, batch_size=batch_size, shuffle=True)  # training data
    test_iter = mx.io.NDArrayIter(data={'data': to4d(test_img)}, label={'label': test_lbl_one_hot}, batch_size=batch_size)  # test data
    '''

    '''In this Autoencoder tutorial, we don't need the label data.'''
    (_, _, train_img) = dd.read_data_from_file('train-labels-idx1-ubyte.gz', 'train-images-idx3-ubyte.gz')
    (_, _, test_img) = dd.read_data_from_file('t10k-labels-idx1-ubyte.gz', 't10k-images-idx3-ubyte.gz')

    '''data loading referenced by Data Loading API'''
    train_iter = mx.io.NDArrayIter(data={'input': to2d(train_img)}, label={'input_': to2d(train_img)}, batch_size=batch_size, shuffle=True)  # training data
    test_iter = mx.io.NDArrayIter(data={'input': to2d(test_img)}, label={'input_': to2d(test_img)})  # test data

    '''
    Autoencoder network

    <structure>
    input - encode - middle - decode -> output
    '''
    input = mx.sym.Variable('input')
    output = mx.sym.Variable('input_')

    # encode
    affine1 = mx.sym.FullyConnected(data=input, name='encode', num_hidden=100)
    encode1 = mx.sym.Activation(data=affine1, name='sigmoid1', act_type="sigmoid")

    # middle
    affine2 = mx.sym.FullyConnected(data=encode1, name='middle', num_hidden=50)
    middle = mx.sym.Activation(data=affine2, name='sigmoid2', act_type="sigmoid")

    # decode
    affine3 = mx.sym.FullyConnected(data=middle, name='decode', num_hidden=100)
    decode1 = mx.sym.Activation(data=affine3, name='sigmoid3', act_type="sigmoid")  # renamed from 'sigmoid1' to avoid a duplicate symbol name

    # output
    result = mx.sym.FullyConnected(data=decode1, name='result', num_hidden=784)

    # LogisticRegressionOutput applies a sigmoid internally, so its targets should lie in [0, 1]
    # (here, the normalized input image itself).
    result = mx.sym.LogisticRegressionOutput(data=result, label=output)

    # We visualize the network structure with output sizes (the batch_size is ignored).
    shape = {"input": (batch_size, 784)}
    mx.viz.plot_network(symbol=result, shape=shape)  # The diagram can be found in the Jupyter notebook.

    print result.list_arguments()

    # First optimization method
    # weights save
    model_name = 'weights/Autoencoder'
    checkpoint = mx.callback.do_checkpoint(model_name, period=save_period)

    # training mod
    mod = mx.mod.Module(symbol=result, data_names=['input'], label_names=['input_'], context=mx.gpu(0))
    # test mod
    test = mx.mod.Module(symbol=result, data_names=['input'], label_names=['input_'], context=mx.gpu(0))

    # Network information
    print mod.data_names
    print mod.label_names
    print train_iter.provide_data
    print train_iter.provide_label

    '''If the binding below is already done by mod.fit, we don't have to write it,
    but when you load saved weights you must bind explicitly.'''
    mod.bind(data_shapes=train_iter.provide_data, label_shapes=train_iter.provide_label)

    # weights load - comment out the two lines below unless a checkpoint for epoch 100 exists.
    symbol, arg_params, aux_params = mx.model.load_checkpoint(model_name, 100)
    # set_params needs mod.bind first; if arg_params and aux_params are passed to mod.fit instead,
    # neither set_params nor the explicit bind above is needed.
    mod.set_params(arg_params, aux_params)

    '''If you want to modify the learning process, look into the mod.fit function.'''
    mod.fit(train_iter,
            initializer=mx.initializer.Xavier(rnd_type='gaussian', factor_type="avg", magnitude=1),
            optimizer='adam',  # optimizer
            optimizer_params={'learning_rate': 0.001},  # learning rate
            eval_metric=mx.metric.MSE(),
            # Once the loaded parameters are passed here, you need neither mod.set_params nor mod.bind.
            arg_params=None,
            aux_params=None,
            num_epoch=epoch,
            epoch_end_callback=checkpoint)

    # Network information
    print mod.data_shapes
    print mod.label_shapes
    print mod.output_shapes
    print mod.get_params()
    print mod.get_outputs()
    print "training_data : {}".format(mod.score(train_iter, ['mse']))
    print "Optimization complete."

    #################################TEST####################################

    '''load method 1 - load the saved parameters'''
    #symbol, arg_params, aux_params = mx.model.load_checkpoint(model_name, 100)

    '''load method 2 - take the trained mod.get_params() directly'''
    #arg_params, aux_params = mod.get_params()

    '''load method 3 - using the shared_module'''
    """
    Parameters
    shared_module : Module
        Default is `None`. This is used in bucketing. When not `None`, the shared module
        essentially corresponds to a different bucket -- a module with a different symbol
        but with the same sets of parameters (e.g. unrolled RNNs with different lengths).
    """
    test.bind(data_shapes=test_iter.provide_data, label_shapes=test_iter.provide_label, shared_module=mod, for_training=False)

    '''Uncomment the line below only when testing with 'load method 1' or 'load method 2'.'''
    #test.set_params(arg_params, aux_params)

    '''test'''
    column_size = 10
    row_size = 10  # column_size x row_size <= 10000
    result = test.predict(test_iter, num_batch=column_size * row_size).asnumpy()

    '''range adjustment : 0 ~ 1 -> 0 ~ 255'''
    result = result * 255.0

    '''generated image visualization'''
    fig_g, ax_g = plt.subplots(row_size, column_size, figsize=(column_size, row_size))
    fig_g.suptitle('generator')
    for j in xrange(row_size):
        for i in xrange(column_size):
            ax_g[j][i].set_axis_off()
            ax_g[j][i].imshow(np.reshape(result[i + j * column_size], (28, 28)), cmap='gray')
    fig_g.savefig("generator.png")

    '''real image visualization'''
    fig_r, ax_r = plt.subplots(row_size, column_size, figsize=(column_size, row_size))
    fig_r.suptitle('real')
    for j in xrange(row_size):
        for i in xrange(column_size):
            ax_r[j][i].set_axis_off()
            ax_r[j][i].imshow(test_img[i + j * column_size], cmap='gray')
    fig_r.savefig("real.png")

    plt.show()
def NeuralNet(epoch, batch_size, save_period):

    '''
    load_data

    1. SoftmaxOutput must be

    train_iter = mx.io.NDArrayIter(data={'data': to4d(train_img)}, label={'label': train_lbl}, batch_size=batch_size, shuffle=True)  # training data
    test_iter = mx.io.NDArrayIter(data={'data': to4d(test_img)}, label={'label': test_lbl}, batch_size=batch_size)  # test data

    2. LogisticRegressionOutput, LinearRegressionOutput, MakeLoss and so on must be

    train_iter = mx.io.NDArrayIter(data={'data': to4d(train_img)}, label={'label': train_lbl_one_hot}, batch_size=batch_size, shuffle=True)  # training data
    test_iter = mx.io.NDArrayIter(data={'data': to4d(test_img)}, label={'label': test_lbl_one_hot}, batch_size=batch_size)  # test data
    '''

    '''In this Autoencoder tutorial, we don't need the label data.'''
    (_, _, train_img) = dd.read_data_from_file('train-labels-idx1-ubyte.gz', 'train-images-idx3-ubyte.gz')
    (_, _, test_img) = dd.read_data_from_file('t10k-labels-idx1-ubyte.gz', 't10k-images-idx3-ubyte.gz')

    '''data loading referenced by Data Loading API'''
    train_iter = mx.io.NDArrayIter(data={'input': to2d(train_img)}, label={'input_': to2d(train_img)}, batch_size=batch_size, shuffle=True)  # training data
    test_iter = mx.io.NDArrayIter(data={'input': to2d(test_img)}, label={'input_': to2d(test_img)})  # test data

    '''
    Autoencoder network

    <structure>
    input - encode - middle - decode -> output
    '''
    input = mx.sym.Variable('input')
    output = mx.sym.Variable('input_')

    # encode
    affine1 = mx.sym.FullyConnected(data=input, name='encode', num_hidden=100)
    encode1 = mx.sym.Activation(data=affine1, name='sigmoid1', act_type="sigmoid")

    # middle
    affine2 = mx.sym.FullyConnected(data=encode1, name='middle', num_hidden=50)
    middle = mx.sym.Activation(data=affine2, name='sigmoid2', act_type="sigmoid")

    # decode
    affine3 = mx.sym.FullyConnected(data=middle, name='decode', num_hidden=100)
    decode1 = mx.sym.Activation(data=affine3, name='sigmoid3', act_type="sigmoid")  # renamed from 'sigmoid1' to avoid a duplicate symbol name

    # output
    result = mx.sym.FullyConnected(data=decode1, name='result', num_hidden=784)

    # LogisticRegressionOutput applies a sigmoid internally, so its targets should lie in [0, 1]
    # (here, the normalized input image itself).
    result = mx.sym.LogisticRegressionOutput(data=result, label=output)

    # We visualize the network structure with output sizes (the batch_size is ignored).
    shape = {"input": (batch_size, 784)}
    mx.viz.plot_network(symbol=result, shape=shape)  # The diagram can be found in the Jupyter notebook.

    print result.list_arguments()

    # training mod
    mod = mx.mod.Module(symbol=result, data_names=['input'], label_names=['input_'], context=mx.gpu(0))
    mod.bind(data_shapes=train_iter.provide_data, label_shapes=train_iter.provide_label)

    # load the saved mod data - comment this out unless weights/mod-100.params exists.
    mod.load_params("weights/mod-100.params")
    # init_params keeps already-initialized values by default (force_init=False).
    mod.init_params(initializer=mx.initializer.Xavier(rnd_type='uniform', factor_type='avg', magnitude=3))
    mod.init_optimizer(optimizer='adam', optimizer_params={'learning_rate': 0.01})

    # test mod
    test = mx.mod.Module(symbol=result, data_names=['input'], label_names=['input_'], context=mx.gpu(0))

    '''load method 1 - using the shared_module'''
    """
    Parameters
    shared_module : Module
        Default is `None`. This is used in bucketing. When not `None`, the shared module
        essentially corresponds to a different bucket -- a module with a different symbol
        but with the same sets of parameters (e.g. unrolled RNNs with different lengths).
    """
    test.bind(data_shapes=test_iter.provide_data, label_shapes=test_iter.provide_label, shared_module=mod, for_training=False)

    # Network information
    print mod.data_names
    print mod.label_names
    print train_iter.provide_data
    print train_iter.provide_label

    '''############ Although not required, the following code should be declared. #################'''

    '''make evaluation method 1 - using existing metrics:
    metrics = {
        'acc': Accuracy, 'accuracy': Accuracy, 'ce': CrossEntropy, 'f1': F1,
        'mae': MAE, 'mse': MSE, 'rmse': RMSE, 'top_k_accuracy': TopKAccuracy
    }'''
    metric = mx.metric.create(['acc', 'mse'])

    '''make evaluation method 2 - making a new one.'''
    '''
    Custom evaluation metric that takes an NDArray function.
    Parameters:
        feval (callable(label, pred)) - customized evaluation function
        name (str, optional) - the name of the metric
        allow_extra_outputs (bool) - if true, the prediction outputs can have extra outputs;
                                     useful in RNNs, where states are also produced in outputs.
    '''
    def zero(label, pred):
        return 0

    null = mx.metric.CustomMetric(zero)

    for epoch in xrange(1, epoch + 1, 1):
        print "epoch : {}".format(epoch)
        train_iter.reset()
        #total_batch_number = np.ceil(len(train_img) / (batch_size * 1.0))
        #temp = 0
        for batch in train_iter:
            mod.forward(batch, is_train=True)
            mod.backward()
            mod.update()

            # cost
            #temp += (mod.get_outputs()[0].asnumpy() - batch.data[0].asnumpy())

        print "training_data : {}".format(mod.score(train_iter, ['mse']))
        #cost = (0.5 * np.square(temp) / (total_batch_number * 1.0)).mean()
        #print "cost value : {}".format(cost)

        # Save the data
        if epoch % save_period == 0:
            print('Saving weights')
            mod.save_params("weights/mod-{}.params".format(epoch))

    # Network information
    print mod.data_shapes
    print mod.label_shapes
    print mod.output_shapes
    print mod.get_params()
    print mod.get_outputs()
    print "Optimization complete."

    #################################TEST####################################

    '''load method 2 - take the trained mod.get_params() directly'''
    #arg_params, aux_params = mod.get_params()

    '''Uncomment the line below only when testing with 'load method 2'.'''
    #test.set_params(arg_params, aux_params)

    '''test'''
    column_size = 10
    row_size = 10  # column_size x row_size <= 10000
    result = test.predict(test_iter, num_batch=column_size * row_size).asnumpy()

    '''range adjustment : 0 ~ 1 -> 0 ~ 255'''
    result = result * 255.0

    '''generated image visualization'''
    fig_g, ax_g = plt.subplots(row_size, column_size, figsize=(column_size, row_size))
    fig_g.suptitle('generator')
    for j in xrange(row_size):
        for i in xrange(column_size):
            ax_g[j][i].set_axis_off()
            ax_g[j][i].imshow(np.reshape(result[i + j * column_size], (28, 28)), cmap='gray')
    fig_g.savefig("generator.png")

    '''real image visualization'''
    fig_r, ax_r = plt.subplots(row_size, column_size, figsize=(column_size, row_size))
    fig_r.suptitle('real')
    for j in xrange(row_size):
        for i in xrange(column_size):
            ax_r[j][i].set_axis_off()
            ax_r[j][i].imshow(test_img[i + j * column_size], cmap='gray')
    fig_r.savefig("real.png")

    plt.show()
def NeuralNet(epoch, batch_size, save_period):

    time_step = 28
    hidden_unit_number1 = 100
    hidden_unit_number2 = 100
    fc_number = 100
    class_number = 10
    use_cudnn = True

    '''
    load_data

    1. SoftmaxOutput must be

    train_iter = mx.io.NDArrayIter(data={'data': to4d(train_img)}, label={'label': train_lbl}, batch_size=batch_size, shuffle=True)  # training data
    test_iter = mx.io.NDArrayIter(data={'data': to4d(test_img)}, label={'label': test_lbl}, batch_size=batch_size)  # test data

    or

    train_iter = mx.io.NDArrayIter(data={'data': to4d(train_img)}, label={'label': train_lbl_one_hot}, batch_size=batch_size, shuffle=True)  # training data
    test_iter = mx.io.NDArrayIter(data={'data': to4d(test_img)}, label={'label': test_lbl_one_hot}, batch_size=batch_size)  # test data

    2. LogisticRegressionOutput, LinearRegressionOutput, MakeLoss and so on must be

    train_iter = mx.io.NDArrayIter(data={'data': to4d(train_img)}, label={'label': train_lbl_one_hot}, batch_size=batch_size, shuffle=True)  # training data
    test_iter = mx.io.NDArrayIter(data={'data': to4d(test_img)}, label={'label': test_lbl_one_hot}, batch_size=batch_size)  # test data
    '''
    (train_lbl_one_hot, train_lbl, train_img) = dd.read_data_from_file('train-labels-idx1-ubyte.gz', 'train-images-idx3-ubyte.gz')
    (test_lbl_one_hot, test_lbl, test_img) = dd.read_data_from_file('t10k-labels-idx1-ubyte.gz', 't10k-images-idx3-ubyte.gz')

    '''data loading referenced by Data Loading API'''
    train_iter = mx.io.NDArrayIter(data={'data': train_img}, label={'label': train_lbl_one_hot}, batch_size=batch_size, shuffle=True)  # training data
    test_iter = mx.io.NDArrayIter(data={'data': test_img}, label={'label': test_lbl_one_hot})  # test data

    ####################################################-Network-################################################################
    data = mx.sym.Variable('data')
    label = mx.sym.Variable('label')
    data = mx.sym.transpose(data, axes=(1, 0, 2))  # (time, batch, column)

    '''1. RNN cell declaration'''
    '''
    FusedRNNCell fuses the RNN layers across time steps into one kernel.
    It improves speed but is less flexible, and is currently supported only with cuDNN on GPU.
    '''
    if use_cudnn:  # faster
        lstm1 = mx.rnn.FusedRNNCell(num_hidden=hidden_unit_number1, mode="lstm", prefix="lstm1_", get_next_state=True)
        lstm2 = mx.rnn.FusedRNNCell(num_hidden=hidden_unit_number2, mode="lstm", prefix="lstm2_", get_next_state=True)
    else:
        lstm1 = mx.rnn.LSTMCell(num_hidden=hidden_unit_number1, prefix="lstm1_")
        lstm2 = mx.rnn.LSTMCell(num_hidden=hidden_unit_number2, prefix="lstm2_")

    '''2. Unroll the RNN cell on the time axis.'''
    '''
    unroll's return values
    outputs : list of Symbol - the output symbols
    states  : Symbol or nested list of Symbol - has the same structure as begin_state()
    '''
    layer1, state1 = lstm1.unroll(length=time_step, inputs=data, merge_outputs=True, layout='TNC')
    layer1 = mx.sym.Dropout(layer1, p=0.3)
    layer2, state2 = lstm2.unroll(length=time_step, inputs=layer1, merge_outputs=True, layout="TNC")
    rnn_output = mx.sym.Reshape(state2[-1], shape=(-1, hidden_unit_number2))  # the last state has hidden_unit_number2 units

    '''FullyConnected layer'''
    affine1 = mx.sym.FullyConnected(data=rnn_output, num_hidden=fc_number, name='affine1')
    act1 = mx.sym.Activation(data=affine1, act_type='sigmoid', name='sigmoid1')
    affine2 = mx.sym.FullyConnected(data=act1, num_hidden=class_number, name='affine2')
    output = mx.sym.SoftmaxOutput(data=affine2, label=label, name='softmax')

    # We visualize the network structure with output sizes (the batch_size is ignored).
    shape = {"data": (time_step, batch_size, 28)}
    mx.viz.plot_network(symbol=output, shape=shape)  # The diagram can be found in the Jupyter notebook.

    print output.list_arguments()

    # training mod
    mod = mx.module.Module(symbol=output, data_names=['data'], label_names=['label'], context=mx.gpu(0))
    mod.bind(data_shapes=train_iter.provide_data, label_shapes=train_iter.provide_label)

    # load the saved mod data - comment this out unless weights/Neural_Net-100.params exists.
    mod.load_params("weights/Neural_Net-100.params")
    # init_params keeps already-initialized values by default (force_init=False).
    mod.init_params(initializer=mx.initializer.Xavier(rnd_type='gaussian', factor_type='avg', magnitude=1))
    mod.init_optimizer(optimizer='adam', optimizer_params={'learning_rate': 0.001})

    # test mod
    test = mx.mod.Module(symbol=output, data_names=['data'], label_names=['label'], context=mx.gpu(0))

    '''load method 1 - using the shared_module'''
    """
    Parameters
    shared_module : Module
        Default is `None`. This is used in bucketing. When not `None`, the shared module
        essentially corresponds to a different bucket -- a module with a different symbol
        but with the same sets of parameters (e.g. unrolled RNNs with different lengths).
    """
    test.bind(data_shapes=test_iter.provide_data, label_shapes=test_iter.provide_label, shared_module=mod, for_training=False)

    # Network information
    print mod.data_names
    print mod.label_names
    print train_iter.provide_data
    print train_iter.provide_label

    '''############ Although not required, the following code should be declared. #################'''

    '''make evaluation method 1 - using existing metrics:
    metrics = {
        'acc': Accuracy, 'accuracy': Accuracy, 'ce': CrossEntropy, 'f1': F1,
        'mae': MAE, 'mse': MSE, 'rmse': RMSE, 'top_k_accuracy': TopKAccuracy
    }'''
    metric = mx.metric.create(['acc', 'mse'])

    '''make evaluation method 2 - making a new one.'''
    '''
    Custom evaluation metric that takes an NDArray function.
    Parameters:
        feval (callable(label, pred)) - customized evaluation function
        name (str, optional) - the name of the metric
        allow_extra_outputs (bool) - if true, the prediction outputs can have extra outputs;
                                     useful in RNNs, where states are also produced in outputs.
    '''
    def zero(label, pred):
        return 0

    null = mx.metric.CustomMetric(zero)

    for epoch in xrange(1, epoch + 1, 1):
        print "epoch : {}".format(epoch)
        train_iter.reset()
        #total_batch_number = np.ceil(len(train_img) / (batch_size * 1.0))
        #temp = 0
        for batch in train_iter:
            mod.forward(batch, is_train=True)
            mod.backward()
            mod.update()

            # cost
            #temp += (mod.get_outputs()[0].asnumpy() - batch.label[0].asnumpy())
            #cost = (0.5 * np.square(temp) / (total_batch_number * 1.0)).mean()

        result = test.predict(test_iter).asnumpy().argmax(axis=1)
        print "training_data : {}".format(mod.score(train_iter, ['mse', 'acc']))
        print 'accuracy during learning. : {}%'.format(float(sum(test_lbl == result)) / len(result) * 100.0)
        #print "cost value : {}".format(cost)

        # Save the data
        if epoch % save_period == 0:
            print('Saving weights')
            mod.save_params("weights/Neural_Net-{}.params".format(epoch))  # fixed: the format placeholder was missing

    # Network information
    print mod.data_shapes
    print mod.label_shapes
    print mod.output_shapes
    print mod.get_params()
    print mod.get_outputs()
    print "Optimization complete."

    #################################TEST####################################

    '''load method 2 - take the trained mod.get_params() directly'''
    #arg_params, aux_params = mod.get_params()

    '''Uncomment the line below only when testing with 'load method 2'.'''
    #test.set_params(arg_params, aux_params)

    # batch-by-batch accuracy
    # To use the code below, the number of test samples divided by batch_size must be an integer.
    '''
    for preds, i_batch, eval_batch in mod.iter_predict(test_iter):
        pred_label = preds[0].asnumpy().argmax(axis=1)
        label = eval_batch.label[0].asnumpy().argmax(axis=1)
        print('batch %d, accuracy %f' % (i_batch, float(sum(pred_label == label)) / len(label)))
    '''

    '''test'''
    result = test.predict(test_iter).asnumpy().argmax(axis=1)
    print 'Final accuracy : {}%'.format(float(sum(test_lbl == result)) / len(result) * 100.0)
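# A minimal driver for the function above, kept for illustration; the argument values are
# arbitrary (100 epochs, batches of 100, a checkpoint every 100 epochs):
if __name__ == "__main__":
    NeuralNet(epoch=100, batch_size=100, save_period=100)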