def test_word_embedding_analogy_evaluation_models(analogy_function):
    """Check ``WordEmbeddingAnalogy`` on the first ten Google analogy questions.

    The evaluator is built for ``k`` in ``(1, 3)``, with and without
    excluding the question words, and the top-1 accuracy plus the width of
    the prediction matrix are asserted for every combination.
    """
    # Only the first ten analogy questions are used.
    questions = [row
                 for position, row in enumerate(nlp.data.GoogleAnalogyTestSet())
                 if position < 10]

    embedding = nlp.embedding.create(
        'fasttext', source='wiki.simple',
        embedding_root='tests/data/embedding')
    vocab = nlp.vocab.Vocab(nlp.data.utils.Counter(embedding.idx_to_token))
    vocab.set_embedding(embedding)

    # Encode the four words of every question as vocabulary indices.
    coded = nd.array([[vocab[row[col]] for col in range(4)]
                      for row in questions])
    first, second, third, fourth = (coded[:, col] for col in range(4))

    for k in (1, 3):
        for exclude_question_words in (True, False):
            evaluator = nlp.embedding.evaluation.WordEmbeddingAnalogy(
                idx_to_vec=vocab.embedding.idx_to_vec,
                analogy_function=analogy_function,
                k=k,
                exclude_question_words=exclude_question_words)
            evaluator.initialize()

            pred_idxs = evaluator(first, second, third)
            top1 = pred_idxs[:, 0]
            accuracy = nd.mean(top1 == nd.array(fourth)).asscalar()

            if exclude_question_words:
                # The wiki.simple vectors are weak, so the bar is low.
                assert accuracy >= 0.29
            else:
                # Without exclusion most predictions should be wrong ...
                assert accuracy <= 0.1
                # ... because the model mostly echoes the third word.
                accuracy_w3 = nd.mean(top1 == nd.array(third))
                assert accuracy_w3.asscalar() >= 0.89

            # One column per requested prediction.
            assert pred_idxs.shape[1] == k
def backward(self, out_grads=None):
    """Back-propagate through every fc7 module, then through ``self._curr_module``.

    The fc7 outputs of all modules in ``self._arcface_modules`` are turned
    into one softmax shared across the modules (subtract the global row
    maximum, exponentiate, divide by the global row sum). The one-hot label
    is subtracted to form each module's output gradient, each module is run
    backward, and the sum of their input gradients is handed to
    ``self._curr_module.backward``.

    Args:
        out_grads: Never read; the name is rebound inside the per-module loop.
    """
    #print('in backward')
    assert self.binded and self.params_initialized
    #tmp_ctx = self._ctx_cpu
    tmp_ctx = self._ctx_single_gpu
    fc7_outs = []
    # NOTE(review): get_ndarray presumably returns a cached buffer for
    # (context, name, shape) -- confirm against its definition.
    ctx_fc7_max = self.get_ndarray(tmp_ctx, 'ctx_fc7_max', (self._batch_size, len(self._context)))
    #local_fc7_max = nd.zeros( (self.global_label.shape[0],1), ctx=mx.cpu())
    arcface_module_outputs = []
    # Pass 1: collect each module's outputs and its per-row fc7 maximum
    # (one column of ctx_fc7_max per module).
    for i, _module in enumerate(self._arcface_modules):
        #_fc7 = _module.get_outputs(merge_multi_context=True)[0]
        out = _module.get_outputs(merge_multi_context=True)
        #print(out[0].shape)
        #print(out[1].shape)
        arcface_module_outputs.append(out)
        _fc7 = out[0]
        fc7_outs.append(_fc7)
        _fc7_max = nd.max(_fc7, axis=1).as_in_context(tmp_ctx)
        ctx_fc7_max[:,i] = _fc7_max

    # Row-wise maximum over all modules.
    local_fc7_max = self.get_ndarray(tmp_ctx, 'local_fc7_max', (self._batch_size, 1))
    nd.max(ctx_fc7_max, axis=1, keepdims=True, out=local_fc7_max)
    global_fc7_max = local_fc7_max
    #local_fc7_sum = None
    local_fc7_sum = self.get_ndarray(tmp_ctx, 'local_fc7_sum', (self._batch_size,1))
    local_fc7_sum[:,:] = 0.0
    # Pass 2: exp(fc7 - max) per module, accumulating the row sums on tmp_ctx.
    for i, _module in enumerate(self._arcface_modules):
        # NOTE(review): get_ndarray2 presumably copies the given array onto
        # the given context under the given name -- confirm.
        _max = self.get_ndarray2(fc7_outs[i].context, 'fc7_max', global_fc7_max)
        fc7_outs[i] = nd.broadcast_sub(fc7_outs[i], _max)
        fc7_outs[i] = nd.exp(fc7_outs[i])
        _sum = nd.sum(fc7_outs[i], axis=1, keepdims=True).as_in_context(tmp_ctx)
        local_fc7_sum += _sum
    global_fc7_sum = local_fc7_sum

    # Periodic accuracy print. fc7_outs holds un-normalised exponentials at
    # this point; arg-max is unaffected by the missing normalisation.
    if self._iter%self._verbose==0:
        #_ctx = self._context[-1]
        _ctx = self._ctx_cpu
        _probs = []
        for i, _module in enumerate(self._arcface_modules):
            _prob = self.get_ndarray2(_ctx, '_fc7_prob_%d'%i, fc7_outs[i])
            _probs.append(_prob)
        fc7_prob = self.get_ndarray(_ctx, 'test_fc7_prob', (self._batch_size, self._ctx_num_classes*len(self._context)))
        nd.concat(*_probs, dim=1, out=fc7_prob)
        fc7_pred = nd.argmax(fc7_prob, axis=1)
        local_label = self.global_label - self._local_class_start
        #local_label = self.get_ndarray2(_ctx, 'test_label', local_label)
        _pred = nd.equal(fc7_pred, local_label)
        print('{fc7_acc}', self._iter, nd.mean(_pred).asnumpy()[0])

    #local_fc1_grad = []
    #fc1_grad_ctx = self._ctx_cpu
    fc1_grad_ctx = self._ctx_single_gpu
    local_fc1_grad = self.get_ndarray(fc1_grad_ctx, 'local_fc1_grad', (self._batch_size,self._emb_size))
    local_fc1_grad[:,:] = 0.0
    total_eloss = []
    # The cross-entropy value is only computed and printed every
    # `celoss_verbose` iterations.
    celoss_verbose = 1000
    if self._iter%celoss_verbose==0:
        fc7_celoss = self.get_ndarray(tmp_ctx, 'test_fc7_celoss', (self._batch_size,))
        fc7_celoss[:] = 0.0

    # Pass 3: normalise to probabilities, build each module's gradient and
    # run its backward pass.
    for i, _module in enumerate(self._arcface_modules):
        _sum = self.get_ndarray2(fc7_outs[i].context, 'fc7_sum', global_fc7_sum)
        fc7_outs[i] = nd.broadcast_div(fc7_outs[i], _sum)
        # NOTE(review): `a` and `b` are computed but never used below.
        a = i*self._ctx_num_classes
        b = (i+1)*self._ctx_num_classes
        # Shift the global labels by this module's class offset before
        # one-hot encoding.
        _label = self.global_label - self._ctx_class_start[i]
        _label = self.get_ndarray2(fc7_outs[i].context, 'label', _label)
        onehot_label = self.get_ndarray(fc7_outs[i].context, 'label_onehot', (self._batch_size, self._ctx_num_classes))
        nd.one_hot(_label, depth=self._ctx_num_classes, on_value = 1.0, off_value = 0.0, out=onehot_label)
        #print(fc7_outs[i].shape, onehot_label.shape)

        if self._iter%celoss_verbose==0:
            # Probability at the label position; rows whose label does not
            # fall in this module's range presumably contribute 0 -- confirm
            # one_hot's out-of-range behaviour.
            _ce_loss = fc7_outs[i] * onehot_label
            _ce_loss = nd.sum(_ce_loss, axis=1)
            fc7_celoss += _ce_loss.as_in_context(tmp_ctx)
        # probabilities - one_hot: the gradient fed to the module as fc7's
        # output gradient.
        fc7_outs[i] -= onehot_label

        out = arcface_module_outputs[i]
        out_grads = [fc7_outs[i]]
        # Every additional module output gets an all-ones gradient.
        for j in range(1, len(out)):
            eloss = out[j]
            #print('eloss%d:'%j, eloss.shape)
            #print(out_grads[0].shape)
            #egrad_shape = (out_grads[0].shape[0], eloss.shape[0])
            egrad_shape = eloss.shape
            egrad = self.get_ndarray(fc7_outs[i].context, 'egrad%d'%j, egrad_shape)
            #egrad[:][:] = 1.0/egrad_shape[0]
            egrad[:][:] = 1.0
            out_grads.append(egrad)
            if self._iter%self._verbose==0:
                total_eloss.append(np.mean(eloss.asnumpy()))

        _module.backward(out_grads = out_grads)
        #ctx_fc1_grad = _module.get_input_grads()[0].as_in_context(mx.cpu())
        ctx_fc1_grad = self.get_ndarray2(fc1_grad_ctx, 'ctx_fc1_grad_%d'%i, _module.get_input_grads()[0])
        # Accumulate the input gradients of all modules.
        local_fc1_grad += ctx_fc1_grad

    if self._iter%self._verbose==0 and len(total_eloss)>0:
        print('{eloss}', self._iter, np.mean(total_eloss))
    #if self._iter%self._verbose==0:
    if self._iter%celoss_verbose==0:
        # Mean of -log(probability at the label).
        ce_loss = nd.log(fc7_celoss) * -1.0
        ce_loss = nd.mean(ce_loss)
        print('CELOSS,%d,%f'% (self._iter, ce_loss.asnumpy()))

    global_fc1_grad = local_fc1_grad
    self._curr_module.backward(out_grads = [global_fc1_grad])
def LSTM(epoch=100, batch_size=100, save_period=100, load_period=100, learning_rate=0.1, ctx=mx.gpu(0)):
    """Train a hand-written LSTM classifier on FashionMNIST with Adam.

    Each sample is reshaped to ``(time_step, num_inputs) = (28, 28)`` and fed
    to the cell one time step at a time; the last hidden state is projected
    to 10 soft-max outputs.

    Args:
        epoch: Number of passes over ``train_data``.
        batch_size: Batch size handed to ``FashionMNIST``.
        save_period: Weights are saved whenever the epoch index is a
            multiple of this value.
        load_period: Epoch suffix of the weight file to resume from; when
            ``weights/FashionMNIST_LSTMweights-<load_period>`` does not exist
            the weights are randomly initialised.
        learning_rate: Adam learning rate.
        ctx: MXNet context holding parameters and data.

    Returns:
        The string ``"optimization completed"``.
    """
    train_data, test_data = FashionMNIST(batch_size)

    # network parameter
    time_step = 28
    num_inputs = 28
    num_hidden = 200
    num_outputs = 10

    path = "weights/FashionMNIST_LSTMweights-{}".format(load_period)
    if os.path.exists(path):
        print("loading weights")
        # Move every stored array onto the requested context.
        params = [weight.as_in_context(ctx) for weight in nd.load(path)]
    else:
        print("initializing weights")
        # Four input->hidden, four hidden->hidden and four bias arrays
        # (forget, input, output, candidate), in that order.
        gate_shapes = ([(num_hidden, num_inputs)] * 4
                       + [(num_hidden, num_hidden)] * 4
                       + [(num_hidden, )] * 4)
        with ctx:
            params = [nd.random.normal(loc=0, scale=0.01, shape=shape)
                      for shape in gate_shapes]
            params.append(nd.random.normal(loc=0, scale=0.1, shape=(num_outputs, num_hidden)))
            params.append(nd.random.normal(loc=0, scale=0.1, shape=(num_outputs, )))

    # Raises ValueError when the file does not hold exactly 14 arrays.
    (wxhf, wxhi, wxho, wxhg,
     whhf, whhi, whho, whhg,
     bhf, bhi, bho, bhg,
     why, by) = params

    # attach gradient!!!
    for param in params:
        param.attach_grad()

    def _gate(x, h_state, w_x, w_h, bias, act_type):
        """Return act(x @ w_x.T + h_state @ w_h.T + bias) for one LSTM gate."""
        pre_activation = (
            nd.FullyConnected(data=x, weight=w_x, no_bias=True, num_hidden=num_hidden)
            + nd.FullyConnected(data=h_state, weight=w_h, no_bias=True, num_hidden=num_hidden)
            + bias)
        return nd.Activation(pre_activation, act_type=act_type)

    def LSTM_Cell(input, h_state, c_state):
        """Unroll the LSTM over the leading axis of ``input``.

        Returns the soft-max class probabilities computed from the last
        hidden state, together with the final hidden and cell states.
        """
        for x in input:
            f_t = _gate(x, h_state, wxhf, whhf, bhf, "sigmoid")
            i_t = _gate(x, h_state, wxhi, whhi, bhi, "sigmoid")
            o_t = _gate(x, h_state, wxho, whho, bho, "sigmoid")
            g_t = _gate(x, h_state, wxhg, whhg, bhg, "tanh")
            c_state = nd.multiply(f_t, c_state) + nd.multiply(i_t, g_t)
            h_state = nd.multiply(o_t, nd.tanh(c_state))
        output = nd.FullyConnected(data=h_state, weight=why, bias=by, num_hidden=num_outputs)
        output = nd.softmax(data=output)
        return output, h_state, c_state

    def cross_entropy(output, label):
        """Per-sample cross entropy; sums over every axis except axis 0."""
        return -nd.sum(label * nd.log(output), axis=0, exclude=True)

    # Adam optimizer
    # Each parameter gets its own optimizer index: Adam keeps its update
    # counter (used for bias correction) per index, so sharing index 0
    # advanced the counter once per parameter instead of once per step.
    optimizer = mx.optimizer.Adam(rescale_grad=1, learning_rate=learning_rate)
    state = [optimizer.create_state(j, param) for j, param in enumerate(params)]

    for i in tqdm(range(1, epoch + 1, 1)):
        for data, label in train_data:
            h_state = nd.zeros(shape=(data.shape[0], num_hidden), ctx=ctx)
            c_state = nd.zeros(shape=(data.shape[0], num_hidden), ctx=ctx)

            data = data.as_in_context(ctx)
            data = data.reshape(shape=(-1, time_step, num_inputs))
            # Time-major layout so LSTM_Cell can iterate over time steps.
            data = nd.transpose(data=data, axes=(1, 0, 2))
            label = label.as_in_context(ctx)
            label = nd.one_hot(label, num_outputs)

            with autograd.record():
                outputs, h_state, c_state = LSTM_Cell(data, h_state, c_state)
                loss = cross_entropy(outputs, label)  # (batch_size,)
            loss.backward()

            cost = nd.mean(loss).asscalar()
            for j, param in enumerate(params):
                optimizer.update(j, param, param.grad, state[j])

        test_accuracy = evaluate_accuracy(test_data, time_step, num_inputs, num_hidden, LSTM_Cell, ctx)
        print(" epoch : {} , last batch cost : {}".format(i, cost))
        print("Test_acc : {0:0.3f}%".format(test_accuracy * 100))

        # weight_save
        if i % save_period == 0:
            if not os.path.exists("weights"):
                os.makedirs("weights")
            print("saving weights")
            nd.save("weights/FashionMNIST_LSTMweights-{}".format(i), params)

    test_accuracy = evaluate_accuracy(test_data, time_step, num_inputs, num_hidden, LSTM_Cell, ctx)
    print("Test_acc : {0:0.3f}%".format(test_accuracy * 100))

    return "optimization completed"
def CNN_Autoencoder(epoch = 100 , batch_size=128, save_period=10 , load_period=100 ,optimizer="sgd",learning_rate= 0.01 , dataset = "MNIST", ctx=mx.gpu(0)):
    """Train a convolutional auto-encoder on MNIST or FashionMNIST.

    The network is four 3x3 convolutions followed by four 3x3 transposed
    convolutions, trained with an L2 reconstruction loss against its own
    input. Weights are loaded from ``weights/<dataset>-<load_period>.params``
    when that file exists and saved every ``save_period`` epochs.

    Returns:
        ``"The dataset does not exist."`` for an unknown ``dataset``,
        otherwise ``"optimization completed"``.
    """
    # data selection
    if dataset == "MNIST":
        train_data, test_data = MNIST(batch_size)
        path = "weights/MNIST-{}.params".format(load_period)
    elif dataset == "FashionMNIST":
        train_data, test_data = FashionMNIST(batch_size)
        path = "weights/FashionMNIST-{}.params".format(load_period)
    else:
        return "The dataset does not exist."

    # Steps: define network -> initialize parameters -> loop over inputs ->
    # forward -> compute loss -> backprop -> update parameters.
    #
    # Output sizes:
    #   convolution:   output_size = ([input_size + 2*pad - kernel_size] / stride) + 1
    #   deconvolution: output_size = stride * (input_size - 1) + kernel - 2*pad
    #
    # Imperative vs symbolic: a hybridized network no longer re-enters the
    # Python forward function on every call, which is faster but rules out
    # prints or Python control flow inside forward.

    # convolution autoencoder (HybridSequential so it can be hybridized)
    shared_args = dict(kernel_size=(3, 3), strides=(1, 1), use_bias=True)
    net = gluon.nn.HybridSequential()
    with net.name_scope():
        # Encoder; for a 28x28 input the maps shrink 26 -> 24 -> 22 -> 20.
        for width in (60, 30, 15, 10):
            net.add(gluon.nn.Conv2D(channels=width, activation='relu', **shared_args))
        # Decoder; the maps grow back 22 -> 24 -> 26 ...
        for width in (15, 30, 60):
            net.add(gluon.nn.Conv2DTranspose(channels=width, activation='relu', **shared_args))
        # ... -> 28, single channel squashed by a sigmoid.
        net.add(gluon.nn.Conv2DTranspose(channels=1, activation='sigmoid', **shared_args))

    net.hybridize()  # using symbolic for faster learning

    # weights initialization
    if not os.path.exists(path):
        print("initializing weights")
        net.collect_params().initialize(mx.init.Normal(sigma=0.1), ctx=ctx)
    else:
        print("loading weights")
        net.load_params(filename=path, ctx=ctx)

    # optimizer
    trainer = gluon.Trainer(net.collect_params(), optimizer, {"learning_rate": learning_rate})
    reconstruction_loss = gluon.loss.L2Loss()

    # learning
    for i in tqdm(range(1, epoch + 1)):
        for data, label in train_data:
            data = data.as_in_context(ctx)
            target = data  # the auto-encoder reconstructs its own input
            with autograd.record(train_mode=True):
                output = net(data)
                loss = reconstruction_loss(output, target)
            cost = nd.mean(loss).asscalar()
            loss.backward()
            trainer.step(batch_size, ignore_stale_grad=True)

        print(" epoch : {} , last batch cost : {}".format(i, cost))

        # weight_save
        if i % save_period != 0:
            continue
        if not os.path.exists("weights"):
            os.makedirs("weights")
        print("saving weights")
        if dataset == "FashionMNIST":
            net.save_params("weights/FashionMNIST-{}.params".format(i))
        elif dataset == "MNIST":
            net.save_params("weights/MNIST-{}.params".format(i))

    # show image
    generate_image(test_data, net, ctx, dataset)

    return "optimization completed"
def accuracy(output, label):
    """Return the mean of ``output.argmax(axis=1) == label`` as a scalar."""
    predicted = output.argmax(axis=1)
    matches = predicted == label
    return nd.mean(matches).asscalar()
def train():
    """Run the adversarial training loop and log progress to a SummaryWriter.

    Reads its configuration from names defined outside this function
    (``epochs``, ``netG``, ``netD``, ``trainerG``, ``trainerD``,
    ``train_data``, ``val_data``, ``ctx``, ``lambda1``, ``GAN_loss``,
    ``loss_2nd``, ...). Per batch it updates the discriminator, then the
    generator, and records both losses. Per epoch it records the binary
    training accuracy and a validation loss, and saves both networks every
    ``check_point_interval`` epochs and on the last epoch.
    """
    image_pool = ImagePool(pool_size)
    metric = mx.metric.CustomMetric(facc)

    logging.basicConfig(level=logging.DEBUG)

    # define a summary writer that logs data and flushes to the file every 5 seconds
    sw = SummaryWriter(logdir='%s' % dir_out_sw, flush_secs=5, verbose=False)
    global_step = 0

    for epoch in range(epochs):
        if epoch == 0:
            netG.hybridize()
            netD.hybridize()

        tic = time.time()
        btic = time.time()
        train_data.reset()
        val_data.reset()
        for local_step, batch in enumerate(train_data):
            ############################
            # (1) Update D network: maximize log(D(x, y)) + log(1 - D(x, G(x, z)))
            ###########################
            # Stack input, target and mask on the channel axis so that the
            # augmenter is applied to all three together.
            tmp = mx.nd.concat(batch.data[0], batch.data[1], batch.data[2], dim=1)
            tmp = augmenter(tmp, patch_size=128, offset=offset, aug_type=1,
                            aug_methods=aug_methods, random_crop=False)
            real_in = tmp[:, :1].as_in_context(ctx)
            real_out = tmp[:, 1:2].as_in_context(ctx)
            m = tmp[:, 2:3].as_in_context(ctx)  # mask

            fake_out = netG(real_in) * m

            # loss weight based on mask, applied on L1 loss
            if no_loss_weights:
                loss_weight = m
            else:
                # Zero entries of the mask get weight 0.1 instead of 0.
                loss_weight = m.asnumpy()
                loss_weight[loss_weight == 0] = .1
                loss_weight = mx.nd.array(loss_weight, ctx=m.context)

            fake_concat = image_pool.query(nd.concat(real_in, fake_out, dim=1))
            with autograd.record():
                # Train with fake image
                # Use image pooling to utilize history images
                output = netD(fake_concat)
                fake_label = nd.zeros(output.shape, ctx=ctx)
                errD_fake = GAN_loss(output, fake_label)
                metric.update([fake_label, ], [output, ])

                # Train with real image
                real_concat = nd.concat(real_in, real_out, dim=1)
                output = netD(real_concat)
                real_label = nd.ones(output.shape, ctx=ctx)
                errD_real = GAN_loss(output, real_label)
                errD = (errD_real + errD_fake) * 0.5
                errD.backward()
                metric.update([real_label, ], [output, ])

            trainerD.step(batch.data[0].shape[0])

            ############################
            # (2) Update G network: maximize log(D(x, G(x, z))) - lambda1 * L1(y, G(x, z))
            ###########################
            with autograd.record():
                fake_out = netG(real_in)
                fake_concat = nd.concat(real_in, fake_out, dim=1)
                output = netD(fake_concat)
                real_label = nd.ones(output.shape, ctx=ctx)
                errG = GAN_loss(output, real_label) + loss_2nd(
                    real_out, fake_out, loss_weight) * lambda1
                errG.backward()

            trainerG.step(batch.data[0].shape[0])

            sw.add_scalar(tag='loss', value=('d_loss', errD.mean().asscalar()), global_step=global_step)
            sw.add_scalar(tag='loss', value=('g_loss', errG.mean().asscalar()), global_step=global_step)
            global_step += 1

            if epoch + local_step == 0:
                # Very first batch only: log the graph and the reference
                # images, and export the generator.
                sw.add_graph((netG))
                img_in_list, img_out_list, m_val = val_data.next().data
                m_val = m_val.as_in_context(ctx)
                sw.add_image('first_minibatch_train_real', norm3(real_out))
                sw.add_image('first_minibatch_val_real', norm3(img_out_list.as_in_context(ctx)))
                netG.export('%snetG' % dir_out_checkpoints)
            if local_step == 0:
                # Log the first batch of images of each epoch (training)
                sw.add_image('first_minibatch_train_fake', norm3(fake_out * m) * m, epoch)
                sw.add_image(
                    'first_minibatch_val_fake',
                    norm3(netG(img_in_list.as_in_context(ctx)) * m_val) * m_val, epoch)

            # Print log information every ten batches
            if (local_step + 1) % 10 == 0:
                name, acc = metric.get()
                logging.info('speed: {} samples/s'.format(
                    batch_size / (time.time() - btic)))
                logging.info(
                    'discriminator loss = %f, generator loss = %f, binary training acc = %f at iter %d epoch %d'
                    % (nd.mean(errD).asscalar(), nd.mean(errG).asscalar(), acc,
                       local_step, epoch))
            btic = time.time()

        # Read the metric BEFORE logging it: previously `acc` still held the
        # value from the last ten-batch report (or was undefined when the
        # first epoch had fewer than ten batches).
        name, acc = metric.get()
        metric.reset()
        sw.add_scalar(tag='binary_training_acc', value=('acc', acc), global_step=epoch)

        fake_val = netG(val_data.data[0][1].as_in_context(ctx))
        loss_val = loss_2nd(val_data.data[1][1].as_in_context(ctx), fake_val,
                            val_data.data[2][1].as_in_context(ctx)) * lambda1
        sw.add_scalar(tag='loss_val', value=('g_loss', loss_val.mean().asscalar()), global_step=epoch)

        if epoch % check_point_interval == 0 or epoch == epochs - 1:
            netD.save_params('%snetD-%04d' % (dir_out_checkpoints, epoch))
            netG.save_params('%snetG-%04d' % (dir_out_checkpoints, epoch))

        logging.info('\nbinary training acc at epoch %d: %s=%f' % (epoch, name, acc))
        logging.info('time: %f' % (time.time() - tic))

    sw.export_scalars('scalar_dict.json')
    sw.close()
def myTrain(net, batch_size, train_data, valid_data, epoches, lr, wd, ctx, lr_period, lr_decay, verbose=False):
    """Train ``net`` with SGD + momentum, print per-epoch statistics and plot them.

    Args:
        net: Network to train.
        batch_size: Value passed to ``trainer.step``.
        train_data: Iterable of ``(data, label)`` batches supporting ``len``.
        valid_data: Validation batches, or ``None`` to skip validation.
        epoches: Number of epochs.
        lr: Initial learning rate.
        wd: Weight decay.
        ctx: Context data and labels are moved to.
        lr_period: Not used by this function.
        lr_decay: Factor applied to the learning rate every 20th epoch after
            epoch 150.
        verbose: When true (and ``valid_data`` is given) the validation loss
            is also computed, printed and plotted.
    """
    sgd_options = {'learning_rate': lr, 'momentum': 0.9, 'wd': wd}
    trainer = mx.gluon.Trainer(net.collect_params(), 'sgd', sgd_options)
    prev_time = datetime.datetime.now()

    # One entry per epoch in each record.
    train_loss_record, valid_loss_record = [], []
    train_acc_record, valid_acc_record = [], []

    focalloss = netlib.FocalLoss()  # alternative loss; not used below

    for e in range(epoches):
        train_loss = 0.0
        train_acc = 0.0

        # decrease lr
        if e > 150 and e % 20 == 0:
            trainer.set_learning_rate(trainer.learning_rate * lr_decay)

        for data, label in train_data:
            # be careful: the label is flattened into a vector
            flat_label = label.reshape(shape=(label.shape[0], ))
            label = flat_label.astype('float32').as_in_context(ctx)
            with autograd.record():
                output = net(data.as_in_context(ctx))
                loss = softmax_cross_entrory(output, label)
            loss.backward()
            trainer.step(batch_size)

            train_loss += nd.mean(loss).asscalar()
            train_acc += utils1.accuracy(output, label)

        mean_train_loss = train_loss / len(train_data)
        mean_train_acc = train_acc / len(train_data)
        train_loss_record.append(mean_train_loss)
        train_acc_record.append(mean_train_acc)

        cur_time = datetime.datetime.now()
        elapsed_seconds = (cur_time - prev_time).seconds
        h, remainder = divmod(elapsed_seconds, 3600)
        m, s = divmod(remainder, 60)
        time_str = 'Time %02d:%02d:%02d' % (h, m, s)

        if valid_data is None:
            epoch_str = (
                "Epoch %d. Loss: %f, Train acc %f, " %
                (e, mean_train_loss, mean_train_acc))
        else:
            valid_acc = evaluate_accuracy(valid_data, net, ctx)
            valid_acc_record.append(valid_acc)
            if not verbose:
                epoch_str = (
                    "Epoch %d. Train Loss: %f, Train acc %f, Valid acc %f, " %
                    (e, mean_train_loss, mean_train_acc, valid_acc))
            else:
                # valid data loss, averaged over the validation batches
                valid_loss = 0
                for data, valid_label in valid_data:
                    flat_label = valid_label.reshape(shape=(valid_label.shape[0], ))
                    valid_label = flat_label.astype('float32').as_in_context(ctx)
                    out = net(data.as_in_context(ctx))
                    loss = softmax_cross_entrory(out, valid_label)
                    valid_loss += nd.mean(loss).asscalar()
                mean_valid_loss = valid_loss / len(valid_data)
                valid_loss_record.append(mean_valid_loss)
                epoch_str = (
                    "Epoch %d. Train Loss: %f,Valid Loss: %f, Train acc %f, Valid acc %f, " %
                    (e, mean_train_loss, mean_valid_loss, mean_train_acc, valid_acc))

        prev_time = cur_time
        print(epoch_str + 'lr=' + str(trainer.learning_rate) + ',' + time_str)

    # plot loss and acc
    fig, (fig1, fig2) = plt.subplots(1, 2)
    # Both modes start with the same three calls.
    fig1.plot(train_loss_record, 'b')
    fig1.legend(['train'])
    fig2.plot(train_acc_record, 'b')
    if not verbose:
        fig2.plot(valid_acc_record, 'r')
        fig2.legend(['train_acc', 'valid_acc'])
    else:
        fig2.legend(['train_acc'])
        if valid_data is not None:
            fig1.plot(valid_loss_record, 'r')
            fig1.legend(['train', 'test'])
            fig2.plot(valid_acc_record, 'r')
            fig2.legend(['train_acc', 'valid_acc'])
    fig.show()
    fig.savefig('./CIFAR10_result.png')
############################################# ### 训练 ####################### learning_rate = .1#学习率 epochs = 7##训练迭代 次数 for epoch in range(epochs): train_loss = 0.# 损失 train_acc = 0. #准确度 for data, label in train_data:#训练数据集 样本和标签 with autograd.record():#自动微分 output = net(data) #网络输出 loss = cross_entropy(output, label)##损失 loss.backward()#向后传播 # 将梯度做平均,这样学习率会对batch size不那么敏感 SGD(params, learning_rate/batch_size) train_loss += nd.mean(loss).asscalar()#损失 train_acc += accuracy(output, label) #准确度 test_acc = evaluate_accuracy(test_data, net)#验证数据集的准确度 print("训练次数 %d. 损失Loss: %f, 训练准确度Train acc %f, 测试准确度Test acc %f" % ( epoch, train_loss/len(train_data), train_acc/len(train_data), test_acc)) ## 查看训练结果 data, label = mnist_test[0:10]#测试数据集前10个数据 show_images(data)#图片实例 print('true labels')#真实标签 print(get_text_labels(label)) predicted_labels = net(data).argmax(axis=1)#将预测概率最高的那个类作为预测的类
def CNN(epoch = 100 , batch_size=10, save_period=10 , load_period=100 , weight_decay=0.001 ,learning_rate= 0.1 , dataset = "MNIST", ctx=mx.cpu(0)):
    """Train a LeNet-5-like CNN written with raw NDArray operators, using Adam.

    Args:
        epoch: Number of passes over the training data.
        batch_size: Batch size handed to the dataset loader.
        save_period: Weights are saved whenever the epoch index is a
            multiple of this value.
        load_period: Epoch suffix of the weight file to resume from; when
            ``weights/<dataset>_weights-<load_period>`` does not exist the
            weights are randomly initialised.
        weight_decay: Only consumed by the disabled manual ``SGD`` update.
        learning_rate: Adam learning rate.
        dataset: ``"MNIST"``, ``"FashionMNIST"`` or ``"CIFAR10"``.
        ctx: MXNet context holding parameters and data.

    Returns:
        ``"The dataset does not exist."`` for an unknown ``dataset``,
        otherwise ``"optimization completed"``.
    """
    # data selection: loader, input channels, flattened feature size
    # (30*4*4 for 28x28 inputs, 30*5*5 for 32x32 inputs) and weight file.
    if dataset == "MNIST":
        train_data, test_data = MNIST(batch_size)
        color, flat_dim = 1, 480
        path = "weights/MNIST_weights-{}".format(load_period)
    elif dataset == "CIFAR10":
        train_data, test_data = CIFAR10(batch_size)
        color, flat_dim = 3, 750
        path = "weights/CIFAR10_weights-{}".format(load_period)
    elif dataset == "FashionMNIST":
        train_data, test_data = FashionMNIST(batch_size)
        color, flat_dim = 1, 480
        path = "weights/FashionMNIST_weights-{}".format(load_period)
    else:
        return "The dataset does not exist."

    num_outputs = 10

    if os.path.exists(path):
        print("loading weights")
        # Move every stored array onto the requested context.
        params = [weight.as_in_context(ctx) for weight in nd.load(path)]
    else:
        print("initializing weights")
        # Weight/bias shapes in the order W1, B1, ..., W5, B5.
        shapes = [(60, color, 3, 3), 60,
                  (30, 60, 6, 6), 30,
                  (120, flat_dim), 120,
                  (64, 120), 64,
                  (num_outputs, 64), num_outputs]
        with ctx:
            params = [nd.random.normal(loc=0, scale=0.1, shape=shape)
                      for shape in shapes]

    # Raises ValueError when the file does not hold exactly 10 arrays.
    W1, B1, W2, B2, W3, B3, W4, B4, W5, B5 = params

    # attach gradient!!!
    for param in params:
        param.attach_grad()

    # network - similar to lenet5
    # Convolution: data (batch_size, channel, height, width),
    #              weight (num_filter, channel, kernel[0], kernel[1]),
    #              bias (num_filter,),
    #              out (batch_size, num_filter, out_height, out_width).
    # FullyConnected: data (batch_size, input_dim),
    #                 weight (num_hidden, input_dim), bias (num_hidden,),
    #                 out (batch_size, num_hidden).
    def network(X, drop_rate=0.0):
        """Return soft-max class probabilities for the image batch ``X``."""
        # output_size = ((input - kernel + 2*padding) / stride) + 1
        # 28x28 -> 26x26 (32x32 -> 30x30)
        C_H1 = nd.Activation(
            data=nd.Convolution(data=X, weight=W1, bias=B1, kernel=(3, 3), stride=(1, 1), num_filter=60),
            act_type="relu")
        # -> 13x13 (15x15)
        P_H1 = nd.Pooling(data=C_H1, pool_type="max", kernel=(2, 2), stride=(2, 2))
        # -> 8x8 (10x10)
        C_H2 = nd.Activation(
            data=nd.Convolution(data=P_H1, weight=W2, bias=B2, kernel=(6, 6), stride=(1, 1), num_filter=30),
            act_type="relu")
        # -> 4x4 (5x5)
        P_H2 = nd.Pooling(data=C_H2, pool_type="max", kernel=(2, 2), stride=(2, 2))
        P_H2 = nd.flatten(data=P_H2)

        F_H1 = nd.Activation(nd.FullyConnected(data=P_H2, weight=W3, bias=B3, num_hidden=120), act_type="sigmoid")
        F_H1 = nd.Dropout(data=F_H1, p=drop_rate)
        F_H2 = nd.Activation(nd.FullyConnected(data=F_H1, weight=W4, bias=B4, num_hidden=64), act_type="sigmoid")
        F_H2 = nd.Dropout(data=F_H2, p=drop_rate)
        softmax_Y = nd.softmax(nd.FullyConnected(data=F_H2, weight=W5, bias=B5, num_hidden=10))
        return softmax_Y

    def cross_entropy(output, label):
        """Per-sample cross entropy between probabilities and one-hot labels."""
        return - nd.sum(label * nd.log(output), axis=1)

    # Adam optimizer
    # Each parameter gets its own optimizer index: Adam keeps its update
    # counter (used for bias correction) per index, so sharing index 0
    # advanced the counter once per parameter instead of once per step.
    optimizer = mx.optimizer.Adam(rescale_grad=1, learning_rate=learning_rate)
    state = [optimizer.create_state(j, param) for j, param in enumerate(params)]

    def SGD(params, lr, wd, bs):
        """Manual SGD step with weight decay; alternative to Adam, currently unused."""
        for param in params:
            param -= ((lr * param.grad) / bs + wd * param)

    for i in tqdm(range(1, epoch + 1, 1)):
        for data, label in train_data:
            data = data.as_in_context(ctx)
            label = label.as_in_context(ctx)
            label = nd.one_hot(label, num_outputs)

            with autograd.record():
                output = network(data, drop_rate=0.2)
                # loss definition
                loss = cross_entropy(output, label)  # (batch_size,)
            cost = nd.mean(loss).asscalar()
            loss.backward()

            for j, param in enumerate(params):
                optimizer.update(j, param, param.grad, state[j])
            #SGD(params, learning_rate , weight_decay , batch_size)

        print(" epoch : {} , last batch cost : {}".format(i, cost))

        # weight_save
        if i % save_period == 0:
            if not os.path.exists("weights"):
                os.makedirs("weights")
            print("saving weights")
            if dataset == "MNIST":
                nd.save("weights/MNIST_weights-{}".format(i), params)
            elif dataset == "CIFAR10":
                nd.save("weights/CIFAR10_weights-{}".format(i), params)
            elif dataset == "FashionMNIST":
                nd.save("weights/FashionMNIST_weights-{}".format(i), params)

    test_accuracy = evaluate_accuracy(test_data, network, ctx)
    print("Test_acc : {}".format(test_accuracy))

    return "optimization completed"
def forward(self, x, *args):
    """Return ``x`` centred on its global mean and divided by its global standard deviation.

    Extra positional arguments are accepted and ignored.
    """
    centered = x - x.mean()
    variance = nd.mean(nd.power(centered, 2))
    return centered / nd.sqrt(variance)
def train(args):
    """Train the LSTM captioning network and report train/test losses per epoch.

    Args:
        args: Namespace providing ``frames``, ``caption_length``,
            ``glove_file``, ``cuda``, ``load_pretrain``, ``train_folder``,
            ``train_dict``, ``test_folder``, ``test_dict``, ``batch_size``,
            ``lr``, ``epochs`` and ``save_model``.
    """
    frames = args.frames
    caption_length = args.caption_length
    glove_file = args.glove_file

    ctx = mx.gpu() if args.cuda else mx.cpu()

    if args.load_pretrain:
        # Frames are additionally passed through a pretrained VGG16-BN.
        pretrain_model = vision.vgg16_bn(pretrained=True, ctx=ctx)
        transform = utils.Compose([utils.ToTensor(ctx),
                                   utils.normalize(ctx),
                                   utils.extractFeature(ctx, pretrain_model)])
    else:
        pretrain_model = None
        transform = utils.Compose([utils.ToTensor(ctx),
                                   utils.normalize(ctx)])

    target_transform = utils.targetCompose([utils.WordToTensor(ctx)])

    train_dataset = videoFolder(args.train_folder, args.train_dict, frames, glove_file,
                                caption_length, ctx, transform=transform,
                                target_transform=target_transform)
    test_dataset = videoFolder(args.test_folder, args.test_dict, frames, glove_file,
                               caption_length, ctx, transform=transform,
                               target_transform=target_transform)

    train_loader = gluon.data.DataLoader(train_dataset, batch_size=args.batch_size,
                                         last_batch='discard', shuffle=True)
    test_loader = gluon.data.DataLoader(test_dataset, batch_size=args.batch_size,
                                        last_batch='discard', shuffle=False)

    loss = L2Loss_2()
    net = lstm_net(frames, caption_length, ctx, pretrained=args.load_pretrain)
    net.collect_params().initialize(init=mx.initializer.MSRAPrelu(), ctx=ctx)

    trainer = gluon.Trainer(net.collect_params(), 'adam', {'learning_rate': args.lr})

    smoothing_constant = 0.01

    for e in range(args.epochs):
        epoch_loss = 0
        for batch_id, (x, target) in enumerate(train_loader):
            with autograd.record():
                pred = net(x)
                batch_loss = loss(pred, target)

            # Gradients must exist before the optimizer step: calling
            # `step` first applied the previous batch's gradients (and none
            # at all on the very first batch).
            batch_loss.backward()
            trainer.step(x.shape[0], ignore_stale_grad=True)

            mx.nd.waitall()

            batch_loss = F.mean(batch_loss).asscalar()

            if batch_id % 100 == 0:
                print("Train Batch:{}, batch_loss:{}".format(batch_id + 1, batch_loss))

            # Exponential moving average, seeded with the first batch of
            # EVERY epoch: the accumulator is reset to 0 per epoch, so
            # seeding only in epoch 0 biased later epochs towards zero.
            epoch_loss = (batch_loss if batch_id == 0
                          else (1 - smoothing_constant) * epoch_loss + smoothing_constant * batch_loss)

        epoch_loss_1 = 0
        for batch_id, (x, target) in enumerate(test_loader):
            with autograd.predict_mode():
                predict = net(x)
                # Score the prediction for THIS test batch; the stale
                # training `pred` was used here before.
                batch_loss_1 = loss(predict, target)

            batch_loss_1 = F.mean(batch_loss_1).asscalar()

            if batch_id % 100 == 0:
                print("Test Batch:{}, batch_loss:{}".format(batch_id + 1, batch_loss_1))

            epoch_loss_1 = (batch_loss_1 if batch_id == 0
                            else (1 - smoothing_constant) * epoch_loss_1 + smoothing_constant * batch_loss_1)

        print("Epoch {}, train_loss:{}, test_loss:{}".format(e + 1, epoch_loss, epoch_loss_1))

        # The same file is overwritten after every epoch.
        if args.save_model == True:
            file_name = "./saved_model/" + "lstm_pretrain.params"
            net.save_parameters(file_name)
d_optimizer.step(num_img) # ===============train generator # compute loss of fake_img with g.autograd.record(): fake_img = ge(z) output = d(fake_img) g_loss = bce(output, real_label) # bp and optimize g_loss.backward() g_optimizer.step(num_img) if (i + 1) % 100 == 0: print('Epoch [{}/{}], d_loss: {:.6f}, g_loss: {:.6f} ' 'D real: {:.6f}, D fake: {:.6f}'.format( epoch, num_epoch, nd.mean(d_loss).asscalar(), nd.mean(g_loss).asscalar(), nd.mean(real_scores).asscalar(), nd.mean(fake_scores).asscalar())) if epoch == 0: real_images = to_img(torch.FloatTensor(real_img.asnumpy())) save_image(real_images, './img/real_images.png') fake_images = to_img(torch.FloatTensor(fake_img.asnumpy())) save_image(fake_images, './img/fake_images-{}.png'.format(epoch + 1)) d.save_params('./dis.params') ge.save_params('./gen.params')
def accurancy(output, label):
    """Return the mean of ``output.argmax(axis=1) == label`` as a scalar."""
    # output: batch_size * nums_classes
    predicted = output.argmax(axis=1)
    hits = predicted == label
    return nd.mean(hits).asscalar()
def feature_scaling(x: NDArray, mean: float, std: float):
    """Shift and scale ``x`` so it has the requested ``mean`` and ``std``.

    The input is centred on its own mean, divided by the square root of the
    mean of ``sqr(centred)`` and then mapped to ``mean + std * normalised``.
    The computed deviation is printed as a side effect.
    """
    centred = x - nd.mean(x)
    mean_square = nd.mean(sqr(centred))
    deviation = nd.sqrt(mean_square)
    print(deviation)
    return mean + std * centred / deviation
# Squared-error loss and an SGD trainer with weight decay.
l2_loss = gluon.loss.L2Loss()
trainer = gluon.Trainer(net.collect_params(), 'sgd', {
    'learning_rate': 0.005,
    'wd': weight_decay
})

# 4.train
epoches = 10
train_loss_record = []
test_loss_record = []
for e in range(epoches):
    train_loss = 0
    test_loss = 0
    _dataIter = 0  # number of mini-batches seen in this epoch
    for data, label in dataIter(x_train, y_train, batch_size):
        _dataIter += 1
        with autograd.record():
            out = net(data)
            loss = l2_loss(out, label)
        loss.backward()
        trainer.step(batch_size)
        train_loss += nd.mean(loss).asscalar()
    # Per-epoch average of the per-batch mean losses.
    train_loss_record.append(train_loss / _dataIter)
    # Test loss is evaluated on the full test set in one forward pass.
    test_loss = nd.mean(l2_loss(net(x_test), y_test)).asscalar()
    test_loss_record.append(test_loss)
    print('epoches: %d, train_loss: %f, test_loss: %f' %
          (e, train_loss / _dataIter, test_loss))

# Plot both curves: train in blue, test in red.
plt.plot(train_loss_record, 'b')
plt.plot(test_loss_record, 'r')
plt.legend(['train', 'test'])
plt.show()
def train():
    """Run the conditional-GAN training loop over ``train_data``.

    For every batch the discriminator ``netD`` is updated on a pooled fake
    pair and a real pair, then the generator ``netG`` is updated with the GAN
    loss plus ``lambda1`` times the L1 loss.  Progress is logged every ten
    batches and one generated image is visualised per epoch.  All networks,
    trainers, losses and hyper-parameters are read from module scope.
    """
    image_pool = ImagePool(pool_size)
    metric = mx.metric.CustomMetric(facc)

    logging.basicConfig(level=logging.DEBUG)

    for epoch in range(epochs):
        tic = time.time()
        btic = time.time()
        train_data.reset()
        # ``batch_idx`` replaces a hand-maintained counter named ``iter``,
        # which shadowed the builtin.
        for batch_idx, batch in enumerate(train_data):
            ############################
            # (1) Update D network: maximize log(D(x, y)) + log(1 - D(x, G(x, z)))
            ###########################
            real_in = batch.data[0].as_in_context(ctx)
            real_out = batch.data[1].as_in_context(ctx)

            # Generated outside autograd.record(): no generator gradient is
            # wanted for the discriminator update.
            fake_out = netG(real_in)
            fake_concat = image_pool.query(nd.concat(real_in, fake_out, dim=1))
            with autograd.record():
                # Train with fake image
                # Use image pooling to utilize history images
                output = netD(fake_concat)
                fake_label = nd.zeros(output.shape, ctx=ctx)
                errD_fake = GAN_loss(output, fake_label)
                metric.update([fake_label, ], [output, ])

                # Train with real image
                real_concat = nd.concat(real_in, real_out, dim=1)
                output = netD(real_concat)
                real_label = nd.ones(output.shape, ctx=ctx)
                errD_real = GAN_loss(output, real_label)
                errD = (errD_real + errD_fake) * 0.5
                errD.backward()
                metric.update([real_label, ], [output, ])

            trainerD.step(batch.data[0].shape[0])

            ############################
            # (2) Update G network: maximize log(D(x, G(x, z))) - lambda1 * L1(y, G(x, z))
            ###########################
            with autograd.record():
                fake_out = netG(real_in)
                fake_concat = nd.concat(real_in, fake_out, dim=1)
                output = netD(fake_concat)
                real_label = nd.ones(output.shape, ctx=ctx)
                errG = GAN_loss(output, real_label) + L1_loss(real_out, fake_out) * lambda1
                errG.backward()

            trainerG.step(batch.data[0].shape[0])

            # Print log information every ten batches
            if batch_idx % 10 == 0:
                name, acc = metric.get()
                logging.info('speed: {} samples/s'.format(
                    batch_size / (time.time() - btic)))
                logging.info(
                    'discriminator loss = %f, generator loss = %f, binary training acc = %f at iter %d epoch %d'
                    % (nd.mean(errD).asscalar(), nd.mean(errG).asscalar(),
                       acc, batch_idx, epoch))
            btic = time.time()

        name, acc = metric.get()
        metric.reset()
        logging.info('\nbinary training acc at epoch %d: %s=%f' % (epoch, name, acc))
        logging.info('time: %f' % (time.time() - tic))

        # Visualize one generated image for each epoch
        fake_img = fake_out[0]
        visualize(fake_img)
def accuracy(output, labels):
    """Return the mean of ``argmax(output, axis=1) == labels`` as a scalar."""
    best_class = nd.argmax(output, axis=1)
    matches = best_class == labels
    return nd.mean(matches).asscalar()
def train(channel_input_dirs, hyperparameters, hosts, **kwargs):
    """Train the U-Net module and return an inference module with the best weights.

    Args:
        channel_input_dirs: mapping whose ``'training'`` entry is the data path
            handed to ``get_data``.
        hyperparameters: mapping providing optional ``batch_size``, ``epochs``,
            ``learning_rate``, ``beta1``, ``beta2``, ``num_gpus`` and ``burn_in``.
        hosts: list of hosts; only its length is inspected.
        **kwargs: accepted and ignored.

    Returns:
        An ``mx.mod.Module`` built from ``build_unet(inference=True)`` with
        parameters loaded from ``best_net-0000.params``.
    """
    # retrieve the hyperparameters we set in notebook (with some defaults)
    batch_size = hyperparameters.get('batch_size', 128)
    epochs = hyperparameters.get('epochs', 100)
    learning_rate = hyperparameters.get('learning_rate', 0.1)
    beta1 = hyperparameters.get('beta1', 0.9)
    beta2 = hyperparameters.get('beta2', 0.99)
    num_gpus = hyperparameters.get('num_gpus', 0)
    burn_in = hyperparameters.get('burn_in', 5)

    # set logging
    logging.getLogger().setLevel(logging.DEBUG)

    # NOTE(review): ``kvstore`` is computed but never passed to
    # ``init_optimizer`` below -- confirm whether distributed sync is intended.
    if len(hosts) == 1:
        kvstore = 'device' if num_gpus > 0 else 'local'
    else:
        kvstore = 'dist_device_sync' if num_gpus > 0 else 'dist_sync'

    ctx = [mx.gpu(i) for i in range(num_gpus)] if num_gpus > 0 else [mx.cpu()]
    print (ctx)

    f_path = channel_input_dirs['training']
    train_X, train_Y, validation_X, validation_Y = get_data(f_path)
    print ('loaded data')

    train_iter = mx.io.NDArrayIter(data = train_X, label=train_Y,
                                   batch_size=batch_size, shuffle=True)
    validation_iter = mx.io.NDArrayIter(data = validation_X, label=validation_Y,
                                        batch_size=batch_size, shuffle=False)
    data_shape = (batch_size,) + train_X.shape[1:]
    label_shape = (batch_size,) + train_Y.shape[1:]
    print ('created iters')

    sym = build_unet()
    net = mx.mod.Module(sym, context=ctx, data_names=('data',), label_names=('label',))
    net.bind(data_shapes=[['data', data_shape]], label_shapes=[['label', label_shape]])
    net.init_params(mx.initializer.Xavier(magnitude=6))
    net.init_optimizer(optimizer = 'adam',
                       optimizer_params=(
                           ('learning_rate', learning_rate),
                           ('beta1', beta1),
                           ('beta2', beta2)
                       ))
    print ('start training')

    smoothing_constant = .01
    curr_losses = []
    moving_losses = []
    i = 0
    best_val_loss = np.inf
    checkpoint_saved = False
    for e in range(epochs):
        for batch in train_iter:
            net.forward_backward(batch)
            loss = net.get_outputs()[0]
            net.update()
            curr_loss = F.mean(loss).asscalar()
            curr_losses.append(curr_loss)
            moving_loss = (curr_loss if ((i == 0) and (e == 0))
                           else (1 - smoothing_constant) * moving_loss + (smoothing_constant) * curr_loss)
            moving_losses.append(moving_loss)
            i += 1
        train_iter.reset()

        val_losses = []
        for batch in validation_iter:
            net.forward(batch)
            loss = net.get_outputs()[0]
            val_losses.append(F.mean(loss).asscalar())
        validation_iter.reset()

        # early stopping: only epochs after the burn-in period may checkpoint
        val_loss = np.mean(val_losses)
        if e > burn_in and val_loss < best_val_loss:
            best_val_loss = val_loss
            net.save_checkpoint('best_net', 0)
            checkpoint_saved = True
            print("Best model at Epoch %i" %(e+1))
        print("Epoch %i: Moving Training Loss %0.5f, Validation Loss %0.5f" % (e+1, moving_loss, val_loss))

    # With few epochs (epochs <= burn_in + 1) or a validation loss that never
    # improves, nothing was written above and the load below would fail or
    # pick up a stale file; fall back to the final weights.
    if not checkpoint_saved:
        net.save_checkpoint('best_net', 0)

    inference_sym = build_unet(inference=True)
    net = mx.mod.Module(inference_sym, context=ctx, data_names=('data',))
    net.bind(data_shapes=[['data', data_shape]])
    net.load_params('best_net-0000.params')
    return net
def main(opt):
    """Score the test or validation list with the trained networks.

    Reads ``<dataset>_<expname>_testlist.txt`` (or ``_validationlist.txt``
    when ``opt.istest`` is false), loads the four checkpoints for
    ``opt.epochs`` and computes four per-image scores with ``netD2``/``netDe``.

    Returns:
        ``[roc_auc1, roc_auc2, roc_auc3, roc_auc4]``, the ROC AUC of each score.
    """
    ctx = mx.gpu() if opt.use_gpu else mx.cpu()
    testclasspaths = []
    testclasslabels = []
    filename = '_testlist.txt' if opt.istest else '_validationlist.txt'
    with open(opt.dataset + "_" + opt.expname + filename, 'r') as f:
        for line in f:
            fields = line.split(' ')
            testclasspaths.append(fields[0])
            # A label of -1 in the list file maps to class 0, anything else to 1.
            testclasslabels.append(0 if int(fields[1]) == -1 else 1)

    test_data = load_image.load_test_images(testclasspaths, testclasslabels,
                                            opt.batch_size, opt.img_wd,
                                            opt.img_ht, ctx, opt.noisevar)
    netEn, netDe, netD, netD2 = set_network(opt.depth, ctx, opt.ngf)
    checkpoint_prefix = 'checkpoints/' + opt.expname + '_' + str(opt.epochs)
    netEn.load_params(checkpoint_prefix + '_En.params', ctx=ctx)
    netDe.load_params(checkpoint_prefix + '_De.params', ctx=ctx)
    netD.load_params(checkpoint_prefix + '_D.params', ctx=ctx)
    netD2.load_params(checkpoint_prefix + '_D2.params', ctx=ctx)

    lbllist = []
    scorelist1 = []
    scorelist2 = []
    scorelist3 = []
    scorelist4 = []
    test_data.reset()
    for batch in test_data:
        real_in = batch.data[0].as_in_context(ctx)
        real_out = batch.data[1].as_in_context(ctx)
        lbls = batch.label[0].as_in_context(ctx)

        out = netDe(netEn(real_out))
        output4 = nd.mean((netD2(out)), (1, 3, 2)).asnumpy()

        out = netDe(netEn(real_in))
        output = netD2(out)  # Denoised image
        output3 = nd.mean(out - real_out, (1, 3, 2)).asnumpy()  # denoised-real
        output = nd.mean(output, (1, 3, 2)).asnumpy()
        print(output)
        print(lbls)
        output2 = netD2(real_out)  # Image with no noise
        output2 = nd.mean(output2, (1, 3, 2)).asnumpy()

        # extend() keeps accumulation linear instead of rebuilding each list.
        lbllist.extend(lbls.asnumpy())
        scorelist1.extend(output)
        scorelist2.extend(output2)
        scorelist3.extend(output3)
        scorelist4.extend(output4)

    # pos_label is passed by keyword: it is keyword-only in current
    # scikit-learn releases.
    roc_aucs = []
    for scores in (scorelist1, scorelist2, scorelist3, scorelist4):
        fpr, tpr, _ = roc_curve(lbllist, scores, pos_label=1)
        roc_aucs.append(auc(fpr, tpr))
    return roc_aucs
def Cal_Acc(output, label):
    """Return the mean of ``argmax(output, axis=1) == label`` as a scalar."""
    is_correct = nd.argmax(output, axis=1) == label
    mean_correct = nd.mean(is_correct)
    return mean_correct.asscalar()
def forward(self, output1, output2, label):
    """Contrastive loss between two embedding batches.

    For each pair the Euclidean distance ``d`` is computed over every axis
    except the first, and the loss is the batch mean of
    ``(1 - label) * d**2 + label * max(margin - d, 0)**2``.

    Args:
        output1, output2: embeddings of the two branches; assumes axis 0 is
            the batch axis -- TODO confirm against the caller.
        label: one value per pair (1 selects the margin term); it is
            flattened so both ``(batch,)`` and ``(batch, 1)`` are accepted.

    Returns:
        A one-element NDArray holding the mean loss.
    """
    # Reduce over all axes but the batch axis.  The original summed over the
    # whole batch, collapsing every pair into a single shared distance.
    squared_distance = nd.sum(nd.square(nd.subtract(output1, output2)),
                              axis=0, exclude=True)
    euclidean_distance = nd.sqrt(squared_distance)
    label = label.reshape((-1,))
    # Hinge: pairs already farther apart than the margin contribute nothing.
    margin_gap = nd.maximum(self.margin - euclidean_distance, 0)
    loss_contrastive = nd.mean((1 - label) * squared_distance
                               + label * nd.square(margin_gap))
    return loss_contrastive
def mean(input, dim):
    """Average ``input`` along axis ``dim`` using ``nd.mean``."""
    averaged = nd.mean(input, axis=dim)
    return averaged
def hybrid_forward(self, F, pred, label, sample_weight=None):
    """Absolute-error loss, weighted by ``self._weight / 2`` and averaged.

    Args:
        F: the ``mx.nd`` or ``mx.sym`` namespace supplied by Gluon.
        pred: predictions.
        label: targets; reshaped to match ``pred``.
        sample_weight: optional weighting forwarded to ``_apply_weighting``.

    Returns:
        The loss averaged over every axis except ``self._batch_axis``.
    """
    label = _reshape_like(F, label, pred)
    # abs(x) has the same value as sqrt(square(x)) but a finite gradient at
    # x == 0; the chained form yields NaN gradients whenever pred == label.
    loss = F.abs(label - pred)
    loss = _apply_weighting(F, loss, self._weight / 2, sample_weight)
    return F.mean(loss, axis=self._batch_axis, exclude=True)
def Autoencoder(epoch=100, batch_size=128, save_period=10, load_period=100,
                optimizer="sgd", learning_rate=0.01, dataset="MNIST",
                ctx=mx.gpu(0)):
    """Train a dense autoencoder on MNIST or FashionMNIST.

    Args:
        epoch: number of training epochs.
        batch_size: batch size handed to the dataset helper.
        save_period: weights are saved every ``save_period`` epochs.
        load_period: epoch number of the checkpoint to resume from, if present.
        optimizer: optimizer name passed to ``gluon.Trainer``.
        learning_rate: optimizer learning rate.
        dataset: ``"MNIST"`` or ``"FashionMNIST"``.
        ctx: MXNet context for parameters and data.

    Returns:
        ``"optimization completed"``, or ``"The dataset does not exist."``
        for an unknown ``dataset``.
    """
    # data selection
    if dataset == "MNIST":
        train_data, test_data = MNIST(batch_size)
    elif dataset == "FashionMNIST":
        train_data, test_data = FashionMNIST(batch_size)
    else:
        return "The dataset does not exist."
    path = "weights/{}-{}.params".format(dataset, load_period)

    # Follow these steps:
    # •Define network
    # •Initialize parameters
    # •Loop over inputs
    # •Forward input through network to get output
    # •Compute loss with output and label
    # •Backprop gradient
    # •Update parameters with gradient descent.

    # Autoencoder
    net = gluon.nn.Sequential()  # stacks 'Block's sequentially
    with net.name_scope():
        net.add(gluon.nn.Dense(units=200, activation="sigmoid", use_bias=True))
        net.add(gluon.nn.Dropout(0.2))
        net.add(gluon.nn.Dense(units=100, activation="sigmoid", use_bias=True))
        net.add(gluon.nn.Dropout(0.2))
        net.add(gluon.nn.Dense(units=100, activation="sigmoid", use_bias=True))
        net.add(gluon.nn.Dropout(0.2))
        net.add(gluon.nn.Dense(units=200, activation="sigmoid", use_bias=True))
        net.add(gluon.nn.Dropout(0.2))
        net.add(gluon.nn.Dense(units=784, activation="sigmoid", use_bias=True))

    # weights initialization
    if os.path.exists(path):
        print("loading weights")
        net.load_params(filename=path, ctx=ctx)  # weights load
    else:
        print("initializing weights")
        net.collect_params().initialize(mx.init.Normal(sigma=0.1), ctx=ctx)

    # optimizer
    trainer = gluon.Trainer(net.collect_params(), optimizer,
                            {"learning_rate": learning_rate})
    # The loss object is stateless, so build it once instead of per batch.
    l2_loss = gluon.loss.L2Loss()

    # learning
    for i in tqdm(range(1, epoch + 1, 1)):
        for data, _ in train_data:
            # Flatten using the real batch length so a short final batch is
            # not silently reshaped into the wrong feature width.
            actual_batch = data.shape[0]
            data = data.as_in_context(ctx).reshape((actual_batch, -1))
            with autograd.record(train_mode=True):
                output = net(data)
                # loss definition: reconstruct the input itself
                loss = l2_loss(output, data)
                cost = nd.mean(loss).asscalar()
            loss.backward()
            trainer.step(actual_batch, ignore_stale_grad=True)
        print(" epoch : {} , last batch cost : {}".format(i, cost))

        # weight_save
        if i % save_period == 0:
            if not os.path.exists("weights"):
                os.makedirs("weights")
            print("saving weights")
            net.save_params("weights/{}-{}.params".format(dataset, i))

    # show image
    generate_image(test_data, net, ctx, dataset)
    return "optimization completed"
def mainEvaluation(opt):
    """Evaluate the networks on the shuffled test list and return four ROC AUCs.

    Reads ``<dataset>_<expname>_testlist.txt``, shuffles it, and scores every
    batch.  Score 3 is the negative mean squared reconstruction error; scores
    1, 2 and 4 come from ``netD`` and are only computed when
    ``opt.ntype > 1`` (otherwise they stay zero).  A sample image grid is
    saved to ``outputs/`` for every batch.

    Returns:
        ``[roc_auc1, roc_auc2, roc_auc3, roc_auc4]``; the AUCs for scores
        1, 2 and 4 are 0 when ``opt.ntype <= 1``.
    """
    # NOTE(review): the source had lost its indentation; the extent of the
    # ``opt.ntype > 1`` branch inside the loop is reconstructed -- confirm.
    ctx = mx.gpu() if opt.use_gpu else mx.cpu()
    testclasspaths = []
    testclasslabels = []
    print('loading test files')
    filename = '_testlist.txt'
    with open(opt.dataset+"_"+opt.expname+filename , 'r') as f:
        for line in f:
            fields = line.split(' ')
            testclasspaths.append(fields[0])
            # A label of -1 in the list file maps to class 0, anything else to 1.
            testclasslabels.append(0 if int(fields[1]) == -1 else 1)

    c = list(zip(testclasslabels, testclasspaths))
    print('shuffling')
    random.shuffle(c)
    testclasslabels, testclasspaths = zip(*c)
    print('loading pictures')
    test_data = load_image.load_test_images(testclasspaths, testclasslabels,
                                            opt.batch_size, opt.img_wd,
                                            opt.img_ht, ctx, opt.noisevar,
                                            opt.bw)
    print('picture loading done')
    opt.istest = True
    networks = models_cifar.set_network(opt, ctx, True)
    netEn = networks[0]
    netDe = networks[1]
    netD = networks[2]
    netD2 = networks[3]
    # load_epoch = opt.epochs - 1
    # netEn.load_params('checkpoints/'+opt.expname+'_'+str(load_epoch)+'_En.params', ctx=ctx)
    # netDe.load_params('checkpoints/'+opt.expname+'_'+str(load_epoch)+'_De.params', ctx=ctx)
    # if opt.ntype>1:
    #     netD.load_params('checkpoints/'+opt.expname+'_'+str(load_epoch)+'_D.params', ctx=ctx)
    # if opt.ntype>2:
    #     netD2.load_params('checkpoints/'+opt.expname+'_'+str(load_epoch)+'_D2.params', ctx=ctx)

    print('Model loading done')
    lbllist = []
    scorelist1 = []
    scorelist2 = []
    scorelist3 = []
    scorelist4 = []
    test_data.reset()
    count = 0
    for batch in test_data:
        count = count+1
        # Zero placeholders keep the score lists aligned with the labels when
        # the discriminator scores are skipped below.
        output1 = np.zeros(opt.batch_size)
        output2 = np.zeros(opt.batch_size)
        output3 = np.zeros(opt.batch_size)
        output4 = np.zeros(opt.batch_size)
        real_in = batch.data[0].as_in_context(ctx)
        real_out = batch.data[1].as_in_context(ctx)
        lbls = batch.label[0].as_in_context(ctx)
        outnn = (netDe(netEn((real_in))))
        out = outnn
        output3 = -1*nd.mean((outnn - real_out)**2, (1, 3, 2)).asnumpy()
        if opt.ntype > 1:  # AE
            out_concat = nd.concat(real_in, outnn, dim=1) if opt.append else outnn
            output1 = nd.mean((netD(out_concat)), (1, 3, 2)).asnumpy()
            out_concat = nd.concat(real_in, real_in, dim=1) if opt.append else real_in
            output2 = netD((out_concat))  # Image with no noise
            output2 = nd.mean(output2, (1, 3, 2)).asnumpy()
            out = netDe(netEn(real_out))
            out_concat = nd.concat(real_in, out, dim=1) if opt.append else out
            output = netD(out_concat)  # Denoised image
            output4 = nd.mean(output, (1, 3, 2)).asnumpy()
        # extend() keeps accumulation linear instead of rebuilding each list.
        lbllist.extend(lbls.asnumpy())
        scorelist1.extend(output1)
        scorelist2.extend(output2)
        scorelist3.extend(output3)
        scorelist4.extend(output4)
        out = netDe(netEn(real_in))

        # Save some sample results
        fake_img1 = nd.concat(real_in[0], real_out[0], out[0], outnn[0], dim=1)
        fake_img2 = nd.concat(real_in[1], real_out[1], out[1], outnn[1], dim=1)
        fake_img3 = nd.concat(real_in[2], real_out[2], out[2], outnn[2], dim=1)
        fake_img4 = nd.concat(real_in[3], real_out[3], out[3], outnn[3], dim=1)
        fake_img = nd.concat(fake_img1, fake_img2, fake_img3, fake_img4, dim=2)
        visual.visualize(fake_img)
        plt.savefig('outputs/T_'+opt.expname+'_'+str(count)+'.png')

    positives = np.sum(lbllist)
    print("Positives" + str(positives))
    # len() gives the sample count directly; the original subtracted from the
    # shape tuple returned by np.shape().
    print("Negatives" + str(len(lbllist) - positives))

    # pos_label is passed by keyword: it is keyword-only in current
    # scikit-learn releases.
    fpr, tpr, _ = roc_curve(lbllist, scorelist3, pos_label=1)
    roc_auc1 = 0
    roc_auc2 = 0
    roc_auc4 = 0
    roc_auc3 = auc(fpr, tpr)
    if int(opt.ntype) > 1:  # AE
        fpr, tpr, _ = roc_curve(lbllist, scorelist1, pos_label=1)
        roc_auc1 = auc(fpr, tpr)
        fpr, tpr, _ = roc_curve(lbllist, scorelist2, pos_label=1)
        roc_auc2 = auc(fpr, tpr)
        fpr, tpr, _ = roc_curve(lbllist, scorelist4, pos_label=1)
        roc_auc4 = auc(fpr, tpr)
    return [roc_auc1, roc_auc2, roc_auc3, roc_auc4]
def accuracy(output, label):
    """Return the mean of ``output.argmax(axis=1) == label`` as a scalar."""
    agreement = nd.mean(output.argmax(axis=1) == label)
    return agreement.asscalar()
def r_square(pred, label):
    """Return sum((pred - mean(label))^2) / sum((label - mean(label))^2).

    NOTE(review): this is the explained-over-total sum-of-squares ratio, not
    ``1 - SS_res / SS_tot``; confirm this is the intended form.
    """
    # https://en.wikipedia.org/wiki/Coefficient_of_determination
    explained = ndarray.sum(ndarray.square(pred - ndarray.mean(label)))
    total = ndarray.sum(ndarray.square(label - ndarray.mean(label)))
    return explained / total
transform=lambda data, label: (data.astype(np.float32) / 255, label)), batch_size=32, shuffle=False) net = gluon.nn.Sequential() with net.name_scope(): net.add(gluon.nn.Dense(128, activation='relu')) net.add(gluon.nn.Dense(64, activation='relu')) net.add(gluon.nn.Dense(10)) net.collect_params().initialize(mx.init.Normal(sigma=0.05)) softmax_cross_entropy = gluon.loss.SoftmaxCrossEntropyLoss() trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': .1}) epochs = 10 for e in xrange(epochs): for i, (data, label) in enumerate(train_data): data = data.as_in_context(mx.cpu()).reshape((-1, 784)) label = label.as_in_context(mx.cpu()) with autograd.record(): output = net(data) loss = softmax_cross_entropy(output, label) loss.backward() trainer.step(data.shape[0]) cur_loss = ndarray.mean(loss).asscalar() print "Epoch {}. Current Loss: {}.".format(e, cur_loss)
def CapsuleNet(Reconstruction=True, epoch=100, batch_size=256, save_period=100,
               load_period=100, optimizer="adam", learning_rate=0.001,
               dataset="MNIST", ctx=mx.gpu(0)):
    """Train the capsule network on MNIST or FashionMNIST.

    Each epoch trains over ``train_data`` with the margin loss (plus
    0.0005 times the L2 reconstruction loss when ``Reconstruction`` is
    true), prints the last batch cost and the test accuracy, and saves the
    weights every ``save_period`` epochs.

    Returns:
        ``"optimization completed"``, or ``"The dataset does not exist."``
        for an unknown ``dataset``.
    """
    if dataset == "MNIST":
        # In the paper, 'Training is performed on 28x28 MNIST images have been
        # shifted by up to 2 pixels in each direction with zero padding',
        # but in this implementation the original data is not transformed
        # as above.
        train_data, test_data = MNIST(batch_size)
        path = "weights/MNIST-{}.params".format(load_period)
    elif dataset == "FashionMNIST":
        train_data, test_data = FashionMNIST(batch_size)
        path = "weights/FashionMNIST-{}.params".format(load_period)
    else:
        return "The dataset does not exist."

    # Convolution Neural Network
    # formula : output_size=((input−weights+2*Padding)/Stride)+1
    # data size
    # MNIST, FashionMNIST = (batch size , 1 , 28 , 28)

    # Routing_Iteration = 1 due to memory problem. It uses close to 5GB of memory.
    net = Network(batch_size=batch_size, Routing_Iteration=1)

    # What you need for 'hybridize' mode:
    # the 'DigitCaps' calculation needs 'batch_size' to be specified, so the
    # 'batch_size' of the test data and of the training data must be the same.
    net.hybridize()  # for faster learning and efficient memory use

    # weights initialization
    if not os.path.exists(path):
        print("initializing weights")
        net.collect_params().initialize(mx.init.Normal(sigma=0.01), ctx=ctx)
    else:
        print("loading weights")
        net.load_params(filename=path, ctx=ctx)

    # In the paper, 'including the exponentially decaying learning rate',
    # but in this implementation the learning_rate is multiplied by 0.99
    # every 10 steps.
    lr_scheduler = mx.lr_scheduler.FactorScheduler(step=10, factor=0.99)
    trainer_options = {"learning_rate": learning_rate,
                       "lr_scheduler": lr_scheduler}
    trainer = gluon.Trainer(net.collect_params(), optimizer, trainer_options)

    # learning
    for i in tqdm(range(1, epoch + 1, 1)):
        for data, label in train_data:
            data = data.as_in_context(ctx)
            label = label.as_in_context(ctx)

            with autograd.record(train_mode=True):
                output, reconstruction_output = net(data, label)
                # The margin loss is always present; the reconstruction term
                # is added on top when requested.
                loss = Margin_Loss()(output, label)
                if Reconstruction:
                    flat_input = data.reshape((batch_size, -1))
                    recon_loss = gluon.loss.L2Loss()(reconstruction_output, flat_input)
                    loss = loss + 0.0005 * recon_loss

            cost = nd.mean(loss).asscalar()
            loss.backward()
            trainer.step(batch_size, ignore_stale_grad=True)

        print(" epoch : {} , last batch cost : {}".format(i, cost))
        test_accuracy = evaluate_accuracy(test_data, net, ctx)
        print("Test_acc : {0:0.3f}%".format(test_accuracy * 100))

        # weight_save
        if i % save_period == 0:
            if not os.path.exists("weights"):
                os.makedirs("weights")
            print("saving weights")
            # ``dataset`` can only be one of the two supported names here.
            net.save_params("weights/{}-{}.params".format(dataset, i))

    test_accuracy = evaluate_accuracy(test_data, net, ctx)
    print("Test_acc : {0:0.3f}%".format(test_accuracy * 100))

    if Reconstruction:
        generate_image(test_data, net, ctx, dataset)
    return "optimization completed"
def Train(train, test, Debug, batch_size, lr, smoothing_constant, num_fc1,
          num_fc2, num_outputs, epochs, SNR, sl, pool_type, pool_size,
          pool_stride, params_init=None, period=None):
    """Generator that trains ``net_PLB`` and yields the training state.

    ``train`` and ``test`` are used like pandas DataFrames (``.drop``,
    ``.sigma.isnull()``); the label is 1 where ``sigma`` is not null.  Each
    training batch is paired with one test batch so a validation loss is
    recorded alongside every training step.

    Yields:
        ``(params, loss_history, loss_v_history, moving_loss_history,
        test_accuracy_history, train_accuracy_history, best_params_epoch)``.

    NOTE(review): the source had lost its indentation; the nesting of the
    per-batch metric prints and of the ``yield`` (placed once per epoch here)
    is reconstructed -- confirm against the original file.
    """
    num_examples = train.shape[0]
    # Convert the training set to NDArrays.
    y = nd.array(~train.sigma.isnull() + 0)
    X = nd.array(
        Normolise(
            train.drop([
                'mass', 'positions', 'gaps', 'max_peak', 'sigma', 'SNR_mf', 'SNR_mf0'
            ], axis=1)))
    print('Label for training:', y.shape)
    print('Dataset for training:', X.shape, end='\n\n')

    dataset_train = gluon.data.ArrayDataset(X, y)
    train_data = gluon.data.DataLoader(dataset_train, batch_size, shuffle=True, last_batch='discard')

    y = nd.array(~test.sigma.isnull() + 0)
    X = nd.array(
        Normolise(
            test.drop([
                'mass', 'positions', 'gaps', 'max_peak', 'sigma', 'SNR_mf', 'SNR_mf0'
            ], axis=1)))
    print('Label for testing:', y.shape)
    print('Dataset for testing:', X.shape, end='\n\n')

    # Use the gluon data module to read the data and build the test loader.
    # NOTE(review): the original comment said "no shuffle", but the loader is
    # created with shuffle=True.
    dataset_test = gluon.data.ArrayDataset(X, y)
    test_data = gluon.data.DataLoader(dataset_test, batch_size, shuffle=True, last_batch='discard')

    # Train
    loss_history = []
    loss_v_history = []
    moving_loss_history = []
    test_accuracy_history = []
    train_accuracy_history = []

    # assert period >= batch_size and period % batch_size == 0

    # Initialize parameters
    if params_init:
        print('Loading params...')
        params = params_init

        # [W1, b1, W2, b2, W3, b3, W4, b4, W5, b5, W6, b6, W7, b7] = params

        # # random fc layers
        # weight_scale = .01
        # W5 = nd.random_normal(loc=0, scale=weight_scale, shape=(sl, num_fc1), ctx=ctx )
        # W6 = nd.random_normal(loc=0, scale=weight_scale, shape=(num_fc1, num_fc2), ctx=ctx )
        # W7 = nd.random_normal(loc=0, scale=weight_scale, shape=(num_fc2, num_outputs), ctx=ctx )
        # b5 = nd.random_normal(shape=num_fc1, scale=weight_scale, ctx=ctx)
        # b6 = nd.random_normal(shape=num_fc2, scale=weight_scale, ctx=ctx)
        # b7 = nd.random_normal(shape=num_outputs, scale=weight_scale, ctx=ctx)

        # params = [W1, b1, W2, b2, W3, b3, W4, b4, W5, b5]
        # print('Random the FC1&2-layers...')

        # Fresh zero-initialised optimizer state for the supplied parameters.
        vs = []
        sqrs = []
        for param in params:
            param.attach_grad()
            vs.append(param.zeros_like())
            sqrs.append(param.zeros_like())
    else:
        # NOTE(review): layer sizes are hard-coded here; the num_fc1, num_fc2
        # and num_outputs arguments are not forwarded.
        params, vs, sqrs = init_params(num_fc1=128, num_fc2=64, num_outputs=2, sl=sl)
        print('Initiate weights from random...')

    # Debug
    if Debug:
        print('Debuging...')
        if params_init:
            params = params_init
        else:
            # This re-initialises the parameters created just above.
            params, vs, sqrs = init_params(num_fc1=128, num_fc2=64, num_outputs=2, sl=sl)
        # Grab a single batch and run one debug forward pass.
        for data, _ in train_data:
            data = data.as_in_context(ctx).reshape((batch_size, 1, 1, -1))
            break
        _, _ = net_PLB(data, params, debug=Debug, pool_type=pool_type, pool_size=pool_size, pool_stride=pool_stride)
        print()

    # total_loss = [Total_loss(train_data_10, params, batch_size, num_outputs)]

    t = 0  # Adam step counter
    # Epoch starts from 1.
    print('pool_type: ', pool_type)
    print('pool_size: ', pool_size)
    print('pool_stride: ', pool_stride)
    print('sl: ', sl)

    best_test_acc = 0
    best_params_epoch = 0

    for epoch in range(1, epochs + 1):
        Epoch_loss = []
        # Learning-rate self-decay; applied to the already-decayed value, so
        # the reduction compounds from epoch 3 onwards.
        if epoch > 2:
            # lr *= 0.1
            lr /= (1 + 0.01 * epoch)

        # zip() stops at the shorter of the two loaders.
        for batch_i, ((data, label), (data_v, label_v)) in enumerate(zip(train_data, test_data)):
            data = data.as_in_context(ctx).reshape((batch_size, 1, 1, -1))
            label = label.as_in_context(ctx)
            label_one_hot = nd.one_hot(label, num_outputs)
            with autograd.record():
                output, _ = net_PLB(data, params, pool_type=pool_type, pool_size=pool_size, pool_stride=pool_stride)
                loss = softmax_cross_entropy(output, label_one_hot)
            loss.backward()
            # print(output)
            # sgd(params, lr, batch_size)

            # Increment t before invoking adam.
            t += 1
            adam(params, vs, sqrs, lr, batch_size, t)

            # Validation loss on the paired test batch (no gradient recorded).
            data_v = data_v.as_in_context(ctx).reshape((batch_size, 1, 1, -1))
            label_v = label_v.as_in_context(ctx)
            label_v_one_hot = nd.one_hot(label_v, num_outputs)
            output_v, _ = net_PLB(data_v, params, pool_type=pool_type, pool_size=pool_size, pool_stride=pool_stride)
            loss_v = softmax_cross_entropy(output_v, label_v_one_hot)

            # #########################
            #  Keep a moving average of the losses
            # #########################
            curr_loss = nd.mean(loss).asscalar()
            curr_loss_v = nd.mean(loss_v).asscalar()
            # Seeded with the very first batch of the first epoch.
            moving_loss = (curr_loss if ((batch_i == 0) and (epoch - 1 == 0))
                           else (1 - smoothing_constant) * moving_loss + (smoothing_constant) * curr_loss)

            loss_history.append(curr_loss)
            loss_v_history.append(curr_loss_v)
            moving_loss_history.append(moving_loss)
            Epoch_loss.append(curr_loss)
            # if batch_i * batch_size % period == 0:
            #     print('Curr_loss: ', curr_loss)
            # print('Working on epoch %d. Curr_loss: %.5f (complete percent: %.2f/100' %(epoch, curr_loss*1.0, 1.0 * batch_i / (num_examples//batch_size) * 100) +')' , end='')
            # sys.stdout.write("\r")
            # print('{"metric": "Training Loss for ALL", "value": %.5f}' %(curr_loss*1.0) )
            # print('{"metric": "Testing Loss for ALL", "value": %.5f}' %(curr_loss_v*1.0) )
            print('{"metric": "Training Loss for SNR=%s", "value": %.5f}' % (str(SNR), curr_loss * 1.0))
            print('{"metric": "Testing Loss for SNR=%s", "value": %.5f}' % (str(SNR), curr_loss_v * 1.0))

        # NOTE(review): num_examples is the training-set size but is also
        # passed when evaluating test_data -- confirm what evaluate_accuracy
        # expects.
        test_accuracy = evaluate_accuracy(test_data, num_examples, batch_size, params, net_PLB, pool_type=pool_type, pool_size=pool_size, pool_stride=pool_stride)
        train_accuracy = evaluate_accuracy(train_data, num_examples, batch_size, params, net_PLB, pool_type=pool_type, pool_size=pool_size, pool_stride=pool_stride)
        test_accuracy_history.append(test_accuracy)
        train_accuracy_history.append(train_accuracy)

        # ``>=`` means a tie moves the "best" marker to the later epoch.
        if test_accuracy >= best_test_acc:
            best_test_acc = test_accuracy
            best_params_epoch = epoch

        # print("Epoch %d, Moving_loss: %.6f, Epoch_loss(mean): %.6f, Train_acc %.4f, Test_acc %.4f" %
        #       (epoch, moving_loss, np.mean(Epoch_loss), train_accuracy, test_accuracy))
        print('{"metric": "Train_acc. for SNR=%s in epoches", "value": %.4f}' % (str(SNR), train_accuracy))
        print('{"metric": "Test_acc. for SNR=%s in epoches", "value": %.4f}' % (str(SNR), test_accuracy))
        yield (params, loss_history, loss_v_history, moving_loss_history, test_accuracy_history, train_accuracy_history, best_params_epoch)
def cnn():
    """Generate image sets, train a small convolutional net and test it.

    Builds 36x36 training and test sets with ``nmd.makeDataSet``, trains a
    Conv2D -> MaxPool2D -> Dense -> Dense network for four epochs with
    RMSProp and an L2 loss, printing accuracy and loss per epoch, then prints
    the test accuracy and displays the first ten test images.
    """
    # Format options for numpy
    np.set_printoptions(precision=3, suppress=True)

    n = 36  # images are n x n
    trset_size = 30000
    teset_size = 1000

    # Training set: generate the images, then shape them as single-plane 2d
    # images with one scalar label each (not a 2-way one-hot).
    print('Generating training set...')
    raw_images, raw_labels = nmd.makeDataSet(n, trset_size, training=True)
    Xtrain = np.array(raw_images).reshape([-1, n, n, 1])
    Ytrain = np.array([[y] for y in raw_labels], dtype=np.float32)
    # Gluon does the mini-batching through the DataLoader's batch_size.
    train_data = mx.gluon.data.DataLoader(MyaDevDataset(Xtrain, Ytrain),
                                          batch_size=100, shuffle=True)

    # Test set: same formatting, evaluated one image at a time.
    print('Generating test set...')
    raw_images, raw_labels = nmd.makeDataSet(n, teset_size, training=False)
    Xtest = np.array(raw_images).reshape([-1, n, n, 1])
    Ytest = np.array([[y] for y in raw_labels], dtype=np.float32)
    test_data = mx.gluon.data.DataLoader(MyaDevDataset(Xtest, Ytest),
                                         batch_size=1, shuffle=False)

    # Key hyperparameters, kept together for reference.
    k = 5             # kernels are k x k
    nc = n - (k - 1)  # convolving n x n (stride 1, valid) gives nc x nc
    ps = 2            # ps x ps pooling (stride ps)
    assert nc % ps == 0  # pools should evenly divide the convolved image
    nf = 7            # number of kernel filters
    nh = 11           # neurons in the hidden layer

    # Define the network
    net = gluon.nn.Sequential()
    with net.name_scope():
        net.add(gluon.nn.Conv2D(channels=nf, kernel_size=k, use_bias=True,
                                activation='relu'))
        net.add(gluon.nn.MaxPool2D(pool_size=ps, strides=ps))
        net.add(gluon.nn.Flatten())
        net.add(gluon.nn.Dense(nh, activation="relu", use_bias=True))
        net.add(gluon.nn.Dense(1, activation="sigmoid", use_bias=True))  # Output layer

    # Normal-distributed initial parameters, mean squared error loss, RMSProp.
    net.collect_params().initialize(mx.init.Normal(sigma=0.05))
    mean_squared_error = gluon.loss.L2Loss()
    trainer = gluon.Trainer(net.collect_params(), 'rmsprop', {'learning_rate': .01})

    # Begin training
    print('Training...')
    max_epochs = 4
    for e in range(max_epochs):
        correct = 0  # correct results across the epoch, for the accuracy
        for data, label in train_data:
            # Run on the cpu (gpu is another option) and swap axes 3 and 1.
            data = data.as_in_context(mx.cpu()).swapaxes(3, 1)
            label = label.as_in_context(mx.cpu())
            with autograd.record():  # record the derivatives
                output = net(data)   # forward pass
                loss = mean_squared_error(output, label)
                # Only for reporting; does not affect learning.
                correct += accuracy(output, label)
                loss.backward()      # backprop
            trainer.step(data.shape[0])
            curr_loss = ndarray.mean(loss).asscalar()  # also for reporting
        acc = correct / trset_size
        print("Epoch {}. Current Accuracy: {}. Current Loss: {}.".format(
            e, acc, curr_loss))

    # Begin testing
    print('Testing...')
    count = 0  # correct results across the entire test
    for i, (data, label) in enumerate(test_data):
        data = data.as_in_context(mx.cpu()).swapaxes(3, 1)
        label = label.as_in_context(mx.cpu())
        output = net(data)
        count += accuracy(output, label)
        # Show the first 10 example images with expected/actual values.
        if i < 10:
            img = data.swapaxes(3, 1)
            display_image(img[0])
            print("expected: " + str(label) + "| actual: " + str(output))
    acc = count / teset_size
    print("Test accuracy: {}".format(acc))
def muitlclass_logistic_regression(epoch=100, batch_size=128, save_period=10,
                                   load_period=100, optimizer="sgd",
                                   learning_rate=0.01, dataset="MNIST",
                                   ctx=mx.gpu(0)):
    """Train a single-layer softmax classifier on MNIST, CIFAR10 or FashionMNIST.

    Args:
        epoch: number of training epochs.
        batch_size: batch size handed to the dataset helper.
        save_period: weights are saved every ``save_period`` epochs.
        load_period: epoch number of the checkpoint to resume from, if present.
        optimizer: optimizer name passed to ``gluon.Trainer``.
        learning_rate: optimizer learning rate.
        dataset: ``"MNIST"``, ``"CIFAR10"`` or ``"FashionMNIST"``.
        ctx: MXNet context for parameters and data.

    Returns:
        ``"optimization completed"``, or ``"The dataset does not exist."``
        for an unknown ``dataset``.
    """
    # data selection
    if dataset == "MNIST":
        train_data, test_data = MNIST(batch_size)
    elif dataset == "CIFAR10":
        train_data, test_data = CIFAR10(batch_size)
    elif dataset == "FashionMNIST":
        train_data, test_data = FashionMNIST(batch_size)
    else:
        return "The dataset does not exist."
    path = "weights/{}-{}.params".format(dataset, load_period)

    # Follow these steps:
    # •Define network
    # •Initialize parameters
    # •Loop over inputs
    # •Forward input through network to get output
    # •Compute loss with output and label
    # •Backprop gradient
    # •Update parameters with gradient descent.

    # logistic regression network
    net = gluon.nn.Sequential()  # stacks 'Block's sequentially
    with net.name_scope():
        net.add(gluon.nn.Dense(units=10, activation=None, use_bias=True))  # linear activation

    # weight initialization
    if os.path.exists(path):
        print("loading weights")
        net.load_params(filename=path, ctx=ctx)  # weights load
    else:
        print("initializing weights")
        net.collect_params().initialize(mx.init.Normal(sigma=1.), ctx=ctx)

    # optimizer
    trainer = gluon.Trainer(net.collect_params(), optimizer,
                            {"learning_rate": learning_rate})
    # The loss object is stateless, so build it once instead of per batch.
    softmax_loss = gluon.loss.SoftmaxCrossEntropyLoss()

    for i in tqdm(range(1, epoch + 1, 1)):
        for data, label in train_data:
            if dataset == "CIFAR10":
                # Keep only index 0 along axis 3.
                data = nd.slice_axis(data=data, axis=3, begin=0, end=1)

            # Flatten using the real batch length so a short final batch is
            # not silently reshaped into the wrong feature width.
            actual_batch = data.shape[0]
            data = data.as_in_context(ctx).reshape((actual_batch, -1))
            label = label.as_in_context(ctx)

            with autograd.record(train_mode=True):
                output = net(data)
                # loss definition
                loss = softmax_loss(output, label)
                cost = nd.mean(loss).asscalar()
            loss.backward()
            trainer.step(actual_batch, ignore_stale_grad=True)
        print(" epoch : {} , last batch cost : {}".format(i, cost))

        # weight_save
        if i % save_period == 0:
            if not os.path.exists("weights"):
                os.makedirs("weights")
            print("saving weights")
            # ``dataset`` is one of the three supported names at this point.
            net.save_params("weights/{}-{}.params".format(dataset, i))

    test_accuracy = evaluate_accuracy(test_data, net, ctx, dataset)
    print("Test_acc : {}".format(test_accuracy[1]))
    return "optimization completed"
epochs = 5
smoothing_constant = 0.01
niter = 0
losses = []
moving_loss = 0

for e in range(epochs):
    total_loss = 0
    for data, label in data_iter:
        with autograd.record():
            output = net(data)
            loss = make_loss(output, label)
        loss.backward()
        trainer.step(batch_size)

        # Accumulate the summed loss of the epoch so far; the original never
        # updated total_loss, so "Average loss" always printed 0.
        total_loss += nd.sum(loss).asscalar()

        niter += 1
        curr_loss = nd.mean(loss).asscalar()
        moving_loss = (1 - smoothing_constant) * moving_loss + (smoothing_constant) * curr_loss
        # Bias-corrected estimate: moving_loss starts at 0, so divide by
        # 1 - (1 - c)^niter.
        est_loss = moving_loss/(1-(1-smoothing_constant)**niter)

        # NOTE(review): nesting reconstructed -- the sample and the report
        # are both taken every 100 iterations; confirm.
        if (niter + 1) % 100 == 0:
            losses.append(est_loss)
            print("Epoch %s, batch %s. Moving avg of loss: %s. Average loss: %f" % (e, niter, est_loss, total_loss/num_examples))

plot(losses, X)
print(dense.weight.data())
print(dense.bias.data())
# help(trainer.step)
# help(dense.weight)
def evaluate_accuracy(data_iterator, net):
    """Return the mean of the per-batch accuracies of ``net``.

    Each batch from ``data_iterator`` is run through ``net`` and scored
    with ``accuracy``; the scores are summed and divided by
    ``len(data_iterator)``.
    """
    running_total = 0.
    for features, targets in data_iterator:
        predictions = net(features)
        running_total += accuracy(predictions, targets)
    return running_total / len(data_iterator)


learning_rate = .1

for epoch in range(5):
    train_loss = 0.
    train_acc = 0.
    for data, label in train_data:
        with autograd.record():
            output = net(data)
            loss = cross_entropy(output, label)
        loss.backward()
        # Average the gradient over the batch so that the learning rate
        # is less sensitive to the batch size.
        SGD(params, learning_rate / batch_size)

        train_loss += nd.mean(loss).asscalar()
        train_acc += accuracy(output, label)

    test_acc = evaluate_accuracy(test_data, net)
    mean_train_loss = train_loss / len(train_data)
    mean_train_acc = train_acc / len(train_data)
    print("Epoch %d. Loss: %f, Train acc %f, Test acc %f" % (
        epoch, mean_train_loss, mean_train_acc, test_acc))
def __call__(self, p: float, p_hat: NDArray) -> NDArray:
    """Return ``self._alpha`` times the mean of ``self._bce`` against ``p``.

    A constant array with the shape of ``p_hat`` is filled with ``p`` and
    passed, together with ``p_hat``, to ``self._bce``; the result is
    averaged and scaled by ``self._alpha``.
    """
    # Constant target with the same shape as p_hat, every entry equal to p.
    target = full(p_hat.shape, p)
    per_element = self._bce(p_hat, target)
    return self._alpha * mean(per_element)
def accuracy(output, label):
    """Return the fraction of rows whose arg-max over axis 1 equals ``label``.

    ``output`` holds the predicted outputs and ``label`` the true labels.
    """
    predicted = output.argmax(axis=1)
    hits = predicted == label
    return nd.mean(hits).asscalar()
#### Use the softmax cross-entropy loss
# Softmax and the cross-entropy loss function.
# Softmax regression computes exp(x_i) / sum(exp(x_j)), normalising the
# scores so that the 10 class probabilities sum to 1.
# The cross-entropy loss uses the negative cross entropy of two probability
# distributions as the objective; minimising it is equivalent to maximising
# the similarity of the two distributions.
# It measures the predictive ability of the model.
softmax_cross_entropy = gluon.loss.SoftmaxCrossEntropyLoss()

### Optimise the model
# Train with stochastic gradient descent (sgd),
# with the learning-rate hyper-parameter set to .1.
trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': .1})

epochs = 10

## Training
for e in range(epochs):  # one full pass over the training set per epoch
    train_loss = 0.  # accumulated loss
    train_acc = 0.  # accumulated accuracy
    for i, (data, label) in enumerate(train_data):  # each batch of samples and labels
        data = data.as_in_context(mx.cpu())
        data = data.reshape((-1, 784))  # flatten 28*28 into 1*784
        label = label.as_in_context(mx.cpu())

        with autograd.record():  # record for automatic differentiation
            output = net(data)  # forward pass
            loss = softmax_cross_entropy(output, label)  # compute the loss
        loss.backward()  # backward pass
        trainer.step(data.shape[0])  # update parameters; data.shape[0] is the batch size

        # Provide stats on the improvement of the model over each epoch
        train_loss += ndarray.mean(loss).asscalar()  # mean loss of this batch
        train_acc += utils.accuracy(output, label)  # accuracy of this batch

    test_acc = utils.evaluate_accuracy(test_data, net)  # accuracy on the test set
    mean_train_loss = train_loss/len(train_data)
    mean_train_acc = train_acc/len(train_data)
    print("遍历训练集次数 {}. 训练误差: {}. 训练准确度: {}. 测试准确度: {}.".format(
        e, mean_train_loss, mean_train_acc, test_acc))