def test_word_embedding_analogy_evaluation_models(analogy_function):
    dataset = nlp.data.GoogleAnalogyTestSet()
    dataset = [d for i, d in enumerate(dataset) if i < 10]

    embedding = nlp.embedding.create('fasttext', source='wiki.simple',
                                     embedding_root='tests/data/embedding')
    counter = nlp.data.utils.Counter(embedding.idx_to_token)
    vocab = nlp.vocab.Vocab(counter)
    vocab.set_embedding(embedding)

    dataset_coded = [[vocab[d[0]], vocab[d[1]], vocab[d[2]], vocab[d[3]]]
                     for d in dataset]
    dataset_coded_nd = nd.array(dataset_coded)

    for k in [1, 3]:
        for exclude_question_words in [True, False]:
            evaluator = nlp.embedding.evaluation.WordEmbeddingAnalogy(
                idx_to_vec=vocab.embedding.idx_to_vec,
                analogy_function=analogy_function, k=k,
                exclude_question_words=exclude_question_words)
            evaluator.initialize()

            words1 = dataset_coded_nd[:, 0]
            words2 = dataset_coded_nd[:, 1]
            words3 = dataset_coded_nd[:, 2]
            pred_idxs = evaluator(words1, words2, words3)

            # If we don't exclude the question words, most predictions should be wrong
            words4 = dataset_coded_nd[:, 3]
            accuracy = nd.mean(pred_idxs[:, 0] == nd.array(words4))
            accuracy = accuracy.asscalar()
            if not exclude_question_words:
                assert accuracy <= 0.1

                # Instead, the model predicts the third question word most of the time
                accuracy_w3 = nd.mean(pred_idxs[:, 0] == nd.array(words3))
                assert accuracy_w3.asscalar() >= 0.89

            else:
                # The wiki.simple vectors don't perform too well
                assert accuracy >= 0.29

            # Assert output shape
            assert pred_idxs.shape[1] == k
Example #2
    def backward(self, out_grads=None):
        #print('in backward')
        assert self.binded and self.params_initialized
        #tmp_ctx = self._ctx_cpu
        tmp_ctx = self._ctx_single_gpu
        fc7_outs = []
        ctx_fc7_max = self.get_ndarray(tmp_ctx, 'ctx_fc7_max', (self._batch_size, len(self._context)))
        #local_fc7_max = nd.zeros( (self.global_label.shape[0],1), ctx=mx.cpu())
        arcface_module_outputs = []
        for i, _module in enumerate(self._arcface_modules):
          #_fc7 = _module.get_outputs(merge_multi_context=True)[0]
          out = _module.get_outputs(merge_multi_context=True)
          #print(out[0].shape)
          #print(out[1].shape)
          arcface_module_outputs.append(out)
          _fc7 = out[0]
          fc7_outs.append(_fc7)
          _fc7_max = nd.max(_fc7, axis=1).as_in_context(tmp_ctx)
          ctx_fc7_max[:,i] = _fc7_max

        local_fc7_max = self.get_ndarray(tmp_ctx, 'local_fc7_max', (self._batch_size, 1))
        nd.max(ctx_fc7_max, axis=1, keepdims=True, out=local_fc7_max)
        global_fc7_max = local_fc7_max
        #local_fc7_sum = None
        local_fc7_sum = self.get_ndarray(tmp_ctx, 'local_fc7_sum', (self._batch_size,1))
        local_fc7_sum[:,:] = 0.0
        for i, _module in enumerate(self._arcface_modules):
          _max = self.get_ndarray2(fc7_outs[i].context, 'fc7_max', global_fc7_max)
          fc7_outs[i] = nd.broadcast_sub(fc7_outs[i], _max)
          fc7_outs[i] = nd.exp(fc7_outs[i])
          _sum = nd.sum(fc7_outs[i], axis=1, keepdims=True).as_in_context(tmp_ctx)
          local_fc7_sum += _sum
        global_fc7_sum = local_fc7_sum

        if self._iter%self._verbose==0:
          #_ctx = self._context[-1]
          _ctx = self._ctx_cpu
          _probs = []
          for i, _module in enumerate(self._arcface_modules):
            _prob = self.get_ndarray2(_ctx, '_fc7_prob_%d'%i, fc7_outs[i])
            _probs.append(_prob)
          fc7_prob = self.get_ndarray(_ctx, 'test_fc7_prob', (self._batch_size, self._ctx_num_classes*len(self._context)))
          nd.concat(*_probs, dim=1, out=fc7_prob)
          fc7_pred = nd.argmax(fc7_prob, axis=1)
          local_label = self.global_label - self._local_class_start
          #local_label = self.get_ndarray2(_ctx, 'test_label', local_label)
          _pred = nd.equal(fc7_pred, local_label)
          print('{fc7_acc}', self._iter, nd.mean(_pred).asnumpy()[0])


        #local_fc1_grad = []
        #fc1_grad_ctx = self._ctx_cpu
        fc1_grad_ctx = self._ctx_single_gpu
        local_fc1_grad = self.get_ndarray(fc1_grad_ctx, 'local_fc1_grad', (self._batch_size,self._emb_size))
        local_fc1_grad[:,:] = 0.0
        total_eloss = []
        celoss_verbose = 1000
        if self._iter%celoss_verbose==0:
          fc7_celoss = self.get_ndarray(tmp_ctx, 'test_fc7_celoss', (self._batch_size,))
          fc7_celoss[:] = 0.0

        for i, _module in enumerate(self._arcface_modules):
          _sum = self.get_ndarray2(fc7_outs[i].context, 'fc7_sum', global_fc7_sum)
          fc7_outs[i] = nd.broadcast_div(fc7_outs[i], _sum)
          a = i*self._ctx_num_classes
          b = (i+1)*self._ctx_num_classes
          _label = self.global_label - self._ctx_class_start[i]
          _label = self.get_ndarray2(fc7_outs[i].context, 'label', _label)
          onehot_label = self.get_ndarray(fc7_outs[i].context, 'label_onehot', (self._batch_size, self._ctx_num_classes))
          nd.one_hot(_label, depth=self._ctx_num_classes, on_value = 1.0, off_value = 0.0, out=onehot_label)
          #print(fc7_outs[i].shape, onehot_label.shape)

          if self._iter%celoss_verbose==0:
            _ce_loss = fc7_outs[i] * onehot_label
            _ce_loss = nd.sum(_ce_loss, axis=1)
            fc7_celoss += _ce_loss.as_in_context(tmp_ctx)
          fc7_outs[i] -= onehot_label

          out = arcface_module_outputs[i]
          out_grads = [fc7_outs[i]]
          for j in range(1, len(out)):
              eloss = out[j]
              #print('eloss%d:'%j, eloss.shape)
              #print(out_grads[0].shape)
              #egrad_shape = (out_grads[0].shape[0], eloss.shape[0])
              egrad_shape = eloss.shape
              egrad = self.get_ndarray(fc7_outs[i].context, 'egrad%d'%j, egrad_shape)
              #egrad[:][:] = 1.0/egrad_shape[0]
              egrad[:][:] = 1.0
              out_grads.append(egrad)
              if self._iter%self._verbose==0:
                  total_eloss.append(np.mean(eloss.asnumpy()))

          _module.backward(out_grads = out_grads)
          #ctx_fc1_grad = _module.get_input_grads()[0].as_in_context(mx.cpu())
          ctx_fc1_grad = self.get_ndarray2(fc1_grad_ctx, 'ctx_fc1_grad_%d'%i, _module.get_input_grads()[0])
          local_fc1_grad += ctx_fc1_grad

        if self._iter%self._verbose==0 and len(total_eloss)>0:
          print('{eloss}', self._iter, np.mean(total_eloss))
        #if self._iter%self._verbose==0:
        if self._iter%celoss_verbose==0:
          ce_loss = nd.log(fc7_celoss) * -1.0
          ce_loss = nd.mean(ce_loss)
          print('CELOSS,%d,%f' % (self._iter, ce_loss.asscalar()))

        global_fc1_grad = local_fc1_grad
        self._curr_module.backward(out_grads = [global_fc1_grad])
Example #3
def LSTM(epoch=100,
         batch_size=100,
         save_period=100,
         load_period=100,
         learning_rate=0.1,
         ctx=mx.gpu(0)):

    train_data, test_data = FashionMNIST(batch_size)

    #network parameter
    time_step = 28
    num_inputs = 28
    num_hidden = 200
    num_outputs = 10

    path = "weights/FashionMNIST_LSTMweights-{}".format(load_period)

    if os.path.exists(path):

        print("loading weights")
        [
            wxhf, wxhi, wxho, wxhg, whhf, whhi, whho, whhg, bhf, bhi, bho, bhg,
            why, by
        ] = nd.load(path)  # weights load
        wxhf = wxhf.as_in_context(ctx)
        wxhi = wxhi.as_in_context(ctx)
        wxho = wxho.as_in_context(ctx)
        wxhg = wxhg.as_in_context(ctx)

        whhf = whhf.as_in_context(ctx)
        whhi = whhi.as_in_context(ctx)
        whho = whho.as_in_context(ctx)
        whhg = whhg.as_in_context(ctx)

        bhf = bhf.as_in_context(ctx)
        bhi = bhi.as_in_context(ctx)
        bho = bho.as_in_context(ctx)
        bhg = bhg.as_in_context(ctx)

        why = why.as_in_context(ctx)
        by = by.as_in_context(ctx)
        params = [
            wxhf, wxhi, wxho, wxhg, whhf, whhi, whho, whhg, bhf, bhi, bho, bhg,
            why, by
        ]

    else:
        print("initializing weights")

        with ctx:
            wxhf = nd.random.normal(loc=0,
                                    scale=0.01,
                                    shape=(num_hidden, num_inputs))
            wxhi = nd.random.normal(loc=0,
                                    scale=0.01,
                                    shape=(num_hidden, num_inputs))
            wxho = nd.random.normal(loc=0,
                                    scale=0.01,
                                    shape=(num_hidden, num_inputs))
            wxhg = nd.random.normal(loc=0,
                                    scale=0.01,
                                    shape=(num_hidden, num_inputs))

            whhf = nd.random.normal(loc=0,
                                    scale=0.01,
                                    shape=(num_hidden, num_hidden))
            whhi = nd.random.normal(loc=0,
                                    scale=0.01,
                                    shape=(num_hidden, num_hidden))
            whho = nd.random.normal(loc=0,
                                    scale=0.01,
                                    shape=(num_hidden, num_hidden))
            whhg = nd.random.normal(loc=0,
                                    scale=0.01,
                                    shape=(num_hidden, num_hidden))

            bhf = nd.random.normal(loc=0, scale=0.01, shape=(num_hidden, ))
            bhi = nd.random.normal(loc=0, scale=0.01, shape=(num_hidden, ))
            bho = nd.random.normal(loc=0, scale=0.01, shape=(num_hidden, ))
            bhg = nd.random.normal(loc=0, scale=0.01, shape=(num_hidden, ))

            why = nd.random.normal(loc=0,
                                   scale=0.1,
                                   shape=(num_outputs, num_hidden))
            by = nd.random.normal(loc=0, scale=0.1, shape=(num_outputs, ))

        params = [
            wxhf, wxhi, wxho, wxhg, whhf, whhi, whho, whhg, bhf, bhi, bho, bhg,
            why, by
        ]

    # attach gradient!!!
    for param in params:
        param.attach_grad()

    # LSTM cell unrolled over the time steps, with a fully connected softmax output layer
    def LSTM_Cell(input, h_state, c_state):
        for x in input:
            f_t = nd.Activation(nd.FullyConnected(
                data=x, weight=wxhf, no_bias=True, num_hidden=num_hidden) +
                                nd.FullyConnected(data=h_state,
                                                  weight=whhf,
                                                  no_bias=True,
                                                  num_hidden=num_hidden) + bhf,
                                act_type="sigmoid")
            i_t = nd.Activation(nd.FullyConnected(
                data=x, weight=wxhi, no_bias=True, num_hidden=num_hidden) +
                                nd.FullyConnected(data=h_state,
                                                  weight=whhi,
                                                  no_bias=True,
                                                  num_hidden=num_hidden) + bhi,
                                act_type="sigmoid")
            o_t = nd.Activation(nd.FullyConnected(
                data=x, weight=wxho, no_bias=True, num_hidden=num_hidden) +
                                nd.FullyConnected(data=h_state,
                                                  weight=whho,
                                                  no_bias=True,
                                                  num_hidden=num_hidden) + bho,
                                act_type="sigmoid")
            g_t = nd.Activation(nd.FullyConnected(
                data=x, weight=wxhg, no_bias=True, num_hidden=num_hidden) +
                                nd.FullyConnected(data=h_state,
                                                  weight=whhg,
                                                  no_bias=True,
                                                  num_hidden=num_hidden) + bhg,
                                act_type="tanh")
            c_state = nd.multiply(f_t, c_state) + nd.multiply(i_t, g_t)
            h_state = nd.multiply(o_t, nd.tanh(c_state))

        output = nd.FullyConnected(data=h_state,
                                   weight=why,
                                   bias=by,
                                   num_hidden=num_outputs)
        output = nd.softmax(data=output)
        return output, h_state, c_state

    def cross_entropy(output, label):
        return -nd.sum(label * nd.log(output), axis=0, exclude=True)

    #Adam optimizer
    state = []
    optimizer = mx.optimizer.Adam(rescale_grad=1, learning_rate=learning_rate)

    for param in params:
        state.append(optimizer.create_state(0, param))

    for i in tqdm(range(1, epoch + 1, 1)):

        for data, label in train_data:

            h_state = nd.zeros(shape=(data.shape[0], num_hidden), ctx=ctx)
            c_state = nd.zeros(shape=(data.shape[0], num_hidden), ctx=ctx)

            data = data.as_in_context(ctx)
            data = data.reshape(shape=(-1, time_step, num_inputs))
            data = nd.transpose(data=data, axes=(1, 0, 2))
            label = label.as_in_context(ctx)
            label = nd.one_hot(label, num_outputs)

            with autograd.record():
                outputs, h_state, c_state = LSTM_Cell(data, h_state, c_state)
                loss = cross_entropy(outputs, label)  # (batch_size,)
            loss.backward()

            cost = nd.mean(loss).asscalar()
            for j, param in enumerate(params):
                optimizer.update(0, param, param.grad, state[j])

        test_accuracy = evaluate_accuracy(test_data, time_step, num_inputs,
                                          num_hidden, LSTM_Cell, ctx)
        print(" epoch : {} , last batch cost : {}".format(i, cost))
        print("Test_acc : {0:0.3f}%".format(test_accuracy * 100))

        #weight_save
        if i % save_period == 0:
            if not os.path.exists("weights"):
                os.makedirs("weights")
            print("saving weights")
            nd.save("weights/FashionMNIST_LSTMweights-{}".format(i), params)

    test_accuracy = evaluate_accuracy(test_data, time_step, num_inputs,
                                      num_hidden, LSTM_Cell, ctx)
    print("Test_acc : {0:0.3f}%".format(test_accuracy * 100))
    return "optimization completed"
Example #4
def CNN_Autoencoder(epoch = 100 , batch_size=128, save_period=10 , load_period=100 ,optimizer="sgd",learning_rate= 0.01 , dataset = "MNIST", ctx=mx.gpu(0)):

    #data selection
    if dataset =="MNIST":
        train_data , test_data = MNIST(batch_size)
        path = "weights/MNIST-{}.params".format(load_period)
    elif dataset == "FashionMNIST":
        train_data, test_data = FashionMNIST(batch_size)
        path = "weights/FashionMNIST-{}.params".format(load_period)
    else:
        return "The dataset does not exist."
    
    '''Follow these steps:

    •Define network
    •Initialize parameters
    •Loop over inputs
    •Forward input through network to get output
    •Compute loss with output and label
    •Backprop gradient
    •Update parameters with gradient descent.
    '''

    '''Brief description of deconvolution.
    I was confused when I first heard about deconvolution,
    but it is just the opposite of convolution.
    The formulas are as follows.

    The convolution formula is:    output_size = ((input_size + 2*pad - kernel_size) / stride) + 1

    The deconvolution formula is:  output_size = stride*(input_size - 1) + kernel_size - 2*pad

    '''
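    # A quick sanity check of the two formulas above for this network (28x28
    # MNIST/FashionMNIST input, 3x3 kernels, stride 1, no padding, as defined below):
    #   encoder (convolution):    28 -> 26 -> 24 -> 22 -> 20
    #   decoder (deconvolution):  20 -> 22 -> 24 -> 26 -> 28
    # e.g. one conv step: (28 + 2*0 - 3)/1 + 1 = 26, one deconv step: 1*(20 - 1) + 3 - 2*0 = 22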
    '''
    imperative vs symbolic
    One main reason the network is faster after hybridizing is that we no longer need to repeatedly invoke the Python forward function,
    and all computations are kept within the highly efficient C++ backend engine.
    The potential drawback is the loss of flexibility when writing the forward function.
    In other words, inserting print statements for debugging, or control logic such as if and for, into the forward function is no longer possible.

    '''
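    # A minimal sketch of that tradeoff (illustrative only, not part of this model):
    # after net.hybridize(), hybrid_forward is traced with symbolic inputs, so
    # inspecting values or branching on them inside it no longer works:
    #
    #   class Toy(gluon.nn.HybridBlock):
    #       def hybrid_forward(self, F, x):
    #           # print(x.asnumpy())  # fine in imperative mode, fails once hybridized
    #           return F.relu(x)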


    #convolution autoencoder 
    #net = gluon.nn.Sequential() # stacks 'Block's sequentially
    net = gluon.nn.HybridSequential() #using symbolic for faster learning
    with net.name_scope():

        # FashionMNIST or MNIST : result = ( batch size , 60 , 26 , 26)
        net.add(gluon.nn.Conv2D(channels=60 , kernel_size=(3,3) , strides=(1,1) , activation='relu' , use_bias=True)) 
        # FashionMNIST or MNIST : result = ( batch size , 30 , 24 , 24)
        net.add(gluon.nn.Conv2D(channels=30 , kernel_size=(3,3) , strides=(1,1) , activation='relu' , use_bias=True)) 
        # FashionMNIST or MNIST : result = ( batch size , 15 , 22 , 22)
        net.add(gluon.nn.Conv2D(channels=15 , kernel_size=(3,3) , strides=(1,1) , activation='relu' , use_bias=True)) 
        
        # FashionMNIST : result = ( batch size , 10 , 20 , 20)
        net.add(gluon.nn.Conv2D(channels=10 , kernel_size=(3,3) , strides=(1,1) , activation='relu' , use_bias=True)) 

        # FashionMNIST or MNIST : result = ( batch size , 15 , 22 , 22)
        net.add(gluon.nn.Conv2DTranspose(channels=15 , kernel_size=(3,3) , strides=(1, 1) , activation='relu' , use_bias=True))
        # FashionMNIST or MNIST  : result = ( batch size , 30 , 24 , 24)
        net.add(gluon.nn.Conv2DTranspose(channels=30 , kernel_size=(3,3) , strides=(1, 1) , activation='relu' , use_bias=True))
        # FashionMNIST or MNIST  : result = ( batch size , 60 , 26 , 26)
        net.add(gluon.nn.Conv2DTranspose(channels=60 , kernel_size=(3,3) , strides=(1, 1) , activation='relu' , use_bias=True))
        # FashionMNIST or MNIST : result = ( batch size , 1 , 28 , 28)
        net.add(gluon.nn.Conv2DTranspose(channels=1 , kernel_size=(3,3) , strides=(1, 1) , activation='sigmoid' , use_bias=True))

    net.hybridize() # using symbolic for faster learning

    #weights initialization
    if os.path.exists(path):
        print("loading weights")
        net.load_params(filename=path , ctx=ctx) # weights load
    else:
        print("initializing weights")
        net.collect_params().initialize(mx.init.Normal(sigma=0.1),ctx=ctx) # weights initialization
        #net.initialize(mx.init.Normal(sigma=0.1),ctx=ctx) # weights initialization

    #optimizer
    trainer = gluon.Trainer(net.collect_params() , optimizer, {"learning_rate" : learning_rate})

    #learning
    for i in tqdm(range(1,epoch+1,1)):
        for data , label in train_data:

            data = data.as_in_context(ctx)
            data_ = data

            with autograd.record(train_mode=True):
                output=net(data)

                #loss definition
                loss=gluon.loss.L2Loss()(output,data_)
                cost=nd.mean(loss).asscalar()

            loss.backward()
            trainer.step(batch_size,ignore_stale_grad=True)

        print(" epoch : {} , last batch cost : {}".format(i,cost))

        #weight_save
        if i % save_period==0:

            if not os.path.exists("weights"):
                os.makedirs("weights")

            print("saving weights")
            if dataset=="FashionMNIST":
                net.save_params("weights/FashionMNIST-{}.params".format(i))
            elif dataset=="MNIST":
                net.save_params("weights/MNIST-{}.params".format(i))

    #show image
    generate_image(test_data , net , ctx ,dataset)

    return "optimization completed"
Example #5
def accuracy(output, label):
    return nd.mean(output.argmax(axis = 1) == label).asscalar()
Example #6
def train():
    """training"""
    image_pool = ImagePool(pool_size)
    metric = mx.metric.CustomMetric(facc)

    stamp = datetime.now().strftime('%Y_%m_%d-%H_%M')
    logging.basicConfig(level=logging.DEBUG)

    # define a summary writer that logs data and flushes to the file every 5 seconds
    sw = SummaryWriter(logdir='%s' % dir_out_sw, flush_secs=5, verbose=False)
    global_step = 0

    for epoch in range(epochs):
        if epoch == 0:
            netG.hybridize()
            netD.hybridize()
        #     sw.add_graph(netG)
        #     sw.add_graph(netD)

        tic = time.time()
        btic = time.time()
        train_data.reset()
        val_data.reset()
        iter = 0
        for local_step, batch in enumerate(train_data):
            ############################
            # (1) Update D network: maximize log(D(x, y)) + log(1 - D(x, G(x, z)))
            ###########################
            tmp = mx.nd.concat(batch.data[0],
                               batch.data[1],
                               batch.data[2],
                               dim=1)
            tmp = augmenter(tmp,
                            patch_size=128,
                            offset=offset,
                            aug_type=1,
                            aug_methods=aug_methods,
                            random_crop=False)
            real_in = tmp[:, :1].as_in_context(ctx)
            real_out = tmp[:, 1:2].as_in_context(ctx)
            m = tmp[:, 2:3].as_in_context(ctx)  # mask

            fake_out = netG(real_in) * m

            # loss weight based on mask, applied on L1 loss
            if no_loss_weights:
                loss_weight = m
            else:
                loss_weight = m.asnumpy()
                loss_weight[loss_weight == 0] = .1
                loss_weight = mx.nd.array(loss_weight, ctx=m.context)

            fake_concat = image_pool.query(nd.concat(real_in, fake_out, dim=1))
            with autograd.record():
                # Train with fake image
                # Use image pooling to utilize history images
                output = netD(fake_concat)
                fake_label = nd.zeros(output.shape, ctx=ctx)
                errD_fake = GAN_loss(output, fake_label)
                metric.update([
                    fake_label,
                ], [
                    output,
                ])

                # Train with real image
                real_concat = nd.concat(real_in, real_out, dim=1)
                output = netD(real_concat)
                real_label = nd.ones(output.shape, ctx=ctx)
                errD_real = GAN_loss(output, real_label)
                errD = (errD_real + errD_fake) * 0.5
                errD.backward()
                metric.update([
                    real_label,
                ], [
                    output,
                ])

            trainerD.step(batch.data[0].shape[0])

            ############################
            # (2) Update G network: maximize log(D(x, G(x, z))) - lambda1 * L1(y, G(x, z))
            ###########################
            with autograd.record():
                fake_out = netG(real_in)
                fake_concat = nd.concat(real_in, fake_out, dim=1)
                output = netD(fake_concat)
                real_label = nd.ones(output.shape, ctx=ctx)
                errG = GAN_loss(output, real_label) + loss_2nd(
                    real_out, fake_out, loss_weight) * lambda1
                errG.backward()

            trainerG.step(batch.data[0].shape[0])

            sw.add_scalar(tag='loss',
                          value=('d_loss', errD.mean().asscalar()),
                          global_step=global_step)
            sw.add_scalar(tag='loss',
                          value=('g_loss', errG.mean().asscalar()),
                          global_step=global_step)
            global_step += 1

            if epoch + local_step == 0:
                sw.add_graph((netG))
                img_in_list, img_out_list, m_val = val_data.next().data
                m_val = m_val.as_in_context(ctx)
                sw.add_image('first_minibatch_train_real', norm3(real_out))
                sw.add_image('first_minibatch_val_real',
                             norm3(img_out_list.as_in_context(ctx)))
                netG.export('%snetG' % dir_out_checkpoints)
            if local_step == 0:
                # Log the first batch of images of each epoch (training)
                sw.add_image('first_minibatch_train_fake',
                             norm3(fake_out * m) * m, epoch)
                sw.add_image(
                    'first_minibatch_val_fake',
                    norm3(netG(img_in_list.as_in_context(ctx)) * m_val) *
                    m_val, epoch)
                # norm3(netG(img_in_list.as_in_context(ctx)) * m_val.as_in_context(ctx)), epoch)

            if (iter + 1) % 10 == 0:
                name, acc = metric.get()

                logging.info('speed: {} samples/s'.format(
                    batch_size / (time.time() - btic)))
                logging.info(
                    'discriminator loss = %f, generator loss = %f, binary training acc = %f at iter %d epoch %d'
                    % (nd.mean(errD).asscalar(), nd.mean(errG).asscalar(), acc,
                       iter, epoch))

            iter += 1
            btic = time.time()

        sw.add_scalar(tag='binary_training_acc',
                      value=('acc', acc),
                      global_step=epoch)

        name, acc = metric.get()
        metric.reset()

        fake_val = netG(val_data.data[0][1].as_in_context(ctx))
        loss_val = loss_2nd(val_data.data[1][1].as_in_context(ctx), fake_val,
                            val_data.data[2][1].as_in_context(ctx)) * lambda1
        sw.add_scalar(tag='loss_val',
                      value=('g_loss', loss_val.mean().asscalar()),
                      global_step=epoch)

        if (epoch % check_point_interval == 0) | (epoch == epochs - 1):
            netD.save_params('%snetD-%04d' % (dir_out_checkpoints, epoch))
            netG.save_params('%snetG-%04d' % (dir_out_checkpoints, epoch))

        logging.info('\nbinary training acc at epoch %d: %s=%f' %
                     (epoch, name, acc))
        logging.info('time: %f' % (time.time() - tic))

    sw.export_scalars('scalar_dict.json')
    sw.close()
Example #7
def myTrain(net,
            batch_size,
            train_data,
            valid_data,
            epoches,
            lr,
            wd,
            ctx,
            lr_period,
            lr_decay,
            verbose=False):
    trainer = mx.gluon.Trainer(net.collect_params(), 'sgd', {
        'learning_rate': lr,
        'momentum': 0.9,
        'wd': wd
    })
    prev_time = datetime.datetime.now()
    train_loss_record = []
    valid_loss_record = []  # per-epoch validation loss record
    train_acc_record = []
    valid_acc_record = []
    focalloss = netlib.FocalLoss()
    for e in range(epoches):
        train_loss = 0.0
        train_acc = 0.0
        # if e > 99 and e < 251 and e % 10 == 0:
        #     trainer.set_learning_rate(trainer.learning_rate * lr_decay)  # decrease lr
        # if e == 60 or e == 120 or e == 160:
        #     trainer.set_learning_rate(trainer.learning_rate * lr_decay)  # decrease lr
        if e > 150 and e % 20 == 0:
            trainer.set_learning_rate(trainer.learning_rate *
                                      lr_decay)  # decrease
        # print('train len:',len(train_data))
        for data, label in train_data:
            label = label.reshape(
                shape=(label.shape[0], ))  # careful: this flattens the label into a vector
            label = label.astype('float32').as_in_context(ctx)
            with autograd.record():
                output = net(data.as_in_context(ctx))
                loss = softmax_cross_entrory(output, label)  #
                # loss = focalloss(output, label)# focal loss
            loss.backward()
            trainer.step(batch_size)
            train_loss += nd.mean(loss).asscalar()
            train_acc += utils1.accuracy(output, label)

        train_loss_record.append(train_loss / len(train_data))
        train_acc_record.append(train_acc / len(train_data))
        cur_time = datetime.datetime.now()
        h, remainder = divmod((cur_time - prev_time).seconds, 3600)
        m, s = divmod(remainder, 60)
        time_str = 'Time %02d:%02d:%02d' % (h, m, s)
        if valid_data is not None:
            valid_acc = evaluate_accuracy(valid_data, net, ctx)
            valid_acc_record.append(valid_acc)

            if verbose:
                ###valid data loss
                valid_loss = 0
                for data, valid_label in valid_data:
                    valid_label = valid_label.reshape(
                        shape=(valid_label.shape[0],
                               ))  # careful: this flattens the label into a vector
                    valid_label = valid_label.astype('float32').as_in_context(
                        ctx)
                    # with autograd.predict_mode():
                    out = net(data.as_in_context(ctx))
                    loss = softmax_cross_entrory(out, valid_label)
                    # loss = focalloss(out, valid_label)  # focal loss
                    valid_loss += nd.mean(loss).asscalar()
                    # valid_loss = nd.mean(loss).asscalar( # only used valid loss of every batch(vaild_data)
                valid_loss_record.append(
                    valid_loss /
                    len(valid_data))  # record the mean validation loss for this epoch

                epoch_str = (
                    "Epoch %d. Train Loss: %f,Valid Loss: %f, Train acc %f, Valid acc %f, "
                    %
                    (e, train_loss / len(train_data), valid_loss /
                     len(valid_data), train_acc / len(train_data), valid_acc))
            else:
                epoch_str = (
                    "Epoch %d. Train Loss: %f, Train acc %f, Valid acc %f, " %
                    (e, train_loss / len(train_data),
                     train_acc / len(train_data), valid_acc))

        else:
            epoch_str = (
                "Epoch %d. Loss: %f, Train acc %f, " %
                (e, train_loss / len(train_data), train_acc / len(train_data)))
        prev_time = cur_time
        print(epoch_str + 'lr=' + str(trainer.learning_rate) + ',' + time_str)
    # plot loss and acc
    fig, (fig1, fig2) = plt.subplots(1, 2)
    if verbose:
        fig1.plot(train_loss_record, 'b')
        fig1.legend(['train'])
        fig2.plot(train_acc_record, 'b')
        fig2.legend(['train_acc'])
        if valid_data is not None:
            fig1.plot(valid_loss_record, 'r')
            fig1.legend(['train', 'test'])
            fig2.plot(valid_acc_record, 'r')
            fig2.legend(['train_acc', 'valid_acc'])
    else:
        fig1.plot(train_loss_record, 'b')
        fig1.legend(['train'])
        fig2.plot(train_acc_record, 'b')
        fig2.plot(valid_acc_record, 'r')
        fig2.legend(['train_acc', 'valid_acc'])
    fig.show()
    fig.savefig('./CIFAR10_result.png')
#############################################
### Training #######################
learning_rate = .1  # learning rate
epochs = 7  # number of training epochs
for epoch in range(epochs):
    train_loss = 0.  # loss
    train_acc = 0.  # accuracy
    for data, label in train_data:  # training samples and labels
        with autograd.record():  # automatic differentiation
            output = net(data)  # network output
            loss = cross_entropy(output, label)  # loss
        loss.backward()  # backpropagation
        # Average the gradients so the learning rate is less sensitive to the batch size
        SGD(params, learning_rate/batch_size)

        train_loss += nd.mean(loss).asscalar()  # loss
        train_acc += accuracy(output, label)  # accuracy

    test_acc = evaluate_accuracy(test_data, net)  # accuracy on the test set
    print("Epoch %d. Loss: %f, Train acc %f, Test acc %f" % (
        epoch, train_loss/len(train_data), train_acc/len(train_data), test_acc))


## Inspect the training results

data, label = mnist_test[0:10]  # first 10 samples of the test set
show_images(data)  # show the images
print('true labels')  # ground-truth labels
print(get_text_labels(label))

predicted_labels = net(data).argmax(axis=1)  # take the class with the highest predicted probability as the prediction
Example #9
def CNN(epoch = 100 , batch_size=10, save_period=10 , load_period=100 , weight_decay=0.001 ,learning_rate= 0.1 , dataset = "MNIST", ctx=mx.cpu(0)):

    #data selection
    if dataset =="MNIST":
        train_data , test_data = MNIST(batch_size)
    elif dataset == "CIFAR10":
        train_data, test_data = CIFAR10(batch_size)
    elif dataset == "FashionMNIST":
        train_data, test_data = FashionMNIST(batch_size)
    else:
        return "The dataset does not exist."


    # data structure
    if dataset == "MNIST" or dataset =="FashionMNIST":
        color = 1
    elif dataset == "CIFAR10":
        color = 3
    num_outputs = 10

    if dataset == "MNIST":
        path = "weights/MNIST_weights-{}".format(load_period)
    elif dataset == "FashionMNIST":
        path = "weights/FashionMNIST_weights-{}".format(load_period)
    elif dataset == "CIFAR10":
        path = "weights/CIFAR10_weights-{}".format(load_period)

    if os.path.exists(path):
        print("loading weights")
        [W1, B1, W2, B2, W3, B3, W4, B4, W5, B5] = nd.load(path)  # weights load

        W1=W1.as_in_context(ctx)
        B1=B1.as_in_context(ctx)
        W2=W2.as_in_context(ctx)
        B2=B2.as_in_context(ctx)
        W3=W3.as_in_context(ctx)
        B3=B3.as_in_context(ctx)
        W4=W4.as_in_context(ctx)
        B4=B4.as_in_context(ctx)
        W5=W5.as_in_context(ctx)
        B5=B5.as_in_context(ctx)

        params = [W1 , B1 , W2 , B2 , W3 , B3 , W4 , B4 , W5 , B5]
    else:
        print("initializing weights")
        with ctx:
            W1 = nd.random.normal(loc=0 , scale=0.1 , shape=(60,color,3,3))
            B1 = nd.random.normal(loc=0 , scale=0.1 , shape=60)

            W2 = nd.random.normal(loc=0 , scale=0.1 , shape=(30,60,6,6))
            B2 = nd.random.normal(loc=0 , scale=0.1 , shape=30)

            if dataset == "CIFAR10":
                reshape=750
            elif dataset == "MNIST" or dataset == "FashionMNIST":
                reshape=480

            W3 = nd.random.normal(loc=0 , scale=0.1 , shape=(120, reshape))
            B3 = nd.random.normal(loc=0 , scale=0.1 , shape=120)

            W4 = nd.random.normal(loc=0 , scale=0.1 , shape=(64, 120))
            B4 = nd.random.normal(loc=0 , scale=0.1 , shape=64)

            W5 = nd.random.normal(loc=0 , scale=0.1 , shape=(num_outputs , 64))
            B5 = nd.random.normal(loc=0 , scale=0.1 , shape=num_outputs)

        params = [W1 , B1 , W2 , B2 , W3 , B3 , W4 , B4, W5 , B5]
        
    # attach gradient!!!
    for i, param in enumerate(params):
        param.attach_grad()

    # network - similar to lenet5 

    '''Convolution parameter
    data: (batch_size, channel, height, width)
    weight: (num_filter, channel, kernel[0], kernel[1])
    bias: (num_filter,)
    out: (batch_size, num_filter, out_height, out_width).
    '''

    def network(X, drop_rate=0.0):  # formula: output_size = ((input - kernel + 2*padding) / stride) + 1
        #data size 
        # MNIST,FashionMNIST = (batch size , 1 , 28 ,  28)
        # CIFAR = (batch size , 3 , 32 ,  32)

        C_H1=nd.Activation(data= nd.Convolution(data=X , weight = W1 , bias = B1 , kernel=(3,3) , stride=(1,1)  , num_filter=60) , act_type="relu") # MNIST : result = ( batch size , 60 , 26 , 26) , CIFAR10 : : result = ( batch size , 60 , 30 , 30) 
        P_H1=nd.Pooling(data = C_H1 , pool_type = "max" , kernel=(2,2), stride = (2,2)) # MNIST : result = (batch size , 60 , 13 , 13) , CIFAR10 : result = (batch size , 60 , 15 , 15)
        C_H2=nd.Activation(data= nd.Convolution(data=P_H1 , weight = W2 , bias = B2 , kernel=(6,6) , stride=(1,1) , num_filter=30), act_type="relu") # MNIST :  result = ( batch size , 30 , 8 , 8), CIFAR10 :  result = ( batch size , 30 , 10 , 10)
        P_H2=nd.Pooling(data = C_H2 , pool_type = "max" , kernel=(2,2), stride = (2,2)) # MNIST : result = (batch size , 30 , 4 , 4) , CIFAR10 : result = (batch size , 30 , 5 , 5)
        P_H2 = nd.flatten(data=P_H2)

        '''FullyConnected parameter
        • data: (batch_size, input_dim)
        • weight: (num_hidden, input_dim)
        • bias: (num_hidden,)
        • out: (batch_size, num_hidden)
        '''
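        # A worked instance of the layout above for the MNIST/FashionMNIST case
        # (P_H2 is the flattened (batch_size, 30, 4, 4) pooling output, i.e. 480 features):
        #   data P_H2: (batch_size, 480), weight W3: (120, 480), bias B3: (120,)  ->  F_H1: (batch_size, 120)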
        F_H1 =nd.Activation(nd.FullyConnected(data=P_H2 , weight=W3 , bias=B3 , num_hidden=120),act_type="sigmoid")
        F_H1 =nd.Dropout(data=F_H1, p=drop_rate)
        F_H2 =nd.Activation(nd.FullyConnected(data=F_H1 , weight=W4 , bias=B4 , num_hidden=64),act_type="sigmoid")
        F_H2 =nd.Dropout(data=F_H2, p=drop_rate)
        softmax_Y = nd.softmax(nd.FullyConnected(data=F_H2 ,weight=W5 , bias=B5 , num_hidden=10))
        return softmax_Y

    def cross_entropy(output, label):
        return - nd.sum(label * nd.log(output), axis=1)

    #Adam optimizer
    state=[]
    optimizer=mx.optimizer.Adam(rescale_grad=1,learning_rate=learning_rate)
    for i,param in enumerate(params):
        state.append(optimizer.create_state(0,param))

    def SGD(params, lr , wd , bs):
        for param in params:
             param -= ((lr * param.grad)/bs+wd*param)

    for i in tqdm(range(1,epoch+1,1)):
        for data,label in train_data:
            data = data.as_in_context(ctx)
            label = label.as_in_context(ctx)
            label = nd.one_hot(label , num_outputs)

            with autograd.record():
                output = network(data,drop_rate=0.2)

                #loss definition
                loss = cross_entropy(output,label) # (batch_size,)
                cost = nd.mean(loss).asscalar()

            loss.backward()
            for j,param in enumerate(params):
                optimizer.update(0,param,param.grad,state[j])

            #SGD(params, learning_rate , weight_decay , batch_size)

        print(" epoch : {} , last batch cost : {}".format(i,cost))

        #weight_save
        if i % save_period==0:

            if not os.path.exists("weights"):
                os.makedirs("weights")

            print("saving weights")
            if dataset=="MNIST":
                nd.save("weights/MNIST_weights-{}".format(i),params)

            elif dataset=="CIFAR10":
                nd.save("weights/CIFAR10_weights-{}".format(i),params)

            elif dataset=="FashionMNIST":
                nd.save("weights/FashionMNIST_weights-{}".format(i),params)

    test_accuracy = evaluate_accuracy(test_data , network , ctx)
    print("Test_acc : {}".format(test_accuracy))

    return "optimization completed"
Example #10
 def forward(self, x, *args):
     # standardize x: subtract its mean and divide by its standard deviation
     return (x - x.mean()) / nd.sqrt(nd.mean(nd.power((x - x.mean()), 2)))
Example #11
def train(args):
    frames = args.frames
    caption_length = args.caption_length
    glove_file = args.glove_file
    
    #CPU_COUNT = multiprocessing.cpu_count()
    if args.cuda:
        ctx = mx.gpu()
    else:
        ctx = mx.cpu()
    
    if args.load_pretrain:
        pretrain_model = vision.vgg16_bn(pretrained=True,ctx=ctx)
        transform = utils.Compose([utils.ToTensor(ctx),
                               utils.normalize(ctx),
                               utils.extractFeature(ctx,pretrain_model)
                             ])
    else:
        pretrain_model = None
        transform = utils.Compose([utils.ToTensor(ctx),
                                   utils.normalize(ctx),
                                 ])
    
    target_transform = utils.targetCompose([utils.WordToTensor(ctx)])

    train_dataset = videoFolder(args.train_folder,args.train_dict, frames, glove_file, 
                    caption_length, ctx, transform=transform, target_transform=target_transform)

    test_dataset = videoFolder(args.test_folder,args.test_dict, frames, glove_file, 
                        caption_length, ctx, transform=transform, target_transform=target_transform)

    train_loader = gluon.data.DataLoader(train_dataset,batch_size=args.batch_size,
                                last_batch='discard',shuffle=True)

    test_loader = gluon.data.DataLoader(test_dataset,batch_size=args.batch_size,
                                    last_batch='discard',shuffle=False)

    #loss = L2Loss_cos()
    loss = L2Loss_2()
    net = lstm_net(frames,caption_length,ctx,pretrained=args.load_pretrain)
    #net = resnet18_v2(caption_length=caption_length,ctx=ctx)
    
    net.collect_params().initialize(init=mx.initializer.MSRAPrelu(), ctx=ctx)
    trainer = gluon.Trainer(net.collect_params(), 'adam',
                            {'learning_rate': args.lr})
    
    smoothing_constant = 0.01
    
    for e in range(args.epochs):
        epoch_loss = 0
        
        for batch_id, (x,_) in enumerate(train_loader):
            with autograd.record():
                pred = net(x)
                batch_loss = loss(pred,_)
            
            batch_loss.backward()
            trainer.step(x.shape[0], ignore_stale_grad=True)
            mx.nd.waitall()
            
            batch_loss = F.mean(batch_loss).asscalar()
            
            if batch_id % 100 == 0:
                print("Train Batch:{}, batch_loss:{}".format(batch_id+1, batch_loss))
                  
            epoch_loss = (batch_loss if ((batch_id == 0) and (e == 0))
                          else (1 - smoothing_constant)*epoch_loss + smoothing_constant*batch_loss)
        
        epoch_loss_1 = 0
        for batch_id, (x,_) in enumerate(test_loader):
            with autograd.predict_mode():
                predict = net(x)
                batch_loss_1 = loss(predict, _)
            
            batch_loss_1 = F.mean(batch_loss_1).asscalar()
            
            if batch_id % 100 == 0:
                print("Test Batch:{}, batch_loss:{}".format(batch_id+1, batch_loss_1))
                
            epoch_loss_1 = (batch_loss_1 if ((batch_id == 0) and (e == 0))
                          else (1 - smoothing_constant)*epoch_loss_1 + smoothing_constant*batch_loss_1)
            
 
        
        print("Epoch {}, train_loss:{}, test_loss:{}".format(e+1, epoch_loss, epoch_loss_1))
    
    if args.save_model == True:
        file_name = "./saved_model/" + "lstm_pretrain.params"
        net.save_parameters(file_name)
        d_optimizer.step(num_img)

        # ===============train generator
        # compute loss of fake_img
        with g.autograd.record():
            fake_img = ge(z)
            output = d(fake_img)
            g_loss = bce(output, real_label)

        # bp and optimize
        g_loss.backward()
        g_optimizer.step(num_img)

        if (i + 1) % 100 == 0:
            print('Epoch [{}/{}], d_loss: {:.6f}, g_loss: {:.6f} '
                  'D real: {:.6f}, D fake: {:.6f}'.format(
                      epoch, num_epoch,
                      nd.mean(d_loss).asscalar(),
                      nd.mean(g_loss).asscalar(),
                      nd.mean(real_scores).asscalar(),
                      nd.mean(fake_scores).asscalar()))
    if epoch == 0:
        real_images = to_img(torch.FloatTensor(real_img.asnumpy()))
        save_image(real_images, './img/real_images.png')

    fake_images = to_img(torch.FloatTensor(fake_img.asnumpy()))
    save_image(fake_images, './img/fake_images-{}.png'.format(epoch + 1))

d.save_params('./dis.params')
ge.save_params('./gen.params')
def accurancy(output, label):
    return nd.mean(output.argmax(axis=1) == label).asscalar() # output: batch_size * nums_classes
Example #14
def feature_scaling(x: NDArray, mean: float, std: float):
    shifted = x - nd.mean(x)
    deviation = nd.sqrt(nd.mean(nd.square(shifted)))
    # deviation = nd.mean(self.sqr(shifted))
    print(deviation)
    return mean + std * shifted / deviation
Example #15
    l2_loss = gluon.loss.L2Loss()
    trainer = gluon.Trainer(net.collect_params(), 'sgd', {
        'learning_rate': 0.005,
        'wd': weight_decay
    })
    # 4.train
    epoches = 10
    train_loss_record = []
    test_loss_record = []
    for e in range(epoches):
        train_loss = 0
        test_loss = 0
        _dataIter = 0
        for data, label in dataIter(x_train, y_train, batch_size):
            _dataIter += 1
            with autograd.record():
                out = net(data)
                loss = l2_loss(out, label)
            loss.backward()
            trainer.step(batch_size)
            train_loss += nd.mean(loss).asscalar()
        train_loss_record.append(train_loss / _dataIter)
        test_loss = nd.mean(l2_loss(net(x_test), y_test)).asscalar()
        test_loss_record.append(test_loss)
        print('epoches: %d, train_loss: %f, test_loss: %f' %
              (e, train_loss / _dataIter, test_loss))
    plt.plot(train_loss_record, 'b')
    plt.plot(test_loss_record, 'r')
    plt.legend(['train', 'test'])
    plt.show()
Example #16
def train():
    image_pool = ImagePool(pool_size)
    metric = mx.metric.CustomMetric(facc)

    stamp = datetime.now().strftime('%Y_%m_%d-%H_%M')
    logging.basicConfig(level=logging.DEBUG)

    for epoch in range(epochs):
        tic = time.time()
        btic = time.time()
        train_data.reset()
        iter = 0
        for batch in train_data:
            ############################
            # (1) Update D network: maximize log(D(x, y)) + log(1 - D(x, G(x, z)))
            ###########################
            real_in = batch.data[0].as_in_context(ctx)
            real_out = batch.data[1].as_in_context(ctx)

            fake_out = netG(real_in)
            fake_concat = image_pool.query(nd.concat(real_in, fake_out, dim=1))
            with autograd.record():
                # Train with fake image
                # Use image pooling to utilize history images
                output = netD(fake_concat)
                fake_label = nd.zeros(output.shape, ctx=ctx)
                errD_fake = GAN_loss(output, fake_label)
                metric.update([
                    fake_label,
                ], [
                    output,
                ])

                # Train with real image
                real_concat = nd.concat(real_in, real_out, dim=1)
                output = netD(real_concat)
                real_label = nd.ones(output.shape, ctx=ctx)
                errD_real = GAN_loss(output, real_label)
                errD = (errD_real + errD_fake) * 0.5
                errD.backward()
                metric.update([
                    real_label,
                ], [
                    output,
                ])

            trainerD.step(batch.data[0].shape[0])

            ############################
            # (2) Update G network: maximize log(D(x, G(x, z))) - lambda1 * L1(y, G(x, z))
            ###########################
            with autograd.record():
                fake_out = netG(real_in)
                fake_concat = nd.concat(real_in, fake_out, dim=1)
                output = netD(fake_concat)
                real_label = nd.ones(output.shape, ctx=ctx)
                errG = GAN_loss(
                    output, real_label) + L1_loss(real_out, fake_out) * lambda1
                errG.backward()

            trainerG.step(batch.data[0].shape[0])

            # Print log information every ten batches
            if iter % 10 == 0:
                name, acc = metric.get()
                logging.info('speed: {} samples/s'.format(
                    batch_size / (time.time() - btic)))
                logging.info(
                    'discriminator loss = %f, generator loss = %f, binary training acc = %f at iter %d epoch %d'
                    % (nd.mean(errD).asscalar(), nd.mean(errG).asscalar(), acc,
                       iter, epoch))
            iter = iter + 1
            btic = time.time()

        name, acc = metric.get()
        metric.reset()
        logging.info('\nbinary training acc at epoch %d: %s=%f' %
                     (epoch, name, acc))
        logging.info('time: %f' % (time.time() - tic))

        # Visualize one generated image for each epoch
        fake_img = fake_out[0]
        visualize(fake_img)
Example #17
def accuracy(output, labels):
    return nd.mean(nd.argmax(output, axis=1) == labels).asscalar()
def train(channel_input_dirs, hyperparameters, hosts, **kwargs):
    # retrieve the hyperparameters we set in notebook (with some defaults)
    batch_size = hyperparameters.get('batch_size', 128)
    epochs = hyperparameters.get('epochs', 100)
    learning_rate = hyperparameters.get('learning_rate', 0.1)
    beta1 = hyperparameters.get('beta1', 0.9)
    beta2 = hyperparameters.get('beta2', 0.99)
    num_gpus = hyperparameters.get('num_gpus', 0)
    burn_in = hyperparameters.get('burn_in', 5)
    # set logging
    logging.getLogger().setLevel(logging.DEBUG)

    if len(hosts) == 1:
        kvstore = 'device' if num_gpus > 0 else 'local'
    else:
        kvstore = 'dist_device_sync' if num_gpus > 0 else 'dist_sync'

    ctx = [mx.gpu(i) for i in range(num_gpus)] if num_gpus > 0 else [mx.cpu()]
    print (ctx)
    f_path = channel_input_dirs['training']
    train_X, train_Y, validation_X, validation_Y = get_data(f_path)

    print ('loaded data')
    
    train_iter = mx.io.NDArrayIter(data = train_X, label=train_Y, batch_size=batch_size, shuffle=True)
    validation_iter = mx.io.NDArrayIter(data = validation_X, label=validation_Y, batch_size=batch_size, shuffle=False)
    data_shape = (batch_size,) + train_X.shape[1:]
    label_shape = (batch_size,) + train_Y.shape[1:]

    print ('created iters')
   
    sym = build_unet()
    net = mx.mod.Module(sym, context=ctx, data_names=('data',), label_names=('label',))
    net.bind(data_shapes=[['data', data_shape]], label_shapes=[['label', label_shape]])
    net.init_params(mx.initializer.Xavier(magnitude=6))
    net.init_optimizer(optimizer = 'adam', 
                               optimizer_params=(
                                   ('learning_rate', learning_rate),
                                   ('beta1', beta1),
                                   ('beta2', beta2)
                              ))
    print ('start training')
    smoothing_constant = .01
    curr_losses = []
    moving_losses = []
    i = 0
    best_val_loss = np.inf
    for e in range(epochs):
        while True:
            try:
                batch = next(train_iter)
            except StopIteration:
                train_iter.reset()
                break
            net.forward_backward(batch)
            loss = net.get_outputs()[0]
            net.update()
            curr_loss = F.mean(loss).asscalar()
            curr_losses.append(curr_loss)
            moving_loss = (curr_loss if ((i == 0) and (e == 0))
                                   else (1 - smoothing_constant) * moving_loss + (smoothing_constant) * curr_loss)
            moving_losses.append(moving_loss)
            i += 1
        val_losses = []
        for batch in validation_iter:
            net.forward(batch)
            loss = net.get_outputs()[0]
            val_losses.append(F.mean(loss).asscalar())
        validation_iter.reset()
        # early stopping
        val_loss = np.mean(val_losses)
        if e > burn_in and val_loss < best_val_loss:
            best_val_loss = val_loss
            net.save_checkpoint('best_net', 0)
            print("Best model at Epoch %i" %(e+1))
        print("Epoch %i: Moving Training Loss %0.5f, Validation Loss %0.5f" % (e+1, moving_loss, val_loss))
    inference_sym = build_unet(inference=True)
    net = mx.mod.Module(inference_sym, context=ctx, data_names=('data',))
    net.bind(data_shapes=[['data', data_shape]])
    net.load_params('best_net-0000.params')
    return net
Example #19
def main(opt):
    ctx = mx.gpu() if opt.use_gpu else mx.cpu()
    testclasspaths = []
    testclasslabels = []
    if opt.istest:
        filename = '_testlist.txt'
    else:
        filename = '_validationlist.txt'
    with open(opt.dataset + "_" + opt.expname + filename, 'r') as f:
        for line in f:
            testclasspaths.append(line.split(' ')[0])
            if int(line.split(' ')[1]) == -1:
                testclasslabels.append(0)
            else:
                testclasslabels.append(1)

    test_data = load_image.load_test_images(testclasspaths, testclasslabels,
                                            opt.batch_size, opt.img_wd,
                                            opt.img_ht, ctx, opt.noisevar)
    netEn, netDe, netD, netD2 = set_network(opt.depth, ctx, opt.ngf)
    netEn.load_params('checkpoints/' + opt.expname + '_' + str(opt.epochs) +
                      '_En.params',
                      ctx=ctx)
    netDe.load_params('checkpoints/' + opt.expname + '_' + str(opt.epochs) +
                      '_De.params',
                      ctx=ctx)
    netD.load_params('checkpoints/' + opt.expname + '_' + str(opt.epochs) +
                     '_D.params',
                     ctx=ctx)
    netD2.load_params('checkpoints/' + opt.expname + '_' + str(opt.epochs) +
                      '_D2.params',
                      ctx=ctx)

    lbllist = []
    scorelist1 = []
    scorelist2 = []
    scorelist3 = []
    scorelist4 = []
    test_data.reset()
    count = 0
    for batch in (test_data):
        count += 1
        real_in = batch.data[0].as_in_context(ctx)
        real_out = batch.data[1].as_in_context(ctx)
        lbls = batch.label[0].as_in_context(ctx)
        out = netDe(netEn(real_out))
        output4 = nd.mean((netD2(out)), (1, 3, 2)).asnumpy()
        out = netDe(netEn(real_in))

        #real_concat = nd.concat(out, out, dim=1)
        output = netD2(out)  #Denoised image

        output3 = nd.mean(out - real_out, (1, 3, 2)).asnumpy()  #denoised-real
        output = nd.mean(output, (1, 3, 2)).asnumpy()
        print(output)
        print(lbls)
        output2 = netD2(real_out)  #Image with no noise
        output2 = nd.mean(output2, (1, 3, 2)).asnumpy()
        lbllist = lbllist + list(lbls.asnumpy())
        scorelist1 = scorelist1 + list(output)
        scorelist2 = scorelist2 + list(output2)
        scorelist3 = scorelist3 + list(output3)
        scorelist4 = scorelist4 + list(output4)
    fpr, tpr, _ = roc_curve(lbllist, scorelist1, 1)
    roc_auc1 = auc(fpr, tpr)
    fpr, tpr, _ = roc_curve(lbllist, scorelist2, 1)
    roc_auc2 = auc(fpr, tpr)
    fpr, tpr, _ = roc_curve(lbllist, scorelist3, 1)
    roc_auc3 = auc(fpr, tpr)
    fpr, tpr, _ = roc_curve(lbllist, scorelist4, 1)
    roc_auc4 = auc(fpr, tpr)
    return ([roc_auc1, roc_auc2, roc_auc3, roc_auc4])
Example #20
def Cal_Acc(output, label):
    return nd.mean(nd.argmax(output, axis=1) == label).asscalar()
 def forward(self, output1, output2, label):
     euclidean_distance = nd.sqrt(nd.sum(nd.power(nd.subtract(output1, output2),2))) 
     loss_contrastive = nd.mean(nd.add(nd.subtract(1,label) * nd.power(euclidean_distance, 2),(label) * nd.power(nd.subtract(self.margin, euclidean_distance), 2)))
     return loss_contrastive
Example #22
def mean(input, dim):
    return nd.mean(input, axis=dim)
Example #23
 def hybrid_forward(self, F, pred, label, sample_weight=None):
     label = _reshape_like(F, label, pred)
     # sqrt(square(x)) equals abs(x), so this is effectively an L1-style loss
     loss = F.sqrt(F.square(label - pred))
     loss = _apply_weighting(F, loss, self._weight / 2, sample_weight)
     return F.mean(loss, axis=self._batch_axis, exclude=True)
Example #24
def Autoencoder(epoch=100,
                batch_size=128,
                save_period=10,
                load_period=100,
                optimizer="sgd",
                learning_rate=0.01,
                dataset="MNIST",
                ctx=mx.gpu(0)):

    #data selection
    if dataset == "MNIST":
        train_data, test_data = MNIST(batch_size)
        path = "weights/MNIST-{}.params".format(load_period)
    elif dataset == "FashionMNIST":
        train_data, test_data = FashionMNIST(batch_size)
        path = "weights/FashionMNIST-{}.params".format(load_period)
    else:
        return "The dataset does not exist."
    '''Follow these steps:

    •Define network
    •Initialize parameters
    •Loop over inputs
    •Forward input through network to get output
    •Compute loss with output and label
    •Backprop gradient
    •Update parameters with gradient descent.
    '''

    #Autoencoder
    net = gluon.nn.Sequential()  # stacks 'Block's sequentially
    with net.name_scope():
        net.add(gluon.nn.Dense(units=200, activation="sigmoid", use_bias=True))
        net.add(gluon.nn.Dropout(0.2))
        net.add(gluon.nn.Dense(units=100, activation="sigmoid", use_bias=True))
        net.add(gluon.nn.Dropout(0.2))
        net.add(gluon.nn.Dense(units=100, activation="sigmoid", use_bias=True))
        net.add(gluon.nn.Dropout(0.2))
        net.add(gluon.nn.Dense(units=200, activation="sigmoid", use_bias=True))
        net.add(gluon.nn.Dropout(0.2))
        net.add(gluon.nn.Dense(units=784, activation="sigmoid", use_bias=True))

    #weights initialization
    if os.path.exists(path):
        print("loading weights")
        net.load_params(filename=path, ctx=ctx)  # weights load
    else:
        print("initializing weights")
        net.collect_params().initialize(mx.init.Normal(sigma=0.1),
                                        ctx=ctx)  # weights initialization
        #net.initialize(mx.init.Normal(sigma=0.1),ctx=ctx) # weights initialization

    #optimizer
    trainer = gluon.Trainer(net.collect_params(), optimizer,
                            {"learning_rate": learning_rate})

    #learning
    for i in tqdm(range(1, epoch + 1, 1)):
        for data, label in train_data:

            data = data.as_in_context(ctx).reshape((batch_size, -1))
            data_ = data

            with autograd.record(train_mode=True):
                output = net(data)

                #loss definition
                loss = gluon.loss.L2Loss()(output, data_)
                cost = nd.mean(loss).asscalar()
            loss.backward()
            trainer.step(batch_size, ignore_stale_grad=True)

        print(" epoch : {} , last batch cost : {}".format(i, cost))

        #weight_save
        if i % save_period == 0:

            if not os.path.exists("weights"):
                os.makedirs("weights")

            print("saving weights")
            if dataset == "MNIST":
                net.save_params("weights/MNIST-{}.params".format(i))

            elif dataset == "FashionMNIST":
                net.save_params("weights/FashionMNIST-{}.params".format(i))

    #show image
    generate_image(test_data, net, ctx, dataset)

    return "optimization completed"
Exemple #25
0
def mainEvaluation(opt):
    ctx = mx.gpu() if opt.use_gpu else mx.cpu()
    testclasspaths = []
    testclasslabels = []
    print('loading test files')
    filename = '_testlist.txt'
    with open(opt.dataset + "_" + opt.expname + filename, 'r') as f:
        for line in f:
            testclasspaths.append(line.split(' ')[0])
            if int(line.split(' ')[1]) == -1:
                testclasslabels.append(0)
            else:
                testclasslabels.append(1)
    neworder = range(len(testclasslabels))
    c = list(zip(testclasslabels, testclasspaths))
    print('shuffling')
    random.shuffle(c)
    testclasslabels, testclasspaths = zip(*c)
    print('loading pictures')
    test_data = load_image.load_test_images(testclasspaths,testclasslabels,opt.batch_size, opt.img_wd, opt.img_ht, ctx, opt.noisevar,opt.bw)
    print('picture loading done')
    opt.istest = True
    networks = models_cifar.set_network(opt, ctx, True)
    netEn = networks[0]
    netDe = networks[1]
    netD = networks[2]
    netD2 = networks[3]
    # load_epoch = opt.epochs - 1
    # netEn.load_params('checkpoints/'+opt.expname+'_'+str(load_epoch)+'_En.params', ctx=ctx)
    # netDe.load_params('checkpoints/'+opt.expname+'_'+str(load_epoch)+'_De.params', ctx=ctx)
    # if opt.ntype>1:
    # 	netD.load_params('checkpoints/'+opt.expname+'_'+str(load_epoch)+'_D.params', ctx=ctx)
    # if opt.ntype>2:
	# netD2.load_params('checkpoints/'+opt.expname+'_'+str(load_epoch)+'_D2.params', ctx=ctx)

    print('Model loading done')
    lbllist = []
    scorelist1 = []
    scorelist2 = []
    scorelist3 = []
    scorelist4 = []
    test_data.reset()
    count = 0

    for batch in test_data:
        count = count + 1
        output1 = np.zeros(opt.batch_size)
        output2 = np.zeros(opt.batch_size)
        output3 = np.zeros(opt.batch_size)
        output4 = np.zeros(opt.batch_size)
        real_in = batch.data[0].as_in_context(ctx)
        real_out = batch.data[1].as_in_context(ctx)
        lbls = batch.label[0].as_in_context(ctx)
        outnn = netDe(netEn(real_in))
        out = outnn
        output3 = -1 * nd.mean((outnn - real_out) ** 2, (1, 3, 2)).asnumpy()
        if opt.ntype > 1:  # AE
            out_concat = nd.concat(real_in, outnn, dim=1) if opt.append else outnn
            output1 = nd.mean(netD(out_concat), (1, 3, 2)).asnumpy()
            out_concat = nd.concat(real_in, real_in, dim=1) if opt.append else real_in
            output2 = netD(out_concat)  # Image with no noise
            output2 = nd.mean(output2, (1, 3, 2)).asnumpy()
            out = netDe(netEn(real_out))
            out_concat = nd.concat(real_in, out, dim=1) if opt.append else out
            output = netD(out_concat)  # Denoised image
            output4 = nd.mean(output, (1, 3, 2)).asnumpy()
        lbllist = lbllist+list(lbls.asnumpy())
        scorelist1 = scorelist1+list(output1)
        scorelist2 = scorelist2+list(output2)
        scorelist3 = scorelist3+list(output3)
        scorelist4 = scorelist4+list(output4)
        out = netDe(netEn(real_in))

        # Save some sample results
        fake_img1 = nd.concat(real_in[0],real_out[0], out[0], outnn[0],dim=1)
        fake_img2 = nd.concat(real_in[1],real_out[1], out[1],outnn[1], dim=1)
        fake_img3 = nd.concat(real_in[2],real_out[2], out[2], outnn[2], dim=1)
        fake_img4 = nd.concat(real_in[3],real_out[3],out[3],outnn[3], dim=1)
        fake_img = nd.concat(fake_img1,fake_img2, fake_img3,fake_img4, dim=2)
        visual.visualize(fake_img)
        plt.savefig('outputs/T_'+opt.expname+'_'+str(count)+'.png')

    print("Positives" + str(np.sum(lbllist)))
    print("Negatives" + str(np.shape(lbllist)-np.sum(lbllist) ))
    fpr, tpr, _ = roc_curve(lbllist, scorelist3, 1)
    roc_auc1 = 0
    roc_auc2 = 0
    roc_auc4 = 0
    roc_auc3 = auc(fpr, tpr)
    if int(opt.ntype) >1: #AE
	    fpr, tpr, _ = roc_curve(lbllist, scorelist1, 1)
	    roc_auc1 = auc(fpr, tpr)
	    fpr, tpr, _ = roc_curve(lbllist, scorelist2, 1)
	    roc_auc2 = auc(fpr, tpr)
    	    fpr, tpr, _ = roc_curve(lbllist, scorelist4, 1)
	    roc_auc4 = auc(fpr, tpr)

    return[roc_auc1, roc_auc2, roc_auc3, roc_auc4]
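# For reference, the ROC bookkeeping above reduces to sklearn's roc_curve/auc pair.
# A self-contained sketch with made-up labels and scores:
import numpy as np
from sklearn.metrics import roc_curve, auc

labels = np.array([0, 0, 1, 1, 1, 0])                 # 1 marks the positive class
scores = np.array([-0.9, -0.4, 0.3, 0.8, 0.1, -0.2])  # higher score = more likely positive
fpr, tpr, _ = roc_curve(labels, scores, pos_label=1)
print("AUC:", auc(fpr, tpr))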
Exemple #26
0
def accuracy(output, label):
    return nd.mean(output.argmax(axis=1)==label).asscalar()
 def r_square(pred, label):
     # https://en.wikipedia.org/wiki/Coefficient_of_determination
     return ndarray.sum(ndarray.square(pred - ndarray.mean(label))) /\
         ndarray.sum(ndarray.square(label - ndarray.mean(label)))
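# Note: the ratio above equals the coefficient of determination only for a least-squares fit.
# A sketch of the general definition, R^2 = 1 - SS_res / SS_tot (not part of the original snippet):
from mxnet import ndarray

def r_square_general(pred, label):
    ss_res = ndarray.sum(ndarray.square(label - pred))
    ss_tot = ndarray.sum(ndarray.square(label - ndarray.mean(label)))
    return 1 - ss_res / ss_tot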
Exemple #28
0
    transform=lambda data, label: (data.astype(np.float32) / 255, label)),
                                     batch_size=32,
                                     shuffle=False)

net = gluon.nn.Sequential()

with net.name_scope():
    net.add(gluon.nn.Dense(128, activation='relu'))
    net.add(gluon.nn.Dense(64, activation='relu'))
    net.add(gluon.nn.Dense(10))

net.collect_params().initialize(mx.init.Normal(sigma=0.05))

softmax_cross_entropy = gluon.loss.SoftmaxCrossEntropyLoss()

trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': .1})

epochs = 10

for e in range(epochs):
    for i, (data, label) in enumerate(train_data):
        data = data.as_in_context(mx.cpu()).reshape((-1, 784))
        label = label.as_in_context(mx.cpu())
        with autograd.record():
            output = net(data)
            loss = softmax_cross_entropy(output, label)
            loss.backward()
        trainer.step(data.shape[0])
        cur_loss = ndarray.mean(loss).asscalar()
        print "Epoch {}. Current Loss: {}.".format(e, cur_loss)
Exemple #29
0
def CapsuleNet(Reconstruction=True,
               epoch=100,
               batch_size=256,
               save_period=100,
               load_period=100,
               optimizer="adam",
               learning_rate=0.001,
               dataset="MNIST",
               ctx=mx.gpu(0)):

    if dataset == "MNIST":
        '''
        In the paper, 'Training is performed on 28 x 28 MNIST images that have been shifted by up to
        2 pixels in each direction with zero padding.' In this implementation, the original data is
        not transformed in that way.
        '''
        train_data, test_data = MNIST(batch_size)
        path = "weights/MNIST-{}.params".format(load_period)
    elif dataset == "FashionMNIST":
        train_data, test_data = FashionMNIST(batch_size)
        path = "weights/FashionMNIST-{}.params".format(load_period)
    else:
        return "The dataset does not exist."

    #Convolution Neural Network
    # formula: output_size = ((input - kernel_size + 2*padding) / stride) + 1
    # data size
    # MNIST, FashionMNIST = (batch size , 1 , 28 ,  28)
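    # Example with the formula above: a 28x28 input, a 9x9 kernel, zero padding, stride 1
    # gives ((28 - 9 + 2*0) / 1) + 1 = 20, i.e. a 20x20 feature map.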

    # Routing_Iteration = 1 due to memory constraints; routing uses close to 5GB of memory.
    net = Network(batch_size=batch_size, Routing_Iteration=1)
    '''
    Required for 'hybridize' mode: the 'batch_size' must be fixed in the 'DigitCaps'
    computation, so the 'batch_size' of the 'test' data and of the 'training' data
    must be the same.
    '''
    net.hybridize()  # for faster learning and efficient memory use

    #weights initialization
    if os.path.exists(path):
        print("loading weights")
        net.load_params(filename=path, ctx=ctx)  # weights load
    else:
        print("initializing weights")
        net.collect_params().initialize(mx.init.Normal(sigma=0.01),
                                        ctx=ctx)  # weights initialization
        #net.initialize(mx.init.Normal(sigma=0.1),ctx=ctx) # weights initialization
    '''
    The paper mentions 'an exponentially decaying learning rate'; in this implementation,
    the learning rate is instead multiplied by 0.99 every 10 steps.
    '''
    lr_scheduler = mx.lr_scheduler.FactorScheduler(step=10, factor=0.99)
    trainer = gluon.Trainer(net.collect_params(), optimizer, {
        "learning_rate": learning_rate,
        "lr_scheduler": lr_scheduler
    })

    #learning
    for i in tqdm(range(1, epoch + 1, 1)):
        for data, label in train_data:

            data = data.as_in_context(ctx)
            label = label.as_in_context(ctx)

            with autograd.record(train_mode=True):
                output, reconstruction_output = net(data, label)
                if Reconstruction:
                    margin_loss = Margin_Loss()(output, label)
                    recon_loss = gluon.loss.L2Loss()(reconstruction_output,
                                                     data.reshape(
                                                         (batch_size, -1)))
                    loss = margin_loss + 0.0005 * recon_loss
                else:
                    loss = Margin_Loss()(output, label)

            cost = nd.mean(loss).asscalar()
            loss.backward()
            trainer.step(batch_size, ignore_stale_grad=True)

        print(" epoch : {} , last batch cost : {}".format(i, cost))
        test_accuracy = evaluate_accuracy(test_data, net, ctx)
        print("Test_acc : {0:0.3f}%".format(test_accuracy * 100))

        #weight_save
        if i % save_period == 0:

            if not os.path.exists("weights"):
                os.makedirs("weights")

            print("saving weights")
            if dataset == "MNIST":
                net.save_params("weights/MNIST-{}.params".format(i))

            elif dataset == "FashionMNIST":
                net.save_params("weights/FashionMNIST-{}.params".format(i))

    test_accuracy = evaluate_accuracy(test_data, net, ctx)
    print("Test_acc : {0:0.3f}%".format(test_accuracy * 100))

    if Reconstruction:
        generate_image(test_data, net, ctx, dataset)

    return "optimization completed"
Exemple #30
0
def Train(train,
          test,
          Debug,
          batch_size,
          lr,
          smoothing_constant,
          num_fc1,
          num_fc2,
          num_outputs,
          epochs,
          SNR,
          sl,
          pool_type,
          pool_size,
          pool_stride,
          params_init=None,
          period=None):

    num_examples = train.shape[0]
    # Convert the training-set data to NDArrays
    y = nd.array(~train.sigma.isnull() + 0)
    X = nd.array(
        Normolise(
            train.drop([
                'mass', 'positions', 'gaps', 'max_peak', 'sigma', 'SNR_mf',
                'SNR_mf0'
            ],
                       axis=1)))
    print('Label for training:', y.shape)
    print('Dataset for training:', X.shape, end='\n\n')

    dataset_train = gluon.data.ArrayDataset(X, y)
    train_data = gluon.data.DataLoader(dataset_train,
                                       batch_size,
                                       shuffle=True,
                                       last_batch='discard')

    y = nd.array(~test.sigma.isnull() + 0)
    X = nd.array(
        Normolise(
            test.drop([
                'mass', 'positions', 'gaps', 'max_peak', 'sigma', 'SNR_mf',
                'SNR_mf0'
            ],
                      axis=1)))
    print('Label for testing:', y.shape)
    print('Dataset for testing:', X.shape, end='\n\n')

    # Use the data module to read the data and create the test set. (no shuffle)
    dataset_test = gluon.data.ArrayDataset(X, y)
    test_data = gluon.data.DataLoader(dataset_test,
                                      batch_size,
                                      shuffle=True,
                                      last_batch='discard')

    # Train
    loss_history = []
    loss_v_history = []
    moving_loss_history = []
    test_accuracy_history = []
    train_accuracy_history = []

    #     assert period >= batch_size and period % batch_size == 0

    # Initializate parameters
    if params_init:
        print('Loading params...')
        params = params_init

        #         [W1, b1, W2, b2, W3, b3, W4, b4, W5, b5, W6, b6, W7, b7] = params

        #         # random fc layers
        #         weight_scale = .01

        #         W5 = nd.random_normal(loc=0, scale=weight_scale, shape=(sl, num_fc1), ctx=ctx )
        #         W6 = nd.random_normal(loc=0, scale=weight_scale, shape=(num_fc1, num_fc2), ctx=ctx )
        #         W7 = nd.random_normal(loc=0, scale=weight_scale, shape=(num_fc2, num_outputs), ctx=ctx )
        #         b5 = nd.random_normal(shape=num_fc1, scale=weight_scale, ctx=ctx)
        #         b6 = nd.random_normal(shape=num_fc2, scale=weight_scale, ctx=ctx)
        #         b7 = nd.random_normal(shape=num_outputs, scale=weight_scale, ctx=ctx)

        #         params = [W1, b1, W2, b2, W3, b3, W4, b4, W5, b5]
        #         print('Random the FC1&2-layers...')

        vs = []
        sqrs = []
        for param in params:
            param.attach_grad()
            vs.append(param.zeros_like())
            sqrs.append(param.zeros_like())
    else:
        params, vs, sqrs = init_params(num_fc1=128,
                                       num_fc2=64,
                                       num_outputs=2,
                                       sl=sl)
        print('Initiate weights from random...')

    # Debug
    if Debug:
        print('Debuging...')
        if params_init:
            params = params_init
        else:
            params, vs, sqrs = init_params(num_fc1=128,
                                           num_fc2=64,
                                           num_outputs=2,
                                           sl=sl)
        for data, _ in train_data:
            data = data.as_in_context(ctx).reshape((batch_size, 1, 1, -1))
            break
        _, _ = net_PLB(data,
                       params,
                       debug=Debug,
                       pool_type=pool_type,
                       pool_size=pool_size,
                       pool_stride=pool_stride)
        print()


#     total_loss = [Total_loss(train_data_10, params, batch_size, num_outputs)]

    t = 0
    #   Epoch starts from 1.
    print('pool_type: ', pool_type)
    print('pool_size: ', pool_size)
    print('pool_stride: ', pool_stride)
    print('sl: ', sl)
    best_test_acc = 0
    best_params_epoch = 0

    for epoch in range(1, epochs + 1):
        Epoch_loss = []
        #         Learning-rate self-decay.
        if epoch > 2:
            #             lr *= 0.1
            lr /= (1 + 0.01 * epoch)

        for batch_i, ((data, label),
                      (data_v,
                       label_v)) in enumerate(zip(train_data, test_data)):
            data = data.as_in_context(ctx).reshape((batch_size, 1, 1, -1))
            label = label.as_in_context(ctx)
            label_one_hot = nd.one_hot(label, num_outputs)
            with autograd.record():
                output, _ = net_PLB(data,
                                    params,
                                    pool_type=pool_type,
                                    pool_size=pool_size,
                                    pool_stride=pool_stride)
                loss = softmax_cross_entropy(output, label_one_hot)
            loss.backward()
            #             print(output)
            #             sgd(params, lr, batch_size)

            #           Increment t before invoking adam.
            t += 1
            adam(params, vs, sqrs, lr, batch_size, t)

            data_v = data_v.as_in_context(ctx).reshape((batch_size, 1, 1, -1))
            label_v = label_v.as_in_context(ctx)
            label_v_one_hot = nd.one_hot(label_v, num_outputs)
            output_v, _ = net_PLB(data_v,
                                  params,
                                  pool_type=pool_type,
                                  pool_size=pool_size,
                                  pool_stride=pool_stride)
            loss_v = softmax_cross_entropy(output_v, label_v_one_hot)

            #             #########################
            #              Keep a moving average of the losses
            #             #########################
            curr_loss = nd.mean(loss).asscalar()
            curr_loss_v = nd.mean(loss_v).asscalar()
            moving_loss = (curr_loss if
                           ((batch_i == 0) and (epoch - 1 == 0)) else
                           (1 - smoothing_constant) * moving_loss +
                           (smoothing_constant) * curr_loss)

            loss_history.append(curr_loss)
            loss_v_history.append(curr_loss_v)
            moving_loss_history.append(moving_loss)
            Epoch_loss.append(curr_loss)
            #             if batch_i * batch_size % period == 0:
            #                 print('Curr_loss: ', curr_loss)

            # print('Working on epoch %d. Curr_loss: %.5f (complete percent: %.2f/100' %(epoch, curr_loss*1.0, 1.0 * batch_i / (num_examples//batch_size) * 100) +')' , end='')
            # sys.stdout.write("\r")
            # print('{"metric": "Training Loss for ALL", "value": %.5f}' %(curr_loss*1.0) )
            # print('{"metric": "Testing Loss for ALL", "value": %.5f}' %(curr_loss_v*1.0) )
            print('{"metric": "Training Loss for SNR=%s", "value": %.5f}' %
                  (str(SNR), curr_loss * 1.0))
            print('{"metric": "Testing Loss for SNR=%s", "value": %.5f}' %
                  (str(SNR), curr_loss_v * 1.0))
        test_accuracy = evaluate_accuracy(test_data,
                                          num_examples,
                                          batch_size,
                                          params,
                                          net_PLB,
                                          pool_type=pool_type,
                                          pool_size=pool_size,
                                          pool_stride=pool_stride)
        train_accuracy = evaluate_accuracy(train_data,
                                           num_examples,
                                           batch_size,
                                           params,
                                           net_PLB,
                                           pool_type=pool_type,
                                           pool_size=pool_size,
                                           pool_stride=pool_stride)
        test_accuracy_history.append(test_accuracy)
        train_accuracy_history.append(train_accuracy)

        if test_accuracy >= best_test_acc:
            best_test_acc = test_accuracy
            best_params_epoch = epoch

        # print("Epoch %d, Moving_loss: %.6f, Epoch_loss(mean): %.6f, Train_acc %.4f, Test_acc %.4f" %
        # (epoch, moving_loss, np.mean(Epoch_loss), train_accuracy, test_accuracy))
        print('{"metric": "Train_acc. for SNR=%s in epoches", "value": %.4f}' %
              (str(SNR), train_accuracy))
        print('{"metric": "Test_acc. for SNR=%s in epoches", "value": %.4f}' %
              (str(SNR), test_accuracy))
        yield (params, loss_history, loss_v_history, moving_loss_history,
               test_accuracy_history, train_accuracy_history,
               best_params_epoch)
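# The adam(...) call inside Train above is a project helper. For reference, a sketch of the
# textbook Adam update over raw NDArray parameters (the project's exact helper may differ):
from mxnet import nd

def adam_sketch(params, vs, sqrs, lr, batch_size, t, beta1=0.9, beta2=0.999, eps=1e-8):
    for param, v, sqr in zip(params, vs, sqrs):
        g = param.grad / batch_size                        # average the gradient over the batch
        v[:] = beta1 * v + (1 - beta1) * g                 # first-moment EMA
        sqr[:] = beta2 * sqr + (1 - beta2) * nd.square(g)  # second-moment EMA
        v_hat = v / (1 - beta1 ** t)                       # bias correction
        sqr_hat = sqr / (1 - beta2 ** t)
        param[:] = param - lr * v_hat / (nd.sqrt(sqr_hat) + eps)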
def cnn():
    # Format options for numpy
    np.set_printoptions(precision=3, suppress=True)

    # Generate the training set, with nxn images
    n = 36
    trset_size = 30000

    print('Generating training set...')

    # Use NkuMyaDevMaker to generate images, then format
    X, Y = nmd.makeDataSet(n, trset_size, training=True)
    # For convolutional neural nets, we want 2d single plane images
    Xtrain = np.array(X).reshape([-1, n, n, 1])
    # Make it a single output, not 2 output with 1-hot
    Ytrain = np.array([[y] for y in Y], dtype=np.float32)

    # Use generated images for Dataset, use Dataset to create DataLoader for training
    # Gluon does mini-batching by defining a parameter in DataLoader
    ds = MyaDevDataset(Xtrain, Ytrain)
    train_data = mx.gluon.data.DataLoader(ds, batch_size=100, shuffle=True)

    # Generate the test set, with nxn images
    teset_size = 1000
    print('Generating test set...')

    # Use NkuMyaDevMaker to generate images, then format
    X, Y = nmd.makeDataSet(n, teset_size, training=False)
    # For convolutional neural nets, we want 2d single plane images
    Xtest = np.array(X).reshape([-1, n, n, 1])
    # Make it a single output, not 2 output with 1-hot
    Ytest = np.array([[y] for y in Y], dtype=np.float32)

    # Use generated images for Dataset, use Dataset to create DataLoader for testing
    ds = MyaDevDataset(Xtest, Ytest)
    test_data = mx.gluon.data.DataLoader(ds, batch_size=1, shuffle=False)

    # Initialize the network
    net = gluon.nn.Sequential()

    # Identify some key hyperparameters here for reference.
    k = 5  # Kernels will be k x k
    nc = n - (k - 1)  # Result of convolving nxn image (stride 1, valid) will be nc x nc
    ps = 2  # ps x ps pooling (stride ps)
    assert nc % ps == 0  # Pools should evenly divide images being pooled
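    # e.g. n = 36 and k = 5 give nc = 32, and ps = 2 pooling yields 16 x 16 pooled maps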
    nf = 7  # Will use nf kernel filters
    nh = 11  # Will have nh neurons in the hidden layer

    # Define our network
    with net.name_scope():
        net.add(
            gluon.nn.Conv2D(channels=nf,
                            kernel_size=k,
                            use_bias=True,
                            activation='relu'))
        net.add(gluon.nn.MaxPool2D(pool_size=ps, strides=ps))
        net.add(gluon.nn.Flatten())
        net.add(gluon.nn.Dense(nh, activation="relu", use_bias=True))
        net.add(gluon.nn.Dense(1, activation="sigmoid",
                               use_bias=True))  # Output layer

    # Initialize parameters using normal distribution
    net.collect_params().initialize(mx.init.Normal(sigma=0.05))
    # Use Mean Squared Error for our loss function
    mean_squared_error = gluon.loss.L2Loss()

    # Declare our training algorithm.
    trainer = gluon.Trainer(net.collect_params(), 'rmsprop',
                            {'learning_rate': .01})

    # Begin training
    print('Training...')
    max_epochs = 4
    for e in range(max_epochs):
        correct = 0  # Count of correct results across epoch, for calculating accuracy

        # Get a tuple containing the images/labels for an entire batch
        for i, (data, label) in enumerate(train_data):
            # Specify that we are running this on our cpu. gpu is another option
            data = data.as_in_context(mx.cpu()).swapaxes(3, 1)
            label = label.as_in_context(mx.cpu())
            with autograd.record():  # Start recording the derivatives
                output = net(data)  # The forward iteration
                loss = mean_squared_error(output, label)
                correct += accuracy(output, label)  # Just to print for our benefit, doesn't affect learning
                loss.backward()  # Backprop
            trainer.step(data.shape[0])
            curr_loss = ndarray.mean(loss).asscalar()  # Also to print
        acc = correct / trset_size
        print("Epoch {}. Current Accuracy: {}. Current Loss: {}.".format(
            e, acc, curr_loss))

    # Begin testing
    print('Testing...')
    # Count of correct results across entire test
    count = 0
    for i, (data, label) in enumerate(test_data):
        # Specify running on cpu
        data = data.as_in_context(mx.cpu()).swapaxes(3, 1)
        label = label.as_in_context(mx.cpu())
        # Push forward through network
        output = net(data)
        # Count correct results
        count += accuracy(output, label)

        # Print out 10 example images
        if i < 10:
            img = data.swapaxes(3, 1)
            display_image(img[0])
            print("expected: " + str(label) + "| actual: " + str(output))
    acc = count / teset_size
    print("Test accuracy: {}".format(acc))
Exemple #32
0
def muitlclass_logistic_regression(epoch=100,
                                   batch_size=128,
                                   save_period=10,
                                   load_period=100,
                                   optimizer="sgd",
                                   learning_rate=0.01,
                                   dataset="MNIST",
                                   ctx=mx.gpu(0)):

    #data selection
    if dataset == "MNIST":
        train_data, test_data = MNIST(batch_size)
        path = "weights/MNIST-{}.params".format(load_period)
    elif dataset == "CIFAR10":
        train_data, test_data = CIFAR10(batch_size)
        path = "weights/CIFAR10-{}.params".format(load_period)
    elif dataset == "FashionMNIST":
        train_data, test_data = FashionMNIST(batch_size)
        path = "weights/FashionMNIST-{}.params".format(load_period)
    else:
        return "The dataset does not exist."
    '''Follow these steps:

    •Define network
    •Initialize parameters
    •Loop over inputs
    •Forward input through network to get output
    •Compute loss with output and label
    •Backprop gradient
    •Update parameters with gradient descent.
    '''
    #logistic regression network
    net = gluon.nn.Sequential()  # stacks 'Block's sequentially
    with net.name_scope():
        net.add(gluon.nn.Dense(units=10, activation=None,
                               use_bias=True))  # linear activation

    # weight initialization
    if os.path.exists(path):
        print("loading weights")
        net.load_params(filename=path, ctx=ctx)  # weights load
    else:
        print("initializing weights")
        net.collect_params().initialize(mx.init.Normal(sigma=1.),
                                        ctx=ctx)  # weights initialization

    #optimizer
    trainer = gluon.Trainer(net.collect_params(), optimizer,
                            {"learning_rate": learning_rate})

    for i in tqdm(range(1, epoch + 1, 1)):
        for data, label in train_data:
            if dataset == "CIFAR10":
                data = nd.slice_axis(data=data, axis=3, begin=0, end=1)
            data = data.as_in_context(ctx).reshape((batch_size, -1))
            label = label.as_in_context(ctx)

            with autograd.record(train_mode=True):
                output = net(data)

                #loss definition
                loss = gluon.loss.SoftmaxCrossEntropyLoss()(output, label)
                cost = nd.mean(loss).asscalar()
            loss.backward()
            trainer.step(batch_size, ignore_stale_grad=True)

        print(" epoch : {} , last batch cost : {}".format(i, cost))

        #weight_save
        if i % save_period == 0:

            if not os.path.exists("weights"):
                os.makedirs("weights")

            print("saving weights")
            if dataset == "MNIST":
                net.save_params("weights/MNIST-{}.params".format(i))

            if dataset == "FashionMNIST":
                net.save_params("weights/FashionMNIST-{}.params".format(i))

            elif dataset == "CIFAR10":
                net.save_params("weights/CIFAR10-{}.params".format(i))

    test_accuracy = evaluate_accuracy(test_data, net, ctx, dataset)
    print("Test_acc : {}".format(test_accuracy[1]))

    return "optimization completed"
Exemple #33
0
epochs = 5
smoothing_constant = 0.01
niter = 0
losses = []
moving_loss = 0
for e in range(epochs):
    total_loss = 0
    for data, label in data_iter:
        with autograd.record():
            output = net(data)
            loss = make_loss(output, label)
        loss.backward()
        trainer.step(batch_size)

        niter += 1
        curr_loss = nd.mean(loss).asscalar()
        moving_loss = (1 - smoothing_constant) * moving_loss + (smoothing_constant) * curr_loss
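        # Bias-correct the moving average below: dividing by (1 - (1 - c)^niter) compensates for the EMA starting at 0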
        est_loss = moving_loss/(1-(1-smoothing_constant)**niter)
        if (niter + 1) % 100 == 0:
            losses.append(est_loss)
            print("Epoch %s, batch %s. Moving avg of loss: %s. Average loss: %f" % (e, niter, est_loss, total_loss/num_examples))
            plot(losses, X)

print(dense.weight.data())
print(dense.bias.data())
# help(trainer.step)
# help(dense.weight)



Exemple #34
0

def evaluate_accuracy(data_iterator, net):
    acc = 0.
    for data, label in data_iterator:
        output = net(data)
        acc += accuracy(output, label)
    return acc / len(data_iterator)


learning_rate = .1

for epoch in range(5):
    train_loss = 0.
    train_acc = 0.
    for data, label in train_data:
        with autograd.record():
            output = net(data)
            loss = cross_entropy(output, label)
        loss.backward()
        # Average the gradients so the learning rate is less sensitive to the batch size
        SGD(params, learning_rate / batch_size)

        train_loss += nd.mean(loss).asscalar()
        train_acc += accuracy(output, label)

    test_acc = evaluate_accuracy(test_data, net)
    print("Epoch %d. Loss: %f, Train acc %f, Test acc %f" %
          (epoch, train_loss / len(train_data), train_acc / len(train_data),
           test_acc))
Exemple #35
0
 def __call__(self, p: float, p_hat: NDArray) -> NDArray:
     return self._alpha * mean(self._bce(p_hat, full(p_hat.shape, p)))
def accuracy(output, label):  # output: predicted outputs, label: ground-truth labels
    return nd.mean(output.argmax(axis=1) == label).asscalar()
#### Use the softmax cross-entropy loss
# Softmax and cross-entropy loss
# softmax regression: exp(Xi)/(sum(exp(Xi))) normalizes the outputs so the 10 class probabilities sum to 1
# Cross-entropy loss: the negative cross entropy between the two probability distributions is the objective;
# minimizing it is equivalent to maximizing the similarity of the two distributions
# Measures the model's predictive ability
softmax_cross_entropy = gluon.loss.SoftmaxCrossEntropyLoss()

### Optimize the model
# Train with stochastic gradient descent (sgd)
# and set the learning-rate hyperparameter to .1
trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': .1})

epochs = 10  ## training
for e in range(epochs):  # each epoch passes over the entire training set
    train_loss = 0.  # loss
    train_acc = 0.  # accuracy
    for i, (data, label) in enumerate(train_data):  ## each mini-batch of samples and labels from the training set
        data = data.as_in_context(mx.cpu()).reshape((-1, 784))  ## reshape 28*28 images to 1*784
        label = label.as_in_context(mx.cpu())
        with autograd.record():  # automatic differentiation
            output = net(data)  # model output (forward pass)
            loss = softmax_cross_entropy(output, label)  ## compute the loss
        loss.backward()  # backward pass
        trainer.step(data.shape[0])  # update the model parameters; data.shape[0] = batch_size
        # Provide stats on the improvement of the model over each epoch
        train_loss += ndarray.mean(loss).asscalar()  ## mean loss of the current batch
        train_acc += utils.accuracy(output, label)  # training accuracy
    test_acc = utils.evaluate_accuracy(test_data, net)  # accuracy on the test set
    print("Epoch {}. Train loss: {}. Train acc: {}. Test acc: {}.".format(
        e, train_loss / len(train_data), train_acc / len(train_data), test_acc))