def k_fold_cross_valid(k, epochs, verbose_epoch, X_train, y_train,
                       learning_rate, weight_decay, batch_size):
    """Conducts k-fold cross validation for the model."""
    assert k > 1
    fold_size = X_train.shape[0] // k

    train_loss_sum = 0.0
    test_loss_sum = 0.0
    for test_idx in range(k):
        X_val_test = X_train[test_idx * fold_size: (test_idx + 1) *
                                                   fold_size, :]
        y_val_test = y_train[test_idx * fold_size: (test_idx + 1) * fold_size]
        val_train_defined = False
        for i in range(k):
            if i != test_idx:
                X_cur_fold = X_train[i * fold_size: (i + 1) * fold_size, :]
                y_cur_fold = y_train[i * fold_size: (i + 1) * fold_size]
                if not val_train_defined:
                    X_val_train = X_cur_fold
                    y_val_train = y_cur_fold
                    val_train_defined = True
                else:
                    X_val_train = nd.concat(X_val_train, X_cur_fold, dim=0)
                    y_val_train = nd.concat(y_val_train, y_cur_fold, dim=0)
        net = get_net()
        train_loss = train(net, X_val_train, y_val_train, epochs, verbose_epoch,
                           learning_rate, weight_decay, batch_size)
        train_loss_sum += train_loss
        test_loss = get_rmse_log(net, X_val_test, y_val_test)
        print("Test loss: %f" % test_loss)
        test_loss_sum += test_loss
    return train_loss_sum / k, test_loss_sum / k
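
A hedged usage sketch for the function above: it assumes get_net(), train() and get_rmse_log() are defined in the surrounding script (they are not shown here), and the data and hyperparameters below are made up purely for illustration.

from mxnet import nd

X = nd.random.uniform(shape=(100, 20))
y = nd.random.uniform(shape=(100,))
avg_train_rmse, avg_test_rmse = k_fold_cross_valid(
    k=5, epochs=100, verbose_epoch=95, X_train=X, y_train=y,
    learning_rate=0.01, weight_decay=0.0, batch_size=16)
print("avg train: %f, avg test: %f" % (avg_train_rmse, avg_test_rmse))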
Example #2
def biLSTM(f_lstm, b_lstm, inputs, batch_size=None, dropout_x=0., dropout_h=0.):
    """Feature extraction through BiLSTM

    Parameters
    ----------
    f_lstm : VariationalDropoutCell
        Forward cell
    b_lstm : VariationalDropoutCell
        Backward cell
    inputs : NDArray
        seq_len x batch_size
    dropout_x : float
        Variational dropout on inputs
    dropout_h : float
        Not used

    Returns
    -------
    outputs : NDArray
        Outputs of BiLSTM layers, seq_len x 2 hidden_dims x batch_size
    """
    for f, b in zip(f_lstm, b_lstm):
        inputs = nd.Dropout(inputs, dropout_x, axes=[0])  # important for variational dropout
        fo, fs = f.unroll(length=inputs.shape[0], inputs=inputs, layout='TNC', merge_outputs=True)
        bo, bs = b.unroll(length=inputs.shape[0], inputs=inputs.flip(axis=0), layout='TNC', merge_outputs=True)
        f.reset(), b.reset()
        inputs = nd.concat(fo, bo.flip(axis=0), dim=2)
    return inputs
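
A hedged usage sketch: the cells below are plain gluon LSTMCells, which expose the same unroll()/reset() interface this function relies on (the original code wraps them in VariationalDropoutCell); all sizes are made up for illustration.

from mxnet import nd
from mxnet.gluon import rnn

seq_len, batch_size, input_dim, hidden_dim = 5, 2, 8, 16
f_lstm = [rnn.LSTMCell(hidden_dim) for _ in range(2)]  # forward cell per layer
b_lstm = [rnn.LSTMCell(hidden_dim) for _ in range(2)]  # backward cell per layer
for cell in f_lstm + b_lstm:
    cell.initialize()
x = nd.random.uniform(shape=(seq_len, batch_size, input_dim))
out = biLSTM(f_lstm, b_lstm, x)
print(out.shape)  # (seq_len, batch_size, 2 * hidden_dim) under these assumptions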
Example #3
    def _forward_alg(self, feats):
        # Do the forward algorithm to compute the partition function
        alphas = [[-10000.] * self.tagset_size]
        alphas[0][self.tag2idx[START_TAG]] = 0.
        alphas = nd.array(alphas)

        # Iterate through the sentence
        for feat in feats:
            alphas_t = []  # The forward variables at this timestep
            for next_tag in range(self.tagset_size):
                # broadcast the emission score: it is the same regardless of
                # the previous tag
                emit_score = feat[next_tag].reshape((1, -1))
                # the ith entry of trans_score is the score of transitioning to
                # next_tag from i
                trans_score = self.transitions[next_tag].reshape((1, -1))
                # The ith entry of next_tag_var is the value for the
                # edge (i -> next_tag) before we do log-sum-exp
                next_tag_var = alphas + trans_score + emit_score
                # The forward variable for this tag is log-sum-exp of all the
                # scores.
                alphas_t.append(log_sum_exp(next_tag_var))
            alphas = nd.concat(*alphas_t, dim=0).reshape((1, -1))
        terminal_var = alphas + self.transitions[self.tag2idx[STOP_TAG]]
        alpha = log_sum_exp(terminal_var)
        return alpha
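
For reference, a minimal numerically stable log_sum_exp with the shape contract this method relies on (a (1, tagset_size) row in, a length-1 NDArray out); the real helper used by this class is not shown here and may differ.

from mxnet import nd

def log_sum_exp_sketch(vec):
    # vec has shape (1, tagset_size)
    max_score = nd.max(vec, axis=1, keepdims=True)  # (1, 1)
    return (max_score + nd.log(nd.sum(nd.exp(vec - max_score),
                                      axis=1, keepdims=True))).reshape((1,))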
Example #4
def bilinear(x, W, y, input_size, seq_len, batch_size, num_outputs=1, bias_x=False, bias_y=False):
    """Do xWy

    Parameters
    ----------
    x : NDArray
        (input_size x seq_len) x batch_size
    W : NDArray
        (num_outputs x ny) x nx
    y : NDArray
        (input_size x seq_len) x batch_size
    input_size : int
        input dimension
    seq_len : int
        sequence length
    batch_size : int
        batch size
    num_outputs : int
        number of outputs
    bias_x : bool
        whether concat bias vector to input x
    bias_y : bool
        whether concat bias vector to input y

    Returns
    -------
    output : NDArray
        [seq_len_y x seq_len_x if output_size == 1 else seq_len_y x num_outputs x seq_len_x] x batch_size
    """
    if bias_x:
        x = nd.concat(x, nd.ones((1, seq_len, batch_size)), dim=0)
    if bias_y:
        y = nd.concat(y, nd.ones((1, seq_len, batch_size)), dim=0)

    nx, ny = input_size + bias_x, input_size + bias_y
    # W: (num_outputs x ny) x nx
    lin = nd.dot(W, x)
    if num_outputs > 1:
        lin = reshape_fortran(lin, (ny, num_outputs * seq_len, batch_size))
    y = y.transpose([2, 1, 0])  # May cause performance issues
    lin = lin.transpose([2, 1, 0])
    blin = nd.batch_dot(lin, y, transpose_b=True)
    blin = blin.transpose([2, 1, 0])
    if num_outputs > 1:
        blin = reshape_fortran(blin, (seq_len, num_outputs, seq_len, batch_size))
    return blin
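
A quick shape check for bilinear (assumptions: num_outputs=1 so the reshape_fortran helper is never hit, and no bias terms).

from mxnet import nd

input_size, seq_len, batch_size = 4, 7, 3
x = nd.random.uniform(shape=(input_size, seq_len, batch_size))
y = nd.random.uniform(shape=(input_size, seq_len, batch_size))
W = nd.random.uniform(shape=(input_size, input_size))  # (num_outputs * ny) x nx
out = bilinear(x, W, y, input_size, seq_len, batch_size)
print(out.shape)  # (seq_len, seq_len, batch_size) under these assumptions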
    def forward(self, x):
        if isinstance(x, np.ndarray):
            x = nd.array(x)
        if self._max_len > x.size:
            pad = nd.ones((self._max_len - x.size,)) * self._fill_value
            x = nd.concat(x, pad, dim=0)
        elif self._max_len < x.size:
            x = x[:self._max_len]
        return x
Example #6
def train(input_variable, target_variable, encoder, decoder, teacher_forcing_ratio,
          encoder_optimizer, decoder_optimizer, criterion, max_length, ctx):
    with autograd.record():
        loss = F.zeros((1,), ctx=ctx)

        encoder_hidden = encoder.initHidden(ctx)

        input_length = input_variable.shape[0]
        target_length = target_variable.shape[0]

        encoder_outputs, encoder_hidden = encoder(
                input_variable.expand_dims(0), encoder_hidden)

        if input_length < max_length:
            encoder_outputs = F.concat(encoder_outputs.flatten(),
                F.zeros((max_length - input_length, encoder.hidden_size), ctx=ctx), dim=0)
        else:
            encoder_outputs = encoder_outputs.flatten()



        decoder_input = F.array([SOS_token], ctx=ctx)

        decoder_hidden = encoder_hidden

        use_teacher_forcing = True if random.random() < teacher_forcing_ratio else False

        if use_teacher_forcing:
            # Teacher forcing: Feed the target as the next input
            for di in range(target_length):
                decoder_output, decoder_hidden, decoder_attention = decoder(
                    decoder_input, decoder_hidden, encoder_outputs)

                loss = F.add(loss, criterion(decoder_output, target_variable[di]))
                print(criterion(decoder_output, target_variable[di]))
                decoder_input = target_variable[di]  # Teacher forcing

        else:
            # Without teacher forcing: use its own predictions as the next input
            for di in range(target_length):
                decoder_output, decoder_hidden, decoder_attention = decoder(
                    decoder_input, decoder_hidden, encoder_outputs)
                topi = decoder_output.argmax(axis=1)

                decoder_input = F.array([topi.asscalar()], ctx=ctx)

                loss = F.add(loss, criterion(decoder_output, target_variable[di]))

                if topi.asscalar() == EOS_token:
                    break

        loss.backward()

    encoder_optimizer.step(1)
    decoder_optimizer.step(1)

    return loss.asscalar()/target_length
Example #7
    def _score_sentence(self, feats, tags):
        # Gives the score of a provided tag sequence
        score = nd.array([0])
        tags = nd.concat(nd.array([self.tag2idx[START_TAG]]), *tags, dim=0)
        for i, feat in enumerate(feats):
            score = score + \
                self.transitions[to_scalar(tags[i+1]), to_scalar(tags[i])] + feat[to_scalar(tags[i+1])]
        score = score + self.transitions[self.tag2idx[STOP_TAG],
                                         to_scalar(tags[int(tags.shape[0]-1)])]
        return score
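
For reference, a minimal to_scalar with the contract this method assumes (a single-element NDArray converted to a Python int); the helper actually defined in the original file may differ.

def to_scalar_sketch(x):
    # x is a single-element NDArray such as tags[i]
    return int(x.asscalar())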
Example #8
def subtract_imagenet_mean_preprocess_batch(batch):
    """Subtract ImageNet mean pixel-wise from a BGR image."""
    batch = F.swapaxes(batch, 0, 1)
    (r, g, b) = F.split(batch, num_outputs=3, axis=0)
    r = r - 123.680
    g = g - 116.779
    b = b - 103.939
    batch = F.concat(b, g, r, dim=0)
    batch = F.swapaxes(batch, 0, 1)
    return batch
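
A small sanity check (assumptions: F refers to mxnet.ndarray in the surrounding script, and the input is an NCHW float batch in RGB order, since the code splits the channel axis into (r, g, b) before re-concatenating).

from mxnet import nd

batch = nd.random.uniform(low=0, high=255, shape=(2, 3, 32, 32))
out = subtract_imagenet_mean_preprocess_batch(batch)
print(out.shape)  # (2, 3, 32, 32); channel order is now b, g, r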
Example #9
    def forward(self, input, hidden, encoder_outputs):
        #input shape, (1,)
        embedded = self.embedding(input)
        if self.dropout_p > 0:
            embedded = self.dropout(embedded)

        attn_weights = F.softmax(
            self.attn(F.concat(embedded, hidden[0].flatten(), dim=1)))
        attn_applied = F.batch_dot(attn_weights.expand_dims(0),
                                 encoder_outputs.expand_dims(0))

        output = F.concat(embedded.flatten(), attn_applied.flatten(), dim=1)
        output = self.attn_combine(output).expand_dims(0)

        for i in range(self.n_layers):
            output = F.relu(output)
            output, hidden = self.gru(output, hidden)

        output = self.out(output)

        return output, hidden, attn_weights
Example #10
def add_imagenet_mean_batch(batch):
    batch = F.swapaxes(batch, 0, 1)
    (b, g, r) = F.split(batch, num_outputs=3, axis=0)
    r = r + 123.680
    g = g + 116.779
    b = b + 103.939
    batch = F.concat(b, g, r, dim=0)
    batch = F.swapaxes(batch, 0, 1)
    # batch = denormalizer(batch)
    return batch
Example #11
    def _viterbi_decode(self, feats):
        backpointers = []

        # Initialize the viterbi variables in log space
        vvars = nd.full((1, self.tagset_size), -10000.)
        vvars[0, self.tag2idx[START_TAG]] = 0

        for feat in feats:
            bptrs_t = []  # holds the backpointers for this step
            viterbivars_t = []  # holds the viterbi variables for this step

            for next_tag in range(self.tagset_size):
                # next_tag_var[i] holds the viterbi variable for tag i at the
                # previous step, plus the score of transitioning
                # from tag i to next_tag.
                # We don't include the emission scores here because the max
                # does not depend on them (we add them in below)
                next_tag_var = vvars + self.transitions[next_tag]
                best_tag_id = argmax(next_tag_var)
                bptrs_t.append(best_tag_id)
                viterbivars_t.append(next_tag_var[0, best_tag_id])
            # Now add in the emission scores, and assign vvars to the set
            # of viterbi variables we just computed
            vvars = (nd.concat(*viterbivars_t, dim=0) + feat).reshape((1, -1))
            backpointers.append(bptrs_t)

        # Transition to STOP_TAG
        terminal_var = vvars + self.transitions[self.tag2idx[STOP_TAG]]
        best_tag_id = argmax(terminal_var)
        path_score = terminal_var[0, best_tag_id]

        # Follow the back pointers to decode the best path.
        best_path = [best_tag_id]
        for bptrs_t in reversed(backpointers):
            best_tag_id = bptrs_t[best_tag_id]
            best_path.append(best_tag_id)
        # Pop off the start tag (we don't want to return that to the caller)
        start = best_path.pop()
        assert start == self.tag2idx[START_TAG]  # Sanity check
        best_path.reverse()
        return path_score, best_path
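
For reference, a minimal argmax with the contract this method assumes (the column index of the largest entry of a (1, tagset_size) row, returned as a Python int); the real helper in the original file may differ.

from mxnet import nd

def argmax_sketch(vec):
    # vec has shape (1, tagset_size)
    return int(nd.argmax(vec, axis=1).asscalar())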
Example #12
    def query(self, images):
        if self.pool_size == 0:
            return images
        return_images = []
        for image in images:
            image = image.reshape(1, image.shape[0], image.shape[1], image.shape[2])
            if self.num_imgs < self.pool_size:
                self.num_imgs = self.num_imgs + 1
                self.images.append(image)
                return_images.append(image)
            else:
                p = random.uniform(0, 1)
                if p > 0.5:
                    random_id = random.randint(0, self.pool_size - 1)  # randint is inclusive
                    tmp = self.images[random_id].copy()
                    self.images[random_id] = image
                    return_images.append(tmp)
                else:
                    return_images.append(image)
        image_array = return_images[0].copyto(images.context)
        for image in return_images[1:]:
            image_array = nd.concat(image_array, image.copyto(images.context), dim=0)
        return image_array
Example #13
def parse_net_output(Y, numClass, box_per_cell):
    pred = nd.transpose(Y, (0, 2, 3, 1))
    pred = pred.reshape((0, 0, 0, box_per_cell, numClass + 5))  # add one dim for boxes
    predCls = nd.slice_axis(pred, begin=0, end=numClass, axis=-1)
    predObject = nd.slice_axis(pred, begin=numClass, end=numClass + 1, axis=-1)
    #predObject = nd.sigmoid(predObject)
    predXY = nd.slice_axis(pred, begin=numClass + 1, end=numClass + 3, axis=-1)
    #predXY = nd.sigmoid(predXY)
    predWH = nd.slice_axis(pred, begin=numClass + 3, end=numClass + 5, axis=-1)
    #x,y = convert_xy(predXY)
    #w,h = convert_wh(predWH)
    #w = nd.clip(w,0,1)
    #h = nd.clip(h,0,1)
    #x0 = nd.clip(x, 0, 1)
    #y0 = nd.clip(y,0,1)
    #x1 = nd.clip(x0 + w,0,1)
    #y1 = np.clip(y0 + h, 0,1)
    #x = x0
    #y = y0
    #w = x1 - x0
    #h = y1 - y0
    XYWH = nd.concat(predXY, predWH, dim=-1)
    # pdb.set_trace()
    return predCls, predObject, XYWH
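
A shape walkthrough (assumption: Y is a YOLO-style head output of shape (batch, box_per_cell * (numClass + 5), H, W)).

from mxnet import nd

numClass, box_per_cell = 20, 2
Y = nd.random.uniform(shape=(1, box_per_cell * (numClass + 5), 7, 7))
cls, obj, xywh = parse_net_output(Y, numClass, box_per_cell)
print(cls.shape, obj.shape, xywh.shape)
# (1, 7, 7, 2, 20) (1, 7, 7, 2, 1) (1, 7, 7, 2, 4)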
Example #14
    valid_ds_aug, label_valid_aug, angle_valid_aug = augment_data(valid_ds[0], valid_ds[1], valid_ds[2])
    valid_ds = (
        valid_ds_aug.astype('float32'),
        nd.array(label_valid_aug).astype('float32'),
        nd.array(angle_valid_aug).astype('float32')
        )

    test_norm = []
    angle_test_norm = []
    for k in range(test.shape[0]):
        imag = test[k].reshape(shape=(1, test[k].shape[0], test[k].shape[1], test[k].shape[2]))
        test_norm.append(img_norm(imag))

    for k in range(angles_test.shape[0]):
        angle_test_norm.append(angles_test[k].asscalar())  # assumption: the raw angle scalar is used as-is

    test_ds = (nd.concat(*test_norm, dim=0).astype('float32'), ids, angle_test_norm)

    batch_size = 128
    train_data = DataLoader(train_ds, batch_size, shuffle=True)
    valid_data = DataLoader(valid_ds, batch_size, shuffle=False)
    test_data = TestDataLoader(test_ds, batch_size)

    print(len(test_ds[0]))

    train(train_data, valid_data, test_data, batch_size)
Example #15
                    batch[(be - bs) * w +
                          b] = max(0,
                                   int(tgt[b][w]) -
                                   1) if w < len(tgt[b]) else max_voc
            batch = nd.array(batch)
            # forward
            with autograd.record():
                result = []
                # CNN+encoder forward
                output, status = model(buffer, dpt)
                # RNN decoder forward
                for w in range(maxlen + expand_terminal):
                    output, word, status = model.one_word(output, status)
                    result.append(word)
                # make RNN output to sequencial
                result = F.concat(*result, dim=0)
                # make loss
                loss = loss_func(result, batch)
                loss_n.append(np.mean(loss.asnumpy()))
                del output, result
            # backward
            loss.backward()
            trainer.step(be - bs, ignore_stale_grad=True)
            n_iter += be - bs
            del loss, ids, tgt, maxlen, buffer, dpt, batch
        del bs, be, indexs
    print('%d/%d epoch loss=%f...' % (epoch, epochs, np.mean(loss_n)))
    loss_n = []
    del n_iter, t_index, r_index
del trainer, loss_func, loss_n, ID, TG, X_imgs
    def generate_learned_samples(self):
        '''
        Draw and generate data.

        Returns:
            `Tuple` data. The shape is ...
            - `mxnet.ndarray` of observed data points in training.
            - `mxnet.ndarray` of supervised data in training.
            - `mxnet.ndarray` of observed data points in test.
            - `mxnet.ndarray` of supervised data in test.
        '''
        for epoch in range(self.epochs):
            training_batch_arr, test_batch_arr = None, None
            training_label_arr, test_label_arr = None, None
            for batch_size in range(self.batch_size):
                dir_key = np.random.randint(
                    low=0, high=len(self.__training_file_path_list))

                training_one_hot_arr = nd.zeros(
                    (1, len(self.__training_file_path_list)), ctx=self.__ctx)
                training_one_hot_arr[0, dir_key] = 1

                training_file_path_list = self.__split_at_intervals(
                    self.__training_file_path_list[dir_key],
                    start_pos=0,
                    seq_interval=self.__at_intervals)

                training_data_arr, test_data_arr = None, None
                training_file_key = np.random.randint(
                    low=0, high=len(training_file_path_list) - self.__seq_len)

                test_dir_key = np.random.randint(
                    low=0, high=len(self.__test_file_path_list))
                test_one_hot_arr = nd.zeros(
                    (1, len(self.__test_file_path_list)), ctx=self.__ctx)
                test_one_hot_arr[0, test_dir_key] = 1

                test_file_path_list = self.__split_at_intervals(
                    self.__test_file_path_list[test_dir_key],
                    start_pos=0,
                    seq_interval=self.__at_intervals)

                test_file_key = np.random.randint(
                    low=0, high=len(test_file_path_list) - self.__seq_len)
                for seq in range(self.__seq_len):
                    seq_training_batch_arr = self.__image_extractor.extract(
                        path=training_file_path_list[training_file_key +
                                                     seq], )
                    seq_training_batch_arr = self.pre_normalize(
                        seq_training_batch_arr)
                    seq_training_batch_arr = nd.expand_dims(
                        seq_training_batch_arr, axis=0)
                    seq_test_batch_arr = self.__image_extractor.extract(
                        path=test_file_path_list[test_file_key + seq], )
                    seq_test_batch_arr = self.pre_normalize(seq_test_batch_arr)
                    seq_test_batch_arr = nd.expand_dims(seq_test_batch_arr,
                                                        axis=0)

                    if training_data_arr is not None:
                        training_data_arr = nd.concat(training_data_arr,
                                                      seq_training_batch_arr,
                                                      dim=0)
                    else:
                        training_data_arr = seq_training_batch_arr

                    if test_data_arr is not None:
                        test_data_arr = nd.concat(test_data_arr,
                                                  seq_test_batch_arr,
                                                  dim=0)
                    else:
                        test_data_arr = seq_test_batch_arr

                training_data_arr = nd.expand_dims(training_data_arr, axis=0)
                test_data_arr = nd.expand_dims(test_data_arr, axis=0)

                if training_batch_arr is not None:
                    training_batch_arr = nd.concat(training_batch_arr,
                                                   training_data_arr,
                                                   dim=0)
                else:
                    training_batch_arr = training_data_arr

                if test_batch_arr is not None:
                    test_batch_arr = nd.concat(test_batch_arr,
                                               test_data_arr,
                                               dim=0)
                else:
                    test_batch_arr = test_data_arr

                if training_label_arr is not None:
                    training_label_arr = nd.concat(training_label_arr,
                                                   training_one_hot_arr,
                                                   dim=0)
                else:
                    training_label_arr = training_one_hot_arr

                if test_label_arr is not None:
                    test_label_arr = nd.concat(test_label_arr,
                                               test_one_hot_arr,
                                               dim=0)
                else:
                    test_label_arr = test_one_hot_arr

            if self.__noiseable_data is not None:
                training_batch_arr = self.__noiseable_data.noise(
                    training_batch_arr)

            yield training_batch_arr, training_label_arr, test_batch_arr, test_label_arr
Example #17
def cat(seq, dim):
    return nd.concat(*seq, dim=dim)
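
A quick illustration of the wrapper above:

from mxnet import nd

parts = [nd.ones((2, 3)), nd.zeros((2, 3))]
print(cat(parts, dim=0).shape)  # (4, 3)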
Example #18
def trainadnov(opt, train_data, val_data, ctx, networks):

    netEn = networks[0]
    netDe = networks[1]
    netD = networks[2]
    netD2 = networks[3]
    netDS = networks[4]
    trainerEn = networks[5]
    trainerDe = networks[6]
    trainerD = networks[7]
    trainerD2 = networks[8]
    trainerSD = networks[9]
    cep = opt.continueEpochFrom
    epochs = opt.epochs
    lambda1 = opt.lambda1
    batch_size = opt.batch_size
    expname = opt.expname
    append = opt.append
    text_file = open(expname + "_trainloss.txt", "w")
    text_file.close()
    text_file = open(expname + "_validtest.txt", "w")
    text_file.close()
    GAN_loss = gluon.loss.SigmoidBinaryCrossEntropyLoss()
    L1_loss = gluon.loss.L2Loss()
    metric = mx.metric.CustomMetric(facc)
    metricl = mx.metric.CustomMetric(facc)
    metricStrong = mx.metric.CustomMetric(facc)
    metric2 = mx.metric.MSE()
    loss_rec_G2 = []
    acc2_rec = []
    loss_rec_G = []
    loss_rec_D = []
    loss_rec_R = []
    acc_rec = []
    loss_rec_D2 = []
    stamp = datetime.now().strftime('%Y_%m_%d-%H_%M')
    logging.basicConfig(level=logging.DEBUG)
    lr = 2.0 * batch_size
    logging.basicConfig(level=logging.DEBUG)
    if cep == -1:
        cep = 0
    else:
        netEn.load_params('checkpoints/' + opt.expname + '_' + str(cep) +
                          '_En.params',
                          ctx=ctx)
        netDe.load_params('checkpoints/' + opt.expname + '_' + str(cep) +
                          '_De.params',
                          ctx=ctx)
        netD.load_params('checkpoints/' + opt.expname + '_' + str(cep) +
                         '_D.params',
                         ctx=ctx)
        netD2.load_params('checkpoints/' + opt.expname + '_' + str(cep) +
                          '_D2.params',
                          ctx=ctx)
        netDS.load_params('checkpoints/' + opt.expname + '_' + str(cep) +
                          '_SD.params',
                          ctx=ctx)
    for epoch in range(cep + 1, epochs):
        tic = time.time()
        btic = time.time()
        train_data.reset()
        iter = 0
        for batch in train_data:
            ############################
            # (1) Update D network: maximize log(D(x, y)) + log(1 - D(x, G(x, z)))
            ###########################
            real_in = batch.data[0].as_in_context(ctx)
            real_out = batch.data[1].as_in_context(ctx)
            fake_latent = netEn(real_in)
            mu = nd.random.uniform(low=-1,
                                   high=1,
                                   shape=fake_latent.shape,
                                   ctx=ctx)
            real_latent = nd.random.uniform(low=-1,
                                            high=1,
                                            shape=fake_latent.shape,
                                            ctx=ctx)
            fake_out = netDe(fake_latent)
            fake_concat = nd.concat(real_in, fake_out,
                                    dim=1) if append else fake_out
            if epoch > 150:  # negative mining
                mu = nd.random.uniform(low=-1,
                                       high=1,
                                       shape=fake_latent.shape,
                                       ctx=ctx)
                mu.attach_grad()
                for ep2 in range(1):  # doing single gradient step
                    with autograd.record():
                        eps2 = nd.tanh(mu)
                        rec_output = netDS(netDe(eps2))
                        fake_label = nd.zeros(rec_output.shape, ctx=ctx)
                        errGS = GAN_loss(rec_output, fake_label)
                        errGS.backward()
                    mu -= lr / mu.shape[0] * mu.grad  # Update mu with SGD
            eps2 = nd.tanh(mu)
            with autograd.record():
                # Train with fake image
                output = netD(fake_concat)
                output2 = netD2(fake_latent)
                fake_label = nd.zeros(output.shape, ctx=ctx)
                fake_latent_label = nd.zeros(output2.shape, ctx=ctx)
                eps = nd.random.uniform(low=-1,
                                        high=1,
                                        shape=fake_latent.shape,
                                        ctx=ctx)
                rec_output = netD(netDe(eps))
                errD_fake = GAN_loss(rec_output, fake_label)
                errD_fake2 = GAN_loss(output, fake_label)
                errD2_fake = GAN_loss(output2, fake_latent_label)
                metric.update([
                    fake_label,
                ], [
                    rec_output,
                ])
                metric2.update([
                    fake_latent_label,
                ], [
                    output2,
                ])
                real_concat = nd.concat(real_in, real_out,
                                        dim=1) if append else real_out
                output = netD(real_concat)
                output2 = netD2(real_latent)
                real_label = nd.ones(output.shape, ctx=ctx)
                real_latent_label = nd.ones(output2.shape, ctx=ctx)
                errD_real = GAN_loss(output, real_label)
                errD2_real = GAN_loss(output2, real_latent_label)
                errD = (errD_real + errD_fake) * 0.5
                errD2 = (errD2_real + errD2_fake) * 0.5
                totalerrD = errD + errD2
                totalerrD.backward()
            metric.update([
                real_label,
            ], [
                output,
            ])
            metric2.update([
                real_latent_label,
            ], [
                output2,
            ])
            trainerD.step(batch.data[0].shape[0])
            trainerD2.step(batch.data[0].shape[0])
            with autograd.record():
                # Train classifier
                strong_output = netDS(netDe(eps))
                strong_real = netDS(fake_concat)
                errs1 = GAN_loss(strong_output, fake_label)
                errs2 = GAN_loss(strong_real, real_label)
                metricStrong.update([
                    fake_label,
                ], [
                    strong_output,
                ])
                metricStrong.update([
                    real_label,
                ], [
                    strong_real,
                ])
                strongerr = 0.5 * (errs1 + errs2)
                strongerr.backward()
            trainerSD.step(batch.data[0].shape[0])
            ############################
            # (2) Update G network: maximize log(D(x, G(x, z))) - lambda1 * L1(y, G(x, z))
            ###########################
            with autograd.record():
                rec_output = netD(netDe(eps2))
                fake_latent = (netEn(real_in))
                output2 = netD2(fake_latent)
                fake_out = netDe(fake_latent)
                fake_concat = nd.concat(real_in, fake_out,
                                        dim=1) if append else fake_out
                output = netD(fake_concat)
                real_label = nd.ones(output.shape, ctx=ctx)
                real_latent_label = nd.ones(output2.shape, ctx=ctx)
                errG2 = GAN_loss(rec_output, real_label)
                errR = L1_loss(real_out, fake_out) * lambda1
                errG = 10.0 * GAN_loss(output2,
                                       real_latent_label) + errG2 + errR
                errG.backward()
            trainerDe.step(batch.data[0].shape[0])
            trainerEn.step(batch.data[0].shape[0])
            loss_rec_G2.append(nd.mean(errG2).asscalar())
            loss_rec_G.append(
                nd.mean(nd.mean(errG)).asscalar() - nd.mean(errG2).asscalar() -
                nd.mean(errR).asscalar())
            loss_rec_D.append(nd.mean(errD).asscalar())
            loss_rec_R.append(nd.mean(errR).asscalar())
            loss_rec_D2.append(nd.mean(errD2).asscalar())
            _, acc2 = metric2.get()
            name, acc = metric.get()
            acc_rec.append(acc)
            acc2_rec.append(acc2)

            # Print log information every ten batches
            if iter % 10 == 0:
                _, acc2 = metric2.get()
                name, acc = metric.get()
            _, accStrong = metricStrong.get()
            logging.info('speed: {} samples/s'.format(batch_size /
                                                      (time.time() - btic)))
            logging.info(
                'discriminator loss = %f, D2 loss = %f, generator loss = %f, G2 loss = %f, SD loss = %f,  D acc = %f , D2 acc = %f, DS acc = %f, reconstruction error= %f  at iter %d epoch %d'
                %
                (nd.mean(errD).asscalar(), nd.mean(errD2).asscalar(),
                 nd.mean(errG - errG2 - errR).asscalar(),
                 nd.mean(errG2).asscalar(), nd.mean(strongerr).asscalar(), acc,
                 acc2, accStrong, nd.mean(errR).asscalar(), iter, epoch))
            iter = iter + 1
            btic = time.time()
            name, acc = metric.get()
            _, acc2 = metric2.get()
            metric.reset()
            metric2.reset()
            train_data.reset()
            metricStrong.reset()

            logging.info('\nbinary training acc at epoch %d: %s=%f' %
                         (epoch, name, acc))
            logging.info('time: %f' % (time.time() - tic))
            if epoch % 5 == 0:
                filename = "checkpoints/" + expname + "_" + str(
                    epoch) + "_D.params"
                netD.save_params(filename)
                filename = "checkpoints/" + expname + "_" + str(
                    epoch) + "_D2.params"
                netD2.save_params(filename)
                filename = "checkpoints/" + expname + "_" + str(
                    epoch) + "_En.params"
                netEn.save_params(filename)
                filename = "checkpoints/" + expname + "_" + str(
                    epoch) + "_De.params"
                netDe.save_params(filename)
                filename = "checkpoints/" + expname + "_" + str(
                    epoch) + "_SD.params"
                netDS.save_params(filename)
                val_data.reset()
                text_file = open(expname + "_validtest.txt", "a")
                for vbatch in val_data:
                    real_in = vbatch.data[0].as_in_context(ctx)
                    real_out = vbatch.data[1].as_in_context(ctx)
                    fake_latent = netEn(real_in)
                    y = netDe(fake_latent)
                    fake_out = y
                    metricMSE.update([
                        fake_out,
                    ], [
                        real_out,
                    ])
                _, acc2 = metricMSE.get()
                text_file.write(
                    "%s %s %s %s\n" % (str(epoch), nd.mean(errR).asscalar(),
                                       str(acc2), str(accStrong)))
                metricMSE.reset()
    return [
        loss_rec_D, loss_rec_G, loss_rec_R, acc_rec, loss_rec_D2, loss_rec_G2,
        acc2_rec
    ]
Example #19
    group_begin = time.monotonic()
    epoch_begin_time = time.monotonic()

    for batch_id, batch in enumerate(iterator):

        data = batch.data[0].as_in_context(ctx)  # Data: Images
        labels = batch.label[0].as_in_context(ctx)  # Data: Labels

        all_labels.extend(labels.asnumpy())

        semantic_vectors = Resnet50(data)

        out1 = low_net(semantic_vectors[0])
        out2 = med_net(semantic_vectors[1])
        out3 = high_net(semantic_vectors[2])
        combined = nd.concat(out1, out2, out3).asnumpy()
        print(" >> %d" % len(combined))
        # print(len(out3))
        for row in combined:
            all_features.append(row)

# print(len(all_features))
# for row in all_features:
#     print(len(row))
#
write_features_to_file(all_features, all_labels, feature_file)

# --------------------------------------------------------------------
# -----------------------------< End >--------------------------------
# --------------------------------------------------------------------
    def backward(self, out_grads=None):
        #print('in backward')
        assert self.binded and self.params_initialized
        #tmp_ctx = self._ctx_cpu
        tmp_ctx = self._ctx_single_gpu
        fc7_outs = []
        ctx_fc7_max = self.get_ndarray(tmp_ctx, 'ctx_fc7_max',
                                       (self._batch_size, len(self._context)))
        #local_fc7_max = nd.zeros( (self.global_label.shape[0],1), ctx=mx.cpu())
        for i, _module in enumerate(self._arcface_modules):
            _fc7 = _module.get_outputs(merge_multi_context=True)[0]
            fc7_outs.append(_fc7)
            _fc7_max = nd.max(_fc7, axis=1).as_in_context(tmp_ctx)
            ctx_fc7_max[:, i] = _fc7_max

        local_fc7_max = self.get_ndarray(tmp_ctx, 'local_fc7_max',
                                         (self._batch_size, 1))
        nd.max(ctx_fc7_max, axis=1, keepdims=True, out=local_fc7_max)
        global_fc7_max = local_fc7_max
        #local_fc7_sum = None
        local_fc7_sum = self.get_ndarray(tmp_ctx, 'local_fc7_sum',
                                         (self._batch_size, 1))
        local_fc7_sum[:, :] = 0.0
        for i, _module in enumerate(self._arcface_modules):
            _max = self.get_ndarray2(fc7_outs[i].context, 'fc7_max',
                                     global_fc7_max)
            fc7_outs[i] = nd.broadcast_sub(fc7_outs[i], _max)
            fc7_outs[i] = nd.exp(fc7_outs[i])
            _sum = nd.sum(fc7_outs[i], axis=1,
                          keepdims=True).as_in_context(tmp_ctx)
            local_fc7_sum += _sum
        global_fc7_sum = local_fc7_sum

        if self._iter % self._frequent == 0:
            #_ctx = self._context[-1]
            _ctx = self._ctx_cpu
            _probs = []
            for i, _module in enumerate(self._arcface_modules):
                _prob = self.get_ndarray2(_ctx, '_fc7_prob_%d' % i,
                                          fc7_outs[i])
                _probs.append(_prob)
            fc7_prob = self.get_ndarray(
                _ctx, 'test_fc7_prob',
                (self._batch_size, self._ctx_num_classes * len(self._context)))
            nd.concat(*_probs, dim=1, out=fc7_prob)
            fc7_pred = nd.argmax(fc7_prob, axis=1)
            pd = fc7_pred.asnumpy().astype('int32')
            local_label = self.global_label - self._local_class_start
            #local_label = self.get_ndarray2(_ctx, 'test_label', local_label)
            _pred = nd.equal(fc7_pred, local_label)
            print('fc7_acc [%d]: %f' %
                  (self._iter, nd.mean(_pred).asnumpy()[0]))

        #local_fc1_grad = []
        #fc1_grad_ctx = self._ctx_cpu
        fc1_grad_ctx = self._ctx_single_gpu
        local_fc1_grad = self.get_ndarray(fc1_grad_ctx, 'local_fc1_grad',
                                          (self._batch_size, self._emb_size))
        local_fc1_grad[:, :] = 0.0

        for i, _module in enumerate(self._arcface_modules):
            _sum = self.get_ndarray2(fc7_outs[i].context, 'fc7_sum',
                                     global_fc7_sum)
            fc7_outs[i] = nd.broadcast_div(fc7_outs[i], _sum)
            a = i * self._ctx_num_classes
            b = (i + 1) * self._ctx_num_classes
            _label = self.global_label - self._ctx_class_start[i]
            _label = self.get_ndarray2(fc7_outs[i].context, 'label', _label)
            onehot_label = self.get_ndarray(
                fc7_outs[i].context, 'label_onehot',
                (self._batch_size, self._ctx_num_classes))
            nd.one_hot(_label,
                       depth=self._ctx_num_classes,
                       on_value=1.0,
                       off_value=0.0,
                       out=onehot_label)
            fc7_outs[i] -= onehot_label
            _module.backward(out_grads=[fc7_outs[i]])
            #ctx_fc1_grad = _module.get_input_grads()[0].as_in_context(mx.cpu())
            ctx_fc1_grad = self.get_ndarray2(fc1_grad_ctx,
                                             'ctx_fc1_grad_%d' % i,
                                             _module.get_input_grads()[0])
            local_fc1_grad += ctx_fc1_grad

        global_fc1_grad = local_fc1_grad
        self._curr_module.backward(out_grads=[global_fc1_grad])
Example #21
def trainAE(opt, train_data, val_data, ctx, networks):

    netEn = networks[0]
    netDe = networks[1]
    trainerEn = networks[5]
    trainerDe = networks[6]
    epochs = opt.epochs
    batch_size = opt.batch_size
    expname = opt.expname
    text_file = open(expname + "_trainloss.txt", "w")
    text_file.close()
    text_file = open(expname + "_validtest.txt", "w")
    text_file.close()
    L1_loss = gluon.loss.L2Loss()
    metric2 = mx.metric.MSE()
    loss_rec_G = []
    loss_rec_D = []
    loss_rec_R = []
    acc_rec = []
    loss_rec_D2 = []
    stamp = datetime.now().strftime('%Y_%m_%d-%H_%M')
    logging.basicConfig(level=logging.DEBUG)
    for epoch in range(epochs):
        tic = time.time()
        btic = time.time()
        train_data.reset()
        iter = 0
        for batch in train_data:
            real_in = batch.data[0].as_in_context(ctx)
            real_out = batch.data[1].as_in_context(ctx)
            with autograd.record():
                fake_out = netDe(netEn(real_in))
                errR = L1_loss(real_out, fake_out)
                errR.backward()
            trainerDe.step(batch.data[0].shape[0])
            trainerEn.step(batch.data[0].shape[0])
        loss_rec_R.append(nd.mean(errR).asscalar())

        if iter % 10 == 0:
            logging.info('speed: {} samples/s'.format(batch_size /
                                                      (time.time() - btic)))
            logging.info('reconstruction error= %f at iter %d epoch %d' %
                         (nd.mean(errR).asscalar(), iter, epoch))
        iter = iter + 1
        btic = time.time()
        text_tl = open(expname + "_trainloss.txt", "a")
        text_tl.write('%f %f %f %f %f %f %f ' %
                      (0, 0, 0, 0, 0, nd.mean(errR).asscalar(), epoch))
        text_tl.close()
        train_data.reset()
        if epoch % 10 == 0:
            filename = "checkpoints/" + expname + "_" + str(
                epoch) + "_En.params"
            netEn.save_params(filename)
            filename = "checkpoints/" + expname + "_" + str(
                epoch) + "_De.params"
            netDe.save_params(filename)
            fake_img1 = nd.concat(real_in[0], real_out[0], fake_out[0], dim=1)
            fake_img2 = nd.concat(real_in[1], real_out[1], fake_out[1], dim=1)
            fake_img3 = nd.concat(real_in[2], real_out[2], fake_out[2], dim=1)
            val_data.reset()
            text_file = open(expname + "_validtest.txt", "a")
            for vbatch in val_data:
                real_in = vbatch.data[0].as_in_context(ctx)
                real_out = vbatch.data[1].as_in_context(ctx)
                fake_out = netDe(netEn(real_in))
                metric2.update([
                    fake_out,
                ], [
                    real_out,
                ])
                _, acc2 = metric2.get()
            text_file.write("%s %s %s\n" %
                            (str(epoch), nd.mean(errR).asscalar(), str(acc2)))
            metric2.reset()
            fake_img1T = nd.concat(real_in[0], real_out[0], fake_out[0], dim=1)
            fake_img2T = nd.concat(real_in[1], real_out[1], fake_out[1], dim=1)
            fake_img3T = nd.concat(real_in[2], real_out[2], fake_out[2], dim=1)
            fake_img = nd.concat(fake_img1,
                                 fake_img2,
                                 fake_img3,
                                 fake_img1T,
                                 fake_img2T,
                                 fake_img3T,
                                 dim=2)
            visual.visualize(fake_img)
            plt.savefig('outputs/' + expname + '_' + str(epoch) + '.png')
            text_file.close()
    return ([loss_rec_D, loss_rec_G, loss_rec_R, acc_rec, loss_rec_D2])
Example #22
def make_grid(tensor,
              nrow=8,
              padding=2,
              normalize=False,
              range=None,
              scale_each=False,
              pad_value=0):
    if not (is_ndarray(tensor) or
            (isinstance(tensor, list) and all(is_ndarray(t) for t in tensor))):

        raise TypeError('tensor or list of tensors expected, got {}'.format(
            type(tensor)))

    # if list of tensors, convert to a 4D mini-batch Tensor
    if isinstance(tensor, list):
        tensor = nd.stack(*tensor, axis=0)

    if tensor.ndim == 2:  # single image H x W
        tensor = nd.expand_dims(tensor, axis=0)
    if tensor.ndim == 3:  # single image
        if tensor.shape[0] == 1:  # if single-channel, convert to 3-channel
            tensor = nd.concat(tensor, tensor, tensor, dim=0)
        tensor = nd.expand_dims(tensor, axis=0)

    if tensor.ndim == 4 and tensor.shape[1] == 1:  # single-channel images
        tensor = nd.concat(tensor, tensor, tensor, dim=1)

    if normalize is True:
        tensor = tensor.copy()  # avoid modifying tensor in-place
        if range is not None:
            assert isinstance(
                range, tuple
            ), "range has to be a tuple (min, max) if specified. min and max are numbers"

        def norm_ip(img, min, max):
            nd.clip(img, min, max, out=img)
            img += (-min)
            img /= (max - min + 1e-5)
            #img.add_(-min).div_(max - min + 1e-5)

        def norm_range(t, range):
            if range is not None:
                norm_ip(t, range[0], range[1])
            else:
                norm_ip(t, float(t.min().asscalar()),
                        float(t.max().asscalar()))

        if scale_each is True:
            for t in tensor:  # loop over mini-batch dimension
                norm_range(t, range)
        else:
            norm_range(tensor, range)

    if tensor.shape[0] == 1:
        return tensor.reshape((-3, -2))

    # make the mini-batch of images into a grid
    nmaps = tensor.shape[0]  # number of images in the mini-batch
    # print(nmaps)
    xmaps = min(nrow, nmaps)  # number of grid columns, from the nrow argument
    ymaps = int(math.ceil(float(nmaps) / xmaps))  # number of grid rows (ceiling division)
    height, width = int(tensor.shape[2] + padding), int(
        tensor.shape[3] + padding)  # cell height/width including padding
    num_channels = tensor.shape[1]  # number of image channels
    grid = nd.full(
        (num_channels, height * ymaps + padding, width * xmaps + padding),
        pad_value)  # grid filled with pad_value, large enough for all rows/columns plus padding
    k = 0
    for y in irange(ymaps):
        for x in irange(xmaps):
            if k >= nmaps:
                break
            grid[:, y * height + padding:y * height + padding + height - padding,
                 x * width + padding:x * width + padding + width -
                 padding] = tensor[k]
            k = k + 1
    return grid
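
A hedged usage sketch (assumptions: the is_ndarray and irange helpers defined elsewhere in this module behave like isinstance(..., nd.NDArray) and range, and math is imported; the input mimics a small NCHW mini-batch of RGB images).

from mxnet import nd

batch = nd.random.uniform(shape=(4, 3, 8, 8))
grid = make_grid(batch, nrow=2, padding=2)
print(grid.shape)  # (3, 22, 22): 2 rows and 2 columns of 8x8 cells plus padding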
    def forward_dense_prediction(self, data):
        """Returns the output of a forward pass for dense prediction tasks."""
        _, guidance_large, data_small = data
        height, width = guidance_large.shape[2:]

        # Upsample data_small with nearest neighbors if applicable.
        if data_small.shape[2:] == guidance_large.shape[2:]:
            data_small_upsampled = data_small
        else:
            # Use skimage as mx.nd.UpSampling allows only for one scale factor.
            batch_size = guidance_large.shape[0]
            data_small_upsampled = nd.zeros(
                (batch_size, data_small.shape[1], height, width),
                ctx=data_small.context)
            for batch_num in range(batch_size):
                data_help = skimage.transform.resize(np.transpose(
                    data_small[batch_num].asnumpy(), (1, 2, 0)),
                                                     (height, width),
                                                     order=0,
                                                     anti_aliasing=False,
                                                     mode='constant')
                data_small_upsampled[batch_num] = nd.array(
                    np.transpose(data_help, (2, 0, 1)), ctx=data_small.context)

        # Generate features.
        with data_small.context:
            features = self.feature_generator(guidance_large)
        # Scale spatial features by height/width to get invariance to size.
        with data_small.context:
            spatial_scaling = nd.array([[[[width]], [[height]]]])
        spatial_features = spatial_scaling * self.feature_factor_spatial.data(
        ) * features[:, :2]
        # Center remaining features if applicable.
        remaining_features = features[:, 2:]
        if self.data_mean is not None:
            remaining_features = (
                remaining_features -
                self.guidance_mean.copyto(guidance_large.context))
        # Scale remaining features and pass through embedding network.
        remaining_features = self.feature_factor_intensity.data(
        ) * remaining_features
        remaining_features = self.embedding(remaining_features)
        remaining_features = self.batchnorm(remaining_features)

        # Concatenate and reshape features.
        features = nd.concat(spatial_features, remaining_features, dim=1)
        features = features.reshape([0, 0, -1])
        features_size = features.shape[-1]

        # Center input data if applicable.
        if self.data_mean is not None:
            data_small_upsampled = data_small_upsampled - self.data_mean.copyto(
                data_small.context)
        # Reshape input data.
        data_small_upsampled = data_small_upsampled.reshape([0, 0, -1])
        # Pass small data through permutohedral convolution.
        data_large = self.convolutions(data_small_upsampled, features, 0,
                                       features_size, 0, features_size,
                                       self.weight_factor)
        # Reshape output data.
        data_large = data_large.reshape([0, 0, height, width])
        # Revert centering if applicable.
        if self.data_mean is not None:
            data_large = data_large + self.data_mean.copyto(data_large.context)

        return data_large
    def forward_colorization(self, data):
        """Returns the output of a forward pass for colorization."""
        guidance_small, guidance_large, data_small = data
        height, width = guidance_large.shape[2:]

        # Upsample data_small and guidance_small with nearest neighbors.
        # Use skimage as mx.nd.UpSampling allows only for one scale factor.
        batch_size = guidance_large.shape[0]
        data_small_upsampled = nd.zeros(
            (batch_size, data_small.shape[1], height, width),
            ctx=data_small.context)
        for batch_num in range(batch_size):
            data_help = skimage.transform.resize(np.transpose(
                data_small[batch_num].asnumpy(), (1, 2, 0)), (height, width),
                                                 order=0,
                                                 anti_aliasing=False,
                                                 mode='constant')
            data_small_upsampled[batch_num] = nd.array(np.transpose(
                data_help, (2, 0, 1)),
                                                       ctx=data_small.context)
        guidance_small_upsampled = nd.zeros(
            (batch_size, guidance_small.shape[1], height, width),
            ctx=data_small.context)
        for batch_num in range(batch_size):
            guidance_help = skimage.transform.resize(np.transpose(
                guidance_small[batch_num].asnumpy(), (1, 2, 0)),
                                                     (height, width),
                                                     order=0,
                                                     anti_aliasing=False,
                                                     mode='constant')
            guidance_small_upsampled[batch_num] = nd.array(
                np.transpose(guidance_help, (2, 0, 1)), ctx=data_small.context)

        # Generate features for small input data.
        with data_small.context:
            features_small = self.feature_generator(guidance_small_upsampled)
        # Scale spatial features by height/width to get invariance to size.
        with data_small.context:
            spatial_scaling = nd.array([[[[width]], [[height]]]])
        spatial_features = spatial_scaling * self.feature_factor_spatial.data(
        ) * features_small[:, :2]
        # Center remaining features if applicable.
        remaining_features = features_small[:, 2:]
        if self.data_mean is not None:
            remaining_features = remaining_features - self.guidance_mean.copyto(
                guidance_large.context)
        # Scale remaining feature and pass through embedding network.
        remaining_features = self.feature_factor_intensity.data(
        ) * remaining_features
        remaining_features = self.embedding(remaining_features)
        remaining_features = self.batchnorm(remaining_features)
        # Concatenate and reshape features_small.
        features_small = nd.concat(spatial_features, remaining_features, dim=1)
        features_small = features_small.reshape([0, 0, -1])

        # Generate features for large output data.
        with data_small.context:
            features_large = self.feature_generator(guidance_large)
        # Scale spatial features by height/width to get invariance to size.
        spatial_features = spatial_scaling * self.feature_factor_spatial.data(
        ) * features_large[:, :2]
        # Center remaining features if applicable.
        remaining_features = features_large[:, 2:]
        if self.data_mean is not None:
            remaining_features = remaining_features - self.guidance_mean.copyto(
                guidance_large.context)
        # Center remaining features and pass through embedding network.
        remaining_features = self.feature_factor_intensity.data(
        ) * remaining_features
        remaining_features = self.embedding(remaining_features)
        remaining_features = self.batchnorm(remaining_features)
        # Concatenate and reshape features_large.
        features_large = nd.concat(spatial_features, remaining_features, dim=1)
        features_large = features_large.reshape([0, 0, -1])

        # Concatenate input and output features.
        features = nd.concat(features_small, features_large, dim=2)
        features_in_size = features_small.shape[-1]
        features_out_size = features_large.shape[-1]

        # Reshape input data and guidance images.
        data_small_upsampled = data_small_upsampled.reshape([0, 0, -1])
        guidance_small_upsampled = guidance_small_upsampled.reshape([0, 0, -1])
        guidance_large = guidance_large.reshape([0, 0, -1])

        # Compute offset between data_small and guidance_small
        offset_small = data_small_upsampled - guidance_small_upsampled
        # Pass offset_small through permutohedral convolutions.
        offset_large = self.convolutions(offset_small, features, 0,
                                         features_in_size, features_in_size,
                                         features_out_size, self.weight_factor)
        # Generate output data from estimated offset.
        data_large = offset_large + guidance_large

        return data_large.reshape([0, 0, height, width])
Example #25
# ## Model 2: Distance to Administrator and Instructor as Additional Features

# In[ ]:

X_2 = nd.zeros((A.shape[0], 2))
node_distance_instructor = shortest_path_length(zkc.network, target=33)
node_distance_administrator = shortest_path_length(zkc.network, target=0)

for node in zkc.network.nodes():
    X_2[node][0] = node_distance_administrator[node]
    X_2[node][1] = node_distance_instructor[node]

# In[ ]:

X_2 = nd.concat(X_1, X_2)
model_2, features_2 = build_model(A, X_2)
model_2(X_2)

# # Train and Test Models

# In[ ]:

get_ipython().run_line_magic('time', '')
from mxnet import autograd
from mxnet.gluon import Trainer
from mxnet.ndarray import sum as ndsum
import numpy as np


def train(model, features, X, X_train, y_train, epochs):
Example #26
def traincvpr18(opt, train_data, val_data, ctx, networks):

    netEn = networks[0]
    netDe = networks[1]
    netD = networks[2]
    trainerEn = networks[5]
    trainerDe = networks[6]
    trainerD = networks[7]
    epochs = opt.epochs
    lambda1 = opt.lambda1
    batch_size = opt.batch_size
    expname = opt.expname
    append = opt.append
    text_file = open(expname + "_trainloss.txt", "w")
    text_file.close()
    text_file = open(expname + "_validtest.txt", "w")
    text_file.close()
    GAN_loss = gluon.loss.SigmoidBinaryCrossEntropyLoss()
    L1_loss = gluon.loss.L2Loss()
    metric = mx.metric.CustomMetric(facc)
    metricl = mx.metric.CustomMetric(facc)
    metric2 = mx.metric.MSE()
    loss_rec_G2 = []
    loss_rec_G = []
    loss_rec_D = []
    loss_rec_R = []
    acc_rec = []
    loss_rec_D2 = []
    stamp = datetime.now().strftime('%Y_%m_%d-%H_%M')
    logging.basicConfig(level=logging.DEBUG)
    for epoch in range(epochs):
        tic = time.time()
        btic = time.time()
        train_data.reset()
        iter = 0
        for batch in train_data:
            ############################
            # (1) Update D network: maximize log(D(x, y)) + log(1 - D(x, G(x, z)))
            ###########################
            real_in = batch.data[0].as_in_context(ctx)
            real_out = batch.data[1].as_in_context(ctx)
            fake_latent = netEn(real_in)
            fake_out = netDe(fake_latent)
            fake_concat = nd.concat(real_in, fake_out,
                                    dim=1) if append else fake_out
            with autograd.record():
                # Train with fake image
                # Use image pooling to utilize history images
                output = netD(fake_concat)
                fake_label = nd.zeros(output.shape, ctx=ctx)
                errD_fake = GAN_loss(output, fake_label)
                metric.update([
                    fake_label,
                ], [
                    output,
                ])
                real_concat = nd.concat(real_in, real_out,
                                        dim=1) if append else real_out
                output = netD(real_concat)
                real_label = nd.ones(output.shape, ctx=ctx)
                errD_real = GAN_loss(output, real_label)
                errD = (errD_real + errD_fake) * 0.5
                errD.backward()
                metric.update([
                    real_label,
                ], [
                    output,
                ])
            trainerD.step(batch.data[0].shape[0])
            ############################
            # (2) Update G network: maximize log(D(x, G(x, z))) - lambda1 * L1(y, G(x, z))
            ###########################
            with autograd.record():
                fake_latent = (netEn(real_in))
                fake_out = netDe(fake_latent)
                fake_concat = nd.concat(real_in, fake_out,
                                        dim=1) if append else fake_out
                output = netD(fake_concat)
                real_label = nd.ones(output.shape, ctx=ctx)
                errG = GAN_loss(
                    output, real_label) + L1_loss(real_out, fake_out) * lambda1
                errR = L1_loss(real_out, fake_out)
                errG.backward()
            trainerDe.step(batch.data[0].shape[0])
            trainerEn.step(batch.data[0].shape[0])
        loss_rec_G.append(
            nd.mean(errG).asscalar() - nd.mean(errR).asscalar() * lambda1)
        loss_rec_D.append(nd.mean(errD).asscalar())
        loss_rec_R.append(nd.mean(errR).asscalar())
        name, acc = metric.get()
        acc_rec.append(acc)
        # Print log information every ten batches
        if iter % 10 == 0:
            name, acc = metric.get()
            logging.info('speed: {} samples/s'.format(batch_size /
                                                      (time.time() - btic)))
            logging.info(
                'discriminator loss = %f, generator loss = %f, binary training acc = %f , reconstruction error= %f at iter %d epoch %d'
                % (nd.mean(errD).asscalar(), nd.mean(errG).asscalar(), acc,
                   nd.mean(errR).asscalar(), iter, epoch))
        iter = iter + 1
        btic = time.time()

        name, acc = metric.get()
        _, acc2 = metricl.get()
        text_tl = open(expname + "_trainloss.txt", "a")
        text_tl.write('%f %f %f %f %f %f %f ' %
                      (nd.mean(errD).asscalar(), 0, nd.mean(errG).asscalar(),
                       acc, 0, nd.mean(errR).asscalar(), epoch))
        text_tl.close()
        metricl.reset()
        metric.reset()
        train_data.reset()

        logging.info('\nbinary training acc at epoch %d: %s=%f' %
                     (epoch, name, acc))
        logging.info('time: %f' % (time.time() - tic))
        if epoch % 10 == 0:
            filename = "checkpoints/" + expname + "_" + str(
                epoch) + "_D.params"
            netD.save_params(filename)
            filename = "checkpoints/" + expname + "_" + str(
                epoch) + "_En.params"
            netEn.save_params(filename)
            filename = "checkpoints/" + expname + "_" + str(
                epoch) + "_De.params"
            netDe.save_params(filename)
            fake_img1 = nd.concat(real_in[0], real_out[0], fake_out[0], dim=1)
            fake_img2 = nd.concat(real_in[1], real_out[1], fake_out[1], dim=1)
            fake_img3 = nd.concat(real_in[2], real_out[2], fake_out[2], dim=1)
            val_data.reset()
            text_file = open(expname + "_validtest.txt", "a")
            for vbatch in val_data:
                real_in = vbatch.data[0].as_in_context(ctx)
                real_out = vbatch.data[1].as_in_context(ctx)
                fake_latent = netEn(real_in)
                y = netDe(fake_latent)
                fake_out = y
                metric2.update([
                    fake_out,
                ], [
                    real_out,
                ])
                _, acc2 = metric2.get()
            text_file.write("%s %s %s\n" %
                            (str(epoch), nd.mean(errR).asscalar(), str(acc2)))
            metric2.reset()
            fake_img1T = nd.concat(real_in[0], real_out[0], fake_out[0], dim=1)
            fake_img2T = nd.concat(real_in[1], real_out[1], fake_out[1], dim=1)
            fake_img3T = nd.concat(real_in[2], real_out[2], fake_out[2], dim=1)
            fake_img = nd.concat(fake_img1,
                                 fake_img2,
                                 fake_img3,
                                 fake_img1T,
                                 fake_img2T,
                                 fake_img3T,
                                 dim=2)
            visual.visualize(fake_img)
            plt.savefig('outputs/' + expname + '_' + str(epoch) + '.png')
            text_file.close()
    return [loss_rec_D, loss_rec_G, loss_rec_R, acc_rec, loss_rec_D2]
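Calling traincvpr18 requires the `networks` list to be laid out exactly as the function unpacks it (encoder at index 0, decoder at 1, image discriminator at 2, their trainers at indices 5-7) and an `opt` object carrying epochs, lambda1, batch_size, expname and append. Below is a minimal, hypothetical wiring sketch; the builder helpers and hyper-parameter values are assumptions, not part of the original code.

from argparse import Namespace

import mxnet as mx
from mxnet import gluon

ctx = mx.gpu(0)  # or mx.cpu()
# build_encoder / build_decoder / build_discriminator are hypothetical helpers.
netEn, netDe, netD = build_encoder(), build_decoder(), build_discriminator()
for net in (netEn, netDe, netD):
    net.initialize(mx.init.Xavier(), ctx=ctx)

def make_trainer(net):
    return gluon.Trainer(net.collect_params(), 'adam',
                         {'learning_rate': 0.0002, 'beta1': 0.5})

# Indices 3 and 4 are not read by traincvpr18, so placeholders are fine.
networks = [netEn, netDe, netD, None, None,
            make_trainer(netEn), make_trainer(netDe), make_trainer(netD)]
opt = Namespace(epochs=200, lambda1=100, batch_size=32,
                expname='cvpr18_run', append=True)
# histories = traincvpr18(opt, train_iter, val_iter, ctx, networks)  # train_iter/val_iter: mx.io data iterators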
Exemple #27
0
    def select_action(
        self, 
        possible_action_arr, 
        possible_predicted_q_arr, 
        possible_reward_value_arr,
        possible_next_q_arr,
        possible_meta_data_arr=None
    ):
        '''
        Select action by Q(state, action).

        Args:
            possible_action_arr:                Tensor of actions.
            possible_predicted_q_arr:           Tensor of Q-Values.
            possible_reward_value_arr:          Tensor of reward values.
            possible_next_q_arr:                Tensor of Q-Values in next time.
            possible_meta_data_arr:             `mxnet.ndarray.NDArray` or `np.array` of meta data of the actions.

        Returns:
            Tuple(`np.ndarray` of action., Q-Value)
        '''
        key_arr = self.select_action_key(possible_action_arr, possible_predicted_q_arr)
        meta_data_arr = None
        if possible_meta_data_arr is not None:
            for i in range(possible_meta_data_arr.shape[0]):
                _meta_data_arr = possible_meta_data_arr[i, key_arr[i]]
                if i == 0:
                    if isinstance(_meta_data_arr, nd.NDArray) is True:
                        meta_data_arr = nd.expand_dims(_meta_data_arr, axis=0)
                    else:
                        meta_data_arr = np.expand_dims(_meta_data_arr, axis=0)
                else:
                    if isinstance(_meta_data_arr, nd.NDArray) is True:
                        meta_data_arr = nd.concat(
                            meta_data_arr,
                            nd.expand_dims(_meta_data_arr, axis=0),
                            dim=0
                        )
                    else:
                        meta_data_arr = np.concatenate(
                            [
                                meta_data_arr,
                                np.expand_dims(_meta_data_arr, axis=0),
                            ],
                            axis=0
                        )

        action_arr = None
        predicted_q_arr = None
        reward_value_arr = None
        next_q_arr = None

        for i in range(possible_action_arr.shape[0]):
            _action_arr = possible_action_arr[i, key_arr[i]]
            _predicted_q_arr = possible_predicted_q_arr[i, key_arr[i]]
            _reward_value_arr = possible_reward_value_arr[i, key_arr[i]]
            _next_q_arr = possible_next_q_arr[i, key_arr[i]]
            if i == 0:
                action_arr = nd.expand_dims(_action_arr, axis=0)
                predicted_q_arr = nd.expand_dims(_predicted_q_arr, axis=0)
                reward_value_arr = nd.expand_dims(_reward_value_arr, axis=0)
                next_q_arr = nd.expand_dims(_next_q_arr, axis=0)
            else:
                action_arr = nd.concat(
                    action_arr,
                    nd.expand_dims(_action_arr, axis=0),
                    dim=0
                )
                predicted_q_arr = nd.concat(
                    predicted_q_arr,
                    nd.expand_dims(_predicted_q_arr, axis=0),
                    dim=0
                )
                reward_value_arr = nd.concat(
                    reward_value_arr,
                    nd.expand_dims(_reward_value_arr, axis=0),
                    dim=0
                )
                next_q_arr = nd.concat(
                    next_q_arr,
                    nd.expand_dims(_next_q_arr, axis=0),
                    dim=0
                )

        return (
            action_arr, 
            predicted_q_arr, 
            reward_value_arr,
            next_q_arr,
            meta_data_arr
        )
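The loop above assembles the selected rows one at a time with `nd.expand_dims`/`nd.concat`. When the inputs are plain NDArrays, the same per-row gather `arr[i, key_arr[i]]` can be done in a single `nd.gather_nd` call; the sketch below is a hedged alternative and assumes `key_arr` is (or is wrapped into) an NDArray holding one index per row.

import mxnet.ndarray as nd

def gather_by_key(arr, key_arr):
    """Return arr[i, key_arr[i]] for every row i as one NDArray."""
    rows = nd.arange(arr.shape[0], ctx=arr.context)
    idx = nd.stack(rows, key_arr.astype(rows.dtype), axis=0)  # shape (2, batch)
    return nd.gather_nd(arr, idx)

# e.g. predicted_q_arr = gather_by_key(possible_predicted_q_arr, nd.array(key_arr))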
Exemple #28
0
# y = 1.2x - 3.4x^2 + 5.6x^3 + 5.0 + noise
from mxnet import ndarray as nd
from mxnet import autograd
from mxnet import gluon
import matplotlib as mpl

# mpl.rcParams['figure.dpi'] = 120
import matplotlib.pyplot as plt

num_train = 100
num_test = 100
true_w = [1.2, -3.4, 5.6]
true_b = 5.0

x = nd.random.normal(shape=(num_train + num_test, 1))
X = nd.concat(x, nd.power(x, 2), nd.power(x, 3))
y = true_w[0] * X[:, 0] + true_w[1] * X[:, 1] + true_w[2] * X[:, 2] + true_b
y += .1 * nd.random.normal(shape=y.shape)


def train(X_train, X_test, y_train, y_test):
    net = gluon.nn.Sequential()
    with net.name_scope():
        net.add(gluon.nn.Dense(1))
    net.initialize()
    learning_rate = 0.01
    epochs = 100
    batch_size = min(10, y_train.shape[0])
    dataset_train = gluon.data.ArrayDataset(X_train, y_train)
    data_iter_train = gluon.data.DataLoader(dataset_train,
                                            batch_size,
Exemple #29
0
def facc(label, pred):
    pred = pred.ravel()
    label = label.ravel()
    return ((pred > 0.5) == label).mean()
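# A hedged usage sketch of facc: it plugs into mx.metric.CustomMetric the same
# way the training loops above call metric.update([label], [output]). The toy
# scores below are assumptions for illustration only.
import mxnet as mx
from mxnet import nd

demo_metric = mx.metric.CustomMetric(facc)
demo_label = nd.array([1, 0, 1, 0])
demo_score = nd.array([0.9, 0.2, 0.4, 0.1])  # sigmoid outputs of a discriminator
demo_metric.update([demo_label], [demo_score])
print(demo_metric.get())  # -> roughly ('facc', 0.75)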


lbllist = []
scorelist = []
test_data.reset()
count = 0
for batch in (test_data):
    print(count)
    count += 1
    real_in = batch.data[0].as_in_context(ctx)
    real_out = batch.data[1].as_in_context(ctx)
    lbls = batch.label[0].as_in_context(ctx)
    out = (netG(real_in))
    real_concat = nd.concat(real_in, real_in, dim=1)
    #real_concat = nd.concat(out, out, dim=1)
    output = netD(real_concat)
    output = nd.mean(output, (1, 3, 2)).asnumpy()
    lbllist = lbllist + list(lbls.asnumpy())
    scorelist = scorelist + list(output)
    visualize(out[0, :, :, :])
    plt.savefig('outputs/testnet_T' + str(count) + '.png')

print(lbllist)
print(scorelist)
fpr, tpr, _ = roc_curve(lbllist, scorelist, pos_label=0)
roc_auc = auc(fpr, tpr)
print(roc_auc)
Exemple #30
0
def train(cep,
          pool_size,
          epochs,
          train_data,
          val_data,
          ctx,
          netEn,
          netDe,
          netD,
          netD2,
          netDS,
          trainerEn,
          trainerDe,
          trainerD,
          trainerD2,
          trainerSD,
          lambda1,
          batch_size,
          expname,
          append=True,
          useAE=False):
    tp_file = open(expname + "_trainloss.txt", "w")
    tp_file.close()
    text_file = open(expname + "_validtest.txt", "w")
    text_file.close()
    #netGT, netDT, _, _ = set_test_network(opt.depth, ctx, opt.lr, opt.beta1,opt.ndf,  opt.ngf, opt.append)
    GAN_loss = gluon.loss.SigmoidBinaryCrossEntropyLoss()
    L1_loss = gluon.loss.L2Loss()
    image_pool = imagePool.ImagePool(pool_size)
    metric = mx.metric.CustomMetric(facc)
    metric2 = mx.metric.CustomMetric(facc)
    metricStrong = mx.metric.CustomMetric(facc)
    metricMSE = mx.metric.MSE()
    loss_rec_G = []
    loss_rec_D = []
    loss_rec_R = []
    acc_rec = []
    acc2_rec = []
    loss_rec_D2 = []
    loss_rec_G2 = []
    lr = 2.0 * 512
    stamp = datetime.now().strftime('%Y_%m_%d-%H_%M')
    logging.basicConfig(level=logging.DEBUG)
    if cep == -1:
        cep = 0
    else:
        netEn.load_params('checkpoints/' + opt.expname + '_' + str(cep) +
                          '_En.params',
                          ctx=ctx)
        netDe.load_params('checkpoints/' + opt.expname + '_' + str(cep) +
                          '_De.params',
                          ctx=ctx)
        netD.load_params('checkpoints/' + opt.expname + '_' + str(cep) +
                         '_D.params',
                         ctx=ctx)
        netD2.load_params('checkpoints/' + opt.expname + '_' + str(cep) +
                          '_D2.params',
                          ctx=ctx)
        netDS.load_params('checkpoints/' + opt.expname + '_' + str(cep) +
                          '_SD.params',
                          ctx=ctx)
    iter = 0
    for epoch in range(cep + 1, epochs):

        tic = time.time()
        btic = time.time()
        train_data.reset()
        #print('learning rate : '+str(trainerD.learning_rate ))
        for batch in train_data:
            ############################
            # (1) Update D network: maximize log(D(x, y)) + log(1 - D(x, G(x, z)))
            ###########################
            if ctx == mx.cpu():
                ct = mx.cpu()
            else:
                ct = mx.gpu()
            real_in = batch.data[0]  #.as_in_context(ctx)
            real_out = batch.data[1]  #.as_in_context(ctx)
            if iter == 0:
                latent_shape = (batch_size, 512, 1, 1)  #code.shape
                out_l_shape = (batch_size, 1, 1, 1)  #netD2((code)).shape
                out_i_shape = (batch_size, 1, 1, 1)  #netD(netDe(code)).shape
                out_s_shape = (batch_size, 1, 1, 1)  #netSD(netDe(code)).shape
            real_in = gluon.utils.split_and_load(real_in, ctx)
            real_out = gluon.utils.split_and_load(real_out, ctx)
            fake_latent = [netEn(r) for r in real_in]
            real_latent = nd.random.uniform(low=-1, high=1, shape=latent_shape)
            real_latent = gluon.utils.split_and_load(real_latent, ctx)
            fake_out = [netDe(f) for f in fake_latent]
            fake_concat = nd.concat(real_in, fake_out,
                                    dim=1) if append else fake_out
            eps2 = nd.random.uniform(low=-1,
                                     high=1,
                                     shape=latent_shape,
                                     ctx=ct)
            eps2 = gluon.utils.split_and_load(eps2, ctx)
            if epoch > 150:  # (1/float(batch_size))*512*150:# and epoch%10==0:
                print('Mining..')
                mu = nd.random.uniform(low=-1,
                                       high=1,
                                       shape=latent_shape,
                                       ctx=ct)
                #isigma = nd.ones((batch_size,64,1,1),ctx=ctx)*0.000001
                mu.attach_grad()
                #sigma.attach_grad()
                images = netDe(mu)
                fake_img1T = nd.concat(images[0], images[1], images[2], dim=1)
                fake_img2T = nd.concat(images[3], images[4], images[5], dim=1)
                fake_img3T = nd.concat(images[6], images[7], images[8], dim=1)
                fake_img = nd.concat(fake_img1T, fake_img2T, fake_img3T, dim=2)
                visual.visualize(fake_img)
                plt.savefig('outputs/' + expname + '_fakespre_' + str(epoch) +
                            '.png')
                eps2 = gluon.utils.split_and_load(mu, ctx)
                for e in eps2:
                    e.attach_grad()
                for ep2 in range(1):
                    with autograd.record():
                        #eps = nd.random_normal(loc=0, scale=1, shape=fake_latent.shape, ctx=ctx) #
                        #eps2 = gluon.utils.split_and_load(nd.tanh(mu),ctx) #+nd.multiply(eps,sigma))#nd.random.uniform( low=-1, high=1, shape=fake_latent.shape, ctx=ctx)
                        rec_output = [netDS(netDe(e)) for e in eps2]
                        fake_label = gluon.utils.split_and_load(
                            nd.zeros(out_s_shape), ctx)
                        errGS = [
                            GAN_loss(r, f)
                            for r, f in zip(rec_output, fake_label)
                        ]
                        for e in errGS:
                            e.backward()
                    for idx, _ in enumerate(eps2):
                        eps2[idx] = nd.tanh(eps2[idx] -
                                            lr / eps2[idx].shape[0] *
                                            eps2[idx].grad)
                images = netDe((eps2[0]))
                fake_img1T = nd.concat(images[0], images[1], images[2], dim=1)
                fake_img2T = nd.concat(images[3], images[4], images[5], dim=1)
                fake_img3T = nd.concat(images[6], images[7], images[8], dim=1)
                fake_img = nd.concat(fake_img1T, fake_img2T, fake_img3T, dim=2)
                visual.visualize(fake_img)
                plt.savefig('outputs/' + expname + str(ep2) + '_fakespost_' +
                            str(epoch) + '.png')
                #eps2 = nd.tanh(mu)#+nd.multiply(eps,sigma))#nd.random.uniform( low=-1, high=1, shape=fake_latent.shape, ctx=ctx)

            with autograd.record():
                #eps2 = gluon.utils.split_and_load(eps2,ctx)
                # Train with fake image
                # Use image pooling to utilize history images
                output = [netD(f) for f in fake_concat]
                output2 = [netD2(f) for f in fake_latent]
                fake_label = nd.zeros(out_i_shape)
                fake_label = gluon.utils.split_and_load(fake_label, ctx)
                fake_latent_label = nd.zeros(out_l_shape)
                fake_latent_label = gluon.utils.split_and_load(
                    fake_latent_label, ctx)
                eps = gluon.utils.split_and_load(
                    nd.random.uniform(low=-1, high=1, shape=latent_shape), ctx)
                rec_output = [netD(netDe(e)) for e in eps]
                errD_fake = [
                    GAN_loss(r, f) for r, f in zip(rec_output, fake_label)
                ]
                errD_fake2 = [
                    GAN_loss(o, f) for o, f in zip(output, fake_label)
                ]
                errD2_fake = [
                    GAN_loss(o, f) for o, f in zip(output2, fake_latent_label)
                ]
                for f, o in zip(fake_label, rec_output):
                    metric.update([
                        f,
                    ], [
                        o,
                    ])
                for f, o in zip(fake_latent_label, output2):
                    metric2.update([
                        f,
                    ], [
                        o,
                    ])
                real_concat = nd.concat(real_in, real_out,
                                        dim=1) if append else real_out
                output = [netD(r) for r in real_concat]
                output2 = [netD2(r) for r in real_latent]
                real_label = gluon.utils.split_and_load(
                    nd.ones(out_i_shape), ctx)
                real_latent_label = gluon.utils.split_and_load(
                    nd.ones(out_l_shape), ctx)
                errD_real = [
                    GAN_loss(o, r) for o, r in zip(output, real_label)
                ]
                errD2_real = [
                    GAN_loss(o, r) for o, r in zip(output2, real_latent_label)
                ]
                for e1, e2, e4, e5 in zip(errD_real, errD_fake, errD2_real,
                                          errD2_fake):
                    err = (e1 + e2) * 0.5 + (e5 + e4) * 0.5
                    err.backward()
                for f, o in zip(real_label, output):
                    metric.update([
                        f,
                    ], [
                        o,
                    ])
                for f, o in zip(real_latent_label, output2):
                    metric2.update([
                        f,
                    ], [
                        o,
                    ])
            trainerD.step(batch.data[0].shape[0])
            trainerD2.step(batch.data[0].shape[0])
            nd.waitall()
            with autograd.record():
                strong_output = [netDS(netDe(e)) for e in eps]
                strong_real = [netDS(f) for f in fake_concat]
                errs1 = [
                    GAN_loss(r, f) for r, f in zip(strong_output, fake_label)
                ]
                errs2 = [
                    GAN_loss(r, f) for r, f in zip(strong_real, real_label)
                ]
                for f, s in zip(fake_label, strong_output):
                    metricStrong.update([
                        f,
                    ], [
                        s,
                    ])
                for f, s in zip(real_label, strong_real):
                    metricStrong.update([
                        f,
                    ], [
                        s,
                    ])
                for e1, e2 in zip(errs1, errs2):
                    strongerr = 0.5 * (e1 + e2)
                    strongerr.backward()
            trainerSD.step(batch.data[0].shape[0])
            nd.waitall()
            ############################
            # (2) Update G network: maximize log(D(x, G(x, z))) - lambda1 * L1(y, G(x, z))
            ###########################
            with autograd.record():
                sh = out_l_shape
                #eps2 = nd.random_normal(loc=0, scale=1, shape=noiseshape, ctx=ctx) #
                #eps = nd.random.uniform( low=-1, high=1, shape=noiseshape, ctx=ctx)
                #if epoch>100:
                #        eps2 = nd.multiply(eps2,sigma)+mu
                #        eps2 = nd.tanh(eps2)
                #else:
                #eps = nd.random.uniform( low=-1, high=1, shape=noiseshape, ctx=ctx)
                #eps2 = nd.concat(eps,eps2,dim=0)
                rec_output = [netD(netDe(e)) for e in eps2]
                fake_latent = [(netEn(r)) for r in real_in]
                output2 = [netD2(f) for f in fake_latent]
                fake_out = [netDe(f) for f in fake_latent]
                fake_concat = nd.concat(real_in, fake_out,
                                        dim=1) if append else fake_out
                output = [netD(f) for f in fake_concat]
                real_label = gluon.utils.split_and_load(
                    nd.ones(out_i_shape), ctx)
                real_latent_label = gluon.utils.split_and_load(
                    nd.ones(out_l_shape), ctx)
                errG2 = [
                    GAN_loss(r, f) for r, f in zip(rec_output, real_label)
                ]
                errR = [
                    L1_loss(r, f) * lambda1
                    for r, f in zip(real_out, fake_out)
                ]
                errG = [
                    10 * GAN_loss(r, f)
                    for r, f in zip(output2, real_latent_label)
                ]  # +errG2+errR
                for e1, e2, e3 in zip(errG, errG2, errR):
                    e = e1 + e2 + e3
                    e.backward()
            trainerDe.step(batch.data[0].shape[0])
            trainerEn.step(batch.data[0].shape[0])
            nd.waitall()
            errD = (errD_real[0] + errD_fake[0]) * 0.5
            errD2 = (errD2_real[0] + errD2_fake[0]) * 0.5
            loss_rec_G2.append(nd.mean(errG2[0]).asscalar())
            loss_rec_G.append(
                nd.mean(nd.mean(errG[0])).asscalar() -
                nd.mean(errG2[0]).asscalar() - nd.mean(errR[0]).asscalar())
            loss_rec_D.append(nd.mean(errD[0]).asscalar())
            loss_rec_R.append(nd.mean(errR[0]).asscalar())
            loss_rec_D2.append(nd.mean(errD2[0]).asscalar())
            _, acc2 = metric2.get()
            name, acc = metric.get()
            acc_rec.append(acc)
            acc2_rec.append(acc2)

            # Print log information every ten batches
            if iter % 10 == 0:
                _, acc2 = metric2.get()
                name, acc = metric.get()
                _, accStrong = metricStrong.get()
                logging.info('speed: {} samples/s'.format(
                    batch_size / (time.time() - btic)))
                #print(errD)
                #logging.info('discriminator loss = %f, D2 loss = %f, generator loss = %f, G2 loss = %f, SD loss = %f,  D acc = %f , D2 acc = %f, DS acc = %f, reconstruction error= %f  at iter %d epoch %d'
                #   	% (nd.mean(errD[0]).asscalar(),nd.mean(errD2[0]).asscalar(),
                #     	nd.mean(errG[0]-errG2[0]-errR[0]).asscalar(),nd.mean(errG2[0]).asscalar(),nd.mean(strongerr[0]).asscalar() ,acc,acc2,accStrong[0],nd.mean(errR[0]).asscalar() ,iter, epoch))
                iter = iter + 1
        btic = time.time()
        name, acc = metric.get()
        _, acc2 = metric2.get()
        #tp_file = open(expname + "_trainloss.txt", "a")
        #tp_file.write(str(nd.mean(errG2).asscalar()) + " " + str(
        #    nd.mean(nd.mean(errG)).asscalar() - nd.mean(errG2).asscalar() - nd.mean(errR).asscalar()) + " " + str(
        #    nd.mean(errD).asscalar()) + " " + str(nd.mean(errD2).asscalar()) + " " + str(nd.mean(errR).asscalar()) +" "+str(acc) + " " + str(acc2)+"\n")
        #tp_file.close()
        metric.reset()
        metric2.reset()
        train_data.reset()
        metricStrong.reset()

        logging.info('\nbinary training acc at epoch %d: %s=%f' %
                     (epoch, name, acc))
        logging.info('time: %f' % (time.time() - tic))
        if epoch % 2 == 0:  # and epoch>0:
            text_file = open(expname + "_validtest.txt", "a")
            filename = "checkpoints/" + expname + "_" + str(
                epoch) + "_D.params"
            netD.save_parameters(filename)
            filename = "checkpoints/" + expname + "_" + str(
                epoch) + "_D2.params"
            netD2.save_parameters(filename)
            filename = "checkpoints/" + expname + "_" + str(
                epoch) + "_En.params"
            netEn.save_parameters(filename)
            filename = "checkpoints/" + expname + "_" + str(
                epoch) + "_De.params"
            netDe.save_parameters(filename)
            filename = "checkpoints/" + expname + "_" + str(
                epoch) + "_SD.params"
            netDS.save_parameters(filename)
            fake_img1 = nd.concat(real_in[0], real_out[0], fake_out[0], dim=1)
            fake_img2 = nd.concat(real_in[1], real_out[1], fake_out[1], dim=1)
            fake_img3 = nd.concat(real_in[2], real_out[2], fake_out[2], dim=1)
            fake_img4 = nd.concat(real_in[3], real_out[3], fake_out[3], dim=1)
            val_data.reset()
            text_file = open(expname + "_validtest.txt", "a")
            for vbatch in val_data:

                real_in = vbatch.data[0]
                real_out = vbatch.data[1]
                real_in = gluon.utils.split_and_load(real_in, ctx)
                real_out = gluon.utils.split_and_load(real_out, ctx)

                fake_latent = [netEn(r) for r in real_in]
                fake_out = [netDe(f) for f in fake_latent]
                for f, r in zip(fake_out, real_out):
                    metricMSE.update([
                        f,
                    ], [
                        r,
                    ])
            _, acc2 = metricMSE.get()
            toterrR = 0
            for e in errR:
                toterrR += nd.mean(e).asscalar()
            text_file.write("%s %s %s\n" % (str(epoch), toterrR, str(acc2)))
            metricMSE.reset()
    return ([
        loss_rec_D, loss_rec_G, loss_rec_R, acc_rec, loss_rec_D2, loss_rec_G2,
        acc2_rec
    ])
Exemple #31
0
    def generate_learned_samples(self):
        '''
        Draw and generate data.

        Returns:
            `Tuple` data. The shape is ...
            - `mxnet.ndarray` of observed data points in training.
            - `mxnet.ndarray` of supervised data in training.
            - `mxnet.ndarray` of observed data points in test.
            - `mxnet.ndarray` of supervised data in test.
        '''
        for epoch in range(self.epochs):
            training_batch_arr, test_batch_arr = None, None

            for i in range(self.batch_size):
                file_key = np.random.randint(low=0, high=len(self.__train_csv_path_list))
                train_observed_arr = self.__unlabeled_csv_extractor.extract(
                    self.__train_csv_path_list[file_key]
                )
                test_file_key = np.random.randint(low=0, high=len(self.__test_csv_path_list))
                test_observed_arr = self.__unlabeled_csv_extractor.extract(
                    self.__test_csv_path_list[test_file_key]
                )

                train_observed_arr = np.identity(
                    1 + int(train_observed_arr.max() + (train_observed_arr.min() * -1))
                )[
                    (train_observed_arr.reshape(train_observed_arr.shape[0], -1) + (train_observed_arr.min() * -1)).astype(int)
                ]

                test_observed_arr = np.identity(
                    1 + int(test_observed_arr.max() + (test_observed_arr.min() * -1))
                )[
                    (test_observed_arr.reshape(test_observed_arr.shape[0], -1) + (test_observed_arr.min() * -1)).astype(int)
                ]

                start_row = np.random.randint(low=0, high=train_observed_arr.shape[0] - self.seq_len)
                test_start_row = np.random.randint(low=0, high=test_observed_arr.shape[0] - self.seq_len)

                train_observed_arr = train_observed_arr[start_row:start_row+self.seq_len]
                test_observed_arr = test_observed_arr[test_start_row:test_start_row+self.seq_len]

                if training_batch_arr is None:
                    training_batch_arr = nd.expand_dims(
                        nd.ndarray.array(train_observed_arr, ctx=self.__ctx),
                        axis=0
                    )
                else:
                    training_batch_arr = nd.concat(
                        training_batch_arr,
                        nd.expand_dims(
                            nd.ndarray.array(train_observed_arr, ctx=self.__ctx),
                            axis=0
                        ),
                        dim=0
                    )

                if test_batch_arr is None:
                    test_batch_arr = nd.expand_dims(
                        nd.ndarray.array(test_observed_arr, ctx=self.__ctx),
                        axis=0
                    )
                else:
                    test_batch_arr = nd.concat(
                        test_batch_arr,
                        nd.expand_dims(
                            nd.ndarray.array(test_observed_arr, ctx=self.__ctx),
                            axis=0
                        ),
                        dim=0
                    )

            training_batch_arr = self.pre_normalize(training_batch_arr)
            test_batch_arr = self.pre_normalize(test_batch_arr)

            if self.__noiseable_data is not None:
                training_batch_arr = self.__noiseable_data.noise(training_batch_arr)

            yield training_batch_arr, training_batch_arr, test_batch_arr, test_batch_arr
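The `np.identity(...)[...]` expression above is a compact one-hot encoding: the values are shifted so they start at zero and are then used to index rows of an identity matrix. A small standalone illustration (the toy array is an assumption):

import numpy as np

observed = np.array([[-1, 0], [2, 1]])                   # toy data
depth = 1 + int(observed.max() + (observed.min() * -1))  # number of distinct shifted values
shifted = (observed.reshape(observed.shape[0], -1) + (observed.min() * -1)).astype(int)
one_hot = np.identity(depth)[shifted]
print(one_hot.shape)                                     # (2, 2, 4): each value became a one-hot row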
    def hybrid_forward(self, F, x, c1):
        c1 = self.c1_block(c1)
        x = self.aspp(x)
        x = F.contrib.BilinearResize2D(x, **self._up_kwargs)
        return self.block(F.concat(c1, x, dim=1))
Exemple #33
0
    def forward(self, inpt):
        fwd = self._lstm_fwd(inpt)
        bwd_inpt = nd.flip(inpt, 0)
        bwd = self._lstm_bwd(bwd_inpt)
        bwd = nd.flip(bwd, 0)
        return nd.concat(fwd, bwd, dim=2)
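The forward above realizes a bidirectional LSTM by running the backward cell on a time-reversed copy of the input and concatenating along the feature axis. A hedged sketch of the two recurrent layers it assumes, using plain `gluon.rnn.LSTM` layers in TNC layout (the hidden size and dummy shapes are assumptions):

from mxnet import nd
from mxnet.gluon import rnn

hidden = 128
lstm_fwd = rnn.LSTM(hidden, layout='TNC')
lstm_bwd = rnn.LSTM(hidden, layout='TNC')
lstm_fwd.initialize()
lstm_bwd.initialize()

inpt = nd.random.uniform(shape=(10, 4, 32))       # (seq_len, batch, features)
fwd = lstm_fwd(inpt)
bwd = nd.flip(lstm_bwd(nd.flip(inpt, 0)), 0)      # run backwards, flip the result back
out = nd.concat(fwd, bwd, dim=2)
print(out.shape)                                  # (10, 4, 2 * hidden)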
Exemple #34
0
def tensor_save_bgrimage(tensor, filename, cuda=False):
    (b, g, r) = F.split(tensor, num_outputs=3, axis=0)
    tensor = F.concat(r, g, b, dim=0)
    tensor_save_rgbimage(tensor, filename, cuda)
Exemple #35
0
    def hybrid_forward(self, F, x, *args, **kwargs):
        if self.outermost:
            return self.model(x)
        else:
            # U-Net style skip connection: concatenate the block input with its
            # output along the channel axis. F.concat takes the arrays
            # positionally with a dim keyword (not a list), and using F keeps
            # the block hybridizable.
            return F.concat(x, self.model(x), dim=1)
Exemple #36
0
def tensor_save_bgrimage(tensor, filename, cuda=False):
    (b, g, r) = F.split(tensor, num_outputs=3, axis=0)
    tensor = F.concat(r, g, b, dim=0)
    tensor_save_rgbimage(tensor, filename, cuda)
Exemple #37
0
def train(pool_size,
          epochs,
          train_data,
          val_data,
          ctx,
          netEn,
          netDe,
          netD,
          netD2,
          trainerEn,
          trainerDe,
          trainerD,
          trainerD2,
          lambda1,
          batch_size,
          expname,
          append=True,
          useAE=False):
    tp_file = open(expname + "_trainloss.txt", "w")
    tp_file.close()
    text_file = open(expname + "_validtest.txt", "w")
    text_file.close()
    #netGT, netDT, _, _ = set_test_network(opt.depth, ctx, opt.lr, opt.beta1,opt.ndf,  opt.ngf, opt.append)
    GAN_loss = gluon.loss.SigmoidBinaryCrossEntropyLoss()
    L1_loss = gluon.loss.L2Loss()
    image_pool = imagePool.ImagePool(pool_size)
    metric = mx.metric.CustomMetric(facc)
    metric2 = mx.metric.CustomMetric(facc)
    metricMSE = mx.metric.MSE()
    loss_rec_G = []
    loss_rec_D = []
    loss_rec_R = []
    acc_rec = []
    acc2_rec = []
    loss_rec_D2 = []
    loss_rec_G2 = []
    lr = 0.002
    #mu = nd.random_normal(loc=0, scale=1, shape=(batch_size/2,64,1,1), ctx=ctx)
    mu = nd.random.uniform(low=-1,
                           high=1,
                           shape=(batch_size // 2, 64, 1, 1),
                           ctx=ctx)
    #mu =  nd.zeros((batch_size/2,64,1,1),ctx=ctx)
    sigma = nd.ones((batch_size // 2, 64, 1, 1), ctx=ctx) * 0.02
    mu.attach_grad()
    sigma.attach_grad()
    stamp = datetime.now().strftime('%Y_%m_%d-%H_%M')
    logging.basicConfig(level=logging.DEBUG)
    for epoch in range(epochs):

        tic = time.time()
        btic = time.time()
        train_data.reset()
        iter = 0
        #print('learning rate : '+str(trainerD.learning_rate ))
        for batch in train_data:
            ############################
            # (1) Update D network: maximize log(D(x, y)) + log(1 - D(x, G(x, z)))
            ###########################
            real_in = batch.data[0].as_in_context(ctx)
            real_out = batch.data[1].as_in_context(ctx)
            fake_latent = netEn(real_in)
            #real_latent = nd.random_normal(loc=0, scale=1, shape=fake_latent.shape, ctx=ctx)
            real_latent = nd.random.uniform(low=-1,
                                            high=1,
                                            shape=fake_latent.shape,
                                            ctx=ctx)
            fake_out = netDe(fake_latent)
            fake_concat = nd.concat(real_in, fake_out,
                                    dim=1) if append else fake_out
            with autograd.record():
                # Train with fake image
                # Use image pooling to utilize history images
                output = netD(fake_concat)
                output2 = netD2(fake_latent)
                fake_label = nd.zeros(output.shape, ctx=ctx)
                fake_latent_label = nd.zeros(output2.shape, ctx=ctx)
                noiseshape = (fake_latent.shape[0] // 2, fake_latent.shape[1],
                              fake_latent.shape[2], fake_latent.shape[3])
                eps2 = nd.random_normal(loc=0,
                                        scale=1,
                                        shape=noiseshape,
                                        ctx=ctx)  #
                eps = nd.random.uniform(low=-1,
                                        high=1,
                                        shape=noiseshape,
                                        ctx=ctx)
                if epoch > 100:
                    eps2 = nd.multiply(eps2, sigma) + mu
                    eps2 = nd.tanh(eps2)
                else:
                    eps2 = nd.random.uniform(low=-1,
                                             high=1,
                                             shape=noiseshape,
                                             ctx=ctx)
                eps2 = nd.concat(eps, eps2, dim=0)
                rec_output = netD(netDe(eps2))
                errD_fake = GAN_loss(rec_output, fake_label)
                errD_fake2 = GAN_loss(output, fake_label)
                errD2_fake = GAN_loss(output2, fake_latent_label)
                metric.update([
                    fake_label,
                ], [
                    output,
                ])
                metric2.update([
                    fake_latent_label,
                ], [
                    output2,
                ])
                real_concat = nd.concat(real_in, real_out,
                                        dim=1) if append else real_out
                output = netD(real_concat)
                output2 = netD2(real_latent)
                real_label = nd.ones(output.shape, ctx=ctx)
                real_latent_label = nd.ones(output2.shape, ctx=ctx)
                errD_real = GAN_loss(output, real_label)
                errD2_real = GAN_loss(output2, real_latent_label)
                #errD = (errD_real + 0.5*(errD_fake+errD_fake2)) * 0.5
                errD = (errD_real + errD_fake) * 0.5
                errD2 = (errD2_real + errD2_fake) * 0.5
                totalerrD = errD + errD2
                totalerrD.backward()
                #errD2.backward()
                metric.update([
                    real_label,
                ], [
                    output,
                ])
                metric2.update([
                    real_latent_label,
                ], [
                    output2,
                ])
            trainerD.step(batch.data[0].shape[0])
            trainerD2.step(batch.data[0].shape[0])
            ############################
            # (2) Update G network: maximize log(D(x, G(x, z))) - lambda1 * L1(y, G(x, z))
            ###########################
            with autograd.record():
                sh = fake_latent.shape
                eps2 = nd.random_normal(loc=0,
                                        scale=1,
                                        shape=noiseshape,
                                        ctx=ctx)  #
                eps = nd.random.uniform(low=-1,
                                        high=1,
                                        shape=noiseshape,
                                        ctx=ctx)
                if epoch > 100:
                    eps2 = nd.multiply(eps2, sigma) + mu
                    eps2 = nd.tanh(eps2)
                else:
                    eps2 = nd.random.uniform(low=-1,
                                             high=1,
                                             shape=noiseshape,
                                             ctx=ctx)
                eps2 = nd.concat(eps, eps2, dim=0)
                rec_output = netD(netDe(eps2))
                fake_latent = (netEn(real_in))
                output2 = netD2(fake_latent)
                fake_out = netDe(fake_latent)
                fake_concat = nd.concat(real_in, fake_out,
                                        dim=1) if append else fake_out
                output = netD(fake_concat)
                real_label = nd.ones(output.shape, ctx=ctx)
                real_latent_label = nd.ones(output2.shape, ctx=ctx)
                errG2 = GAN_loss(rec_output, real_label)
                errR = L1_loss(real_out, fake_out) * lambda1
                errG = 10.0 * GAN_loss(output2,
                                       real_latent_label) + errG2 + errR
                errG.backward()
            trainerDe.step(batch.data[0].shape[0])
            trainerEn.step(batch.data[0].shape[0])
            loss_rec_G2.append(nd.mean(errG2).asscalar())
            loss_rec_G.append(
                nd.mean(nd.mean(errG)).asscalar() - nd.mean(errG2).asscalar() -
                nd.mean(errR).asscalar())
            loss_rec_D.append(nd.mean(errD).asscalar())
            loss_rec_R.append(nd.mean(errR).asscalar())
            loss_rec_D2.append(nd.mean(errD2).asscalar())
            _, acc2 = metric2.get()
            name, acc = metric.get()
            acc_rec.append(acc)
            acc2_rec.append(acc2)

            # Print log information every ten batches
            if iter % 10 == 0:
                _, acc2 = metric2.get()
                name, acc = metric.get()
                logging.info('speed: {} samples/s'.format(
                    batch_size / (time.time() - btic)))
                #print(errD)
                logging.info(
                    'discriminator loss = %f, D2 loss = %f, generator loss = %f, G2 loss = %f,  binary training acc = %f , D2 acc = %f, reconstruction error= %f  at iter %d epoch %d'
                    % (nd.mean(errD).asscalar(), nd.mean(errD2).asscalar(),
                       nd.mean(errG - errG2 - errR).asscalar(),
                       nd.mean(errG2).asscalar(), acc, acc2,
                       nd.mean(errR).asscalar(), iter, epoch))
                iter = iter + 1
        btic = time.time()
        name, acc = metric.get()
        _, acc2 = metric2.get()
        tp_file = open(expname + "_trainloss.txt", "a")
        tp_file.write(
            str(nd.mean(errG2).asscalar()) + " " + str(
                nd.mean(nd.mean(errG)).asscalar() - nd.mean(errG2).asscalar() -
                nd.mean(errR).asscalar()) + " " +
            str(nd.mean(errD).asscalar()) + " " +
            str(nd.mean(errD2).asscalar()) + " " +
            str(nd.mean(errR).asscalar()) + " " + str(acc) + " " + str(acc2) +
            "\n")
        tp_file.close()
        metric.reset()
        metric2.reset()
        train_data.reset()

        logging.info('\nbinary training acc at epoch %d: %s=%f' %
                     (epoch, name, acc))
        logging.info('time: %f' % (time.time() - tic))
        if epoch % 10 == 0:  # and epoch>0:
            text_file = open(expname + "_validtest.txt", "a")
            filename = "checkpoints/" + expname + "_" + str(
                epoch) + "_D.params"
            netD.save_params(filename)
            filename = "checkpoints/" + expname + "_" + str(
                epoch) + "_D2.params"
            netD2.save_params(filename)
            filename = "checkpoints/" + expname + "_" + str(
                epoch) + "_En.params"
            netEn.save_params(filename)
            filename = "checkpoints/" + expname + "_" + str(
                epoch) + "_De.params"
            netDe.save_params(filename)
            fake_img1 = nd.concat(real_in[0], real_out[0], fake_out[0], dim=1)
            fake_img2 = nd.concat(real_in[1], real_out[1], fake_out[1], dim=1)
            fake_img3 = nd.concat(real_in[2], real_out[2], fake_out[2], dim=1)
            fake_img4 = nd.concat(real_in[3], real_out[3], fake_out[3], dim=1)
            val_data.reset()
            text_file = open(expname + "_validtest.txt", "a")
            for vbatch in val_data:

                real_in = vbatch.data[0].as_in_context(ctx)
                real_out = vbatch.data[1].as_in_context(ctx)
                fake_latent = netEn(real_in)
                y = netDe(fake_latent)
                fake_out = y
                metricMSE.update([
                    fake_out,
                ], [
                    real_out,
                ])
            _, acc2 = metricMSE.get()
            text_file.write("%s %s %s\n" %
                            (str(epoch), nd.mean(errR).asscalar(), str(acc2)))
            metricMSE.reset()
            images = netDe(eps2)
            fake_img1T = nd.concat(images[0], images[1], images[2], dim=1)
            fake_img2T = nd.concat(images[3], images[4], images[5], dim=1)
            fake_img3T = nd.concat(images[6], images[7], images[8], dim=1)
            fake_img = nd.concat(fake_img1T, fake_img2T, fake_img3T, dim=2)
            visual.visualize(fake_img)
            plt.savefig('outputs/' + expname + '_fakes_' + str(epoch) + '.png')
            text_file.close()

            # Do 10 iterations of sampler update
            fake_img1T = nd.concat(real_in[0], real_out[0], fake_out[0], dim=1)
            fake_img2T = nd.concat(real_in[1], real_out[1], fake_out[1], dim=1)
            fake_img3T = nd.concat(real_in[2], real_out[2], fake_out[2], dim=1)
            #fake_img4T = nd.concat(real_in[3],real_out[3], fake_out[3], dim=1)
            fake_img = nd.concat(fake_img1,
                                 fake_img2,
                                 fake_img3,
                                 fake_img1T,
                                 fake_img2T,
                                 fake_img3T,
                                 dim=2)
            visual.visualize(fake_img)
            plt.savefig('outputs/' + expname + '_' + str(epoch) + '.png')
            if epoch > 100:
                for ep2 in range(10):
                    with autograd.record():
                        #eps = nd.random_normal(loc=0, scale=1, shape=noiseshape, ctx=ctx) #
                        eps = nd.random.uniform(low=-1,
                                                high=1,
                                                shape=noiseshape,
                                                ctx=ctx)
                        eps2 = nd.random_normal(loc=0,
                                                scale=0.02,
                                                shape=noiseshape,
                                                ctx=ctx)
                        eps2 = nd.tanh(eps2 * sigma + mu)
                        eps2 = nd.concat(eps, eps2, dim=0)
                        rec_output = netD(netDe(eps2))
                        fake_label = nd.zeros(rec_output.shape, ctx=ctx)
                        errGS = GAN_loss(rec_output, fake_label)
                        errGS.backward()
                    mu -= lr / mu.shape[0] * mu.grad
                    sigma -= lr / sigma.shape[0] * sigma.grad
                    print('mu ' + str(mu[0, 0, 0, 0].asnumpy()) + '  sigma ' +
                          str(sigma[0, 0, 0, 0].asnumpy()))
            images = netDe(eps2)
            fake_img1T = nd.concat(images[0], images[1], images[2], dim=1)
            fake_img2T = nd.concat(images[3], images[4], images[5], dim=1)
            fake_img3T = nd.concat(images[6], images[7], images[8], dim=1)
            fake_img = nd.concat(fake_img1T, fake_img2T, fake_img3T, dim=2)
            visual.visualize(fake_img)
            plt.savefig('outputs/' + expname + '_fakespost_' + str(epoch) +
                        '.png')
    return ([
        loss_rec_D, loss_rec_G, loss_rec_R, acc_rec, loss_rec_D2, loss_rec_G2,
        acc2_rec
    ])
Exemple #38
0
def preprocess_batch(batch):
    batch = F.swapaxes(batch, 0, 1)
    (r, g, b) = F.split(batch, num_outputs=3, axis=0)
    batch = F.concat(b, g, r, dim=0)
    batch = F.swapaxes(batch, 0, 1)
    return batch
                get_accuracy(
                    nd.argmax(pred_val, axis=1).asnumpy(), label.asnumpy()))
            loss_accumulate_val = loss_accumulate_val + nd.sum(loss).asscalar()

            pred_val_accumulate.append(nd.argmax(pred_val, axis=1))
            label_val_accumulate.append(label)

            # get indexes of wrong predictions
            if False:
                u = list(pred_val.argmax_channel().asnumpy())  # prediction
                idx = [i for i in range(u.__len__())
                       if u[i] != label_val[i]]  # index of prediction == 0

        # compute the confusion matrix on validation set
        cm = confusion_matrix(
            nd.concat(*pred_val_accumulate, dim=0).asnumpy(),
            nd.concat(*label_val_accumulate, dim=0).asnumpy())
        logger.info('[%d %d] [%d %d]' %
                    (cm[0, 0], cm[0, 1], cm[1, 0], cm[1, 1]))

        logger.info('Epoch %d/%d: train [acc, loss] = [%.4f, %.4f],  '
                    'val [acc, loss] = [%.4f, %.4f],  LR = %.5f, GS = %d' %
                    (e, opts.num_epochs, np.array(acc_accumulate).mean(),
                     loss_accumulate / im_train.shape[0],
                     np.array(acc_accumulate_val).mean(), loss_accumulate_val /
                     im_val.shape[0], trainer.learning_rate, global_step))

        sw.add_scalar(tag='acc',
                      value=('training', np.array(acc_accumulate).mean()),
                      global_step=e)
        sw.add_scalar(tag='acc',
Exemple #40
0
    def hybrid_forward(self, F, x, *args, **kwargs):
        if self.outermost:
            return self.model(x)
        else:
            # Skip connection: F.concat takes NDArrays positionally with a dim
            # keyword rather than a list, and stays hybridization-friendly.
            return F.concat(x, self.model(x), dim=1)
Exemple #41
0
def preprocess_batch(batch):
    batch = F.swapaxes(batch, 0, 1)
    (r, g, b) = F.split(batch, num_outputs=3, axis=0)
    batch = F.concat(b, g, r, dim=0)
    batch = F.swapaxes(batch, 0, 1)
    return batch
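`preprocess_batch` swaps an NCHW batch from RGB to BGR channel order by moving the channel axis to the front, splitting it, and concatenating in reverse. A hedged usage check with a dummy batch (shapes are assumptions):

import mxnet.ndarray as F  # the snippet above uses F for the ndarray API

batch = F.arange(2 * 3 * 4 * 4).reshape((2, 3, 4, 4))  # N=2, C=3 (R, G, B), 4x4
bgr = preprocess_batch(batch)
# The red plane of the input should now sit in the last channel slot.
print(batch[0, 0, 0, 0].asscalar(), bgr[0, 2, 0, 0].asscalar())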
Exemple #42
0
def debug_bilinear(x,
                   W,
                   y,
                   input_size,
                   seq_len,
                   batch_size,
                   num_outputs=1,
                   bias_x=False,
                   bias_y=False):
    """
    Compute the bilinear form xWy and cross-check the MXNet result against DyNet

    :param x: (input_size x seq_len) x batch_size
    :param W:
    :param y: (input_size x seq_len) x batch_size
    :param input_size:
    :param seq_len:
    :param batch_size:
    :param num_outputs:
    :param bias_x:
    :param bias_y:
    :return: [seq_len_y x seq_len_x if output_size == 1 else seq_len_y x num_outputs x seq_len_x] x batch_size
    """
    import dynet as dy
    xd = dy.inputTensor(x, batched=True)
    xm = nd.array(x)
    yd = dy.inputTensor(y, batched=True)
    ym = nd.array(y)
    Wd = dy.inputTensor(W)
    Wm = nd.array(W)

    def allclose(dyarray, mxarray):
        a = dyarray.npvalue()
        b = mxarray.asnumpy()
        return np.allclose(a, b)

    if bias_x:
        xd = dy.concatenate(
            [xd, dy.inputTensor(np.ones((1, seq_len), dtype=np.float32))])
        xm = nd.concat(xm, nd.ones((1, seq_len, batch_size)), dim=0)
        # print(allclose(xd, xm))
    if bias_y:
        yd = dy.concatenate(
            [yd, dy.inputTensor(np.ones((1, seq_len), dtype=np.float32))])
        ym = nd.concat(ym, nd.ones((1, seq_len, batch_size)), dim=0)
        # print(allclose(yd, ym))

    nx, ny = input_size + bias_x, input_size + bias_y
    # W: (num_outputs x ny) x nx
    lind = Wd * xd
    linm = nd.dot(Wm, xm)
    # print(allclose(lind, linm))
    if num_outputs > 1:
        lind = dy.reshape(lind, (ny, num_outputs * seq_len),
                          batch_size=batch_size)
        # linm = nd.reshape(linm, (ny, num_outputs * seq_len, batch_size))
        linm = reshape_fortran(linm, (ny, num_outputs * seq_len, batch_size))
        # print(allclose(lind, linm))

    blind = dy.transpose(yd) * lind
    ym = ym.transpose([2, 1, 0])
    linm = linm.transpose([2, 1, 0])
    blinm = nd.batch_dot(linm, ym, transpose_b=True)
    blinm = blinm.transpose([2, 1, 0])

    print(np.allclose(blind.npvalue(), blinm.asnumpy()))

    if num_outputs > 1:
        blind = dy.reshape(blind, (seq_len, num_outputs, seq_len),
                           batch_size=batch_size)
        blinm = reshape_fortran(blinm,
                                (seq_len, num_outputs, seq_len, batch_size))
        print(allclose(blind, blinm))
    return blind
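For reference, the bilinear score `xWy` that `debug_bilinear` cross-checks can be written with MXNet alone; the sketch below covers the `num_outputs == 1`, no-bias case with small random inputs (shapes are assumptions that follow the docstring):

import mxnet.ndarray as nd

input_size, seq_len, batch_size = 4, 5, 2
x = nd.random.uniform(shape=(input_size, seq_len, batch_size))
y = nd.random.uniform(shape=(input_size, seq_len, batch_size))
W = nd.random.uniform(shape=(input_size, input_size))

lin = nd.dot(W, x)                               # (input_size, seq_len_x, batch)
lin = lin.transpose([2, 1, 0])                   # (batch, seq_len_x, input_size)
yb = y.transpose([2, 1, 0])                      # (batch, seq_len_y, input_size)
score = nd.batch_dot(lin, yb, transpose_b=True)  # (batch, seq_len_x, seq_len_y)
print(score.shape)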
# Overfitting: the model's training error is far smaller than its error on the test set.

## Polynomial fitting as an example
# y = 1.2x - 3.4x^2 + 5.6x^3 + 5.0 + noise

from mxnet import ndarray as nd
from mxnet import autograd
from mxnet import gluon

num_train = 100
num_test = 100
true_w = [1.2, -3.4, 5.6]
true_b = 5.0

x = nd.random.normal(shape=(num_train + num_test, 1))  # random samples
X = nd.concat(x, nd.power(x, 2), nd.power(x, 3))  # x, x^2, x^3
y = true_w[0] * X[:, 0] + true_w[1] * X[:, 1] + true_w[2] * X[:, 2] + true_b
y += .1 * nd.random.normal(shape=y.shape)  # add noise

print('x:', x[:5], 'X:', X[:5], 'y:', y[:5])


### Training
import matplotlib as mpl  # plotting
mpl.rcParams['figure.dpi'] = 120  # resolution
import matplotlib.pyplot as plt  # plotting

def train(X_train, X_test, y_train, y_test):
    # linear regression model
    net = gluon.nn.Sequential()
    with net.name_scope():
Exemple #44
0
    def forward(self, input_vec, loss=None, training=True):
        # print('************* ' + str(input_vec.shape[1]) + ' *************')
        # print('############# ' + str(input_vec.shape) + ' #############')
        assert input_vec.shape[1] == self.input_dimension

        # get inputs for every slot(including global)
        inputs = {}
        for slot in self.slots:
            slot_input = input_vec[:, self.slot_dimension[slot][0]:self.
                                   slot_dimension[slot][1]]
            global_input = input_vec[:, self.global_dimension[0][0]:self.
                                     global_dimension[0][1]]
            inputs[slot] = nd.concat(*[slot_input, global_input], dim=1)

        batch_size = input_vec.shape[0]
        zero_slot_input = nd.zeros((batch_size, 25))
        input_global = [zero_slot_input]
        for seg in self.global_dimension:
            input_global.append(input_vec[:, seg[0]:seg[1]])
        inputs['global'] = nd.concat(*input_global, dim=1)

        layer = []
        # inputs -> first_hidden_layer
        if (not self.sort_input_vec) and self.state_feature != 'dip':
            layer.append([])
            for slot in self.slots:
                layer[0].append(self.input_trans[slot](inputs[slot],
                                                       training=training))
            layer[0].append(self.input_trans['global'](inputs['global'],
                                                       training=training))
        elif self.state_feature == 'dip':
            sorted_inputs = []
            for slot in self.slots:
                sorted_inputs.append(inputs[slot])
            sorted_inputs.append(inputs['global'])
            layer.append(
                self.input_trans.forward(sorted_inputs,
                                         loss,
                                         training=training))
        elif self.sort_input_vec:
            sorted_inputs = []
            for slot in self.slots:
                tmp = inputs[slot][:, :-2].sort(is_ascend=False)
                if tmp.shape[1] < 20:
                    tmp = nd.concat(tmp,
                                    nd.zeros((tmp.shape[0], 20 - tmp.shape[1]),
                                             ctx=CTX),
                                    dim=1)
                else:
                    tmp = nd.slice_axis(tmp, axis=1, begin=0, end=20)
                sorted_inputs.append(
                    nd.concat(tmp, inputs[slot][:, -2:], dim=1))
            sorted_inputs.append(inputs['global'])
            layer.append(
                self.input_trans.forward(sorted_inputs,
                                         loss,
                                         training=training))

        # hidden_layers
        for i in range(self.hidden_layers - 1):
            if self.recurrent_mode is False:
                # equal to 'layer.append(self.ma_trans[i](layer[-1], loss))'
                layer.append(self.ma_trans[i].forward(layer[i],
                                                      loss,
                                                      training=training))
            else:
                layer.append(
                    self.ma_trans.forward(layer[i], loss, training=training))

        if self.share_last_layer is False:
            # dropout of last hidden layer
            for j in range(len(self.slots)):
                layer[-1][j] = self.local_out_drop_op(layer[-1][j])
            layer[-1][-1] = self.global_out_drop_op(layer[-1][-1])

            # last_hidden_layer -> outputs
            outputs = []
            for i in range(len(self.slots) + 1):
                outputs.append(self.output_trans(layer[-1][i]))
        #         if self.use_dueling is False:
        #             outputs.append(self.output_trans[i](layer[-1][i]))
        #         else:
        #             if i < len(self.slots):
        #                 tmp_adv = self.output_trans_local_advantage1.forward(sorted_inputs[i], training=training)
        #                 tmp_adv = self.output_trans_local_advantage2.forward(tmp_adv, training=training)
        #             else:
        #                 tmp_adv = self.output_trans_global_advantage1.forward(sorted_inputs[-1], training=training)
        #                 tmp_adv = self.output_trans_global_advantage2.forward(tmp_adv, training=training)
        #             if self.dueling_share_last:
        #                 if i < len(self.slots):
        #                     cur_value = self.output_trans_local_value.forward(layer[-1][i], training=training)
        #                     if self.shared_last_layer_use_bias:
        #                         cur_value = cur_value + nd.slice(self.value_bias_local.data(), begin=(i, ), end=(i + 1, ))
        #                 else:
        #                     cur_value = self.output_trans_global_value.forward(layer[-1][i], training=training)
        #             else:
        #                 cur_value = self.output_trans_value[i].forward(layer[-1][i], training=training)
        #             outputs.append(
        #                 cur_value +
        #                 tmp_adv - tmp_adv.mean(axis=1).reshape(
        #                     (tmp_adv.shape[0], 1)).broadcast_axes(axis=1, size=tmp_adv.shape[1]))
        # else:
        #     outputs = []
        #     for i in range(len(self.slots)):
        #         output_i = self.output_trans_local.forward(layer[-1][i], training=training)
        #         if self.shared_last_layer_use_bias:
        #             output_i = output_i + self.output_trans_local_biases[i].data()
        #         outputs.append(output_i)
        #     outputs.append(self.output_trans_global.forward(layer[-1][-1], training=training))
        normal_output = []
        for i in range(len(self.slots)):
            normal_output.append(outputs[i][:, :3])
        normal_output.append(outputs[-1][:, 3:])

        return nd.concat(*normal_output, dim=1)
Exemple #45
0
# -*- coding: utf-8 -*-

from mxnet import ndarray as nd
from mxnet import autograd
from mxnet import gluon
import matplotlib as mpl
mpl.rcParams['figure.dpi']= 120
import matplotlib.pyplot as plt

num_train = 100
num_test = 100
true_w = [1.2, -3.4, 5.6]
true_b = 5.0

x = nd.random.normal(shape=(num_train + num_test, 1))
X = nd.concat(x, nd.power(x, 2), nd.power(x, 3))  # power(x, 2): raise every element of x to the 2nd power
# y = true_w[0] * X[:, 0] + true_w[1] * X[:, 1] + true_w[2] * X[:, 2] + true_b
y = true_w[0] * X[:, 0] + true_b
y += .1 * nd.random.normal(shape=y.shape)
y_train, y_test = y[:num_train], y[num_train:]
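
# A minimal fitting sketch for the data above (not part of the original example).
# It reuses the imports and arrays defined earlier; learning rate, epoch count,
# and batch size are arbitrary choices for illustration.
net = gluon.nn.Sequential()
net.add(gluon.nn.Dense(1))
net.initialize()
square_loss = gluon.loss.L2Loss()
trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': 0.01})
batch_size = 10
data_iter = gluon.data.DataLoader(
    gluon.data.ArrayDataset(X[:num_train], y_train), batch_size, shuffle=True)
for epoch in range(100):
    for data, label in data_iter:
        with autograd.record():
            loss = square_loss(net(data), label)
        loss.backward()
        trainer.step(batch_size)
print('learned weight:', net[0].weight.data(), 'learned bias:', net[0].bias.data())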

# def test(net, X, y):
#     return square_loss(net(X), y).mean().asscalar()

def train(X_train, X_test, y_train, y_test):
    def generate_learned_samples(self):
        '''
        Draw and generate data.

        Returns:
            `Tuple` data. The shape is ...
            - `mxnet.ndarray` of observed data points in training.
            - `mxnet.ndarray` of supervised data in training.
            - `mxnet.ndarray` of observed data points in test.
            - `mxnet.ndarray` of supervised data in test.
            - `mxnet.ndarray` of observed data points in target domain.
        '''
        for _ in range(self.iter_n):
            training_batch_arr, test_batch_arr = None, None
            training_label_arr, test_label_arr = None, None
            target_domain_batch_arr = None
            for _ in range(self.batch_size):
                dir_key = np.random.randint(low=0, high=len(self.__training_file_path_list))

                training_one_hot_arr = nd.zeros((1, len(self.__training_file_path_list)), ctx=self.__ctx)
                training_one_hot_arr[0, dir_key] = 1

                file_key = np.random.randint(low=0, high=len(self.__training_file_path_list[dir_key]))
                training_data_arr = self.__image_extractor.extract(
                    path=self.__training_file_path_list[dir_key][file_key],
                )
                training_data_arr = self.pre_normalize(training_data_arr)

                test_dir_key = np.random.randint(low=0, high=len(self.__test_file_path_list))

                test_one_hot_arr = nd.zeros((1, len(self.__test_file_path_list)), ctx=self.__ctx)
                test_one_hot_arr[0, test_dir_key] = 1

                file_key = np.random.randint(low=0, high=len(self.__test_file_path_list[test_dir_key]))
                test_data_arr = self.__image_extractor.extract(
                    path=self.__test_file_path_list[test_dir_key][file_key],
                )
                test_data_arr = self.pre_normalize(test_data_arr)

                target_domain_dir_key = np.random.randint(low=0, high=len(self.__target_domain_file_path_list))

                target_domain_one_hot_arr = nd.zeros((1, len(self.__target_domain_file_path_list)), ctx=self.__ctx)
                target_domain_one_hot_arr[0, target_domain_dir_key] = 1

                target_domain_file_key = np.random.randint(low=0, high=len(self.__target_domain_file_path_list[target_domain_dir_key]))
                target_domain_data_arr = self.__image_extractor.extract(
                    path=self.__target_domain_file_path_list[target_domain_dir_key][target_domain_file_key],
                )
                target_domain_data_arr = self.pre_normalize(target_domain_data_arr)

                training_data_arr = nd.expand_dims(training_data_arr, axis=0)
                test_data_arr = nd.expand_dims(test_data_arr, axis=0)
                target_domain_data_arr = nd.expand_dims(target_domain_data_arr, axis=0)

                if training_batch_arr is not None:
                    training_batch_arr = nd.concat(training_batch_arr, training_data_arr, dim=0)
                else:
                    training_batch_arr = training_data_arr
                
                if test_batch_arr is not None:
                    test_batch_arr = nd.concat(test_batch_arr, test_data_arr, dim=0)
                else:
                    test_batch_arr = test_data_arr

                if training_label_arr is not None:
                    training_label_arr = nd.concat(training_label_arr, training_one_hot_arr, dim=0)
                else:
                    training_label_arr = training_one_hot_arr

                if test_label_arr is not None:
                    test_label_arr = nd.concat(test_label_arr, test_one_hot_arr, dim=0)
                else:
                    test_label_arr = test_one_hot_arr

                if target_domain_batch_arr is not None:
                    target_domain_batch_arr = nd.concat(target_domain_batch_arr, target_domain_data_arr, dim=0)
                else:
                    target_domain_batch_arr = target_domain_data_arr

            if self.__noiseable_data is not None:
                training_batch_arr = self.__noiseable_data.noise(training_batch_arr)
                target_domain_batch_arr = self.__noiseable_data.noise(target_domain_batch_arr)

            yield training_batch_arr, training_label_arr, test_batch_arr, test_label_arr, target_domain_batch_arr
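# The start-with-None-then-concat accumulation used in the loop above can be
# checked in isolation; a small sketch with invented shapes:
from mxnet import nd

batch_arr = None
for _ in range(4):
    sample = nd.random.normal(shape=(1, 3, 8, 8))  # one sample with an explicit batch axis
    batch_arr = sample if batch_arr is None else nd.concat(batch_arr, sample, dim=0)
print(batch_arr.shape)  # (4, 3, 8, 8)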
Exemple #47
0
    def hybrid_forward(self, F, X, entry_b):
        # One dot product per (index, param) entry in layer_list, concatenated
        # along dim=0, then a bias and the configured activation.
        concat_list = []
        for index, param in self.layer_list:
            concat_list.append(F.dot(param.data(), X[index, :]))
        # F.concat (rather than nd.concat) keeps the block hybridizable.
        y = F.concat(*concat_list, dim=0) + entry_b
        return self.activation(y)
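# A toy check (not from the original code) of the core operation in
# hybrid_forward above, with invented shapes and the parameters held as plain
# NDArrays instead of gluon Parameters.
from mxnet import nd

X_toy = nd.random.normal(shape=(3, 4))                    # 3 rows, 4 features each
w_toy = [nd.random.normal(shape=(4,)) for _ in range(3)]  # one weight vector per row
b_toy = nd.array([0.1, 0.2, 0.3])
y_toy = nd.concat(*[nd.dot(w, X_toy[i, :]).reshape((1,)) for i, w in enumerate(w_toy)],
                  dim=0) + b_toy
print(y_toy.shape)  # (3,)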
Exemple #48
0
# Imports for this snippet; helpers such as data_iter_random,
# data_iter_consecutive, grad_clipping, predict_rnn, and utils.SGD are assumed
# to come from the accompanying utilities of the original project.
from math import exp

from mxnet import autograd, gluon, nd

def train_and_predict_rnn(rnn, is_random_iter, epochs, num_steps, hidden_dim,
                          learning_rate, clipping_theta, batch_size,
                          pred_period, pred_len, seqs, get_params, get_inputs,
                          ctx, corpus_indices, idx_to_char, char_to_idx,
                          is_lstm=False):
    if is_random_iter:
        data_iter = data_iter_random
    else:
        data_iter = data_iter_consecutive
    params = get_params()

    softmax_cross_entropy = gluon.loss.SoftmaxCrossEntropyLoss()

    for e in range(1, epochs + 1):
        # With consecutive sampling, the hidden state only needs to be
        # initialized once, at the start of each epoch.
        if not is_random_iter:
            state_h = nd.zeros(shape=(batch_size, hidden_dim), ctx=ctx)
            if is_lstm:
                # Only needed when the RNN is an LSTM; ignore otherwise.
                state_c = nd.zeros(shape=(batch_size, hidden_dim), ctx=ctx)
        train_loss, num_examples = 0, 0
        for data, label in data_iter(corpus_indices, batch_size, num_steps,
                                     ctx):
            # With random sampling, the hidden state must be re-initialized
            # before every mini-batch.
            if is_random_iter:
                state_h = nd.zeros(shape=(batch_size, hidden_dim), ctx=ctx)
                if is_lstm:
                    # Only needed when the RNN is an LSTM; ignore otherwise.
                    state_c = nd.zeros(shape=(batch_size, hidden_dim), ctx=ctx)
            with autograd.record():
                # Each element of outputs has shape (batch_size, vocab_size).
                if is_lstm:
                    # Only needed when the RNN is an LSTM; ignore otherwise.
                    outputs, state_h, state_c = rnn(get_inputs(data), state_h,
                                                    state_c, *params)
                else:
                    outputs, state_h = rnn(get_inputs(data), state_h, *params)
                # Let t_i b_j denote element j of the mini-batch at time step i:
                # label shape: (batch_size * num_steps)
                # label = [t_0b_0, t_0b_1, ..., t_1b_0, t_1b_1, ..., ]
                label = label.T.reshape((-1,))
                # Concatenate outputs; shape: (batch_size * num_steps, vocab_size).
                outputs = nd.concat(*outputs, dim=0)
                # Now outputs and label are aligned element-wise.
                loss = softmax_cross_entropy(outputs, label)
            loss.backward()

            grad_clipping(params, clipping_theta, ctx)
            utils.SGD(params, learning_rate)

            train_loss += nd.sum(loss).asscalar()
            num_examples += loss.size

        if e % pred_period == 0:
            print("Epoch %d. Perplexity %f" % (e,
                                               exp(train_loss/num_examples)))
            for seq in seqs:
                print(' - ', predict_rnn(rnn, seq, pred_len, params,
                                         hidden_dim, ctx, idx_to_char,
                                         char_to_idx, get_inputs, is_lstm))
            print()
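# A small sanity check (toy values, not from the original code) of why
# label.T.reshape((-1,)) lines up with nd.concat(*outputs, dim=0): outputs holds
# one (batch_size, vocab_size) array per time step, so the concatenation is
# time-major, and transposing label before flattening gives the same ordering.
from mxnet import nd

label = nd.array([[0, 1, 2],
                  [3, 4, 5]])    # (batch_size=2, num_steps=3)
print(label.T.reshape((-1,)))    # [0. 3. 1. 4. 2. 5.] -- time-major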