Example #1
def test_resize():
    # Test with normal case 3D input float type
    data_in_3d = nd.random.uniform(0, 255, (300, 300, 3))
    out_nd_3d = transforms.Resize((100, 100))(data_in_3d)
    data_in_4d_nchw = nd.moveaxis(nd.expand_dims(data_in_3d, axis=0), 3, 1)
    data_expected_3d = (nd.moveaxis(nd.contrib.BilinearResize2D(data_in_4d_nchw, height=100, width=100), 1, 3))[0]
    assert_almost_equal(out_nd_3d.asnumpy(), data_expected_3d.asnumpy())

    # Test with normal case 4D input float type
    data_in_4d = nd.random.uniform(0, 255, (2, 300, 300, 3))
    out_nd_4d = transforms.Resize((100, 100))(data_in_4d)
    data_in_4d_nchw = nd.moveaxis(data_in_4d, 3, 1)
    data_expected_4d = nd.moveaxis(nd.contrib.BilinearResize2D(data_in_4d_nchw, height=100, width=100), 1, 3)
    assert_almost_equal(out_nd_4d.asnumpy(), data_expected_4d.asnumpy())

    # Test invalid input (negative size)
    data_in_3d = nd.random.uniform(0, 255, (300, 300, 3))
    invalid_transform = transforms.Resize(-150, keep_ratio=False, interpolation=2)
    assertRaises(MXNetError, invalid_transform, data_in_3d)

    # Credited to Hang Zhang
    def py_bilinear_resize_nhwc(x, outputHeight, outputWidth):
        batch, inputHeight, inputWidth, channel = x.shape
        if outputHeight == inputHeight and outputWidth == inputWidth:
            return x
        y = np.empty([batch, outputHeight, outputWidth, channel]).astype('uint8')
        rheight = 1.0 * (inputHeight - 1) / (outputHeight - 1) if outputHeight > 1 else 0.0
        rwidth = 1.0 * (inputWidth - 1) / (outputWidth - 1) if outputWidth > 1 else 0.0
        for h2 in range(outputHeight):
            h1r = 1.0 * h2 * rheight
            h1 = int(np.floor(h1r))
            h1lambda = h1r - h1
            h1p = 1 if h1 < (inputHeight - 1) else 0
            for w2 in range(outputWidth):
                w1r = 1.0 * w2 * rwidth
                w1 = int(np.floor(w1r))
                w1lambda = w1r - w1
                w1p = 1 if w1 < (inputWidth - 1) else 0
                for b in range(batch):
                    for c in range(channel):
                        y[b][h2][w2][c] = (1-h1lambda)*((1-w1lambda)*x[b][h1][w1][c] + \
                            w1lambda*x[b][h1][w1+w1p][c]) + \
                            h1lambda*((1-w1lambda)*x[b][h1+h1p][w1][c] + \
                            w1lambda*x[b][h1+h1p][w1+w1p][c])
        return y

    # Test with normal case 3D input uint8 type
    data_in_4d = nd.random.uniform(0, 255, (1, 300, 300, 3)).astype('uint8')
    out_nd_3d = transforms.Resize((100, 100))(data_in_4d[0])
    assert_almost_equal(out_nd_3d.asnumpy(), py_bilinear_resize_nhwc(data_in_4d.asnumpy(), 100, 100)[0], atol=1.0)

    # Test with normal case 4D input uint8 type
    data_in_4d = nd.random.uniform(0, 255, (2, 300, 300, 3)).astype('uint8')
    out_nd_4d = transforms.Resize((100, 100))(data_in_4d)
    assert_almost_equal(out_nd_4d.asnumpy(), py_bilinear_resize_nhwc(data_in_4d.asnumpy(), 100, 100), atol=1.0)
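
The moveaxis round-trips above exist because BilinearResize2D consumes NCHW input while the transform operates on HWC/NHWC data. A minimal standalone sketch of that layout dance (illustrative values, assuming only mxnet):

import mxnet.ndarray as nd

x = nd.random.uniform(0, 255, (1, 8, 8, 3))                 # NHWC input
x_nchw = nd.moveaxis(x, 3, 1)                               # to NCHW
y = nd.contrib.BilinearResize2D(x_nchw, height=4, width=4)  # resize
print(nd.moveaxis(y, 1, 3).shape)                           # (1, 4, 4, 3)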
Example #2
def tensor_load_rgbimage(filename, ctx, size=None, scale=None, keep_asp=False):
    img = Image.open(filename).convert('RGB')
    if size is not None:
        if keep_asp:
            size2 = int(size * 1.0 / img.size[0] * img.size[1])
            img = img.resize((size, size2), Image.ANTIALIAS)
        else:
            img = img.resize((size, size), Image.ANTIALIAS)

    elif scale is not None:
        img = img.resize((int(img.size[0] / scale), int(img.size[1] / scale)), Image.ANTIALIAS)
    img = np.array(img).transpose(2, 0, 1).astype(float)
    img = F.expand_dims(mx.nd.array(img, ctx=ctx), 0)
    return img
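
Note that Image.ANTIALIAS was removed in Pillow 10. A version-tolerant variant might select the filter as below; this is a sketch, not the original code, and tensor_load_rgbimage_compat is a hypothetical name:

import numpy as np
import mxnet as mx
from PIL import Image

try:
    RESAMPLE = Image.Resampling.LANCZOS  # Pillow >= 9.1
except AttributeError:
    RESAMPLE = Image.ANTIALIAS           # older Pillow

def tensor_load_rgbimage_compat(filename, ctx, size=None):
    img = Image.open(filename).convert('RGB')
    if size is not None:
        img = img.resize((size, size), RESAMPLE)
    img = np.array(img).transpose(2, 0, 1).astype(float)
    return mx.nd.expand_dims(mx.nd.array(img, ctx=ctx), 0)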
Example #3
def K_means_Algorithm(epoch=100,
                      point_numbers=2000,
                      centroid_numbers=5,
                      ctx=mx.gpu(0)):

    dataset = []
    centroid = []

    # data generation
    for i in range(point_numbers):

        if random.random() > 0.5:
            dataset.append([
                np.random.normal(loc=0, scale=0.9),
                np.random.normal(loc=0, scale=0.9)
            ])
        else:
            dataset.append([
                np.random.normal(loc=3, scale=0.5),
                np.random.normal(loc=0, scale=0.9)
            ])

    df = pd.DataFrame({
        "x": [d[0] for d in dataset],
        "y": [d[1] for d in dataset]
    })
    sns.lmplot(x="x", y="y", data=df, fit_reg=False, height=10)  # seaborn >= 0.9 uses height= (formerly size=)
    plt.savefig("K means Algorithm init using mxnet.png")

    # 1-step
    random.shuffle(dataset)
    for i in range(centroid_numbers):
        centroid.append(random.choice(dataset))

    # using mxnet
    dataset = nd.array(dataset, ctx=ctx)
    centroid = nd.array(centroid, ctx=ctx)

    # data assignment , updating new center values
    for i in tqdm(range(epoch)):

        # 2-step
        diff = nd.subtract(nd.expand_dims(dataset, axis=0),
                           nd.expand_dims(centroid, axis=1))
        sqr = nd.square(diff)
        distance = nd.sum(sqr, axis=2)
        clustering = nd.argmin(distance, axis=0)

        # 3-step
        '''
        mxnet's nd.where does not return indices, so np.where is used instead.
        '''
        for j in range(centroid_numbers):
            centroid[j][:] = nd.mean(nd.take(
                dataset,
                nd.array(np.reshape(
                    np.where(np.equal(clustering.asnumpy(), j)), (-1, )),
                         ctx=ctx),
                axis=0),
                                     axis=0)
        print("epoch : {}".format(i + 1))

    for i in range(centroid_numbers):
        print("{}_center : Final center_value : {}".format(
            i + 1,
            centroid.asnumpy()[i]))

    # 4-step : show result
    data = {"x": [], "y": [], "cluster": []}
    for i in range(len(clustering)):
        data["x"].append(dataset[i][0].asscalar())
        data["y"].append(dataset[i][1].asscalar())
        data["cluster"].append(clustering[i].asscalar())

    df = pd.DataFrame(data)
    sns.lmplot(x="x", y="y", data=df, fit_reg=False, height=10, hue="cluster")  # seaborn >= 0.9 uses height= (formerly size=)
    plt.savefig("K means Algorithm completed using mxnet.png")
    plt.show()
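
The 2-step above relies on broadcasting: expanding the dataset to (1, N, 2) and the centroids to (K, 1, 2) makes their difference a (K, N, 2) tensor, so summing squares over axis 2 gives a (K, N) distance matrix and argmin over axis 0 assigns each point to its nearest centroid. A standalone illustration with toy values:

import mxnet.ndarray as nd

points = nd.array([[0.0, 0.0], [3.0, 0.1], [0.2, -0.1]])  # N=3 points
centers = nd.array([[0.0, 0.0], [3.0, 0.0]])              # K=2 centroids
diff = nd.subtract(nd.expand_dims(points, axis=0),
                   nd.expand_dims(centers, axis=1))       # shape (K, N, 2)
dist = nd.sum(nd.square(diff), axis=2)                    # shape (K, N)
print(nd.argmin(dist, axis=0))                            # [0. 1. 0.]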
Example #4
def run_demo(cuda, record, vfile):
    model = 'models/21styles.params'
    ngf = 128
    style_size = 512
    style_folder = 'images/styles/'
    mirror = False
    vDir = './video/'
    vPath = vDir + vfile
    oFile = 'output21-' + vfile
    wM, hM = 640, 480
    if cuda:
        ctx = mx.gpu(0)
        os.environ['MXNET_CUDNN_AUTOTUNE_DEFAULT'] = '0'
    else:
        ctx = mx.cpu(0)
    style_loader = StyleLoader(style_folder, style_size, ctx)
    style_model = Net(ngf=ngf)
    style_model.load_parameters(model, ctx=ctx)
    metadata = ffprobe(vPath)
    fps = metadata["video"]["@avg_frame_rate"]
    # print(json.dumps(metadata["video"], indent=4))
    w, h = int(metadata["video"]["@width"]), int(metadata["video"]["@height"])
    downsize = h > hM
    if downsize:
        w = 2 * int(w * hM / h / 2)
        h = hM

    # downsize = w > wM
    # if downsize:
    #     h = 2 * int(h * wM / w / 2); w = wM
    swidth = int(w / 4)
    sheight = int(h / 4)
    wName = vfile + '  STYLIZED VIDEO   fps:' + fps + '  W:' + str(
        w) + '  H:' + str(h)
    if record:
        out = FFmpegWriter(vDir + oFile,
                           inputdict={
                               '-r': str(fps),
                               '-s': '{}x{}'.format(2 * w, h)
                           },
                           outputdict={
                               '-r': str(fps),
                               '-c:v': 'h264'
                           })
    key, idx = 0, 0
    cv2.namedWindow(wName, cv2.WINDOW_NORMAL)
    cv2.resizeWindow(wName, 2 * w, h)
    for img in vreader(vPath):
        idx += 1
        if downsize:
            img = cv2.resize(img, (w, h), interpolation=cv2.INTER_AREA)
        if mirror:
            img = cv2.flip(img, 1)
        cimg = img.copy()
        img = np.array(img).transpose(2, 0, 1).astype(float)
        img = F.expand_dims(mx.nd.array(img, ctx=ctx), 0)
        # changing styles
        if idx % 50 == 1:
            style_v = style_loader.get(int(idx / 20))
            style_model.set_target(style_v)
        img = style_model(img)

        simg = np.squeeze(style_v.asnumpy())
        simg = simg.transpose(1, 2, 0).astype('uint8')
        img = F.clip(img[0], 0, 255).asnumpy()
        img = img.transpose(1, 2, 0).astype('uint8')

        # display
        simg = cv2.resize(simg, (swidth, sheight),
                          interpolation=cv2.INTER_CUBIC)
        cimg[0:sheight, 0:swidth, :] = simg
        img = np.concatenate((cimg, cv2.cvtColor(img, cv2.COLOR_BGR2RGB)),
                             axis=1)
        if record:
            out.writeFrame(img)
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        cv2.imshow(wName, img)
        key = cv2.waitKey(1)
        if key == 27:  # Esc
            break
    if record:
        out.close()
        transferAudio(vPath, vDir, oFile)
        print("Done OK. Created Stylised Video file", vDir + oFile)
        print("fps :", fps, "    W:", w, " H:", h)
    cv2.destroyAllWindows()
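
A hypothetical invocation (the file name is a placeholder; run_demo expects the clip under ./video/ and the 21styles.params weights under models/):

run_demo(cuda=True, record=False, vfile='clip.mp4')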
Example #5
    def compute_retrospective_loss(self, observed_arr, encoded_arr,
                                   decoded_arr, re_encoded_arr):
        '''
        Compute retrospective loss.

        Returns:
            `mxnet.ndarray` of losses, one value per sample in the batch.

        '''
        if self.__output_neuron_count == self.__hidden_neuron_count:
            target_arr = nd.broadcast_sub(
                encoded_arr, nd.expand_dims(observed_arr.mean(axis=2), axis=2))
            summary_delta_arr = nd.sqrt(nd.power(decoded_arr - target_arr, 2))
        else:
            # For each batch, draw a samples from the Uniform distribution.
            if self.__output_neuron_count > self.__hidden_neuron_count:
                all_dim_arr = np.arange(self.__output_neuron_count)
                np.random.shuffle(all_dim_arr)
                choiced_dim_arr = all_dim_arr[:self.__hidden_neuron_count]
                target_arr = nd.broadcast_sub(
                    encoded_arr,
                    nd.expand_dims(observed_arr[:, :,
                                                choiced_dim_arr].mean(axis=2),
                                   axis=2))
                summary_delta_arr = nd.sqrt(
                    nd.power(decoded_arr[:, :, choiced_dim_arr] - target_arr,
                             2))
            else:
                all_dim_arr = np.arange(self.__hidden_neuron_count)
                np.random.shuffle(all_dim_arr)
                choiced_dim_arr = all_dim_arr[:self.__output_neuron_count]
                target_arr = nd.broadcast_sub(
                    encoded_arr[:, :, choiced_dim_arr],
                    nd.expand_dims(observed_arr.mean(axis=2), axis=2))
                summary_delta_arr = nd.sqrt(
                    nd.power(decoded_arr - target_arr, 2))

        match_delta_arr = None
        for i in range(self.__batch_size):
            arr = nd.sqrt(
                nd.power(encoded_arr[i, -1] - re_encoded_arr[i, -1], 2))
            if match_delta_arr is None:
                match_delta_arr = nd.expand_dims(arr, axis=0)
            else:
                match_delta_arr = nd.concat(match_delta_arr,
                                            nd.expand_dims(arr, axis=0),
                                            dim=0)
        """
        other_encoded_delta_arr = None
        for i in range(self.__batch_size):
            _encoded_arr = None
            for seq in range(encoded_arr[i].shape[0] - 1):
                if _encoded_arr is None:
                    _encoded_arr = nd.expand_dims(encoded_arr[i][seq], axis=0)
                else:
                    _encoded_arr = nd.concat(
                        _encoded_arr,
                        nd.expand_dims(encoded_arr[i][seq], axis=0),
                        dim=0
                    )

            arr = nd.nansum(
                nd.sqrt(
                    nd.power(
                        nd.maximum(
                            0,
                            _encoded_arr - re_encoded_arr[i, -1].reshape(
                                1, 
                                re_encoded_arr.shape[2]
                            )
                        ),
                        2
                    )
                ) + self.__margin_param,
                axis=0
            )
            if other_encoded_delta_arr is None:
                other_encoded_delta_arr = nd.expand_dims(arr, axis=0)
            else:
                other_encoded_delta_arr = nd.concat(
                    other_encoded_delta_arr,
                    nd.expand_dims(arr, axis=0),
                    dim=0
                )

        other_re_encoded_delta_arr = None
        for i in range(self.__batch_size):
            _re_encoded_arr = None
            for seq in range(re_encoded_arr[i].shape[0] - 1):
                if _re_encoded_arr is None:
                    _re_encoded_arr = nd.expand_dims(re_encoded_arr[i][seq], axis=0)
                else:
                    _re_encoded_arr = nd.concat(
                        _re_encoded_arr,
                        nd.expand_dims(re_encoded_arr[i][seq], axis=0),
                        dim=0
                    )

            arr = nd.nansum(
                nd.sqrt(
                    nd.power(
                        nd.maximum(
                            0, 
                            encoded_arr[i, -1].reshape(
                                1,
                                encoded_arr.shape[2]
                            ) - _re_encoded_arr
                        ),
                        2
                    )
                ) + self.__margin_param,
                axis=0
            )
            if other_re_encoded_delta_arr is None:
                other_re_encoded_delta_arr = nd.expand_dims(arr, axis=0)
            else:
                other_re_encoded_delta_arr = nd.concat(
                    other_re_encoded_delta_arr,
                    nd.expand_dims(arr, axis=0),
                    dim=0
                )

        mismatch_delta_arr = (
            match_delta_arr - other_encoded_delta_arr
        ) + (
            match_delta_arr - other_re_encoded_delta_arr
        )

        delta_arr = summary_delta_arr + nd.expand_dims(
            self.__retrospective_lambda * match_delta_arr, axis=1
        ) + nd.expand_dims(
            self.__retrospective_eta * mismatch_delta_arr, axis=1
        )
        """
        delta_arr = summary_delta_arr + nd.expand_dims(
            self.__retrospective_lambda * match_delta_arr, axis=1)
        v = nd.norm(delta_arr)
        if v > self.__grad_clip_threshold:
            delta_arr = delta_arr * self.__grad_clip_threshold / v

        loss = nd.mean(delta_arr, axis=0, exclude=True)

        return loss
    def generate_learned_samples(self):
        '''
        Draw and generate data.

        Returns:
            `Tuple` data. The shape is ...
            - `mxnet.ndarray` of observed data points in training.
            - `mxnet.ndarray` of supervised data in training.
            - `mxnet.ndarray` of observed data points in test.
            - `mxnet.ndarray` of supervised data in test.
        '''
        for _ in range(self.iter_n):
            training_batch_arr, test_batch_arr = None, None
            training_label_arr, test_label_arr = None, None
            for _ in range(self.batch_size):
                dir_key = np.random.randint(
                    low=0, high=len(self.__training_file_path_list))

                training_one_hot_arr = nd.zeros(
                    (1, len(self.__training_file_path_list)), ctx=self.__ctx)
                training_one_hot_arr[0, dir_key] = 1

                file_key = np.random.randint(
                    low=0, high=len(self.__training_file_path_list[dir_key]))
                training_data_arr = self.__image_extractor.extract(
                    path=self.__training_file_path_list[dir_key][file_key], )
                training_data_arr = self.pre_normalize(training_data_arr)

                test_dir_key = np.random.randint(
                    low=0, high=len(self.__test_file_path_list))

                test_one_hot_arr = nd.zeros(
                    (1, len(self.__test_file_path_list)), ctx=self.__ctx)
                test_one_hot_arr[0, test_dir_key] = 1

                file_key = np.random.randint(
                    low=0, high=len(self.__test_file_path_list[test_dir_key]))
                test_data_arr = self.__image_extractor.extract(
                    path=self.__test_file_path_list[test_dir_key][file_key], )
                test_data_arr = self.pre_normalize(test_data_arr)

                training_data_arr = nd.expand_dims(training_data_arr, axis=0)
                test_data_arr = nd.expand_dims(test_data_arr, axis=0)

                if training_batch_arr is not None:
                    training_batch_arr = nd.concat(training_batch_arr,
                                                   training_data_arr,
                                                   dim=0)
                else:
                    training_batch_arr = training_data_arr

                if test_batch_arr is not None:
                    test_batch_arr = nd.concat(test_batch_arr,
                                               test_data_arr,
                                               dim=0)
                else:
                    test_batch_arr = test_data_arr

                if training_label_arr is not None:
                    training_label_arr = nd.concat(training_label_arr,
                                                   training_one_hot_arr,
                                                   dim=0)
                else:
                    training_label_arr = training_one_hot_arr

                if test_label_arr is not None:
                    test_label_arr = nd.concat(test_label_arr,
                                               test_one_hot_arr,
                                               dim=0)
                else:
                    test_label_arr = test_one_hot_arr

            if self.__noiseable_data is not None:
                training_batch_arr = self.__noiseable_data.noise(
                    training_batch_arr)

            yield training_batch_arr, training_label_arr, test_batch_arr, test_label_arr
Example #7
def train(pool_size, epochs, train_data, val_data, ctx, netEn, netDe, netD, trainerEn, trainerDe, trainerD, lambda1, batch_size, expname, append=True, useAE=False):
    
    text_file = open(expname + "_validtest.txt", "w")
    text_file.close()
    #netGT, netDT, _, _ = set_test_network(opt.depth, ctx, opt.lr, opt.beta1,opt.ndf,  opt.ngf, opt.append)
    GAN_loss = gluon.loss.SigmoidBinaryCrossEntropyLoss()
    L1_loss = gluon.loss.L2Loss()
    image_pool = imagePool.ImagePool(pool_size)
    metric = mx.metric.CustomMetric(facc)
    metric2 = mx.metric.MSE()
    loss_rec_G = []
    loss_rec_D = []
    loss_rec_R = []
    acc_rec = []
    stamp = datetime.now().strftime('%Y_%m_%d-%H_%M')
    logging.basicConfig(level=logging.DEBUG)
    for epoch in range(epochs):

        tic = time.time()
        btic = time.time()
        train_data.reset()
        iter = 0
        #print('learning rate : '+str(trainerD.learning_rate ))
        for batch in train_data:
            ############################
            # (1) Update D network: maximize log(D(x, y)) + log(1 - D(x, G(x, z)))
            ###########################
            real_in = batch.data[0].as_in_context(ctx)
            real_out = batch.data[1].as_in_context(ctx)
            soft_zero = 1e-10
            fake_latent = netEn(real_in)
            fake_latent = nd.squeeze(fake_latent)
            mu_lv = nd.split(fake_latent, axis=1, num_outputs=2)
            mu = mu_lv[0]
            lv = mu_lv[1]
            KL = 0.5 * nd.nansum(1 + lv - mu * mu - nd.exp(lv + soft_zero))
            eps = nd.random_normal(loc=0, scale=1, shape=(batch_size, 2048), ctx=ctx)
            z = mu + nd.exp(0.5 * lv) * eps
            z = nd.expand_dims(nd.expand_dims(z, 2), 2)
            y = netDe(z)
            fake_out = y

            logloss = nd.nansum(real_in * nd.log(y + soft_zero) + (1 - real_in) * nd.log(1 - y + soft_zero))
            loss = -logloss - KL
            fake_concat = nd.concat(real_in, fake_out, dim=1) if append else fake_out
            with autograd.record():
                # Train with fake image
                # Use image pooling to utilize history images
                output = netD(fake_concat)
                fake_label = nd.zeros(output.shape, ctx=ctx)
                errD_fake = GAN_loss(output, fake_label)
                metric.update([fake_label, ], [output, ])
                real_concat =  nd.concat(real_in, real_out, dim=1) if append else  real_out
                output = netD(real_concat)
                real_label = nd.ones(output.shape, ctx=ctx)
                errD_real = GAN_loss(output, real_label)
                errD = (errD_real + errD_fake) * 0.5
                errD.backward()
                metric.update([real_label, ], [output, ])

            trainerD.step(batch.data[0].shape[0])

            ############################
            # (2) Update G network: maximize log(D(x, G(x, z))) - lambda1 * L1(y, G(x, z))
            ###########################
            with autograd.record():
                fake_latent = nd.squeeze(netEn(real_in))
                mu_lv = nd.split(fake_latent, axis=1, num_outputs=2)
                mu = mu_lv[0]
                lv = mu_lv[1]
                KL = 0.5 * nd.nansum(1 + lv - mu * mu - nd.exp(lv + soft_zero))
                eps = nd.random_normal(loc=0, scale=1, shape=(batch_size, 2048), ctx=ctx)
                z = mu + nd.exp(0.5 * lv) * eps
                z = nd.expand_dims(nd.expand_dims(z, 2), 2)
                y = netDe(z)
                fake_out = y
                logloss = nd.nansum((real_in + 1) * 0.5 * nd.log(0.5 * (y + 1) + soft_zero) + (1 - 0.5 * (real_in + 1)) * nd.log(1 - 0.5 * (y + 1) + soft_zero))
                loss = -logloss - KL
                fake_concat = nd.concat(real_in, fake_out, dim=1) if append else fake_out
                output = netD(fake_concat)
                real_label = nd.ones(output.shape, ctx=ctx)
                errG = GAN_loss(output, real_label) + loss * lambda1  # L1_loss(real_out, fake_out) * lambda1
                errR = logloss  # L1_loss(real_out, fake_out)
                errG.backward()
            trainerDe.step(batch.data[0].shape[0])
            trainerEn.step(batch.data[0].shape[0])
            loss_rec_G.append(nd.mean(errG).asscalar() - nd.mean(errR).asscalar() * lambda1)
            loss_rec_D.append(nd.mean(errD).asscalar())
            loss_rec_R.append(nd.mean(errR).asscalar())
            name, acc = metric.get()
            acc_rec.append(acc)
            # Print log information every ten batches
            if iter % 10 == 0:
                name, acc = metric.get()
                logging.info('speed: {} samples/s'.format(batch_size / (time.time() - btic)))
                # print(errD)
                logging.info('discriminator loss = %f, generator loss = %f, binary training acc = %f, reconstruction error = %f at iter %d epoch %d'
                             % (nd.mean(errD).asscalar(),
                                nd.mean(errG).asscalar(), acc, nd.mean(errR).asscalar(), iter, epoch))
            iter = iter + 1
            btic = time.time()

        name, acc = metric.get()
        metric.reset()
        train_data.reset()

        logging.info('\nbinary training acc at epoch %d: %s=%f' % (epoch, name, acc))
        logging.info('time: %f' % (time.time() - tic))
        if epoch % 10 == 0:
            text_file = open(expname + "_validtest.txt", "a")
            filename = "checkpoints/"+expname+"_"+str(epoch)+"_D.params"
            netD.save_params(filename)
            filename = "checkpoints/"+expname+"_"+str(epoch)+"_En.params"
            netEn.save_params(filename)
            filename = "checkpoints/"+expname+"_"+str(epoch)+"_De.params"
            netDe.save_params(filename)
            fake_img1 = nd.concat(real_in[0],real_out[0], fake_out[0], dim=1)
            fake_img2 = nd.concat(real_in[1],real_out[1], fake_out[1], dim=1)
            fake_img3 = nd.concat(real_in[2],real_out[2], fake_out[2], dim=1)
            fake_img4 = nd.concat(real_in[3],real_out[3], fake_out[3], dim=1)
            val_data.reset()
            text_file = open(expname + "_validtest.txt", "a")
            for vbatch in val_data:
                real_in = vbatch.data[0].as_in_context(ctx)
                real_out = vbatch.data[1].as_in_context(ctx)

                fake_latent = netEn(real_in)
                mu_lv = nd.split(fake_latent, axis=1, num_outputs=2)
                mu = mu_lv[0]
                lv = mu_lv[1]
                eps = nd.random_normal(loc=0, scale=1, shape=(batch_size // 5, 2048, 1, 1), ctx=ctx)
                z = mu + nd.exp(0.5 * lv) * eps
                y = netDe(z)
                fake_out = y
                KL = 0.5 * nd.sum(1 + lv - mu * mu - nd.exp(lv), axis=1)
                logloss = nd.sum(real_in * nd.log(y + soft_zero) + (1 - real_in) * nd.log(1 - y + soft_zero), axis=1)
                loss = logloss + KL
                metric2.update([fake_out, ], [real_out, ])
                _, acc2 = metric2.get()
            text_file.write("%s %s %s\n" % (str(epoch), nd.mean(errR).asscalar(), str(acc2)))
            metric2.reset()

            fake_img1T = nd.concat(real_in[0],real_out[0], fake_out[0], dim=1)
            fake_img2T = nd.concat(real_in[1],real_out[1], fake_out[1], dim=1)
            fake_img3T = nd.concat(real_in[2],real_out[2], fake_out[2], dim=1)
            #fake_img4T = nd.concat(real_in[3],real_out[3], fake_out[3], dim=1)
            fake_img = nd.concat(fake_img1, fake_img2, fake_img3, fake_img1T, fake_img2T, fake_img3T, dim=2)
            visual.visualize(fake_img)
            plt.savefig('outputs/'+expname+'_'+str(epoch)+'.png')
            text_file.close()
    return [loss_rec_D, loss_rec_G, loss_rec_R, acc_rec]
    def inference_g(self, observed_arr):
        '''
        Inference with generator.

        Args:
            observed_arr:       `mxnet.ndarray` of observed data points.
        
        Returns:
            Tuple data.
            - re-parametric data.
            - encoded data points.
            - re-encoded data points.
        '''
        generated_arr, encoded_arr, re_encoded_arr = super().inference_g(observed_arr)

        if autograd.is_recording():
            limit = self.long_term_seq_len

            seq_len = self.noise_sampler.seq_len
            self.noise_sampler.seq_len = limit
            long_term_observed_arr = self.noise_sampler.draw()

            observed_mean_arr = nd.expand_dims(nd.mean(long_term_observed_arr, axis=1), axis=1)
            sum_arr = None
            for seq in range(2, long_term_observed_arr.shape[1]):
                add_arr = nd.sum(long_term_observed_arr[:, :seq] - observed_mean_arr, axis=1)
                if sum_arr is None:
                    sum_arr = nd.expand_dims(add_arr, axis=0)
                else:
                    sum_arr = nd.concat(
                        sum_arr,
                        nd.expand_dims(add_arr, axis=0),
                        dim=0
                    )
            max_arr = nd.max(sum_arr, axis=0)
            min_arr = nd.min(sum_arr, axis=0)

            diff_arr = long_term_observed_arr - observed_mean_arr
            std_arr = nd.power(nd.mean(nd.square(diff_arr), axis=1), 1/2)
            R_S_arr = (max_arr - min_arr) / std_arr
            len_arr = nd.ones_like(R_S_arr, ctx=R_S_arr.context) * np.log(long_term_observed_arr.shape[1] / 2)
            observed_H_arr = nd.log(R_S_arr) / len_arr

            self.noise_sampler.seq_len = seq_len

            g_min_arr = nd.expand_dims(generated_arr.min(axis=1), axis=1)
            g_max_arr = nd.expand_dims(generated_arr.max(axis=1), axis=1)
            o_min_arr = nd.expand_dims(observed_arr.min(axis=1), axis=1)
            o_max_arr = nd.expand_dims(observed_arr.max(axis=1), axis=1)

            _observed_arr = generated_arr

            long_term_generated_arr = None
            for i in range(limit):
                generated_arr, _, _ = super().inference_g(_observed_arr)

                g_min_arr = nd.expand_dims(generated_arr.min(axis=1), axis=1)
                g_max_arr = nd.expand_dims(generated_arr.max(axis=1), axis=1)
                o_min_arr = nd.expand_dims(_observed_arr.min(axis=1), axis=1)
                o_max_arr = nd.expand_dims(_observed_arr.max(axis=1), axis=1)
                generated_arr = (generated_arr - g_min_arr) / (g_max_arr - g_min_arr)
                generated_arr = (o_max_arr - o_min_arr) * generated_arr
                generated_arr = o_min_arr + generated_arr

                if self.condition_sampler is not None:
                    self.condition_sampler.output_shape = generated_arr.shape
                    noise_arr = self.condition_sampler.generate()
                    generated_arr += noise_arr

                if long_term_generated_arr is None:
                    long_term_generated_arr = generated_arr
                else:
                    long_term_generated_arr = nd.concat(
                        long_term_generated_arr,
                        generated_arr,
                        dim=1
                    )

                _observed_arr = generated_arr

            generated_mean_arr = nd.expand_dims(nd.mean(long_term_generated_arr, axis=1), axis=1)
            sum_arr = None
            for seq in range(2, long_term_generated_arr.shape[1]):
                add_arr = nd.sum(long_term_generated_arr[:, :seq] - generated_mean_arr, axis=1)
                if sum_arr is None:
                    sum_arr = nd.expand_dims(add_arr, axis=0)
                else:
                    sum_arr = nd.concat(
                        sum_arr,
                        nd.expand_dims(add_arr, axis=0),
                        dim=0
                    )
            max_arr = nd.max(sum_arr, axis=0)
            min_arr = nd.min(sum_arr, axis=0)

            diff_arr = long_term_generated_arr - generated_mean_arr
            std_arr = nd.power(nd.mean(nd.square(diff_arr), axis=1), 1/2)
            R_S_arr = (max_arr - min_arr) / std_arr
            len_arr = nd.ones_like(R_S_arr, ctx=R_S_arr.context) * np.log(long_term_generated_arr.shape[1] / 2)
            generated_H_arr = nd.log(R_S_arr) / len_arr

            multi_fractal_loss = nd.abs(generated_H_arr - observed_H_arr)
            multi_fractal_loss = nd.mean(multi_fractal_loss, axis=0, exclude=True)
            multi_fractal_loss = nd.expand_dims(multi_fractal_loss, axis=-1)
            multi_fractal_loss = nd.expand_dims(multi_fractal_loss, axis=-1)

            generated_arr = generated_arr + multi_fractal_loss

        return generated_arr, encoded_arr, re_encoded_arr
Example #9
    def select_action(
        self, 
        possible_action_arr, 
        possible_predicted_q_arr, 
        possible_reward_value_arr,
        possible_next_q_arr,
        possible_meta_data_arr=None
    ):
        '''
        Select action by Q(state, action).

        Args:
            possible_action_arr:                Tensor of actions.
            possible_predicted_q_arr:           Tensor of Q-Values.
            possible_reward_value_arr:          Tensor of reward values.
            possible_next_q_arr:                Tensor of Q-Values in next time.
            possible_meta_data_arr:             `mxnet.ndarray.NDArray` or `np.array` of meta data of the actions.

        Returns:
            Tuple(`np.ndarray` of action., Q-Value)
        '''
        key_arr = self.select_action_key(possible_action_arr, possible_predicted_q_arr)
        meta_data_arr = None
        if possible_meta_data_arr is not None:
            for i in range(possible_meta_data_arr.shape[0]):
                _meta_data_arr = possible_meta_data_arr[i, key_arr[i]]
                if i == 0:
                    if isinstance(_meta_data_arr, nd.NDArray) is True:
                        meta_data_arr = nd.expand_dims(_meta_data_arr, axis=0)
                    else:
                        meta_data_arr = np.expand_dims(_meta_data_arr, axis=0)
                else:
                    if isinstance(_meta_data_arr, nd.NDArray) is True:
                        meta_data_arr = nd.concat(
                            meta_data_arr,
                            nd.expand_dims(_meta_data_arr, axis=0),
                            dim=0
                        )
                    else:
                        meta_data_arr = np.concatenate(
                            [
                                meta_data_arr,
                                np.expand_dims(_meta_data_arr, axis=0),
                            ],
                            axis=0
                        )

        action_arr = None
        predicted_q_arr = None
        reward_value_arr = None
        next_q_arr = None

        for i in range(possible_action_arr.shape[0]):
            _action_arr = possible_action_arr[i, key_arr[i]]
            _predicted_q_arr = possible_predicted_q_arr[i, key_arr[i]]
            _reward_value_arr = possible_reward_value_arr[i, key_arr[i]]
            _next_q_arr = possible_next_q_arr[i, key_arr[i]]
            if i == 0:
                action_arr = nd.expand_dims(_action_arr, axis=0)
                predicted_q_arr = nd.expand_dims(_predicted_q_arr, axis=0)
                reward_value_arr = nd.expand_dims(_reward_value_arr, axis=0)
                next_q_arr = nd.expand_dims(_next_q_arr, axis=0)
            else:
                action_arr = nd.concat(
                    action_arr,
                    nd.expand_dims(_action_arr, axis=0),
                    dim=0
                )
                predicted_q_arr = nd.concat(
                    predicted_q_arr,
                    nd.expand_dims(_predicted_q_arr, axis=0),
                    dim=0
                )
                reward_value_arr = nd.concat(
                    reward_value_arr,
                    nd.expand_dims(_reward_value_arr, axis=0),
                    dim=0
                )
                next_q_arr = nd.concat(
                    next_q_arr,
                    nd.expand_dims(_next_q_arr, axis=0),
                    dim=0
                )

        return (
            action_arr, 
            predicted_q_arr, 
            reward_value_arr,
            next_q_arr,
            meta_data_arr
        )
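
When every input is an NDArray, the per-sample selection loop above can usually be collapsed into a single gather_nd. A sketch under that assumption (select_rows is a hypothetical helper, not part of the original class):

import mxnet.ndarray as nd

def select_rows(arr, key_arr):
    # Pick arr[i, key_arr[i]] for every i in the batch in one call.
    batch_idx = nd.arange(arr.shape[0], ctx=arr.context)
    indices = nd.stack(batch_idx, key_arr.astype(batch_idx.dtype))
    return nd.gather_nd(arr, indices)

# e.g. action_arr = select_rows(possible_action_arr, key_arr)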
Example #10
def batched_l1_dist(a, b):
    a = nd.expand_dims(a, axis=-2)
    b = nd.expand_dims(b, axis=-3)
    res = nd.norm(a - b, ord=1, axis=-1)
    return res
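
For example, with a of shape (B, N, D) and b of shape (B, M, D), the two expand_dims insert the broadcast axes so the result is the (B, N, M) matrix of pairwise L1 distances:

import mxnet.ndarray as nd

a = nd.random.uniform(shape=(2, 4, 8))
b = nd.random.uniform(shape=(2, 5, 8))
print(batched_l1_dist(a, b).shape)  # (2, 4, 5)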
Example #11
def make_grid(tensor,
              nrow=8,
              padding=2,
              normalize=False,
              range=None,
              scale_each=False,
              pad_value=0):
    if not (is_ndarray(tensor) or
            (isinstance(tensor, list) and all(is_ndarray(t) for t in tensor))):

        raise TypeError('tensor or list of tensors expected, got {}'.format(
            type(tensor)))

    # if list of tensors, convert to a 4D mini-batch Tensor
    if isinstance(tensor, list):
        tensor = nd.stack(*tensor, axis=0)

    if tensor.ndim == 2:  # single image H x W
        tensor = nd.expand_dims(tensor, axis=0)
    if tensor.ndim == 3:  # single image
        if tensor.shape[0] == 1:  # if single-channel, convert to 3-channel
            tensor = nd.concat(tensor, tensor, tensor, dim=0)
        tensor = nd.expand_dims(tensor, axis=0)

    if tensor.ndim == 4 and tensor.shape[1] == 1:  # single-channel images
        tensor = nd.concat(tensor, tensor, tensor, dim=1)

    if normalize is True:
        tensor = tensor.copy()  # avoid modifying tensor in-place
        if range is not None:
            assert isinstance(
                range, tuple
            ), "range has to be a tuple (min, max) if specified. min and max are numbers"

        def norm_ip(img, min, max):
            nd.clip(img, min, max, out=img)  # clip in place
            img += (-min)
            img /= (max - min + 1e-5)
            # PyTorch original: img.add_(-min).div_(max - min + 1e-5)

        def norm_range(t, range):
            if range is not None:
                norm_ip(t, range[0], range[1])
            else:
                norm_ip(t, float(t.min().asscalar()),
                        float(t.max().asscalar()))

        if scale_each is True:
            for t in tensor:  # loop over mini-batch dimension
                norm_range(t, range)
        else:
            norm_range(tensor, range)

    if tensor.shape[0] == 1:
        return tensor.reshape((-3, -2))

    # make the mini-batch of images into a grid
    nmaps = tensor.shape[0]  # mini-batch size
    # print(nmaps)
    xmaps = min(nrow, nmaps)  # number of images per row
    ymaps = int(math.ceil(float(nmaps) / xmaps))  # number of rows, rounded up
    height, width = int(tensor.shape[2] + padding), int(
        tensor.shape[3] + padding)  # displayed cell height/width (image + padding)
    num_channels = tensor.shape[1]  # number of image channels
    grid = nd.full(
        (num_channels, height * ymaps + padding, width * xmaps + padding),
        pad_value)  # grid filled with pad_value, sized to hold every padded cell
    k = 0
    for y in irange(ymaps):
        for x in irange(xmaps):
            if k >= nmaps:
                break
            grid[:, y * height + padding:y * height + padding + height - padding,
                 x * width + padding:x * width + padding + width -
                 padding] = tensor[k]
            k = k + 1
    return grid
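
A minimal usage sketch (shapes are illustrative):

import mxnet.ndarray as nd

batch = nd.random.uniform(shape=(16, 3, 32, 32))  # NCHW mini-batch
grid = make_grid(batch, nrow=4, padding=2, normalize=True)
print(grid.shape)  # (3, 138, 138): a single displayable image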
def verify_broadcast_like_dynamic(xshp, wshp, lhs_axes, rhs_axes):
    x_np = np.random.uniform(size=xshp)
    w_np = np.random.uniform(size=wshp)
    x = nd.array(x_np)
    w = nd.array(w_np)

    # org op
    y = nd.broadcast_like(x, w,
        lhs_axes=lhs_axes, rhs_axes=rhs_axes)
    print(y.shape)

    # rewrite op
    xndims, wndims = len(xshp), len(wshp)
    if lhs_axes is None or rhs_axes is None:
        assert xndims == wndims and lhs_axes is None \
            and rhs_axes is None
        z = _broadcast_like(x, w)
    else:
        lhs_axes, lndims = list(lhs_axes), len(lhs_axes)
        rhs_axes, rndims = list(rhs_axes), len(rhs_axes)
        assert lndims == rndims > 0

        lhs_axes = tuple([v+xndims if v<0 else v for v in lhs_axes])
        assert all([0<=v<xndims for v in list(lhs_axes)])

        rhs_axes = tuple([v+wndims if v<0 else v for v in rhs_axes])
        assert all([0<=v<wndims for v in list(rhs_axes)])

        assert all([xshp[lhs_axes[i]] == 1 for i in range(lndims)])

        batch_axes = [0]
        flg = all([batch_axis not in rhs_axes \
            for batch_axis in batch_axes])
        if flg:
            cnts = {v: wshp[rhs_axes[i]] \
                for i, v in enumerate(lhs_axes)}
            reps = tuple([cnts[v] if v in lhs_axes else 1 \
                for v in range(xndims)])
            z = nd.tile(x, reps=reps)
        else:
            axis_map = {}
            for i, v in enumerate(lhs_axes):
                axis_map[v] = rhs_axes[i]
            for batch_axis in batch_axes:
                assert sum([1 if v == batch_axis else 0 \
                    for k, v in axis_map.items()]) <= 1, \
                    ("multiple broadcast on batch_axis: %s, " + \
                    "which is not supported by dynamic shape fusion.") % \
                    batch_axis
            assert wndims < 6, \
                "slice can manipulate at most 5d"

            # reduce shape to 1 for non-broadcast dimensions
            begin = tuple([0]*wndims)
            end = tuple([wshp[v] if v in axis_map.values() else 1 \
                for v in range(wndims)])
            w = nd.slice(w, begin=begin, end=end)

            # decompose k1->v, k2->v into k1->v, k2->v2,
            # which makes the mapped axes unique
            while True:
                vs, flag, paxis_map = set(), True, axis_map
                for pk, pv in paxis_map.items():
                    if pv not in vs:
                        vs.add(pv)
                        continue
                    flag = False
                    axis_map = {k: (v+1 if v>pv or k==pk else v) \
                        for k, v in axis_map.items()}
                    w = nd.expand_dims(w, axis=pv)
                    w = nd.repeat(w, axis=pv, repeats=wshp[pv])
                    wshp = wshp[:pv] + (wshp[pv],) + wshp[pv:]
                    break
                if flag:
                    break
            wndims = len(wshp)

            # trim wndims if not equal to xndims
            v = 0
            while wndims > xndims:
                while v in axis_map.values():
                    v += 1
                w = nd.squeeze(w, axis=v)
                wndims -= 1
                axis_map = {k: (nv-1 if nv > v else nv) \
                    for k, nv in axis_map.items()}
            while wndims < xndims:
                w = nd.expand_dims(w, axis=wndims)
                wndims += 1
            axes = list(range(wndims))
            while True:
                dels = [k for k, v in axis_map.items() if k==v]
                for k in dels:
                    del axis_map[k]
                if not axis_map:
                    break
                keys = list(axis_map.keys())
                k, v = keys[0], axis_map[keys[0]]
                axes[k], axes[v] = axes[v], axes[k]
                for nk in keys:
                    nv = axis_map[nk]
                    if nv == k:
                        axis_map[nk] = v
                    elif nv == v:
                        axis_map[nk] = k
            axes = tuple(axes)
            if axes != tuple(range(wndims)):
                assert wndims < 7, \
                    "slice can manipulate at most 6d"
                w = nd.transpose(w, axes=axes)
            z = _broadcast_like(x, w)
    print(z.shape)

    # compare
    assert z.shape == y.shape
    zn, zp = get_norm(z)
    yn, yp = get_norm(y)
    rn = np.linalg.norm(zp-yp)
    print(zn, yn, rn)
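
Typical invocations might look like this (a sketch; _broadcast_like and get_norm are helpers assumed to be defined in the surrounding module):

verify_broadcast_like_dynamic((2, 1, 3), (2, 4, 3), lhs_axes=(1,), rhs_axes=(1,))
verify_broadcast_like_dynamic((3, 1, 1), (3, 2, 4), lhs_axes=(1, 2), rhs_axes=(1, 2))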
Example #13
def tracker(siamfc, params, frame_name_list, pos_x, pos_y, target_w, target_h, ctx=mx.cpu()):
    pos_x = pos_x - 1
    pos_y = pos_y - 1
    # Load Video Information
    z = image.imread(frame_name_list[params.startFrame]).astype('float32') # H W C
#    frame_sz = z.shape # H W C
    avgChans = nd.mean(z, axis=[0, 1]) 
    nImgs = np.size(frame_name_list)
    
    context = params.contextAmount * (target_w + target_h)
    wc_z = target_w + context
    hc_z = target_h + context
    s_z = params.exemplarSize / 127 * np.sqrt(np.prod(wc_z * hc_z))
    s_x = params.instanceSize / 127 * np.sqrt(np.prod(wc_z * hc_z))
    scales = params.scaleStep ** np.linspace(np.ceil(params.numScale/2 - params.numScale), np.floor(params.numScale/2), params.numScale)
    scaledExemplar = s_z * scales
    
    z_crop_, _ = make_scale_pyramid(z, pos_x, pos_y, scaledExemplar, params.exemplarSize, avgChans, params, ctx=ctx) # B H W C
    z_crop = z_crop_[1]
    z_crop = nd.expand_dims(z_crop, axis=0)
    z_crop = nd.transpose(z_crop, axes=(0, 3, 1, 2))
    
    z_out_val = siamfc.net(z_crop.as_in_context(ctx))
    
    min_s_x = params.minSFactor * s_x
    max_s_x = params.maxSFactor * s_x
    min_s_z = params.minSFactor * s_z
    max_s_z = params.maxSFactor * s_z
    
    # cosine window to penalize large displacements   
    window_hann_1d = np.expand_dims(np.hanning(params.responseUp * params.scoreSize), axis = 0)
    window_hann_2d = np.transpose(window_hann_1d) * window_hann_1d
    window = window_hann_2d / np.sum(window_hann_2d)
    # stores tracker's output for evaluation
    print('Frame: %d' % (params.startFrame + 1))
    bboxes = np.zeros((nImgs, 4))
    bboxes[0,:] = [pos_x + 1-target_w / 2, pos_y + 1-target_h / 2, target_w, target_h]
    
    t_start = time.time()
    
    for i in range(params.startFrame + 1, nImgs):
        print('Frame: %d' % (i + 1))
        scaledInstance = s_x * scales
        x = image.imread(frame_name_list[i]).astype('float32') # H W C
        x_crops, pad_masks_x = make_scale_pyramid(x, pos_x, pos_y, scaledInstance, params.instanceSize, avgChans, params, ctx=ctx) # B H W C
        x_crops_ = nd.transpose(x_crops, axes=(0, 3, 1, 2))
        x_out = siamfc.net(x_crops_.as_in_context(ctx))
        responseMaps = siamfc.match_templates(z_out_val, x_out) # B C H W        
        pos_x, pos_y, newScale = tracker_step(responseMaps, pos_x, pos_y, s_x, window, params)
        s_x = np.maximum(min_s_x, np.minimum(max_s_x, (1.0 - np.float64(params.scaleLR))* s_x + np.float64(params.scaleLR) * scaledInstance[newScale]))
        
        if params.zLR > 0:
            scaledExemplar = s_z * scales
            z_crop_, _ = make_scale_pyramid(x, pos_x, pos_y, scaledExemplar, params.exemplarSize, avgChans, params, ctx=ctx) # B H W C
            z_crop = z_crop_[1]
            z_crop = nd.expand_dims(z_crop, axis=0)
            z_crop = nd.transpose(z_crop, axes=(0, 3, 1, 2))
            z_out_val_new = siamfc.net(z_crop.as_in_context(ctx))
            z_out_val = (1 - params.zLR) * z_out_val + params.zLR * z_out_val_new
            s_z = np.maximum(min_s_z, np.minimum(max_s_z, (1 - params.scaleLR) * s_z + params.scaleLR * scaledExemplar[newScale]))
        
        scaledTarget_x, scaledTarget_y = target_w * scales, target_h * scales
        target_w = (1 - params.scaleLR) * target_w + params.scaleLR * scaledTarget_x[newScale]
        target_h = (1 - params.scaleLR) * target_h + params.scaleLR * scaledTarget_y[newScale]
        bboxes[i-params.startFrame, :] = pos_x + 1 - target_w / 2, pos_y + 1 - target_h / 2, target_w, target_h
        
        if params.visualization:
            show_frame(x.asnumpy(), bboxes[i-params.startFrame, :], 1)
		
    t_elapsed = time.time() - t_start + 1
    speed = (nImgs - 1) / t_elapsed
    
    return bboxes, speed
def test_resize_gpu():
    # Test with normal case 3D input float type
    data_in_3d = nd.random.uniform(0, 255, (300, 300, 3))
    out_nd_3d = transforms.Resize((100, 100))(data_in_3d)
    data_in_4d_nchw = nd.moveaxis(nd.expand_dims(data_in_3d, axis=0), 3, 1)
    data_expected_3d = (nd.moveaxis(
        nd.contrib.BilinearResize2D(data_in_4d_nchw, height=100, width=100), 1,
        3))[0]
    assert_almost_equal(out_nd_3d.asnumpy(), data_expected_3d.asnumpy())

    # Test with normal case 4D input float type
    data_in_4d = nd.random.uniform(0, 255, (2, 300, 300, 3))
    out_nd_4d = transforms.Resize((100, 100))(data_in_4d)
    data_in_4d_nchw = nd.moveaxis(data_in_4d, 3, 1)
    data_expected_4d = nd.moveaxis(
        nd.contrib.BilinearResize2D(data_in_4d_nchw, height=100, width=100), 1,
        3)
    assert_almost_equal(out_nd_4d.asnumpy(), data_expected_4d.asnumpy())

    # Test invalid input (negative size)
    data_in_3d = nd.random.uniform(0, 255, (300, 300, 3))
    invalid_transform = transforms.Resize(-150,
                                          keep_ratio=False,
                                          interpolation=2)
    assertRaises(MXNetError, invalid_transform, data_in_3d)

    # Credited to Hang Zhang
    def py_bilinear_resize_nhwc(x, outputHeight, outputWidth):
        batch, inputHeight, inputWidth, channel = x.shape
        if outputHeight == inputHeight and outputWidth == inputWidth:
            return x
        y = np.empty([batch, outputHeight, outputWidth,
                      channel]).astype('uint8')
        rheight = 1.0 * (inputHeight - 1) / (outputHeight -
                                             1) if outputHeight > 1 else 0.0
        rwidth = 1.0 * (inputWidth - 1) / (outputWidth -
                                           1) if outputWidth > 1 else 0.0
        for h2 in range(outputHeight):
            h1r = 1.0 * h2 * rheight
            h1 = int(np.floor(h1r))
            h1lambda = h1r - h1
            h1p = 1 if h1 < (inputHeight - 1) else 0
            for w2 in range(outputWidth):
                w1r = 1.0 * w2 * rwidth
                w1 = int(np.floor(w1r))
                w1lambda = w1r - w1
                w1p = 1 if w1 < (inputWidth - 1) else 0
                for b in range(batch):
                    for c in range(channel):
                        y[b][h2][w2][c] = (1-h1lambda)*((1-w1lambda)*x[b][h1][w1][c] + \
                            w1lambda*x[b][h1][w1+w1p][c]) + \
                            h1lambda*((1-w1lambda)*x[b][h1+h1p][w1][c] + \
                            w1lambda*x[b][h1+h1p][w1+w1p][c])
        return y

    # Test with normal case 3D input uint8 type
    data_in_4d = nd.random.uniform(0, 255, (1, 300, 300, 3)).astype('uint8')
    out_nd_3d = transforms.Resize((100, 100))(data_in_4d[0])
    assert_almost_equal(out_nd_3d.asnumpy(),
                        py_bilinear_resize_nhwc(data_in_4d.asnumpy(), 100,
                                                100)[0],
                        atol=1.0)

    # Test with normal case 4D input uint8 type
    data_in_4d = nd.random.uniform(0, 255, (2, 300, 300, 3)).astype('uint8')
    out_nd_4d = transforms.Resize((100, 100))(data_in_4d)
    assert_almost_equal(out_nd_4d.asnumpy(),
                        py_bilinear_resize_nhwc(data_in_4d.asnumpy(), 100,
                                                100),
                        atol=1.0)
def train(args):
    np.random.seed(args.seed)
    if args.cuda:
        ctx = mx.gpu(0)
    else:
        ctx = mx.cpu(0)
    # dataloader
    transform = utils.Compose([utils.Scale(args.image_size),
                               utils.CenterCrop(args.image_size),
                               utils.ToTensor(ctx),
                               ])
    train_dataset = data.ImageFolder(args.dataset, transform)
    train_loader = gluon.data.DataLoader(train_dataset, batch_size=args.batch_size,
                                         last_batch='discard')
    style_loader = utils.StyleLoader(args.style_folder, args.style_size, ctx=ctx)
    print('len(style_loader):',style_loader.size())
    # models
    vgg = net.Vgg16()
    utils.init_vgg_params(vgg, 'models', ctx=ctx)
    style_model = net.Net(ngf=args.ngf)
    style_model.initialize(init=mx.initializer.MSRAPrelu(), ctx=ctx)
    if args.resume is not None:
        print('Resuming, initializing using weight from {}.'.format(args.resume))
        style_model.load_parameters(args.resume, ctx=ctx)
    print('style_model:',style_model)
    # optimizer and loss
    trainer = gluon.Trainer(style_model.collect_params(), 'adam',
                            {'learning_rate': args.lr})
    mse_loss = gluon.loss.L2Loss()

    for e in range(args.epochs):
        agg_content_loss = 0.
        agg_style_loss = 0.
        count = 0
        for batch_id, (x, _) in enumerate(train_loader):
            n_batch = len(x)
            count += n_batch
            # prepare data
            style_image = style_loader.get(batch_id)
            style_v = utils.subtract_imagenet_mean_preprocess_batch(style_image.copy())
            style_image = utils.preprocess_batch(style_image)

            features_style = vgg(style_v)
            gram_style = [net.gram_matrix(y) for y in features_style]

            xc = utils.subtract_imagenet_mean_preprocess_batch(x.copy())
            f_xc_c = vgg(xc)[1]
            with autograd.record():
                style_model.set_target(style_image)
                y = style_model(x)

                y = utils.subtract_imagenet_mean_batch(y)
                features_y = vgg(y)

                content_loss = 2 * args.content_weight * mse_loss(features_y[1], f_xc_c)

                style_loss = 0.
                for m in range(len(features_y)):
                    gram_y = net.gram_matrix(features_y[m])
                    _, C, _ = gram_style[m].shape
                    gram_s = F.expand_dims(gram_style[m], 0).broadcast_to((args.batch_size, 1, C, C))
                    style_loss = style_loss + 2 * args.style_weight * \
                        mse_loss(gram_y, gram_s[:n_batch, :, :])

                total_loss = content_loss + style_loss
                total_loss.backward()

            trainer.step(args.batch_size)
            mx.nd.waitall()

            agg_content_loss += content_loss[0]
            agg_style_loss += style_loss[0]

            if (batch_id + 1) % args.log_interval == 0:
                mesg = "{}\tEpoch {}:\t[{}/{}]\tcontent: {:.3f}\tstyle: {:.3f}\ttotal: {:.3f}".format(
                    time.ctime(), e + 1, count, len(train_dataset),
                                agg_content_loss.asnumpy()[0] / (batch_id + 1),
                                agg_style_loss.asnumpy()[0] / (batch_id + 1),
                                (agg_content_loss + agg_style_loss).asnumpy()[0] / (batch_id + 1)
                )
                print(mesg)


            if (batch_id + 1) % (4 * args.log_interval) == 0:
                # save model
                save_model_filename = "Epoch_" + str(e) + "iters_" + \
                    str(count) + "_" + str(time.ctime()).replace(' ', '_') + "_" + str(
                    args.content_weight) + "_" + str(args.style_weight) + ".params"
                save_model_path = os.path.join(args.save_model_dir, save_model_filename)
                style_model.save_parameters(save_model_path)
                print("\nCheckpoint, trained model saved at", save_model_path)

    # save model
    save_model_filename = "Final_epoch_" + str(args.epochs) + "_" + str(time.ctime()).replace(' ', '_') + "_" + str(
        args.content_weight) + "_" + str(args.style_weight) + ".params"
    save_model_path = os.path.join(args.save_model_dir, save_model_filename)
    style_model.save_parameters(save_model_path)
    print("\nDone, trained model saved at", save_model_path)
Example #16
def np2nd(img_np, ctx=get_ctx()):
    img_nd = nd.array(img_np, ctx=ctx)
    img_nd = nd.swapaxes(img_nd, 1, 2)
    img_nd = nd.swapaxes(img_nd, 0, 1)
    img_nd = nd.expand_dims(img_nd, 0)
    return img_nd
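
For example, an H x W x C numpy image becomes a 1 x C x H x W batch (passing ctx explicitly to avoid the get_ctx() dependency):

import numpy as np
import mxnet as mx

img = np.zeros((480, 640, 3), dtype=np.float32)  # H W C
print(np2nd(img, ctx=mx.cpu()).shape)            # (1, 3, 480, 640)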
Example #17
def unsqueeze(input, dim):
    return nd.expand_dims(input, axis=dim)
Example #18
    def generate_learned_samples(self):
        '''
        Draw and generate data.

        Returns:
            `Tuple` data. The shape is ...
            - `mxnet.ndarray` of observed data points in training.
            - `mxnet.ndarray` of supervised data in training.
            - `mxnet.ndarray` of observed data points in test.
            - `mxnet.ndarray` of supervised data in test.
        '''
        for epoch in range(self.epochs):
            training_batch_arr, test_batch_arr = None, None

            for i in range(self.batch_size):
                file_key = np.random.randint(low=0, high=len(self.__train_csv_path_list))
                train_observed_arr = self.__unlabeled_csv_extractor.extract(
                    self.__train_csv_path_list[file_key]
                )
                test_file_key = np.random.randint(low=0, high=len(self.__test_csv_path_list))
                test_observed_arr = self.__unlabeled_csv_extractor.extract(
                    self.__test_csv_path_list[test_file_key]
                )

                train_observed_arr = np.identity(
                    1 + int(train_observed_arr.max() + (train_observed_arr.min() * -1))
                )[
                    (train_observed_arr.reshape(train_observed_arr.shape[0], -1) + (train_observed_arr.min() * -1)).astype(int)
                ]

                test_observed_arr = np.identity(
                    1 + int(test_observed_arr.max() + (test_observed_arr.min() * -1))
                )[
                    (test_observed_arr.reshape(test_observed_arr.shape[0], -1) + (test_observed_arr.min() * -1)).astype(int)
                ]

                start_row = np.random.randint(low=0, high=train_observed_arr.shape[0] - self.seq_len)
                test_start_row = np.random.randint(low=0, high=test_observed_arr.shape[0] - self.seq_len)

                train_observed_arr = train_observed_arr[start_row:start_row+self.seq_len]
                test_observed_arr = test_observed_arr[test_start_row:test_start_row+self.seq_len]

                if training_batch_arr is None:
                    training_batch_arr = nd.expand_dims(
                        nd.ndarray.array(train_observed_arr, ctx=self.__ctx),
                        axis=0
                    )
                else:
                    training_batch_arr = nd.concat(
                        training_batch_arr,
                        nd.expand_dims(
                            nd.ndarray.array(train_observed_arr, ctx=self.__ctx),
                            axis=0
                        ),
                        dim=0
                    )

                if test_batch_arr is None:
                    test_batch_arr = nd.expand_dims(
                        nd.ndarray.array(test_observed_arr, ctx=self.__ctx),
                        axis=0
                    )
                else:
                    test_batch_arr = nd.concat(
                        test_batch_arr,
                        nd.expand_dims(
                            nd.ndarray.array(test_observed_arr, ctx=self.__ctx),
                            axis=0
                        ),
                        dim=0
                    )

            training_batch_arr = self.pre_normalize(training_batch_arr)
            test_batch_arr = self.pre_normalize(test_batch_arr)

            if self.__noiseable_data is not None:
                training_batch_arr = self.__noiseable_data.noise(training_batch_arr)

            yield training_batch_arr, training_batch_arr, test_batch_arr, test_batch_arr
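
The np.identity(...)[indices] idiom above is a compact one-hot encoding: values are shifted to be non-negative and then used to index an identity matrix. A standalone illustration:

import numpy as np

series = np.array([-1, 0, 2])
shifted = (series + series.min() * -1).astype(int)            # [0, 1, 3]
one_hot = np.identity(1 + int(series.max() + series.min() * -1))[shifted]
print(one_hot)
# [[1. 0. 0. 0.]
#  [0. 1. 0. 0.]
#  [0. 0. 0. 1.]]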
Example #19
def train(args):
    np.random.seed(args.seed)
    if args.cuda:
        ctx = mx.gpu(0)
    else:
        ctx = mx.cpu(0)
    # dataloader
    transform = utils.Compose([utils.Scale(args.image_size),
                               utils.CenterCrop(args.image_size),
                               utils.ToTensor(ctx),
                               ])
    train_dataset = data.ImageFolder(args.dataset, transform)
    train_loader = gluon.data.DataLoader(train_dataset, batch_size=args.batch_size, last_batch='discard')
    style_loader = utils.StyleLoader(args.style_folder, args.style_size, ctx=ctx)
    print('len(style_loader):',style_loader.size())
    # models
    vgg = net.Vgg16()
    utils.init_vgg_params(vgg, 'models', ctx=ctx)
    style_model = net.Net(ngf=args.ngf)
    style_model.initialize(init=mx.initializer.MSRAPrelu(), ctx=ctx)
    if args.resume is not None:
        print('Resuming, initializing using weight from {}.'.format(args.resume))
        style_model.collect_params().load(args.resume, ctx=ctx)
    print('style_model:',style_model)
    # optimizer and loss
    trainer = gluon.Trainer(style_model.collect_params(), 'adam',
                            {'learning_rate': args.lr})
    mse_loss = gluon.loss.L2Loss()

    for e in range(args.epochs):
        agg_content_loss = 0.
        agg_style_loss = 0.
        count = 0
        for batch_id, (x, _) in enumerate(train_loader):
            n_batch = len(x)
            count += n_batch
            # prepare data
            style_image = style_loader.get(batch_id)
            style_v = utils.subtract_imagenet_mean_preprocess_batch(style_image.copy())
            style_image = utils.preprocess_batch(style_image)

            features_style = vgg(style_v)
            gram_style = [net.gram_matrix(y) for y in features_style]

            xc = utils.subtract_imagenet_mean_preprocess_batch(x.copy())
            f_xc_c = vgg(xc)[1]
            with autograd.record():
                style_model.setTarget(style_image)
                y = style_model(x)

                y = utils.subtract_imagenet_mean_batch(y)
                features_y = vgg(y)

                content_loss = 2 * args.content_weight * mse_loss(features_y[1], f_xc_c)

                style_loss = 0.
                for m in range(len(features_y)):
                    gram_y = net.gram_matrix(features_y[m])
                    _, C, _ = gram_style[m].shape
                    gram_s = F.expand_dims(gram_style[m], 0).broadcast_to((args.batch_size, 1, C, C))
                    style_loss = style_loss + 2 * args.style_weight * mse_loss(gram_y, gram_s[:n_batch, :, :])

                total_loss = content_loss + style_loss
                total_loss.backward()
                
            trainer.step(args.batch_size)
            mx.nd.waitall()

            agg_content_loss += content_loss[0]
            agg_style_loss += style_loss[0]

            if (batch_id + 1) % args.log_interval == 0:
                mesg = "{}\tEpoch {}:\t[{}/{}]\tcontent: {:.3f}\tstyle: {:.3f}\ttotal: {:.3f}".format(
                    time.ctime(), e + 1, count, len(train_dataset),
                                agg_content_loss.asnumpy()[0] / (batch_id + 1),
                                agg_style_loss.asnumpy()[0] / (batch_id + 1),
                                (agg_content_loss + agg_style_loss).asnumpy()[0] / (batch_id + 1)
                )
                print(mesg)

            
            if (batch_id + 1) % (4 * args.log_interval) == 0:
                # save model
                save_model_filename = "Epoch_" + str(e) + "iters_" + str(count) + "_" + str(time.ctime()).replace(' ', '_') + "_" + str(
                    args.content_weight) + "_" + str(args.style_weight) + ".params"
                save_model_path = os.path.join(args.save_model_dir, save_model_filename)
                style_model.collect_params().save(save_model_path)
                print("\nCheckpoint, trained model saved at", save_model_path)

    # save model
    save_model_filename = "Final_epoch_" + str(args.epochs) + "_" + str(time.ctime()).replace(' ', '_') + "_" + str(
        args.content_weight) + "_" + str(args.style_weight) + ".params"
    save_model_path = os.path.join(args.save_model_dir, save_model_filename)
    style_model.collect_params().save(save_model_path)
    print("\nDone, trained model saved at", save_model_path)
Example #20
    def get_action(self, st):
        st = nd.expand_dims(st, axis=0)
        a_q = self.infer_q_mod.forward(is_train=False, data=st)
        a = nd.argmax_channel(a_q[0])
        return a
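
A hypothetical call, assuming agent is an instance of the surrounding class with infer_q_mod already bound:

import mxnet.ndarray as nd

state = nd.random.uniform(shape=(4,))  # raw state vector
action = agent.get_action(state)       # NDArray holding the argmax action id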