Example #1
def main():
	# load MNIST images
	images, labels = dataset.load_train_images()

	# config
	config = model.config

	# settings
	max_epoch = 10000
	num_trains_per_epoch = 5000
	num_validation_data = 10000
	batchsize = 128

	# seed
	np.random.seed(args.seed)
	if args.gpu_device != -1:
		cuda.cupy.random.seed(args.seed)

	# save validation accuracy per epoch
	csv_results = []

	# create semi-supervised split
	training_images, training_labels, validation_images, validation_labels = dataset.split_data(images, labels, num_validation_data, seed=args.seed)
	training_labels = np.random.randint(0, config.num_classes, training_labels.size).astype(np.int32)
	validation_labels = np.random.randint(0, config.num_classes, validation_labels.size).astype(np.int32)

	# training
	progress = Progress()
	for epoch in xrange(1, max_epoch):
		progress.start_epoch(epoch, max_epoch)
		sum_loss = 0

		for t in xrange(num_trains_per_epoch):
			# sample from data distribution
			image_batch, label_batch = dataset.sample_data(training_images, training_labels, batchsize, binarize=False)
			image_batch = np.reshape(image_batch, (-1, 1, 28, 28))
			distribution = model.discriminate(image_batch, apply_softmax=False)
			loss = F.softmax_cross_entropy(distribution, model.to_variable(label_batch))
			sum_loss += float(loss.data)

			model.backprop(loss)

			if t % 10 == 0:
				progress.show(t, num_trains_per_epoch, {})

		model.save(args.model_dir)
		train_accuracy = compute_accuracy(training_images, training_labels)
		validation_accuracy = compute_accuracy(validation_images, validation_labels)
		
		progress.show(num_trains_per_epoch, num_trains_per_epoch, {
			"loss": sum_loss / num_trains_per_epoch,
			"accuracy (validation)": validation_accuracy,
			"accuracy (train)": train_accuracy,
		})

		# write accuracy to csv
		csv_results.append([epoch, train_accuracy, validation_accuracy, progress.get_total_time()])
		data = pd.DataFrame(csv_results)
		data.columns = ["epoch", "train_accuracy", "validation_accuracy", "min"]
		data.to_csv("{}/result.csv".format(args.model_dir))
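
The training loop above calls a compute_accuracy helper that is not shown in this excerpt. A minimal sketch of what it might look like, reusing the same module-level model, F and np as the loop itself (hypothetical; the real project's helper may differ):

def compute_accuracy(images, labels, batchsize=500):
	# hypothetical helper: evaluate classification accuracy in mini-batches
	num_batches = len(images) // batchsize
	sum_accuracy = 0
	for i in range(num_batches):
		image_batch = np.reshape(images[i * batchsize:(i + 1) * batchsize], (-1, 1, 28, 28))
		label_batch = labels[i * batchsize:(i + 1) * batchsize]
		logits = model.discriminate(image_batch, apply_softmax=False)
		accuracy = F.accuracy(logits, model.to_variable(label_batch))
		sum_accuracy += float(accuracy.data)
	return sum_accuracy / num_batches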
Example #2
def main():
	# load MNIST images
	images, labels = dataset.load_train_images()

	# config
	discriminator_config = gan.config_discriminator
	generator_config = gan.config_generator

	# settings
	max_epoch = 1000
	num_updates_per_epoch = 500
	plot_interval = 5
	batchsize_true = 100
	batchsize_fake = batchsize_true

	# seed
	np.random.seed(args.seed)
	if args.gpu_device != -1:
		cuda.cupy.random.seed(args.seed)

	# training
	progress = Progress()
	for epoch in xrange(1, max_epoch + 1):
		progress.start_epoch(epoch, max_epoch)
		sum_loss_critic = 0
		sum_loss_generator = 0

		for t in xrange(num_updates_per_epoch):

			for k in xrange(discriminator_config.num_critic):
				# clamp parameters to a cube
				gan.clip_discriminator_weights()
				# gan.decay_discriminator_weights()

				# sample true data from data distribution
				images_true = dataset.sample_data(images, batchsize_true, binarize=False)
				# sample fake data from generator
				images_fake = gan.generate_x(batchsize_fake)
				images_fake.unchain_backward()

				fw_true, activations_true = gan.discriminate(images_true)
				fw_fake, _ = gan.discriminate(images_fake)

				loss_critic = -F.sum(fw_true - fw_fake) / batchsize_true
				sum_loss_critic += float(loss_critic.data) / discriminator_config.num_critic

				# update discriminator
				gan.backprop_discriminator(loss_critic)

			# generator loss
			images_fake = gan.generate_x(batchsize_fake)
			fw_fake, activations_fake = gan.discriminate(images_fake)
			loss_generator = -F.sum(fw_fake) / batchsize_fake

			# feature matching
			if discriminator_config.use_feature_matching:
				features_true = activations_true[-1]
				features_true.unchain_backward()
				if batchsize_true != batchsize_fake:
					images_fake = gan.generate_x(batchsize_true)
					_, activations_fake = gan.discriminate(images_fake, apply_softmax=False)
				features_fake = activations_fake[-1]
				loss_generator += F.mean_squared_error(features_true, features_fake)

			# update generator
			gan.backprop_generator(loss_generator)
			sum_loss_generator += float(loss_generator.data)
			if t % 10 == 0:
				progress.show(t, num_updates_per_epoch, {})

		gan.save(args.model_dir)

		progress.show(num_updates_per_epoch, num_updates_per_epoch, {
			"wasserstein": -sum_loss_critic / num_updates_per_epoch,
			"loss_g": sum_loss_generator / num_updates_per_epoch,
		})

		if epoch % plot_interval == 0 or epoch == 1:
			plot(filename="epoch_{}_time_{}min".format(epoch, progress.get_total_time()))
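
The gan.clip_discriminator_weights() call above is the WGAN weight-clamping step; its implementation is not part of this excerpt. A minimal Chainer sketch of what such a method typically does, assuming the critic is a chainer.Chain and using the WGAN paper's default bound of 0.01 (the project presumably reads the bound from its config):

import chainer

def clip_discriminator_weights(discriminator, clip_value=0.01):
	# clamp every critic parameter into [-clip_value, clip_value],
	# as prescribed by the WGAN algorithm
	for param in discriminator.params():
		xp = chainer.cuda.get_array_module(param.data)
		param.data[...] = xp.clip(param.data, -clip_value, clip_value)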
Example #3
File: train.py Project: cai-mj/ddgm
def main():
    # load MNIST images
    images = load_rgb_images(args.image_dir)

    # config
    config_energy_model = to_object(params_energy_model["config"])
    config_generative_model = to_object(params_generative_model["config"])

    # settings
    max_epoch = 1000
    n_trains_per_epoch = 500
    batchsize_positive = 128
    batchsize_negative = 128
    plot_interval = 5

    # seed
    np.random.seed(args.seed)
    if args.gpu_device != -1:
        cuda.cupy.random.seed(args.seed)

    # init weightnorm layers
    if config_energy_model.use_weightnorm:
        print "initializing weight normalization layers of the energy model ..."
        x_positive = sample_from_data(images, batchsize_positive * 5)
        ddgm.compute_energy(x_positive)

    if config_generative_model.use_weightnorm:
        print "initializing weight normalization layers of the generative model ..."
        x_negative = ddgm.generate_x(batchsize_negative * 5)

    progress = Progress()
    for epoch in xrange(1, max_epoch):
        progress.start_epoch(epoch, max_epoch)
        sum_energy_positive = 0
        sum_energy_negative = 0
        sum_loss = 0
        sum_kld = 0

        for t in xrange(n_trains_per_epoch):
            # sample from data distribution
            x_positive = sample_from_data(images, batchsize_positive)

            # sample from generator
            x_negative = ddgm.generate_x(batchsize_negative)

            # train energy model
            energy_positive = ddgm.compute_energy_sum(x_positive)
            energy_negative = ddgm.compute_energy_sum(x_negative)
            loss = energy_positive - energy_negative
            ddgm.backprop_energy_model(loss)

            # train generative model
            # TODO: KLD must be greater than or equal to 0
            x_negative = ddgm.generate_x(batchsize_negative)
            kld = ddgm.compute_kld_between_generator_and_energy_model(
                x_negative)
            ddgm.backprop_generative_model(kld)

            sum_energy_positive += float(energy_positive.data)
            sum_energy_negative += float(energy_negative.data)
            sum_loss += float(loss.data)
            sum_kld += float(kld.data)
            progress.show(t, n_trains_per_epoch, {})

        progress.show(
            n_trains_per_epoch, n_trains_per_epoch, {
                "x+": int(sum_energy_positive / n_trains_per_epoch),
                "x-": int(sum_energy_negative / n_trains_per_epoch),
                "loss": sum_loss / n_trains_per_epoch,
                "kld": sum_kld / n_trains_per_epoch
            })
        ddgm.save(args.model_dir)

        if epoch % plot_interval == 0 or epoch == 1:
            plot(filename="epoch_{}_time_{}min".format(
                epoch, progress.get_total_time()))
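
Several of these scripts forward an oversized batch through the network once before training whenever use_weightnorm is set. With weight normalization, that first forward pass typically performs the data-dependent initialization of the scale g and bias b so that pre-activations start with roughly zero mean and unit variance (Salimans & Kingma, 2016). A toy numpy sketch of that initialization for a single dense layer (illustrative only, not the project's code):

import numpy as np

def weightnorm_init(x, n_out, init_scale=1.0):
    # weight-normalized dense layer: W = g * v / ||v||, with g and b
    # initialized from the statistics of the first mini-batch
    n_in = x.shape[1]
    v = np.random.randn(n_in, n_out) * 0.05
    v_norm = v / np.linalg.norm(v, axis=0, keepdims=True)
    t = x.dot(v_norm)                           # pre-activations with g = 1, b = 0
    mean, std = t.mean(axis=0), t.std(axis=0)
    g = init_scale / std                        # rescale to unit variance
    b = -mean * g                               # shift to zero mean
    return v_norm, g, b

x_init = np.random.rand(640, 784)               # e.g. batchsize_positive * 5 samples
v, g, b = weightnorm_init(x_init, n_out=128)
print((x_init.dot(v) * g + b).std(axis=0)[:5])  # close to 1.0 after init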
Example #4
def main():
    images = load_rgb_images(args.image_dir)

    # config
    discriminator_config = gan.config_discriminator
    generator_config = gan.config_generator

    # settings
    max_epoch = 1000
    num_updates_per_epoch = 500
    batchsize_true = 128
    batchsize_fake = 128
    plot_interval = 5

    # seed
    np.random.seed(args.seed)
    if args.gpu_device != -1:
        cuda.cupy.random.seed(args.seed)

    # init weightnorm layers
    if discriminator_config.use_weightnorm:
        print "initializing weight normalization layers of the discriminator ..."
        x_true = sample_from_data(images, batchsize_true)
        gan.discriminate(x_true)

    if generator_config.use_weightnorm:
        print "initializing weight normalization layers of the generator ..."
        gan.generate_x(batchsize_fake)

    # training
    progress = Progress()
    for epoch in xrange(1, max_epoch + 1):
        progress.start_epoch(epoch, max_epoch)
        sum_loss_unsupervised = 0
        sum_loss_adversarial = 0
        sum_dx_unlabeled = 0
        sum_dx_generated = 0

        for t in xrange(num_updates_per_epoch):
            # sample data
            x_true = sample_from_data(images, batchsize_true)
            x_fake = gan.generate_x(batchsize_fake)
            x_fake.unchain_backward()

            # unsupervised loss
            # D(x) = Z(x) / {Z(x) + 1}, where Z(x) = \sum_{k=1}^K exp(l_k(x))
            # softplus(x) := log(1 + exp(x))
            # logD(x) = logZ(x) - log(Z(x) + 1)
            # 		  = logZ(x) - log(exp(log(Z(x))) + 1)
            # 		  = logZ(x) - softplus(logZ(x))
            # 1 - D(x) = 1 / {Z(x) + 1}
            # log{1 - D(x)} = log1 - log(Z(x) + 1)
            # 				= -log(exp(log(Z(x))) + 1)
            # 				= -softplus(logZ(x))
            log_zx_u, activations_u = gan.discriminate(x_true,
                                                       apply_softmax=False)
            log_dx_u = log_zx_u - F.softplus(log_zx_u)
            dx_u = F.sum(F.exp(log_dx_u)) / batchsize_true
            loss_unsupervised = -F.sum(
                log_dx_u) / batchsize_true  # minimize negative logD(x)
            py_x_g, _ = gan.discriminate(x_fake, apply_softmax=False)
            log_zx_g = F.logsumexp(py_x_g, axis=1)
            loss_unsupervised += F.sum(F.softplus(
                log_zx_g)) / batchsize_true  # minimize negative log{1 - D(x)}

            # update discriminator
            gan.backprop_discriminator(loss_unsupervised)

            sum_loss_unsupervised += float(loss_unsupervised.data)
            sum_dx_unlabeled += float(dx_u.data)

            # generator loss
            x_fake = gan.generate_x(batchsize_fake)
            log_zx_g, activations_g = gan.discriminate(x_fake,
                                                       apply_softmax=False)
            log_dx_g = log_zx_g - F.softplus(log_zx_g)
            dx_g = F.sum(F.exp(log_dx_g)) / batchsize_fake
            loss_generator = -F.sum(
                log_dx_g) / batchsize_true  # minimize negative logD(x)

            # feature matching
            if discriminator_config.use_feature_matching:
                features_true = activations_u[-1]
                features_true.unchain_backward()
                if batchsize_true != batchsize_fake:
                    x_fake = gan.generate_x(batchsize_true)
                    _, activations_g = gan.discriminate(x_fake,
                                                        apply_softmax=False)
                features_fake = activations_g[-1]
                loss_generator += F.mean_squared_error(features_true,
                                                       features_fake)

            # update generator
            gan.backprop_generator(loss_generator)

            sum_loss_adversarial += float(loss_generator.data)
            sum_dx_generated += float(dx_g.data)
            if t % 10 == 0:
                progress.show(t, num_updates_per_epoch, {})

        gan.save(args.model_dir)

        progress.show(
            num_updates_per_epoch, num_updates_per_epoch, {
                "loss_u": sum_loss_unsupervised / num_updates_per_epoch,
                "loss_g": sum_loss_adversarial / num_updates_per_epoch,
                "dx_u": sum_dx_unlabeled / num_updates_per_epoch,
                "dx_g": sum_dx_generated / num_updates_per_epoch,
            })

        if epoch % plot_interval == 0 or epoch == 1:
            plot(filename="epoch_{}_time_{}min".format(
                epoch, progress.get_total_time()))
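
The comment block in this example (and again in Example #5 below) derives logD(x) = logZ(x) - softplus(logZ(x)) and log{1 - D(x)} = -softplus(logZ(x)), where Z(x) = sum_k exp(l_k(x)) is obtained from the logits via logsumexp. A short self-contained numpy check of those identities (purely illustrative, independent of the project code):

import numpy as np

def softplus(x):
    return np.log1p(np.exp(x))

logits = np.random.randn(4, 10)              # l_k(x) for a batch of 4, K = 10 classes
log_zx = np.log(np.sum(np.exp(logits), 1))   # logZ(x), i.e. logsumexp over classes
d_x = np.exp(log_zx) / (np.exp(log_zx) + 1)  # D(x) = Z(x) / (Z(x) + 1)

log_dx = log_zx - softplus(log_zx)           # logD(x), as in the discriminator loss
log_one_minus_dx = -softplus(log_zx)         # log{1 - D(x)}, as in the generator term

assert np.allclose(np.exp(log_dx), d_x)
assert np.allclose(np.exp(log_one_minus_dx), 1 - d_x)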
Example #5
def main():
    # load MNIST images
    images, labels = dataset.load_train_images()

    # config
    discriminator_config = gan.config_discriminator
    generator_config = gan.config_generator

    # settings
    # _l -> labeled
    # _u -> unlabeled
    # _g -> generated
    max_epoch = 1000
    num_trains_per_epoch = 500
    plot_interval = 5
    batchsize_l = 100
    batchsize_u = 100
    batchsize_g = batchsize_u

    # seed
    np.random.seed(args.seed)
    if args.gpu_device != -1:
        cuda.cupy.random.seed(args.seed)

    # save validation accuracy per epoch
    csv_results = []

    # create semi-supervised split
    num_validation_data = 10000
    num_labeled_data = args.num_labeled
    if batchsize_l > num_labeled_data:
        batchsize_l = num_labeled_data

    training_images_l, training_labels_l, training_images_u, validation_images, validation_labels = dataset.create_semisupervised(
        images,
        labels,
        num_validation_data,
        num_labeled_data,
        discriminator_config.ndim_output,
        seed=args.seed)
    print training_labels_l

    # training
    progress = Progress()
    for epoch in xrange(1, max_epoch):
        progress.start_epoch(epoch, max_epoch)
        sum_loss_supervised = 0
        sum_loss_unsupervised = 0
        sum_loss_adversarial = 0
        sum_dx_labeled = 0
        sum_dx_unlabeled = 0
        sum_dx_generated = 0

        gan.update_learning_rate(get_learning_rate_for_epoch(epoch))

        for t in xrange(num_trains_per_epoch):
            # sample from data distribution
            images_l, label_onehot_l, label_ids_l = dataset.sample_labeled_data(
                training_images_l,
                training_labels_l,
                batchsize_l,
                discriminator_config.ndim_input,
                discriminator_config.ndim_output,
                binarize=False)
            images_u = dataset.sample_unlabeled_data(
                training_images_u,
                batchsize_u,
                discriminator_config.ndim_input,
                binarize=False)
            images_g = gan.generate_x(batchsize_g)
            images_g.unchain_backward()

            # supervised loss
            py_x_l, activations_l = gan.discriminate(images_l,
                                                     apply_softmax=False)
            loss_supervised = F.softmax_cross_entropy(
                py_x_l, gan.to_variable(label_ids_l))

            log_zx_l = F.logsumexp(py_x_l, axis=1)
            log_dx_l = log_zx_l - F.softplus(log_zx_l)
            dx_l = F.sum(F.exp(log_dx_l)) / batchsize_l

            # unsupervised loss
            # D(x) = Z(x) / {Z(x) + 1}, where Z(x) = \sum_{k=1}^K exp(l_k(x))
            # softplus(x) := log(1 + exp(x))
            # logD(x) = logZ(x) - log(Z(x) + 1)
            # 		  = logZ(x) - log(exp(log(Z(x))) + 1)
            # 		  = logZ(x) - softplus(logZ(x))
            # 1 - D(x) = 1 / {Z(x) + 1}
            # log{1 - D(x)} = log1 - log(Z(x) + 1)
            # 				= -log(exp(log(Z(x))) + 1)
            # 				= -softplus(logZ(x))
            py_x_u, _ = gan.discriminate(images_u, apply_softmax=False)
            log_zx_u = F.logsumexp(py_x_u, axis=1)
            log_dx_u = log_zx_u - F.softplus(log_zx_u)
            dx_u = F.sum(F.exp(log_dx_u)) / batchsize_u
            loss_unsupervised = -F.sum(
                log_dx_u) / batchsize_u  # minimize negative logD(x)
            py_x_g, _ = gan.discriminate(images_g, apply_softmax=False)
            log_zx_g = F.logsumexp(py_x_g, axis=1)
            loss_unsupervised += F.sum(F.softplus(
                log_zx_g)) / batchsize_u  # minimize negative log{1 - D(x)}

            # update discriminator
            gan.backprop_discriminator(loss_supervised + loss_unsupervised)

            # adversarial loss
            images_g = gan.generate_x(batchsize_g)
            py_x_g, activations_g = gan.discriminate(images_g,
                                                     apply_softmax=False)
            log_zx_g = F.logsumexp(py_x_g, axis=1)
            log_dx_g = log_zx_g - F.softplus(log_zx_g)
            dx_g = F.sum(F.exp(log_dx_g)) / batchsize_g
            loss_adversarial = -F.sum(
                log_dx_g) / batchsize_u  # minimize negative logD(x)

            # feature matching
            if discriminator_config.use_feature_matching:
                features_true = activations_l[-1]
                features_true.unchain_backward()
                if batchsize_l != batchsize_g:
                    images_g = gan.generate_x(batchsize_l)
                    _, activations_g = gan.discriminate(images_g,
                                                        apply_softmax=False)
                features_fake = activations_g[-1]
                loss_adversarial += F.mean_squared_error(
                    features_true, features_fake)

            # update generator
            gan.backprop_generator(loss_adversarial)

            sum_loss_supervised += float(loss_supervised.data)
            sum_loss_unsupervised += float(loss_unsupervised.data)
            sum_loss_adversarial += float(loss_adversarial.data)
            sum_dx_labeled += float(dx_l.data)
            sum_dx_unlabeled += float(dx_u.data)
            sum_dx_generated += float(dx_g.data)
            if t % 10 == 0:
                progress.show(t, num_trains_per_epoch, {})

        gan.save(args.model_dir)

        # validation
        images_l, _, label_ids_l = dataset.sample_labeled_data(
            validation_images,
            validation_labels,
            num_validation_data,
            discriminator_config.ndim_input,
            discriminator_config.ndim_output,
            binarize=False)
        images_l_segments = np.split(images_l, num_validation_data // 500)
        label_ids_l_segments = np.split(label_ids_l,
                                        num_validation_data // 500)
        sum_accuracy = 0
        for images_l, label_ids_l in zip(images_l_segments,
                                         label_ids_l_segments):
            y_distribution, _ = gan.discriminate(images_l,
                                                 apply_softmax=True,
                                                 test=True)
            accuracy = F.accuracy(y_distribution, gan.to_variable(label_ids_l))
            sum_accuracy += float(accuracy.data)
        validation_accuracy = sum_accuracy / len(images_l_segments)

        progress.show(
            num_trains_per_epoch, num_trains_per_epoch, {
                "loss_l": sum_loss_supervised / num_trains_per_epoch,
                "loss_u": sum_loss_unsupervised / num_trains_per_epoch,
                "loss_g": sum_loss_adversarial / num_trains_per_epoch,
                "dx_l": sum_dx_labeled / num_trains_per_epoch,
                "dx_u": sum_dx_unlabeled / num_trains_per_epoch,
                "dx_g": sum_dx_generated / num_trains_per_epoch,
                "accuracy": validation_accuracy,
            })

        # write accuracy to csv
        csv_results.append(
            [epoch, validation_accuracy,
             progress.get_total_time()])
        data = pd.DataFrame(csv_results)
        data.columns = ["epoch", "accuracy", "min"]
        data.to_csv("{}/result.csv".format(args.model_dir))

        if epoch % plot_interval == 0 or epoch == 1:
            plot(filename="epoch_{}_time_{}min".format(
                epoch, progress.get_total_time()))
Example #6
def main():
    images = load_rgb_images(args.image_dir)
    config = began.config

    # settings
    max_epoch = 1000
    batchsize = 16
    num_updates_per_epoch = int(len(images) / batchsize)
    plot_interval = 5

    # seed
    np.random.seed(args.seed)
    if args.gpu_device != -1:
        cuda.cupy.random.seed(args.seed)

    # training
    kt = 0
    lambda_k = 0.001
    progress = Progress()
    for epoch in xrange(1, max_epoch + 1):
        progress.start_epoch(epoch, max_epoch)
        sum_loss_d = 0
        sum_loss_g = 0
        sum_M = 0

        for t in xrange(num_updates_per_epoch):
            # sample data
            images_real = sample_from_data(images, batchsize)
            images_fake = began.generate_x(batchsize)

            loss_real = began.compute_loss(images_real)
            loss_fake = began.compute_loss(images_fake)

            loss_d = loss_real - kt * loss_fake
            loss_g = loss_fake

            began.backprop_discriminator(loss_d)
            began.backprop_generator(loss_g)

            loss_d = float(loss_d.data)
            loss_g = float(loss_g.data)
            loss_real = float(loss_real.data)
            loss_fake = float(loss_fake.data)

            sum_loss_d += loss_d
            sum_loss_g += loss_g

            # update control parameters
            kt += lambda_k * (config.gamma * loss_real - loss_fake)
            kt = max(0, min(1, kt))
            M = loss_real + abs(config.gamma * loss_real - loss_fake)
            sum_M += M

            if t % 10 == 0:
                progress.show(t, num_updates_per_epoch, {})

        began.save(args.model_dir)

        progress.show(
            num_updates_per_epoch, num_updates_per_epoch, {
                "loss_d": sum_loss_d / num_updates_per_epoch,
                "loss_g": sum_loss_g / num_updates_per_epoch,
                "k": kt,
                "M": sum_M / num_updates_per_epoch,
            })

        if epoch % plot_interval == 0 or epoch == 1:
            plot_generator_outputs(
                filename="generator_epoch_{}_time_{}_min".format(
                    epoch, progress.get_total_time()))
            plot_autoencoder_outputs(
                images,
                filename="autoencoder_epoch_{}_time_{}_min".format(
                    epoch, progress.get_total_time()))
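
The kt update above is the BEGAN balance term k_{t+1} = k_t + lambda_k * (gamma * L(x) - L(G(z))), clipped to [0, 1], and M = L(x) + |gamma * L(x) - L(G(z))| is the convergence measure that gets logged. A standalone sketch of the same update with illustrative loss values (gamma = 0.5 is an assumed value; the script reads it from config.gamma):

def update_began_control(kt, loss_real, loss_fake, gamma=0.5, lambda_k=0.001):
    # proportional control keeping the fake reconstruction loss near gamma * real loss
    kt = kt + lambda_k * (gamma * loss_real - loss_fake)
    kt = max(0.0, min(1.0, kt))
    # global convergence measure reported during training
    convergence = loss_real + abs(gamma * loss_real - loss_fake)
    return kt, convergence

kt, M = update_began_control(kt=0.0, loss_real=0.12, loss_fake=0.05)
print(kt, M)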
Example #7
def main():
    # load MNIST images
    images, labels = dataset.load_train_images()

    # config
    discriminator_config = gan.config_discriminator
    generator_config = gan.config_generator

    # labels
    a = discriminator_config.a
    b = discriminator_config.b
    c = discriminator_config.c

    # settings
    max_epoch = 1000
    num_updates_per_epoch = 500
    plot_interval = 5
    batchsize_true = 100
    batchsize_fake = batchsize_true

    # seed
    np.random.seed(args.seed)
    if args.gpu_device != -1:
        cuda.cupy.random.seed(args.seed)

    # training
    progress = Progress()
    for epoch in xrange(1, max_epoch + 1):
        progress.start_epoch(epoch, max_epoch)
        sum_loss_d = 0
        sum_loss_g = 0

        for t in xrange(num_updates_per_epoch):
            # sample true data from data distribution
            images_true = dataset.sample_data(images,
                                              batchsize_true,
                                              binarize=False)
            # sample fake data from generator
            images_fake = gan.generate_x(batchsize_fake)
            images_fake.unchain_backward()

            d_true = gan.discriminate(images_true, return_activations=False)
            d_fake = gan.discriminate(images_fake, return_activations=False)

            loss_d = 0.5 * (F.sum((d_true - b)**2) + F.sum(
                (d_fake - a)**2)) / batchsize_true
            sum_loss_d += float(loss_d.data)

            # update discriminator
            gan.backprop_discriminator(loss_d)

            # generator loss
            images_fake = gan.generate_x(batchsize_fake)
            d_fake = gan.discriminate(images_fake, return_activations=False)
            loss_g = 0.5 * (F.sum((d_fake - c)**2)) / batchsize_fake
            sum_loss_g += float(loss_g.data)

            # update generator
            gan.backprop_generator(loss_g)

            if t % 10 == 0:
                progress.show(t, num_updates_per_epoch, {})

        gan.save(args.model_dir)

        progress.show(
            num_updates_per_epoch, num_updates_per_epoch, {
                "loss_d": sum_loss_d / num_updates_per_epoch,
                "loss_g": sum_loss_g / num_updates_per_epoch,
            })

        if epoch % plot_interval == 0 or epoch == 1:
            plot(filename="epoch_{}_time_{}min".format(
                epoch, progress.get_total_time()))
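
This example (and Example #9 below) is an LSGAN: a, b and c are the target discriminator outputs for fake samples, real samples, and the value the generator tries to make the discriminator produce, read here from discriminator_config. A common choice is a = 0, b = 1, c = 1 (an assumption; the project's config may use other values, e.g. a = -1, b = 1, c = 0). A minimal numpy sketch of the two losses with those targets:

import numpy as np

a, b, c = 0.0, 1.0, 1.0                      # assumed target values

d_true = np.random.uniform(0.6, 1.0, 100)    # stand-ins for D(x) on real data
d_fake = np.random.uniform(0.0, 0.4, 100)    # stand-ins for D(G(z))

# discriminator: push D(x) toward b and D(G(z)) toward a
loss_d = 0.5 * (np.sum((d_true - b) ** 2) + np.sum((d_fake - a) ** 2)) / len(d_true)
# generator: push D(G(z)) toward c
loss_g = 0.5 * np.sum((d_fake - c) ** 2) / len(d_fake)
print(loss_d, loss_g)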
Example #8
def main():
    # load MNIST images
    images, labels = dataset.load_train_images()

    # config
    config = sdgm.config

    # settings
    max_epoch = 1000
    num_trains_per_epoch = 500
    batchsize_l = 100
    batchsize_u = 100
    alpha = 1

    # seed
    np.random.seed(args.seed)
    if args.gpu_device != -1:
        cuda.cupy.random.seed(args.seed)

    # save validation accuracy per epoch
    csv_results = []

    # create semi-supervised split
    num_validation_data = 10000
    num_labeled_data = 100
    num_types_of_label = 10
    training_images_l, training_labels_l, training_images_u, validation_images, validation_labels = dataset.create_semisupervised(
        images,
        labels,
        num_validation_data,
        num_labeled_data,
        num_types_of_label,
        seed=args.seed)
    print training_labels_l

    # init weightnorm layers
    if config.use_weightnorm:
        print "initializing weight normalization layers ..."
        images_l, label_onehot_l, label_id_l = dataset.sample_labeled_data(
            training_images_l, training_labels_l, batchsize_l, config.ndim_x,
            config.ndim_y)
        images_u = dataset.sample_unlabeled_data(training_images_u,
                                                 batchsize_u, config.ndim_x)
        sdgm.compute_lower_bound(images_l, label_onehot_l, images_u)

    # training
    temperature = 1
    progress = Progress()
    for epoch in xrange(1, max_epoch):
        progress.start_epoch(epoch, max_epoch)
        sum_lower_bound_l = 0
        sum_lower_bound_u = 0
        sum_loss_classifier = 0

        for t in xrange(num_trains_per_epoch):
            # sample from data distribution
            images_l, label_onehot_l, label_ids_l = dataset.sample_labeled_data(
                training_images_l, training_labels_l, batchsize_l,
                config.ndim_x, config.ndim_y)
            images_u = dataset.sample_unlabeled_data(training_images_u,
                                                     batchsize_u,
                                                     config.ndim_x)

            # lower bound loss using gumbel-softmax
            lower_bound, lb_labeled, lb_unlabeled = sdgm.compute_lower_bound_gumbel(
                images_l, label_onehot_l, images_u, temperature)
            loss_lower_bound = -lower_bound

            # classification loss
            unnormalized_y_distribution = sdgm.encode_x_y_distribution(
                images_l, softmax=False)
            loss_classifier = alpha * F.softmax_cross_entropy(
                unnormalized_y_distribution, sdgm.to_variable(label_ids_l))

            # backprop
            sdgm.backprop(loss_classifier + loss_lower_bound)

            sum_lower_bound_l += float(lb_labeled.data)
            sum_lower_bound_u += float(lb_unlabeled.data)
            sum_loss_classifier += float(loss_classifier.data)
            progress.show(t, num_trains_per_epoch, {})

        sdgm.save(args.model_dir)

        # validation
        images_l, _, label_ids_l = dataset.sample_labeled_data(
            validation_images, validation_labels, num_validation_data,
            config.ndim_x, config.ndim_y)
        images_l_segments = np.split(images_l, num_validation_data // 500)
        label_ids_l_segments = np.split(label_ids_l,
                                        num_validation_data // 500)
        sum_accuracy = 0
        for images_l, label_ids_l in zip(images_l_segments,
                                         label_ids_l_segments):
            y_distribution = sdgm.encode_x_y_distribution(images_l,
                                                          softmax=True,
                                                          test=True)
            accuracy = F.accuracy(y_distribution,
                                  sdgm.to_variable(label_ids_l))
            sum_accuracy += float(accuracy.data)
        validation_accuracy = sum_accuracy / len(images_l_segments)

        progress.show(
            num_trains_per_epoch, num_trains_per_epoch, {
                "lb_u": sum_lower_bound_u / num_trains_per_epoch,
                "lb_l": sum_lower_bound_l / num_trains_per_epoch,
                "loss_spv": sum_loss_classifier / num_trains_per_epoch,
                "accuracy": validation_accuracy,
                "tmp": temperature,
            })

        # anneal the temperature
        temperature = max(0.5, temperature * 0.999)

        # write accuracy to csv
        csv_results.append(
            [epoch, validation_accuracy,
             progress.get_total_time()])
        data = pd.DataFrame(csv_results)
        data.columns = ["epoch", "accuracy", "min"]
        data.to_csv("{}/result.csv".format(args.model_dir))
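
The lower bound in this example is estimated with Gumbel-softmax samples of y, and the temperature is annealed from 1 toward 0.5 at the end of each epoch. The sampling itself happens inside sdgm.compute_lower_bound_gumbel; a self-contained numpy sketch of Gumbel-softmax sampling (illustrative only):

import numpy as np

def gumbel_softmax_sample(logits, temperature, eps=1e-10):
    # add Gumbel(0, 1) noise to the logits and apply a tempered softmax
    u = np.random.uniform(0.0, 1.0, logits.shape)
    gumbel = -np.log(-np.log(u + eps) + eps)
    y = (logits + gumbel) / temperature
    y = y - np.max(y, axis=1, keepdims=True)        # numerical stability
    y = np.exp(y)
    return y / np.sum(y, axis=1, keepdims=True)

logits = np.random.randn(4, 10)
print(gumbel_softmax_sample(logits, temperature=1.0))   # soft samples
print(gumbel_softmax_sample(logits, temperature=0.5))   # sharper, closer to one-hot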
Example #9
def main():
    images = load_rgb_images(args.image_dir)

    # config
    discriminator_config = gan.config_discriminator
    generator_config = gan.config_generator

    # labels
    a = discriminator_config.a
    b = discriminator_config.b
    c = discriminator_config.c

    # settings
    max_epoch = 1000
    num_updates_per_epoch = 500
    batchsize_true = 128
    batchsize_fake = 128
    plot_interval = 5

    # seed
    np.random.seed(args.seed)
    if args.gpu_device != -1:
        cuda.cupy.random.seed(args.seed)

    # init weightnorm layers
    if discriminator_config.use_weightnorm:
        print "initializing weight normalization layers of the discriminator ..."
        images_true = sample_from_data(images, batchsize_true)
        gan.discriminate(images_true)

    if generator_config.use_weightnorm:
        print "initializing weight normalization layers of the generator ..."
        gan.generate_x(batchsize_fake)

    # training
    progress = Progress()
    for epoch in xrange(1, max_epoch + 1):
        progress.start_epoch(epoch, max_epoch)
        sum_loss_d = 0
        sum_loss_g = 0

        for t in xrange(num_updates_per_epoch):
            # sample data
            images_true = sample_from_data(images, batchsize_true)
            images_fake = gan.generate_x(batchsize_true)
            images_fake.unchain_backward()

            d_true = gan.discriminate(images_true, return_activations=False)
            d_fake = gan.discriminate(images_fake, return_activations=False)

            loss_d = 0.5 * (F.sum((d_true - b)**2) + F.sum(
                (d_fake - a)**2)) / batchsize_true
            sum_loss_d += float(loss_d.data)

            # update discriminator
            gan.backprop_discriminator(loss_d)

            # generator loss
            images_fake = gan.generate_x(batchsize_fake)
            d_fake = gan.discriminate(images_fake, return_activations=False)
            loss_g = 0.5 * (F.sum((d_fake - c)**2)) / batchsize_fake
            sum_loss_g += float(loss_g.data)

            # update generator
            gan.backprop_generator(loss_g)

            if t % 10 == 0:
                progress.show(t, num_updates_per_epoch, {})

        gan.save(args.model_dir)

        progress.show(
            num_updates_per_epoch, num_updates_per_epoch, {
                "loss_d": sum_loss_d / num_updates_per_epoch,
                "loss_g": sum_loss_g / num_updates_per_epoch,
            })

        if epoch % plot_interval == 0 or epoch == 1:
            plot(filename="epoch_{}_time_{}min".format(
                epoch, progress.get_total_time()))
Example #10
def main():
    # load MNIST images
    images, labels = dataset.load_train_images()

    # config
    config = vat.config

    # settings
    max_epoch = 1000
    num_trains_per_epoch = 500
    batchsize_l = 100
    batchsize_u = 200

    # seed
    np.random.seed(args.seed)
    if args.gpu_device != -1:
        cuda.cupy.random.seed(args.seed)

    # save validation accuracy per epoch
    csv_results = []

    # create semi-supervised split
    num_validation_data = 10000
    num_labeled_data = 100
    training_images_l, training_labels_l, training_images_u, validation_images, validation_labels = dataset.create_semisupervised(
        images,
        labels,
        num_validation_data,
        num_labeled_data,
        config.ndim_y,
        seed=args.seed)
    print training_labels_l

    # training
    progress = Progress()
    for epoch in xrange(1, max_epoch):
        progress.start_epoch(epoch, max_epoch)
        sum_loss_supervised = 0
        sum_loss_lds = 0

        for t in xrange(num_trains_per_epoch):
            # sample from data distribution
            images_l, label_onehot_l, label_ids_l = dataset.sample_labeled_data(
                training_images_l,
                training_labels_l,
                batchsize_l,
                config.ndim_x,
                config.ndim_y,
                binarize=False)
            images_u = dataset.sample_unlabeled_data(training_images_u,
                                                     batchsize_u,
                                                     config.ndim_x,
                                                     binarize=False)

            # supervised loss
            unnormalized_y_distribution = vat.encode_x_y(images_l,
                                                         apply_softmax=False)
            loss_supervised = F.softmax_cross_entropy(
                unnormalized_y_distribution, vat.to_variable(label_ids_l))

            # virtual adversarial training
            lds_l = -F.sum(vat.compute_lds(images_l)) / batchsize_l
            lds_u = -F.sum(vat.compute_lds(images_u)) / batchsize_u
            loss_lds = lds_l + lds_u

            # backprop
            vat.backprop(loss_supervised + config.lambda_ * loss_lds)

            sum_loss_supervised += float(loss_supervised.data)
            sum_loss_lds += float(loss_lds.data)
            if t % 10 == 0:
                progress.show(t, num_trains_per_epoch, {})

        vat.save(args.model_dir)

        # validation
        images_l, _, label_ids_l = dataset.sample_labeled_data(
            validation_images,
            validation_labels,
            num_validation_data,
            config.ndim_x,
            config.ndim_y,
            binarize=False)
        images_l_segments = np.split(images_l, num_validation_data // 500)
        label_ids_l_segments = np.split(label_ids_l,
                                        num_validation_data // 500)
        sum_accuracy = 0
        for images_l, label_ids_l in zip(images_l_segments,
                                         label_ids_l_segments):
            y_distribution = vat.encode_x_y(images_l,
                                            apply_softmax=True,
                                            test=True)
            accuracy = F.accuracy(y_distribution, vat.to_variable(label_ids_l))
            sum_accuracy += float(accuracy.data)
        validation_accuracy = sum_accuracy / len(images_l_segments)

        progress.show(
            num_trains_per_epoch, num_trains_per_epoch, {
                "loss_spv": sum_loss_supervised / num_trains_per_epoch,
                "loss_lds": sum_loss_lds / num_trains_per_epoch,
                "accuracy": validation_accuracy,
            })

        # write accuracy to csv
        csv_results.append(
            [epoch, validation_accuracy,
             progress.get_total_time()])
        data = pd.DataFrame(csv_results)
        data.columns = ["epoch", "accuracy", "min"]
        data.to_csv("{}/result.csv".format(args.model_dir))
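
In the VAT formulation, vat.compute_lds returns the local distributional smoothness LDS(x) = -KL(p(y|x) || p(y|x + r_adv)), where r_adv is a small adversarial perturbation found by power iteration, so the loss above minimizes that KL term on both labeled and unlabeled batches. A toy numpy sketch of the quantity, using a fixed linear-softmax classifier and a random unit perturbation in place of the power-iteration step (illustrative only; the project computes this on the trained network itself):

import numpy as np

def softmax(z):
    z = z - np.max(z, axis=1, keepdims=True)
    e = np.exp(z)
    return e / np.sum(e, axis=1, keepdims=True)

W = np.random.randn(784, 10) * 0.01            # toy classifier weights
x = np.random.rand(100, 784)                   # a batch of inputs

# random unit perturbation of radius epsilon (VAT uses the adversarial direction)
epsilon = 1.0
r = np.random.randn(*x.shape)
r = epsilon * r / np.linalg.norm(r, axis=1, keepdims=True)

p = softmax(x.dot(W))
q = softmax((x + r).dot(W))
kl = np.sum(p * (np.log(p + 1e-10) - np.log(q + 1e-10)), axis=1)
lds = -kl                                      # local distributional smoothness per example
print(np.mean(lds))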
Example #11
def main():
	images = load_rgb_images(args.image_dir)

	# config
	discriminator_config = gan.config_discriminator
	generator_config = gan.config_generator

	# settings
	max_epoch = 1000
	num_updates_per_epoch = 500
	batchsize_true = 128
	batchsize_fake = 128
	plot_interval = 5

	# seed
	np.random.seed(args.seed)
	if args.gpu_device != -1:
		cuda.cupy.random.seed(args.seed)

	# init weightnorm layers
	if discriminator_config.use_weightnorm:
		print "initializing weight normalization layers of the discriminator ..."
		x_true = sample_from_data(images, batchsize_true)
		gan.discriminate(x_true)

	if generator_config.use_weightnorm:
		print "initializing weight normalization layers of the generator ..."
		gan.generate_x(batchsize_fake)

	# training
	progress = Progress()
	for epoch in xrange(1, max_epoch + 1):
		progress.start_epoch(epoch, max_epoch)
		sum_loss_critic = 0
		sum_loss_generator = 0
		learning_rate = get_learning_rate_for_epoch(epoch)
		gan.update_learning_rate(learning_rate)

		for t in xrange(num_updates_per_epoch):

			for k in xrange(discriminator_config.num_critic):
				# clamp parameters to a cube
				gan.clip_discriminator_weights()
				# gan.scale_discriminator_weights()

				# sample data
				x_true = sample_from_data(images, batchsize_true)
				x_fake = gan.generate_x(batchsize_true)
				x_fake.unchain_backward()

				fw_u, activations_u = gan.discriminate(x_true)
				fw_g, _ = gan.discriminate(x_fake)

				loss_critic = -F.sum(fw_u - fw_g) / batchsize_true
				sum_loss_critic += float(loss_critic.data) / discriminator_config.num_critic

				# update discriminator
				gan.backprop_discriminator(loss_critic)

			# generator loss
			x_fake = gan.generate_x(batchsize_fake)
			fw_g, activations_g = gan.discriminate(x_fake)
			loss_generator = -F.sum(fw_g) / batchsize_fake

			# feature matching
			if discriminator_config.use_feature_matching:
				features_true = activations_u[-1]
				features_true.unchain_backward()
				if batchsize_true != batchsize_fake:
					x_fake = gan.generate_x(batchsize_true)
					_, activations_g = gan.discriminate(x_fake, apply_softmax=False)
				features_fake = activations_g[-1]
				loss_generator += F.mean_squared_error(features_true, features_fake)

			# update generator
			gan.backprop_generator(loss_generator)
			sum_loss_generator += float(loss_generator.data)
			
			if t % 10 == 0:
				progress.show(t, num_updates_per_epoch, {})

		gan.save(args.model_dir)

		progress.show(num_updates_per_epoch, num_updates_per_epoch, {
			"wasserstein": -sum_loss_critic / num_updates_per_epoch,
			"loss_g": sum_loss_generator / num_updates_per_epoch,
			"lr": learning_rate
		})

		if epoch % plot_interval == 0 or epoch == 1:
			plot(filename="epoch_{}_time_{}min".format(epoch, progress.get_total_time()))
Example #12
def main():
    images = load_rgb_images(args.image_dir)

    # config
    config = chainer.config

    # settings
    max_epoch = 1000
    num_updates_per_epoch = 500
    batchsize_true = 128
    batchsize_fake = 128
    plot_interval = 5

    # seed
    np.random.seed(args.seed)
    if args.gpu_device != -1:
        cuda.cupy.random.seed(args.seed)

    # training
    progress = Progress()
    for epoch in xrange(1, max_epoch + 1):
        with chainer.using_config("train", True):
            progress.start_epoch(epoch, max_epoch)
            sum_loss_critic = 0
            sum_loss_generator = 0
            learning_rate = get_learning_rate_for_epoch(epoch)
            gan.update_learning_rate(learning_rate)

            for t in xrange(num_updates_per_epoch):

                for k in xrange(config.discriminator.num_critic):
                    # clamp parameters to a cube
                    gan.clip_discriminator_weights()

                    # sample data
                    x_true = sample_from_data(images, batchsize_true)
                    x_fake = gan.generate_x(batchsize_true)
                    x_fake.unchain_backward()

                    fw_u, activations_u = gan.discriminate(x_true)
                    fw_g, _ = gan.discriminate(x_fake)

                    loss_critic = -F.sum(fw_u - fw_g) / batchsize_true
                    sum_loss_critic += float(
                        loss_critic.data) / config.discriminator.num_critic

                    # update discriminator
                    gan.backprop_discriminator(loss_critic)

                # generator loss
                x_fake = gan.generate_x(batchsize_fake)
                fw_g, activations_g = gan.discriminate(x_fake)
                loss_generator = -F.sum(fw_g) / batchsize_fake

                # update generator
                gan.backprop_generator(loss_generator)
                sum_loss_generator += float(loss_generator.data)

                if t % 10 == 0:
                    progress.show(t, num_updates_per_epoch, {})

            gan.save(args.model_dir)

            progress.show(
                num_updates_per_epoch, num_updates_per_epoch, {
                    "wasserstein": -sum_loss_critic / num_updates_per_epoch,
                    "loss_g": sum_loss_generator / num_updates_per_epoch,
                    "lr": learning_rate
                })

        with chainer.using_config("train", False):
            if epoch % plot_interval == 0 or epoch == 1:
                plot(filename="epoch_{}_time_{}min".format(
                    epoch, progress.get_total_time()))
Example #13
def main():
    images = load_rgb_images(args.image_dir)

    # config
    discriminator_config = gan.config_discriminator
    generator_config = gan.config_generator

    # settings
    max_epoch = 1000
    n_trains_per_epoch = 500
    batchsize_true = 128
    batchsize_fake = 128
    plot_interval = 5

    # seed
    np.random.seed(args.seed)
    if args.gpu_device != -1:
        cuda.cupy.random.seed(args.seed)

    # init weightnorm layers
    if discriminator_config.use_weightnorm:
        print "initializing weight normalization layers of the discriminator ..."
        x_true = sample_from_data(images, batchsize_true)
        gan.discriminate(x_true)

    if generator_config.use_weightnorm:
        print "initializing weight normalization layers of the generator ..."
        gan.generate_x(batchsize_fake)

    # classification
    # 0 -> true sample
    # 1 -> generated sample
    class_true = gan.to_variable(np.zeros(batchsize_true, dtype=np.int32))
    class_fake = gan.to_variable(np.ones(batchsize_fake, dtype=np.int32))

    # training
    progress = Progress()
    for epoch in xrange(1, max_epoch):
        progress.start_epoch(epoch, max_epoch)
        sum_loss_discriminator = 0
        sum_loss_generator = 0
        sum_loss_vat = 0

        for t in xrange(n_trains_per_epoch):
            # sample data
            x_true = sample_from_data(images, batchsize_true)
            x_fake = gan.generate_x(batchsize_fake).data  # unchain

            # train discriminator
            discrimination_true, activations_true = gan.discriminate(
                x_true, apply_softmax=False)
            discrimination_fake, _ = gan.discriminate(x_fake,
                                                      apply_softmax=False)
            loss_discriminator = F.softmax_cross_entropy(
                discrimination_true, class_true) + F.softmax_cross_entropy(
                    discrimination_fake, class_fake)
            gan.backprop_discriminator(loss_discriminator)

            # virtual adversarial training
            loss_vat = 0
            if discriminator_config.use_virtual_adversarial_training:
                z = gan.sample_z(batchsize_fake)
                loss_vat = -F.sum(gan.compute_lds(z)) / batchsize_fake
                gan.backprop_discriminator(loss_vat)
                sum_loss_vat += float(loss_vat.data)

            # train generator
            x_fake = gan.generate_x(batchsize_fake)
            discrimination_fake, activations_fake = gan.discriminate(
                x_fake, apply_softmax=False)
            loss_generator = F.softmax_cross_entropy(discrimination_fake,
                                                     class_true)

            # feature matching
            if discriminator_config.use_feature_matching:
                features_true = activations_true[-1]
                features_fake = activations_fake[-1]
                loss_generator += F.mean_squared_error(features_true,
                                                       features_fake)

            gan.backprop_generator(loss_generator)

            sum_loss_discriminator += float(loss_discriminator.data)
            sum_loss_generator += float(loss_generator.data)
            if t % 10 == 0:
                progress.show(t, n_trains_per_epoch, {})

        progress.show(
            n_trains_per_epoch, n_trains_per_epoch, {
                "loss_d": sum_loss_discriminator / n_trains_per_epoch,
                "loss_g": sum_loss_generator / n_trains_per_epoch,
                "loss_vat": sum_loss_vat / n_trains_per_epoch,
            })
        gan.save(args.model_dir)

        if epoch % plot_interval == 0 or epoch == 1:
            plot(filename="epoch_{}_time_{}min".format(
                epoch, progress.get_total_time()))
Example #14
def main():
	# load MNIST images
	images, labels = dataset.load_train_images()

	# config
	config = sdgm.config

	# settings
	max_epoch = 1000
	num_trains_per_epoch = 500
	batchsize_l = 100
	batchsize_u = 100
	alpha = 1

	# seed
	np.random.seed(args.seed)
	if args.gpu_device != -1:
		cuda.cupy.random.seed(args.seed)

	# save validation accuracy per epoch
	csv_results = []

	# create semi-supervised split
	num_validation_data = 10000
	num_labeled_data = 100
	num_types_of_label = 10
	training_images_l, training_labels_l, training_images_u, validation_images, validation_labels = dataset.create_semisupervised(images, labels, num_validation_data, num_labeled_data, num_types_of_label, seed=args.seed)
	print training_labels_l

	# init weightnorm layers
	if config.use_weightnorm:
		print "initializing weight normalization layers ..."
		images_l, label_onehot_l, label_id_l = dataset.sample_labeled_data(training_images_l, training_labels_l, batchsize_l, config.ndim_x, config.ndim_y)
		images_u = dataset.sample_unlabeled_data(training_images_u, batchsize_u, config.ndim_x)
		sdgm.compute_lower_bound(images_l, label_onehot_l, images_u)

	# training
	temperature = 1
	progress = Progress()
	for epoch in xrange(1, max_epoch):
		progress.start_epoch(epoch, max_epoch)
		sum_lower_bound_l = 0
		sum_lower_bound_u = 0
		sum_loss_classifier = 0

		for t in xrange(num_trains_per_epoch):
			# sample from data distribution
			images_l, label_onehot_l, label_ids_l = dataset.sample_labeled_data(training_images_l, training_labels_l, batchsize_l, config.ndim_x, config.ndim_y)
			images_u = dataset.sample_unlabeled_data(training_images_u, batchsize_u, config.ndim_x)

			# lower bound loss using gumbel-softmax
			lower_bound, lb_labeled, lb_unlabeled = sdgm.compute_lower_bound_gumbel(images_l, label_onehot_l, images_u, temperature)
			loss_lower_bound = -lower_bound

			# classification loss
			unnormalized_y_distribution = sdgm.encode_x_y_distribution(images_l, softmax=False)
			loss_classifier = alpha * F.softmax_cross_entropy(unnormalized_y_distribution, sdgm.to_variable(label_ids_l))

			# backprop
			sdgm.backprop(loss_classifier + loss_lower_bound)

			sum_lower_bound_l += float(lb_labeled.data)
			sum_lower_bound_u += float(lb_unlabeled.data)
			sum_loss_classifier += float(loss_classifier.data)
			progress.show(t, num_trains_per_epoch, {})

		sdgm.save(args.model_dir)

		# validation
		images_l, _, label_ids_l = dataset.sample_labeled_data(validation_images, validation_labels, num_validation_data, config.ndim_x, config.ndim_y)
		images_l_segments = np.split(images_l, num_validation_data // 500)
		label_ids_l_segments = np.split(label_ids_l, num_validation_data // 500)
		sum_accuracy = 0
		for images_l, label_ids_l in zip(images_l_segments, label_ids_l_segments):
			y_distribution = sdgm.encode_x_y_distribution(images_l, softmax=True, test=True)
			accuracy = F.accuracy(y_distribution, sdgm.to_variable(label_ids_l))
			sum_accuracy += float(accuracy.data)
		validation_accuracy = sum_accuracy / len(images_l_segments)
		
		progress.show(num_trains_per_epoch, num_trains_per_epoch, {
			"lb_u": sum_lower_bound_u / num_trains_per_epoch,
			"lb_l": sum_lower_bound_l / num_trains_per_epoch,
			"loss_spv": sum_loss_classifier / num_trains_per_epoch,
			"accuracy": validation_accuracy,
			"tmp": temperature,
		})

		# anneal the temperature
		temperature = max(0.5, temperature * 0.999)

		# write accuracy to csv
		csv_results.append([epoch, validation_accuracy, progress.get_total_time()])
		data = pd.DataFrame(csv_results)
		data.columns = ["epoch", "accuracy", "min"]
		data.to_csv("{}/result.csv".format(args.model_dir))