Exemple #1
0
def train_distill_model(dataset, args):
    """Train a student model by distilling from a teacher model.

    The teacher checkpoint is looked up at ``<args.model_dir>/teacher.model``.
    When that file is missing, ``train_single_model`` is invoked first and the
    checkpoint is loaded afterwards (this assumes ``train_single_model``
    writes the file at that path -- TODO confirm). The student is then trained
    for ``args.num_ep`` epochs with ``DistillationLoss``, using
    ``softmax(teacher_out, t=args.T)`` as the soft targets, evaluated on the
    test split after every epoch, and finally saved to
    ``<args.model_dir>/distill-<T>.model``.

    Args:
        dataset: A 4-tuple ``(train_x, train_y, test_x, test_y)``.
        args: Namespace providing ``lr``, ``model_dir``, ``alpha``, ``T``,
            ``batch_size`` and ``num_ep``.
    """
    # load dataset
    train_x, train_y, test_x, test_y = dataset

    # Create the output directory before any training happens so that an
    # unusable path fails immediately rather than after the last epoch.
    # exist_ok=True removes the check-then-create race of isdir()+makedirs().
    os.makedirs(args.model_dir, exist_ok=True)

    # load or train a teacher model
    teacher = Model(net=teacher_net,
                    loss=SoftmaxCrossEntropy(),
                    optimizer=Adam(lr=args.lr))
    teacher_model_path = os.path.join(args.model_dir, "teacher.model")
    if not os.path.isfile(teacher_model_path):
        print("No teacher model founded. Training a new one...")
        train_single_model(teacher, dataset, args, name="teacher")
    teacher.load(teacher_model_path)
    teacher.set_phase("TEST")

    print("training distill model")
    # define a student model
    student = Model(net=student_net,
                    loss=DistillationLoss(alpha=args.alpha, T=args.T),
                    optimizer=Adam(lr=args.lr))

    # run training
    iterator = BatchIterator(batch_size=args.batch_size)
    for epoch in range(args.num_ep):
        t_start = time.time()
        for batch in iterator(train_x, train_y):
            pred = student.forward(batch.inputs)
            teacher_out = teacher.forward(batch.inputs)
            teacher_out_prob = softmax(teacher_out, t=args.T)

            # NOTE(review): the loss value is not used afterwards; the call is
            # kept because it is not visible here whether loss() has side
            # effects that grad() relies on.
            loss = student.loss.loss(pred, batch.targets, teacher_out_prob)
            grad_from_loss = student.loss.grad(pred, batch.targets,
                                               teacher_out_prob)
            grads = student.net.backward(grad_from_loss)
            student.apply_grads(grads)
        print("Epoch %d time cost: %.4f" % (epoch, time.time() - t_start))

        # evaluate
        student.set_phase("TEST")
        hit, total = 0, 0
        for batch in iterator(test_x, test_y):
            pred = student.forward(batch.inputs)
            res = accuracy(np.argmax(pred, 1), np.argmax(batch.targets, 1))
            hit += res["hit_num"]
            total += res["total_num"]
        # An empty test split would otherwise raise ZeroDivisionError and
        # abort training; report NaN instead.
        acc = 1.0 * hit / total if total else float("nan")
        print("accuracy: %.4f" % acc)
        student.set_phase("TRAIN")

    # save the distilled model
    model_path = os.path.join(args.model_dir, "distill-%d.model" % args.T)
    student.save(model_path)
    print("model saved in %s" % model_path)
Exemple #2
0
def evaluate(args):
    """Sample from a saved MLP generator and write the samples as an image.

    The generator checkpoint is read from
    ``<args.output_dir>/<args.model_name>``. Noise of size ``(128, args.nz)``
    is forwarded through it and the result is written to
    ``<args.output_dir>/evaluate.png``.

    Args:
        args: Namespace providing ``output_dir``, ``model_name`` and ``nz``.
    """
    generator = Model(net=G_mlp(), loss=None, optimizer=None)

    out_dir = args.output_dir
    checkpoint = os.path.join(out_dir, args.model_name)
    print("Loading model from ", checkpoint)
    generator.load(checkpoint)

    # forward a fixed-size batch of noise through the generator
    samples = generator.forward(get_noise(size=(128, args.nz)))

    # make sure the destination directory exists before writing
    if not os.path.exists(out_dir):
        os.makedirs(out_dir)
    save_batch_as_images(os.path.join(out_dir, "evaluate.png"), samples)
Exemple #3
0
def main(args):
    """Fit an MLP to the dataset built from ``args.img`` and save renderings.

    ``prepare_dataset(args.img)`` supplies the inputs, targets and image
    shape. A Dense/ReLU stack ending in ``Dense(3)`` + ``Sigmoid`` is trained
    with MSE loss and Adam for ``args.num_ep`` epochs. After each epoch the
    MSE over the whole training set is printed. When ``epoch % 5 == 0`` the
    predictions are reshaped to ``(img_shape[0], img_shape[1], -1)``, scaled
    by 255, cast to ``uint8`` and saved under ``args.output_dir``.

    Args:
        args: Namespace providing ``seed``, ``img``, ``batch_size``,
            ``num_ep`` and ``output_dir``.
    """
    if args.seed >= 0:
        random_seed(args.seed)

    # data preparing
    train_x, train_y, img_shape = prepare_dataset(args.img)

    # hidden stack: each Dense layer is followed by a ReLU, created in order
    layers = []
    for width in (30, 100, 100, 30):
        layers.append(Dense(width))
        layers.append(ReLU())
    # 3-unit output squashed by a sigmoid
    layers.append(Dense(3))
    layers.append(Sigmoid())
    net = Net(layers)

    model = Model(net=net, loss=MSE(), optimizer=Adam())
    iterator = BatchIterator(batch_size=args.batch_size)
    for epoch in range(args.num_ep):
        for batch in iterator(train_x, train_y):
            batch_preds = model.forward(batch.inputs)
            _, grads = model.backward(batch_preds, batch.targets)
            model.apply_grads(grads)

        # evaluate on the full training set
        full_preds = net.forward(train_x)
        print("Epoch %d %s" % (epoch, mean_square_error(full_preds, train_y)))

        # only every fifth epoch produces a painting
        if epoch % 5 != 0:
            continue

        # generate painting
        canvas = full_preds.reshape(img_shape[0], img_shape[1], -1)
        canvas = (canvas * 255.0).astype("uint8")

        # output name: <input basename>-paint-epoch<epoch><input extension>
        stem, ext = os.path.splitext(args.img)
        out_filename = "".join(
            [os.path.basename(stem), "-paint-epoch", str(epoch), ext])

        if not os.path.exists(args.output_dir):
            os.makedirs(args.output_dir)
        out_path = os.path.join(args.output_dir, out_filename)
        Image.fromarray(canvas).save(out_path)
        print("save painting to %s" % out_path)
Exemple #4
0
def test_parameters_change(fake_dataset):
    """Check that applying gradients changes every model parameter.

    A two-layer ``Dense`` network is run through one forward/backward pass on
    the fixture data, the gradients are applied with ``SGD(lr=1.0)``, and the
    parameter values before and after are compared element-wise.

    Args:
        fake_dataset: Fixture yielding an ``(X, y)`` pair.
    """
    import copy

    # fake dataset
    X, y = fake_dataset
    # simple model
    net = Net([Dense(10), Dense(1)])
    model = Model(net, MSE(), SGD(lr=1.0))

    # forward and backward
    pred = model.forward(X)
    _, grads = model.backward(pred, y)

    # Snapshot the parameters by value. Without the copy, the "before" list
    # could alias the live parameter arrays if the optimizer updates them in
    # place, and the comparison below would compare each array with itself.
    params_before = [copy.deepcopy(p) for p in model.net.params.values]
    model.apply_grads(grads)
    params_after = list(model.net.params.values)

    # parameters change test
    # zip() stops silently at the shorter input, so first make sure there is
    # something to compare and that both snapshots line up.
    assert params_before, "model exposes no parameters to compare"
    assert len(params_before) == len(params_after)
    for p1, p2 in zip(params_before, params_after):
        assert np.all(p1 != p2)
Exemple #5
0
def train(args):
    """Train a generator/discriminator pair on MNIST.

    All three splits returned by ``mnist(args.data_dir)`` are concatenated.
    Depending on ``args.model_type`` the CNN or MLP networks are used; the
    CNN variant reshapes the inputs to ``(-1, 28, 28, 1)``. Each iteration
    updates the discriminator on a real and a generated batch, then updates
    the generator through the discriminator's input gradient. After every
    epoch a sample image generated from a fixed noise batch and the generator
    checkpoint are written to ``args.output_dir``.

    Args:
        args: Namespace providing ``data_dir``, ``model_type``, ``batch_size``,
            ``nz``, ``lr_g``, ``lr_d``, ``beta1``, ``num_ep``, ``output_dir``
            and ``model_name``.

    Raises:
        ValueError: If ``args.model_type`` is neither ``"cnn"`` nor ``"mlp"``.
    """
    # prepare dataset: merge the three splits into one pool
    train_split, valid_split, test_split = mnist(args.data_dir)
    pool = (train_split, valid_split, test_split)
    images = np.concatenate([part[0] for part in pool])
    labels = np.concatenate([part[1] for part in pool])

    model_type = args.model_type
    if model_type not in ("cnn", "mlp"):
        raise ValueError("Invalid argument: model_type")
    if model_type == "cnn":
        images = images.reshape((-1, 28, 28, 1))
        gen_net, disc_net = G_cnn(), D_cnn()
    else:
        gen_net, disc_net = G_mlp(), D_mlp()

    # noise batch reused for the per-epoch sample image
    fix_noise = get_noise(size=(args.batch_size, args.nz))
    criterion = SigmoidCrossEntropy()
    generator = Model(net=gen_net,
                      loss=criterion,
                      optimizer=Adam(args.lr_g, beta1=args.beta1))
    discriminator = Model(net=disc_net,
                          loss=criterion,
                          optimizer=Adam(args.lr_d, beta1=args.beta1))

    # exponentially smoothed errors used for logging only
    smooth_g, smooth_d = 0, 0
    iterator = BatchIterator(batch_size=args.batch_size)
    for epoch in range(args.num_ep):
        epoch_no = epoch + 1
        for step, batch in enumerate(iterator(images, labels)):
            # --- Train Discriminator ---
            # feed with real data (maximize log(D(x)))
            real_scores = discriminator.forward(batch.inputs)
            ones = np.ones_like(real_scores)
            real_err, real_grad = discriminator.backward(real_scores, ones)

            # feed with fake data (maximize log(1 - D(G(z))))
            z = get_noise(size=(len(batch.inputs), args.nz))
            fakes = generator.forward(z)
            fake_scores = discriminator.forward(fakes)
            zeros = np.zeros_like(fake_scores)
            fake_err, fake_grad = discriminator.backward(fake_scores, zeros)

            # train D on the sum of both gradients
            d_err = real_err + fake_err
            discriminator.apply_grads(real_grad + fake_grad)

            # ---- Train Generator ---
            # maximize log(D(G(z))): score the same fakes with the updated D
            # against the "real" labels
            fake_scores = discriminator.forward(fakes)
            g_err, grad_via_d = discriminator.backward(fake_scores, ones)
            generator.apply_grads(
                generator.net.backward(grad_via_d.wrt_input))

            smooth_d = 0.9 * smooth_d + 0.1 * d_err
            smooth_g = 0.9 * smooth_g + 0.1 * g_err
            if step % 100 == 0:
                print("epoch-%d iter-%d d_err: %.4f g_err: %.4f" %
                      (epoch_no, step + 1, smooth_d, smooth_g))

        # sampling
        print("epoch: %d/%d d_err: %.4f g_err: %.4f" %
              (epoch_no, args.num_ep, smooth_d, smooth_g))
        samples = generator.forward(fix_noise)
        if not os.path.exists(args.output_dir):
            os.makedirs(args.output_dir)
        save_batch_as_images(
            os.path.join(args.output_dir, "ep%d.png" % epoch_no), samples)

        # save generator
        model_path = os.path.join(args.output_dir, args.model_name)
        generator.save(model_path)
        print("Saving generator ", model_path)
Exemple #6
0
def main(args):
    """Train an MNIST classifier and save the trained model.

    The network is chosen by ``args.model_type`` (``"mlp"``, ``"cnn"`` or
    ``"rnn"``); the CNN and RNN variants reshape the inputs to
    ``(-1, 28, 28, 1)`` and ``(-1, 28, 28)`` respectively. The model is
    trained with softmax cross-entropy and Adam for ``args.num_ep`` epochs.
    After each epoch the accuracy result on the test split is printed. The
    final model is saved to
    ``<args.model_dir>/mnist-<model_type>-epoch<num_ep>.pkl``.

    Args:
        args: Namespace providing ``seed``, ``data_dir``, ``model_type``,
            ``lr``, ``batch_size``, ``num_ep`` and ``model_dir``.

    Raises:
        ValueError: If ``args.model_type`` is not one of the supported values.
    """
    if args.seed >= 0:
        random_seed(args.seed)

    # the validation split is not used
    train_set, _, test_set = mnist(args.data_dir, one_hot=True)
    train_x, train_y = train_set
    test_x, test_y = test_set

    if args.model_type == "mlp":
        # A multilayer perceptron model
        net = Net([
            Dense(200),
            ReLU(),
            Dense(100),
            ReLU(),
            Dense(70),
            ReLU(),
            Dense(30),
            ReLU(),
            Dense(10)
        ])
    elif args.model_type == "cnn":
        # A LeNet-5 model with activation function changed to ReLU
        train_x = train_x.reshape((-1, 28, 28, 1))
        test_x = test_x.reshape((-1, 28, 28, 1))
        net = Net([
            Conv2D(kernel=[5, 5, 1, 6], stride=[1, 1]),
            ReLU(),
            MaxPool2D(pool_size=[2, 2], stride=[2, 2]),
            Conv2D(kernel=[5, 5, 6, 16], stride=[1, 1]),
            ReLU(),
            MaxPool2D(pool_size=[2, 2], stride=[2, 2]),
            Flatten(),
            Dense(120),
            ReLU(),
            Dense(84),
            ReLU(),
            Dense(10)
        ])
    elif args.model_type == "rnn":
        # A simple recurrent neural net to classify images.
        train_x = train_x.reshape((-1, 28, 28))
        test_x = test_x.reshape((-1, 28, 28))
        net = Net([RNN(num_hidden=50, activation=Tanh()), Dense(10)])
    else:
        raise ValueError("Invalid argument: model_type")

    # Create the output directory before training so that an unusable path
    # fails immediately rather than after the last epoch. exist_ok=True
    # removes the check-then-create race of isdir()+makedirs().
    os.makedirs(args.model_dir, exist_ok=True)

    model = Model(net=net,
                  loss=SoftmaxCrossEntropy(),
                  optimizer=Adam(lr=args.lr))

    iterator = BatchIterator(batch_size=args.batch_size)
    for epoch in range(args.num_ep):
        t_start = time.time()
        for batch in iterator(train_x, train_y):
            pred = model.forward(batch.inputs)
            # the per-batch loss value is not needed here; previously it was
            # accumulated in a list that was never read
            _, grads = model.backward(pred, batch.targets)
            model.apply_grads(grads)
        print("Epoch %d time cost: %.4f" % (epoch, time.time() - t_start))

        # evaluate on the whole test split in a single forward pass
        model.set_phase("TEST")
        test_pred = model.forward(test_x)
        test_pred_idx = np.argmax(test_pred, axis=1)
        test_y_idx = np.argmax(test_y, axis=1)
        res = accuracy(test_pred_idx, test_y_idx)
        print(res)
        model.set_phase("TRAIN")

    # save model
    model_name = "mnist-%s-epoch%d.pkl" % (args.model_type, args.num_ep)
    model_path = os.path.join(args.model_dir, model_name)
    model.save(model_path)
    print("model saved in %s" % model_path)