def train_test(self):
        """Train on ``self.data_train`` and evaluate on ``self.data_test``.

        Weights are restored from a checkpoint when ``config.load_model`` is
        True, otherwise freshly initialized.  Optimization uses Adam with an
        exponentially decayed learning rate and an RMSE objective.  Average
        train/test losses are collected per epoch, plotted at the end, and
        the final parameters are saved.
        """
        # Load model if a checkpoint exists, otherwise initialize weights.
        if self.config.load_model is True:
            self.model.load_model()
        else:
            self.model.weight_init()
            print('weight is initialized')

        # Optimizer.  NOTE(review): weight_decay=1.0 is unusually strong —
        # confirm it is intentional and not a leftover experiment.
        self.momentum = 0.9
        self.optimizer = optim.Adam(self.model.parameters(), lr=self.config.lr, weight_decay=1.0)

        # scheduler = lr_scheduler.StepLR(self.optimizer, step_size=70, gamma=0.01)
        scheduler = lr_scheduler.ExponentialLR(self.optimizer, gamma=0.9)

        # Loss function (moved to GPU together with the model when requested).
        if self.config.gpu_mode:
            self.model.cuda()
            self.MSE_loss = nn.MSELoss().cuda()
        else:
            self.MSE_loss = nn.MSELoss()

        print('---------- Networks architecture -------------')
        utils.print_network(self.model)
        print('----------------------------------------------')

        # Load dataset loaders.
        train_data_loader = self.data_train
        test_data_loader = self.data_test

        ################# Train #################
        print('Training is started.')
        avg_loss = []           # per-epoch average train loss
        avg_loss_test = []      # per-epoch average test loss (original data)
        avg_loss_log_test = []  # per-epoch average test loss (log data)
        step = 0

        es = EarlyStopping(patience=8)

        self.model.train()  # sets training mode; model.eval() sets testing mode
        for epoch in range(self.config.num_epochs):
            epoch_loss = 0.0
            for batch_idx, (input, target, _) in enumerate(train_data_loader):
                # input data (low resolution image)
                if self.config.gpu_mode:
                    x_ = Variable(input.cuda())
                    y_ = Variable(target.cuda())
                else:
                    x_ = Variable(input)
                    y_ = Variable(target)

                # Update network with an RMSE objective.
                self.optimizer.zero_grad()
                model_out = self.model(x_)
                loss = torch.sqrt(self.MSE_loss(model_out, y_))
                loss.backward()
                self.optimizer.step()

                # Log.  Use .item() so the autograd graph of each batch is not
                # kept alive by the accumulator (the original summed tensors,
                # retaining every graph for the whole epoch).
                batch_loss = loss.item()
                epoch_loss += batch_loss
                print("Epoch: [%2d] [%4d/%4d] loss: %.8f" % ((epoch + 1), (batch_idx + 1), len(train_data_loader), batch_loss))

                # tensorboard logging
                self.logger.scalar_summary('loss', batch_loss, step + 1)
                step += 1

            # Decay the learning rate after this epoch's optimizer steps
            # (calling scheduler.step() before optimizer.step() is the
            # deprecated pre-1.1 ordering).
            scheduler.step()

            # avg. loss per epoch (plain float now that .item() is used)
            avg_loss.append(epoch_loss / len(train_data_loader))

            if (epoch + 1) % self.config.save_epochs == 0:
                self.model.save_model(epoch + 1)

            # Calculate test loss without building autograd graphs.
            with torch.no_grad():
                loss_test, loss_log_test = self.test(test_data_loader)

            epoch_loss_test = loss_test / len(test_data_loader)
            epoch_loss_log_test = loss_log_test / len(test_data_loader)

            avg_loss_test.append(float(epoch_loss_test))
            avg_loss_log_test.append(float(epoch_loss_log_test))

            # if es.step(float(epoch_loss_test)):
            #     self.model.save_model(epoch=None)
            #     print('Early stop at %2d epoch' % (epoch + 1))
            #     break

        # Plot avg. loss
        utils.plot_loss(self.config, [avg_loss, avg_loss_log_test])
        utils.plot_loss(self.config, [avg_loss_test], origin=True)

        print('avg_loss: ', avg_loss[-1])
        print('avg_loss_log with original data: ', avg_loss_test[-1])
        print('avg_loss_log with log data: ', avg_loss_log_test[-1])
        print("Training and test is finished.")

        # Save final trained parameters of model
        self.model.save_model(epoch=None)
Esempio n. 2
0
    def train(self,
              train_source_datagen,
              train_target_datagen,
              val_datagen,
              pixel_mean,
              interval,
              train_iter_num,
              val_iter_num,
              pre_model_path=None):
        """
        Training loop for DANN (domain-adversarial neural network).
        :param train_source_datagen: source-domain training data generator
        :param train_target_datagen: target-domain training data generator
        :param val_datagen: validation data generator
        :param pixel_mean: dataset pixel mean (kept for interface
               compatibility; not used inside this method)
        :param interval: validation interval, in epochs
        :param train_iter_num: number of training iterations per epoch
        :param val_iter_num: number of iterations per validation pass
        :param pre_model_path: path of a pretrained ckpt model; pass the path
               up to and including ".ckpt" only.
        """
        # Create run-specific directories (checkpoints/logs/results), all
        # keyed by the current timestamp.
        time = datetime.datetime.now().strftime("%Y%m%d%H%M%S")
        checkpoint_dir = os.path.join(self.cfg.checkpoints_dir, time)
        if not os.path.exists(checkpoint_dir):
            os.mkdir(checkpoint_dir)

        log_dir = os.path.join(self.cfg.logs_dir, time)
        if not os.path.exists(log_dir):
            os.mkdir(log_dir)

        result_dir = os.path.join(self.cfg.result_dir, time)
        if not os.path.exists(result_dir):
            os.mkdir(result_dir)

        self.cfg.save_config(time)

        # Histories of training losses and accuracy (one entry per epoch).
        train_loss_results = []             # total training loss
        train_image_cls_loss_results = []   # image-classification loss
        train_domain_cls_loss_results = []  # domain-classification loss
        train_accuracy_results = []         # training accuracy

        # Histories of validation losses/accuracy and the best accuracy so far.
        val_ep = []
        val_loss_results = []
        val_image_cls_loss_results = []
        val_domain_cls_loss_results = []
        val_accuracy_results = []
        val_acc_max = 0  # best validation accuracy seen so far

        with tf.Session() as sess:
            # Initialize variables.
            sess.run(tf.global_variables_initializer())

            # Restore a pretrained model if one is given (path up to .ckpt).
            if pre_model_path is not None:
                saver_restore = tf.train.import_meta_graph(pre_model_path +
                                                           ".meta")
                saver_restore.restore(sess, pre_model_path)
                print("restore model from : %s" % (pre_model_path))

            self.merged = tf.summary.merge_all()
            self.writer = tf.summary.FileWriter(log_dir, sess.graph)

            print('\n----------- start to train -----------\n')

            total_global_step = self.cfg.epoch * train_iter_num
            for ep in np.arange(self.cfg.epoch):
                # Per-epoch running averages of losses and accuracy.
                epoch_loss_avg = AverageMeter()
                epoch_image_cls_loss_avg = AverageMeter()
                epoch_domain_cls_loss_avg = AverageMeter()
                epoch_accuracy = AverageMeter()

                # Progress bar for this epoch.
                progbar = K.utils.Progbar(train_iter_num)
                print('Epoch {}/{}'.format(ep + 1, self.cfg.epoch))
                # Domain labels: first half of the batch is source domain,
                # second half is target domain.
                batch_domain_labels = np.vstack([
                    np.tile([1., 0.], [self.cfg.batch_size // 2, 1]),
                    np.tile([0., 1.], [self.cfg.batch_size // 2, 1])
                ])
                for i in np.arange(1, train_iter_num + 1):
                    # Fetch mini-batches and their image labels.
                    batch_mnist_image_data, batch_mnist_labels = \
                        train_source_datagen.__next__()
                    batch_mnist_m_image_data, batch_mnist_m_labels = \
                        train_target_datagen.__next__()

                    # Schedule the learning rate and the GRL lambda from the
                    # training progress.  BUGFIX: ``ep`` starts at 0, so the
                    # original ``(ep - 1) * train_iter_num + i`` produced
                    # negative global steps for the entire first epoch,
                    # distorting both schedules.
                    global_step = ep * train_iter_num + i
                    process = global_step * 1.0 / total_global_step
                    learning_rate = learning_rate_schedule(
                        process, self.cfg.init_learning_rate)
                    grl_lambda = grl_lambda_schedule(process)

                    # Forward/backward pass: run the train op and fetch losses.
                    op,train_loss,train_image_cls_loss,train_domain_cls_loss,train_acc = \
                        sess.run([self.train_op,self.loss,self.image_cls_loss,self.domain_cls_loss,self.acc],
                                  feed_dict={self.source_image_input:batch_mnist_image_data,
                                             self.target_image_input:batch_mnist_m_image_data,
                                             self.source_image_labels:batch_mnist_labels,
                                             self.domain_labels:batch_domain_labels,
                                             self.learning_rate:learning_rate,
                                             self.grl_lambd:grl_lambda})
                    self.writer.add_summary(make_summary(
                        'learning_rate', learning_rate),
                                            global_step=global_step)

                    # Update running training losses and accuracy.
                    epoch_loss_avg.update(train_loss, 1)
                    epoch_image_cls_loss_avg.update(train_image_cls_loss, 1)
                    epoch_domain_cls_loss_avg.update(train_domain_cls_loss, 1)
                    epoch_accuracy.update(train_acc, 1)

                    # Advance the progress bar.
                    progbar.update(
                        i, [('train_image_cls_loss', train_image_cls_loss),
                            ('train_domain_cls_loss', train_domain_cls_loss),
                            ('train_loss', train_loss),
                            ("train_acc", train_acc)])

                # Record epoch-level metrics for later plotting.
                train_loss_results.append(epoch_loss_avg.average)
                train_image_cls_loss_results.append(
                    epoch_image_cls_loss_avg.average)
                train_domain_cls_loss_results.append(
                    epoch_domain_cls_loss_avg.average)
                train_accuracy_results.append(epoch_accuracy.average)

                self.writer.add_summary(make_summary('train/train_loss',
                                                     epoch_loss_avg.average),
                                        global_step=ep + 1)
                self.writer.add_summary(make_summary(
                    'train/train_image_cls_loss',
                    epoch_image_cls_loss_avg.average),
                                        global_step=ep + 1)
                self.writer.add_summary(make_summary(
                    'train/train_domain_cls_loss',
                    epoch_domain_cls_loss_avg.average),
                                        global_step=ep + 1)
                self.writer.add_summary(make_summary('accuracy/train_accuracy',
                                                     epoch_accuracy.average),
                                        global_step=ep + 1)

                if (ep + 1) % interval == 0:
                    # Evaluate the model on the validation set.
                    val_ep.append(ep)
                    val_loss, val_image_cls_loss,val_domain_cls_loss, \
                        val_accuracy = self.eval_on_val_dataset(sess,val_datagen,val_iter_num,ep+1)
                    val_loss_results.append(val_loss)
                    val_image_cls_loss_results.append(val_image_cls_loss)
                    val_domain_cls_loss_results.append(val_domain_cls_loss)
                    val_accuracy_results.append(val_accuracy)
                    # Renamed from ``str`` to avoid shadowing the builtin.
                    ckpt_name = "Epoch{:03d}_val_image_cls_loss{:.3f}_val_domain_cls_loss{:.3f}_val_loss{:.3f}" \
                                "_val_accuracy{:.3%}".format(ep+1,val_image_cls_loss,val_domain_cls_loss,val_loss,val_accuracy)
                    print(ckpt_name)

                    if val_accuracy > val_acc_max:
                        # New best validation accuracy: checkpoint the model.
                        val_acc_max = val_accuracy
                        self.saver_save.save(
                            sess, os.path.join(checkpoint_dir, ckpt_name + ".ckpt"))

            # Save training and validation curves.
            path = os.path.join(result_dir, "train_loss.jpg")
            plot_loss(np.arange(1,
                                len(train_loss_results) + 1), [
                                    np.array(train_loss_results),
                                    np.array(train_image_cls_loss_results),
                                    np.array(train_domain_cls_loss_results)
                                ], path, "train")
            path = os.path.join(result_dir, "val_loss.jpg")
            plot_loss(
                np.array(val_ep) + 1, [
                    np.array(val_loss_results),
                    np.array(val_image_cls_loss_results),
                    np.array(val_domain_cls_loss_results)
                ], path, "val")
            train_acc = np.array(train_accuracy_results)[np.array(val_ep)]
            path = os.path.join(result_dir, "accuracy.jpg")
            plot_accuracy(
                np.array(val_ep) + 1, [train_acc, val_accuracy_results], path)

            # Save the final model.
            model_path = os.path.join(checkpoint_dir, "trained_model.ckpt")
            self.saver_save.save(sess, model_path)
            print("Train model finshed. The model is saved in : ", model_path)
            print('\n----------- end to train -----------\n')
Esempio n. 3
0
# Human-readable names for the 10 classes (one-hot column order of y_test).
labelNames = [
    "airplane", "bird", "car", "cat", "deer", "dog", "horse", "monkey", "ship",
    "truck"
]

# Decode the model's predictions and compare the top-3 guesses with the
# ground-truth label for the first 50 test samples.
final_predictions = utils.decode_predictions(model_final.predict(x_test),
                                             labelNames)
for i in range(50):
    # y_test rows are one-hot; the index of the True entry is the class id.
    # (The original computed this into an unused variable and then recomputed
    # it inside the print.)
    actual = labelNames[list(y_test[i]).index(True)]
    print("predicted " + final_predictions[i][0] + " " +
          final_predictions[i][1] + " " + final_predictions[i][2] +
          " actual " + actual)

# plot loss curves
utils.plot_loss(model_history)
utils.plot_loss(autoenc_history)

# display decoded autoencode images if first autoencode training
if 'decoded_imgs' in globals():
    n = 10
    plt.figure(figsize=(20, 4))
    for i in range(1, n):
        # display original
        ax = plt.subplot(2, n, i)
        plt.imshow(x_test[i].reshape(96, 96, 3))
        #plt.gray()
        ax.get_xaxis().set_visible(False)
        ax.get_yaxis().set_visible(False)
        # display reconstruction
        ax = plt.subplot(2, n, i + n)
Esempio n. 4
0
    def train(self):
        """Train the model on ``self.data`` with SGD and an RMSE objective.

        Restores weights from a checkpoint when ``config.load_model`` is
        True, otherwise initializes them.  The average loss per epoch is
        collected, plotted at the end, and the final parameters are saved.
        """
        # Load model if a checkpoint exists, otherwise initialize weights.
        if self.config.load_model is True:
            self.model.load_model()
        else:
            self.model.weight_init()
            print('weight is initialized')

        # Optimizer.
        self.momentum = 0.9
        self.optimizer = optim.SGD(self.model.parameters(),
                                   lr=self.config.lr,
                                   momentum=self.momentum)

        # Loss function (moved to GPU together with the model when requested).
        if self.config.gpu_mode:
            self.model.cuda()
            self.MSE_loss = nn.MSELoss().cuda()
        else:
            self.MSE_loss = nn.MSELoss()

        print('---------- Networks architecture -------------')
        utils.print_network(self.model)
        print('----------------------------------------------')

        # load dataset
        train_data_loader = self.data

        ################# Train #################
        print('Training is started.')
        avg_loss = []  # per-epoch average train loss (plain floats)
        step = 0

        self.model.train()  # sets training mode; model.eval() sets testing mode
        for epoch in range(self.config.num_epochs):

            epoch_loss = 0.0
            for batch_idx, (input, _, target) in enumerate(train_data_loader):
                # input data (low resolution image)
                if self.config.gpu_mode:
                    x_ = Variable(input.cuda())
                    y_ = Variable(target.cuda())
                else:
                    x_ = Variable(input)
                    y_ = Variable(target)

                # Update network with an RMSE objective.
                self.optimizer.zero_grad()
                model_out = self.model(x_)
                loss = torch.sqrt(self.MSE_loss(model_out, y_))
                loss.backward()
                self.optimizer.step()

                # Log.  Use .item() so the autograd graph of each batch is not
                # kept alive by the accumulator (the original summed tensors,
                # and avg_loss then stored graph-carrying tensors).
                batch_loss = loss.item()
                epoch_loss += batch_loss
                print("Epoch: [%2d] [%4d/%4d] loss: %.8f" %
                      ((epoch + 1), (batch_idx + 1), len(train_data_loader), batch_loss))

                # tensorboard logging
                # self.logger.scalar_summary('loss', loss, step + 1)
                step += 1

            # avg. loss per epoch
            avg_loss.append(epoch_loss / len(train_data_loader))

            if (epoch + 1) % self.config.save_epochs == 0:
                self.model.save_model(epoch + 1)

        # Plot avg. loss
        utils.plot_loss(self.config, [avg_loss])
        print('avg_loss: ', avg_loss[-1])
        print("Training is finished.")

        # Save final trained parameters of model
        self.model.save_model(epoch=None)
Esempio n. 5
0
    def train_test(self):
        """Train on ``self.data_train`` and evaluate on ``self.data_test``.

        Uses Adam with a step-decayed learning rate and an RMSE objective.
        Per-epoch train/test losses are tracked and plotted (also refreshed
        every 10 epochs during training), a DTW score is computed on the test
        set at the end, and the final parameters are saved.
        """
        # Load model if a checkpoint exists, otherwise initialize weights.
        if self.config.load_model is True:
            self.load_model()
            # self.load_spec_model()
        else:
            self.weight_init()

        # Loss function; nn.MSELoss averages over the batch by default.
        if self.config.gpu_mode:
            self.model.cuda()
            self.MSE_loss = nn.MSELoss().cuda()
        else:
            self.MSE_loss = nn.MSELoss()

        # Optimizer.  NOTE(review): weight_decay=1.0 is unusually strong —
        # confirm it is intentional and not a leftover experiment.
        self.momentum = 0.9
        self.optimizer = optim.Adam(self.model.parameters(),
                                    lr=self.config.lr,
                                    weight_decay=1.0)

        scheduler = lr_scheduler.StepLR(self.optimizer,
                                        step_size=100,
                                        gamma=0.1)
        # scheduler = lr_scheduler.ExponentialLR(self.optimizer, gamma=0.9)

        print('---------- Networks architecture -------------')
        utils.print_network(self.model)
        print('----------------------------------------------')

        # Load dataset loaders.
        train_data_loader = self.data_train
        test_data_loader = self.data_test

        ################# Train #################
        print('Training is started.')
        avg_loss = []           # per-epoch average train loss
        avg_loss_test = []      # per-epoch average test loss
        avg_loss_log_test = []  # declared in original; never populated here
        # step = 0

        es = EarlyStopping(patience=50)

        self.model.train()  # sets training mode; model.eval() sets testing mode
        for epoch in range(self.config.num_epochs):
            epoch_loss = 0.0
            for batch_idx, (input, target,
                            groundtruth) in enumerate(train_data_loader):
                # input data (low resolution image)
                if self.config.gpu_mode:
                    x_ = Variable(input.cuda())
                    y_ = Variable(groundtruth.cuda())
                else:
                    x_ = Variable(input)
                    y_ = Variable(groundtruth)

                # With scale 10, x_.shape is (batchsize, 1, 300);
                # with scale 100, x_.shape is (batchsize, 1, 30).

                # Update network with an RMSE objective.
                self.optimizer.zero_grad()
                model_out = self.model(x_)
                loss = torch.sqrt(self.MSE_loss(model_out, y_))
                loss.backward()
                self.optimizer.step()
                # Use .item() so the autograd graph of each batch is not kept
                # alive by the accumulator (the original summed tensors).
                batch_loss = loss.item()
                epoch_loss += batch_loss

                # len(train_data_loader) is the number of batches per epoch
                # (num train samples / batch size).
                print("Epoch: [%2d] [%4d/%4d] loss: %.8f" %
                      ((epoch + 1), (batch_idx + 1), len(train_data_loader), batch_loss))

                # tensorboard logging
                # self.logger.scalar_summary('loss', loss, step + 1)
                # step += 1

            # Decay the learning rate after this epoch's optimizer steps
            # (calling scheduler.step() before optimizer.step() is the
            # deprecated pre-1.1 ordering).
            scheduler.step()

            # avg. loss per epoch: dividing by len(train_data_loader) gives
            # the mean per-batch loss.
            avg_loss.append(epoch_loss / len(train_data_loader))

            if (epoch + 1) % self.config.save_epochs == 0:
                self.save_model(epoch + 1)

            # Calculate test loss without building autograd graphs.
            with torch.no_grad():
                loss_test, _ = self.test(test_data_loader)

            epoch_loss_test = loss_test / len(test_data_loader)

            avg_loss_test.append(float(epoch_loss_test))

            #nni.report_intermediate_result(
            #    {"default": float(epoch_loss_test), "epoch_loss": float(avg_loss[-1])})

            # if es.step(avg_loss[-1]):
            #     self.save_model(epoch=None)
            #     print('Early stop at %2d epoch' % (epoch + 1))
            #     break

            # Periodically refresh the loss plot during training.
            if epoch % 10 == 0 and epoch != 0:
                utils.plot_loss(self.config, [avg_loss, avg_loss_test])

        #nni.report_final_result({"default": float(avg_loss_test[-1]), "epoch_loss": float(avg_loss[-1])})

        # Plot avg. loss
        utils.plot_loss(self.config, [avg_loss, avg_loss_test])

        # Final DTW evaluation on the test set.
        with torch.no_grad():
            _, dtw_test = self.test(test_data_loader, True)
            avg_dtw_test = dtw_test / len(test_data_loader)

        print('avg_loss: ', avg_loss[-1])
        print('avg_loss_log with original data: ', avg_loss_test[-1])
        print('dtw with original data: ', avg_dtw_test)
        print("Training and test is finished.")

        # Save final trained parameters of model
        self.save_model(epoch=None)