Example #1
    def validate_1epoch(self):
        print('==> Epoch:[{0}/{1}][validation stage]'.format(
            self.epoch, self.nb_epochs))
        batch_time = AverageMeter()
        losses = AverageMeter()
        top1 = AverageMeter()
        top5 = AverageMeter()
        self.model.eval()
        end = time.time()

        progress = tqdm(self.testloader)
        for i, (data, label) in enumerate(progress):
            label = label.cuda(non_blocking=True)
            input_var = Variable(data).cuda()
            target_var = Variable(label).cuda()

            with torch.no_grad():
                output = self.model(input_var)
            prec1, prec5 = accuracy(output.data, label, topk=(1, 5))
            top1.update(prec1.item(), output.data.size(0))
            top5.update(prec5.item(), output.data.size(0))

            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()

        info = {
            'Epoch': [self.epoch],
            'Batch Time': [round(batch_time.avg, 3)],
            'Loss': [0],
            'Prec@1': [round(top1.avg, 4)],
            'Prec@5': [round(top5.avg, 4)],
        }

        record_info(info, 'record/spatial/rgb_test.csv', 'test')
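These examples all lean on an `AverageMeter` and an `accuracy` helper that are not shown. A minimal sketch of both, modeled on the classic PyTorch ImageNet example; the exact definitions in each source repository may differ:

import torch


class AverageMeter(object):
    """Track the latest value, running sum, count, and average."""

    def __init__(self):
        self.val = 0
        self.avg = 0
        self.sum = 0
        self.count = 0

    def update(self, val, n=1):
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count if self.count else 0


def accuracy(output, target, topk=(1,)):
    """Compute precision@k over a batch for each k in topk."""
    maxk = max(topk)
    batch_size = target.size(0)
    # Indices of the top-k predictions, shape (maxk, batch).
    _, pred = output.topk(maxk, 1, True, True)
    pred = pred.t()
    correct = pred.eq(target.view(1, -1).expand_as(pred))
    res = []
    for k in topk:
        correct_k = correct[:k].reshape(-1).float().sum(0)
        res.append(correct_k.mul_(100.0 / batch_size))
    return res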
Example #2
    def train_1epoch(self):
        print('==> Epoch:[{0}/{1}][training stage]'.format(
            self.epoch, self.nb_epochs))
        batch_time = AverageMeter()
        data_time = AverageMeter()
        losses = AverageMeter()
        top1 = AverageMeter()
        top5 = AverageMeter()
        #switch to train mode
        self.model.train()
        end = time.time()
        # mini-batch training
        progress = tqdm(self.train_loader)
        for i, (data_dict, label) in enumerate(progress):

            # measure data loading time
            data_time.update(time.time() - end)

            label = label.cuda(non_blocking=True)
            target_var = Variable(label).cuda()

            # compute output
            output = Variable(
                torch.zeros(len(data_dict['img1']), 101).float()).cuda()
            for j in range(len(data_dict)):
                key = 'img' + str(j)
                data = data_dict[key]
                input_var = Variable(data).cuda()
                output += self.model(input_var)

            loss = self.criterion(output, target_var)

            # measure accuracy and record loss
            prec1, prec5 = accuracy(output.data, label, topk=(1, 5))
            losses.update(loss.item(), data.size(0))
            top1.update(prec1.item(), data.size(0))
            top5.update(prec5.item(), data.size(0))

            # compute gradient and do SGD step
            self.optimizer.zero_grad()
            loss.backward()
            self.optimizer.step()

            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()

        info = {
            'Epoch': [self.epoch],
            'Batch Time': [np.round(batch_time.avg, 4)],
            'Data Time': [np.round(data_time.avg, 4)],
            'Loss': [np.round(losses.avg, 4)],
            'Prec@1': [np.round(top1.avg, 4)],
            'Prec@5': [np.round(top5.avg, 4)],
            'lr': self.optimizer.param_groups[0]['lr']
        }
        record_info(info,
                    filename='record/spatial/rgb_train.csv',
                    mode='train')
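`record_info` is likewise undefined here. A plausible sketch, assuming it appends the `info` dict to a CSV through pandas (the dict-of-lists shape used in these examples maps directly onto `pd.DataFrame`); the real helper may print a fancier summary:

import os

import pandas as pd


def record_info(info, filename, mode):
    """Append one row of epoch statistics to a CSV file."""
    print('{} results: {}'.format(mode, info))
    df = pd.DataFrame.from_dict(info)
    if not os.path.isfile(filename):
        df.to_csv(filename, index=False)
    else:
        # Append without rewriting the header.
        df.to_csv(filename, mode='a', header=False, index=False)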
Example #3
    def train_1epoch(self):
        print('==> Epoch:[{0}/{1}][training stage]'.format(
            self.epoch, self.nb_epochs))
        batch_time = utils.AverageMeter()
        data_time = utils.AverageMeter()
        losses = utils.AverageMeter()
        top1 = utils.AverageMeter()
        top5 = utils.AverageMeter()
        # switch to train mode
        self.model.train()
        end = time.time()
        # mini-batch training
        progress = tqdm.tqdm(self.train_loader)
        for i, (data, label) in enumerate(progress):
            # Probabilistically withhold a data batch
            if 100 * random.random() > self.percent:
                continue

            # measure data loading time
            data_time.update(time.time() - end)

            label = label.cuda(non_blocking=True)
            input_var = Variable(data).cuda()
            target_var = Variable(label).cuda()

            # compute output
            output = self.model(input_var)
            loss = self.criterion(output, target_var)

            # measure accuracy and record loss
            prec1, prec5 = utils.accuracy(output.data, label, topk=(1, 5))
            losses.update(loss.item(), data.size(0))
            top1.update(prec1.item(), data.size(0))
            top5.update(prec5.item(), data.size(0))

            # compute gradient and do SGD step
            self.optimizer.zero_grad()
            loss.backward()
            self.optimizer.step()

            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()

        info = {
            'Epoch': [self.epoch],
            'Batch Time': [round(float(batch_time.avg), 3)],
            'Data Time': [round(float(data_time.avg), 3)],
            'Loss': [round(float(losses.avg), 5)],
            'Prec@1': [round(float(top1.avg), 4)],
            'Prec@5': [round(float(top5.avg), 4)],
            'lr': self.optimizer.param_groups[0]['lr']
        }
        utils.record_info(
            info,
            os.path.join(self.output_dir,
                         'opf_train_{}.csv'.format(self.model_type)), 'train')
Example #4
    def validate_1epoch(self):
        print('==> Epoch:[{0}/{1}][validation stage]'.format(
            self.epoch, self.nb_epochs))

        batch_time = utils.AverageMeter()
        losses = utils.AverageMeter()
        top1 = utils.AverageMeter()
        top5 = utils.AverageMeter()
        # switch to evaluate mode
        self.model.eval()
        self.dic_video_level_preds = {}
        end = time.time()
        progress = tqdm.tqdm(self.test_loader)
        for i, (keys, data, label) in enumerate(progress):
            label = label.cuda(non_blocking=True)
            with torch.no_grad():
                data_var = Variable(data).cuda(non_blocking=True)
                label_var = Variable(label).cuda(non_blocking=True)

                # compute output
                output = self.model(data_var)

            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()
            # Calculate video level prediction
            preds = output.data.cpu().numpy()
            nb_data = preds.shape[0]
            for j in range(nb_data):
                videoName = keys[j].split('|', 1)[0]  # ApplyMakeup_g01_c01
                if videoName not in self.dic_video_level_preds.keys():
                    self.dic_video_level_preds[videoName] = preds[j, :]
                else:
                    self.dic_video_level_preds[videoName] += preds[j, :]

        # Frame to video level accuracy
        video_top1, video_top5, video_loss = self.frame2_video_level_accuracy()
        info = {
            'Epoch': [self.epoch],
            'Batch Time': [np.round(batch_time.avg, 3)],
            'Loss': [np.round(video_loss, 5)],
            'Prec@1': [np.round(video_top1, 3)],
            'Prec@5': [np.round(video_top5, 3)]
        }
        utils.record_info(
            info,
            os.path.join(self.output_dir,
                         'opf_test_{}.csv'.format(self.model_type)), 'test')
        return video_top1, video_loss
Example #5
    def train_1epoch(self):
        print('==> Epoch:[{0}/{1}][training stage]'.format(
            self.epoch, self.nb_epochs))
        batch_time = AverageMeter()
        data_time = AverageMeter()
        losses = AverageMeter()
        top1 = AverageMeter()
        top5 = AverageMeter()
        self.model.train()
        end = time.time()

        progress = tqdm(self.trainloader)
        for i, (data, label) in enumerate(progress):
            data_time.update(time.time() - end)

            label = label.cuda(non_blocking=True)
            input_var = Variable(data).cuda()
            target_var = Variable(label).cuda()

            output = self.model(input_var)
            loss = self.criterion(output, target_var)
            prec1, prec5 = accuracy(output.data, label, topk=(1, 5))
            losses.update(loss.item(), output.data.size(0))
            top1.update(prec1.item(), output.data.size(0))
            top5.update(prec5.item(), output.data.size(0))

            self.optimizer.zero_grad()
            loss.backward()
            self.optimizer.step()

            batch_time.update(time.time() - end)
            end = time.time()

        info = {
            'Epoch': [self.epoch],
            'Batch Time': [round(batch_time.avg, 3)],
            'Data Time': [round(data_time.avg, 3)],
            'Loss': [round(losses.avg, 5)],
            'Prec@1': [round(top1.avg, 4)],
            'Prec@5': [round(top5.avg, 4)],
            'lr': self.optimizer.param_groups[0]['lr']
        }

        record_info(info, './record/spatial/rgb_train.csv', 'train')
Example #6
    def validate_1epoch(self):
        print('==> Epoch:[{0}/{1}][validation stage]'.format(self.epoch, self.nb_epochs))

        batch_time = AverageMeter()
        losses = AverageMeter()
        top1 = AverageMeter()
        top5 = AverageMeter()
        # switch to evaluate mode
        self.model.eval()
        self.dic_video_level_preds = {}
        end = time.time()
        progress = tqdm(self.test_loader)
        for i, (keys, data, label) in enumerate(progress):
            # data = data.sub_(127.353346189).div_(14.971742063)
            label = label.cuda(non_blocking=True)
            with torch.no_grad():
                data_var = Variable(data).cuda(non_blocking=True)
                label_var = Variable(label).cuda(non_blocking=True)

                # compute output
                output = self.model(data_var)

            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()
            # Calculate video level prediction
            preds = output.data.cpu().numpy()
            nb_data = preds.shape[0]
            for j in range(nb_data):
                videoName = keys[j].split('-', 1)[0]  # ApplyMakeup_g01_c01
                if videoName not in self.dic_video_level_preds:
                    self.dic_video_level_preds[videoName] = preds[j, :]
                else:
                    self.dic_video_level_preds[videoName] += preds[j, :]

        # Frame to video level accuracy
        video_top1, video_top5, video_loss = self.frame2_video_level_accuracy()
        info = {'Epoch': [self.epoch],
                'Batch Time': [np.round(batch_time.avg, 4)],
                'Loss': [np.round(video_loss, 4)],
                'Prec@1': [np.round(video_top1, 4)],
                'Prec@5': [np.round(video_top5, 4)]
                }
        record_info(info, filename='record/motion/opf_test.csv', mode='test')
        return video_top1, video_loss
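Examples #4, #6, and #7 accumulate frame-level scores per video in `self.dic_video_level_preds` and then call `frame2_video_level_accuracy`, which is not shown. A rough sketch of what such a method typically does, assuming a hypothetical `self.test_video` dict that maps each video name to its ground-truth class index, 101 classes as in Example #2, and the `accuracy` helper sketched earlier:

    def frame2_video_level_accuracy(self):
        nb_videos = len(self.dic_video_level_preds)
        video_level_preds = torch.zeros(nb_videos, 101)
        video_level_labels = torch.zeros(nb_videos).long()
        for idx, name in enumerate(sorted(self.dic_video_level_preds)):
            video_level_preds[idx, :] = torch.from_numpy(
                self.dic_video_level_preds[name])
            # Hypothetical lookup of the video's ground-truth label.
            video_level_labels[idx] = int(self.test_video[name])
        top1, top5 = accuracy(video_level_preds, video_level_labels,
                              topk=(1, 5))
        # A fresh CPU criterion avoids device mismatches with self.criterion.
        loss = nn.CrossEntropyLoss()(video_level_preds, video_level_labels)
        return top1.item(), top5.item(), loss.item()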
Example #7
    def validate_1epoch(self):
        '''
        Run validation on the test set
        '''
        print('==> Epoch:[{0}/{1}][validation stage]'.format(
            self.epoch, self.nb_epochs))
        batch_time = AverageMeter()
        losses = AverageMeter()
        top1 = AverageMeter()
        top5 = AverageMeter()

        # Switch to evaluate mode
        self.model.eval()
        self.dic_video_level_preds = {}
        end = time.time()
        if self.arg.prog:
            progress = tqdm(self.test_loader)
        else:
            progress = self.test_loader

        # Iterate over dataset
        for i, (keys, data, label) in enumerate(progress):

            label = label.cuda(non_blocking=True)
            with torch.no_grad():
                data_var = Variable(data).cuda(non_blocking=True)
                label_var = Variable(label).cuda(non_blocking=True)

                # Compute output
                output = self.model(data_var)

            # Measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()

            # Calculate video level prediction
            preds = output.data.cpu().numpy()
            nb_data = preds.shape[0]
            for j in range(nb_data):
                videoName = keys[j].split('/', 1)[0]
                if videoName not in self.dic_video_level_preds.keys():
                    self.dic_video_level_preds[videoName] = preds[j, :]
                else:
                    self.dic_video_level_preds[videoName] += preds[j, :]

        # Calculate and record top-1 accuracy, top-5 accuracy, and evaluation loss
        video_top1, video_top5, video_loss = self.frame2_video_level_accuracy()
        losses.update(video_loss)
        top1.update(video_top1)
        top5.update(video_top5)
        self.writer.add_scalars("Eval Loss", {
            "val": losses.val,
            "average": losses.avg
        }, self.epoch)
        self.writer.add_scalars("Eval Acc@1", {
            "val": top1.val,
            "average": top1.avg
        }, self.epoch)
        self.writer.add_scalars("Eval Acc@5", {
            "val": top5.val,
            "average": top5.avg
        }, self.epoch)

        # Record to TensorBoard
        info = {
            'Epoch': [self.epoch],
            'Batch Time': [round(batch_time.avg, 3)],
            'Loss': [round(video_loss, 5)],
            'Prec@1': [round(video_top1, 3)],
            'Prec@5': [round(video_top5, 3)]
        }
        record_info(info, os.path.join(self.arg.savedir, 'rgb_train.csv'),
                    'test')
        return video_top1, video_loss
Example #8
    def train_1epoch(self):
        '''
        Train for a single epoch
        '''
        print('==> Epoch:[{0}/{1}][training stage]'.format(
            self.epoch, self.nb_epochs))
        batch_time = AverageMeter()
        data_time = AverageMeter()
        losses = AverageMeter()
        top1 = AverageMeter()
        top5 = AverageMeter()

        # Switch to train mode
        self.model.train()
        end = time.time()

        # Mini-batch training
        if self.arg.prog:
            progress = tqdm(self.train_loader)
        else:
            progress = self.train_loader

        # Iterate over dataset
        for i, (data, label) in enumerate(progress):

            # Measure data loading time
            data_time.update(time.time() - end)
            label = label.cuda(non_blocking=True)
            target_var = Variable(label).cuda()

            # Compute loss
            input_var = Variable(data).cuda()
            output = self.model(input_var)
            loss = self.criterion(output, target_var)

            # Measure accuracy and record loss
            prec1, prec5 = accuracy(output.data, label, topk=(1, 5))
            losses.update(loss.item(), data.size(0))
            top1.update(prec1.item(), data.size(0))
            top5.update(prec5.item(), data.size(0))

            # Compute gradient and do SGD step
            self.optimizer.zero_grad()
            loss.backward()
            self.optimizer.step()

            # Measure elapsed time and record results
            batch_time.update(time.time() - end)
            end = time.time()
            self.writer.add_scalars("Loss", {
                "val": losses.val,
                "average": losses.avg
            }, (self.epoch) * len(self.train_loader) + i)
            self.writer.add_scalars("Acc@1", {
                "val": top1.val,
                "average": top1.avg
            }, (self.epoch) * len(self.train_loader) + i)
            self.writer.add_scalars("Acc@5", {
                "val": top5.val,
                "average": top5.avg
            }, (self.epoch) * len(self.train_loader) + i)

            # Print every 10 iterations
            if i % 10 == 0:
                print(
                    'Loss {loss.val:.4f} ({loss.avg:.4f})\tAcc@1 {top1.val:.3f} ({top1.avg:.3f})\tAcc@5 {top5.val:.3f} ({top5.avg:.3f})'
                    .format(loss=losses, top1=top1, top5=top5))

        # Write to TensorBoard
        info = {
            'Epoch': [self.epoch],
            'Batch Time': [round(batch_time.avg, 3)],
            'Data Time': [round(data_time.avg, 3)],
            'Loss': [round(losses.avg, 5)],
            'Prec@1': [round(top1.avg, 4)],
            'Prec@5': [round(top5.avg, 4)],
            'lr': self.optimizer.param_groups[0]['lr']
        }
        # Save training info to CSV
        record_info(info, os.path.join(self.arg.savedir, 'rgb_train.csv'),
                    'train')

        return None
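Examples #7 and #8 also log through `self.writer`, a TensorBoard summary writer created elsewhere. A minimal setup sketch using `torch.utils.tensorboard` (Example #13 below instead imports the compatible `tensorboardX` package); the log directory is an assumption:

from torch.utils.tensorboard import SummaryWriter

# Typically created once in the trainer's __init__ and closed after training.
writer = SummaryWriter(log_dir='record/tensorboard')
writer.add_scalars('Loss', {'val': 0.73, 'average': 0.81}, global_step=0)
writer.close()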
Example #9
    def train(self,
              train_data_path='data/TFRdata/BSDS500_64.tfrecords',
              test_data_dir='data/test',
              epoch_volume=63000,
              epoch_to_train=None,
              time_str=None,
              train_batch_size=64,
              steps=None,
              max_steps=None,
              log_print_interval=50,
              test_interval=500,
              save_interval=10000,
              loss_func='l2',
              optimizer='adam',
              learning_rate=0.001,
              decay=None,
              decay_epoch=1,
              decay_strategy='exponent'):
        # params:
        time_str = time_str or get_time_str()
        self.loss_func = loss_func
        self.optimizer = optimizer

        # paths:
        log_dir = os.path.join('./logs', self.model_name, self.name, time_str)
        ckpt_dir = os.path.join('./checkpoints', self.model_name, self.name,
                                time_str)
        test_imgs_dir = os.path.join(log_dir, 'test_imgs')
        if not os.path.exists(log_dir):
            os.makedirs(log_dir)
        if not os.path.exists(ckpt_dir):
            os.makedirs(ckpt_dir)
        if not os.path.exists(test_imgs_dir):
            os.makedirs(test_imgs_dir)
        latest_ckpt_path = tf.train.latest_checkpoint(ckpt_dir)

        # info:
        self.info_train = {}
        self.info_train['time_str'] = str(time_str)
        self.info_train['train_batch_size'] = str(train_batch_size)
        self.info_train['loss_func'] = str(loss_func)
        self.info_train['optimizer'] = str(optimizer)
        if isinstance(decay_strategy, str):
            self.info_train['learning_rate'] = str(learning_rate)
            self.info_train['decay'] = str(decay)
            self.info_train['decay_strategy'] = str(decay_strategy)
        else:
            self.info_train['learning_rate'] = 'CustomStrategy'
        self.info_train['train_data_path'] = str(train_data_path)
        self.info_train['test_data_dir'] = str(test_data_dir)
        print('\n\n********** Train **********')
        print_info([self.info_train])
        print('********** ***** **********')
        record_info([self.info_top, self.info_train],
                    os.path.join(log_dir, 'info.txt'))

        # define graph:
        print('\n** Define graph...')
        self.train_graph = tf.Graph()
        with self.train_graph.as_default():
            self._build(self.MODE_TRAIN)
            # logs:
            log_train_MSE = tf.summary.scalar('MSE_train', self.mse)
            log_test_MSE = tf.summary.scalar('MSE_test', self.mse)
            log_train_PSNR = tf.summary.scalar('PSNR_train', self.psnr_float)
            log_test_PSNR = tf.summary.scalar('PSNR_test', self.psnr_float)
            log_lr = tf.summary.scalar('learning_rate', self.learning_rate)
            test_PSNR_mean = tf.placeholder(tf.float32, name='PSNR_mean')
            log_test_PSNR_mean = tf.summary.scalar('PSNR_mean_test',
                                                   test_PSNR_mean)
            log_writer = tf.summary.FileWriter(log_dir)
            log_writer.add_graph(self.train_graph)
            log_writer.flush()
            # saver:
            saver_all = tf.train.Saver(max_to_keep=0, name='saver_all')
        print('Done.')

        # datasets:
        print('\n** Generate datasets...')
        print('train data path:', train_data_path)
        print('test  data  dir:', test_data_dir)
        with self.train_graph.as_default():
            get_train_batch = dataset_TFR(train_data_path, train_batch_size,
                                          epoch_volume)
            test_batches = dataset_IMG(test_data_dir)
        print('Done.')

        print('\n** Initialize and prepare...')

        # init:
        sess = tf.Session(graph=self.train_graph)
        if latest_ckpt_path:
            saver_all.restore(sess, latest_ckpt_path)
        else:
            sess.run(self.variable_init)
        step = tf.train.global_step(sess, self.global_step)
        epoch = self._get_epoch(step, train_batch_size, epoch_volume)
        steps_to_run = None
        if steps or max_steps:
            steps_to_run = steps or max(max_steps - step, 0)

        # define process functions:
        def train_once(step, epoch=None, print_log=True):
            train_batch = sess.run(get_train_batch)
            feed_dic = {
                self.inputs: train_batch,
                self.labels: train_batch,
                self.learning_rate: lr
            }
            mse, mse_log, psnr, psnr_log, lr_log, _ = sess.run([
                self.mse, log_train_MSE, self.psnr_float, log_train_PSNR,
                log_lr, self.train_op
            ], feed_dic)
            log_writer.add_summary(mse_log, step)
            log_writer.add_summary(psnr_log, step)
            log_writer.add_summary(lr_log, step)

            if print_log:
                log = 'step: %d  lr: %.8f  train-loss: %.10f  train-PSNR: %.6f' % (
                    step, lr, mse, psnr)
                if epoch is not None:
                    log = ('epoch: %d ' % epoch) + log
                print(log)

        def test_all(step, epoch=None, print_log=True, save_dir=None):
            if print_log:
                print(
                    '--------------------------------------------------------------'
                )
                print('Test all:')
            img_num = len(test_batches['imgs'])
            psnr_sum = 0
            for tb in range(img_num):
                img = test_batches['imgs'][tb][np.newaxis, :]
                name = test_batches['names'][tb]
                feed_dic = {self.inputs: img, self.labels: img}
                run_list = [
                    self.mse, log_test_MSE, self.psnr_float, log_test_PSNR
                ]
                if save_dir is not None:
                    run_list.append(self.outputs)
                run_results = sess.run(run_list, feed_dic)
                if save_dir is None:
                    mse, mse_log, psnr, psnr_log = run_results
                else:
                    mse, mse_log, psnr, psnr_log, outputs = run_results
                    name_no_ext = os.path.splitext(name)[0]
                    if epoch is not None:
                        cv_imwrite(
                            os.path.join(
                                save_dir, 'epoch_%d_step_%d_%s_psnr_%.4f.png' %
                                (epoch, step, name_no_ext, psnr)), outputs[0],
                            'RGB')
                    else:
                        cv_imwrite(
                            os.path.join(
                                save_dir, 'step_%d_%s_psnr_%.4f.png' %
                                (step, name_no_ext, psnr)), outputs[0], 'RGB')
                log_writer.add_summary(mse_log, step)
                log_writer.add_summary(psnr_log, step)
                log_writer.flush()
                psnr_sum += psnr
                if print_log:
                    log = 'step: %d  test-loss: %.10f  test-PSNR: %.6f' % (
                        step, mse, psnr)
                    if epoch is not None:
                        log = ('epoch: %d ' % epoch) + log
                    log = ('| img: %s ' % name) + log
                    print(log)
            psnr_mean = psnr_sum / img_num
            log_writer.add_summary(
                sess.run(log_test_PSNR_mean, {test_PSNR_mean: psnr_mean}),
                step)
            if print_log:
                print('PSNR-mean: %.6f (img_num: %d)' % (psnr_mean, img_num))
                print(
                    '--------------------------------------------------------------'
                )
            return psnr_mean

        def save_once(step, print_log=True):
            save_path = os.path.join(ckpt_dir, get_time_str())
            saver_all.save(sess=sess,
                           save_path=save_path,
                           global_step=step,
                           write_meta_graph=False)
            if print_log:
                print('save:', save_path)
            return save_path

        print('Done.')

        # run:
        print('\n** Begin training:')
        save_path = None
        if latest_ckpt_path is None:
            test_all(0, 0, True)
            save_path = save_once(0)
        else:
            test_all(step, epoch, True)
        save_flag_final = False
        save_flag_max = False
        psnr_max = 0
        lr = self._lr_update(learning_rate, step, epoch, decay, decay_strategy)
        t = time.time()

        while (steps_to_run is None) or (steps_to_run > 0):  # main loop
            step = tf.train.global_step(sess, self.global_step) + 1
            epoch_old = epoch
            epoch = self._get_epoch(step, train_batch_size, epoch_volume)
            if epoch_to_train and (epoch > epoch_to_train):
                break
            if epoch_old != epoch:  # change lr only when new epoch
                if isinstance(decay_strategy, str):
                    if epoch_old % decay_epoch == 0:
                        lr = self._lr_update(learning_rate, step, epoch, decay,
                                             decay_strategy)
                else:
                    lr = self._lr_update(learning_rate, step, epoch, decay,
                                         decay_strategy)
            save_flag_final = True
            save_flag_max = False
            if (step % log_print_interval) == 0:
                train_once(step, epoch, print_log=True)
            else:
                train_once(step, epoch, print_log=False)
            if (step % test_interval) == 0:
                print('time: train_%d %.6fs' %
                      (test_interval, time.time() - t))
                t = time.time()
                psnr_tmp = test_all(step, epoch, True)
                print('time: test_once %.6fs' % (time.time() - t))
                if psnr_tmp > psnr_max:
                    test_all(step, epoch, False, test_imgs_dir)
                    psnr_max = psnr_tmp
                    print('psnr_max: %.6f epoch: %d step: %d' %
                          (psnr_max, epoch, step))
                    save_flag_max = True
                t = time.time()
            if (step % save_interval) == 0 or save_flag_max:
                t = time.time()
                save_path = save_once(step)
                save_flag_final = False
                save_flag_max = False
                print('time: save_once %.6fs' % (time.time() - t))
                t = time.time()
            if steps_to_run is not None:
                steps_to_run -= 1

        if save_flag_final:
            save_path = save_once(step)
        sess.close()
        print('\nALL DONE.')
        return save_path
Example #10
def main():
    torch.manual_seed(args.seed)
    np.random.seed(args.seed)
    dataloader = utils.load_dataset(args.data_path, args.batch_size, args.batch_size, args.batch_size)
    args.scaler = dataloader['scaler']

    engine = a(args)
    print("start training...")
    his_loss = []
    val_time = []
    train_time = []
    for epoch_num in range(args.epochs + 1):
        train_loss = []
        train_mape = []
        train_rmse = []
        t1 = time.time()
        dataloader['train_loader'].shuffle()
        for iter, (x, y) in enumerate(dataloader["train_loader"].get_iterator()):
            trainX = torch.Tensor(x).to(args.device)
            trainy = torch.Tensor(y).to(args.device)
            metrics = engine.train(trainX, trainy[:, :, :, 0])
            train_loss.append(metrics[0])
            train_mape.append(metrics[1])
            train_rmse.append(metrics[2])
            if batch_idx % 500 == 0:
                log = 'Iter: {:03d}, Train Loss: {:.4f}, Train MAPE: {:.4f}, Train RMSE: {:.4f}'
                print(log.format(batch_idx, train_loss[-1], train_mape[-1], train_rmse[-1]), flush=True)
                utils.record_info(log.format(batch_idx, train_loss[-1], train_mape[-1], train_rmse[-1]), args.info_dir)

        t2 = time.time()
        train_time.append(t2 - t1)
        valid_loss = []
        valid_mape = []
        valid_rmse = []

        print("eval...")
        s1 = time.time()
        for x, y in dataloader['val_loader'].get_iterator():
            valx = torch.Tensor(x).cuda()
            valy = torch.Tensor(y).cuda()
            metrics = engine.eval(valx, valy[:, :, :, 0])
            valid_loss.append(metrics[0])
            valid_mape.append(metrics[1])
            valid_rmse.append(metrics[2])
        s2 = time.time()
        log = 'Epoch: {:03d}, Inference Time: {:.4f} secs'
        print(log.format(epoch_num, (s2 - s1)))
        utils.record_info(log.format(epoch_num, (s2 - s1)), args.info_dir)
        val_time.append(s2 - s1)
        mtrain_loss = np.mean(train_loss)
        mtrain_mape = np.mean(train_mape)
        mtrain_rmse = np.mean(train_rmse)

        mvalid_loss = np.mean(valid_loss)
        mvalid_mape = np.mean(valid_mape)
        mvalid_rmse = np.mean(valid_rmse)
        his_loss.append(mvalid_loss)

        log = 'Epoch: {:03d}, Train Loss: {:.4f}, Train MAPE: {:.4f}, Train RMSE: {:.4f}, Valid Loss: {:.4f}, Valid MAPE: {:.4f}, Valid RMSE: {:.4f}, Training Time: {:.4f}/epoch'
        print(log.format(epoch_num, mtrain_loss, mtrain_mape, mtrain_rmse, mvalid_loss, mvalid_mape, mvalid_rmse,
                         (t2 - t1)),
              flush=True)
        utils.record_info(
            log.format(epoch_num, mtrain_loss, mtrain_mape, mtrain_rmse, mvalid_loss, mvalid_mape, mvalid_rmse,
                       (t2 - t1)), args.info_dir)
        torch.save(engine.model, "./model/" + "_epoch_" + str(epoch_num) + ".pkl")
    print("Average Training Time: {:.4f} secs/epoch".format(np.mean(train_time)))
    print("Average Inference Time: {:.4f} secs".format(np.mean(val_time)))
Example #11
parser.add_argument('--out_dim', type=int, default=1)
parser.add_argument('--epochs', type=int, default=100)
parser.add_argument("--lr", type=float, default=0.0005)
parser.add_argument("--clip", type=float, default=5.)
parser.add_argument('--weight_decay', type=float, default=0.000001, help='weight decay rate')
parser.add_argument("--his_len", type=int, default=12, help="")
parser.add_argument("--pred_len", type=int, default=12, help="")
parser.add_argument("--seed", type=int, default=1314, help="random seed")
parser.add_argument('--info_dir', type=str, default="./infos/metr12/ratio003.txt")
parser.add_argument('--channels', type=int, default=2)
parser.add_argument('--layers', type=int, default=5)
parser.add_argument('--snpsts_len', type=int, default=4)
parser.add_argument('--dropout', type=float, default=0.3)

args = parser.parse_args()
utils.record_info('snapshot increased to 6, number of pearson matrices changed to 0.05', args.info_dir)
utils.record_info(str(args), args.info_dir)
print(args)

if args.data == "metr":
    args.data_path = './data/METR-LA'
    args.adj_mx_path = './data/sensor_graph/adj_mx.pkl'
    args.adj_mx = torch.Tensor(utils.load_pickle(args.adj_mx_path)[-1])
    args.num_node = 207
    args.pearson_path = "./data/METR-LA/pearson_corr.pkl"
    args.dilations = [1, 2, 4, 2, 1, 1]
elif args.data == "bay":
    args.data_path = './data/PEMS-BAY'
    args.adj_mx_path = './data/sensor_graph/adj_mx_bay.pkl'
    args.adj_mx = torch.Tensor(utils.load_pickle(args.adj_mx_path)[-1])
    args.num_node = 325
Example #12
    def train(self):

        for self.epoch in range(self.numepoches):
            print('==> Epoch:[{0}/{1}][training stage]'.format(
                self.epoch, self.numepoches))

            batch_time = utils.AverageMeter()
            data_time = utils.AverageMeter()
            top1 = utils.AverageMeter()
            top5 = utils.AverageMeter()
            losses = utils.AverageMeter()

            running_loss = 0.0
            progress = tqdm(self.trainloader)
            end = time.time()
            for i, data in enumerate(progress, 0):
                data_time.update(time.time() - end)
                # get the inputs
                inputs, labels = data

                # wrap them in Variable
                # inputs, labels = Variable(inputs), Variable(labels)
                if torch.cuda.is_available():
                    inputs = Variable(inputs).cuda()
                    labels = Variable(labels).cuda()
                else:
                    inputs = Variable(inputs)
                    labels = Variable(labels)

                # zero the parameter gradients
                self.optimizer.zero_grad()  # clear gradients from the previous step

                # forward
                outputs = self.net(inputs)
                # loss
                loss = self.criterion(outputs, labels)
                # backward
                loss.backward()
                # update weights
                self.optimizer.step()

                prec1, prec5 = utils.accuracy(outputs.data,
                                              labels,
                                              topk=(1, 5))
                # data is the (inputs, labels) pair, so len(data) is always 2;
                # the batch size is inputs.size(0)
                losses.update(loss.item(), inputs.size(0))
                top1.update(prec1.item(), inputs.size(0))
                top5.update(prec5.item(), inputs.size(0))

                # print statistics
                running_loss = running_loss + loss.data.item()
                if i % 2000 == 1999:  # print every 2000 mini-batches
                    print('[%d, %5d] loss: %.3f' %
                          (self.epoch + 1, i + 1, running_loss / 2000))
                    running_loss = 0.0

                # measure elapsed time
                batch_time.update(time.time() - end)
                end = time.time()

            info = {
                'Epoch': [self.epoch],
                'Batch Time': [round(batch_time.avg, 3)],
                'Data Time': [round(data_time.avg, 3)],
                'Loss': [round(losses.avg, 5)],
                'Prec@1': [round(top1.avg, 4)],
                'Prec@5': [round(top5.avg, 4)],
                'lr': self.optimizer.param_groups[0]['lr']
            }
            utils.record_info(info, 'record/opf_train.csv', 'train')
            # prec1, val_loss = self.validate_1epoch()  # validation
            # print(prec1)
            is_best = running_loss > self.best_prec1
            # lr_scheduler
            # self.scheduler.step(val_loss)
            # save model
            if is_best:
                self.best_prec1 = running_loss

            self.save_checkpoint(
                self.net.state_dict(),
                is_best,
                'record/checkpoint.pth.tar',
            )
        print("Finished Training")
Example #13
    def train(self,
              time_str,
              patch_mode=None,
              patch_size=60,
              step=36,
              patch_height=1080,
              patch_width=1920,
              train_batch_size=64,
              max_epoch=2400,
              learning_rate=0.001,
              resume=None
              ):

        time_str = time_str or get_time_str()

        log_dir = os.path.join('./logs', self.net_name, self.video_name + '_QP' + str(self.QP), time_str)
        backup_dir = os.path.join('./checkpoints', self.net_name, self.video_name + '_QP' + str(self.QP), time_str)
        if not os.path.exists(log_dir):
            os.makedirs(log_dir)
        if not os.path.exists(backup_dir):
            os.makedirs(backup_dir)

        writer = SummaryWriter(logdir=log_dir)

        train_input_frame = './data/HM_compressed/' + self.video_name + '_QP' + str(self.QP) + '_' + self.codec_mode + '_rec_HM.yuv'
        train_label_frame = './data/raw/' + self.video_name + '.yuv'

        print('\n')
        print('===> Loading datasets')
        
        im_input, _, _ = YUVread(train_input_frame, [self.height, self.width], self.frame_num, self.start_frame)
        im_label, _, _ = YUVread(train_label_frame, [self.height, self.width], self.frame_num, self.start_frame)

        frame_num = im_input.shape[0]
        if patch_mode == 'small':
            train_set = MultiFrameDataset(rec_y=im_input, label_y=im_label, totalFrames=frame_num, nFrames=self.neighbor_frames, width=self.width, height=self.height, width_cut=patch_size, height_cut=patch_size)
            total_count = train_set.__len__()
        else:
            train_set = MultiFrameDataset(rec_y=im_input, label_y=im_label, totalFrames=frame_num, nFrames=self.neighbor_frames, width=self.width, height=self.height,width_cut=self.width, height_cut=self.height)
            total_count = train_set.__len__()

        if patch_mode == 'small':
            training_data_loader = DataLoader(train_set, batch_size=train_batch_size, shuffle=True, num_workers=4)
        else:
            training_data_loader = DataLoader(train_set, batch_size=train_batch_size, shuffle=False, num_workers=4)
        print('===> Done\n')

        print('===> Building model ')

        model = CRNN(input_channel=1, base_channel=self.channel, neighbor_frames=self.neighbor_frames, use_norm_at_begin=self.use_BN_at_begin, use_norm_in_ru=self.use_BN_in_ru, use_norm_at_end=self.use_BN_at_end)
        calculate_variables(model, print_vars=False)
        model = model.cuda()
        optimizer = optim.Adam(model.parameters(), lr=learning_rate, betas=(0.9, 0.999), eps=1e-8)
        l1_loss_fn = nn.L1Loss()
        l2_loss_fn = nn.MSELoss(reduction='mean')
        l1_loss_fn = l1_loss_fn.cuda()
        l2_loss_fn = l2_loss_fn.cuda()
        print('===> Done\n')


        print('===> Try resume from checkpoint')
        if resume and resume != 'none':
            checkpoint = torch.load(resume)
            model.load_state_dict(checkpoint['state'])
            if patch_mode == 'large':
                start_epoch = 1
            else:
                optimizer.load_state_dict(checkpoint['optimizer'])
                start_epoch = checkpoint['epoch']
            print(resume.split('_')[-7])
            psnr_gain_max = float(resume.split('_')[-7])
            print('===> Load checkpoint')
        else:
            start_epoch = 1
            psnr_gain_max = 0.0
            print('===> Start from scratch')

        # info:
        self.info_train = {}
        if resume and resume != 'none':
            self.info_train['checkpoint_to_load'] = resume
        self.info_train['time_str'] = time_str
        self.info_train['max_epoch'] = max_epoch
        self.info_train['learning_rate'] = str(learning_rate)
        self.info_train['num_of_patches'] = str(total_count)
        if patch_mode == 'small':
            self.info_train['patch_size'] = str(patch_size) + 'x' + str(patch_size)
        else:
            self.info_train['patch_size'] = str(patch_height) + 'x' + str(patch_width)
        self.info_train['train_batch_size'] = str(train_batch_size)
        self.info_train['log_dir'] = log_dir
        self.info_train['backup_dir'] = backup_dir
        self.info_train['train_input'] = train_input_frame
        self.info_train['train_label'] = train_label_frame
        self.info_train['loss_function'] = 'L1-absolute_difference'
        print('\n\n********** Train **********')
        print_info([self.info_train])
        print('********** ***** **********')
        record_info([self.info_top, self.info_train], os.path.join(backup_dir, 'info.txt'))
        record_info([self.info_top, self.info_train], os.path.join(log_dir, 'info.txt'))

        count = 0

        for epoch in range(start_epoch, max_epoch+1):
            # global psnr_gain_max
            model.train()
            psnr_gain = 0.0
            total_psnr_before = 0.0
            for iteration, batch in enumerate(training_data_loader):
                batch_input, batch_neighbor, batch_label = batch[0], batch[1], batch[2]
                batch_input = batch_input.cuda()
                batch_neighbor = batch_neighbor.cuda()
                batch_label = batch_label.cuda()
                batch_output = model(batch_input, batch_neighbor)
                mse_loss_before = l2_loss_fn(batch_input, batch_label)
                l1_loss = l1_loss_fn(batch_output, batch_label)
                mse_loss = l2_loss_fn(batch_output, batch_label)
                optimizer.zero_grad()
                l1_loss.backward()
                optimizer.step()

                with torch.no_grad():
                    psnr_before = np.multiply(10.0, np.log(1.0 * 1.0 / mse_loss_before.cpu()) / np.log(10.0))
                    psnr = np.multiply(10.0, np.log(1.0 * 1.0 / mse_loss.cpu()) / np.log(10.0))
                    psnr_gain += (psnr - psnr_before)

                    print(
                        "Train(%.10s:QP%.2d):> Epoch[%.4d](%.3d/%.3d)==  lr: %.8f  train-loss: %.10f  train_PSNR: %.6f  PSNR_before: %.6f  PSNR_gain: %.6f" %
                        (self.video_name,self.QP,epoch, iteration + 1, len(training_data_loader), optimizer.param_groups[0]['lr'], mse_loss.cpu(), psnr,
                         psnr_before, psnr - psnr_before))
                    total_psnr_before += psnr_before
                writer.add_scalar('Train_loss', l1_loss.cpu(), count)
                writer.add_scalar('Train_PSNR', psnr, count)

            total_psnr_before = total_psnr_before / (len(training_data_loader))
            print(total_psnr_before)
            psnr_gain = psnr_gain / (len(training_data_loader))
            self.checkpoint(model, epoch, optimizer, psnr_gain_max, backup_dir=backup_dir)

            if epoch % 50 == 0:
                self.checkpoint(model, epoch, optimizer, psnr_gain, backup_dir=backup_dir)

            if self.QP in [22, 27]:
                if (epoch + 1) == 50 or (epoch + 1) == 300:
                    for param_group in optimizer.param_groups:
                        param_group['lr'] /= 10
                    print('Learning rate decay: lr={}'.format(optimizer.param_groups[0]['lr']))
            else:
                if (epoch + 1) == 100 or (epoch + 1) == 300:
                    for param_group in optimizer.param_groups:
                        param_group['lr'] /= 10
                    print('Learning rate decay: lr={}'.format(optimizer.param_groups[0]['lr']))
Example #14
    def validate_1epoch(self):
        print('==> Epoch:[{0}/{1}][validation stage]'.format(
            self.epoch, self.nb_epochs))
        batch_time = utils.AverageMeter()
        data_time = utils.AverageMeter()
        losses = utils.myAverageMeter()
        perf = utils.myAverageMeter()

        #switch to eval mode
        self.model.eval()

        end = time.time()
        # mini-batch training
        progress = tqdm(self.test_loader, ascii=True)
        with torch.no_grad():
            for _, (image, label_map, label_pts,
                    loss_weight) in enumerate(progress):
                if Config.use_cuda:
                    image = image.cuda()
                    label_map = label_map.cuda()
                    loss_weight = loss_weight.cuda()

                # measure data loading time
                data_time.update(time.time() - end)

                pred, _ = self.model(image)
                loss = self.weighted_loss(pred, label_map, loss_weight)
                # loss = self.loss_func(pred, label_map)

                # measure accuracy and record loss
                losses.update(loss.item(), image.size(0))
                pred_pts = utils.getPointByMap(pred)

                rmse = 0.0
                for b in range(0, label_pts.size(0)):
                    x_mse = 0.0
                    y_mse = 0.0
                    b_count = 0.0
                    for p in range(0, label_pts.size(1)):
                        x_mse += loss_weight[b, p, 0] * (
                            (label_pts[b, p, 0] - pred_pts[b, p, 0]).pow(2))
                        y_mse += loss_weight[b, p, 1] * (
                            (label_pts[b, p, 1] - pred_pts[b, p, 1]).pow(2))
                        b_count += loss_weight[b, p, 0] + loss_weight[b, p, 1]
                    b_rmse = torch.sqrt((x_mse + y_mse) / b_count)
                    rmse += b_rmse
                rmse = rmse / label_pts.size(0)

                perf.update(rmse, image.size(0))

                # measure elapsed time
                batch_time.update(time.time() - end)
                end = time.time()

        info = {
            'Epoch': [self.epoch],
            'Batch Time': [round(batch_time.avg, 3)],
            'Data Time': [round(data_time.avg, 3)],
            'Perf': [round(perf.average(), 5)],
            'Loss': [round(losses.average(), 5)]
        }
        utils.record_info(info, 'record/test.csv', 'test')
        return round(perf.average(), 5), round(losses.average(), 5)
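Examples #14 and #15 use `utils.myAverageMeter`, which exposes an `average()` method rather than the `avg` attribute of the `AverageMeter` sketched earlier. A minimal version under that assumption:

class myAverageMeter(object):
    """Weighted running average exposed through an average() method."""

    def __init__(self):
        self.sum = 0.0
        self.count = 0

    def update(self, val, n=1):
        # Cast to float so tensors (e.g. the RMSE above) round() cleanly.
        self.sum += float(val) * n
        self.count += n

    def average(self):
        return self.sum / self.count if self.count else 0.0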
Example #15
    def train_1epoch(self):
        print('==> Epoch:[{0}/{1}][training stage]'.format(
            self.epoch, self.nb_epochs))
        batch_time = utils.AverageMeter()
        data_time = utils.AverageMeter()
        losses = utils.myAverageMeter()
        perf = utils.myAverageMeter()

        #switch to train mode
        self.model.train()

        end = time.time()
        # mini-batch training
        progress = tqdm(self.train_loader, ascii=True)
        for _, (image, label_map, label_pts) in enumerate(progress):

            if Config.use_cuda:
                image = image.cuda()
                label_map = label_map.cuda()

            # measure data loading time
            data_time.update(time.time() - end)

            self.optimizer.zero_grad()
            pred, _ = self.model(image)
            loss = self.loss_func(pred, label_map)
            loss.backward()
            self.optimizer.step()

            # measure accuracy and record loss
            losses.update(loss.item(), image.size(0))
            pred_pts = utils.getPointByMap(pred)
            with torch.no_grad():
                rmse = 0.0
                for b in range(0, label_pts.size(0)):
                    x_mse = 0.0
                    y_mse = 0.0
                    for p in range(0, label_pts.size(1)):
                        x_mse += (label_pts[b, p, 0] -
                                  pred_pts[b, p, 0]).pow(2)
                        y_mse += (label_pts[b, p, 1] -
                                  pred_pts[b, p, 1]).pow(2)
                    b_rmse = torch.sqrt(
                        (x_mse + y_mse) / (2 * label_pts.size(1)))
                    # print("RMSE of the item:", b_rmse)
                    # print("pred :", pred_pts[b:, :, :])
                    # print("label:", label_pts[b:, :, :])
                    rmse += b_rmse
                rmse = rmse / label_pts.size(0)
            perf.update(rmse, image.size(0))

            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()

        info = {
            'Epoch': [self.epoch],
            'Batch Time': [round(batch_time.avg, 3)],
            'Data Time': [round(data_time.avg, 3)],
            'Perf': [round(perf.average(), 5)],
            'Loss': [round(losses.average(), 5)],
            'lr': self.optimizer.param_groups[0]['lr']
        }
        utils.record_info(info, 'record/train.csv', 'train')