Ejemplo n.º 1
0
def train(args):
    """Train a TSN ResNet model in PaddlePaddle dygraph mode.

    Args:
        args: parsed CLI namespace; reads .use_gpu, .config, .pretrain,
            .save_dir, .model_name and .epoch.

    Side effects: saves a checkpoint to ``args.save_dir + '/tsn_model'``
    after every step, and logs/prints the per-step loss and accuracy.
    """
    place = fluid.CUDAPlace(0) if args.use_gpu else fluid.CPUPlace()
    with fluid.dygraph.guard(place):
        config = parse_config(args.config)
        train_config = merge_configs(config, 'train', vars(args))
        print_configs(train_config, 'Train')

        train_model = TSN1.TSNResNet('TSN',
                                     train_config['MODEL']['num_layers'],
                                     train_config['MODEL']['num_classes'],
                                     train_config['MODEL']['seg_num'], 0.00002)
        # SGD with momentum; lr and momentum are hard-coded here.
        opt = fluid.optimizer.Momentum(0.001,
                                       0.9,
                                       parameter_list=train_model.parameters())

        if args.pretrain:
            # Resume from the checkpoint written by a previous run.
            model, _ = fluid.dygraph.load_dygraph(args.save_dir + '/tsn_model')
            train_model.load_dict(model)

        if not os.path.exists(args.save_dir):
            os.makedirs(args.save_dir)

        # get reader (removed a no-op `batch_size = batch_size` self-assignment)
        train_reader = KineticsReader(args.model_name.upper(), 'train',
                                      train_config).create_reader()

        epochs = args.epoch or train_model.epoch_num()
        for i in range(epochs):
            for batch_id, data in enumerate(train_reader()):
                dy_x_data = np.array([x[0] for x in data]).astype('float32')
                y_data = np.array([[x[1]] for x in data]).astype('int64')

                img = fluid.dygraph.to_variable(dy_x_data)
                label = fluid.dygraph.to_variable(y_data)
                label.stop_gradient = True

                out, acc = train_model(img, label)

                loss = fluid.layers.cross_entropy(out, label)
                avg_loss = fluid.layers.mean(loss)

                avg_loss.backward()
                opt.minimize(avg_loss)
                train_model.clear_gradients()

                # The original guard was `batch_id % 1 == 0`, which is always
                # true: log, print, and checkpoint on every single step.
                logger.info("Loss at epoch {} step {}: {}, acc: {}".format(
                    i, batch_id, avg_loss.numpy(), acc.numpy()))
                print("Loss at epoch {} step {}: {}, acc: {}".format(
                    i, batch_id, avg_loss.numpy(), acc.numpy()))
                fluid.dygraph.save_dygraph(train_model.state_dict(),
                                           args.save_dir + '/tsn_model')
        logger.info("Final loss: {}".format(avg_loss.numpy()))
        print("Final loss: {}".format(avg_loss.numpy()))
Ejemplo n.º 2
0
def eval2(weights, batch_size):
    """Evaluate a flow-stream ResNet50 on the validation split.

    Args:
        weights: path prefix of saved dygraph parameters (required; the
            process exits if falsy).
        batch_size: kept for interface compatibility; the reader reads its
            batch size from the config file, not from this argument.

    Returns:
        Mean accuracy over the validation set, or 0.0 if the reader
        yields no batches (the original raised UnboundLocalError then).
    """
    model_name = 'tsn'
    config = parse_config('configs/tsn.txt')
    # No merge step here: the raw parsed config is used directly.
    val_config = config
    print_configs(val_config, "Valid")
    with fluid.dygraph.guard():
        val_model = baseline_2d_resnets_pp.ResNet50Flow(
            'ResNet50Flow', val_config['MODEL']['num_layers'],
            val_config['MODEL']['num_classes'], val_config['MODEL']['seg_num'])

        # Invert the stored mapping: class index -> label name.
        label_dic = np.load('label_dir.npy', allow_pickle=True).item()
        label_dic = {v: k for k, v in label_dic.items()}

        val_reader = KineticsReader(model_name.upper(), 'valid',
                                    val_config).create_reader()

        # if no weight file specified, exit()
        if not weights:
            print("model path must be specified")
            exit()

        para_state_dict, _ = fluid.load_dygraph(weights)
        val_model.load_dict(para_state_dict)
        val_model.eval()

        acc_list = []
        for batch_id, data in enumerate(val_reader()):
            dy_x_data = np.array([x[0] for x in data]).astype('float32')
            y_data = np.array([[x[1]] for x in data]).astype('int64')

            img = fluid.dygraph.to_variable(dy_x_data)
            label = fluid.dygraph.to_variable(y_data)
            label.stop_gradient = True

            out, acc = val_model(img, label)
            acc_list.append(acc.numpy()[0])
        # Compute the mean once, after the loop (the original recomputed it
        # every batch and crashed if the reader was empty).
        testacc = np.mean(acc_list) if acc_list else 0.0
        print("验证集准确率为:{}".format(testacc))
    return testacc
Ejemplo n.º 3
0
def eval(args):
    """Evaluate a range of saved 3D-ResNet checkpoints on the test split and
    dump the per-checkpoint accuracies to a timestamped CSV file.

    NOTE(review): the function name shadows the builtin ``eval``; the
    checkpoint sweep is hard-coded to start at index 20 — confirm both are
    intentional.
    """
    # parse config
    config = parse_config(args.config)
    test_config = merge_configs(config, 'test', vars(args))
    print_configs(test_config, "test")
    # test_config = merge_configs(config, 'train', vars(args))
    # print_configs(test_config, "train")
    with fluid.dygraph.guard():
        test_model = resnet_3d.generate_model(
            test_config['MODEL']['num_layers'])

        # label_dic = np.load('label_dir.npy', allow_pickle=True).item()
        # label_dic = {v: k for k, v in label_dic.items()}

        # get infer reader
        test_reader = Ucf101(args.model_name.upper(), 'test',
                             test_config).create_reader()
        # test_reader = Ucf101(args.model_name.upper(), 'train', test_config).create_reader()
        test_acc = []
        # Sweep checkpoints 20..args.epoch (inclusive); each iteration loads
        # one checkpoint and runs a full pass over the test reader.
        for num in range(20, args.epoch + 1):
            # for num in range(1,3):
            weights = 'checkpoints_models/res3d_model_' + str(num)
            print("weights", weights)
            para_state_dict, _ = fluid.load_dygraph(weights)
            test_model.load_dict(para_state_dict)
            test_model.eval()

            acc_list = []
            for batch_id, data in enumerate(test_reader()):
                dy_x_data = np.array([x[0] for x in data]).astype('float32')
                # Swap axes 1 and 2 of the 5-D clip tensor before feeding the
                # model (presumably (N,T,C,H,W) -> (N,C,T,H,W) — TODO confirm).
                dy_x_data = np.transpose(dy_x_data, (0, 2, 1, 3, 4))
                y_data = np.array([[x[1]] for x in data]).astype('int64')

                img = fluid.dygraph.to_variable(dy_x_data)
                label = fluid.dygraph.to_variable(y_data)
                label.stop_gradient = True

                out, acc = test_model(img, label)
                acc_list.append(acc.numpy()[0])
                if batch_id % 10 == 0:
                    logger.info("step {}: {}, acc: {}".format(
                        num, batch_id, acc.numpy()))
                    print("step {}: {}, acc: {}".format(
                        num, batch_id, acc.numpy()))
            print("测试准确率为:{}".format(np.mean(acc_list)))
            test_acc.append(np.mean(acc_list))
        print('test_acc', test_acc)
        # result_list = []
        # result_list.append(test_acc)
        # Write one accuracy per checkpoint to 'test_result_<timestamp>_.csv'.
        name = ['test_acc']
        np_list = np.array(test_acc).T
        test = pd.DataFrame(columns=name, data=np_list)
        now = int(time.time())
        timeArray = time.localtime(now)
        today_time = time.strftime("%Y-%m-%d-%H-%M-%S", timeArray)
        test.to_csv('test_result_' + today_time + '_.csv')
Ejemplo n.º 4
0
 def __log_files_and_configs(self):
     """Create the results directory, snapshot this script and its key
     collaborator modules into it for reproducibility, redirect print logs
     to a file there, and print the active configuration."""
     self.results_dir_path = tools.create_results_dir("train_seq")
     self.curr_dir_path = os.path.dirname(os.path.realpath(__file__))
     # Hard-coded list of source files to archive alongside the results.
     tools.log_file_content(self.results_dir_path, [os.path.realpath(__file__),
                                                    os.path.join(self.curr_dir_path, "data_roller.py"),
                                                    os.path.join(self.curr_dir_path, "model.py"),
                                                    os.path.join(self.curr_dir_path, "losses.py"),
                                                    os.path.join(self.curr_dir_path, "train.py"),
                                                    os.path.join(self.curr_dir_path, "train_seq.py"),
                                                    os.path.join(self.curr_dir_path, "config.py")])
     tools.set_log_file(os.path.join(self.results_dir_path, "print_logs.txt"))
     config.print_configs(self.cfg)
Ejemplo n.º 5
0
    def __log_files_and_configs(self):
        """Create the results directory, archive every .py file under the
        current directory (excluding anything inside a results folder),
        redirect print logs to a file there, and print the configs."""
        self.results_dir_path = tools.create_results_dir("train_seq")
        self.curr_dir_path = os.path.dirname(os.path.realpath(__file__))

        # Recursively collect source files to snapshot for reproducibility.
        search_pattern = os.path.join(self.curr_dir_path, "**")
        files_to_log = [
            path
            for path in glob.iglob(search_pattern, recursive=True)
            if path.endswith(".py") and "/results/" not in path
        ]
        tools.log_file_content(self.results_dir_path, files_to_log)
        tools.set_log_file(
            os.path.join(self.results_dir_path, "print_logs.txt"))
        config.print_configs(self.cfg)
Ejemplo n.º 6
0
def eval(args):
    """Evaluate an ECO model on the validation split and report which
    classes are misclassified most often.

    NOTE(review): the function name shadows the builtin ``eval``.

    Args:
        args: parsed CLI namespace; reads .config, .use_gpu and .weights
            (the process exits if .weights is not given).
    """
    config = parse_config(args.config)
    val_config = merge_configs(config, 'valid', vars(args))
    train_config = merge_configs(config, 'train', vars(args))
    print_configs(val_config, "Valid")
    place = fluid.CUDAPlace(0) if args.use_gpu else fluid.CPUPlace()
    with fluid.dygraph.guard(place):
        val_model = ECO.ECO(num_classes=train_config['MODEL']['num_classes'],
                              num_segments=train_config['MODEL']['seg_num'])
        # Invert the stored mapping: class index -> label name.
        label_dic = np.load('label_dir.npy', allow_pickle=True).item()
        label_dic = {v: k for k, v in label_dic.items()}

        val_reader = KineticsReader('ECO', 'valid', val_config).create_reader()

        # if no weight files specified, exit()
        if args.weights:
            weights = args.weights
        else:
            print("model path must be specified")
            exit()

        para_state_dict, _ = fluid.load_dygraph(weights)
        val_model.load_dict(para_state_dict)
        val_model.eval()

        acc_list = []
        false_class = []  # labels of every misclassified sample seen so far
        for batch_id, data in enumerate(val_reader()):
            dy_x_data = np.array([x[0] for x in data]).astype('float32')
            y_data = np.array([[x[1]] for x in data]).astype('int64')

            img = fluid.dygraph.to_variable(dy_x_data)
            label = fluid.dygraph.to_variable(y_data)
            label.stop_gradient = True

            out, acc = val_model(img, label)
            if acc.numpy()[0] != 1:
                false_class.append(label.numpy()[0][0])
            acc_list.append(acc.numpy()[0])
            print(batch_id, 'acc:', np.mean(acc_list))
            if len(false_class) == 0:
                continue
            print(np.sort(np.array(false_class)))
            # Renamed from `bin`, which shadowed the builtin bin().
            class_counts = np.bincount(np.array(false_class))
            most_false = np.argmax(class_counts)
            print('false class:', class_counts)
            print('most false class num:', most_false)
        print("validate set acc:{}".format(np.mean(acc_list)))
Ejemplo n.º 7
0
def train(args):
    """Build an I3D-TPN model and print the name of every parameter.

    Despite its name, this function performs no optimisation: it only
    instantiates the network and dumps the keys of its state dict.
    """
    place = fluid.CUDAPlace(0) if args.use_gpu else fluid.CPUPlace()
    with fluid.dygraph.guard(place):
        config = parse_config(args.config)
        train_config = merge_configs(config, 'train', vars(args))
        print_configs(train_config, 'Train')

        train_model = I3D_TPN(config)
        # Iterating a state dict yields its parameter names in order.
        for param_name in train_model.network.state_dict():
            print(param_name)
Ejemplo n.º 8
0
def eval(args):
    """Evaluate a 3D-ResNet on the test split, logging a windowed accuracy
    every 300 batches and the overall mean at the end.

    NOTE(review): the function name shadows the builtin ``eval``.

    The working directory is switched to ``./work`` for the duration of the
    run and restored afterwards (the original saved ``curdir`` but never
    changed back, leaking process-global state to the caller).
    """
    curdir = os.getcwd()
    os.chdir(os.path.join(curdir, 'work'))
    try:
        config = parse_config(args.config)
        val_config = merge_configs(config, 'test', vars(args))
        print_configs(val_config, "test")
        with fluid.dygraph.guard():
            val_model = ResNet_3d()
            # get infer reader
            test_reader = Ucf101Reader(args.model_name.upper(), 'test',
                                       val_config).create_reader()

            # if no weight files specified, exit()
            if args.weights:
                weights = args.weights
            else:
                print("model path must be specified")
                exit()

            para_state_dict, _ = fluid.load_dygraph(weights)
            val_model.load_dict(para_state_dict)
            val_model.eval()

            acc_list = []
            acc_list_local = []  # rolling window, reset after each report
            for batch_id, data in enumerate(test_reader()):
                dy_x_data = np.array([x[0] for x in data]).astype('float32')
                y_data = np.array([[x[1]] for x in data]).astype('int64')

                img = fluid.dygraph.to_variable(dy_x_data)
                label = fluid.dygraph.to_variable(y_data)
                label.stop_gradient = True

                out = val_model(img)
                acc = fluid.layers.accuracy(out, label)
                acc_list.append(acc.numpy()[0])
                acc_list_local.append(acc.numpy()[0])
                if batch_id % 300 == 0 and batch_id != 0:
                    logger.info("valid Loss at step {}:  acc: {}".format(
                        batch_id, np.mean(acc_list_local)))
                    print("valid Loss at  step {}:  acc: {}".format(
                        batch_id, np.mean(acc_list_local)))
                    acc_list_local = []
            logger.info("验证集准确率为:{}".format(np.mean(acc_list)))
            print("验证集准确率为:{}".format(np.mean(acc_list)))
    finally:
        # Restore the caller's working directory even on exit()/exception.
        os.chdir(curdir)
Ejemplo n.º 9
0
def train(args):
    """Smoke-test the I3D-TPN training data pipeline.

    Builds the model and optimizer, then iterates the training reader
    WITHOUT running forward/backward — useful for validating the config
    and timing the reader.
    """
    place = fluid.CUDAPlace(0) if args.use_gpu else fluid.CPUPlace()
    with fluid.dygraph.guard(place):
        config = parse_config(args.config)
        train_config = merge_configs(config, 'train', vars(args))
        print_configs(train_config, 'Train')

        train_model = I3D_TPN(config)
        # The optimizer is constructed (exercising the config) but never
        # stepped in this loop.
        if train_config.TRAIN.optimizer_type == 'SGD':
            opt = fluid.optimizer.Momentum(learning_rate=train_config.TRAIN.learning_rate,
                                           momentum=train_config.TRAIN.momentum,
                                           parameter_list=train_model.parameters(),
                                           use_nesterov=train_config.TRAIN.use_nesterov,
                                           grad_clip=fluid.clip.GradientClipByNorm(clip_norm=40),
                                           regularization=fluid.regularizer.L2Decay(regularization_coeff=train_config.TRAIN.l2_weight_decay))
        elif train_config.TRAIN.optimizer_type == 'Adam':
            opt = fluid.optimizer.Adam(
                learning_rate=train_config['TRAIN']['learning_rate'],
                regularization=fluid.regularizer.L2Decay(train_config['TRAIN']['l2_weight_decay']),
                parameter_list=train_model.parameters(),
            )

        if args.pretrain:
            # Resume from the checkpoint written by a previous run.
            model, _ = fluid.dygraph.load_dygraph(args.save_dir + '/tsn_model')
            train_model.load_dict(model)

        if not os.path.exists(args.save_dir):
            os.makedirs(args.save_dir)

        # get reader (removed a no-op `batch_size = batch_size` self-assignment)
        train_reader = KineticsReader(args.model_name.upper(), 'train', train_config).create_reader()
        print('go to training')
        epochs = args.epoch or train_model.epoch_num()
        for i in range(epochs):
            # Fix: initialize so the print below cannot raise NameError when
            # the reader yields no batches. (Also removed the unused `k = 0`.)
            batch_id = -1
            for batch_id, data in enumerate(train_reader()):
                if batch_id % 10 == 1:
                    print("epoch {} step {}".format(i, batch_id))

            print(batch_id)
Ejemplo n.º 10
0
def eval(args):
    """Run validation for the TSN model and print the mean accuracy.

    Note: the name shadows the builtin ``eval`` (kept for compatibility).
    """
    config = parse_config(args.config)
    val_config = merge_configs(config, 'valid', vars(args))
    print_configs(val_config, "Valid")
    with fluid.dygraph.guard():
        # seg_num passed by keyword, mirroring the fix made in infer.py
        val_model = TSNResNet('TSN',
                              val_config['MODEL']['num_layers'],
                              val_config['MODEL']['num_classes'],
                              seg_num=val_config['MODEL']['seg_num'])

        # Invert the stored mapping: class index -> label name.
        label_dic = np.load('label_dir.npy', allow_pickle=True).item()
        label_dic = dict(zip(label_dic.values(), label_dic.keys()))

        val_reader = KineticsReader(args.model_name.upper(), 'valid',
                                    val_config).create_reader()

        # A weight file is mandatory.
        if not args.weights:
            print("model path must be specified")
            exit()
        weight_path = args.weights

        state_dict, _ = fluid.load_dygraph(weight_path)
        val_model.load_dict(state_dict)
        val_model.eval()

        acc_list = []
        for batch in val_reader():
            frames = np.array([sample[0] for sample in batch]).astype('float32')
            targets = np.array([[sample[1]] for sample in batch]).astype('int64')

            img = fluid.dygraph.to_variable(frames)
            label = fluid.dygraph.to_variable(targets)
            label.stop_gradient = True

            _, acc = val_model(img, label)
            acc_list.append(acc.numpy()[0])

        print("验证集准确率为:{}".format(np.mean(acc_list)))
Ejemplo n.º 11
0
def infer(args):
    """Run inference with a TSN model and print the predicted label for
    each batch alongside the ground-truth labels.

    Args:
        args: parsed CLI namespace; reads .config, .model_name and .weights
            (the process exits if .weights is not given).
    """
    config = parse_config(args.config)
    infer_config = merge_configs(config, 'infer', vars(args))
    print_configs(infer_config, "Infer")
    with fluid.dygraph.guard():
        # seg_num passed by keyword (matches the fixed TSNResNet signature).
        infer_model = TSNResNet('TSN',
                                infer_config['MODEL']['num_layers'],
                                infer_config['MODEL']['num_classes'],
                                seg_num=infer_config['MODEL']['seg_num'])

        # Invert the stored mapping: class index -> label name.
        label_dic = np.load('label_dir.npy', allow_pickle=True).item()
        label_dic = {v: k for k, v in label_dic.items()}

        infer_reader = KineticsReader(args.model_name.upper(), 'infer',
                                      infer_config).create_reader()

        # if no weight files specified, exit()
        if args.weights:
            weights = args.weights
        else:
            print("model path must be specified")
            exit()

        para_state_dict, _ = fluid.load_dygraph(weights)
        infer_model.load_dict(para_state_dict)
        infer_model.eval()

        for batch_id, data in enumerate(infer_reader()):
            dy_x_data = np.array([x[0] for x in data]).astype('float32')
            y_data = [x[1] for x in data]

            img = fluid.dygraph.to_variable(dy_x_data)

            out = infer_model(img).numpy()[0]
            # np.argmax returns the first maximal index — identical to the
            # original np.where(out == np.max(out))[0][0] but in one pass.
            label_id = int(np.argmax(out))
            print("实际标签{}, 预测结果{}".format(y_data, label_dic[label_id]))
Ejemplo n.º 12
0
def eval(args):
    """Validate an I3D-TPN model and print its mean top-1 accuracy.

    Optionally restores weights from ``args.pretrain`` before evaluating.
    """
    place = fluid.CUDAPlace(0) if args.use_gpu else fluid.CPUPlace()
    with fluid.dygraph.guard(place):
        config = parse_config(args.config)
        val_config = merge_configs(config, 'valid', vars(args))
        print_configs(val_config, 'Valid')

        # Named train_model in the original; renamed for clarity — it is
        # only ever evaluated here.
        eval_model = I3D_TPN(config)

        if args.pretrain:
            # Restore previously trained weights before evaluating.
            state_dict, _ = fluid.dygraph.load_dygraph(args.pretrain)
            eval_model.load_dict(state_dict)

        # get reader
        val_reader = KineticsReader(args.model_name.upper(), 'valid',
                                    val_config).create_reader()

        print('go to eval')
        eval_model.eval()
        accuracies = []
        for batch_id, batch in enumerate(tqdm(val_reader())):
            print('eval %d' % batch_id)
            clips = np.array([sample[0] for sample in batch]).astype('float32')
            targets = np.array([[sample[1]] for sample in batch]).astype('int64')
            img = fluid.dygraph.to_variable(clips)
            label = fluid.dygraph.to_variable(targets)
            label.stop_gradient = True
            out, _, _ = eval_model(img, label)
            acc = fluid.layers.accuracy(input=out, label=label)
            accuracies.append(acc.numpy()[0])

        print("TPN: %.6f" % np.mean(accuracies))
Ejemplo n.º 13
0
def train(args):
    """Train I3D-TPN with piecewise LR decay and optional data parallelism;
    validates after every epoch and keeps the best checkpoint by accuracy.

    Fix: the CPU branch previously called ``fluid.CPUPlace()`` without
    assigning the result, leaving ``place`` undefined (NameError) when
    ``args.use_gpu`` was false.
    """
    if args.use_gpu:
        if args.use_data_parallel:
            place = fluid.CUDAPlace(fluid.dygraph.parallel.Env().dev_id)
        else:
            place = fluid.CUDAPlace(0)
    else:
        place = fluid.CPUPlace()
    with fluid.dygraph.guard(place):
        config = parse_config(args.config)
        train_config = merge_configs(config, 'train', vars(args))
        print_configs(train_config, 'Train')

        val_config = merge_configs(config, 'valid', vars(args))
        print_configs(val_config, "Valid")

        if args.use_data_parallel:
            strategy = fluid.dygraph.parallel.prepare_context()

        train_model = I3D_TPN(config)
        # Steps per epoch: explicit config value, else derived from dataset
        # size and batch size.
        step = train_config.TRAIN.step if train_config.TRAIN.step is not None else int(
            train_config.TRAIN.all_num / train_config.TRAIN.batch_size)
        print("step for lr decay: %d" % step)
        decay_epoch = train_config.TRAIN.learning_rate_decay_epoch
        learning_rate_decay = train_config.TRAIN.learning_rate_decay
        base_lr = train_config.TRAIN.learning_rate
        # Piecewise boundaries (in steps) and the lr value on each segment.
        bd = [step * e for e in decay_epoch]
        lr = [base_lr * (learning_rate_decay**i) for i in range(len(bd) + 1)]
        if train_config.TRAIN.optimizer_type == 'SGD':
            opt = fluid.optimizer.Momentum(
                learning_rate=fluid.layers.piecewise_decay(boundaries=bd,
                                                           values=lr),
                momentum=train_config.TRAIN.momentum,
                parameter_list=train_model.parameters(),
                use_nesterov=train_config.TRAIN.use_nesterov,
                grad_clip=fluid.clip.GradientClipByNorm(clip_norm=40),
                regularization=fluid.regularizer.L2Decay(
                    regularization_coeff=train_config.TRAIN.l2_weight_decay))
        elif train_config.TRAIN.optimizer_type == 'Adam':
            opt = fluid.optimizer.Adam(
                learning_rate=fluid.layers.piecewise_decay(boundaries=bd,
                                                           values=lr),
                regularization=fluid.regularizer.L2Decay(
                    train_config['TRAIN']['l2_weight_decay']),
                parameter_list=train_model.parameters(),
            )

        if args.pretrain:
            # Resume from the checkpoint written by a previous run.
            model, _ = fluid.dygraph.load_dygraph(args.save_dir + '/tsn_model')
            train_model.load_dict(model)

        if args.use_data_parallel:
            train_model = fluid.dygraph.parallel.DataParallel(
                train_model, strategy)

        # build model
        if not os.path.exists(args.save_dir):
            os.makedirs(args.save_dir)

        # get reader (removed a no-op `batch_size = batch_size` self-assignment)
        train_reader = KineticsReader(args.model_name.upper(), 'train',
                                      train_config).create_reader()

        if args.use_data_parallel:
            train_reader = fluid.contrib.reader.distributed_batch_reader(
                train_reader)

        val_reader = KineticsReader(args.model_name.upper(), 'valid',
                                    val_config).create_reader()
        print('go to training')
        epochs = args.epoch or train_model.epoch_num()
        acc_history = 0.0  # best validation accuracy seen so far
        for i in range(epochs):
            for batch_id, data in enumerate(train_reader()):
                dy_x_data = np.array([x[0] for x in data]).astype('float32')
                y_data = np.array([[x[1]] for x in data]).astype('int64')
                img = fluid.dygraph.to_variable(dy_x_data)
                label = fluid.dygraph.to_variable(y_data)
                label.stop_gradient = True

                out, acc, loss_TPN = train_model(img, label)

                loss = fluid.layers.softmax_with_cross_entropy(out, label)
                avg_loss = fluid.layers.mean(loss)
                avg_TPN_loss = fluid.layers.mean(loss_TPN)

                # Total loss = classification loss + auxiliary TPN loss.
                all_loss = avg_loss + avg_TPN_loss
                if args.use_data_parallel:
                    # (removed a stray per-batch debug print of the flag)
                    all_loss = train_model.scale_loss(all_loss)
                    all_loss.backward()
                    train_model.apply_collective_grads()
                else:
                    all_loss.backward()

                opt.minimize(all_loss)
                train_model.clear_gradients()

                if batch_id % train_config.TRAIN.visual_step == 0:
                    current_lr = opt.current_step_lr()
                    logger.info(
                        "Loss at epoch {} step {}: {}, AUX loss: {} acc: {}, current_lr: {}"
                        .format(i, batch_id, avg_loss.numpy(),
                                avg_TPN_loss.numpy(), acc.numpy(), current_lr))
                    print(
                        "Loss at epoch {} step {}: {}, AUX loss: {}, acc: {}, current_lr: {}"
                        .format(i, batch_id, avg_loss.numpy(),
                                avg_TPN_loss.numpy(), acc.numpy(), current_lr))
                    fluid.dygraph.save_dygraph(
                        train_model.state_dict(),
                        args.save_dir + '/I3D_tpn_model')

            # End-of-epoch validation pass.
            print('go to eval')
            acc_list = []
            train_model.eval()
            for batch_id, data in enumerate(tqdm(val_reader())):
                dy_x_data = np.array([x[0] for x in data]).astype('float32')
                y_data = np.array([[x[1]] for x in data]).astype('int64')
                img = fluid.dygraph.to_variable(dy_x_data)
                label = fluid.dygraph.to_variable(y_data)
                label.stop_gradient = True
                out_jpg, acc_jpg, _ = train_model(img, label)
                out = out_jpg
                acc = fluid.layers.accuracy(input=out, label=label)
                acc_list.append(acc.numpy()[0])

            print("TPN验证集准确率为:%.6f" % (np.mean(acc_list)))
            print("BEST   TPN验证集准确率为:%.6f" % (acc_history))

            if np.mean(acc_list) > acc_history:
                acc_history = np.mean(acc_list)
                fluid.dygraph.save_dygraph(train_model.state_dict(),
                                           args.save_dir + '/tpn_best')
                print("TPN BEST验证集准确率为:{}".format(np.mean(acc_list)))

            # Back to training mode for the next epoch.
            train_model.train()

        logger.info("Final loss: {}".format(avg_loss.numpy()))
        print("Final loss: {}".format(avg_loss.numpy()))
def train(args):
    """Train a 3D-ResNet on UCF-101 and dump per-epoch train metrics to CSV.

    Fix: the Adam optimizer is now created *after* the optional pretrain
    swap, so it tracks the parameters of the model actually being trained.
    The original bound the optimizer to a model that was then discarded and
    replaced when ``args.pretrain`` was set, so those weights never updated.
    """
    place = fluid.CUDAPlace(0) if args.use_gpu else fluid.CPUPlace()
    with fluid.dygraph.guard(place):
        config = parse_config(args.config)
        train_config = merge_configs(config, 'train', vars(args))
        print_configs(train_config, 'Train')

        train_model = resnet_3d.generate_model(train_config['MODEL']['num_layers'])
        if args.pretrain:
            # Start from a pretrained backbone (1039 classes) and replace the
            # classifier head with a 101-way softmax for UCF-101.
            train_model = resnet_3d.generate_model(train_config['MODEL']['num_layers'], n_classes=1039)
            model, _ = fluid.dygraph.load_dygraph('data/data51645/paddle_dy')
            train_model.load_dict(model)
            train_model.fc = fluid.dygraph.Linear(512 * 4, 101, act='softmax')
            print('pretrain is ok')

        # Bound to the FINAL model's parameters (after any pretrain swap).
        opt = fluid.optimizer.AdamOptimizer(learning_rate=train_config['MODEL']['learning_rate'],
                                            parameter_list=train_model.parameters())

        # build model
        if not os.path.exists(args.save_dir):
            os.makedirs(args.save_dir)

        # get reader (removed a no-op `batch_size = batch_size` self-assignment)
        train_reader = Ucf101(args.model_name.upper(), 'train', train_config).create_reader()

        epochs = args.epoch or train_model.epoch_num()

        t_acc = []   # mean train accuracy per epoch
        t_loss = []  # mean train loss per epoch
        for i in range(epochs):
            train_acc_list = []
            train_loss_list = []
            for batch_id, data in enumerate(train_reader()):
                dy_x_data = np.array([x[0] for x in data]).astype('float32')
                # Swap axes 1 and 2 of the 5-D clip tensor before feeding the
                # model (presumably (N,T,C,H,W) -> (N,C,T,H,W) — TODO confirm).
                dy_x_data = np.transpose(dy_x_data, (0, 2, 1, 3, 4))
                y_data = np.array([[x[1]] for x in data]).astype('int64')

                img = fluid.dygraph.to_variable(dy_x_data)
                label = fluid.dygraph.to_variable(y_data)
                label.stop_gradient = True

                out, acc = train_model(img, label)
                train_acc_list.append(acc.numpy()[0])

                loss = fluid.layers.cross_entropy(out, label)
                avg_loss = fluid.layers.mean(loss)
                train_loss_list.append(avg_loss.numpy())

                avg_loss.backward()
                opt.minimize(avg_loss)
                train_model.clear_gradients()

                if batch_id % 10 == 0:
                    logger.info(
                        "Loss at epoch {} step {}: {}, acc: {}".format(i, batch_id, avg_loss.numpy(), acc.numpy()))
                    print("Loss at epoch {} step {}: {}, acc: {}".format(i, batch_id, avg_loss.numpy(), acc.numpy()))
            t_loss.append(np.mean(train_loss_list))
            t_acc.append(np.mean(train_acc_list))
            # Checkpoint every epoch (1-based suffix).
            fluid.dygraph.save_dygraph(train_model.state_dict(), args.save_dir + '/res3d_model_' + str(i + 1))

        print('t_acc', t_acc)
        print('t_loss', t_loss)
        # Dump per-epoch metrics to a timestamped CSV (one row per epoch).
        result_list = [t_acc, t_loss]
        np_list = np.array(result_list).T
        name = ['train_acc', 'train_loss']
        metrics_frame = pd.DataFrame(columns=name, data=np_list)
        now = int(time.time())
        timeArray = time.localtime(now)
        today_time = time.strftime("%Y-%m-%d-%H-%M-%S", timeArray)
        metrics_frame.to_csv('train_result_' + today_time + '_.csv')
Ejemplo n.º 15
0
def train(args, distributed):
    """Train an ECO model in dygraph mode, optionally data-parallel.

    Implements several training tricks: global-norm gradient clipping,
    per-layer optimizer policies, gradient accumulation over 4 batches, and
    a "saturate" learning-rate decay driven by validation precision (lr is
    decayed by 10x each time validation prec@1 stalls for args.num_saturate
    evaluations).

    Args:
        args: parsed namespace; uses config, use_gpu, gpu_num, gd, pretrain,
            save_dir, epoch, num_saturate and eval_freq.
        distributed: bool, True to run with fluid dygraph data parallelism.
    """
    # ===================== GPU CONF =====================#
    if distributed:
        # if run on parallel mode, each trainer picks its own device id
        place = fluid.CUDAPlace(fluid.dygraph.parallel.Env().dev_id)
    else:
        # if run on single GPU mode, select the gpu number from args.
        args.use_gpu = True
        place = fluid.CUDAPlace(args.gpu_num) if args.use_gpu else fluid.CPUPlace()
    # ===================== Dygraph Mode =====================#
    with fluid.dygraph.guard(place):
        # leverage config handling from the TSN training script
        config = parse_config(args.config)
        train_config = merge_configs(config, 'train', vars(args))
        val_config = merge_configs(config, 'valid', vars(args))
        print_configs(train_config, 'Train')

        # ===================== Init ECO =====================#
        train_model = ECO.ECO(num_classes=train_config['MODEL']['num_classes'],
                              num_segments=train_config['MODEL']['seg_num'])
        if distributed:
            strategy = fluid.dygraph.parallel.prepare_context()
            train_model = fluid.dygraph.parallel.DataParallel(train_model, strategy)

        # trick 1: use clip gradient method to avoid gradient explosion.
        # BUGFIX: clip must default to None, otherwise the Momentum call
        # below raised NameError whenever args.gd was unset
        # (grad_clip=None simply disables clipping).
        clip = None
        if args.gd is not None:
            clip = fluid.clip.GradientClipByGlobalNorm(clip_norm=args.gd)
            print('clip:', clip)

        # ===================== Init Optimizer =====================#
        # optimizer config: use momentum, nesterov, weight decay, lr decay
        learning_rate = 0.001
        opt = fluid.optimizer.Momentum(learning_rate, 0.9,
                                       parameter_list=train_model.parameters(),
                                       use_nesterov=True,
                                       regularization=fluid.regularizer.L2Decay(regularization_coeff=5e-4),
                                       grad_clip=clip)
        # trick 2: Freezing BatchNorm2D except the first one.
        # trick 3: make all weight layer lr mult as 1, bias lr mult as 2.
        get_optim_policies(opt)
        print('get_optim_policies:--batch_norm_0.w_0',
              opt._parameter_list[2].optimize_attr, opt._parameter_list[2].stop_gradient)
        print('get_optim_policies:--batch_norm_0.b_0',
              opt._parameter_list[3].optimize_attr, opt._parameter_list[2].stop_gradient)

        # ===================== Use Pretrained Model =====================#
        # use pretrained model: ECO_Full_rgb_model_Kinetics.pth 2.tar (download from MZO git)
        # then transform it from torch to paddle weight except fc layer.
        if args.pretrain:
            model, _ = fluid.dygraph.load_dygraph(args.save_dir + '/ECO_FULL_RGB_seg16')
            # also tried using pretrained model on torch, 32F-92.9%,16F-91.8% precision trained on torch
            # model, _ = fluid.dygraph.load_dygraph(args.save_dir + '/eco_91.81_model_best')
            train_model.load_dict(model)

        # make sure the checkpoint directory exists
        if not os.path.exists(args.save_dir):
            os.makedirs(args.save_dir)

        # ===================== Init Data Reader =====================#
        # leverage reader from the TSN training script
        train_reader = KineticsReader('ECO', 'train', train_config).create_reader()
        print('train_reader', train_reader)
        val_reader = KineticsReader('ECO', 'valid', val_config).create_reader()
        if distributed:
            train_reader = fluid.contrib.reader.distributed_batch_reader(train_reader)

        # ===================== Init Trick Params =====================#
        epochs = args.epoch or train_model.epoch_num()
        loss_summ = 0      # accumulates loss across 4-batch groups (trick 5)
        saturate_cnt = 0   # consecutive evals without prec@1 improvement
        exp_num = 0        # number of lr decays applied so far
        best_prec1 = 0

        for i in range(epochs):
            train_model.train()
            # trick 4: Saturate lr decay: different from lr piecewise decay or others.
            # prec1 is computed every eval; if it does not rise for
            # args.num_saturate evaluations (model saturated), decay the lr
            # and rebuild the optimizer with the new rate.
            if saturate_cnt == args.num_saturate:
                exp_num = exp_num + 1
                saturate_cnt = 0
                decay = 0.1 ** (exp_num)
                learning_rate = learning_rate * decay
                opt = fluid.optimizer.Momentum(learning_rate, 0.9,
                                               parameter_list=train_model.parameters(),
                                               use_nesterov=True,
                                               regularization=fluid.regularizer.L2Decay(regularization_coeff=5e-4),
                                               grad_clip=clip)
                print('get_optim_policies:--batch_norm_0.w_0', opt._parameter_list[2].optimize_attr,
                      opt._parameter_list[2].stop_gradient)
                print('get_optim_policies:--batch_norm_0.b_0', opt._parameter_list[3].optimize_attr,
                      opt._parameter_list[2].stop_gradient)
                print("- Learning rate decreases by a factor of '{}'".format(10 ** (exp_num)))

            for batch_id, data in enumerate(train_reader()):
                lr = opt.current_step_lr()
                print('lr:', lr)  # check lr every batch ids
                dy_x_data = np.array([x[0] for x in data]).astype('float32')
                y_data = np.array([[x[1]] for x in data]).astype('int64')

                img = fluid.dygraph.to_variable(dy_x_data)
                label = fluid.dygraph.to_variable(y_data)
                label.stop_gradient = True

                out, acc = train_model(img, label)
                loss = fluid.layers.cross_entropy(out, label)
                avg_loss = fluid.layers.mean(loss)
                loss_summ += avg_loss
                if distributed:
                    avg_loss = train_model.scale_loss(avg_loss)
                avg_loss.backward()
                if distributed:
                    train_model.apply_collective_grads()

                if (batch_id + 1) % 4 == 0:
                    # trick 5: apply accumulated gradients every 4 batches.
                    # NOTE(review): if the epoch's batch count is not a
                    # multiple of 4, the tail batches are never applied.
                    opt.minimize(loss_summ)
                    opt.clear_gradients()
                    loss_summ = 0

                if batch_id % 1 == 0:
                    logger.info(
                        "Loss at epoch {} step {}: {}, acc: {}".format(i, batch_id, avg_loss.numpy(), acc.numpy()))
                    print("Loss at epoch {} step {}: {}, acc: {}".format(i, batch_id, avg_loss.numpy(), acc.numpy()))

            # BUGFIX: was `args.epochs`, which is not the attribute used
            # elsewhere (args.epoch, see `epochs` above) and raised
            # AttributeError on the last-epoch check.
            if (i + 1) % args.eval_freq == 0 or i == epochs - 1:
                train_model.eval()
                acc_list = []
                false_class = []

                for batch_id, data in enumerate(val_reader()):
                    dy_x_data = np.array([x[0] for x in data]).astype('float32')
                    y_data = np.array([[x[1]] for x in data]).astype('int64')

                    img = fluid.dygraph.to_variable(dy_x_data)
                    label = fluid.dygraph.to_variable(y_data)
                    label.stop_gradient = True

                    out, acc = train_model(img, label)
                    if acc.numpy()[0] != 1:
                        false_class.append(label.numpy()[0][0])
                    acc_list.append(acc.numpy()[0])
                    print(batch_id, 'acc:', np.mean(acc_list))
                    # NOTE(review): this `continue` is the last statement of
                    # the loop body, so the check below has no effect.
                    if len(false_class) == 0:
                        continue
                print("validate set acc:{}".format(np.mean(acc_list)))
                prec1 = np.mean(acc_list)
                # remember best prec@1 and save checkpoint
                is_best = prec1 > best_prec1
                if is_best:
                    saturate_cnt = 0
                    fluid.dygraph.save_dygraph(train_model.state_dict(),
                                               args.save_dir + '/ECO_FULL_1/' + str(i) + '_best_' + str(prec1))
                else:
                    saturate_cnt = saturate_cnt + 1

                print("- Validation Prec@1 saturates for {} epochs.".format(saturate_cnt), best_prec1)
                best_prec1 = max(prec1, best_prec1)

        logger.info("Final loss: {}".format(avg_loss.numpy()))
        print("Final loss: {}".format(avg_loss.numpy()))
Ejemplo n.º 16
0
def train(args):
    """Build and run static-graph training for an ActionNet model.

    Builds separate train/valid programs that share a startup program (so
    parameters are shared), adapts the batch size to the number of visible
    GPUs and the dataset size, loads resume or pretrain weights, then
    delegates the epoch loop to utils.train_with_pyreader.

    Args:
        args: parsed namespace; uses config, use_gpu, model_name, save_dir,
            resume, pretrain, epoch_num, log_interval and valid_interval.
    """
    logger.info("Start train program")
    # parse config
    config_info = config.parse_config(args.config)
    train_config = config.merge_configs(config_info, 'train', vars(args))
    valid_config = config.merge_configs(config_info, 'valid', vars(args))
    valid_config['MODEL']['save_dir'] = args.save_dir

    # bs_denominator = number of devices the global batch is split across
    bs_denominator = 1
    if args.use_gpu:
        # check number of GPUs
        gpus = os.getenv("CUDA_VISIBLE_DEVICES", "")
        if gpus == "":
            pass
        else:
            gpus = gpus.split(",")
            num_gpus = len(gpus)
            assert num_gpus == train_config.TRAIN.num_gpus, \
                "num_gpus({}) set by CUDA_VISIBLE_DEVICES" \
                "shoud be the same as that" \
                "set in {}({})".format(
                    num_gpus, args.config, train_config.TRAIN.num_gpus)
        bs_denominator = train_config.TRAIN.num_gpus

    # adaptive batch size: cap at num_samples/10, round down to a multiple
    # of the GPU count, but never below one sample per GPU
    train_batch_size_in = train_config.TRAIN.batch_size
    #  train_learning_rate_in = train_config.TRAIN.learning_rate
    train_config.TRAIN.batch_size = min(
        int(train_config.TRAIN.num_samples / 10), train_batch_size_in)
    train_config.TRAIN.batch_size = int(
        train_config.TRAIN.batch_size / bs_denominator) * bs_denominator
    train_config.TRAIN.batch_size = max(train_config.TRAIN.batch_size,
                                        bs_denominator)
    # train_config.TRAIN.learning_rate = float(train_learning_rate_in) / float(train_batch_size_in) \
    #     * train_config.TRAIN.batch_size

    # same adaptive batch-size rule for validation
    val_batch_size_in = valid_config.VALID.batch_size
    valid_config.VALID.batch_size = min(
        int(valid_config.VALID.num_samples / 10), val_batch_size_in)
    valid_config.VALID.batch_size = int(
        valid_config.VALID.batch_size / bs_denominator) * bs_denominator
    valid_config.VALID.batch_size = max(valid_config.VALID.batch_size,
                                        bs_denominator)

    # model remove bn when train every gpu batch_size is small
    if int(train_config.TRAIN.batch_size /
           bs_denominator) < train_config.MODEL.modelbn_min_everygpu_bs:
        train_config.MODEL.with_bn = False
        valid_config.MODEL.with_bn = False
    else:
        train_config.MODEL.with_bn = True
        valid_config.MODEL.with_bn = True

    config.print_configs(train_config, 'Train')
    train_model = action_net.ActionNet(args.model_name,
                                       train_config,
                                       mode='train')
    valid_model = action_net.ActionNet(args.model_name,
                                       valid_config,
                                       mode='valid')

    # build model: train program shares the startup program with valid below
    startup = fluid.Program()
    train_prog = fluid.Program()
    with fluid.program_guard(train_prog, startup):
        with fluid.unique_name.guard():
            train_model.build_input(use_pyreader=True)
            train_model.build_model()
            # for the input, has the form [data1, data2,..., label], so train_feeds[-1] is label
            train_feeds = train_model.feeds()
            train_fetch_list = train_model.fetches()
            train_loss = train_fetch_list[0]
            # persistable so fetched vars survive between executor runs
            for item in train_fetch_list:
                item.persistable = True
            optimizer = train_model.optimizer()
            optimizer.minimize(train_loss)
            train_pyreader = train_model.pyreader()

    valid_prog = fluid.Program()
    with fluid.program_guard(valid_prog, startup):
        with fluid.unique_name.guard():
            valid_model.build_input(use_pyreader=True)
            valid_model.build_model()
            valid_feeds = valid_model.feeds()
            valid_fetch_list = valid_model.fetches()
            valid_pyreader = valid_model.pyreader()
            for item in valid_fetch_list:
                item.persistable = True

    # clone for inference: strips backward/optimizer ops
    valid_prog = valid_prog.clone(for_test=True)
    place = fluid.CUDAPlace(0) if args.use_gpu else fluid.CPUPlace()
    exe = fluid.Executor(place)
    exe.run(startup)

    #print_prog(train_prog)
    #print_prog(valid_prog)

    if args.resume:
        # if resume weights is given, load resume weights directly
        assert os.path.exists(args.resume), \
            "Given resume weight dir {} not exist.".format(args.resume)

        def if_exist(var):
            return os.path.exists(os.path.join(args.resume, var.name))

        fluid.io.load_vars(exe,
                           args.resume,
                           predicate=if_exist,
                           main_program=train_prog)
    else:
        # if not in resume mode, load pretrain weights
        if args.pretrain:
            assert os.path.exists(args.pretrain), \
                "Given pretrain weight dir {} not exist.".format(args.pretrain)
            # NOTE(review): args.pretrain is already truthy inside this
            # branch, so the `or` fallback below can never fire.
            pretrain = args.pretrain or train_model.get_pretrain_weights()
            if pretrain:
                train_model.load_pretrain_params_file(exe, pretrain,
                                                      train_prog, place)

    build_strategy = fluid.BuildStrategy()
    build_strategy.enable_inplace = True

    compiled_train_prog = fluid.compiler.CompiledProgram(
        train_prog).with_data_parallel(loss_name=train_loss.name,
                                       build_strategy=build_strategy)
    # valid shares parameters with the compiled train program
    compiled_valid_prog = fluid.compiler.CompiledProgram(
        valid_prog).with_data_parallel(share_vars_from=compiled_train_prog,
                                       build_strategy=build_strategy)
    # get reader: per-device batch size = global batch size / device count
    train_config.TRAIN.batch_size = int(train_config.TRAIN.batch_size /
                                        bs_denominator)
    valid_config.VALID.batch_size = int(valid_config.VALID.batch_size /
                                        bs_denominator)
    print("config setting")
    train_dataload = feature_reader.FeatureReader(args.model_name.upper(),
                                                  'train', train_config,
                                                  bs_denominator)
    train_reader = train_dataload.create_reader()
    print("train reader")
    valid_dataload = feature_reader.FeatureReader(args.model_name.upper(),
                                                  'valid', valid_config,
                                                  bs_denominator)
    valid_reader = valid_dataload.create_reader()

    # get metrics
    train_metrics = accuracy_metrics.MetricsCalculator(args.model_name.upper(),
                                                       'train', train_config)
    valid_metrics = accuracy_metrics.MetricsCalculator(args.model_name.upper(),
                                                       'valid', valid_config)

    epochs = args.epoch_num or train_model.epoch_num()
    print("epoch is ", epochs)

    exe_places = fluid.cuda_places() if args.use_gpu else fluid.cpu_places()
    train_pyreader.decorate_sample_list_generator(train_reader,
                                                  places=exe_places)
    valid_pyreader.decorate_sample_list_generator(valid_reader,
                                                  places=exe_places)

    # the actual epoch/step loop (logging, validation, checkpointing) lives
    # in utils.train_with_pyreader
    utils.train_with_pyreader(
        exe,
        train_prog,
        compiled_train_prog,  # train_exe,
        train_pyreader,
        train_fetch_list,
        train_metrics,
        epochs=epochs,
        log_interval=args.log_interval,
        valid_interval=args.valid_interval,
        save_dir=args.save_dir,
        save_model_name=args.model_name,
        compiled_test_prog=compiled_valid_prog,  # test_exe=valid_exe,
        test_pyreader=valid_pyreader,
        test_fetch_list=valid_fetch_list,
        test_metrics=valid_metrics)

    logger.info("Finish program")
def train(args):
    """Train a ResNet_3d model on UCF101 in dygraph mode.

    Supports resuming from a previous checkpoint or initializing from 3D
    pretrained weights (fc layer excluded). Validates every third epoch.

    Args:
        args: parsed namespace; uses config, use_gpu, save_dir, resume,
            pretrain, model_name and epoch.
    """
    # parse config
    place = fluid.CUDAPlace(0) if args.use_gpu else fluid.CPUPlace()
    with fluid.dygraph.guard(place):
        config = parse_config(args.config)
        train_config = merge_configs(config, 'train', vars(args))
        valid_config = merge_configs(config, 'valid', vars(args))
        print_configs(train_config, 'train')

        # instantiate the user-defined network
        train_model = ResNet_3d()
        train_model.train()
        opt = fluid.optimizer.Momentum(
            config.TRAIN.learning_rate,
            0.9,
            parameter_list=train_model.parameters(),
            regularization=fluid.regularizer.L2Decay(
                config.TRAIN.l2_weight_decay))

        # load parameters: resume from the last checkpoint takes precedence
        # over pretrained 3D weights
        if args.resume:  # BUGFIX: was `args.resume == True`
            # NOTE(review): load_dygraph is given a path with the
            # '.pdparams' suffix here; verify the installed paddle version
            # accepts the suffixed form.
            model, _ = fluid.dygraph.load_dygraph(args.save_dir +
                                                  '/resnet_3d_model.pdparams')
            train_model.load_dict(model)
            print('Resume from ' + args.save_dir +
                  '/resnet_3d_model.pdparams')
        elif args.pretrain:
            # initialize from converted 3D pretrained weights, skipping the
            # fc layer (its shape depends on the class count)
            pretrain_weights = fluid.io.load_program_state(
                args.pretrain + '/resnet_3d_model1.pdparams')
            inner_state_dict = train_model.state_dict()
            print('Pretrain with ' + args.pretrain)
            for name, para in inner_state_dict.items():
                if ((name in pretrain_weights) and (not ('fc' in para.name))):
                    para.set_value(pretrain_weights[name])
                else:
                    print('del ' + para.name)

        # make sure the checkpoint directory exists
        if not os.path.exists(args.save_dir):
            os.makedirs(args.save_dir)

        # get readers
        train_reader = Ucf101Reader(args.model_name.upper(), 'train',
                                    train_config).create_reader()
        valid_reader = Ucf101Reader(args.model_name.upper(), 'valid',
                                    valid_config).create_reader()
        epochs = args.epoch or train_config.TRAIN.epoch
        for i in range(epochs):
            train_model.train()  # enable BatchNorm / Dropout training mode
            for batch_id, data in enumerate(train_reader()):
                dy_x_data = np.array([x[0] for x in data]).astype('float32')
                y_data = np.array([[x[1]] for x in data]).astype('int64')

                img = fluid.dygraph.to_variable(dy_x_data)
                label = fluid.dygraph.to_variable(y_data)
                label.stop_gradient = True

                out = train_model(img)
                acc = fluid.layers.accuracy(out, label)
                loss = fluid.layers.cross_entropy(out, label)
                avg_loss = fluid.layers.mean(loss)

                avg_loss.backward()

                opt.minimize(avg_loss)
                train_model.clear_gradients()

                if batch_id % 10 == 0:
                    logger.info("Loss at epoch {} step {}: {}, acc: {}".format(
                        i, batch_id, avg_loss.numpy(), acc.numpy()))
                    print("Loss at epoch {} step {}: {}, acc: {}".format(
                        i, batch_id, avg_loss.numpy(), acc.numpy()))
            # rolling checkpoint, overwritten every epoch
            fluid.dygraph.save_dygraph(train_model.state_dict(),
                                       args.save_dir + '/resnet_3d_model')

            # validate every third epoch (skipping epoch 0)
            if ((i % 3) == 0 and i != 0):
                acc_list = []
                avg_loss_list = []
                train_model.eval()
                for batch_id, data in enumerate(valid_reader()):
                    dy_x_data = np.array([x[0]
                                          for x in data]).astype('float32')
                    y_data = np.array([[x[1]] for x in data]).astype('int64')

                    img = fluid.dygraph.to_variable(dy_x_data)
                    label = fluid.dygraph.to_variable(y_data)
                    label.stop_gradient = True
                    out = train_model(img)
                    acc = fluid.layers.accuracy(out, label)
                    loss = fluid.layers.cross_entropy(out, label)
                    avg_loss = fluid.layers.mean(loss)
                    acc_list.append(acc.numpy()[0])
                    avg_loss_list.append(avg_loss.numpy())
                    if batch_id % 20 == 0:
                        logger.info(
                            "valid Loss at step {}: {}, acc: {}".format(
                                batch_id, avg_loss.numpy(), acc.numpy()))
                        print("valid Loss at  step {}: {}, acc: {}".format(
                            batch_id, avg_loss.numpy(), acc.numpy()))
                print("验证集准确率为:{}".format(np.mean(acc_list)))
                print("验证集loss为:{}".format(np.mean(avg_loss_list)))
Ejemplo n.º 18
0
def train(args):
    """Run dygraph training of an ECO GoogLeNet and track accuracy history.

    Every 200 steps the current weights are checkpointed, the model is
    validated, and — when validation improves — the best weights plus the
    accuracy curves are persisted.

    Args:
        args: parsed namespace; uses config, use_gpu, pretrain, save_dir,
            model_name and epoch.
    """
    train_acc_history = []
    val_acc_history = []
    best_val_result = 0

    # parse config
    device = fluid.CUDAPlace(0) if args.use_gpu else fluid.CPUPlace()
    with fluid.dygraph.guard(device):
        cfg = parse_config(args.config)
        train_cfg = merge_configs(cfg, 'train', vars(args))
        print_configs(train_cfg, 'Train')

        train_model = ECO.GoogLeNet(train_cfg['MODEL']['num_classes'],
                                    train_cfg['MODEL']['seg_num'],
                                    train_cfg['MODEL']['seglen'], 'RGB')
        opt = fluid.optimizer.Momentum(
            0.001,
            0.9,
            parameter_list=train_model.parameters(),
            use_nesterov=True,
            regularization=fluid.regularizer.L2Decay(
                regularization_coeff=0.0005))

        # optionally warm-start from the best checkpoint so far
        if args.pretrain:
            weights, _ = fluid.dygraph.load_dygraph('trained_model/best_model')
            train_model.load_dict(weights)

        # make sure the checkpoint directory exists
        if not os.path.exists(args.save_dir):
            os.makedirs(args.save_dir)

        train_reader = KineticsReader(args.model_name.upper(), 'train',
                                      train_cfg).create_reader()

        epochs = args.epoch or train_model.epoch_num()

        train_model.train()

        for epoch in range(epochs):
            for step, batch in enumerate(train_reader()):
                frames = np.array([item[0] for item in batch]).astype('float32')
                targets = np.array([[item[1]] for item in batch]).astype('int64')

                img = fluid.dygraph.to_variable(frames)
                label = fluid.dygraph.to_variable(targets)
                label.stop_gradient = True

                out, acc = train_model(img, label)
                if out is None:
                    # model produced no output for this batch; skip the update
                    continue

                loss = fluid.layers.cross_entropy(out, label)
                avg_loss = fluid.layers.mean(loss)
                avg_loss.backward()
                opt.minimize(avg_loss)
                train_model.clear_gradients()

                if step % 200 == 0:
                    print("Loss at epoch {} step {}: {}, acc: {}".format(
                        epoch, step, avg_loss.numpy(), acc.numpy()))
                    fluid.dygraph.save_dygraph(train_model.state_dict(),
                                               args.save_dir + '/ucf_model')
                    result = validate_model()

                    val_acc_history.append(result)
                    if result > best_val_result:
                        best_val_result = result
                        print('The best result is ' + str(result))
                        fluid.save_dygraph(train_model.state_dict(),
                                           'trained_model/best_model')
                        np.savez('result_data/ucf_data.npz',
                                 all_train_rewards=train_acc_history,
                                 all_test_rewards=val_acc_history)

            # record the last training-batch accuracy of the epoch
            train_acc_history.append(acc.numpy())

        logger.info("Final loss: {}".format(avg_loss.numpy()))
        print("Final loss: {}".format(avg_loss.numpy()))

        np.savez('result_data/ucf_data.npz',
                 all_train_rewards=train_acc_history,
                 all_test_rewards=val_acc_history)
import config
import tools
import os
import datetime
import model
import simple_model
import losses
import tensorflow as tf
import numpy as np
import time

# =================== CONFIGURATIONS ========================
# cfg = config.SeqTrainConfigs
cfg = config.SeqTrainLidarConfig
val_cfg = config.SeqTrainConfigsSmallStepsValidation
config.print_configs(cfg)

# initial learning rate plus an epoch-indexed decay schedule:
# maps {epoch at which the rate takes effect: learning rate}
lr_set = 0.0001
lr_schedule = {
    0: 0.0001,
    3: 0.00008,
    7: 0.00005,
    13: 0.000002,
    20: 0.000001,
    50: 0.0000001
}
# alternative (disabled) schedule kept for reference:
# lr_schedule = {
#     0:   0.00001,
#     40:  0.00001,
#     70:  0.00001,
#     80:  0.000002,
import numpy as np
import os
import pykitti
import transformations

# settings for dumping KITTI odometry trajectories from a trained model
convert_to_camera_frame = False
dir_name = "trajectory_results"
# kitti_seqs = ["00", "01", "02", "08", "09"]
# kitti_seqs = ["04", "05", "06", "07", "10"]
kitti_seqs = ["00", "01", "02", "04", "05", "06", "07", "08", "09", "10"]
# kitti_seqs = ["08"]
restore_model_file = "/home/cs4li/Dev/end_to_end_odometry/results/train_seq_20180813-12-32-50/model_epoch_checkpoint-145"

save_ground_truth = True
# NOTE(review): `config` is not imported in this fragment — presumably
# supplied elsewhere in the original script; verify before running.
config_class = config.SeqTrainLidarConfig
config.print_configs(config_class)
cfg = config_class()
cfg_si = config_class()

# Manipulate the configurations for evaluation:
# single-step, batch-of-one inference without bidirectional augmentation
cfg.timesteps = 1
cfg.sequence_stride = 1
cfg.batch_size = 1
cfg.bidir_aug = False
cfg.use_init = False

cfg_si.timesteps = 1
cfg_si.sequence_stride = 1
cfg_si.batch_size = 1
cfg_si.bidir_aug = False
# cfg_si.use_init = what ever the original setting was
Ejemplo n.º 21
0
def train(args):
    """Train a ResNet_3d model on UCF101 with piecewise lr decay.

    The learning rate is decayed once, halfway through training. Supports
    resuming from a checkpoint or initializing from 3D pretrained weights
    (fc layer excluded). Validates after every epoch, saving both a rolling
    and a per-epoch checkpoint.

    Args:
        args: parsed namespace; uses config, use_gpu, save_dir, resume,
            pretrain and model_name.
    """
    # parse config
    place = fluid.CUDAPlace(0) if args.use_gpu else fluid.CPUPlace()
    # NOTE(review): chdir changes the process-wide working directory, so
    # every relative path below resolves under ./work
    curdir = os.getcwd()
    os.chdir(os.path.join(curdir, 'work'))
    print(datetime.datetime.now())
    logger.info(datetime.datetime.now())
    with fluid.dygraph.guard(place):
        config = parse_config(args.config)
        train_config = merge_configs(config, 'train', vars(args))
        valid_config = merge_configs(config, 'valid', vars(args))
        print_configs(train_config, 'Train')

        # instantiate the user-defined network
        train_model = ResNet_3d()
        train_model.train()
        # optimizer and learning-rate schedule: decay once at mid-training
        batch_size = train_config.TRAIN.batch_size
        lr = train_config.TRAIN.learning_rate
        lr_decay = train_config.TRAIN.learning_rate_decay
        step = int(train_config.TRAIN.total_videos / batch_size + 1)
        epochs = train_config.TRAIN.epoch
        bd = [step * epochs / 2]
        lr = [lr, lr * lr_decay]
        lr = fluid.layers.piecewise_decay(boundaries=bd, values=lr)
        opt = fluid.optimizer.Momentum(
            lr,
            0.9,
            parameter_list=train_model.parameters(),
            regularization=fluid.regularizer.L2Decay(
                config.TRAIN.l2_weight_decay))

        # load parameters: resume takes precedence over pretrain
        if args.resume:  # BUGFIX: was `args.resume == True`
            model, _ = fluid.dygraph.load_dygraph(args.save_dir +
                                                  '/resnet_3d_model')
            train_model.load_dict(model)
            print('Resume from ' + args.save_dir + '/resnet_3d_model')
        elif args.pretrain:
            # initialize from 3D pretrained weights, skipping the fc layer
            pretrain_weights = fluid.io.load_program_state(args.pretrain)
            inner_state_dict = train_model.state_dict()
            print('Pretrain with ' + args.pretrain)
            for name, para in inner_state_dict.items():
                if ((name in pretrain_weights) and (not ('fc' in para.name))):
                    para.set_value(pretrain_weights[name])
                else:
                    print('del ' + para.name)
        # (a commented-out variant that inflated 2D conv weights to 3D was
        # removed here; recover it from version control if needed)

        # make sure the checkpoint directory exists
        if not os.path.exists(args.save_dir):
            os.makedirs(args.save_dir)

        # get readers
        train_reader = Ucf101Reader(args.model_name.upper(), 'train',
                                    train_config).create_reader()
        valid_reader = Ucf101Reader(args.model_name.upper(), 'valid',
                                    valid_config).create_reader()

        for i in range(epochs):
            train_model.train()
            cur_lr = opt.current_step_lr()
            logger.info('Epoch{} lr={}'.format(i, cur_lr))
            print('Epoch{} lr={}'.format(i, cur_lr))

            for batch_id, data in enumerate(train_reader()):
                dy_x_data = np.array([x[0] for x in data]).astype('float32')
                y_data = np.array([[x[1]] for x in data]).astype('int64')
                batchsize = dy_x_data.shape[0]

                # forward/backward the batch in two halves (gradient
                # accumulation); each half contributes half the mean loss so
                # the applied gradient matches a single full-batch step
                img = fluid.dygraph.to_variable(dy_x_data[0:batchsize //
                                                          2, :, :, :, :])
                label = fluid.dygraph.to_variable(y_data[0:batchsize // 2, :])
                label.stop_gradient = True
                out = train_model(img)
                acc1 = fluid.layers.accuracy(out, label)
                loss1 = fluid.layers.cross_entropy(out, label)
                avg_loss1 = fluid.layers.mean(loss1) / 2
                avg_loss1.backward()

                img = fluid.dygraph.to_variable(dy_x_data[batchsize //
                                                          2:, :, :, :, :])
                label = fluid.dygraph.to_variable(y_data[batchsize // 2:, :])
                label.stop_gradient = True
                out = train_model(img)
                acc2 = fluid.layers.accuracy(out, label)
                loss2 = fluid.layers.cross_entropy(out, label)
                avg_loss2 = fluid.layers.mean(loss2) / 2
                avg_loss2.backward()

                opt.minimize(avg_loss1 + avg_loss2)
                train_model.clear_gradients()

                acc = (acc1 + acc2) / 2
                avg_loss = avg_loss1 + avg_loss2

                if batch_id % 10 == 0:
                    logger.info("Loss at epoch {} step {}: {}, acc: {}".format(
                        i, batch_id, avg_loss.numpy(), acc.numpy()))
                    print("Loss at epoch {} step {}: {}, acc: {}".format(
                        i, batch_id, avg_loss.numpy(), acc.numpy()))
            # save both a rolling checkpoint and a per-epoch snapshot
            fluid.dygraph.save_dygraph(train_model.state_dict(),
                                       args.save_dir + '/resnet_3d_model')
            fluid.dygraph.save_dygraph(
                train_model.state_dict(),
                args.save_dir + '/resnet_3d_model_epoch{}'.format(i))
            # validate every epoch
            if (i % 1 == 0):
                acc_list = []
                avg_loss_list = []
                train_model.eval()
                for batch_id, data in enumerate(valid_reader()):
                    dy_x_data = np.array([x[0]
                                          for x in data]).astype('float32')
                    y_data = np.array([[x[1]] for x in data]).astype('int64')

                    img = fluid.dygraph.to_variable(dy_x_data)
                    label = fluid.dygraph.to_variable(y_data)
                    label.stop_gradient = True
                    out = train_model(img)
                    acc = fluid.layers.accuracy(out, label)
                    loss = fluid.layers.cross_entropy(out, label)
                    avg_loss = fluid.layers.mean(loss)
                    acc_list.append(acc.numpy()[0])
                    avg_loss_list.append(avg_loss.numpy())
                    if batch_id % 20 == 0:
                        logger.info(
                            "valid Loss at step {}: {}, acc: {}".format(
                                batch_id, avg_loss.numpy(), acc.numpy()))
                        print("valid Loss at  step {}: {}, acc: {}".format(
                            batch_id, avg_loss.numpy(), acc.numpy()))
                print("验证集准确率为:{}".format(np.mean(acc_list)))
                logger.info("验证集准确率为:{}".format(np.mean(acc_list)))
                print("验证集loss为:{}".format(np.mean(avg_loss_list)))
                logger.info("验证集loss为:{}".format(np.mean(avg_loss_list)))
Ejemplo n.º 22
0
def train(args):
    """Train the ECO GoogLeNet video model and track per-epoch metrics.

    Args:
        args: parsed CLI namespace; fields read here are config, pretrain,
            save_dir, model_name and epoch.

    Side effects: saves checkpoints under args.save_dir and 'best_model/',
    and writes the accuracy curves to 'result/final_ucf_data_hapi.npz'.
    """
    # Last-batch train accuracy per epoch / validation result per epoch.
    all_train_rewards = []
    all_test_rewards = []
    prev_result = 0

    config = parse_config(args.config)
    # vars(args) overlays CLI values onto the 'train' section of the config.
    train_config = merge_configs(config, 'train', vars(args))
    print_configs(train_config, 'Train')

    train_model = ECO.GoogLeNet(train_config['MODEL']['num_classes'],
                                train_config['MODEL']['seg_num'],
                                train_config['MODEL']['seglen'], 'RGB',
                                0.00002)
    # Momentum SGD: lr=0.001, momentum=0.9, over all model parameters.
    opt = paddle.optimizer.Momentum(0.001,
                                    0.9,
                                    parameters=train_model.parameters())

    if args.pretrain:
        # load the pretrained model
        model_dict = paddle.load('best_model/best_model_seg12')

        train_model.set_state_dict(model_dict)

    # Make sure the checkpoint directory exists before saving into it.
    if not os.path.exists(args.save_dir):
        os.makedirs(args.save_dir)

    train_dataset = ECO_Dataset(args.model_name.upper(),
                                train_config,
                                mode='train')

    # batch_size=None / batch_sampler=None: the dataset presumably yields
    # already-batched samples — TODO confirm against ECO_Dataset.
    train_loader = paddle.io.DataLoader(train_dataset,
                                        places=paddle.CUDAPlace(0),
                                        batch_size=None,
                                        batch_sampler=None)

    epochs = args.epoch or train_model.epoch_num()

    # Put the model into training mode (enables dropout/BN updates).
    train_model.train()

    for i in range(epochs):

        for batch_id, data in enumerate(train_loader()):

            img = data[0]
            label = data[1]

            out, acc = train_model(img, label)

            # NOTE(review): the model can apparently return out=None for a
            # batch; such batches are skipped entirely.
            if out is not None:

                loss = paddle.nn.functional.cross_entropy(out, label)
                avg_loss = paddle.mean(loss)

                avg_loss.backward()

                # Legacy API: minimize() applies the update, then gradients
                # are cleared manually before the next batch.
                opt.minimize(avg_loss)
                train_model.clear_gradients()

                if batch_id % 200 == 0:
                    print("Loss at epoch {} step {}: {}, acc: {}".format(
                        i, batch_id, avg_loss.numpy(), acc.numpy()))
                    paddle.save(train_model.state_dict(),
                                args.save_dir + '/ucf_model_hapi')
        # NOTE(review): if the loader yields no batches, 'acc' here (and
        # 'avg_loss' after the loop) is unbound -> NameError; verify the
        # loader is always non-empty.
        all_train_rewards.append(acc.numpy())

        # presumably returns the validation accuracy; defined elsewhere.
        result = validate_model()

        all_test_rewards.append(result)
        if result > prev_result:
            prev_result = result
            print('The best result is ' + str(result))
            paddle.save(train_model.state_dict(),
                        'best_model/final_best_model_hapi')  # save the model
    logger.info("Final loss: {}".format(avg_loss.numpy()))
    print("Final loss: {}".format(avg_loss.numpy()))

    # Persist both curves for later plotting/analysis.
    np.savez('result/final_ucf_data_hapi.npz',
             all_train_rewards=all_train_rewards,
             all_test_rewards=all_test_rewards)
Ejemplo n.º 23
0
def main():
    """Build the static-graph ECO-full training program and run training.

    Reads hyper-parameters from the module-level CLI parser and from
    'config.txt', constructs the train/eval programs, loads initial
    weights, then loops over epochs running training and periodic
    validation.

    Side effects: writes scalars to the VisualDL log at args.log and
    updates the global `best_acc` via `read_best_acc()`.
    """
    global args, best_acc
    args = parser.parse_args()

    # VisualDL writer for train/valid scalars (set to None to disable).
    writer = LogWriter(args.log)

    cfg = parse_config('config.txt')
    print_configs(cfg, 'TRAIN')

    main_program = fluid.default_main_program()
    start_program = fluid.default_startup_program()

    place = fluid.CUDAPlace(0) if args.use_cuda else fluid.CPUPlace()

    with fluid.program_guard(main_program, start_program):

        # Data placeholders: NCHW frames and integer class labels.
        input = fluid.data(name='data',
                           shape=[-1, 3, 224, 224],
                           dtype='float32')
        label = fluid.data(name='label', shape=[-1, 1], dtype='int64')
        print(f'label shape:{label.shape}')

        model = ECOfull(input, num_segments=args.num_segments)
        net_out = model()

        cost = fluid.layers.softmax_with_cross_entropy(net_out, label)
        avg_cost = fluid.layers.mean(cost)

        acc = fluid.layers.accuracy(net_out, label)

        # Clone BEFORE adding the optimizer so eval runs forward-only.
        eval_program = main_program.clone(for_test=True)

        # optimizer
        fluid.optimizer.SGD(args.lr).minimize(avg_cost)

    reader = KineticsReader('eco', 'train', cfg).create_reader()
    feeder = fluid.DataFeeder([input, label], place)

    # Validation reader.
    val_reader = KineticsReader('eco', 'valid', cfg).create_reader()

    # Initialize parameters.
    exe = fluid.Executor(place=place)
    exe.run(start_program)

    train_exe = fluid.Executor(place=place)

    if 0:
        # Disabled branch: resume from a previously saved checkpoint.
        fluid.io.load(main_program, 'models/eco_full_best', train_exe)
    else:
        # Load pre-trained weights from a pickled program state.
        # NOTE: pickle.load on an untrusted file is unsafe; this assumes
        # program_state_dict.pkl was produced locally.
        # fix: use a context manager so the file is closed even on error.
        with open('program_state_dict.pkl', 'rb') as f:
            state_dict = pickle.load(f)
        fluid.io.set_program_state(main_program, state_dict)

    step = 0
    best_acc = read_best_acc()
    for i in range(args.epochs):
        for index, data in enumerate(reader()):
            avg_cost_, acc_ = train_exe.run(
                main_program,
                feed=feeder.feed(data),
                fetch_list=[avg_cost.name, acc.name])

            if (index + 1) % args.print_freq == 0:
                # fix: idiomatic identity test ('writer is not None').
                if writer is not None:
                    writer.add_scalar(tag='train/loss',
                                      step=step,
                                      value=avg_cost_[0])
                    writer.add_scalar(tag='train/acc',
                                      step=step,
                                      value=acc_[0])
                print(
                    f'epoch:{i+1} step:{index + 1} avg loss:{avg_cost_[0]} acc:{acc_[0]}'
                )
            step += 1

        if (i + 1) % args.eval_freq == 0:
            fetch_list = [avg_cost.name, acc.name]
            validate(val_reader,
                     feeder,
                     place,
                     eval_program,
                     fetch_list,
                     epoch=i,
                     writer=writer)
Ejemplo n.º 24
0
def train(args):
    """Train the dygraph ResNet3D model on Kinetics-style video data.

    Args:
        args: parsed CLI namespace; fields read here are use_gpu, config,
            pretrain, save_dir, model_name and epoch.

    Side effects: saves a checkpoint to args.save_dir + '/resnet_model'
    after every logged batch.
    """
    # Select the device for dygraph execution.
    place = fluid.CUDAPlace(0) if args.use_gpu else fluid.CPUPlace()

    with fluid.dygraph.guard(place):
        config = parse_config(args.config)
        # vars(args) overlays CLI values onto the 'train' section.
        train_config = merge_configs(config, 'train', vars(args))
        print_configs( train_config, 'Train configs : ' )

        train_model = ResNet3D.ResNet3D('resnet',
                                        train_config['MODEL']['num_layers'],
                                        train_config['MODEL']['num_classes'],
                                        train_config['MODEL']['seg_num'],
                                        0.00002)

        # parameter_list tells the optimizer which parameters
        # (train_model.parameters()) will be updated.
        opt = fluid.optimizer.Momentum(0.001, 0.9,
                                       parameter_list=train_model.parameters())

        if args.pretrain:
            # Resume from the checkpoint of a previous run.
            model, _ = fluid.dygraph.load_dygraph(args.save_dir + '/resnet_model')
            train_model.load_dict(model)

        # Create the checkpoint directory if it does not exist yet.
        if not os.path.exists(args.save_dir):
            os.makedirs(args.save_dir)

        # fix: removed the no-op self-assignment of TRAIN.batch_size that
        # the original author had already flagged as pointless.
        # KineticsReader.create_reader() yields batches of <img, label>.
        train_reader = KineticsReader(args.model_name.upper(), 'train',
                                      train_config).create_reader()

        epochs = args.epoch or train_model.epoch_num()
        for i in range(epochs):
            for batch_id, data in enumerate(train_reader()):
                dy_x_data = np.array([x[0] for x in data]).astype('float32')
                y_data = np.array([[x[1]] for x in data]).astype('int64')

                # img is 5-D: (batch_size, seg_num * seg_len, channels, H, W)
                # -- per the original author's note; TODO confirm against
                # KineticsReader output.
                img = fluid.dygraph.to_variable(dy_x_data)
                label = fluid.dygraph.to_variable(y_data)

                # Labels are targets only; never backprop through them.
                label.stop_gradient = True

                out, acc = train_model(img, label)

                loss = fluid.layers.cross_entropy(out, label)
                avg_loss = fluid.layers.mean(loss)

                avg_loss.backward()

                opt.minimize(avg_loss)
                train_model.clear_gradients()

                # Log (and checkpoint) every batch; raise the modulus to
                # reduce logging/checkpoint I/O.
                if batch_id % 1 == 0:
                    logger.info("Loss at epoch {} step {}: {}, acc: {}".format(i, batch_id, avg_loss.numpy(), acc.numpy()))
                    print("Loss at epoch {} step {}: {}, acc: {}".format(i, batch_id, avg_loss.numpy(), acc.numpy()))
                    fluid.dygraph.save_dygraph(train_model.state_dict(), args.save_dir + '/resnet_model')

        logger.info("Final loss: {}".format(avg_loss.numpy()))
        print("Final loss: {}".format(avg_loss.numpy()))