def validate_model():
    # parse config
    args = parse_args()
    config = parse_config(args.config)
    val_config = merge_configs(config, 'test', vars(args))
    val_reader = KineticsReader(args.model_name.upper(), 'test', val_config).create_reader()
    val_model = ECO.GoogLeNet(val_config['MODEL']['num_classes'],
                              val_config['MODEL']['seg_num'],
                              val_config['MODEL']['seglen'], 'RGB')
    model, _ = fluid.dygraph.load_dygraph(args.save_dir + '/ucf_model')
    val_model.load_dict(model)
    val_model.eval()
    acc_list = []
    for batch_id, data in enumerate(val_reader()):
        dy_x_data = np.array([x[0] for x in data]).astype('float32')
        y_data = np.array([[x[1]] for x in data]).astype('int64')
        img = fluid.dygraph.to_variable(dy_x_data)
        label = fluid.dygraph.to_variable(y_data)
        label.stop_gradient = True
        out, acc = val_model(img, label)
        if out is not None:
            acc_list.append(acc.numpy()[0])
    val_model.train()
    return np.mean(acc_list)
def validate_model():
    # parse config
    args = parse_args()
    config = parse_config(args.config)
    val_config = merge_configs(config, 'test', vars(args))
    val_dataset = ECO_Dataset(args.model_name.upper(), val_config, mode='test')
    val_loader = paddle.io.DataLoader(val_dataset,
                                      places=paddle.CUDAPlace(0),
                                      batch_size=None,
                                      batch_sampler=None)
    val_model = ECO.GoogLeNet(val_config['MODEL']['num_classes'],
                              val_config['MODEL']['seg_num'],
                              val_config['MODEL']['seglen'], 'RGB', 0.00002)
    model_dict = paddle.load(args.save_dir + '/ucf_model_hapi')
    val_model.set_state_dict(model_dict)
    val_model.eval()
    acc_list = []
    for batch_id, data in enumerate(val_loader()):
        img = data[0]
        label = data[1]
        out, acc = val_model(img, label)
        if out is not None:
            acc_list.append(acc.numpy()[0])
    val_model.train()
    return np.mean(acc_list)
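# Both validate_model() variants call parse_args() without defining it. Below is
# a minimal sketch of an argparse definition covering only the flags these
# scripts actually read; the defaults are illustrative assumptions, not the
# project's real values:
import argparse

def parse_args():
    parser = argparse.ArgumentParser('ECO train/eval')
    parser.add_argument('--config', type=str, default='configs/eco.yaml')  # assumed path
    parser.add_argument('--model_name', type=str, default='eco')
    parser.add_argument('--save_dir', type=str, default='checkpoints_models')
    parser.add_argument('--weights', type=str, default=None)
    parser.add_argument('--use_gpu', action='store_true')
    parser.add_argument('--gpu_num', type=int, default=0)
    parser.add_argument('--pretrain', action='store_true')
    parser.add_argument('--epoch', type=int, default=None)
    parser.add_argument('--gd', type=float, default=None)       # gradient clip norm
    parser.add_argument('--num_saturate', type=int, default=5)  # epochs before lr decay
    parser.add_argument('--eval_freq', type=int, default=1)
    return parser.parse_args()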
def eval(args):
    # parse config
    config = parse_config(args.config)
    val_config = merge_configs(config, 'valid', vars(args))
    train_config = merge_configs(config, 'train', vars(args))
    print_configs(val_config, "Valid")
    place = fluid.CUDAPlace(0) if args.use_gpu else fluid.CPUPlace()
    with fluid.dygraph.guard(place):
        val_model = ECO.ECO(num_classes=train_config['MODEL']['num_classes'],
                            num_segments=train_config['MODEL']['seg_num'])
        label_dic = np.load('label_dir.npy', allow_pickle=True).item()
        label_dic = {v: k for k, v in label_dic.items()}
        # get infer reader
        # val_reader = KineticsReader(args.model_name.upper(), 'valid', val_config).create_reader()
        val_reader = KineticsReader('ECO', 'valid', val_config).create_reader()
        # if no weight file is specified, exit
        if args.weights:
            weights = args.weights
        else:
            print("model path must be specified")
            exit()
        para_state_dict, _ = fluid.load_dygraph(weights)
        val_model.load_dict(para_state_dict)
        val_model.eval()
        acc_list = []
        false_class = []
        for batch_id, data in enumerate(val_reader()):
            dy_x_data = np.array([x[0] for x in data]).astype('float32')
            y_data = np.array([[x[1]] for x in data]).astype('int64')
            img = fluid.dygraph.to_variable(dy_x_data)
            label = fluid.dygraph.to_variable(y_data)
            label.stop_gradient = True
            out, acc = val_model(img, label)
            if acc.numpy()[0] != 1:
                false_class.append(label.numpy()[0][0])
            acc_list.append(acc.numpy()[0])
            print(batch_id, 'acc:', np.mean(acc_list))
            if len(false_class) == 0:
                continue
            print(np.sort(np.array(false_class)))
            # count how often each class index was misclassified
            bin_count = np.bincount(np.array(false_class))
            most_false = np.argmax(bin_count)
            print('false class:', bin_count)
            print('most false class num:', most_false)
        print("validate set acc:{}".format(np.mean(acc_list)))
def eval(args):
    # parse config
    config = parse_config(args.config)
    val_config = merge_configs(config, 'test', vars(args))
    # print_configs(val_config, "test")
    val_model = ECO.GoogLeNet(val_config['MODEL']['num_classes'],
                              val_config['MODEL']['seg_num'],
                              val_config['MODEL']['seglen'], 'RGB')
    label_dic = np.load('label_dir.npy', allow_pickle=True).item()
    label_dic = {v: k for k, v in label_dic.items()}
    val_dataset = ECO_Dataset(args.model_name.upper(), val_config, mode='test')
    val_loader = paddle.io.DataLoader(val_dataset,
                                      places=paddle.CUDAPlace(0),
                                      batch_size=None,
                                      batch_sampler=None)
    # if no weight file is specified, exit
    if args.weights:
        weights = args.weights
    else:
        print("model path must be specified")
        exit()
    para_state_dict = paddle.load(weights)
    val_model.set_state_dict(para_state_dict)
    val_model.eval()
    acc_list = []
    for batch_id, data in enumerate(val_loader()):
        img = data[0]
        label = data[1]
        out, acc = val_model(img, label)
        acc_list.append(acc.numpy()[0])
    print("The accuracy on the test set is: {}".format(np.mean(acc_list)))
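# label_dic above is inverted to map class index -> class name, but eval() never
# reads it afterwards. A minimal sketch of the lookup it enables, assuming `out`
# holds per-class scores with shape [batch_size, num_classes]:
def top1_class_name(out, label_dic):
    pred_idx = int(np.argmax(out.numpy()[0]))  # highest-scoring class for sample 0
    return label_dic[pred_idx]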
def test(args):
    config = parse_config(args.config)
    test_config = merge_configs(config, 'test', vars(args))
    # print_configs(test_config, "test")
    with fluid.dygraph.guard():
        test_model = ECO.GoogLeNet(test_config['MODEL']['num_classes'],
                                   test_config['MODEL']['seg_num'],
                                   test_config['MODEL']['seglen'], 'RGB')
        # get test reader
        test_reader = KineticsReader(args.model_name.upper(), 'test', test_config).create_reader()
        # if no weight file is specified, exit
        if args.weights:
            weights = args.weights
        else:
            print("model path must be specified")
            exit()
        para_state_dict, _ = fluid.load_dygraph(weights)
        test_model.load_dict(para_state_dict)
        test_model.eval()
        acc_list = []
        for batch_id, data in enumerate(test_reader()):
            dy_x_data = np.array([x[0] for x in data]).astype('float32')
            y_data = np.array([[x[1]] for x in data]).astype('int64')
            img = fluid.dygraph.to_variable(dy_x_data)
            label = fluid.dygraph.to_variable(y_data)
            label.stop_gradient = True
            out, acc = test_model(img, label)
            acc_list.append(acc.numpy()[0])
        print("The accuracy for the test dataset is: {}".format(np.mean(acc_list)))
def train(args):
    all_train_rewards = []
    all_test_rewards = []
    prev_result = 0
    # parse config
    place = fluid.CUDAPlace(0) if args.use_gpu else fluid.CPUPlace()
    with fluid.dygraph.guard(place):
        config = parse_config(args.config)
        train_config = merge_configs(config, 'train', vars(args))
        print_configs(train_config, 'Train')
        train_model = ECO.GoogLeNet(train_config['MODEL']['num_classes'],
                                    train_config['MODEL']['seg_num'],
                                    train_config['MODEL']['seglen'], 'RGB')
        opt = fluid.optimizer.Momentum(
            0.001, 0.9,
            parameter_list=train_model.parameters(),
            use_nesterov=True,
            regularization=fluid.regularizer.L2Decay(regularization_coeff=0.0005))
        if args.pretrain:
            model, _ = fluid.dygraph.load_dygraph('trained_model/best_model')
            train_model.load_dict(model)
        # build model
        if not os.path.exists(args.save_dir):
            os.makedirs(args.save_dir)
        # get reader
        train_reader = KineticsReader(args.model_name.upper(), 'train', train_config).create_reader()
        epochs = args.epoch or train_model.epoch_num()
        train_model.train()
        for i in range(epochs):
            for batch_id, data in enumerate(train_reader()):
                dy_x_data = np.array([x[0] for x in data]).astype('float32')
                y_data = np.array([[x[1]] for x in data]).astype('int64')
                img = fluid.dygraph.to_variable(dy_x_data)
                label = fluid.dygraph.to_variable(y_data)
                label.stop_gradient = True
                out, acc = train_model(img, label)
                if out is not None:
                    loss = fluid.layers.cross_entropy(out, label)
                    avg_loss = fluid.layers.mean(loss)
                    avg_loss.backward()
                    opt.minimize(avg_loss)
                    train_model.clear_gradients()
                if batch_id % 200 == 0:
                    print("Loss at epoch {} step {}: {}, acc: {}".format(
                        i, batch_id, avg_loss.numpy(), acc.numpy()))
                    fluid.dygraph.save_dygraph(train_model.state_dict(),
                                               args.save_dir + '/ucf_model')
            result = validate_model()
            all_test_rewards.append(result)
            if result > prev_result:
                prev_result = result
                print('The best result is ' + str(result))
                fluid.save_dygraph(train_model.state_dict(), 'trained_model/best_model')
                np.savez('result_data/ucf_data.npz',
                         all_train_rewards=all_train_rewards,
                         all_test_rewards=all_test_rewards)
            all_train_rewards.append(acc.numpy())
        logger.info("Final loss: {}".format(avg_loss.numpy()))
        print("Final loss: {}".format(avg_loss.numpy()))
        np.savez('result_data/ucf_data.npz',
                 all_train_rewards=all_train_rewards,
                 all_test_rewards=all_test_rewards)
# Model transform script: convert weights from torch to paddle.
path = 'checkpoints_models/ECO_Full_rgb_model_Kinetics.pth 2.tar'
save_path = 'checkpoints_models/ECO_FULL_RGB_seg16'
torch_weight = torch.load(path, map_location=torch.device('cpu'))
torch_weight = torch_weight['state_dict']
print('loaded')
num = 0
for torch_key in torch_weight:
    if 'bn.num_batches_tracked' in torch_key:
        num += 1
        print(torch_key)
print(num)
with fluid.dygraph.guard():
    paddle_model = ECO.ECO(num_classes=101, num_segments=16)
    paddle_weight = paddle_model.state_dict()
    new_weight_dict = OrderedDict()
    matched_bn_var = 0
    matched_bn_mean = 0
    matched_fc = 0
    matched_base = 0
    matched_linear = 0
    for paddle_key in paddle_weight.keys():
        print('paddle:', paddle_key)
        if len(paddle_key.split('.')) == 3:
            # sub module
            torch_key = 'module.base_model.' + paddle_key.split('.')[1] + '.' + paddle_key.split('.')[2]
            name = 'inception'
        elif len(paddle_key.split('.')) == 4:
            torch_key = 'module.base_model.' + paddle_key.split('.')[2] + '.' + paddle_key.split('.')[3]
            name = '3d'
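# A hedged sketch of the per-tensor conversion step a torch->paddle transform
# like the one above needs; the helper below is illustrative, not the exact
# logic of this script. The usual layout differences: torch nn.Linear weights
# are stored [out_features, in_features] while paddle Linear expects
# [in_features, out_features], so 2-D fc weights are transposed; conv (4-D) and
# BN (1-D) tensors keep their layout; torch names BN running stats
# `running_mean`/`running_var` where a paddle state dict typically uses
# `_mean`/`_variance`; torch's `num_batches_tracked` entries have no paddle
# counterpart and are dropped.
def convert_tensor(paddle_key, torch_tensor):
    value = torch_tensor.detach().numpy()
    if paddle_key.endswith('.weight') and value.ndim == 2:
        value = value.transpose()  # fc layers: [out, in] -> [in, out]
    return value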
def train(args):
    all_train_rewards = []
    all_test_rewards = []
    prev_result = 0
    config = parse_config(args.config)
    train_config = merge_configs(config, 'train', vars(args))
    print_configs(train_config, 'Train')
    train_model = ECO.GoogLeNet(train_config['MODEL']['num_classes'],
                                train_config['MODEL']['seg_num'],
                                train_config['MODEL']['seglen'], 'RGB', 0.00002)
    opt = paddle.optimizer.Momentum(0.001, 0.9, parameters=train_model.parameters())
    if args.pretrain:
        # load the pretrained model
        model_dict = paddle.load('best_model/best_model_seg12')
        train_model.set_state_dict(model_dict)
    if not os.path.exists(args.save_dir):
        os.makedirs(args.save_dir)
    train_dataset = ECO_Dataset(args.model_name.upper(), train_config, mode='train')
    train_loader = paddle.io.DataLoader(train_dataset,
                                        places=paddle.CUDAPlace(0),
                                        batch_size=None,
                                        batch_sampler=None)
    epochs = args.epoch or train_model.epoch_num()
    train_model.train()
    for i in range(epochs):
        for batch_id, data in enumerate(train_loader()):
            img = data[0]
            label = data[1]
            out, acc = train_model(img, label)
            if out is not None:
                loss = paddle.nn.functional.cross_entropy(out, label)
                avg_loss = paddle.mean(loss)
                avg_loss.backward()
                opt.minimize(avg_loss)
                train_model.clear_gradients()
            if batch_id % 200 == 0:
                print("Loss at epoch {} step {}: {}, acc: {}".format(
                    i, batch_id, avg_loss.numpy(), acc.numpy()))
                paddle.save(train_model.state_dict(), args.save_dir + '/ucf_model_hapi')
        all_train_rewards.append(acc.numpy())
        result = validate_model()
        all_test_rewards.append(result)
        if result > prev_result:
            prev_result = result
            print('The best result is ' + str(result))
            paddle.save(train_model.state_dict(), 'best_model/final_best_model_hapi')  # save the best model
    logger.info("Final loss: {}".format(avg_loss.numpy()))
    print("Final loss: {}".format(avg_loss.numpy()))
    np.savez('result/final_ucf_data_hapi.npz',
             all_train_rewards=all_train_rewards,
             all_test_rewards=all_test_rewards)
def train(args, distributed):
    # ===================== GPU CONF =====================#
    if distributed:
        # run in parallel mode
        place = fluid.CUDAPlace(fluid.dygraph.parallel.Env().dev_id)
    else:
        # run in single-GPU mode on the selected device
        args.use_gpu = True
        place = fluid.CUDAPlace(args.gpu_num) if args.use_gpu else fluid.CPUPlace()

    # ===================== Dygraph Mode =====================#
    with fluid.dygraph.guard(place):
        # leveraged from the TSN training script
        config = parse_config(args.config)
        train_config = merge_configs(config, 'train', vars(args))
        val_config = merge_configs(config, 'valid', vars(args))
        print_configs(train_config, 'Train')

        # ===================== Init ECO =====================#
        train_model = ECO.ECO(num_classes=train_config['MODEL']['num_classes'],
                              num_segments=train_config['MODEL']['seg_num'])
        if distributed:
            strategy = fluid.dygraph.parallel.prepare_context()
            train_model = fluid.dygraph.parallel.DataParallel(train_model, strategy)

        # trick 1: clip gradients to avoid gradient explosion
        clip = None
        if args.gd is not None:
            clip = fluid.clip.GradientClipByGlobalNorm(clip_norm=args.gd)
            print('clip:', clip)

        # ===================== Init Optimizer =====================#
        # optimizer config: momentum, nesterov, weight decay, lr decay
        learning_rate = 0.001
        opt = fluid.optimizer.Momentum(learning_rate, 0.9,
                                       parameter_list=train_model.parameters(),
                                       use_nesterov=True,
                                       regularization=fluid.regularizer.L2Decay(regularization_coeff=5e-4),
                                       grad_clip=clip)
        # trick 2: freeze every BatchNorm2D except the first one.
        # trick 3: set lr mult to 1 for all weights and to 2 for all biases.
        get_optim_policies(opt)
        print('get_optim_policies:--batch_norm_0.w_0',
              opt._parameter_list[2].optimize_attr, opt._parameter_list[2].stop_gradient)
        print('get_optim_policies:--batch_norm_0.b_0',
              opt._parameter_list[3].optimize_attr, opt._parameter_list[3].stop_gradient)

        # ===================== Use Pretrained Model =====================#
        # pretrained model: ECO_Full_rgb_model_Kinetics.pth 2.tar (downloaded from the MZO repo),
        # transformed from torch to paddle weights except for the fc layer.
        if args.pretrain:
            model, _ = fluid.dygraph.load_dygraph(args.save_dir + '/ECO_FULL_RGB_seg16')
            # also tried the torch-pretrained model: 32F-92.9%, 16F-91.8% precision on torch
            # model, _ = fluid.dygraph.load_dygraph(args.save_dir + '/eco_91.81_model_best')
            train_model.load_dict(model)

        # build model
        if not os.path.exists(args.save_dir):
            os.makedirs(args.save_dir)

        # ===================== Init Data Reader =====================#
        # leveraged from the TSN training script
        train_config.TRAIN.batch_size = train_config.TRAIN.batch_size
        train_reader = KineticsReader('ECO', 'train', train_config).create_reader()
        print('train_reader', train_reader)
        val_reader = KineticsReader('ECO', 'valid', val_config).create_reader()
        if distributed:
            train_reader = fluid.contrib.reader.distributed_batch_reader(train_reader)

        # ===================== Init Trick Params =====================#
        epochs = args.epoch or train_model.epoch_num()
        loss_summ = 0
        saturate_cnt = 0
        exp_num = 0
        best_prec1 = 0
        for i in range(epochs):
            train_model.train()
            # trick 4: saturate lr decay, unlike piecewise or other schedules:
            # compute prec@1 every epoch; if it does not improve for
            # args.num_saturate epochs (the model is "saturated"), decay the lr.
            if saturate_cnt == args.num_saturate:
                exp_num = exp_num + 1
                saturate_cnt = 0
                decay = 0.1 ** exp_num
                learning_rate = learning_rate * decay
                opt = fluid.optimizer.Momentum(learning_rate, 0.9,
                                               parameter_list=train_model.parameters(),
                                               use_nesterov=True,
                                               regularization=fluid.regularizer.L2Decay(regularization_coeff=5e-4),
                                               grad_clip=clip)
                print('get_optim_policies:--batch_norm_0.w_0',
                      opt._parameter_list[2].optimize_attr, opt._parameter_list[2].stop_gradient)
                print('get_optim_policies:--batch_norm_0.b_0',
                      opt._parameter_list[3].optimize_attr, opt._parameter_list[3].stop_gradient)
                print("- Learning rate decreases by a factor of '{}'".format(10 ** exp_num))
            for batch_id, data in enumerate(train_reader()):
                lr = opt.current_step_lr()
                print('lr:', lr)  # check lr at every batch id
                dy_x_data = np.array([x[0] for x in data]).astype('float32')
                y_data = np.array([[x[1]] for x in data]).astype('int64')
                img = fluid.dygraph.to_variable(dy_x_data)
                label = fluid.dygraph.to_variable(y_data)
                label.stop_gradient = True
                out, acc = train_model(img, label)
                loss = fluid.layers.cross_entropy(out, label)
                avg_loss = fluid.layers.mean(loss)
                loss_summ += avg_loss
                if distributed:
                    avg_loss = train_model.scale_loss(avg_loss)
                avg_loss.backward()
                if distributed:
                    train_model.apply_collective_grads()
                if (batch_id + 1) % 4 == 0:
                    # trick 5: iter size; apply the accumulated gradients every 4 batches
                    opt.minimize(loss_summ)
                    opt.clear_gradients()
                    loss_summ = 0
                if batch_id % 1 == 0:
                    logger.info("Loss at epoch {} step {}: {}, acc: {}".format(
                        i, batch_id, avg_loss.numpy(), acc.numpy()))
                    print("Loss at epoch {} step {}: {}, acc: {}".format(
                        i, batch_id, avg_loss.numpy(), acc.numpy()))
            if (i + 1) % args.eval_freq == 0 or i == epochs - 1:
                train_model.eval()
                acc_list = []
                false_class = []
                for batch_id, data in enumerate(val_reader()):
                    dy_x_data = np.array([x[0] for x in data]).astype('float32')
                    y_data = np.array([[x[1]] for x in data]).astype('int64')
                    img = fluid.dygraph.to_variable(dy_x_data)
                    label = fluid.dygraph.to_variable(y_data)
                    label.stop_gradient = True
                    out, acc = train_model(img, label)
                    if acc.numpy()[0] != 1:
                        false_class.append(label.numpy()[0][0])
                    acc_list.append(acc.numpy()[0])
                    print(batch_id, 'acc:', np.mean(acc_list))
                    if len(false_class) == 0:
                        continue
                print("validate set acc:{}".format(np.mean(acc_list)))
                prec1 = np.mean(acc_list)
                # remember the best prec@1 and save a checkpoint
                is_best = prec1 > best_prec1
                if is_best:
                    saturate_cnt = 0
                    fluid.dygraph.save_dygraph(train_model.state_dict(),
                                               args.save_dir + '/ECO_FULL_1/' + str(i) + '_best_' + str(prec1))
                else:
                    saturate_cnt = saturate_cnt + 1
                print("- Validation Prec@1 saturates for {} epochs.".format(saturate_cnt), best_prec1)
                best_prec1 = max(prec1, best_prec1)
        logger.info("Final loss: {}".format(avg_loss.numpy()))
        print("Final loss: {}".format(avg_loss.numpy()))
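# Trick 5 above sums avg_loss over 4 batches and calls minimize() once, which
# approximates a 4x larger batch (an "iter size" of 4). Below is a minimal
# standalone sketch of the same idea in the paddle 2.x API; `model`, `opt`, and
# the argument names are placeholders. Dividing by ACCUM_STEPS (which the script
# above does not do) keeps the gradient scale equal to one large batch:
ACCUM_STEPS = 4

def accumulation_step(model, opt, img, label, batch_id):
    out = model(img)
    loss = paddle.nn.functional.cross_entropy(out, label) / ACCUM_STEPS
    loss.backward()  # gradients keep accumulating until clear_grad()
    if (batch_id + 1) % ACCUM_STEPS == 0:
        opt.step()        # apply the accumulated gradients
        opt.clear_grad()  # reset for the next accumulation window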