import logging
import os

import mxnet as mx
import numpy as np
import pandas as pd

# Project-local helpers (create_r3d, load_from_caffe2_pkl, ClipBatchIter,
# RCNNAccMetric) are assumed to be importable from the surrounding package.


# Variant 1: ROI trainer that passes dataset and iterator settings explicitly.
def train(config):
    gpus = [int(i) for i in config.gpus.split(',')]
    num_gpus = len(gpus)
    logging.info("number of gpus %d" % num_gpus)
    if len(gpus) == 0:
        kv = None
    else:
        kv = mx.kvstore.create('local')
        logging.info("Running on GPUs: {}".format(gpus))
    # Keep the global batch size consistent with the distributed trainer.
    total_batch_size = config.batch_per_device * num_gpus

    # Create symbol, arg_params and aux_params.
    if config.begin_epoch > 0:
        # Resume from the checkpoint written by do_checkpoint below.
        sym, arg_params, aux_params = mx.model.load_checkpoint(
            os.path.join(config.output, 'test'), config.begin_epoch)
    else:
        # Create the network.
        sym = create_r3d(
            num_class=config.num_class,
            no_bias=True,
            model_depth=config.model_depth,
            final_spatial_kernel=config.final_spatial_kernel,
            final_temporal_kernel=int(config.n_frame / 8),
            bn_mom=config.bn_mom,
            cudnn_tune=config.cudnn_tune,
            workspace=config.workspace,
            spatial_scale=720.0 / config.scale_w * config.spatial_scale,
            pooled_size=config.pooled_size,
            n_frame=config.n_frame,
            n_bbox=config.n_bbox,
        )
        # Load pretrained params.
        arg_params, aux_params = {}, {}
        if config.pretrained:
            arg_params, aux_params = load_from_caffe2_pkl(config.pretrained, sym)
            logging.info("load pretrained okay, num of arg_p %d, num of aux_p %d"
                         % (len(arg_params), len(aux_params)))

    # Create the Module; fixed params could be set here if needed.
    m = mx.module.Module(sym,
                         context=[mx.gpu(i) for i in gpus],
                         data_names=['data', 'rois'],
                         label_names=['softmax_label'])

    if config.plot:
        v = mx.viz.plot_network(
            sym, title='R2Plus1D-train',
            shape={'data': (total_batch_size, 3, config.n_frame,
                            config.scale_h, config.scale_w),
                   'rois': (total_batch_size,
                            config.n_frame // config.temporal_scale,
                            config.n_bbox, 5),
                   'softmax_label': (total_batch_size, config.n_bbox)})
        v.render(filename=os.path.join(config.output, 'vis'), cleanup=True)

    df_train = pd.read_pickle(config.df_train)
    df_test = pd.read_pickle(config.df_test)
    train_data = mx.io.PrefetchingIter(
        ClipBatchIter(df=df_train, batch_size=total_batch_size,
                      n_frame=config.n_frame, train=True, n_bbox=config.n_bbox,
                      scale_w=config.scale_w, scale_h=config.scale_h,
                      batch_per_device=config.batch_per_device,
                      temporal_scale=config.temporal_scale,
                      use_large_bbox=config.use_large_bbox))
    test_data = mx.io.PrefetchingIter(
        ClipBatchIter(df=df_test, batch_size=total_batch_size,
                      n_frame=config.n_frame, train=False, n_bbox=config.n_bbox,
                      scale_w=config.scale_w, scale_h=config.scale_h,
                      batch_per_device=config.batch_per_device,
                      temporal_scale=config.temporal_scale,
                      use_large_bbox=config.use_large_bbox))

    # Set up the optimizer.
    optimizer = config.optimizer
    optimizer_params = {
        'learning_rate': config.lr,
        'momentum': config.momentum,
        'wd': config.wd,
    }
    logging.info("lr %s, lr_step %s", config.lr, config.lr_step)
    if config.lr_step:
        optimizer_params['lr_scheduler'] = mx.lr_scheduler.FactorScheduler(
            step=config.lr_step, factor=config.lr_factor)

    metric = RCNNAccMetric()
    m.fit(
        train_data=train_data,
        eval_data=test_data,
        eval_metric=metric,
        epoch_end_callback=mx.callback.do_checkpoint(config.output + '/test', 1),
        batch_end_callback=mx.callback.Speedometer(total_batch_size, 20),
        kvstore=kv,
        optimizer=optimizer,
        optimizer_params=optimizer_params,
        initializer=mx.init.Xavier(factor_type="in", magnitude=2.34),
        arg_params=arg_params,
        aux_params=aux_params,
        allow_missing=True,
        begin_epoch=config.begin_epoch,
        num_epoch=config.num_epoch,
    )
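# A minimal sketch of how this variant might be invoked. Every value below is
# an illustrative assumption, not a recommended setting; the trainer only
# requires that these attributes exist on the config object.
def _example_config():
    from argparse import Namespace
    return Namespace(
        gpus='0,1', batch_per_device=4, begin_epoch=0, output='./output',
        num_class=2, model_depth=34, final_spatial_kernel=7, n_frame=32,
        bn_mom=0.9, cudnn_tune='off', workspace=512, spatial_scale=0.0625,
        pooled_size=7, n_bbox=10, scale_w=1280, scale_h=720,
        temporal_scale=8, use_large_bbox=True, pretrained='', plot=False,
        df_train='train.pkl', df_test='test.pkl', optimizer='sgd',
        lr=0.001, momentum=0.9, wd=1e-4, lr_step=10000, lr_factor=0.1,
        num_epoch=20)
# train(_example_config())  # single-machine, multi-GPU run on GPUs 0 and 1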
# Variant 2: ROI trainer driven entirely by the config object, with custom
# multi-label metrics instead of RCNNAccMetric.
def train(config):
    gpus = [int(i) for i in config.gpus.split(',')]
    num_gpus = len(gpus)
    logging.info("number of gpus %d" % num_gpus)
    if len(gpus) == 0:
        kv = None
    else:
        kv = mx.kvstore.create('local')
        logging.info("Running on GPUs: {}".format(gpus))
    # Keep the global batch size consistent with the distributed trainer.
    total_batch_size = config.batch_per_device * num_gpus
    config.total_batch_size = total_batch_size

    # Create symbol, arg_params and aux_params.
    if config.begin_epoch > 0:
        # Resume from the checkpoint written by do_checkpoint below.
        sym, arg_params, aux_params = mx.model.load_checkpoint(
            os.path.join(config.output, 'test'), config.begin_epoch)
    else:
        # Create the network.
        sym = create_r3d(
            num_class=config.num_class,
            no_bias=True,
            model_depth=config.model_depth,
            final_spatial_kernel=config.final_spatial_kernel,
            final_temporal_kernel=int(config.n_frame / 8),
            bn_mom=config.bn_mom,
            cudnn_tune=config.cudnn_tune,
            workspace=config.workspace,
            spatial_scale=config.spatial_scale,
            pooled_size=config.pooled_size,
            n_frame=config.n_frame,
            n_bbox=config.n_bbox,
        )
        # Load pretrained params.
        arg_params, aux_params = {}, {}
        if config.pretrained:
            arg_params, aux_params = load_from_caffe2_pkl(config.pretrained, sym)
            logging.info("load pretrained okay, num of arg_p %d, num of aux_p %d"
                         % (len(arg_params), len(aux_params)))

    # Create the Module; fixed params could be set here if needed.
    m = mx.module.Module(sym,
                         context=[mx.gpu(i) for i in gpus],
                         data_names=['data', 'rois'],
                         label_names=['softmax_label'])

    if config.plot:
        v = mx.viz.plot_network(
            sym, title='R2Plus1D-train',
            shape={'data': (total_batch_size, 3, config.n_frame,
                            config.scale_h, config.scale_w),
                   'rois': (total_batch_size,
                            config.n_frame // config.temporal_scale,
                            config.n_bbox, 5),
                   'softmax_label': (total_batch_size, config.n_bbox,
                                     config.num_class)})
        v.render(filename=os.path.join(config.output, 'vis'), cleanup=True)

    # The iterators now read all of their settings from config.
    train_data = mx.io.PrefetchingIter(ClipBatchIter(config=config, train=True))
    test_data = mx.io.PrefetchingIter(ClipBatchIter(config=config, train=False))

    # Set up the optimizer.
    optimizer = config.optimizer
    optimizer_params = {
        'learning_rate': config.lr,
        'momentum': config.momentum,
        'wd': config.wd,
    }
    logging.info("lr %s, lr_step %s", config.lr, config.lr_step)
    if config.lr_step:
        optimizer_params['lr_scheduler'] = mx.lr_scheduler.FactorScheduler(
            step=config.lr_step, factor=config.lr_factor)

    def acc(label, pred):
        # Element-wise accuracy over all class slots (multi-label).
        label = label.reshape((-1, config.num_class))
        return (label == np.round(pred)).astype(np.float32).mean()

    def all_correct_acc(label, pred):
        # Fraction of samples for which every class slot is predicted correctly.
        label = label.reshape((-1, config.num_class))
        equal = (label == np.round(pred)).astype(np.int32)
        equal_sum = equal.sum(axis=-1)
        return (equal_sum == label.shape[-1]).astype(np.float32).mean()

    def loss(label, pred):
        # Mean binary cross-entropy over the batch; the accumulator is named
        # sample_loss to avoid shadowing this function's own name.
        label = label.reshape((-1, config.num_class))
        loss_all = 0
        for i in range(len(pred)):
            sample_loss = -(label[i] * np.log(pred[i] + 1e-6)
                            + (1. - label[i]) * np.log(1. + 1e-6 - pred[i]))
            loss_all += np.sum(sample_loss)
        return float(loss_all) / float(len(pred) + 1e-6)

    eval_metric = [mx.metric.np(acc),
                   mx.metric.np(all_correct_acc),
                   mx.metric.np(loss)]

    m.fit(
        train_data=train_data,
        eval_data=test_data,
        eval_metric=eval_metric,
        epoch_end_callback=mx.callback.do_checkpoint(config.output + '/test', 1),
        batch_end_callback=mx.callback.Speedometer(total_batch_size, 20),
        kvstore=kv,
        optimizer=optimizer,
        optimizer_params=optimizer_params,
        initializer=mx.init.Xavier(factor_type="in", magnitude=2.34),
        arg_params=arg_params,
        aux_params=aux_params,
        allow_missing=True,
        begin_epoch=config.begin_epoch,
        num_epoch=config.num_epoch,
    )
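# A quick standalone sanity check of the metric math above, with made-up
# numbers and an assumed num_class of 3; scores are sigmoid-style values in
# [0, 1]. acc scores each class slot independently, while all_correct_acc
# credits a sample only when every slot rounds to its label.
def _metric_semantics_demo():
    label = np.array([[1, 0, 1],
                      [0, 1, 0]], dtype=np.float32)
    pred = np.array([[0.9, 0.2, 0.4],   # third slot rounds to 0: one miss
                     [0.1, 0.8, 0.3]],  # all three slots round correctly
                    dtype=np.float32)
    hits = (label == np.round(pred))
    print(hits.mean())                               # acc: 5/6 ~= 0.833
    print((hits.sum(-1) == label.shape[-1]).mean())  # all_correct_acc: 0.5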
# Variant 3: clip-classification trainer using plain argparse args and the
# built-in accuracy metric (no ROIs, no checkpoint resume path).
def train(args):
    gpus = [int(i) for i in args.gpus.split(',')]
    num_gpus = len(gpus)
    logging.info("number of gpus %d" % num_gpus)
    if len(gpus) == 0:
        kv = None
    else:
        kv = mx.kvstore.create('local')
        logging.info("Running on GPUs: {}".format(gpus))
    # Keep the global batch size consistent with the distributed trainer.
    total_batch_size = args.batch_per_device * num_gpus
    # Round the epoch size down to the closest multiple of the global batch
    # size across machines.
    epoch_iters = int(args.epoch_size / total_batch_size)
    args.epoch_size = epoch_iters * total_batch_size
    logging.info("Using epoch size: {}".format(args.epoch_size))

    # Create the network.
    net = create_r3d(
        num_class=args.num_class,
        no_bias=True,
        model_depth=args.model_depth,
        final_spatial_kernel=7 if args.crop_size == 112 else 14,
        final_temporal_kernel=int(args.n_frame / 8),
        bn_mom=args.bn_mom,
        cudnn_tune=args.cudnn_tune,
        workspace=args.workspace,
    )

    # Load pretrained params.
    arg_p, aux_p = {}, {}
    if args.pretrained:
        arg_p, aux_p = load_from_caffe2_pkl(args.pretrained, net)
        logging.info("load pretrained okay, num of arg_p %d, num of aux_p %d"
                     % (len(arg_p), len(aux_p)))

    # Create the Module; fixed_param_names could be passed here if needed.
    m = mx.module.Module(net, context=[mx.gpu(i) for i in gpus])

    if args.plot:
        v = mx.viz.plot_network(
            net, title='R2Plus1D-train',
            shape={'data': (total_batch_size, 3, args.n_frame,
                            args.crop_size, args.crop_size)})
        v.render(filename='models/R2Plus1D-train', cleanup=True)

    train_data = mx.io.PrefetchingIter(
        ClipBatchIter(datadir=args.datadir, batch_size=total_batch_size,
                      n_frame=args.n_frame, crop_size=args.crop_size,
                      train=True, scale_w=args.scale_w, scale_h=args.scale_h))
    eval_data = mx.io.PrefetchingIter(
        ClipBatchIter(datadir=args.datadir, batch_size=total_batch_size,
                      n_frame=args.n_frame, crop_size=args.crop_size,
                      train=False, scale_w=args.scale_w, scale_h=args.scale_h,
                      temporal_center=True))

    # Set up the optimizer.
    optimizer = args.optimizer
    optimizer_params = {
        'learning_rate': args.lr,
        'momentum': args.momentum,
        'wd': args.wd,
    }
    if args.lr_scheduler_step:
        optimizer_params['lr_scheduler'] = mx.lr_scheduler.FactorScheduler(
            step=args.lr_scheduler_step, factor=args.lr_scheduler_factor)

    m.fit(
        train_data=train_data,
        eval_data=eval_data,
        eval_metric='accuracy',
        epoch_end_callback=mx.callback.do_checkpoint(args.output + '/test', 1),
        batch_end_callback=mx.callback.Speedometer(total_batch_size, 20),
        kvstore=kv,
        optimizer=optimizer,
        optimizer_params=optimizer_params,
        initializer=mx.init.Xavier(factor_type="in", magnitude=2.34),
        arg_params=arg_p,
        aux_params=aux_p,
        allow_missing=True,
        begin_epoch=args.begin_epoch,
        num_epoch=args.num_epoch,
    )
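# do_checkpoint with prefix args.output + '/test' writes test-symbol.json and
# test-NNNN.params under the output directory after each epoch; the
# config-based variants above reload them the same way via begin_epoch. A
# sketch of resuming by hand; the directory and epoch number are illustrative
# assumptions.
def _resume_from_checkpoint(output_dir='./output', epoch=5):
    sym, arg_params, aux_params = mx.model.load_checkpoint(
        os.path.join(output_dir, 'test'), epoch)
    return sym, arg_params, aux_params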