def main():
    """CLI entry point for simple video-model training.

    Parses command-line options, validates the model configuration via
    model_builder, then dispatches to Train(). Relies on module-level
    names defined elsewhere in this file: `argparse`, `log`,
    `model_builder` and `Train`.
    """
    # TODO: use argv
    parser = argparse.ArgumentParser(
        description="Caffe2: simple video training")
    parser.add_argument("--model_name", type=str, default='r2plus1d',
                        help="Name of the model")
    parser.add_argument("--model_depth", type=int, default=18,
                        help="Depth of the model")
    parser.add_argument("--train_data", type=str, default=None,
                        help="Path to train data", required=True)
    parser.add_argument("--test_data", type=str, default=None,
                        help="Path to test data")
    parser.add_argument("--db_type", type=str, default="minidb",
                        help="Database type to save the training model")
    parser.add_argument("--gpus", type=str,
                        help="Comma separated list of GPU devices to use")
    parser.add_argument("--num_gpus", type=int, default=1,
                        help="Number of GPU devices (instead of --gpus)")
    parser.add_argument("--scale_h", type=int, default=128,
                        help="Scale image height to")
    parser.add_argument("--scale_w", type=int, default=171,
                        help="Scale image width to")
    parser.add_argument("--crop_size", type=int, default=112,
                        help="Input image size (to crop to)")
    parser.add_argument("--num_decode_threads", type=int, default=4,
                        help="# of threads/GPU dedicated for video decoding")
    parser.add_argument("--clip_length_rgb", type=int, default=16,
                        help="Length of input clips")
    parser.add_argument("--sampling_rate_rgb", type=int, default=1,
                        help="Frame sampling rate")
    parser.add_argument("--num_labels", type=int, default=101,
                        help="Number of labels")
    parser.add_argument("--num_channels", type=int, default=3,
                        help="Number of channels")
    parser.add_argument("--clip_length_of", type=int, default=8,
                        help="Frames of optical flow data")
    parser.add_argument("--sampling_rate_of", type=int, default=2, help="")
    parser.add_argument("--frame_gap_of", type=int, default=2, help="")
    parser.add_argument("--input_type", type=int, default=0,
                        help="False=rgb, True=optical flow")
    parser.add_argument(
        "--flow_data_type", type=int, default=0,
        help="0=Flow2C, 1=Flow3C, 2=FlowWithGray, 3=FlowWithRGB")
    parser.add_argument("--do_flow_aggregation", type=int, default=0,
                        help="whether to aggregate optical flow across "
                             "multiple frames")
    parser.add_argument("--get_video_id", type=int, default=0,
                        help="Output video id")
    parser.add_argument("--batch_size", type=int, default=32,
                        help="Batch size, total over all GPUs")
    parser.add_argument("--epoch_size", type=int, default=110000,
                        help="Number of videos/epoch, total over all machines")
    parser.add_argument("--num_epochs", type=int, default=50,
                        help="Num epochs.")
    parser.add_argument("--base_learning_rate", type=float, default=0.003,
                        help="Initial learning rate.")
    parser.add_argument("--step_epoch", type=int, default=10,
                        help="Reducing learning rate every step_epoch.")
    parser.add_argument("--gamma", type=float, default=0.1,
                        help="Learning rate decay factor.")
    parser.add_argument("--display_iter", type=int, default=10,
                        help="Display information every # of iterations.")
    parser.add_argument("--weight_decay", type=float, default=0.005,
                        help="Weight decay (L2 regularization)")
    parser.add_argument("--cudnn_workspace_limit_mb", type=int, default=64,
                        help="CuDNN workspace limit in MBs")
    parser.add_argument("--file_store_path", type=str, default="/tmp",
                        help="Path to directory to use for saving checkpoints")
    parser.add_argument("--save_model_name", type=str, default="simple_c3d",
                        help="Save the trained model to a given name")
    parser.add_argument(
        "--load_model_path", type=str, default=None,
        help="Load previously saved model to continue training")
    parser.add_argument("--use_cudnn", type=int, default=1,
                        help="Use CuDNN")
    parser.add_argument("--profiling", type=int, default=0,
                        help="Profile training time")
    parser.add_argument("--pred_layer_name", type=str, default=None,
                        help="the prediction layer name")
    parser.add_argument("--multi_label", type=int, default=0,
                        help="Multiple label training")
    parser.add_argument("--channel_multiplier", type=float, default=1.0,
                        help="Channel multiplier")
    parser.add_argument("--bottleneck_multiplier", type=float, default=1.0,
                        help="Bottleneck multiplier")
    parser.add_argument("--use_dropout", type=int, default=0,
                        help="Use dropout at the prediction layer")
    parser.add_argument("--conv1_temporal_stride", type=int, default=1,
                        help="Conv1 temporal striding")
    parser.add_argument("--conv1_temporal_kernel", type=int, default=3,
                        help="Conv1 temporal kernel")
    parser.add_argument("--video_res_type", type=int, default=1,
                        help="Video frame scaling option, 0: scaled by "
                             "height x width; 1: scaled by short edge")
    parser.add_argument("--use_pool1", type=int, default=0,
                        help="use pool1 layer")
    # FIX: this flag previously declared both a default and required=True;
    # argparse never uses the default of a required option, so the default
    # was dead and callers were forced to always pass the flag.
    parser.add_argument("--jitter_scales", type=str, default="128,160",
                        help="spatial scales jitter, separated by commas")
    parser.add_argument("--use_local_file", type=int, default=0,
                        help="use local file")
    # FIX: help text previously rendered as "...initalization1: ..."
    # (typo plus a missing separator between concatenated fragments).
    parser.add_argument("--is_checkpoint", type=int, default=1,
                        help="0: pretrained_model is used as initialization; "
                             "1: pretrained_model is used as a checkpoint")
    parser.add_argument("--audio_input_3d", type=int, default=0,
                        help="is audio input 3d or 2d; 0 for 2d")
    parser.add_argument("--g_blend", type=int, default=0,
                        help="use gradient-blending to train model")
    parser.add_argument("--audio_weight", type=float, default=0.0,
                        help="g_blend weights for audio head")
    parser.add_argument("--visual_weight", type=float, default=0.0,
                        help="g_blend weights for visual head")
    parser.add_argument("--av_weight", type=float, default=1.0,
                        help="g_blend weights for av head")
    args = parser.parse_args()
    log.info(args)

    # Optical-flow input uses a different clip length than RGB.
    # NOTE(review): asserts are stripped under `python -O`; acceptable here
    # since this is a CLI tool, but a raised ValueError would be sturdier.
    assert model_builder.model_validation(
        args.model_name,
        args.model_depth,
        args.clip_length_of if args.input_type else args.clip_length_rgb,
        args.crop_size)

    Train(args)
def main():
    """CLI entry point for model testing.

    Parses command-line options, validates the model configuration via
    model_builder, then dispatches to Test(). Relies on module-level
    names defined elsewhere in this file: `argparse`, `log`,
    `model_builder` and `Test`.
    """
    parser = argparse.ArgumentParser(
        description="test_net"
    )
    parser.add_argument("--test_data", type=str, default=None,
                        help="Path to test data")
    parser.add_argument("--db_type", type=str, default='pickle',
                        help="Db type of the testing model")
    parser.add_argument("--model_depth", type=int, default=18,
                        help="Model depth")
    parser.add_argument("--model_name", type=str, default='r2plus1d',
                        help="Model name")
    parser.add_argument("--gpus", type=str, default=None,
                        help="Comma separated list of GPU devices to use")
    parser.add_argument("--num_gpus", type=int, default=1,
                        help="Number of GPU devices (instead of --gpus)")
    parser.add_argument("--scale_h", type=int, default=128,
                        help="Scale image height to")
    parser.add_argument("--scale_w", type=int, default=171,
                        help="Scale image width to")
    parser.add_argument("--num_iter", type=int, default=0,
                        help="Number of test iterations; "
                             "0: test the whole set")
    parser.add_argument("--crop_size", type=int, default=112,
                        help="Input image size (to crop to)")
    parser.add_argument("--clip_length_rgb", type=int, default=16,
                        help="Length of input clips")
    parser.add_argument("--sampling_rate_rgb", type=int, default=1,
                        help="Frame sampling rate")
    parser.add_argument("--num_labels", type=int, default=101,
                        help="Number of labels")
    parser.add_argument("--num_channels", type=int, default=3,
                        help="Number of channels")
    parser.add_argument("--batch_size", type=int, default=6,
                        help="Batch size, total over all GPUs")
    parser.add_argument("--clip_per_video", type=int, default=10,
                        help="Number of clips to be sampled from a video")
    parser.add_argument("--top_k", type=int, default=5,
                        help="Top k video accuracy output")
    # FIX: help typo "avergage" -> "average".
    parser.add_argument("--aggregation", type=int, default=0,
                        help="0: average pool, 1: max pooling")
    parser.add_argument("--load_model_path", type=str, default=None,
                        help="Load saved model for testing")
    parser.add_argument("--use_cudnn", type=int, default=1,
                        help="Use CuDNN")
    parser.add_argument("--pred_layer_name", type=str, default=None,
                        help="the prediction layer name")
    parser.add_argument("--display_iter", type=int, default=10,
                        help="Display information every # of iterations.")
    parser.add_argument("--clip_length_of", type=int, default=8,
                        help="Frames of optical flow data")
    parser.add_argument("--sampling_rate_of", type=int, default=2,
                        help="Optical flow sampling rate (in frames)")
    parser.add_argument("--frame_gap_of", type=int, default=2, help="")
    # FIX: the concatenated fragments produced a double space
    # ("across  multiple frames").
    parser.add_argument("--do_flow_aggregation", type=int, default=0,
                        help="whether to aggregate optical flow across "
                             "multiple frames")
    parser.add_argument("--flow_data_type", type=int, default=0,
                        help="0=Flow2C, 1=Flow3C, 2=FlowWithGray, "
                             "3=FlowWithRGB")
    parser.add_argument("--input_type", type=int, default=0,
                        help="False=rgb, True=optical flow")
    parser.add_argument("--get_video_id", type=int, default=0,
                        help="Output video id")
    parser.add_argument("--use_dropout", type=int, default=0,
                        help="Use dropout at the prediction layer")
    parser.add_argument("--use_local_file", type=int, default=0,
                        help="Use lmdb as a list of local filenames")
    args = parser.parse_args()
    log.info(args)

    # Optical-flow input (input_type == 1) uses a different clip length.
    assert model_builder.model_validation(
        args.model_name,
        args.model_depth,
        args.clip_length_of if args.input_type == 1 else args.clip_length_rgb,
        args.crop_size
    )

    Test(args)
def main():
    """CLI entry point for feature extraction.

    Parses command-line options, validates the model configuration via
    model_builder, then dispatches to ExtractFeatures(). Relies on
    module-level names defined elsewhere in this file: `argparse`,
    `log`, `model_builder` and `ExtractFeatures`.
    """
    parser = argparse.ArgumentParser(description="Simple feature extraction")
    parser.add_argument("--db_type", type=str, default='pickle',
                        help="Db type of the testing model")
    parser.add_argument("--model_name", type=str, default='r2plus1d',
                        help="Model name")
    parser.add_argument("--model_depth", type=int, default=18,
                        help="Model depth")
    parser.add_argument("--gpus", type=str, default=None,
                        help="Comma separated list of GPU devices to use")
    parser.add_argument("--num_gpus", type=int, default=1,
                        help="Number of GPU devices (instead of --gpus)")
    parser.add_argument("--scale_h", type=int, default=128,
                        help="Scale image height to")
    parser.add_argument("--scale_w", type=int, default=171,
                        help="Scale image width to")
    parser.add_argument("--crop_size", type=int, default=112,
                        help="Input image size (to crop to)")
    parser.add_argument("--clip_length_rgb", type=int, default=4,
                        help="Length of input clips")
    parser.add_argument("--sampling_rate_rgb", type=int, default=1,
                        help="Frame sampling rate")
    parser.add_argument("--num_labels", type=int, default=101,
                        help="Number of labels")
    parser.add_argument("--num_channels", type=int, default=3,
                        help="Number of channels")
    parser.add_argument("--batch_size", type=int, default=32,
                        help="Batch size, per-GPU")
    # NOTE(review): the defaults below are dead because the options are
    # required; kept for interface compatibility.
    parser.add_argument("--load_model_path", type=str, default='',
                        required=True,
                        help="Load saved model for testing")
    parser.add_argument("--test_data", type=str, default="",
                        required=True,
                        help="Dataset on which we will extract features")
    parser.add_argument("--output_path", type=str, default="",
                        help="Path to output pickle; defaults to "
                             "features.pickle next to <test_data>")
    parser.add_argument("--use_cudnn", type=int, default=1,
                        help="Use CuDNN")
    parser.add_argument("--features", type=str, default="final_avg",
                        help="Comma-separated list of blob names to fetch")
    parser.add_argument("--num_iterations", type=int, default=-1,
                        help="Run only this many iterations")
    parser.add_argument("--num_decode_threads", type=int, default=4, help="")
    parser.add_argument("--clip_length_of", type=int, default=8,
                        help="Frames of optical flow data")
    # FIX: help typo "optial" -> "optical".
    parser.add_argument("--sampling_rate_of", type=int, default=2,
                        help="Sampling rate for optical flows")
    parser.add_argument("--frame_gap_of", type=int, default=2,
                        help="Frame gap of optical flows")
    parser.add_argument("--input_type", type=int, default=0,
                        help="0=rgb, 1=optical flow")
    parser.add_argument("--flow_data_type", type=int, default=0,
                        help="0=Flow2C, 1=Flow3C, 2=FlowWithGray, "
                             "3=FlowWithRGB")
    parser.add_argument("--do_flow_aggregation", type=int, default=0,
                        help="whether to aggregate optical flow across "
                             "multiple frames")
    parser.add_argument("--clip_per_video", type=int, default=1,
                        help="When clips_per_video > 1, sample this many "
                             "clips uniformly in time")
    parser.add_argument("--get_video_id", type=int, default=0,
                        help="Output video id")
    parser.add_argument("--sanity_check", type=int, default=0,
                        help="Sanity check on the accuracy/auc")
    parser.add_argument("--decode_type", type=int, default=2,
                        help="0: random, 1: uniform sampling, "
                             "2: use starting frame")
    parser.add_argument("--use_local_file", type=int, default=0,
                        help="Use lmdb as a list of local filenames")
    args = parser.parse_args()
    log.info(args)

    # Optical-flow input (input_type == 1) uses a different clip length.
    assert model_builder.model_validation(
        args.model_name,
        args.model_depth,
        args.clip_length_of if args.input_type == 1 else args.clip_length_rgb,
        args.crop_size)

    ExtractFeatures(args)
def main():
    """CLI entry point for simple video-model training (checkpoint/finetune).

    Parses command-line options, validates the model configuration via
    model_builder, then dispatches to Train(). Relies on module-level
    names defined elsewhere in this file: `argparse`, `log`,
    `model_builder` and `Train`.
    """
    # TODO: use argv
    parser = argparse.ArgumentParser(
        description="Caffe2: simple video training")
    parser.add_argument("--model_name", type=str, default='r2plus1d',
                        help="Name of the model")
    parser.add_argument("--model_depth", type=int, default=18,
                        help="Depth of the model")
    parser.add_argument("--train_data", type=str, default=None,
                        help="Path to train data", required=True)
    parser.add_argument("--test_data", type=str, default=None,
                        help="Path to test data")
    parser.add_argument("--db_type", type=str, default="minidb",
                        help="Database type to save the training model")
    parser.add_argument("--gpus", type=str,
                        help="Comma separated list of GPU devices to use")
    parser.add_argument("--num_gpus", type=int, default=1,
                        help="Number of GPU devices (instead of --gpus)")
    parser.add_argument("--scale_h", type=int, default=128,
                        help="Scale image height to")
    parser.add_argument("--scale_w", type=int, default=171,
                        help="Scale image width to")
    parser.add_argument("--crop_size", type=int, default=112,
                        help="Input image size (to crop to)")
    parser.add_argument("--num_decode_threads", type=int, default=4,
                        help="# of threads/GPU dedicated for video decoding")
    parser.add_argument("--clip_length_rgb", type=int, default=16,
                        help="Length of input clips")
    parser.add_argument("--sampling_rate_rgb", type=int, default=1,
                        help="Frame sampling rate")
    parser.add_argument("--num_labels", type=int, default=101,
                        help="Number of labels")
    parser.add_argument("--num_channels", type=int, default=3,
                        help="Number of channels")
    parser.add_argument("--clip_length_of", type=int, default=8,
                        help="Frames of optical flow data")
    parser.add_argument("--sampling_rate_of", type=int, default=2, help="")
    parser.add_argument("--frame_gap_of", type=int, default=2, help="")
    parser.add_argument("--input_type", type=int, default=0,
                        help="0: rgb, 1: optical flow")
    parser.add_argument("--flow_data_type", type=int, default=0,
                        help="0: Flow2C, 1: Flow3C, 2: FlowWithGray, "
                             "3: FlowWithRGB")
    parser.add_argument("--do_flow_aggregation", type=int, default=0,
                        help="whether to aggregate optical flow across "
                             "multiple frames")
    parser.add_argument("--get_video_id", type=int, default=0,
                        help="Output video id")
    parser.add_argument("--batch_size", type=int, default=32,
                        help="Batch size, total over all GPUs")
    parser.add_argument("--epoch_size", type=int, default=110000,
                        help="Number of videos/epoch, total over all machines")
    parser.add_argument("--num_epochs", type=int, default=50,
                        help="Num epochs.")
    parser.add_argument("--base_learning_rate", type=float, default=0.003,
                        help="Initial learning rate.")
    parser.add_argument("--step_epoch", type=int, default=10,
                        help="Reducing learning rate every step_epoch.")
    parser.add_argument("--gamma", type=float, default=0.1,
                        help="Learning rate decay factor.")
    parser.add_argument("--display_iter", type=int, default=10,
                        help="Display information every # of iterations.")
    parser.add_argument("--weight_decay", type=float, default=0.005,
                        help="Weight decay (L2 regularization)")
    parser.add_argument("--cudnn_workspace_limit_mb", type=int, default=64,
                        help="CuDNN workspace limit in MBs")
    parser.add_argument("--file_store_path", type=str, default=".",
                        help="Path to directory to use for saving checkpoints")
    # FIX: the original concatenated fragments rendered as
    # "...continue trainingif is_checkpoint = 1Load ... finetuningif ..."
    # (missing separators between fragments).
    parser.add_argument(
        "--pretrained_model", type=str, default=None,
        help="Load saved model to continue training if is_checkpoint = 1; "
             "load pretrained model for finetuning if is_checkpoint = 0.")
    # FIX: help text previously rendered as "...initalization1: ..."
    # (typo plus a missing separator between concatenated fragments).
    parser.add_argument("--is_checkpoint", type=int, default=1,
                        help="0: pretrained_model is used as initialization; "
                             "1: pretrained_model is used as a checkpoint")
    parser.add_argument("--use_cudnn", type=int, default=1,
                        help="Use CuDNN")
    parser.add_argument("--profiling", type=int, default=0,
                        help="Profile training time")
    parser.add_argument("--pred_layer_name", type=str, default=None,
                        help="the prediction layer name")
    parser.add_argument("--use_dropout", type=int, default=0,
                        help="Use dropout at the prediction layer")
    parser.add_argument("--use_local_file", type=int, default=0,
                        help="Use lmdb as a list of local filenames")
    args = parser.parse_args()
    log.info(args)

    # Optical-flow input uses a different clip length than RGB.
    assert model_builder.model_validation(
        args.model_name,
        args.model_depth,
        args.clip_length_of if args.input_type else args.clip_length_rgb,
        args.crop_size)

    Train(args)
def main():
    """CLI entry point for testing large networks.

    Parses command-line options, validates the model configuration via
    model_builder, then dispatches to Test(). Relies on module-level
    names defined elsewhere in this file: `argparse`, `log`,
    `model_builder` and `Test`.
    """
    parser = argparse.ArgumentParser(
        description="Tool for testing large networks")
    parser.add_argument("--test_data", type=str, default=None,
                        help="Path to test data")
    parser.add_argument("--db_type", type=str, default='pickle',
                        help="Db type of the testing model")
    parser.add_argument("--model_depth", type=int, default=18,
                        help="Model depth")
    parser.add_argument("--model_name", type=str, default='r2plus1d',
                        help="Model name")
    parser.add_argument("--gpus", type=str, default=None,
                        help="Comma separated list of GPU devices to use")
    parser.add_argument("--num_gpus", type=int, default=1,
                        help="Number of GPU devices (instead of --gpus)")
    parser.add_argument("--scale_h", type=int, default=128,
                        help="Scale image height to")
    parser.add_argument("--scale_w", type=int, default=171,
                        help="Scale image width to")
    parser.add_argument(
        "--num_iter", type=int, default=0,
        help="Number of test iterations; 0: test the whole set")
    parser.add_argument("--crop_size", type=int, default=112,
                        help="Input image size (to crop to)")
    parser.add_argument("--clip_length_rgb", type=int, default=16,
                        help="Length of input clips")
    parser.add_argument("--sampling_rate_rgb", type=int, default=1,
                        help="Frame sampling rate")
    parser.add_argument("--num_labels", type=int, default=101,
                        help="Number of labels")
    parser.add_argument("--num_channels", type=int, default=3,
                        help="Number of channels")
    parser.add_argument("--batch_size", type=int, default=1,
                        help="Batch size, total over all GPUs")
    parser.add_argument("--clip_per_video", type=int, default=10,
                        help="Number of clips to be sampled from a video")
    parser.add_argument("--top_k", type=int, default=5,
                        help="Top k video accuracy output")
    # FIX: help typo "avergage" -> "average".
    parser.add_argument("--aggregation", type=int, default=0,
                        help="0: average pool, 1: max pooling")
    parser.add_argument("--load_model_path", type=str, default=None,
                        help="Load saved model for testing")
    parser.add_argument("--use_cudnn", type=int, default=1,
                        help="Use CuDNN")
    parser.add_argument("--print_per_class_metrics", type=int, default=0,
                        help="Log per class accuracy for multi-class setting")
    parser.add_argument("--pred_layer_name", type=str, default=None,
                        help="the prediction layer name")
    parser.add_argument("--multi_label", type=int, default=0,
                        help="Multiple label testing")
    parser.add_argument("--display_iter", type=int, default=10,
                        help="Display information every # of iterations.")
    parser.add_argument("--clip_length_of", type=int, default=8,
                        help="Frames of optical flow data")
    parser.add_argument("--sampling_rate_of", type=int, default=2, help="")
    parser.add_argument("--frame_gap_of", type=int, default=2, help="")
    parser.add_argument("--do_flow_aggregation", type=int, default=0,
                        help="whether to aggregate optical flow across "
                             "multiple frames")
    parser.add_argument(
        "--flow_data_type", type=int, default=0,
        help="0=Flow2C, 1=Flow3C, 2=FlowWithGray, 3=FlowWithRGB")
    parser.add_argument("--input_type", type=int, default=0,
                        help="False=rgb, True=optical flow")
    parser.add_argument("--num_decode_threads", type=int, default=4,
                        help="number of decoding threads")
    parser.add_argument("--channel_multiplier", type=float, default=1.0,
                        help="Channel multiplier")
    parser.add_argument("--bottleneck_multiplier", type=float, default=1.0,
                        help="Bottleneck multiplier")
    parser.add_argument("--use_dropout", type=int, default=0,
                        help="Use dropout at the prediction layer")
    parser.add_argument("--conv1_temporal_stride", type=int, default=1,
                        help="Conv1 temporal striding")
    parser.add_argument("--conv1_temporal_kernel", type=int, default=3,
                        help="Conv1 temporal kernel")
    parser.add_argument("--use_convolutional_pred", type=int, default=0,
                        help="using convolutional predictions")
    parser.add_argument("--video_res_type", type=int, default=0,
                        help="Video frame scaling option, 0: scaled by "
                             "height x width; 1: scaled by shorter edge")
    parser.add_argument("--use_pool1", type=int, default=0,
                        help="use pool1 layer")
    parser.add_argument("--use_local_file", type=int, default=0,
                        help="use local file")
    parser.add_argument("--crop_per_clip", type=int, default=1,
                        help="number of spatial crops per clip")
    # FIX: the original concatenated fragments rendered as
    # "...can handleper one pass..." (missing space between fragments).
    parser.add_argument("--crop_per_inference", type=int, default=1,
                        help="number of spatial crops GPU memory can handle "
                             "per one pass of inference")
    args = parser.parse_args()
    log.info(args)

    # Optical-flow input (input_type == 1) uses a different clip length;
    # convolutional prediction always validates against a 112 crop.
    assert model_builder.model_validation(
        args.model_name,
        args.model_depth,
        args.clip_length_of if args.input_type == 1 else args.clip_length_rgb,
        args.crop_size if not args.use_convolutional_pred else 112)

    Test(args)