Example #1
def main():
    # TODO: use argv
    parser = argparse.ArgumentParser(
        description="Caffe2: simple video training")
    parser.add_argument("--model_name",
                        type=str,
                        default='r2plus1d',
                        help="Name of the model")
    parser.add_argument("--model_depth",
                        type=int,
                        default=18,
                        help="Depth of the model")
    parser.add_argument("--train_data",
                        type=str,
                        default=None,
                        help="Path to train data",
                        required=True)
    parser.add_argument("--test_data",
                        type=str,
                        default=None,
                        help="Path to test data")
    parser.add_argument("--db_type",
                        type=str,
                        default="minidb",
                        help="Database type to save the training model")
    parser.add_argument("--gpus",
                        type=str,
                        help="Comma separated list of GPU devices to use")
    parser.add_argument("--num_gpus",
                        type=int,
                        default=1,
                        help="Number of GPU devices (instead of --gpus)")
    parser.add_argument("--scale_h",
                        type=int,
                        default=128,
                        help="Scale image height to")
    parser.add_argument("--scale_w",
                        type=int,
                        default=171,
                        help="Scale image width to")
    parser.add_argument("--crop_size",
                        type=int,
                        default=112,
                        help="Input image size (to crop to)")
    parser.add_argument("--num_decode_threads",
                        type=int,
                        default=4,
                        help="# of threads/GPU dedicated for video decoding")
    parser.add_argument("--clip_length_rgb",
                        type=int,
                        default=16,
                        help="Length of input clips")
    parser.add_argument("--sampling_rate_rgb",
                        type=int,
                        default=1,
                        help="Frame sampling rate")
    parser.add_argument("--num_labels",
                        type=int,
                        default=101,
                        help="Number of labels")
    parser.add_argument("--num_channels",
                        type=int,
                        default=3,
                        help="Number of channels")
    parser.add_argument("--clip_length_of",
                        type=int,
                        default=8,
                        help="Frames of optical flow data")
    parser.add_argument("--sampling_rate_of", type=int, default=2, help="")
    parser.add_argument("--frame_gap_of", type=int, default=2, help="")
    parser.add_argument("--input_type",
                        type=int,
                        default=0,
                        help="False=rgb, True=optical flow")
    parser.add_argument(
        "--flow_data_type",
        type=int,
        default=0,
        help="0=Flow2C, 1=Flow3C, 2=FlowWithGray, 3=FlowWithRGB")
    parser.add_argument("--do_flow_aggregation",
                        type=int,
                        default=0,
                        help="whether to aggregate optical flow across " +
                        "multiple frames")
    parser.add_argument("--get_video_id",
                        type=int,
                        default=0,
                        help="Output video id")
    parser.add_argument("--batch_size",
                        type=int,
                        default=32,
                        help="Batch size, total over all GPUs")
    parser.add_argument("--epoch_size",
                        type=int,
                        default=110000,
                        help="Number of videos/epoch, total over all machines")
    parser.add_argument("--num_epochs",
                        type=int,
                        default=50,
                        help="Num epochs.")
    parser.add_argument("--base_learning_rate",
                        type=float,
                        default=0.003,
                        help="Initial learning rate.")
    parser.add_argument("--step_epoch",
                        type=int,
                        default=10,
                        help="Reducing learning rate every step_epoch.")
    parser.add_argument("--gamma",
                        type=float,
                        default=0.1,
                        help="Learning rate decay factor.")
    parser.add_argument("--display_iter",
                        type=int,
                        default=10,
                        help="Display information every # of iterations.")
    parser.add_argument("--weight_decay",
                        type=float,
                        default=0.005,
                        help="Weight decay (L2 regularization)")
    parser.add_argument("--cudnn_workspace_limit_mb",
                        type=int,
                        default=64,
                        help="CuDNN workspace limit in MBs")
    parser.add_argument("--file_store_path",
                        type=str,
                        default="/tmp",
                        help="Path to directory to use for saving checkpoints")
    parser.add_argument("--save_model_name",
                        type=str,
                        default="simple_c3d",
                        help="Save the trained model to a given name")
    parser.add_argument(
        "--load_model_path",
        type=str,
        default=None,
        help="Load previously saved model to continue training")
    parser.add_argument("--use_cudnn", type=int, default=1, help="Use CuDNN")
    parser.add_argument("--profiling",
                        type=int,
                        default=0,
                        help="Profile training time")
    parser.add_argument("--pred_layer_name",
                        type=str,
                        default=None,
                        help="the prediction layer name")
    parser.add_argument("--multi_label",
                        type=int,
                        default=0,
                        help="Multiple label training")
    parser.add_argument("--channel_multiplier",
                        type=float,
                        default=1.0,
                        help="Channel multiplier")
    parser.add_argument("--bottleneck_multiplier",
                        type=float,
                        default=1.0,
                        help="Bottleneck multiplier")
    parser.add_argument("--use_dropout",
                        type=int,
                        default=0,
                        help="Use dropout at the prediction layer")
    parser.add_argument("--conv1_temporal_stride",
                        type=int,
                        default=1,
                        help="Conv1 temporal striding")
    parser.add_argument("--conv1_temporal_kernel",
                        type=int,
                        default=3,
                        help="Conv1 temporal kernel")
    parser.add_argument("--video_res_type",
                        type=int,
                        default=1,
                        help="Video frame scaling option, 0: scaled by " +
                        "height x width; 1: scaled by short edge")
    parser.add_argument("--use_pool1",
                        type=int,
                        default=0,
                        help="use pool1 layer")
    parser.add_argument("--jitter_scales",
                        type=str,
                        default="128,160",
                        required=True,
                        help="spatial scales jitter, separated by commas")
    parser.add_argument("--use_local_file",
                        type=int,
                        default=0,
                        help="use local file")
    parser.add_argument("--is_checkpoint",
                        type=int,
                        default=1,
                        help="0: pretrained_model is used as initalization" +
                        "1: pretrained_model is used as a checkpoint")
    parser.add_argument("--audio_input_3d",
                        type=int,
                        default=0,
                        help="is audio input 3d or 2d; 0 for 2d")
    parser.add_argument("--g_blend",
                        type=int,
                        default=0,
                        help="use gradient-blending to train model")
    parser.add_argument("--audio_weight",
                        type=float,
                        default=0.0,
                        help="g_blend weights for audio head")
    parser.add_argument("--visual_weight",
                        type=float,
                        default=0.0,
                        help="g_blend weights for visual head")
    parser.add_argument("--av_weight",
                        type=float,
                        default=1.0,
                        help="g_blend weights for av head")
    args = parser.parse_args()

    log.info(args)

    assert model_builder.model_validation(
        args.model_name, args.model_depth,
        args.clip_length_of if args.input_type else args.clip_length_rgb,
        args.crop_size)

    Train(args)
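
These snippets define only main() and omit their imports; model_builder, Train, Test, and ExtractFeatures come from the surrounding Caffe2 video-modeling project. A minimal sketch of the scaffolding Example #1 assumes (the logger name and the project import paths below are assumptions, not the project's actual layout):

import argparse
import logging

# Standard-library pieces the snippet actually uses.
logging.basicConfig()
log = logging.getLogger("simple_video_training")  # logger name is an assumption
log.setLevel(logging.INFO)

# Project-specific pieces (hypothetical import paths; adjust to the real repository):
# from models import model_builder
# from train import Train
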
Example #2
def main():
    parser = argparse.ArgumentParser(
        description="test_net"
    )
    parser.add_argument("--test_data", type=str, default=None,
                        help="Path to test data")
    parser.add_argument("--db_type", type=str, default='pickle',
                        help="Db type of the testing model")
    parser.add_argument("--model_depth", type=int, default=18,
                        help="Model depth")
    parser.add_argument("--model_name", type=str, default='r2plus1d',
                        help="Model name")
    parser.add_argument("--gpus", type=str, default=None,
                        help="Comma separated list of GPU devices to use")
    parser.add_argument("--num_gpus", type=int, default=1,
                        help="Number of GPU devices (instead of --gpus)")
    parser.add_argument("--scale_h", type=int, default=128,
                        help="Scale image height to")
    parser.add_argument("--scale_w", type=int, default=171,
                        help="Scale image width to")
    parser.add_argument("--num_iter", type=int, default=0,
                        help="Number of test iterations; " +
                        "0: test the whole set")
    parser.add_argument("--crop_size", type=int, default=112,
                        help="Input image size (to crop to)")
    parser.add_argument("--clip_length_rgb", type=int, default=16,
                        help="Length of input clips")
    parser.add_argument("--sampling_rate_rgb", type=int, default=1,
                        help="Frame sampling rate")
    parser.add_argument("--num_labels", type=int, default=101,
                        help="Number of labels")
    parser.add_argument("--num_channels", type=int, default=3,
                        help="Number of channels")
    parser.add_argument("--batch_size", type=int, default=6,
                        help="Batch size, total over all GPUs")
    parser.add_argument("--clip_per_video", type=int, default=10,
                        help="Number of clips to be sampled from a video")
    parser.add_argument("--top_k", type=int, default=5,
                        help="Top k video accuracy output")
    parser.add_argument("--aggregation", type=int, default=0,
                        help="0: avergage pool, 1: max pooling")
    parser.add_argument("--load_model_path", type=str, default=None,
                        help="Load saved model for testing")
    parser.add_argument("--use_cudnn", type=int, default=1,
                        help="Use CuDNN")
    parser.add_argument("--pred_layer_name", type=str, default=None,
                        help="the prediction layer name")
    parser.add_argument("--display_iter", type=int, default=10,
                        help="Display information every # of iterations.")
    parser.add_argument("--clip_length_of", type=int, default=8,
                        help="Frames of optical flow data")
    parser.add_argument("--sampling_rate_of", type=int, default=2,
                        help="Optical flow sampling rate (in frames)")
    parser.add_argument("--frame_gap_of", type=int, default=2,
                        help="")
    parser.add_argument("--do_flow_aggregation", type=int, default=0,
                        help="whether to aggregate optical flow across " +
                        " multiple frames")
    parser.add_argument("--flow_data_type", type=int, default=0,
                        help="0=Flow2C, 1=Flow3C, 2=FlowWithGray, " +
                        "3=FlowWithRGB")
    parser.add_argument("--input_type", type=int, default=0,
                        help="False=rgb, True=optical flow")
    parser.add_argument("--get_video_id", type=int, default=0,
                        help="Output video id")
    parser.add_argument("--use_dropout", type=int, default=0,
                        help="Use dropout at the prediction layer")
    parser.add_argument("--use_local_file", type=int, default=0,
                        help="Use lmdb as a list of local filenames")

    args = parser.parse_args()

    log.info(args)
    assert model_builder.model_validation(
        args.model_name,
        args.model_depth,
        args.clip_length_of if args.input_type == 1 else args.clip_length_rgb,
        args.crop_size
    )

    Test(args)
Example #3
def main():
    parser = argparse.ArgumentParser(description="Simple feature extraction")
    parser.add_argument("--db_type",
                        type=str,
                        default='pickle',
                        help="Db type of the testing model")
    parser.add_argument("--model_name",
                        type=str,
                        default='r2plus1d',
                        help="Model name")
    parser.add_argument("--model_depth",
                        type=int,
                        default=18,
                        help="Model depth")
    parser.add_argument("--gpus",
                        type=str,
                        default=None,
                        help="Comma separated list of GPU devices to use")
    parser.add_argument("--num_gpus",
                        type=int,
                        default=1,
                        help="Number of GPU devices (instead of --gpus)")
    parser.add_argument("--scale_h",
                        type=int,
                        default=128,
                        help="Scale image height to")
    parser.add_argument("--scale_w",
                        type=int,
                        default=171,
                        help="Scale image width to")
    parser.add_argument("--crop_size",
                        type=int,
                        default=112,
                        help="Input image size (to crop to)")
    parser.add_argument("--clip_length_rgb",
                        type=int,
                        default=4,
                        help="Length of input clips")
    parser.add_argument("--sampling_rate_rgb",
                        type=int,
                        default=1,
                        help="Frame sampling rate")
    parser.add_argument("--num_labels",
                        type=int,
                        default=101,
                        help="Number of labels")
    parser.add_argument("--num_channels",
                        type=int,
                        default=3,
                        help="Number of channels")
    parser.add_argument("--batch_size",
                        type=int,
                        default=32,
                        help="Batch size, per-GPU")
    parser.add_argument("--load_model_path",
                        type=str,
                        default='',
                        required=True,
                        help="Load saved model for testing")
    parser.add_argument("--test_data",
                        type=str,
                        default="",
                        required=True,
                        help="Dataset on which we will extract features")
    parser.add_argument("--output_path",
                        type=str,
                        default="",
                        help="Path to output pickle; defaults to " +
                        "features.pickle next to <test_data>")
    parser.add_argument("--use_cudnn", type=int, default=1, help="Use CuDNN")
    parser.add_argument("--features",
                        type=str,
                        default="final_avg",
                        help="Comma-separated list of blob names to fetch")
    parser.add_argument("--num_iterations",
                        type=int,
                        default=-1,
                        help="Run only this many iterations")
    parser.add_argument("--num_decode_threads", type=int, default=4, help="")
    parser.add_argument("--clip_length_of",
                        type=int,
                        default=8,
                        help="Frames of optical flow data")
    parser.add_argument("--sampling_rate_of",
                        type=int,
                        default=2,
                        help="Sampling rate for optial flows")
    parser.add_argument("--frame_gap_of",
                        type=int,
                        default=2,
                        help="Frame gap of optical flows")
    parser.add_argument("--input_type",
                        type=int,
                        default=0,
                        help="0=rgb, 1=optical flow")
    parser.add_argument("--flow_data_type",
                        type=int,
                        default=0,
                        help="0=Flow2C, 1=Flow3C, 2=FlowWithGray, " +
                        "3=FlowWithRGB")
    parser.add_argument("--do_flow_aggregation",
                        type=int,
                        default=0,
                        help="whether to aggregate optical flow across " +
                        "multiple frames")
    parser.add_argument("--clip_per_video",
                        type=int,
                        default=1,
                        help="When clips_per_video > 1, sample this many " +
                        "clips uniformly in time")
    parser.add_argument("--get_video_id",
                        type=int,
                        default=0,
                        help="Output video id")
    parser.add_argument("--sanity_check",
                        type=int,
                        default=0,
                        help="Sanity check on the accuracy/auc")
    parser.add_argument("--decode_type",
                        type=int,
                        default=2,
                        help="0: random, 1: uniform sampling, " +
                        "2: use starting frame")
    parser.add_argument("--use_local_file",
                        type=int,
                        default=0,
                        help="Use lmdb as a list of local filenames")

    args = parser.parse_args()
    log.info(args)

    assert model_builder.model_validation(
        args.model_name, args.model_depth,
        args.clip_length_of if args.input_type == 1 else args.clip_length_rgb,
        args.crop_size)

    ExtractFeatures(args)
Example #4
def main():
    # TODO: use argv
    parser = argparse.ArgumentParser(
        description="Caffe2: simple video training")
    parser.add_argument("--model_name",
                        type=str,
                        default='r2plus1d',
                        help="Name of the model")
    parser.add_argument("--model_depth",
                        type=int,
                        default=18,
                        help="Depth of the model")
    parser.add_argument("--train_data",
                        type=str,
                        default=None,
                        help="Path to train data",
                        required=True)
    parser.add_argument("--test_data",
                        type=str,
                        default=None,
                        help="Path to test data")
    parser.add_argument("--db_type",
                        type=str,
                        default="minidb",
                        help="Database type to save the training model")
    parser.add_argument("--gpus",
                        type=str,
                        help="Comma separated list of GPU devices to use")
    parser.add_argument("--num_gpus",
                        type=int,
                        default=1,
                        help="Number of GPU devices (instead of --gpus)")
    parser.add_argument("--scale_h",
                        type=int,
                        default=128,
                        help="Scale image height to")
    parser.add_argument("--scale_w",
                        type=int,
                        default=171,
                        help="Scale image width to")
    parser.add_argument("--crop_size",
                        type=int,
                        default=112,
                        help="Input image size (to crop to)")
    parser.add_argument("--num_decode_threads",
                        type=int,
                        default=4,
                        help="# of threads/GPU dedicated for video decoding")
    parser.add_argument("--clip_length_rgb",
                        type=int,
                        default=16,
                        help="Length of input clips")
    parser.add_argument("--sampling_rate_rgb",
                        type=int,
                        default=1,
                        help="Frame sampling rate")
    parser.add_argument("--num_labels",
                        type=int,
                        default=101,
                        help="Number of labels")
    parser.add_argument("--num_channels",
                        type=int,
                        default=3,
                        help="Number of channels")
    parser.add_argument("--clip_length_of",
                        type=int,
                        default=8,
                        help="Frames of optical flow data")
    parser.add_argument("--sampling_rate_of", type=int, default=2, help="")
    parser.add_argument("--frame_gap_of", type=int, default=2, help="")
    parser.add_argument("--input_type",
                        type=int,
                        default=0,
                        help="0: rgb, 1: optical flow")
    parser.add_argument("--flow_data_type",
                        type=int,
                        default=0,
                        help="0: Flow2C, 1: Flow3C, 2: FlowWithGray, " +
                        "3: FlowWithRGB")
    parser.add_argument("--do_flow_aggregation",
                        type=int,
                        default=0,
                        help="whether to aggregate optical flow across " +
                        "multiple frames")
    parser.add_argument("--get_video_id",
                        type=int,
                        default=0,
                        help="Output video id")
    parser.add_argument("--batch_size",
                        type=int,
                        default=32,
                        help="Batch size, total over all GPUs")
    parser.add_argument("--epoch_size",
                        type=int,
                        default=110000,
                        help="Number of videos/epoch, total over all machines")
    parser.add_argument("--num_epochs",
                        type=int,
                        default=50,
                        help="Num epochs.")
    parser.add_argument("--base_learning_rate",
                        type=float,
                        default=0.003,
                        help="Initial learning rate.")
    parser.add_argument("--step_epoch",
                        type=int,
                        default=10,
                        help="Reducing learning rate every step_epoch.")
    parser.add_argument("--gamma",
                        type=float,
                        default=0.1,
                        help="Learning rate decay factor.")
    parser.add_argument("--display_iter",
                        type=int,
                        default=10,
                        help="Display information every # of iterations.")
    parser.add_argument("--weight_decay",
                        type=float,
                        default=0.005,
                        help="Weight decay (L2 regularization)")
    parser.add_argument("--cudnn_workspace_limit_mb",
                        type=int,
                        default=64,
                        help="CuDNN workspace limit in MBs")
    parser.add_argument("--file_store_path",
                        type=str,
                        default=".",
                        help="Path to directory to use for saving checkpoints")
    parser.add_argument(
        "--pretrained_model",
        type=str,
        default=None,
        help="Load saved model to continue training" + "if is_checkpoint = 1" +
        "Load pretrained model for finetuning" + "if is_checkpoint = 0.")
    parser.add_argument("--is_checkpoint",
                        type=int,
                        default=1,
                        help="0: pretrained_model is used as initalization" +
                        "1: pretrained_model is used as a checkpoint")
    parser.add_argument("--use_cudnn", type=int, default=1, help="Use CuDNN")
    parser.add_argument("--profiling",
                        type=int,
                        default=0,
                        help="Profile training time")
    parser.add_argument("--pred_layer_name",
                        type=str,
                        default=None,
                        help="the prediction layer name")
    parser.add_argument("--use_dropout",
                        type=int,
                        default=0,
                        help="Use dropout at the prediction layer")
    parser.add_argument("--use_local_file",
                        type=int,
                        default=0,
                        help="Use lmdb as a list of local filenames")
    args = parser.parse_args()

    log.info(args)

    assert model_builder.model_validation(
        args.model_name, args.model_depth,
        args.clip_length_of if args.input_type else args.clip_length_rgb,
        args.crop_size)

    Train(args)
Example #5
def main():
    parser = argparse.ArgumentParser(
        description="Tool for testing large networks")
    parser.add_argument("--test_data",
                        type=str,
                        default=None,
                        help="Path to test data")
    parser.add_argument("--db_type",
                        type=str,
                        default='pickle',
                        help="Db type of the testing model")
    parser.add_argument("--model_depth",
                        type=int,
                        default=18,
                        help="Model depth")
    parser.add_argument("--model_name",
                        type=str,
                        default='r2plus1d',
                        help="Model name")
    parser.add_argument("--gpus",
                        type=str,
                        default=None,
                        help="Comma separated list of GPU devices to use")
    parser.add_argument("--num_gpus",
                        type=int,
                        default=1,
                        help="Number of GPU devices (instead of --gpus)")
    parser.add_argument("--scale_h",
                        type=int,
                        default=128,
                        help="Scale image height to")
    parser.add_argument("--scale_w",
                        type=int,
                        default=171,
                        help="Scale image width to")
    parser.add_argument(
        "--num_iter",
        type=int,
        default=0,
        help="Number of test iterations; 0: test the whole set")
    parser.add_argument("--crop_size",
                        type=int,
                        default=112,
                        help="Input image size (to crop to)")
    parser.add_argument("--clip_length_rgb",
                        type=int,
                        default=16,
                        help="Length of input clips")
    parser.add_argument("--sampling_rate_rgb",
                        type=int,
                        default=1,
                        help="Frame sampling rate")
    parser.add_argument("--num_labels",
                        type=int,
                        default=101,
                        help="Number of labels")
    parser.add_argument("--num_channels",
                        type=int,
                        default=3,
                        help="Number of channels")
    parser.add_argument("--batch_size",
                        type=int,
                        default=1,
                        help="Batch size, total over all GPUs")
    parser.add_argument("--clip_per_video",
                        type=int,
                        default=10,
                        help="Number of clips to be sampled from a video")
    parser.add_argument("--top_k",
                        type=int,
                        default=5,
                        help="Top k video accuracy output")
    parser.add_argument("--aggregation",
                        type=int,
                        default=0,
                        help="0: avergage pool, 1: max pooling")
    parser.add_argument("--load_model_path",
                        type=str,
                        default=None,
                        help="Load saved model for testing")
    parser.add_argument("--use_cudnn", type=int, default=1, help="Use CuDNN")
    parser.add_argument("--print_per_class_metrics",
                        type=int,
                        default=0,
                        help="Log per class accuracy for multi-class setting")
    parser.add_argument("--pred_layer_name",
                        type=str,
                        default=None,
                        help="the prediction layer name")
    parser.add_argument("--multi_label",
                        type=int,
                        default=0,
                        help="Multiple label testing")
    parser.add_argument("--display_iter",
                        type=int,
                        default=10,
                        help="Display information every # of iterations.")
    parser.add_argument("--clip_length_of",
                        type=int,
                        default=8,
                        help="Frames of optical flow data")
    parser.add_argument("--sampling_rate_of", type=int, default=2, help="")
    parser.add_argument("--frame_gap_of", type=int, default=2, help="")
    parser.add_argument("--do_flow_aggregation",
                        type=int,
                        default=0,
                        help="whether to aggregate optical flow across" +
                        " multiple frames")
    parser.add_argument(
        "--flow_data_type",
        type=int,
        default=0,
        help="0=Flow2C, 1=Flow3C, 2=FlowWithGray, 3=FlowWithRGB")
    parser.add_argument("--input_type",
                        type=int,
                        default=0,
                        help="False=rgb, True=optical flow")
    parser.add_argument("--num_decode_threads",
                        type=int,
                        default=4,
                        help="number of decoding threads")
    parser.add_argument("--channel_multiplier",
                        type=float,
                        default=1.0,
                        help="Channel multiplier")
    parser.add_argument("--bottleneck_multiplier",
                        type=float,
                        default=1.0,
                        help="Bottleneck multiplier")
    parser.add_argument("--use_dropout",
                        type=int,
                        default=0,
                        help="Use dropout at the prediction layer")
    parser.add_argument("--conv1_temporal_stride",
                        type=int,
                        default=1,
                        help="Conv1 temporal striding")
    parser.add_argument("--conv1_temporal_kernel",
                        type=int,
                        default=3,
                        help="Conv1 temporal kernel")
    parser.add_argument("--use_convolutional_pred",
                        type=int,
                        default=0,
                        help="using convolutional predictions")
    parser.add_argument("--video_res_type",
                        type=int,
                        default=0,
                        help="Video frame scaling option, 0: scaled by " +
                        "height x width; 1: scaled by shorter edge")
    parser.add_argument("--use_pool1",
                        type=int,
                        default=0,
                        help="use pool1 layer")
    parser.add_argument("--use_local_file",
                        type=int,
                        default=0,
                        help="use local file")
    parser.add_argument("--crop_per_clip",
                        type=int,
                        default=1,
                        help="number of spatial crops per clip")
    parser.add_argument("--crop_per_inference",
                        type=int,
                        default=1,
                        help="number of spatial crops GPU memory can handle" +
                        "per one pass of inference")

    args = parser.parse_args()

    log.info(args)
    assert model_builder.model_validation(
        args.model_name, args.model_depth,
        args.clip_length_of if args.input_type == 1 else args.clip_length_rgb,
        args.crop_size if not args.use_convolutional_pred else 112)

    Test(args)
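
Each example defines main() but no entry point. A hedged sketch of wiring one up as a script (the script name and flag values in the comment are illustrative only, and use flags already defined above):

if __name__ == "__main__":
    # Illustrative invocation (script name is an assumption):
    #   python test_net.py --test_data=/path/to/test_db \
    #       --load_model_path=/path/to/model.pkl --num_gpus=1
    main()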