Example 1
def cli_render_config(sys_argv):
    parser = argparse.ArgumentParser(
        description="This script renders the full config from a user config.",
        prog="ludwig render_config",
        usage="%(prog)s [options]",
    )
    parser.add_argument(
        "-c",
        "--config",
        type=load_config_from_str,
        help="input user YAML config path",
    )
    parser.add_argument(
        "-o",
        "--output",
        type=str,
        help="output rendered YAML config path",
        required=False,
    )

    add_contrib_callback_args(parser)
    args = parser.parse_args(sys_argv)

    args.callbacks = args.callbacks or []
    for callback in args.callbacks:
        callback.on_cmdline("render_config", *sys_argv)

    print_ludwig("Render Config", LUDWIG_VERSION)
    render_config(**vars(args))
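
All of the commands in this collection follow the same shape: build an argparse parser, parse the explicit sys_argv list, run contributed callbacks, and dispatch to a handler by unpacking vars(args) as keyword arguments. A minimal, self-contained sketch of that shape using only the standard library (the handler and flags below are illustrative stand-ins, not Ludwig APIs):

import argparse


def render_config_demo(config=None, output=None):
    # illustrative stand-in for the real handler; it just echoes its inputs
    print(f"config={config!r}, output={output!r}")


def cli_demo(sys_argv):
    parser = argparse.ArgumentParser(prog="demo", usage="%(prog)s [options]")
    parser.add_argument("-c", "--config", help="input config path")
    parser.add_argument("-o", "--output", help="output path")
    args = parser.parse_args(sys_argv)
    # each parsed option becomes a keyword argument of the handler
    render_config_demo(**vars(args))


cli_demo(["-c", "config.yaml", "-o", "rendered.yaml"])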
Example 2
def cli_export_triton(sys_argv):
    parser = argparse.ArgumentParser(
        description="This script loads a pretrained model "
        "and saves it as torchscript for Triton.",
        prog="ludwig export_neuropod",
        usage="%(prog)s [options]",
    )

    # ----------------
    # Model parameters
    # ----------------
    parser.add_argument("-m",
                        "--model_path",
                        help="model to load",
                        required=True)
    parser.add_argument("-mn",
                        "--model_name",
                        help="model name",
                        default="ludwig_model")
    parser.add_argument("-mv",
                        "--model_version",
                        type=int,
                        help="model version",
                        default=1)

    # -----------------
    # Output parameters
    # -----------------
    parser.add_argument("-od",
                        "--output_path",
                        type=str,
                        help="path where to save the export model",
                        required=True)

    # ------------------
    # Runtime parameters
    # ------------------
    parser.add_argument(
        "-l",
        "--logging_level",
        default="info",
        help="the level of logging to use",
        choices=["critical", "error", "warning", "info", "debug", "notset"],
    )

    add_contrib_callback_args(parser)
    args = parser.parse_args(sys_argv)

    args.callbacks = args.callbacks or []
    for callback in args.callbacks:
        callback.on_cmdline("export_triton", *sys_argv)

    args.logging_level = logging_level_registry[args.logging_level]
    logging.getLogger("ludwig").setLevel(args.logging_level)
    global logger
    logger = logging.getLogger("ludwig.export")

    print_ludwig("Export Triton", LUDWIG_VERSION)

    export_triton(**vars(args))
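
Each command then maps the --logging_level choice onto a logging constant via logging_level_registry before configuring the "ludwig" logger. A hedged, stdlib-only sketch of what such a registry plausibly looks like (the real mapping lives elsewhere in Ludwig and may differ):

import logging

# assumed shape of logging_level_registry; Ludwig's actual mapping may differ
logging_level_registry = {
    "critical": logging.CRITICAL,
    "error": logging.ERROR,
    "warning": logging.WARNING,
    "info": logging.INFO,
    "debug": logging.DEBUG,
    "notset": logging.NOTSET,
}

logging.getLogger("ludwig").setLevel(logging_level_registry["info"])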
Example 3
def cli_init_config(sys_argv):
    parser = argparse.ArgumentParser(
        description="This script initializes a valid config from a dataset.",
        prog="ludwig init_config",
        usage="%(prog)s [options]",
    )
    parser.add_argument(
        "-d",
        "--dataset",
        type=str,
        help="input data file path",
    )
    parser.add_argument(
        "-t",
        "--target",
        type=str,
        help="target(s) to predict as output features of the model",
        action="append",
        required=False,
    )
    parser.add_argument(
        "--time_limit_s",
        type=int,
        help="time limit to train the model in seconds when using hyperopt",
        required=False,
    )
    parser.add_argument(
        "--tune_for_memory",
        type=bool,
        help=
        "refine hyperopt search space based on available host / GPU memory",
        default=False,
        required=False,
    )
    parser.add_argument(
        "--hyperopt",
        type=bool,
        help="include automl hyperopt config",
        default=False,
        required=False,
    )
    parser.add_argument(
        "-o",
        "--output",
        type=str,
        help="output initialized YAML config path",
        required=False,
    )

    add_contrib_callback_args(parser)
    args = parser.parse_args(sys_argv)

    args.callbacks = args.callbacks or []
    for callback in args.callbacks:
        callback.on_cmdline("init_config", *sys_argv)

    print_ludwig("Init Config", LUDWIG_VERSION)
    init_config(**vars(args))
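
Note that --tune_for_memory and --hyperopt above use type=bool, and argparse applies bool() to the raw string, so any non-empty value, including "False", parses as True. A quick stdlib demonstration of that behavior:

import argparse

parser = argparse.ArgumentParser()
parser.add_argument("--hyperopt", type=bool, default=False)

print(parser.parse_args([]).hyperopt)                       # False (the default)
print(parser.parse_args(["--hyperopt", "False"]).hyperopt)  # True, because bool("False") is truthy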
Example 4
def cli(sys_argv):
    parser = argparse.ArgumentParser(
        description="This script serves a pretrained model", prog="ludwig serve", usage="%(prog)s [options]"
    )

    # ----------------
    # Model parameters
    # ----------------
    parser.add_argument("-m", "--model_path", help="model to load", required=True)

    parser.add_argument(
        "-l",
        "--logging_level",
        default="info",
        help="the level of logging to use",
        choices=["critical", "error", "warning", "info", "debug", "notset"],
    )

    # ----------------
    # Server parameters
    # ----------------
    parser.add_argument(
        "-p",
        "--port",
        help="port for server (default: 8000)",
        default=8000,
        type=int,
    )

    parser.add_argument("-H", "--host", help="host for server (default: 0.0.0.0)", default="0.0.0.0")

    parser.add_argument(
        "-ao",
        "--allowed_origins",
        nargs="*",
        help="A list of origins that should be permitted to make cross-origin requests. "
        'Use "*" to allow any origin. See https://www.starlette.io/middleware/#corsmiddleware.',
    )

    add_contrib_callback_args(parser)
    args = parser.parse_args(sys_argv)

    args.callbacks = args.callbacks or []
    for callback in args.callbacks:
        callback.on_cmdline("serve", *sys_argv)

    args.logging_level = logging_level_registry[args.logging_level]
    logging.getLogger("ludwig").setLevel(args.logging_level)
    global logger
    logger = logging.getLogger("ludwig.serve")

    print_ludwig("Serve", LUDWIG_VERSION)

    run_server(args.model_path, args.host, args.port, args.allowed_origins)
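
The --allowed_origins flag uses nargs="*", so it can be omitted entirely (None), passed as a bare flag (empty list), or given one or more origins. A small stdlib check of those three cases:

import argparse

parser = argparse.ArgumentParser()
parser.add_argument("-ao", "--allowed_origins", nargs="*")

print(parser.parse_args([]).allowed_origins)                          # None
print(parser.parse_args(["--allowed_origins"]).allowed_origins)       # []
print(parser.parse_args(["--allowed_origins", "*"]).allowed_origins)  # ['*']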
Example 5
def cli_export_mlflow(sys_argv):
    parser = argparse.ArgumentParser(
        description='This script loads a pretrained model '
        'and saves it as an MLFlow model.',
        prog='ludwig export_mlflow',
        usage='%(prog)s [options]')

    # ----------------
    # Model parameters
    # ----------------
    parser.add_argument('-m',
                        '--model_path',
                        help='model to load',
                        required=True)
    parser.add_argument(
        '-mn',
        '--registered_model_name',
        help='model name to upload to in MLflow model registry',
        default='mlflow')

    # -----------------
    # Output parameters
    # -----------------
    parser.add_argument('-od',
                        '--output_path',
                        type=str,
                        help='path where to save the exported model',
                        required=True)

    # ------------------
    # Runtime parameters
    # ------------------
    parser.add_argument(
        '-l',
        '--logging_level',
        default='info',
        help='the level of logging to use',
        choices=['critical', 'error', 'warning', 'info', 'debug', 'notset'])

    add_contrib_callback_args(parser)
    args = parser.parse_args(sys_argv)

    args.callbacks = args.callbacks or []
    for callback in args.callbacks:
        callback.on_cmdline('export_mlflow', *sys_argv)

    args.logging_level = logging_level_registry[args.logging_level]
    logging.getLogger('ludwig').setLevel(args.logging_level)
    global logger
    logger = logging.getLogger('ludwig.export')

    print_ludwig('Export MLFlow', LUDWIG_VERSION)

    export_mlflow(**vars(args))
Example 6
def cli_collect_weights(sys_argv):
    """Command Line Interface to collecting the weights for the model.

    --m: Input model that is necessary to collect the tensors; this is a
         required *option*
    --t: Tensors to collect
    --od: Output directory of the model, defaults to results
    --v: Verbose: Defines the logging level that the user will be exposed to
    """
    parser = argparse.ArgumentParser(
        description="This script loads a pretrained model " "and uses it collect weights.",
        prog="ludwig collect_weights",
        usage="%(prog)s [options]",
    )

    # ----------------
    # Model parameters
    # ----------------
    parser.add_argument("-m", "--model_path", help="model to load", required=True)
    parser.add_argument("-t", "--tensors", help="tensors to collect", nargs="+", required=True)

    # -------------------------
    # Output results parameters
    # -------------------------
    parser.add_argument(
        "-od", "--output_directory", type=str, default="results", help="directory that contains the results"
    )

    # ------------------
    # Runtime parameters
    # ------------------
    parser.add_argument(
        "-l",
        "--logging_level",
        default="info",
        help="the level of logging to use",
        choices=["critical", "error", "warning", "info", "debug", "notset"],
    )

    add_contrib_callback_args(parser)
    args = parser.parse_args(sys_argv)

    args.callbacks = args.callbacks or []
    for callback in args.callbacks:
        callback.on_cmdline("collect_weights", *sys_argv)

    args.logging_level = logging_level_registry[args.logging_level]
    logging.getLogger("ludwig").setLevel(args.logging_level)
    global logger
    logger = logging.getLogger("ludwig.collect")

    print_ludwig("Collect Weights", LUDWIG_VERSION)

    collect_weights(**vars(args))
Example 7
def cli_collect_summary(sys_argv):
    """Command Line Interface to collecting a summary of the model layers and weights.

    --m: Input model that is necessary to collect the tensors; this is a
         required *option*
    --v: Verbose: Defines the logging level that the user will be exposed to
    """
    parser = argparse.ArgumentParser(
        description="This script loads a pretrained model "
        "and prints names of weights and layers activations "
        "to use with other collect commands",
        prog="ludwig collect_summary",
        usage="%(prog)s [options]",
    )

    # ----------------
    # Model parameters
    # ----------------
    parser.add_argument("-m",
                        "--model_path",
                        help="model to load",
                        required=True)

    # ------------------
    # Runtime parameters
    # ------------------
    parser.add_argument(
        "-l",
        "--logging_level",
        default="info",
        help="the level of logging to use",
        choices=["critical", "error", "warning", "info", "debug", "notset"],
    )

    add_contrib_callback_args(parser)
    args = parser.parse_args(sys_argv)

    args.callbacks = args.callbacks or []
    for callback in args.callbacks:
        callback.on_cmdline("collect_summary", *sys_argv)

    args.logging_level = logging_level_registry[args.logging_level]
    logging.getLogger("ludwig").setLevel(args.logging_level)
    global logger
    logger = logging.getLogger("ludwig.collect")

    print_ludwig("Collect Summary", LUDWIG_VERSION)

    print_model_summary(**vars(args))
Example 8
def cli(sys_argv):
    parser = argparse.ArgumentParser(
        description="This script searches for optimal Hyperparameters",
        prog="ludwig hyperopt",
        usage="%(prog)s [options]",
    )

    # -------------------
    # Hyperopt parameters
    # -------------------
    parser.add_argument(
        "-sshs",
        "--skip_save_hyperopt_statistics",
        help="skips saving hyperopt statistics file",
        action="store_true",
        default=False,
    )

    # ----------------------------
    # Experiment naming parameters
    # ----------------------------
    parser.add_argument(
        "--output_directory",
        type=str,
        default="results",
        help="directory that contains the results",
    )
    parser.add_argument("--experiment_name",
                        type=str,
                        default="hyperopt",
                        help="experiment name")
    parser.add_argument("--model_name",
                        type=str,
                        default="run",
                        help="name for the model")

    # ---------------
    # Data parameters
    # ---------------
    parser.add_argument(
        "--dataset",
        help="input data file path. "
        "If it has a split column, it will be used for splitting "
        "(0: train, 1: validation, 2: test), "
        "otherwise the dataset will be randomly split",
    )
    parser.add_argument("--training_set", help="input train data file path")
    parser.add_argument("--validation_set",
                        help="input validation data file path")
    parser.add_argument("--test_set", help="input test data file path")

    parser.add_argument(
        "--training_set_metadata",
        help="input metadata JSON file path. An intermediate preprocessed file "
        "containing the mappings of the input file created "
        "the first time a file is used, in the same directory "
        "with the same name and a .json extension",
    )

    parser.add_argument(
        "--data_format",
        help="format of the input data",
        default="auto",
        choices=[
            "auto",
            "csv",
            "excel",
            "feather",
            "fwf",
            "hdf5",
            "html"
            "tables",
            "json",
            "jsonl",
            "parquet",
            "pickle",
            "sas",
            "spss",
            "stata",
            "tsv",
        ],
    )

    parser.add_argument(
        "-sspi",
        "--skip_save_processed_input",
        help="skips saving intermediate HDF5 and JSON files",
        action="store_true",
        default=False,
    )

    # ----------------
    # Model parameters
    # ----------------
    config = parser.add_mutually_exclusive_group(required=True)
    config.add_argument(
        "-c",
        "--config",
        type=load_yaml,
        help="Path to the YAML file containing the model configuration",
    )
    config.add_argument(
        "-cs",
        "--config_str",
        dest="config",
        type=load_config_from_str,
        help="JSON or YAML serialized string of the model configuration",
    )

    parser.add_argument(
        "-mlp",
        "--model_load_path",
        help="path of a pretrained model to load as initialization",
    )
    parser.add_argument(
        "-mrp",
        "--model_resume_path",
        help="path of the model directory to resume training of",
    )
    parser.add_argument(
        "-sstd",
        "--skip_save_training_description",
        action="store_true",
        default=False,
        help="disables saving the description JSON file",
    )
    parser.add_argument(
        "-ssts",
        "--skip_save_training_statistics",
        action="store_true",
        default=False,
        help="disables saving training statistics JSON file",
    )
    parser.add_argument(
        "-ssm",
        "--skip_save_model",
        action="store_true",
        default=False,
        help="disables saving weights each time the model improves. "
        "By default Ludwig saves  weights after each epoch "
        "the validation metric (improves, but  if the model is really big "
        "that can be time consuming. If you do not want to keep "
        "the weights and just find out what performance a model can get "
        "with a set of hyperparameters, use this parameter to skip it",
    )
    parser.add_argument(
        "-ssp",
        "--skip_save_progress",
        action="store_true",
        default=False,
        help="disables saving weights after each epoch. By default ludwig saves "
        "weights after each epoch for enabling resuming of training, but "
        "if the model is really big that can be time consuming and will "
        "save twice as much space, use this parameter to skip it",
    )
    parser.add_argument(
        "-ssl",
        "--skip_save_log",
        action="store_true",
        default=False,
        help="disables saving TensorBoard logs. By default Ludwig saves "
        "logs for the TensorBoard, but if it is not needed turning it off "
        "can slightly increase the overall speed",
    )

    # ------------------
    # Runtime parameters
    # ------------------
    parser.add_argument(
        "-rs",
        "--random_seed",
        type=int,
        default=42,
        help="a random seed that is going to be used anywhere there is a call "
        "to a random number generator: data splitting, parameter "
        "initialization and training set shuffling",
    )
    parser.add_argument(
        "-hlv",
        "--hyperopt_log_verbosity",
        type=int,
        default=3,
        choices=[0, 1, 2, 3],
        help="Controls verbosity of ray tune log messages.  Valid values: "
        "0 = silent, 1 = only status updates, 2 = status and brief trial "
        "results, 3 = status and detailed trial results.",
    )
    parser.add_argument("-g",
                        "--gpus",
                        nargs="+",
                        type=int,
                        default=None,
                        help="list of gpus to use")
    parser.add_argument("-gml",
                        "--gpu_memory_limit",
                        type=int,
                        default=None,
                        help="maximum memory in MB to allocate per GPU device")
    parser.add_argument(
        "-b",
        "--backend",
        help="specifies backend to use for parallel / distributed execution, "
        "defaults to local execution or Horovod if called using horovodrun",
        choices=ALL_BACKENDS,
    )
    parser.add_argument(
        "-l",
        "--logging_level",
        default="info",
        help="the level of logging to use",
        choices=["critical", "error", "warning", "info", "debug", "notset"],
    )

    add_contrib_callback_args(parser)
    args = parser.parse_args(sys_argv)

    args.callbacks = args.callbacks or []
    for callback in args.callbacks:
        callback.on_cmdline("hyperopt", *sys_argv)

    args.logging_level = logging_level_registry[args.logging_level]
    logging.getLogger("ludwig").setLevel(args.logging_level)
    global logger
    logger = logging.getLogger("ludwig.hyperopt")

    args.backend = initialize_backend(args.backend
                                      or args.config.get("backend"))
    if args.backend.is_coordinator():
        print_ludwig("Hyperopt", LUDWIG_VERSION)

    hyperopt_cli(**vars(args))
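
The -c/--config and -cs/--config_str options form a mutually exclusive group and share dest="config", so the handler always receives a single parsed configuration regardless of which flag was used. A self-contained sketch of that pattern (the loader functions are trivial stand-ins for load_yaml and load_config_from_str):

import argparse


def load_from_path(path):  # stand-in for load_yaml
    return {"source": "file", "path": path}


def load_from_string(text):  # stand-in for load_config_from_str
    return {"source": "string", "text": text}


parser = argparse.ArgumentParser()
config = parser.add_mutually_exclusive_group(required=True)
config.add_argument("-c", "--config", type=load_from_path)
config.add_argument("-cs", "--config_str", dest="config", type=load_from_string)

print(parser.parse_args(["-c", "model.yaml"]).config)
print(parser.parse_args(["-cs", "{input_features: []}"]).config)
# passing both -c and -cs together raises an argparse error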
Example 9
def cli(sys_argv):
    parser = argparse.ArgumentParser(
        description='This script loads a pretrained model '
        'and uses it to predict',
        prog='ludwig predict',
        usage='%(prog)s [options]')

    # ---------------
    # Data parameters
    # ---------------
    parser.add_argument('--dataset',
                        help='input data file path',
                        required=True)
    parser.add_argument('--data_format',
                        help='format of the input data',
                        default='auto',
                        choices=[
                            'auto', 'csv', 'excel', 'feather', 'fwf', 'hdf5',
                            'html', 'tables', 'json', 'jsonl', 'parquet',
                            'pickle', 'sas', 'spss', 'stata', 'tsv'
                        ])
    parser.add_argument('-s',
                        '--split',
                        default=FULL,
                        choices=[TRAINING, VALIDATION, TEST, FULL],
                        help='the split to test the model on')

    # ----------------
    # Model parameters
    # ----------------
    parser.add_argument('-m',
                        '--model_path',
                        help='model to load',
                        required=True)

    # -------------------------
    # Output results parameters
    # -------------------------
    parser.add_argument('-od',
                        '--output_directory',
                        type=str,
                        default='results',
                        help='directory that contains the results')
    parser.add_argument('-ssuo',
                        '--skip_save_unprocessed_output',
                        help='skips saving intermediate NPY output files',
                        action='store_true',
                        default=False)
    parser.add_argument('-sstp',
                        '--skip_save_predictions',
                        help='skips saving predictions CSV files',
                        action='store_true',
                        default=False)

    # ------------------
    # Generic parameters
    # ------------------
    parser.add_argument('-bs',
                        '--batch_size',
                        type=int,
                        default=128,
                        help='size of batches')

    # ------------------
    # Runtime parameters
    # ------------------
    parser.add_argument('-g',
                        '--gpus',
                        type=int,
                        default=0,
                        help='list of gpus to use')
    parser.add_argument('-gml',
                        '--gpu_memory_limit',
                        type=int,
                        default=None,
                        help='maximum memory in MB to allocate per GPU device')
    parser.add_argument(
        '-dpt',
        '--disable_parallel_threads',
        action='store_false',
        dest='allow_parallel_threads',
        help='disable TensorFlow from using multithreading for reproducibility'
    )
    parser.add_argument(
        "-b",
        "--backend",
        help='specifies backend to use for parallel / distributed execution, '
        'defaults to local execution or Horovod if called using horovodrun',
        choices=ALL_BACKENDS,
    )
    parser.add_argument('-dbg',
                        '--debug',
                        action='store_true',
                        default=False,
                        help='enables debugging mode')
    parser.add_argument(
        '-l',
        '--logging_level',
        default='info',
        help='the level of logging to use',
        choices=['critical', 'error', 'warning', 'info', 'debug', 'notset'])

    add_contrib_callback_args(parser)
    args = parser.parse_args(sys_argv)

    args.callbacks = args.callbacks or []
    for callback in args.callbacks:
        callback.on_cmdline('predict', *sys_argv)

    args.logging_level = logging_level_registry[args.logging_level]
    logging.getLogger('ludwig').setLevel(args.logging_level)
    global logger
    logger = logging.getLogger('ludwig.predict')

    args.backend = initialize_backend(args.backend)
    if args.backend.is_coordinator():
        print_ludwig('Predict', LUDWIG_VERSION)
        logger.info('Dataset path: {}'.format(args.dataset))
        logger.info('Model path: {}'.format(args.model_path))
        logger.info('')

    predict_cli(**vars(args))
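
The -dpt/--disable_parallel_threads flag uses action="store_false" with dest="allow_parallel_threads", so the handler receives a positive boolean that defaults to True and flips to False when the flag is passed. A quick stdlib check:

import argparse

parser = argparse.ArgumentParser()
parser.add_argument("-dpt", "--disable_parallel_threads",
                    action="store_false", dest="allow_parallel_threads")

print(parser.parse_args([]).allow_parallel_threads)        # True (store_false defaults to True)
print(parser.parse_args(["-dpt"]).allow_parallel_threads)  # False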
Example 10
def cli(sys_argv):
    parser = argparse.ArgumentParser(
        description="This script loads a pretrained model "
        "and evaluates its performance by comparing"
        "its predictions with ground truth.",
        prog="ludwig evaluate",
        usage="%(prog)s [options]",
    )

    # ---------------
    # Data parameters
    # ---------------
    parser.add_argument("--dataset",
                        help="input data file path",
                        required=True)
    parser.add_argument(
        "--data_format",
        help="format of the input data",
        default="auto",
        choices=[
            "auto",
            "csv",
            "excel",
            "feather",
            "fwf",
            "hdf5",
            "html"
            "tables",
            "json",
            "jsonl",
            "parquet",
            "pickle",
            "sas",
            "spss",
            "stata",
            "tsv",
        ],
    )
    parser.add_argument("-s",
                        "--split",
                        default=FULL,
                        choices=[TRAINING, VALIDATION, TEST, FULL],
                        help="the split to test the model on")

    # ----------------
    # Model parameters
    # ----------------
    parser.add_argument("-m",
                        "--model_path",
                        help="model to load",
                        required=True)

    # -------------------------
    # Output results parameters
    # -------------------------
    parser.add_argument("-od",
                        "--output_directory",
                        type=str,
                        default="results",
                        help="directory that contains the results")
    parser.add_argument(
        "-ssuo",
        "--skip_save_unprocessed_output",
        help="skips saving intermediate NPY output files",
        action="store_true",
        default=False,
    )
    parser.add_argument(
        "-sses",
        "--skip_save_eval_stats",
        help="skips saving intermediate JSON eval statistics",
        action="store_true",
        default=False,
    )
    parser.add_argument("-scp",
                        "--skip_collect_predictions",
                        help="skips collecting predictions",
                        action="store_true",
                        default=False)
    parser.add_argument(
        "-scos",
        "--skip_collect_overall_stats",
        help="skips collecting overall stats",
        action="store_true",
        default=False,
    )

    # ------------------
    # Generic parameters
    # ------------------
    parser.add_argument("-bs",
                        "--batch_size",
                        type=int,
                        default=128,
                        help="size of batches")

    # ------------------
    # Runtime parameters
    # ------------------
    parser.add_argument("-g",
                        "--gpus",
                        type=int,
                        default=0,
                        help="list of gpu to use")
    parser.add_argument("-gml",
                        "--gpu_memory_limit",
                        type=int,
                        default=None,
                        help="maximum memory in MB to allocate per GPU device")
    parser.add_argument(
        "-dpt",
        "--disable_parallel_threads",
        action="store_false",
        dest="allow_parallel_threads",
        help="disable TensorFlow from using multithreading for reproducibility",
    )
    parser.add_argument(
        "-b",
        "--backend",
        help="specifies backend to use for parallel / distributed execution, "
        "defaults to local execution or Horovod if called using horovodrun",
        choices=ALL_BACKENDS,
    )
    parser.add_argument("-dbg",
                        "--debug",
                        action="store_true",
                        default=False,
                        help="enables debugging mode")
    parser.add_argument(
        "-l",
        "--logging_level",
        default="info",
        help="the level of logging to use",
        choices=["critical", "error", "warning", "info", "debug", "notset"],
    )

    add_contrib_callback_args(parser)
    args = parser.parse_args(sys_argv)
    args.evaluate_performance = True

    args.callbacks = args.callbacks or []
    for callback in args.callbacks:
        callback.on_cmdline("evaluate", *sys_argv)

    args.logging_level = logging_level_registry[args.logging_level]
    logging.getLogger("ludwig").setLevel(args.logging_level)
    global logger
    logger = logging.getLogger("ludwig.test_performance")

    args.backend = initialize_backend(args.backend)
    if args.backend.is_coordinator():
        print_ludwig("Evaluate", LUDWIG_VERSION)
        logger.info(f"Dataset path: {args.dataset}")
        logger.info(f"Model path: {args.model_path}")
        logger.info("")

    evaluate_cli(**vars(args))
Example 11
def cli(sys_argv):
    parser = argparse.ArgumentParser(description='This script trains a model',
                                     prog='ludwig train',
                                     usage='%(prog)s [options]')

    # ----------------------------
    # Experiment naming parameters
    # ----------------------------
    parser.add_argument('--output_directory',
                        type=str,
                        default='results',
                        help='directory that contains the results')
    parser.add_argument('--experiment_name',
                        type=str,
                        default='experiment',
                        help='experiment name')
    parser.add_argument('--model_name',
                        type=str,
                        default='run',
                        help='name for the model')

    # ---------------
    # Data parameters
    # ---------------
    parser.add_argument(
        '--dataset',
        help='input data file path. '
        'If it has a split column, it will be used for splitting '
        '(0: train, 1: validation, 2: test), '
        'otherwise the dataset will be randomly split')
    parser.add_argument('--training_set', help='input train data file path')
    parser.add_argument('--validation_set',
                        help='input validation data file path')
    parser.add_argument('--test_set', help='input test data file path')

    parser.add_argument(
        '--training_set_metadata',
        help='input metadata JSON file path. An intermediate preprocessed file '
        'containing the mappings of the input file created '
        'the first time a file is used, in the same directory '
        'with the same name and a .json extension')

    parser.add_argument('--data_format',
                        help='format of the input data',
                        default='auto',
                        choices=[
                            'auto', 'csv', 'excel', 'feather', 'fwf', 'hdf5',
                            'html',
                            'tables', 'json', 'jsonl', 'parquet', 'pickle',
                            'sas', 'spss', 'stata', 'tsv'
                        ])

    parser.add_argument('-sspi',
                        '--skip_save_processed_input',
                        help='skips saving intermediate HDF5 and JSON files',
                        action='store_true',
                        default=False)

    # ----------------
    # Model parameters
    # ----------------
    config = parser.add_mutually_exclusive_group(required=True)
    config.add_argument(
        '-c',
        '--config',
        type=load_config_from_str,
        help='JSON or YAML serialized string of the model configuration')
    config.add_argument(
        '-cf',
        '--config_file',
        dest='config',
        type=load_yaml,
        help='Path to the YAML file containing the model configuration')

    parser.add_argument(
        '-mlp',
        '--model_load_path',
        help='path of a pretrained model to load as initialization')
    parser.add_argument(
        '-mrp',
        '--model_resume_path',
        help='path of the model directory to resume training of')
    parser.add_argument('-sstd',
                        '--skip_save_training_description',
                        action='store_true',
                        default=False,
                        help='disables saving the description JSON file')
    parser.add_argument('-ssts',
                        '--skip_save_training_statistics',
                        action='store_true',
                        default=False,
                        help='disables saving training statistics JSON file')
    parser.add_argument(
        '-ssm',
        '--skip_save_model',
        action='store_true',
        default=False,
        help='disables saving weights each time the model improves. '
        'By default Ludwig saves weights after each epoch '
        'the validation metric improves, but if the model is really big '
        'that can be time consuming. If you do not want to keep '
        'the weights and just find out what performance a model can get '
        'with a set of hyperparameters, use this parameter to skip it')
    parser.add_argument(
        '-ssp',
        '--skip_save_progress',
        action='store_true',
        default=False,
        help='disables saving weights after each epoch. By default ludwig saves '
        'weights after each epoch for enabling resuming of training, but '
        'if the model is really big that can be time consuming and will '
        'save twice as much space, use this parameter to skip it')
    parser.add_argument(
        '-ssl',
        '--skip_save_log',
        action='store_true',
        default=False,
        help='disables saving TensorBoard logs. By default Ludwig saves '
        'logs for the TensorBoard, but if it is not needed turning it off '
        'can slightly increase the overall speed')

    # ------------------
    # Runtime parameters
    # ------------------
    parser.add_argument(
        '-rs',
        '--random_seed',
        type=int,
        default=42,
        help='a random seed that is going to be used anywhere there is a call '
        'to a random number generator: data splitting, parameter '
        'initialization and training set shuffling')
    parser.add_argument('-g',
                        '--gpus',
                        nargs='+',
                        type=int,
                        default=None,
                        help='list of gpus to use')
    parser.add_argument('-gml',
                        '--gpu_memory_limit',
                        type=int,
                        default=None,
                        help='maximum memory in MB to allocate per GPU device')
    parser.add_argument(
        '-dpt',
        '--disable_parallel_threads',
        action='store_false',
        dest='allow_parallel_threads',
        help='disable TensorFlow from using multithreading for reproducibility'
    )
    parser.add_argument(
        "-b",
        "--backend",
        help='specifies backend to use for parallel / distributed execution, '
        'defaults to local execution or Horovod if called using horovodrun',
        choices=ALL_BACKENDS,
    )
    parser.add_argument('-dbg',
                        '--debug',
                        action='store_true',
                        default=False,
                        help='enables debugging mode')
    parser.add_argument(
        '-l',
        '--logging_level',
        default='info',
        help='the level of logging to use',
        choices=['critical', 'error', 'warning', 'info', 'debug', 'notset'])

    add_contrib_callback_args(parser)
    args = parser.parse_args(sys_argv)

    args.callbacks = args.callbacks or []
    for callback in args.callbacks:
        callback.on_cmdline('train', *sys_argv)

    args.logging_level = logging_level_registry[args.logging_level]
    logging.getLogger('ludwig').setLevel(args.logging_level)
    global logger
    logger = logging.getLogger('ludwig.train')

    args.backend = initialize_backend(args.backend
                                      or args.config.get('backend'))
    if args.backend.is_coordinator():
        print_ludwig('Train', LUDWIG_VERSION)

    train_cli(**vars(args))
Example 12
def cli(sys_argv):
    parser = argparse.ArgumentParser(
        description="This script generates a synthetic dataset.",
        prog="ludwig synthesize_dataset",
        usage="%(prog)s [options]",
    )
    parser.add_argument("-od",
                        "--output_path",
                        type=str,
                        help="output CSV file path")
    parser.add_argument("-d",
                        "--dataset_size",
                        help="size of the dataset",
                        type=int,
                        default=100)
    parser.add_argument(
        "-f",
        "--features",
        default="[\
          {name: text_1, type: text, vocab_size: 20, max_len: 20}, \
          {name: text_2, type: text, vocab_size: 20, max_len: 20}, \
          {name: category_1, type: category, vocab_size: 10}, \
          {name: category_2, type: category, vocab_size: 15}, \
          {name: number_1, type: number}, \
          {name: number_2, type: number}, \
          {name: binary_1, type: binary}, \
          {name: binary_2, type: binary}, \
          {name: set_1, type: set, vocab_size: 20, max_len: 20}, \
          {name: set_2, type: set, vocab_size: 20, max_len: 20}, \
          {name: bag_1, type: bag, vocab_size: 20, max_len: 10}, \
          {name: bag_2, type: bag, vocab_size: 20, max_len: 10}, \
          {name: sequence_1, type: sequence, vocab_size: 20, max_len: 20}, \
          {name: sequence_2, type: sequence, vocab_size: 20, max_len: 20}, \
          {name: timeseries_1, type: timeseries, max_len: 20}, \
          {name: timeseries_2, type: timeseries, max_len: 20}, \
          {name: date_1, type: date}, \
          {name: date_2, type: date}, \
          {name: h3_1, type: h3}, \
          {name: h3_2, type: h3}, \
          {name: vector_1, type: vector}, \
          {name: vector_2, type: vector}, \
        ]",
        type=yaml.safe_load,
        help="list of features to generate in YAML format. "
        "Provide a list containing one dictionary for each feature, "
        "each dictionary must include a name, a type "
        "and can include some generation parameters depending on the type",
    )
    add_contrib_callback_args(parser)
    args = parser.parse_args(sys_argv)

    args.callbacks = args.callbacks or []
    for callback in args.callbacks:
        callback.on_cmdline("synthesize_dataset", *sys_argv)

    # No log level parameter; this is a placeholder in case we add one at a later date
    # args.logging_level = logging_level_registry[args.logging_level]
    # logging.getLogger('ludwig').setLevel(
    #     args.logging_level
    # )
    # global logger
    # logger = logging.getLogger('ludwig.data.dataset_synthesizer')

    print_ludwig("Synthesize Dataset", LUDWIG_VERSION)

    cli_synthesize_dataset(**vars(args))
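
Because --features uses yaml.safe_load as its argparse type, the value given on the command line is parsed into a Python list of dicts before cli_synthesize_dataset sees it. A minimal sketch with PyYAML, using a deliberately short feature list for illustration:

import argparse

import yaml  # PyYAML

parser = argparse.ArgumentParser()
parser.add_argument("-f", "--features", type=yaml.safe_load)

args = parser.parse_args(
    ["-f", "[{name: text_1, type: text, vocab_size: 20}, {name: binary_1, type: binary}]"]
)
print(args.features[0]["name"], args.features[1]["type"])  # text_1 binary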
Example 13
def cli_collect_activations(sys_argv):
    """Command Line Interface to communicate with the collection of tensors and there are several options that can
    specified when calling this function:

    --data_csv: Filepath for the input csv
    --data_hdf5: Filepath for the input hdf5 file, if there is a csv file, this
                 is not read
    --d: Refers to the dataset type of the file being read, by default is
         *generic*
    --s: Refers to the split of the data, can be one of: train, test,
         validation, full
    --m: Input model that is necessary to collect the tensors; this is a
         required *option*
    --t: Tensors to collect
    --od: Output directory of the model, defaults to results
    --bs: Batch size
    --g: Number of gpus that are to be used
    --gf: Fraction of each GPUs memory to use.
    --dbg: Debug if the model is to be started with python debugger
    --v: Verbose: Defines the logging level that the user will be exposed to
    """
    parser = argparse.ArgumentParser(
        description="This script loads a pretrained model and uses it collect "
        "tensors for each datapoint in the dataset.",
        prog="ludwig collect_activations",
        usage="%(prog)s [options]",
    )

    # ---------------
    # Data parameters
    # ---------------
    parser.add_argument("--dataset",
                        help="input data file path",
                        required=True)
    parser.add_argument(
        "--data_format",
        help="format of the input data",
        default="auto",
        choices=[
            "auto",
            "csv",
            "excel",
            "feather",
            "fwf",
            "hdf5",
            "html"
            "tables",
            "json",
            "jsonl",
            "parquet",
            "pickle",
            "sas",
            "spss",
            "stata",
            "tsv",
        ],
    )
    parser.add_argument(
        "-s",
        "--split",
        default=FULL,
        choices=[TRAINING, VALIDATION, TEST, FULL],
        help="the split to obtain the model activations from",
    )

    # ----------------
    # Model parameters
    # ----------------
    parser.add_argument("-m",
                        "--model_path",
                        help="model to load",
                        required=True)
    parser.add_argument("-lyr",
                        "--layers",
                        help="tensors to collect",
                        nargs="+",
                        required=True)

    # -------------------------
    # Output results parameters
    # -------------------------
    parser.add_argument("-od",
                        "--output_directory",
                        type=str,
                        default="results",
                        help="directory that contains the results")

    # ------------------
    # Generic parameters
    # ------------------
    parser.add_argument("-bs",
                        "--batch_size",
                        type=int,
                        default=128,
                        help="size of batches")

    # ------------------
    # Runtime parameters
    # ------------------
    parser.add_argument("-g",
                        "--gpus",
                        type=int,
                        default=0,
                        help="list of gpu to use")
    parser.add_argument("-gml",
                        "--gpu_memory_limit",
                        type=int,
                        default=None,
                        help="maximum memory in MB to allocate per GPU device")
    parser.add_argument(
        "-dpt",
        "--disable_parallel_threads",
        action="store_false",
        dest="allow_parallel_threads",
        help="disable TensorFlow from using multithreading for reproducibility",
    )
    parser.add_argument(
        "-b",
        "--backend",
        help="specifies backend to use for parallel / distributed execution, "
        "defaults to local execution or Horovod if called using horovodrun",
        choices=ALL_BACKENDS,
    )
    parser.add_argument("-dbg",
                        "--debug",
                        action="store_true",
                        default=False,
                        help="enables debugging mode")
    parser.add_argument(
        "-l",
        "--logging_level",
        default="info",
        help="the level of logging to use",
        choices=["critical", "error", "warning", "info", "debug", "notset"],
    )

    add_contrib_callback_args(parser)
    args = parser.parse_args(sys_argv)

    args.callbacks = args.callbacks or []
    for callback in args.callbacks:
        callback.on_cmdline("collect_activations", *sys_argv)

    args.logging_level = logging_level_registry[args.logging_level]
    logging.getLogger("ludwig").setLevel(args.logging_level)
    global logger
    logger = logging.getLogger("ludwig.collect")

    print_ludwig("Collect Activations", LUDWIG_VERSION)

    collect_activations(**vars(args))
Example 14
def cli(sys_argv):
    parser = argparse.ArgumentParser(
        description='This script preprocesses a dataset',
        prog='ludwig preprocess',
        usage='%(prog)s [options]')

    # ---------------
    # Data parameters
    # ---------------
    parser.add_argument(
        '--dataset',
        help='input data file path. '
        'If it has a split column, it will be used for splitting '
        '(0: train, 1: validation, 2: test), '
        'otherwise the dataset will be randomly split')
    parser.add_argument('--training_set', help='input train data file path')
    parser.add_argument('--validation_set',
                        help='input validation data file path')
    parser.add_argument('--test_set', help='input test data file path')

    parser.add_argument(
        '--training_set_metadata',
        help='input metadata JSON file path. An intermediate preprocessed file '
        'containing the mappings of the input file created '
        'the first time a file is used, in the same directory '
        'with the same name and a .json extension')

    parser.add_argument('--data_format',
                        help='format of the input data',
                        default='auto',
                        choices=[
                            'auto', 'csv', 'excel', 'feather', 'fwf', 'hdf5',
                            'html',
                            'tables', 'json', 'jsonl', 'parquet', 'pickle',
                            'sas', 'spss', 'stata', 'tsv'
                        ])

    # ----------------
    # Model parameters
    # ----------------
    preprocessing_def = parser.add_mutually_exclusive_group(required=True)
    preprocessing_def.add_argument(
        '-pc',
        '--preprocessing_config',
        type=yaml.safe_load,
        help='preprocessing config. '
        'Uses the same format of config, '
        'but ignores encoder specific parameters, '
        'decoder specific parameters, combiner and training parameters')
    preprocessing_def.add_argument(
        '-pcf',
        '--preprocessing_config_file',
        dest='preprocessing_config',
        type=load_yaml,
        help='YAML file describing the preprocessing. '
        'Ignores --preprocessing_config. '
        'Uses the same format of config, '
        'but ignores encoder specific parameters, '
        'decoder specific parameters, combiner and training parameters')

    # ------------------
    # Runtime parameters
    # ------------------
    parser.add_argument(
        '-rs',
        '--random_seed',
        type=int,
        default=42,
        help='a random seed that is going to be used anywhere there is a call '
        'to a random number generator: data splitting, parameter '
        'initialization and training set shuffling')
    parser.add_argument(
        "-b",
        "--backend",
        help='specifies backend to use for parallel / distributed execution, '
        'defaults to local execution or Horovod if called using horovodrun',
        choices=ALL_BACKENDS,
    )
    parser.add_argument('-dbg',
                        '--debug',
                        action='store_true',
                        default=False,
                        help='enables debugging mode')
    parser.add_argument(
        '-l',
        '--logging_level',
        default='info',
        help='the level of logging to use',
        choices=['critical', 'error', 'warning', 'info', 'debug', 'notset'])

    add_contrib_callback_args(parser)
    args = parser.parse_args(sys_argv)

    args.callbacks = args.callbacks or []
    for callback in args.callbacks:
        callback.on_cmdline('preprocess', *sys_argv)

    args.logging_level = logging_level_registry[args.logging_level]
    logging.getLogger('ludwig').setLevel(args.logging_level)
    global logger
    logger = logging.getLogger('ludwig.preprocess')

    args.backend = initialize_backend(args.backend)
    if args.backend.is_coordinator():
        print_ludwig('Preprocess', LUDWIG_VERSION)

    preprocess_cli(**vars(args))
Example 15
def cli(sys_argv):
    parser = argparse.ArgumentParser(
        description="This script trains and evaluates a model", prog="ludwig experiment", usage="%(prog)s [options]"
    )

    # ----------------------------
    # Experiment naming parameters
    # ----------------------------
    parser.add_argument("--output_directory", type=str, default="results", help="directory that contains the results")
    parser.add_argument("--experiment_name", type=str, default="experiment", help="experiment name")
    parser.add_argument("--model_name", type=str, default="run", help="name for the model")

    # ---------------
    # Data parameters
    # ---------------
    parser.add_argument(
        "--dataset",
        help="input data file path. "
        "If it has a split column, it will be used for splitting "
        "(0: train, 1: validation, 2: test), "
        "otherwise the dataset will be randomly split",
    )
    parser.add_argument("--training_set", help="input train data file path")
    parser.add_argument("--validation_set", help="input validation data file path")
    parser.add_argument("--test_set", help="input test data file path")

    parser.add_argument(
        "--training_set_metadata",
        help="input metadata JSON file path. An intermediate preprocessed file "
        "containing the mappings of the input file created "
        "the first time a file is used, in the same directory "
        "with the same name and a .json extension",
    )

    parser.add_argument(
        "--data_format",
        help="format of the input data",
        default="auto",
        choices=[
            "auto",
            "csv",
            "excel",
            "feather",
            "fwf",
            "hdf5",
            "html" "tables",
            "json",
            "jsonl",
            "parquet",
            "pickle",
            "sas",
            "spss",
            "stata",
            "tsv",
        ],
    )

    parser.add_argument(
        "-es",
        "--eval_split",
        default=TEST,
        choices=[TRAINING, VALIDATION, TEST, FULL],
        help="the split to evaluate the model on",
    )

    parser.add_argument(
        "-sspi",
        "--skip_save_processed_input",
        help="skips saving intermediate HDF5 and JSON files",
        action="store_true",
        default=False,
    )
    parser.add_argument(
        "-ssuo",
        "--skip_save_unprocessed_output",
        help="skips saving intermediate NPY output files",
        action="store_true",
        default=False,
    )

    # -----------------
    # K-fold parameters
    # -----------------
    parser.add_argument(
        "-kf", "--k_fold", type=int, default=None, help="number of folds for a k-fold cross validation run "
    )
    parser.add_argument(
        "-skfsi",
        "--skip_save_k_fold_split_indices",
        action="store_true",
        default=False,
        help="disables saving indices generated to split training data set "
        "for the k-fold cross validation run, but if it is not needed "
        "turning it off can slightly increase the overall speed",
    )

    # ----------------
    # Model parameters
    # ----------------
    config = parser.add_mutually_exclusive_group(required=True)
    config.add_argument(
        "-c", "--config", type=load_config_from_str, help="JSON or YAML serialized string of the model configuration"
    )
    config.add_argument(
        "-cf",
        "--config_file",
        dest="config",
        type=load_yaml,
        help="Path to the YAML file containing the model configuration",
    )

    parser.add_argument("-mlp", "--model_load_path", help="path of a pretrained model to load as initialization")
    parser.add_argument("-mrp", "--model_resume_path", help="path of the model directory to resume training of")
    parser.add_argument(
        "-sstd",
        "--skip_save_training_description",
        action="store_true",
        default=False,
        help="disables saving the description JSON file",
    )
    parser.add_argument(
        "-ssts",
        "--skip_save_training_statistics",
        action="store_true",
        default=False,
        help="disables saving training statistics JSON file",
    )
    parser.add_argument(
        "-sstp",
        "--skip_save_predictions",
        help="skips saving test predictions CSV files",
        action="store_true",
        default=False,
    )
    parser.add_argument(
        "-sstes",
        "--skip_save_eval_stats",
        help="skips saving eval statistics JSON file",
        action="store_true",
        default=False,
    )
    parser.add_argument(
        "-ssm",
        "--skip_save_model",
        action="store_true",
        default=False,
        help="disables saving model weights and hyperparameters each time "
        "the model improves. "
        "By default Ludwig saves model weights after each epoch "
        "the validation metric imprvoes, but if the model is really big "
        "that can be time consuming. If you do not want to keep "
        "the weights and just find out what performance a model can get "
        "with a set of hyperparameters, use this parameter to skip it,"
        "but the model will not be loadable later on",
    )
    parser.add_argument(
        "-ssp",
        "--skip_save_progress",
        action="store_true",
        default=False,
        help="disables saving progress each epoch. By default Ludwig saves "
        "weights and stats after each epoch for enabling resuming "
        "of training, but if the model is really big that can be "
        "time consuming and will uses twice as much space, use "
        "this parameter to skip it, but training cannot be resumed "
        "later on",
    )
    parser.add_argument(
        "-ssl",
        "--skip_save_log",
        action="store_true",
        default=False,
        help="disables saving TensorBoard logs. By default Ludwig saves "
        "logs for the TensorBoard, but if it is not needed turning it off "
        "can slightly increase the overall speed",
    )

    # ------------------
    # Runtime parameters
    # ------------------
    parser.add_argument(
        "-rs",
        "--random_seed",
        type=int,
        default=42,
        help="a random seed that is going to be used anywhere there is a call "
        "to a random number generator: data splitting, parameter "
        "initialization and training set shuffling",
    )
    parser.add_argument("-g", "--gpus", nargs="+", type=int, default=None, help="list of GPUs to use")
    parser.add_argument(
        "-gml", "--gpu_memory_limit", type=int, default=None, help="maximum memory in MB to allocate per GPU device"
    )
    parser.add_argument(
        "-dpt",
        "--disable_parallel_threads",
        action="store_false",
        dest="allow_parallel_threads",
        help="disable TensorFlow from using multithreading for reproducibility",
    )
    parser.add_argument(
        "-b",
        "--backend",
        help="specifies backend to use for parallel / distributed execution, "
        "defaults to local execution or Horovod if called using horovodrun",
        choices=ALL_BACKENDS,
    )
    parser.add_argument("-dbg", "--debug", action="store_true", default=False, help="enables debugging mode")
    parser.add_argument(
        "-l",
        "--logging_level",
        default="info",
        help="the level of logging to use",
        choices=["critical", "error", "warning", "info", "debug", "notset"],
    )

    add_contrib_callback_args(parser)
    args = parser.parse_args(sys_argv)

    args.callbacks = args.callbacks or []
    for callback in args.callbacks:
        callback.on_cmdline("experiment", *sys_argv)

    args.logging_level = logging_level_registry[args.logging_level]
    logging.getLogger("ludwig").setLevel(args.logging_level)
    global logger
    logger = logging.getLogger("ludwig.experiment")

    args.backend = initialize_backend(args.backend or args.config.get("backend"))
    if args.backend.is_coordinator():
        print_ludwig("Experiment", LUDWIG_VERSION)

    if args.k_fold is None:
        experiment_cli(**vars(args))
    else:
        kfold_cross_validate_cli(**vars(args))
Example 16
def cli(sys_argv):
    parser = argparse.ArgumentParser(
        description='This script serves a pretrained model',
        prog='ludwig serve',
        usage='%(prog)s [options]')

    # ----------------
    # Model parameters
    # ----------------
    parser.add_argument('-m',
                        '--model_path',
                        help='model to load',
                        required=True)

    parser.add_argument(
        '-l',
        '--logging_level',
        default='info',
        help='the level of logging to use',
        choices=['critical', 'error', 'warning', 'info', 'debug', 'notset'])

    # ----------------
    # Server parameters
    # ----------------
    parser.add_argument(
        '-p',
        '--port',
        help='port for server (default: 8000)',
        default=8000,
        type=int,
    )

    parser.add_argument('-H',
                        '--host',
                        help='host for server (default: 0.0.0.0)',
                        default='0.0.0.0')

    parser.add_argument(
        '-ao',
        '--allowed_origins',
        nargs='*',
        help=
        'A list of origins that should be permitted to make cross-origin requests. '
        'Use "*" to allow any origin. See https://www.starlette.io/middleware/#corsmiddleware.',
    )

    add_contrib_callback_args(parser)
    args = parser.parse_args(sys_argv)

    args.callbacks = args.callbacks or []
    for callback in args.callbacks:
        callback.on_cmdline('serve', *sys_argv)

    args.logging_level = logging_level_registry[args.logging_level]
    logging.getLogger('ludwig').setLevel(args.logging_level)
    global logger
    logger = logging.getLogger('ludwig.serve')

    print_ludwig('Serve', LUDWIG_VERSION)

    run_server(args.model_path, args.host, args.port, args.allowed_origins)
Example 17
def cli(sys_argv):
    parser = argparse.ArgumentParser(
        description="This script preprocess a dataset",
        prog="ludwig preprocess",
        usage="%(prog)s [options]")

    # ---------------
    # Data parameters
    # ---------------
    parser.add_argument(
        "--dataset",
        help="input data file path. "
        "If it has a split column, it will be used for splitting "
        "(0: train, 1: validation, 2: test), "
        "otherwise the dataset will be randomly split",
    )
    parser.add_argument("--training_set", help="input train data file path")
    parser.add_argument("--validation_set",
                        help="input validation data file path")
    parser.add_argument("--test_set", help="input test data file path")

    parser.add_argument(
        "--training_set_metadata",
        help="input metadata JSON file path. An intermediate preprocessed file "
        "containing the mappings of the input file created "
        "the first time a file is used, in the same directory "
        "with the same name and a .json extension",
    )

    parser.add_argument(
        "--data_format",
        help="format of the input data",
        default="auto",
        choices=[
            "auto",
            "csv",
            "excel",
            "feather",
            "fwf",
            "hdf5",
            "html"
            "tables",
            "json",
            "jsonl",
            "parquet",
            "pickle",
            "sas",
            "spss",
            "stata",
            "tsv",
        ],
    )

    # ----------------
    # Model parameters
    # ----------------
    preprocessing_def = parser.add_mutually_exclusive_group(required=True)
    preprocessing_def.add_argument(
        "-pc",
        "--preprocessing_config",
        dest="preprocessing_config",
        type=load_yaml,
        help="YAML file describing the preprocessing. "
        "Ignores --preprocessing_config."
        "Uses the same format of config, "
        "but ignores encoder specific parameters, "
        "decoder specific parameters, combiner and training parameters",
    )
    preprocessing_def.add_argument(
        "-pcs",
        "--preprocessing_config_str",
        type=yaml.safe_load,
        help="preproceesing config. "
        "Uses the same format of config, "
        "but ignores encoder specific parameters, "
        "decoder specific parameters, combiner and training parameters",
    )

    # ------------------
    # Runtime parameters
    # ------------------
    parser.add_argument(
        "-rs",
        "--random_seed",
        type=int,
        default=42,
        help="a random seed that is going to be used anywhere there is a call "
        "to a random number generator: data splitting, parameter "
        "initialization and training set shuffling",
    )
    parser.add_argument(
        "-b",
        "--backend",
        help="specifies backend to use for parallel / distributed execution, "
        "defaults to local execution or Horovod if called using horovodrun",
        choices=ALL_BACKENDS,
    )
    parser.add_argument(
        "-l",
        "--logging_level",
        default="info",
        help="the level of logging to use",
        choices=["critical", "error", "warning", "info", "debug", "notset"],
    )

    add_contrib_callback_args(parser)
    args = parser.parse_args(sys_argv)

    args.callbacks = args.callbacks or []
    for callback in args.callbacks:
        callback.on_cmdline("preprocess", *sys_argv)

    args.logging_level = logging_level_registry[args.logging_level]
    logging.getLogger("ludwig").setLevel(args.logging_level)
    global logger
    logger = logging.getLogger("ludwig.preprocess")

    args.backend = initialize_backend(args.backend)
    if args.backend.is_coordinator():
        print_ludwig("Preprocess", LUDWIG_VERSION)

    preprocess_cli(**vars(args))
Example 18
def cli_export_torchscript(sys_argv):
    parser = argparse.ArgumentParser(
        description="This script loads a pretrained model "
        "and saves it as torchscript.",
        prog="ludwig export_torchscript",
        usage="%(prog)s [options]",
    )

    # ----------------
    # Model parameters
    # ----------------
    parser.add_argument("-m",
                        "--model_path",
                        help="model to load",
                        required=True)
    parser.add_argument(
        "-mo",
        "--model_only",
        help="Script and export the model only.",
        action="store_true",
    )
    parser.add_argument(
        "-d",
        "--device",
        type=str,
        help=
        ('Device to use for torchscript tracing (e.g. "cuda" or "cpu"). Ideally, this is the same as the device '
         "used when the model is loaded."),
        default=None,
    )

    # -----------------
    # Output parameters
    # -----------------
    parser.add_argument("-od",
                        "--output_path",
                        type=str,
                        help="path where to save the export model",
                        required=True)

    # ------------------
    # Runtime parameters
    # ------------------
    parser.add_argument(
        "-l",
        "--logging_level",
        default="info",
        help="the level of logging to use",
        choices=["critical", "error", "warning", "info", "debug", "notset"],
    )

    add_contrib_callback_args(parser)
    args = parser.parse_args(sys_argv)

    args.callbacks = args.callbacks or []
    for callback in args.callbacks:
        callback.on_cmdline("export_torchscript", *sys_argv)

    args.logging_level = logging_level_registry[args.logging_level]
    logging.getLogger("ludwig").setLevel(args.logging_level)
    global logger
    logger = logging.getLogger("ludwig.export")

    print_ludwig("Export Torchscript", LUDWIG_VERSION)

    export_torchscript(**vars(args))