Example #1
def cli_collect_weights(sys_argv):
    """Command Line Interface to collecting the weights for the model
    --m: Input model that is necessary to collect to the tensors, this is a
         required *option*
    --t: Tensors to collect
    --od: Output directory of the model, defaults to results
    --dbg: Debug if the model is to be started with python debugger
    --v: Verbose: Defines the logging level that the user will be exposed to
    """
    parser = argparse.ArgumentParser(
        description='This script loads a pretrained model '
        'and uses it to collect weights.',
        prog='ludwig collect_weights',
        usage='%(prog)s [options]')

    # ----------------
    # Model parameters
    # ----------------
    parser.add_argument('-m',
                        '--model_path',
                        help='model to load',
                        required=True)
    parser.add_argument('-t',
                        '--tensors',
                        help='tensors to collect',
                        nargs='+',
                        required=True)

    # -------------------------
    # Output results parameters
    # -------------------------
    parser.add_argument('-od',
                        '--output_directory',
                        type=str,
                        default='results',
                        help='directory that contains the results')

    # ------------------
    # Runtime parameters
    # ------------------
    parser.add_argument('-dbg',
                        '--debug',
                        action='store_true',
                        default=False,
                        help='enables debugging mode')
    parser.add_argument(
        '-l',
        '--logging_level',
        default='info',
        help='the level of logging to use',
        choices=['critical', 'error', 'warning', 'info', 'debug', 'notset'])

    args = parser.parse_args(sys_argv)

    logging.basicConfig(stream=sys.stdout,
                        level=logging_level_registry[args.logging_level],
                        format='%(message)s')

    print_ludwig('Collect Weights', LUDWIG_VERSION)
    collect_weights(**vars(args))
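
A minimal invocation sketch (not part of the original snippet; paths and tensor names are hypothetical), showing the argv list this CLI expects:

cli_collect_weights([
    '--model_path', 'results/experiment_run/model',   # hypothetical model directory
    '--tensors', 'encoder.weight', 'decoder.weight',  # hypothetical tensor names
    '--output_directory', 'collected_weights',
])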
Example #2
def cli_export_triton(sys_argv):
    parser = argparse.ArgumentParser(
        description="This script loads a pretrained model "
        "and saves it as torchscript for Triton.",
        prog="ludwig export_neuropod",
        usage="%(prog)s [options]",
    )

    # ----------------
    # Model parameters
    # ----------------
    parser.add_argument("-m",
                        "--model_path",
                        help="model to load",
                        required=True)
    parser.add_argument("-mn",
                        "--model_name",
                        help="model name",
                        default="ludwig_model")
    parser.add_argument("-mv",
                        "--model_version",
                        type=int,
                        help="model version",
                        default=1)

    # -----------------
    # Output parameters
    # -----------------
    parser.add_argument("-od",
                        "--output_path",
                        type=str,
                        help="path where to save the export model",
                        required=True)

    # ------------------
    # Runtime parameters
    # ------------------
    parser.add_argument(
        "-l",
        "--logging_level",
        default="info",
        help="the level of logging to use",
        choices=["critical", "error", "warning", "info", "debug", "notset"],
    )

    add_contrib_callback_args(parser)
    args = parser.parse_args(sys_argv)

    args.callbacks = args.callbacks or []
    for callback in args.callbacks:
        callback.on_cmdline("export_triton", *sys_argv)

    args.logging_level = logging_level_registry[args.logging_level]
    logging.getLogger("ludwig").setLevel(args.logging_level)
    global logger
    logger = logging.getLogger("ludwig.export")

    print_ludwig("Export Triton", LUDWIG_VERSION)

    export_triton(**vars(args))
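
A hedged usage sketch (the paths and names below are hypothetical) of how the argv list for this export command could look:

cli_export_triton([
    "--model_path", "results/experiment_run/model",  # hypothetical model directory
    "--model_name", "my_triton_model",               # hypothetical model name
    "--model_version", "2",
    "--output_path", "triton_export",                # hypothetical output directory
])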
Example #3
def cli(sys_argv):
    parser = argparse.ArgumentParser(
        description='This script generates a synthetic dataset.',
        prog='ludwig synthesize_dataset',
        usage='%(prog)s [options]')
    parser.add_argument('-od',
                        '--output_path',
                        type=str,
                        help='output CSV file path')
    parser.add_argument('-d',
                        '--dataset_size',
                        help='size of the dataset',
                        type=int,
                        default=100)
    parser.add_argument(
        '-f',
        '--features',
        default='[\
          {name: text_1, type: text, vocab_size: 20, max_len: 20}, \
          {name: text_2, type: text, vocab_size: 20, max_len: 20}, \
          {name: category_1, type: category, vocab_size: 10}, \
          {name: category_2, type: category, vocab_size: 15}, \
          {name: numerical_1, type: numerical}, \
          {name: numerical_2, type: numerical}, \
          {name: binary_1, type: binary}, \
          {name: binary_2, type: binary}, \
          {name: set_1, type: set, vocab_size: 20, max_len: 20}, \
          {name: set_2, type: set, vocab_size: 20, max_len: 20}, \
          {name: bag_1, type: bag, vocab_size: 20, max_len: 10}, \
          {name: bag_2, type: bag, vocab_size: 20, max_len: 10}, \
          {name: sequence_1, type: sequence, vocab_size: 20, max_len: 20}, \
          {name: sequence_2, type: sequence, vocab_size: 20, max_len: 20}, \
          {name: timeseries_1, type: timeseries, max_len: 20}, \
          {name: timeseries_2, type: timeseries, max_len: 20}, \
          {name: date_1, type: date}, \
          {name: date_2, type: date}, \
          {name: h3_1, type: h3}, \
          {name: h3_2, type: h3}, \
          {name: vector_1, type: vector}, \
          {name: vector_2, type: vector}, \
        ]',
        type=yaml.safe_load,
        help='list of features to generate in YAML format. '
        'Provide a list containing one dictionary for each feature; '
        'each dictionary must include a name and a type, '
        'and can include generation parameters depending on the type')
    args = parser.parse_args(sys_argv)

    # No log level parameter; this is a placeholder in case we add one at a later date
    # args.logging_level = logging_level_registry[args.logging_level]
    # logging.getLogger('ludwig').setLevel(
    #     args.logging_level
    # )
    # global logger
    # logger = logging.getLogger('ludwig.data.dataset_synthesizer')

    if is_on_master():
        print_ludwig('Synthesize Dataset', LUDWIG_VERSION)

    cli_synthesize_dataset(**vars(args))
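
A small usage sketch (the output file name is hypothetical) that generates a 500-row synthetic CSV using the default feature list:

cli([
    '--output_path', 'synthetic.csv',  # hypothetical output file
    '--dataset_size', '500',
])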
Example #4
def cli_render_config(sys_argv):
    parser = argparse.ArgumentParser(
        description="This script renders the full config from a user config.",
        prog="ludwig render_config",
        usage="%(prog)s [options]",
    )
    parser.add_argument(
        "-c",
        "--config",
        type=load_config_from_str,
        help="input user YAML config path",
    )
    parser.add_argument(
        "-o",
        "--output",
        type=str,
        help="output rendered YAML config path",
        required=False,
    )

    add_contrib_callback_args(parser)
    args = parser.parse_args(sys_argv)

    args.callbacks = args.callbacks or []
    for callback in args.callbacks:
        callback.on_cmdline("render_config", *sys_argv)

    print_ludwig("Render Config", LUDWIG_VERSION)
    render_config(**vars(args))
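
A hedged usage sketch: since -c is parsed with load_config_from_str, an inline YAML string is passed here (whether a plain file path is also accepted depends on that helper), and the feature names and output path are hypothetical:

cli_render_config([
    "--config", "{input_features: [{name: text, type: text}], output_features: [{name: label, type: category}]}",
    "--output", "rendered_config.yaml",  # hypothetical output path
])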
Example #5
def cli_export_neuropod(sys_argv):
    parser = argparse.ArgumentParser(
        description='This script loads a pretrained model '
                    'and saves it as a Neuropod.',
        prog='ludwig export_neuropod',
        usage='%(prog)s [options]'
    )

    # ----------------
    # Model parameters
    # ----------------
    parser.add_argument(
        '-m',
        '--model_path',
        help='model to load',
        required=True
    )
    parser.add_argument(
        '-mn',
        '--model_name',
        help='model name',
        default='neuropod'
    )

    # -----------------
    # Output parameters
    # -----------------
    parser.add_argument(
        '-od',
        '--output_path',
        type=str,
        help='path where to save the exported model',
        required=True
    )

    # ------------------
    # Runtime parameters
    # ------------------
    parser.add_argument(
        '-l',
        '--logging_level',
        default='info',
        help='the level of logging to use',
        choices=['critical', 'error', 'warning', 'info', 'debug', 'notset']
    )

    args = parser.parse_args(sys_argv)

    args.logging_level = logging_level_registry[args.logging_level]
    logging.getLogger('ludwig').setLevel(
        args.logging_level
    )
    global logger
    logger = logging.getLogger('ludwig.export')

    print_ludwig('Export Neuropod', LUDWIG_VERSION)

    export_neuropod(**vars(args))
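
A brief usage sketch (model path, package name, and output directory are hypothetical):

cli_export_neuropod([
    '--model_path', 'results/experiment_run/model',  # hypothetical model directory
    '--model_name', 'my_neuropod',                   # hypothetical package name
    '--output_path', 'neuropod_export',              # hypothetical output directory
])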
Example #6
def cli_init_config(sys_argv):
    parser = argparse.ArgumentParser(
        description="This script initializes a valid config from a dataset.",
        prog="ludwig init_config",
        usage="%(prog)s [options]",
    )
    parser.add_argument(
        "-d",
        "--dataset",
        type=str,
        help="input data file path",
    )
    parser.add_argument(
        "-t",
        "--target",
        type=str,
        help="target(s) to predict as output features of the model",
        action="append",
        required=False,
    )
    parser.add_argument(
        "--time_limit_s",
        type=int,
        help="time limit to train the model in seconds when using hyperopt",
        required=False,
    )
    parser.add_argument(
        "--tune_for_memory",
        type=bool,
        help="refine hyperopt search space based on available host / GPU memory",
        default=False,
        required=False,
    )
    parser.add_argument(
        "--hyperopt",
        type=bool,
        help="include automl hyperopt config",
        default=False,
        required=False,
    )
    parser.add_argument(
        "-o",
        "--output",
        type=str,
        help="output initialized YAML config path",
        required=False,
    )

    add_contrib_callback_args(parser)
    args = parser.parse_args(sys_argv)

    args.callbacks = args.callbacks or []
    for callback in args.callbacks:
        callback.on_cmdline("init_config", *sys_argv)

    print_ludwig("Init Config", LUDWIG_VERSION)
    init_config(**vars(args))
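
A possible invocation (dataset path, target column, and output file are hypothetical):

cli_init_config([
    "--dataset", "train.csv",   # hypothetical dataset path
    "--target", "label",        # hypothetical output feature name
    "--output", "config.yaml",  # hypothetical output config path
])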
Example #7
def cli(sys_argv):
    parser = argparse.ArgumentParser(
        description='This script serves a pretrained model',
        prog='ludwig serve',
        usage='%(prog)s [options]')

    # ----------------
    # Model parameters
    # ----------------
    parser.add_argument('-m',
                        '--model_path',
                        help='model to load',
                        required=True)

    parser.add_argument(
        '-l',
        '--logging_level',
        default='info',
        help='the level of logging to use',
        choices=['critical', 'error', 'warning', 'info', 'debug', 'notset'])

    # ----------------
    # Server parameters
    # ----------------
    parser.add_argument(
        '-p',
        '--port',
        help='port for server (default: 8000)',
        default=8000,
        type=int,
    )

    parser.add_argument('-H',
                        '--host',
                        help='host for server (default: 0.0.0.0)',
                        default='0.0.0.0')

    parser.add_argument(
        '-ao',
        '--allowed_origins',
        nargs='*',
        help='A list of origins that should be permitted to make cross-origin requests. '
        'Use "*" to allow any origin. See https://www.starlette.io/middleware/#corsmiddleware.',
    )

    args = parser.parse_args(sys_argv)

    args.logging_level = logging_level_registry[args.logging_level]
    logging.getLogger('ludwig').setLevel(args.logging_level)
    global logger
    logger = logging.getLogger('ludwig.serve')

    print_ludwig('Serve', LUDWIG_VERSION)

    run_server(args.model_path, args.host, args.port, args.allowed_origins)
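
A usage sketch (the model path is hypothetical) that serves the model on localhost port 8080 and allows any origin:

cli([
    '--model_path', 'results/experiment_run/model',  # hypothetical model directory
    '--host', '127.0.0.1',
    '--port', '8080',
    '--allowed_origins', '*',
])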
Example #8
def cli():
    parser = argparse.ArgumentParser(
        description='This script exports a Ludwig model in the Neuropod format'
    )

    # ----------------
    # Model parameters
    # ----------------
    parser.add_argument(
        '-m',
        '--ludwig_model_path',
        help='path to the Ludwig model to export',
        required=True
    )

    parser.add_argument(
        '-l',
        '--logging_level',
        default='info',
        help='the level of logging to use',
        choices=['critical', 'error', 'warning', 'info', 'debug', 'notset']
    )

    # -------------------
    # Neuropod parameters
    # -------------------
    parser.add_argument(
        '-n',
        '--neuropod_path',
        help='path of the output Neuropod package file',
        required=True
    )
    parser.add_argument(
        '-nm',
        '--neuropod_model_name',
        help='name of the exported Neuropod model',
        default='ludwig_model'
    )

    args = parser.parse_args()

    args.logging_level = logging_level_registry[args.logging_level]
    logging.getLogger('ludwig').setLevel(
        args.logging_level
    )
    global logger
    logger = logging.getLogger('ludwig.serve')

    print_ludwig('Export Neuropod', LUDWIG_VERSION)

    export_neuropod(
        args.ludwig_model_path,
        args.neuropod_path,
        args.neuropod_model_name,
    )
Example #9
def cli(sys_argv):
    parser = argparse.ArgumentParser(
        description='This script serves a pretrained model',
        prog='ludwig serve',
        usage='%(prog)s [options]'
    )

    # ----------------
    # Model parameters
    # ----------------
    parser.add_argument(
        '-m',
        '--model_path',
        help='model to load',
        required=True
    )

    parser.add_argument(
        '-l',
        '--logging_level',
        default='info',
        help='the level of logging to use',
        choices=['critical', 'error', 'warning', 'info', 'debug', 'notset']
    )

    # ----------------
    # Server parameters
    # ----------------
    parser.add_argument(
        '-p',
        '--port',
        help='port for server (default: 8000)',
        default=8000,
        type=int,
    )

    parser.add_argument(
        '-H',
        '--host',
        help='host for server (default: 0.0.0.0)',
        default='0.0.0.0'
    )

    args = parser.parse_args(sys_argv)

    args.logging_level = logging_level_registry[args.logging_level]
    logging.getLogger('ludwig').setLevel(
        args.logging_level
    )
    global logger
    logger = logging.getLogger('ludwig.serve')

    print_ludwig('Serve', LUDWIG_VERSION)

    run_server(args.model_path, args.host, args.port)
Example #10
def cli_export_mlflow(sys_argv):
    parser = argparse.ArgumentParser(
        description='This script loads a pretrained model '
        'and saves it as an MLFlow model.',
        prog='ludwig export_mlflow',
        usage='%(prog)s [options]')

    # ----------------
    # Model parameters
    # ----------------
    parser.add_argument('-m',
                        '--model_path',
                        help='model to load',
                        required=True)
    parser.add_argument(
        '-mn',
        '--registered_model_name',
        help='name under which to register the model in the MLflow model registry',
        default='mlflow')

    # -----------------
    # Output parameters
    # -----------------
    parser.add_argument('-od',
                        '--output_path',
                        type=str,
                        help='path where to save the exported model',
                        required=True)

    # ------------------
    # Runtime parameters
    # ------------------
    parser.add_argument(
        '-l',
        '--logging_level',
        default='info',
        help='the level of logging to use',
        choices=['critical', 'error', 'warning', 'info', 'debug', 'notset'])

    add_contrib_callback_args(parser)
    args = parser.parse_args(sys_argv)

    args.callbacks = args.callbacks or []
    for callback in args.callbacks:
        callback.on_cmdline('export_mlflow', *sys_argv)

    args.logging_level = logging_level_registry[args.logging_level]
    logging.getLogger('ludwig').setLevel(args.logging_level)
    global logger
    logger = logging.getLogger('ludwig.export')

    print_ludwig('Export MLFlow', LUDWIG_VERSION)

    export_mlflow(**vars(args))
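
A usage sketch (paths and the registered name are hypothetical):

cli_export_mlflow([
    '--model_path', 'results/experiment_run/model',    # hypothetical model directory
    '--output_path', 'mlflow_model',                   # hypothetical output directory
    '--registered_model_name', 'my_registered_model',  # hypothetical registry name
])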
Example #11
def cli_collect_weights(sys_argv):
    """Command Line Interface to collecting the weights for the model.

    --m: Input model that is necessary to collect to the tensors, this is a
         required *option*
    --t: Tensors to collect
    --od: Output directory of the model, defaults to results
    --v: Verbose: Defines the logging level that the user will be exposed to
    """
    parser = argparse.ArgumentParser(
        description="This script loads a pretrained model " "and uses it collect weights.",
        prog="ludwig collect_weights",
        usage="%(prog)s [options]",
    )

    # ----------------
    # Model parameters
    # ----------------
    parser.add_argument("-m", "--model_path", help="model to load", required=True)
    parser.add_argument("-t", "--tensors", help="tensors to collect", nargs="+", required=True)

    # -------------------------
    # Output results parameters
    # -------------------------
    parser.add_argument(
        "-od", "--output_directory", type=str, default="results", help="directory that contains the results"
    )

    # ------------------
    # Runtime parameters
    # ------------------
    parser.add_argument(
        "-l",
        "--logging_level",
        default="info",
        help="the level of logging to use",
        choices=["critical", "error", "warning", "info", "debug", "notset"],
    )

    add_contrib_callback_args(parser)
    args = parser.parse_args(sys_argv)

    args.callbacks = args.callbacks or []
    for callback in args.callbacks:
        callback.on_cmdline("collect_weights", *sys_argv)

    args.logging_level = logging_level_registry[args.logging_level]
    logging.getLogger("ludwig").setLevel(args.logging_level)
    global logger
    logger = logging.getLogger("ludwig.collect")

    print_ludwig("Collect Weights", LUDWIG_VERSION)

    collect_weights(**vars(args))
Example #12
def cli(sys_argv):
    parser = argparse.ArgumentParser(
        description="This script serves a pretrained model", prog="ludwig serve", usage="%(prog)s [options]"
    )

    # ----------------
    # Model parameters
    # ----------------
    parser.add_argument("-m", "--model_path", help="model to load", required=True)

    parser.add_argument(
        "-l",
        "--logging_level",
        default="info",
        help="the level of logging to use",
        choices=["critical", "error", "warning", "info", "debug", "notset"],
    )

    # ----------------
    # Server parameters
    # ----------------
    parser.add_argument(
        "-p",
        "--port",
        help="port for server (default: 8000)",
        default=8000,
        type=int,
    )

    parser.add_argument("-H", "--host", help="host for server (default: 0.0.0.0)", default="0.0.0.0")

    parser.add_argument(
        "-ao",
        "--allowed_origins",
        nargs="*",
        help="A list of origins that should be permitted to make cross-origin requests. "
        'Use "*" to allow any origin. See https://www.starlette.io/middleware/#corsmiddleware.',
    )

    add_contrib_callback_args(parser)
    args = parser.parse_args(sys_argv)

    args.callbacks = args.callbacks or []
    for callback in args.callbacks:
        callback.on_cmdline("serve", *sys_argv)

    args.logging_level = logging_level_registry[args.logging_level]
    logging.getLogger("ludwig").setLevel(args.logging_level)
    global logger
    logger = logging.getLogger("ludwig.serve")

    print_ludwig("Serve", LUDWIG_VERSION)

    run_server(args.model_path, args.host, args.port, args.allowed_origins)
Example #13
def cli_collect_summary(sys_argv):
    """Command Line Interface to collecting a summary of the model layers and weights.

    --m: Input model that is necessary to collect to the tensors, this is a
         required *option*
    --v: Verbose: Defines the logging level that the user will be exposed to
    """
    parser = argparse.ArgumentParser(
        description="This script loads a pretrained model "
        "and prints names of weights and layers activations "
        "to use with other collect commands",
        prog="ludwig collect_summary",
        usage="%(prog)s [options]",
    )

    # ----------------
    # Model parameters
    # ----------------
    parser.add_argument("-m",
                        "--model_path",
                        help="model to load",
                        required=True)

    # ------------------
    # Runtime parameters
    # ------------------
    parser.add_argument(
        "-l",
        "--logging_level",
        default="info",
        help="the level of logging to use",
        choices=["critical", "error", "warning", "info", "debug", "notset"],
    )

    add_contrib_callback_args(parser)
    args = parser.parse_args(sys_argv)

    args.callbacks = args.callbacks or []
    for callback in args.callbacks:
        callback.on_cmdline("collect_summary", *sys_argv)

    args.logging_level = logging_level_registry[args.logging_level]
    logging.getLogger("ludwig").setLevel(args.logging_level)
    global logger
    logger = logging.getLogger("ludwig.collect")

    print_ludwig("Collect Summary", LUDWIG_VERSION)

    print_model_summary(**vars(args))
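
A usage sketch (the model path is hypothetical) that prints the weight and activation names of a trained model:

cli_collect_summary([
    "--model_path", "results/experiment_run/model",  # hypothetical model directory
])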
Example #14
def cli_collect_summary(sys_argv):
    """Command Line Interface to collecting a summary of the model layers and weights.
    --m: Input model that is necessary to collect to the tensors, this is a
         required *option*
    --v: Verbose: Defines the logging level that the user will be exposed to
    """
    parser = argparse.ArgumentParser(
        description='This script loads a pretrained model '
                    'and prints the names of weights and layer activations '
                    'to use with other collect commands',
        prog='ludwig collect_summary',
        usage='%(prog)s [options]'
    )

    # ----------------
    # Model parameters
    # ----------------
    parser.add_argument(
        '-m',
        '--model_path',
        help='model to load',
        required=True
    )

    # ------------------
    # Runtime parameters
    # ------------------
    parser.add_argument(
        '-l',
        '--logging_level',
        default='info',
        help='the level of logging to use',
        choices=['critical', 'error', 'warning', 'info', 'debug', 'notset']
    )

    args = parser.parse_args(sys_argv)

    args.logging_level = logging_level_registry[args.logging_level]
    logging.getLogger('ludwig').setLevel(
        args.logging_level
    )
    global logger
    logger = logging.getLogger('ludwig.collect')

    print_ludwig('Collect Summary', LUDWIG_VERSION)

    print_model_summary(**vars(args))
Example #15
def cli(sys_argv):
    parser = argparse.ArgumentParser(
        description="This command downloads and lists Ludwig-ready datasets.",
        prog="ludwig datasets",
        usage="%(prog)s [options]",
    )
    sub_parsers = parser.add_subparsers(dest="command",
                                        help="download and list datasets")

    parser_download = sub_parsers.add_parser("download",
                                             help="download a dataset")
    parser_download.add_argument("dataset", help="dataset to download")
    parser_download.add_argument(
        "-o",
        "--output_dir",
        type=str,
        default=".",
        help="output directory to download into",
        required=False,
    )

    sub_parsers.add_parser("list", help="list datasets")

    parser_describe = sub_parsers.add_parser("describe",
                                             help="describe datasets")
    parser_describe.add_argument("dataset", help="dataset to describe")

    args = parser.parse_args(sys_argv)
    print_ludwig(f"Datasets {args.command}", LUDWIG_VERSION)

    if args.command == "list":
        datasets = list_datasets()
        for ds in datasets:
            print(ds)
    elif args.command == "describe":
        print(describe_dataset(args.dataset))
    elif args.command == "download":
        download_dataset(args.dataset, args.output_dir)
    else:
        raise ValueError(f"Unrecognized command: {args.command}")
Example #16
def cli(sys_argv):
    parser = argparse.ArgumentParser(
        description="This script searches for optimal Hyperparameters",
        prog="ludwig hyperopt",
        usage="%(prog)s [options]",
    )

    # -------------------
    # Hyperopt parameters
    # -------------------
    parser.add_argument(
        "-sshs",
        "--skip_save_hyperopt_statistics",
        help="skips saving hyperopt statistics file",
        action="store_true",
        default=False,
    )

    # ----------------------------
    # Experiment naming parameters
    # ----------------------------
    parser.add_argument(
        "--output_directory",
        type=str,
        default="results",
        help="directory that contains the results",
    )
    parser.add_argument("--experiment_name",
                        type=str,
                        default="hyperopt",
                        help="experiment name")
    parser.add_argument("--model_name",
                        type=str,
                        default="run",
                        help="name for the model")

    # ---------------
    # Data parameters
    # ---------------
    parser.add_argument(
        "--dataset",
        help="input data file path. "
        "If it has a split column, it will be used for splitting "
        "(0: train, 1: validation, 2: test), "
        "otherwise the dataset will be randomly split",
    )
    parser.add_argument("--training_set", help="input train data file path")
    parser.add_argument("--validation_set",
                        help="input validation data file path")
    parser.add_argument("--test_set", help="input test data file path")

    parser.add_argument(
        "--training_set_metadata",
        help="input metadata JSON file path. An intermediate preprocessed file "
        "containing the mappings of the input file created "
        "the first time a file is used, in the same directory "
        "with the same name and a .json extension",
    )

    parser.add_argument(
        "--data_format",
        help="format of the input data",
        default="auto",
        choices=[
            "auto",
            "csv",
            "excel",
            "feather",
            "fwf",
            "hdf5",
            "html"
            "tables",
            "json",
            "jsonl",
            "parquet",
            "pickle",
            "sas",
            "spss",
            "stata",
            "tsv",
        ],
    )

    parser.add_argument(
        "-sspi",
        "--skip_save_processed_input",
        help="skips saving intermediate HDF5 and JSON files",
        action="store_true",
        default=False,
    )

    # ----------------
    # Model parameters
    # ----------------
    config = parser.add_mutually_exclusive_group(required=True)
    config.add_argument(
        "-c",
        "--config",
        type=load_yaml,
        help="Path to the YAML file containing the model configuration",
    )
    config.add_argument(
        "-cs",
        "--config_str",
        dest="config",
        type=load_config_from_str,
        help="JSON or YAML serialized string of the model configuration",
    )

    parser.add_argument(
        "-mlp",
        "--model_load_path",
        help="path of a pretrained model to load as initialization",
    )
    parser.add_argument(
        "-mrp",
        "--model_resume_path",
        help="path of the model directory to resume training of",
    )
    parser.add_argument(
        "-sstd",
        "--skip_save_training_description",
        action="store_true",
        default=False,
        help="disables saving the description JSON file",
    )
    parser.add_argument(
        "-ssts",
        "--skip_save_training_statistics",
        action="store_true",
        default=False,
        help="disables saving training statistics JSON file",
    )
    parser.add_argument(
        "-ssm",
        "--skip_save_model",
        action="store_true",
        default=False,
        help="disables saving weights each time the model improves. "
        "By default Ludwig saves  weights after each epoch "
        "the validation metric (improves, but  if the model is really big "
        "that can be time consuming. If you do not want to keep "
        "the weights and just find out what performance a model can get "
        "with a set of hyperparameters, use this parameter to skip it",
    )
    parser.add_argument(
        "-ssp",
        "--skip_save_progress",
        action="store_true",
        default=False,
        help="disables saving weights after each epoch. By default ludwig saves "
        "weights after each epoch for enabling resuming of training, but "
        "if the model is really big that can be time consuming and will "
        "save twice as much space, use this parameter to skip it",
    )
    parser.add_argument(
        "-ssl",
        "--skip_save_log",
        action="store_true",
        default=False,
        help="disables saving TensorBoard logs. By default Ludwig saves "
        "logs for the TensorBoard, but if it is not needed turning it off "
        "can slightly increase the overall speed",
    )

    # ------------------
    # Runtime parameters
    # ------------------
    parser.add_argument(
        "-rs",
        "--random_seed",
        type=int,
        default=42,
        help="a random seed that is going to be used anywhere there is a call "
        "to a random number generator: data splitting, parameter "
        "initialization and training set shuffling",
    )
    parser.add_argument(
        "-hlv",
        "--hyperopt_log_verbosity",
        type=int,
        default=3,
        choices=[0, 1, 2, 3],
        help="Controls verbosity of ray tune log messages.  Valid values: "
        "0 = silent, 1 = only status updates, 2 = status and brief trial "
        "results, 3 = status and detailed trial results.",
    )
    parser.add_argument("-g",
                        "--gpus",
                        nargs="+",
                        type=int,
                        default=None,
                        help="list of gpus to use")
    parser.add_argument("-gml",
                        "--gpu_memory_limit",
                        type=int,
                        default=None,
                        help="maximum memory in MB to allocate per GPU device")
    parser.add_argument(
        "-b",
        "--backend",
        help="specifies backend to use for parallel / distributed execution, "
        "defaults to local execution or Horovod if called using horovodrun",
        choices=ALL_BACKENDS,
    )
    parser.add_argument(
        "-l",
        "--logging_level",
        default="info",
        help="the level of logging to use",
        choices=["critical", "error", "warning", "info", "debug", "notset"],
    )

    add_contrib_callback_args(parser)
    args = parser.parse_args(sys_argv)

    args.callbacks = args.callbacks or []
    for callback in args.callbacks:
        callback.on_cmdline("hyperopt", *sys_argv)

    args.logging_level = logging_level_registry[args.logging_level]
    logging.getLogger("ludwig").setLevel(args.logging_level)
    global logger
    logger = logging.getLogger("ludwig.hyperopt")

    args.backend = initialize_backend(args.backend
                                      or args.config.get("backend"))
    if args.backend.is_coordinator():
        print_ludwig("Hyperopt", LUDWIG_VERSION)

    hyperopt_cli(**vars(args))
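
A usage sketch (config and dataset paths are hypothetical); -c is parsed with load_yaml, assumed here to accept a file path:

cli([
    "--config", "config.yaml",  # hypothetical config path, read via load_yaml
    "--dataset", "train.csv",   # hypothetical dataset path
    "--output_directory", "hyperopt_results",
])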
Example #17
def cli(sys_argv):
    parser = argparse.ArgumentParser(
        description='This script trains and evaluates a model',
        prog='ludwig experiment',
        usage='%(prog)s [options]')

    # ----------------------------
    # Experiment naming parameters
    # ----------------------------
    parser.add_argument('--output_directory',
                        type=str,
                        default='results',
                        help='directory that contains the results')
    parser.add_argument('--experiment_name',
                        type=str,
                        default='experiment',
                        help='experiment name')
    parser.add_argument('--model_name',
                        type=str,
                        default='run',
                        help='name for the model')

    # ---------------
    # Data parameters
    # ---------------
    parser.add_argument(
        '--dataset',
        help='input data file path. '
        'If it has a split column, it will be used for splitting '
        '(0: train, 1: validation, 2: test), '
        'otherwise the dataset will be randomly split')
    parser.add_argument('--training_set', help='input train data file path')
    parser.add_argument('--validation_set',
                        help='input validation data file path')
    parser.add_argument('--test_set', help='input test data file path')

    parser.add_argument(
        '--training_set_metadata',
        help='input metadata JSON file path. An intermediate preprocessed file '
        'containing the mappings of the input file created '
        'the first time a file is used, in the same directory '
        'with the same name and a .json extension')

    parser.add_argument('--data_format',
                        help='format of the input data',
                        default='auto',
                        choices=[
                            'auto', 'csv', 'excel', 'feather', 'fwf', 'hdf5',
                            'html',
                            'tables', 'json', 'jsonl', 'parquet', 'pickle',
                            'sas', 'spss', 'stata', 'tsv'
                        ])

    parser.add_argument('-es',
                        '--eval_split',
                        default=TEST,
                        choices=[TRAINING, VALIDATION, TEST, FULL],
                        help='the split to evaluate the model on')

    parser.add_argument('-sspi',
                        '--skip_save_processed_input',
                        help='skips saving intermediate HDF5 and JSON files',
                        action='store_true',
                        default=False)
    parser.add_argument('-ssuo',
                        '--skip_save_unprocessed_output',
                        help='skips saving intermediate NPY output files',
                        action='store_true',
                        default=False)

    # -----------------
    # K-fold parameters
    # -----------------
    parser.add_argument(
        '-kf',
        '--k_fold',
        type=int,
        default=None,
        help='number of folds for a k-fold cross validation run ')
    parser.add_argument(
        '-skfsi',
        '--skip_save_k_fold_split_indices',
        action='store_true',
        default=False,
        help='disables saving indices generated to split training data set '
        'for the k-fold cross validation run, but if it is not needed '
        'turning it off can slightly increase the overall speed')

    # ----------------
    # Model parameters
    # ----------------
    config = parser.add_mutually_exclusive_group(required=True)
    config.add_argument('-c', '--config', type=yaml.safe_load, help='config')
    config.add_argument(
        '-cf',
        '--config_file',
        help='YAML file describing the model. Ignores --model_hyperparameters')

    parser.add_argument(
        '-mlp',
        '--model_load_path',
        help='path of a pretrained model to load as initialization')
    parser.add_argument(
        '-mrp',
        '--model_resume_path',
        help='path of the model directory to resume training of')
    parser.add_argument('-sstd',
                        '--skip_save_training_description',
                        action='store_true',
                        default=False,
                        help='disables saving the description JSON file')
    parser.add_argument('-ssts',
                        '--skip_save_training_statistics',
                        action='store_true',
                        default=False,
                        help='disables saving training statistics JSON file')
    parser.add_argument('-sstp',
                        '--skip_save_predictions',
                        help='skips saving test predictions CSV files',
                        action='store_true',
                        default=False)
    parser.add_argument('-sstes',
                        '--skip_save_eval_stats',
                        help='skips saving eval statistics JSON file',
                        action='store_true',
                        default=False)
    parser.add_argument(
        '-ssm',
        '--skip_save_model',
        action='store_true',
        default=False,
        help='disables saving model weights and hyperparameters each time '
        'the model improves. '
        'By default Ludwig saves model weights after each epoch '
        'the validation metric improves, but if the model is really big '
        'that can be time consuming. If you do not want to keep '
        'the weights and just find out what performance a model can get '
        'with a set of hyperparameters, use this parameter to skip it, '
        'but the model will not be loadable later on')
    parser.add_argument(
        '-ssp',
        '--skip_save_progress',
        action='store_true',
        default=False,
        help='disables saving progress each epoch. By default Ludwig saves '
        'weights and stats after each epoch for enabling resuming '
        'of training, but if the model is really big that can be '
        'time consuming and will use twice as much space, use '
        'this parameter to skip it, but training cannot be resumed '
        'later on')
    parser.add_argument(
        '-ssl',
        '--skip_save_log',
        action='store_true',
        default=False,
        help='disables saving TensorBoard logs. By default Ludwig saves '
        'logs for the TensorBoard, but if it is not needed turning it off '
        'can slightly increase the overall speed')

    # ------------------
    # Runtime parameters
    # ------------------
    parser.add_argument(
        '-rs',
        '--random_seed',
        type=int,
        default=42,
        help='a random seed that is going to be used anywhere there is a call '
        'to a random number generator: data splitting, parameter '
        'initialization and training set shuffling')
    parser.add_argument('-g',
                        '--gpus',
                        nargs='+',
                        type=int,
                        default=None,
                        help='list of GPUs to use')
    parser.add_argument('-gml',
                        '--gpu_memory_limit',
                        type=int,
                        default=None,
                        help='maximum memory in MB to allocate per GPU device')
    parser.add_argument(
        '-dpt',
        '--disable_parallel_threads',
        action='store_false',
        dest='allow_parallel_threads',
        help='disable TensorFlow from using multithreading for reproducibility'
    )
    parser.add_argument(
        "-b",
        "--backend",
        help='specifies backend to use for parallel / distributed execution, '
        'defaults to local execution or Horovod if called using horovodrun',
        choices=ALL_BACKENDS,
    )
    parser.add_argument('-dbg',
                        '--debug',
                        action='store_true',
                        default=False,
                        help='enables debugging mode')
    parser.add_argument(
        '-l',
        '--logging_level',
        default='info',
        help='the level of logging to use',
        choices=['critical', 'error', 'warning', 'info', 'debug', 'notset'])

    args = parser.parse_args(sys_argv)

    args.logging_level = logging_level_registry[args.logging_level]
    logging.getLogger('ludwig').setLevel(args.logging_level)
    global logger
    logger = logging.getLogger('ludwig.experiment')

    args.backend = initialize_backend(args.backend)
    if args.backend.is_coordinator():
        print_ludwig('Experiment', LUDWIG_VERSION)

    if args.k_fold is None:
        experiment_cli(**vars(args))
    else:
        kfold_cross_validate_cli(**vars(args))
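
A usage sketch (config and dataset paths are hypothetical) using --config_file so the YAML is read from disk:

cli([
    '--config_file', 'config.yaml',  # hypothetical config path
    '--dataset', 'train.csv',        # hypothetical dataset path
    '--experiment_name', 'demo_experiment',
])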
Example #18
def cli(sys_argv):
    parser = argparse.ArgumentParser(
        description="This script searches for optimal Hyperparameters",
        prog="ludwig hyperopt",
        usage="%(prog)s [options]",
    )

    # -------------------
    # Hyperopt parameters
    # -------------------
    parser.add_argument(
        "-sshs",
        "--skip_save_hyperopt_statistics",
        help="skips saving hyperopt statistics file",
        action="store_true",
        default=False,
    )

    # ----------------------------
    # Experiment naming parameters
    # ----------------------------
    parser.add_argument(
        "--output_directory",
        type=str,
        default="results",
        help="directory that contains the results",
    )
    parser.add_argument("--experiment_name",
                        type=str,
                        default="hyperopt",
                        help="experiment name")
    parser.add_argument("--model_name",
                        type=str,
                        default="run",
                        help="name for the model")

    # ---------------
    # Data parameters
    # ---------------
    parser.add_argument(
        "--data_csv",
        help="input data CSV file. "
        "If it has a split column, it will be used for splitting "
        "(0: train, 1: validation, 2: test), "
        "otherwise the dataset will be randomly split",
    )
    parser.add_argument("--data_train_csv", help="input train data CSV file")
    parser.add_argument("--data_validation_csv",
                        help="input validation data CSV file")
    parser.add_argument("--data_test_csv", help="input test data CSV file")

    parser.add_argument(
        "--data_hdf5",
        help="input data HDF5 file. It is an intermediate preprocess version of"
        " the input CSV created the first time a CSV file is used in the "
        "same directory with the same name and a hdf5 extension",
    )
    parser.add_argument(
        "--data_train_hdf5",
        help="input train data HDF5 file. It is an intermediate preprocess "
        "version of the input CSV created the first time a CSV file is "
        "used in the same directory with the same name and a hdf5 "
        "extension",
    )
    parser.add_argument(
        "--data_validation_hdf5",
        help="input validation data HDF5 file. It is an intermediate preprocess"
        " version of the input CSV created the first time a CSV file is "
        "used in the same directory with the same name and a hdf5 "
        "extension",
    )
    parser.add_argument(
        "--data_test_hdf5",
        help="input test data HDF5 file. It is an intermediate preprocess "
        "version of the input CSV created the first time a CSV file is "
        "used in the same directory with the same name and a hdf5 "
        "extension",
    )

    parser.add_argument(
        "--train_set_metadata_json",
        help="input metadata JSON file. It is an intermediate preprocess file "
        "containing the mappings of the input CSV created the first time a"
        " CSV file is used in the same directory with the same name and a "
        "json extension",
    )

    parser.add_argument(
        "-sspi",
        "--skip_save_processed_input",
        help="skips saving intermediate HDF5 and JSON files",
        action="store_true",
        default=False,
    )

    # ----------------
    # Model parameters
    # ----------------
    model_definition = parser.add_mutually_exclusive_group(required=True)
    model_definition.add_argument("-md",
                                  "--model_definition",
                                  type=yaml.safe_load,
                                  help="model definition")
    model_definition.add_argument(
        "-mdf",
        "--model_definition_file",
        help="YAML file describing the model. Ignores --model_hyperparameters",
    )

    parser.add_argument(
        "-mlp",
        "--model_load_path",
        help="path of a pretrained model to load as initialization",
    )
    parser.add_argument(
        "-mrp",
        "--model_resume_path",
        help="path of a the model directory to resume training of",
    )
    parser.add_argument(
        "-sstd",
        "--skip_save_training_description",
        action="store_true",
        default=False,
        help="disables saving the description JSON file",
    )
    parser.add_argument(
        "-ssts",
        "--skip_save_training_statistics",
        action="store_true",
        default=False,
        help="disables saving training statistics JSON file",
    )
    parser.add_argument(
        "-ssm",
        "--skip_save_model",
        action="store_true",
        default=False,
        help="disables saving weights each time the model imrpoves. "
        "By default Ludwig saves  weights after each epoch "
        "the validation metric imrpvoes, but  if the model is really big "
        "that can be time consuming if you do not want to keep "
        "the weights and just find out what performance can a model get "
        "with a set of hyperparameters, use this parameter to skip it",
    )
    parser.add_argument(
        "-ssp",
        "--skip_save_progress",
        action="store_true",
        default=False,
        help="disables saving weights after each epoch. By default ludwig saves "
        "weights after each epoch for enabling resuming of training, but "
        "if the model is really big that can be time consuming and will "
        "save twice as much space, use this parameter to skip it",
    )
    parser.add_argument(
        "-ssl",
        "--skip_save_log",
        action="store_true",
        default=False,
        help="disables saving TensorBoard logs. By default Ludwig saves "
        "logs for the TensorBoard, but if it is not needed turning it off "
        "can slightly increase the overall speed",
    )

    # ------------------
    # Runtime parameters
    # ------------------
    parser.add_argument(
        "-rs",
        "--random_seed",
        type=int,
        default=42,
        help="a random seed that is going to be used anywhere there is a call "
        "to a random number generator: data splitting, parameter "
        "initialization and training set shuffling",
    )
    parser.add_argument("-g",
                        "--gpus",
                        nargs="+",
                        type=int,
                        default=None,
                        help="list of gpus to use")
    parser.add_argument('-gml',
                        '--gpu_memory_limit',
                        type=int,
                        default=None,
                        help='maximum memory in MB to allocate per GPU device')
    parser.add_argument(
        "-uh",
        "--use_horovod",
        action="store_true",
        default=False,
        help="uses horovod for distributed training",
    )
    parser.add_argument(
        "-dbg",
        "--debug",
        action="store_true",
        default=False,
        help="enables debugging mode",
    )
    parser.add_argument(
        "-l",
        "--logging_level",
        default="info",
        help="the level of logging to use",
        choices=["critical", "error", "warning", "info", "debug", "notset"],
    )

    args = parser.parse_args(sys_argv)

    logging.getLogger('ludwig').setLevel(
        logging_level_registry[args.logging_level])
    global logger
    logger = logging.getLogger('ludwig.hyperopt')

    set_on_master(args.use_horovod)

    if is_on_master():
        print_ludwig("Hyperopt", LUDWIG_VERSION)

    hyperopt(**vars(args))
Example #19
def cli_collect_activations(sys_argv):
    """Command Line Interface to communicate with the collection of tensors and
    there are several options that can specified when calling this function:

    --data_csv: Filepath for the input csv
    --data_hdf5: Filepath for the input hdf5 file, if there is a csv file, this
                 is not read
    --d: Refers to the dataset type of the file being read; by default it is
         *generic*
    --s: Refers to the split of the data, can be one of: train, test,
         validation, full
    --m: Input model that is necessary to collect the tensors, this is a
         required *option*
    --t: Tensors to collect
    --od: Output directory of the model, defaults to results
    --bs: Batch size
    --g: Number of gpus that are to be used
    --gf: Fraction of each GPU's memory to use.
    --dbg: Debug if the model is to be started with python debugger
    --v: Verbose: Defines the logging level that the user will be exposed to
    """
    parser = argparse.ArgumentParser(
        description='This script loads a pretrained model and uses it to collect '
        'tensors for each datapoint in the dataset.',
        prog='ludwig collect_activations',
        usage='%(prog)s [options]')

    # ---------------
    # Data parameters
    # ---------------
    group = parser.add_mutually_exclusive_group(required=True)
    group.add_argument('--data_csv', help='input data CSV file')
    group.add_argument('--data_hdf5', help='input data HDF5 file')

    parser.add_argument('-s',
                        '--split',
                        default='test',
                        choices=['training', 'validation', 'test', 'full'],
                        help='the split to test the model on')

    # ----------------
    # Model parameters
    # ----------------
    parser.add_argument('-m',
                        '--model_path',
                        help='model to load',
                        required=True)
    parser.add_argument('-t',
                        '--tensors',
                        help='tensors to collect',
                        nargs='+',
                        required=True)

    # -------------------------
    # Output results parameters
    # -------------------------
    parser.add_argument('-od',
                        '--output_directory',
                        type=str,
                        default='results',
                        help='directory that contains the results')

    # ------------------
    # Generic parameters
    # ------------------
    parser.add_argument('-bs',
                        '--batch_size',
                        type=int,
                        default=128,
                        help='size of batches')

    # ------------------
    # Runtime parameters
    # ------------------
    parser.add_argument('-g',
                        '--gpus',
                        type=int,
                        default=0,
                        help='list of gpus to use')
    parser.add_argument(
        '-gf',
        '--gpu_fraction',
        type=float,
        default=1.0,
        help='fraction of gpu memory to initialize the process with')
    parser.add_argument('-dbg',
                        '--debug',
                        action='store_true',
                        default=False,
                        help='enables debugging mode')
    parser.add_argument(
        '-l',
        '--logging_level',
        default='info',
        help='the level of logging to use',
        choices=['critical', 'error', 'warning', 'info', 'debug', 'notset'])

    args = parser.parse_args(sys_argv)

    logging.getLogger('ludwig').setLevel(
        logging_level_registry[args.logging_level])

    print_ludwig('Collect Activations', LUDWIG_VERSION)

    collect_activations(**vars(args))
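
A usage sketch (paths and tensor names are hypothetical) collecting activations over the test split of a CSV dataset:

cli_collect_activations([
    '--data_csv', 'test.csv',                         # hypothetical dataset path
    '--model_path', 'results/experiment_run/model',   # hypothetical model directory
    '--tensors', 'encoder_output', 'decoder_output',  # hypothetical tensor names
    '--split', 'test',
])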
Example #20
def cli(sys_argv):
    parser = argparse.ArgumentParser(
        description='This script loads a pretrained model '
        'and uses it to predict',
        prog='ludwig predict',
        usage='%(prog)s [options]')

    # ---------------
    # Data parameters
    # ---------------
    parser.add_argument('--dataset',
                        help='input data file path',
                        required=True)
    parser.add_argument('--data_format',
                        help='format of the input data',
                        default='auto',
                        choices=[
                            'auto', 'csv', 'excel', 'feather', 'fwf', 'hdf5',
                            'html', 'tables', 'json', 'jsonl', 'parquet',
                            'pickle', 'sas', 'spss', 'stata', 'tsv'
                        ])
    parser.add_argument('-s',
                        '--split',
                        default=FULL,
                        choices=[TRAINING, VALIDATION, TEST, FULL],
                        help='the split to test the model on')

    # ----------------
    # Model parameters
    # ----------------
    parser.add_argument('-m',
                        '--model_path',
                        help='model to load',
                        required=True)

    # -------------------------
    # Output results parameters
    # -------------------------
    parser.add_argument('-od',
                        '--output_directory',
                        type=str,
                        default='results',
                        help='directory that contains the results')
    parser.add_argument('-ssuo',
                        '--skip_save_unprocessed_output',
                        help='skips saving intermediate NPY output files',
                        action='store_true',
                        default=False)
    parser.add_argument('-sstp',
                        '--skip_save_predictions',
                        help='skips saving predictions CSV files',
                        action='store_true',
                        default=False)

    # ------------------
    # Generic parameters
    # ------------------
    parser.add_argument('-bs',
                        '--batch_size',
                        type=int,
                        default=128,
                        help='size of batches')

    # ------------------
    # Runtime parameters
    # ------------------
    parser.add_argument('-g',
                        '--gpus',
                        type=int,
                        default=0,
                        help='list of gpus to use')
    parser.add_argument('-gml',
                        '--gpu_memory_limit',
                        type=int,
                        default=None,
                        help='maximum memory in MB to allocate per GPU device')
    parser.add_argument(
        '-dpt',
        '--disable_parallel_threads',
        action='store_false',
        dest='allow_parallel_threads',
        help='disable TensorFlow from using multithreading for reproducibility'
    )
    parser.add_argument(
        "-b",
        "--backend",
        help='specifies backend to use for parallel / distributed execution, '
        'defaults to local execution or Horovod if called using horovodrun',
        choices=ALL_BACKENDS,
    )
    parser.add_argument('-dbg',
                        '--debug',
                        action='store_true',
                        default=False,
                        help='enables debugging mode')
    parser.add_argument(
        '-l',
        '--logging_level',
        default='info',
        help='the level of logging to use',
        choices=['critical', 'error', 'warning', 'info', 'debug', 'notset'])

    add_contrib_callback_args(parser)
    args = parser.parse_args(sys_argv)

    args.callbacks = args.callbacks or []
    for callback in args.callbacks:
        callback.on_cmdline('predict', *sys_argv)

    args.logging_level = logging_level_registry[args.logging_level]
    logging.getLogger('ludwig').setLevel(args.logging_level)
    global logger
    logger = logging.getLogger('ludwig.predict')

    args.backend = initialize_backend(args.backend)
    if args.backend.is_coordinator():
        print_ludwig('Predict', LUDWIG_VERSION)
        logger.info('Dataset path: {}'.format(args.dataset))
        logger.info('Model path: {}'.format(args.model_path))
        logger.info('')

    predict_cli(**vars(args))
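
A minimal sketch of driving the predict CLI above programmatically; the dataset
and model paths are placeholders and the call assumes it runs in the same
module where cli() and its imports are defined:

if __name__ == '__main__':
    cli([
        '--dataset', 'data.csv',               # hypothetical input file
        '-m', 'results/experiment_run/model',  # hypothetical trained model dir
    ])
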
Example #21
0
def cli(sys_argv):
    parser = argparse.ArgumentParser(
        description="This script loads a pretrained model "
        "and evaluates its performance by comparing"
        "its predictions with ground truth.",
        prog="ludwig evaluate",
        usage="%(prog)s [options]",
    )

    # ---------------
    # Data parameters
    # ---------------
    parser.add_argument("--dataset",
                        help="input data file path",
                        required=True)
    parser.add_argument(
        "--data_format",
        help="format of the input data",
        default="auto",
        choices=[
            "auto",
            "csv",
            "excel",
            "feather",
            "fwf",
            "hdf5",
            "html"
            "tables",
            "json",
            "jsonl",
            "parquet",
            "pickle",
            "sas",
            "spss",
            "stata",
            "tsv",
        ],
    )
    parser.add_argument("-s",
                        "--split",
                        default=FULL,
                        choices=[TRAINING, VALIDATION, TEST, FULL],
                        help="the split to test the model on")

    # ----------------
    # Model parameters
    # ----------------
    parser.add_argument("-m",
                        "--model_path",
                        help="model to load",
                        required=True)

    # -------------------------
    # Output results parameters
    # -------------------------
    parser.add_argument("-od",
                        "--output_directory",
                        type=str,
                        default="results",
                        help="directory that contains the results")
    parser.add_argument(
        "-ssuo",
        "--skip_save_unprocessed_output",
        help="skips saving intermediate NPY output files",
        action="store_true",
        default=False,
    )
    parser.add_argument(
        "-sses",
        "--skip_save_eval_stats",
        help="skips saving intermediate JSON eval statistics",
        action="store_true",
        default=False,
    )
    parser.add_argument("-scp",
                        "--skip_collect_predictions",
                        help="skips collecting predictions",
                        action="store_true",
                        default=False)
    parser.add_argument(
        "-scos",
        "--skip_collect_overall_stats",
        help="skips collecting overall stats",
        action="store_true",
        default=False,
    )

    # ------------------
    # Generic parameters
    # ------------------
    parser.add_argument("-bs",
                        "--batch_size",
                        type=int,
                        default=128,
                        help="size of batches")

    # ------------------
    # Runtime parameters
    # ------------------
    parser.add_argument("-g",
                        "--gpus",
                        nargs="+",
                        type=int,
                        default=None,
                        help="list of GPUs to use")
    parser.add_argument("-gml",
                        "--gpu_memory_limit",
                        type=int,
                        default=None,
                        help="maximum memory in MB to allocate per GPU device")
    parser.add_argument(
        "-dpt",
        "--disable_parallel_threads",
        action="store_false",
        dest="allow_parallel_threads",
        help="disable TensorFlow from using multithreading for reproducibility",
    )
    parser.add_argument(
        "-b",
        "--backend",
        help="specifies backend to use for parallel / distributed execution, "
        "defaults to local execution or Horovod if called using horovodrun",
        choices=ALL_BACKENDS,
    )
    parser.add_argument("-dbg",
                        "--debug",
                        action="store_true",
                        default=False,
                        help="enables debugging mode")
    parser.add_argument(
        "-l",
        "--logging_level",
        default="info",
        help="the level of logging to use",
        choices=["critical", "error", "warning", "info", "debug", "notset"],
    )

    add_contrib_callback_args(parser)
    args = parser.parse_args(sys_argv)
    args.evaluate_performance = True

    args.callbacks = args.callbacks or []
    for callback in args.callbacks:
        callback.on_cmdline("evaluate", *sys_argv)

    args.logging_level = logging_level_registry[args.logging_level]
    logging.getLogger("ludwig").setLevel(args.logging_level)
    global logger
    logger = logging.getLogger("ludwig.test_performance")

    args.backend = initialize_backend(args.backend)
    if args.backend.is_coordinator():
        print_ludwig("Evaluate", LUDWIG_VERSION)
        logger.info(f"Dataset path: {args.dataset}")
        logger.info(f"Model path: {args.model_path}")
        logger.info("")

    evaluate_cli(**vars(args))
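
A minimal sketch of calling the evaluate CLI above with an explicit argument
list; the paths are placeholders and the snippet assumes it runs in the same
module that defines cli() and its imports:

if __name__ == "__main__":
    cli([
        "--dataset", "data.csv",               # hypothetical dataset
        "-m", "results/experiment_run/model",  # hypothetical trained model dir
        "-sses",  # skip writing the eval statistics JSON for a quick check
    ])
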
Example #22
0
def cli(sys_argv):
    parser = argparse.ArgumentParser(
        description="This script preprocess a dataset",
        prog="ludwig preprocess",
        usage="%(prog)s [options]")

    # ---------------
    # Data parameters
    # ---------------
    parser.add_argument(
        "--dataset",
        help="input data file path. "
        "If it has a split column, it will be used for splitting "
        "(0: train, 1: validation, 2: test), "
        "otherwise the dataset will be randomly split",
    )
    parser.add_argument("--training_set", help="input train data file path")
    parser.add_argument("--validation_set",
                        help="input validation data file path")
    parser.add_argument("--test_set", help="input test data file path")

    parser.add_argument(
        "--training_set_metadata",
        help="input metadata JSON file path. An intermediate preprocessed file "
        "containing the mappings of the input file created "
        "the first time a file is used, in the same directory "
        "with the same name and a .json extension",
    )

    parser.add_argument(
        "--data_format",
        help="format of the input data",
        default="auto",
        choices=[
            "auto",
            "csv",
            "excel",
            "feather",
            "fwf",
            "hdf5",
            "html"
            "tables",
            "json",
            "jsonl",
            "parquet",
            "pickle",
            "sas",
            "spss",
            "stata",
            "tsv",
        ],
    )

    # ----------------
    # Model parameters
    # ----------------
    preprocessing_def = parser.add_mutually_exclusive_group(required=True)
    preprocessing_def.add_argument(
        "-pc",
        "--preprocessing_config",
        dest="preprocessing_config",
        type=load_yaml,
        help="YAML file describing the preprocessing. "
        "Ignores --preprocessing_config."
        "Uses the same format of config, "
        "but ignores encoder specific parameters, "
        "decoder specific parameters, combiner and training parameters",
    )
    preprocessing_def.add_argument(
        "-pcs",
        "--preprocessing_config_str",
        type=yaml.safe_load,
        help="preproceesing config. "
        "Uses the same format of config, "
        "but ignores encoder specific parameters, "
        "decoder specific parameters, combiner and training parameters",
    )

    # ------------------
    # Runtime parameters
    # ------------------
    parser.add_argument(
        "-rs",
        "--random_seed",
        type=int,
        default=42,
        help="a random seed that is going to be used anywhere there is a call "
        "to a random number generator: data splitting, parameter "
        "initialization and training set shuffling",
    )
    parser.add_argument(
        "-b",
        "--backend",
        help="specifies backend to use for parallel / distributed execution, "
        "defaults to local execution or Horovod if called using horovodrun",
        choices=ALL_BACKENDS,
    )
    parser.add_argument(
        "-l",
        "--logging_level",
        default="info",
        help="the level of logging to use",
        choices=["critical", "error", "warning", "info", "debug", "notset"],
    )

    add_contrib_callback_args(parser)
    args = parser.parse_args(sys_argv)

    args.callbacks = args.callbacks or []
    for callback in args.callbacks:
        callback.on_cmdline("preprocess", *sys_argv)

    args.logging_level = logging_level_registry[args.logging_level]
    logging.getLogger("ludwig").setLevel(args.logging_level)
    global logger
    logger = logging.getLogger("ludwig.preprocess")

    args.backend = initialize_backend(args.backend)
    if args.backend.is_coordinator():
        print_ludwig("Preprocess", LUDWIG_VERSION)

    preprocess_cli(**vars(args))
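
Because the preprocessing definition is a required mutually exclusive group,
exactly one of -pc/--preprocessing_config (a YAML file, parsed by load_yaml) or
-pcs/--preprocessing_config_str (an inline YAML string, parsed by
yaml.safe_load) must be given. A minimal sketch with placeholder paths and an
illustrative config key, assuming the module context above:

if __name__ == "__main__":
    # Variant 1: preprocessing described in a YAML file (hypothetical path).
    cli(["--dataset", "data.csv", "-pc", "preprocessing.yaml"])

    # Variant 2: the same idea with an inline YAML string; the key below is
    # illustrative only, check the preprocessing schema of your Ludwig version.
    # cli(["--dataset", "data.csv", "-pcs", "{force_split: true}"])
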
Example #23
0
def cli_collect_activations(sys_argv):
    """Command Line Interface to communicate with the collection of tensors and
    there are several options that can specified when calling this function:

    --data_csv: Filepath for the input csv
    --data_hdf5: Filepath for the input hdf5 file, if there is a csv file, this
                 is not read
    --d: Refers to the dataset type of the file being read; defaults to
         *generic*
    --s: Refers to the split of the data, can be one of: train, test,
         validation, full
    --m: Input model that is necessary to collect the tensors; this is a
         required *option*
    --t: Tensors to collect
    --od: Output directory of the model, defaults to results
    --bs: Batch size
    --g: Number of gpus that are to be used
    --gf: Fraction of each GPU's memory to use.
    --dbg: Debug if the model is to be started with python debugger
    --v: Verbose: Defines the logging level that the user will be exposed to
    """
    parser = argparse.ArgumentParser(
        description='This script loads a pretrained model and uses it to collect '
        'tensors for each datapoint in the dataset.',
        prog='ludwig collect_activations',
        usage='%(prog)s [options]')

    # ---------------
    # Data parameters
    # ---------------
    parser.add_argument('--dataset',
                        help='input data file path',
                        required=True)
    parser.add_argument('--data_format',
                        help='format of the input data',
                        default='auto',
                        choices=[
                            'auto', 'csv', 'excel', 'feather', 'fwf', 'hdf5',
                            'html',
                            'tables', 'json', 'jsonl', 'parquet', 'pickle',
                            'sas', 'spss', 'stata', 'tsv'
                        ])
    parser.add_argument('-s',
                        '--split',
                        default=FULL,
                        choices=[TRAINING, VALIDATION, TEST, FULL],
                        help='the split to obtain the model activations from')

    # ----------------
    # Model parameters
    # ----------------
    parser.add_argument('-m',
                        '--model_path',
                        help='model to load',
                        required=True)
    parser.add_argument('-lyr',
                        '--layers',
                        help='tensors to collect',
                        nargs='+',
                        required=True)

    # -------------------------
    # Output results parameters
    # -------------------------
    parser.add_argument('-od',
                        '--output_directory',
                        type=str,
                        default='results',
                        help='directory that contains the results')

    # ------------------
    # Generic parameters
    # ------------------
    parser.add_argument('-bs',
                        '--batch_size',
                        type=int,
                        default=128,
                        help='size of batches')

    # ------------------
    # Runtime parameters
    # ------------------
    parser.add_argument('-g',
                        '--gpus',
                        nargs='+',
                        type=int,
                        default=None,
                        help='list of GPUs to use')
    parser.add_argument('-gml',
                        '--gpu_memory_limit',
                        type=int,
                        default=None,
                        help='maximum memory in MB to allocate per GPU device')
    parser.add_argument(
        '-dpt',
        '--disable_parallel_threads',
        action='store_false',
        dest='allow_parallel_threads',
        help='disable TensorFlow from using multithreading for reproducibility'
    )
    parser.add_argument(
        "-b",
        "--backend",
        help='specifies backend to use for parallel / distributed execution, '
        'defaults to local execution or Horovod if called using horovodrun',
        choices=ALL_BACKENDS,
    )
    parser.add_argument('-dbg',
                        '--debug',
                        action='store_true',
                        default=False,
                        help='enables debugging mode')
    parser.add_argument(
        '-l',
        '--logging_level',
        default='info',
        help='the level of logging to use',
        choices=['critical', 'error', 'warning', 'info', 'debug', 'notset'])

    args = parser.parse_args(sys_argv)

    args.logging_level = logging_level_registry[args.logging_level]
    logging.getLogger('ludwig').setLevel(args.logging_level)
    global logger
    logger = logging.getLogger('ludwig.collect')

    print_ludwig('Collect Activations', LUDWIG_VERSION)

    collect_activations(**vars(args))
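
A minimal sketch of driving the parser above, mirroring the options listed in
the docstring; the paths and layer names are placeholders, and the call assumes
the module context above:

if __name__ == '__main__':
    cli_collect_activations([
        '--dataset', 'data.csv',               # hypothetical input file
        '-m', 'results/experiment_run/model',  # hypothetical trained model dir
        '-lyr', 'fc_layer_1', 'fc_layer_2',    # hypothetical layer names
        '-bs', '64',
    ])
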
Example #24
0
def cli(sys_argv):
    parser = argparse.ArgumentParser(
        description='This script preprocesses a dataset',
        prog='ludwig preprocess',
        usage='%(prog)s [options]')

    # ---------------
    # Data parameters
    # ---------------
    parser.add_argument(
        '--dataset',
        help='input data file path. '
        'If it has a split column, it will be used for splitting '
        '(0: train, 1: validation, 2: test), '
        'otherwise the dataset will be randomly split')
    parser.add_argument('--training_set', help='input train data file path')
    parser.add_argument('--validation_set',
                        help='input validation data file path')
    parser.add_argument('--test_set', help='input test data file path')

    parser.add_argument(
        '--training_set_metadata',
        help='input metadata JSON file path. An intermediate preprocessed file '
        'containing the mappings of the input file created '
        'the first time a file is used, in the same directory '
        'with the same name and a .json extension')

    parser.add_argument('--data_format',
                        help='format of the input data',
                        default='auto',
                        choices=[
                            'auto', 'csv', 'excel', 'feather', 'fwf', 'hdf5',
                            'html',
                            'tables', 'json', 'jsonl', 'parquet', 'pickle',
                            'sas', 'spss', 'stata', 'tsv'
                        ])

    # ----------------
    # Model parameters
    # ----------------
    preprocessing_def = parser.add_mutually_exclusive_group(required=True)
    preprocessing_def.add_argument(
        '-pd',
        '--preprocessing_config',
        type=yaml.safe_load,
        help='preprocessing config. '
        'Uses the same format as the config, '
        'but ignores encoder specific parameters, '
        'decoder specific parameters, combiner and training parameters')
    preprocessing_def.add_argument(
        '-pcf',
        '--preprocessing_config_file',
        help='YAML file describing the preprocessing. '
        'Ignores --preprocessing_config. '
        'Uses the same format as the config, '
        'but ignores encoder specific parameters, '
        'decoder specific parameters, combiner and training parameters')

    # ------------------
    # Runtime parameters
    # ------------------
    parser.add_argument(
        '-rs',
        '--random_seed',
        type=int,
        default=42,
        help='a random seed that is going to be used anywhere there is a call '
        'to a random number generator: data splitting, parameter '
        'initialization and training set shuffling')
    parser.add_argument('-dbg',
                        '--debug',
                        action='store_true',
                        default=False,
                        help='enables debugging mode')
    parser.add_argument(
        '-l',
        '--logging_level',
        default='info',
        help='the level of logging to use',
        choices=['critical', 'error', 'warning', 'info', 'debug', 'notset'])

    args = parser.parse_args(sys_argv)

    args.logging_level = logging_level_registry[args.logging_level]
    logging.getLogger('ludwig').setLevel(args.logging_level)
    global logger
    logger = logging.getLogger('ludwig.preprocess')

    if is_on_master():
        print_ludwig('Preprocess', LUDWIG_VERSION)

    preprocess_cli(**vars(args))
Example #25
0
def cli(sys_argv):
    parser = argparse.ArgumentParser(
        description='This script loads a pretrained model '
        'and evaluates its performance by comparing '
        'its predictions with ground truth.',
        prog='ludwig evaluate',
        usage='%(prog)s [options]')

    # ---------------
    # Data parameters
    # ---------------
    parser.add_argument('--dataset',
                        help='input data file path',
                        required=True)
    parser.add_argument('--data_format',
                        help='format of the input data',
                        default='auto',
                        choices=[
                            'auto', 'csv', 'excel', 'feather', 'fwf', 'hdf5',
                            'html',
                            'tables', 'json', 'jsonl', 'parquet', 'pickle',
                            'sas', 'spss', 'stata', 'tsv'
                        ])
    parser.add_argument('-s',
                        '--split',
                        default=FULL,
                        choices=[TRAINING, VALIDATION, TEST, FULL],
                        help='the split to test the model on')

    # ----------------
    # Model parameters
    # ----------------
    parser.add_argument('-m',
                        '--model_path',
                        help='model to load',
                        required=True)

    # -------------------------
    # Output results parameters
    # -------------------------
    parser.add_argument('-od',
                        '--output_directory',
                        type=str,
                        default='results',
                        help='directory that contains the results')
    parser.add_argument('-ssuo',
                        '--skip_save_unprocessed_output',
                        help='skips saving intermediate NPY output files',
                        action='store_true',
                        default=False)
    parser.add_argument('-sses',
                        '--skip_save_eval_stats',
                        help='skips saving intermediate JSON eval statistics',
                        action='store_true',
                        default=False)
    parser.add_argument('-scp',
                        '--skip_collect_predictions',
                        help='skips collecting predictions',
                        action='store_true',
                        default=False)
    parser.add_argument('-scos',
                        '--skip_collect_overall_stats',
                        help='skips collecting overall stats',
                        action='store_true',
                        default=False)

    # ------------------
    # Generic parameters
    # ------------------
    parser.add_argument('-bs',
                        '--batch_size',
                        type=int,
                        default=128,
                        help='size of batches')

    # ------------------
    # Runtime parameters
    # ------------------
    parser.add_argument('-g',
                        '--gpus',
                        nargs='+',
                        type=int,
                        default=None,
                        help='list of GPUs to use')
    parser.add_argument('-gml',
                        '--gpu_memory_limit',
                        type=int,
                        default=None,
                        help='maximum memory in MB to allocate per GPU device')
    parser.add_argument(
        '-dpt',
        '--disable_parallel_threads',
        action='store_false',
        dest='allow_parallel_threads',
        help='disable TensorFlow from using multithreading for reproducibility'
    )
    parser.add_argument('-uh',
                        '--use_horovod',
                        action='store_true',
                        default=None,
                        help='uses horovod for distributed training')
    parser.add_argument('-dbg',
                        '--debug',
                        action='store_true',
                        default=False,
                        help='enables debugging mode')
    parser.add_argument(
        '-l',
        '--logging_level',
        default='info',
        help='the level of logging to use',
        choices=['critical', 'error', 'warning', 'info', 'debug', 'notset'])

    args = parser.parse_args(sys_argv)
    args.evaluate_performance = True

    args.logging_level = logging_level_registry[args.logging_level]
    logging.getLogger('ludwig').setLevel(args.logging_level)
    global logger
    logger = logging.getLogger('ludwig.test_performance')

    set_on_master(args.use_horovod)

    if is_on_master():
        print_ludwig('Test', LUDWIG_VERSION)
        logger.info('Dataset path: {}'.format(args.dataset))
        logger.info('Model path: {}'.format(args.model_path))
        logger.info('')

    evaluate_cli(**vars(args))
Example #26
0
def cli(sys_argv):
    parser = argparse.ArgumentParser(
        description='This script loads a pretrained model '
        'and uses it to predict',
        prog='ludwig predict',
        usage='%(prog)s [options]')

    # ---------------
    # Data parameters
    # ---------------
    parser.add_argument('--dataset',
                        help='input data file path',
                        required=True)
    parser.add_argument('--data_format',
                        help='format of the input data',
                        default='auto',
                        choices=['auto', 'csv', 'hdf5'])

    # ----------------
    # Model parameters
    # ----------------
    parser.add_argument('-m',
                        '--model_path',
                        help='model to load',
                        required=True)

    # -------------------------
    # Output results parameters
    # -------------------------
    parser.add_argument('-od',
                        '--output_directory',
                        type=str,
                        default='results',
                        help='directory that contains the results')
    parser.add_argument('-ssuo',
                        '--skip_save_unprocessed_output',
                        help='skips saving intermediate NPY output files',
                        action='store_true',
                        default=False)
    parser.add_argument('-sstp',
                        '--skip_save_predictions',
                        help='skips saving predictions CSV files',
                        action='store_true',
                        default=False)

    # ------------------
    # Generic parameters
    # ------------------
    parser.add_argument('-bs',
                        '--batch_size',
                        type=int,
                        default=128,
                        help='size of batches')

    # ------------------
    # Runtime parameters
    # ------------------
    parser.add_argument('-g',
                        '--gpus',
                        nargs='+',
                        type=int,
                        default=None,
                        help='list of GPUs to use')
    parser.add_argument('-gml',
                        '--gpu_memory_limit',
                        type=int,
                        default=None,
                        help='maximum memory in MB to allocate per GPU device')
    parser.add_argument(
        '-dpt',
        '--disable_parallel_threads',
        action='store_false',
        dest='allow_parallel_threads',
        help='disable TensorFlow from using multithreading for reproducibility'
    )
    parser.add_argument('-uh',
                        '--use_horovod',
                        action='store_true',
                        default=None,
                        help='uses horovod for distributed training')
    parser.add_argument('-dbg',
                        '--debug',
                        action='store_true',
                        default=False,
                        help='enables debugging mode')
    parser.add_argument(
        '-l',
        '--logging_level',
        default='info',
        help='the level of logging to use',
        choices=['critical', 'error', 'warning', 'info', 'debug', 'notset'])

    args = parser.parse_args(sys_argv)

    args.logging_level = logging_level_registry[args.logging_level]
    logging.getLogger('ludwig').setLevel(args.logging_level)
    global logger
    logger = logging.getLogger('ludwig.predict')

    set_on_master(args.use_horovod)

    if is_on_master():
        print_ludwig('Predict', LUDWIG_VERSION)
        logger.info('Dataset path: {}'.format(args.dataset))
        logger.info('Model path: {}'.format(args.model_path))
        logger.info('')

    predict_cli(**vars(args))
Example #27
0
def cli(sys_argv):
    parser = argparse.ArgumentParser(description='This script trains a model.',
                                     prog='ludwig train',
                                     usage='%(prog)s [options]')

    # ----------------------------
    # Experiment naming parameters
    # ----------------------------
    parser.add_argument('--output_directory',
                        type=str,
                        default='results',
                        help='directory that contains the results')
    parser.add_argument('--experiment_name',
                        type=str,
                        default='experiment',
                        help='experiment name')
    parser.add_argument('--model_name',
                        type=str,
                        default='run',
                        help='name for the model')

    # ---------------
    # Data parameters
    # ---------------
    parser.add_argument(
        '--data_csv',
        help='input data CSV file. '
        'If it has a split column, it will be used for splitting '
        '(0: train, 1: validation, 2: test), '
        'otherwise the dataset will be randomly split')
    parser.add_argument('--data_train_csv', help='input train data CSV file')
    parser.add_argument('--data_validation_csv',
                        help='input validation data CSV file')
    parser.add_argument('--data_test_csv', help='input test data CSV file')

    parser.add_argument(
        '--data_hdf5',
        help='input data HDF5 file. It is an intermediate preprocess version of'
        ' the input CSV created the first time a CSV file is used in the '
        'same directory with the same name and a hdf5 extension')
    parser.add_argument(
        '--data_train_hdf5',
        help='input train data HDF5 file. It is an intermediate preprocess '
        'version of the input CSV created the first time a CSV file is '
        'used in the same directory with the same name and a hdf5 '
        'extension')
    parser.add_argument(
        '--data_validation_hdf5',
        help='input validation data HDF5 file. It is an intermediate preprocess'
        ' version of the input CSV created the first time a CSV file is '
        'used in the same directory with the same name and a hdf5 '
        'extension')
    parser.add_argument(
        '--data_test_hdf5',
        help='input test data HDF5 file. It is an intermediate preprocess '
        'version of the input CSV created the first time a CSV file is '
        'used in the same directory with the same name and a hdf5 '
        'extension')

    parser.add_argument(
        '--train_set_metadata_json',
        help='input metadata JSON file. It is an intermediate preprocess file '
        'containing the mappings of the input CSV created the first time a'
        ' CSV file is used in the same directory with the same name and a '
        'json extension')

    parser.add_argument('-sspi',
                        '--skip_save_processed_input',
                        help='skips saving intermediate HDF5 and JSON files',
                        action='store_true',
                        default=False)

    # ----------------
    # Model parameters
    # ----------------
    model_definition = parser.add_mutually_exclusive_group(required=True)
    model_definition.add_argument('-md',
                                  '--model_definition',
                                  type=yaml.safe_load,
                                  help='model definition')
    model_definition.add_argument(
        '-mdf',
        '--model_definition_file',
        help='YAML file describing the model. Ignores --model_definition')

    parser.add_argument(
        '-mlp',
        '--model_load_path',
        help='path of a pretrained model to load as initialization')
    parser.add_argument(
        '-mrp',
        '--model_resume_path',
        help='path of the model directory to resume training of')
    parser.add_argument(
        '-ssm',
        '--skip_save_model',
        action='store_true',
        default=False,
        help='disables saving weights each time the model improves. '
        'By default Ludwig saves weights after each epoch '
        'the validation measure improves. If the model is really big, '
        'saving can be time consuming; if you do not want to keep '
        'the weights and just want to find out what performance a model can get '
        'with a set of hyperparameters, use this parameter to skip it.')
    parser.add_argument(
        '-ssp',
        '--skip_save_progress',
        action='store_true',
        default=False,
        help='disables saving weights after each epoch. By default Ludwig saves '
        'weights after each epoch to enable resuming of training, but '
        'if the model is really big that can be time consuming and will '
        'use twice as much space; use this parameter to skip it.')
    parser.add_argument(
        '-ssl',
        '--skip_save_log',
        action='store_true',
        default=False,
        help='disables saving TensorBoard logs. By default Ludwig saves '
        'logs for the TensorBoard, but if it is not needed turning it off '
        'can slightly increase the overall speed.')

    # ------------------
    # Runtime parameters
    # ------------------
    parser.add_argument(
        '-rs',
        '--random_seed',
        type=int,
        default=42,
        help='a random seed that is going to be used anywhere there is a call '
        'to a random number generator: data splitting, parameter '
        'initialization and training set shuffling')
    parser.add_argument('-g',
                        '--gpus',
                        nargs='+',
                        type=int,
                        default=None,
                        help='list of gpus to use')
    parser.add_argument(
        '-gf',
        '--gpu_fraction',
        type=float,
        default=1.0,
        help='fraction of gpu memory to initialize the process with')
    parser.add_argument('-uh',
                        '--use_horovod',
                        action='store_true',
                        default=False,
                        help='uses horovod for distributed training')
    parser.add_argument('-dbg',
                        '--debug',
                        action='store_true',
                        default=False,
                        help='enables debugging mode')
    parser.add_argument(
        '-l',
        '--logging_level',
        default='info',
        help='the level of logging to use',
        choices=['critical', 'error', 'warning', 'info', 'debug', 'notset'])

    args = parser.parse_args(sys_argv)

    logging.getLogger('ludwig').setLevel(
        logging_level_registry[args.logging_level])
    set_on_master(args.use_horovod)

    if is_on_master():
        print_ludwig('Train', LUDWIG_VERSION)

    full_train(**vars(args))
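
The mutually exclusive group above means a training run passes either -md with
an inline YAML model definition (parsed by yaml.safe_load) or -mdf with a path
to a YAML file. A minimal sketch with placeholder paths and feature names,
assuming the module context above:

if __name__ == '__main__':
    cli([
        '--data_csv', 'data.csv',  # hypothetical dataset
        '-md', '{input_features: [{name: text, type: text}], '
               'output_features: [{name: class, type: category}]}',
    ])
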
Example #28
0
def cli(sys_argv):
    parser = argparse.ArgumentParser(
        description='This script loads a pretrained model '
        'and tests its performance by comparing '
        'its predictions with ground truth.',
        prog='ludwig test',
        usage='%(prog)s [options]')

    # ---------------
    # Data parameters
    # ---------------
    group = parser.add_mutually_exclusive_group(required=True)
    group.add_argument(
        '--data_csv',
        help='input data CSV file. '
        'If it has a split column, it will be used for splitting '
        '(0: train, 1: validation, 2: test), '
        'otherwise the dataset will be randomly split')
    group.add_argument(
        '--data_hdf5',
        help='input data HDF5 file. It is an intermediate preprocess version of'
        ' the input CSV created the first time a CSV file is used in the '
        'same directory with the same name and a hdf5 extension')
    parser.add_argument(
        '--train_set_metadata_json',
        help='input metadata JSON file. It is an intermediate preprocess file '
        'containing the mappings of the input CSV created the first time '
        'a CSV file is used in the same directory with the same name and '
        'a json extension')

    parser.add_argument('-s',
                        '--split',
                        default=TEST,
                        choices=[TRAINING, VALIDATION, TEST, FULL],
                        help='the split to test the model on')

    # ----------------
    # Model parameters
    # ----------------
    parser.add_argument('-m',
                        '--model_path',
                        help='model to load',
                        required=True)

    # -------------------------
    # Output results parameters
    # -------------------------
    parser.add_argument('-od',
                        '--output_directory',
                        type=str,
                        default='results',
                        help='directory that contains the results')
    parser.add_argument('-ssuo',
                        '--skip_save_unprocessed_output',
                        help='skips saving intermediate NPY output files',
                        action='store_true',
                        default=False)

    # ------------------
    # Generic parameters
    # ------------------
    parser.add_argument('-bs',
                        '--batch_size',
                        type=int,
                        default=128,
                        help='size of batches')

    # ------------------
    # Runtime parameters
    # ------------------
    parser.add_argument('-g',
                        '--gpus',
                        nargs='+',
                        type=int,
                        default=None,
                        help='list of GPUs to use')
    parser.add_argument(
        '-gf',
        '--gpu_fraction',
        type=float,
        default=1.0,
        help='fraction of gpu memory to initialize the process with')
    parser.add_argument('-uh',
                        '--use_horovod',
                        action='store_true',
                        default=False,
                        help='uses horovod for distributed training')
    parser.add_argument('-dbg',
                        '--debug',
                        action='store_true',
                        default=False,
                        help='enables debugging mode')
    parser.add_argument(
        '-l',
        '--logging_level',
        default='info',
        help='the level of logging to use',
        choices=['critical', 'error', 'warning', 'info', 'debug', 'notset'])

    args = parser.parse_args(sys_argv)
    args.evaluate_performance = True

    logging.getLogger('ludwig').setLevel(
        logging_level_registry[args.logging_level])
    set_on_master(args.use_horovod)

    if is_on_master():
        print_ludwig('Test', LUDWIG_VERSION)

    full_predict(**vars(args))
Example #29
0
def cli(sys_argv):
    parser = argparse.ArgumentParser(
        description='This script trains and tests a model',
        prog='ludwig experiment',
        usage='%(prog)s [options]')

    # ----------------------------
    # Experiment naming parameters
    # ----------------------------
    parser.add_argument('--output_directory',
                        type=str,
                        default='results',
                        help='directory that contains the results')
    parser.add_argument('--experiment_name',
                        type=str,
                        default='experiment',
                        help='experiment name')
    parser.add_argument('--model_name',
                        type=str,
                        default='run',
                        help='name for the model')

    # ---------------
    # Data parameters
    # ---------------
    parser.add_argument(
        '--data_csv',
        help='input data CSV file. If it has a split column, it will be used '
        'for splitting (0: train, 1: validation, 2: test), otherwise the '
        'dataset will be randomly split')
    parser.add_argument('--data_train_csv', help='input train data CSV file')
    parser.add_argument('--data_validation_csv',
                        help='input validation data CSV file')
    parser.add_argument('--data_test_csv', help='input test data CSV file')

    parser.add_argument(
        '--data_hdf5',
        help='input data HDF5 file. It is an intermediate preprocess version of'
        ' the input CSV created the first time a CSV file is used in the '
        'same directory with the same name and a hdf5 extension')
    parser.add_argument(
        '--data_train_hdf5',
        help='input train data HDF5 file. It is an intermediate preprocess '
        'version of the input CSV created the first time a CSV file is '
        'used in the same directory with the same name and a hdf5 '
        'extension')
    parser.add_argument(
        '--data_validation_hdf5',
        help='input validation data HDF5 file. It is an intermediate preprocess'
        ' version of the input CSV created the first time a CSV file is '
        'used in the same directory with the same name and a hdf5 '
        'extension')
    parser.add_argument(
        '--data_test_hdf5',
        help='input test data HDF5 file. It is an intermediate preprocess '
        'version of the input CSV created the first time a CSV file is '
        'used in the same directory with the same name and a hdf5 '
        'extension')

    parser.add_argument(
        '--train_set_metadata_json',
        help='input metadata JSON file. It is an intermediate preprocess file'
        ' containing the mappings of the input CSV created the first time '
        'a CSV file is used in the same directory with the same name and a'
        ' json extension')

    parser.add_argument('-sspi',
                        '--skip_save_processed_input',
                        help='skips saving intermediate HDF5 and JSON files',
                        action='store_true',
                        default=False)
    parser.add_argument('-ssuo',
                        '--skip_save_unprocessed_output',
                        help='skips saving intermediate NPY output files',
                        action='store_true',
                        default=False)

    # -----------------
    # K-fold parameters
    # -----------------
    parser.add_argument(
        '-kf',
        '--k_fold',
        type=int,
        default=None,
        help='number of folds for a k-fold cross validation run ')
    parser.add_argument(
        '-skfsi',
        '--skip_save_k_fold_split_indices',
        action='store_true',
        default=False,
        help='disables saving indices generated to split training data set '
        'for the k-fold cross validation run, but if it is not needed '
        'turning it off can slightly increase the overall speed')

    # ----------------
    # Model parameters
    # ----------------
    model_definition = parser.add_mutually_exclusive_group(required=True)
    model_definition.add_argument('-md',
                                  '--model_definition',
                                  type=yaml.safe_load,
                                  help='model definition')
    model_definition.add_argument(
        '-mdf',
        '--model_definition_file',
        help='YAML file describing the model. Ignores --model_definition')

    parser.add_argument(
        '-mlp',
        '--model_load_path',
        help='path of a pretrained model to load as initialization')
    parser.add_argument(
        '-mrp',
        '--model_resume_path',
        help='path of the model directory to resume training of')
    parser.add_argument('-sstd',
                        '--skip_save_training_description',
                        action='store_true',
                        default=False,
                        help='disables saving the description JSON file')
    parser.add_argument('-ssts',
                        '--skip_save_training_statistics',
                        action='store_true',
                        default=False,
                        help='disables saving training statistics JSON file')
    parser.add_argument('-sstp',
                        '--skip_save_test_predictions',
                        help='skips saving test predictions CSV files',
                        action='store_true',
                        default=False)
    parser.add_argument('-sstes',
                        '--skip_save_test_statistics',
                        help='skips saving test statistics JSON file',
                        action='store_true',
                        default=False)
    parser.add_argument(
        '-ssm',
        '--skip_save_model',
        action='store_true',
        default=False,
        help='disables saving model weights and hyperparameters each time '
        'the model improves. '
        'By default Ludwig saves model weights after each epoch '
        'the validation metric improves. If the model is really big, '
        'saving can be time consuming; if you do not want to keep '
        'the weights and just want to find out what performance a model can get '
        'with a set of hyperparameters, use this parameter to skip it, '
        'but the model will not be loadable later on')
    parser.add_argument(
        '-ssp',
        '--skip_save_progress',
        action='store_true',
        default=False,
        help='disables saving progress each epoch. By default Ludwig saves '
        'weights and stats after each epoch to enable resuming '
        'of training, but if the model is really big that can be '
        'time consuming and will use twice as much space; use '
        'this parameter to skip it, but training cannot be resumed '
        'later on')
    parser.add_argument(
        '-ssl',
        '--skip_save_log',
        action='store_true',
        default=False,
        help='disables saving TensorBoard logs. By default Ludwig saves '
        'logs for the TensorBoard, but if it is not needed turning it off '
        'can slightly increase the overall speed')

    # ------------------
    # Runtime parameters
    # ------------------
    parser.add_argument(
        '-rs',
        '--random_seed',
        type=int,
        default=42,
        help='a random seed that is going to be used anywhere there is a call '
        'to a random number generator: data splitting, parameter '
        'initialization and training set shuffling')
    parser.add_argument('-g',
                        '--gpus',
                        nargs='+',
                        type=int,
                        default=None,
                        help='list of GPUs to use')
    parser.add_argument('-gml',
                        '--gpu_memory_limit',
                        type=int,
                        default=None,
                        help='maximum memory in MB to allocate per GPU device')
    parser.add_argument(
        '-dpt',
        '--disable_parallel_threads',
        action='store_false',
        dest='allow_parallel_threads',
        help='disable TensorFlow from using multithreading for reproducibility'
    )
    parser.add_argument('-uh',
                        '--use_horovod',
                        action='store_true',
                        default=None,
                        help='uses horovod for distributed training')
    parser.add_argument('-dbg',
                        '--debug',
                        action='store_true',
                        default=False,
                        help='enables debugging mode')
    parser.add_argument(
        '-l',
        '--logging_level',
        default='info',
        help='the level of logging to use',
        choices=['critical', 'error', 'warning', 'info', 'debug', 'notset'])

    args = parser.parse_args(sys_argv)

    logging.getLogger('ludwig').setLevel(
        logging_level_registry[args.logging_level])
    global logger
    logger = logging.getLogger('ludwig.experiment')

    set_on_master(args.use_horovod)

    if is_on_master():
        print_ludwig('Experiment', LUDWIG_VERSION)

    if args.k_fold is None:
        full_experiment(**vars(args))
    else:
        full_kfold_cross_validate(**vars(args))
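
The -kf/--k_fold flag is what selects the code path at the end of the function:
without it the arguments go to full_experiment(), with it they go to
full_kfold_cross_validate(). A minimal sketch with placeholder paths, assuming
the module context above:

if __name__ == '__main__':
    cli([
        '--data_csv', 'data.csv',         # hypothetical dataset
        '-mdf', 'model_definition.yaml',  # hypothetical model YAML file
        '-kf', '5',                       # run 5-fold cross validation
    ])
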
Example #30
0
def cli_export_torchscript(sys_argv):
    parser = argparse.ArgumentParser(
        description="This script loads a pretrained model "
        "and saves it as torchscript.",
        prog="ludwig export_torchscript",
        usage="%(prog)s [options]",
    )

    # ----------------
    # Model parameters
    # ----------------
    parser.add_argument("-m",
                        "--model_path",
                        help="model to load",
                        required=True)
    parser.add_argument(
        "-mo",
        "--model_only",
        help="Script and export the model only.",
        action="store_true",
    )
    parser.add_argument(
        "-d",
        "--device",
        type=str,
        help=
        ('Device to use for torchscript tracing (e.g. "cuda" or "cpu"). Ideally, this is the same as the device '
         "used when the model is loaded."),
        default=None,
    )

    # -----------------
    # Output parameters
    # -----------------
    parser.add_argument("-od",
                        "--output_path",
                        type=str,
                        help="path where to save the export model",
                        required=True)

    # ------------------
    # Runtime parameters
    # ------------------
    parser.add_argument(
        "-l",
        "--logging_level",
        default="info",
        help="the level of logging to use",
        choices=["critical", "error", "warning", "info", "debug", "notset"],
    )

    add_contrib_callback_args(parser)
    args = parser.parse_args(sys_argv)

    args.callbacks = args.callbacks or []
    for callback in args.callbacks:
        callback.on_cmdline("export_torchscript", *sys_argv)

    args.logging_level = logging_level_registry[args.logging_level]
    logging.getLogger("ludwig").setLevel(args.logging_level)
    global logger
    logger = logging.getLogger("ludwig.export")

    print_ludwig("Export Torchscript", LUDWIG_VERSION)

    export_torchscript(**vars(args))
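
A minimal sketch of invoking the torchscript export CLI above; the model and
output paths are placeholders and the snippet assumes the module context above:

if __name__ == "__main__":
    cli_export_torchscript([
        "-m", "results/experiment_run/model",  # hypothetical trained model dir
        "-od", "exported_torchscript",         # hypothetical output path
        "-d", "cpu",  # trace on CPU; pass "cuda" if the model was loaded on GPU
    ])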