def export_neuropod(
    ludwig_model_path,
    neuropod_path,
    neuropod_model_name="ludwig_model",
    entrypoint="get_model",
):
    """Package a trained Ludwig model as a Neuropod model.

    Collects the model artifacts (hyperparameters, training-set metadata,
    checkpoint and weight files) from ``ludwig_model_path``, derives the
    Neuropod input/output tensor specs from the model's feature config,
    and writes a Python-backend Neuropod package to ``neuropod_path``.

    :param ludwig_model_path: directory containing the trained Ludwig model.
    :param neuropod_path: destination path for the Neuropod package; any
        existing file or directory at this path is removed first.
    :param neuropod_model_name: model name recorded inside the package.
    :param entrypoint: entrypoint function name inside
        ``ludwig.utils.neuropod_utils`` used to load the model.
    :raises RuntimeError: if the ``neuropod`` package is not installed.
    """
    try:
        from neuropod.backends.python.packager import create_python_neuropod
    except ImportError as e:
        # Chain the original ImportError so the root cause stays visible.
        raise RuntimeError(
            'The "neuropod" package is not installed in your environment.'
        ) from e

    # Files to bundle into the package: hyperparameters, training-set
    # metadata, the checkpoint marker, and every weights shard below.
    data_paths = [
        {
            "path": os.path.join(ludwig_model_path,
                                 MODEL_HYPERPARAMETERS_FILE_NAME),
            "packaged_name": MODEL_HYPERPARAMETERS_FILE_NAME,
        },
        {
            "path": os.path.join(ludwig_model_path,
                                 TRAIN_SET_METADATA_FILE_NAME),
            "packaged_name": TRAIN_SET_METADATA_FILE_NAME,
        },
        {
            "path": os.path.join(ludwig_model_path, "checkpoint"),
            "packaged_name": "checkpoint",
        },
    ]
    for filename in os.listdir(ludwig_model_path):
        # Weight files may be sharded; include every file whose name
        # contains the weights-file prefix.
        if MODEL_WEIGHTS_FILE_NAME in filename:
            data_paths.append({
                "path": os.path.join(ludwig_model_path, filename),
                "packaged_name": filename,
            })
    logger.debug(f"data_paths: {data_paths}")

    ludwig_config = load_json(
        os.path.join(ludwig_model_path, MODEL_HYPERPARAMETERS_FILE_NAME))
    training_set_metadata = load_json(
        os.path.join(ludwig_model_path, TRAIN_SET_METADATA_FILE_NAME))

    # All inputs are fed to the Neuropod as string tensors of shape
    # (batch, 1); decoding happens inside the entrypoint.
    input_spec = []
    for feature in ludwig_config["input_features"]:
        input_spec.append({
            NAME: feature[NAME],
            "dtype": "str",
            "shape": (None, 1),
        })
    logger.debug(f"input_spec: {input_spec}")

    # Output specs depend on the output feature type: which tensors a
    # feature emits (_predictions / _probability / _probabilities), their
    # dtype, and their trailing dimension.
    output_spec = []
    for feature in ludwig_config["output_features"]:
        feature_type = feature[TYPE]
        feature_name = feature[NAME]
        if feature_type == BINARY:
            output_spec.append({
                "name": feature_name + "_predictions",
                "dtype": "str",
                "shape": (None, 1),
            })
            output_spec.append({
                "name": feature_name + "_probability",
                "dtype": "float64",
                "shape": (None, 1),
            })
            output_spec.append({
                "name": feature_name + "_probabilities",
                "dtype": "float64",
                "shape": (None, 2),  # two classes for binary features
            })
        elif feature_type == NUMERICAL:
            output_spec.append({
                "name": feature_name + "_predictions",
                "dtype": "float64",
                "shape": (None, 1),
            })
        elif feature_type == CATEGORY:
            output_spec.append({
                "name": feature_name + "_predictions",
                "dtype": "str",
                "shape": (None, 1),
            })
            output_spec.append({
                "name": feature_name + "_probability",
                "dtype": "float64",
                "shape": (None, 1),
            })
            output_spec.append({
                "name": feature_name + "_probabilities",
                "dtype": "float64",
                # one probability per vocabulary entry
                "shape": (None,
                          training_set_metadata[feature[NAME]]["vocab_size"]),
            })
        elif feature_type == SEQUENCE or feature_type == TEXT:
            output_spec.append({
                "name": feature_name + "_predictions",
                "dtype": "str",
                "shape": (None, 1),
            })
        elif feature_type == SET:
            output_spec.append({
                "name": feature_name + "_predictions",
                "dtype": "str",
                "shape": (None, 1),
            })
            output_spec.append({
                "name": feature_name + "_probability",
                # NOTE(review): dtype "str" for a probability looks odd —
                # confirm against the entrypoint's actual output dtype.
                "dtype": "str",
                "shape": (None, 1),
            })
            output_spec.append({
                "name": feature_name + "_probabilities",
                "dtype": "float64",
                "shape": (None,
                          training_set_metadata[feature[NAME]]["vocab_size"]),
            })
        elif feature_type == VECTOR:
            output_spec.append({
                "name": feature_name + "_predictions",
                "dtype": "float64",
                "shape": (None,
                          training_set_metadata[feature[NAME]]["vector_size"]),
            })
        else:
            # Fallback for any other feature type: string predictions only.
            output_spec.append({
                "name": feature_name + "_predictions",
                "dtype": "str",
                "shape": (None, 1),
            })
    logger.debug(f"output_spec: {output_spec}")

    # Clear any previous artifact at the destination path.
    if os.path.exists(neuropod_path):
        if os.path.isfile(neuropod_path):
            logger.warning(f"Removing file: {neuropod_path}")
            os.remove(neuropod_path)
        else:
            logger.warning(f"Removing directory: {neuropod_path}")
            shutil.rmtree(neuropod_path, ignore_errors=True)

    from pathlib import Path

    # NOTE(review): `ludwig_path` is not defined in this function — it is
    # expected to be a module-level variable pointing at the installed
    # ludwig package; confirm it is set before this runs.
    path = Path(ludwig_path)
    logger.debug(f"python_root: {path.parent.parent}")

    create_python_neuropod(
        neuropod_path=neuropod_path,
        model_name=neuropod_model_name,
        data_paths=data_paths,
        code_path_spec=[{
            "python_root": path.parent.parent,
            "dirs_to_package": ["ludwig"],  # Package everything in the python_root
        }],
        entrypoint_package="ludwig.utils.neuropod_utils",
        entrypoint=entrypoint,
        skip_virtualenv=True,
        input_spec=input_spec,
        output_spec=output_spec,
    )
    logger.info(f"Neuropod saved to: {neuropod_path}")
def export_neuropod(ludwig_model_path, neuropod_path, neuropod_model_name="ludwig_model"):
    """Package a trained Ludwig model as a Neuropod model.

    Bundles the model artifacts from ``ludwig_model_path`` (hyperparameters,
    training-set metadata, checkpoint, weight files), builds Neuropod
    input/output tensor specs from the model definition, and writes a
    Python-backend Neuropod package to ``neuropod_path``.

    NOTE(review): this file appears to contain a second, earlier definition
    of ``export_neuropod``; since this one comes later in the module it
    shadows the other at import time — confirm which variant is intended
    and remove the duplicate.

    :param ludwig_model_path: directory containing the trained Ludwig model.
    :param neuropod_path: destination path for the Neuropod package; any
        existing file or directory at this path is removed first.
    :param neuropod_model_name: model name recorded inside the package.
    """
    try:
        from neuropod.backends.python.packager import create_python_neuropod
    except ImportError:
        # Hard-exits the process when the optional dependency is missing.
        logger.error(
            'The "neuropod" package is not installed in your environment.')
        sys.exit(-1)

    # Files to bundle: hyperparameters, training-set metadata, the
    # checkpoint marker, and each weights shard gathered below.
    data_paths = [
        {
            "path": os.path.join(ludwig_model_path,
                                 MODEL_HYPERPARAMETERS_FILE_NAME),
            "packaged_name": MODEL_HYPERPARAMETERS_FILE_NAME
        },
        {
            "path": os.path.join(ludwig_model_path,
                                 TRAIN_SET_METADATA_FILE_NAME),
            "packaged_name": TRAIN_SET_METADATA_FILE_NAME
        },
        {
            "path": os.path.join(ludwig_model_path, 'checkpoint'),
            "packaged_name": 'checkpoint'
        },
    ]
    for filename in os.listdir(ludwig_model_path):
        # Include weight shards but skip in-progress weight files.
        if (MODEL_WEIGHTS_FILE_NAME in filename
                and MODEL_WEIGHTS_PROGRESS_FILE_NAME not in filename):
            data_paths.append({
                "path": os.path.join(ludwig_model_path, filename),
                "packaged_name": filename
            })
    logger.debug('data_paths: {}'.format(data_paths))

    ludwig_model_definition = load_json(
        os.path.join(ludwig_model_path, MODEL_HYPERPARAMETERS_FILE_NAME))
    training_set_metadata = load_json(
        os.path.join(ludwig_model_path, TRAIN_SET_METADATA_FILE_NAME))

    # All inputs are exposed as string tensors of shape (batch, 1).
    input_spec = []
    for feature in ludwig_model_definition['input_features']:
        input_spec.append({
            "name": feature['name'],
            "dtype": "str",
            "shape": (None, 1)
        })
    logger.debug('input_spec: {}'.format(input_spec))

    # Output tensors per feature depend on the feature type: which of
    # _predictions / _probability / _probabilities exist, their dtype,
    # and their trailing dimension.
    output_spec = []
    for feature in ludwig_model_definition['output_features']:
        feature_type = feature['type']
        feature_name = feature['name']
        if feature_type == BINARY:
            output_spec.append({
                "name": feature['name'] + '_predictions',
                "dtype": "str",
                "shape": (None, 1)
            })
            output_spec.append({
                "name": feature['name'] + '_probabilities',
                "dtype": "float32",
                "shape": (None, 1)
            })
        elif feature_type == NUMERICAL:
            output_spec.append({
                "name": feature['name'] + '_predictions',
                "dtype": "float32",
                "shape": (None, 1)
            })
        elif feature_type == CATEGORY:
            output_spec.append({
                "name": feature['name'] + '_predictions',
                "dtype": "str",
                "shape": (None, 1)
            })
            output_spec.append({
                "name": feature['name'] + '_probability',
                "dtype": "float32",
                "shape": (None, 1)
            })
            output_spec.append({
                "name": feature['name'] + '_probabilities',
                "dtype": "float32",
                # one probability per vocabulary entry
                "shape": (None,
                          training_set_metadata[feature_name]['vocab_size'])
            })
        elif feature_type == SEQUENCE:
            output_spec.append({
                "name": feature['name'] + '_predictions',
                "dtype": "str",
                "shape": (None, 1)
            })
        elif feature_type == TEXT:
            output_spec.append({
                "name": feature['name'] + '_predictions',
                "dtype": "str",
                "shape": (None, 1)
            })
        elif feature_type == SET:
            output_spec.append({
                "name": feature['name'] + '_predictions',
                "dtype": "str",
                "shape": (None, 1)
            })
            output_spec.append({
                # NOTE(review): dtype "str" for a probability looks odd —
                # confirm against the entrypoint's actual output dtype.
                "name": feature['name'] + '_probability',
                "dtype": "str",
                "shape": (None, 1)
            })
            output_spec.append({
                "name": feature['name'] + '_probabilities',
                "dtype": "float32",
                "shape": (None,
                          training_set_metadata[feature_name]['vocab_size'])
            })
        elif feature_type == VECTOR:
            output_spec.append({
                "name": feature['name'] + '_predictions',
                "dtype": "float32",
                "shape": (None,
                          training_set_metadata[feature_name]['vector_size'])
            })
        else:
            # Fallback for any other feature type: string predictions only.
            output_spec.append({
                "name": feature['name'] + '_predictions',
                "dtype": "str",
                "shape": (None, 1)
            })
    logger.debug('output_spec: {}'.format(output_spec))

    # Clear any previous artifact at the destination path.
    if os.path.exists(neuropod_path):
        if os.path.isfile(neuropod_path):
            logger.warning('Removing file: {}'.format(neuropod_path))
            os.remove(neuropod_path)
        else:
            logger.warning('Removing directory: {}'.format(neuropod_path))
            shutil.rmtree(neuropod_path, ignore_errors=True)

    from pathlib import Path
    # NOTE(review): `ludwig_path` is not defined in this function — it is
    # expected to be a module-level variable pointing at the installed
    # ludwig package; confirm it is set before this runs.
    path = Path(ludwig_path)
    logger.debug('python_root: {}'.format(path.parent.parent))

    create_python_neuropod(
        neuropod_path=neuropod_path,
        model_name=neuropod_model_name,
        data_paths=data_paths,
        code_path_spec=[{
            "python_root": path.parent.parent,
            "dirs_to_package": [
                "ludwig"  # Package everything in the python_root
            ],
        }],
        entrypoint_package="ludwig.neuropod_export",
        entrypoint="get_model",
        # test_deps=['torch', 'numpy'],
        skip_virtualenv=True,
        input_spec=input_spec,
        output_spec=output_spec)
    logger.info('Neuropod saved to: {}'.format(neuropod_path))