def get_dataset(data_pars):
    """Build (x, y) windows for a time-series model from CSV files.

    data_pars keys used:
        prediction_length : window length (rows taken from the tail).
        col_Xinput        : list of feature column names.
        col_ytarget       : target column name.
        train_data_path   : CSV with training rows (required).
        test_data_path    : optional CSV with test rows; when absent the
                            last `prediction_length` rows of the train CSV
                            are used as the test split.
        predict           : truthy -> return only (x_test, y_test).

    Returns
    -------
    (x_test, y_test) when data_pars["predict"] is truthy, otherwise
    (x_train, y_train, x_test, y_test); x arrays have shape
    (-1, prediction_length, len(features)) and y arrays (-1, prediction_length, 1).
    """
    pred_length = data_pars["prediction_length"]
    features = data_pars["col_Xinput"]
    target = data_pars["col_ytarget"]
    feat_len = len(features)

    # Case 1: separate train and test files are provided.
    if data_pars.get("test_data_path"):
        test = pd.read_csv(path_norm(data_pars["test_data_path"]))
        test = test.fillna(method="pad")
        ntest = pred_length  # len(test)
        test = test.iloc[-ntest:]
        x_test = test[features].values.reshape(-1, ntest, feat_len)
        y_test = test[target].values.reshape(-1, ntest, 1)
        if data_pars["predict"]:
            return x_test, y_test

        train = pd.read_csv(path_norm(data_pars["train_data_path"]))
        train = train.fillna(method="pad")
        ntrain = pred_length  # len(train)
        train = train.iloc[-ntrain:]
        x_train = train[features].values.reshape(-1, ntrain, feat_len)
        # Target is shifted by one step (predict next value); the leading
        # NaN introduced by shift() is filled with 0.
        y_train = train[target].shift().fillna(0).values.reshape(-1, ntrain, 1)
        return x_train, y_train, x_test, y_test

    # Case 2: only a train file; the last `pred_length` rows become the test split.
    df = pd.read_csv(path_norm(data_pars["train_data_path"]))
    df = df.fillna(method="pad")
    x_train = df[features].iloc[:-pred_length].values.reshape(-1, pred_length, feat_len)
    # BUG FIX: fillna(method="pad") cannot fill the leading NaN created by
    # shift(); use fillna(0) as in the two-file branch above.
    y_train = df[target].iloc[:-pred_length].shift().fillna(0).values.reshape(-1, pred_length, 1)
    # BUG FIX: x_test previously selected df[target] (the label column) and
    # then reshaped to (-1, pred_length, feat_len), which fed the target in
    # as features and raised whenever feat_len != 1. Select the feature
    # columns, mirroring x_train.
    x_test = df[features].iloc[-pred_length:].values.reshape(-1, pred_length, feat_len)
    y_test = df[target].iloc[-pred_length:].shift().fillna(0).values.reshape(-1, pred_length, 1)

    if data_pars["predict"]:
        return x_test, y_test
    return x_train, y_train, x_test, y_test
def get_dataset_titanic(data_pars):
    """Load the Titanic train/test CSVs and split off the 'survived' target.

    Returns (x_train, y_train, x_test, y_test) where each x is a DataFrame
    without the 'survived' column and each y is the popped 'survived' Series.
    """
    splits = []
    for path_key in ("train_data_path", "test_data_path"):
        frame = pd.read_csv(path_norm(data_pars[path_key]))
        labels = frame.pop('survived')
        splits.append((frame, labels))

    (x_train, y_train), (x_test, y_test) = splits
    return x_train, y_train, x_test, y_test
def analyze_datainfo_paths(data_info):
    """Resolve dataset paths and create the train/test output directories.

    data_info must contain 'data_path' (base directory) and 'dataset'
    (file name WITH extension, e.g. "dataset.txt").

    Returns
    -------
    (dataset_path, path_train_dataset, path_valid_dataset)

    Raises
    ------
    Exception : when a required key is missing or the dataset name has no
        file extension.
    """
    # BUG FIX: validate BEFORE calling path_norm. The original ran
    # path_norm(data_info.get("data_path", None)) first, so a missing
    # 'data_path' surfaced as an opaque TypeError instead of this message.
    data_path = data_info.get("data_path", None)
    dataset = data_info.get("dataset", None)
    if not dataset or not data_path:
        raise Exception("please add these 'data_path','dataset' in data_info")
    data_path = path_norm(data_path)

    if dataset.find('.') > -1:
        dataset_name = dataset.split('.')[0]
    else:
        raise Exception("please add dataset Extension like that : dataset.txt")

    path_train = os.path.join(data_path, 'train')
    os.makedirs(path_train, exist_ok=True)
    path_train_dataset = os.path.join(path_train, dataset_name + '.csv')

    path_valid = os.path.join(data_path, 'test')
    os.makedirs(path_valid, exist_ok=True)
    path_valid_dataset = os.path.join(path_valid, dataset_name + '.csv')

    dataset_path = os.path.join(data_path, dataset)
    return dataset_path, path_train_dataset, path_valid_dataset
def get_params(param_pars=None, **kw):
    """Return (model_pars, data_pars, compute_pars, out_pars) for the ARMDN model.

    param_pars["choice"]:
        "json"  : load the four dicts from the JSON config at data_path,
                  keyed by param_pars["config_mode"].
        "test0" : built-in smoke-test configuration (milk time series).

    Raises
    ------
    Exception : for any other choice (the original silently returned None).
    """
    # avoid the mutable-default-argument anti-pattern
    param_pars = param_pars or {}
    data_path = param_pars["data_path"]
    config_mode = param_pars["config_mode"]

    if param_pars["choice"] == "json":
        data_path = path_norm(data_path)
        # close the config file deterministically (original leaked the handle)
        with open(data_path, mode='r') as fp:
            cf = json.load(fp)
        cf = cf[config_mode]
        return cf['model_pars'], cf['data_pars'], cf['compute_pars'], cf['out_pars']

    if param_pars["choice"] == "test0":
        log("#### Path params ##########################################")
        data_path = path_norm(data_path)
        out_path = path_norm("ztest/model_keras/armdn/")
        os.makedirs(out_path, exist_ok=True)
        log(data_path, out_path)

        data_pars = {
            "train_data_path": data_path + "timeseries/milk.csv",
            "train": False,
            "prediction_length": 12,
            "col_Xinput": ["milk_production_pounds"],
            "col_ytarget": "milk_production_pounds"
        }
        model_pars = {
            "lstm_h_list": [300, 200, 24],
            "last_lstm_neuron": 12,
            "timesteps": 12,
            "dropout_rate": 0.1,
            "n_mixes": 3,
            "dense_neuron": 10,
        }
        compute_pars = {
            "batch_size": 32,
            "clip_gradient": 100,
            "ctx": None,
            "epochs": 10,
            "learning_rate": 0.05,
            "patience": 50
        }
        outpath = out_path + "result"
        out_pars = {"outpath": outpath}
        return model_pars, data_pars, compute_pars, out_pars

    # consistent with the other get_params variants in this project
    raise Exception(f"Not support choice {param_pars['choice']} yet")
def get_params(param_pars=None, **kw):
    """Return (model_pars, data_pars, compute_pars, out_pars) for the textcnn model.

    param_pars["choice"]:
        "json"   : load the four dicts from the JSON config at data_path,
                   keyed by param_pars["config_mode"].
        "test01" : built-in smoke-test configuration (IMDB sentiment).

    Raises
    ------
    Exception : for any other choice.
    """
    from jsoncomment import JsonComment
    json = JsonComment()

    # avoid the mutable-default-argument anti-pattern
    param_pars = param_pars or {}
    choice = param_pars['choice']
    config_mode = param_pars['config_mode']
    data_path = param_pars['data_path']

    if choice == "json":
        data_path = path_norm(data_path)
        # close the config file deterministically (original leaked the handle)
        with open(data_path, mode='r') as fp:
            cf = json.load(fp)
        cf = cf[config_mode]
        return cf['model_pars'], cf['data_pars'], cf['compute_pars'], cf['out_pars']

    if choice == "test01":
        log("#### Path params ##########################################")
        data_path = path_norm("dataset/text/imdb.csv")
        out_path = path_norm("ztest/model_keras/textcnn/model.h5")
        model_path = out_path

        data_pars = {
            "path": data_path,
            "train": 1,
            "maxlen": 40,
            "max_features": 5,
        }
        model_pars = {
            "maxlen": 40,
            "max_features": 5,
            "embedding_dims": 50,
        }
        compute_pars = {
            "engine": "adam",
            "loss": "binary_crossentropy",
            "metrics": ["accuracy"],
            "batch_size": 1000,
            "epochs": 1
        }
        out_pars = {"path": out_path, "model_path": model_path}
        return model_pars, data_pars, compute_pars, out_pars

    else:
        raise Exception(f"Not support choice {choice} yet")
def get_params(param_pars=None, **kw):
    """Return (model_pars, data_pars, compute_pars, out_pars) for the CRF-BiLSTM model.

    param_pars["choice"]:
        "json"   : load the four dicts from the JSON config at data_path,
                   keyed by param_pars["config_mode"].
        "test01" : built-in smoke-test configuration (NER dataset).

    Raises
    ------
    Exception : for any other choice.
    """
    from jsoncomment import JsonComment
    json = JsonComment()

    # avoid the mutable-default-argument anti-pattern
    pp = param_pars or {}
    choice = pp["choice"]
    config_mode = pp["config_mode"]
    data_path = pp["data_path"]

    if choice == "json":
        data_path = path_norm(data_path)
        # close the config file deterministically (original leaked the handle)
        with open(data_path, mode="r") as fp:
            cf = json.load(fp)
        cf = cf[config_mode]
        return cf["model_pars"], cf["data_pars"], cf["compute_pars"], cf["out_pars"]

    if choice == "test01":
        log("#### Path params ##########################################")
        data_path = path_norm("dataset/text/ner_dataset.csv")
        out_path = path_norm("ztest/model_keras/crf_bilstm/")
        model_path = os.path.join(out_path, "model")

        data_pars = {
            "path": data_path,
            "train": 1,
            "maxlen": 400,
            "max_features": 10,
        }
        model_pars = {}
        compute_pars = {
            "engine": "adam",
            "loss": "binary_crossentropy",
            "metrics": ["accuracy"],
            "batch_size": 32,
            "epochs": 1,
        }
        out_pars = {"path": out_path, "model_path": model_path}
        log(data_pars, out_pars)
        return model_pars, data_pars, compute_pars, out_pars

    else:
        raise Exception(f"Not support choice {choice} yet")
def test(data_path="dataset/", pars_choice="json", config_mode="test"):
    """End-to-end local smoke test for this model module.

    Runs the full pipeline in order: load params -> load dataset -> init
    model -> fit -> save -> load -> predict (from loaded and from fitted
    model) -> evaluate. All steps use the module-level get_params,
    get_dataset, Model, fit, save, load, predict and evaluate functions.

    Parameters
    ----------
    data_path   : base path passed through path_norm, then into get_params.
    pars_choice : forwarded as param_pars["choice"] (e.g. "json").
    config_mode : forwarded as param_pars["config_mode"].
    """
    ### Local test
    from mlmodels.util import path_norm
    data_path = path_norm(data_path)
    log("Json file path: ", data_path)

    log("#### Loading params ##############################################")
    param_pars = {
        "choice": pars_choice,
        "data_path": data_path,
        "config_mode": config_mode
    }
    model_pars, data_pars, compute_pars, out_pars = get_params(param_pars)
    log(model_pars, data_pars, compute_pars, out_pars)

    log("#### Loading dataset #############################################")
    Xtuple = get_dataset(data_pars)
    print(len(Xtuple))

    log("#### Model init #############################################")
    session = None
    model = Model(model_pars, data_pars, compute_pars)

    log("#### Model fit #############################################")
    # train mode flag must be set before fit
    data_pars["train"] = 1
    model, session = fit(model, session, data_pars, compute_pars, out_pars)

    log("#### Save ########################################################")
    save_pars = {"path": out_pars['path'] + "/model.pkl"}
    save(model, session, save_pars=save_pars)

    log("#### Load ########################################################")
    model2, session2 = load(save_pars)

    log("#### Predict from Load ###########################################")
    # switch to inference mode for predictions
    data_pars["train"] = 0
    ypred, _ = predict(model2, session2, data_pars, compute_pars, out_pars)

    log("#### Predict #####################################################")
    data_pars["train"] = 0
    ypred, _ = predict(model, session, data_pars, compute_pars, out_pars)
    # print("ypred : ", ypred)
    # print("ypred shape: ", ypred.shape)

    log("#### metrics #####################################################")
    metrics_val = evaluate(model, session, data_pars, compute_pars, out_pars)
    log(metrics_val)

    log("#### Plot ########################################################")
def get_params(param_pars, **kw):
    # Return (model_pars, data_pars, compute_pars, out_pars).
    # "json" choice: load the four dicts from a JSON config file and
    # normalize every path entry via path_norm_dict.
    # NOTE(review): the "test01" branch below is truncated in this chunk
    # (its docstring opens but the body is not visible here).
    choice = param_pars['choice']
    config_mode = param_pars['config_mode']
    data_path = param_pars['data_path']

    if choice == "json":
        data_path = path_norm(data_path)
        cf = json.load(open(data_path, mode='r'))
        cf = cf[config_mode]
        # normalize embedded relative paths (e.g. dataset/, ztest/)
        cf = path_norm_dict(cf)
        return cf['model_pars'], cf['data_pars'], cf['compute_pars'], cf['out_pars']

    if choice == "test01":
        """
def load(load_pars):
    """Rebuild a Model from a directory written by the companion `save`.

    Reads <load_pars['path']>/torch_model/torch_model_pars.pkl to recover
    the constructor arguments, instantiates a fresh Model, then loads the
    torch weights with mlmodels.util.load_tch and grafts them on.

    Returns the reconstructed Model.
    """
    from mlmodels.util import load_tch
    import pickle

    path = path_norm(load_pars['path'] + "/torch_model/")

    ### Setup Model
    # BUG FIX: the original pickle.load(open(...)) never closed the file
    # handle; use a context manager.
    with open(path + "/torch_model_pars.pkl", mode="rb") as fp:
        d = pickle.load(fp)
    model = Model(model_pars=d['model_pars'],
                  compute_pars=d['compute_pars'],
                  data_pars=d['data_pars'])

    ### Specialized part: load the torch weights from the same directory
    load_pars2 = copy.deepcopy(load_pars)
    load_pars2['path'] = path
    model2 = load_tch(load_pars2)

    model.model = model2.model
    return model
def get_params(param_pars=None, **kw):
    """Return (model_pars, data_pars, compute_pars, out_pars) for the torch textcnn model.

    param_pars["choice"]:
        "json"   : load the four dicts from the JSON config at data_path,
                   keyed by param_pars["config_mode"].
        "test01" : built-in smoke-test configuration (IMDB sample).
    """
    from jsoncomment import JsonComment
    json = JsonComment()

    pp = param_pars
    choice = pp['choice']
    config_mode = pp['config_mode']
    data_path = pp['data_path']

    if choice == "json":
        data_path = path_norm(data_path)
        # close the config file deterministically (original leaked the handle)
        with open(data_path, 'r') as fp:
            cf = json.load(fp)
        cf = cf[config_mode]
        return cf['model_pars'], cf['data_pars'], cf['compute_pars'], cf['out_pars']

    if choice == "test01":
        log("#### Path params ##########################################")
        data_path = path_norm("dataset/text/imdb.csv")
        out_path = path_norm("ztest/model_tch/textcnn/")
        model_path = os.path.join(out_path, "model")

        data_pars = {
            "data_path": path_norm("dataset/recommender/IMDB_sample.txt"),
            "train_path": path_norm("dataset/recommender/IMDB_train.csv"),
            "valid_path": path_norm("dataset/recommender/IMDB_valid.csv"),
            "split_if_exists": True,
            "frac": 0.99,
            "lang": "en",
            "pretrained_emb": "glove.6B.300d",
            "batch_size": 64,
            "val_batch_size": 64,
        }
        model_pars = {
            "dim_channel": 100,
            "kernel_height": [3, 4, 5],
            "dropout_rate": 0.5,
            "num_class": 2
        }
        compute_pars = {
            "learning_rate": 0.001,
            "epochs": 1,
            "checkpointdir": out_path + "/checkpoint/"
        }
        out_pars = {
            "path": model_path,
            "checkpointdir": out_path + "/checkpoint/"
        }
        return model_pars, data_pars, compute_pars, out_pars
def get_params(param_pars=None, **kw):
    """Return (model_pars, data_pars, compute_pars, out_pars) from a JSON config.

    Only choice == "json" is supported: the config at param_pars["data_path"]
    is loaded, the section param_pars["config_mode"] selected, and the
    data_pars / out_pars path entries normalized with path_norm_dict.

    Raises
    ------
    Exception : for any other choice.
    """
    pp = param_pars
    choice = pp['choice']
    config_mode = pp['config_mode']
    data_path = pp['data_path']

    if choice == "json":
        data_path = path_norm(data_path)
        # close the config file deterministically (original leaked the handle)
        with open(data_path, mode='r') as fp:
            cf = json.load(fp)
        cf = cf[config_mode]

        #### Normalize path : add /models/dataset/
        cf['data_pars'] = path_norm_dict(cf['data_pars'])
        cf['out_pars'] = path_norm_dict(cf['out_pars'])
        return cf['model_pars'], cf['data_pars'], cf['compute_pars'], cf['out_pars']

    else:
        raise Exception(f"Not support choice {choice} yet")
def save(model, session=None, save_pars=None):
    """Persist a torch Model to <save_pars['path']>/torch_model/.

    Writes the torch weights via mlmodels.util.save_tch, plus a pickle of
    the constructor arguments (model_pars / compute_pars / data_pars) so
    the companion `load` can rebuild the Model. `session` is unused; kept
    for interface symmetry with the other model modules.
    """
    import pickle
    from mlmodels.util import save_tch

    path = path_norm(save_pars['path'] + "/torch_model/")
    os.makedirs(Path(path), exist_ok=True)

    ### Specialized part: torch weights
    save2 = copy.deepcopy(save_pars)
    save2['path'] = path
    save_tch(model=model, save_pars=save2)

    ### Setup Model: constructor arguments for load()
    d = {"model_pars": model.model_pars,
         "compute_pars": model.compute_pars,
         "data_pars": model.data_pars}
    # BUG FIX: the original pickle.dump(d, open(...)) never closed the
    # file; unflushed buffers could leave a truncated pickle on disk.
    with open(path + "/torch_model_pars.pkl", mode="wb") as fp:
        pickle.dump(d, fp)

    log(path, os.listdir(path))
def test(data_path="dataset/", pars_choice="test01", config_mode="test"):
    """End-to-end local smoke test for this model module.

    Runs, in order: load params -> load dataset -> init/fit model ->
    predict -> evaluate -> save -> load -> predict again with the
    reloaded model. Uses the module-level get_params, get_dataset, Model,
    fit, predict, evaluate, save and load functions.

    Parameters
    ----------
    data_path   : base path passed through path_norm, then into get_params.
    pars_choice : forwarded as param_pars["choice"] (e.g. "test01").
    config_mode : forwarded as param_pars["config_mode"].
    """
    ### Local test
    from mlmodels.util import path_norm
    data_path = path_norm(data_path)

    log("#### Loading params ##############################################")
    param_pars = {
        "choice": pars_choice,
        "data_path": data_path,
        "config_mode": config_mode
    }
    model_pars, data_pars, compute_pars, out_pars = get_params(param_pars)

    log("#### Loading dataset #############################################")
    Xtuple = get_dataset(data_pars)

    log("#### Model init, fit #############################################")
    # NOTE: this module's Model takes (model_pars, compute_pars) and fit
    # takes model_pars explicitly — a different signature from the sibling
    # modules in this file.
    model = Model(model_pars, compute_pars)
    model, session = fit(model, data_pars, model_pars, compute_pars, out_pars)

    log("#### Predict #####################################################")
    ypred = predict(model, session, data_pars, compute_pars, out_pars)

    log("#### metrics #####################################################")
    metrics_val = evaluate(model, ypred, data_pars, compute_pars, out_pars)
    print(metrics_val)

    log("#### Plot ########################################################")

    log("#### Save ###################################################")
    save_pars = {"path": out_pars['path']}
    save(model, session, save_pars=save_pars)

    log("#### Load #####################################################")
    model2, session2 = load(save_pars)
    print(model2, session2)

    log("#### Predict ################################################")
    ypred = predict(model2, session2, data_pars, compute_pars, out_pars)
    print(ypred)
def test(data_path="dataset/", pars_choice="json", config_mode="test"):
    """End-to-end local smoke test for this model module.

    Runs, in order: load params -> load dataset -> init model -> fit ->
    predict -> evaluate -> save/load. Uses the module-level get_params,
    get_dataset, Model, fit, predict, evaluate, save and load functions.

    Parameters
    ----------
    data_path   : base path passed through path_norm, then into get_params.
    pars_choice : forwarded as param_pars["choice"] (e.g. "json").
    config_mode : forwarded as param_pars["config_mode"].
    """
    ### Local test
    from mlmodels.util import path_norm
    data_path = path_norm(data_path)

    log("#### Loading params ##############################################")
    param_pars = {
        "choice": pars_choice,
        "data_path": data_path,
        "config_mode": config_mode
    }
    model_pars, data_pars, compute_pars, out_pars = get_params(param_pars)

    # typo fix: message previously read "Loading daaset"
    log("#### Loading dataset #############################################")
    Xtuple = get_dataset(data_pars)

    log("#### Model init, fit #############################################")
    session = None
    model = Model(model_pars, data_pars, compute_pars)
    model, session = fit(model, data_pars, compute_pars, out_pars)

    log("#### Predict #####################################################")
    # switch to inference mode for prediction
    data_pars["train"] = 0
    ypred = predict(model, session, data_pars, compute_pars, out_pars)

    log("#### metrics #####################################################")
    metrics_val = evaluate(model, data_pars, compute_pars, out_pars)
    print(metrics_val)

    log("#### Plot ########################################################")

    log("#### Save/Load ###################################################")
    save(model, session, save_pars=out_pars)
    model2 = load(out_pars)
    # ypred = predict(model2, data_pars, compute_pars, out_pars)
    # metrics_val = metrics(model2, ypred, data_pars, compute_pars, out_pars)
    print(model2)
def get_config_file():
    """Return the normalized path to the Imagecnn model's JSON config."""
    config_relpath = 'config/model_tch/Imagecnn.json'
    return path_norm(config_relpath)
def get_params(param_pars=None, **kw):
    """Return (model_pars, data_pars, compute_pars, out_pars) for the charcnn model.

    param_pars["choice"]:
        "json"   : load the four dicts from the JSON config at data_path,
                   keyed by param_pars["config_mode"].
        "test01" : built-in smoke-test configuration (AG news char-CNN).

    Raises
    ------
    Exception : for any other choice.
    """
    from jsoncomment import JsonComment
    json = JsonComment()

    # avoid the mutable-default-argument anti-pattern
    pp = param_pars or {}
    choice = pp['choice']
    config_mode = pp['config_mode']
    data_path = pp['data_path']

    if choice == "json":
        data_path = path_norm(data_path)
        # close the config file deterministically (original leaked the handle)
        with open(data_path, mode='r') as fp:
            cf = json.load(fp)
        cf = cf[config_mode]
        return cf['model_pars'], cf['data_pars'], cf['compute_pars'], cf['out_pars']

    if choice == "test01":
        log("#### Path params ##########################################")
        # removed unused local `root = path_norm()`
        data_path = path_norm("dataset/text/imdb.npz")
        out_path = path_norm("ztest/model_keras/charcnn/")
        model_path = os.path.join(out_path, "model")

        model_pars = {
            "embedding_size": 128,
            "conv_layers": [[256, 10], [256, 7], [256, 5], [256, 3]],
            "fully_connected_layers": [1024, 1024],
            "threshold": 1e-6,
            "dropout_p": 0.1,
            "optimizer": "adam",
            "loss": "categorical_crossentropy"
        }
        data_pars = {
            "train": True,
            "alphabet": "abcdefghijklmnopqrstuvwxyz0123456789-,;.!?:'\"/\\|_@#$%^&*~`+-=<>()[]{}",
            "alphabet_size": 69,
            "input_size": 1014,
            "num_of_classes": 4,
            "train_data_source": path_norm("dataset/text/ag_news_csv/train.csv"),
            "val_data_source": path_norm("dataset/text/ag_news_csv/test.csv")
        }
        compute_pars = {
            "epochs": 1,
            "batch_size": 128
        }
        out_pars = {
            "path": path_norm("ztest/ml_keras/charcnn/charcnn.h5"),
            "data_type": "pandas",
            "size": [0, 0, 6],
            "output_size": [0, 6]
        }
        return model_pars, data_pars, compute_pars, out_pars

    else:
        raise Exception(f"Not support choice {choice} yet")
def __init__(self, **args):
    """Store normalized dataset and reader paths for the train/test splits.

    Expects args["train"] and args["test"], each a mapping with
    'dataset' and 'uri' entries; every path goes through path_norm.
    """
    for split in ("train", "test"):
        cfg = args[split]
        setattr(self, f"{split}_data", path_norm(cfg["dataset"]))
        setattr(self, f"{split}_reader", path_norm(cfg["uri"]))
def get_params(choice="", data_path="dataset/", config_mode="test", **kwargs):
    """Return (model_pars, data_pars, compute_pars, out_pars) for the deepctr tests.

    choice:
        "json" : load everything via config_load from 01_deepctr.json.
        0 / 1  : criteo sample, DeepFM binary task (1 adds hash_feature).
        2 - 4  : movielens sample, DeepFM regression (3 adds multiple_value,
                 4 adds multiple_value + hash_feature).
        5      : synthetic dataset for kwargs["model_name"], pulling extra
                 settings from the module-level DATA_PARAMS table.

    Raises
    ------
    Exception : for any unrecognized choice (the original fell through to
        an UnboundLocalError on the final return).
    """
    if choice == "json":
        model_pars, data_pars, compute_pars, out_pars = config_load(
            data_path, file_default="model_keras/01_deepctr.json",
            config_mode=config_mode)
        return model_pars, data_pars, compute_pars, out_pars

    if choice == 0:
        log("#### Path params ###################################################")
        data_path, _ = path_setup(out_folder="/deepctr_test/", data_path=data_path)
        out_path = path_norm("ztest/model_keras/deepctr/model.h5")
        train_data_path = data_path + "recommender/criteo_sample.txt"
        data_pars = {"train_data_path": train_data_path, "dataset_type": "criteo",
                     "test_size": 0.2}

        log("#### Model params #################################################")
        model_pars = {"task": "binary", "model_name": "DeepFM",
                      "optimization": "adam", "cost": "binary_crossentropy"}
        compute_pars = {"batch_size": 256, "epochs": 10, "validation_split": 0.2}
        out_pars = {"path": out_path}

    elif choice == 1:
        log("#### Path params ##################################################")
        data_path, _ = path_setup(out_folder="/deepctr_test/", data_path=data_path)
        out_path = path_norm("ztest/model_keras/deepctr/model.h5")
        train_data_path = data_path + "recommender/criteo_sample.txt"
        data_pars = {"train_data_path": train_data_path, "hash_feature": True,
                     "dataset_type": "criteo", "test_size": 0.2}

        log("#### Model params #################################################")
        model_pars = {"task": "binary", "model_name": "DeepFM",
                      "optimization": "adam", "cost": "binary_crossentropy"}
        compute_pars = {"batch_size": 256, "epochs": 10, "validation_split": 0.2}
        out_pars = {"path": out_path}

    elif choice == 2:
        log("#### Path params ################################################")
        data_path, _ = path_setup(out_folder="/ here_test/", data_path=data_path)
        out_path = path_norm("ztest/model_keras/deepctr/model.h5")
        train_data_path = data_path + "/recommender/movielens_sample.txt"
        data_pars = {"train_data_path": train_data_path,
                     "dataset_type": "movie_len", "test_size": 0.2}

        log("#### Model params ################################################")
        model_pars = {"task": "regression", "model_name": "DeepFM",
                      "optimization": "adam", "cost": "mse"}
        compute_pars = {"batch_size": 256, "epochs": 10, "validation_split": 0.2}
        out_pars = {"path": out_path}

    elif choice == 3:
        log("#### Path params ##################################################")
        data_path, _ = path_setup(out_folder="/deepctr_test/", data_path=data_path)
        out_path = path_norm("ztest/model_keras/deepctr/model.h5")
        train_data_path = data_path + "/recommender/movielens_sample.txt"
        data_pars = {"train_data_path": train_data_path, "multiple_value": True,
                     "dataset_type": "movie_len", "test_size": 0.2}

        log("#### Model params ################################################")
        model_pars = {"task": "regression", "model_name": "DeepFM",
                      "optimization": "adam", "cost": "mse"}
        compute_pars = {"batch_size": 256, "epochs": 10, "validation_split": 0.2}
        out_pars = {"path": out_path}

    elif choice == 4:
        log("#### Path params #################################################")
        data_path, _ = path_setup(out_folder="/deepctr_test/", data_path=data_path)
        out_path = path_norm("ztest/model_keras/deepctr/model.h5")
        train_data_path = data_path + "/recommender/movielens_sample.txt"
        data_pars = {"train_data_path": train_data_path, "multiple_value": True,
                     "hash_feature": True, "dataset_type": "movie_len",
                     "test_size": 0.2}

        log("#### Model params ################################################")
        model_pars = {"task": "regression", "model_name": "DeepFM",
                      "optimization": "adam", "cost": "mse"}
        compute_pars = {"batch_size": 256, "epochs": 10, "validation_split": 0.2}
        out_pars = {"path": out_path}

    elif choice == 5:
        log("#### Path params #################################################")
        # de-duplicated: the original read kwargs["model_name"] twice
        model_name = kwargs["model_name"]
        out_path = path_norm(f"ztest/model_keras/deepctr/model_{model_name}.h5")
        data_pars = {"dataset_type": "synthesis", "sample_size": 8,
                     "test_size": 0.2, "dataset_name": model_name,
                     **DATA_PARAMS[model_name]}

        log("#### Model params ################################################")
        model_pars = {"model_name": model_name, "optimization": "adam",
                      "cost": "mse"}
        compute_pars = {"batch_size": 100, "epochs": 1, "validation_split": 0.5}
        out_pars = {"path": out_path}

    else:
        # consistent with the other get_params variants in this project
        raise Exception(f"Not support choice {choice} yet")

    return model_pars, data_pars, compute_pars, out_pars
""" import os from keras.callbacks import EarlyStopping ######## Logs from mlmodels.util import os_package_root_path, log, get_model_uri #### Import EXISTING model and re-map to mlmodels from mlmodels.model_keras.raw.char_cnn.data_utils import Data from mlmodels.model_keras.raw.char_cnn.models.char_cnn_zhang import CharCNNZhang from mlmodels.util import path_norm print(path_norm("dataset")) #################################################################################################### VERBOSE = False MODEL_URI = get_model_uri(__file__) #################################################################################################### class Model: def __init__(self, model_pars=None, data_pars=None, compute_pars=None): ### Model Structure ################################ if model_pars is None: self.model = None