Example #1
0
def get_dataset(data_pars):
    """Load train/test time-series windows described by ``data_pars``.

    Reads CSV files (paths normalized via ``path_norm``), forward-fills
    missing values, and reshapes the feature/target columns into arrays of
    shape ``(n_samples, prediction_length, n_features)``.

    Args:
        data_pars (dict): requires ``prediction_length``, ``col_Xinput``
            (list of feature columns), ``col_ytarget`` (target column) and
            ``train_data_path``; ``test_data_path`` and ``predict`` are
            optional.

    Returns:
        ``(x_test, y_test)`` when ``data_pars["predict"]`` is truthy,
        otherwise ``(x_train, y_train, x_test, y_test)``.
    """
    pred_length = data_pars["prediction_length"]
    features = data_pars["col_Xinput"]
    target = data_pars["col_ytarget"]
    feat_len = len(features)

    # when train and test both are provided
    if data_pars.get("test_data_path"):
        test = pd.read_csv(path_norm(data_pars["test_data_path"]))
        test = test.fillna(method="pad")
        ntest = pred_length  # len(test)
        test = test.iloc[-ntest:]

        x_test = test[features]
        x_test = x_test.values.reshape(-1, ntest, feat_len)
        y_test = test[target]
        y_test = y_test.values.reshape(-1, ntest, 1)

        if data_pars["predict"]:
            return x_test, y_test

        train = pd.read_csv(path_norm(data_pars["train_data_path"]))
        train = train.fillna(method="pad")

        ntrain = pred_length  # len(train)
        train = train.iloc[-ntrain:]

        x_train = train[features]
        x_train = x_train.values.reshape(-1, ntrain, feat_len)
        # shift() introduces a NaN at the first row; fill it with 0.
        y_train = train[target].shift().fillna(0)
        y_train = y_train.values.reshape(-1, ntrain, 1)

        return x_train, y_train, x_test, y_test

    # for when only train is provided: split the tail off as the test window
    df = pd.read_csv(path_norm(data_pars["train_data_path"]))
    df = df.fillna(method="pad")
    x_train = df[features].iloc[:-pred_length]
    x_train = x_train.values.reshape(-1, pred_length, feat_len)
    # BUG FIX: fillna(0) instead of fillna(method="pad") — shift() puts the
    # NaN at the FIRST row, which a forward fill leaves untouched.
    y_train = df[target].iloc[:-pred_length].shift().fillna(0)
    y_train = y_train.values.reshape(-1, pred_length, 1)

    # BUG FIX: x_test must use the feature columns, not the target column;
    # the old df[target] selection breaks the reshape whenever feat_len > 1.
    x_test = df[features].iloc[-pred_length:]
    x_test = x_test.values.reshape(-1, pred_length, feat_len)
    y_test = df[target].iloc[-pred_length:].shift().fillna(0)
    y_test = y_test.values.reshape(-1, pred_length, 1)

    if data_pars["predict"]:
        return x_test, y_test
    return x_train, y_train, x_test, y_test
Example #2
0
def get_dataset_titanic(data_pars):
    """Load the titanic train/test CSVs and split off the 'survived' label.

    Returns:
        tuple: ``(x_train, y_train, x_test, y_test)`` where the x parts are
        DataFrames without the label column and the y parts are Series.
    """
    train_csv = path_norm(data_pars['train_data_path'])
    test_csv = path_norm(data_pars['test_data_path'])

    # Training split: pop removes the label column in place.
    x_train = pd.read_csv(train_csv)
    y_train = x_train.pop('survived')

    # Testing split, same layout as training.
    x_test = pd.read_csv(test_csv)
    y_test = x_test.pop('survived')
    return x_train, y_train, x_test, y_test
Example #3
0
def analyze_datainfo_paths(data_info):
    """Build dataset/train/valid file paths from a ``data_info`` dict.

    Creates ``<data_path>/train`` and ``<data_path>/test`` directories as a
    side effect.

    Args:
        data_info (dict): must contain ``data_path`` (root folder) and
            ``dataset`` (file name WITH extension, e.g. ``dataset.txt``).

    Returns:
        tuple: ``(dataset_path, path_train_dataset, path_valid_dataset)``.

    Raises:
        Exception: if ``data_path``/``dataset`` are missing, or ``dataset``
            has no file extension.
    """
    # BUG FIX: validate BEFORE calling path_norm — the old code normalized
    # first, which could crash on a missing 'data_path' before reaching the
    # friendly error message below.
    data_path = data_info.get("data_path", None)
    dataset = data_info.get("dataset", None)

    if not dataset or not data_path:
        raise Exception("please add these 'data_path','dataset' in data_info")

    if '.' in dataset:
        dataset_name = dataset.split('.')[0]
    else:
        raise Exception("please add dataset Extension like that : dataset.txt")

    data_path = path_norm(data_path)

    path_train = os.path.join(data_path, 'train')
    os.makedirs(path_train, exist_ok=True)
    path_train_dataset = os.path.join(path_train, dataset_name + '.csv')

    path_valid = os.path.join(data_path, 'test')
    os.makedirs(path_valid, exist_ok=True)
    path_valid_dataset = os.path.join(path_valid, dataset_name + '.csv')

    dataset_path = os.path.join(data_path, dataset)

    return dataset_path, path_train_dataset, path_valid_dataset
Example #4
0
def get_params(param_pars=None, **kw):
    """Return (model_pars, data_pars, compute_pars, out_pars) for the armdn model.

    choice "json" : load the four dicts from the JSON config at ``data_path``.
    choice "test0": built-in milk time-series test configuration.

    Raises:
        Exception: for any other choice (the old code fell through and
            crashed with UnboundLocalError on the final return).
    """
    # Avoid a mutable default argument; treat None as "empty".
    param_pars = {} if param_pars is None else param_pars
    data_path = param_pars["data_path"]
    config_mode = param_pars["config_mode"]
    choice = param_pars["choice"]

    if choice == "json":
        data_path = path_norm(data_path)
        cf = json.load(open(data_path, mode='r'))
        cf = cf[config_mode]
        return cf['model_pars'], cf['data_pars'], cf['compute_pars'], cf[
            'out_pars']

    if choice == "test0":
        log("#### Path params   ##########################################")
        data_path = path_norm(data_path)
        out_path = path_norm("ztest/model_keras/armdn/")
        os.makedirs(out_path, exist_ok=True)
        log(data_path, out_path)

        data_pars = {
            "train_data_path": data_path + "timeseries/milk.csv",
            "train": False,
            "prediction_length": 12,
            "col_Xinput": ["milk_production_pounds"],
            "col_ytarget": "milk_production_pounds"
        }

        model_pars = {
            "lstm_h_list": [300, 200, 24],
            "last_lstm_neuron": 12,
            "timesteps": 12,
            "dropout_rate": 0.1,
            "n_mixes": 3,
            "dense_neuron": 10,
        }

        compute_pars = {
            "batch_size": 32,
            "clip_gradient": 100,
            "ctx": None,
            "epochs": 10,
            "learning_rate": 0.05,
            "patience": 50
        }
        outpath = out_path + "result"
        out_pars = {"outpath": outpath}

        return model_pars, data_pars, compute_pars, out_pars

    raise Exception(f"Not support choice {choice} yet")
Example #5
0
def get_params(param_pars=None, **kw):
    """Return (model_pars, data_pars, compute_pars, out_pars) for the keras textcnn model.

    choice "json"  : parse the comment-tolerant JSON config at ``data_path``.
    choice "test01": built-in IMDB test configuration.

    Raises:
        Exception: for any other choice.
    """
    # Avoid a mutable default argument; treat None as "empty".
    param_pars = {} if param_pars is None else param_pars
    choice = param_pars['choice']
    config_mode = param_pars['config_mode']
    data_path = param_pars['data_path']

    if choice == "json":
        # Lazy import: jsoncomment is only needed to parse the config file,
        # so the other choices work even when it is not installed.
        from jsoncomment import JsonComment
        json = JsonComment()
        data_path = path_norm(data_path)
        cf = json.load(open(data_path, mode='r'))
        cf = cf[config_mode]
        return cf['model_pars'], cf['data_pars'], cf['compute_pars'], cf[
            'out_pars']

    if choice == "test01":
        log("#### Path params   ##########################################")
        data_path = path_norm("dataset/text/imdb.csv")
        out_path = path_norm("ztest/model_keras/textcnn/model.h5")
        model_path = out_path

        data_pars = {
            "path": data_path,
            "train": 1,
            "maxlen": 40,
            "max_features": 5,
        }

        model_pars = {
            "maxlen": 40,
            "max_features": 5,
            "embedding_dims": 50,
        }

        compute_pars = {
            "engine": "adam",
            "loss": "binary_crossentropy",
            "metrics": ["accuracy"],
            "batch_size": 1000,
            "epochs": 1
        }

        out_pars = {"path": out_path, "model_path": model_path}

        return model_pars, data_pars, compute_pars, out_pars

    raise Exception(f"Not support choice {choice} yet")
Example #6
0
def get_params(param_pars=None, **kw):
    """Return (model_pars, data_pars, compute_pars, out_pars) for the crf_bilstm model.

    choice "json"  : parse the comment-tolerant JSON config at ``data_path``.
    choice "test01": built-in NER test configuration.

    Raises:
        Exception: for any other choice.
    """
    # Avoid a mutable default argument; treat None as "empty".
    pp = {} if param_pars is None else param_pars
    choice = pp["choice"]
    config_mode = pp["config_mode"]
    data_path = pp["data_path"]

    if choice == "json":
        # Lazy import so the test01 path works without jsoncomment installed.
        from jsoncomment import JsonComment
        json = JsonComment()
        data_path = path_norm(data_path)
        cf = json.load(open(data_path, mode="r"))
        cf = cf[config_mode]
        return cf["model_pars"], cf["data_pars"], cf["compute_pars"], cf[
            "out_pars"]

    if choice == "test01":
        log("#### Path params   ##########################################")
        data_path = path_norm("dataset/text/ner_dataset.csv")
        out_path = path_norm("ztest/model_keras/crf_bilstm/")
        model_path = os.path.join(out_path, "model")

        data_pars = {
            "path": data_path,
            "train": 1,
            "maxlen": 400,
            "max_features": 10,
        }

        model_pars = {}
        compute_pars = {
            "engine": "adam",
            "loss": "binary_crossentropy",
            "metrics": ["accuracy"],
            "batch_size": 32,
            "epochs": 1,
        }

        out_pars = {"path": out_path, "model_path": model_path}

        log(data_pars, out_pars)

        return model_pars, data_pars, compute_pars, out_pars

    raise Exception(f"Not support choice {choice} yet")
Example #7
0
def test(data_path="dataset/", pars_choice="json", config_mode="test"):
    """End-to-end smoke test: params -> dataset -> init -> fit -> save/load -> predict -> metrics.

    NOTE(review): relies on module-level get_params/get_dataset/Model/fit/
    save/load/predict/evaluate; the step order is significant (fit before
    save, save before load).
    """
    ### Local test
    from mlmodels.util import path_norm
    data_path = path_norm(data_path)
    log("Json file path: ", data_path)

    log("#### Loading params   ##############################################")
    param_pars = {
        "choice": pars_choice,
        "data_path": data_path,
        "config_mode": config_mode
    }
    model_pars, data_pars, compute_pars, out_pars = get_params(param_pars)
    log(model_pars, data_pars, compute_pars, out_pars)

    log("#### Loading dataset   #############################################")
    Xtuple = get_dataset(data_pars)
    print(len(Xtuple))

    log("#### Model init       #############################################")
    session = None
    model = Model(model_pars, data_pars, compute_pars)

    log("#### Model fit        #############################################")
    # train flag switches get_dataset behavior inside fit
    data_pars["train"] = 1
    model, session = fit(model, session, data_pars, compute_pars, out_pars)

    log("#### Save   ########################################################")
    save_pars = {"path": out_pars['path'] + "/model.pkl"}
    save(model, session, save_pars=save_pars)

    log("#### Load   ########################################################")
    model2, session2 = load(save_pars)

    log("#### Predict from Load   ###########################################")
    # predict mode: train flag off
    data_pars["train"] = 0
    ypred, _ = predict(model2, session2, data_pars, compute_pars, out_pars)

    log("#### Predict   #####################################################")
    data_pars["train"] = 0
    ypred, _ = predict(model, session, data_pars, compute_pars, out_pars)
    # print("ypred : ", ypred)
    # print("ypred shape: ", ypred.shape)

    log("#### metrics   #####################################################")
    metrics_val = evaluate(model, session, data_pars, compute_pars, out_pars)
    log(metrics_val)

    log("#### Plot   ########################################################")
Example #8
0
def get_params(param_pars, **kw):
    choice = param_pars['choice']
    config_mode = param_pars['config_mode']
    data_path = param_pars['data_path']

    if choice == "json":
        data_path = path_norm(data_path)
        cf = json.load(open(data_path, mode='r'))
        cf = cf[config_mode]
        cf = path_norm_dict(cf)
        return cf['model_pars'], cf['data_pars'], cf['compute_pars'], cf[
            'out_pars']


    if choice == "test01":
        """
Example #9
0
def load(load_pars):
    """Restore a torch-backed Model previously written by ``save``.

    Args:
        load_pars (dict): must contain ``path``, the folder that holds the
            ``torch_model/`` subdirectory produced by ``save``.

    Returns:
        Model: wrapper rebuilt from the pickled parameter dicts, with the
        torch weights re-attached via ``load_tch``.
    """
    from mlmodels.util import load_tch
    import pickle
    load_pars2 = copy.deepcopy(load_pars)
    path = path_norm(load_pars['path'] + "/torch_model/")

    ### Setup Model: rebuild the wrapper from its pickled parameter dicts.
    # BUG FIX: close the file handle (the old code leaked it).
    with open(path + "/torch_model_pars.pkl", mode="rb") as fp:
        d = pickle.load(fp)
    model = Model(model_pars=d['model_pars'], compute_pars=d['compute_pars'],
                  data_pars=d['data_pars'])

    ### Specialized part: load the torch weights from the same directory.
    load_pars2['path'] = path
    model2 = load_tch(load_pars2)
    model.model = model2.model

    return model
Example #10
0
def get_params(param_pars=None, **kw):
    """Return (model_pars, data_pars, compute_pars, out_pars) for the torch textcnn model.

    choice "json"  : parse the comment-tolerant JSON config at ``data_path``.
    choice "test01": built-in IMDB-sample configuration.

    Raises:
        Exception: for any other choice (the old code fell through and
            silently returned None).
    """
    pp = {} if param_pars is None else param_pars
    choice = pp['choice']
    config_mode = pp['config_mode']
    data_path = pp['data_path']

    if choice == "json":
        # Lazy import so the test01 path works without jsoncomment installed.
        from jsoncomment import JsonComment
        json = JsonComment()
        data_path = path_norm(data_path)
        cf = json.load(open(data_path, 'r'))
        cf = cf[config_mode]
        return cf['model_pars'], cf['data_pars'], cf['compute_pars'], cf[
            'out_pars']

    if choice == "test01":
        log("#### Path params   ##########################################")
        data_path = path_norm("dataset/text/imdb.csv")
        out_path = path_norm("ztest/model_tch/textcnn/")
        model_path = os.path.join(out_path, "model")

        data_pars = {
            "data_path": path_norm("dataset/recommender/IMDB_sample.txt"),
            "train_path": path_norm("dataset/recommender/IMDB_train.csv"),
            "valid_path": path_norm("dataset/recommender/IMDB_valid.csv"),
            "split_if_exists": True,
            "frac": 0.99,
            "lang": "en",
            "pretrained_emb": "glove.6B.300d",
            "batch_size": 64,
            "val_batch_size": 64,
        }

        model_pars = {
            "dim_channel": 100,
            "kernel_height": [3, 4, 5],
            "dropout_rate": 0.5,
            "num_class": 2
        }

        compute_pars = {
            "learning_rate": 0.001,
            "epochs": 1,
            "checkpointdir": out_path + "/checkpoint/"
        }

        out_pars = {
            "path": model_path,
            "checkpointdir": out_path + "/checkpoint/"
        }

        return model_pars, data_pars, compute_pars, out_pars

    raise Exception(f"Not support choice {choice} yet")
Example #11
0
def get_params(param_pars=None, **kw):
    """Load (model_pars, data_pars, compute_pars, out_pars) from a JSON config.

    Only ``choice == "json"`` is supported; anything else raises. The
    ``data_pars`` and ``out_pars`` sections get their paths normalized.
    """
    pars = param_pars
    choice = pars['choice']
    config_mode = pars['config_mode']
    data_path = pars['data_path']

    # Guard clause: this variant only knows how to read a JSON config.
    if choice != "json":
        raise Exception(f"Not support choice {choice} yet")

    cfg = json.load(open(path_norm(data_path), mode='r'))[config_mode]

    #### Normalize path  : add /models/dataset/
    cfg['data_pars'] = path_norm_dict(cfg['data_pars'])
    cfg['out_pars'] = path_norm_dict(cfg['out_pars'])

    return cfg['model_pars'], cfg['data_pars'], cfg['compute_pars'], cfg['out_pars']
Example #12
0
def save(model, session=None, save_pars=None):
    """Persist a torch-backed Model: weights via ``save_tch`` plus a pickle
    of the parameter dicts needed to rebuild the wrapper in ``load``.

    Args:
        model: Model wrapper exposing model_pars/compute_pars/data_pars.
        session: unused, kept for interface compatibility.
        save_pars (dict): must contain ``path``, the destination folder.
    """
    import pickle
    from mlmodels.util import save_tch
    save2 = copy.deepcopy(save_pars)
    path = path_norm(save_pars['path'] + "/torch_model/")
    os.makedirs(Path(path), exist_ok=True)

    ### Specialized part: torch weights.
    save2['path'] = path
    save_tch(model=model, save_pars=save2)

    ### Setup Model: parameter dicts consumed by load().
    d = {"model_pars": model.model_pars,
         "compute_pars": model.compute_pars,
         "data_pars": model.data_pars
        }
    # BUG FIX: close the file handle (the old code leaked it).
    with open(path + "/torch_model_pars.pkl", mode="wb") as fp:
        pickle.dump(d, fp)
    log(path, os.listdir(path))
Example #13
0
def test(data_path="dataset/", pars_choice="test01", config_mode="test"):
    """End-to-end smoke test: params -> dataset -> fit -> predict -> metrics -> save/load.

    NOTE(review): relies on module-level get_params/get_dataset/Model/fit/
    predict/evaluate/save/load; the step order is significant.
    """
    ### Local test
    from mlmodels.util import path_norm
    data_path = path_norm(data_path)

    log("#### Loading params   ##############################################")
    param_pars = {
        "choice": pars_choice,
        "data_path": data_path,
        "config_mode": config_mode
    }
    model_pars, data_pars, compute_pars, out_pars = get_params(param_pars)

    log("#### Loading dataset   #############################################")
    Xtuple = get_dataset(data_pars)

    log("#### Model init, fit   #############################################")
    model = Model(model_pars, compute_pars)
    model, session = fit(model, data_pars, model_pars, compute_pars, out_pars)

    log("#### Predict   #####################################################")
    ypred = predict(model, session, data_pars, compute_pars, out_pars)

    log("#### metrics   #####################################################")
    metrics_val = evaluate(model, ypred, data_pars, compute_pars, out_pars)
    print(metrics_val)

    log("#### Plot   ########################################################")

    log("#### Save   ###################################################")
    save_pars = {"path": out_pars['path']}
    save(model, session, save_pars=save_pars)

    log("#### Load #####################################################")
    # round-trip: reload and predict again with the restored model
    model2, session2 = load(save_pars)
    print(model2, session2)

    log("#### Predict   ################################################")
    ypred = predict(model2, session2, data_pars, compute_pars, out_pars)
    print(ypred)
Example #14
0
def test(data_path="dataset/", pars_choice="json", config_mode="test"):
    """End-to-end smoke test: params -> dataset -> fit -> predict -> metrics -> save/load.

    NOTE(review): relies on module-level get_params/get_dataset/Model/fit/
    predict/evaluate/save/load; the step order is significant.
    """
    ### Local test
    from mlmodels.util import path_norm
    data_path = path_norm(data_path)

    log("#### Loading params   ##############################################")
    param_pars = {
        "choice": pars_choice,
        "data_path": data_path,
        "config_mode": config_mode
    }
    model_pars, data_pars, compute_pars, out_pars = get_params(param_pars)

    # BUG FIX: corrected "daaset" typo in the log message below.
    log("#### Loading dataset   #############################################")
    Xtuple = get_dataset(data_pars)

    log("#### Model init, fit   #############################################")
    session = None
    model = Model(model_pars, data_pars, compute_pars)
    model, session = fit(model, data_pars, compute_pars, out_pars)

    log("#### Predict   #####################################################")
    # predict mode: train flag off
    data_pars["train"] = 0
    ypred = predict(model, session, data_pars, compute_pars, out_pars)

    log("#### metrics   #####################################################")
    metrics_val = evaluate(model, data_pars, compute_pars, out_pars)
    print(metrics_val)

    log("#### Plot   ########################################################")

    log("#### Save/Load   ###################################################")
    save(model, session, save_pars=out_pars)
    model2 = load(out_pars)
    #     ypred = predict(model2, data_pars, compute_pars, out_pars)
    #     metrics_val = metrics(model2, ypred, data_pars, compute_pars, out_pars)
    print(model2)
Example #15
0
def get_config_file():
    """Return the normalized path of the Imagecnn model JSON config."""
    config_path = 'config/model_tch/Imagecnn.json'
    return path_norm(config_path)
Example #16
0
def get_params(param_pars=None, **kw):
    """Return (model_pars, data_pars, compute_pars, out_pars) for the charcnn model.

    choice "json"  : parse the comment-tolerant JSON config at ``data_path``.
    choice "test01": built-in ag_news character-CNN test configuration.

    Raises:
        Exception: for any other choice.
    """
    # Avoid a mutable default argument; treat None as "empty".
    pp = {} if param_pars is None else param_pars
    choice = pp['choice']
    config_mode = pp['config_mode']
    data_path = pp['data_path']

    if choice == "json":
        # Lazy import so the test01 path works without jsoncomment installed.
        from jsoncomment import JsonComment
        json = JsonComment()
        data_path = path_norm(data_path)
        cf = json.load(open(data_path, mode='r'))
        cf = cf[config_mode]
        return cf['model_pars'], cf['data_pars'], cf['compute_pars'], cf['out_pars']

    if choice == "test01":
        log("#### Path params   ##########################################")
        # NOTE: removed the unused `root = path_norm()` local from the original.
        data_path = path_norm("dataset/text/imdb.npz")
        out_path = path_norm("ztest/model_keras/charcnn/")
        model_path = os.path.join(out_path, "model")

        model_pars = {
            "embedding_size": 128,
            "conv_layers": [[256, 10], [256, 7], [256, 5], [256, 3]],
            "fully_connected_layers": [
                1024,
                1024
            ],
            "threshold": 1e-6,
            "dropout_p": 0.1,
            "optimizer": "adam",
            "loss": "categorical_crossentropy"
        }

        data_pars = {
            "train": True,
            "alphabet": "abcdefghijklmnopqrstuvwxyz0123456789-,;.!?:'\"/\\|_@#$%^&*~`+-=<>()[]{}",
            "alphabet_size": 69,
            "input_size": 1014,
            "num_of_classes": 4,
            "train_data_source": path_norm("dataset/text/ag_news_csv/train.csv"),
            "val_data_source": path_norm("dataset/text/ag_news_csv/test.csv")
        }

        compute_pars = {
            "epochs": 1,
            "batch_size": 128
        }

        out_pars = {
            "path": path_norm("ztest/ml_keras/charcnn/charcnn.h5"),
            "data_type": "pandas",
            "size": [0, 0, 6],
            "output_size": [0, 6]
        }

        return model_pars, data_pars, compute_pars, out_pars

    raise Exception(f"Not support choice {choice} yet")
Example #17
0
 def __init__(self, **args):
     """Resolve train/test dataset and reader URIs from keyword config.

     Expects ``args["train"]`` and ``args["test"]`` to be dicts with
     ``dataset`` and ``uri`` keys; all four values are path-normalized.
     """
     train_cfg = args["train"]
     test_cfg = args["test"]
     self.train_data = path_norm(train_cfg["dataset"])
     self.train_reader = path_norm(train_cfg["uri"])
     self.test_data = path_norm(test_cfg["dataset"])
     self.test_reader = path_norm(test_cfg["uri"])
Example #18
0
def get_params(choice="", data_path="dataset/", config_mode="test", **kwargs):
    """Build (model_pars, data_pars, compute_pars, out_pars) for deepctr tests.

    choice "json": delegate to ``config_load``.
    choice 0..4  : criteo / movielens sample configurations.
    choice 5     : synthetic dataset for ``kwargs["model_name"]``.

    Raises:
        Exception: for any unsupported choice (the old code fell through and
            crashed with UnboundLocalError on the final return).
    """
    if choice == "json":
        model_pars, data_pars, compute_pars, out_pars = config_load(data_path,
                                                                    file_default="model_keras/01_deepctr.json",
                                                                    config_mode=config_mode)
        return model_pars, data_pars, compute_pars, out_pars

    if choice == 0:
        log("#### Path params   ###################################################")
        data_path, _ = path_setup(out_folder="/deepctr_test/", data_path=data_path)
        out_path = path_norm("ztest/model_keras/deepctr/model.h5")

        train_data_path = data_path + "recommender/criteo_sample.txt"
        data_pars = {"train_data_path": train_data_path, "dataset_type": "criteo", "test_size": 0.2}

        log("#### Model params   #################################################")
        model_pars = {"task": "binary", "model_name": "DeepFM", "optimization": "adam", "cost": "binary_crossentropy"}
        compute_pars = {"batch_size": 256, "epochs": 10, "validation_split": 0.2}
        out_pars = {"path": out_path}

    elif choice == 1:
        log("#### Path params   ##################################################")
        data_path, _ = path_setup(out_folder="/deepctr_test/", data_path=data_path)
        out_path = path_norm("ztest/model_keras/deepctr/model.h5")

        train_data_path = data_path + "recommender/criteo_sample.txt"
        data_pars = {"train_data_path": train_data_path, "hash_feature": True,
                     "dataset_type": "criteo", "test_size": 0.2}

        log("#### Model params   #################################################")
        model_pars = {"task": "binary", "model_name": "DeepFM", "optimization": "adam", "cost": "binary_crossentropy"}
        compute_pars = {"batch_size": 256, "epochs": 10, "validation_split": 0.2}
        out_pars = {"path": out_path}

    elif choice == 2:
        log("#### Path params   ################################################")
        data_path, _ = path_setup(out_folder="/ here_test/", data_path=data_path)
        out_path = path_norm("ztest/model_keras/deepctr/model.h5")

        train_data_path = data_path + "/recommender/movielens_sample.txt"
        data_pars = {"train_data_path": train_data_path, "dataset_type": "movie_len",
                     "test_size": 0.2}

        log("#### Model params   ################################################")
        model_pars = {"task": "regression", "model_name": "DeepFM", "optimization": "adam", "cost": "mse"}
        compute_pars = {"batch_size": 256, "epochs": 10,
                        "validation_split": 0.2}
        out_pars = {"path": out_path}

    elif choice == 3:
        log("#### Path params   ##################################################")
        data_path, _ = path_setup(out_folder="/deepctr_test/", data_path=data_path)
        out_path = path_norm("ztest/model_keras/deepctr/model.h5")

        train_data_path = data_path + "/recommender/movielens_sample.txt"
        data_pars = {"train_data_path": train_data_path, "multiple_value": True,
                     "dataset_type": "movie_len", "test_size": 0.2}

        log("#### Model params   ################################################")
        model_pars = {"task": "regression", "model_name": "DeepFM", "optimization": "adam", "cost": "mse"}
        compute_pars = {"batch_size": 256, "epochs": 10,
                        "validation_split": 0.2}
        out_pars = {"path": out_path}

    elif choice == 4:
        log("#### Path params   #################################################")
        data_path, _ = path_setup(out_folder="/deepctr_test/", data_path=data_path)
        out_path = path_norm("ztest/model_keras/deepctr/model.h5")

        train_data_path = data_path + "/recommender/movielens_sample.txt"
        data_pars = {"train_data_path": train_data_path, "multiple_value": True,
                     "hash_feature": True, "dataset_type": "movie_len", "test_size": 0.2}

        log("#### Model params   ################################################")
        model_pars = {"task": "regression", "model_name": "DeepFM", "optimization": "adam", "cost": "mse"}
        compute_pars = {"batch_size": 256, "epochs": 10,
                        "validation_split": 0.2}
        out_pars = {"path": out_path}

    elif choice == 5:
        log("#### Path params   #################################################")
        # NOTE: deduplicated — the original assigned model_name twice.
        model_name = kwargs["model_name"]
        out_path = path_norm(f"ztest/model_keras/deepctr/model_{model_name}.h5")

        data_pars = {"dataset_type": "synthesis", "sample_size": 8, "test_size": 0.2, "dataset_name": model_name, **DATA_PARAMS[model_name]}

        log("#### Model params   ################################################")
        model_pars = {"model_name": model_name, "optimization": "adam", "cost": "mse"}
        compute_pars = {"batch_size": 100, "epochs": 1,
                        "validation_split": 0.5}
        out_pars = {"path": out_path}

    else:
        raise Exception(f"Not support choice {choice} yet")

    return model_pars, data_pars, compute_pars, out_pars
Example #19
0


"""
import os
from keras.callbacks import EarlyStopping

######## Logs
from mlmodels.util import os_package_root_path, log, get_model_uri

#### Import EXISTING model and re-map to mlmodels
from mlmodels.model_keras.raw.char_cnn.data_utils import Data
from mlmodels.model_keras.raw.char_cnn.models.char_cnn_zhang import CharCNNZhang

from mlmodels.util import path_norm
print(path_norm("dataset"))

####################################################################################################

VERBOSE = False

MODEL_URI = get_model_uri(__file__)


####################################################################################################
class Model:
    def __init__(self, model_pars=None, data_pars=None, compute_pars=None):
        ### Model Structure        ################################
        if model_pars is None:
            self.model = None