예제 #1
0
def path_setup(out_folder="", sublevel=0, data_path="dataset/"):
    """Resolve the dataset path relative to the package root and create an
    output folder under the current working directory.

    Returns (data_path, out_path).
    """
    resolved_data = os_package_root_path(__file__, sublevel=sublevel, path_add=data_path)
    resolved_out = os.getcwd() + "/" + out_folder
    os.makedirs(resolved_out, exist_ok=True)
    log(resolved_data, resolved_out)
    return resolved_data, resolved_out
예제 #2
0
파일: armdn.py 프로젝트: Ruhul964/dsa2
def fit(model=None, data_pars=None, compute_pars=None, out_pars=None, **kw):
    """Train the Keras model wrapped in ``model`` with early stopping.

    ``compute_pars`` must supply ``batch_size``, ``epochs`` and ``patience``.
    Returns ``(model, sess)`` where ``sess`` is always ``None`` (kept for
    interface symmetry with session-based backends).

      keras.callbacks.callbacks.ModelCheckpoint(filepath, monitor='val_loss', verbose=0, save_best_only=False, save_weights_only=False, mode='auto', period=1)
    """
    # NOTE(fix): defaults were mutable dicts ({}) shared across calls;
    # replaced with None sentinels (backward-compatible).
    data_pars = {} if data_pars is None else data_pars
    compute_pars = {} if compute_pars is None else compute_pars
    out_pars = {} if out_pars is None else out_pars

    batch_size = compute_pars['batch_size']
    epochs = compute_pars['epochs']
    patience = compute_pars["patience"]

    sess = None
    log("#### Loading dataset   #############################################")
    data_pars["predict"] = False
    x_train, y_train, x_test, y_test = get_dataset(data_pars)

    # Stop once the training loss stops improving for `patience` epochs.
    early_stopping = EarlyStopping(monitor='loss',
                                   patience=patience,
                                   mode='min')
    history = model.model.fit(x_train,
                              y_train,
                              batch_size=batch_size,
                              epochs=epochs,
                              callbacks=[early_stopping])

    model.fit_metrics = history.history
    return model, sess
예제 #3
0
def create_tabular_dataset(data_info, **args):
    """Build torchtext train/valid TabularDatasets and the TEXT vocabulary.

    args:
        lang: spaCy language code (default 'en').
        pretrained_emb: torchtext vector name (default 'glove.6B.300d').

    Returns (tabular_train, tabular_valid, TEXT.vocab).
    """
    # spaCy pipeline components not needed for plain tokenization.
    # NOTE(fix): the original list was missing a comma after 'textcat', so
    # implicit string concatenation produced the bogus component name
    # 'textcatentity_ruler' and left 'entity_ruler' undisabled.
    disable = [
        'tagger', 'parser', 'ner', 'textcat',
        'entity_ruler', 'sentencizer', 'merge_noun_chunks', 'merge_entities',
        'merge_subtokens'
    ]

    lang = args.get('lang', 'en')
    pretrained_emb = args.get('pretrained_emb', 'glove.6B.300d')

    _, path_train_dataset, path_valid_dataset = analyze_datainfo_paths(
        data_info)

    try:
        spacy_en = spacy.load(f'{lang}_core_web_sm', disable=disable)

    # NOTE(fix): narrowed the bare except so Ctrl-C / SystemExit still work.
    except Exception:
        log(f"Download {lang}")
        import importlib

        os.system(f"python -m spacy download {lang}")
        spacy_en = importlib.import_module(f'{lang}_core_web_sm').load(
            disable=disable)

    def tokenizer(text):
        # Token texts only; the disabled pipeline makes this cheap.
        return [tok.text for tok in spacy_en.tokenizer(text)]

    # Creating field for text and label
    TEXT = Field(sequential=True, tokenize=tokenizer, lower=True)
    LABEL = Field(sequential=False)

    print('Preprocessing the text...')
    # clean the text
    TEXT.preprocessing = torchtext.data.Pipeline(clean_str)

    print('Creating tabular datasets...It might take a while to finish!')
    train_datafield = [('text', TEXT), ('label', LABEL)]
    tabular_train = TabularDataset(path=path_train_dataset,
                                   format='csv',
                                   skip_header=True,
                                   fields=train_datafield)

    valid_datafield = [('text', TEXT), ('label', LABEL)]

    tabular_valid = TabularDataset(path=path_valid_dataset,
                                   format='csv',
                                   skip_header=True,
                                   fields=valid_datafield)

    print('Building vocaulary...')
    TEXT.build_vocab(tabular_train, vectors=pretrained_emb)
    LABEL.build_vocab(tabular_train)

    return tabular_train, tabular_valid, TEXT.vocab
예제 #4
0
def get_params(param_pars=None, **kw):
    """Return (model_pars, data_pars, compute_pars, out_pars).

    choice == "json": read the four dicts from the JSON config at data_path.
    choice == "test01": hard-coded IMDB/textcnn test configuration.
    """
    from jsoncomment import JsonComment
    json = JsonComment()
    pp = param_pars
    choice = pp['choice']
    config_mode = pp['config_mode']
    data_path = pp['data_path']

    if choice == "json":
        data_path = path_norm(data_path)
        # NOTE(fix): close the config file deterministically.
        with open(data_path, 'r') as f:
            cf = json.load(f)
        cf = cf[config_mode]
        return cf['model_pars'], cf['data_pars'], cf['compute_pars'], cf[
            'out_pars']

    if choice == "test01":
        log("#### Path params   ##########################################")
        data_path = path_norm("dataset/text/imdb.csv")
        out_path = path_norm("ztest/model_tch/textcnn/")
        model_path = os.path.join(out_path, "model")

        data_pars = {
            "data_path": path_norm("dataset/recommender/IMDB_sample.txt"),
            "train_path": path_norm("dataset/recommender/IMDB_train.csv"),
            "valid_path": path_norm("dataset/recommender/IMDB_valid.csv"),
            "split_if_exists": True,
            "frac": 0.99,
            "lang": "en",
            "pretrained_emb": "glove.6B.300d",
            "batch_size": 64,
            "val_batch_size": 64,
        }

        model_pars = {
            "dim_channel": 100,
            "kernel_height": [3, 4, 5],
            "dropout_rate": 0.5,
            "num_class": 2
        }

        compute_pars = {
            "learning_rate": 0.001,
            "epochs": 1,
            "checkpointdir": out_path + "/checkpoint/"
        }

        out_pars = {
            "path": model_path,
            "checkpointdir": out_path + "/checkpoint/"
        }

        return model_pars, data_pars, compute_pars, out_pars

    # NOTE(fix): previously an unknown choice silently returned None, which
    # crashed later at tuple unpacking; fail with an explicit message
    # (consistent with the sibling get_params implementations).
    raise Exception(f"Not support choice {choice} yet")
예제 #5
0
def test(data_path="dataset/", pars_choice="json", config_mode="test"):
    """Local smoke test: resolve params, load the dataset, print both."""
    log("#### Loading params   ##############################################")
    pars = dict(choice=pars_choice,
                data_path=data_path,
                config_mode=config_mode)
    model_pars, data_pars, compute_pars, out_pars = get_params(pars)
    print(">>>>>> model_pars, data_pars, compute_pars, out_pars: ", model_pars,
          data_pars, compute_pars, out_pars)

    log("#### Loading dataset   #############################################")
    xtuple = get_dataset(data_pars)
    print(">>>> Xtuple: ", xtuple)
예제 #6
0
파일: armdn.py 프로젝트: Ruhul964/dsa2
def get_params(param_pars=None, **kw):
    """Return (model_pars, data_pars, compute_pars, out_pars) for armdn.

    choice == "json": read the four dicts from the JSON config at data_path.
    choice == "test0": hard-coded milk-production time-series configuration.
    """
    # NOTE(fix): mutable default {} replaced by a None sentinel.
    param_pars = {} if param_pars is None else param_pars
    data_path = param_pars["data_path"]
    config_mode = param_pars["config_mode"]

    if param_pars["choice"] == "json":
        data_path = path_norm(data_path)
        # NOTE(fix): close the config file deterministically.
        with open(data_path, mode='r') as f:
            cf = json.load(f)
        cf = cf[config_mode]
        return cf['model_pars'], cf['data_pars'], cf['compute_pars'], cf[
            'out_pars']

    if param_pars["choice"] == "test0":
        log("#### Path params   ##########################################")
        data_path = path_norm(data_path)
        out_path = path_norm("ztest/model_keras/armdn/")
        os.makedirs(out_path, exist_ok=True)
        log(data_path, out_path)

        data_pars = {
            "train_data_path": data_path + "timeseries/milk.csv",
            "train": False,
            "prediction_length": 12,
            "col_Xinput": ["milk_production_pounds"],
            "col_ytarget": "milk_production_pounds"
        }

        model_pars = {
            "lstm_h_list": [300, 200, 24],
            "last_lstm_neuron": 12,
            "timesteps": 12,
            "dropout_rate": 0.1,
            "n_mixes": 3,
            "dense_neuron": 10,
        }

        compute_pars = {
            "batch_size": 32,
            "clip_gradient": 100,
            "ctx": None,
            "epochs": 10,
            "learning_rate": 0.05,
            "patience": 50
        }
        outpath = out_path + "result"
        out_pars = {"outpath": outpath}

        return model_pars, data_pars, compute_pars, out_pars

    # NOTE(fix): an unknown choice previously crashed with a NameError on the
    # trailing return; raise an explicit error instead.
    raise Exception(f"Not support choice {param_pars['choice']} yet")
예제 #7
0
def get_params(param_pars=None, **kw):
    """Return (model_pars, data_pars, compute_pars, out_pars) for textcnn.

    choice == "json": read the four dicts from the JSON config at data_path.
    choice == "test01": hard-coded IMDB test configuration.
    """
    from jsoncomment import JsonComment
    json = JsonComment()
    # NOTE(fix): mutable default {} replaced by a None sentinel.
    param_pars = {} if param_pars is None else param_pars
    choice = param_pars['choice']
    config_mode = param_pars['config_mode']
    data_path = param_pars['data_path']

    if choice == "json":
        data_path = path_norm(data_path)
        # NOTE(fix): close the config file deterministically.
        with open(data_path, mode='r') as f:
            cf = json.load(f)
        cf = cf[config_mode]
        return cf['model_pars'], cf['data_pars'], cf['compute_pars'], cf[
            'out_pars']

    if choice == "test01":
        log("#### Path params   ##########################################")
        data_path = path_norm("dataset/text/imdb.csv")
        out_path = path_norm("ztest/model_keras/textcnn/model.h5")
        model_path = out_path

        data_pars = {
            "path": data_path,
            "train": 1,
            "maxlen": 40,
            "max_features": 5,
        }

        model_pars = {
            "maxlen": 40,
            "max_features": 5,
            "embedding_dims": 50,
        }

        compute_pars = {
            "engine": "adam",
            "loss": "binary_crossentropy",
            "metrics": ["accuracy"],
            "batch_size": 1000,
            "epochs": 1
        }

        out_pars = {"path": out_path, "model_path": model_path}

        return model_pars, data_pars, compute_pars, out_pars

    else:
        raise Exception(f"Not support choice {choice} yet")
예제 #8
0
def fit(model,
        sess=None,
        data_pars=None,
        compute_pars=None,
        out_pars=None,
        **kwargs):
    """Train the wrapped torch model, checkpointing whenever the validation
    accuracy improves.  Returns (model, None)."""
    net = model.model
    device = _get_device()
    epochs = compute_pars["epochs"]
    optimizer = optim.Adam(net.parameters(), lr=compute_pars['learning_rate'])

    train_loss, train_acc = [], []
    test_loss, test_acc = [], []
    best_test_acc = -1

    train_iter, valid_iter, vocab = get_dataset(data_pars, out_pars)

    # load word embeddings to model
    net.rebuild_embed(vocab)

    for epoch in range(1, epochs + 1):
        tr_loss, tr_acc = _train(net, device, train_iter, optimizer, epoch,
                                 epochs)
        print(f'Train Epoch: {epoch} \t Loss: {tr_loss} \t Accuracy: {tr_acc}')

        ts_loss, ts_acc = _valid(net, device, valid_iter)
        print(f'Train Epoch: {epoch} \t Loss: {ts_loss} \t Accuracy: {ts_acc}')

        if ts_acc > best_test_acc:
            best_test_acc = ts_acc
            # save paras(snapshot)
            log(f"model saves at {best_test_acc}% accuracy")
            os.makedirs(out_pars["checkpointdir"], exist_ok=True)
            torch.save(
                net.state_dict(),
                os.path.join(out_pars["checkpointdir"], "best_accuracy"))

        train_loss.append(tr_loss)
        train_acc.append(tr_acc)
        test_loss.append(ts_loss)
        test_acc.append(ts_acc)

    model.model = net
    return model, None
예제 #9
0
def get_params(param_pars=None, **kw):
    """Return (model_pars, data_pars, compute_pars, out_pars) for crf_bilstm.

    choice == "json": read the four dicts from the JSON config at data_path.
    choice == "test01": hard-coded NER test configuration.
    """
    from jsoncomment import JsonComment
    json = JsonComment()

    # NOTE(fix): mutable default {} replaced by a None sentinel.
    pp = {} if param_pars is None else param_pars
    choice = pp["choice"]
    config_mode = pp["config_mode"]
    data_path = pp["data_path"]

    if choice == "json":
        data_path = path_norm(data_path)
        # NOTE(fix): close the config file deterministically.
        with open(data_path, mode="r") as f:
            cf = json.load(f)
        cf = cf[config_mode]
        return cf["model_pars"], cf["data_pars"], cf["compute_pars"], cf[
            "out_pars"]

    if choice == "test01":
        log("#### Path params   ##########################################")
        data_path = path_norm("dataset/text/ner_dataset.csv")
        out_path = path_norm("ztest/model_keras/crf_bilstm/")
        model_path = os.path.join(out_path, "model")

        data_pars = {
            "path": data_path,
            "train": 1,
            "maxlen": 400,
            "max_features": 10,
        }

        model_pars = {}
        compute_pars = {
            "engine": "adam",
            "loss": "binary_crossentropy",
            "metrics": ["accuracy"],
            "batch_size": 32,
            "epochs": 1,
        }

        out_pars = {"path": out_path, "model_path": model_path}

        log(data_pars, out_pars)

        return model_pars, data_pars, compute_pars, out_pars

    else:
        raise Exception(f"Not support choice {choice} yet")
예제 #10
0
def fit(model,
        data_pars=None,
        model_pars=None,
        compute_pars=None,
        out_pars=None,
        *args,
        **kw):
    """Fine-tune the sentence-transformer wrapped in ``model``.

    Builds train/validation dataloaders from get_dataset, instantiates the
    loss named in compute_pars["loss"], runs model.model.fit and stores the
    result in model.fit_metrics.  Returns (model, None).
    """
    log("############ Dataloader setup  #############################")
    readers, _internal = get_dataset(data_pars)
    train_reader, val_reader = readers
    batch_size = compute_pars["batch_size"]

    train_dataloader = DataLoader(
        SentencesDataset(train_reader.get_examples('train.gz'),
                         model=model.model),
        shuffle=True,
        batch_size=batch_size)

    val_dataloader = DataLoader(
        SentencesDataset(val_reader.get_examples('val/sts-dev.csv'),
                         model=model.model),
        shuffle=True,
        batch_size=batch_size)

    log("############ Fit setup  ##################################")
    loss_cls = getattr(losses, compute_pars["loss"])
    train_loss = loss_cls(
        model=model.model,
        sentence_embedding_dimension=model.model.get_sentence_embedding_dimension(),
        num_labels=train_reader.get_num_labels())
    train_loss.float()

    evaluator = EmbeddingSimilarityEvaluator(val_dataloader)
    model.model.float()

    model.fit_metrics = model.model.fit(
        train_objectives=[(train_dataloader, train_loss)],
        evaluator=evaluator,
        epochs=compute_pars["num_epochs"],
        evaluation_steps=compute_pars["evaluation_steps"],
        warmup_steps=compute_pars["warmup_steps"],
        output_path=out_pars["model_path"])
    return model, None
예제 #11
0
파일: torchhub.py 프로젝트: Ruhul964/dsa2
def save(model, session=None, save_pars=None):
    """Persist a torch model and its mlmodels parameter dicts.

    The model itself is written via save_tch under <path>/torch_model/ and
    the (model_pars, compute_pars, data_pars) dicts are pickled next to it.
    """
    import pickle
    from mlmodels.util import save_tch
    save2 = copy.deepcopy(save_pars)
    path = path_norm(save_pars['path'] + "/torch_model/")
    os.makedirs(Path(path), exist_ok=True)

    ### Specialized part
    save2['path'] = path
    save_tch(model=model, save_pars=save2)

    ### Setup Model
    d = {"model_pars": model.model_pars,
         "compute_pars": model.compute_pars,
         "data_pars": model.data_pars
         }
    # NOTE(fix): the pickle file handle was previously never closed.
    with open(path + "/torch_model_pars.pkl", mode="wb") as f:
        pickle.dump(d, f)
    log(path, os.listdir(path))
예제 #12
0
def fit2(model,
         data_pars=None,
         model_pars=None,
         compute_pars=None,
         out_pars=None,
         *args,
         **kw):
    """Fine-tune the sentence-transformer using the get_dataset2 loader.

    Same contract as fit(): stores the fit result in model.fit_metrics and
    returns (model, None).
    """
    log("############ Dataloader setup  ###########################")
    data_pars['is_train'] = 1
    train_dataloader, val_dataloader, pars = get_dataset2(data_pars,
                                                          model=model)

    log("############ Fit setup  ##################################")
    loss_cls = getattr(losses, compute_pars["loss"])
    train_loss = loss_cls(
        model=model.model,
        sentence_embedding_dimension=model.model.get_sentence_embedding_dimension(),
        num_labels=pars["train_num_labels"])
    train_loss.float()

    evaluator = EmbeddingSimilarityEvaluator(val_dataloader)
    model.model.float()

    model.fit_metrics = model.model.fit(
        train_objectives=[(train_dataloader, train_loss)],
        evaluator=evaluator,
        epochs=compute_pars["num_epochs"],
        evaluation_steps=compute_pars["evaluation_steps"],
        warmup_steps=compute_pars["warmup_steps"],
        output_path=out_pars["model_path"])
    return model, None
예제 #13
0
def test(data_path="dataset/", pars_choice="test01", config_mode="test"):
    """End-to-end local test: params -> data -> fit -> predict -> metrics ->
    save/load -> predict again."""
    from mlmodels.util import path_norm
    data_path = path_norm(data_path)

    log("#### Loading params   ##############################################")
    pars = {
        "choice": pars_choice,
        "data_path": data_path,
        "config_mode": config_mode
    }
    model_pars, data_pars, compute_pars, out_pars = get_params(pars)

    log("#### Loading dataset   #############################################")
    xtuple = get_dataset(data_pars)

    log("#### Model init, fit   #############################################")
    model = Model(model_pars, compute_pars)
    model, session = fit(model, data_pars, model_pars, compute_pars, out_pars)

    log("#### Predict   #####################################################")
    preds = predict(model, session, data_pars, compute_pars, out_pars)

    log("#### metrics   #####################################################")
    scores = evaluate(model, preds, data_pars, compute_pars, out_pars)
    print(scores)

    log("#### Plot   ########################################################")

    log("#### Save   ###################################################")
    save_pars = {"path": out_pars['path']}
    save(model, session, save_pars=save_pars)

    log("#### Load #####################################################")
    model2, session2 = load(save_pars)
    print(model2, session2)

    log("#### Predict   ################################################")
    preds = predict(model2, session2, data_pars, compute_pars, out_pars)
    print(preds)
예제 #14
0
def test(data_path="dataset/", pars_choice="json", config_mode="test"):
    """Local test via the generic module_load_full / fit / predict helpers."""
    log("#### Loading params   ##############################################")
    pars = {
        "choice": pars_choice,
        "data_path": data_path,
        "config_mode": config_mode,
    }
    model_pars, data_pars, compute_pars, out_pars = get_params(pars)

    log("#### Loading dataset   #############################################")
    xtuple = get_dataset(data_pars)

    log("#### Model init, fit   #############################################")
    from mlmodels.models import module_load_full, fit, predict

    module, model = module_load_full(
        "model_keras.namentity_crm_bilstm_dataloader",
        model_pars,
        data_pars,
        compute_pars,
    )
    model, sess = fit(module, model,
                      data_pars=data_pars,
                      compute_pars=compute_pars,
                      out_pars=out_pars)

    # model = Model(model_pars, data_pars, compute_pars)
    # model, session = fit(model, data_pars, compute_pars, out_pars)

    log("#### Predict   #####################################################")
    data_pars["train"] = 0
    preds = predict(module, model,
                    data_pars=data_pars,
                    compute_pars=compute_pars,
                    out_pars=out_pars)

    log("#### metrics   #####################################################")
    scores = fit_metrics(model,
                         data_pars=data_pars,
                         compute_pars=compute_pars,
                         out_pars=out_pars)
    print(scores)

    log("#### Plot   ########################################################")

    log("#### Save/Load   ###################################################")
예제 #15
0
파일: Autokeras.py 프로젝트: Ruhul964/dsa2
def test_single(data_path="dataset/", pars_choice="json", config_mode="test"):
    """Local Autokeras test: fit, predict, evaluate, then export/reload the
    underlying Keras model and predict again."""
    log("#### Loading params   ##############################################")
    pars = {
        "choice": pars_choice,
        "data_path": data_path,
        "config_mode": config_mode
    }
    model_pars, data_pars, compute_pars, out_pars = get_params(pars)
    log(data_pars, out_pars)

    log("#### Loading dataset   #############################################")
    #xtuple = get_dataset(data_pars)

    log("#### Model init, fit   #############################################")
    wrapper = Model(model_pars, data_pars, compute_pars)
    fitted_model = fit(wrapper.model, data_pars, compute_pars, out_pars)

    log("#### Predict   #####################################################")
    preds = predict(fitted_model, data_pars, compute_pars, out_pars)
    print(preds[:10])

    log("#### metrics   #####################################################")
    scores = evaluate(fitted_model, data_pars, compute_pars, out_pars)
    print(scores)

    log("#### Plot   ########################################################")

    log("#### Save/Load   ###################################################")
    ## Export as a Keras Model.
    exported = fitted_model.export_model()
    save(model=exported, save_pars=out_pars, config_mode=config_mode)
    loaded_model = load(out_pars, config_mode)
    preds = predict(loaded_model,
                    data_pars=data_pars,
                    compute_pars=compute_pars,
                    out_pars=out_pars)
    print(preds[:10])
예제 #16
0
파일: charcnn.py 프로젝트: Ruhul964/dsa2
def get_params(param_pars=None, **kw):
    """Return (model_pars, data_pars, compute_pars, out_pars) for charcnn.

    choice == "json": read the four dicts from the JSON config at data_path.
    choice == "test01": hard-coded ag_news character-CNN configuration.
    """
    from jsoncomment import JsonComment ; json = JsonComment()
    # NOTE(fix): mutable default {} replaced by a None sentinel.
    pp = {} if param_pars is None else param_pars
    choice = pp['choice']
    config_mode = pp['config_mode']
    data_path = pp['data_path']

    if choice == "json":
        data_path = path_norm(data_path)
        # NOTE(fix): close the config file deterministically.
        with open(data_path, mode='r') as f:
            cf = json.load(f)
        cf = cf[config_mode]
        return cf['model_pars'], cf['data_pars'], cf['compute_pars'], cf['out_pars']

    if choice == "test01":
        log("#### Path params   ##########################################")
        # NOTE(fix): removed the unused local `root = path_norm()`.
        data_path  = path_norm( "dataset/text/imdb.npz"  )
        out_path   = path_norm( "ztest/model_keras/charcnn/" )
        model_path = os.path.join(out_path , "model")

        model_pars = {
            "embedding_size": 128,
            "conv_layers": [[256, 10 ], [256, 7 ], [256, 5 ], [256, 3 ] ],
            "fully_connected_layers": [
                1024,
                1024
            ],
            "threshold": 1e-6,
            "dropout_p": 0.1,
            "optimizer": "adam",
            "loss": "categorical_crossentropy"
        }

        data_pars = {
            "train": True,
            "alphabet": "abcdefghijklmnopqrstuvwxyz0123456789-,;.!?:'\"/\\|_@#$%^&*~`+-=<>()[]{}",
            "alphabet_size": 69,
            "input_size": 1014,
            "num_of_classes": 4,
            "train_data_source": path_norm("dataset/text/ag_news_csv/train.csv") ,
            "val_data_source": path_norm("dataset/text/ag_news_csv/test.csv")
        }

        compute_pars = {
            "epochs": 1,
            "batch_size": 128
        }

        out_pars = {
            "path":  path_norm( "ztest/ml_keras/charcnn/charcnn.h5"),
            "data_type": "pandas",
            "size": [0, 0, 6],
            "output_size": [0, 6]
        }

        return model_pars, data_pars, compute_pars, out_pars

    else:
        raise Exception(f"Not support choice {choice} yet")
예제 #17
0
def test(data_path="dataset/", pars_choice=0, **kwargs):
    """Local deepctr test: load params/data, fit and predict via the generic
    module helpers, compute metrics, then save and reload the Keras model."""
    log("#### Loading params   ##############################################")
    model_pars, data_pars, compute_pars, out_pars = get_params(
        choice=pars_choice, data_path=data_path, **kwargs)
    print(model_pars, data_pars, compute_pars, out_pars)

    log("#### Loading dataset   #############################################")
    dataset = get_dataset(data_pars)

    log("#### Model init, fit   #############################################")
    from mlmodels.models import module_load_full, fit, predict
    module, model = module_load_full("model_keras.01_deepctr", model_pars,
                                     data_pars, compute_pars, dataset=dataset)
    model = fit(module, model, data_pars=data_pars, compute_pars=compute_pars,
                out_pars=out_pars, dataset=dataset)

    # log("#### Predict   ####################################################")
    preds = predict(module, model, compute_pars=compute_pars,
                    data_pars=data_pars, out_pars=out_pars, dataset=dataset)

    log("#### metrics   ####################################################")
    scores = metrics(preds, dataset[1], compute_pars=compute_pars,
                     data_pars=data_pars, out_pars=out_pars)
    print(scores)

    log("#### Plot   #######################################################")

    log("#### Save/Load   ##################################################")
    save_keras(model, save_pars=out_pars)
    from deepctr.layers import custom_objects
    model2 = load_keras(out_pars, custom_pars={"custom_objects": custom_objects})
    model2.model.summary()
예제 #18
0
def get_params(choice="", data_path="dataset/", config_mode="test", **kwargs):
    """Return (model_pars, data_pars, compute_pars, out_pars) for deepctr.

    choice == "json": delegate to config_load.
    choice 0/1: criteo sample (plain / hashed features).
    choice 2/3/4: movielens sample (plain / multi-value / multi-value+hash).
    choice 5: synthetic dataset for the model named in kwargs["model_name"].
    """
    if choice == "json":
        model_pars, data_pars, compute_pars, out_pars = config_load(data_path,
                                                                    file_default="model_keras/01_deepctr.json",
                                                                    config_mode=config_mode)
        return model_pars, data_pars, compute_pars, out_pars

    if choice == 0:
        log("#### Path params   ###################################################")
        data_path, _ = path_setup(out_folder="/deepctr_test/", data_path=data_path)
        out_path = path_norm("ztest/model_keras/deepctr/model.h5")

        train_data_path = data_path + "recommender/criteo_sample.txt"
        data_pars = {"train_data_path": train_data_path, "dataset_type": "criteo", "test_size": 0.2}

        log("#### Model params   #################################################")
        model_pars = {"task": "binary", "model_name": "DeepFM", "optimization": "adam", "cost": "binary_crossentropy"}
        compute_pars = {"batch_size": 256, "epochs": 10, "validation_split": 0.2}
        out_pars = {"path": out_path}


    elif choice == 1:
        log("#### Path params   ##################################################")
        data_path, _ = path_setup(out_folder="/deepctr_test/", data_path=data_path)
        out_path = path_norm("ztest/model_keras/deepctr/model.h5")

        train_data_path = data_path + "recommender/criteo_sample.txt"
        data_pars = {"train_data_path": train_data_path, "hash_feature": True,
                     "dataset_type": "criteo", "test_size": 0.2}

        log("#### Model params   #################################################")
        model_pars = {"task": "binary", "model_name": "DeepFM", "optimization": "adam", "cost": "binary_crossentropy"}
        compute_pars = {"batch_size": 256, "epochs": 10, "validation_split": 0.2}
        out_pars = {"path": out_path}


    elif choice == 2:
        log("#### Path params   ################################################")
        data_path, _ = path_setup(out_folder="/ here_test/", data_path=data_path)
        out_path = path_norm("ztest/model_keras/deepctr/model.h5")

        train_data_path = data_path + "/recommender/movielens_sample.txt"
        data_pars = {"train_data_path": train_data_path, "dataset_type": "movie_len",
                     "test_size": 0.2}

        log("#### Model params   ################################################")
        model_pars = {"task": "regression", "model_name": "DeepFM", "optimization": "adam", "cost": "mse"}
        compute_pars = {"batch_size": 256, "epochs": 10,
                        "validation_split": 0.2}
        out_pars = {"path": out_path}


    elif choice == 3:
        log("#### Path params   ##################################################")
        data_path, _ = path_setup(out_folder="/deepctr_test/", data_path=data_path)
        out_path = path_norm("ztest/model_keras/deepctr/model.h5")

        train_data_path = data_path + "/recommender/movielens_sample.txt"
        data_pars = {"train_data_path": train_data_path, "multiple_value": True,
                     "dataset_type": "movie_len", "test_size": 0.2}

        log("#### Model params   ################################################")
        model_pars = {"task": "regression", "model_name": "DeepFM", "optimization": "adam", "cost": "mse"}
        compute_pars = {"batch_size": 256, "epochs": 10,
                        "validation_split": 0.2}
        out_pars = {"path": out_path}

    elif choice == 4:
        log("#### Path params   #################################################")
        data_path, _ = path_setup(out_folder="/deepctr_test/", data_path=data_path)
        out_path = path_norm("ztest/model_keras/deepctr/model.h5")

        train_data_path = data_path + "/recommender/movielens_sample.txt"
        data_pars = {"train_data_path": train_data_path, "multiple_value": True,
                     "hash_feature": True, "dataset_type": "movie_len", "test_size": 0.2}

        log("#### Model params   ################################################")
        model_pars = {"task": "regression", "model_name": "DeepFM", "optimization": "adam", "cost": "mse"}
        compute_pars = {"batch_size": 256, "epochs": 10,
                        "validation_split": 0.2}
        out_pars = {"path": out_path}

    elif choice == 5:
        log("#### Path params   #################################################")
        # NOTE(fix): model_name was assigned twice from the same kwarg.
        model_name = kwargs["model_name"]
        out_path = path_norm(f"ztest/model_keras/deepctr/model_{model_name}.h5")

        data_pars = {"dataset_type": "synthesis", "sample_size": 8, "test_size": 0.2, "dataset_name": model_name, **DATA_PARAMS[model_name]}

        log("#### Model params   ################################################")
        model_pars = {"model_name": model_name, "optimization": "adam", "cost": "mse"}
        compute_pars = {"batch_size": 100, "epochs": 1,
                        "validation_split": 0.5}
        out_pars = {"path": out_path}

    else:
        # NOTE(fix): an unmatched choice (e.g. the default "") previously fell
        # through to the return and crashed with a NameError.
        raise Exception(f"Not support choice {choice} yet")

    return model_pars, data_pars, compute_pars, out_pars
예제 #19
0
파일: armdn.py 프로젝트: Ruhul964/dsa2
def test(data_path="dataset/", pars_choice="test0", config_mode="test"):
    """Local armdn test: build the model, fit, predict, plot and save."""
    log("#### Loading params   ##############################################")
    pars = {
        "choice": pars_choice,
        "config_mode": config_mode,
        "data_path": data_path
    }
    model_pars, data_pars, compute_pars, out_pars = get_params(pars)

    log("#### Model init   ##################################################")
    model = Model(model_pars=model_pars,
                  data_pars=data_pars,
                  compute_pars=compute_pars)

    log("### Model Fit ######################################################")
    fit(model=model, data_pars=data_pars, compute_pars=compute_pars)
    log("fitted metrics", model.fit_metrics)

    log("#### Predict   #####################################################")
    data_pars["predict"] = True
    y_pred, y_test = predict(model=model,
                             model_pars=model_pars,
                             data_pars=data_pars)
    # from mlmodels import metrics
    # log( metrics.metric_eval([ "mean_absolute_error" ], y_test, y_pred))

    log("### Plot #########################################################3#")
    data_pars["predict"] = True
    metrics_plot({
        "plot_type": "line",
        "pred": y_pred,
        "outpath": out_pars["outpath"],
        "actual": y_test
    })

    log("#### Save ###################################################")
    save(model=model, session=None, save_pars=out_pars)

    log("#### Load ###################################################")
예제 #20
0
def test(data_path="dataset/", pars_choice="json", config_mode="test"):
    """End-to-end local test: params -> data -> fit -> predict -> metrics ->
    save/load."""
    from mlmodels.util import path_norm
    data_path = path_norm(data_path)

    log("#### Loading params   ##############################################")
    param_pars = {
        "choice": pars_choice,
        "data_path": data_path,
        "config_mode": config_mode
    }
    model_pars, data_pars, compute_pars, out_pars = get_params(param_pars)

    # NOTE(fix): corrected the "Loading daaset" typo in the log banner.
    log("#### Loading dataset   #############################################")
    Xtuple = get_dataset(data_pars)

    log("#### Model init, fit   #############################################")
    session = None
    model = Model(model_pars, data_pars, compute_pars)
    model, session = fit(model, data_pars, compute_pars, out_pars)

    log("#### Predict   #####################################################")
    data_pars["train"] = 0
    ypred = predict(model, session, data_pars, compute_pars, out_pars)

    log("#### metrics   #####################################################")
    metrics_val = evaluate(model, data_pars, compute_pars, out_pars)
    print(metrics_val)

    log("#### Plot   ########################################################")

    log("#### Save/Load   ###################################################")
    save(model, session, save_pars=out_pars)
    model2 = load(out_pars)
    #     ypred = predict(model2, data_pars, compute_pars, out_pars)
    #     metrics_val = metrics(model2, ypred, data_pars, compute_pars, out_pars)
    print(model2)
예제 #21
0
파일: torchhub.py 프로젝트: Ruhul964/dsa2
def test2(data_path="dataset/", pars_choice="json", config_mode="test"):
    """Lightweight local smoke test: init the model, run predict, then
    save / load / predict again (fit and metrics stay disabled)."""
    log("#### Loading params   ##############################################")
    param_pars = {
        "choice": pars_choice,
        "data_path": data_path,
        "config_mode": config_mode,
    }
    model_pars, data_pars, compute_pars, out_pars = get_params(param_pars)
    log(data_pars, out_pars)

    log("#### Loading dataset   #############################################")
    #xtuple = get_dataset(data_pars)

    log("#### Model init, fit   #############################################")
    session = None
    model = Model(model_pars, data_pars, compute_pars)
    #model, session = fit(model, data_pars, compute_pars, out_pars)

    log("#### Predict   #####################################################")
    predict(model, session, data_pars, compute_pars, out_pars)

    log("#### metrics   #####################################################")
    #metrics_val = evaluate(model, data_pars, compute_pars, out_pars)
    #print(metrics_val)

    log("#### Plot   ########################################################")

    log("#### Save/Load   ###################################################")
    save_pars = {"path": out_pars["path"]}
    save(model=model, save_pars=save_pars)
    model2 = load(save_pars)
    ypred = predict(model2, data_pars=data_pars, compute_pars=compute_pars, out_pars=out_pars)
    print(model2)
Example #22
0
def get_dataset2(data_pars=None, model=None, **kw):
    """Build dataloaders (train mode) or a raw reader (inference mode)
    from JSON ``data_pars``.

    Example ``data_pars``::

        { "data_path": "dataset/GOOG-year.csv", "data_type": "pandas",
          "size": [0, 0, 6], "output_size": [0, 6] }

    Args:
        data_pars: dict with ``is_train``, ``train_type`` / ``test_type``
                   ("nli" or sts-style), ``train_path`` / ``test_path``
                   and ``batch_size``.
        model:     wrapper whose ``model.model`` attribute is the sentence
                   encoder consumed by SentencesDataset.

    Returns:
        ``(train_dataloader, val_dataloader, pars)`` when training,
        ``(val_reader, pars)`` otherwise.

    Raises:
        ValueError: if the configured data type is neither "nli" nor "sts".
    """
    # data_path = path_norm(data_pars["data_path"])

    istrain = data_pars.get("is_train", 0)
    mode = "train" if istrain else "test"
    data_type = data_pars[f"{mode}_type"].lower()

    def get_reader(dtype, path):
        # Map the configured data type onto its reader class.
        if dtype == 'nli':
            reader_cls = readers.NLIDataReader
        elif dtype == 'sts':
            reader_cls = readers.STSDataReader
        else:
            # Fail loudly here: the original code assigned the placeholder
            # string "MyCustomReader()" and crashed later with an opaque
            # TypeError ('str' object is not callable).
            raise ValueError(f"Unsupported data type: {dtype!r} (expected 'nli' or 'sts')")
        return reader_cls(path)

    def get_filename(split):
        # NLI corpora ship gzipped; STS corpora as csv files.
        # (The original helper took an unused first argument that callers
        # filled with data_pars instead of the data type — dropped.)
        if split == 'train':
            return 'train.gz' if data_pars["train_type"].lower() == 'nli' else 'sts-train.csv'
        return 'dev.gz' if data_pars["test_type"].lower() == 'nli' else 'sts-dev.csv'

    log("############ Dataloader setup  #############################")
    if istrain:
        train_fname = get_filename('train')
        train_reader = get_reader(data_type, data_pars['train_path'])
        train_data = SentencesDataset(train_reader.get_examples(train_fname),
                                      model=model.model)
        train_dataloader = DataLoader(train_data,
                                      shuffle=True,
                                      batch_size=data_pars["batch_size"])

        val_fname = get_filename('test')
        val_reader = get_reader(data_type, data_pars['test_path'])
        val_data = SentencesDataset(val_reader.get_examples(val_fname),
                                    model=model.model)
        val_dataloader = DataLoader(val_data,
                                    shuffle=True,
                                    batch_size=data_pars["batch_size"])

        pars = {"train_num_labels": train_reader.get_num_labels()}
        return train_dataloader, val_dataloader, pars

    #### Inference part: return the raw reader plus the train filename hint.
    val_reader = get_reader(data_type, data_pars['test_path'])
    pars = {
        "train_fname":
        'train.gz' if data_pars["train_type"].lower() == 'nli' else 'sts-train.csv'
    }
    return val_reader, pars
Example #23
0
def test(data_path="dataset/", pars_choice="json", config_mode="test"):
    """Full local test pass: params -> dataset -> fit -> save/load ->
    predict (both reloaded and in-memory models) -> metrics."""
    from mlmodels.util import path_norm
    data_path = path_norm(data_path)
    log("Json file path: ", data_path)

    log("#### Loading params   ##############################################")
    param_pars = dict(choice=pars_choice, data_path=data_path, config_mode=config_mode)
    model_pars, data_pars, compute_pars, out_pars = get_params(param_pars)
    log(model_pars, data_pars, compute_pars, out_pars)

    log("#### Loading dataset   #############################################")
    Xtuple = get_dataset(data_pars)
    print(len(Xtuple))

    log("#### Model init       #############################################")
    session = None
    model = Model(model_pars, data_pars, compute_pars)

    log("#### Model fit        #############################################")
    data_pars["train"] = 1
    model, session = fit(model, session, data_pars, compute_pars, out_pars)

    log("#### Save   ########################################################")
    save_pars = {"path": out_pars['path'] + "/model.pkl"}
    save(model, session, save_pars=save_pars)

    log("#### Load   ########################################################")
    model2, session2 = load(save_pars)

    log("#### Predict from Load   ###########################################")
    data_pars["train"] = 0
    ypred, _ = predict(model2, session2, data_pars, compute_pars, out_pars)

    log("#### Predict   #####################################################")
    data_pars["train"] = 0
    ypred, _ = predict(model, session, data_pars, compute_pars, out_pars)

    log("#### metrics   #####################################################")
    metrics_val = evaluate(model, session, data_pars, compute_pars, out_pars)
    log(metrics_val)

    log("#### Plot   ########################################################")
Example #24
0
File: matchZoo.py Project: Ruhul964/dsa2
def test_train(data_path, pars_choice, model_name):
    """Local training smoke test for a named model: init, fit, save,
    reload, then predict with the reloaded model."""
    log("#### Loading params   ##############################################")
    param_pars = {"choice": pars_choice, "data_path": data_path, "model_name": model_name}
    model_pars, data_pars, compute_pars, out_pars = get_params(param_pars)
    log(data_pars, out_pars)

    log("#### Loading dataset   #############################################")
    #xtuple = get_dataset(data_pars)

    log("#### Model init   ##################################################")
    session = None
    model = Model(model_pars, data_pars, compute_pars)

    log("#### Model  fit   #############################################")
    model, session = fit(model, data_pars, compute_pars, out_pars)

    log("#### Predict   #####################################################")
    #ypred = predict(model, session, data_pars, compute_pars, out_pars)

    log("#### metrics   #####################################################")
    #metrics_val = evaluate(model, data_pars, compute_pars, out_pars)
    # print(metrics_val)

    log("#### Plot   ########################################################")

    log("#### Save   ########################################################")
    save_pars = {"path": out_pars["path"]}
    save(model=model, save_pars=save_pars)

    log("#### Load   ###################################################")
    model2 = load(save_pars)

    log("#### Predict after Load   ###########################################")
    ypred = predict(model2, data_pars=data_pars, compute_pars=compute_pars, out_pars=out_pars)
    print(model2)