Example #1
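# assumes numpy as np and the project's decode, config, and save_prediction helpers are already imported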
def pred(model):
    model = model.model
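    # decode() presumably returns raw audio samples; keep the first 10,000 as a one-example batch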
    X_test = np.array([decode('fma_small/024/024420.mp3')[:10000]])
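    # add a trailing channel axis: (1, 10000) -> (1, 10000, 1)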
    X_test = np.expand_dims(X_test, axis=2)
    preds = model.predict_on_batch(X_test)
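    # write the single predicted waveform to config.model_predictions_path as a .wav file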
    save_prediction(preds[0, :],
                    config.model_predictions_path,
                    config.frame_rate,
                    ext=".wav")
Example #2
    def predict(self):
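        # run the network on the held-out test set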
        prediction = self.nn.predict(self.test_data)
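        # reload the raw CSV (header row skipped) so each 28x28 image can be saved beside its prediction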
        data = np.loadtxt(self.test_data.name,
                          dtype=np.uint8,
                          skiprows=1,
                          delimiter=',')
        x = np.array([i.reshape(28, 28) for i in data])

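        # save every image/prediction pair, indexed by position, under the model's name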
        for i in range(x.shape[0]):
            save_prediction(x[i], prediction[i], i, self.nn.model_name)
        return
Example #3
def pred(model):
    model = model.model
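    # decode a full track and split its first 1,320,000 samples into 30 equal-length segments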
    X_test = decode('fma_small/005/005159.mp3')
    print(X_test.shape)
    X_test = np.array(np.split(X_test[:1320000], 30))
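    # scale inputs up 10x before inference, then scale the predictions back down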
    X_test = 10.0 * np.expand_dims(X_test, axis=2)
    preds = model.predict_on_batch(X_test) / 10.0
    preds = preds.flatten()
    save_prediction(preds,
                    config.model_predictions_path,
                    config.frame_rate,
                    ext=".wav")
Example #4
def simple_pred(model):
    model = model.model
    test_track = os.path.join(config.simple_data_dir, "00021.wav")
    white_list = set(["00021.wav"])
    batch = []
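    # read each whitelisted track and keep its first 10,000 samples; read() presumably returns (rate, samples)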
    for track in os.listdir(config.simple_data_dir):
        if track in white_list:
            _, arr = read(os.path.join(config.simple_data_dir, track))
            batch.append(np.array(arr)[:10000])
    cur_batch = np.array(batch)
    x = np.expand_dims(cur_batch, axis=2)
    preds = model.predict_on_batch(x)
    save_prediction(preds[0, :],
                    config.model_predictions_path,
                    config.frame_rate,
                    ext=".wav")
Example #5
def tf_ml_baseline(block=200,
                   model_name="RandomForest",
                   data_name="bookcorpus",
                   downsample=-1,
                   history=None,
                   n_jobs=10,
                   device="cpu"):
    print("loading data")

    # tfidf as feature
    if data_name == "bookcorpus":
        if history is None:
            x_train, y_train = load_tfidf("train", block, verbose=True)
            x_test, y_test = load_tfidf("test", block, verbose=True)
        else:
            x_train, y_train = load_tfidf_long("train",
                                               block,
                                               verbose=True,
                                               history=history)
            x_test, y_test = load_tfidf_long("test",
                                             block,
                                             verbose=True,
                                             history=history)
    elif data_name == "coda19":
        x_train, y_train = coda_load_tfidf("train", block, verbose=True)
        x_test, y_test = coda_load_tfidf("test", block, verbose=True)
    else:
        print("Not supported yet!")
        quit()

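    # optional downsampling: the flag keeps a fixed 88,720-row subsample drawn with a fixed seed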
    if downsample != -1:
        random_index = np.random.RandomState(5516).permutation(
            x_train.shape[0])[:88720]
        x_train, y_train = x_train[random_index], y_train[random_index]

    # do sampling if the training data is too big
    if x_train.shape[0] > 1000000:
        index_list = np.random.RandomState(seed=RANDOM_SEED).permutation(
            x_train.shape[0])[:1000000]
        index_list = np.sort(index_list)
        x_train, y_train = x_train[index_list], y_train[index_list]

    x_train, y_train = x_train.astype(np.float32), y_train.astype(np.float32)
    x_test, y_test = x_test.astype(np.float32), y_test.astype(np.float32)

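    # densify the sparse tf-idf matrices for the sklearn-style regressors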
    x_train, y_train = x_train.todense(), y_train.todense()
    x_test, y_test = x_test.todense(), y_test.todense()

    print("train: x = {}, y = {}".format(str(x_train.shape),
                                         str(y_train.shape)))
    print("test: x = {}, y = {}".format(str(x_test.shape), str(y_test.shape)))
    print("building model using", model_name)

    # parameter setting
    rf_param = {
        "max_depth": 10,
        "random_state": RANDOM_SEED,
        "n_jobs": n_jobs,
        "n_estimators": 30,
        "verbose": 10,
    }
    lgbm_param = {
        "max_depth": 3,
        "num_leaves": 5,
        "random_state": RANDOM_SEED,
        "n_estimators": 100,
        "n_jobs": 1,
        "verbose": -1,
        "force_row_wise": True,
        "device": "gpu",
    }
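    # instantiate the chosen regressor; LGBM is wrapped in MultiOutputRegressor to handle multi-target output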
    if model_name == "RandomForest":
        model = RandomForestRegressor(**rf_param)
    elif model_name == "LGBM":
        model = MultiOutputRegressor(LGBMRegressor(**lgbm_param),
                                     n_jobs=n_jobs)
    else:
        print("Please use one of the available models: RandomForest or LGBM")
        return

    print("training")
    model.fit(x_train, y_train)

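    # pick model/result paths; runs that use history context get a distinct prefix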
    if history is None:
        model_output = os.path.join(
            model_dir, data_name,
            "block{}_{}.joblib".format(block, model_name))
        filename = os.path.join(result_dir, f"{data_name}_ml_baseline.json")
    else:
        model_output = os.path.join(
            model_dir, data_name,
            "history_block{}_{}.joblib".format(block, model_name))
        filename = os.path.join(result_dir,
                                f"history_exp_{data_name}_ml_baseline.json")

    # save model
    joblib.dump(model, model_output)

    # make prediction
    print("prediting")
    print("block number = {}".format(block))
    y_pred = model.predict(x_test)
    res = tfidf_metric(y_test, y_pred, device=device)
    print("cosine", res)
    print_tfidf_metric(
        {
            "cosine": float(res),
            "block": block,
            "model": model_name,
            "note": "clean - tfidf - downsample"
            if downsample != -1 else "clean - tfidf",
            "history": history,
        },
        filename=filename)

    # output y_pred
    if downsample == -1:
        if history:
            outpath = os.path.join(
                predict_dir, "bookcorpus",
                f"history_block{block}_{model_name}_h{history}.h5")
        else:
            outpath = os.path.join(predict_dir, "bookcorpus",
                                   f"block{block}_{model_name}.h5")
    else:
        outpath = os.path.join(predict_dir, "bookcorpus",
                               f"downsample_block{block}_{model_name}.h5")
    save_prediction(outpath, y_pred)