def pred(model):
    """Run the wrapped model on one hard-coded audio clip and save the output.

    Decodes 'fma_small/024/024420.mp3', keeps its first 10000 entries, feeds
    them to ``model.model.predict_on_batch`` as a single-item batch and hands
    the first prediction to ``save_prediction``.

    Args:
        model: wrapper object exposing the network as its ``.model`` attribute.
    """
    network = model.model

    # presumably decode() returns a 1-D array of audio samples -- TODO confirm
    clip = decode('fma_small/024/024420.mp3')[:10000]

    # np.matrix is always 2-D, so axis=2 appends a trailing third axis.
    batch = np.expand_dims(np.matrix([clip]), axis=2)

    outputs = network.predict_on_batch(batch)
    first_output = outputs[0, :]
    save_prediction(
        first_output,
        config.model_predictions_path,
        config.frame_rate,
        ext=".wav",
    )
def predict(self):
    """Predict on the test file and save every image with its prediction.

    ``self.nn.predict`` is called on ``self.test_data``; the same file (via
    ``self.test_data.name``) is then re-read as comma-separated uint8 values,
    skipping the first line, and each row is reshaped to a 28x28 image. Every
    image is passed to ``save_prediction`` together with its prediction, its
    row index and ``self.nn.model_name``.

    Returns:
        None.
    """
    prediction = self.nn.predict(self.test_data)

    # ndmin=2 keeps the result two-dimensional even when the file holds a
    # single data row; without it loadtxt returns a 1-D array, the loop below
    # would iterate over scalars and reshape(28, 28) would raise.
    data = np.loadtxt(
        self.test_data.name,
        dtype=np.uint8,
        skiprows=1,
        delimiter=',',
        ndmin=2,
    )
    x = np.array([row.reshape(28, 28) for row in data])

    for i in range(x.shape[0]):
        save_prediction(x[i], prediction[i], i, self.nn.model_name)
    return
def pred(model):
    """Run the wrapped model on 30 chunks of one hard-coded clip and save it.

    Decodes 'fma_small/005/005159.mp3', splits its first 1,320,000 entries
    into 30 equal chunks (44,000 each), predicts on them as one batch and
    saves the flattened output through ``save_prediction``.

    Args:
        model: wrapper object exposing the network as its ``.model`` attribute.
    """
    model = model.model
    X_test = decode('fma_small/005/005159.mp3')
    # Function-call form: the Python 2 ``print x`` statement is a SyntaxError
    # on Python 3, while this spelling prints the same text on both.
    print(X_test.shape)

    # np.split needs the slice length to be divisible by 30, so a clip shorter
    # than 1,320,000 entries may raise here.
    X_test = np.matrix(np.split(X_test[:1320000], 30))

    # Inputs are scaled up by 10 and the outputs scaled back down by 10.
    X_test = 10.0 * np.expand_dims(X_test, axis=2)
    preds = model.predict_on_batch(X_test) / 10.0

    # Concatenate the per-chunk outputs back into one flat signal.
    preds = preds.flatten()
    save_prediction(preds, config.model_predictions_path, config.frame_rate, ext=".wav")
def simple_pred(model):
    """Predict on the white-listed tracks of the simple data set and save one.

    Every file in ``config.simple_data_dir`` whose name is white-listed is
    loaded with ``read``, truncated to its first 10000 entries and stacked
    into one batch. Only the first prediction of that batch is saved.

    Args:
        model: wrapper object exposing the network as its ``.model`` attribute.
    """
    network = model.model

    # NOTE(review): this path is computed but never used below.
    test_track = os.path.join(config.simple_data_dir, "00021.wav")

    allowed_names = {"00021.wav"}
    clips = []
    for file_name in os.listdir(config.simple_data_dir):
        if file_name not in allowed_names:
            continue
        # read() yields a pair; only its second element (the data) is kept.
        _, samples = read(os.path.join(config.simple_data_dir, file_name))
        clips.append(np.array(samples)[:10000])

    # Add an axis at position 2 before handing the batch to the network.
    inputs = np.expand_dims(np.array(clips), axis=2)
    outputs = network.predict_on_batch(inputs)

    save_prediction(
        outputs[0, :],
        config.model_predictions_path,
        config.frame_rate,
        ext=".wav",
    )
def tf_ml_baseline(block=200, model_name="RandomForest", data_name="bookcorpus",
                   downsample=-1, history=None, n_jobs=10, device="cpu"):
    """Train, save and evaluate a TF-IDF regression baseline.

    Loads the train/test TF-IDF matrices, optionally subsamples the training
    set, fits the requested regressor, dumps it with joblib, scores the test
    predictions with ``tfidf_metric`` and writes both the metric record and
    the raw predictions to disk.

    Args:
        block: block size forwarded to the data loaders and embedded in the
            output file names.
        model_name: ``"RandomForest"`` or ``"LGBM"``.
        data_name: ``"bookcorpus"`` or ``"coda19"``; any other value prints
            "Not supported yet!" and quits.
        downsample: ``-1`` disables downsampling. Any other value keeps a
            fixed-seed random subset of at most 88720 training rows (the
            value itself is not used as the subset size).
        history: ``None`` uses ``load_tfidf``; otherwise, for bookcorpus, the
            value is forwarded to ``load_tfidf_long``. It also selects the
            ``history_*`` output file names.
        n_jobs: parallelism of the random forest, or of the
            ``MultiOutputRegressor`` wrapper around LightGBM.
        device: forwarded to ``tfidf_metric``.

    Returns:
        None. Results are printed and written to disk.

    Raises:
        ValueError: if ``model_name`` is not one of the supported models.
    """
    # Fail fast: previously an unknown model name only printed a message and
    # then crashed with UnboundLocalError at model.fit(), after the whole
    # data set had already been loaded and densified.
    if model_name not in ("RandomForest", "LGBM"):
        raise ValueError(
            "Please use the available model (RandomForest or LGBM), "
            "got {!r}".format(model_name))

    print("loading data")

    # tfidf as feature
    if data_name == "bookcorpus":
        if history is None:
            x_train, y_train = load_tfidf("train", block, verbose=True)
            x_test, y_test = load_tfidf("test", block, verbose=True)
        else:
            x_train, y_train = load_tfidf_long("train", block, verbose=True, history=history)
            x_test, y_test = load_tfidf_long("test", block, verbose=True, history=history)
    elif data_name == "coda19":
        x_train, y_train = coda_load_tfidf("train", block, verbose=True)
        x_test, y_test = coda_load_tfidf("test", block, verbose=True)
    else:
        print("Not supported yet!")
        quit()

    if downsample != -1:
        # NOTE(review): the subset size is the constant 88720, not
        # `downsample` -- confirm this is intended.
        random_index = np.random.RandomState(5516).permutation(
            x_train.shape[0])[:88720]
        x_train, y_train = x_train[random_index], y_train[random_index]

    # do sampling if the training data is too big
    if x_train.shape[0] > 1000000:
        index_list = np.random.RandomState(seed=RANDOM_SEED).permutation(
            x_train.shape[0])[:1000000]
        index_list = np.sort(index_list)
        x_train, y_train = x_train[index_list], y_train[index_list]

    x_train, y_train = x_train.astype(np.float32), y_train.astype(np.float32)
    x_test, y_test = x_test.astype(np.float32), y_test.astype(np.float32)

    x_train, y_train = x_train.todense(), y_train.todense()
    x_test, y_test = x_test.todense(), y_test.todense()

    print("train: x = {}, y = {}".format(str(x_train.shape), str(y_train.shape)))
    print("test: x = {}, y = {}".format(str(x_test.shape), str(y_test.shape)))

    print("building model using", model_name)

    # parameter setting
    rf_param = {
        "max_depth": 10,
        "random_state": RANDOM_SEED,
        "n_jobs": n_jobs,
        "n_estimators": 30,
        "verbose": 10,
    }
    # NOTE(review): LightGBM's device is fixed to "gpu" and does not follow
    # the `device` argument -- confirm this is intended.
    lgbm_param = {
        "max_depth": 3,
        "num_leaves": 5,
        "random_state": RANDOM_SEED,
        "n_estimators": 100,
        "n_jobs": 1,
        "verbose": -1,
        "force_row_wise": True,
        "device": "gpu",
    }

    if model_name == "RandomForest":
        model = RandomForestRegressor(**rf_param)
    else:
        # model_name == "LGBM" (validated at the top of the function).
        model = MultiOutputRegressor(LGBMRegressor(**lgbm_param), n_jobs=n_jobs)

    print("training")
    model.fit(x_train, y_train)

    if history is None:
        model_output = os.path.join(
            model_dir, data_name,
            "block{}_{}.joblib".format(block, model_name))
        filename = os.path.join(result_dir, f"{data_name}_ml_baseline.json")
    else:
        model_output = os.path.join(
            model_dir, data_name,
            "history_block{}_{}.joblib".format(block, model_name))
        filename = os.path.join(result_dir, f"history_exp_{data_name}_ml_baseline.json")

    # save model
    joblib.dump(model, model_output)

    # make prediction
    print("prediting")
    print("block number = {}".format(block))
    y_pred = model.predict(x_test)
    res = tfidf_metric(y_test, y_pred, device=device)
    print("cosine", res)
    print_tfidf_metric(
        {
            "cosine": float(res),
            "block": block,
            "model": model_name,
            "note": "clean - tfidf - downsample" if downsample != -1 else "clean - tfidf",
            "history": history,
        },
        filename=filename)

    # output y_pred
    # NOTE(review): predictions always go under the "bookcorpus" directory,
    # even when data_name is "coda19" -- confirm this is intended.
    if downsample == -1:
        if history:
            outpath = os.path.join(
                predict_dir, "bookcorpus",
                f"history_block{block}_{model_name}_h{history}.h5")
        else:
            outpath = os.path.join(predict_dir, "bookcorpus",
                                   f"block{block}_{model_name}.h5")
    else:
        outpath = os.path.join(predict_dir, "bookcorpus",
                               f"downsample_block{block}_{model_name}.h5")
    save_prediction(outpath, y_pred)