def main():
    """Fit a ridge regression for every alpha in ``ALPHAS`` and save the best.

    Steps, in order:

    1. Read the run parameters via ``params_setup("ridge")`` and the data via
       ``load_data``.
    2. Build one sample per timestep by flattening the preceding
       ``attention_len`` rows of ``X_cut``; the target is the value of
       ``y_cut`` ``horizon`` steps after the end of that window.
    3. Split chronologically: first 80% for training, the rest for testing.
    4. For each alpha, fit ``linear_model.Ridge``, compute the test RMSE and
       record which coefficients have magnitude >= ``variable_threshold``.
    5. Write the lowest-RMSE result, followed by the RMSE of every alpha, to
       ``para.output_filename``.

    Returns:
        None. Results are printed and written to ``para.output_filename``.
    """
    # Local import: only needed for the untruncated coefficient dump below.
    import sys

    # --------------- LOAD PARAMETERS, INITIALIZE VARS ---------------
    para = params_setup("ridge")  # "ridge" for ridge VAR
    thresh = para.variable_threshold  # coefficients >= this count as important
    offset = para.horizon  # how far into the future we predict
    attn_length = para.attention_len  # number of previous timesteps per sample

    # --------------- PREPARE DATA ---------------
    X_cut, y_cut = load_data(para)
    num_timesteps = X_cut.shape[0]
    num_features = X_cut.shape[1]

    # Each (timesteps, features) window must be flattened to 1D because the
    # model only accepts 2D input overall.
    X = np.array([
        X_cut[i - attn_length:i, :].flatten()
        for i in range(attn_length, num_timesteps - offset)
    ])
    # The window ending just before index i is paired with y_cut[i + offset].
    y = y_cut[attn_length + offset:num_timesteps]

    # Chronological split into training and testing.
    cutoff_idx = math.floor(X.shape[0] * 0.8)
    X_train, X_test = X[0:cutoff_idx, :], X[cutoff_idx:, :]
    y_train, y_test = y[0:cutoff_idx], y[cutoff_idx:]

    print("X_train shape:", X_train.shape)
    print("y_train shape:", y_train.shape)
    print("X_test shape:", X_test.shape)
    print("y_test shape:", y_test.shape)

    # Feature column legend:
    #   0      rated emotion signal
    #   1-13   MFCCs
    #   14-26  dMFCCs
    #   27-39  ddMFCCs
    #   40     clarity
    #   41     brightness
    #   42     key_strength
    #   43     rms
    #   44     centroid
    #   45     spread
    #   46     skewness
    #   47     kurtosis
    #   48-59  chroma
    #   60     mode
    #   61     compress
    #   62     hcdf
    #   63     flux
    #   64-74  lpcs

    # --------------- FIT ONE MODEL PER ALPHA ---------------
    results = []
    test_axis = range(len(y_test))
    for aa in ALPHAS:
        # NOTE(review): the `normalize` argument was removed in
        # scikit-learn 1.2; this call needs an older release -- confirm the
        # pinned version before upgrading.
        clf = linear_model.Ridge(alpha=aa, normalize=True)
        clf.fit(X_train, y_train)

        # Predict once and reuse for both the RMSE and the plot.
        predictions = clf.predict(X_test)

        # Positions (in the flattened window) of the important coefficients.
        inds = [
            pos for pos, coef in enumerate(clf.coef_) if abs(coef) >= thresh
        ]
        # Reduce a flattened position to its column within one timestep
        # (assumes the default row-major flatten of a NumPy array).
        inds_mod = [pos % num_features for pos in inds]

        RMSE = ((len(y_test) ** -1) * sum((predictions - y_test) ** 2)) ** .5
        print("RMSE for alpha " + str(aa) + ": " + str(RMSE))

        # Curves are drawn onto the current figure but never shown here.
        plt.plot(test_axis, predictions)
        plt.plot(test_axis, y_test)

        results.append(
            Result(aa, RMSE, clf.coef_, sorted(inds_mod), clf.intercept_))

    # --------------- PICK THE BEST ALPHA ---------------
    # min() keeps the first entry on ties, like a strict "<" scan would.
    best_result = min(results, key=lambda res: res.RMSE)
    print("Minimum RMSE: " + str(best_result.RMSE) + " for alpha=" +
          str(best_result.alpha))

    # --------------- WRITE RESULT OF BEST ALPHA TO FILE ---------------
    with open(para.output_filename, 'w') as f:
        f.write("RMSE: " + str(best_result.RMSE))
        f.write("\nAlpha: " + str(best_result.alpha))
        f.write("\nCoefficients:\n")
        # sys.maxsize disables summarisation; a NaN threshold is rejected
        # with ValueError by current NumPy releases.
        f.write(np.array2string(best_result.coefs, threshold=sys.maxsize))
        f.write("\nCoefficient indices over threshold " +
                str(para.variable_threshold) + ":\n")
        f.write(' '.join(str(x) for x in best_result.coef_indices))
        f.write("\nTotal number of coefficients over threshold: " +
                str(len(best_result.coef_indices)))
        f.write("\nRegression bias term: " + str(best_result.bias))

        # Write overall RMSE for each alpha as well.
        f.write("\n\nRMSEs for each alpha:\n")
        for result in results:
            f.write("Alpha: " + str(result.alpha) + ",\tRMSE: " +
                    str(result.RMSE) + "\n")

    print("Successfully wrote results to file " + para.output_filename)
def main():
    """Fit a linear autoregression on the target signal and save the result.

    Steps, in order:

    1. Read the run parameters via ``params_setup("ar")`` and the data via
       ``load_data`` (only ``y_cut`` is used as model input).
    2. Build one sample per timestep ``i`` from the ``attention_len`` values
       of ``y_cut`` that end ``horizon`` steps before ``i``; the target is
       ``y_cut[i]``.
    3. Split chronologically, with the test set either at the beginning
       (first 20%) or at the end (last 20%) depending on
       ``para.test_set_at_beginning``.
    4. Fit ``linear_model.LinearRegression``, compute the test RMSE and
       optionally plot predictions against the truth.
    5. Write the RMSE, coefficients, indices of coefficients with magnitude
       >= ``variable_threshold``, and the intercept to
       ``para.output_filename``.

    Returns:
        None. Results are printed and written to ``para.output_filename``.
    """
    # Local import: only needed for the untruncated coefficient dump below.
    import sys

    para = params_setup("ar")  # "ar" for autoregression
    # NOTE(review): the `normalize` argument was removed in scikit-learn 1.2;
    # this call needs an older release -- confirm the pinned version.
    clf = linear_model.LinearRegression(normalize=True)

    thresh = para.variable_threshold  # coefficients >= this count as important
    offset = para.horizon  # how far into the future we predict
    attn_length = para.attention_len  # length of the autoregression window

    (X_cut, y_cut) = load_data(para)

    # Autoregressive inputs: row k holds the window used to predict
    # y_cut[k + attn_length + offset].
    lead = attn_length + offset
    y_regress = np.array([
        y_cut[i - lead:i - offset] for i in range(lead, len(y_cut))
    ])

    # Split into training and testing. cutoff_idx indexes y_cut, so the
    # matching row in y_regress is shifted back by `lead`.
    test_fraction_first = para.test_set_at_beginning
    split_ratio = 0.2 if test_fraction_first else 0.8
    cutoff_idx = math.floor(y_regress.shape[0] * split_ratio)
    split_row = cutoff_idx - lead

    early_X, late_X = y_regress[0:split_row, :], y_regress[split_row:, :]
    early_y, late_y = y_cut[lead:cutoff_idx], y_cut[cutoff_idx:]
    if test_fraction_first:
        X_test, y_test, X_train, y_train = early_X, early_y, late_X, late_y
    else:
        X_train, y_train, X_test, y_test = early_X, early_y, late_X, late_y

    print("X_train shape:", X_train.shape)
    print("y_train shape:", y_train.shape)
    clf.fit(X_train, y_train)

    # Feature column legend of the underlying data set:
    #   0      rated emotion signal
    #   1-13   MFCCs
    #   14-26  dMFCCs
    #   27-39  ddMFCCs
    #   40     clarity
    #   41     brightness
    #   42     key_strength
    #   43     rms
    #   44     centroid
    #   45     spread
    #   46     skewness
    #   47     kurtosis
    #   48-59  chroma
    #   60     mode
    #   61     compress
    #   62     hcdf
    #   63     flux
    #   64-74  lpcs

    # Determine which variables (timesteps, since this is AR) matter most.
    inds = [pos for pos, coef in enumerate(clf.coef_) if abs(coef) >= thresh]

    # Predict once and reuse for both the RMSE and the plot.
    predictions = clf.predict(X_test)
    RMSE = ((len(y_test) ** -1) * sum((predictions - y_test) ** 2)) ** 0.5
    print("RMSE: " + str(RMSE))

    if para.show_plots:
        # The last two test points are left out of the plot.
        shown_truth = y_test[:-2]
        plt.plot(range(len(shown_truth)), predictions[:-2])
        plt.plot(range(len(shown_truth)), shown_truth)
        plt.title('Autoregression predictions vs truth')
        plt.legend(['predictions', 'truth'], loc='upper right')
        plt.show()

    with open(para.output_filename, 'w') as f:
        f.write("RMSE: " + str(RMSE))
        f.write("\nCoefficients:\n")
        # sys.maxsize disables summarisation; a NaN threshold is rejected
        # with ValueError by current NumPy releases.
        f.write(np.array2string(clf.coef_, threshold=sys.maxsize))
        f.write("\nCoefficient indices over threshold " +
                str(para.variable_threshold) + ":\n")
        f.write(' '.join(str(x) for x in sorted(inds)))
        # Count the same (>= thresh) set that was just listed, so the total
        # always agrees with the indices written above.
        f.write("\nTotal number of coefficients over threshold: " +
                str(len(inds)))
        f.write("\nRegression bias term: " + str(clf.intercept_))

    print("Successfully wrote results to file " + para.output_filename)