def linear_regression(X_train, X_test, Y_train, Y_test, plot=False):
    """Fit a single-feature linear regression and score it on the test split.

    Both feature sequences are converted to ``(n_samples, 1)`` column arrays,
    a ``linear_model.LinearRegression`` is fitted on the training pair, and
    the predictions for the test features are passed to
    ``regression_evaluate`` together with ``Y_test``.

    Args:
        X_train: Training feature values, one per sample.
        X_test: Test feature values, one per sample.
        Y_train: Training targets handed to ``fit``.
        Y_test: True targets the predictions are evaluated against.
        plot: Accepted by the signature but not used in this body.

    Returns:
        Whatever ``regression_evaluate(Y_test, predictions)`` returns.
    """
    def _as_column(values):
        # The estimator is given column vectors: one row per sample.
        return np.array(values).reshape((len(values), 1))

    train_features = _as_column(X_train)
    test_features = _as_column(X_test)

    model = linear_model.LinearRegression()
    model.fit(train_features, Y_train)
    return regression_evaluate(Y_test, model.predict(test_features))
def linear_regression(X_train, X_test, Y_train, Y_test, plot=False):
    """Train a linear regressor on one feature and evaluate its predictions.

    Args:
        X_train: Training feature values; reshaped to ``(len(X_train), 1)``.
        X_test: Test feature values; reshaped to ``(len(X_test), 1)``.
        Y_train: Targets used to fit the model.
        Y_test: Targets the test predictions are compared with.
        plot: Present in the signature only; this body never reads it.

    Returns:
        The result of ``regression_evaluate(Y_test, predictions)``.
    """
    # Shape both splits as column vectors before handing them to the model.
    n_train = len(X_train)
    n_test = len(X_test)
    train_matrix = np.array(X_train).reshape((n_train, 1))
    test_matrix = np.array(X_test).reshape((n_test, 1))

    # Fit on the training split, then predict the held-out split.
    regressor = linear_model.LinearRegression()
    regressor.fit(train_matrix, Y_train)
    predictions = regressor.predict(test_matrix)

    return regression_evaluate(Y_test, predictions)
# NOTE(review): the line below is a whitespace-collapsed paste holding two units:
#   1. the tail of a helper whose header is outside this view; it prints one
#      entry of ``true_values`` and returns the pair ``arithmetic, geometric``;
#   2. the ``__main__`` entry point, which picks a target ("valence" or
#      "arousal"), loads texts and ratings with ``load_CVAT_3`` and the lexicon
#      with ``load_CVAW``, maps each lexicon row's first field to its column
#      ``ind`` (1 for valence, 2 for arousal), calls ``va_prediction`` and passes
#      both returned averages to ``regression_evaluate``.
# If ``target`` is neither supported value, ``ind`` and ``true_values`` are never
# bound, so their later uses raise NameError.
print('The true values is: %s' % true_values[i]) return arithmetic, geometric if __name__ == '__main__': ########################################### Hyper-parameters ########################################### target = 'arousal' # values: "valence", "arousal" categorical = 'all' # values: 'all', "book", "car", "laptop", "hotel", "news", "political" ######################################################################################################## # texts, valence, arousal = read_mix_data(categorical) from load_data import load_CVAT_3 # texts, valence, arousal = load_CVAT_3('./resources/corpus 2009 sigma 1.5.csv','./resources/tokenized_texts.p', categorical=categorical) texts, valence, arousal = load_CVAT_3('./resources/CVAT (utf-8).csv','./resources/tokenized_texts_(newest3.31).p', categorical=categorical) lexicon = load_CVAW() d = dict() if target == 'valence': ind = 1 true_values = valence print('Valence prediction...') elif target == 'arousal': ind = 2 true_values = arousal print('Arousal preddiction...') for l in lexicon: d[l[0]] = l[ind] arithmetic, geometric = va_prediction(texts, d, true_values) print('Prediction result (arithmetic average):') regression_evaluate(true_values, arithmetic) print('Prediction result (geometric average):') regression_evaluate(true_values, geometric)
from mix_data import read_mix_data texts, valence, arousal = read_mix_data(categorical) if option == 'V': Y = valence elif option == 'A': Y = arousal else: raise Exception('Wrong parameters!') lexicon = load_CVAW(extended=using_extended_lexicon) d = dict() ind = 1 if option == 'V' else 2 for l in lexicon: d[l[0]] = l[ind] predicted_ratings = mean_ratings(texts, d, mean_method, Y) print(predicted_ratings) print(Y) out = regression_evaluate(Y, predicted_ratings) draw_scatter(Y, predicted_ratings, 'True Values', 'Predicted Values', title='Scatter') out2 = cv(predicted_ratings, Y) Dims = 'Valence' if option == 'V' else 'Arousal' Mean_Method = 'Geometric' if mean_method == 'tf_geo' else 'Arithmetic' print('|%s|%s|False|%.3f|%.3f|%.3f|' % (Dims, Mean_Method, out[0], out[1], out[2])) print('|%s|%s|True|%.3f|%.3f|%.3f|' % (Dims, Mean_Method, out2[0], out2[1], out2[2]))
Y = valence elif option == 'A': Y = arousal else: raise Exception('Wrong parameters!') lexicon = load_CVAW(extended=using_extended_lexicon) d = dict() ind = 1 if option == 'V' else 2 for l in lexicon: d[l[0]] = l[ind] predicted_ratings = mean_ratings(texts, d, mean_method, Y) print(predicted_ratings) print(Y) out = regression_evaluate(Y, predicted_ratings) draw_scatter(Y, predicted_ratings, 'True Values', 'Predicted Values', title='Scatter') out2 = cv(predicted_ratings, Y) Dims = 'Valence' if option == 'V' else 'Arousal' Mean_Method = 'Geometric' if mean_method == 'tf_geo' else 'Arithmetic' print('|%s|%s|False|%.3f|%.3f|%.3f|' % (Dims, Mean_Method, out[0], out[1], out[2])) print('|%s|%s|True|%.3f|%.3f|%.3f|' % (Dims, Mean_Method, out2[0], out2[1], out2[2]))
if __name__ == '__main__':
    # Script entry point: predict one affective dimension for every text from
    # the lexicon ratings and report the evaluation of both averaging methods.

    ########################### Hyper-parameters ###########################
    target = 'arousal'  # values: "valence", "arousal"
    categorical = 'all'  # values: 'all', "book", "car", "laptop", "hotel", "news", "political"
    ########################################################################

    # Reject an unsupported target before the corpus and lexicon are loaded.
    # Without this check ``ind`` and ``true_values`` would stay unbound and the
    # script would only fail later with a NameError.
    if target not in ('valence', 'arousal'):
        raise ValueError(
            'Unknown target %r; expected "valence" or "arousal"' % (target,))

    # Alternative data sources kept from the original script:
    # texts, valence, arousal = read_mix_data(categorical)
    # texts, valence, arousal = load_CVAT_3('./resources/corpus 2009 sigma 1.5.csv','./resources/tokenized_texts.p', categorical=categorical)
    from load_data import load_CVAT_3
    texts, valence, arousal = load_CVAT_3(
        './resources/CVAT (utf-8).csv',
        './resources/tokenized_texts_(newest3.31).p',
        categorical=categorical)

    lexicon = load_CVAW()

    # ``ind`` is the position of the selected rating inside each lexicon row.
    if target == 'valence':
        ind = 1
        true_values = valence
        print('Valence prediction...')
    else:  # target == 'arousal' (validated above)
        ind = 2
        true_values = arousal
        print('Arousal preddiction...')

    # Map each lexicon row's first field to its rating for the chosen target.
    d = {row[0]: row[ind] for row in lexicon}

    arithmetic, geometric = va_prediction(texts, d, true_values)

    print('Prediction result (arithmetic average):')
    regression_evaluate(true_values, arithmetic)
    print('Prediction result (geometric average):')
    regression_evaluate(true_values, geometric)