def linear_regression(X_train, X_test, Y_train, Y_test, plot=False):
    """Fit a single-feature linear regression and evaluate it on the test split.

    Args:
        X_train: Sized sequence of training inputs; it is reshaped into a
            ``(len(X_train), 1)`` column before fitting.
        X_test: Sized sequence of test inputs, reshaped the same way.
        Y_train: Targets passed to ``fit`` together with ``X_train``.
        Y_test: Targets the predictions are compared against.
        plot: Accepted for interface compatibility; not used by this function.

    Returns:
        Whatever ``regression_evaluate(Y_test, predictions)`` returns.
    """
    def _as_column(values):
        # One row per element, exactly one column.
        return np.array(values).reshape((len(values), 1))

    train_column = _as_column(X_train)
    test_column = _as_column(X_test)

    # Fit on the training column, then score the predictions for the test column.
    model = linear_model.LinearRegression()
    model.fit(train_column, Y_train)
    predictions = model.predict(test_column)
    return regression_evaluate(Y_test, predictions)
def linear_regression(X_train, X_test, Y_train, Y_test, plot=False):
    """Train a linear regression on one feature and evaluate its test predictions.

    Both ``X_train`` and ``X_test`` are converted to NumPy arrays and reshaped
    into a single column (``(len(X), 1)``) before being handed to the model.

    Args:
        X_train: Sized sequence of training inputs.
        X_test: Sized sequence of test inputs.
        Y_train: Training targets passed to ``fit``.
        Y_test: Test targets passed to ``regression_evaluate``.
        plot: Unused here; kept so existing callers keep working.

    Returns:
        The result of ``regression_evaluate(Y_test, predicted)``.
    """
    # Column-vector views of the two input sequences.
    train_features = np.array(X_train).reshape((len(X_train), 1))
    test_features = np.array(X_test).reshape((len(X_test), 1))

    estimator = linear_model.LinearRegression()
    estimator.fit(train_features, Y_train)

    predicted = estimator.predict(test_features)
    return regression_evaluate(Y_test, predicted)
        print('The true values is: %s' % true_values[i])
    return arithmetic, geometric

if __name__ == '__main__':
    # Script entry point: load the texts and their valence/arousal ratings,
    # build a word -> rating lookup from the lexicon for the chosen target,
    # and evaluate the arithmetic and geometric predictions against the
    # true ratings.
    ########################################### Hyper-parameters ###########################################
    target = 'arousal'  # values: "valence", "arousal"
    categorical = 'all'  # values: 'all', "book", "car", "laptop", "hotel", "news", "political"
    ########################################################################################################
    # Alternative data sources, kept for reference:
    # texts, valence, arousal = read_mix_data(categorical)
    from load_data import load_CVAT_3
    # texts, valence, arousal = load_CVAT_3('./resources/corpus 2009 sigma 1.5.csv','./resources/tokenized_texts.p', categorical=categorical)
    texts, valence, arousal = load_CVAT_3(
        './resources/CVAT (utf-8).csv',
        './resources/tokenized_texts_(newest3.31).p',
        categorical=categorical)

    lexicon = load_CVAW()
    # `ind` selects which field of each lexicon entry is used as the rating;
    # field 0 is used as the lookup key below.
    if target == 'valence':
        ind = 1
        true_values = valence
        print('Valence prediction...')
    elif target == 'arousal':
        ind = 2
        true_values = arousal
        print('Arousal prediction...')
    else:
        # Without this branch an invalid target only surfaced later as a
        # NameError on `ind`, which hides the real cause.
        raise ValueError(
            'Unknown target %r; expected "valence" or "arousal"' % (target,))
    d = {entry[0]: entry[ind] for entry in lexicon}

    arithmetic, geometric = va_prediction(texts, d, true_values)
    # The return value of regression_evaluate is discarded here; presumably
    # it reports its metrics itself -- TODO confirm.
    print('Prediction result (arithmetic average):')
    regression_evaluate(true_values, arithmetic)
    print('Prediction result (geometric average):')
    regression_evaluate(true_values, geometric)
    from mix_data import read_mix_data

    texts, valence, arousal = read_mix_data(categorical)

    if option == 'V':
        Y = valence
    elif option == 'A':
        Y = arousal
    else:
        raise Exception('Wrong parameters!')

    lexicon = load_CVAW(extended=using_extended_lexicon)
    d = dict()
    ind = 1 if option == 'V' else 2
    for l in lexicon:
        d[l[0]] = l[ind]

    predicted_ratings = mean_ratings(texts, d, mean_method, Y)
    print(predicted_ratings)
    print(Y)
    out = regression_evaluate(Y, predicted_ratings)

    draw_scatter(Y, predicted_ratings, 'True Values', 'Predicted Values', title='Scatter')

    out2 = cv(predicted_ratings, Y)

    Dims = 'Valence' if option == 'V' else 'Arousal'
    Mean_Method = 'Geometric' if mean_method == 'tf_geo' else 'Arithmetic'
    print('|%s|%s|False|%.3f|%.3f|%.3f|' % (Dims, Mean_Method, out[0], out[1], out[2]))
    print('|%s|%s|True|%.3f|%.3f|%.3f|' % (Dims, Mean_Method, out2[0], out2[1], out2[2]))
        Y = valence
    elif option == 'A':
        Y = arousal
    else:
        raise Exception('Wrong parameters!')

    lexicon = load_CVAW(extended=using_extended_lexicon)
    d = dict()
    ind = 1 if option == 'V' else 2
    for l in lexicon:
        d[l[0]] = l[ind]

    predicted_ratings = mean_ratings(texts, d, mean_method, Y)
    print(predicted_ratings)
    print(Y)
    out = regression_evaluate(Y, predicted_ratings)

    draw_scatter(Y,
                 predicted_ratings,
                 'True Values',
                 'Predicted Values',
                 title='Scatter')

    out2 = cv(predicted_ratings, Y)

    Dims = 'Valence' if option == 'V' else 'Arousal'
    Mean_Method = 'Geometric' if mean_method == 'tf_geo' else 'Arithmetic'
    print('|%s|%s|False|%.3f|%.3f|%.3f|' %
          (Dims, Mean_Method, out[0], out[1], out[2]))
    print('|%s|%s|True|%.3f|%.3f|%.3f|' %
          (Dims, Mean_Method, out2[0], out2[1], out2[2]))
if __name__ == '__main__':
    # Script entry point: load the texts and their valence/arousal ratings,
    # build a word -> rating lookup from the lexicon for the chosen target,
    # and evaluate the arithmetic and geometric predictions against the
    # true ratings.
    ########################################### Hyper-parameters ###########################################
    target = 'arousal'  # values: "valence", "arousal"
    categorical = 'all'  # values: 'all', "book", "car", "laptop", "hotel", "news", "political"
    ########################################################################################################
    # Alternative data sources, kept for reference:
    # texts, valence, arousal = read_mix_data(categorical)
    from load_data import load_CVAT_3
    # texts, valence, arousal = load_CVAT_3('./resources/corpus 2009 sigma 1.5.csv','./resources/tokenized_texts.p', categorical=categorical)
    texts, valence, arousal = load_CVAT_3(
        './resources/CVAT (utf-8).csv',
        './resources/tokenized_texts_(newest3.31).p',
        categorical=categorical)

    lexicon = load_CVAW()
    # `ind` selects which field of each lexicon entry is used as the rating;
    # field 0 is used as the lookup key below.
    if target == 'valence':
        ind = 1
        true_values = valence
        print('Valence prediction...')
    elif target == 'arousal':
        ind = 2
        true_values = arousal
        print('Arousal prediction...')
    else:
        # Without this branch an invalid target only surfaced later as a
        # NameError on `ind`, which hides the real cause.
        raise ValueError(
            'Unknown target %r; expected "valence" or "arousal"' % (target,))
    d = {entry[0]: entry[ind] for entry in lexicon}

    arithmetic, geometric = va_prediction(texts, d, true_values)
    # The return value of regression_evaluate is discarded here; presumably
    # it reports its metrics itself -- TODO confirm.
    print('Prediction result (arithmetic average):')
    regression_evaluate(true_values, arithmetic)
    print('Prediction result (geometric average):')
    regression_evaluate(true_values, geometric)