from load_data import load_CVAW
from load_data import load_NTUSD

def save_list(filename, items):
    """Write each non-empty string in *items* to *filename*, one per line.

    Args:
        filename: Destination path; the file is created/overwritten, UTF-8.
        items: Iterable of strings; empty strings are skipped.
    """
    # Renamed the parameter from `list` so the builtin is not shadowed.
    with open(filename, 'w', encoding='utf-8') as f:
        for entry in items:
            if entry != "":
                f.write(entry + "\n")
# Compare the CVAW lexicon against the NTUSD positive/negative word lists
# and persist the overlaps/differences under ./resources.
lexicon_data = load_CVAW()
cvaw_words = [line[0] for line in lexicon_data]
print('The words in CVAW lexicons: %s' % str(cvaw_words))
NTUSD_positive_words = load_NTUSD('./resources/ntusd-positive (zh-tw).txt')
NTUSD_negative_words = load_NTUSD('./resources/ntusd-negative (zh-tw).txt')
print('NTUSD')
print("Positive: %s" % str(NTUSD_positive_words))
print("Negative: %s" % str(NTUSD_negative_words))

# Build each set once instead of reconstructing it in every expression below.
cvaw_set = set(cvaw_words)
positive_set = set(NTUSD_positive_words)
negative_set = set(NTUSD_negative_words)

print('same words in ntusd_postive')
print(sorted(positive_set & cvaw_set))
print('same words in ntusd_negative')
print(sorted(negative_set & cvaw_set))

# Words present in CVAW and in either NTUSD polarity list.
# (p & c) | (n & c) == (p | n) & c by set algebra.
common_words = sorted((positive_set | negative_set) & cvaw_set)

# NTUSD words that CVAW does not cover, per polarity and combined.
ntusd_p = sorted(positive_set - cvaw_set)
ntusd_n = sorted(negative_set - cvaw_set)
not_in_CVAW = sorted((positive_set | negative_set) - cvaw_set)

save_list('./resources/common_words.txt', common_words)
save_list('./resources/different_words.txt', not_in_CVAW)
save_list('./resources/NTUSD_p.txt', ntusd_p)
# BUG FIX: `ntusd_n` was computed but never written out (the original file is
# truncated right after the NTUSD_p save); persist it symmetrically.
save_list('./resources/NTUSD_n.txt', ntusd_n)
        # NOTE(review): orphaned fragment — the enclosing function's `def`
        # line is missing from this file (judging by the call sites below it
        # is the tail of a `va_prediction(texts, d, true_values)` helper).
        # As written, the indentation/`return` is a syntax error at module
        # level; the function header must be restored before this file runs.
        print('The predicted values is (using Geometric  Average): %s'% predicted_value_g)
        geometric.append(predicted_value_g)
        print('The true values is: %s' % true_values[i])
    return arithmetic, geometric

if __name__ == '__main__':
    ########################################### Hyper-parameters ###########################################
    target = 'arousal'  # values: "valence", "arousal"
    categorical = 'all'  # values: 'all', "book", "car", "laptop", "hotel", "news", "political"
    ########################################################################################################
    from load_data import load_CVAT_3

    # Load the CVAT corpus texts together with their gold VA ratings.
    texts, valence, arousal = load_CVAT_3(
        './resources/CVAT (utf-8).csv',
        './resources/tokenized_texts_(newest3.31).p',
        categorical=categorical)

    lexicon = load_CVAW()
    d = dict()
    # CVAW row layout (per the indexing below): index 1 = valence rating,
    # index 2 = arousal rating.
    if target == 'valence':
        ind = 1
        true_values = valence
        print('Valence prediction...')
    elif target == 'arousal':
        ind = 2
        true_values = arousal
        print('Arousal prediction...')  # BUG FIX: was misspelled "preddiction"
    else:
        # BUG FIX: an unrecognized target used to fall through silently and
        # crash later with a NameError on `ind`; fail fast instead.
        raise ValueError("target must be 'valence' or 'arousal', got %r" % target)
    # Map each lexicon word to its rating on the selected dimension.
    for l in lexicon:
        d[l[0]] = l[ind]

    arithmetic, geometric = va_prediction(texts, d, true_values)
    print('Prediction result (arithmetic average):')
    regression_evaluate(true_values, arithmetic)
    # ------------------------------------------------------------------
    # NOTE(review): the remainder of this __main__ block appears pasted in
    # from a different script (twice, nearly verbatim).  The names `option`,
    # `using_extended_lexicon`, `mean_method`, `mean_ratings`,
    # `regression_evaluate`, `draw_scatter` and `cv` are never defined or
    # imported in this file, so this section raises NameError at runtime.
    # ------------------------------------------------------------------
    # texts, valence, arousal = load_CVAT_2("./resources/valence_arousal(sigma=1.5).csv", categorical=categorical)
    from load_data import load_CVAT_3
    # texts, valence, arousal = load_CVAT_3('./resources/corpus 2009 sigma 1.5.csv','./resources/tokenized_texts.p', categorical=categorical)
    # texts, valence, arousal = load_CVAT_3('./resources/valence_arousal(sigma=1.5).csv','./resources/tokenized_texts_(old).p', categorical=categorical)
    from mix_data import read_mix_data

    texts, valence, arousal = read_mix_data(categorical)

    # Select the gold ratings for the requested dimension.
    # NOTE(review): `option` is undefined here — presumably 'V' or 'A'.
    if option == 'V':
        Y = valence
    elif option == 'A':
        Y = arousal
    else:
        raise Exception('Wrong parameters!')

    # Build word -> rating dict; index 1 is valence, index 2 is arousal
    # (consistent with the `ind` selection earlier in this block).
    lexicon = load_CVAW(extended=using_extended_lexicon)
    d = dict()
    ind = 1 if option == 'V' else 2
    for l in lexicon:
        d[l[0]] = l[ind]

    # Predict per-text ratings from the lexicon and evaluate against Y.
    predicted_ratings = mean_ratings(texts, d, mean_method, Y)
    print(predicted_ratings)
    print(Y)
    out = regression_evaluate(Y, predicted_ratings)

    draw_scatter(Y, predicted_ratings, 'True Values', 'Predicted Values', title='Scatter')

    out2 = cv(predicted_ratings, Y)

    Dims = 'Valence' if option == 'V' else 'Arousal'
    # ------------------------------------------------------------------
    # NOTE(review): duplicate of the section immediately above.
    # ------------------------------------------------------------------
    # texts, valence, arousal = load_CVAT_2("./resources/valence_arousal(sigma=1.5).csv", categorical=categorical)
    from load_data import load_CVAT_3
    # texts, valence, arousal = load_CVAT_3('./resources/corpus 2009 sigma 1.5.csv','./resources/tokenized_texts.p', categorical=categorical)
    # texts, valence, arousal = load_CVAT_3('./resources/valence_arousal(sigma=1.5).csv','./resources/tokenized_texts_(old).p', categorical=categorical)
    from mix_data import read_mix_data

    texts, valence, arousal = read_mix_data(categorical)

    if option == 'V':
        Y = valence
    elif option == 'A':
        Y = arousal
    else:
        raise Exception('Wrong parameters!')

    lexicon = load_CVAW(extended=using_extended_lexicon)
    d = dict()
    ind = 1 if option == 'V' else 2
    for l in lexicon:
        d[l[0]] = l[ind]

    predicted_ratings = mean_ratings(texts, d, mean_method, Y)
    print(predicted_ratings)
    print(Y)
    out = regression_evaluate(Y, predicted_ratings)

    draw_scatter(Y,
                 predicted_ratings,
                 'True Values',
                 'Predicted Values',
                 title='Scatter')
from load_data import load_CVAW
from load_data import load_NTUSD


def save_list(filename, list):
    """Write every non-blank element of *list* to *filename*, one per line (UTF-8)."""
    non_empty = (entry for entry in list if entry != "")
    with open(filename, 'w', encoding='utf-8') as out:
        out.writelines(entry + "\n" for entry in non_empty)


# Duplicate of the CVAW-vs-NTUSD comparison earlier in this file
# (auto-formatted copy); kept intact apart from repairing the truncation.
lexicon_data = load_CVAW()
cvaw_words = [line[0] for line in lexicon_data]
print('The words in CVAW lexicons: %s' % str(cvaw_words))
NTUSD_positive_words = load_NTUSD('./resources/ntusd-positive (zh-tw).txt')
NTUSD_negative_words = load_NTUSD('./resources/ntusd-negative (zh-tw).txt')
print('NTUSD')
print("Positive: %s" % str(NTUSD_positive_words))
print("Negative: %s" % str(NTUSD_negative_words))
print('same words in ntusd_postive')
print(sorted(list(set(NTUSD_positive_words) & set(cvaw_words))))
print('same words in ntusd_negative')
print(sorted(list(set(NTUSD_negative_words) & set(cvaw_words))))
# Words shared between CVAW and either NTUSD polarity list.
common_words = sorted(
    list((set(NTUSD_positive_words) & set(cvaw_words))
         | (set(NTUSD_negative_words) & set(cvaw_words))))

# NTUSD words missing from CVAW, per polarity.
ntusd_p = (sorted(list(set(NTUSD_positive_words) - set(cvaw_words))))
ntusd_n = (sorted(list(set(NTUSD_negative_words) - set(cvaw_words))))

# BUG FIX: this statement was truncated mid-expression ("sorted(" with no
# argument or closing paren — a syntax error); restored from the identical
# computation earlier in this file.
not_in_CVAW = sorted(
    list((set(NTUSD_positive_words) - set(cvaw_words))
         | (set(NTUSD_negative_words) - set(cvaw_words))))

if __name__ == '__main__':
    ########################################### Hyper-parameters ###########################################
    target = 'arousal'  # values: "valence", "arousal"
    categorical = 'all'  # values: 'all', "book", "car", "laptop", "hotel", "news", "political"
    ########################################################################################################
    from load_data import load_CVAT_3

    # Load the CVAT corpus texts together with their gold VA ratings.
    texts, valence, arousal = load_CVAT_3(
        './resources/CVAT (utf-8).csv',
        './resources/tokenized_texts_(newest3.31).p',
        categorical=categorical)

    lexicon = load_CVAW()
    d = dict()
    # CVAW row layout (per the indexing below): index 1 = valence rating,
    # index 2 = arousal rating.
    if target == 'valence':
        ind = 1
        true_values = valence
        print('Valence prediction...')
    elif target == 'arousal':
        ind = 2
        true_values = arousal
        print('Arousal prediction...')  # BUG FIX: was misspelled "preddiction"
    else:
        # BUG FIX: an unrecognized target used to fall through silently and
        # crash later with a NameError on `ind`; fail fast instead.
        raise ValueError("target must be 'valence' or 'arousal', got %r" % target)
    # Map each lexicon word to its rating on the selected dimension.
    for l in lexicon:
        d[l[0]] = l[ind]

    arithmetic, geometric = va_prediction(texts, d, true_values)
    print('Prediction result (arithmetic average):')
    regression_evaluate(true_values, arithmetic)