    # Fragment of a lexicon-based polarity classifier: assumes `import numpy as np`,
    # a valence lexicon (`words`, `valence`), input `texts` with gold `labels`,
    # and an `avg_valence` helper (sketched after this fragment).
    words, valence = np.array(words), np.array(valence)
    avg = np.average(valence)  # avg = 5.1511713456 for this lexicon
    words_pos, valence_pos = words[valence > avg], valence[valence > avg]
    words_neg, valence_neg = words[valence < avg], valence[valence < avg]

    pos = avg_valence(texts, words_pos, valence_pos)
    neg = avg_valence(texts, words_neg, valence_neg)

    from visualization import draw_scatter_with_color

    draw_scatter_with_color(pos, neg, labels, 'pos', 'neg')
    # classify: a score of -1 presumably flags texts containing no lexicon
    # words, so fall back to the opposite polarity in that case
    polarity = []
    for pos_score, neg_score in zip(pos, neg):
        if pos_score == -1:
            polarity.append(0)
        elif neg_score == -1:
            polarity.append(1)
        else:
            if pos_score > neg_score:
                polarity.append(1)
            else:
                polarity.append(0)

    from analysis import analysis_result

    print(labels)
    print(polarity)
    analysis_result(labels, polarity)
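
# The fragment above relies on an avg_valence helper that is not shown. A
# minimal sketch of what it presumably does, assuming it returns the mean
# valence of the lexicon words found in each text, with -1 as the sentinel
# for "no lexicon word matched" (the branches above test exactly that value):
import numpy as np


def avg_valence(texts, lexicon_words, lexicon_valence):
    word_to_valence = dict(zip(lexicon_words, lexicon_valence))
    scores = []
    for text in texts:
        hits = [word_to_valence[w] for w in text.split() if w in word_to_valence]
        scores.append(np.mean(hits) if hits else -1)
    return scores
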
__author__ = "hs"
__author__ = "hs"
__author__ = "NLP-PC"
from load_data import load_selected_data
from vectorizers import TFIDF_estimator
from classifiers import mNB
from analysis import analysis_result

print("Start")
vectorizer = TFIDF_estimator()
texts, train_labels = load_selected_data(data_type="train")
transformed_train = vectorizer.fit_transform(texts)
testdata, true_labels = load_selected_data(data_type="test")
transformed_test = vectorizer.transform(testdata)

predict = mNB(transformed_train, train_labels, transformed_test)

analysis_result(predict, true_labels)
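
# mNB and analysis_result come from the project's own modules. A plausible
# reading, assuming mNB wraps scikit-learn's MultinomialNB and analysis_result
# prints standard classification metrics (both are assumptions, not the
# project's confirmed code):
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score, classification_report


def mNB(train_X, train_y, test_X):
    # fit a multinomial Naive Bayes model and return test-set predictions
    clf = MultinomialNB()
    clf.fit(train_X, train_y)
    return clf.predict(test_X)


def analysis_result(predicted, true):
    # report accuracy plus per-class precision/recall/F1
    print('Accuracy:', accuracy_score(true, predicted))
    print(classification_report(true, predicted))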
import numpy as np


def buildWordVector(text, size):
    # Only the tail of this helper survived in the excerpt; the head is
    # reconstructed from the standard gensim word2vec averaging pattern
    # (an assumption, not the project's confirmed code). Requires a trained
    # model imdb_w2v; newer gensim versions expose vectors via imdb_w2v.wv.
    vec = np.zeros((1, size))
    count = 0
    for word in text:
        try:
            vec += imdb_w2v.wv[word].reshape((1, size))
            count += 1
        except KeyError:
            continue
    if count != 0:
        vec /= count
    return vec
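
# buildWordVector assumes a word2vec model imdb_w2v trained elsewhere (the
# commented-out imdb_w2v.train(x_test) line below hints at it). A minimal
# gensim setup sketch; the hyperparameter values are assumptions:
from gensim.models import Word2Vec

n_dim = 300  # must match the size passed to buildWordVector
imdb_w2v = Word2Vec(vector_size=n_dim, min_count=10)  # older gensim: size=
imdb_w2v.build_vocab(x_train)
imdb_w2v.train(x_train, total_examples=imdb_w2v.corpus_count, epochs=imdb_w2v.epochs)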


from sklearn.preprocessing import scale, MinMaxScaler

# Build train tweet vectors, then optionally standardize them
train_vecs = np.concatenate([buildWordVector(z, n_dim) for z in x_train])
if scaling:
    train_vecs = scale(train_vecs)

# Train word2vec on test tweets
# imdb_w2v.train(x_test)

# Build test tweet vectors then scale
test_vecs = np.concatenate([buildWordVector(z, n_dim) for z in x_test])
if scaling:
    test_vecs = scale(test_vecs)

# Scale to the [0, 1] interval (MultinomialNB needs non-negative input); fit
# the scaler on the training set only, then apply it to the test set, clipping
# values that fall outside the training range
min_max_scaler = MinMaxScaler()
train_vecs = min_max_scaler.fit_transform(train_vecs)
test_vecs = np.clip(min_max_scaler.transform(test_vecs), 0, 1)

# Train a multinomial Naive Bayes classifier on the training set, then assess it on the test set
from classifiers import mNB
from analysis import analysis_result

pre = mNB(train_vecs, y_train, test_vecs)
analysis_result(pre, y_test)
Example #6
# Fragment: evaluation tail of a TensorFlow 1.x training loop. The graph,
# session, and the opening of this try-block precede the excerpt, which also
# ends mid-call in the source.
try:  # re-opened here only so the fragment reads cleanly
    test_predict, outputs_, test_loss = sess.run([pred, outputs, loss],
                                                 feed_dict={
                                                     x_: test_xs,
                                                     y_: test_ys
                                                 })
    loss_val = sess.run(loss, feed_dict={y_: test_ys, pred: test_predict})
    print(outputs_, 'outputs shape', np.shape(outputs_))
    print("RMSE: {}".format(loss_val))
    # print(test_predict)
    raise KeyboardInterrupt
except KeyboardInterrupt as kbi:
    print('#### result ###')
    pred = test_predict * normalize_factor
    test_ys = test_ys * normalize_factor
    analysis.analysis_result(true=test_ys,
                             pred=pred,
                             error_range_percent=5)
    test_ys = test_ys * (max_ - min_) + min_
    utils.plot_xy(test_predict=test_predict,
                  test_ys=test_ys,
                  savename='./dynalog_result_last_' +
                  args.save_folder_name + '.png')
    print('########')
    print(test_ys[:10])
    print(test_predict[:10])
    sess.close()
    print('start evaluation')
    eval.eval(x=test_xs,
              y=test_ys,
              error_range_percent=5,
              model_path=os.path.join(
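
# This example's analysis.analysis_result takes true/pred arrays plus an
# error_range_percent, suggesting a regression-style report. A hedged sketch
# of such a helper (an assumption, not the project's confirmed code):
import numpy as np


def analysis_result(true, pred, error_range_percent):
    # fraction of predictions within ±N% of the true values
    within = np.abs(pred - true) <= np.abs(true) * (error_range_percent / 100.0)
    print('within ±{}%: {:.1%}'.format(error_range_percent, float(np.mean(within))))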