Example #1
0
 def calc(args, X, X_test, final_model):
     """Fit a final LinearSVC, print its test accuracy and the strongest words.

     The regularisation strength ``C`` is chosen by
     ``Linear_SVM.get_best_hyperparameter`` on the train/validation split
     carried by ``args``.  A ``LinearSVC`` with that ``C`` is fitted on ``X``
     and evaluated on ``X_test``.  Every feature name of ``args.Cv`` is then
     paired with the matching entry of ``final_model.coef_[0]`` and the
     highest- and lowest-weighted words are printed.

     Args:
         args: Parameter holder.  Reads ``X_train``, ``Y_train``, ``Y_val``,
             ``X_val``, ``Target``, ``Target_test``, ``Cv`` and
             ``Number_we_are_interested_in``.
         X: Feature matrix the final SVM is fitted on (labels: ``args.Target``).
         X_test: Feature matrix the final SVM predicts on.
         final_model: Object exposing ``coef_``; row 0 supplies the weights.

     Returns:
         None.  All results are printed.
     """
     rule = "-" * 100
     print(rule)
     print(LocalTime.get(), "  Words selected report: SVM ")
     print(rule)

     best_c = Linear_SVM.get_best_hyperparameter(
         args.X_train, args.Y_train, args.Y_val, args.X_val)
     final_svm = LinearSVC(C=best_c)
     final_svm.fit(X, args.Target)

     predictions = final_svm.predict(X_test)
     final_accuracy_score = accuracy_score(args.Target_test, predictions)
     print("Final SVM Accuracy: %s" % final_accuracy_score)
     Report_Matricies.accuracy(args.Target_test, predictions)

     # args.Cv is presumably a scikit-learn vectorizer: releases >= 1.2 only
     # offer get_feature_names_out(), older ones may only offer
     # get_feature_names().  Prefer the new name and fall back to the old one.
     get_names = getattr(args.Cv, "get_feature_names_out", None)
     if get_names is None:
         get_names = args.Cv.get_feature_names
     # NOTE(review): the weights come from the ``final_model`` argument, not
     # from ``final_svm`` trained above -- confirm this is intended.
     feature_to_coef = dict(zip(get_names(), final_model.coef_[0]))
     itemz = feature_to_coef.items()

     list_positive = sorted(itemz, key=lambda pair: pair[1],
                            reverse=True)[:args.Number_we_are_interested_in]
     print(rule)
     print(LocalTime.get(), "--- Most popular positve words")
     for best_positive in list_positive:
         print(best_positive)

     print(rule)
     print(LocalTime.get(), "--- Most popular negative words")
     list_negative = sorted(
         itemz, key=lambda pair: pair[1])[:args.Number_we_are_interested_in]
     for best_negative in list_negative:
         print(best_negative)
Example #2
0
 def calc(args, no_of_words):
     """Fit an n-gram logistic regression, print accuracy and strongest n-grams.

     A binary ``CountVectorizer`` covering 1..``no_of_words``-grams is fitted
     on ``args.Train_text`` and applied to ``args.Test_text``.  A
     ``LogisticRegression`` is trained on the resulting matrix, scored on the
     test matrix, and the n-grams with the largest and smallest coefficients
     are printed.

     Args:
         args: Parameter holder.  Reads ``Train_text``, ``Test_text``,
             ``X_train``, ``Y_train``, ``Y_val``, ``X_val``, ``Target``,
             ``Target_test`` and ``Number_we_are_interested_in``.
         no_of_words: Upper bound of the n-gram range (lower bound is 1).

     Returns:
         None.  All results are printed.
     """
     rule = "-" * 100
     print(rule)
     print(LocalTime.get(), "  Words selected report: NGram where n = ",
           no_of_words)
     print(rule)

     ngram_vectorizer = CountVectorizer(binary=True,
                                        ngram_range=(1, no_of_words))
     X = ngram_vectorizer.fit_transform(args.Train_text)
     X_test = ngram_vectorizer.transform(args.Test_text)

     # NOTE(review): C is tuned on the matrices carried by ``args``; they were
     # not produced by the n-gram vectorizer created above -- confirm that
     # tuning on a different feature space is intended.
     best_c = Logistic_Regression.get_best_hyperparameter(
         args.X_train, args.Y_train, args.Y_val, args.X_val)
     final_ngram = LogisticRegression(C=best_c)
     final_ngram.fit(X, args.Target)

     predictions = final_ngram.predict(X_test)
     final_accuracy_score = accuracy_score(args.Target_test, predictions)
     print("Final NGram Accuracy: %s" % final_accuracy_score)
     Report_Matricies.accuracy(args.Target_test, predictions)

     # get_feature_names() was removed in scikit-learn 1.2; use its
     # replacement when available and fall back on older releases.
     get_names = getattr(ngram_vectorizer, "get_feature_names_out", None)
     if get_names is None:
         get_names = ngram_vectorizer.get_feature_names
     feature_to_coef = dict(zip(get_names(), final_ngram.coef_[0]))
     itemz = feature_to_coef.items()

     list_positive = sorted(itemz, key=lambda pair: pair[1], reverse=True)
     print(rule)
     print(LocalTime.get(), "--- Most popular positve words")
     for best_positive in list_positive[:args.Number_we_are_interested_in]:
         print(best_positive)

     print(rule)
     print(LocalTime.get(), "--- Most popular negative words")
     list_negative = sorted(itemz, key=lambda pair: pair[1])
     for best_negative in list_negative[:args.Number_we_are_interested_in]:
         print(best_negative)
Example #3
0
    def get_best_hyperparameter(X_train, y_train, y_val, X_val):
        """Return the LinearSVC ``C`` with the best validation accuracy.

        Each candidate ``C`` is used to fit a ``LinearSVC`` on
        ``(X_train, y_train)``; the model is scored with ``accuracy_score``
        on ``(X_val, y_val)`` and the per-candidate accuracy is printed.
        On ties the earliest candidate wins.

        Note the parameter order: ``y_val`` comes before ``X_val``.

        Args:
            X_train: Training feature matrix.
            y_train: Training labels.
            y_val: Validation labels.
            X_val: Validation feature matrix.

        Returns:
            The candidate ``C`` value that achieved the highest accuracy.
        """
        candidates = [0.01, 0.05, 0.25, 0.5, 1]
        # Start below any reachable accuracy so the first candidate is always
        # accepted.  Starting at 0.0 would return C = 0.0 (not a valid
        # regularisation strength) when every candidate scores 0.0.
        best_accuracy = -1.0
        best_c = candidates[0]
        for c in candidates:
            svm = LinearSVC(C=c)
            svm.fit(X_train, y_train)
            accuracy_ = accuracy_score(y_val, svm.predict(X_val))
            if accuracy_ > best_accuracy:
                best_accuracy = accuracy_
                best_c = c
            print("---SVM Accuracy for C=%s: %s" % (c, accuracy_))

        print(LocalTime.get(), "best hyperparameter c = ", best_c)
        return best_c
Example #4
0
    def get_best_hyperparameter(X_train, y_train, y_val, X_val):
        """Return the LogisticRegression ``C`` with the best validation accuracy.

        ``C`` is the regularisation hyperparameter.  Each candidate is used to
        fit a ``LogisticRegression`` on ``(X_train, y_train)``; the model is
        scored with ``accuracy_score`` on ``(X_val, y_val)`` and the
        per-candidate accuracy is printed.  On ties the earliest candidate
        wins.

        Note the parameter order: ``y_val`` comes before ``X_val``.

        Args:
            X_train: Training feature matrix.
            y_train: Training labels.
            y_val: Validation labels.
            X_val: Validation feature matrix.

        Returns:
            The candidate ``C`` value that achieved the highest accuracy.
        """
        candidates = [0.01, 0.05, 0.25, 0.5, 1]
        # Start below any reachable accuracy so the first candidate is always
        # accepted.  Starting at 0.0 would return C = 0.0 (not a valid
        # regularisation strength) when every candidate scores 0.0.
        best_accuracy = -1.0
        best_c = candidates[0]
        for c in candidates:
            lr = LogisticRegression(C=c)
            lr.fit(X_train, y_train)
            accuracy_ = accuracy_score(y_val, lr.predict(X_val))
            if accuracy_ > best_accuracy:
                best_accuracy = accuracy_
                best_c = c
            print("---Accuracy for C=%s: %s" % (c, accuracy_))

        print(LocalTime.get(), "best hyperparameter for regularisation: c = ",
              best_c)
        return best_c
Example #5
0
#%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt
import os
import pickle
from scipy.stats import truncnorm
from files import Files
from local_time import LocalTime
from neural_network import NeuralNetwork

# Push earlier console output out of view, then print a start banner.
print('\n' * 10)
print("*"*100)
print("** Start at ", LocalTime.get())
print("*"*100)

# Image geometry and label count, defined once for the whole script.
image_size = 28  # width and height
no_of_different_labels = 10  # i.e. 0, 1, 2, 3, ..., 9
image_pixels = image_size * image_size

f = Files("pickled_mnist.pkl")
# SECURITY: pickle.load can execute arbitrary code embedded in the file;
# only load pickles that come from a trusted source.
with open(os.path.join(".", f.file_path), "br") as fh:
    data = pickle.load(fh)

# The pickle holds a sequence; positions 0-5 are read in this fixed order.
train_imgs = data[0]
test_imgs = data[1]
train_labels = data[2]
test_labels = data[3]
train_labels_one_hot = data[4]
test_labels_one_hot = data[5]
Example #6
0
from datalook import Datalook
from files import Files
from logistic_regression import Logistic_Regression
from linear_svm import Linear_SVM
from local_time import LocalTime
from sentences1 import Sentences1
from textblob import TextBlob
from wordcloud import WordCloud, STOPWORDS
from n_gram import N_Gram
from svm import SVM
from params import Params

# How many top/bottom words later reports should show.
number_we_are_interested_in = 5

# Push earlier console output out of view, then print a start banner.
print('\n' * 10)
print("=" * 80)
print("Local current time started :", LocalTime.get())
print("=" * 80)

twitter_file = "auspol2019.csv"
f1 = Files(twitter_file)
twitter = pd.read_csv(f1.file_path,
                      parse_dates=['created_at', 'user_created_at'])
# Announce the read only after it has actually succeeded.
print(LocalTime.get(), twitter_file, " read")

# One TextBlob polarity score per tweet text.
# NOTE(review): TextBlob is given each cell as-is; confirm 'full_text' has no
# missing values, otherwise cast the column to str first.
twitter['sentiment'] = twitter['full_text'].map(
    lambda text: TextBlob(text).sentiment.polarity)

print("twitter number of rows = ", twitter.shape[0])
# Remove neutral sentiments (polarity exactly 0).
twitter1 = twitter[twitter.sentiment != 0]
# TODO(review): targets (1 for positive sentiment, 0 for negative sentiment)
# are announced here but not assigned in this section.

print(LocalTime.get(), "sentiment rating calculated")
from local_time import LocalTime
from sentences1 import Sentences1
from textblob import TextBlob
from wordcloud import WordCloud, STOPWORDS
from n_gram import N_Gram
from svm import SVM
from params import Params
import nltk
from nltk.corpus import stopwords
from nltk.stem import PorterStemmer


# How many top/bottom words later reports should show.
number_we_are_interested_in = 5

# Push earlier console output out of view, then print a start banner.
print('\n' * 10)
print("="*80)
print("Local current time started :", LocalTime.get())
print("="*80)
twitter_file = "auspol2019.csv"

# NOTE(review): the CSV is read from a hard-coded absolute path instead of
# being located through ``twitter_file``; this only works on the original
# author's machine -- confirm and replace with a portable lookup.
twitter = pd.read_csv('C:/Users/asus/Desktop/701_coursework-master/files/auspol2019.csv')
# Announce the read only after it has actually succeeded.
print(LocalTime.get(), twitter_file, " read")
print(twitter.shape)

# Drop the identifying/user columns that the text analysis does not use.
t1 = twitter.drop(['id', 'user_screen_name', 'user_location',
                   'user_description', 'user_name'], axis=1)

# Normalise the tweet text: force str, lower-case every token and collapse
# whitespace runs into single spaces.
t1['full_text'] = t1['full_text'].astype(str)
t1['full_text'] = t1['full_text'].apply(
    lambda text: " ".join(token.lower() for token in text.split()))

# Remove English stopwords.  ``stop`` stays a list for any later use; the
# frozenset gives constant-time membership tests inside the per-token loop.
stop = stopwords.words('english')
stop_lookup = frozenset(stop)
t1['full_text'] = t1['full_text'].apply(
    lambda text: " ".join(
        token for token in text.split() if token not in stop_lookup))

# Reduce every remaining token to its Porter stem.
st = PorterStemmer()
t1['full_text'] = t1['full_text'].apply(
    lambda text: " ".join([st.stem(word) for word in text.split()]))

# One TextBlob polarity score per cleaned tweet; neutral tweets (exactly 0)
# are discarded.
t1['sentiment'] = t1['full_text'].map(
    lambda text: TextBlob(text).sentiment.polarity)
twitter1 = t1[t1.sentiment != 0]
Example #8
0
#%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt
import os
import pickle
from scipy.stats import truncnorm
from files import Files
from local_time import LocalTime
from neural_network import NeuralNetwork
# Start banner.
print("*" * 100)
print("** Start at ", LocalTime.get())
print("*" * 100)

# Image geometry and label count.
image_size = 28  # width and height
no_of_different_labels = 10  # i.e. 0, 1, 2, 3, ..., 9
image_pixels = image_size * image_size

f = Files("pickled_mnist.pkl")
# SECURITY: pickle.load can execute arbitrary code embedded in the file;
# only load pickles that come from a trusted source.
with open(os.path.join(".", f.file_path), "br") as fh:
    data = pickle.load(fh)

# The pickle holds a sequence; positions 0-5 are read in this fixed order.
train_imgs = data[0]
test_imgs = data[1]
train_labels = data[2]
test_labels = data[3]
train_labels_one_hot = data[4]
test_labels_one_hot = data[5]

lr = np.arange(no_of_different_labels)
# transform labels into one hot representation (this replaces the one-hot
# arrays loaded from the pickle above).
# ``np.float`` was removed in NumPy 1.24; the builtin ``float`` is the exact
# type it aliased, so the resulting dtype is unchanged.
# NOTE(review): the comparison relies on broadcasting ``lr`` against the
# label arrays -- presumably labels are shaped (n, 1); confirm.
train_labels_one_hot = (lr == train_labels).astype(float)
test_labels_one_hot = (lr == test_labels).astype(float)
# we don't want zeroes and ones in the labels either: