def test_tokenizing(self):
    # Plain words split on whitespace.
    tweet = "One Two Three"
    self.assertEqual(preprocess(tweet), ['One', 'Two', 'Three'])

    # Emoticons survive as single tokens instead of being split into punctuation.
    tweet = ":) ;) :op ;OP"
    self.assertEqual(preprocess(tweet), [':)', ';)', ':op', ';OP'])

    # Mentions, URLs, and hashtags are also kept intact.
    tweet = "RT @marcobonzanini: just an example! :D http://example.com #NLP"
    self.assertEqual(preprocess(tweet),
                     ['RT', '@marcobonzanini', ':', 'just', 'an', 'example',
                      '!', ':D', 'http://example.com', '#NLP'])
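# For reference, a minimal regex-based sketch of the kind of tokenizer the
# test above expects (the real preprocess lives elsewhere; TOKEN_RE and
# preprocess_sketch are illustrative names, not from the original):
import re

TOKEN_RE = re.compile(r"""
    (?:[:;][oO]?[pPdD)(\]\[/\\|3])  # emoticons such as :) ;) :op :D
  | (?:@[\w_]+)                     # @-mentions
  | (?:\#\w+)                       # hashtags
  | (?:https?://\S+)                # URLs
  | (?:[\w'-]+)                     # plain words
  | (?:\S)                          # any other non-space character
""", re.VERBOSE)

def preprocess_sketch(tweet):
    return TOKEN_RE.findall(tweet)

# preprocess_sketch("RT @marcobonzanini: just an example! :D http://example.com #NLP")
# -> ['RT', '@marcobonzanini', ':', 'just', 'an', 'example', '!', ':D',
#     'http://example.com', '#NLP']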
def train_classifier(self):
    # Snapshot trial/y/data until the three arrays agree in length
    # (they may be mid-update on another thread).
    trial = y = data = None
    test = False
    while not test:
        trial, y, data = self.trial, self.y, self.data
        test = trial.shape[0] == y.shape[0] and y.shape[0] == data.shape[0]

    # Train on a sliding window of the last n_back trials;
    # samples labelled with class 2 are excluded.
    n_back = 12
    min_trial = max(0, self.current_trial - (n_back - 1))
    good = np.logical_and(y != 2, trial >= min_trial)
    sigs_train = data[good]
    y_train = y[good]

    if classifier.should_preprocess:
        classifier.train_pre_flow(sigs_train)
        sigs_train, y_train = classifier.preprocess(sigs_train, y_train)
    y_train = y_train.astype('float32')

    # Rebuild the flow; fall back to the previous one if training fails.
    f = self.flow
    try:
        print('training classifier...')
        self.flow = classifier.get_flow(sigs_train, y_train)
        self.should_classify = True
        print('updated classifier!')
    except FlowException as e:
        self.flow = f
        print("FlowException error:\n{0}".format(e))
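# A standalone illustration of the windowed selection above, using toy
# arrays (the values, and the reading of class 2 as an excluded class,
# are assumptions made for the example):
import numpy as np

trial = np.array([0, 0, 1, 1, 2, 2, 3, 3])  # trial index of each sample
y = np.array([1, 1, -1, -1, 2, 2, 1, 1])    # label of each sample
min_trial = 1                               # only keep trials >= 1

good = np.logical_and(y != 2, trial >= min_trial)
print(trial[good])  # [1 1 3 3]: recent samples only, class-2 rows dropped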
def classify(self):
    # Classify the most recent 500 samples.
    X = self.data[-500:]
    if classifier.should_preprocess:
        X = classifier.preprocess(X)
    out = self.flow(X)

    # Threshold the last output into a hard decision in {-1, 0, +1}.
    s = out[-1]
    if abs(s) > self.threshold:
        s = np.sign(s)
    else:
        s = 0

    # Track accuracy, skipping the first 0.5 s of each trial and any
    # pauses or periods when the arm is moving.
    if time.time() > self.start_trial + 0.5 and not self.pause_now and not self.running_arm:
        if s == self.current_class:
            self.good_times += 1
        self.total_times += 1

    output = out[-1][0]
    # Drive the robot arm according to the decision.
    if self.running_arm and self.arm:
        if s == 1:
            self.arm.write('a')
        elif s == -1:
            self.arm.write('A')
    # else:  # optional bias toward the cued class, currently disabled
    #     if self.current_class != 2:
    #         output += self.current_class * 0.3
    #         output = np.clip(output, -1, 1)

    if not self.pause_now:
        self.send_it('state', output)
        print('classify', output)
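# The dead-zone thresholding used above, as a standalone helper for clarity
# (dead_zone is an illustrative name, not part of the original code):
import numpy as np

def dead_zone(value, threshold):
    """Map a continuous score to -1, 0, or +1, ignoring small magnitudes."""
    return np.sign(value) if abs(value) > threshold else 0

assert dead_zone(0.8, 0.5) == 1
assert dead_zone(-0.7, 0.5) == -1
assert dead_zone(0.2, 0.5) == 0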
    X = X_res
    Y = resDataset.iloc[:, 5]
else:
    # Build the respiration dataset and cache it on disk with pickle.
    resDataset = collection(dataset, parameter='res')
    with open('ResData', 'ab') as data:
        pickle.dump(resDataset, data)
    X_res = resDataset.iloc[:, resDataset.columns != 'Class']
    X = X_res
    Y = resDataset.iloc[:, 5]

# Preparing the dataset for the 'all' parameter: concatenate the per-signal
# feature frames column-wise.
if parameter == 'all':
    allDataset = [X_dia, X_sys, X_eda, X_res]
    X = pd.concat(allDataset, axis=1)

# box_plot(X)  # For plotting the box plot.
print('Dataset for ' + str(parameter) + ' is processed.')

# Classifying the processed data.
accuracy, precision, recall, matrix = preprocess(X, Y)

# Printing the results.
print('Results')
print('The Average accuracy of the Classifier for ' + str(parameter) +
      ' is: ' + str(accuracy))
print('The Average precision is: ' + str(precision))
print('The Average recall is: ' + str(recall))
print('The Average confusion matrix is: ' + str(matrix))
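# For context, the else branch above implies a cache-hit path just before
# this fragment, roughly like the following (a sketch, assuming the cached
# file is read back the same way it is written above):
import os
import pickle

if os.path.isfile('ResData'):
    with open('ResData', 'rb') as data:
        resDataset = pickle.load(data)
    X_res = resDataset.iloc[:, resDataset.columns != 'Class']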
import tensorflow as tf

from neural_net import LeNet
from neural_net import evaluate
from classifier import import_dataset
from classifier import preprocess

# Load and normalize dataset
X_train, y_train, X_valid, y_valid, X_test, y_test = import_dataset()
X_train_norm, X_valid_norm, X_test_norm = preprocess(X_train, X_valid, X_test)

# Prepare to load trained network
save_file = './saved_models/lenet'
keep_prob = 1

tf.reset_default_graph()
x = tf.placeholder(tf.float32, (None, 32, 32, 3))
y = tf.placeholder(tf.int32, (None))
one_hot_y = tf.one_hot(y, 43)

logits = LeNet(x, keep_prob)
cross_entropy = tf.nn.softmax_cross_entropy_with_logits(labels=one_hot_y, logits=logits)
loss_operation = tf.reduce_mean(cross_entropy)
optimizer = tf.train.AdamOptimizer(learning_rate=0.0001)
training_operation = optimizer.minimize(loss_operation)

correct_prediction = tf.equal(tf.argmax(logits, 1), tf.argmax(one_hot_y, 1))
accuracy_operation = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
saver = tf.train.Saver()

BATCH_SIZE = 128

with tf.Session() as sess:
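    # The session body is cut off in the source. What typically follows is a
    # restore-and-evaluate step, roughly like this sketch (the exact
    # signature of `evaluate` lives in neural_net and is assumed here):
    saver.restore(sess, save_file)
    test_accuracy = evaluate(X_test_norm, y_test)
    print("Test Accuracy = {:.3f}".format(test_accuracy))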
import pandas as pd

from classifier import preprocess
from classifier import get_pos_tags
from sklearn.feature_extraction.text import TfidfVectorizer

# Load data
df = load_dataset('dataset/dataset_dummy_classes.csv')
tweets = df['text']

# Get POS tags for tweets and save as a string
tweet_tags = []
for t in tweets:
    clean = preprocess(t)
    tags = get_pos_tags(clean)
    tag_str = " ".join(tags)
    tweet_tags.append(tag_str)

# We can use the TFIDF vectorizer to get a token matrix for the POS tags
pos_vectorizer = TfidfVectorizer(
    tokenizer=None,
    lowercase=False,
    preprocessor=None,
    ngram_range=(1, 3),
    stop_words=None,
    use_idf=False,
    smooth_idf=False,
    norm=None,
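    # ...the remaining keyword arguments are cut off in the source. Closing
    # the call and applying the vectorizer would look roughly like this
    # (a sketch, not the original code; `pos` is an illustrative name):
)
pos = pos_vectorizer.fit_transform(tweet_tags).toarray()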