Example #1
0
    def test_tokenizing(self):
        """Check that ``preprocess`` turns each sample tweet into the expected tokens.

        The samples cover plain words, emoticons, and a tweet that mixes a
        retweet marker, a mention, punctuation, a URL and a hashtag.
        """
        cases = (
            ("One Two Three", ['One', 'Two', 'Three']),
            (":) ;) :op ;OP", [':)', ';)', ':op', ';OP']),
            (
                "RT @marcobonzanini: just an example! :D http://example.com #NLP",
                ['RT', '@marcobonzanini', ':', 'just', 'an', 'example', '!',
                 ':D', 'http://example.com', '#NLP'],
            ),
        )
        # Cases run in order; the first mismatch fails the test.
        for text, expected_tokens in cases:
            self.assertEqual(preprocess(text), expected_tokens)
    def train_classifier(self):
        """Retrain the classifier on recent labelled trials and install the new flow.

        Reads ``self.trial``, ``self.y`` and ``self.data``, keeps the entries
        whose label is not 2 and whose trial index lies within the ``n_back``
        most recent trials, optionally runs them through the classifier's
        preprocessing, and asks ``classifier.get_flow`` for a new flow.

        On success ``self.flow`` is replaced and ``self.should_classify`` is
        set to True. If ``FlowException`` is raised, the previous flow is
        restored and the error is printed; the exception is not propagated.
        """
        # Snapshot the three buffers, retrying until their first dimensions
        # agree.
        # NOTE(review): presumably another thread appends to these buffers, so
        # a snapshot can catch them mid-update -- confirm. The loop spins
        # without sleeping.
        while True:
            trial, y, data = self.trial, self.y, self.data
            if trial.shape[0] == y.shape[0] and y.shape[0] == data.shape[0]:
                break

        # Train only on the n_back most recent trials, current one included.
        n_back = 12
        min_trial = max(0, self.current_trial - (n_back - 1))

        # Entries labelled 2 are excluded from training.
        # NOTE(review): presumably 2 marks a rest / no-class period -- confirm.
        good = np.logical_and(y != 2, trial >= min_trial)
        sigs_train = data[good]
        y_train = y[good]

        if classifier.should_preprocess:
            classifier.train_pre_flow(sigs_train)
            sigs_train, y_train = classifier.preprocess(sigs_train, y_train)

        y_train = y_train.astype('float32')

        # Remember the current flow so a failed training run can be rolled back.
        previous_flow = self.flow
        try:
            print('training classifier...')
            self.flow = classifier.get_flow(sigs_train, y_train)
            self.should_classify = True
            print('updated classifier!')
        except FlowException as e:
            self.flow = previous_flow
            # Call form of print: valid on Python 3, and prints the same text
            # on Python 2 because a single parenthesised argument is not a tuple.
            print("FlowException error:\n{0}".format(e))
    def classify(self):
        """Run the current flow on the newest data and act on its last output.

        Takes the last 500 entries of ``self.data`` (preprocessed when
        ``classifier.should_preprocess`` is set) and feeds them to
        ``self.flow``. The final output is thresholded into a decision: its
        sign when its magnitude exceeds ``self.threshold``, otherwise 0.

        Side effects:
        - updates ``self.good_times`` / ``self.total_times`` when scoring is
          active (more than 0.5 after ``self.start_trial``, not paused, arm
          not running);
        - writes ``'a'`` or ``'A'`` to ``self.arm`` for decisions 1 / -1 when
          the arm is running;
        - unless paused, sends the raw output via ``self.send_it`` and prints it.
        """
        window = self.data[-500:]
        if classifier.should_preprocess:
            window = classifier.preprocess(window)

        out = self.flow(window)
        last = out[-1]
        decision = np.sign(last) if abs(last) > self.threshold else 0

        # Only score predictions once the trial has been running for a moment
        # and nothing else (pause, arm movement) is in progress.
        scoring_active = (
            time.time() > self.start_trial + 0.5
            and not self.pause_now
            and not self.running_arm
        )
        if scoring_active:
            if decision == self.current_class:
                self.good_times += 1
            self.total_times += 1

        output = last[0]

        # Forward a non-zero decision to the arm, if one is attached and active.
        if self.running_arm and self.arm:
            if decision == 1:
                self.arm.write('a')
            elif decision == -1:
                self.arm.write('A')

        if self.pause_now:
            return
        self.send_it('state', output)
        print('classify', output)
        X = X_res
        Y = resDataset.iloc[:, 5]
    else:
        resDataset = collection(dataset, parameter='res')
        data = open('ResData', 'ab')
        pickle.dump(resDataset, data)
        data.close()
        X_res = resDataset.iloc[:, resDataset.columns != 'Class']
        X = X_res
        Y = resDataset.iloc[:, 5]

# When every parameter is requested, join the per-parameter feature frames
# column-wise into a single feature matrix.
if parameter == 'all':
    allDataset = [X_dia, X_sys, X_eda, X_res]
    X = pd.concat(allDataset, axis=1)

print('Dataset for {0} is processed.'.format(str(parameter)))

# Classify the processed data; preprocess returns the four averaged metrics.
accuracy, precision, recall, matrix = preprocess(X, Y)

# Report the results.
print('Results')

print('The Average accuracy of the Classifier for {0} is: {1}'.format(
    str(parameter), str(accuracy)))
print('The Average precision is: {0}'.format(str(precision)))
print('The Average recall is: {0}'.format(str(recall)))
print('The Average confusion matrix is: {0}'.format(str(matrix)))
import tensorflow as tf
from neural_net import LeNet
from neural_net import evaluate
from classifier import import_dataset
from classifier import preprocess

# Load the train/validation/test splits and normalize the three feature sets.
X_train, y_train, X_valid, y_valid, X_test, y_test = import_dataset()
X_train_norm, X_valid_norm, X_test_norm = preprocess(X_train, X_valid, X_test)

# Rebuild the same graph the saved model was trained with so that the
# checkpoint under `save_file` can be loaded by `saver`.
save_file = './saved_models/lenet'
# Passed straight to LeNet below.
# NOTE(review): presumably the dropout keep probability (1 = no dropout) -- confirm.
keep_prob = 1
tf.reset_default_graph()
# Input batch placeholder: any batch size, 32x32 inputs with 3 channels.
x = tf.placeholder(tf.float32, (None, 32, 32, 3))
# Integer label placeholder.
# NOTE(review): `(None)` is plain None, not the 1-tuple `(None,)`, so the shape
# is left unspecified rather than "1-D, any length" -- confirm this is intended.
y = tf.placeholder(tf.int32, (None))
# One-hot encode the labels with depth 43.
one_hot_y = tf.one_hot(y, 43)

# Forward pass plus the loss/optimizer ops.
logits = LeNet(x, keep_prob)
cross_entropy = tf.nn.softmax_cross_entropy_with_logits(labels=one_hot_y, logits=logits)
loss_operation = tf.reduce_mean(cross_entropy)
optimizer = tf.train.AdamOptimizer(learning_rate=0.0001)
training_operation = optimizer.minimize(loss_operation)

# Accuracy: fraction of rows whose arg-max logit matches the arg-max of the
# one-hot label.
correct_prediction = tf.equal(tf.argmax(logits, 1), tf.argmax(one_hot_y, 1))
accuracy_operation = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
# Created after the graph is built so it covers the variables defined above.
saver = tf.train.Saver()

BATCH_SIZE = 128

with tf.Session() as sess:
import pandas as pd
from classifier import preprocess
from classifier import get_pos_tags
from sklearn.feature_extraction.text import TfidfVectorizer



# Load the dataset and pull out the tweet text column.
df = load_dataset('dataset/dataset_dummy_classes.csv')
tweets = df['text']


# For every tweet: preprocess it, get its POS tags, and store the tags as one
# space-separated string.
tweet_tags = [" ".join(get_pos_tags(preprocess(tweet))) for tweet in tweets]


#We can use the TFIDF vectorizer to get a token matrix for the POS tags
pos_vectorizer = TfidfVectorizer(
    tokenizer=None,
    lowercase=False,
    preprocessor=None,
    ngram_range=(1, 3),
    stop_words=None,
    use_idf=False,
    smooth_idf=False,
    norm=None,