Example 1
    def add_hdf5data(self, frequency_configuration):
        dialog_txt = 'Pick a file for :' + str(frequency_configuration)
        openname = QFileDialog.getOpenFileName(self, dialog_txt)
        if openname:
            logging.debug(str(frequency_configuration) + ':' + str(openname))
            self.dispData[str(frequency_configuration)] = str(openname)
            functions.load_dataset(self.dispData, self.dicData, frequency_configuration)
            self.update_data_disp()
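Examples 1 and 4 use the return value of QFileDialog.getOpenFileName directly as a path string, which matches the PyQt4 API. Under PyQt5 the same static method returns a (path, selected_filter) tuple, so the path has to be unpacked before it can be used; a minimal, self-contained sketch assuming PyQt5 (the dialog caption is a placeholder):

# Minimal PyQt5 sketch (assumption: PyQt5, not PyQt4, is in use).
# getOpenFileName returns a (path, selected_filter) tuple here.
import sys
from PyQt5.QtWidgets import QApplication, QFileDialog, QWidget

app = QApplication(sys.argv)
parent = QWidget()
openname, _selected_filter = QFileDialog.getOpenFileName(parent, 'Pick a file for: hdf5_on')
if openname:  # an empty string means the dialog was cancelled
    print('selected file:', openname)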
Example 2
def intentPrediction(hashTagSubject):
    print('hashTagSubject', hashTagSubject)
    filename = './data.csv'
    intent, unique_intent, sentences = load_dataset(filename)
    public_tweets_path = os.getcwd() + '/' + hashTagSubject + '.csv'

    if path.exists(public_tweets_path):
        print('tweets file already exists')
    else:
        print('tweets file not found, fetching from Twitter')
        public_tweets_file = getTwitterData(hashTagSubject)
        public_tweets_path = public_tweets_file + '/' + hashTagSubject + '.csv'

    public_tweets = pd.read_csv(os.path.realpath(public_tweets_path))

    # Predict an intent for every tweet
    prediction = []
    final_data = []
    for text in public_tweets['tweet']:
        predict = predictions(text)
        prediction.append(predict)
    for pred in prediction:
        final_data.append(get_final_output(pred, unique_intent))

    # Pair each tweet with its predicted intent
    intentData = []
    for i in range(len(public_tweets['tweet'])):
        intentData.append({
            'tweet': public_tweets['tweet'][i],
            'intent': final_data[i]
        })
    return intentData
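intentPrediction returns a list of dictionaries with 'tweet' and 'intent' keys. A hypothetical call (the hashtag value below is a placeholder, not taken from the source):

# Hypothetical usage of intentPrediction; 'python' is a made-up hashtag.
results = intentPrediction('python')
for row in results[:5]:
    print(row['tweet'], '->', row['intent'])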
Example 3
def train():
    filename = './data.csv'

    intent, unique_intent, sentences = load_dataset(filename)
    cleaned_words = cleaning(sentences)

    word_tokenizer = create_tokenizer(cleaned_words)
    vocab_size = len(word_tokenizer.word_index) + 1
    max_len = max_length(cleaned_words)

    encoded_doc = encoding_doc(word_tokenizer, cleaned_words)
    padded_doc = padding_doc(encoded_doc, max_len)

    output_tokenizer = create_tokenizer(
        unique_intent, filters=r'!"#$%&()*+,-/:;<=>?@[\]^`{|}~')

    encoded_output = encoding_doc(output_tokenizer, intent)
    encoded_output = np.array(encoded_output).reshape(len(encoded_output), 1)
    output_one_hot = one_hot(encoded_output)

    train_X, val_X, train_Y, val_Y = train_test_split(padded_doc,
                                                      output_one_hot,
                                                      shuffle=True,
                                                      test_size=0.2)

    model = create_model(vocab_size, max_len)
    model.compile(loss="categorical_crossentropy",
                  optimizer="adam",
                  metrics=["accuracy"])

    filename = 'model.h5'

    checkpoint = ModelCheckpoint(filename,
                                 monitor='val_loss',
                                 verbose=1,
                                 save_best_only=True,
                                 mode='min')
    hist = model.fit(train_X,
                     train_Y,
                     epochs=100,
                     batch_size=32,
                     validation_data=(val_X, val_Y),
                     callbacks=[checkpoint])
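The one_hot helper called above is not defined in this snippet. A minimal sketch of what it could look like, assuming it simply wraps scikit-learn's OneHotEncoder (which Example 6 imports):

# Hypothetical one_hot implementation (an assumption, not shown in the source):
# turn the (n, 1) array of integer-encoded intents into an (n, num_classes) matrix.
import numpy as np
from sklearn.preprocessing import OneHotEncoder

def one_hot(encode):
    encoder = OneHotEncoder(sparse=False)  # scikit-learn >= 1.2 prefers sparse_output=False
    return encoder.fit_transform(encode)

# Quick check: three samples over two intent classes
print(one_hot(np.array([[1], [2], [1]])))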
Example 4
    def load_settings(self):
        openname = QFileDialog.getOpenFileName(self, "Open settings file")
        if openname:
            # Read the JSON settings file (UTF-8) and close it when done
            with codecs.open(openname, 'r', encoding='utf-8') as obj_file:
                self.dispData = json.loads(obj_file.read())
            # with open(openname, "r") as myFile:
            #     self.dispData = cPickle.load(myFile)

            self.dispData['Settings file'] = str(openname)
            if 'hdf5_on' in self.dispData:
                functions.load_dataset(self.dispData, self.dicData, 'hdf5_on')
                functions.load_dataset(self.dispData, self.dicData, 'hdf5_off')
                logging.debug('Single Amp settings and files loaded')
            elif 'on11' in self.dispData:
                functions.load_dataset(self.dispData, self.dicData, 'on11')
                functions.load_dataset(self.dispData, self.dicData, 'on22')
                functions.load_dataset(self.dispData, self.dicData, 'on12')
                functions.load_dataset(self.dispData, self.dicData, 'on21')
                functions.load_dataset(self.dispData, self.dicData, 'off12')
                functions.load_dataset(self.dispData, self.dicData, 'off21')
                logging.debug('Double Amp settings and files loaded')
            self.update_table()
            self.update_data_disp()
Example 5
from functions import load_dataset

if __name__ == "__main__":
    PATH = '../../data/full/exp'

    df = load_dataset(PATH)

    print(df)
Example 6
import nltk
import re
from nltk.stem import LancasterStemmer
from sklearn.preprocessing import OneHotEncoder
import matplotlib.pyplot as plt
from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences
from keras.utils import to_categorical
from keras.models import Sequential, load_model
from keras.layers import Dense, LSTM, Bidirectional, Embedding, Dropout
from keras.callbacks import ModelCheckpoint
from sklearn.model_selection import train_test_split

import functions

#loading dataset named "Dataset.csv"
intent, unique_intent, sentences = functions.load_dataset("Dataset.csv")

#Print first 5 sentences
print(sentences[:5])

#download Natural Language Toolkit (NLTK) resources
functions.downloadnltk()

stemmer = LancasterStemmer()

#clean words of punctuation/special characters. Also lemmatize
cleaned_words = functions.cleaning(sentences)

#2.########################## Encoding ##################
#using tokenizer (a class of keras)
word_tokenizer = functions.create_tokenizer(cleaned_words)
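create_tokenizer is likewise used without being shown. A plausible sketch, assuming it is a thin wrapper that fits a keras Tokenizer on the cleaned word lists (the default filter string is an assumption):

# Hypothetical create_tokenizer sketch: give every word an integer index.
from keras.preprocessing.text import Tokenizer

def create_tokenizer(words, filters='!"#$%&()*+,-./:;<=>?@[\\]^`{|}~'):
    token = Tokenizer(filters=filters)
    token.fit_on_texts(words)
    return token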
Example 7
import json

import functions

with open("configs/" + configName + ".json", "r") as read_file:
    config_parameters = json.load(read_file)

# Name of the track dataset
track_name = config_parameters["track_name"]

# Name of the driver/vehicle dataset
dataset_name = config_parameters["dataset_name"]

# Load the track dataset
trackMap = functions.load_track(track_name)

# Load the driver/vehicle dataset
df = functions.load_dataset(dataset_name)

# All of the driver/vehicle/road features needed in the prediction framework
driver_vehicle_road_features = config_parameters[
    "driver_vehicle_road_features"]

driver_vehicle_road_features.append(
    'Session-Lap')  # Session-Lap is needed to manage the dataset

# All of the driver/vehicle features
driver_vehicle_features = config_parameters["driver_vehicle_features"]

# The driver/vehicle features to predict (primary and secondary)
prediction_features = config_parameters["prediction_features"]

# The primary features (the features of interest in the prediction problem)
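The snippet reads several keys from the JSON config. For orientation, a placeholder config containing exactly those keys (every value is made up, not taken from the source) could be written like this:

# Hypothetical helper that writes a placeholder config with the keys the
# snippet above expects; all values are invented examples.
import json
import os

example_config = {
    "track_name": "example_track",
    "dataset_name": "example_driver_dataset",
    "driver_vehicle_road_features": ["Speed", "Curvature"],
    "driver_vehicle_features": ["Speed"],
    "prediction_features": ["Speed"],
}

os.makedirs("configs", exist_ok=True)
with open("configs/example.json", "w") as write_file:
    json.dump(example_config, write_file, indent=4)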