def add_hdf5data(self, frequency_configuration):
    dialog_txt = 'Pick a file for :' + str(frequency_configuration)
    openname = QFileDialog.getOpenFileName(self, dialog_txt)
    if openname:
        logging.debug(str(frequency_configuration) + ':' + str(openname))
        self.dispData[str(frequency_configuration)] = str(openname)
        functions.load_dataset(self.dispData, self.dicData, frequency_configuration)
        self.update_data_disp()
def intentPrediction(hashTagSubject):
    print('hashTagSubject', hashTagSubject)
    filename = './data.csv'
    intent, unique_intent, sentences = load_dataset(filename)
    public_tweets_path = os.getcwd() + '/' + hashTagSubject + '.csv'
    if path.exists(public_tweets_path):
        print('tweet file exists')
    else:
        print('tweet file not found, fetching tweets')
        public_tweets_file = getTwitterData(hashTagSubject)
        public_tweets_path = public_tweets_file + '/' + hashTagSubject + '.csv'
    public_tweets = pd.read_csv(os.path.realpath(public_tweets_path))
    # Predict an intent for every tweet in the CSV
    prediction = []
    final_data = []
    for text in public_tweets['tweet']:
        predict = predictions(text)
        prediction.append(predict)
    for pred in prediction:
        final_data.append(get_final_output(pred, unique_intent))
    # Pair each tweet with its predicted intent
    intentData = []
    for i in range(len(public_tweets['tweet'])):
        intentData.append({
            'tweet': public_tweets['tweet'][i],
            'intent': final_data[i]
        })
    return intentData
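# Hedged sketch only: predictions() and get_final_output() are called above but
# are not defined in this excerpt. The versions below are plausible placeholder
# implementations, not the originals; they assume the word_tokenizer, max_len,
# and padding_doc produced during training are in scope, and that the best
# checkpoint was saved as 'model.h5'.
import re
from keras.models import load_model

model = load_model('model.h5')  # best checkpoint written by the training step

def predictions(text):
    # Strip everything but letters, digits and spaces, then lower-case
    clean = re.sub(r'[^a-zA-Z0-9 ]', ' ', text).lower()
    # Encode with the tokenizer fitted on the training sentences and pad
    encoded = word_tokenizer.texts_to_sequences([clean])
    padded = padding_doc(encoded, max_len)
    # One probability per intent class
    return model.predict(padded)[0]

def get_final_output(pred, classes):
    # Rank intents by predicted probability and return the most likely one
    ranked = sorted(zip(classes, pred), key=lambda pair: pair[1], reverse=True)
    return ranked[0][0]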
def train():
    filename = './data.csv'
    intent, unique_intent, sentences = load_dataset(filename)
    cleaned_words = cleaning(sentences)
    word_tokenizer = create_tokenizer(cleaned_words)
    vocab_size = len(word_tokenizer.word_index) + 1
    max_len = max_length(cleaned_words)
    # Encode and pad the input sentences
    encoded_doc = encoding_doc(word_tokenizer, cleaned_words)
    padded_doc = padding_doc(encoded_doc, max_len)
    # Encode the intent labels and one-hot them
    output_tokenizer = create_tokenizer(
        unique_intent, filters='!"#$%&()*+,-/:;<=>?@[\\]^`{|}~')
    encoded_output = encoding_doc(output_tokenizer, intent)
    encoded_output = np.array(encoded_output).reshape(len(encoded_output), 1)
    output_one_hot = one_hot(encoded_output)
    train_X, val_X, train_Y, val_Y = train_test_split(
        padded_doc, output_one_hot, shuffle=True, test_size=0.2)
    model = create_model(vocab_size, max_len)
    model.compile(loss="categorical_crossentropy", optimizer="adam",
                  metrics=["accuracy"])
    # Keep only the checkpoint with the lowest validation loss
    filename = 'model.h5'
    checkpoint = ModelCheckpoint(filename, monitor='val_loss', verbose=1,
                                 save_best_only=True, mode='min')
    hist = model.fit(train_X, train_Y, epochs=100, batch_size=32,
                     validation_data=(val_X, val_Y), callbacks=[checkpoint])
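# Optional, hedged addition: train() above discards the History object held in
# `hist`; if the function is changed to return it, the learning curves can be
# inspected as below. Keras records one value per epoch for each compiled
# loss/metric under hist.history.
import matplotlib.pyplot as plt

def plot_history(hist):
    plt.plot(hist.history['loss'], label='training loss')
    plt.plot(hist.history['val_loss'], label='validation loss')
    plt.xlabel('epoch')
    plt.ylabel('categorical cross-entropy')
    plt.legend()
    plt.show()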
def load_settings(self):
    openname = QFileDialog.getOpenFileName(self, "Open settingsfile")
    if openname:
        obj_text = codecs.open(openname, 'r', encoding='utf-8').read()
        self.dispData = json.loads(obj_text)
        # with open(openname, "r") as myFile:
        #     self.dispData = cPickle.load(myFile)
        self.dispData['Settings file'] = str(openname)
        if 'hdf5_on' in self.dispData:
            functions.load_dataset(self.dispData, self.dicData, 'hdf5_on')
            functions.load_dataset(self.dispData, self.dicData, 'hdf5_off')
            logging.debug('Single Amp settings and files loaded')
        elif 'on11' in self.dispData:
            functions.load_dataset(self.dispData, self.dicData, 'on11')
            functions.load_dataset(self.dispData, self.dicData, 'on22')
            functions.load_dataset(self.dispData, self.dicData, 'on12')
            functions.load_dataset(self.dispData, self.dicData, 'on21')
            functions.load_dataset(self.dispData, self.dicData, 'off12')
            functions.load_dataset(self.dispData, self.dicData, 'off21')
            logging.debug('Double Amp settings and files loaded')
        self.update_table()
        self.update_data_disp()
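# Hedged, illustrative counterpart to load_settings() above (a save_settings
# method is not part of this excerpt): since dispData is loaded from JSON, it
# can be written back the same way it is read.
def save_settings(self):
    savename = QFileDialog.getSaveFileName(self, "Save settingsfile")
    if savename:
        with codecs.open(savename, 'w', encoding='utf-8') as settings_file:
            json.dump(self.dispData, settings_file, indent=4)
        logging.debug('Settings saved to ' + str(savename))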
from functions import load_dataset

if __name__ == "__main__":
    PATH = '../../data/full/exp'
    df = load_dataset(PATH)
    print(df)
import nltk
import re
from nltk.stem.lancaster import LancasterStemmer
from sklearn.preprocessing import OneHotEncoder
import matplotlib.pyplot as plt
from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences
from keras.utils import to_categorical
from keras.models import Sequential, load_model
from keras.layers import Dense, LSTM, Bidirectional, Embedding, Dropout
from keras.callbacks import ModelCheckpoint
from sklearn.model_selection import train_test_split
import functions

# Load the dataset named "Dataset.csv"
intent, unique_intent, sentences = functions.load_dataset("Dataset.csv")

# Print the first 5 sentences
print(sentences[:5])

# Download the required NLTK (Natural Language Toolkit) resources
functions.downloadnltk()

stemmer = LancasterStemmer()

# Clean words of punctuation/special characters and lemmatize
cleaned_words = functions.cleaning(sentences)

# 2. ########################## Encoding ##################
# Tokenize with the Keras Tokenizer class
word_tokenizer = functions.create_tokenizer(cleaned_words)
import json

import functions

# Read the experiment configuration
# (configName is assumed to be defined by the surrounding script)
with open("configs/" + configName + ".json", "r") as read_file:
    config_parameters = json.load(read_file)

# Name of the track dataset
track_name = config_parameters["track_name"]
# Name of the driver/vehicle dataset
dataset_name = config_parameters["dataset_name"]

# Load the track dataset
trackMap = functions.load_track(track_name)
# Load the driver/vehicle dataset
df = functions.load_dataset(dataset_name)

# All of the driver/vehicle/road features needed in the prediction framework
driver_vehicle_road_features = config_parameters[
    "driver_vehicle_road_features"]
driver_vehicle_road_features.append(
    'Session-Lap')  # Session-Lap is needed to manage the dataset

# All of the driver/vehicle features
driver_vehicle_features = config_parameters["driver_vehicle_features"]
# The driver/vehicle features to predict (primary and secondary)
prediction_features = config_parameters["prediction_features"]
# The primary features (the features of interest in the prediction problem)
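# Hedged illustration of the JSON config consumed above. The key names are the
# ones read from config_parameters in this excerpt; every value below is an
# invented placeholder, not taken from a real configuration file.
example_config = {
    "track_name": "example_track",
    "dataset_name": "example_driver_sessions",
    "driver_vehicle_road_features": ["Speed", "SteeringAngle", "RoadCurvature"],
    "driver_vehicle_features": ["Speed", "SteeringAngle"],
    "prediction_features": ["Speed"],
}

# Serialised the same way the script reads it, e.g. as configs/example.json
print(json.dumps(example_config, indent=4))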