def load(fiName):
    numClass, labVal = getNumClass(fiName, 0)
    print("numClass=", numClass)

    # Load CSV file, indicate that the first column represents labels
    from tflearn.data_utils import load_csv
    data, labels = load_csv(fiName, target_column=0,
                            categorical_labels=True, n_classes=numClass)

    # Convert the numeric strings into floats; unparsable cells get a sentinel value.
    numRow = len(data)
    numCol = len(data[0])
    for rndx in range(numRow):
        for cndx in range(numCol):
            try:
                data[rndx][cndx] = float(data[rndx][cndx])
            except ValueError:
                data[rndx][cndx] = -9999.0
    data = np.array(data, dtype=np.float32)
    #print("data as float array=", data)
    #print("labels=", labels)

    numCol = len(data[0])
    numRow = len(data)
    return (numRow, numCol, numClass, data, labels, labVal)
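# Usage sketch for load() above, assuming the module defines getNumClass and
# imports numpy as np; the file name is hypothetical, not from the original.
numRow, numCol, numClass, data, labels, labVal = load('gestures.csv')
print("loaded %d rows x %d columns across %d classes" % (numRow, numCol, numClass))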
def load_test_dataset(self, filename='testDatasetWithOutNeuTwo', vocab_name='vocabProc'):
    """Load the test dataset.

    Note: this function did not reproduce earlier results when loading new
    data. One suspect is that VocabularyProcessor.restore() returns a new
    instance rather than restoring in place, so its result is now assigned.
    """
    # Load test dataset
    self.test_x, self.test_y = load_csv('datasets/' + filename, target_column=1)

    # Restore the saved vocabulary processor (restore() is a factory method,
    # so its return value must be kept)
    self.vocabProcessor = VocabularyProcessor.restore(vocab_name)

    # Encode pos, neu and neg to numbers
    labelEncoder = LabelEncoder()
    labelEncoder.fit(self.test_y)
    self.test_y = labelEncoder.transform(self.test_y)

    # Change the list of sentences to a list of sequences of word ids
    self.test_x = np.array(
        list(self.vocabProcessor.transform([x[0] for x in self.test_x])))

    # Pad the sequences to fit the longest sentence
    self.test_x = pad_sequences(self.test_x, maxlen=self.max_words, value=0.)

    # Convert labels to binary vectors
    self.test_y = to_categorical(self.test_y, nb_classes=self.number_of_classes)
def load_dataset_training(self, vocab_name, filename='datasetWithoutNeutral'):
    # Load the dataset
    X, Y = load_csv('datasets/' + filename, target_column=2, columns_to_ignore=[0])

    # Max word count over the longest sentence
    self.max_words = max([len(x[0].split(" ")) for x in X])

    # Size the vocabulary processor to the longest sentence
    self.vocabProcessor = VocabularyProcessor(self.max_words)

    # Encode pos, neu and neg to numbers
    labelEncoder = LabelEncoder()
    labelEncoder.fit(Y)
    Y = labelEncoder.transform(Y)

    # Change the list of sentences to a list of sequences of word ids
    X = np.array(list(self.vocabProcessor.fit_transform([x[0] for x in X])))

    # Split the dataset into a training set and a test set
    self.X_train, self.X_test, self.Y_train, self.Y_test = train_test_split(
        X, Y, test_size=0.10, random_state=7)

    # Pad the sequences to fit the longest sentence
    self.X_train = pad_sequences(self.X_train, maxlen=self.max_words, value=0.)
    self.X_test = pad_sequences(self.X_test, maxlen=self.max_words, value=0.)

    # Convert labels to binary vectors
    self.Y_train = to_categorical(self.Y_train, nb_classes=self.number_of_classes)
    self.Y_test = to_categorical(self.Y_test, nb_classes=self.number_of_classes)

    self.vocabProcessor.save(vocab_name)
def main():
    # Import the weather data, including the header row
    DATA_FILE = 'c:/Users/Trent/Documents/School/201840/CS_5890/PythonStuff/Weather_Prediction/weather.csv'
    data, labels = load_csv(DATA_FILE, target_column=11, columns_to_ignore=[0])

    TrainingSetFeatures = preProcessData(data)
    TrainingSetLabels = labels
    catagorizeLabels(TrainingSetLabels)
    TrainingSetLabels = to_categorical(TrainingSetLabels, 9)

    # Build the network
    net = tflearn.input_data(shape=[None, 12])
    net = tflearn.fully_connected(net, 64)
    net = tflearn.fully_connected(net, 32)
    net = tflearn.fully_connected(net, 16)
    net = tflearn.fully_connected(net, 9, activation="softmax")
    net = tflearn.regression(net)

    # Define model
    model = tflearn.DNN(net, tensorboard_verbose=3)

    # Start training
    model.fit(TrainingSetFeatures, TrainingSetLabels, n_epoch=15,
              validation_set=0.15, batch_size=12, show_metric=True)

    '''Different architectures and their average accuracy:
def load():
    pd.set_option('display.max_columns', 10)
    pd.set_option('display.max_rows', 100)

    # Download the Titanic dataset
    titanic.download_dataset('titanic_dataset.csv')

    # Load CSV file, indicate that the first column represents labels
    data, labels = load_csv('titanic_dataset.csv', target_column=0,
                            has_header=True, categorical_labels=False,
                            n_classes=2)

    # Put the data in a DataFrame for convenience
    df = pd.DataFrame(data, columns=[
        "pclass", "name", "sex", "age", "sibsp", "parch", "ticket", "fare"
    ])
    df = df.drop(columns=['name', 'ticket'])

    # Bin ages into one-hot age groups
    df['age'] = df['age'].astype('float64')
    group_names = ['A', 'B', 'C', 'D', 'E']
    bins = [-1, 17, 35, 50, 65, 1000]
    df['age_group'] = pd.cut(df['age'], bins=bins, labels=group_names)
    res = pd.get_dummies(df['age_group'], prefix='age_group')
    df = pd.concat([df, res], axis=1)
    df = df.drop(columns=['age', 'age_group'])

    # Normalize fare
    df['fare'] = df['fare'].astype('float64')
    df["fare"] = df["fare"] / df["fare"].max()

    # Normalize sibsp
    df['sibsp'] = df['sibsp'].astype('float64')
    df["sibsp"] = df["sibsp"] / df["sibsp"].max()

    # Normalize parch
    df['parch'] = df['parch'].astype('float64')
    df["parch"] = df["parch"] / df["parch"].max()

    # Encode sex as 0 (male) / 1 (female)
    df = df.replace(["male", "female"], [0, 1])

    print(df)
    return df, labels
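# A minimal sketch (hypothetical usage, not from the original source) of
# feeding the frame returned by load() into numpy for TFLearn; this assumes
# the remaining columns are all numeric or numeric strings.
import numpy as np

df, labels = load()
features = np.array(df, dtype=np.float32)
print("feature matrix shape:", features.shape)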
def sentiment_analysis(self, sentencedata):
    file_path = 'Cleaned-Masita corpus 2.csv'
    data, labels = load_csv(file_path, target_column=0,
                            categorical_labels=True, n_classes=2)
    pdata = self.preprocess_server(data)
    unique_words = self.get_uniquewords(pdata)
    data = self.preprocess_vector(pdata, unique_words)
    neurons = len(data[0])

    # Shuffle the dataset
    data, labels = shuffle(data, labels)

    reset_default_graph()
    network = input_data(shape=[None, neurons])
    network = fully_connected(network, 8, activation='relu')
    network = fully_connected(network, 8 * 2, activation='relu')
    network = fully_connected(network, 8, activation='relu')
    network = dropout(network, 0.5)
    network = fully_connected(network, 2, activation='softmax')
    network = regression(network, optimizer='adam', learning_rate=0.01,
                         loss='categorical_crossentropy')
    model = tflearn.DNN(network)

    # Training happens offline; uncomment to retrain and save:
    # model.fit(data, labels, n_epoch=40, shuffle=True, validation_set=None,
    #           show_metric=True, batch_size=None, snapshot_epoch=True,
    #           run_id='task-classifier')
    # model.save("./model/thaitext-classifier-mashita.tfl")
    model.load("./model/thaitext-classifier-mashita.tfl")

    # Vectorize the input sentence against the training vocabulary
    input_sentencedata = self.preprocess_server(sentencedata)
    vector_one = []
    for word in unique_words:
        if word in input_sentencedata:
            vector_one.append(1)
        else:
            vector_one.append(0)
    vector_one = np.array(vector_one, dtype=np.float32)

    pred = model.predict([vector_one])
    return pred
def __load_data(self, path='./data/testAndGrades.csv'):
    """
    Load the data for the model.

    :param path: Path to the model's data. It should be a CSV file.
    :return: The data rows and labels for the model to train on.
    """
    self.data, self.labels = load_csv(path, has_header=True, target_column=0,
                                      categorical_labels=True, n_classes=10)
    return self.data, self.labels
def __init__(self, conf_file):
    """Initialize the CSV data loader from a YAML configuration file."""
    with open(conf_file) as f:
        # Use yaml.safe_load instead of yaml.load to avoid executing
        # arbitrary YAML tags
        self.data_conf = yaml.safe_load(f)
    self.rows, self.labels = load_csv(
        self.data_conf['data_file'],
        target_column=self.data_conf['target_column'],
        categorical_labels=True,
        n_classes=self.data_conf['n_classes'])
    self.columns_to_ignore = self.data_conf['columns_to_ignore']
    self.conversion_map = self.data_conf['conversion_map']
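# A sketch of the YAML layout this loader appears to expect, inferred from the
# keys read above (file name, values, and the class name are hypothetical):
#
#   data_file: ./data/train.csv
#   target_column: 0
#   n_classes: 2
#   columns_to_ignore: [1, 6]
#   conversion_map:
#     1: {female: 1.0, male: 0.0}
#
# loader = CsvDataLoader('loader.yaml')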
def parse_csv(csv_file, column_number):
    check_feature_num(csv_file, column_number)
    features, labels = load_csv(csv_file, target_column=column_number,
                                columns_to_ignore=None, has_header=True)
    # np.float was removed from newer NumPy releases; use np.float64
    feature_tensor = np.array(features).reshape(
        len(features), len(features[0])).astype(np.float64)
    label_tensor = np.array(labels).reshape(len(labels), -1).astype(np.float64)
    return feature_tensor, label_tensor
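# Usage sketch for parse_csv() above; the file name is an assumption, and
# check_feature_num must be defined elsewhere in the module.
features, labels = parse_csv('measurements.csv', column_number=0)
print(features.shape, labels.shape)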
def sentiment_analysis(self, sentencedata):
    file_path = './corpus/Combined_inhousedata_UTF8-2.csv'
    data, labels = load_csv(file_path, target_column=0,
                            categorical_labels=True, n_classes=2)
    pdata = self.preprocess_server(data)
    unique_words = self.get_uniquewords(pdata)
    data = self.preprocess_vector(pdata, unique_words)
    neurons = len(unique_words)

    # Shuffle the dataset
    data, labels = shuffle(data, labels)

    reset_default_graph()
    network = input_data(shape=[None, neurons])
    network = fully_connected(network, 8, activation='relu')
    network = fully_connected(network, 8 * 2, activation='relu')
    network = fully_connected(network, 8, activation='relu')
    network = dropout(network, 0.5)
    network = fully_connected(network, 2, activation='softmax')
    network = regression(network, optimizer='adam', learning_rate=0.01,
                         loss='categorical_crossentropy')
    model = tflearn.DNN(network)
    model.load("./model/thaitext-classifier-CID_UTF8-burgerking-2.tfl")

    # Vectorize the input sentence against the training vocabulary
    input_sentencedata = self.preprocess_server_2(sentencedata)
    vector_one = []
    for word in unique_words:
        if word in input_sentencedata:
            vector_one.append(1)
        else:
            vector_one.append(0)
    vector_one = np.array(vector_one, dtype=np.float32)

    pred = model.predict([vector_one])
    return pred
def sentiment_analysis(self, sentencedata):
    file_path = './corpus/BurgerKing_UTF8.csv'
    data, labels = load_csv(file_path, target_column=0,
                            categorical_labels=True, n_classes=2)
    pdata = self.preprocess_server(data)
    unique_words = self.get_uniquewords(pdata)
    data = self.preprocess_vector(pdata, unique_words)
    neurons = len(data[0])

    # Shuffle the dataset
    data, labels = shuffle(data, labels)
def get_multiply(percentage=1):
    name = 'multiply'
    batch_size = 128
    nb_classes = 1
    input_shape = (2,)

    x, y = load_csv('multiply.csv', target_column=2)

    # 80/20 train/test split over the first N rows
    N = 10000 * percentage
    TRAIN_SIZE = int(N * 0.8)

    x_train = x[:TRAIN_SIZE]
    x_test = x[TRAIN_SIZE:]
    y_train = y[:TRAIN_SIZE]
    y_test = y[TRAIN_SIZE:]

    return (nb_classes, batch_size, input_shape, x_train, x_test, y_train, y_test)
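# A sketch that writes a 'multiply.csv' compatible with get_multiply() above:
# two factor columns followed by their product in target_column=2. The row
# count and value range are assumptions; load_csv defaults to has_header=True,
# so a header row is included.
import csv
import random

with open('multiply.csv', 'w', newline='') as f:
    writer = csv.writer(f)
    writer.writerow(['a', 'b', 'product'])
    for _ in range(10000):
        a, b = random.uniform(0, 1), random.uniform(0, 1)
        writer.writerow([a, b, a * b])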
def prepare_csv():
    # Download the Titanic dataset
    from tflearn.datasets import titanic
    titanic.download_dataset('titanic_dataset.csv')

    # Load CSV file, indicate that the first column represents labels
    from tflearn.data_utils import load_csv
    data, labels = load_csv('titanic_dataset.csv', target_column=0,
                            has_header=False, categorical_labels=True,
                            n_classes=2)

    # Preprocess data
    data = preprocess_csv(data, to_ignore)
    return data, labels
def predicting(self):
    data, labels = load_csv("Colour.csv", target_column=1,
                            categorical_labels=True, n_classes=2,
                            columns_to_ignore=[0])

    # str.capitalize() returns a new string, so the results must be assigned
    self.name = self.name.capitalize()
    self.colour = self.colour.capitalize()

    if self.colour not in self.colours:
        # print("Sorry, that colour is not in my database, try to be simpler")
        colour = input("What's your favourite colour, %s? \n" % self.name)
        colour = colour.capitalize()
        if colour not in self.colours:
            # print("You Failed.")
            return
        self.colour = colour

    colourNum = str(self.colours.index(self.colour))

    # An input layer with a variable number of examples, each with 2 features
    net = tflearn.input_data(shape=[None, 2])
    # Two hidden layers with 32 nodes each
    net = tflearn.fully_connected(net, 32)
    net = tflearn.fully_connected(net, 32)
    # An output layer of 2 nodes with a softmax activation
    net = tflearn.fully_connected(net, 2, activation='softmax')
    # Find the pattern
    net = tflearn.regression(net)

    model = tflearn.DNN(net)
    model.fit(data, labels, n_epoch=100, batch_size=16, show_metric=True)

    predict = model.predict([[self.age, colourNum]])[0][0]
    predict = round(predict, 0)
    self.predict = "Male" if predict == 1 else "Female"
def load_titanic():
    import numpy as np

    # Download the Titanic dataset (1309 rows)
    from tflearn.datasets import titanic
    titanic.download_dataset('titanic_dataset.csv')

    # Load CSV file, indicate that the first column represents labels
    from tflearn.data_utils import load_csv
    data, titanic_labels = load_csv('titanic_dataset.csv', target_column=0,
                                    categorical_labels=True, n_classes=2)

    # Preprocessing function
    def preprocess(passengers, columns_to_delete):
        # Sort by descending id and delete columns
        for column_to_delete in sorted(columns_to_delete, reverse=True):
            [passenger.pop(column_to_delete) for passenger in passengers]
        for i in range(len(passengers)):
            # Convert 'sex' field to float (id is 1 after removing labels column)
            passengers[i][1] = 1. if passengers[i][1] == 'female' else 0.
        return np.array(passengers, dtype=np.float32)

    # Ignore 'name' and 'ticket' columns (ids 1 & 6 of the data array)
    to_ignore = [1, 6]

    # Preprocess data
    x_train = preprocess(data, to_ignore)
    y_train = titanic_labels

    # No held-out split: the test set is the training set itself
    x_test = x_train
    y_test = y_train

    input_dim = (6,)
    num_classes = 2
    return x_train, y_train, x_test, y_test, input_dim, num_classes
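# Usage sketch: feed the arrays from load_titanic() into a small TFLearn
# network. The layer sizes here are illustrative assumptions, not from the
# original source.
import tflearn

x_train, y_train, x_test, y_test, input_dim, num_classes = load_titanic()
net = tflearn.input_data(shape=[None, input_dim[0]])
net = tflearn.fully_connected(net, 32)
net = tflearn.fully_connected(net, num_classes, activation='softmax')
net = tflearn.regression(net)
model = tflearn.DNN(net)
model.fit(x_train, y_train, n_epoch=10, batch_size=16, show_metric=True)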
class uniqueword():
    def preprocess_server(data):
        rlist = []
        preprocessdata = []
        for i in range(len(data)):
            x = requests.get('http://174.138.26.245:5000/preprocess/' + data[i][0])
            resu = x.json()
            preprocessdata.append(resu['result'])
        for i in range(len(preprocessdata)):
            r = requests.get('http://174.138.26.245:5000/tokenize/' + preprocessdata[i])
            result = r.json()
            rlist.append(result['result'])
        return rlist

    def get_uniquewords(listdata):
        uniquewords = []
        for line in range(len(listdata)):
            words = listdata[line]
            for word in words:
                if word not in uniquewords:
                    uniquewords.append(word)
        return uniquewords

    def csv_writer(write_data):
        # Note: 'wb' is the Python 2 mode for csv; Python 3 needs
        # open(path, 'w', newline='') instead
        with open('./uniqueword/Combined_inhousedata_UTF8-2.csv', 'wb') as write_file:
            writer = csv.writer(write_file)
            for line in write_data:
                print(line)
                # Wrap the word in a list so it is written as one cell,
                # not one character per column
                writer.writerow([line])

    file_path = './corpus/Combined_inhousedata_UTF8-2.csv'
    data, labels = load_csv(file_path, target_column=0,
                            categorical_labels=True, n_classes=2)
    pdata = preprocess_server(data)
    unique_words = get_uniquewords(pdata)
    csv_writer(unique_words)
def get_titanic(percentage=1):
    name = 'titanic'
    nb_classes = 2
    batch_size = 4
    input_shape = (6,)

    data, labels = load_csv('titanic_dataset.csv', target_column=0,
                            categorical_labels=True, n_classes=2)
    data = preprocess_titanic(data)

    x_train = data
    y_train = np.array(labels, np.int32)

    # Sample 10% of the rows as a test set; fancy indexing requires numpy
    # arrays, so index the array copy of the labels, not the original list
    my_random = random.sample(range(len(data)), int(len(data) / 10))
    x_test = data[my_random]
    y_test = y_train[my_random]

    TRAIN_SIZE = int(len(x_train) * percentage)
    TEST_SIZE = int(len(x_test) * percentage)

    x_train = x_train[:TRAIN_SIZE]
    y_train = y_train[:TRAIN_SIZE]
    x_test = x_test[:TEST_SIZE]
    y_test = y_test[:TEST_SIZE]

    return (nb_classes, batch_size, input_shape, x_train, x_test, y_train, y_test)
from __future__ import print_function

import numpy as np
import tflearn

# Load CSV file, indicate that the first column represents labels
from tflearn.data_utils import load_csv
data, labels = load_csv('../data/train/gest_train_ratio2.csv', target_column=0,
                        categorical_labels=True, n_classes=7)
print("data=", data)
print("labels=", labels)
data = np.array(data, dtype=np.float32)
print("data as float array=", data)

# Build neural network
net = tflearn.input_data(shape=[None, 6])
net = tflearn.fully_connected(net, 32)
net = tflearn.fully_connected(net, 32)
net = tflearn.fully_connected(net, 7, activation='softmax')
net = tflearn.regression(net)

# Define model
model = tflearn.DNN(net)

# Start training (apply gradient descent algorithm)
class Thai_segment():
    file_path = './corpus/Cleaned-Masita corpus 2.csv'
    data, labels = load_csv(file_path, target_column=0,
                            categorical_labels=True, n_classes=2)

    def preprocess_server(data):
        rlist = []
        preprocessdata = []
        for i in range(len(data)):
            x = requests.get('http://174.138.26.245:5000/preprocess/' + data[i][0])
            resu = x.json()
            preprocessdata.append(resu['result'])
        for i in range(len(preprocessdata)):
            r = requests.get('http://174.138.26.245:5000/tokenize/' + preprocessdata[i])
            result = r.json()
            rlist.append(result['result'])
        return rlist

    def get_uniquewords(listdata):
        uniquewords = []
        for line in range(len(listdata)):
            words = listdata[line]
            for word in words:
                if word not in uniquewords:
                    uniquewords.append(word)
        return uniquewords

    def preprocess_vector(listdata, uniquewords):
        sentences = []
        vectors = []
        for line in range(len(listdata)):
            words = listdata[line]
            inner_data = []
            for word in words:
                inner_data.append(word)
            sentences.append(inner_data)
        # One binary bag-of-words vector per sentence
        for sentence in sentences:
            inner_vector = []
            for word in uniquewords:
                if word in sentence:
                    inner_vector.append(1)
                else:
                    inner_vector.append(0)
            vectors.append(inner_vector)
        return np.array(vectors, dtype=np.float32)

    pdata = preprocess_server(data)
    unique_words = get_uniquewords(pdata)
    data = preprocess_vector(pdata, unique_words)
    neurons = len(data[0])

    # Shuffle the dataset
    data, labels = shuffle(data, labels)

    reset_default_graph()
    network = input_data(shape=[None, neurons])
    network = fully_connected(network, 8, activation='relu')
    network = fully_connected(network, 8 * 2, activation='relu')
    network = fully_connected(network, 8, activation='relu')
    network = dropout(network, 0.5)
    network = fully_connected(network, 2, activation='softmax')
    network = regression(network, optimizer='adam', learning_rate=0.01,
                         loss='categorical_crossentropy')

    file_path3 = './trainingdataset/Cleaned-Masita-traindataset-2.csv'
    testdata, testlabels = load_csv(file_path3, target_column=0,
                                    categorical_labels=True, n_classes=2)
    resultdata = preprocess_server(testdata)
    resultdata = preprocess_vector(resultdata, unique_words)

    model = tflearn.DNN(network)
    model.fit(data, labels, n_epoch=40, shuffle=True,
              validation_set=(resultdata, testlabels), show_metric=True,
              batch_size=None, snapshot_epoch=True, run_id='task-classifier')
    model.save("thaitext-classifier-mashita.tfl")
    print("Network trained and saved as thaitext-classifier-mashita.tfl")

    result = model.evaluate(resultdata, testlabels)
    print("Evaluation result: %s" % result)
def load_data(filename):
    num_classes = 30
    data, labels = load_csv(filename, categorical_labels=True,
                            n_classes=num_classes)
    return data, labels, num_classes
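# Usage sketch for load_data() above; the file name is an assumption, and
# load_csv returns string fields, so the features still need a float cast.
import numpy as np

data, labels, num_classes = load_data('signs_train.csv')
data = np.array(data, dtype=np.float32)
print(len(data), "rows,", num_classes, "classes")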
import tflearn
import numpy
from tflearn.datasets import titanic
titanic.download_dataset("titanic_dataset.csv")
from tflearn.data_utils import load_csv

# Two label classes (survived or died); ignore the 'name' and 'ticket' columns
data, labels = load_csv("titanic_dataset.csv", target_column=0,
                        categorical_labels=True, n_classes=2,
                        columns_to_ignore=[2, 7])

# Encode 'sex' as 1 (female) / 0 (male)
for p in data:
    if p[1] == "female":
        p[1] = 1
    else:
        p[1] = 0

# The input layer takes 6 features per example
net = tflearn.input_data(shape=[None, 6])
net = tflearn.fully_connected(net, 20)
net = tflearn.fully_connected(net, 20)
net = tflearn.fully_connected(net, 2, activation='softmax')
net = tflearn.regression(net)

# Define model
model = tflearn.DNN(net)
# -*- coding: utf-8 -*-
from __future__ import print_function

import numpy as np
import tflearn

# Download the Titanic dataset
from tflearn.datasets import titanic
titanic.download_dataset('titanic_dataset.csv')

# Load CSV file, indicate that the first column represents labels
from tflearn.data_utils import load_csv
data, labels = load_csv('titanic_dataset.csv', target_column=0,
                        categorical_labels=True, n_classes=2)

# Preprocessing function
def preprocess(data, columns_to_ignore):
    # Sort by descending id and delete columns
    for id in sorted(columns_to_ignore, reverse=True):
        [r.pop(id) for r in data]
    for i in range(len(data)):
        # Convert 'sex' field to float (id is 1 after removing labels column)
        data[i][1] = 1. if data[i][1] == 'female' else 0.
    return np.array(data, dtype=np.float32)

# Ignore 'name' and 'ticket' columns (ids 1 & 6 of the data array)
to_ignore = [1, 6]

# Preprocess data
data = preprocess(data, to_ignore)
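# A possible continuation sketch: feeding the preprocessed Titanic features
# into a small TFLearn classifier. The layer sizes are illustrative
# assumptions, mirroring the networks the companion snippets build.
net = tflearn.input_data(shape=[None, 6])
net = tflearn.fully_connected(net, 32)
net = tflearn.fully_connected(net, 32)
net = tflearn.fully_connected(net, 2, activation='softmax')
net = tflearn.regression(net)
model = tflearn.DNN(net)
model.fit(data, labels, n_epoch=10, batch_size=16, show_metric=True)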
import numpy as np
import tflearn
from tflearn.datasets import titanic
from tflearn.data_utils import load_csv

data, labels = load_csv('measurements2.csv', target_column=0,
                        columns_to_ignore=[5, 7, 8, 9, 10, 11],
                        categorical_labels=True, n_classes=1)

# Blank out empty cells and encode the fuel type ('SP98' -> 1, otherwise 0)
for p in data:
    if not p[2]:
        p[2] = None
    if p[4] == "SP98":
        p[4] = 1
    else:
        p[4] = 0

for x in data:
    print(x)
import numpy as np
import tflearn
from tflearn.data_utils import load_csv

# Load CSV file, indicate that the first column represents labels
data, labels = load_csv('peaks.csv', target_column=0,
                        categorical_labels=True, n_classes=2)

# Preprocessing function
def preprocess(data, columns_to_ignore):
    # Sort by descending id and delete columns
    for id in sorted(columns_to_ignore, reverse=True):
        [r.pop(id) for r in data]
    return np.array(data, dtype=np.float32)

to_ignore = [0]

# Preprocess data
data = preprocess(data, to_ignore)

# Build neural network
net = tflearn.input_data(shape=[None, 40])
net = tflearn.fully_connected(net, 32)
net = tflearn.fully_connected(net, 32)
net = tflearn.fully_connected(net, 2, activation='softmax')
net = tflearn.regression(net)
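# A hedged sketch of training the network defined above; the epoch count and
# validation split are assumptions, not from the original source.
model = tflearn.DNN(net)
model.fit(data, labels, n_epoch=20, validation_set=0.1,
          batch_size=16, show_metric=True)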
import numpy as np
import tflearn

# Download the Titanic dataset
from tflearn.datasets import titanic
titanic.download_dataset('titanic_dataset.csv')

# Load CSV file, indicate that the first column represents labels
from tflearn.data_utils import load_csv
data, labels = load_csv('titanic_dataset.csv', target_column=0,
                        categorical_labels=True, n_classes=2,
                        columns_to_ignore=[2, 7])
# load_csv returns a (data, labels) tuple, so both values must be unpacked
data2, labels2 = load_csv('titanic_dataset.csv', target_column=0,
                          categorical_labels=True, n_classes=2)

# Encode 'sex' as 1 (female) / 0 (male)
for p in data:
    if p[1] == 'female':
        p[1] = 1
    else:
        p[1] = 0

net = tflearn.input_data(shape=[None, 6])
net = tflearn.fully_connected(net, 32)
net = tflearn.fully_connected(net, 32)
net = tflearn.fully_connected(net, 2, activation='softmax')
net = tflearn.regression(net)

model = tflearn.DNN(net)
model.fit(data, labels, n_epoch=100, batch_size=16, show_metric=True)

# Example passenger in raw (unpreprocessed) form
dicaprio = [3, 'Jack Dawson', 'male', 19, 0, 0, 'N/A', 5.000]
print(model.predict([[2, 0, 14, 0, 0, 33]]))
import numpy as np
import tflearn
import csv

# Load CSV file, indicate that the first column represents labels
from tflearn.data_utils import load_csv
data, labelsA = load_csv('clus_is_training_3.2_2.5.csv', target_column=0,
                         categorical_labels=True, n_classes=2)
# Named test_data rather than 'input' to avoid shadowing the builtin
test_data, labelsB = load_csv('clus_is_test_3.2_2.5.csv', target_column=0,
                              categorical_labels=True, n_classes=2)

# Build neural network (the data has 5 features)
net = tflearn.input_data(shape=[None, 5])
net = tflearn.fully_connected(net, 32)
dropout1 = tflearn.dropout(net, 0.5)
net = tflearn.fully_connected(dropout1, 2, activation='softmax',
                              bias=False, weights_init='truncated_normal')
net = tflearn.regression(net)

# Define model
model = tflearn.DNN(net)

# Start training (apply gradient descent algorithm)
model.fit(data, labelsA, n_epoch=10, batch_size=1, show_metric=True,
          validation_set=0.1)

# Predict on the test data
predict = model.predict(test_data)
with open('/Users/Anne-Katherine/Desktop/ATLAS_ML/clus_is_out_25_32.csv', 'wb') as f:
    csv_writer = csv.writer(f, delimiter=',')
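# The snippet above opens the writer but never writes any rows; a hedged
# sketch of the likely intent, one predicted-probability row per test example.
# Note that on Python 3, csv.writer needs 'w' with newline='' instead of 'wb'.
with open('clus_is_out_25_32.csv', 'w', newline='') as f:
    writer = csv.writer(f, delimiter=',')
    for row in predict:
        writer.writerow(row)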
from __future__ import print_function

import csv
import math
from datetime import datetime

import numpy as np
import pandas as pd
import tensorflow as tf
import tflearn
from tflearn.datasets import titanic
from tflearn.data_utils import load_csv, to_categorical

# The file containing the weather samples (including the column header)
WEATHER_SAMPLE_FILE = 'weather.csv'

#data, labels = load_csv(WEATHER_SAMPLE_FILE, target_column=12, categorical_labels=True, n_classes=2)
data, labels = load_csv(WEATHER_SAMPLE_FILE, target_column=11, columns_to_ignore=[0])

TrainingSetFeatures = data
TrainingSetLabels = labels

# Alternative fixed split:
#TrainingSetFeatures = data[:6000]
#TestSetFeatures = data[6000:]
#TrainingSetLabels = labels[:6000]
#TestSetLabels = labels[6000:]

def preprocessor(data):
    copyData = np.zeros((len(data), 12))
    for i in range(len(data)):
        sample = data[i]
from sklearn.metrics import classification_report
from plot_confusion_matrix import plot_confusion_matrix
import matplotlib.pyplot as plt
from tflearn.data_utils import to_categorical

'''
Merge the X and Y train files:
# df = pd.read_csv('./Preprocessed Data/X_train.csv', header=0)
# l = pd.read_csv('./Preprocessed Data/y_train.csv', header=0)
# df.insert(loc=0, column='Activity', value=l.values)
# df.to_csv('./Preprocessed Data/train.csv', index=False)
'''

# Load train.csv as the input file. The labels run from 1 to 6, so with
# TensorFlow's 0-based indexing n_classes=7; each label becomes a length-7
# one-hot vector.
data, labels = load_csv('./Preprocessed Data/train.csv', target_column=0,
                        categorical_labels=True, n_classes=7)
data = np.array(data, dtype=np.float64)
# np.int was removed from newer NumPy releases; use np.int64
labels = np.array(labels, dtype=np.int64)

class_names = [
    'WALKING', 'WALKING_UPSTAIRS', 'WALKING_DOWNSTAIRS', 'SITTING',
    'STANDING', 'LAYING'
]

input_layer = tflearn.input_data(shape=[None, 561])
# 1st hidden layer
dense1 = tflearn.fully_connected(input_layer, 64)
# Dropout layer to prevent overfitting
dropout1 = tflearn.dropout(dense1, 0.5)
# 2nd hidden layer
dense2 = tflearn.fully_connected(dropout1, 64)
dropout2 = tflearn.dropout(dense2, 0.5)
import tflearn
from tflearn.estimators import RandomForestClassifier
from tflearn.data_utils import load_csv
import numpy as np
from datetime import datetime

data, labels = load_csv('train.txt', target_column=13,
                        categorical_labels=True, n_classes=2)

# Transformation lists
# US size map
size_labels = ['?', 'unsized', 'xxs', 'xs', 's', 'm', 'l', 'xl', 'xxl', 'xxxl']
size_values = [0, 0, 30, 32, 35, 41, 43, 46, 48, 50]

gender_labels = ['?', 'Mrs', 'Mr', 'Family', 'not reported', 'Company']
gender_values = [0, 2, 3, 4, 1, 5]

color_labels = [
    '?', 'denim', 'ocher', 'curry', 'green', 'black', 'brown', 'red', 'mocca',
    'anthracite', 'olive', 'petrol', 'blue', 'grey', 'beige', 'ecru',
    'turquoise', 'magenta', 'purple', 'pink', 'khaki', 'navy', 'habana',
    'silver', 'white', 'nature', 'stained', 'orange', 'azure', 'apricot',
    'mango', 'berry', 'ash', 'hibiscus', 'fuchsia', 'blau', 'dark denim',
    'mint', 'ivory', 'yellow', 'bordeaux', 'pallid', 'ancient', 'baltic blue',
    'almond', 'aquamarine', 'brwon', 'aubergine', 'aqua', 'dark garnet',
    'dark grey', 'avocado', 'creme', 'champagner', 'cortina mocca',
    'currant purple', 'cognac', 'aviator', 'gold', 'ebony', 'cobalt blue',
    'kanel', 'curled', 'caramel', 'antique pink', 'darkblue', 'copper coin',
    'terracotta', 'basalt', 'amethyst', 'coral', 'jade', 'opal', 'striped',
    'mahagoni', 'floral', 'dark navy', 'dark oliv', 'vanille', 'ingwer',
    'iron', 'graphite', 'leopard', 'oliv', 'bronze', 'crimson', 'lemon',
import tflearn
import numpy as np
from tflearn.datasets import titanic
titanic.download_dataset('titanic_dataset.csv')
from tflearn.data_utils import load_csv

data, labels = load_csv('titanic_dataset.csv', target_column=0,
                        categorical_labels=True, n_classes=2)

def preprocess(data, columns_to_ignore):
    # Sort by descending id and delete columns
    for id in sorted(columns_to_ignore, reverse=True):
        [r.pop(id) for r in data]
    for i in range(len(data)):
        # Convert 'sex' field to float (id is 1 after removing labels column)
        data[i][1] = 1. if data[i][1] == 'female' else 0.
    return np.array(data, dtype=np.float32)

# Ignore 'name' and 'ticket' columns (ids 1 & 6 of the data array)
to_ignore = [1, 6]

# Preprocess data
data = preprocess(data, to_ignore)

net = tflearn.input_data(shape=[None, 6])
def load_data(filename):
    data, labels = load_csv(filename, target_column=8,
                            categorical_labels=False)
    return data, labels
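# Usage sketch for load_data() above; the file name is an assumption, and
# load_csv returns string fields, so both outputs are cast here on the
# assumption that column 8 holds numeric labels.
import numpy as np

data, labels = load_data('readings.csv')
data = np.array(data, dtype=np.float32)
labels = np.array(labels, dtype=np.float32)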