def load(fiName):
  import numpy as np

  numClass, labVal = getNumClass(fiName, 0)
  print("numClass=", numClass)
  # Load CSV file, indicate that the first column represents labels
  from tflearn.data_utils import load_csv
  data, labels = load_csv(fiName,
      target_column=0, categorical_labels=True, n_classes=numClass)

  # Convert the numeric string fields to floats.
  numRow = len(data)
  numCol = len(data[0])
  for rndx in range(numRow):
    for cndx in range(numCol):
      try:
        data[rndx][cndx] = float(data[rndx][cndx])
      except ValueError:
        # Fall back to a sentinel value for fields that are not numeric
        data[rndx][cndx] = -9999.0
      
  data = np.array(data, dtype=np.float32)

  #print("data as float array=", data)
  #print("labels=", labels)
  numCol = len(data[0])
  numRow = len(data)
  return (numRow, numCol, numClass, data, labels, labVal)
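# NOTE: getNumClass() is not defined in this snippet. A minimal sketch of what
# it might look like, assuming a headerless CSV whose `labCol` column holds the
# labels (hypothetical helper, not the original implementation):
import csv

def getNumClass(fiName, labCol):
  # Collect the distinct label values appearing in the label column
  labVal = set()
  with open(fiName, newline='') as f:
    for row in csv.reader(f):
      labVal.add(row[labCol])
  labVal = sorted(labVal)
  return len(labVal), labVal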
 def load_test_dataset(self,
                       filename='testDatasetWithOutNeuTwo',
                       vocab_name='vocabProc'):
     """
         Something is wrong with this function. Does not get the same result
         as before when loading in the new data...
     """
     """ Load test dataset """
     self.test_x, self.test_y = load_csv('datasets/' + filename,
                                         target_column=1)
     # Restore the saved vocabulary processor. restore() is a classmethod
     # that returns the restored instance, so assign its result instead of
     # calling it on a fresh processor and discarding the return value.
     self.vocabProcessor = VocabularyProcessor.restore(vocab_name)
     # Encode pos, neu and neg labels as integers
     labelEncoder = LabelEncoder()
     labelEncoder.fit(self.test_y)
     self.test_y = labelEncoder.transform(self.test_y)
     # Turn the list of sentences into lists of word ids
     self.test_x = np.array(
         list(self.vocabProcessor.transform([x[0] for x in self.test_x])))
     # Pad the sequences to the length of the longest sentence
     self.test_x = pad_sequences(self.test_x,
                                 maxlen=self.max_words,
                                 value=0.)
     # Convert labels to one-hot vectors
     self.test_y = to_categorical(self.test_y,
                                  nb_classes=self.number_of_classes)
 def load_dataset_training(self,
                           vocab_name,
                           filename='datasetWithoutNeutral'):
     """ Load the dataset """
     X, Y = load_csv('datasets/' + filename,
                     target_column=2,
                     columns_to_ignore=[0])
     """ Count max words from the longest sentence """
     self.max_words = max([len(x[0].split(" ")) for x in X])
     """ Get vocabulare size from longest sentence """
     self.vocabProcessor = VocabularyProcessor(self.max_words)
     """ Encode pos, neu and neg to numbers """
     labelEncoder = LabelEncoder()
     labelEncoder.fit(Y)
     Y = labelEncoder.transform(Y)
     """ Change the list of sentences to a list of sequence of words """
     X = np.array(list(self.vocabProcessor.fit_transform([x[0]
                                                          for x in X])))
     """ Split the datasets to training set and test test """
     self.X_train, self.X_test, self.Y_train, self.Y_test = train_test_split(
         X, Y, test_size=0.10, random_state=7)
     """ Pad the sequences to fit the longest sentence """
     self.X_train = pad_sequences(self.X_train,
                                  maxlen=self.max_words,
                                  value=0.)
     self.X_test = pad_sequences(self.X_test,
                                 maxlen=self.max_words,
                                 value=0.)
     """ Convert labels to binary vector """
     self.Y_train = to_categorical(self.Y_train,
                                   nb_classes=self.number_of_classes)
     self.Y_test = to_categorical(self.Y_test,
                                  nb_classes=self.number_of_classes)
     self.vocabProcessor.save(vocab_name)
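# A hedged sketch of the intended call order for the two loaders above:
# training fits and saves the vocabulary processor, and the test loader
# restores the same file. The class name SentimentLoader and the
# number_of_classes attribute are assumptions, not from the original code.
loader = SentimentLoader()
loader.number_of_classes = 2
loader.load_dataset_training('vocabProc')         # fits and saves the vocabulary
loader.load_test_dataset(vocab_name='vocabProc')  # restores the same vocabulary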
def main():
	# import the weather data including header row
	DATA_FILE = 'c:/Users/Trent/Documents/School/201840/CS_5890/PythonStuff/Weather_Prediction/weather.csv'
	data, labels = load_csv(DATA_FILE, target_column=11, columns_to_ignore=[0])

	TrainingSetFeatures = data
	TrainingSetFeatures = preProcessData(TrainingSetFeatures)
	TrainingSetLabels = labels
	catagorizeLabels(TrainingSetLabels)
	TrainingSetLabels = to_categorical(TrainingSetLabels, 9)
	
	# Create a test and training set from the data
	net = tflearn.input_data(shape=[None, 12])
	net = tflearn.fully_connected(net, 64)
	net = tflearn.fully_connected(net, 32)
	net = tflearn.fully_connected(net, 16)
	net = tflearn.fully_connected(net, 9, activation="softmax")
	net = tflearn.regression(net)

	# Define model
	model = tflearn.DNN(net, tensorboard_verbose=3)

	# Start training
	model.fit(TrainingSetFeatures, TrainingSetLabels, n_epoch = 15, validation_set = 0.15, batch_size=12, show_metric=True)	

	'''Different architectures and their average accuracy:'''
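# preProcessData() and catagorizeLabels() are not shown in this snippet. A
# rough sketch of what they might do, inferred from how main() uses them
# (float features in, labels mutated in place to indices for
# to_categorical(..., 9)); these are assumptions, not the original helpers:
import numpy as np

def preProcessData(rows):
    # Convert every field to float, substituting 0.0 for unparseable values
    for row in rows:
        for i in range(len(row)):
            try:
                row[i] = float(row[i])
            except ValueError:
                row[i] = 0.0
    return np.array(rows, dtype=np.float32)

def catagorizeLabels(labels):
    # Replace each label string with its index in the sorted label set, in place
    labVal = sorted(set(labels))
    for i in range(len(labels)):
        labels[i] = labVal.index(labels[i])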
def load():
    import pandas as pd
    from tflearn.datasets import titanic
    from tflearn.data_utils import load_csv

    pd.set_option('display.max_columns', 10)
    pd.set_option('display.max_rows', 100)

    # Download the Titanic dataset
    titanic.download_dataset('titanic_dataset.csv')

    # Load CSV file, indicate that the first column represents labels
    data, labels = load_csv('titanic_dataset.csv',
                            target_column=0,
                            has_header=True,
                            categorical_labels=False,
                            n_classes=2)

    # Make a df out of it for convenience
    df = pd.DataFrame(data,
                      columns=[
                          "pclass", "name", "sex", "age", "sibsp", "parch",
                          "ticket", "fare"
                      ])

    df = df.drop(columns=['name', 'ticket'])

    # bin ages
    df['age'] = df['age'].astype('float64')

    group_names = ['A', 'B', 'C', 'D', 'E']
    bins = [-1, 17, 35, 50, 65, 1000]
    df['age_group'] = pd.cut(df['age'], bins=bins, labels=group_names)

    res = pd.get_dummies(df['age_group'], prefix='age_group')
    df = pd.concat([df, res], axis=1)
    df = df.drop(columns=['age', 'age_group'])

    # normalize parch
    # df['parch'] = df['parch'].astype('float64')
    # df["parch"] = df["parch"] / df["parch"].max()

    # normalize age
    # df['age'] = df['age'].astype('float64')
    # df["age"] = df["age"] / df["age"].max()

    # normalize fare
    df['fare'] = df['fare'].astype('float64')
    df["fare"] = df["fare"] / df["fare"].max()

    # normalize sibsp
    df['sibsp'] = df['sibsp'].astype('float64')
    df["sibsp"] = df["sibsp"] / df["sibsp"].max()

    # normalize parch
    df['parch'] = df['parch'].astype('float64')
    df["parch"] = df["parch"] / df["parch"].max()

    # convert sex
    df = df.replace(["male", "female"], [0, 1])

    print(df)

    return df, labels
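# A possible way to feed the frame above into a tflearn model. Sketch only:
# the hidden-layer width is arbitrary; the input width comes from the frame.
import tflearn

def build_model_from_df():
    df, labels = load()
    features = df.to_numpy(dtype='float32')
    net = tflearn.input_data(shape=[None, features.shape[1]])
    net = tflearn.fully_connected(net, 32)
    net = tflearn.fully_connected(net, 2, activation='softmax')
    net = tflearn.regression(net)
    return tflearn.DNN(net), features, labels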
    def sentiment_analysis(self, sentencedata):

        file_path = 'Cleaned-Masita corpus 2.csv'
        data, labels = load_csv(file_path,
                                target_column=0,
                                categorical_labels=True,
                                n_classes=2)

        pdata = self.preprocess_server(data)
        unique_words = self.get_uniquewords(pdata)
        data = self.preprocess_vector(pdata, unique_words)

        neurons = len(data[0])

        # shuffle the dataset
        data, labels = shuffle(data, labels)

        reset_default_graph()
        network = input_data(shape=[None, neurons])
        network = fully_connected(network, 8, activation='relu')
        network = fully_connected(network, 8 * 2, activation='relu')
        network = fully_connected(network, 8, activation='relu')
        network = dropout(network, 0.5)

        network = fully_connected(network, 2, activation='softmax')
        network = regression(network,
                             optimizer='adam',
                             learning_rate=0.01,
                             loss='categorical_crossentropy')

        model = tflearn.DNN(network)
        #model.fit(data, labels, n_epoch=40, shuffle=True, validation_set=None , show_metric=True, batch_size=None, snapshot_epoch=True, run_id='task-classifier')
        #model.save("./model/thaitext-classifier-mashita.tfl")
        #print("Network trained and saved as thaitext-classifier-mashita.tfl")

        model.load("./model/thaitext-classifier-mashita.tfl")
        #file_path3 = 'Cleaned-Masita-traindataset-2.csv'

        input_sentencedata = self.preprocess_server(sentencedata)

        vector_one = []
        for word in unique_words:
            if word in input_sentencedata:
                vector_one.append(1)
            else:
                vector_one.append(0)

        vector_one = np.array(vector_one, dtype=np.float32)

        label = model.predict_label([vector_one])
        #print (label)

        pred = model.predict([vector_one])
        #print(pred)
        return pred
 def __load_data(self, path='./data/testAndGrades.csv'):
     """
     Loads the data for the model.
     :param path: Path to the data of the model. It should be a csv file.
     :return: A pandas csv with all the data for the model to train.
     """
     self.data, self.labels = load_csv(path,
                                       has_header=True,
                                       target_column=0,
                                       categorical_labels=True,
                                       n_classes=10)
     return self.data, self.labels
    def __init__(self, conf_file):
        """ Private constructor initializing the csv data loader with configurations"""
        with open(conf_file) as f:
            # use safe_load instead of load
            self.data_conf = yaml.safe_load(f)

        self.rows, self.labels = load_csv(
            self.data_conf['data_file'],
            target_column=self.data_conf['target_column'],
            categorical_labels=True,
            n_classes=self.data_conf['n_classes'])

        self.columns_to_ignore = self.data_conf['columns_to_ignore']
        self.conversion_map = self.data_conf['conversion_map']
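# A hypothetical conf.yaml matching the keys this loader reads; the values
# below are placeholders, not from the original project:
EXAMPLE_CONF = """\
data_file: titanic_dataset.csv
target_column: 0
n_classes: 2
columns_to_ignore: [1, 6]
conversion_map: {}
"""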
def parse_csv(csv_file, column_number):
    check_feature_num(csv_file, column_number)

    features, labels = load_csv(csv_file,
                                target_column=column_number,
                                columns_to_ignore=None,
                                has_header=True)

    feature_tensor = np.array(features).reshape(len(features), len(
        features[0])).astype(np.float64)

    label_tensor = np.array(labels).reshape(len(labels), -1).astype(np.float64)

    return feature_tensor, label_tensor
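# NOTE: check_feature_num() is not shown here. A minimal guess at its intent,
# assuming it validates that column_number is a legal column index for the
# file (hypothetical helper):
import csv

def check_feature_num(csv_file, column_number):
    with open(csv_file, newline='') as f:
        header = next(csv.reader(f))
    if not 0 <= column_number < len(header):
        raise ValueError("target column %d out of range for %d columns" %
                         (column_number, len(header)))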
    def sentiment_analysis(self, sentencedata):

        file_path = './corpus/Combined_inhousedata_UTF8-2.csv'
        data, labels = load_csv(file_path,
                                target_column=0,
                                categorical_labels=True,
                                n_classes=2)

        pdata = self.preprocess_server(data)
        unique_words = self.get_uniquewords(pdata)
        data = self.preprocess_vector(pdata, unique_words)

        neurons = len(unique_words)

        # shuffle the dataset
        data, labels = shuffle(data, labels)

        reset_default_graph()
        network = input_data(shape=[None, neurons])
        network = fully_connected(network, 8, activation='relu')
        network = fully_connected(network, 8 * 2, activation='relu')
        network = fully_connected(network, 8, activation='relu')
        network = dropout(network, 0.5)

        network = fully_connected(network, 2, activation='softmax')
        network = regression(network,
                             optimizer='adam',
                             learning_rate=0.01,
                             loss='categorical_crossentropy')

        model = tflearn.DNN(network)
        model.load("./model/thaitext-classifier-CID_UTF8-burgerking-2.tfl")

        input_sentencedata = self.preprocess_server_2(sentencedata)
        #input_uniquewords = self.get_uniquewords(input_sentencedata)

        vector_one = []
        for word in unique_words:
            if word in input_sentencedata:
                vector_one.append(1)
            else:
                vector_one.append(0)
        vector_one = np.array(vector_one, dtype=np.float32)
        #print(vector_one)

        label = model.predict_label([vector_one])
        pred = model.predict([vector_one])

        return pred
    def sentiment_analysis(self, sentencedata):
        
        file_path = './corpus/BurgerKing_UTF8.csv'
        data, labels = load_csv(file_path, target_column=0, categorical_labels=True, n_classes=2)

        pdata = self.preprocess_server(data)
        unique_words = self.get_uniquewords(pdata)
        data = self.preprocess_vector(pdata, unique_words)

        neurons = len(data[0])

        # shuffle the dataset
        data, labels = shuffle(data, labels)
def get_multiply(percentage=1):
    name = 'multiply'
    batch_size = 128
    nb_classes = 1
    input_shape = (2, )
    x, y = load_csv('multiply.csv', target_column=2)
    N = int(10000 * percentage)
    TRAIN_SIZE = int(N * 0.8)
    TEST_SIZE = int(N * 0.2)

    # Slice the test set to TEST_SIZE so both splits scale with `percentage`
    x_train = x[:TRAIN_SIZE]
    x_test = x[TRAIN_SIZE:TRAIN_SIZE + TEST_SIZE]
    y_train = y[:TRAIN_SIZE]
    y_test = y[TRAIN_SIZE:TRAIN_SIZE + TEST_SIZE]

    return (nb_classes, batch_size, input_shape, x_train, x_test, y_train, y_test)
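# multiply.csv is not included with this snippet. A sketch of how such a file
# could be generated, assuming rows of the form a,b,a*b (a layout inferred
# from input_shape=(2,) and target_column=2 above):
import csv
import random

def make_multiply_csv(path='multiply.csv', n=10000):
    with open(path, 'w', newline='') as f:
        writer = csv.writer(f)
        for _ in range(n):
            a, b = random.random(), random.random()
            writer.writerow([a, b, a * b])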
def prepare_csv():
    # Download the Titanic dataset
    from tflearn.datasets import titanic
    titanic.download_dataset('titanic_dataset.csv')

    # Load CSV file, indicate that the first column represents labels
    from tflearn.data_utils import load_csv
    data, labels = load_csv('titanic_dataset.csv',
                            target_column=0,
                            has_header=False,
                            categorical_labels=True,
                            n_classes=2)

    # Preprocess data
    data = preprocess_csv(data, to_ignore)

    return data, labels
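# preprocess_csv() and to_ignore are defined outside this snippet. A sketch
# consistent with the Titanic preprocessing used throughout this collection
# (drop the ignored columns, encode 'sex' as a float); an assumption, not the
# original helper:
import numpy as np

to_ignore = [1, 6]  # 'name' and 'ticket' columns

def preprocess_csv(data, columns_to_ignore):
    for column in sorted(columns_to_ignore, reverse=True):
        [row.pop(column) for row in data]
    for i in range(len(data)):
        data[i][1] = 1. if data[i][1] == 'female' else 0.
    return np.array(data, dtype=np.float32)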
    def predicting(self):
        data, labels = load_csv("Colour.csv",
                                target_column=1,
                                categorical_labels=True,
                                n_classes=2,
                                columns_to_ignore=[0])
        # name = input("What's your name? \n")
        self.name = self.name.capitalize()  # capitalize() returns a new string
        # colour = input("What's your favourite colour, %s? \n" % name)
        self.colour = self.colour.capitalize()
        if self.colour not in self.colours:
            # print("Sorry, that colour is not in my database, try to be simpler")
            colour = input("What's your favourite colour, %s? \n" % self.name)
            self.colour = colour.capitalize()
            if self.colour not in self.colours:
                # print("You Failed.")
                return
        colourNum = str(self.colours.index(self.colour))
        # age = input("How old are you?\n")

        net = tflearn.input_data(
            shape=[None, 2]
        )  # An input layer, with a variable number of examples of 2 features each (the [None, 2])
        net = tflearn.fully_connected(net,
                                      32)  # Two hidden layers with 32 nodes
        net = tflearn.fully_connected(
            net, 32)  # passing net chains this layer onto the one above
        net = tflearn.fully_connected(
            net, 2, activation='softmax'
        )  # An output layer of 2 nodes, and a "softmax" activation (more on activations later)
        net = tflearn.regression(net)  # find the pattern

        model = tflearn.DNN(net)
        model.fit(data, labels, n_epoch=100, batch_size=16, show_metric=True)

        predict = model.predict([[self.age, colourNum]])[0][0]
        # print(predict)
        predict = round(predict, 0)
        if predict == 1:
            self.predict = "Male"

        else:
            self.predict = "Female"
def load_titanic():

    import tflearn
    import numpy as np
    """
        Download Titanic dataset
        len = 1309
    """
    from tflearn.datasets import titanic
    titanic.download_dataset('titanic_dataset.csv')

    # Load CSV file, indicate that the first column represents labels
    from tflearn.data_utils import load_csv
    data, titanic_labels = load_csv('titanic_dataset.csv',
                                    target_column=0,
                                    categorical_labels=True,
                                    n_classes=2)

    # Preprocessing function
    def preprocess(passengers, columns_to_delete):
        # Sort by descending id and delete columns
        for column_to_delete in sorted(columns_to_delete, reverse=True):
            [passenger.pop(column_to_delete) for passenger in passengers]
        for i in range(len(passengers)):
            # Converting 'sex' field to float (id is 1 after removing labels column)
            passengers[i][1] = 1. if passengers[i][1] == 'female' else 0.
        return np.array(passengers, dtype=np.float32)

    # Ignore 'name' and 'ticket' columns (id 1 & 6 of data array)
    to_ignore = [1, 6]

    # Preprocess data
    x_train = preprocess(data, to_ignore)
    y_train = titanic_labels

    # No separate held-out data here: the test set reuses the training set
    x_test = x_train
    y_test = y_train

    input_dim = (6, )
    num_classes = 2

    return x_train, y_train, x_test, y_test, input_dim, num_classes
class uniqueword():

    def preprocess_server(data):
        rlist = []
        preprocessdata = []
        for i in range(len(data)):
            x = requests.get('http://174.138.26.245:5000/preprocess/' + data[i][0])
            resu = x.json()
            preprocessdata.append(resu['result'])
        for i in range(len(preprocessdata)):
            r = requests.get('http://174.138.26.245:5000/tokenize/' + preprocessdata[i])
            result = r.json()
            rlist.append(result['result'])
        return rlist

    def get_uniquewords(listdata):
        uniquewords = []
        for line in range(len(listdata)):
            words = listdata[line]
            for word in words:
                if word not in uniquewords:
                    #w = repr(word.encode('utf-8'))
                    uniquewords.append(word)
        return uniquewords

    def csv_writer(write_data):
        # csv in Python 3 wants text mode with newline=''; wrap each word in a
        # list so it lands in one cell instead of one character per column
        with open('./uniqueword/Combined_inhousedata_UTF8-2.csv', 'w', newline='') as write_file:
            writer = csv.writer(write_file)
            for line in write_data:
                print(line)
                writer.writerow([line])

    file_path = './corpus/Combined_inhousedata_UTF8-2.csv'
    data, labels = load_csv(file_path, target_column=0, categorical_labels=True, n_classes=2)

    pdata =preprocess_server(data)
    unique_words = get_uniquewords(pdata)
    csv_writer(unique_words)
def get_titanic(percentage=1):
    name = 'titanic'
    nb_classes = 2
    batch_size = 4
    input_shape = (6,)

    data, labels = load_csv('titanic_dataset.csv', target_column=0,
                            categorical_labels=True, n_classes=2)
    data = preprocess_titanic(data)
    # Convert labels to an array first so the fancy indexing below works
    labels = np.array(labels, np.int32)
    x_train = data
    y_train = labels
    # Sample 10% of the rows (with matching labels) as a test set
    my_random = random.sample(range(len(data)), int(len(data) / 10))
    x_test = data[my_random]
    y_test = labels[my_random]

    TRAIN_SIZE = int(len(x_train) * percentage)
    TEST_SIZE = int(len(x_test) * percentage)

    x_train = x_train[:TRAIN_SIZE]
    y_train = y_train[:TRAIN_SIZE]
    x_test = x_test[:TEST_SIZE]
    y_test = y_test[:TEST_SIZE]

    return (nb_classes, batch_size, input_shape, x_train, x_test, y_train, y_test) 
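# preprocess_titanic() is not shown. A sketch following the Titanic
# preprocessing used elsewhere in this collection (an assumption, not the
# original function):
import numpy as np

def preprocess_titanic(rows):
    # Drop 'name' and 'ticket' (ids 1 and 6 once the label column is removed)
    for column in sorted([1, 6], reverse=True):
        [row.pop(column) for row in rows]
    for i in range(len(rows)):
        rows[i][1] = 1. if rows[i][1] == 'female' else 0.
    return np.array(rows, dtype=np.float32)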
from __future__ import print_function

import numpy as np
import tflearn

# Load CSV file, indicate that the first column represents labels
from tflearn.data_utils import load_csv

data, labels = load_csv('../data/train/gest_train_ratio2.csv',
                        target_column=0,
                        categorical_labels=True,
                        n_classes=7)

print("data=", data)

print("labels=", labels)

data = np.array(data, dtype=np.float32)

print("data as float array=", data)

# Build neural network
net = tflearn.input_data(shape=[None, 6])
net = tflearn.fully_connected(net, 32)
net = tflearn.fully_connected(net, 32)
net = tflearn.fully_connected(net, 7, activation='softmax')
net = tflearn.regression(net)

# Define model
model = tflearn.DNN(net)
# Start training (apply gradient descent algorithm)
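# The fit call is cut off in this snippet; a plausible completion, with
# hyperparameters chosen arbitrarily for illustration:
model.fit(data, labels, n_epoch=10, batch_size=16, show_metric=True)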
class Thai_segment():

    file_path = './corpus/Cleaned-Masita corpus 2.csv'
    data, labels = load_csv(file_path,
                            target_column=0,
                            categorical_labels=True,
                            n_classes=2)

    def preprocess_server(data):
        rlist = []
        preprocessdata = []
        for i in range(len(data)):
            x = requests.get('http://174.138.26.245:5000/preprocess/' +
                             data[i][0])
            resu = x.json()
            preprocessdata.append(resu['result'])
        for i in range(len(preprocessdata)):
            r = requests.get('http://174.138.26.245:5000/tokenize/' +
                             preprocessdata[i])
            result = r.json()
            rlist.append(result['result'])
        return rlist

    def get_uniquewords(listdata):
        uniquewords = []
        for line in range(len(listdata)):
            words = listdata[line]
            for word in words:
                if word not in uniquewords:
                    #w = repr(word.encode('utf-8'))
                    uniquewords.append(word)
        return uniquewords

    def preprocess_vector(listdata, uniquewords):
        sentences = []
        vectors = []
        #f = open(file_path, 'r')
        for line in range(len(listdata)):
            words = listdata[line]
            inner_data = []
            for word in words:
                inner_data.append(word)
            sentences.append(inner_data)

        for sentence in sentences:
            inner_vector = []
            for word in uniquewords:
                if word in sentence:
                    inner_vector.append(1)
                else:
                    inner_vector.append(0)
            vectors.append(inner_vector)
        return np.array(vectors, dtype=np.float32)

    pdata = preprocess_server(data)
    unique_words = get_uniquewords(pdata)
    data = preprocess_vector(pdata, unique_words)

    neurons = len(data[0])

    # shuffle the dataset
    data, labels = shuffle(data, labels)

    reset_default_graph()
    network = input_data(shape=[None, neurons])
    network = fully_connected(network, 8, activation='relu')
    network = fully_connected(network, 8 * 2, activation='relu')
    network = fully_connected(network, 8, activation='relu')
    network = dropout(network, 0.5)

    network = fully_connected(network, 2, activation='softmax')
    network = regression(network,
                         optimizer='adam',
                         learning_rate=0.01,
                         loss='categorical_crossentropy')

    file_path3 = './trainingdataset/Cleaned-Masita-traindataset-2.csv'
    testdata, testlabels = load_csv(file_path3,
                                    target_column=0,
                                    categorical_labels=True,
                                    n_classes=2)
    resultdata = preprocess_server(testdata)
    resultdata = preprocess_vector(resultdata, unique_words)

    model = tflearn.DNN(network)
    model.fit(data,
              labels,
              n_epoch=40,
              shuffle=True,
              validation_set=(resultdata, testlabels),
              show_metric=True,
              batch_size=None,
              snapshot_epoch=True,
              run_id='task-classifier')
    model.save("thaitext-classifier-mashita.tfl")
    print("Network trained and saved as Pthaitext-classifier-mashita.tfl")

    result = model.evaluate(resultdata, testlabels)
    print("Evaluation result: %s" % result)
def load_data(filename):
    num_classes = 30
    data, labels = load_csv(filename,
                            categorical_labels=True,
                            n_classes=num_classes)
    return data, labels, num_classes
import tflearn
import numpy

from tflearn.datasets import titanic

titanic.download_dataset("titanic_dataset.csv")

from tflearn.data_utils import load_csv
data, labels = load_csv(
    "titanic_dataset.csv",
    target_column=0,
    categorical_labels=True,
    n_classes=2,
    columns_to_ignore=[2, 7])  # two label classes: survived or died

for p in data:
    if p[1] == "female":
        p[1] = 1
    else:
        p[1] = 0

net = tflearn.input_data(shape=[None,
                                6])  # the input layer expects 6 features per example
net = tflearn.fully_connected(net, 20)
net = tflearn.fully_connected(net, 20)
net = tflearn.fully_connected(net, 2, activation='softmax')
net = tflearn.regression(net)

# define model
model = tflearn.DNN(net)
# -*- coding: utf-8 -*-
from __future__ import print_function

import numpy as np
import tflearn

# Download the Titanic dataset
from tflearn.datasets import titanic
titanic.download_dataset('titanic_dataset.csv')

# Load CSV file, indicate that the first column represents labels
from tflearn.data_utils import load_csv
data, labels = load_csv('titanic_dataset.csv', target_column=0,
                        categorical_labels=True, n_classes=2)


# Preprocessing function
def preprocess(data, columns_to_ignore):
    # Sort by descending id and delete columns
    for id in sorted(columns_to_ignore, reverse=True):
        [r.pop(id) for r in data]
    for i in range(len(data)):
        # Converting 'sex' field to float (id is 1 after removing labels column)
        data[i][1] = 1. if data[i][1] == 'female' else 0.
    return np.array(data, dtype=np.float32)

# Ignore 'name' and 'ticket' columns (id 1 & 6 of data array)
to_ignore=[1, 6]

# Preprocess data
data = preprocess(data, to_ignore)
import numpy as np
import tflearn

from tflearn.datasets import titanic

from tflearn.data_utils import load_csv
data, labels = load_csv('measurements2.csv',
                        target_column=0,
                        columns_to_ignore=[5, 7, 8, 9, 10, 11],
                        categorical_labels=True,
                        n_classes=1)

for p in data:
    if not p[2]:
        p[2] = None
    if p[4] == "SP98":
        p[4] = 1
    else:
        p[4] = 0
    #print(p)

for x in data:
    print(x)

# for p in data:
#     if p[1] == "female":
#         p[1] = 1
#     else:
#         p[1] = 0

# for x in data:
import numpy as np
import tflearn
from tflearn.data_utils import load_csv

# Load CSV file, indicate that the first column represents labels

data, labels = load_csv('peaks.csv',
                        target_column=0,
                        categorical_labels=True,
                        n_classes=2)


# Preprocessing function
def preprocess(data, columns_to_ignore):
    # Sort by descending id and delete columns
    for id in sorted(columns_to_ignore, reverse=True):
        [r.pop(id) for r in data]
    return np.array(data, dtype=np.float32)


to_ignore = [0]

# Preprocess data
data = preprocess(data, to_ignore)

# Build neural network
net = tflearn.input_data(shape=[None, 40])
net = tflearn.fully_connected(net, 32)
net = tflearn.fully_connected(net, 32)
net = tflearn.fully_connected(net, 2, activation='softmax')
net = tflearn.regression(net)
import numpy as np
import tflearn

# Download the Titanic dataset
from tflearn.datasets import titanic
titanic.download_dataset('titanic_dataset.csv')

# Load CSV file, indicate that the first column represents labels
from tflearn.data_utils import load_csv
# Load the data; the second call keeps every column and is left unused here
data, labels = load_csv('titanic_dataset.csv', target_column=0, categorical_labels=True, n_classes=2, columns_to_ignore=[2,7])
data2 = load_csv('titanic_dataset.csv', target_column=0, categorical_labels=True, n_classes=2)

for p in data:
    if p[1] == 'female':
        p[1] = 1
    else:
        p[1] = 0

net = tflearn.input_data(shape=[None, 6])
net = tflearn.fully_connected(net, 32)
net = tflearn.fully_connected(net, 32)
net = tflearn.fully_connected(net, 2, activation = 'softmax')
net = tflearn.regression(net)

model = tflearn.DNN(net)
model.fit(data, labels, n_epoch=100, batch_size=16, show_metric=True)

# Raw example row (unused); the prediction below uses an already-encoded sample
dicaprio = [3, 'Jack Dawson', 'male', 19, 0, 0, 'N/A', 5.000]
print(model.predict([[2, 0, 14, 0, 0, 33]]))
import numpy as np
import tflearn
import csv

# Load CSV file, indicate that the first column represents labels
from tflearn.data_utils import load_csv
data, labelsA = load_csv('clus_is_training_3.2_2.5.csv', target_column=0,
                        categorical_labels=True, n_classes=2)
test_input, labelsB = load_csv('clus_is_test_3.2_2.5.csv', target_column=0,
                               categorical_labels=True, n_classes=2)

# Build neural network
#Data has 5 features
net = tflearn.input_data(shape=[None, 5]) 
net = tflearn.fully_connected(net, 32)
dropout1 = tflearn.dropout(net, 0.5)
net = tflearn.fully_connected(dropout1, 2, activation='softmax', bias=False, weights_init='truncated_normal')

net = tflearn.regression(net)

# Define model
model = tflearn.DNN(net)
# Start training (apply gradient descent algorithm)
model.fit(data, labelsA, n_epoch=10, batch_size=1, show_metric=True, validation_set=0.1)

# Test Data

predict = model.predict(test_input)

with open('/Users/Anne-Katherine/Desktop/ATLAS_ML/clus_is_out_25_32.csv', 'w', newline='') as f:
	csv_writer = csv.writer(f, delimiter=',')
from __future__ import print_function
import csv
import pandas as pd
import tensorflow as tf
import numpy as np
import math
import tflearn
from datetime import datetime
from tflearn.datasets import titanic
from tflearn.data_utils import load_csv, to_categorical

# The file containing the weather samples (including the column header)
WEATHER_SAMPLE_FILE = 'weather.csv'
#data, labels = load_csv(WEATHER_SAMPLE_FILE, target_column=12, categorical_labels=True, n_classes=2)
data, labels = load_csv(WEATHER_SAMPLE_FILE, target_column=11, columns_to_ignore=[0])

TrainingSetFeatures = data
#TestSetFeatures = data
TrainingSetLabels = labels
#TestSetLabels = labels

#TrainingSetFeatures = data[:6000]
#TestSetFeatures = data[6000:]
#TrainingSetLabels = labels[:6000]
#TestSetLabels = labels[6000:]

def preprocessor(data):
	copyData = np.zeros((len(data), 12))
	for i in range(len(data)):
		sample = data[i]
from sklearn.metrics import classification_report
from plot_confusion_matrix import plot_confusion_matrix
import matplotlib.pyplot as plt
from tflearn.data_utils import load_csv, to_categorical
import numpy as np
import tflearn
'''
merge X and Y train files
# df = pd.read_csv('./Preprocessed Data/X_train.csv', header=0) 
# l = pd.read_csv('./Preprocessed Data/y_train.csv', header=0)
# df.insert(loc=0, column = 'Activity', value=l.values)
# df.to_csv('./Preprocessed Data/train.csv', index = False)
'''

# Load train.csv as the input file.
# Use 7 classes: the labels run 1-6 and one-hot encoding is 0-indexed,
# so each label becomes a vector of length 7 (index 0 unused).
data, labels = load_csv('./Preprocessed Data/train.csv',
                        target_column=0,
                        categorical_labels=True,
                        n_classes=7)
data = np.array(data, dtype=np.float64)
labels = np.array(labels, dtype=np.int64)
class_names = [
    'WALKING', 'WALKING_UPSTAIRS', 'WALKING_DOWNSTAIRS', 'SITTING', 'STANDING',
    'LAYING'
]
input_layer = tflearn.input_data(shape=[None, 561])
# 1st hidden layer
dense1 = tflearn.fully_connected(input_layer, 64)
# dropout layer to prevent overfitting
dropout1 = tflearn.dropout(dense1, 0.5)
# 2nd hidden layer
dense2 = tflearn.fully_connected(dropout1, 64)
dropout2 = tflearn.dropout(dense2, 0.5)
import tflearn
from tflearn.estimators import RandomForestClassifier
from tflearn.data_utils import load_csv
import numpy as np
from datetime import datetime

data, labels = load_csv('train.txt',
                        target_column=13,
                        categorical_labels=True,
                        n_classes=2)

# Transformation lists
# US size map
size_labels = ['?', 'unsized', 'xxs', 'xs', 's', 'm', 'l', 'xl', 'xxl', 'xxxl']
size_values = [0, 0, 30, 32, 35, 41, 43, 46, 48, 50]
gender_labels = ['?', 'Mrs', 'Mr', 'Family', 'not reported', 'Company']
gender_values = [0, 2, 3, 4, 1, 5]
color_labels = [
    '?', 'denim', 'ocher', 'curry', 'green', 'black', 'brown', 'red', 'mocca',
    'anthracite', 'olive', 'petrol', 'blue', 'grey', 'beige', 'ecru',
    'turquoise', 'magenta', 'purple', 'pink', 'khaki', 'navy', 'habana',
    'silver', 'white', 'nature', 'stained', 'orange', 'azure', 'apricot',
    'mango', 'berry', 'ash', 'hibiscus', 'fuchsia', 'blau', 'dark denim',
    'mint', 'ivory', 'yellow', 'bordeaux', 'pallid', 'ancient', 'baltic blue',
    'almond', 'aquamarine', 'brwon', 'aubergine', 'aqua', 'dark garnet',
    'dark grey', 'avocado', 'creme', 'champagner', 'cortina mocca',
    'currant purple', 'cognac', 'aviator', 'gold', 'ebony', 'cobalt blue',
    'kanel', 'curled', 'caramel', 'antique pink', 'darkblue', 'copper coin',
    'terracotta', 'basalt', 'amethyst', 'coral', 'jade', 'opal', 'striped',
    'mahagoni', 'floral', 'dark navy', 'dark oliv', 'vanille', 'ingwer',
    'iron', 'graphite', 'leopard', 'oliv', 'bronze', 'crimson', 'lemon',
import tflearn
import numpy as np

from tflearn.datasets import titanic
titanic.download_dataset('titanic_dataset.csv')

from tflearn.data_utils import load_csv

data, labels = load_csv('titanic_dataset.csv',
                        target_column=0,
                        categorical_labels=True,
                        n_classes=2)


def preprocess(data, columns_to_ignore):
    # Sort by descending id and delete columns
    for id in sorted(columns_to_ignore, reverse=True):
        [r.pop(id) for r in data]
    for i in range(len(data)):
        # Converting 'sex' field to float (id is 1 after removing labels column)
        data[i][1] = 1. if data[i][1] == 'female' else 0.
    return np.array(data, dtype=np.float32)


# Ignore 'name' and 'ticket' columns (id 1 & 6 of data array)
to_ignore = [1, 6]

# Preprocess data
data = preprocess(data, to_ignore)

net = tflearn.input_data(shape=[None, 6])
def load_data(filename):
    data, labels = load_csv(filename,
                            target_column=8,
                            categorical_labels=False)

    return data, labels