def __init__(self, args):
    # Load data
    self.data = datasets.getData()
    # Load language model
    self.lm_model = main_lm.RNNLanguageModelHandler(args)
Example #2
def __init__(self):
    # Inspect each cross-validation fold; the final fold's id is reused below.
    for i in range(10):
        (trainInputs, trainOutputs, validInputs, validOutputs,
         testInputs, testOutputs, wids) = getData(crossValidate=True,
                                                  filterKnight=True, foldId=i)
        print(len(trainInputs))
        print(len(validInputs))
        print(len(testInputs))
    self.sequences = readData.getData(crossValidate=True, filterKnight=False,
                                      foldId=i)
Example #3
def main(args):
    # Load training and eval data
    xTrain, yTrain, xVal, yVal = getData()

    # Create the Estimator
    classifier = tf.estimator.Estimator(model_fn=cnn_model_fn,
                                        model_dir="ABCDEFG")

    # Set up logging for predictions
    tensors_to_log = {"probabilities": "softmax_tensor"}
    logging_hook = tf.train.LoggingTensorHook(tensors=tensors_to_log,
                                              every_n_iter=50)

    # Train the model
    train_input_fn = tf.estimator.inputs.numpy_input_fn(x={"x": xTrain},
                                                        y=yTrain,
                                                        batch_size=100,
                                                        num_epochs=None,
                                                        shuffle=True)
    classifier.train(input_fn=train_input_fn, steps=10, hooks=[logging_hook])

    # Evaluate the model and print results
    eval_input_fn = tf.estimator.inputs.numpy_input_fn(x={"x": xVal},
                                                       y=yVal,
                                                       num_epochs=10,
                                                       shuffle=False)
    eval_results = classifier.evaluate(input_fn=eval_input_fn)
    print(eval_results)
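
cnn_model_fn is referenced but not shown on this page. Below is a minimal TF1-style model_fn sketch; the dense layers (instead of a real CNN), the two classes, and the learning rate are all assumptions. Its one firm requirement is naming the softmax op "softmax_tensor" so the LoggingTensorHook above can find it.

def cnn_model_fn(features, labels, mode):
    # Minimal stand-in for the real (convolutional) model function.
    dense = tf.layers.dense(features["x"], units=64, activation=tf.nn.relu)
    logits = tf.layers.dense(dense, units=2)
    predictions = {
        "classes": tf.argmax(input=logits, axis=1),
        # Named so the LoggingTensorHook can locate this tensor.
        "probabilities": tf.nn.softmax(logits, name="softmax_tensor"),
    }
    if mode == tf.estimator.ModeKeys.PREDICT:
        return tf.estimator.EstimatorSpec(mode=mode, predictions=predictions)
    loss = tf.losses.sparse_softmax_cross_entropy(labels=labels, logits=logits)
    if mode == tf.estimator.ModeKeys.TRAIN:
        optimizer = tf.train.AdamOptimizer(learning_rate=0.001)
        train_op = optimizer.minimize(loss,
                                      global_step=tf.train.get_global_step())
        return tf.estimator.EstimatorSpec(mode=mode, loss=loss,
                                          train_op=train_op)
    eval_metric_ops = {"accuracy": tf.metrics.accuracy(
        labels=labels, predictions=predictions["classes"])}
    return tf.estimator.EstimatorSpec(mode=mode, loss=loss,
                                      eval_metric_ops=eval_metric_ops)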
Example #4
def __handle_message_activity(self, activity):
    self.send_response(200)
    self.end_headers()
    credentials = MicrosoftAppCredentials(APP_ID, APP_PASSWORD)
    connector = ConnectorClient(credentials, base_url=activity.service_url)
    reply = BotRequestHandler.__create_reply_activity(
        activity, 'You said: %s' % getData())
    connector.conversations.send_to_conversation(reply.conversation.id,
                                                 reply)
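
BotRequestHandler.__create_reply_activity is not shown here; the sketch below follows the standard echo-bot pattern with botbuilder's schema (an assumption about the real helper), mirroring the incoming activity and swapping sender and recipient.

from botbuilder.schema import Activity, ActivityTypes

def create_reply_activity(request_activity, text):
    # Build a reply that routes back to the user on the same conversation.
    return Activity(
        type=ActivityTypes.message,
        channel_id=request_activity.channel_id,
        conversation=request_activity.conversation,
        recipient=request_activity.from_property,
        from_property=request_activity.recipient,
        text=text,
        service_url=request_activity.service_url)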
import time

import dataProcessing
import processWords
import readData

''' ###################################### '''
''' ##     COMPUTE SAMPLE BANK DATA     ## '''
''' ###################################### '''
start = time.perf_counter()

freq_dict = dict()     # Struct: dict[category] -> dict[category][word]: freq
posSize_dict = dict()  # Struct: dict -> 1D list -> 1D list

tfidf_dict = dict()    # Struct: dict[category] -> dict[word]: tfidf_weighting
wordMapDict = dict()   # Struct: dict[category][fileName] -> dict[word]: 1

numOfFile = 0

freq_dict, posSize_dict = readData.getData()


# freq_dict = processWords.filterTrashWords(freq_dict)

# freq_dict_th = processWords.applyMeanThreshold(freq_dict)

mid = time.perf_counter()
print(mid - start)

wordMapDict, numOfFile = readData.getTrainingDataWordMap()

tfidf_dict = processWords.computeTFIDF(freq_dict, wordMapDict, numOfFile)


########################################################################
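
processWords.computeTFIDF is not shown on this page. The sketch below is one plausible reading of it, treating freq_dict values as term frequencies and wordMapDict as per-file occurrence maps; the exact weighting in the real code is unknown.

import math

def computeTFIDF(freq_dict, wordMapDict, numOfFile):
    # Document frequency: in how many files each word occurs.
    df = {}
    for category in wordMapDict:
        for fileName in wordMapDict[category]:
            for word in wordMapDict[category][fileName]:
                df[word] = df.get(word, 0) + 1
    # Weight each in-category frequency by inverse document frequency.
    tfidf = {}
    for category, words in freq_dict.items():
        tfidf[category] = {
            word: freq * math.log(numOfFile / df.get(word, 1))
            for word, freq in words.items()
        }
    return tfidf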
Example #6
    def predict(self, sample):
        # Majority vote: return the label predicted by the most trees.
        result = {}
        max_count = 0
        ret_label = None
        for tree in self.trees:
            label = tree.predict(sample)
            if label in result:
                result[label] += 1
            else:
                result[label] = 1
            if result[label] > max_count:
                max_count = result[label]
                ret_label = label
        return ret_label


if __name__ == "__main__":
    rf = RandomForest(n_tree=201)
    from readData import getData

    data, labels = getData()
    rf.train(data)
    correct = 0
    total = len(data)
    for i in range(len(data)):
        label = rf.predict(data[i][:-1])
        if data[i][-1] == label:
            correct += 1
    print(correct, total, correct / total)
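
The same majority vote can be written more compactly with collections.Counter; a behavior-equivalent sketch (ties resolve to the first label reaching the top count, matching the loop above):

from collections import Counter

def majority_vote(trees, sample):
    # Count each tree's predicted label and return the most common one.
    votes = Counter(tree.predict(sample) for tree in trees)
    return votes.most_common(1)[0][0]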
Example #7
import readData

for f in range(10):
    trainInputs, trainOutputs, validInputs, validOutputs, testInputs, testOutputs, wids = readData.getData(
        filterKnight=True, crossValidate=True, foldId=f)
    print(len(trainInputs))
    print(len(validInputs))
    print(len(testInputs))
    print("Vocab Size:", len(wids))
    print("First Input:", trainInputs[0])
    print("First Output:", trainOutputs[0])
    reverse_wids = readData.reverseDictionary(wids)
    print("First Input Raw", [reverse_wids[c] for c in trainInputs[0]])
    print("First Output Raw", [reverse_wids[c] for c in trainOutputs[0]])
        # ************************************************************
        # EXTRACTION OF ECG SIGNALS FROM DATABASES
        # ************************************************************
        print('Extracting ECG signals from the databases...')

        # Get the [Beats] objects that are [timeWindow] seconds in length
        # and that end [amountOfTimeBeforeBeg] seconds before an abnormal
        # cardiac rhythm begins.
        fileName = '../listOfBeatsObjs/listOfBeatsObjs' + str(
            amountOfTimeBeforeBeg) + '-' + str(timeWindow)
        fileExists = os.path.isfile(fileName)
        if not fileExists:
            doCreateTextFiles = False
            listOfBeatsObjs = getData(amountOfTimeBeforeBeg, timeWindow,
                                      doCreateTextFiles)
            # Pickle requires binary mode.
            with open(fileName, 'wb') as f:
                pickle.dump([listOfBeatsObjs], f)
        else:
            with open(fileName, 'rb') as f:
                listOfBeatsObjs = pickle.load(f)[0]

        print('Completed!\n')

        # ********************************************************
        # DIVISION OF ECG SIGNALS INTO TRAINING AND TESTING DATA
        # ********************************************************
        print('Dividing the ECG signals into training and testing data...')
        # The fraction of the [Beats] objects that should be a part of the
        # training data.
        trainingFract = 0.5
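
The fragment ends before trainingFract is applied; the sketch below shows one plausible way to split the [Beats] objects (an assumption, since the original's division logic is not shown):

import random

def splitBeats(listOfBeatsObjs, trainingFract):
    # Shuffle, then take the first trainingFract as training data and the
    # remainder as testing data.
    shuffled = list(listOfBeatsObjs)
    random.shuffle(shuffled)
    cut = int(len(shuffled) * trainingFract)
    return shuffled[:cut], shuffled[cut:]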
import readData as rd
import _pickle as cPickle
import numpy as np
import models
from sklearn.metrics import (accuracy_score, f1_score, precision_score,
                             recall_score, classification_report,
                             confusion_matrix)

trainX, trainY, testX, testY = rd.getData('./Datasets/en-gb0-short.tsv', 0.7)


def getEvaluations(y_true, y_pred):
    results = []

    results.append(accuracy_score(y_true, y_pred))
    results.append(precision_score(y_true, y_pred, average='macro'))
    results.append(recall_score(y_true, y_pred, average='macro'))
    results.append(f1_score(y_true, y_pred, average='macro'))

    return results


def getTrainedDT(dump_bool=0):
    try:
        with open('./Trained Models/DT.pickle', 'rb') as file:
            clf_DT = cPickle.load(file)
        return clf_DT
    except (FileNotFoundError, EOFError, cPickle.UnpicklingError):
        print('Trained DT model not found, re-training...')
        return models.trainDT(trainX, trainY, dump_bool)


def getTrainedRF(dump_bool=0):
    # Completed by analogy with getTrainedDT above (an assumption: the
    # original body is truncated; models.trainRF and RF.pickle are inferred).
    try:
        with open('./Trained Models/RF.pickle', 'rb') as file:
            clf_RF = cPickle.load(file)
        return clf_RF
    except (FileNotFoundError, EOFError, cPickle.UnpicklingError):
        print('Trained RF model not found, re-training...')
        return models.trainRF(trainX, trainY, dump_bool)

from tensorflow.keras.models import load_model
from tensorflow.keras.optimizers import Adam
import matplotlib.pyplot as plt

import readData


def Train(X_train, y_Train, newModel=True, val_split=0.2, epochs=10,
          batchSize=32):
    # The original fragment begins mid-function: this signature, its
    # defaults, and buildModel() are assumptions added so the code parses.
    if newModel:
        modelObj = buildModel()  # hypothetical builder, sketched below
        opt = Adam(learning_rate=0.01)
        modelObj.compile(loss='mse', optimizer=opt)
        modelHist = modelObj.fit(X_train,
                                 y_Train,
                                 validation_split=val_split,
                                 shuffle=True,
                                 epochs=epochs,
                                 batch_size=batchSize)
        Analyze(modelHist)
        modelObj.save('model.h5')
    else:
        new_model = load_model('model.h5')
        modelHist = new_model.fit(X_train,
                                  y_Train,
                                  validation_split=val_split,
                                  shuffle=True,
                                  epochs=epochs,
                                  batch_size=batchSize)
        Analyze(modelHist)
        new_model.save('model.h5')


# main entry point
if __name__ == "__main__":
    # Preview some of the input images:
    # utils.playSome(X_train, 0)
    X_train, Y_Train = readData.getData(filePath='A:/driving_log.csv')
    plt.hist(Y_Train, bins=30)
    plt.show()
    Train(X_train, Y_Train)
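
buildModel() in the reconstructed header above is hypothetical. Below is a minimal tf.keras regressor consistent with the 'mse' loss and a single steering-angle target; the (66, 200, 3) input shape is an assumption about the image size.

from tensorflow.keras.layers import Conv2D, Dense, Flatten, MaxPooling2D
from tensorflow.keras.models import Sequential

def buildModel():
    # Small convolutional regressor with one continuous output.
    return Sequential([
        Conv2D(16, (5, 5), activation='relu', input_shape=(66, 200, 3)),
        MaxPooling2D((2, 2)),
        Conv2D(32, (5, 5), activation='relu'),
        MaxPooling2D((2, 2)),
        Flatten(),
        Dense(64, activation='relu'),
        Dense(1),
    ])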
#
# Doc: Gradient descent used to train our logistic regression
#

from readData import getData
from function import getSum0
import re

theta0 = 0.0
learningRate = 0.01
age, chd = getData()

print "training algorithm ..."

while (1):
	tmpTheta0 = learningRate * (getSum0(theta0, age, chd) / len(chd))
	if (abs(tmpTheta0) < 0.0001):
		break
	theta0 = theta0 - tmpTheta0

#
# The following lines update theta0 in our sigmoid function file.
#
oldFile = open("SigmoidFunction/SigmoidFunction.py").read()
oldFile = re.sub(r"theta0 = -?\d*\.\d*", "theta0 = " + str(theta0), oldFile)
newFile = open("SigmoidFunction/SigmoidFunction.py", 'w')
newFile.write(oldFile)
newFile.close()
#

print "done"
Example #12
def genTrainingData():
    channel_0_x, channel_0_y = rd.getData(
        'OpenBCISession_2020-04-25_15-29-52-eyeblink', None, 0, None)
    fft = ft.fftData(channel_0_x, channel_0_y, None)
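
Neither rd nor ft is imported in this fragment; rd matches the readData module used elsewhere on this page, and ft.fftData might resemble the following sketch (the signature and the unused window argument are assumptions):

import numpy as np

def fftData(x, y, window):
    # Hypothetical FFT helper: magnitude spectrum of the y samples.
    # The window argument is unused here (it is None in the call above).
    return np.abs(np.fft.rfft(np.asarray(y, dtype=float)))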
Example #13
class Config:
    # Reconstructed header (an assumption): the fragment begins mid-__init__,
    # so the class line and the first two assignments are inferred from the
    # keyword arguments passed below.
    def __init__(self, READ_OPTION, downstream, sharing, GRU):
        self.READ_OPTION = READ_OPTION
        self.downstream = downstream
        self.sharing = sharing
        self.GRU = GRU


READ_OPTION = "NORMAL"
downstream = True
sharing = False
GRU = False

config = Config(READ_OPTION=READ_OPTION,
                downstream=downstream,
                sharing=sharing,
                GRU=GRU)

if config.READ_OPTION == "NORMAL":
    train_sentences_de, train_sentences_en, valid_sentences_de, valid_sentences_en, test_sentences_de, test_sentences_en, wids = readData.getData(
        trainingPoints=700, validPoints=400)
elif config.READ_OPTION == "KNIGHTHOLDOUT":
    train_sentences_de, train_sentences_en, valid_sentences_de, valid_sentences_en, test_sentences_de, test_sentences_en, wids = readData.getDataKnightHoldOut(
        trainingPoints=1000)

reverse_wids = readData.reverseDictionary(wids)

print(len(train_sentences_de))
print(len(train_sentences_en))

print(len(valid_sentences_de))
print(len(valid_sentences_en))

VOCAB_SIZE_DE = len(wids)
VOCAB_SIZE_EN = VOCAB_SIZE_DE
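
readData.reverseDictionary is used here and in Example #7 but not shown; it presumably just inverts the word-to-id map, as in this sketch:

def reverseDictionary(wids):
    # Map each id back to its word, for printing raw sentences.
    return {idx: word for word, idx in wids.items()}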
#
# Doc: K-means algorithm applied to car data
#

from sys import exit
from readData import getData
from random import seed, randint
import matplotlib.pyplot as plt
from function import assignmentStep, computeCentroid

seed()

acceleration, topSpeed = getData()

r1 = randint(0, (len(acceleration) - 1))
r2 = randint(0, (len(acceleration) - 1))
while (r1 == r2):
    r2 = randint(0, (len(acceleration) - 1))

c1 = (0.0, 0.0)
c2 = (0.0, 0.0)
tmpC1 = (acceleration[r1], topSpeed[r1])
tmpC2 = (acceleration[r2], topSpeed[r2])

# Iterate until neither centroid moves between rounds.
while tmpC1 != c1 or tmpC2 != c2:
    c1 = tmpC1
    c2 = tmpC2
    lstC1, lstC2 = assignmentStep(c1, c2, acceleration, topSpeed)
    tmpC1 = computeCentroid(lstC1)
    tmpC2 = computeCentroid(lstC2)
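
assignmentStep and computeCentroid come from function and are not shown; below are minimal sketches consistent with how they are called above (the signatures and the squared-Euclidean metric are assumptions):

def assignmentStep(c1, c2, acceleration, topSpeed):
    # Assign each (acceleration, topSpeed) point to its nearest centroid
    # by squared Euclidean distance; return the two resulting clusters.
    lstC1, lstC2 = [], []
    for x, y in zip(acceleration, topSpeed):
        d1 = (x - c1[0]) ** 2 + (y - c1[1]) ** 2
        d2 = (x - c2[0]) ** 2 + (y - c2[1]) ** 2
        (lstC1 if d1 <= d2 else lstC2).append((x, y))
    return lstC1, lstC2

def computeCentroid(points):
    # Mean of the points in a cluster; falls back to (0, 0) if empty.
    if not points:
        return (0.0, 0.0)
    return (sum(p[0] for p in points) / len(points),
            sum(p[1] for p in points) / len(points))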