def trainTestSVM(xTraining, xTesting, yTraining, yTesting, labelLst):
    gc.collect()
    utils2.myPrint('---SVM Classifier---')
    utils2.myPrint('Original Train Batch:', np.shape(xTraining))
    utils2.myPrint('Original Test Batch:', np.shape(xTesting))

    # prepare data for SVM: convert one-hot labels back to class indices
    yTraining = np.argmax(yTraining, axis=1)
    yTesting = np.argmax(yTesting, axis=1)

    # split each padded sequence into 'divisor' equal chunks of 'distribute'
    # frames, so LinearSVC sees fixed-size flat vectors
    shape = np.shape(xTraining)
    divisor = maxPrimeFactors(shape[1])
    utils2.myPrint('Divisor:', divisor)
    distribute = shape[1] // divisor
    # todo - ai : shuffle data may be needed
    xTraining = np.reshape(xTraining, (-1, shape[-1] * distribute))
    xTesting = np.reshape(xTesting, (-1, shape[-1] * distribute))
    # repeat each label once per chunk produced from its sequence
    yTrn = list()
    for y in yTraining:
        yTrn.extend([y] * divisor)
    del yTraining
    yTst = list()
    for y in yTesting:
        yTst.extend([y] * divisor)
    del yTesting
    utils2.myPrint('Mini-Batched Train Batch:', np.shape(xTraining))
    utils2.myPrint('Mini-Batched Test Batch:', np.shape(xTesting))

    model = LinearSVC(verbose=True)
    utils2.myPrint('')
    utils2.myPrint('Training...')
    model.fit(xTraining, yTrn)
    utils2.myPrint('')
    utils2.myPrint('Testing...')
    yPredict = model.predict(xTesting)
    utils2.myPrint('Test Accuracy:', model.score(xTesting, yTst))

    # stats by class
    yTst = [labelLst[i] for i in yTst]
    yPredict = [labelLst[i] for i in yPredict]
    utils2.myPrint('Labels:', labelLst)
    utils2.myPrint('Confusion Matrix:')
    utils2.myPrint(confusion_matrix(yTst, yPredict, labels=labelLst))
    utils2.myPrint('Classification Report:')
    utils2.myPrint(classification_report(yTst, yPredict, labels=labelLst))

    del xTraining
    del xTesting
    del yTrn
    del yTst
    del labelLst
    gc.collect()
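# trainTestSVM() relies on maxPrimeFactors(), which is not defined in this
# section. A minimal sketch under the assumption (implied by the name and the
# chunking logic above) that it returns the largest prime factor of n:
def maxPrimeFactors(n):
    # strip factors of 2 first, then odd factors; whatever remains last is the largest
    maxPrime = 1
    while n % 2 == 0:
        maxPrime = 2
        n //= 2
    f = 3
    while f * f <= n:
        while n % f == 0:
            maxPrime = f
            n //= f
        f += 2
    if n > 1:
        maxPrime = n
    return maxPrime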
def runConfig(parameters):
    folderInputs = parameters['inputFolder']
    trainingEpoch = parameters['trainingEpoch']
    featureMode = parameters['featureMode']
    channelMode = parameters['channelMode']
    classificationMode = parameters['classificationMode']
    stepSize = parameters['stepSize']
    sampRate = parameters['sampRate']
    batchSize = parameters['batchSize']
    lengthCut = parameters['lengthCut']
    learningRate = parameters['learningRate']
    lossFunction = parameters['lossFunction']
    optimizer = parameters['optimizer']
    clsModel = parameters['clsModel']
    clsVersion = parameters['clsVersion']

    if 'DTW' == clsModel:
        padding = False
    else:
        padding = True

    saveLoadData = True
    # check if the inputs were prepared before
    inputsFileName = 'E:/atili/Datasets/BreathDataset/temp' \
                     'DataStorage/{}_inputs_{}_{}_{}_{}_{}_{}_{}.dat' \
        .format(os.path.basename(os.path.dirname(folderInputs)),
                featureMode, channelMode, classificationMode, stepSize,
                sampRate, lengthCut, padding)
    labelsFileName = 'E:/atili/Datasets/BreathDataset/temp' \
                     'DataStorage/{}_labels_{}_{}_{}_{}_{}_{}_{}.dat' \
        .format(os.path.basename(os.path.dirname(folderInputs)),
                featureMode, channelMode, classificationMode, stepSize,
                sampRate, lengthCut, padding)
    labelDictFileName = 'E:/atili/Datasets/BreathDataset/temp' \
                        'DataStorage/{}_labelDict_{}_{}_{}_{}_{}_{}_{}.dat' \
        .format(os.path.basename(os.path.dirname(folderInputs)),
                featureMode, channelMode, classificationMode, stepSize,
                sampRate, lengthCut, padding)
    if os.path.exists(inputsFileName) and os.path.exists(labelsFileName) \
            and os.path.exists(labelDictFileName) and saveLoadData:
        utils2.myPrint('Loading from Previous Data Files...')
        inputs = loadData(inputsFileName)
        utils2.myPrint('Loaded:', inputsFileName)
        labels = loadData(labelsFileName)
        utils2.myPrint('Loaded:', labelsFileName)
        labelDict = loadData(labelDictFileName)
        utils2.myPrint('Loaded:', labelDictFileName)
    else:
        # use fileReader() for random shuffling every iteration; use some
        # temp data for tests only (via saveLoadData)
        inputs, labels, labelDict = fileReader(folderInputs, stepSize,
                                               sampRate, featureMode,
                                               channelMode,
                                               classificationMode,
                                               lengthCut, pad=padding)
        # Save the randomly shuffled data once, then load it on each run
        # instead of reshuffling every run. Best found way for comparing the
        # performance of different network variations.
        if saveLoadData:
            utils2.myPrint('Saving Data Files for Later Use...')
            saveData(inputs, inputsFileName)
            utils2.myPrint('Saved:', inputsFileName)
            saveData(labels, labelsFileName)
            utils2.myPrint('Saved:', labelsFileName)
            saveData(labelDict, labelDictFileName)
            utils2.myPrint('Saved:', labelDictFileName)

    # write results to a separate text file (part 1/3)
    fResult = open('./Results.txt', 'a+')
    fResult.write('\n\r ' + utils2.scriptStartDateTime + ', ')
    print(parameters, end='', file=fResult)
    fResult.close()

    utils2.myPrint('Inputs Shape:', np.shape(inputs))
    numClasses = len(labelDict)  # total number of classification classes (i.e. people)
    utils2.myPrint('')
    utils2.myPrint('Total of ' + str(len(inputs)) + ' inputs loaded @ ' + folderInputs)
    utils2.myPrint('Total of', numClasses, 'classes')

    # train with 80% (minus remainder of batchSize) of the files, test with 20%
    totalOfInputs = len(inputs)
    trainingSteps = int(totalOfInputs * 0.8)
    if (0 == batchSize) or (batchSize > trainingSteps):
        batchSize = trainingSteps
    # trainingSteps -= (trainingSteps % batchSize)  # for a better fit of train size, not necessary
    testSteps = totalOfInputs - trainingSteps
    utils2.myPrint(trainingSteps, 'steps for training,', testSteps, 'steps for test')
    # todo - ai : a validation split could be added
    utils2.myPrint('Splitting Train and Test Data...', flush=True)
    xTrain, xTest, yTrain, yTest = train_test_split(np.asarray(inputs),
                                                    np.asarray(labels),
                                                    stratify=labels,
                                                    train_size=trainingSteps,
                                                    test_size=testSteps)

    utils2.myPrint('------Model for %s------' % featureMode)
    if 'LSTM' == clsModel:
        # classify with the Keras LSTM model
        trainTestLSTM(xTrain, xTest, yTrain, yTest, numClasses,
                      trainingEpoch, batchSize, list(labelDict.keys()),
                      lossFunction, optimizer, learningRate, featureMode,
                      clsVersion)
        gc.collect()
    elif 'SVM' == clsModel:
        # classify with the scikit-learn SVM model
        trainTestSVM(xTrain, xTest, yTrain, yTest, list(labelDict.keys()))
        gc.collect()
    elif 'DTW' == clsModel:
        # classify with the kNN-DTW model
        trainTestkNNDTW(xTrain, xTest, yTrain, yTest, list(labelDict.keys()))
        gc.collect()
    else:
        utils2.myPrint('ERROR: Invalid Classification Model:', clsModel)
        sys.exit()

    del xTrain
    del yTrain
    del xTest
    del yTest
    del inputs
    del labelDict
    del labels
    gc.collect()
def saveData(data, path):
    try:
        with open(path, 'wb') as f:
            pickle.dump(data, f)
    except (MemoryError, OverflowError):
        utils2.myPrint('Pickle failed to save, trying joblib...')
        joblib.dump(data, path)
def fileReader(folder, stepSz, sampRt, featureM, channelM, classificationM,
               lenCutMs=0, shuffle=True, pad=True, flatten=True):
    gc.collect()
    labelDictLocal = dict()
    labelListLocal = list()
    inputFilesLocal = list()
    maxLen = 0
    r = np.random.RandomState()
    randState = r.get_state()

    imfFeatExt = 'imf'
    if 8 == sampRt:
        imfFeatExt = imfFeatExt + '08'
    elif 48 == sampRt:
        imfFeatExt = imfFeatExt + '48'
    else:
        utils2.myPrint('ERROR: Invalid sampling rate parameter:', sampRt)
        sys.exit()

    utils2.myPrint('Initial Scan.')
    for rootPath, directories, files in os.walk(folder):
        if shuffle:
            utils2.myPrint('Shuffling...')
            np.random.shuffle(files)
        utils2.myPrint('Reading:', end='')
        for flname in files:
            ext = flname.split('.')[-1]
            if (imfFeatExt == ext and featureM in ['Freqs', 'Mags', 'Phases',
                                                   'FrMg', 'MgPh', 'FrPh',
                                                   'FrMgPh', 'nFreqs',
                                                   'nMags', 'nPhases',
                                                   'FrnFr', 'MgnMg',
                                                   'PhnPh']) or \
                    ('wav' == ext and featureM in ['Wav', 'Dur']) or \
                    ('spct48' == ext and featureM in ['Specto']):
                # read file
                if 'Wav' == featureM:
                    _, inputFile = scipy.io.wavfile.read(os.path.join(rootPath, flname))
                    if 0 < lenCutMs:
                        inputFile = inputFile[:lenCutMs * sampRt]
                elif 'Dur' == featureM:
                    # read duration features from the trailing
                    # '<start>-<duration>' filename token, converted to ms
                    parts = '.'.join(flname.split('.')[:-1])  # only the name, without extension
                    parts = str(parts.split('_')[-1])
                    parts = parts.split('-')
                    startMs = float(parts[0]) * 1000
                    durationMs = float(parts[1]) * 1000
                    inputFile = [startMs, durationMs]
                else:
                    if 0 < lenCutMs:
                        inputFile = loadData(os.path.join(rootPath, flname))[:lenCutMs * sampRt]
                    else:
                        inputFile = loadData(os.path.join(rootPath, flname))

                # read labels
                speakerId = flname[0:2]
                postureId = flname[2:4]
                if 'Speaker' == classificationM:
                    label = speakerId
                elif 'Posture5' == classificationM:
                    label = postureId
                elif 'Posture3' == classificationM:
                    if postureId in ['01', '02']:
                        label = '01'
                    elif postureId in ['03', '04']:
                        label = '02'
                    elif '05' == postureId:
                        label = '03'
                    else:
                        utils2.myPrint('ERROR: Invalid posture id:', postureId)
                        sys.exit()
                else:
                    utils2.myPrint('ERROR: Invalid classification mode:', classificationM)
                    sys.exit()
                if label not in labelDictLocal:
                    # extend all existing one-hot vectors with a zero,
                    # then register the new label
                    labelCount = len(labelDictLocal)
                    for l in labelDictLocal:
                        labelDictLocal[l].append(0)
                    labelDictLocal[label] = (labelCount * [0])
                    labelDictLocal[label].append(1)
                utils2.myPrint('.', end='', flush=True)

                # decimate by stepSize
                if stepSz > 1 and featureM not in ['Specto', 'Dur']:
                    inputFile = np.array(inputFile[::stepSz])
                    gc.collect()

                # update max length
                seqLen = len(inputFile)
                maxLen = max(seqLen, maxLen)

                if 'Dur' != featureM:
                    # separate out only the wanted channel(s)
                    chnSlice = None
                    if channelM in ['0', '1', '2', '3',
                                    '0Ov', '1Ov', '2Ov', '3Ov']:
                        chnSlice = int(channelM[0])  # index: [0-3] channel
                    elif 'Front' == channelM:
                        if postureId in ['01', '02']:
                            chnSlice = 1
                        elif postureId in ['03', '04']:
                            chnSlice = 2
                        elif postureId in ['05']:
                            chnSlice = 3
                        else:
                            utils2.myPrint('ERROR: Invalid posture for front microphone setting:', postureId)
                            sys.exit()
                    elif channelM in ['All', 'Split', 'SplitOv',
                                      'AllShfUni', 'AllShfRnd']:
                        pass
                    else:
                        utils2.myPrint('ERROR: Invalid channel mode for file read:', channelM)
                        sys.exit()

                    # separate out only the wanted feature(s)
                    featSlice = None
                    if 'Freqs' == featureM:
                        featSlice = 0  # index: 0
                    elif 'Mags' == featureM:
                        featSlice = 1  # index: 1
                    elif 'Phases' == featureM:
                        featSlice = 2  # index: 2
                    elif 'nFreqs' == featureM:
                        featSlice = 3  # index: 3
                    elif 'nMags' == featureM:
                        featSlice = 4  # index: 4
                    elif 'nPhases' == featureM:
                        featSlice = 5  # index: 5
                    elif 'FrMg' == featureM:
                        featSlice = slice(0, 2)  # indexes: 0,1
                    elif 'MgPh' == featureM:
                        featSlice = slice(1, 3)  # indexes: 1,2
                    elif 'FrPh' == featureM:
                        featSlice = slice(0, 3, 2)  # indexes: 0,2
                    elif 'FrnFr' == featureM:
                        featSlice = slice(0, 4, 3)  # indexes: 0,3
                    elif 'MgnMg' == featureM:
                        featSlice = slice(1, 5, 3)  # indexes: 1,4
                    elif 'PhnPh' == featureM:
                        featSlice = slice(2, 6, 3)  # indexes: 2,5
                    elif 'FrMgPh' == featureM:
                        featSlice = slice(0, 3)  # indexes: 0,1,2
                    elif featureM in ['Wav', 'Specto', 'Dur']:
                        pass
                    else:
                        utils2.myPrint('ERROR: Invalid feature mode for file read:', featureM)
                        sys.exit()

                    # apply separation
                    if (chnSlice is not None) and (featSlice is not None):
                        inputFile = inputFile[:, chnSlice, featSlice, :]
                    elif (chnSlice is not None) and (featSlice is None):
                        inputFile = inputFile[:, chnSlice, ...]
                    elif (chnSlice is None) and (featSlice is not None):
                        inputFile = inputFile[:, :, featSlice, :]
                    else:
                        pass  # no slicing needed

                    if channelM in ['Split', 'SplitOv']:
                        # make channels the first dimension
                        inputFile = np.swapaxes(inputFile, 0, 1)
                        for inpFl in inputFile:
                            # flatten each channel
                            inpFl = np.reshape(inpFl, (seqLen, -1)).copy()
                            gc.collect()
                            if 'SplitOv' == channelM:
                                inpFl = np.lib.stride_tricks.as_strided(
                                    inpFl,
                                    (seqLen - 3, inpFl.shape[-1] * 4),
                                    inpFl.strides,
                                    writeable=False)
                            # append each channel as a separate item
                            inputFilesLocal.append(inpFl.copy())
                            labelListLocal.append(label)
                            del inpFl
                            gc.collect()
                    else:
                        if channelM in ['AllShfUni', 'AllShfRnd']:
                            # make channels the first dimension
                            inputFile = np.swapaxes(inputFile, 0, 1)
                            if 'AllShfUni' == channelM:
                                # randomize each config's channel order, not each file's:
                                # reset the RNG so channels shuffle in unison for each config
                                np.random.set_state(randState)
                            elif 'AllShfRnd' == channelM:
                                pass  # randomize the channel order of each and every file differently
                            else:
                                utils2.myPrint('ERROR: Invalid channel mode for randomization:', channelM)
                                sys.exit()
                            # shuffle channels
                            np.random.shuffle(inputFile)
                            # set the dimension order back to default
                            inputFile = np.swapaxes(inputFile, 0, 1)
                        # flatten
                        if flatten:
                            inputFile = np.reshape(inputFile, (seqLen, -1)).copy()
                            gc.collect()
                        if channelM in ['0Ov', '1Ov', '2Ov', '3Ov']:
                            inputFile = np.lib.stride_tricks.as_strided(
                                inputFile,
                                (seqLen - 3, inputFile.shape[-1] * 4),
                                inputFile.strides,
                                writeable=False)
                        # append each item to its list
                        inputFilesLocal.append(inputFile.copy())
                        labelListLocal.append(label)
                else:  # featureM == 'Dur'
                    inputFilesLocal.append(inputFile.copy())
                    labelListLocal.append(label)
                del inputFile
                gc.collect()

    # because of overlapping, input lengths are reduced by 3
    if channelM in ['0Ov', '1Ov', '2Ov', '3Ov', 'SplitOv'] and 'Dur' != featureM:
        maxLen -= 3

    utils2.myPrint('')  # for new line
    utils2.myPrint('Generating Labels...')
    # regenerate the label list from the final label dict in one-hot vector form
    for i in range(len(labelListLocal)):
        labelListLocal[i] = labelDictLocal[labelListLocal[i]]
    utils2.myPrint('%d Files with %d Label(s): %s.' %
                   (len(inputFilesLocal), len(labelDictLocal),
                    list(labelDictLocal.keys())))

    if pad and 'Dur' != featureM:
        msLen = maxLen / sampRt  # calculate the length in milliseconds
        if stepSz > 0:
            msLen *= stepSz
        utils2.myPrint('Padding(', msLen, 'ms):', end='')
        for i in range(len(inputFilesLocal)):
            seqLen = len(inputFilesLocal[i])
            diff = maxLen - seqLen
            inputFilesLocal[i] = np.pad(inputFilesLocal[i],
                                        ((0, diff), (0, 0)),
                                        mode='constant',
                                        constant_values=0)
            utils2.myPrint('.', end='', flush=True)
            gc.collect()
        utils2.myPrint('')
    gc.collect()
    return inputFilesLocal, labelListLocal, labelDictLocal
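# A toy demonstration of the as_strided() overlap trick used in fileReader()
# above (my illustration, not project code): with the input's own strides and
# a (seqLen - 3, features * 4) shape, each output row is 4 consecutive input
# frames flattened, advancing one frame at a time. This is only valid for
# C-contiguous arrays, which is why the reshape above uses .copy().
#
#   a = np.arange(12, dtype=np.float64).reshape(6, 2)  # 6 frames, 2 features
#   w = np.lib.stride_tricks.as_strided(a, (6 - 3, 2 * 4), a.strides,
#                                       writeable=False)
#   # w[0] == frames 0..3 flattened, w[1] == frames 1..4, w[2] == frames 2..5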
def loadData(path):
    try:
        with open(path, 'rb') as f:
            return pickle.load(f)
    except (pickle.UnpicklingError, TypeError):
        utils2.myPrint('Not a pickle file, trying joblib...')
        return joblib.load(path)
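# clearGPU() is called throughout but not defined in this section. A plausible
# sketch, assuming it resets the Keras/TensorFlow session to release GPU
# memory between model builds ('backend' is the keras.backend module already
# used elsewhere in this file; this implementation is an assumption):
def clearGPU():
    backend.clear_session()  # drop the current TF graph and free GPU memory
    gc.collect()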
def trainTestLSTM(xTraining, xTesting, yTraining, yTesting, numCls,
                  trainEpoch, batchSz, labelLst, losFnc, optim, learnRate,
                  featMode, clsVer):
    gc.collect()
    clearGPU()
    utils2.myPrint('---LSTM Classifier---')
    trainShape = np.shape(xTraining)
    testShape = np.shape(xTesting)
    utils2.myPrint('Train Batch:', trainShape)
    utils2.myPrint('Test Batch:', testShape)

    models = list()
    for version in clsVer:
        # create the model
        model = Sequential()
        # todo - ai : possible variations
        #   try lstm decay
        #   try clipnorm and clipvalue
        #   try convlstm2d and/or concatenate two models
        #   try sgd instead of adam optimizer
        if 'Specto' != featMode:
            # do not convolve for the spectrogram, since it is not as long as the other modes
            utils2.myPrint('Classifier Version:', version)
            if 0 == version:  # inputs 1; 300 epoch .3924
                model.add(Conv1D(8, 48, strides=48, input_shape=trainShape[1:]))
                model.add(Activation('relu'))
                model.add(Conv1D(16, 24, strides=24))
                model.add(Activation('sigmoid'))
                model.add(LSTM(24, return_sequences=True))
                model.add(LSTM(12, return_sequences=False))
                model.add(Dense(numCls, activation='softmax'))
            elif 1 == version:
                model.add(Conv1D(8, 48, strides=12, activation='relu',
                                 input_shape=trainShape[1:]))
                model.add(Conv1D(16, 36, strides=6, activation='relu'))
                model.add(Conv1D(32, 24, strides=2, activation='relu'))
                model.add(Conv1D(64, 24, strides=2, activation='relu'))
                model.add(LSTM(64, return_sequences=True))
                model.add(LSTM(32, activation='relu', return_sequences=False))
                model.add(Dense(numCls, activation='softmax'))
            elif 2 == version:
                # Resulted better than the LSTM variant (1 == clsVer) with the following configuration:
                # {'inputFolder': 'D:/atili/MMIExt/Audacity/METU Recordings/Dataset/4spkr5post/', 'featureMode': 'Mags',
                #  'channelMode': '0', 'classificationMode': 'Speaker', 'trainingEpoch': 200, 'stepSize': 0,
                #  'sampRate': 48, 'batchSize': 32, 'lengthCut': 600, 'learningRate': 0.001,
                #  'lossFunction': 'CatCrosEnt', 'optimizer': 'Adam', 'clsModel': 'LSTM', 'clsVersion': 2}
                # Overfitted with 1.000 accuracy (starting around the 30th epoch) with the following configuration:
                # {'inputFolder': 'D:/atili/MMIExt/Audacity/METU Recordings/Dataset/allSmall/', 'featureMode': 'Mags',
                #  'channelMode': '0', 'classificationMode': 'Speaker', 'trainingEpoch': 400, 'stepSize': 0,
                #  'sampRate': 48, 'batchSize': 32, 'lengthCut': 600, 'learningRate': 0.001,
                #  'lossFunction': 'CatCrosEnt', 'optimizer': 'Adam', 'clsModel': 'LSTM', 'clsVersion': 2}
                model.add(Conv1D(8, 48, strides=12, activation='relu',
                                 input_shape=trainShape[1:]))
                model.add(Conv1D(16, 36, strides=6, activation='relu'))
                model.add(Conv1D(32, 24, strides=2, activation='relu'))
                model.add(Conv1D(64, 24, strides=2, activation='relu'))
                model.add(GRU(64, return_sequences=True))
                model.add(GRU(32, activation='relu', return_sequences=False))
                model.add(Dense(numCls, activation='softmax'))
            elif 3 == version:  # in the thesis: numbered -1 from here on
                model.add(Conv1D(8, 48, strides=12, activation='relu',
                                 input_shape=trainShape[1:]))
                model.add(Dropout(0.5))
                model.add(Conv1D(16, 36, strides=6, activation='relu'))
                model.add(Dropout(0.5))
                model.add(Conv1D(32, 24, strides=2, activation='relu'))
                model.add(Dropout(0.5))
                model.add(Conv1D(64, 24, strides=2, activation='relu'))
                model.add(LSTM(64, return_sequences=True))
                model.add(LSTM(32, activation='relu', return_sequences=False))
                model.add(Dense(numCls, activation='softmax'))
            elif 4 == version:
                model.add(Conv1D(16, 96, strides=12, activation='relu',
                                 input_shape=trainShape[1:]))
                model.add(Dropout(0.5))
                model.add(Conv1D(32, 48, strides=6, activation='relu'))
                model.add(Dropout(0.5))
                model.add(Conv1D(64, 24, strides=2, activation='relu'))
                model.add(Dropout(0.5))
                model.add(CuDNNGRU(64, return_sequences=True))
                model.add(Dropout(0.5))
                model.add(CuDNNGRU(64, return_sequences=True))
                model.add(Dropout(0.5))
                model.add(CuDNNGRU(32, return_sequences=False))
                model.add(Dense(numCls, activation='softmax'))
            elif 5 == version:
                model.add(Conv1D(16, 96, strides=12, activation='relu',
                                 input_shape=trainShape[1:]))
                model.add(Dropout(0.5))
                model.add(Conv1D(32, 48, strides=6, activation='relu'))
                model.add(Dropout(0.5))
                model.add(Conv1D(64, 24, strides=2, activation='relu'))
                model.add(Dropout(0.5))
                model.add(CuDNNGRU(64, return_sequences=True))
                model.add(Dropout(0.5))
                model.add(CuDNNGRU(32, return_sequences=False))
                model.add(Dense(numCls, activation='softmax'))
            elif 6 == version:
                # todo - ai : this is a temp clsVer; give a static version number to successful model structures
                model.add(Conv1D(16, 96, strides=12, activation='relu',
                                 input_shape=trainShape[1:]))
                model.add(Dropout(0.2))
                model.add(Conv1D(32, 48, strides=6, activation='relu'))
                model.add(Dropout(0.2))
                model.add(Conv1D(64, 24, strides=2, activation='relu'))
                model.add(Dropout(0.2))
                model.add(CuDNNGRU(64, return_sequences=True))
                model.add(Dropout(0.2))
                model.add(CuDNNGRU(32, return_sequences=False))
                model.add(Dense(numCls, activation='softmax'))
            elif 7 == version:
                # todo - ai : this is a temp clsVer; give a static version number to successful model structures
                model.add(Conv1D(32, 96, strides=16, activation='relu',
                                 input_shape=trainShape[1:]))
                model.add(Conv1D(64, 48, strides=8, activation='relu'))
                model.add(CuDNNGRU(64, return_sequences=True))
                model.add(CuDNNGRU(32, return_sequences=False))
                model.add(Dense(numCls, activation='softmax'))
            else:
                utils2.myPrint('ERROR: Unknown Classifier Version')
                sys.exit()
        else:  # for spectrograms
            utils2.myPrint('Classifier Version: Spectrogram')
            model.add(LSTM(24, activation='relu', return_sequences=True,
                           input_shape=trainShape[1:]))
            model.add(LSTM(32, activation='relu', return_sequences=False))
            model.add(Dense(numCls, activation='softmax'))

        utils2.printModelConfig(model.get_config())

        ##
        # Optimizer selection (Paper Refs: https://keras.io/optimizers/ and
        # https://www.dlology.com/blog/quick-notes-on-how-to-choose-optimizer-in-keras/)
        ##
        if 'Adam' == optim:
            opt = optimizers.adam(lr=learnRate)
        elif 'Sgd' == optim:
            opt = optimizers.sgd(lr=learnRate, nesterov=False)  # works well with shallow networks
        elif 'SgdNest' == optim:
            opt = optimizers.sgd(lr=learnRate, nesterov=True)  # works well with shallow networks
        elif 'Adamax' == optim:
            opt = optimizers.adamax(lr=learnRate)
        elif 'Nadam' == optim:
            opt = optimizers.nadam(lr=learnRate)
        elif 'Rms' == optim:
            opt = optimizers.rmsprop(lr=learnRate)
        else:
            utils2.myPrint('ERROR: Invalid Optimizer Parameter Value:', optim)
            sys.exit()

        ##
        # Loss function selection (Paper Refs: https://keras.io/losses/ and
        # https://machinelearningmastery.com/how-to-choose-loss-functions-when-training-deep-learning-neural-networks/)
        ##
        if 'SparCatCrosEnt' == losFnc:
            # sparse_categorical_crossentropy uses integer labels instead of one-hot vectors.
            # It saves memory when numCls is big; otherwise it is identical to
            # categorical_crossentropy, so use that instead. Do not use this
            # before modifying the labelList structure.
            los = losses.sparse_categorical_crossentropy
        elif 'CatCrosEnt' == losFnc:
            los = losses.categorical_crossentropy
        elif 'KLDiv' == losFnc:
            los = losses.kullback_leibler_divergence
        else:
            utils2.myPrint('ERROR: Invalid Loss Function Parameter Value:', losFnc)
            sys.exit()

        model.compile(loss=los, optimizer=opt, metrics=['accuracy'])
        utils2.myPrint('Optimizer:', opt)
        utils2.myPrint('Learning Rate:', backend.eval(model.optimizer.lr))
        utils2.myPrint('Loss func:', los)
        model.summary(print_fn=utils2.myPrint)
        # input('Press ENTER to continue with training:')
        utils2.myPrint('')
        utils2.myPrint('Training:', flush=True)

        # prepare callbacks
        earlyStopping = EarlyStopping(monitor='val_loss', mode='min',
                                      patience=45, min_delta=1e-4,
                                      restore_best_weights=True, verbose=1)
        # save the best model for later use
        modelName = 'model_' + utils2.scriptStartDateTime + \
            '_numCls-' + str(numCls) + '_loss-' + losFnc + '_opt-' + optim + \
            '_lr-' + str(learnRate) + '_featMode-' + featMode + \
            '_clsVer-' + str(version) + '.clsmdl'
        modelPath = './models/' + modelName
        modelSaving = ModelCheckpoint(modelPath, monitor='val_loss',
                                      mode='min', save_best_only=True,
                                      verbose=1)
        reduceLrLoss = ReduceLROnPlateau(monitor='val_loss', mode='min',
                                         factor=0.5, cooldown=10, patience=10,
                                         min_delta=1e-4,
                                         min_lr=learnRate / 32, verbose=1)

        # train
        trainingResults = model.fit(xTraining, yTraining,
                                    epochs=trainEpoch, batch_size=batchSz,
                                    validation_data=(xTesting, yTesting),
                                    callbacks=[earlyStopping, modelSaving,
                                               reduceLrLoss])
        # model.fit() prints to the console, but we cannot capture that output
        # as-is, so myPrint it to file only, with the given info
        for i in range(len(trainingResults.history['loss'])):
            utils2.myPrint(
                'Epoch #%d: Loss:%.4f, Accuracy:%.4f, Validation Loss:%.4f, Validation Accuracy:%.4f'
                % (i + 1, trainingResults.history['loss'][i],
                   trainingResults.history['acc'][i],
                   trainingResults.history['val_loss'][i],
                   trainingResults.history['val_acc'][i]),
                mode='file')
        utils2.myPrint(trainingResults.history, mode='code')
        utils2.myPrint('')

        # restore the best model
        utils2.myPrint('Restoring best model...')
        model = load_model(modelPath)
        models.append(modelPath)

        # final evaluation of the model
        utils2.myPrint('Test:')
        scores = model.evaluate(xTesting, yTesting, batch_size=testShape[0])
        utils2.myPrint('Test Loss:%.8f, Accuracy:%.4f' % (scores[0], scores[1]))
        # write results to a separate text file (part 2/3)
        fResult = open('./Results.txt', 'a+')
        fResult.write(', ' + modelName +
                      ', Test Loss:%.8f, Accuracy:%.4f' % (scores[0], scores[1]))
        fResult.close()
        # todo - ai : k-fold cross validation can be inserted here

        # stats by class
        yTesting1Hot = np.argmax(yTesting, axis=1)  # convert one-hot to index
        yTesting1Hot = [labelLst[i] for i in yTesting1Hot]
        yPredict = model.predict_classes(xTesting)
        yPredict = [labelLst[i] for i in yPredict]
        utils2.myPrint('Labels:', labelLst)
        utils2.myPrint('Confusion Matrix:')
        utils2.myPrint(
            pd.DataFrame(confusion_matrix(yTesting1Hot, yPredict, labels=labelLst),
                         index=['t:{:}'.format(x) for x in labelLst],
                         columns=['{:}'.format(x) for x in labelLst]))
        utils2.myPrint('Classification Report:')
        utils2.myPrint(classification_report(yTesting1Hot, yPredict, labels=labelLst))
        clearGPU()
        del model
        gc.collect()

    if 1 < len(models):
        # test the models, ensembled
        modelId = utils2.scriptStartDateTime
        modelName = 'model_' + modelId + '_ensembled.clsmdl'
        modelPath = './models/' + modelName
        # write results to a separate text file (part 3/3)
        for mdlIdx in range(len(models)):
            models[mdlIdx] = load_model(models[mdlIdx])
            models[mdlIdx].name = modelId + '_' + str(mdlIdx)  # change the name to be unique
        model_input = Input(shape=models[0].input_shape[1:])  # c*h*w
        modelEns = ensembleModels(models, model_input)
        modelEns.compile(optimizer=optimizers.adam(lr=learnRate),
                         loss=losses.categorical_crossentropy,
                         metrics=['accuracy'])
        modelEns.summary(print_fn=utils2.myPrint)
        modelEns.save(modelPath)

        utils2.myPrint('Ensemble Test:')
        scores = modelEns.evaluate(xTesting, yTesting, batch_size=testShape[0])
        utils2.myPrint('Test Loss:%.8f, Accuracy:%.4f' % (scores[0], scores[1]))
        fResult = open('./Results.txt', 'a+')
        fResult.write(', ' + modelName +
                      ', Test Loss:%.8f, Accuracy:%.4f' % (scores[0], scores[1]))
        fResult.close()

        # stats by class
        yTesting1Hot = np.argmax(yTesting, axis=1)  # convert one-hot to index
        yTesting1Hot = [labelLst[i] for i in yTesting1Hot]
        yPredict = modelEns.predict(xTesting)
        yPredict = np.argmax(yPredict, axis=1)
        yPredict = [labelLst[i] for i in yPredict]
        utils2.myPrint('Labels:', labelLst)
        utils2.myPrint('Confusion Matrix:')
        utils2.myPrint(
            pd.DataFrame(confusion_matrix(yTesting1Hot, yPredict, labels=labelLst),
                         index=['t:{:}'.format(x) for x in labelLst],
                         columns=['{:}'.format(x) for x in labelLst]))
        utils2.myPrint('Classification Report:')
        utils2.myPrint(classification_report(yTesting1Hot, yPredict, labels=labelLst))
        del modelEns
        clearGPU()
        gc.collect()

    del xTraining
    del xTesting
    del yTraining
    del yTesting
    del labelLst
    del models
    gc.collect()
    clearGPU()
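# ensembleModels() is used by trainTestLSTM() above but not defined in this
# section. A common sketch, assuming it averages the softmax outputs of the
# loaded models over a shared input tensor (hypothetical implementation;
# assumes keras.layers.Average and keras.models.Model are imported):
def ensembleModels(mdls, modelInput):
    # collect each model's output on the shared input and average them
    outputs = [mdl(modelInput) for mdl in mdls]
    avgOutput = Average()(outputs)
    return Model(inputs=modelInput, outputs=avgOutput, name='ensemble')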
def trainTestkNNDTW(xTraining, xTesting, yTraining, yTesting, labelLst):
    gc.collect()
    clearGPU()
    utils2.myPrint('---DTW Classifier---')
    # convert labels from one-hot vectors to integer class indices
    yTraining = np.argmax(yTraining, axis=1)
    yTesting = np.argmax(yTesting, axis=1)
    trainShape = np.shape(xTraining)
    testShape = np.shape(xTesting)
    utils2.myPrint('Train Batch:', trainShape)
    utils2.myPrint('Test Batch:', testShape)

    # create the model
    model = kNNDTW.KnnDtw(n_neighbors=1, max_warping_window=10)
    model.fit(xTraining, yTraining)
    yPredict, proba = model.predict(xTesting)

    cls_rep = classification_report(yTesting, yPredict,
                                    target_names=[l for l in labelLst])
    conf_mat = confusion_matrix(yTesting, yPredict)
    utils2.myPrint('Labels:', labelLst)
    utils2.myPrint('Confusion Matrix:')
    utils2.myPrint(pd.DataFrame(conf_mat,
                                index=['t:{:}'.format(x) for x in labelLst],
                                columns=['{:}'.format(x) for x in labelLst]))
    utils2.myPrint('Classification Report:')
    utils2.myPrint(cls_rep)

    clearGPU()
    gc.collect()
    del xTraining
    del xTesting
    del yTraining
    del yTesting
    del labelLst
    del model
    gc.collect()
    clearGPU()
# ===================================== MAIN STARTS HERE. FUNCTIONS ARE ABOVE =====================================
# Enable the line below to capture the console into a file (it does not print
# to the console this way, but it captures all stdout from other libs too).
# utils2.myPrint(), on the other hand, writes to both console and file, but
# does not capture stdout.
# sys.stdout = open('out.txt', 'a')
utils2.myPrint('======= Running File: %s =======' % sys.argv[0])
if 2 == len(sys.argv):
    fConf = open(sys.argv[1], 'r')
    utils2.myPrint('Reading Configuration from command line argument: %s' %
                   os.path.realpath(fConf.name))
else:
    fConf = open('conf.txt', 'r')
    utils2.myPrint('Reading Default Configuration: %s' %
                   os.path.realpath(fConf.name))
configList = fConf.read().splitlines()
fConf.close()
totalConfigurationCount = len(configList)
utils2.myPrint('Total of %d configuration(s) will be run' %
               totalConfigurationCount)
for cIdx in range(totalConfigurationCount):
    gc.collect()
def animate_ipython(self, itera):
    utils2.myPrint('\r', self, flush=True)
    self.update_iteration(itera + 1)
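# animate_ipython() is written as a method override (note the 'self'
# parameter); presumably it is bound onto a progress-bar class elsewhere in
# the project, e.g. something like:
#   ProgressBar.animate = animate_ipython
# where ProgressBar is assumed to expose an update_iteration() method.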