def findKMeansPerLabel(activations, k, labelCount, targetPath, indexLabelMapping):
    """Run k-means separately on the rows of each label and collect the results.

    The last ``labelCount`` columns of ``activations`` are treated as label
    columns; a row is assigned to a label when that label's column equals 1.

    Args:
        activations: 2-D array whose trailing ``labelCount`` columns hold the labels.
        k: forwarded to ``kMeans_core.runKMeans``.
        labelCount: number of trailing label columns.
        targetPath: forwarded to ``kMeans_core.saveResults``.
        indexLabelMapping: when not None, passed to ``utility.getLabelStrings``
            to obtain label names; these are only used in log messages.

    Returns:
        ``[clusters, iterations]`` where ``clusters`` is the concatenation of
        the clusters returned for every label and ``iterations`` holds one
        entry per label.
    """
    labels = None
    # Identity check: "!= None" is evaluated element-wise on array-like values.
    if indexLabelMapping is not None:
        labels = utility.getLabelStrings(indexLabelMapping)

    # split activations by label
    firstLabelIndex = activations.shape[1] - labelCount
    activationsByLabel = []
    for labelOffset in range(labelCount):
        currentLabelIndex = firstLabelIndex + labelOffset
        logger.debug(currentLabelIndex)
        activationsByLabel.append(activations[activations[:, currentLabelIndex] == 1])

    clusters = []
    iterations = []
    for counter, batch in enumerate(activationsByLabel):
        if labels is not None:
            logger.info('Running KMeans for label ' + labels[counter] + '.')
        else:
            logger.info('Running KMeans for label ' + str(counter))
        logger.debug("Batch shape: " + str(batch.shape))

        c, i = kMeans_core.runKMeans(batch, labelCount, k, MAX_ITERATIONS)
        clusters.extend(c)
        iterations.append(i)

    kMeans_core.saveResults(clusters, iterations, targetPath)
    return [clusters, iterations]
def runTests(activationsPath, indexLabelMappingPath, sourcePath):
    """Evaluate every stored cluster group below ``sourcePath`` and write the results.

    For each group returned by ``loadClusters`` the confusion matrix is saved
    and plotted next to the clusters (``confusion_test.npy`` / ``confusion_test.pdf``).
    One summary row ``[k, meanAveragePrecision, overallCorrect, overallWrong]``
    is collected per group; the rows are written and plotted as two overviews,
    one for groups of type ``'perLabel'`` and one for all other groups.

    Args:
        activationsPath: file loaded with ``utility.arrayFromFile``.
        indexLabelMappingPath: file passed to ``utility.getLabelStrings``.
        sourcePath: folder holding the cluster groups; a trailing '/' is
            appended when missing.
    """
    labels = utility.getLabelStrings(indexLabelMappingPath)
    activations = utility.arrayFromFile(activationsPath)

    if not sourcePath.endswith('/'):
        sourcePath += '/'

    # list with all previously generated cluster groups
    clusterGroups = loadClusters(sourcePath)

    mixedClusterResultsSimple = []
    perLabelClusterResultsSimple = []

    for clusters in clusterGroups:
        logger.info("Evaluating clusters at " + clusters['path'])

        # The two kinds of group differ only in the flag handed to runTest and
        # in the overview list that receives the summary row.
        isPerLabel = clusters['type'] == 'perLabel'
        confusionMatrix, meanAveragePrecision, overallCorrect, overallWrong = runTest(
            clusters['data'], activations, labels, isPerLabel)

        summaryRow = [clusters['k'], meanAveragePrecision, overallCorrect, overallWrong]
        if isPerLabel:
            perLabelClusterResultsSimple.append(summaryRow)
        else:
            mixedClusterResultsSimple.append(summaryRow)

        saveConfusionMatrix(confusionMatrix, clusters['path'] + "confusion_test.npy")
        utility.plotConfusionMatrix(confusionMatrix, labels, clusters['path'] + "confusion_test.pdf")

    overviewPerLabel = np.array(perLabelClusterResultsSimple)
    overviewMixed = np.array(mixedClusterResultsSimple)

    saveOverview(overviewPerLabel, sourcePath + 'perLabel_')
    saveOverview(overviewMixed, sourcePath + 'result_mixed_')

    utility.plotKMeansOverview(overviewPerLabel, sourcePath + 'perLabel_result.pdf', True)
    utility.plotKMeansOverview(overviewMixed, sourcePath + 'result_mixed_result.pdf', True)
def plotOverview(activations, labelCount, indexLabelMappingPath, plotFileName):
    """Plot a greyscale image of randomly sampled activation rows, grouped by label.

    The last ``labelCount`` columns of ``activations`` are treated as label
    columns. For every label, ``1024 // labelCount`` rows whose label column
    equals 1 are drawn at random (with replacement) and stacked; the
    activation part of the stacked rows is shown as an image and saved to
    ``plotFileName``.

    When there are more than 1024 activation columns and their count is
    divisible by 4 (or else by 2), the activation part is reshaped to
    ``(rows, factor, columns // factor)`` and averaged over the middle axis
    before plotting.

    Args:
        activations: 2-D array whose trailing ``labelCount`` columns hold the labels.
        labelCount: number of trailing label columns.
        indexLabelMappingPath: when not None, the y tick labels are read with
            ``utility.getLabelStrings``; otherwise "Label <index>" is used.
        plotFileName: target passed to ``plt.savefig``.
    """
    firstLabelIndex = activations.shape[1] - labelCount

    reduceActivationsBy = 1
    if firstLabelIndex > 1024:
        if firstLabelIndex % 4 == 0:
            reduceActivationsBy = 4
        elif firstLabelIndex % 2 == 0:
            reduceActivationsBy = 2

    # Floor division: both values are used as array sizes and must stay
    # integers (true division would hand floats to reshape and randint).
    reduceActivationsTo = firstLabelIndex // reduceActivationsBy
    activationsPerLabel = 1024 // labelCount
    logger.info(str(activationsPerLabel) + " rows per label.")

    selection = np.empty((0, activations.shape[1]))
    for labelCounter in range(labelCount):
        monoLabelSelection = activations[activations[:, firstLabelIndex + labelCounter] == 1]
        picked = np.random.randint(0, monoLabelSelection.shape[0], activationsPerLabel)
        selection = np.vstack((selection, monoLabelSelection[picked]))

    if indexLabelMappingPath is not None:
        tickLabels = utility.getLabelStrings(indexLabelMappingPath)
    else:
        tickLabels = ["Label " + str(labelCounter) for labelCounter in range(labelCount)]

    cmap = plt.get_cmap('Greys_r')
    cmap_adjusted = colors.LinearSegmentedColormap.from_list(
        'trunc(' + cmap.name + ', ' + str(0) + ',' + str(1) + ')',
        cmap(np.linspace(0, 1, 100)))

    scaled = np.reshape(
        selection[:, 0:firstLabelIndex],
        (selection.shape[0], reduceActivationsBy, reduceActivationsTo))
    scaled = scaled.mean(axis=1)

    plt.imshow(scaled, cmap=cmap_adjusted, interpolation='none')

    ax = plt.gca()
    ax.tick_params(axis='both', which='major', bottom=False, top=False, left=False, right=False)

    # One tick per label, placed in the middle of that label's band of rows.
    ticks = np.arange(activationsPerLabel * 0.5, selection.shape[0], activationsPerLabel)
    ax.set_yticks(ticks)
    ax.set_yticklabels(tickLabels)

    plt.savefig(plotFileName, bbox_inches='tight')
def _prepareRunFolder(targetFolder, prefix, k, runCounter):
    """Return '<targetFolder><prefix><k>/run_<runCounter>/', creating the folder if missing."""
    currentTarget = targetFolder + prefix + str(k) + "/run_" + str(runCounter) + "/"
    folder = os.path.dirname(currentTarget)
    if not os.path.exists(folder):
        os.makedirs(folder)
    return currentTarget


def generateKMeansSeries(activationsPath, labelIndexMappingPath, targetFolder):
    """Run and plot a series of per-label and mixed k-means clusterings.

    Per-label k-means is run for every k from PER_LABEL_START to
    PER_LABEL_END (inclusive, step 1); mixed k-means is run for every k from
    MIXED_START to MIXED_END (inclusive) in steps of MIXED_STEP. Each k is
    repeated RUNS_PER_TYPE times and every run gets its own folder
    ``<targetFolder>{perLabel_|mixed_}<k>/run_<n>/``.

    NOTE(review): the loop bounds, step and run count are module-level
    constants not visible here; they are assumed to be integers (MIXED_STEP
    positive) - confirm.

    Args:
        activationsPath: file loaded with ``utility.arrayFromFile``.
        labelIndexMappingPath: file passed to ``utility.getLabelStrings``.
        targetFolder: root folder for all runs; a trailing '/' is appended
            when missing.
    """
    if not targetFolder.endswith('/'):
        targetFolder += '/'

    activations = utility.arrayFromFile(activationsPath)
    labels = utility.getLabelStrings(labelIndexMappingPath)
    labelCount = len(labels)
    # Columns that are not label columns.
    neurons = activations.shape[1] - labelCount

    # test per label k-means
    for k in range(PER_LABEL_START, PER_LABEL_END + 1):
        for runCounter in range(RUNS_PER_TYPE):
            logger.info("Calculating per label KMeans with k = " + str(k) + ".")
            currentTarget = _prepareRunFolder(targetFolder, "perLabel_", k, runCounter)
            c, i = kMeans_per_label.findKMeansPerLabel(
                activations, k, labelCount, currentTarget, labelIndexMappingPath)
            plot_cluster.plotClusters(kMeans_core.cleanUp(c), i, neurons, currentTarget, labels[:])

    # test mixed k-means
    for k in range(MIXED_START, MIXED_END + 1, MIXED_STEP):
        for runCounter in range(RUNS_PER_TYPE):
            logger.info("Calculating mixed KMeans with k = " + str(k) + ".")
            currentTarget = _prepareRunFolder(targetFolder, "mixed_", k, runCounter)
            c, i = kMeans_mixed.findKMeans(activations, k, labelCount, currentTarget)
            plot_cluster.plotClusters(kMeans_core.cleanUp(c), i, neurons, currentTarget, labels[:])
def _prepareRunFolder(targetFolder, prefix, k, runCounter):
    """Return '<targetFolder><prefix><k>/run_<runCounter>/', creating the folder if missing."""
    currentTarget = targetFolder + prefix + str(k) + "/run_" + str(runCounter) + "/"
    folder = os.path.dirname(currentTarget)
    if not os.path.exists(folder):
        os.makedirs(folder)
    return currentTarget


def generateKMeansSeries(activationsPath, labelIndexMappingPath, targetFolder):
    """Run and plot a series of per-label and mixed k-means clusterings.

    Per-label k-means is run for every k from PER_LABEL_START to
    PER_LABEL_END (inclusive, step 1); mixed k-means is run for every k from
    MIXED_START to MIXED_END (inclusive) in steps of MIXED_STEP. Each k is
    repeated RUNS_PER_TYPE times and every run gets its own folder
    ``<targetFolder>{perLabel_|mixed_}<k>/run_<n>/``.

    NOTE(review): the loop bounds, step and run count are module-level
    constants not visible here; they are assumed to be integers (MIXED_STEP
    positive) - confirm.

    Args:
        activationsPath: file loaded with ``utility.arrayFromFile``.
        labelIndexMappingPath: file passed to ``utility.getLabelStrings``.
        targetFolder: root folder for all runs; a trailing '/' is appended
            when missing.
    """
    if not targetFolder.endswith("/"):
        targetFolder += "/"

    activations = utility.arrayFromFile(activationsPath)
    labels = utility.getLabelStrings(labelIndexMappingPath)
    labelCount = len(labels)
    # Columns that are not label columns.
    neurons = activations.shape[1] - labelCount

    # test per label k-means
    for k in range(PER_LABEL_START, PER_LABEL_END + 1):
        for runCounter in range(RUNS_PER_TYPE):
            logger.info("Calculating per label KMeans with k = " + str(k) + ".")
            currentTarget = _prepareRunFolder(targetFolder, "perLabel_", k, runCounter)
            c, i = kMeans_per_label.findKMeansPerLabel(
                activations, k, labelCount, currentTarget, labelIndexMappingPath
            )
            plot_cluster.plotClusters(kMeans_core.cleanUp(c), i, neurons, currentTarget, labels[:])

    # test mixed k-means
    for k in range(MIXED_START, MIXED_END + 1, MIXED_STEP):
        for runCounter in range(RUNS_PER_TYPE):
            logger.info("Calculating mixed KMeans with k = " + str(k) + ".")
            currentTarget = _prepareRunFolder(targetFolder, "mixed_", k, runCounter)
            c, i = kMeans_mixed.findKMeans(activations, k, labelCount, currentTarget)
            plot_cluster.plotClusters(kMeans_core.cleanUp(c), i, neurons, currentTarget, labels[:])
utility.plotConfusionMatrix(confusionMatrix, labels, currentTargetPath + "confusion.pdf") kCounter += 1 results = np.array(results) logger.info('Writing file ' + targetPath + "overview.csv") np.savetxt( targetPath + "overview.csv", results, delimiter=',') utility.plotKMeansOverview(results, targetPath + "overview.pdf", False) if __name__ == '__main__': if len(sys.argv) != 5 and len(sys.argv) != 6: logger.info("Please provide as argument:") logger.info("1) Path to training activations (*.npy).") logger.info("2) Path to test activations (*.npy).") logger.info("3) Path to label mapping.") logger.info("4) Path for target folder.") logger.info("5) Neighbour relations (optional)") sys.exit(); trainingActivations = utility.arrayFromFile(sys.argv[1]) testActivations = utility.arrayFromFile(sys.argv[2]) labels = utility.getLabelStrings(sys.argv[3]) nn = None if len(sys.argv) == 6: nn = utility.arrayFromFile(sys.argv[5]) runTests(trainingActivations, testActivations, labels, sys.argv[4], nn)
logger.info("4) Target path for evaluation results.") sys.exit(); labelIndexInfoPath = sys.argv[1] labelIndexMappingPath = sys.argv[2] logFilePath = sys.argv[3] evaluationTargetPath = sys.argv[4] if evaluationTargetPath.endswith('/') == False: evaluationTargetPath += '/' if not os.path.exists(os.path.dirname(evaluationTargetPath)) and os.path.dirname(evaluationTargetPath) != '': os.makedirs(os.path.dirname(evaluationTargetPath)) plotTrainingLossAndAccuracy(logFilePath, evaluationTargetPath) # sys.exit() labels = utility.getLabelStrings(labelIndexMappingPath) [confusionMatrix, meanAveragePrecision, overallCorrect, overallWrong] = testNeuralNet(labels, labelIndexInfoPath, evaluationTargetPath) overviewPath = evaluationTargetPath + "overview.csv" logger.info('Writing file ' + overviewPath) np.savetxt(overviewPath, np.array([0,meanAveragePrecision, overallCorrect, overallWrong]), delimiter=',') confusionMatrixPath = evaluationTargetPath + 'confusionMatrix.npy' logger.info('Writing file ' + confusionMatrixPath) with open(confusionMatrixPath, "w") as outputFile: np.save(outputFile, confusionMatrix) utility.plotConfusionMatrix(confusionMatrix, labels, evaluationTargetPath + 'confusionMatrix.pdf')
currentTargetPath + "confusion.pdf") kCounter += 1 results = np.array(results) logger.info('Writing file ' + targetPath + "overview.csv") np.savetxt(targetPath + "overview.csv", results, delimiter=',') utility.plotKMeansOverview(results, targetPath + "overview.pdf", False) if __name__ == '__main__': if len(sys.argv) != 5 and len(sys.argv) != 6: logger.info("Please provide as argument:") logger.info("1) Path to training activations (*.npy).") logger.info("2) Path to test activations (*.npy).") logger.info("3) Path to label mapping.") logger.info("4) Path for target folder.") logger.info("5) Neighbour relations (optional)") sys.exit() trainingActivations = utility.arrayFromFile(sys.argv[1]) testActivations = utility.arrayFromFile(sys.argv[2]) labels = utility.getLabelStrings(sys.argv[3]) nn = None if len(sys.argv) == 6: nn = utility.arrayFromFile(sys.argv[5]) runTests(trainingActivations, testActivations, labels, sys.argv[4], nn)
labelIndexInfoPath = sys.argv[1] labelIndexMappingPath = sys.argv[2] logFilePath = sys.argv[3] evaluationTargetPath = sys.argv[4] if evaluationTargetPath.endswith('/') == False: evaluationTargetPath += '/' if not os.path.exists(os.path.dirname(evaluationTargetPath) ) and os.path.dirname(evaluationTargetPath) != '': os.makedirs(os.path.dirname(evaluationTargetPath)) plotTrainingLossAndAccuracy(logFilePath, evaluationTargetPath) # sys.exit() labels = utility.getLabelStrings(labelIndexMappingPath) [confusionMatrix, meanAveragePrecision, overallCorrect, overallWrong] = testNeuralNet(labels, labelIndexInfoPath, evaluationTargetPath) overviewPath = evaluationTargetPath + "overview.csv" logger.info('Writing file ' + overviewPath) np.savetxt(overviewPath, np.array( [0, meanAveragePrecision, overallCorrect, overallWrong]), delimiter=',') confusionMatrixPath = evaluationTargetPath + 'confusionMatrix.npy' logger.info('Writing file ' + confusionMatrixPath) with open(confusionMatrixPath, "w") as outputFile: np.save(outputFile, confusionMatrix)