def runTests(activationsPath, indexLabelMappingPath, sourcePath):
    """Evaluate every stored cluster group below ``sourcePath``.

    For each cluster group returned by ``loadClusters`` the function runs
    ``runTest``, stores the confusion matrix (``confusion_test.npy``) and its
    plot (``confusion_test.pdf``) next to the cluster file, and collects a
    summary row ``[k, meanAveragePrecision, overallCorrect, overallWrong]``.
    Rows of groups whose type is ``'perLabel'`` and rows of all other groups
    are gathered separately; each collection is finally saved with
    ``saveOverview`` and plotted with ``utility.plotKMeansOverview``.

    Args:
        activationsPath: File handed to ``utility.arrayFromFile`` to load the
            activations used for the evaluation.
        indexLabelMappingPath: File handed to ``utility.getLabelStrings``.
        sourcePath: Root directory searched for cluster files. A trailing
            ``/`` is appended when missing.
    """
    labels = utility.getLabelStrings(indexLabelMappingPath)
    activations = utility.arrayFromFile(activationsPath)

    if not sourcePath.endswith('/'):
        sourcePath += '/'

    # List with all previously generated cluster groups.
    clusterGroups = loadClusters(sourcePath)

    mixedClusterResultsSimple = []
    perLabelClusterResultsSimple = []

    for clusters in clusterGroups:
        logger.info("Evaluating clusters at " + clusters['path'])

        # The fourth argument of runTest is True for 'perLabel' groups and
        # False for every other type; its exact meaning is defined by runTest.
        isPerLabel = clusters['type'] == 'perLabel'
        confusionMatrix, meanAveragePrecision, overallCorrect, overallWrong = runTest(
            clusters['data'], activations, labels, isPerLabel)

        summary = [clusters['k'], meanAveragePrecision, overallCorrect, overallWrong]
        if isPerLabel:
            perLabelClusterResultsSimple.append(summary)
        else:
            mixedClusterResultsSimple.append(summary)

        saveConfusionMatrix(confusionMatrix, clusters['path'] + "confusion_test.npy")
        utility.plotConfusionMatrix(confusionMatrix, labels, clusters['path'] + "confusion_test.pdf")

    overviewPerLabel = np.array(perLabelClusterResultsSimple)
    overviewMixed = np.array(mixedClusterResultsSimple)

    saveOverview(overviewPerLabel, sourcePath + 'perLabel_')
    saveOverview(overviewMixed, sourcePath + 'result_mixed_')

    utility.plotKMeansOverview(overviewPerLabel, sourcePath + 'perLabel_result.pdf', True)
    utility.plotKMeansOverview(overviewMixed, sourcePath + 'result_mixed_result.pdf', True)
def generateKMeansSeries(activationsPath, labelIndexMappingPath, targetFolder):
    """Run a series of per-label and mixed k-means clusterings and plot them.

    Per-label k-means is computed for every ``k`` from ``PER_LABEL_START`` to
    ``PER_LABEL_END`` (inclusive, step 1); mixed k-means for every ``k`` from
    ``MIXED_START`` to ``MIXED_END`` (inclusive) in steps of ``MIXED_STEP``.
    Each configuration is repeated ``RUNS_PER_TYPE`` times. The output of a
    run goes to ``<targetFolder>perLabel_<k>/run_<n>/`` or
    ``<targetFolder>mixed_<k>/run_<n>/``; the directory is created on demand.

    The range constants must be integers; a ``MIXED_STEP`` of zero raises
    ``ValueError`` from ``range``.

    Args:
        activationsPath: File handed to ``utility.arrayFromFile``.
        labelIndexMappingPath: File handed to ``utility.getLabelStrings`` and
            forwarded to ``kMeans_per_label.findKMeansPerLabel``.
        targetFolder: Root output directory. A trailing ``/`` is appended
            when missing.
    """
    if not targetFolder.endswith('/'):
        targetFolder += '/'

    activations = utility.arrayFromFile(activationsPath)
    labels = utility.getLabelStrings(labelIndexMappingPath)
    labelCount = len(labels)
    # Number of activation columns left after subtracting one per label.
    neurons = activations.shape[1] - labelCount

    # test per label k-means
    for k in range(PER_LABEL_START, PER_LABEL_END + 1):
        for runCounter in range(RUNS_PER_TYPE):
            logger.info("Calculating per label KMeans with k = " + str(k) + ".")
            currentTarget = targetFolder + "perLabel_" + str(k) + "/run_" + str(runCounter) + "/"
            targetDirectory = os.path.dirname(currentTarget)
            if not os.path.exists(targetDirectory):
                os.makedirs(targetDirectory)
            c, i = kMeans_per_label.findKMeansPerLabel(
                activations, k, labelCount, currentTarget, labelIndexMappingPath)
            # A sliced copy of the labels is handed to the plotting code.
            plot_cluster.plotClusters(kMeans_core.cleanUp(c), i, neurons, currentTarget, labels[:])

    # test mixed k-means
    for k in range(MIXED_START, MIXED_END + 1, MIXED_STEP):
        for runCounter in range(RUNS_PER_TYPE):
            logger.info("Calculating mixed KMeans with k = " + str(k) + ".")
            currentTarget = targetFolder + "mixed_" + str(k) + "/run_" + str(runCounter) + "/"
            targetDirectory = os.path.dirname(currentTarget)
            if not os.path.exists(targetDirectory):
                os.makedirs(targetDirectory)
            c, i = kMeans_mixed.findKMeans(activations, k, labelCount, currentTarget)
            plot_cluster.plotClusters(kMeans_core.cleanUp(c), i, neurons, currentTarget, labels[:])
def loadClusters(sourcePath):
    """Collect all stored cluster files found below ``sourcePath``.

    The directory tree is walked with ``os.walk``. For every file whose name
    ends in ``clusters.npy`` the second-to-last ``/``-separated component of
    the containing directory path is split on ``_``: the last piece is taken
    as ``k`` and the piece before it as the clustering type.

    Args:
        sourcePath: Root directory to search.

    Returns:
        A list of dicts with the keys ``'file'`` (file name), ``'path'``
        (containing directory with trailing ``/``), ``'data'`` (result of
        ``utility.arrayFromFile``), ``'k'`` (int) and ``'type'`` (str).
    """
    found = []
    for rootPath, _subdirs, fileNames in os.walk(sourcePath):
        for fileName in fileNames:
            if not fileName.endswith('clusters.npy'):
                continue

            # Second-to-last path component, e.g. "<type>_<k>".
            nameParts = rootPath.split('/')[-2].split('_')
            partCount = len(nameParts)
            kText = nameParts[partCount - 1]
            typeName = nameParts[partCount - 2]

            directory = rootPath + '/'
            found.append({
                'file': fileName,
                'path': directory,
                'data': utility.arrayFromFile(directory + fileName),
                'k': int(kText),
                'type': typeName,
            })
    return found
def generateKMeansSeries(activationsPath, labelIndexMappingPath, targetFolder):
    """Run a series of per-label and mixed k-means clusterings and plot them.

    Per-label k-means is computed for every ``k`` from ``PER_LABEL_START`` to
    ``PER_LABEL_END`` (inclusive, step 1); mixed k-means for every ``k`` from
    ``MIXED_START`` to ``MIXED_END`` (inclusive) in steps of ``MIXED_STEP``.
    Each configuration is repeated ``RUNS_PER_TYPE`` times. The output of a
    run goes to ``<targetFolder>perLabel_<k>/run_<n>/`` or
    ``<targetFolder>mixed_<k>/run_<n>/``; the directory is created on demand.

    The range constants must be integers; a ``MIXED_STEP`` of zero raises
    ``ValueError`` from ``range``.

    Args:
        activationsPath: File handed to ``utility.arrayFromFile``.
        labelIndexMappingPath: File handed to ``utility.getLabelStrings`` and
            forwarded to ``kMeans_per_label.findKMeansPerLabel``.
        targetFolder: Root output directory. A trailing ``/`` is appended
            when missing.
    """
    if not targetFolder.endswith("/"):
        targetFolder += "/"

    activations = utility.arrayFromFile(activationsPath)
    labels = utility.getLabelStrings(labelIndexMappingPath)
    labelCount = len(labels)
    # Number of activation columns left after subtracting one per label.
    neurons = activations.shape[1] - labelCount

    # test per label k-means
    for k in range(PER_LABEL_START, PER_LABEL_END + 1):
        for runCounter in range(RUNS_PER_TYPE):
            logger.info("Calculating per label KMeans with k = " + str(k) + ".")
            currentTarget = targetFolder + "perLabel_" + str(k) + "/run_" + str(runCounter) + "/"
            targetDirectory = os.path.dirname(currentTarget)
            if not os.path.exists(targetDirectory):
                os.makedirs(targetDirectory)
            c, i = kMeans_per_label.findKMeansPerLabel(
                activations, k, labelCount, currentTarget, labelIndexMappingPath
            )
            # A sliced copy of the labels is handed to the plotting code.
            plot_cluster.plotClusters(kMeans_core.cleanUp(c), i, neurons, currentTarget, labels[:])

    # test mixed k-means
    for k in range(MIXED_START, MIXED_END + 1, MIXED_STEP):
        for runCounter in range(RUNS_PER_TYPE):
            logger.info("Calculating mixed KMeans with k = " + str(k) + ".")
            currentTarget = targetFolder + "mixed_" + str(k) + "/run_" + str(runCounter) + "/"
            targetDirectory = os.path.dirname(currentTarget)
            if not os.path.exists(targetDirectory):
                os.makedirs(targetDirectory)
            c, i = kMeans_mixed.findKMeans(activations, k, labelCount, currentTarget)
            plot_cluster.plotClusters(kMeans_core.cleanUp(c), i, neurons, currentTarget, labels[:])
utility.plotConfusionMatrix(confusionMatrix, labels, currentTargetPath + "confusion.pdf") kCounter += 1 results = np.array(results) logger.info('Writing file ' + targetPath + "overview.csv") np.savetxt( targetPath + "overview.csv", results, delimiter=',') utility.plotKMeansOverview(results, targetPath + "overview.pdf", False) if __name__ == '__main__': if len(sys.argv) != 5 and len(sys.argv) != 6: logger.info("Please provide as argument:") logger.info("1) Path to training activations (*.npy).") logger.info("2) Path to test activations (*.npy).") logger.info("3) Path to label mapping.") logger.info("4) Path for target folder.") logger.info("5) Neighbour relations (optional)") sys.exit(); trainingActivations = utility.arrayFromFile(sys.argv[1]) testActivations = utility.arrayFromFile(sys.argv[2]) labels = utility.getLabelStrings(sys.argv[3]) nn = None if len(sys.argv) == 6: nn = utility.arrayFromFile(sys.argv[5]) runTests(trainingActivations, testActivations, labels, sys.argv[4], nn)
# Read one image path per line of the listing file; only the first
# space-separated field of each line is used. Every line contributes an entry
# so that list positions stay aligned with the line numbers of the listing.
with open(sys.argv[2], "r") as result:
    for line in result:
        split = line.strip().split(' ')
        imagePaths.append(split[0])

# for rootPath, subdirs, files in os.walk(sys.argv[2]):
#     for f in files:
#         if f.endswith('.jpg'):
#             imagePaths.append(rootPath + f)

# Optional fourth argument: a pickled cluster list to reuse instead of
# running k-means again.
# NOTE: unpickling executes code embedded in the file; only load cluster
# files from a trusted source.
clusters = None
if len(sys.argv) == 5:
    # pickle.load needs an open binary file object, not a path string.
    with open(sys.argv[4], "rb") as clusterFile:
        clusters = pickle.load(clusterFile)

activations = utility.arrayFromFile(sys.argv[1])

# Identity check: "== None" would compare element-wise on array-like data.
if clusters is None:
    [clusters, iterations] = kMeans.findKMeans(activations, K, 0, targetPath)

# Copy every image into a sub-folder named after the index of its cluster.
for clusterIndex, cluster in enumerate(clusters):
    currentTarget = targetPath + str(clusterIndex) + "/"
    for index in cluster['memberIndices']:
        # The folder is only created once a cluster actually has a member.
        if not os.path.exists(os.path.dirname(currentTarget)):
            os.makedirs(os.path.dirname(currentTarget))
        copyfile(imagePaths[index], currentTarget + os.path.basename(imagePaths[index]))
currentTargetPath + "confusion.pdf") kCounter += 1 results = np.array(results) logger.info('Writing file ' + targetPath + "overview.csv") np.savetxt(targetPath + "overview.csv", results, delimiter=',') utility.plotKMeansOverview(results, targetPath + "overview.pdf", False) if __name__ == '__main__': if len(sys.argv) != 5 and len(sys.argv) != 6: logger.info("Please provide as argument:") logger.info("1) Path to training activations (*.npy).") logger.info("2) Path to test activations (*.npy).") logger.info("3) Path to label mapping.") logger.info("4) Path for target folder.") logger.info("5) Neighbour relations (optional)") sys.exit() trainingActivations = utility.arrayFromFile(sys.argv[1]) testActivations = utility.arrayFromFile(sys.argv[2]) labels = utility.getLabelStrings(sys.argv[3]) nn = None if len(sys.argv) == 6: nn = utility.arrayFromFile(sys.argv[5]) runTests(trainingActivations, testActivations, labels, sys.argv[4], nn)
if __name__ == '__main__':
    # Command-line entry point: load the activations, make sure the target
    # directory exists and plot the activations into it.
    #
    # Arguments: activations file, number of neurons, target path and,
    # optionally, the path to the index label mapping.
    if len(sys.argv) != 4 and len(sys.argv) != 5:
        logger.info("Please provide as argument:")
        logger.info("1) npy-file with activations.")
        logger.info("2) The number of neurons.")
        logger.info("3) The target path.")
        logger.info("4) The path to the index label mapping (optional).")
        sys.exit()

    activationsPath = sys.argv[1]
    neurons = int(sys.argv[2])
    targetPath = sys.argv[3]
    # The mapping path stays None when the optional argument is omitted.
    indexLabelMappingPath = sys.argv[4] if len(sys.argv) == 5 else None

    activations = utility.arrayFromFile(activationsPath)

    if not targetPath.endswith('/'):
        targetPath += '/'

    if not os.path.exists(os.path.dirname(targetPath)):
        os.makedirs(os.path.dirname(targetPath))

    # Every column of the first row after the neuron columns counts as a label.
    labelCount = activations[0, neurons:].shape[0]

    plotActivations(activations, labelCount, targetPath, indexLabelMappingPath)
clusterHistograms = clusters[:,neurons:] plotPositions(positions, additionalRows, targetPath) plotLabelGrid(clusterHistograms, labelCount, labels, targetPath) if __name__ == '__main__': if len(sys.argv) != 5 and len(sys.argv) != 6: logger.info("Please provide as arguments:") logger.info("1) Cluster file (*.npy).") logger.info("2) Iterations file (*.npy)") logger.info("3) The the number of activation neurons.") logger.info("4) The target path.") logger.info("5) The path to the index label mapping (optional).") sys.exit() labels = None if len(sys.argv) == 6: labels = utility.getLabelStrings(sys.argv[5]) clusters = utility.arrayFromFile(sys.argv[1]) iterations = loadArray(sys.argv[2]) neurons = int(sys.argv[3]) targetPath = sys.argv[4] if targetPath.endswith('/') == False: targetPath += '/' evaluateClusters(clusters, iterations, neurons, targetPath, labels)