Ejemplo n.º 1
0
def findKMeansPerLabel(activations, k, labelCount, targetPath, indexLabelMapping):
    """Run k-means once per label and persist the combined result.

    The last ``labelCount`` columns of ``activations`` are read as label
    columns: for each of them, the rows whose value equals 1 form one batch
    that is clustered independently via ``kMeans_core.runKMeans``.

    Args:
        activations: 2-D array that can be indexed with a boolean row mask
            (presumably a numpy array -- verify against callers).
        k: Passed through to ``kMeans_core.runKMeans``.
        labelCount: Number of trailing label columns in ``activations``.
        targetPath: Passed through to ``kMeans_core.saveResults``.
        indexLabelMapping: Optional argument for ``utility.getLabelStrings``;
            when ``None`` the labels are logged by numeric index instead of
            by name.

    Returns:
        ``[clusters, iterations]``: ``clusters`` is the concatenation of the
        clusters found for every label, ``iterations`` holds one entry per
        label, both as returned by ``kMeans_core.runKMeans``.
    """
    labels = None
    if indexLabelMapping is not None:
        labels = utility.getLabelStrings(indexLabelMapping)

    # Split the activations by label: one batch per trailing label column.
    firstLabelColumn = activations.shape[1] - labelCount
    activationsByLabel = []
    for offset in range(labelCount):
        currentLabelIndex = firstLabelColumn + offset
        logger.debug(currentLabelIndex)
        activationsByLabel.append(activations[activations[:, currentLabelIndex] == 1])

    clusters = []
    iterations = []
    for labelIndex, batch in enumerate(activationsByLabel):
        # Identity check on purpose: "!= None" is ambiguous for array-like values.
        if labels is not None:
            logger.info('Running KMeans for label ' + labels[labelIndex] + '.')
        else:
            logger.info('Running KMeans for label ' + str(labelIndex))
        logger.debug("Batch shape: " + str(batch.shape))
        batchClusters, batchIterations = kMeans_core.runKMeans(batch, labelCount, k, MAX_ITERATIONS)
        clusters.extend(batchClusters)
        iterations.append(batchIterations)

    kMeans_core.saveResults(clusters, iterations, targetPath)

    return [clusters, iterations]
Ejemplo n.º 2
0
def runTests(activationsPath, indexLabelMappingPath, sourcePath):
    """Evaluate every stored cluster group and write per-type overviews.

    For each cluster group returned by ``loadClusters(sourcePath)`` the test
    is run via ``runTest``, the confusion matrix is saved and plotted next to
    the clusters, and one summary row ``[k, meanAveragePrecision,
    overallCorrect, overallWrong]`` is collected. Groups whose ``'type'`` is
    ``'perLabel'`` and all other groups are summarised separately.

    Args:
        activationsPath: Passed to ``utility.arrayFromFile``.
        indexLabelMappingPath: Passed to ``utility.getLabelStrings``.
        sourcePath: Folder handed to ``loadClusters``; also used as prefix
            for the overview files. A trailing '/' is appended when missing.
    """
    labels = utility.getLabelStrings(indexLabelMappingPath)
    activations = utility.arrayFromFile(activationsPath)

    if not sourcePath.endswith('/'):
        sourcePath += '/'

    # List with all previously generated cluster groups.
    clusterGroups = loadClusters(sourcePath)

    mixedClusterResultsSimple = []
    perLabelClusterResultsSimple = []

    for clusters in clusterGroups:
        logger.info("Evaluating clusters at " + clusters['path'])

        # The fourth argument tells runTest whether the group is per-label.
        isPerLabel = clusters['type'] == 'perLabel'
        confusionMatrix, meanAveragePrecision, overallCorrect, overallWrong = runTest(
            clusters['data'], activations, labels, isPerLabel)

        summary = [clusters['k'], meanAveragePrecision, overallCorrect, overallWrong]
        if isPerLabel:
            perLabelClusterResultsSimple.append(summary)
        else:
            mixedClusterResultsSimple.append(summary)

        saveConfusionMatrix(confusionMatrix, clusters['path'] + "confusion_test.npy")
        utility.plotConfusionMatrix(confusionMatrix, labels, clusters['path'] + "confusion_test.pdf")

    overviewPerLabel = np.array(perLabelClusterResultsSimple)
    overviewMixed = np.array(mixedClusterResultsSimple)

    saveOverview(overviewPerLabel, sourcePath + 'perLabel_')
    saveOverview(overviewMixed, sourcePath + 'result_mixed_')

    utility.plotKMeansOverview(overviewPerLabel, sourcePath + 'perLabel_result.pdf', True)
    utility.plotKMeansOverview(overviewMixed, sourcePath + 'result_mixed_result.pdf', True)
Ejemplo n.º 3
0
def plotOverview(activations, labelCount, indexLabelMappingPath, plotFileName):
    """Plot a grey-scale overview of randomly sampled activations per label.

    The last ``labelCount`` columns of ``activations`` are read as label
    columns. For every label, ``1024 // labelCount`` rows whose label column
    equals 1 are drawn at random (with replacement) and stacked; the
    non-label columns of the stacked rows are shown as an image and saved to
    ``plotFileName``.

    Args:
        activations: 2-D numpy array; the trailing ``labelCount`` columns are
            the label columns.
        labelCount: Number of trailing label columns.
        indexLabelMappingPath: Optional argument for
            ``utility.getLabelStrings``; when ``None`` the y tick labels are
            "Label 0", "Label 1", ...
        plotFileName: Target passed to ``plt.savefig``.
    """
    firstLabelIndex = activations.shape[1] - labelCount

    # Wide activation vectors (> 1024 columns) are folded by 4 or 2 when the
    # width divides evenly, to keep the image narrow.
    reduceActivationsBy = 1
    if firstLabelIndex > 1024:
        if firstLabelIndex % 4 == 0:
            reduceActivationsBy = 4
        elif firstLabelIndex % 2 == 0:
            reduceActivationsBy = 2

    # Floor division: both values are used as array sizes and must stay
    # integers on Python 3 as well as Python 2.
    reduceActivationsTo = firstLabelIndex // reduceActivationsBy
    activationsPerLabel = 1024 // labelCount

    logger.info(str(activationsPerLabel) + " rows per label.")

    # Seed with an empty float block so the stacked result keeps the same
    # dtype/shape even when no label contributes rows.
    sampledBlocks = [np.empty((0, activations.shape[1]))]
    tickLabels = []

    for labelOffset in range(labelCount):
        labelMask = activations[:, firstLabelIndex + labelOffset] == 1
        monoLabelSelection = activations[labelMask]
        picked = np.random.randint(0, monoLabelSelection.shape[0], activationsPerLabel)
        sampledBlocks.append(monoLabelSelection[picked])
        tickLabels.append("Label " + str(labelOffset))

    selection = np.vstack(sampledBlocks)

    if indexLabelMappingPath is not None:
        tickLabels = utility.getLabelStrings(indexLabelMappingPath)

    cmap = plt.get_cmap('Greys_r')
    cmap_adjusted = colors.LinearSegmentedColormap.from_list(
        'trunc(' + cmap.name + ', ' + str(0) + ',' + str(1) + ')',
        cmap(np.linspace(0, 1, 100)))

    # Fold each row into reduceActivationsBy chunks and average across them.
    scaled = np.reshape(selection[:, 0:firstLabelIndex],
                        (selection.shape[0], reduceActivationsBy, reduceActivationsTo))
    scaled = scaled.mean(axis=1)

    plt.imshow(scaled, cmap=cmap_adjusted, interpolation='none')

    ax = plt.gca()
    ax.tick_params(axis='both', which='major', bottom=False, top=False, left=False, right=False)
    # One tick in the vertical middle of each label's band of rows.
    ticks = np.arange(activationsPerLabel * 0.5, selection.shape[0], activationsPerLabel)

    ax.set_yticks(ticks)
    ax.set_yticklabels(tickLabels)

    plt.savefig(plotFileName, bbox_inches='tight')
Ejemplo n.º 4
0
def generateKMeansSeries(activationsPath, labelIndexMappingPath, targetFolder):
    """Generate and plot a series of per-label and mixed k-means runs.

    For every k in the per-label range and every k in the mixed range,
    RUNS_PER_TYPE runs are computed. Each run gets its own folder
    ``<targetFolder>/<type>_<k>/run_<n>/`` which receives the results and the
    cluster plot.

    Args:
        activationsPath: Passed to ``utility.arrayFromFile``.
        labelIndexMappingPath: Passed to ``utility.getLabelStrings`` and to
            ``kMeans_per_label.findKMeansPerLabel``.
        targetFolder: Root folder for all runs; a trailing '/' is appended
            when missing.
    """
    if not targetFolder.endswith('/'):
        targetFolder = targetFolder + '/'

    activations = utility.arrayFromFile(activationsPath)
    labels = utility.getLabelStrings(labelIndexMappingPath)
    labelCount = len(labels)
    neurons = activations.shape[1] - labelCount

    # NOTE(review): the range() loops assume the module-level range constants
    # are integers and MIXED_STEP is positive -- confirm.

    # test per label k-means
    for k in range(PER_LABEL_START, PER_LABEL_END + 1):
        for runCounter in range(RUNS_PER_TYPE):
            logger.info("Calculating per label KMeans with k = " + str(k) + ".")

            currentTarget = targetFolder + "perLabel_" + str(k) + "/run_" + str(runCounter) + "/"
            runDirectory = os.path.dirname(currentTarget)
            if not os.path.exists(runDirectory):
                os.makedirs(runDirectory)

            c, i = kMeans_per_label.findKMeansPerLabel(
                activations, k, labelCount, currentTarget, labelIndexMappingPath)
            cleaned = kMeans_core.cleanUp(c)
            plot_cluster.plotClusters(cleaned, i, neurons, currentTarget, labels[:labelCount])

    # test mixed k-means
    for k in range(MIXED_START, MIXED_END + 1, MIXED_STEP):
        for runCounter in range(RUNS_PER_TYPE):
            logger.info("Calculating mixed KMeans with k = " + str(k) + ".")

            currentTarget = targetFolder + "mixed_" + str(k) + "/run_" + str(runCounter) + "/"
            runDirectory = os.path.dirname(currentTarget)
            if not os.path.exists(runDirectory):
                os.makedirs(runDirectory)

            c, i = kMeans_mixed.findKMeans(activations, k, labelCount, currentTarget)
            cleaned = kMeans_core.cleanUp(c)
            plot_cluster.plotClusters(cleaned, i, neurons, currentTarget, labels[:labelCount])
Ejemplo n.º 5
0
def generateKMeansSeries(activationsPath, labelIndexMappingPath, targetFolder):
    """Compute and plot the configured series of k-means runs.

    Two series are produced: per-label k-means for every k from
    PER_LABEL_START to PER_LABEL_END, and mixed k-means for every k from
    MIXED_START to MIXED_END in steps of MIXED_STEP. Each k is repeated
    RUNS_PER_TYPE times, every run in its own sub-folder of ``targetFolder``.

    Args:
        activationsPath: Passed to ``utility.arrayFromFile``.
        labelIndexMappingPath: Passed to ``utility.getLabelStrings`` and to
            ``kMeans_per_label.findKMeansPerLabel``.
        targetFolder: Root output folder; a trailing "/" is appended when
            missing.
    """
    baseFolder = targetFolder if targetFolder.endswith("/") else targetFolder + "/"

    def prepareRunFolder(prefix, clusterCount, run):
        # Build "<base><prefix><k>/run_<run>/" and create it when missing.
        runFolder = baseFolder + prefix + str(clusterCount) + "/run_" + str(run) + "/"
        folderName = os.path.dirname(runFolder)
        if not os.path.exists(folderName):
            os.makedirs(folderName)
        return runFolder

    activations = utility.arrayFromFile(activationsPath)
    labels = utility.getLabelStrings(labelIndexMappingPath)
    neurons = activations.shape[1] - len(labels)

    # NOTE(review): range() assumes integer range constants and a positive
    # MIXED_STEP -- confirm against the module-level definitions.
    perLabelKs = range(PER_LABEL_START, PER_LABEL_END + 1)
    mixedKs = range(MIXED_START, MIXED_END + 1, MIXED_STEP)

    # test per label k-means
    for k in perLabelKs:
        for runCounter in range(RUNS_PER_TYPE):
            logger.info("Calculating per label KMeans with k = " + str(k) + ".")
            currentTarget = prepareRunFolder("perLabel_", k, runCounter)

            c, i = kMeans_per_label.findKMeansPerLabel(
                activations, k, len(labels), currentTarget, labelIndexMappingPath
            )
            plot_cluster.plotClusters(kMeans_core.cleanUp(c), i, neurons, currentTarget, labels[: len(labels)])

    # test mixed k-means
    for k in mixedKs:
        for runCounter in range(RUNS_PER_TYPE):
            logger.info("Calculating mixed KMeans with k = " + str(k) + ".")
            currentTarget = prepareRunFolder("mixed_", k, runCounter)

            c, i = kMeans_mixed.findKMeans(activations, k, len(labels), currentTarget)
            plot_cluster.plotClusters(kMeans_core.cleanUp(c), i, neurons, currentTarget, labels[: len(labels)])
Ejemplo n.º 6
0
      utility.plotConfusionMatrix(confusionMatrix, labels, currentTargetPath + "confusion.pdf")

      kCounter += 1

   results = np.array(results)

   logger.info('Writing file ' + targetPath + "overview.csv")
   np.savetxt( targetPath + "overview.csv", results, delimiter=',')

   utility.plotKMeansOverview(results, targetPath + "overview.pdf", False)


if __name__ == '__main__':
    # Command line: four mandatory arguments plus an optional fifth one.
    argumentCount = len(sys.argv)
    if not (argumentCount == 5 or argumentCount == 6):
        usage = (
            "Please provide as argument:",
            "1) Path to training activations (*.npy).",
            "2) Path to test activations (*.npy).",
            "3) Path to label mapping.",
            "4) Path for target folder.",
            "5) Neighbour relations (optional)",
        )
        for usageLine in usage:
            logger.info(usageLine)
        sys.exit()

    trainingActivations = utility.arrayFromFile(sys.argv[1])
    testActivations = utility.arrayFromFile(sys.argv[2])
    labels = utility.getLabelStrings(sys.argv[3])
    # The neighbour relations are only loaded when the optional argument is given.
    nn = utility.arrayFromFile(sys.argv[5]) if argumentCount == 6 else None

    runTests(trainingActivations, testActivations, labels, sys.argv[4], nn)
Ejemplo n.º 7
0
      logger.info("4) Target path for evaluation results.")
      sys.exit();

   # Positional command line arguments (the usage text precedes this section).
   labelIndexInfoPath = sys.argv[1]
   labelIndexMappingPath = sys.argv[2]
   logFilePath = sys.argv[3]
   evaluationTargetPath = sys.argv[4]
   # File names are appended directly below, so force a trailing separator.
   if not evaluationTargetPath.endswith('/'):
      evaluationTargetPath += '/'

   # Create the target folder when it does not exist yet.
   targetDirectory = os.path.dirname(evaluationTargetPath)
   if not os.path.exists(targetDirectory) and targetDirectory != '':
      os.makedirs(targetDirectory)

   plotTrainingLossAndAccuracy(logFilePath, evaluationTargetPath)

   labels = utility.getLabelStrings(labelIndexMappingPath)
   [confusionMatrix, meanAveragePrecision, overallCorrect, overallWrong] = testNeuralNet(labels, labelIndexInfoPath, evaluationTargetPath)

   # The overview row starts with a constant 0 followed by the three metrics.
   overviewPath = evaluationTargetPath + "overview.csv"
   logger.info('Writing file ' + overviewPath)
   np.savetxt(overviewPath, np.array([0, meanAveragePrecision, overallCorrect, overallWrong]), delimiter=',')

   confusionMatrixPath = evaluationTargetPath + 'confusionMatrix.npy'
   logger.info('Writing file ' + confusionMatrixPath)
   # np.save emits binary data, so the file has to be opened in binary mode.
   with open(confusionMatrixPath, "wb") as outputFile:
      np.save(outputFile, confusionMatrix)

   utility.plotConfusionMatrix(confusionMatrix, labels, evaluationTargetPath + 'confusionMatrix.pdf')
Ejemplo n.º 8
0
                                    currentTargetPath + "confusion.pdf")

        kCounter += 1

    results = np.array(results)

    logger.info('Writing file ' + targetPath + "overview.csv")
    np.savetxt(targetPath + "overview.csv", results, delimiter=',')

    utility.plotKMeansOverview(results, targetPath + "overview.pdf", False)


if __name__ == '__main__':
    # Everything after the script name: four mandatory values, one optional.
    arguments = sys.argv[1:]
    if len(arguments) < 4 or len(arguments) > 5:
        logger.info("Please provide as argument:")
        logger.info("1) Path to training activations (*.npy).")
        logger.info("2) Path to test activations (*.npy).")
        logger.info("3) Path to label mapping.")
        logger.info("4) Path for target folder.")
        logger.info("5) Neighbour relations (optional)")
        sys.exit()

    trainingActivations = utility.arrayFromFile(arguments[0])
    testActivations = utility.arrayFromFile(arguments[1])
    labels = utility.getLabelStrings(arguments[2])

    # Neighbour relations stay None unless the optional fifth value is present.
    nn = None
    if len(arguments) > 4:
        nn = utility.arrayFromFile(arguments[4])

    runTests(trainingActivations, testActivations, labels, arguments[3], nn)
Ejemplo n.º 9
0
    # Positional command line arguments.
    labelIndexInfoPath = sys.argv[1]
    labelIndexMappingPath = sys.argv[2]
    logFilePath = sys.argv[3]
    evaluationTargetPath = sys.argv[4]
    # File names are appended directly below, so force a trailing separator.
    if not evaluationTargetPath.endswith('/'):
        evaluationTargetPath += '/'

    # Create the target folder when it does not exist yet.
    targetDirectory = os.path.dirname(evaluationTargetPath)
    if not os.path.exists(targetDirectory) and targetDirectory != '':
        os.makedirs(targetDirectory)

    plotTrainingLossAndAccuracy(logFilePath, evaluationTargetPath)

    labels = utility.getLabelStrings(labelIndexMappingPath)
    [confusionMatrix, meanAveragePrecision, overallCorrect,
     overallWrong] = testNeuralNet(labels, labelIndexInfoPath,
                                   evaluationTargetPath)

    # The overview row starts with a constant 0 followed by the three metrics.
    overviewPath = evaluationTargetPath + "overview.csv"
    logger.info('Writing file ' + overviewPath)
    np.savetxt(overviewPath,
               np.array(
                   [0, meanAveragePrecision, overallCorrect, overallWrong]),
               delimiter=',')

    confusionMatrixPath = evaluationTargetPath + 'confusionMatrix.npy'
    logger.info('Writing file ' + confusionMatrixPath)
    # np.save emits binary data, so the file has to be opened in binary mode.
    with open(confusionMatrixPath, "wb") as outputFile:
        np.save(outputFile, confusionMatrix)