def testClusters(self, x):
    """Print cluster-intersection diagnostics for the sample ``x``.

    Prints, in order: the number of entries in ``self.clusters``, the
    intersection ratio of ``x`` with every cluster, the ratio for the
    cluster selected by ``self.model.predict(x)``, and the ratio for a
    cluster built directly from ``x`` with ``self.buildCluster``.

    Parameters:
        x: the sample to inspect; passed unchanged to
            ``self.model.predict``, ``core.clusterIntersectionRatio`` and
            ``self.buildCluster``.

    Returns:
        None. The results are only printed.
    """
    predicted = self.model.predict(x)
    # Every print call receives one pre-formatted argument so the output is
    # identical under the Python 2 print statement and the Python 3 print
    # function (the two spaces reproduce the original "text, value" form).
    print('Number of clusters:  %s' % len(self.clusters))
    # Index explicitly: only len() and integer indexing are known to be
    # supported by self.clusters.
    for index in range(len(self.clusters)):
        print(core.clusterIntersectionRatio(x, self.clusters[index]))
    chosen = self.clusters[predicted]
    print('This should be better:')
    print(core.clusterIntersectionRatio(x, chosen))
    print('This should be even better:')
    rebuilt = self.buildCluster(x)
    print(core.clusterIntersectionRatio(x, rebuilt))
Exemple #2
def test(data, recommender, fractionTrain=.8, highFactor=.1, verbose=False):
    """Evaluate a color recommender on a held-out part of ``data``.

    The data is split with ``ml.splitData``. ``removeColors`` is applied to
    both parts and yields, per sample, a removed color, a removed quantity
    and a histogram. The recommender is fitted on the training part and
    then asked to predict the removed color of every test histogram.
    Test samples whose removed color is over-represented in the test set,
    or whose histogram value at that color exceeds 0.4, are skipped.

    Parameters:
        data: an array of arrays; passed to ``ml.splitData``.
        recommender: object providing ``predict(histogram)`` and, in
            verbose mode, ``cluster(histogram)``. It is also fitted here
            (see the review note next to the ``fit`` call).
        fractionTrain: a float specifying the fraction of data to use as
            train.
        highFactor: ratio of any given element to the largest element in
            the array; forwarded to ``removeColors``.
        verbose: when true, print per-sample diagnostics and accumulate the
            cluster intersection ratio.

    Returns:
        The fraction of evaluated (non-ignored) test samples for which the
        recommended color equals the removed color, or 0.0 when every test
        sample was ignored. The original notes described the result as an
        RMSE; an error value from ``colorError`` is printed, not returned.

    Original author's notes on the removal step (performed by
    ``removeColors``, which is not shown here): works by first finding the
    max value in the histogram and then trying to find the index of the
    histogram that contains the value that is closest (in terms of a ratio
    with the max value) to highFactor. Removes this color, then passes the
    histogram to the recommender to get back a value.
    """
    xTrain, xTest = ml.splitData(data, fractionTrain)
    n = xTest.shape[0]

    train_colors, _, train_histograms = removeColors(xTrain, highFactor=highFactor)
    # NOTE(review): the original line here was garbled (unbalanced
    # parentheses ending in ", train_colors)"). It is reconstructed as a fit
    # on the training histograms and colors, which matches the
    # "Done fitting" message below -- confirm against the recommender API.
    recommender.fit(train_histograms, train_colors)
    if verbose:
        print('Done fitting')

    colors, quantities, histograms = removeColors(xTest, highFactor=highFactor)
    assert(colors.shape[0] == n)
    assert(histograms.shape[0] == n)

    # How many times each color bin is the removed color in the test set.
    D = histograms.shape[1]
    count = np.zeros(D)
    for color in colors:
        count[color] += 1

    # Mean and spread over the bins that occur at least once.
    tmp = count[np.where(count > 0)]
    color_mean = np.mean(tmp)
    color_stdev = np.std(tmp)
    # One pre-formatted argument keeps the output identical under the
    # Python 2 print statement and the Python 3 print function.
    print('Color mean and stdev %s %s' % (color_mean, color_stdev))

    recommendedColors = np.zeros((n))
    # Marks the samples that were actually scored, so the final error is
    # computed on the same samples as the accuracy.
    evaluated = np.zeros(n, dtype=bool)
    numCorrect = 0
    ignored = 0
    intersectionRatio = 0.
    for i in range(n):
        if i % 100 == 1:
            print('Partial %d: %f' % (i, float(numCorrect) / (i - ignored + 1)))

        color, amount, hist = colors[i], quantities[i], histograms[i]
        # Ignore colors that might bias us.
        if count[color] > color_mean + color_stdev:
            ignored += 1
            continue
        # Ignore colors that are basically the background.
        if hist[color] > 0.4:
            ignored += 1
            continue

        if verbose:
            # NOTE(review): names is not defined in this function;
            # presumably a module-level sequence of site names -- confirm.
            print('Testing site %s' % names[i])
            print('Amount remmoved %d' % amount)
            cluster = recommender.cluster(hist)
            intersectionRatio += core.clusterIntersectionRatio(hist, cluster)

        recommendedColor = recommender.predict(hist)
        recommendedColors[i] = recommendedColor
        evaluated[i] = True

        if verbose:
            r1, g1, b1 = image.binToRGB(color)
            r2, g2, b2 = image.binToRGB(recommendedColor)
            print('Removed color %d %d %d. Recommended color %d %d %d.' % (r1, g1, b1, r2, g2, b2))
            print('Color distance: %d' % (image.binDistance(recommendedColor, color)))
            print('Recommended color %d' % (recommendedColor))

        if recommendedColor == color:
            numCorrect += 1

    used = n - ignored
    print('Ignored: %d. Used: %d' % (ignored, used))
    if used == 0:
        # Nothing was scored; avoid dividing by zero below.
        return 0.

    # The ratio is only accumulated in verbose mode, so this prints 0
    # otherwise.
    print('Mean cluster intersection ratio: %f' % (intersectionRatio / used))
    print(colorError(colors[evaluated], recommendedColors[evaluated]))
    percentCorrect = float(numCorrect) / used
    return percentCorrect