def testClusters(self, x):
     p = self.model.predict(x)
     print 'Number of clusters: ', len(self.clusters)
     for i in xrange(len(self.clusters)):
     	print core.clusterIntersectionRatio(x, self.clusters[i])
     C = self.clusters[p]
     print 'This should be better:'
     print core.clusterIntersectionRatio(x, C)
     print 'This should be even better:'
     D = self.buildCluster(x)
     print core.clusterIntersectionRatio(x, D)
Beispiel #2
0
def test(data, recommender, fractionTrain=.8, highFactor=.1, verbose=False):
    """
    Parameters:
        cluster: an array of arrays
        fractionTrain: a float specifying percent of data to use as train
        highFactor: ratio of any given element to the largest element in the array
    Returns:
        the RMSE produced by removing a specific color, then adding it back

    Algorithm:
        Works by first finding the max value in the histogram and then trying to find the
        index of the histogram that contains the value that is closest (in terms of a ration
        with the max value) to highFactor. Removes this color, then passes it to recommender
        to get back a value which it then adds to the histogramogram, then takes the rmse between
        the original and the modified
    """
    xTrain, xTest = ml.splitData(data, fractionTrain)
    n = xTest.shape[0]
    m = xTrain.shape[0]

    train_colors, _, train_histograms = removeColors(xTrain, highFactor=highFactor)
    recommender.fit(train_histograms, train_colors)
    if verbose:
        print 'Done fitting'

    colors, quantities, histograms = removeColors(xTest, highFactor=highFactor)
    assert(colors.shape[0] == n)
    assert(histograms.shape[0] == n)
    numCorrect = 0

    D = histograms.shape[1]
    count = np.zeros(D)
    for color in colors:
        count[color] += 1

    tmp = count[np.where(count > 0)]
    color_mean = np.mean(tmp)
    color_stdev = np.std(tmp)
    print 'Color mean and stdev', color_mean, color_stdev

    recommendedColors = np.zeros((n))
    ignored = 0
    intersectionRatio = 0.
    for i in xrange(n):
        if i % 100 == 1:
            print 'Partial %d: %f' % (i, float(numCorrect) / (i - ignored + 1))

        color, amount = colors[i], quantities[i]
        # Ignore colors that might bias us
        if count[color] > color_mean + color_stdev:
            ignored += 1
            continue

        hist = histograms[i]
        # Ignore colors that are basically the background
        if hist[color] > 0.4:
        	ignored += 1
        	continue

        if verbose:
            print 'Testing site %s' % names[i]
            print 'Amount remmoved %d' % amount
        try:
            cluster = recommender.cluster(hist)
            intersectionRatio += core.clusterIntersectionRatio(hist, cluster)
        except:
            pass
        #recommender.testClusters(hist)
        recommendedColor = recommender.predict(hist)
        r1, g1, b1 = image.binToRGB(color)
        r2, g2, b2 = image.binToRGB(recommendedColor)
        if verbose:
            print 'Removed color %d %d %d. Recommended color %d %d %d.' % (r1, g1, b1, r2, g2, b2)
            print 'Color distance: %d' % (image.binDistance(recommendedColor, color))
        recommendedColors[i] = recommendedColor

        if verbose:
            print 'Recommended color %d' % (recommendedColor)

        if recommendedColor == color:
            numCorrect += 1


    print 'Ignored: %d. Used: %d' % (ignored, n - ignored)
    print 'Mean cluster intersection ratio: %f' % (intersectionRatio / (n - ignored))
    print colorError(colors, recommendedColors)
    percentCorrect = float(numCorrect)/(n - ignored)
    return percentCorrect