コード例 #1
0
def makeContours(xcoordinates, ycoordinates, width, height, binsize):
    """Histogram labeled pixel coordinates and describe their contours at three levels.

    The coordinates are binned into a 2-d histogram covering (0, width) x
    (0, height) with bins of size `binsize`.  Contours are then extracted at a
    fixed level of 0.5 ("contoursMin") and at the levels returned by
    utilities.cutLevel for 50.0 and 95.0 ("contours50", "contours95").

    Returns a dict with:
      * "contoursMin", "contours50", "contours95": lists with one dict per
        contour polygon, holding the polygon in pixel-index coordinates
        ("rowcolpolygon"), in lng,lat ("lnglatpolygon"), its area in pixels,
        its circumference in meters, its area in meters^2 and its centroid in
        lng,lat;
      * "numberOfLabeledPixels": len(xcoordinates);
      * "cutLevel50", "cutLevel95": the two computed cut levels.
    """
    # NOTE(review): `metadata` is not a parameter of this function; it is
    # resolved from an enclosing/module scope -- confirm it is defined there.
    getLngLat = utilities.makeGetLngLat(metadata)
    getMeters = utilities.makeGetMeters(metadata)

    # make the 2d histogram
    numBins = (int(width/binsize), int(height/binsize))
    clusterdata, xedges, yedges = numpy.histogram2d(xcoordinates, ycoordinates, bins=numBins, range=((0, width), (0, height)))
    if len(xcoordinates) == 0:
        # with no input points, fall back to an explicit all-zero histogram
        clusterdata = numpy.zeros(numBins, dtype=numpy.dtype(float))

    # make contours for the three levels; the contour polygons are expressed in pixel-index coordinates (not lng/lat or meters)
    contoursMin = utilities.contours(clusterdata, xedges, yedges, 0.5, interpolate=True, smooth=True)
    cutLevel50 = utilities.cutLevel(clusterdata, 50.0)
    contours50 = utilities.contours(clusterdata, xedges, yedges, cutLevel50, interpolate=True, smooth=True)
    cutLevel95 = utilities.cutLevel(clusterdata, 95.0)
    contours95 = utilities.contours(clusterdata, xedges, yedges, cutLevel95, interpolate=True, smooth=True)

    # construct output data that includes the polygons in lng,lat coordinates, circumferences in meters, and areas in meters^2
    clusterData = {"contoursMin": [], "contours50": [], "contours95": [],
                   "numberOfLabeledPixels": len(xcoordinates), "cutLevel50": cutLevel50, "cutLevel95": cutLevel95}

    # every contour level is described the same way, so one loop serves all three
    contoursByLevel = (("contoursMin", contoursMin), ("contours50", contours50), ("contours95", contours95))
    for levelName, polygons in contoursByLevel:
        for polygon in polygons:
            lnglatPolygon = utilities.convert(polygon, getLngLat)
            metersPolygon = utilities.convert(lnglatPolygon, getMeters)

            clusterData[levelName].append({
                "rowcolpolygon": polygon,
                "lnglatpolygon": lnglatPolygon,
                "areaInPixels": numpy.abs(utilities.area(polygon)),
                "circumferenceInMeters": numpy.abs(utilities.circumference(metersPolygon)),
                "areaInMeters": numpy.abs(utilities.area(metersPolygon)),
                "centroidInLngLat": utilities.centroid(lnglatPolygon)})

    return clusterData
コード例 #2
0
ファイル: gmmknn.py プロジェクト: occ-data/matsu-project
def doEverything(metadata, mask, originalBlock, numSetColors, numBands, bandToIndexLookup):
    """Search one multi-band image block for anomalous pixel clumps and emit a report.

    Steps, in order:
      1. unless cameraName is "ALI", select 78 bands and average them in
         groups of three (26 reduced bands);
      2. build a mask of pixels that are positive in every band, and a
         "shrunk" mask that also requires the pixels 1 and 2 steps away along
         each axis to be in the mask;
      3. take the logarithm and project onto differences of adjacent bands;
      4. compute an edge-strength image with a 5x5 derivative kernel;
      5. fit a 20-component mixture of Gaussians (MoG) to the projected pixels;
      6. score every pixel with the mixture and normalize by percentiles;
      7. bucket-fill outward from low-likelihood seed pixels to form clumps;
      8. compute per-clump attributes, apply selection cuts, and write the
         result with binaryhadoop.emit to sys.stdout.

    Arguments:
      metadata: dict; the keys read here are "bandNames", "bandWavelength"
          and "originalDirName".  It is also passed to the utilities helpers.
      mask: 2-d array; pixels with mask > 0 are candidates.
      originalBlock: array indexed as [band, row, column].
      numSetColors, numBands: the function returns early when
          numSetColors < numBands.
      bandToIndexLookup: not used by this function.

    Returns None.  Nothing is emitted when the function returns early (too
    few bands, too few points, or the mixture could not be fitted).

    NOTE(review): `cameraName` is not a parameter; it is resolved from module
    scope -- confirm it is defined there.
    """
    globalStart = time.time()

    if numSetColors < numBands:
        heartbeat("Image should have {} bands, but only {} were set\n".format(numBands, numSetColors))
        return

    if cameraName != "ALI":
        heartbeat("Reducing its dimensionality to 26\n")
        # 27 + 3 + 9 + 15 + 24 = 78 selected bands, averaged below in groups of three
        windowsOfBandsToTake = range(0, 27) + range(43, 46) + range(58, 67) + range(77, 92) + range(115, 139)
        reducedBlock = originalBlock[windowsOfBandsToTake,:,:]
        bandsToTake = []
        # explicit floor division: the number of groups must be an integer
        reducedBlock2 = numpy.zeros((reducedBlock.shape[0] // 3, reducedBlock.shape[1], reducedBlock.shape[2]), dtype=numpy.double)
        for i in xrange(reducedBlock2.shape[0]):
            # the middle band of each triple stands for the averaged group
            bandsToTake.append(windowsOfBandsToTake[3*i + 1])
            reducedBlock2[i] = reducedBlock[3*i:3*(i+1),:,:].mean(axis=0)
        del reducedBlock
    else:
        bandsToTake = numpy.argsort(metadata["bandNames"])
        reducedBlock2 = originalBlock

    heartbeat("Improving the mask\n")
    # keep only pixels that are strictly positive in every band (needed for the logarithm)
    betterMask = mask > 0
    for i in xrange(reducedBlock2.shape[0]):
        numpy.logical_and(betterMask, reducedBlock2[i,:,:] > 0.0, betterMask)
    del mask

    # shrinkmask additionally requires the pixels shifted by 1 and 2 along each
    # axis to be in betterMask (numpy.roll wraps around at the array edges)
    shrinkmask = betterMask > 0
    for roll in -2, -1, 1, 2:
        for axis in 0, 1:
            numpy.logical_and(shrinkmask, numpy.roll(betterMask, roll, axis=axis) > 0, shrinkmask)

    heartbeat("Taking logarithm\n")
    # non-positive pixels produce -inf/nan here; they are excluded by betterMask below.
    # errstate restores the previous settings even if numpy.log raises.
    with numpy.errstate(divide="ignore", invalid="ignore"):
        block = numpy.log(reducedBlock2)

    heartbeat("Reducing image to a bag of pixels\n")
    # flatten the two spatial axes without copying
    bag = block.view()
    bag.shape = (block.shape[0], block.shape[1] * block.shape[2])
    del block

    bagMask = betterMask.view()
    bagMask.shape = (originalBlock.shape[1] * originalBlock.shape[2])
    bag = bag[:, bagMask]
    del bagMask

    # row i has +1 in column i and -1 in column i + 1, so applying it to the
    # log-bands gives differences of adjacent log-bands
    projectionMatrix = numpy.matrix([[1 if j == i else -1 if j == i + 1 else 0 for j in xrange(reducedBlock2.shape[0])] for i in xrange(reducedBlock2.shape[0] - 1)])
    projectionInverse = projectionMatrix.I

    heartbeat("Projecting the bag onto the color-only basis\n")
    projected = numpy.array(numpy.dot(projectionMatrix, bag))
    del bag

    heartbeat("Casting the projected bag onto the image shape\n")
    # Only masked pixels are filled in below, but the whole block is convolved
    # and later indexed at border pixels, so it must start from a defined
    # value (zeros) rather than uninitialized memory (numpy.empty).
    projectedBlock = numpy.zeros((reducedBlock2.shape[0] - 1, reducedBlock2.shape[1], reducedBlock2.shape[2]), dtype=numpy.double)
    for i in xrange(reducedBlock2.shape[0] - 1):
        projectedBlock[i,betterMask] = projected[i,:]

    heartbeat("Detecting edges\n")
    # 5x5 Kroon without integer-rounding: http://www.k-zone.nl/Kroon_DerivativePaper.pdf
    Gx = numpy.array([[ 0.0007,  0.0052,  0.0370,  0.0052,  0.0007],
                      [ 0.0037,  0.1187,  0.2589,  0.1187,  0.0037],
                      [ 0.0,     0.0,     0.0,     0.0,     0.0],
                      [-0.0037, -0.1187, -0.2589, -0.1187, -0.0037],
                      [-0.0007, -0.0052, -0.0370, -0.0052, -0.0007]])
    Gy = Gx.T

    startTime = time.time()
    # accumulate squared gradients over all projected bands, then take the square root
    convBlock = numpy.zeros((projectedBlock.shape[1], projectedBlock.shape[2]), numpy.double)
    for index in xrange(projectedBlock.shape[0]):
        heartbeat("    {} {} {}\n".format(index, time.time() - startTime, convBlock.max()))
        # the [2:n+2] slices crop the convolution output back to the image shape
        convGx2 = numpy.power(convolve(projectedBlock[index,:,:], Gx)[2:projectedBlock.shape[1]+2, 2:projectedBlock.shape[2]+2], 2)
        convGy2 = numpy.power(convolve(projectedBlock[index,:,:], Gy)[2:projectedBlock.shape[1]+2, 2:projectedBlock.shape[2]+2], 2)
        convBlock = convBlock + convGx2
        convBlock = convBlock + convGy2
    convBlock = numpy.sqrt(convBlock)

    heartbeat("Edges took {} seconds to detect\n".format(time.time() - startTime))

    heartbeat("Optimizing GMM\n")
    numGMMcomponents = 20
    startTime = time.time()

    # require at least 10 points per mixture component and per dimension
    if projected.shape[1] < 10 * numGMMcomponents or projected.shape[1] < 10 * projected.shape[0]:
        heartbeat("There are only {} points; skipping (number of GMM components is {} and number of dimensions in the space is {})\n".format(projected.shape[1], numGMMcomponents, projected.shape[0]))
        return

    # the fit is retried from scratch whenever it raises LinAlgError
    maxAttempts = 5
    attempts = 0
    done = False
    while not done:
        try:
            if 10000 < projected.shape[1]:
                # three warm-up fits on random 10000-point subsamples, each
                # seeded with the previous result, before fitting all points
                randomSelection = projected[:,random.sample(xrange(projected.shape[1]), 10000)]
                model = MoG(randomSelection, numGMMcomponents)
                model.em(10)
                heartbeat("     time for first pass: {} seconds\n".format(time.time() - startTime))

                randomSelection = projected[:,random.sample(xrange(projected.shape[1]), 10000)]
                model = MoG(randomSelection, numGMMcomponents, means=model.means, covs=model.covs, mixprops=model.mixprops)
                model.em(10)
                heartbeat("     time for second pass: {} seconds\n".format(time.time() - startTime))

                randomSelection = projected[:,random.sample(xrange(projected.shape[1]), 10000)]
                model = MoG(randomSelection, numGMMcomponents, means=model.means, covs=model.covs, mixprops=model.mixprops)
                model.em(10)
                heartbeat("     time for third pass: {} seconds\n".format(time.time() - startTime))

                model = MoG(projected, numGMMcomponents, means=model.means, covs=model.covs, mixprops=model.mixprops)
                model.em(5)
                done = True

            else:
                heartbeat("     skipping three-pass subfit because the dataset is small\n")

                model = MoG(projected, numGMMcomponents)
                model.em(5)
                done = True

        except numpy.linalg.LinAlgError:
            attempts += 1
            if attempts >= maxAttempts:
                # the message reports the number of attempts actually made
                heartbeat("    could not fit in {} attempts; giving up\n".format(maxAttempts))
                return

    heartbeat("GMM took {} seconds to optimize\n".format(time.time() - startTime))

    heartbeat("Scoring all pixels with GMM\n")
    startTime = time.time()
    # per-pixel score: logsumexp over mixture components of the unnormalized log posteriors
    scores = logsumexp(model.compute_posteriors(projected, reinit=True, normalize=False, logscale=True), 0)

    scoresBlock = numpy.zeros((reducedBlock2.shape[1], reducedBlock2.shape[2]), dtype=numpy.double)
    scoresBlock[betterMask] = scores
    del scores

    # normalize so that the 0.5th..99.5th percentile range (within shrinkmask) maps to 0..1
    themin, themax = numpy.percentile(convBlock[shrinkmask], [0.5, 99.5])
    convBlockNorm = (convBlock - themin)/(themax - themin)

    # the score normalization is inverted: values above 1.0 are pixels whose
    # raw score is below the 0.5th percentile
    themin, themax = numpy.percentile(scoresBlock[shrinkmask], [0.5, 99.5])
    scoresBlockNorm = 1.0 - (scoresBlock - themin)/(themax - themin)
    rawscoresmin, rawscoresmax = themin, themax
    del scoresBlock

    # seed pixels for the bucket-fill
    selection = numpy.logical_and(scoresBlockNorm > 1.0, shrinkmask)
    indexes = zip(*numpy.nonzero(selection))

    # NOTE(review): scoredBag is never read; the call is kept in case
    # compute_posteriors(reinit=True) updates the model's state -- TODO confirm
    scoredBag = numpy.argmax(model.compute_posteriors(projected, reinit=True), axis=0)
    heartbeat("GMM took {} seconds to score all pixels (a few times in different ways)\n".format(time.time() - startTime))

    heartbeat("Blurring scores for bucket-fill to spread better\n")
    startTime = time.time()
    # 5x5 Gaussian spot with sigma = 1, normalized to unit sum
    spot = numpy.array([[math.exp(-((i - 2)**2 + (j - 2)**2)/2.0/1.0**2) for i in xrange(5)] for j in xrange(5)])
    spot = spot / spot.sum()
    blurScores = convolve(scoresBlockNorm[:,:], spot)[2:scoresBlockNorm.shape[0]+2, 2:scoresBlockNorm.shape[1]+2]
    heartbeat("Time to blur image: {} seconds\n".format(time.time() - startTime))

    heartbeat("Building the KD-tree\n")
    startTime = time.time()
    kdtree = KDTree(projected)
    # spread between the 1st and 99th percentiles of all projected values
    lowPercentile, highPercentile = numpy.percentile(projected, [1, 99])
    dynamicRange = highPercentile - lowPercentile
    heartbeat("KD-tree took {} seconds to build\n".format(time.time() - startTime))

    heartbeat("Performing bucket-fill searches\n")
    startTime = time.time()

    clumps = set()
    # assigned[i, j] holds the clump that owns pixel (i, j), or None
    assigned = numpy.empty((projectedBlock.shape[1], projectedBlock.shape[2]), dtype=numpy.dtype(object))
    considered = numpy.zeros((projectedBlock.shape[1], projectedBlock.shape[2]), dtype=numpy.dtype(bool))

    # shared state is handed to the clumps through class attributes
    Clump.assigned = assigned
    Clump.projectedBlock = projectedBlock
    Clump.projectionInverse = projectionInverse
    Clump.metadata = metadata
    Clump.kdtree = kdtree
    Clump.bandsToTake = bandsToTake
    Clump.projected = projected
    Clump.convBlockNorm = convBlockNorm
    Clump.model = model
    Clump.rawscoresmin = rawscoresmin
    Clump.rawscoresmax = rawscoresmax
    Clump.dynamicRange = dynamicRange
    Clump.blurBlock = None
    Clump.scoresBlockNorm = scoresBlockNorm
    Clump.blurScores = blurScores

    for index, (x, y) in enumerate(indexes):
        if index % 100 == 0:
            heartbeat("    {} {}\n".format(float(index)/len(indexes), time.time() - startTime))
        queue = [(x, y)]
        clump = Clump((x, y))
        while len(queue) > 0:
            i, j = queue.pop()

            if i >= 0 and j >= 0 and i < shrinkmask.shape[0] and j < shrinkmask.shape[1] and shrinkmask[i, j]:
                if not considered[i, j]:
                    if blurScores[i, j] > 1.0:
                        clump.add((i, j))
                        assigned[i, j] = clump

                        if clump.size() > Clump.maxSize:
                            heartbeat("         clump is too big!\n")
                            break

                        # NOTE(review): `ii != jj` skips (0, 0) but also the
                        # diagonal neighbors (-1, -1) and (1, 1), while keeping
                        # (-1, 1) and (1, -1) -- confirm this is intended
                        newQueue = []
                        for ii in -1, 0, 1:
                            for jj in -1, 0, 1:
                                if ii != jj:
                                    newQueue.append((i + ii, j + jj))

                        queue = newQueue + queue

                elif assigned[i, j] is not None and assigned[i, j] != clump:
                    # reached a pixel owned by another clump: merge into it
                    heartbeat("         merging clumps\n")
                    clump = assigned[i, j].mergeIn(clump)
                    if clump.size() > Clump.maxSize:
                        heartbeat("             ... into one that was too big!\n")
                        break

                considered[i, j] = True

        if not clump.isEmpty() and clump.size() <= Clump.maxSize:
            heartbeat("     new clump with size {}\n".format(clump.size()))
            clumps.add(clump)

    heartbeat("bucket-fill search took {} seconds\n".format(time.time() - startTime))

    heartbeat("Calculating attributes of the image and clumps\n")
    startTime = time.time()

    # NOTE(review): gmmSpectrum is defined but not called within this function
    def gmmSpectrum(index):
        unnormalized = numpy.exp(numpy.array(numpy.dot(projectionInverse, model.means[:,index].T))[0])
        return dict(zip(list(numpy.array(metadata["bandNames"])[bandsToTake]), unnormalized / unnormalized.sum()))

    def fullSpectrumAt(indexes):
        # per-band average of the original (unreduced) block over the given pixels
        unnormalized = numpy.zeros(originalBlock.shape[0], dtype=numpy.double)
        for i, j in indexes:
            unnormalized += originalBlock[:,i,j]
        return dict(zip(metadata["bandNames"], unnormalized / len(indexes)))

    output = {"metadata": metadata}
    getLngLat = utilities.makeGetLngLat(metadata)
    getMeters = utilities.makeGetMeters(metadata)

    # NOTE(review): wavelengths is never read afterwards
    wavelengths = [metadata["bandWavelength"][x] for x in numpy.array(metadata["bandNames"])[bandsToTake]]
    clusterNumber = 0
    for clump in clumps:
        heartbeat("     do clump {}\n".format(clump.indexes))

        # Mean projected vector over the first border ring.  The sum runs over
        # the border pixels themselves, matching the len(border1) divisor
        # (it previously ran over clump.indexes, which did not yield a mean).
        border1 = clump.border()
        borderPoint1 = numpy.zeros(projectedBlock.shape[0], dtype=numpy.double)
        for i, j in border1:
            borderPoint1 = borderPoint1 + projectedBlock[:, i, j]
        borderPoint1 = borderPoint1 / len(border1)

        # same for the second ring: the border of (clump + first ring)
        border2 = clump.border(list(clump.indexes) + list(border1))
        borderPoint2 = numpy.zeros(projectedBlock.shape[0], dtype=numpy.double)
        for i, j in border2:
            borderPoint2 = borderPoint2 + projectedBlock[:, i, j]
        borderPoint2 = borderPoint2 / len(border2)

        numSeeds = len(clump.seeds)
        numPixels = clump.size()
        # plain sorted lists of int pairs for the emitted output
        seeds = sorted((int(i), int(j)) for i, j in clump.seeds)
        indexes = sorted((int(i), int(j)) for i, j in clump.indexes)
        border1 = sorted((int(i), int(j)) for i, j in border1)
        border2 = sorted((int(i), int(j)) for i, j in border2)
        mean = list(clump.mean())
        meanSeeds = list(clump.meanSeeds())
        stdev = clump.stdev()
        specMean = clump.spectrumOf(clump.mean())
        specMeanSeeds = clump.spectrumOf(clump.meanSeeds())
        borderSpec1 = clump.spectrumOf(borderPoint1)
        borderSpec2 = clump.spectrumOf(borderPoint2)
        edgeScore1 = clump.edginess(border1)
        edgeScore2 = clump.edginess(border2)
        gmmScoreMean = clump.gmmscoreOf(clump.mean())
        gmmScoreMeanSeeds = clump.gmmscoreOf(clump.meanSeeds())
        fullSpectrum = fullSpectrumAt(clump.indexes)
        fullSpectrumSeeds = fullSpectrumAt(clump.seeds)
        fullSpectrumBorder1 = fullSpectrumAt(border1)
        fullSpectrumBorder2 = fullSpectrumAt(border2)

        r200 = float(clump.density(clump.meanSeeds(), 0.200))
        r500 = float(clump.density(clump.meanSeeds(), 0.500))

        # selection cuts; only clumps passing all of them are written to the output
        if r200 > 0.0 and r500 > 0.0 and gmmScoreMeanSeeds > 1.42 and gmmScoreMeanSeeds - gmmScoreMean > 0.12 and math.log10(r200) < -2.78 and math.log10(r500) < -1.30 and stdev > 0.067 and edgeScore2 < 0.61:
            clusterName = "cluster_{}".format(clusterNumber)
            output[clusterName] = {}
            clusterNumber += 1

            output[clusterName]["contours95"] = [{}]
            c95 = output[clusterName]["contours95"][0]

            # order the second-ring border pixels by angle around their
            # centroid so that they trace out a polygon
            x0 = numpy.mean([x for x, y in border2])
            y0 = numpy.mean([y for x, y in border2])
            order = numpy.argsort([math.atan2(y - y0, x - x0) for x, y in border2])
            c95["rowcolpolygon"] = [(int(x), int(y)) for x, y in numpy.array(border2)[order]]
            c95["lnglatpolygon"] = [getLngLat(x, y) for x, y in numpy.array(border2)[order]]

            c95["centroidInLngLat"] = getLngLat(x0, y0)
            c95["areaInPixels"] = numPixels

            # size of one pixel in meters, measured at the centroid
            pixelLength = abs(getMeters(*getLngLat(x0 + 0.5, y0))[0] - getMeters(*getLngLat(x0 - 0.5, y0))[0])
            pixelHeight = abs(getMeters(*getLngLat(x0, y0 + 0.5))[1] - getMeters(*getLngLat(x0, y0 - 0.5))[1])
            c95["areaInMeters"] = numPixels * pixelLength * pixelHeight
            c95["circumferenceInMeters"] = 0.5*(pixelLength + pixelHeight) * len(border1)

            rawGMMscore = float(logsumexp(model.compute_posteriors(numpy.array([clump.meanSeeds()]).T, reinit=True, normalize=False, logscale=True), 0)[0])
            rawKNNscore = float(r500)

            c95["score"] = rawGMMscore, rawKNNscore

            c95["other"] = {
                "numSeeds": numSeeds,
                "numPixels": numPixels,
                "seeds": seeds,
                "indexes": indexes,
                "border1": border1,
                "border2": border2,
                "mean": mean,
                "meanSeeds": meanSeeds,
                "stdev": stdev,
                "specMean": specMean,
                "specMeanSeeds": specMeanSeeds,
                "borderSpec1": borderSpec1,
                "borderSpec2": borderSpec2,
                "edgeScore1": edgeScore1,
                "edgeScore2": edgeScore2,
                "gmmScoreMean": gmmScoreMean,
                "gmmScoreMeanSeeds": gmmScoreMeanSeeds,
                "fullSpectrum": fullSpectrum,
                "fullSpectrumSeeds": fullSpectrumSeeds,
                "fullSpectrumBorder1": fullSpectrumBorder1,
                "fullSpectrumBorder2": fullSpectrumBorder2,
                "r200": r200,
                "r500": r500}

    binaryhadoop.emit(sys.stdout, metadata["originalDirName"], output, encoding=binaryhadoop.TYPEDBYTES_JSON)

    heartbeat("Calculating attributes took {} seconds\n".format(time.time() - startTime))

    totalTime = time.time() - globalStart
    heartbeat("Time to do everything: {} sec, which is {} min\n".format(totalTime, totalTime/60.0))