Esempio n. 1
0
def scatter(pts, nsamples=100, colormap=None, scale=1, thresh=0.001, ax=None, store=False):
    """Create a scatter plot of x and y points from an array or an RDD (through sampling).

    Can optionally use the values to determine colors.

    Parameters
    ----------
    pts : RDD of key-value pairs, or array-like
        Points to plot. For an RDD the values are sampled; anything else is
        converted with ``asarray``. Columns 0 and 1 are used as x and y.

    nsamples : int, optional, default = 100
        Number of records requested from ``takeSample`` (RDD input only)

    colormap : str or object with a ``calc`` method, optional, default = None
        If a string, colors are computed with ``Colorize(colormap, scale)``;
        otherwise ``colormap.calc`` is called on the points. If None, all
        points are drawn in 'indianred'.

    scale : number, optional, default = 1
        Passed to ``Colorize`` when ``colormap`` is a string

    thresh : number or None, optional, default = 0.001
        RDD input only: records whose standard deviation is not greater than
        ``thresh`` are dropped before sampling. None disables the filter.

    ax : axes object, optional, default = None
        Axes to draw on; ``pyplot.gca()`` is used when None

    store : bool, optional, default = False
        If True, also return the plotted points

    Returns
    -------
    ax, h : the axes and the handle returned by ``ax.scatter``
    pts : the plotted points (only when ``store`` is True)

    Raises
    ------
    Exception
        If sampling an RDD yields no points
    """

    if ax is None:
        ax = pyplot.gca()

    if isrdd(pts):
        values = pts.values()
        if thresh is not None:
            values = values.filter(lambda x: std(x) > thresh)
        pts = array(values.takeSample(False, nsamples))
        if len(pts) == 0:
            # the filter keeps records with std ABOVE thresh, so an empty
            # sample points to a threshold that is too high, not too low
            if thresh is not None:
                raise Exception('no samples found, most likely your threshold is too high')
            raise Exception('no samples found')
    else:
        pts = asarray(pts)

    if colormap is not None:
        # pass in strings or actual colormap objects
        if isinstance(colormap, basestring):
            clrs = Colorize(colormap, scale).calc(pts)
        else:
            clrs = colormap.calc(pts)
    else:
        clrs = 'indianred'

    h = ax.scatter(pts[:, 0], pts[:, 1], s=100, c=clrs, alpha=0.6, edgecolor='black', linewidth=0.2)

    if store is True:
        return ax, h, pts
    else:
        return ax, h
Esempio n. 2
0
    def calc(self, data):
        """Apply ``self.get`` to every record of ``data``.

        The size of the first record (the value of the first pair for an RDD)
        is handed to ``self.checkargs`` before anything is transformed.

        Parameters
        ----------
        data : RDD of key-value pairs, or an indexable collection of records

        Returns
        -------
        For an RDD, the result of ``mapValues``; otherwise the result of
        ``map`` over the records
        """
        distributed = isrdd(data)

        # validate against the first record only
        sample = data.first()[1] if distributed else data[0]
        self.checkargs(size(sample))

        if distributed:
            return data.mapValues(lambda value: self.get(value))
        return map(lambda record: self.get(record), data)
Esempio n. 3
0
    def calc(self, data):
        """Run ``self.get`` over each record in ``data``.

        ``self.checkargs`` is first called with the size of the first record
        (for an RDD, the value of the first key-value pair).

        Parameters
        ----------
        data : RDD of key-value pairs, or an indexable collection of records

        Returns
        -------
        ``data.mapValues(...)`` for an RDD, ``map(...)`` over the records
        otherwise
        """
        # local (non-RDD) collections are handled first, RDDs fall through
        if not isrdd(data):
            self.checkargs(size(data[0]))
            return map(lambda record: self.get(record), data)

        self.checkargs(size(data.first()[1]))
        return data.mapValues(lambda value: self.get(value))
Esempio n. 4
0
def scatter(pts,
            nsamples=100,
            colormap=None,
            scale=1,
            thresh=0.001,
            ax=None,
            store=False):
    """Create a scatter plot of x and y points from an array or an RDD (through sampling).

    Can optionally use the values to determine colors.

    Parameters
    ----------
    pts : RDD of key-value pairs, or array-like
        Points to plot. For an RDD the values are sampled; anything else is
        converted with ``asarray``. Columns 0 and 1 are used as x and y.

    nsamples : int, optional, default = 100
        Number of records requested from ``takeSample`` (RDD input only)

    colormap : str or object with a ``calc`` method, optional, default = None
        If a string, colors are computed with ``Colorize(colormap, scale)``;
        otherwise ``colormap.calc`` is called on the points. If None, all
        points are drawn in 'indianred'.

    scale : number, optional, default = 1
        Passed to ``Colorize`` when ``colormap`` is a string

    thresh : number or None, optional, default = 0.001
        RDD input only: records whose standard deviation is not greater than
        ``thresh`` are dropped before sampling. None disables the filter.

    ax : axes object, optional, default = None
        Axes to draw on; ``pyplot.gca()`` is used when None

    store : bool, optional, default = False
        If True, also return the plotted points

    Returns
    -------
    ax, h : the axes and the handle returned by ``ax.scatter``
    pts : the plotted points (only when ``store`` is True)

    Raises
    ------
    Exception
        If sampling an RDD yields no points
    """

    if ax is None:
        ax = pyplot.gca()

    if isrdd(pts):
        values = pts.values()
        if thresh is not None:
            values = values.filter(lambda x: std(x) > thresh)
        pts = array(values.takeSample(False, nsamples))
        if len(pts) == 0:
            # the filter keeps records with std ABOVE thresh, so an empty
            # sample points to a threshold that is too high, not too low
            if thresh is not None:
                raise Exception(
                    'no samples found, most likely your threshold is too high')
            raise Exception('no samples found')
    else:
        pts = asarray(pts)

    if colormap is not None:
        # pass in strings or actual colormap objects
        if isinstance(colormap, basestring):
            clrs = Colorize(colormap, scale).calc(pts)
        else:
            clrs = colormap.calc(pts)
    else:
        clrs = 'indianred'

    h = ax.scatter(pts[:, 0],
                   pts[:, 1],
                   s=100,
                   c=clrs,
                   alpha=0.6,
                   edgecolor='black',
                   linewidth=0.2)

    if store is True:
        return ax, h, pts
    else:
        return ax, h
Esempio n. 5
0
    def calc(self, data, func):
        """Base function for making clustering predictions.

        Calls ``func(centers, x)`` for the data, where ``centers`` is
        ``self.centers``.

        Parameters
        ----------
        data : RDD of key-value pairs, list, or ndarray
            An RDD is transformed with ``mapValues``; a list or a
            multi-dimensional ndarray is mapped element by element; a
            one-dimensional ndarray is passed to ``func`` as a single point.

        func : callable taking ``(centers, x)``

        Returns
        -------
        The transformed RDD, the mapped results, or a single result of
        ``func``. Any other input type falls through and yields None.
        """

        # small optimization to avoid serializing full model:
        # the helper closes over the local name, not over self
        centers = self.centers

        def assign(point):
            return func(centers, point)

        if isrdd(data):
            return data.mapValues(assign)

        if isinstance(data, ndarray) and data.ndim == 1:
            return assign(data)

        if isinstance(data, (list, ndarray)):
            return map(assign, data)
Esempio n. 6
0
    def calc(self, data, func):
        """Base function for making clustering predictions.

        Evaluates ``func(centers, x)`` with ``centers = self.centers``:
        per value for an RDD, per element for a list or a multi-dimensional
        ndarray, and once for a one-dimensional ndarray. Inputs of any other
        type produce None.
        """

        # small optimization to avoid serializing full model
        centers = self.centers

        if isrdd(data):
            return data.mapValues(lambda point: func(centers, point))

        if isinstance(data, list):
            return map(lambda point: func(centers, point), data)

        if not isinstance(data, ndarray):
            # unsupported input type: nothing to compute
            return None

        if data.ndim == 1:
            # a single point rather than a collection of points
            return func(centers, data)
        return map(lambda point: func(centers, point), data)
Esempio n. 7
0
def pointmap(data, colormap='polar', scale=1.0, ax=None):
    """Create a spatial point map from a collection of key-value pairs, using the
    keys as spatial indices, and the values to compute colors.

    Parameters
    ----------
    data : RDD of key-value pairs
        Keys are indexable; components 0 and 1 are used as the x and y
        coordinates. Values are turned into colors with
        ``Colorize(colormap, scale).calc``.

    colormap : optional, default = 'polar'
        Passed to ``Colorize``

    scale : number, optional, default = 1.0
        Passed to ``Colorize``

    ax : axes object, optional, default = None
        Axes to draw on; ``pyplot.gca()`` is used when None

    Returns
    -------
    ax, h : the axes and the handle returned by ``ax.scatter``

    Raises
    ------
    Exception
        If ``data`` is not an RDD
    """

    if ax is None:
        ax = pyplot.gca()

    if not isrdd(data):
        raise Exception('input must be an RDD')

    pts = Colorize(colormap, scale).calc(data).collect()

    # Comprehensions instead of tuple-parameter lambdas (removed in Python 3).
    # Only the first two key components are read, so 2-element keys work too.
    clrs = array([v for _, v in pts])
    x = [k[0] for k, _ in pts]
    y = [k[1] for k, _ in pts]
    h = ax.scatter(x, y, s=100, c=clrs, alpha=0.5, edgecolor='black', linewidth=0.2)
    return ax, h
Esempio n. 8
0
    def predict(self, data):
        """Predict the cluster that all data points belong to, and the similarity

        Parameters
        ----------
        data : RDD of (tuple, array) pairs, a list of arrays, or a single array
            The data to predict cluster assignments on

        Returns
        -------
        closest : RDD of (tuple, array) pairs, list of arrays, or a single array
            For each data point, gives an array with the closest center for each data point,
            and the correlation with that center
        """

        # bind the centers to a local name so the closures below do not
        # capture `self` (avoids serializing the full model with the lambda)
        centers = self.centers

        if isrdd(data):
            return data.mapValues(lambda x: KMeans.similarity(x, centers))
        elif isinstance(data, list):
            # a list holds several points: evaluate each one separately
            return [KMeans.similarity(x, centers) for x in data]
        else:
            # anything else is treated as a single point
            return KMeans.similarity(data, centers)
Esempio n. 9
0
    def predict(self, data):
        """Predict the cluster that all data points belong to, and the similarity

        Parameters
        ----------
        data : RDD of (tuple, array) pairs, a list of arrays, or a single array
            The data to predict cluster assignments on

        Returns
        -------
        closest : RDD of (tuple, array) pairs, list of arrays, or a single array
            For each data point, gives an array with the closest center for each data point,
            and the correlation with that center
        """

        # bind the centers to a local name so the closures below do not
        # capture `self` (avoids serializing the full model with the lambda)
        centers = self.centers

        if isrdd(data):
            return data.mapValues(lambda x: KMeans.similarity(x, centers))
        elif isinstance(data, list):
            # a list holds several points: evaluate each one separately
            return [KMeans.similarity(x, centers) for x in data]
        else:
            # anything else is treated as a single point
            return KMeans.similarity(data, centers)
Esempio n. 10
0
def imagemap(data, colormap='polar', scale=1.0, ax=None):
    """Create an image from a collection of key-value pairs, using the
    keys as spatial indices, and the values to compute colors.

    The RDD is colorized with ``Colorize(colormap, scale).calc``, packed into
    a local array with ``pack`` (``axes=2`` is passed for 3-element keys) and
    shown with ``ax.imshow`` after transposing the axes to ``[2, 1, 0]``.

    Returns the axes and the handle returned by ``imshow``. Raises Exception
    when ``data`` is not an RDD or its keys are not 2- or 3-dimensional.
    """

    if ax is None:
        ax = pyplot.gca()

    if not isrdd(data):
        raise Exception('input must be an RDD')

    # number of spatial dimensions is read off the first key
    ndim = len(data.first()[0])
    colored = Colorize(colormap, scale).calc(data)

    if ndim not in (2, 3):
        raise Exception('number of spatial dimensions for images must be 2 or 3')
    pixels = pack(colored, axes=2) if ndim == 3 else pack(colored)

    h = ax.imshow(transpose(pixels, [2, 1, 0]))
    return ax, h
Esempio n. 11
0
def imagemap(data, colormap='polar', scale=1.0, ax=None):
    """Create an image from a collection of key-value pairs, using the
    keys as spatial indices, and the values to compute colors.

    Only RDD input is accepted, and its keys must have 2 or 3 elements;
    otherwise an Exception is raised. The colorized records are packed into
    an array and displayed on ``ax`` (``pyplot.gca()`` by default) via
    ``imshow``. Returns ``(ax, h)`` with ``h`` the ``imshow`` handle.
    """

    if ax is None:
        ax = pyplot.gca()

    if not isrdd(data):
        raise Exception('input must be an RDD')

    ndim = len(data.first()[0])
    data = Colorize(colormap, scale).calc(data)

    # extra keyword arguments for pack, by number of spatial dimensions
    packargs = {2: {}, 3: {'axes': 2}}
    if ndim not in packargs:
        raise Exception(
            'number of spatial dimensions for images must be 2 or 3')
    pixels = pack(data, **packargs[ndim])

    h = ax.imshow(transpose(pixels, [2, 1, 0]))
    return ax, h
Esempio n. 12
0
def pointmap(data, colormap='polar', scale=1.0, ax=None):
    """Create a spatial point map from a collection of key-value pairs, using the
    keys as spatial indices, and the values to compute colors.

    Parameters
    ----------
    data : RDD of key-value pairs
        Keys are indexable; components 0 and 1 are used as the x and y
        coordinates. Values are turned into colors with
        ``Colorize(colormap, scale).calc``.

    colormap : optional, default = 'polar'
        Passed to ``Colorize``

    scale : number, optional, default = 1.0
        Passed to ``Colorize``

    ax : axes object, optional, default = None
        Axes to draw on; ``pyplot.gca()`` is used when None

    Returns
    -------
    ax, h : the axes and the handle returned by ``ax.scatter``

    Raises
    ------
    Exception
        If ``data`` is not an RDD
    """

    if ax is None:
        ax = pyplot.gca()

    if not isrdd(data):
        raise Exception('input must be an RDD')

    pts = Colorize(colormap, scale).calc(data).collect()

    # Comprehensions instead of tuple-parameter lambdas (removed in Python 3).
    # Only the first two key components are read, so 2-element keys work too.
    clrs = array([v for _, v in pts])
    x = [k[0] for k, _ in pts]
    y = [k[1] for k, _ in pts]
    h = ax.scatter(x,
                   y,
                   s=100,
                   c=clrs,
                   alpha=0.5,
                   edgecolor='black',
                   linewidth=0.2)
    return ax, h
Esempio n. 13
0
def _savelocal(result, filename, name, outputformat):
    """Write one local array to ``filename`` (extension added here) in ``outputformat``."""
    if outputformat == "matlab":
        savemat(filename + ".mat",
                mdict={name: result},
                oned_as='column',
                do_compression=True)
    elif outputformat == "text":
        savetxt(filename + ".txt", result, fmt="%.6f")
    elif outputformat == "image":
        arraytoim(result, filename)


def save(data,
         outputdir,
         outputfile,
         outputformat,
         sorting=False,
         dimsmax=None,
         dimsmin=None):
    """
    Save data to a variety of formats
    Automatically determines whether data is an array
    or an RDD and handle appropriately

    Parameters
    ----------
    data : RDD of (tuple, array) pairs, or numpy array
        The data to save

    outputdir : str
        Output directory, created if it does not exist

    outputfile : str
        Output filename (without extension); also used as the variable
        name inside MATLAB files

    outputformat : str
        Output format ("matlab", "text", or "image")

    sorting : optional, default = False
        Passed through to ``pack`` (RDD input only)

    dimsmax : optional, default = None
        RDD input only: stored as ``max`` on a ``Dimensions`` object that is
        passed to ``pack``. When None, ``getdims(data)`` is used instead.

    dimsmin : optional, default = None
        RDD input only: stored as ``min`` on the same ``Dimensions`` object;
        defaults to (1, 1, 1) when only ``dimsmax`` is given

    Raises
    ------
    ValueError
        If ``outputformat`` is not one of the supported formats
    Exception
        If ``dimsmin`` is given without ``dimsmax`` for RDD input
    """

    # fail loudly instead of silently writing nothing for a mistyped format
    if outputformat not in ("matlab", "text", "image"):
        raise ValueError(
            'unknown output format "%s", must be "matlab", "text", or "image"'
            % (outputformat,))

    if not os.path.exists(outputdir):
        os.makedirs(outputdir)

    filename = os.path.join(outputdir, outputfile)

    if not isrdd(data):
        _savelocal(data, filename, outputfile, outputformat)
        return

    # number of values per record decides between one file and one per index
    nout = size(data.first()[1])

    if dimsmax is not None:
        dims = Dimensions()
        dims.max = dimsmax
        dims.min = dimsmin if dimsmin is not None else (1, 1, 1)
    elif dimsmin is not None:
        raise Exception('cannot provide dimsmin without dimsmax')
    else:
        dims = getdims(data)

    if outputformat == "image":
        data = rescale(data)

    if nout > 1:
        for iout in range(nout):
            result = pack(data, ind=iout, dims=dims, sorting=sorting)
            _savelocal(result, filename + "-" + str(iout),
                       outputfile + str(iout), outputformat)
    else:
        result = pack(data, dims=dims, sorting=sorting)
        _savelocal(result, filename, outputfile, outputformat)
Esempio n. 14
0
def _savelocal(result, filename, name, outputformat):
    """Write one local array to ``filename`` (extension added here) in ``outputformat``."""
    if outputformat == "matlab":
        savemat(filename+".mat", mdict={name: result}, oned_as='column', do_compression=True)
    elif outputformat == "text":
        savetxt(filename+".txt", result, fmt="%.6f")
    elif outputformat == "image":
        arraytoim(result, filename)


def save(data, outputdir, outputfile, outputformat, sorting=False, dimsmax=None, dimsmin=None):
    """
    Save data to a variety of formats
    Automatically determines whether data is an array
    or an RDD and handle appropriately

    Parameters
    ----------
    data : RDD of (tuple, array) pairs, or numpy array
        The data to save

    outputdir : str
        Output directory, created if it does not exist

    outputfile : str
        Output filename (without extension); also used as the variable
        name inside MATLAB files

    outputformat : str
        Output format ("matlab", "text", or "image")

    sorting : optional, default = False
        Passed through to ``pack`` (RDD input only)

    dimsmax : optional, default = None
        RDD input only: stored as ``max`` on a ``Dimensions`` object that is
        passed to ``pack``. When None, ``getdims(data)`` is used instead.

    dimsmin : optional, default = None
        RDD input only: stored as ``min`` on the same ``Dimensions`` object;
        defaults to (1, 1, 1) when only ``dimsmax`` is given

    Raises
    ------
    ValueError
        If ``outputformat`` is not one of the supported formats
    Exception
        If ``dimsmin`` is given without ``dimsmax`` for RDD input
    """

    # fail loudly instead of silently writing nothing for a mistyped format
    if outputformat not in ("matlab", "text", "image"):
        raise ValueError('unknown output format "%s", must be "matlab", "text", or "image"'
                         % (outputformat,))

    if not os.path.exists(outputdir):
        os.makedirs(outputdir)

    filename = os.path.join(outputdir, outputfile)

    if not isrdd(data):
        _savelocal(data, filename, outputfile, outputformat)
        return

    # number of values per record decides between one file and one per index
    nout = size(data.first()[1])

    if dimsmax is not None:
        dims = Dimensions()
        dims.max = dimsmax
        dims.min = dimsmin if dimsmin is not None else (1, 1, 1)
    elif dimsmin is not None:
        raise Exception('cannot provide dimsmin without dimsmax')
    else:
        dims = getdims(data)

    if outputformat == "image":
        data = rescale(data)

    if nout > 1:
        for iout in range(nout):
            result = pack(data, ind=iout, dims=dims, sorting=sorting)
            _savelocal(result, filename+"-"+str(iout), outputfile+str(iout), outputformat)
    else:
        result = pack(data, dims=dims, sorting=sorting)
        _savelocal(result, filename, outputfile, outputformat)