Python pdf Examples, fastkde.fastKDE.pdf Python Examples

Example #1

0

Show file

File: test_simple.py Project: davidt0x/fastkde

def test_simple_2D():
    """Run fastKDE.pdf on two independent, seeded normal samples.

    The two variables deliberately differ in scale and offset
    (sigma 50 around 0.1 versus sigma 0.01 around -300).
    """
    np.random.seed(42)
    n_samples = int(2e5)
    var1 = 50 * np.random.normal(size=n_samples) + 0.1
    var2 = 0.01 * np.random.normal(size=n_samples) - 300

    # Self-consistent density estimate: returns the pdf and the list of axes
    estimate = fastKDE.pdf(var1, var2)
    myPDF, axes = estimate

    # One axis per input variable, in input order
    v1, v2 = axes

Example #2

0

Show file

def fastkde_2d(d_x, d_y, xmin=None, xmax=None, ymin=None, ymax=None):
    """Two-dimensional kernel density estimate with optional hard bounds.

    Thin wrapper around fastkde.fastKDE.pdf. The data are passed through
    mirror_2d before estimation (reflecting boundary conditions) and the
    result is cropped to the requested bounds.

    Parameters
    ----------
    d_x, d_y: numpy.array
        x/y coordinates of the data to estimate the density of

    xmin, xmax, ymin, ymax: float
        lower/upper prior bounds in the x/y coordinates
        optional, default None

    Returns
    -------
    x,y: numpy.array
        x/y-coordinates of the kernel density estimate. One-dimensional arrays
    p: numpy.array
        kernel density estimate. Two-dimensional array, indexed as p[y, x]

    """
    xmin, xmax = check_bounds(d_x, xmin, xmax)
    ymin, ymax = check_bounds(d_y, ymin, ymax)

    # A direction counts as "open" when at least one of its bounds is missing
    open_x = xmax is None or xmin is None
    open_y = ymax is None or ymin is None
    f = [open_x, open_y]

    # 1 for an open direction, 2 for a direction bounded on both sides
    weight_x = 2 - open_x
    weight_y = 2 - open_y

    mirrored_x, mirrored_y = mirror_2d(d_x, d_y, xmin, xmax, ymin, ymax)

    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        p, (x, y) = fastKDE.pdf(mirrored_x,
                                mirrored_y,
                                axisExpansionFactor=f,
                                numPointsPerSigma=10 * weight_x * weight_y)

    p *= weight_x
    p *= weight_y

    # Crop to the bounds; columns of p follow x and rows follow y
    if xmin is not None:
        keep = x >= xmin
        p = p[:, keep]
        x = x[keep]

    if xmax is not None:
        keep = x <= xmax
        p = p[:, keep]
        x = x[keep]

    if ymin is not None:
        keep = y >= ymin
        p = p[keep, :]
        y = y[keep]

    if ymax is not None:
        keep = y <= ymax
        p = p[keep, :]
        y = y[keep]

    return x, y, p

Example #3

0

Show file

File: bayes_error_rate.py Project: velezj/ml-chats

def rough_lipschitz_k( samples, EPS=1.0e-5 ):
    """Estimate a rough Lipschitz constant of the density of ``samples``.

    A density is estimated with fastKDE.pdf, then every pair of grid points
    is compared and the largest ratio |p1 - p2| / ||x1 - x2||_1 is returned.
    The cost is quadratic in the number of grid points.

    Parameters
    ----------
    samples : sequence
        N samples; reshaped to (N, D), so each sample may be a scalar or a
        length-D vector.
    EPS : float
        Scale of the uniform random jitter added to every coordinate
        (presumably to break exact ties in the data -- confirm with callers).

    Returns
    -------
    The largest finite-difference slope found, or None when no two grid
    points are distinct.
    """

    # estimate a distribution from samples using kernel density estimation
    N = len(samples)
    # dtype=float so the in-place jitter below also works for integer samples
    sa = np.array(samples, dtype=float).reshape( (N,-1) )
    sa += np.random.random( size=sa.shape ) * EPS
    D = sa.shape[1]

    # grab individual dimensions
    sa_dims = [ sa[:,i] for i in range(D) ]
    kde_pdf, kde_axes = fastKDE.pdf( *sa_dims )
    if D == 1:
        kde_axes = [ kde_axes ]

    # The pdf's dimensions are in reverse order of the returned axes, so the
    # axes are reversed before being paired with a multi_index of the pdf.
    pdf_axes = list(kde_axes)[::-1]

    def grid_point( idx ):
        # coordinates of the grid point at multi-index idx of kde_pdf
        return np.array( [ a[i] for i, a in zip(idx, pdf_axes) ] )

    # now compute max derivative over all pairs of grid points
    max_deriv = None
    it1 = np.nditer(kde_pdf,flags=['multi_index'])
    while not it1.finished:
        # the outer point is fixed for the whole inner loop
        p1 = it1.value
        x1 = grid_point( it1.multi_index )

        it2 = np.nditer(kde_pdf,flags=['multi_index'])
        while not it2.finished:
            x2 = grid_point( it2.multi_index )

            # L1 distance between the two grid points
            diff_x = np.linalg.norm( x1 - x2, ord=1 )
            if diff_x != 0:
                deriv = abs( p1 - it2.value ) / diff_x
                if max_deriv is None or deriv > max_deriv:
                    max_deriv = deriv

            it2.iternext()
        it1.iternext()

    return max_deriv

Example #4

0

Show file

def fastkde_1d(d, xmin=None, xmax=None):
    """One-dimensional kernel density estimate with optional hard bounds.

    Thin wrapper around fastkde.fastKDE.pdf. The data are passed through
    mirror_1d before estimation (reflecting boundary conditions) and the
    result is cropped to the requested bounds.

    Parameters
    ----------
    d: numpy.array
        Data to estimate the density of

    xmin, xmax: float
        lower/upper prior bounds
        optional, default None

    Returns
    -------
    x: numpy.array
        x-coordinates of the kernel density estimate
    p: numpy.array
        kernel density estimate

    """
    xmin, xmax = check_bounds(d, xmin, xmax)

    # "open" when at least one bound is missing
    is_open = xmax is None or xmin is None
    # 1 for an open range, 2 for a range bounded on both sides
    weight = 2 - is_open

    mirrored = mirror_1d(d, xmin, xmax)
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        p, x = fastKDE.pdf(mirrored,
                           axisExpansionFactor=is_open,
                           numPointsPerSigma=10 * weight)
    p *= weight

    # Crop the estimate to the requested bounds
    if xmin is not None:
        keep = x >= xmin
        p = p[keep]
        x = x[keep]

    if xmax is not None:
        keep = x <= xmax
        p = p[keep]
        x = x[keep]

    return x, p

Example #5

0

Show file

File: test_simple.py Project: davidt0x/fastkde

def test_simple_3D():
    """Run fastKDE.pdf on three independent samples and locate the mode."""
    np.random.seed(42)
    n_points = int(1e3)

    # Three independent samples, each drawn from a different distribution
    samples = (
        stats.norm.rvs(size=n_points),
        stats.gamma.rvs(2, size=n_points),
        stats.betaprime.rvs(5, 6, size=n_points),
    )

    # 3D PDF: further variables can simply be appended to the argument list,
    # though memory quickly becomes an issue. numPoints requests a coarser
    # PDF, which is calculated faster and with less memory.
    pdf, values = fastKDE.pdf(*samples, numPoints=[65, 65, 65])

    # Index of the mode of the distribution
    # (2D slices through the mode are what would be plotted)
    flat_mode = np.argmax(pdf.ravel())
    nmode = np.unravel_index(flat_mode, np.shape(pdf))

Example #6

0

Show file

File: scde.py Project: yketa/active_work

    def __init__(self, *vars, renormalise=True, **fastKDE_kwargs):
        """
        Estimate the probability density function of the input variables.
        (see fastkde.fastKDE.pdf)

        NOTE: self.axes lists the coordinates in the order of the input
              variables, whereas the dimensions of self.pdf are in reversed
              order (see fastkde.fastKDE.pdf).
              See active_particles.scde.PDF.evaluate for probability density
              function evaluation.

        Positional arguments
        --------------------
        vars : array-like
            Input variables.

        Parameters
        ----------
        renormalise : bool
            Call self.renormalise() once the density has been computed, to
            rescale its values by the integral over the computed volume.
            DEFAULT: True

        Optional keyword arguments
        --------------------------
        Forwarded unchanged to fastkde.fastKDE.pdf.
        """

        self.vars = vars
        self.n = len(self.vars)
        self.fastKDE_kwargs = fastKDE_kwargs

        estimate = fastKDE.pdf(*self.vars, **self.fastKDE_kwargs)
        self.pdf, self.axes = estimate
        if self.n == 1:
            # keep self.axes a list of arrays even for a single variable
            self.axes = [np.array(self.axes)]
        self._extended_axes()

        if renormalise:
            self.renormalise()

Example #7

0

Show file

File: Visualization_kernel_density_estimation.py Project: Diviyan-Kalainathan/causal-humans

# Record k of `pairs` is split into at most three comma-separated fields;
# fields 1 and 2 hold whitespace-separated numbers for x and y.
r = pairs[k].split(",", 2)

# The builtin float replaces the np.float alias, which was removed from NumPy
x = scale(np.array(r[1].split(), dtype=float))
y = scale(np.array(r[2].split(), dtype=float))

print(len(x))

# Keep only the points lying strictly within +/- maxstd on both axes
mask = (x > -maxstd) & (x < maxstd) & ( y > -maxstd) & ( y < maxstd)
x = x[mask]
y = y[mask]



numPoints = 32+1

pXY, axes = fastKDE.pdf(x, y, numPoints=numPoints,axisExpansionFactor = 0.1)


fig,axs = PP.subplots(1,2,figsize=(10,5))

#Plot a scatter plot of the incoming data
axs[0].plot(x,y,'k.',alpha=0.1)
axs[0].set_title('Original (x,y) data')

#Set axis labels
for i in (0,1):
    axs[i].set_xlabel('x')
    axs[i].set_ylabel('y')

Example #8

0

Show file

 # Convex hull of the points in `ha`; the first vertex is appended again so
 # that the outline drawn below is closed.
 hull = ConvexHull(ha)
 x = ha[hull.vertices, 0]
 y = ha[hull.vertices, 1]
 x = np.append(x, x[0])
 y = np.append(y, y[0])
 # NOTE(review): `m` looks like a map-projection object (callable, with
 # drawparallels/drawmeridians) -- confirm against the enclosing code.
 x1, y1 = m(x, y)
 m.plot(x1, y1, 'r-', lw=2)
 m.drawparallels(pars, labels=[1, 0, 0, 0], labelstyle='+/-')
 m.drawmeridians(mers)
 plt.title("Event %i" % evtnum)
 # Evaluation window: the data extent padded by 1 on every side
 xmin, xmax, ymin, ymax = p_lon2.min() - 1, p_lon2.max() + 1, p_lat2.min(
 ) - 1, p_lat2.max() + 1
 # Fast KDE based on O'Brien et al., Comput. Stat. Data Anal. 101, 148-160 (2016)
 xax = np.linspace(xmin, xmax, 513)
 yax = np.linspace(ymin, ymax, 513)
 myPDF, axes = fastKDE.pdf(p_lon2, p_lat2, axes=[xax, yax], numPoints=513)
 zz = myPDF
 # NOTE(review): these two zero arrays are overwritten immediately below and
 # are therefore never used.
 ax1 = np.zeros(len(axes[0]))
 ax2 = np.zeros(len(axes[1]))
 ax1 = axes[0]
 ax2 = axes[1]
 xx, yy = np.meshgrid(ax1, ax2)
 # Closed hull outline as an (n, 2) vertex array for point-in-polygon tests
 xy = np.zeros((len(x), 2))
 xy[:, 0] = x
 xy[:, 1] = y
 bbp = mplPath.Path(xy)
 # NOTE(review): mask_array starts as all zeros and the loop below only ever
 # writes 0 into it, so it stays all zeros -- a different fill value or
 # assigned value was probably intended; confirm.
 # NOTE(review): mask_array is indexed [i over ax1, j over ax2]; check this
 # against the axis order of myPDF before combining the two.
 mask_array = np.zeros((len(ax1), len(ax2)), dtype=int)
 for i in range(len(ax1)):
     for j in range(len(ax2)):
         if bbp.contains_point((ax1[i], ax2[j])):
             mask_array[i, j] = 0

Example #9

0

Show file

File: plot.py Project: VELA-CLARA-software/SimFrame

def plotScreenImage(beam,
                    keys=['x', 'y'],
                    scale=[1, 1],
                    iscale=1,
                    colormap=plt.cm.jet,
                    size=None,
                    grid=False,
                    marginals=False,
                    limits=None,
                    screen=False,
                    use_scipy=False,
                    subtract_mean=[False, False],
                    **kwargs):
    """Draw a 2-D density image of two beam coordinates.

    The density is estimated with fastKDE when it is installed (and
    use_scipy is false), otherwise with scipy.stats.gaussian_kde. It is
    normalised so that its maximum equals ``iscale`` and drawn with
    pcolormesh.

    Parameters
    ----------
    beam :
        Object indexable by the entries of ``keys``; each entry must support
        arithmetic and expose ``.units``. ``beam.filename`` is used for the
        figure title.
    keys : sequence of two keys
        Quantities plotted on the horizontal and vertical axes.
    scale : number or pair of numbers
        Multiplier applied to each quantity before plotting.
    iscale : number
        Value of the density maximum after normalisation.
    colormap :
        Colormap used for the image, the background and the marginal bars.
    size : number, pair of numbers or None
        Half-extent of each axis, divided by the factors returned by
        nice_array. None derives the extent from the density axes (or uses
        15 when ``screen`` is set). A sequence passed in is not modified.
    grid : bool
        Draw minor and major grid lines.
    marginals : bool
        Add bar plots of the summed density above and to the right.
    limits : number, pair, or pair of pairs, optional
        Explicit axis limits: a number L means (-L, L) on both axes, a pair
        is used for both axes, a pair of pairs gives x and y limits.
    screen : bool
        Draw a filled circle behind the image and clip the image to a circle.
    use_scipy : bool
        Use the SciPy estimator even when fastKDE is installed.
    subtract_mean : bool or pair of bool
        Subtract the mean of each quantity before plotting.
    **kwargs :
        Forwarded to fastKDE.pdf.

    Raises
    ------
    Exception
        If neither fastKDE nor SciPy is available.
    """
    #Do the self-consistent density estimate
    key1, key2 = keys
    if not isinstance(subtract_mean, (list, tuple)):
        subtract_mean = [subtract_mean, subtract_mean]
    if not isinstance(scale, (list, tuple)):
        scale = [scale, scale]
    if isinstance(size, (list, tuple)):
        # Private copy: size is rescaled in place further down, which must
        # neither alter the caller's list nor fail on a tuple.
        size = list(size)
    else:
        size = [size, size]

    x, f1, p1 = nice_array(
        scale[0] * (beam[key1] - subtract_mean[0] * np.mean(beam[key1])))
    y, f2, p2 = nice_array(
        scale[1] * (beam[key2] - subtract_mean[1] * np.mean(beam[key2])))

    u1, u2 = [beam[k].units for k in keys]
    ux = p1 + u1
    uy = p2 + u2

    labelx = f'{key1} ({ux})'
    labely = f'{key2} ({uy})'

    if fastKDE_installed and not use_scipy:
        myPDF, axes = fastKDE.pdf(x, y, **kwargs)
        v1, v2 = axes
    elif SciPy_installed:
        xmin = x.min()
        xmax = x.max()
        ymin = y.min()
        ymax = y.max()
        grid_x, grid_y = np.mgrid[xmin:xmax:100j, ymin:ymax:100j]
        positions = np.vstack([grid_x.ravel(), grid_y.ravel()])
        values = np.vstack([x, y])
        kernel = stats.gaussian_kde(values)
        # Use the fastKDE layout: 1-D axes and a density indexed as [y, x].
        # The code below takes min()/max() and np.diff() of v1 and v2, which
        # does not work on the 2-D mgrid arrays.
        v1 = grid_x[:, 0]
        v2 = grid_y[0, :]
        myPDF = np.reshape(kernel(positions), grid_x.shape).T
    else:
        raise Exception("fastKDE or SciPy required")
    # normalise the PDF so that its maximum equals iscale
    myPDF = myPDF / myPDF.max() * iscale

    # Initialise the plot objects
    # start with a square Figure

    # Add a gridspec with two rows and two columns and a ratio of 2 to 8 between
    # the size of the marginal axes and the main axes in both directions.
    # Also adjust the subplot parameters for a square plot.
    if marginals:
        fig = plt.figure(figsize=(12.41, 12.41))
        gs = fig.add_gridspec(2,
                              2,
                              width_ratios=(8, 2),
                              height_ratios=(2, 8),
                              left=0.1,
                              right=0.9,
                              bottom=0.1,
                              top=0.95,
                              wspace=0.05,
                              hspace=0.05)
        ax = fig.add_subplot(gs[1, 0])
        ax_histx = fig.add_subplot(gs[0, 0], sharex=ax)
        ax_histy = fig.add_subplot(gs[1, 1], sharey=ax)
    else:
        fig = plt.figure(figsize=(10, 10))
        fig.subplots_adjust(top=0.95)
        ax = fig.add_subplot()

    # Define ticks: four major and forty minor intervals across each axis
    if size[0] is None:
        use_size = False
        if not screen:
            xmin, xmax = [min(v1), max(v1)]
            ymin, ymax = [min(v2), max(v2)]
            size = [xmax - xmin, ymax - ymin]
        else:
            xmin, xmax = -15, 15
            ymin, ymax = -15, 15
            size = [15, 15]
        minvalx = xmin
        maxvalx = xmax
        meanvalx = (xmin + xmax) / 2.0 if not subtract_mean[0] else 0
        minvaly = ymin
        maxvaly = ymax
        meanvaly = (ymin + ymax) / 2.0 if not subtract_mean[1] else 0
    else:
        use_size = True
        maxvalx = size[0] / f1
        minvalx = -maxvalx
        meanvalx = (max(v1) + min(v1)) / 2.0 if not subtract_mean[0] else 0
        maxvaly = size[1] / f2
        minvaly = -maxvaly
        meanvaly = (max(v2) + min(v2)) / 2.0 if not subtract_mean[1] else 0
        size[0] = size[0] / f1
        size[1] = size[1] / f2

    major_ticksx = meanvalx + np.arange(minvalx, maxvalx +
                                        (maxvalx - minvalx) / 100,
                                        (maxvalx - minvalx) / 4)
    minor_ticksx = meanvalx + np.arange(minvalx, maxvalx +
                                        (maxvalx - minvalx) / 100,
                                        (maxvalx - minvalx) / 40)
    ax.set_xticks(major_ticksx)
    ax.set_xticks(minor_ticksx, minor=True)
    major_ticksy = meanvaly + np.arange(minvaly, maxvaly +
                                        (maxvaly - minvaly) / 100,
                                        (maxvaly - minvaly) / 4)
    minor_ticksy = meanvaly + np.arange(minvaly, maxvaly +
                                        (maxvaly - minvaly) / 100,
                                        (maxvaly - minvaly) / 40)
    ax.set_yticks(major_ticksy)
    ax.set_yticks(minor_ticksy, minor=True)

    if marginals:
        # Density summed over the rows (y), drawn as bars along x
        hist, bin_edges = myPDF.sum(axis=0)[:-1], v1
        hist_x = bin_edges[:-1] + np.diff(bin_edges) / 2
        hist_width = np.diff(bin_edges)
        hist_y, hist_f, hist_prefix = nice_array(hist / hist_width)
        ax_histx.bar(hist_x,
                     hist_y,
                     hist_width,
                     color=colormap(hist_y / max(hist_y)))

        # Density summed over the columns (x), drawn as bars along y
        hist, bin_edges = myPDF.sum(axis=1)[:-1], v2
        hist_x = bin_edges[:-1] + np.diff(bin_edges) / 2
        hist_width = np.diff(bin_edges)
        hist_y, hist_f, hist_prefix = nice_array(hist / hist_width)
        ax_histy.barh(hist_x,
                      hist_y,
                      hist_width,
                      color=colormap(hist_y / max(hist_y)))

    # Make a circle for the edges of the screen
    if screen:
        # size is a two-element list here, so the radius is taken from its
        # larger entry, as for the clipping circle below.
        draw_circle = plt.Circle((meanvalx, meanvaly),
                                 max(size) + 0.05,
                                 fill=True,
                                 ec='w',
                                 fc=colormap(0),
                                 zorder=-1)
        ax.add_artist(draw_circle)

    if screen:
        ax.set_facecolor('k')
    else:
        ax.set_facecolor(colormap(0))

    # Make a circle to clip the PDF
    if screen:
        circ = plt.Circle((meanvalx, meanvaly), max(size), facecolor='none')
    else:
        circ = plt.Circle((meanvalx, meanvaly),
                          3 * max(size),
                          facecolor='none')

    # Plot the PDF
    if grid:
        # Add a grid
        ax.grid(which='minor', color="w", alpha=0.3, clip_path=circ)
        ax.grid(which='major', color="w", alpha=0.55, clip_path=circ)
    # Set the image limits to slightly larger than the screen size.
    # bbox is built in every branch but is not attached to the axes.
    if limits:
        if isinstance(limits, (int, float)):
            limits = (-limits, limits)
        if np.array(limits).shape == (2, 2):
            ax.set_xlim(limits[0])
            ax.set_ylim(limits[1])
            bbox = plt.Rectangle((min(limits[0]), min(limits[1])),
                                 max(limits[0]) - min(limits[0]),
                                 max(limits[1]) - min(limits[1]),
                                 facecolor="none",
                                 edgecolor="none")
        elif np.array(limits).shape == (2, ):
            ax.set_xlim(limits)
            ax.set_ylim(limits)
            # make a bounding box for the limits
            bbox = plt.Rectangle((min(limits), min(limits)),
                                 max(limits) - min(limits),
                                 max(limits) - min(limits),
                                 facecolor="none",
                                 edgecolor="none")
    elif screen or use_size:
        ax.set_xlim([meanvalx - (size[0] + 0.5), meanvalx + (size[0] + 0.5)])
        ax.set_ylim([meanvaly - (size[1] + 0.5), meanvaly + (size[1] + 0.5)])
        bbox = plt.Rectangle((-(size[0] + 0.5), -(size[1] + 0.5)),
                             size[0] + 0,
                             size[1] + 0,
                             facecolor="none",
                             edgecolor="none")
    else:
        ax.set_xlim([min(v1), max(v1)])
        ax.set_ylim([min(v2), max(v2)])
        bbox = plt.Polygon([(min(v1), min(v2)), (min(v1), max(v2)),
                            (max(v1), max(v2)), (max(v1), min(v2))],
                           facecolor="none",
                           edgecolor="none")

    mesh = ax.pcolormesh(v1,
                         v2,
                         myPDF,
                         cmap=colormap,
                         zorder=1,
                         shading='auto')
    if screen:
        mesh.set_clip_path(circ)
    if marginals:
        plt.setp(ax_histx.get_xticklabels(), visible=False)
        plt.setp(ax_histy.get_yticklabels(), visible=False)
    ax.set_xlabel(labelx)
    ax.set_ylabel(labely)

    # Extract the screen name
    file, ext = os.path.splitext(os.path.basename(beam.filename))
    # Set the screen name as the title
    plt.suptitle(file)
    # Show the final image
    plt.draw()

Example #10

0

Show file

 def Fast2DKDE(self, X, Y):
     """Return (axis1, axis2, pdf) from a fastKDE density estimate of X and Y."""
     # imported here so that fastkde is only needed when this method is used
     from fastkde import fastKDE
     density, (first_axis, second_axis) = fastKDE.pdf(X, Y)
     return first_axis, second_axis, density

Example #11

0

Show file

 def Fast1DKDE(self, X):
     """Return (axis, pdf) from a fastKDE density estimate of X."""
     # imported here so that fastkde is only needed when this method is used
     from fastkde import fastKDE
     density, axis = fastKDE.pdf(X)
     return axis, density

Example #12

0

Show file

def test_fastkde_runs():
    """Check that fastKDE.pdf runs on a small normal sample without raising."""
    # 100 draws from a normal distribution with loc=-2 and scale=4
    samples = stats.norm(-2, 4).rvs(size=100)
    # only successful completion matters; the result is discarded
    fastKDE.pdf(samples)

Example #13

0

Show file

    def __init__(self,
                 *vars,
                 renormalise=True,
                 wrap_period=None,
                 wrap_method='linear',
                 wrap_fill_value=0,
                 wrap_processes=None,
                 **fastKDE_kwargs):
        """
        Compute probability density function.
        (see fastkde.fastKDE.pdf)

        NOTE: Coordinates in self.axes are in the same order as the input
              variables while it is in reversed order in self.pdf (see
              fastkde.fastKDE.pdf).
              See active_particles.scde.PDF.evaluate for probability density
              function evaluation.

        Positional arguments
        --------------------
        vars : array-like
            Input variables.

        Parameters
        ----------
        renormalise : bool
            Rescale probability density function values by the integral over
            the computed volume.
            DEFAULT: True
        wrap_period : float
            Period over which to wrap the computed probability density function.
            NOTE: If wrap_period is None, the computed probability density
                  function remains unwrapped.
            DEFAULT: None
        wrap_method : string
            Method of interpolation. (see scipy.interpolate.griddata)
            DEFAULT: linear
        wrap_fill_value : float
            Value used to fill in for requested points outside of the convex
            hull of the input points. (see scipy.interpolate.griddata)
            DEFAULT: 0
        wrap_processes : int
            Number of worker processes to use. (see multiprocessing.Pool)
            NOTE: If processes is None then processes = os.cpu_count().
            DEFAULT: None

        Optional keyword arguments
        --------------------------
        (see fastkde.fastKDE.pdf)
        """

        self.vars = vars
        self.n = len(self.vars)
        self.fastKDE_kwargs = fastKDE_kwargs

        self.pdf, self.axes = fastKDE.pdf(*self.vars, **self.fastKDE_kwargs)
        if self.n == 1:
            # keep self.axes a list of arrays even for a single variable
            self.axes = [np.array(self.axes)]
        self._extended_axes()

        # Identity test: None is a singleton, and `!=` would defer to the
        # argument's own comparison method.
        if wrap_period is not None:
            self.wrap(wrap_period,
                      method=wrap_method,
                      fill_value=wrap_fill_value,
                      processes=wrap_processes)

        # Renormalisation runs after any wrapping
        if renormalise:
            self.renormalise()

Example #14

0

Show file

def googleimage_seg(my_input_folder,
                    my_valid_folder,
                    seg_folder,
                    my_new_folder,
                    amplitudes=[100, 128, 128],
                    dimensions=[5, 5, 5],
                    kde=True,
                    chop=True,
                    chop_size=3500,
                    sample=False,
                    sample_size=700,
                    title='My Title'):
    """Take folder MY_INPUT_FOLDER of images and SEG_FOLDER of segmented images, create folder MY_NEW_FOLDER containing 3D Histograms of foreground and background pixel distributions.
        Images can be scraped from google images using https://github.com/hardikvasa/google-images-download.
        Images can be segmented in MATLAB using http://calvin.inf.ed.ac.uk/software/figure-ground-segmentation-by-transferring-window-masks/

    MY_INPUT_FOLDER -- Absolute filepath to the folder of images you wish to plot
    MY_VALID_FOLDER -- Absolute filepath to the folder containing valid_lab.pkl and valid_rgb.pkl
    SEG_FOLDER -- Absolute filepath to folder of segmented images (.png files)
    MY_NEW_FOLDER -- Absolute filepath to the folder to where plots and data will be exported
    AMPLITUDES -- Amplitudes of each axis [L a b]; axes extend from 0 to L, -a to a, and -b to b
    DIMENSIONS -- Dimensions of bins in CIELAB space [L a b]; all pixels within confines of a bin take on the same color value
    KDE -- If True, use a Kernel Density Estimate to smooth results in CIELAB space after data collection https://bitbucket.org/lbl-cascade/fastkde
    CHOP -- If True, plot only the first CHOP_SIZE most frequent values (only affects visualization)
    SAMPLE -- If True, use SAMPLE_SIZE to randomly thin out data if plots are too dense (only affects visualization)
    TITLE -- Plot title
    """

    #********************
    """INITIALIZATION"""
    #********************
    assert (not os.path.exists(my_new_folder))
    os.makedirs(my_new_folder)
    Lw, aw, bw = dimensions[0], dimensions[1], dimensions[
        2]  # Bin dimensions (widths)
    L_amp, a_amp, b_amp = amplitudes[0], amplitudes[1], amplitudes[
        2]  # Amplitude of each axis
    Lbins, abins, bbins = L_amp / Lw, a_amp * 2 / aw, b_amp * 2 / bw  # Number of 1D bins per axis
    L_list, a_list, b_list = [], [], []  # for figures
    L_blist, a_blist, b_blist = [], [], []  # for grounds
    unique_bins, unique_background_bins = {}, {}  # Initialize dictionaries
    Lvec, avec, bvec = np.linspace(0, L_amp, Lbins + 1), np.linspace(
        -a_amp, a_amp,
        abins + 1), np.linspace(-b_amp, b_amp,
                                bbins + 1)  # Vectors for each axis

    with open(my_valid_folder + '/valid_lab.pkl', 'rb') as pickle_load:
        valid_lab = pickle.load(pickle_load)
    with open(my_valid_folder + '/valid_rgb.pkl', 'rb') as pickle_load:
        valid_rgb = pickle.load(pickle_load)

    #************************
    """DEFINING FUNCTIONS"""

    #************************
    def bounder_v2(x, v):
        """Return the 1-based index of the bin of v that contains x.

        v is an evenly spaced, ascending vector of bin edges. Bin k covers
        the half-open interval (v[k - 1], v[k]]; a value equal to the lowest
        edge v[0] is placed in bin 1.
        """

        x0 = v[0]  # minimum value of vector
        w = v[1] - x0  # width of a bin on given axis
        # bin number is the distance between x & x0 in units of the bin
        # width, rounded up
        binnum = ceil((x - x0) / w)

        # edge case: x sits exactly on the lowest edge, which belongs to the
        # first bin (this must be an assignment, not a comparison)
        if binnum == 0:
            binnum = 1

        return binnum

    def binner_v2(Linput, ainput, binput):
        """Take an LAB value, axis vectors, return linear index of 3D bin"""

        # position of bin on each axis
        Lbin = bounder_v2(Linput, Lvec)
        abin = bounder_v2(ainput, avec)
        bbin = bounder_v2(binput, bvec)

        return [Lbin, abin, bbin]

    def sub2ind(ypos, xpos):
        """Take a 2D matrix coordinates, return linear index"""

        linear_index = imagewidth * ypos + xpos
        return linear_index

    def ind2sub(linear_index):
        """Take linear index, return 2D matrix coordinates"""

        ypos = linear_index // imagewidth
        xpos = linear_index % imagewidth
        return (ypos, xpos)

    def bins2lab(bin_list):
        """Take bin_list [Lbin, abin, bbin], return [L, a, b]"""
        L = Lw * bin_list[0] - Lw / 2
        a = -a_amp + aw * bin_list[1] - aw / 2
        b = -b_amp + bw * bin_list[2] - bw / 2
        return [L, a, b]

    def uniq(lst):
        """Yield the items of lst, skipping any item equal to its predecessor."""
        # fresh sentinel object, so the first item is always yielded
        previous = object()
        for current in lst:
            if not (current == previous):
                yield current
                previous = current

    images_skipped, total_images = 0, 0

    #*********************
    """DATA COLLECTION"""
    #*********************
    # iterate through folder of images
    for my_image in os.listdir(my_input_folder):
        total_images += 1
        my_image_path = my_input_folder + '/' + my_image  # reverse engineer file path to image
        try:
            rgb_img = mpimg.imread(my_image_path)  # array [height][width][RGB]
        except ValueError as error:
            print(error, ';', '%s was skipped' % my_image)
            images_skipped += 1
            continue
        except OSError as os_error:
            print(os_error, ';',
                  'No image was skipped')  # .DS_store, not an image
            total_images -= 1
            continue
        rgb_img = rgb_img / 255
        try:
            lab_img = color.rgb2lab(rgb_img)
        except ValueError as error:
            print(error, ';', '%s was skipped' % my_image)
            images_skipped += 1
            continue

        imshape = lab_img.shape  # (height,width,depth)
        imageheight, imagewidth = imshape[0], imshape[1]

        seglist = os.listdir(seg_folder)
        segnum, imagenum, count = 0, my_image[:3], 0
        while segnum != imagenum and count < len(seglist):
            segnum = seglist[count][:3]
            count += 1

        segs_skipped = 0
        if segnum == imagenum:
            exist_segmask = True
        else:
            exist_segmask = False
            segs_skipped += 1

        # iterate through pixels and add to unique bins
        if exist_segmask:
            my_seg_path = seg_folder + '/' + my_image + '.png'
            logicmask = mpimg.imread(
                my_seg_path)  # array [height][width][0 or 1]
            for xpos in range(imagewidth):
                for ypos in range(imageheight):
                    Linput, ainput, binput = lab_img[ypos, xpos][0], lab_img[
                        ypos, xpos][1], lab_img[ypos, xpos][2]
                    bin = str(binner_v2(Linput, ainput,
                                        binput))  # string b/c dictionary
                    my_vals = bins2lab(binner_v2(Linput, ainput, binput))
                    if not logicmask[ypos, xpos]:  # 0 = grounds, 1 = figures
                        if bin in unique_background_bins:
                            unique_background_bins[bin] += 1
                        else:
                            unique_background_bins[bin] = 1
                        L_blist.append(my_vals[0])
                        a_blist.append(my_vals[1])
                        b_blist.append(my_vals[2])
                    else:
                        if bin in unique_bins:
                            unique_bins[bin] += 1
                        else:
                            unique_bins[bin] = 1
                        L_list.append(my_vals[0])
                        a_list.append(my_vals[1])
                        b_list.append(my_vals[2])
        else:
            for xpos in range(imagewidth):
                for ypos in range(imageheight):
                    Linput, ainput, binput = lab_img[ypos, xpos][0], lab_img[
                        ypos, xpos][1], lab_img[ypos, xpos][2]
                    bin = str(binner_v2(Linput, ainput,
                                        binput))  # string b/c dictionary
                    if bin in unique_bins:
                        unique_bins[bin] += 1
                    else:
                        unique_bins[bin] = 1

                    my_vals = bins2lab(binner_v2(Linput, ainput, binput))
                    L_list.append(my_vals[0])
                    a_list.append(my_vals[1])
                    b_list.append(my_vals[2])

    print('Out of %s total images, %d were skipped' %
          (total_images, images_skipped))
    print('Out of %s images processed, %d were not segmented' %
          (total_images - images_skipped, segs_skipped))

    #********************
    """VISUALIZATION"""
    #********************

    #for 3D histogram
    plt.close()
    varL, vara, varb = np.asarray(L_list), np.asarray(a_list), np.asarray(
        b_list)

    if kde:
        myPDF, axes = fastKDE.pdf(varL, vara, varb)
        varL, vara, varb = axes
        varlist, vardensity = [], []
        for L in range(len(varL)):
            for a in range(len(vara)):
                for b in range(len(varb)):
                    varlist.append([varL[L], vara[a], varb[b]])
                    vardensity.append(myPDF[b][a][L])
    else:
        unique_bins_sorted = sorted(
            unique_bins.items(), key=operator.itemgetter(1),
            reverse=1)  #list of tuples sorted by descending frequency
        varlist, vardensity = [], []
        for unique_bin in unique_bins_sorted:
            varlist.append(bins2lab(eval(unique_bin[0])))
            vardensity.append(unique_bin[1])

    #sorted by descending frequency
    s_vardensity = sorted(vardensity, reverse=True)
    s_varlist = [
        bin for _, bin in sorted(zip(vardensity, varlist), reverse=True)
    ]
    if 0 in s_vardensity:
        last = s_vardensity.index(0)
        s_varlist, s_vardensity = s_varlist[:last + 1], s_vardensity[:last + 1]
        print("Color-density pairs with density=0 have been removed")
    my_colors_valid = []

    # For LAB values outside of RGB gamut, use spatial.cKDTree to find nearest LAB values inside of RGB gamut
    # https://stackoverflow.com/questions/10818546/finding-index-of-nearest-point-in-numpy-arrays-of-x-and-y-coordinates
    count = 0
    valid_lab_tree = spatial.cKDTree(valid_lab)
    for lab_color in s_varlist:
        if lab_color not in valid_lab:
            lab_color = valid_lab[valid_lab_tree.query(lab_color)[1]]
        my_colors_valid.append(lab_color)
        count += 1
        print(len(s_varlist) + 1 - count)

    # as of this line, s_varlist and s_vardensity are sorted by density

    #sorting by s_varlist so the while loop works
    my_densities = [
        bin for _, bin in sorted(zip(my_colors_valid, s_vardensity))
    ]
    my_colors_valid = sorted(my_colors_valid)

    colors_valid, densities = [], []

    #basically a linked list, sums repeated color-density pairs from using spatial.cKDTree above
    my_colors_valid.append("empty")
    my_densities.append("empty")

    currDense = my_densities[0]
    while my_colors_valid[0] != "empty":
        currColor, nextColor = my_colors_valid[0], my_colors_valid[1]
        if nextColor == currColor:
            currDense += my_densities[1]
            my_densities.remove(my_densities[0])
        else:
            colors_valid.append(currColor)
            densities.append(currDense)
            my_densities.remove(my_densities[0])
            currDense = my_densities[0]
        my_colors_valid.remove(currColor)

    s_varlist = [bin for bin, _ in sorted(zip(colors_valid, densities))]
    s_vardensity = sorted(densities)

    # sorted and chopped
    if chop:
        if chop < last:
            s_c_vardensity, s_c_varlist = s_vardensity[:
                                                       chop_size], s_varlist[:
                                                                             chop_size]
        else:
            print(
                'Chop size of %c >= number of non-zero values %f. No values were chopped.'
                % (chop, last))
            s_c_vardensity, s_c_varlist = s_vardensity, s_varlist
    else:
        s_c_vardensity, s_c_varlist = s_vardensity, s_varlist

    x, y, z = [], [], []
    for lab in s_c_varlist:
        x.append(lab[0])  #L
        y.append(lab[1])  #a
        z.append(lab[2])  #b

    colors = []
    for i in range(len(s_c_varlist)):
        lab_color = s_c_varlist[i]  #already binned
        # rgb_color = list(color.lab2rgb([[lab_color]])[0][0])
        rgb_color = list(valid_rgb[valid_lab.index(lab_color)][0])
        colors.append(rgb_color)
    colors = np.asarray(colors)

    plt.close()
    fig, ax = plt.subplots(subplot_kw=dict(projection='3d'))
    if kde:
        ax.scatter(z,
                   y,
                   x,
                   s=[foo * 80000 for foo in s_c_vardensity],
                   c=colors)
    else:
        ax.scatter(z, y, x, s=[foo / 100 for foo in s_c_vardensity], c=colors)
    ax.set_xlabel('b')
    ax.set_ylabel('a')
    ax.set_zlabel('L')
    ax.set_xlim([-b_amp, b_amp])
    ax.set_ylim([-a_amp, a_amp])
    ax.set_zlim([0, L_amp])
    plt.title(title)
    plt.savefig(my_new_folder + '/' + 'histogram 3D' + '.svg',
                format='svg',
                bbox_inches='tight')

    with open(my_new_folder + '/' + 'colors.pkl', 'wb') as pickle_file:
        pickle.dump(s_varlist, pickle_file, protocol=pickle.HIGHEST_PROTOCOL)

    with open(my_new_folder + '/' + 'densities.pkl', 'wb') as pickle_file:
        pickle.dump(s_vardensity,
                    pickle_file,
                    protocol=pickle.HIGHEST_PROTOCOL)

    # for 3D histogram of background pixels
    plt.close()
    varL, vara, varb = np.asarray(L_blist), np.asarray(a_blist), np.asarray(
        b_blist)
    if kde:
        myPDF, axes = fastKDE.pdf(varL, vara, varb)
        varL, vara, varb = axes
        varlist, vardensity = [], []
        for L in range(len(varL)):
            for a in range(len(vara)):
                for b in range(len(varb)):
                    varlist.append([varL[L], vara[a], varb[b]])
                    vardensity.append(myPDF[b][a][L])
    else:
        unique_background_bins_sorted = sorted(
            unique_background_bins.items(),
            key=operator.itemgetter(1),
            reverse=1)  #list of tuples sorted by descending frequency
        varlist, vardensity = [], []
        for unique_background_bin in unique_background_bins_sorted:
            varlist.append(bins2lab(eval(unique_background_bin[0])))
            vardensity.append(unique_background_bin[1])

    #sorted by descending frequency
    s_vardensity = sorted(vardensity, reverse=True)
    s_varlist = [
        bin for _, bin in sorted(zip(vardensity, varlist), reverse=True)
    ]

    if 0 in s_vardensity:
        last = s_vardensity.index(0)
        s_varlist, s_vardensity = s_varlist[:last + 1], s_vardensity[:last + 1]
        print("Color-density pairs with density=0 have been removed")
    my_colors_valid = []

    # For LAB values outside of RGB gamut, use spatial.cKDTree to find nearest LAB values inside of RGB gamut
    # https://stackoverflow.com/questions/10818546/finding-index-of-nearest-point-in-numpy-arrays-of-x-and-y-coordinates
    count = 0
    valid_lab_tree = spatial.cKDTree(valid_lab)
    for lab_color in s_varlist:
        if lab_color not in valid_lab:
            lab_color = valid_lab[valid_lab_tree.query(lab_color)[1]]
        my_colors_valid.append(lab_color)
        count += 1
        print(len(s_varlist) + 1 - count)

    # as of this line, s_varlist and s_vardensity are sorted by density

    #sorting by s_varlist so the while loop works
    my_densities = [
        bin for _, bin in sorted(zip(my_colors_valid, s_vardensity))
    ]
    my_colors_valid = sorted(my_colors_valid)

    colors_valid, densities = [], []

    # basically a linked list, sums repeated color-density pairs from using spatial.cKDTree above
    my_colors_valid.append("empty")
    my_densities.append("empty")

    currDense = my_densities[0]
    while my_colors_valid[0] != "empty":
        currColor, nextColor = my_colors_valid[0], my_colors_valid[1]
        if nextColor == currColor:
            currDense += my_densities[1]
            my_densities.remove(my_densities[0])
        else:
            colors_valid.append(currColor)
            densities.append(currDense)
            my_densities.remove(my_densities[0])
            currDense = my_densities[0]
        my_colors_valid.remove(currColor)

    s_varlist = [bin for bin, _ in sorted(zip(colors_valid, densities))]
    s_vardensity = sorted(densities)

    # sorted and chopped
    if chop:
        if chop < last:
            s_c_vardensity, s_c_varlist = s_vardensity[:
                                                       chop_size], s_varlist[:
                                                                             chop_size]
        else:
            print(
                'Chop size of %c >= number of non-zero values %f. No values were chopped.'
                % (chop, last))
            s_c_vardensity, s_c_varlist = s_vardensity, s_varlist
    else:
        s_c_vardensity, s_c_varlist = s_vardensity, s_varlist

    # sorted, chopped, and sampled
    if sample:
        s_c_varlist, s_c_vardensity = zip(*random.sample(
            list(zip(s_c_varlist, s_c_vardensity)), sample_size))

    x, y, z = [], [], []
    for lab in s_c_varlist:
        x.append(lab[0])  #L
        y.append(lab[1])  #a
        z.append(lab[2])  #b

    colors = []
    for i in range(len(s_c_varlist)):
        lab_color = s_c_varlist[i]  #already binned
        rgb_color = list(valid_rgb[valid_lab.index(lab_color)][0])
        colors.append(rgb_color)
    colors = np.asarray(colors)

    plt.close()
    fig, ax = plt.subplots(subplot_kw=dict(projection='3d'))
    # resizing points
    if kde:
        ax.scatter(z,
                   y,
                   x,
                   s=[foo * 80000 for foo in s_c_vardensity],
                   c=colors)
    else:
        ax.scatter(z, y, x, s=[foo / 100 for foo in s_c_vardensity], c=colors)
    ax.set_xlabel('b')
    ax.set_ylabel('a')
    ax.set_zlabel('L')
    ax.set_xlim([-b_amp, b_amp])
    ax.set_ylim([-a_amp, a_amp])
    ax.set_zlim([0, L_amp])
    plt.title(title + ' background')
    plt.savefig(my_new_folder + '/' + 'background histogram 3D' + '.svg',
                format='svg',
                bbox_inches='tight')

    with open(my_new_folder + '/' + 'background_colors.pkl',
              'wb') as pickle_file:
        pickle.dump(s_varlist, pickle_file, protocol=pickle.HIGHEST_PROTOCOL)

    with open(my_new_folder + '/' + 'background_densities.pkl',
              'wb') as pickle_file:
        pickle.dump(s_vardensity,
                    pickle_file,
                    protocol=pickle.HIGHEST_PROTOCOL)

Example #15

0

Show file

    def plot_density(self, max_number=9000):
        """Draw a normalized kernel density estimate (KDE) of the points over the map image.

        ``max_number`` is forwarded unchanged to ``self.get_points``. The
        density is evaluated on a regular grid covering ``0..self.width`` by
        ``0..self.height`` and rendered as filled contours on top of
        ``self.map_image``. Nothing is returned; the figure is shown
        interactively.
        """
        n_samples = 2049  # number of grid samples along each image axis

        # The plasma-based result is discarded; the call is kept because
        # transparent_cmap may modify the colormap it is given. Only the
        # gnuplot-based colormap is used for drawing.
        transparent_cmap(plt.cm.plasma)
        overlay_cmap = transparent_cmap(plt.cm.gnuplot)

        # Regular sampling grid spanning the image extent.
        rows, cols = self.get_points(max_number=max_number)
        row_axis = np.linspace(0, self.height, n_samples)
        col_axis = np.linspace(0, self.width, n_samples)
        density, grid_axes = fastKDE.pdf(cols,
                                         rows,
                                         axes=np.array([col_axis, row_axis]))

        # Replace negative estimates by the smallest positive one, then shift
        # and scale so the density spans [0, 1] and maps can be compared.
        smallest_positive = np.min(density[density > 0])
        density[density < 0] = smallest_positive
        density -= density.min()
        density /= density.max()

        # Fresh interactive figure: map image with the density contoured on top.
        plt.close("all")
        plt.ion()
        fig, ax = plt.subplots(1, 1)
        ax.imshow(self.map_image)
        ax.contourf(grid_axes[0],
                    grid_axes[1],
                    density,
                    15,
                    cmap=overlay_cmap,
                    antialiased=True)

        # Strip every margin, axis and tick so the image fills the canvas.
        for side, value in (("bottom", 0), ("top", 1), ("right", 1),
                            ("left", 0)):
            fig.subplots_adjust(**{side: value})
        plt.gca().set_axis_off()
        plt.subplots_adjust(top=1,
                            bottom=0,
                            right=1,
                            left=0,
                            hspace=0,
                            wspace=0)
        plt.margins(0, 0)
        for axis_name in ("xaxis", "yaxis"):
            getattr(plt.gca(), axis_name).set_major_locator(plt.NullLocator())
        plt.show()

Example #16

0

Show file

File: causal_convnet_tensorflow.py Project: Diviyan-Kalainathan/causal-humans

def featurizePairs(path, filepairs, filetargets, maxstd, size, featurizingmethod = 0, ratio=1, doubletrainset=True):
    """Turn (x, y) sample pairs into flattened 2D density features with one-hot targets.

    Parameters
    ----------
    path : str
        Prefix prepended to every file name (input and output).
    filepairs : str
        Name of the pairs file. The first line is treated as a header; each
        following line is split on the first two commas, and the second and
        third fields are whitespace-separated samples of x and y.
    filetargets : str
        Name of the comma-delimited targets file (expected values -1, 0, 1).
    maxstd : number
        After scaling, only samples with both coordinates strictly inside
        (-maxstd, maxstd) are kept.
    size : int
        Resolution handed to the featurizer.
    featurizingmethod : int
        0 uses ``getHisto``; 1 uses ``fastKDE.pdf`` and drops the first row
        and column of the estimate.
    ratio : float
        Fraction of the targets (from the start) that is processed.
    doubletrainset : bool
        If true, also emit the transposed density with the mirrored target.

    Returns
    -------
    (vectorizedPairs, vectorizedTarget) : tuple of 2D numpy arrays
        Row-aligned features and one-hot targets. Both are also written with
        ``np.savetxt`` next to the inputs.

    Raises
    ------
    ValueError
        If ``featurizingmethod`` is not 0 or 1, or if no pair could be
        featurized.
    """
    if featurizingmethod not in (0, 1):
        raise ValueError("featurizingmethod must be 0 or 1, got %r" % (featurizingmethod,))

    # Read all pair lines and drop the header.
    with open(path + filepairs) as f:
        pairs = f.readlines()[1:]

    y_te = np.genfromtxt(path + filetargets, delimiter=",")

    # Rows are collected in lists and stacked once at the end; this keeps
    # features and targets aligned even when the very first pair is skipped.
    feature_rows = []
    target_rows = []

    for k in range(0, int(len(y_te)*ratio)):

        if k % 100 == 0:
            print(k)

        r = pairs[k].split(",", 2)

        # np.float was removed from NumPy; the builtin float is the same dtype.
        x = scale(np.array(r[1].split(), dtype=float))
        y = scale(np.array(r[2].split(), dtype=float))

        mask = (x > -maxstd) & (x < maxstd) & (y > -maxstd) & (y < maxstd)
        x = x[mask]
        y = y[mask]

        try:
            if featurizingmethod == 0:
                pXY = getHisto(x, y, size, maxstd)
            else:
                pXY, _ = fastKDE.pdf(x, y, numPoints=size+1, axisExpansionFactor = 0.1)
                # Drop the first row and first column of the estimate.
                pXY = pXY[1:, 1:]

            arrayXY = np.ravel(pXY)
            if feature_rows and arrayXY.shape != feature_rows[0].shape:
                raise ValueError("feature length differs from previous pairs")

            # One-hot target for (X, Y) and for the swapped (Y, X) ordering.
            label = y_te[k]
            if label == -1:
                arrayTargetXY = np.array([1,0,0])
                arrayTargetYX = np.array([0,0,1])
            elif label == 0:
                arrayTargetXY = np.array([0,1,0])
                arrayTargetYX = np.array([0,1,0])
            elif label == 1:
                arrayTargetXY = np.array([0,0,1])
                arrayTargetYX = np.array([1,0,0])
            else:
                raise ValueError("unexpected target value %r" % (label,))
        except ValueError:
            # Skip the whole pair so features and targets stay row-aligned.
            print("pbkde nbpoints pairs " + str(k))
            continue

        feature_rows.append(arrayXY)
        target_rows.append(arrayTargetXY)
        if doubletrainset:
            feature_rows.append(np.ravel(np.transpose(pXY)))
            target_rows.append(arrayTargetYX)

    if not feature_rows:
        raise ValueError("no pair could be featurized")

    vectorizedPairs = np.vstack(feature_rows)
    vectorizedTarget = np.vstack(target_rows)

    # maxstd and size are numbers, so they must be converted before being
    # concatenated into the output file names.
    prefix = path + "vectorized" + "_maxstd" + str(maxstd) + "_size" + str(size)
    np.savetxt(prefix + filepairs, vectorizedPairs)
    np.savetxt(prefix + filetargets, vectorizedTarget)

    return vectorizedPairs,vectorizedTarget

Example #17

0

Show file

def googleimage_lin(my_input_folder,
                    my_new_folder,
                    my_valid_folder,
                    amplitudes=[100, 128, 128],
                    dimensions=[5, 5, 5],
                    seg=True,
                    tau=3,
                    kde=True,
                    chop=True,
                    chop_size=3500,
                    sample=False,
                    sample_size=700,
                    title='My Title'):
    """Take folder MY_INPUT_FOLDER of images, create folder MY_NEW_FOLDER containing segmented images and/or 3D Histogram of pixel distribution.
        Images can be scraped from google images using https://github.com/hardikvasa/google-images-download.

    MY_INPUT_FOLDER -- Absolute filepath to the folder of images you wish to segment and/or plot
    MY_NEW_FOLDER -- Absolute filepath to the folder to where segmented images and/or plots and data will be exported; must not exist yet
    MY_VALID_FOLDER -- Absolute filepath to folder containing valid_lab.pkl and valid_rgb.pkl
    AMPLITUDES -- Amplitudes of each axis [L a b]; axes extend from 0 to L, -a to a, and -b to b
    DIMENSIONS -- Dimensions of bins in CIELAB space [L a b]; all pixels within confines of a bin take on the same color value
    SEG -- If True, segment each image using an approximation of the Lin et. al, 2013 method http://vis.stanford.edu/papers/semantically-resonant-colors
    TAU -- Segmentation parameter, in CIELAB units
    KDE -- If True, use a Kernel Density Estimate to smooth results in CIELAB space after data collection https://bitbucket.org/lbl-cascade/fastkde
    CHOP -- If True, plot only the first CHOP_SIZE most frequent values (only affects visualization)
    SAMPLE -- If True, use SAMPLE_SIZE to randomly thin out data if plots are too dense (only affects visualization)
    TITLE -- Plot title

    Writes into MY_NEW_FOLDER: one PNG per segmented image, 'histogram 3D.svg',
    and 'colors.pkl' / 'densities.pkl' holding the color list and the matching
    density list, both ordered by descending density. Returns nothing.
    """

    #********************
    # INITIALIZATION
    #********************
    assert (not os.path.exists(my_new_folder))
    os.makedirs(my_new_folder)

    Lw, aw, bw = dimensions[0], dimensions[1], dimensions[2]  # Bin dimensions (widths)
    L_amp, a_amp, b_amp = amplitudes[0], amplitudes[1], amplitudes[2]  # Amplitude of each axis
    # Number of 1D bins per axis. np.linspace needs an integer sample count,
    # so the (possibly fractional) quotient is truncated.
    # NOTE(review): when an amplitude is not a multiple of the bin width, the
    # spacing of the axis vector differs slightly from the width used in bins2lab.
    Lbins = int(L_amp / Lw)
    abins = int(a_amp * 2 / aw)
    bbins = int(b_amp * 2 / bw)
    L_list, a_list, b_list = [], [], []  # Destination for pixel values
    unique_bins = {}  # Key is the (L, a, b) bin coordinate tuple, value is absolute frequency
    total_images, images_skipped = 0, 0

    # Vector for each axis
    L_vec = np.linspace(0, L_amp, Lbins + 1)
    a_vec = np.linspace(-a_amp, a_amp, abins + 1)
    b_vec = np.linspace(-b_amp, b_amp, bbins + 1)

    # NOTE(review): pickle executes arbitrary code on load; only point
    # MY_VALID_FOLDER at trusted files.
    with open(my_valid_folder + '/valid_lab.pkl', 'rb') as pickle_load:
        valid_lab = pickle.load(pickle_load)
    with open(my_valid_folder + '/valid_rgb.pkl', 'rb') as pickle_load:
        valid_rgb = pickle.load(pickle_load)

    #************************
    # DEFINING FUNCTIONS
    #************************
    def bounder_v2(x, v):
        """Take x and evenly-spaced ordered vector, return 1-based bin number of x on that axis."""

        x0 = v[0]  # minimum value of vector
        w = v[1] - x0  # width of a bin on given axis
        binnum = ceil((x - x0) / w)  # distance between x & x0 in bin widths, rounded up

        # edge case: a value exactly on the lower edge belongs to the first bin
        if binnum == 0:
            binnum = 1

        return binnum

    def binner_v2(L_input, a_input, b_input):
        """Take an LAB value, return its bin coordinates [L_bin, a_bin, b_bin]."""

        return [
            bounder_v2(L_input, L_vec),
            bounder_v2(a_input, a_vec),
            bounder_v2(b_input, b_vec)
        ]

    def sub2ind(ypos, xpos):
        """Take 2D matrix coordinates, return linear index."""

        return imagewidth * ypos + xpos  # imagewidth is defined on a per-image basis

    def neighbors(ypos, xpos):
        """Return all 8 neighboring coordinates, row by row from top-left to bottom-right."""

        return [[ypos + dy, xpos + dx]
                for dy in (-1, 0, 1)
                for dx in (-1, 0, 1)
                if (dy, dx) != (0, 0)]

    def bins2lab(bin_list):
        """Take bin_list [Lbin, abin, bbin] and return the bin center [L, a, b]."""

        L = Lw * bin_list[0] - Lw / 2
        a = -a_amp + aw * bin_list[1] - aw / 2
        b = -b_amp + bw * bin_list[2] - bw / 2
        return [L, a, b]

    def grouper(ypos, xpos):
        """Group pixels using approximation of Lin et al., 2013 method. Variables unspecified in this function body are nonlocally defined."""

        nonlocal num_grouped
        my_ind = sub2ind(ypos, xpos)  # get linear index of coordinate
        if my_ind not in ungrouped:  # pixel already grouped
            return
        for n_ypos, n_xpos in neighbors(ypos, xpos):
            # skip neighbors outside the image
            if not (0 <= n_ypos < imageheight and 0 <= n_xpos < imagewidth):
                continue
            # CIELAB distance between the pixel and its neighbor
            dist = np.linalg.norm(lab_img[ypos, xpos] -
                                  lab_img[n_ypos, n_xpos])
            # join the pixel to the background component if it is close
            # enough in color to a neighbor that already belongs to it
            if dist <= tau and my_array[n_ypos, n_xpos] == num_groups:
                ungrouped.remove(my_ind)
                my_array[ypos, xpos] = my_array[n_ypos, n_xpos]
                num_grouped += 1
                break  # don't need to look at any other neighbors

    def pos2bin(ypos, xpos):
        """Bin pixel from given image at specified position. Variables unspecified in this function body are nonlocally defined."""

        Linput, ainput, binput = lab_img[ypos, xpos][0], lab_img[
            ypos, xpos][1], lab_img[ypos, xpos][2]
        bin_coords = binner_v2(Linput, ainput, binput)
        bin_key = tuple(bin_coords)  # tuple b/c dictionary key
        unique_bins[bin_key] = unique_bins.get(bin_key, 0) + 1

        my_vals = bins2lab(bin_coords)
        L_list.append(my_vals[0])
        a_list.append(my_vals[1])
        b_list.append(my_vals[2])

    #*********************
    # DATA COLLECTION
    #*********************
    for my_image in os.listdir(my_input_folder):  # iterate through folder of images
        total_images += 1
        border_pixels = []  # initialize list for border pixels
        my_image_path = my_input_folder + '/' + my_image
        try:
            rgb_img = mpimg.imread(my_image_path)  # array [height][width][RGB]
        except ValueError as error:
            print(error, ';', '%s was skipped' % my_image)
            images_skipped += 1
            continue
        except OSError as os_error:
            print(os_error, ';',
                  'No image was skipped')  # .DS_store, not an image
            total_images -= 1
            continue
        # NOTE(review): assumes 0-255 channel values — confirm for formats
        # that the reader already returns as floats.
        rgb_img = rgb_img / 255
        try:
            lab_img = color.rgb2lab(rgb_img)  # np.array(height, width, [LAB])
        except ValueError as error:
            print(error, ';', '%s was skipped' % my_image)
            images_skipped += 1
            continue

        imshape = lab_img.shape  # (height, width, depth)
        imageheight, imagewidth = imshape[0], imshape[1]

        # NOTE(review): debugging residue — converts every pixel back to RGB
        # one at a time and prints 'HELP' when a channel leaves [0, 1]. Very
        # slow; consider removing.
        for xpos in range(imagewidth):
            for ypos in range(imageheight):
                roundtrip = color.lab2rgb(np.array([[lab_img[ypos,
                                                             xpos]]]))[0][0]
                if any(channel < 0 or channel > 1
                       for channel in roundtrip[:3]):
                    print('HELP')

        # True when the image was segmented into background + foreground
        segmented = False

        if seg:
            # Iterate through border pixels |=|, add CIELAB values to border_pixels list
            for xpos in range(1, imagewidth - 1):
                for ypos in [0, imageheight - 1]:
                    border_pixels.append(tuple(lab_img[ypos, xpos]))
            for ypos in range(imageheight):
                for xpos in [0, imagewidth - 1]:
                    border_pixels.append(tuple(lab_img[ypos, xpos]))

            # b/w background exists if >= 75% of border pixels are within tau of b/w (Lin et. al)
            white, black = np.array((100, 0, 0)), np.array((0, 0, 0))
            border_bool_white = [
                np.linalg.norm(pixel - white) <= tau for pixel in border_pixels
            ]
            border_bool_black = [
                np.linalg.norm(pixel - black) <= tau for pixel in border_pixels
            ]
            whiteborder = sum(border_bool_white) / len(
                border_bool_white) >= 0.75
            blackborder = sum(border_bool_black) / len(
                border_bool_black) >= 0.75
            border = whiteborder or blackborder

            # segmentation if a border exists
            if border:
                print('%s has a border' % my_image)
                # linear indices of to-be-grouped pixels; a set keeps the
                # membership test and removal in grouper() constant-time
                ungrouped = set(range(imageheight * imagewidth))
                my_array = np.zeros(
                    (imageheight, imagewidth),
                    dtype="int")  # representation of segmentation
                # top-left and bottom-right pixels are assumed to have the
                # same color as the background and seed the component
                my_array[0, 0] = 1
                my_array[imageheight - 1, imagewidth - 1] = 1
                ungrouped.discard(0)
                ungrouped.discard(imageheight * imagewidth - 1)
                num_groups = 1  # of different connected components (1, 2,...)
                num_grouped = 2  # of pixels in the background component so far

                # start from top left of image
                for ypos in range(imageheight):
                    for xpos in range(imagewidth):
                        grouper(ypos, xpos)

                # start from bottom right of image
                for ypos in reversed(range(imageheight)):
                    for xpos in reversed(range(imagewidth)):
                        grouper(ypos, xpos)

                # if there are still ungrouped values, there must be at least 2 connected components
                if ungrouped:
                    num_groups += 1

                segmented = num_groups > 1

        # Bin every pixel, except background pixels of a segmented image.
        for xpos in range(imagewidth):
            for ypos in range(imageheight):
                if segmented and my_array[ypos, xpos] == 1:
                    continue
                pos2bin(ypos, xpos)

        if segmented:
            plt.close()
            plt.imshow(my_array)
            try:
                plt.savefig(my_new_folder + '/' + my_image + '.png',
                            format='png',
                            bbox_inches='tight')
            except ValueError:
                print(my_image + ' was processed but not saved')

    print('Out of %s total images seen, %d were skipped' %
          (total_images, images_skipped))

    #*******************
    # VISUALIZATION
    #*******************
    # for 3D histogram
    plt.close()
    varL, vara, varb = np.asarray(L_list), np.asarray(a_list), np.asarray(
        b_list)
    varlist, vardensity = [], []
    if kde:
        myPDF, axes = fastKDE.pdf(varL, vara, varb)
        varL, vara, varb = axes
        # the density array is indexed [b][a][L], i.e. in reverse axis order
        for L in range(len(varL)):
            for a in range(len(vara)):
                for b in range(len(varb)):
                    varlist.append([varL[L], vara[a], varb[b]])
                    vardensity.append(myPDF[b][a][L])
    else:
        unique_bins_sorted = sorted(
            unique_bins.items(), key=operator.itemgetter(1),
            reverse=True)  # list of tuples sorted by descending frequency
        for bin_key, frequency in unique_bins_sorted:
            varlist.append(bins2lab(bin_key))
            vardensity.append(frequency)

    # sorted by descending frequency, keeping each color with its density
    ranked = sorted(zip(vardensity, varlist), reverse=True)
    s_vardensity = [density for density, _ in ranked]
    s_varlist = [lab_color for _, lab_color in ranked]

    # drop everything from the first zero density onwards
    if 0 in s_vardensity:
        last = s_vardensity.index(0)
        s_varlist, s_vardensity = s_varlist[:last], s_vardensity[:last]
        print("Color-density pairs with density=0 have been removed")

    # For LAB values outside of RGB gamut, use spatial.cKDTree to find nearest LAB values inside of RGB gamut
    # https://stackoverflow.com/questions/10818546/finding-index-of-nearest-point-in-numpy-arrays-of-x-and-y-coordinates
    my_colors_valid = []
    valid_lab_tree = spatial.cKDTree(valid_lab)
    for count, lab_color in enumerate(s_varlist, start=1):
        if lab_color not in valid_lab:
            lab_color = valid_lab[valid_lab_tree.query(lab_color)[1]]
        my_colors_valid.append(lab_color)
        print(len(s_varlist) + 1 - count)  # progress countdown

    # Snapping to the gamut can map several colors onto the same valid color.
    # Sort by color so duplicates are adjacent, then sum their densities.
    colors_valid, densities = [], []
    for lab_color, density in sorted(zip(my_colors_valid, s_vardensity)):
        if colors_valid and colors_valid[-1] == lab_color:
            densities[-1] += density
        else:
            colors_valid.append(lab_color)
            densities.append(density)

    # Re-rank by descending density; colors and densities are sorted together
    # so that index i of one list still describes index i of the other.
    ranked = sorted(zip(densities, colors_valid), reverse=True)
    s_vardensity = [density for density, _ in ranked]
    s_varlist = [lab_color for _, lab_color in ranked]

    # sorted and chopped
    s_c_vardensity, s_c_varlist = s_vardensity, s_varlist
    if chop:
        num_values = len(s_varlist)
        if chop_size < num_values:
            s_c_vardensity = s_vardensity[:chop_size]
            s_c_varlist = s_varlist[:chop_size]
        else:
            print(
                'Chop size of %d >= number of non-zero values %d. No values were chopped.'
                % (chop_size, num_values))

    # to thin out plot, use a random sample (never larger than the data)
    if sample:
        candidates = list(zip(s_c_varlist, s_c_vardensity))
        picked = random.sample(candidates, min(sample_size, len(candidates)))
        s_c_varlist = [lab_color for lab_color, _ in picked]
        s_c_vardensity = [density for _, density in picked]

    x = [float(lab[0]) for lab in s_c_varlist]  #L
    y = [float(lab[1]) for lab in s_c_varlist]  #a
    z = [float(lab[2]) for lab in s_c_varlist]  #b

    # color points by position in CIELAB space
    colors = np.asarray([
        list(valid_rgb[valid_lab.index(lab_color)][0])
        for lab_color in s_c_varlist  # already binned
    ])

    plt.close()
    fig, ax = plt.subplots(subplot_kw=dict(projection='3d'))
    # resizing points
    if kde:
        ax.scatter(z,
                   y,
                   x,
                   s=[foo * 80000 for foo in s_c_vardensity],
                   c=colors)
    else:
        ax.scatter(z, y, x, s=[foo / 100 for foo in s_c_vardensity], c=colors)
    ax.set_xlabel('b')
    ax.set_ylabel('a')
    ax.set_zlabel('L')
    ax.set_xlim([-b_amp, b_amp])
    ax.set_ylim([-a_amp, a_amp])
    ax.set_zlim([0, L_amp])
    plt.title(title)
    plt.savefig(my_new_folder + '/' + 'histogram 3D' + '.svg',
                format='svg',
                bbox_inches='tight')

    with open(my_new_folder + '/' + 'colors.pkl', 'wb') as pickle_file:
        pickle.dump(s_varlist, pickle_file, protocol=pickle.HIGHEST_PROTOCOL)

    with open(my_new_folder + '/' + 'densities.pkl', 'wb') as pickle_file:
        pickle.dump(s_vardensity,
                    pickle_file,
                    protocol=pickle.HIGHEST_PROTOCOL)

Example #18

0

Show file

File: simple_3d.py Project: LBL-EESA/fastkde

            'size' : '15', \
            'weight' : 'bold'}
# apply the font settings collected in `font` to all text
mpl.rc('font', **font)
mpl.rc('axes', labelweight='bold'
       )  # needed for bold axis labels in more recent version of matplotlib

N = int(1e3)  # number of points drawn from each distribution

# generate 3 independent samples of size N from 3 different distributions
# (normal, gamma with shape argument 2, beta prime with arguments 5 and 6)
x_1 = stats.norm.rvs(size=N)
x_2 = stats.gamma.rvs(2, size=N)
x_3 = stats.betaprime.rvs(5, 6, size=N)

# calculate the 3D PDF; `values` receives the second item returned by
# fastKDE.pdf (presumably the axis values of the PDF grid -- confirm)
pdf, values = fastKDE.pdf(x_1, x_2, x_3, numPoints=[
    65, 65, 65
])  # simply add more variables to the argument list for higher dimensions
# note though that memory quickly becomes an issue
# the numPoints argument results in a coarser PDF--but one that is calculated
# faster (and with less memory)

# calculate the index of the mode of the distribution
# (we'll plot 2D slices through the mode)
# first the position of the maximum in the flattened PDF, then the same
# position converted back to a multi-dimensional index
i_mode_ravel = argmax(pdf.ravel())
nmode = unravel_index(i_mode_ravel, shape(pdf))

# set the levels: 64 values running from 0 up to the PDF value at the mode
clevels = linspace(0, pdf[nmode], 64)

# create the plot: a single row of three panels
fig, axs = PP.subplots(1, 3, figsize=(15, 5))

Example #19

0

Show file

File: TrainConvnet.py Project: Diviyan-Kalainathan/causal-humans

def featurizePairs(pathinput, pairsfilename, targetfilename, publicinfofilename,
                   filepairsvectorized, filetargetsvectorized, maxstd, size,
                   featurizingmethod=0, ratio=1, doubletrainset=True,
                   isTestSet=False, onlynumerical=False):
    """Featurize variable pairs as flattened 2D densities and save them.

    The CSV files designated by the inputs are read (indexed by ``SampleID``)
    and concatenated. For every selected pair, the whitespace-separated
    samples in columns ``A`` and ``B`` are standardized with ``scale``,
    restricted to the points where both lie strictly inside
    ``(-maxstd, maxstd)``, and turned into an array that is flattened into one
    feature row.

    Parameters
    ----------
    pathinput, pairsfilename, targetfilename, publicinfofilename: sequence of str
        Parallel sequences; entry ``i`` designates the file
        ``pathinput[i] + <name>[i] + ".csv"``. ``targetfilename`` is not
        read when ``isTestSet`` is true.

    filepairsvectorized, filetargetsvectorized: str
        Output files written with ``np.savetxt``. The target file is only
        written when ``isTestSet`` is false.

    maxstd: float
        Clipping bound applied to the standardized samples; also forwarded to
        ``getHisto``.

    size: int
        Forwarded to ``getHisto`` (method 0); the fastKDE based methods are
        called with ``numPoints=size + 1`` and the first row and column of
        their output are dropped.

    featurizingmethod: int
        0: ``getHisto``; 1: ``fastKDE.pdf``; 2: both ``fastKDE.conditional``
        estimates stacked horizontally; 3: both conditional estimates
        stacked depth-wise.
        optional, default 0

    ratio: float
        Only the first ``int(number_of_pairs * ratio)`` pairs are processed.
        optional, default 1

    doubletrainset: bool
        Also emit, after every pair, the row (and target) of the swapped
        pair.
        optional, default True

    isTestSet: bool
        When true no targets are read, written or returned.
        optional, default False

    onlynumerical: bool
        When true, only pairs whose ``A type`` and ``B type`` are both
        ``"Numerical"`` are processed.
        optional, default False

    Returns
    -------
    vectorizedPairs: numpy.array
        One flattened feature row per emitted pair.
    vectorizedTarget: numpy.array
        One-hot targets aligned with ``vectorizedPairs``; only returned when
        ``isTestSet`` is false.

    Raises
    ------
    ValueError
        If ``featurizingmethod`` is unknown, ``pathinput`` is empty, a target
        is not one of -1, 0, 1, or no pair could be featurized.
    """
    # Fail early and explicitly instead of hitting an unbound local later on.
    if featurizingmethod not in (0, 1, 2, 3):
        raise ValueError("unknown featurizingmethod: " + str(featurizingmethod))
    if len(pathinput) == 0:
        raise ValueError("pathinput is empty, there is nothing to featurize")

    has_targets = not isTestSet

    # One-hot encoding of a target as (row for the pair, row for the swapped pair).
    target_encoding = {
        -1: (np.array([1, 0, 0]), np.array([0, 0, 1])),
        0: (np.array([0, 1, 0]), np.array([0, 1, 0])),
        1: (np.array([0, 0, 1]), np.array([1, 0, 0])),
    }

    print(pathinput)

    pairs_frames = []
    publicinfo_frames = []
    target_frames = []
    for i in range(len(pathinput)):
        pairs_frames.append(
            pd.read_csv(pathinput[i] + pairsfilename[i] + ".csv", index_col="SampleID"))
        publicinfo_frames.append(
            pd.read_csv(pathinput[i] + publicinfofilename[i] + ".csv", index_col="SampleID"))
        if has_targets:
            target_frames.append(
                pd.read_csv(pathinput[i] + targetfilename[i] + ".csv", index_col="SampleID"))

    # DataFrame.append no longer exists in pandas 2.0; pd.concat is the
    # equivalent that also works on older versions.
    dfpairsGlobal = pd.concat(pairs_frames)
    dfpublicinfoGlobal = pd.concat(publicinfo_frames)
    if has_targets:
        dftargetGlobal = pd.concat(target_frames)

    print("Total number of pairs to featurize : " + str(dfpairsGlobal.shape[0]))

    # Rows are collected in lists and stacked once at the end, which avoids
    # re-copying the whole matrix for every pair.
    feature_rows = []
    target_rows = []

    for k in range(int(dfpairsGlobal.shape[0] * ratio)):

        if k % 100 == 0:
            print(k)

        A = dfpairsGlobal['A'].iloc[k]
        B = dfpairsGlobal['B'].iloc[k]

        publicinfoA = dfpublicinfoGlobal['A type'].iloc[k]
        publicinfoB = dfpublicinfoGlobal['B type'].iloc[k]

        if has_targets:
            target = dftargetGlobal['Target'].iloc[k]

        both_numerical = publicinfoA == "Numerical" and publicinfoB == "Numerical"
        if onlynumerical and not both_numerical:
            continue

        # The builtin float replaces the np.float alias removed in NumPy 1.24.
        x = scale(np.array(A.split(), dtype=float))
        y = scale(np.array(B.split(), dtype=float))

        mask = (x > -maxstd) & (x < maxstd) & (y > -maxstd) & (y < maxstd)
        x = x[mask]
        y = y[mask]

        pYX = None
        try:
            if featurizingmethod == 0:
                pXY = getHisto(x, y, size, maxstd)

            elif featurizingmethod == 1:
                pXY, _ = fastKDE.pdf(x, y, numPoints=size + 1, axisExpansionFactor=0.1)
                pXY = delete(pXY, s_[0], axis=0)
                pXY = delete(pXY, s_[0], axis=1)

            elif featurizingmethod == 2:
                pOfXGivenY, _ = fastKDE.conditional(x, y, numPoints=size + 1, axisExpansionFactor=0.1)
                pOfYGivenX, _ = fastKDE.conditional(y, x, numPoints=size + 1, axisExpansionFactor=0.1)

                pOfXGivenY = delete(pOfXGivenY, s_[0], axis=0)
                pOfXGivenY = delete(pOfXGivenY, s_[0], axis=1)

                pOfYGivenX = delete(pOfYGivenX, s_[0], axis=0)
                pOfYGivenX = delete(pOfYGivenX, s_[0], axis=1)

                pXY = np.hstack((pOfYGivenX, pOfXGivenY))

            else:
                pOfXGivenY, _ = fastKDE.conditional(x, y, numPoints=size + 1, axisExpansionFactor=0.1)
                pOfYGivenX, _ = fastKDE.conditional(y, x, numPoints=size + 1, axisExpansionFactor=0.1)

                pXY = np.dstack((pOfYGivenX, pOfXGivenY))
                pXY = delete(pXY, s_[0], axis=0)
                pXY = delete(pXY, s_[0], axis=1)

                # The swapped pair uses the two conditionals in reverse order.
                pYX = np.dstack((pOfXGivenY, pOfYGivenX))
                pYX = delete(pYX, s_[0], axis=0)
                pYX = delete(pYX, s_[0], axis=1)

            arrayXY = np.ravel(pXY)

            # A row of a different length cannot be stacked with the others;
            # skip the pair like any other failed estimate.
            if feature_rows and arrayXY.shape != feature_rows[0].shape:
                raise ValueError("feature row length differs from previous pairs")

        except ValueError:
            print("pbkde nbpoints pairs " + str(k))
            continue

        if has_targets:
            if target not in target_encoding:
                raise ValueError("unexpected target " + str(target) + " for pair " + str(k))
            arrayTargetXY, arrayTargetYX = target_encoding[target]

        feature_rows.append(arrayXY)
        if doubletrainset:
            if pYX is not None:
                feature_rows.append(np.ravel(pYX))
            else:
                feature_rows.append(np.ravel(np.transpose(pXY)))

        if has_targets:
            target_rows.append(arrayTargetXY)
            if doubletrainset:
                target_rows.append(arrayTargetYX)

    if not feature_rows:
        raise ValueError("no pair could be featurized, nothing to save")

    # A single row is kept one-dimensional, as it always has been.
    if len(feature_rows) == 1:
        vectorizedPairs = feature_rows[0]
    else:
        vectorizedPairs = np.vstack(feature_rows)

    np.savetxt(filepairsvectorized, vectorizedPairs)

    if not has_targets:
        return vectorizedPairs

    if len(target_rows) == 1:
        vectorizedTarget = target_rows[0]
    else:
        vectorizedTarget = np.vstack(target_rows)

    np.savetxt(filetargetsvectorized, vectorizedTarget)
    return vectorizedPairs, vectorizedTarget

Example #20

0

Show file

File: kde.py Project: binghongcha08/pyQMD

# -*- coding: utf-8 -*-
"""
Created on Tue May 24 18:05:04 2016

@author: bing
"""

#!python

import numpy as np
from fastkde import fastKDE
import pylab as PP

#Generate two random variables dataset (representing 200000 pairs of datapoints)
#N has to be an integer: numpy does not accept a float such as 2e5 as a size
N = int(2e5)
var1 = 50*np.random.normal(size=N) + 0.1
var2 = 0.01*np.random.normal(size=N) - 300

#Do the self-consistent density estimate
myPDF,axes = fastKDE.pdf(var1,var2)

#Extract the axes from the axis list
v1,v2 = axes

#Plot contours of the PDF; they should be a set of concentric ellipsoids
#centered on (0.1, -300). Comparatively, the y axis range should be tiny and
#the x axis range should be large
PP.contour(v1,v2,myPDF)
PP.show()

Example #21

0

Show file

File: causal_convet_Keras.py Project: Diviyan-Kalainathan/causal-humans

def featurizePairs(pathinput, pairsfilename, targetfilename, publicinfofilename,
                   pathoutput, maxstd, size, featurizingmethod=0, ratio=1,
                   doubletrainset=True):
    """Featurize numerical variable pairs as flattened 2D densities.

    The CSV files designated by the inputs are read (indexed by ``SampleID``)
    and concatenated. For every pair whose ``A type`` and ``B type`` are both
    ``"Numerical"``, the whitespace-separated samples in columns ``A`` and
    ``B`` are standardized with ``scale``, restricted to the points where both
    lie strictly inside ``(-maxstd, maxstd)``, and turned into an array that
    is flattened into one feature row. Features and one-hot targets are
    written with ``np.savetxt`` to files under ``pathoutput`` whose names
    encode ``maxstd``, ``size`` and ``featurizingmethod``.

    Parameters
    ----------
    pathinput, pairsfilename, targetfilename, publicinfofilename: sequence of str
        Parallel sequences; entry ``i`` designates the file
        ``pathinput[i] + <name>[i] + ".csv"``. The first pairs and target
        names also end the output file names.

    pathoutput: str
        Prefix of the two output files.

    maxstd: float
        Clipping bound applied to the standardized samples; also forwarded to
        ``getHisto``.

    size: int
        Forwarded to ``getHisto`` (method 0); the fastKDE based methods are
        called with ``numPoints=size + 1`` and the first row and column of
        their output are dropped.

    featurizingmethod: int
        0: ``getHisto``; 1: ``fastKDE.pdf``; 2: both ``fastKDE.conditional``
        estimates stacked horizontally.
        optional, default 0

    ratio: float
        Only the first ``int(number_of_pairs * ratio)`` pairs are processed.
        optional, default 1

    doubletrainset: bool
        Also emit, after every pair, the row built from the transposed array
        together with the target of the swapped pair.
        optional, default True

    Returns
    -------
    vectorizedPairs: numpy.array
        One flattened feature row per emitted pair.
    vectorizedTarget: numpy.array
        One-hot targets aligned with ``vectorizedPairs``.

    Raises
    ------
    ValueError
        If ``featurizingmethod`` is unknown, ``pathinput`` is empty, a target
        is not one of -1, 0, 1, or no pair could be featurized.
    """
    # Fail early and explicitly instead of hitting an unbound local later on.
    if featurizingmethod not in (0, 1, 2):
        raise ValueError("unknown featurizingmethod: " + str(featurizingmethod))
    if len(pathinput) == 0:
        raise ValueError("pathinput is empty, there is nothing to featurize")

    # One-hot encoding of a target as (row for the pair, row for the swapped pair).
    target_encoding = {
        -1: (np.array([1, 0, 0]), np.array([0, 0, 1])),
        0: (np.array([0, 1, 0]), np.array([0, 1, 0])),
        1: (np.array([0, 0, 1]), np.array([1, 0, 0])),
    }

    print(pathinput)

    pairs_frames = []
    target_frames = []
    publicinfo_frames = []
    for i in range(len(pathinput)):
        pairs_frames.append(
            pd.read_csv(pathinput[i] + pairsfilename[i] + ".csv", index_col="SampleID"))
        target_frames.append(
            pd.read_csv(pathinput[i] + targetfilename[i] + ".csv", index_col="SampleID"))
        publicinfo_frames.append(
            pd.read_csv(pathinput[i] + publicinfofilename[i] + ".csv", index_col="SampleID"))

    # DataFrame.append no longer exists in pandas 2.0; pd.concat is the
    # equivalent that also works on older versions.
    dfpairsGlobal = pd.concat(pairs_frames)
    dftargetGlobal = pd.concat(target_frames)
    dfpublicinfoGlobal = pd.concat(publicinfo_frames)

    print(dfpairsGlobal.shape[0])

    # Rows are collected in lists and stacked once at the end, which avoids
    # re-copying the whole matrix for every pair.
    feature_rows = []
    target_rows = []

    for k in range(int(dfpairsGlobal.shape[0] * ratio)):

        if k % 100 == 0:
            print(k)

        A = dfpairsGlobal['A'].iloc[k]
        B = dfpairsGlobal['B'].iloc[k]
        target = dftargetGlobal['Target'].iloc[k]
        publicinfoA = dfpublicinfoGlobal['A type'].iloc[k]
        publicinfoB = dfpublicinfoGlobal['B type'].iloc[k]

        if not (publicinfoA == "Numerical" and publicinfoB == "Numerical"):
            continue

        # The builtin float replaces the np.float alias removed in NumPy 1.24.
        x = scale(np.array(A.split(), dtype=float))
        y = scale(np.array(B.split(), dtype=float))

        mask = (x > -maxstd) & (x < maxstd) & (y > -maxstd) & (y < maxstd)
        x = x[mask]
        y = y[mask]

        try:
            if featurizingmethod == 0:
                pXY = getHisto(x, y, size, maxstd)

            elif featurizingmethod == 1:
                pXY, _ = fastKDE.pdf(x, y, numPoints=size + 1, axisExpansionFactor=0.1)
                pXY = delete(pXY, s_[0], axis=0)
                pXY = delete(pXY, s_[0], axis=1)

            else:
                pOfXGivenY, _ = fastKDE.conditional(x, y, numPoints=size + 1, axisExpansionFactor=0.1)
                pOfYGivenX, _ = fastKDE.conditional(y, x, numPoints=size + 1, axisExpansionFactor=0.1)

                pOfXGivenY = delete(pOfXGivenY, s_[0], axis=0)
                pOfXGivenY = delete(pOfXGivenY, s_[0], axis=1)

                pOfYGivenX = delete(pOfYGivenX, s_[0], axis=0)
                pOfYGivenX = delete(pOfYGivenX, s_[0], axis=1)

                pXY = np.hstack((pOfYGivenX, pOfXGivenY))

            arrayXY = np.ravel(pXY)

            # A row of a different length cannot be stacked with the others;
            # skip the pair like any other failed estimate.
            if feature_rows and arrayXY.shape != feature_rows[0].shape:
                raise ValueError("feature row length differs from previous pairs")

        except ValueError:
            print("pbkde nbpoints pairs " + str(k))
            continue

        if target not in target_encoding:
            raise ValueError("unexpected target " + str(target) + " for pair " + str(k))
        arrayTargetXY, arrayTargetYX = target_encoding[target]

        feature_rows.append(arrayXY)
        target_rows.append(arrayTargetXY)
        if doubletrainset:
            feature_rows.append(np.ravel(np.transpose(pXY)))
            target_rows.append(arrayTargetYX)

    if not feature_rows:
        raise ValueError("no pair could be featurized, nothing to save")

    # A single row is kept one-dimensional, as it always has been.
    if len(feature_rows) == 1:
        vectorizedPairs = feature_rows[0]
        vectorizedTarget = target_rows[0]
    else:
        vectorizedPairs = np.vstack(feature_rows)
        vectorizedTarget = np.vstack(target_rows)

    output_prefix = pathoutput + "vectorized" + "_maxstd" + str(maxstd) + "_size" + str(size) + "_method" + str(featurizingmethod)
    np.savetxt(output_prefix + pairsfilename[0], vectorizedPairs)
    np.savetxt(output_prefix + targetfilename[0], vectorizedTarget)

    return vectorizedPairs, vectorizedTarget

Example #22

0

Show file

from fastkde import fastKDE
from scipy import stats
import pylab as PP
import matplotlib as mpl
import numpy as np

# Default plot fonts: a bold serif face of size 15.
font = dict(family='serif', size='15', weight='bold')
mpl.rc('font', **font)
# The explicit axes labelweight is needed to get bold axis labels in more
# recent versions of matplotlib.
mpl.rc('axes', labelweight='bold')

# Build a dataset of two random variables: N pairs of normally distributed
# points, the first with a large spread around 0.1 and the second with a tiny
# spread around -300.
N = int(2e5)
var1 = 0.1 + 50 * np.random.normal(size=N)
var2 = -300 + 0.01 * np.random.normal(size=N)

# Run the self-consistent density estimate.
myPDF, axes = fastKDE.pdf(var1, var2)

# The axis list holds one axis per variable.
v1, v2 = axes

# Contour the PDF. The contours should be concentric ellipsoids centered on
# (0.1, -300), with a tiny y axis range compared to a large x axis range.
PP.contour(v1, v2, myPDF)
PP.show()

Example #23

0

Show file

# Fragment of a Python 2 script (print statements): it prints the elapsed time
# after each step of building and displaying a 2D density image of the beam
# x/y positions. `start`, `beam`, `time`, `np` and `fastKDE` are defined
# outside this fragment.
# NOTE(review): time.clock() was removed in Python 3.8; a port would need
# another timer such as time.perf_counter().
print 'Read beam files = ', time.clock() - start
# Beam coordinates scaled by 100 (presumably a unit conversion -- confirm).
m1 = 100 * beam.x
m2 = 100 * beam.y
print max(beam.x)
# Bounds of the evaluation grid and of the plotted region.
xmin = -0.03
xmax = 0.03
ymin = -0.03
ymax = 0.03

# 100 x 100 grid spanning the bounds (a complex step in mgrid is a point count).
X, Y = np.mgrid[xmin:xmax:100j, ymin:ymax:100j]
print 'Grid = ', time.clock() - start
# Grid coordinates flattened into a 2 x 10000 array.
positions = np.vstack([X.ravel(), Y.ravel()])
print 'Positions = ', time.clock() - start
# `values` is not used anywhere else in this fragment.
values = [m1, m2]
print 'Values = ', time.clock() - start
kernel, axes = fastKDE.pdf(m1, m2)
print 'Kernel = ', time.clock() - start
# NOTE(review): the other examples on this page index and slice the first
# value returned by fastKDE.pdf as an array, so calling it like a function
# here looks like a leftover from the scipy.stats.gaussian_kde API and would
# presumably raise a TypeError -- confirm before relying on this fragment.
kpos = kernel(positions)
print 'Kpos = ', time.clock() - start
# Bring the evaluated values back to the shape of the grid.
Z = np.reshape(kpos.T, X.shape)
print 'Z= ', time.clock() - start
import matplotlib.pyplot as plt
fig, ax = plt.subplots()
# Show the gridded values as an image covering the grid bounds.
ax.imshow(np.rot90(Z),
          cmap=plt.cm.gist_earth_r,
          extent=[xmin, xmax, ymin, ymax])
# ax.plot(m1[::10], m2[::10], 'k.', markersize=2)
ax.set_xlim([xmin, xmax])
ax.set_ylim([ymin, ymax])
plt.show()