Example no. 1
def corr_multiple_frames(features, boundary=None, r_min=None, r_max=None,
                         dr=None):
    d = features.Duty.values[0]  # duty cycle of this dataset (unused below)
    area = calculate_area_from_boundary(boundary)
    radius = features.r.mean()
    group = features.groupby('frame')
    N = group.x.count().mean()
    density = N / area

    res = group.apply(dists_and_orders, t=r_max * radius).values
    dists, orders, N_queried = list(zip(*res))
    dists = np.concatenate(dists)
    orders = np.concatenate(orders)
    N_queried = np.sum(N_queried)

    r_values = np.arange(r_min, r_max, dr) * radius

    # ideal-gas normalization: expected counts in an annulus, 2*pi*r*dr * density * N
    divisor = 2 * np.pi * r_values * (dr * radius) * density * N_queried

    g = histogram1d(dists, len(r_values),
                    (np.min(r_values), np.max(r_values)))
    g6 = histogram1d(dists, len(r_values),
                     (np.min(r_values), np.max(r_values)),
                     weights=orders)
    g = g / divisor
    g6 = g6 / divisor
    res = pd.DataFrame({'r': r_values, 'g': g, 'g6': g6})
    return res
Example no. 2
def ChiSquaredDistance(array1, array2, K=16):
    """Chi-squared distance between the K-bin histograms of two samples.

    Both inputs are assumed to be normalized to [0, 1]. Uses the
    module-level `fastHist` flag to choose between the fast_histogram
    and numpy implementations.
    """
    if fastHist:
        hist1 = histogram1d(array1, bins=K, range=(0, 1))
        hist2 = histogram1d(array2, bins=K, range=(0, 1))
    else:
        hist1, _ = np.histogram(array1, bins=K, range=(0, 1))
        hist2, _ = np.histogram(array2, bins=K, range=(0, 1))

    # normalize counts to frequencies
    hist1d = hist1 / len(array1)
    hist2d = hist2 / len(array2)

    # chi-squared distance: sum of 2*(h1 - h2)**2 / (h1 + h2), with a small
    # epsilon in the denominator to avoid division by zero in empty bins
    den = hist1d + hist2d + 0.00001
    num = (hist1d - hist2d) ** 2
    sumi = 2 * num / den

    return np.sum(sumi)
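A minimal usage sketch (hypothetical data; assumes numpy is imported as np and that the module-level fastHist flag is defined, as the function expects). Inputs are assumed to be normalized to [0, 1]:

fastHist = False  # use the np.histogram branch; set True if fast_histogram is available
a = np.random.rand(1000)
b = np.random.rand(1000)
print(ChiSquaredDistance(a, a))  # 0.0 for identical samples
print(ChiSquaredDistance(a, b))  # small value for two similar uniform samples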
Example no. 3
def mass_function_list(mass,
                       volume,
                       nb,
                       low,
                       upp,
                       scatter=0.2,
                       correction=1.0,
                       nsample=100):
    """Estimate mass function with potential additional scatter."""
    bins = np.linspace(low, upp, nb + 1)

    if scatter == 0.:
        # no extra scatter: bootstrap-resample the catalogue in each realisation
        return [
            histogram1d(mass[np.floor(
                np.random.rand(len(mass)) * len(mass)).astype(int)],
                        bins=nb,
                        range=[low, upp]) * correction / volume /
            (bins[1] - bins[0]) for _ in range(nsample)
        ]

    # otherwise perturb each mass with Gaussian scatter in each realisation
    return [
        histogram1d(
            np.random.normal(mass, scale=scatter), bins=nb, range=[low, upp]) *
        correction / volume / (bins[1] - bins[0]) for _ in range(nsample)
    ]
Example no. 4
def calc_pr(dist, Nbins, contrast, Dmax_poly, polydispersity, r):
    """
    calculate p(r)
    p(r) is the contrast-weighted histogram of distances, without the self-terms (dist = 0)
    due to lack of self-terms it is not used to calc scattering (fast Debye by histogram), but used for structural interpretation
    
    input: 
    dist      : all pairwise distances
    contrast  : all pair-wise contrast products
    Dmax_poly : Dmax for polydisperse ensemble
    polydispersity: boolian, True or False
    r         : pair distances of bins

    output:
    pr        : pair distance distribution function (PDDF) for monodisperse shape
    pr_poly   : PDDF for polydisperse ensemble
    """
    ## remove the zero-distance self-terms (tr for truncated)
    idx_nonzero = np.where(dist > 0.0)
    dist_tr = dist[idx_nonzero]
    del dist  # less memory consumption

    contrast_tr = contrast[idx_nonzero]
    del contrast  # less memory consumption

    # calculate monodisperse p(r)
    pr = histogram1d(dist_tr,
                     bins=Nbins,
                     weights=contrast_tr,
                     range=(0, Dmax_poly))

    ## calculate polydisperse p(r)
    N_poly_integral = 9
    if polydispersity > 0.0:
        pr_poly = 0.0
        factor_range = 1 + np.linspace(-3, 3, N_poly_integral) * polydispersity
        for factor_d in factor_range:
            dpr = histogram1d(dist_tr * factor_d,
                              bins=Nbins,
                              weights=contrast_tr,
                              range=(0, Dmax_poly))
            res = (1.0 - factor_d) / polydispersity
            w = np.exp(-res**2 /
                       2.0)  # give weight according to normal distribution
            vol = factor_d**3  # weight: relative volume of the scaled particle
            pr_poly += dpr * w * vol
    else:
        pr_poly = pr

    ## normalize so pr_max = 1
    pr /= np.amax(pr)
    pr_poly /= np.amax(pr_poly)

    ## save p(r) to textfile
    with open('pr.d', 'w') as f:
        f.write('#  r p(r) p_polydisperse(r)\n')
        for i in range(Nbins):
            f.write('%f %f %f\n' % (r[i], pr[i], pr_poly[i]))

    return pr, pr_poly
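A self-contained sketch of calling calc_pr on a toy bead model (hypothetical setup; assumes numpy and scipy are available). Note the function also writes pr.d to the working directory:

import numpy as np
from scipy.spatial.distance import pdist

points = np.random.randn(500, 3)              # toy bead model
dist = pdist(points)                          # all pairwise distances
contrast = np.ones_like(dist)                 # unit pair-wise contrast products
Nbins = 50
Dmax_poly = dist.max()
r = (np.arange(Nbins) + 0.5) * Dmax_poly / Nbins  # bin centres
pr, pr_poly = calc_pr(dist, Nbins, contrast, Dmax_poly, 0.0, r)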
Example no. 5
def calc_pr(dist, Nbins, contrast, Dmax, polydispersity, r, Model):
    """
    calculate p(r)
    p(r) is the contrast-weighted histogram of distances, without the self-terms (dist = 0)
    due to lack of self-terms it is not used to calc scattering (fast Debye by histogram), but used for structural interpretation
    
    input: 
    dist      : all pairwise distances
    contrast  : all pair-wise contrast products
    Dmax_poly : Dmax for polydisperse ensemble
    polydispersity: boolian, True or False
    r         : pair distances of bins

    output:
    pr        : pair distance distribution function (PDDF) for monodisperse shape
    pr_poly   : PDDF for polydisperse ensemble
    """
    ## remove the zero-distance self-terms (tr for truncated)
    idx_nonzero = np.where(dist > 0.0)
    dist_tr = dist[idx_nonzero]
    del dist  # less memory consumption

    contrast_tr = contrast[idx_nonzero]
    del contrast  # less memory consumption

    ## calculate p(r)
    if polydispersity > 0.0:
        N_poly_integral = 9
        pr = 0.0
        factor_range = 1 + np.linspace(-3, 3, N_poly_integral) * polydispersity
        for factor_d in factor_range:
            dpr = histogram1d(dist_tr * factor_d,
                              bins=Nbins,
                              weights=contrast_tr,
                              range=(0, Dmax * 1.5))
            res = (1.0 - factor_d) / polydispersity
            w = np.exp(-res**2 / 2.0)  # weight: normal distribution
            vol = factor_d**3  # weight: relative volume, because larger particles scatter more
            pr += dpr * w * vol**2
    else:
        pr = histogram1d(dist_tr,
                         bins=Nbins,
                         weights=contrast_tr,
                         range=(0, Dmax * 1.5))

    ## normalize so pr_max = 1
    pr /= np.amax(pr)

    ## save p(r) to textfile
    with open('pr%s.d' % Model, 'w') as f:
        f.write('# %-17s %-17s\n' % ('r', 'p(r)'))
        for i in range(Nbins):
            f.write('  %-17.5e %-17.5e\n' % (r[i], pr[i]))

    return pr
Example no. 6
def calc_mutual_information(ndfx: np.ndarray,
                            ndfy: np.ndarray,
                            *args,
                            bins: int = 100,
                            base_max: int = 1) -> tuple:
    """
    Compute the mutual information between every column pair of ndfx and ndfy.
    Params::
        ndfx, ndfy: inputs are assumed to be normalized to the range 0..base_max
    """
    """ DEBUG
    import numpy as np
    from fast_histogram import histogram1d, histogram2d
    x = np.random.rand(1000, 20)
    y = np.random.rand(1000, 10)
    ndfx, ndfy, bins, base_max = x, y, 100, 1
    """
    from fast_histogram import histogram1d, histogram2d
    logger.info("START")
    list_ndf = []
    for x in ndfx.T:
        for y in ndfy.T:
            ndf = histogram2d(x,
                              y,
                              range=[[0, base_max], [0, base_max]],
                              bins=bins)
            ndf = (ndf / ndf.sum()).astype(np.float16)
            list_ndf.append(ndf.reshape(1, *ndf.shape))
    ndf_xy = np.concatenate(list_ndf, axis=0)
    ndf_x = np.array(
        [histogram1d(x, range=[0, base_max], bins=bins)
         for x in ndfx.T]) / ndfx.shape[0]
    ndf_y = np.array(
        [histogram1d(x, range=[0, base_max], bins=bins)
         for x in ndfy.T]) / ndfy.shape[0]
    ndf_x = np.tile(np.tile(ndf_x.reshape(-1, bins, 1), bins),
                    (1, ndfy.shape[1], 1)).reshape(-1, bins, bins)
    ndf_y = np.tile(
        np.tile(ndf_y, bins).reshape(-1, bins, bins), (ndfx.shape[1], 1, 1))
    ndf_x_y = ndf_x * ndf_y
    elem: np.ma.core.MaskedArray = ndf_xy * np.ma.log(ndf_xy / ndf_x_y)
    val: np.ma.core.MaskedArray = np.sum(elem * base_max / bins * base_max /
                                         bins,
                                         axis=(1, 2))
    val = np.ma.filled(val, 0)
    index_x = np.tile(np.arange(ndfx.shape[1]).reshape(-1, 1),
                      ndfy.shape[1]).reshape(-1)
    index_y = np.tile(np.arange(ndfy.shape[1]), ndfx.shape[1]).reshape(-1)
    index_list = np.concatenate([[index_x], [index_y]], axis=0).T
    logger.info("END")
    return (index_list, val, *args)
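Following the DEBUG block in the docstring, a usage sketch (assumes a module-level logger is configured, as the function expects). val[k] holds the mutual information of the column pair index_list[k], so it can be reshaped into a matrix:

import numpy as np

x = np.random.rand(1000, 20)
y = np.random.rand(1000, 10)
index_list, val = calc_mutual_information(x, y, bins=100, base_max=1)
mi_matrix = val.reshape(20, 10)  # MI of every (x column, y column) pair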
Example no. 7
def scaleImage(im, lowcut=0.00001, highcut=0.9995, scaleImage=1):
    # note: the scaleImage argument (a downscale factor) shadows the function name

    # make a histogram of the image in the bit depth at which the image was recorded
    imageHist = histogram1d(im.ravel(), 2**16, [0, 2**16],
                            weights=None).astype('float')

    # Calculate the cumulative probability ignoring zero values
    cumHist = np.empty(imageHist.shape, dtype='float')
    cumHist[0] = 0
    cumHist[1:] = np.cumsum(imageHist[1:])

    # normalize to a cumulative probability, ignoring the zero bin
    # (useful when many pixels are exactly zero)
    cumRange = cumHist[2**16 - 1] - cumHist[0]
    cumHist -= cumHist[0]
    cumHist /= cumRange

    # set low and high values to normalize image contrast.
    loval = np.argmax(cumHist >= lowcut)
    hival = np.argmax(cumHist >= highcut)
    scIm = np.clip(im, loval, hival).astype('float')
    # scale the image linearly over the [loval, hival] range
    scaleFactor = 254 / (hival - loval)
    scIm -= loval
    scIm *= scaleFactor
    adjIm = np.require(scIm, dtype='uint8', requirements='C')

    # resize if you need to
    rsIm = cv2.resize(
        adjIm,
        (round(im.shape[1] / scaleImage), round(im.shape[0] / scaleImage)))

    return rsIm
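A usage sketch on a synthetic 16-bit image (toy data; assumes numpy and cv2 are imported, as the function requires):

import numpy as np

im16 = (np.random.rand(480, 640) * 4095).astype('uint16')  # toy 12-bit camera frame
small = scaleImage(im16, scaleImage=2)  # contrast-stretched uint8, half the size
print(small.dtype, small.shape)         # uint8 (240, 320)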
Example no. 8
def fast_otsu(image, nbins=256, eps=0.1):
    """
    A thin wrapper around skimage.filter.threshold otsu that uses
    fast_histogram.histogram1d to make things ~5x faster per image.

    Parameters
    ----------
    image : np.ndarray (M,N)
        Grayscale image from which to compute the threshold.
    nbins : int default 256
        Number of bins to compute in the histogram.
    eps : float default = 0.1
        Small offset to expand the edges of the histogram by so
        that the minimum-valued elements get appropriately counted.

    Returns
    -------
    threshold : float
        Threshold value for image. Pixels greater than threshold are considered foreground.
    """
    im_min = image.min()
    im_max = image.max()

    counts = histogram1d(image, nbins, (im_min - eps, im_max + eps))

    bin_width = (im_max - im_min) / nbins

    idx = threshold_otsu(nbins=nbins, hist=counts)

    threshold = im_min + bin_width * (idx + 0.5)

    return threshold
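A sketch comparing the wrapper with the reference implementation (assumes scikit-image is installed and threshold_otsu is imported from skimage.filters, as the wrapper requires). On a bimodal image both thresholds should land between the two modes:

import numpy as np
from skimage.filters import threshold_otsu

image = np.concatenate([np.random.normal(0.2, 0.05, 10000),
                        np.random.normal(0.8, 0.05, 10000)]).reshape(200, 100)
print(fast_otsu(image))       # roughly 0.5
print(threshold_otsu(image))  # reference value, should be very close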
Example no. 9
def histo(path, x_min=None, x_max=None):
    # default arguments cannot refer to `path`, so resolve the range here
    if x_min is None:
        x_min = np.amin(path)
    if x_max is None:
        x_max = np.amax(path)
    number_bins = 1000

    histo = histogram1d(path, range=[x_min, x_max], bins=number_bins)
    h = np.linspace(x_min, x_max, number_bins)
    plt.plot(h, histo, ".")
    plt.xlabel("position [nm]")
    plt.ylabel("|Psi(x)|^2")
Example no. 10
    def readImage(self, lowcut=0.0005, highcut=0.99995):

        roiLbl = self.roiLbl
        imNum = self.imageNumber

        imagePath = pathlib.Path(self.odelayConfig['LocalImageDir'])
        dataPath = pathlib.Path(self.odelayConfig['LocalDataDir'])
        # Generate image file path by combining the region of interest label with the experiment path
        roiFolder = pathlib.Path('./' + roiLbl)
        imageFileName = pathlib.Path('./' + roiLbl + '_' + f'{imNum:00d}' +
                                     '.mat')
        imageFilePath = imagePath / roiFolder / imageFileName

        # Load region of interest data. This HDF5 file should contain the image stitch coordinates
        roiPath = dataPath / 'ODELAY Roi Data' / f'{roiLbl}.hdf5'

        roiData = fio.loadData(roiPath)
        background = self.experimentData['backgroundImage']

        # This data should be extracted from the Experiment Index file or stage data file.
        pixSize = self.experimentData['pixSize']
        magnification = self.experimentData['magnification']

        stInd = f'{imNum-1:03d}'
        stitchCorners = roiData['stitchMeta'][stInd]['imPix']

        anImage = opl.assembleImage(imageFilePath, pixSize, magnification,
                                    background, stitchCorners)
        im = anImage['Bf']
        # make a histogram of the image in the bit depth at which the image was recorded.
        imageHist = histogram1d(im.ravel(), 2**16, [0, 2**16],
                                weights=None).astype('float')

        # Calculate the cumulative probability ignoring zero values
        cumHist = np.zeros(imageHist.shape, dtype='float')
        cumHist[1:] = np.cumsum(imageHist[1:])

        # normalize to a cumulative probability, ignoring the zero bin
        cumProb = (cumHist - cumHist[0]) / (cumHist[2**16 - 1] - cumHist[0])

        # set low and high values to normalize image contrast.
        loval = np.argmax(cumProb >= lowcut)
        hival = np.argmax(cumProb >= highcut)

        scIm = (im.astype('float') - loval.astype('float')) / (
            hival.astype('float') - loval.astype('float')) * 254
        lim = np.iinfo('uint8')
        scIm = np.clip(scIm, lim.min, lim.max)
        # Set image data type and make sure the array is contiguous in memory.
        imageData = np.require(scIm, dtype='uint8', requirements='C')
        # Set data as a QImage.  This is a greyscale image
        Qim = QImage(imageData.data, imageData.shape[1], imageData.shape[0],
                     imageData.shape[1], QImage.Format_Grayscale8)

        Qim.data = imageData  # keep a reference so the image buffer is not garbage-collected

        return Qim
Example no. 11
    def _set_histogram(self):
        hist_values = histogram1d(self.source,
                                  bins=self.no_of_colors,
                                  range=[0, self.no_of_colors])

        # map every pixel to the count of its colour via fancy indexing
        # (self.source is assumed to hold integer colour indices)
        self.histogram = np_array(hist_values[self.source])
Example no. 12
    def time_spent_in_headdirection(self, binnum=20):
        timenow = self.__class__._timenow
        idxes = range(0, abs(self.data['locationtime'] - timenow).argmin())
        # hist, bin_edges = np.histogram(self.data['headdirection'][idxes], binnum, (0, 360))
        hist = fast_histogram.histogram1d(self.data['headdirection'][idxes],
                                          binnum, (0, 360))
        bin_centers = np.arange(0, 360, 360 / binnum) + 360 / binnum / 2
        # bin_centers = np.convolve(bin_edges, [0.5, 0.5], mode='valid')
        return hist, bin_centers
Example no. 13
    def __init__(self, xmin, xmax, nbins=10):

        self.nbins = nbins
        self.edges = np.linspace(xmin, xmax, nbins + 1)
        self.centers = (self.edges[:-1] + self.edges[1:]) / 2.0

        self.delta = 0.0
        self.range = (xmin, xmax + self.delta)
        self.hists = histogram1d([], nbins, self.range)
Example no. 14
def showImage(roiLbl, imNum, imageDir, dataDir):

    # image = odp.stitchImage(imageFileName, pixSize, magnification, background)

    expPath = pathlib.Path(imageDir)

    # Generate image file path by combining the region of interest label with the experiment path
    roiFolder = pathlib.Path('./' + roiLbl)
    imageFileName = pathlib.Path('./' + roiLbl + '_' + f'{imNum:00d}' + '.mat')
    imageFilePath = expPath / roiFolder / imageFileName

    # Load region of interest data. This HDF5 file should contain the image stitch coordinates
    dataPath = pathlib.Path(dataDir)
    initPath = list(dataPath.glob('*Index_ODELAYData.hdf5'))
    initData = fio.loadData(initPath[0])
    roiPath = dataPath / 'ODELAY Roi Data' / f'{roiLbl}.hdf5'

    roiData = fio.loadData(roiPath)
    background = initData['backgroundImage']

    # This data should be extracted from the Experiment Index file or stage data file.
    pixSize = initData['pixSize']
    magnification = initData['magnification']

    stInd = f'{imNum-1:03d}'
    stitchCorners = roiData['stitchMeta'][stInd]['imPix']
    # breakpoint()
    anImage = opl.assembleImage(imageFilePath, pixSize, magnification,
                                background, stitchCorners)
    im = anImage['Bf']
    # im = opl.SobelGradient(im)
    imSize = im.shape

    # This data should be recorded from image display to make sure the image is visible.
    imageHist = histogram1d(im.ravel(), 2**16, [0, 2**16],
                            weights=None).astype('float')
    # Calculate the cumulative probability ignoring zero values
    cumHist = np.cumsum(imageHist)
    cumProb = (cumHist - cumHist[0]) / (cumHist[2**16 - 1] - cumHist[0])
    # set low and high values to normalize image contrast.
    loval = np.argmax(cumProb > 0.00001)
    hival = np.argmax(cumProb >= 0.9995)

    adjIm = np.array((im.astype('float') - loval.astype('float')) /
                     (hival.astype('float') - loval.astype('float')) * 254,
                     dtype='uint8')

    rsIm = cv2.resize(adjIm, (round(imSize[1] / 5), round(imSize[0] / 5)))

    cv2.imshow('Display Image', rsIm)
    k = cv2.waitKey(0)

    if k == 107 or k == -1:  # 'k' key (107) or window closed
        cv2.destroyWindow('Display Image')

    return k
Example no. 15
    def APnum_in_Y(self, binnum=20):
        runnum = self.__class__._runnum
        timenow = self.__class__._timenow
        idxes = range(0, abs(self.data[runnum]['APtimes'] - timenow).argmin())
        # hist, bin_edges = np.histogram(self.data[runnum]['Y'][idxes], binnum, (0, 200))
        hist = fast_histogram.histogram1d(self.data[runnum]['Y'][idxes],
                                          binnum, (0, 200))
        bin_centers = np.arange(0, 200, 200 / binnum) + 200 / binnum / 2
        # bin_centers = np.convolve(bin_edges, [0.5, 0.5], mode='valid')
        return hist, bin_centers
Example no. 16
def histo(path):
    number_bins = 200
    x_min = np.amin(path)
    x_max = np.amax(path)
    # x_min = -2
    # x_max = 1
    histo = histogram1d(path, range=[x_min, x_max], bins=number_bins)
    h = np.linspace(x_min, x_max, number_bins)
    # e_k = [E(i) for i in h]
    # plt.figure()
    plt.plot(h, histo)
Example no. 17
    def get_hist_sub_image(self, offset, dimensions):
        x, y = offset
        width, height = dimensions
        sub = self.source[y:y + height, x:x + width]
        hist_values = histogram1d(sub,
                                  bins=self.no_of_colors,
                                  range=[0, self.no_of_colors])

        # map every pixel of the sub-image to the count of its colour
        # (the sub-image is assumed to hold integer colour indices)
        return np_array(hist_values[sub])
Example no. 18
def calc_dens(x1, y1, z1, xmax, dx, nbins, weight):
    """
    This function calculates the radial density of N1 number of particles
    from three cartesian arrays, x1, y1, z1. These arrays:
        1. Have the same length, N1
        2. Record the x, y, and z positions of the N1 particles.
        
    The function also requires a defined grid. The grid is expected to be
    zero-centered and to contain nbins grid points. For example, the grid
    could run from -3 to 3 and contain the points -3, -2, -1, 0, 1, 2, 3.
    From the grid, this function requires the inputs xmax, dx, and nbins.
    
    This function returns the radial density in particles / m**3 at each 
    grid point. The density is defined on a new radial grid where the 
    grid spacing is sqrt(3) * the cartesian grid spacing.
    
    If the x1,y1,z1 arrays contain N1 pseudo-random normally-distributed
    points with an rms width (1 standard deviation) of r0, and if the
    radial grid is 
        rgrid = np.sqrt( 3 ) * xgrid[n1:nbins] 
    Then the theoretical density is equal to 
        d0 = N1 * (2*np.pi*r0**2)**(-1.5) * np.exp(-rgrid**2 / 2 / r0**2) 
    If a weight is specified, the returned density d1 is multiplied
    by the weight.
    """
    N1 = np.size(x1)
    n1 = (nbins - 1) // 2  # centre index (np.int is removed in modern NumPy)
    hx1 = histogram1d(x1,
                      range=[-xmax - 0.5 * dx, xmax + 0.5 * dx],
                      bins=nbins)
    hy1 = histogram1d(y1,
                      range=[-xmax - 0.5 * dx, xmax + 0.5 * dx],
                      bins=nbins)
    hz1 = histogram1d(z1,
                      range=[-xmax - 0.5 * dx, xmax + 0.5 * dx],
                      bins=nbins)
    d1 = hx1 * hy1 * hz1 * N1**(-2) * dx**(-3)
    d1 = d1[n1:nbins] * weight
    return d1
Example no. 19
def calc_hr(dist, Nbins, contrast, polydispersity, Model):
    """
    calculate h(r)
    h(r) is the contrast-weighted histogram of distances, including the self-terms (dist = 0)

    input:
    dist      : all pairwise distances
    Nbins     : number of histogram bins
    contrast  : all pair-wise contrast products
    polydispersity : relative polydispersity (0 means monodisperse)
    Model     : model name (unused in this snippet)

    output:
    r         : bin centres (pair distances)
    hr        : pair distance distribution function, h(r)
    Dmax      : maximum distance
    Rg        : radius of gyration
    """

    ## calculate Dmax and the base histogram
    Dmax = calc_Dmax(dist, polydispersity)
    r, hr = generate_histogram(dist, contrast, Dmax, Nbins)

    ## calculate h(r)
    if polydispersity > 0.0:
        N_poly_integral = 9
        hr = 0.0
        factor_range = 1 + np.linspace(-3, 3, N_poly_integral) * polydispersity
        for factor_d in factor_range:
            dhr = histogram1d(dist * factor_d, bins=Nbins, weights=contrast,
                              range=(0, Dmax * 1.5))
            res = (1.0 - factor_d) / polydispersity
            w = np.exp(-res**2 / 2.0)  # weight: normal distribution
            vol = factor_d**3  # weight: relative volume, because larger particles scatter more
            hr += dhr * w * vol**2
    else:
        hr = histogram1d(dist, bins=Nbins, weights=contrast, range=(0, Dmax * 1.5))

    ## normalize so hr_max = 1
    hr /= np.amax(hr)

    ## calculate Rg
    Rg = calc_Rg(r, hr)

    return r, hr, Dmax, Rg
Example no. 20
def histo(path):

    x_min = np.amin(path[:])
    x_max = np.amax(path[:])
    number_bins = 41

    histo = histogram1d(path, range=[x_min, x_max], bins=number_bins)
    # normalize by the number of paths and time slices
    # (path_list and numTimeSlices are module-level globals)
    histo = histo / ((len(path_list) - 1) * numTimeSlices)
    # histo = histo / numTimeSlices
    h = np.linspace(x_min, x_max, number_bins)
    plt.plot(h, histo, ".")

    plt.xlabel("Position")
    plt.ylabel("Expectation value <x>")
Example no. 21
def findThresh(gray, recall_pix_num=1000):
    # Find a threshold that keeps roughly recall_pix_num of the brightest pixels
    # hist_img, _ = np.histogram(gray, 256, range=[0, 255])
    hist_img = histogram1d(gray.ravel(), 256, (0, 255))
    cdf_img = np.cumsum(hist_img)
    num_total = cdf_img[-1]
    res = 10  # fallback threshold if the scan below finds nothing
    for i in range(240):
        index = 255 - i
        if (num_total - cdf_img[index]) > recall_pix_num:
            res = index
            break
    return res
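A usage sketch (synthetic image; assumes numpy and histogram1d are imported as in the snippet):

import numpy as np

gray = (np.random.rand(480, 640) * 255).astype(np.uint8)
thresh = findThresh(gray, recall_pix_num=1000)
mask = gray > thresh  # keeps roughly the brightest recall_pix_num pixels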
Example no. 22
def fgsd_features(graph_list):

    S_max = 0
    S_list = []
    print('Computing pseudo inverse...')
    t = time.time()
    for i, A in enumerate(graph_list):
        if (i + 1) % 1000 == 0:
            print('num graphs processed so far: ', i + 1)
        A = np.array(A.todense(), dtype=np.float32)
        D = np.sum(A, axis=0)
        L = np.diag(D) - A

        ones_vector = np.ones(L.shape[0])
        try:
            fL = np.linalg.pinv(L)
        except np.linalg.LinAlgError:
            # fall back to a MATLAB pseudo-inverse (assumes an open matlab.engine session `eng`)
            fL = np.array(eng.fgsd_fast_pseudo_inverse(matlab.double(L.tolist()), nargout=1))
        fL[np.isinf(fL)] = 0
        fL[np.isnan(fL)] = 0

        S = np.outer(np.diag(fL), ones_vector) + np.outer(ones_vector, np.diag(fL)) - 2 * fL
        if S.max() > S_max:
            S_max = S.max()
        S_list.append(S)

    print('S_max: ', S_max)
    print('Time Taken: ', time.time() - t)

    feature_matrix = []
    nbins = 1000000
    range_hist = (0, S_max)
    print('Computing histogram...')
    t = time.time()
    for i, S in enumerate(S_list):
        if (i + 1) % 1000 == 0:
            print('num graphs processed so far: ', i + 1)
        # hist, _ = np.histogram(S.flatten(), bins=nbins, range=range_hist)
        hist = histogram1d(S.flatten(), bins=nbins, range=range_hist)
        hist = sparse.csr_matrix(hist)
        feature_matrix.append(hist)
    print('Time Taken: ', time.time() - t)

    feature_matrix = sparse.vstack(feature_matrix)
    return feature_matrix
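A smoke-test sketch with two toy graphs (hypothetical; the MATLAB fallback is only reached when np.linalg.pinv fails, so plain scipy sparse adjacency matrices are enough here):

import numpy as np
from scipy import sparse

A_triangle = sparse.csr_matrix(np.array([[0, 1, 1],
                                         [1, 0, 1],
                                         [1, 1, 0]], dtype=np.float32))
A_square = sparse.csr_matrix(np.array([[0, 1, 0, 1],
                                       [1, 0, 1, 0],
                                       [0, 1, 0, 1],
                                       [1, 0, 1, 0]], dtype=np.float32))
X = fgsd_features([A_triangle, A_square])  # sparse matrix of shape (2, 1000000)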
Example no. 23
def histo(path):
    # N, beta and num_path are module-level globals
    print("N=", N)
    print("beta=", beta)
    print("num_path=", num_path)

    # x_min = np.amin(path)
    # x_max = np.amax(path)
    x_min = 0
    x_max = 8
    number_bins = 100

    histo = histogram1d(path, range=[x_min, x_max], bins=number_bins)
    h = np.linspace(x_min, x_max, number_bins)
    plt.plot(h, histo, ".")
    plt.xlabel("position [nm]")
    plt.ylabel("|Psi(x)|^2")
Example no. 24
def histo1(a, path_list):

    # collect slice `a` of every path into one flat array
    li = np.concatenate([p[a, :] for p in path_list])
    x_min = np.amin(li)
    x_max = np.amax(li)

    # print("N=", N)
    # print("beta", beta)

    number_bins = 1000

    histo = histogram1d(li, range=[x_min, x_max], bins=number_bins)
    h = np.linspace(x_min, x_max, number_bins)
    plt.plot(h, histo)
    # plt.xlim(0.75, 1.25)
    plt.xlabel("position [nm]")
    plt.ylabel(r"$|\Psi(x)|^2$")
Example no. 25
    def build_histogram(self, pixel_stacks, val_range, nbins):
        """
        build up the histogram of each pixel stacks
        :param pixel_stacks: the pixel stacks need to build histogram - numpy array (T, H, W, C)
        :param val_range: a tuple of the range of pixel values
        :param nbins: number of bins
        :return: histogram of each position of pixel
        """
        # converting RGB to grayscale assuming 4th axis is channel
        if len(pixel_stacks.shape) > 3:
            imgs = np.mean(pixel_stacks, axis=3)
        else:
            imgs = pixel_stacks

        # Creating histogram for each spatial pixel position, flattened out
        T, H, W = imgs.shape
        self.num_imgs = T
        self.shape = H * W
        self.H = H
        self.W = W
        self.val_range = val_range
        self.num_bins = nbins
        self.bin_size = (val_range[1] - val_range[0]) / float(self.num_bins)
        imgs = imgs.transpose(1, 2, 0)
        imgs = imgs.reshape(H * W, -1)

        # initializing histogram (one extra slot) and a filler array to avoid
        # divide-by-zero errors later on
        # hist = np.zeros((H*W, self.num_bins+1))
        hist = np.zeros((self.shape, self.num_bins + 1))
        filler = np.ones((self.shape, self.num_bins))

        for k in range(0, H * W):
            hist[k, 0:self.num_bins] = (fht.histogram1d(imgs[k, :],
                                                        range=val_range,
                                                        bins=self.num_bins))

        hist[:, self.num_bins] = self.num_imgs
        #hist = hist/(self.num_imgs + self.num_bins)
        self.hist = hist
        self.filler = filler

        return hist
Example no. 26
def _bw_isj(x, grid_counts=None):
    """
    Improved Sheather and Jones method as explained in [1]
    This is an internal version pretended to be used by the KDE estimator
    
    References
    ----------
    .. [1] Kernel density estimation via diffusion.
       Z. I. Botev, J. F. Grotowski, and D. P. Kroese.
       Ann. Statist. 38 (2010), no. 5, 2916--2957.
    """

    x_len = len(x)
    x_min = np.min(x)
    x_max = np.max(x)
    x_range = x_max - x_min

    # Relative frequency per bin
    if grid_counts is None:
        x_std = np.std(x)
        grid_len = 256
        grid_min = x_min - 0.5 * x_std
        grid_max = x_max + 0.5 * x_std
        grid_counts = histogram1d(x, bins=grid_len, range=(grid_min, grid_max))
    else:
        grid_len = len(grid_counts) - 1

    grid_relfreq = grid_counts / x_len

    # Discrete cosine transform of the data
    a_k = _dct1d(grid_relfreq)

    k_sq = np.arange(1, grid_len) ** 2
    a_sq = a_k[1:grid_len] ** 2

    t = fsolve(_fixed_point, 0.02, args=(x_len, k_sq, a_sq))
    h = t[0]**0.5 * x_range

    return h
Example no. 27
def mode_norm(arr, mode_cutoff_ratio=3.0, n_bins=4096, eps=0.01):
    """
    Normalize a single image (arr) by subtracting the mode, zeroing elements
    less than 0 after subtraction, and dividing by the maximum pixel value.

    Parameters
    ----------
    arr : np.array
        Single image to normalize.
    mode_cutoff_ratio : float default 3.0
        Ratio to check for overly bright pixels which will mess
        with normalization. Larger values will leave more bright spots
        in the range. Set to 0 to always scale to the brightest pixel.
    n_bins : int default 4096
        Number of bins in the histogram used to compute the most common
        pixel value. Default 4096 works well for ~1Mb images.
    eps : float default 0.01
        Small offset to expand the edge bins in the histogram by in order
        to ensure that the min and max pixels are properly included.

    Returns
    -------
    normed : np.array (same shape as arr)
        Image with values normalized to be in the range [0,1]
    """

    hist_range = (np.min(arr) - eps, np.max(arr) + eps)
    w = (hist_range[1] - hist_range[0]) / n_bins
    mode_idx = np.argmax(histogram1d(arr, bins=n_bins, range=hist_range))
    mode_val = hist_range[0] + w * (mode_idx + 0.5)

    if (hist_range[1] / mode_val) > mode_cutoff_ratio:
        scale_val = mode_cutoff_ratio * mode_val
    else:
        scale_val = hist_range[1]

    normed = np.clip((arr - mode_val) / (scale_val - mode_val), 0, 1)
    return normed
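A usage sketch (synthetic frame with a bright patch; toy numbers): the patch sits more than mode_cutoff_ratio times above the mode, so the scale is capped and the bright pixels saturate at 1.

import numpy as np

img = np.random.normal(100.0, 5.0, (512, 512))
img[:32, :32] += 500.0        # bright patch far above the mode
normed = mode_norm(img)
print(normed.min(), normed.max())  # 0.0 1.0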
Example no. 28
    # excerpt: q, r, hr, dist, contrast, Dmax_poly, N_poly_integral and sinc()
    # are defined earlier in the surrounding function
    # monodisperse intensity
    I = 0.0
    for i in range(Nbins):
        qr = q * r[i]
        I += hr[i] * sinc(qr)
    I /= np.amax(I)
    del hr

    # polydisperse intensity
    if polydispersity > 0.0:
        I_poly = 0.0
        factor_range = 1 + np.linspace(-3, 3, N_poly_integral) * polydispersity
        for factor_d in factor_range:
            dhr = histogram1d(dist * factor_d,
                              bins=Nbins,
                              weights=contrast,
                              range=(0, Dmax_poly))
            res = (1.0 - factor_d) / polydispersity
            w = np.exp(-res**2 /
                       2.0)  # give weight according to normal distribution
            #vol = factor_d**3 # relative volume
            #hr_poly += dhr*(w*vol)**2
            dI = 0.0
            for i in range(Nbins):
                qr = q * r[i]
                dI += dhr[i] * sinc(qr)
            dI /= np.amax(dI)
            I_poly += w * dI
            message.udpmessage({"_textarea": "."})  # progress indicator
        I_poly /= np.amax(I_poly)
        del dhr
Example no. 29
nbins = 201
xmax = 3e-3

# define the grid in x, y, z (same in all dimensions)
ymax = xmax
zmax = xmax
rmax = np.sqrt(xmax**2 + ymax**2 + zmax**2)
n1 = (nbins - 1) // 2  # we'll need this for the radial grid
xgrid = np.linspace(-xmax, xmax, nbins)
dx = xgrid[1] - xgrid[0]

# fill the arrays with random numbers (N1 and r0 are defined earlier)
x1, y1, z1 = np.random.randn(3, N1) * r0

# calculate the density from the data
hx1 = histogram1d(x1, range=[-xmax - 0.5 * dx, xmax + 0.5 * dx], bins=nbins)
hy1 = histogram1d(y1, range=[-xmax - 0.5 * dx, xmax + 0.5 * dx], bins=nbins)
hz1 = histogram1d(z1, range=[-xmax - 0.5 * dx, xmax + 0.5 * dx], bins=nbins)
d1 = hx1 * hy1 * hz1 * N1**(-2) * dx**(-3)
d1 = d1[n1:nbins]

# calculate the known analytic density
rgrid = np.sqrt(3) * xgrid[n1:nbins]
dr = rgrid[1] - rgrid[0]
d0 = N1 * (2 * np.pi * r0**2)**(-1.5) * np.exp(-rgrid**2 / 2 / r0**2)

# plot and compare the densities
fig = plt.figure()

ax1 = fig.add_subplot(211)
ax1.plot(rgrid, d0, rgrid, d1, 'ko')
Example no. 30
def estimate_density(
    # pylint: disable=too-many-arguments,too-many-locals
    x,
    bw="silverman",
    grid_len=256,
    extend=True,
    bound_correction=False,
    adaptive=False,
    extend_fct=0.5,
    bw_fct=1,
    bw_return=False,
    custom_lims=None,
):
    """
    1 dimensional density estimation.
    
    Given an array of data points `x` it returns an estimate of
    the probability density function that generated the samples in `x`.
    
    Parameters
    ----------
    x : 1D numpy array
        Data used to calculate the density estimation.
        Theoretically it is a random sample obtained from $f$,
        the true probability density function we aim to estimate.
    bw: int, float or str, optional
        If numeric, indicates the bandwidth and must be positive.
        If str, indicates the method to estimate the bandwidth and must be
        one of "scott", "silverman", "lscv", "sj", "isj" or "experimental".
        Defaults to "silverman".
    grid_len: int, optional
        The number of intervals used to bin the data points.
        Defaults to 256.
    extend: boolean, optional
        Whether to extend the observed range for `x` in the estimation.
        It extends each bound by a multiple of the standard deviation of `x`
        given by `extend_fct`. Defaults to True.
    bound_correction: boolean, optional
        Whether to perform boundary correction on the bounds of `x` or not.
        Defaults to False.
    adaptive: boolean, optional
        Indicates if the bandwidth is adaptive or not.
        It is the recommended approach when there are multiple modalities
        with different spread.
        It is not compatible with convolution. Defaults to False.
    extend_fct: float, optional
        Number of standard deviations used to widen the 
        lower and upper bounds of `x`. Defaults to 0.5.
    bw_fct: float, optional
        A value that multiplies `bw` which enables tuning smoothness by hand.
        Must be positive. Defaults to 1 (no modification).
    bw_return: bool, optional
        Whether to return the estimated bandwidth in addition to the 
        other objects. Defaults to False.
    custom_lims: list or tuple, optional
        A list or tuple of length 2 indicating custom bounds
        for the range of `x`. Defaults to None which disables custom bounds.
    
    Returns
    -------
    grid : Gridded numpy array for the x values.
    pdf : Numpy array for the density estimates.
    bw: optional, the estimated bandwidth.
    """

    # Check `x` is of an appropriate type
    x = check_type(x)
    
    # Assert `bw_fct` is numeric and positive
    # Note: a `bool` will not trigger the first AssertionError, 
    #       but it is not a problem since True will be 1
    #       and False will be 0, which triggers the second AssertionError.
    assert isinstance(bw_fct, (int, float))
    assert bw_fct > 0

    # Preliminary calculations
    x_len = len(x)
    x_min = x.min()
    x_max = x.max()
    x_std = (((x ** 2).sum() / x_len) - (x.sum() / x_len) ** 2) ** 0.5
    x_range = x_max - x_min

    # Length warning. Not completely sure if it is necessary
    len_warning(x_len)
    
    # Determine grid
    grid_min, grid_max, grid_len = get_grid(
        x_min, x_max, x_std, extend_fct, grid_len, custom_lims, extend, bound_correction
    )
    
    grid_counts = histogram1d(x, bins=grid_len, range=(grid_min, grid_max))
    grid_edges = np.linspace(grid_min, grid_max, num=grid_len + 1)  

    # Bandwidth estimation
    bw = bw_fct * get_bw(x, bw, grid_counts=grid_counts, x_std=x_std, x_range=x_range)

    # Density estimation
    if adaptive:
        grid, pdf = kde_adaptive(x, bw, grid_edges, grid_counts, grid_len, bound_correction)
    else:
        grid, pdf = kde_convolution(x, bw, grid_edges, grid_counts, grid_len, bound_correction)
    
    if bw_return:
        return grid, pdf, bw
    else:
        return grid, pdf
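A usage sketch (toy bimodal sample; assumes the module's helper functions such as check_type, get_grid, get_bw and kde_convolution are available, since estimate_density depends on them):

import numpy as np
import matplotlib.pyplot as plt

x = np.concatenate([np.random.normal(-2.0, 0.5, 500),
                    np.random.normal(1.0, 1.0, 500)])
grid, pdf, bw = estimate_density(x, bw="isj", bw_return=True)
plt.plot(grid, pdf)
plt.title("ISJ bandwidth = %.3f" % bw)
plt.show()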