import numpy as np
from scipy.spatial import cKDTree as KDTree

def KLdivTree(X1, X2):
    """Fast estimate of the KL divergence D(P||Q) from samples X1 ~ P and X2 ~ Q using KD-trees."""
    n, d = X1.shape
    m, dy = X2.shape
    xtree = KDTree(X1)
    ytree = KDTree(X2)
    # r: distance from each point of X1 to its nearest *other* point of X1
    # (k=2 because the nearest neighbour of a sample is the sample itself)
    r = xtree.query(X1, k=2, eps=.01, p=2)[0][:, 1]
    # s: distance from each point of X1 to its nearest point of X2
    s = ytree.query(X1, k=1, eps=.01, p=2)[0]
    diff = r/s
    return -np.log(diff).sum() * d / n + np.log(m/(n-1))
def kldivergence(x, y):
    """Compute the Kullback-Leibler divergence between two multivariate samples.
    
    Parameters
    ----------
    x : 2D array (n,d)
        Samples from distribution P, which typically represents the true
        distribution.
    y : 2D array (m,d)
        Samples from distribution Q, which typically represents the approximate
        distribution.

    Returns
    -------
    out : float
        The estimated Kullback-Leibler divergence D(P||Q).
    
    References
    ----------
    Perez-Cruz, F. Kullback-Leibler divergence estimation of
    continuous distributions IEEE International Symposium on Information
    Theory, 2008.
    """
    import numpy as NP
    from scipy.spatial import cKDTree as KDTree
    
    # Check the dimensions are consistent
    x = NP.atleast_2d(x)
    y = NP.atleast_2d(y)
    
    n,d = x.shape
    m,dy = y.shape
    
    assert(d == dy)
    
    
    # Build a KD tree representation of the samples and find the nearest neighbour
    # of each point in x.
    xtree = KDTree(x)
    ytree = KDTree(y)
    
    # Get the first two nearest neighbours for x, since the closest one is the
    # sample itself.
    r = xtree.query(x, k=2, eps=.01, p=2)[0][:,1]
    s = ytree.query(x, k=1, eps=.01, p=2)[0]
    
    print(r)
    print(s)
    # There is a mistake in the paper: in Eq. 14, the first term on the
    # right-hand side is missing a negative sign.
    return -NP.log(r/s).sum() * d / n + NP.log(m / (n - 1.))
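
# --- Illustrative usage (added; not part of the original snippet) ---
# A quick sanity check of the estimator above on two 2-D Gaussian samples.
# For N(0, I) vs N(mu, I) with identical covariance, the true KL divergence is
# ||mu||^2 / 2 = 0.5 here, so the printed estimate should be roughly in that range.
import numpy as np

rng = np.random.default_rng(42)
p_samples = rng.normal(loc=0.0, scale=1.0, size=(5000, 2))
q_samples = rng.normal(loc=[1.0, 0.0], scale=1.0, size=(4000, 2))
print(kldivergence(p_samples, q_samples))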
Example 3
def CartMatch(coord1, coord2, tol = None, nnearest=1):
    """
    Cartesian coordinate matching
    """
    # sanitize
    coord1      =       np.array(coord1, ndmin = 1)
    coord2      =       np.array(coord2, ndmin = 1)

    # check the dimensions of the coordinate
    npairs1     =       len( coord1 )
    ndim1       =       1    if   len( np.shape(coord1) )  ==   1  else   \
                        np.shape(coord1)[1]
    npairs2     =       len( coord2 )
    ndim2       =       1    if   len( np.shape(coord2) )  ==   1  else   \
                        np.shape(coord2)[1]

    # check whether the coord1 and coord2 have the same shape
    if  ndim1   !=      ndim2:
        raise RuntimeError("The dims of coord1/2 are not the same.")
    else:
        ndim     =       ndim1

    # make proper arrays if they are 1d arrays
    if      ndim == 1:
        coord1  =       np.array([ coord1, np.zeros(len(coord1)) ]).T
        coord2  =       np.array([ coord2, np.zeros(len(coord2)) ]).T

    # kdtree the coord2
    kdt = KDT(coord2)
    if nnearest == 1:
        idxs2 = kdt.query(coord1)[1]
    elif nnearest > 1:
        idxs2 = kdt.query(coord1, nnearest)[1][:, -1]
    else:
        raise ValueError('invalid nnearest ' + str(nnearest))

    # distance - warning: this could exceed float precision if the values are large enough; we assume that case is beyond the distance of interest...
    ds  =   np.sqrt( np.sum( (coord1 - coord2[idxs2])**2, axis = 1) )

    # index of coord1 
    idxs1 = np.arange(npairs1)

    # distance filtering
    if tol is not None:
        msk = ds < tol
        idxs1 = idxs1[msk]
        idxs2 = idxs2[msk]
        ds = ds[msk]

    return idxs1, idxs2, ds
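
# --- Illustrative usage (added; not part of the original snippet) ---
# Match two random 2-D point sets. This assumes the module-level imports the
# snippet implies, i.e. `import numpy as np` and scipy's cKDTree bound to `KDT`.
import numpy as np
from scipy.spatial import cKDTree as KDT

rng = np.random.default_rng(0)
coord1 = rng.random((100, 2))
coord2 = rng.random((80, 2))
idx1, idx2, ds = CartMatch(coord1, coord2, tol=0.05)
print(len(idx1), "pairs matched within a tolerance of 0.05")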
Example 4
    def swept_extrude(self, thickness):
        """
        outer is a copy of inner, possibly with added detail, but with identical boundary
        we seek to create a castable object with a constant thickness 'thickness'
        to that end, we need to match the boundary points to make a closed extrusion
        extrusion is done iteratively
        we init by radially shrinking the inner mesh by thickness
        """
        assert thickness > 0
        outer = self.vertices
        tree = KDTree(outer)

        outer_radius = np.linalg.norm(outer, axis=1)
        inner = outer

        #incremental updates
        while True:
            # find nearest point for each inner point
            dist, idx = tree.query(inner, k=1)

            inner_radius = np.linalg.norm(inner, axis=1)
            radial_dist = inner_radius - outer_radius[idx]
            ortho_dist2 = dist**2 - radial_dist**2
            new_radius = outer_radius[idx] - np.sqrt(1 - ortho_dist2 / thickness ** 2) * thickness

            if np.allclose(inner_radius, new_radius):
                break
            inner = inner / (inner_radius / new_radius)[:, None]

        #return inner surface swept by thickness
        return self.extrude(inner)
Example 5
def main():
    # read in the file
    try:
        ifs = open(sys.argv[1])
        sample, ext = os.path.splitext(sys.argv[1])
    except IndexError:
        ifs = sys.stdin
        sample = ''
    data = np.loadtxt(ifs, delimiter=',')
    if ifs is not sys.stdin:
        ifs.close()
    # view of the com
    com = data[:,1:4]
    # construct a KD tree
    tree = KDTree(com)
    # query KD tree to find the first nearest neighbor
    dist, idx = tree.query(com, k=2)
    nn = [(i, j, d2) for ((d1, d2), (i, j)) in zip(dist, idx)]
    # histogram of the nearest neighbor distance
    hist(np.array(nn)[:,2])
         #title='{} pore-pore distances'.format(sample),
         #output='{}.pdf'.format(sample))
    # save the nearest neighbor distance to .json files
    ofile = '{}_pore-distribution.json'.format(sample)
    medianDist = np.median(np.array(nn)[:,2])
    cmp0 = lambda lhs, rhs: -1 if lhs[0] < rhs[0] else \
        (1 if lhs[0] > rhs[0] else 0)
Example 6
def sht_isosurface(filename, l_max=20, prop='electric_potential', 
                   test=None):
    """Given an SBF, describe the set of vertices and their esp using sht.
    Will scale the mesh to be of unit mean radius.

    Arguments:
    filename -- name of the SBF file containing a surface

    Keyword arguments:
    prop -- the name of the vertex property to describe in combination
    with the shape (or radius)
    l_max -- maximum angular momentum of the expansion
    test -- use to keep the actual shape and property values for
    examination of accuracy of descriptor

    """
    name = Path(filename).stem
    LOG.debug('Describing %s surface with spherical harmonics', name)
    datafile = sbf.read_file(filename)
    pts = datafile['vertices'].data.transpose()
    LOG.debug('Loaded vertex data')
    # shift to be centered about the origin
    pts -= np.mean(pts, axis=0)

    # this is faster for some reason than np.apply_along_axis
    norms = np.sqrt(pts[:, 0] ** 2 + pts[:, 1] ** 2 + pts[:, 2] ** 2)
    mean_norm = np.mean(norms)
    pts /= mean_norm
    norms /= mean_norm
    pts_normalized = pts / np.reshape(norms, (pts.shape[0], 1))
    LOG.debug('Normalized points')
    sht = SHT(l_max)
    grid_cartesian = spherical_to_cartesian(
        np.c_[np.ones(sht.grid.shape[0]), sht.grid[:, 1], sht.grid[:, 0]])
    LOG.debug('Constructing tree')
    tree = KDTree(pts_normalized)
    LOG.debug('Done')
    LOG.debug('Interpolating values')
    nearest = tree.query(grid_cartesian, 1)
    LOG.debug('Done')
    shape = values_from_grid(norms, nearest[1])
    property_values = values_from_grid(datafile[prop].data, nearest[1])

    if test is not None:
        test['actual'] = shape

    # normalize property to be in [0,1], keep track of min and range
    prop_min = np.min(property_values)
    prop_scale = np.abs(np.max(property_values) - np.min(property_values))
    property_values -= prop_min
    if prop_scale != 0:
        property_values /= prop_scale
    others = [mean_norm, prop_min, prop_scale]
    combined = np.zeros(property_values.shape, dtype=np.complex128)
    combined.real = shape
    combined.imag = property_values

    return name, others, sht.analyse(combined)
def closest_index(sample_points, indices):
    r"""
    Find the nearest sample point to each of the given query locations
    (along with the distance to that point). Input is
    an array of sample_points and an array of query locations (``indices``) to
    test at. Output is an array of indices and distances.
    """
    kdtree = KDTree(sample_points)
    distance, index = kdtree.query(indices)
    return index, distance
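
# --- Illustrative usage (added; not part of the original snippet) ---
# `indices` here is simply a set of query locations in the same space as
# `sample_points`; assumes numpy as `np` and scipy's cKDTree bound to `KDTree`.
import numpy as np
from scipy.spatial import cKDTree as KDTree

sample_points = np.random.rand(50, 2)   # candidate points
query_points = np.random.rand(5, 2)     # locations to look up
idx, dist = closest_index(sample_points, query_points)
print(idx, dist)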
Example 8
        def check(name):
            points = DATASETS[name]

            tree = KDTree(points)
            vor = qhull.Voronoi(points)

            for p, v in vor.ridge_dict.items():
                # consider only finite ridges
                if not np.all(np.asarray(v) >= 0):
                    continue

                ridge_midpoint = vor.vertices[v].mean(axis=0)
                d = 1e-6 * (points[p[0]] - ridge_midpoint)

                dist, k = tree.query(ridge_midpoint + d, k=1)
                assert_equal(k, p[0])

                dist, k = tree.query(ridge_midpoint - d, k=1)
                assert_equal(k, p[1])
Example 9
def kdtree_clean(xx2d, yy2d, xS, yS, elevation2d):
	#REMOVE DODGY ADDED DATA FROM THE REGRIDDING BASED ON KDTREE. 
	# dist is how far away the nearest neighbours are. 
	# need to decide on this threshold.
	# ONLY DO THIS FOR POINTS THAT HAVE ALREADY BEEN CLASSIFIED AS RIDGES
	grid_points = np.c_[xx2d.ravel(), yy2d.ravel()]
	tree = KDTree(np.c_[xS, yS])
	dist, _ = tree.query(grid_points, k=1)
	dist = dist.reshape(xx2d.shape)
	elevation2d_KD=ma.masked_where(dist > 4, elevation2d)
	return elevation2d_KD
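
# --- Illustrative usage (added; not part of the original snippet) ---
# Mask regridded elevation cells that lie more than 4 units from any original
# observation. Assumes numpy as `np`, numpy.ma as `ma` and scipy's cKDTree as `KDTree`.
import numpy as np
import numpy.ma as ma
from scipy.spatial import cKDTree as KDTree

xx2d, yy2d = np.meshgrid(np.arange(0., 100.), np.arange(0., 100.))
elevation2d = np.random.rand(*xx2d.shape)
xS = np.random.uniform(0, 100, 500)   # x positions of the original observations
yS = np.random.uniform(0, 100, 500)   # y positions of the original observations
elevation2d_KD = kdtree_clean(xx2d, yy2d, xS, yS, elevation2d)
print(elevation2d_KD.count(), "of", elevation2d_KD.size, "grid cells kept")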
Example 10
def match_model_masses(isoMasses, starMasses):
    kdt = KDTree( isoMasses.reshape((len(isoMasses), 1)) )
    q_results = kdt.query(starMasses.reshape((len(starMasses), 1)), k=1)
    indices = q_results[1]

    dm_frac = np.abs(starMasses - isoMasses[indices]) / starMasses

    idx = np.where(dm_frac > 0.1)[0]
    indices[idx] = -1
    
    return indices
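
# --- Illustrative usage (added; not part of the original snippet) ---
# Match observed stellar masses to the closest isochrone grid mass; entries whose
# best match differs by more than 10% come back as -1. Assumes numpy as `np` and
# scipy's cKDTree bound to `KDTree`, as the snippet implies.
import numpy as np
from scipy.spatial import cKDTree as KDTree

isoMasses = np.linspace(0.1, 10.0, 100)        # model grid masses
starMasses = np.array([0.5, 1.02, 7.3, 25.0])  # observed masses; 25.0 has no close model
print(match_model_masses(isoMasses, starMasses))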
Example 11
def generate_galaxy(num_stars, spiral_arm_count, spiral_tightness, galaxy_radius, bulge_height, disk_height):
    
    #generate vertices
    star_dict = {}
    
    next_index = 0
    #spiral stars
    for i in xrange(int(num_stars*0.65)):
        star_dict[next_index] = create_vertex_spiral(max_radius=galaxy_radius, arm_count=spiral_arm_count, beta=spiral_tightness, disk_height=disk_height)
        next_index += 1
    
    #inner cluster stars
    for i in xrange(int(num_stars*0.15)):
        star_dict[next_index] = create_vertex_inner(max_radius=galaxy_radius * 0.8, bulge_height=bulge_height)
        next_index += 1
    
    #outer "spread out" stars
    while(len(star_dict) < num_stars):
        star_dict[next_index] = create_vertex_outer(max_radius=galaxy_radius * 0.9, disk_height=disk_height)
        next_index += 1
    
    #generate a KDTree from the star data in order to help with edges
    star_keys = star_dict.keys()
    star_values = star_dict.values()
    star_tree = KDTree(star_values)
    
    #compute the nearest neighbors for each vertex
    distance_data, index_data = star_tree.query(star_values, k=20, eps=0.1)
    
    #for each vertex, randomly add edges to its nearest neighbors
    edge_dict = {}
    for distances, indexes in zip(distance_data, index_data):
        v1 = star_keys[int(indexes[0])]
        
        if(v1 not in edge_dict):
            edge_dict[v1] = set()
        
        for distance, v2 in create_edges(zip(distances[1:],indexes[1:])):
            
            v2 = star_keys[int(v2)]
            
            edge_dict[v1].add(v2)
            
            if(v2 not in edge_dict):
                edge_dict[v2] = set()
            edge_dict[v2].add(v1)
    
    #remove disconnected components from the graph
    star_dict, edge_dict = remove_disconnected_stars(star_dict, edge_dict)
    
    #convert the star array to an array of dictionaries before returning, so other data can be added
    star_dict = {key:{'position':Vector3D(*p)} for key, p in star_dict.iteritems()}
    
    return star_dict, edge_dict
Example 12
def sample_colors(img, sample_points, n):
    h, w = img.shape[:2]

    print("Sampling colors...")
    tree = KDTree(np.array(sample_points))
    color_samples = collections.defaultdict(list)
    img_lab = rgb2lab(img)
    xx, yy = np.meshgrid(np.arange(h), np.arange(w))
    pixel_coords = np.c_[xx.ravel(), yy.ravel()]
    nearest = tree.query(pixel_coords)[1]

    i = 0
    for pixel_coord in pixel_coords:
        color_samples[tuple(tree.data[nearest[i]])].append(
            img_lab[tuple(pixel_coord)])
        i += 1

    print("Computing color means...")
    samples = []
    for point, colors in color_samples.items():
        avg_color = np.sum(colors, axis=0) / len(colors)
        samples.append(np.append(point, avg_color))

    if len(samples) > n:
        print("Downsampling {} to {} points...".format(len(samples), n))

    while len(samples) > n:
        tree = KDTree(np.array(samples))
        dists, neighbours = tree.query(np.array(samples), 2)
        dists = dists[:, 1]
        worst_idx = min(range(len(samples)), key=lambda i: dists[i])
        samples[neighbours[worst_idx][1]] += samples[neighbours[worst_idx][0]]
        samples[neighbours[worst_idx][1]] /= 2
        samples.pop(neighbours[worst_idx][0])

    color_samples = []
    for sample in samples:
        color = lab2rgb([[sample[2:]]])[0][0]
        color_samples.append(tuple(sample[:2][::-1]) + tuple(color))

    return color_samples
Example 13
def point_find_nearest_businesses(df, point, k=5, loc_cols=['latitude', 'longitude']):
    """
    Find the k businesses in `df` nearest to a given (lat, long) point.
    :param df: pd.DataFrame containing `loc_cols`
    :param point: (lat, long) tuple to search around
    :param k: number of nearest neighbours to return
    :param loc_cols: list of columns used as the point coordinates
    :return: the k nearest rows of `df`
    """
    tree = KDTree(df[loc_cols])
    distance, indices = tree.query(point, k)
    return df.iloc[indices]  # query() returns positional indices
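
# --- Illustrative usage (added; not part of the original snippet) ---
# Look up the 3 businesses nearest to a (lat, long) point. Assumes pandas as `pd`,
# numpy as `np` and scipy's cKDTree bound to `KDTree`, as the snippet implies.
# (Note: plain Euclidean distance on lat/long, as in the function itself.)
import numpy as np
import pandas as pd
from scipy.spatial import cKDTree as KDTree

df = pd.DataFrame({'latitude': np.random.uniform(40, 41, 100),
                   'longitude': np.random.uniform(-74, -73, 100),
                   'name': ['biz_%d' % i for i in range(100)]})
print(point_find_nearest_businesses(df, (40.5, -73.5), k=3))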
Example 14
	def neighborDistances(self,neighbors=64):

		"""
		Find the N-th nearest neighbors to each particle

		:param neighbors: neighbor order
		:type neighbors: int.

		:returns: array with units

		"""

		#Get the particle positions; compute them if not already available
		if hasattr(self,"positions"):
			positions = self.positions.copy()
		else:
			positions = self.getPositions(save=False)

		#Build the KD-Tree
		particle_tree = KDTree(positions.value)

		#For memory reasons, with large datasets it's better to proceed in chunks with nearest neighbors queries
		numPart = positions.shape[0]
		rp = np.zeros(numPart)

		#Split the particles in chunks
		chunkSize = numPart // neighbors
		remaining = numPart % neighbors

		#Cycle over the chunks, querying the tree
		for i in range(neighbors):
			rp[i*chunkSize:(i+1)*chunkSize] = particle_tree.query(positions[i*chunkSize:(i+1)*chunkSize].value,k=neighbors)[0][:,neighbors-1]

		if remaining:
			rp[neighbors*chunkSize:] = particle_tree.query(positions[neighbors*chunkSize:].value,k=neighbors)[0][:,neighbors-1]

		#Return
		return rp * positions.unit
Example 15
    def test_ridges(self, name):
        # Check that the ridges computed by Voronoi indeed separate
        # the regions of nearest neighborhood, by comparing the result
        # to KDTree.

        points = DATASETS[name]

        tree = KDTree(points)
        vor = qhull.Voronoi(points)

        for p, v in vor.ridge_dict.items():
            # consider only finite ridges
            if not np.all(np.asarray(v) >= 0):
                continue

            ridge_midpoint = vor.vertices[v].mean(axis=0)
            d = 1e-6 * (points[p[0]] - ridge_midpoint)

            dist, k = tree.query(ridge_midpoint + d, k=1)
            assert_equal(k, p[0])

            dist, k = tree.query(ridge_midpoint - d, k=1)
            assert_equal(k, p[1])
Example 16
    def compute_errors(self, mag_err_lim=None, dx_lim=None):
        """Estimates errors and completeness per star.
        
        Load photometry from the fake table (from the same chip and ext as the primary data).
        For each star in the phot table, get its magnitude.
        Use a kdtree to get the N most similar stars; compute statistics

        Parameters
        ----------

        frac : float
            Scalar fractional level of completeness. For example, 0.5 is the
            50% completeness limit.
        mag_err_lim : float
            Maximum absolute difference in magnitudes, in any band, for the
            star to be considered recovered.
        dx_lim : float
            Maximum distance between a fake star's input site and its
            observed site for the fake star to be considered recovered.
        """
        mag_errors = self._f.mag_errors()  # diffs nstars x nimages
        recovered = self._f.recovered(mag_err_lim=mag_err_lim, dx_lim=dx_lim)
        tree = KDTree(self._f.data['mag'])
        obs_mags = np.array([row['mag']
            for row in self._p.photTable.iterrows()])
        dists, indices = tree.query(obs_mags,
                k=100)
                # distance_upper_bound=mag_err_lim)
        nObs = obs_mags.shape[0]
        nImages = obs_mags.shape[1]
        sigmas = np.empty([nObs, nImages])
        comps = np.empty(nObs)
        for i in xrange(nObs):
            if np.any(obs_mags[i] > 50.):
                for j in xrange(nImages):
                    sigmas[i, j] = np.nan
                comps[i] = np.nan
                continue
            idx = indices[i, :].flatten()
            for j in xrange(nImages):
                # Estimate uncertainty in this band (image index)
                sigmas[i, j] = np.std(mag_errors[idx, j])
            # Estimate completeness for this star
            c = recovered[indices[i, :]]
            comps[i] = np.float(c.sum()) / len(c)

        # insert errors into the HDF5 table (need to make a new column)
        self._p.add_column("ast_mag_err", sigmas)
        # insert completeness for this star
        self._p.add_column("comp", comps)
Example 17
def main():
    # read in the file
    try:
        ifs = open(sys.argv[1])
        sample, ext = os.path.splitext(sys.argv[1])
    except IndexError:
        ifs = sys.stdin
        sample = ''
    data = np.loadtxt(ifs, delimiter=',')
    if ifs is not sys.stdin:
        ifs.close()
    # view of the com
    com = data[:,1:4]
    # construct a KD tree
    tree = KDTree(com)
    # query KD tree to find the first nearest neighbor
    dist, idx = tree.query(com, k=2)
    nn = [(i, j, d2) for ((d1, d2), (i, j)) in zip(dist, idx)]
    # histogram of the nearest neighbor distance
    hist(np.array(nn)[:,2],
         title='{} pore-pore distances'.format(sample),
         output='{}.pdf'.format(sample))
    # save the nearest neighbor distance to .json files
    ofile = '{}_pore-distribution.json'.format(sample)
    medianDist = np.median(np.array(nn)[:,2])
    cmp0 = lambda lhs, rhs: -1 if lhs[0] < rhs[0] else \
        (1 if lhs[0] > rhs[0] else 0)
    dist = {
        'Pore ID' : list(data[:,0].astype(int)),
        'center of mass X' : {
            'units' : '$\mu$m',
            'values' : list(data[:,1])},
        'center of mass Y' : {
            'units' : '$\mu$m',
            'values' : list(data[:,2])},
        'center of mass Z' : {
            'units' : '$\mu$m',
            'values' : list(data[:,3])},
        'volume' : {
            'units' : '$\mu$m^3',
            'values' : list(data[:,4])},
        'nearest neighbor distance' : {
            'units' : '$\mu$m',
            'values' : [entry[2] for entry in sorted(nn, cmp=cmp0)]},
        'median nearest neighbor distance' : {
            'units' : '$\mu$m',
            'values' : medianDist}
    }
    json.dump(dist, open(ofile, 'w'))
Example 18
def match(s, h, fits_image, tolerance=4):
    """
    Parameters
    ----------
    s, h : obj
        Catalog objects. Each must have `ra` and `dec` attributes
        as 1-D Numpy arrays.

    fits_image : string
        FITS image for conversion of RA,DEC to X,Y.

    tolerance : number
        Match tolerance in pixels.

    Returns
    -------
    xmatch, ymatch
        Matched X,Y from first catalog.

    xhmatch, yhmatch
        Matched X,Y from second catalog.

    """
    # Now use pywcs to put these on some sort of projection. I think as
    # long as you use the same for both data sets it's not really important
    # what the projection is. In my case I read in a fits image associated
    # with the first catalog and use that header info.
    hdu = io.fits.open(fits_image)
    wcs = pywcs.WCS(hdu['PRIMARY'].header)

    # Convert sky to x,y positions
    x, y = wcs.wcs_world2pix(s.ra, s.dec, 0)
    xh, yh = wcs.wcs_world2pix(h.ra, h.dec, 0)

    # Create a KD Tree
    tree = KDTree(zip(x.ravel(), y.ravel()))

    # Search it for the nearest neighbor
    # d = distance of the nearest neighbor
    # i = index in x,y arrays of the nearest neighbor for each source in xh,yh
    d, i = tree.query(zip(xh.ravel(), yh.ravel()), k=1)

    # Give me just the matchers within a tolerance
    j = d < tolerance
    ii = i[j]  # match within N pixels; trickier to do this in ra,dec
    xmatch, ymatch = x[ii], y[ii]
    xhmatch, yhmatch = xh[j], yh[j]

    return xmatch, ymatch, xhmatch, yhmatch
Example 19
class FStrategy(object):
    '''Class implements a connection strategy based on nearest neighbors.
    The class keeps track of states using a k-d tree which is polled for nearest
    states and needs to be updated whenever new states are added to the
    transition system.
    '''
    def __init__(self, no_neighbors, radius):
        # nearest neighbors data structure
        self.nn = None
        self.max_no_neighbors = no_neighbors
        self.max_dist = radius
        self.states = [] # TODO: can I get rid of this? maybe take it from ts
    
    def add(self, state):
        self.states.append(state)
        self.nn = NearestNeightbor([s.conf[:2] for s in self.states])
    
    def nearest(self, state, ret_dist=False):
        s, d = None, -1
        if self.nn:
            idx = self.nn.query(state.conf[:2])
            d, idx = idx
            assert idx == int(idx)
            p = self.nn.data[idx]
            s = self.states[idx]
            assert np.all(s.conf[:2] == p)
        if ret_dist:
            return s, d
        return s
    
    def near(self, state):
        if self.nn:
            _, idxs = self.nn.query(state.conf[:2], k=self.max_no_neighbors,
                                    distance_upper_bound=self.max_dist)
            return [self.states[idx] for idx in idxs if idx < len(self.states)]
        return
Example 20
def EstimateLatticeConstant(pos):
    """
    Estimate the lattice constant of a point set that represents a square grid.

    Parameters
    ----------
    pos : array like
        A 2D array of shape (N, 2) containing the coordinates of the points.

    Returns
    -------
    kxy : array like [2x2]
        lattice constants

    """
    # Find the closest 4 neighbours (excluding itself) for each point.
    tree = KDTree(pos)
    dd, ii = tree.query(pos, k=5)
    dr = dd[:, 1:]

    # Determine the median radial distance and filter all points beyond
    # 2*sigma.
    med = numpy.median(dr)
    std = numpy.std(dr)
    outliers = numpy.abs(dr - med) > (2 * std)  # doesn't work well if std is very high

    # Determine horizontal and vertical distance (only radial distance is
    # returned by tree.query).
    dpos = pos[ii[:, 0, numpy.newaxis]] - pos[ii[:, 1:]]
    dx, dy = dpos[:, :, 0], dpos[:, :, 1]
    assert numpy.all(numpy.abs(dr - numpy.hypot(dx, dy)) < 1.0e-12)
    # Use k-means to group the points into two directions.
    X = numpy.column_stack((dx[~outliers], dy[~outliers]))
    X[X[:, 0] < -0.5 * med] *= -1
    X[X[:, 1] < -0.5 * med] *= -1

    centroids, _ = kmeans(X, 2)
    labels = numpy.argmin(cdist(X, centroids), axis=1)
    kxy = numpy.array([numpy.median(X[labels.ravel() == 0], axis=0),
                       numpy.median(X[labels.ravel() == 1], axis=0)])

    # The angle between the two directions should be close to 90 degrees.
    alpha = numpy.math.atan2(numpy.linalg.norm(numpy.cross(*kxy)), numpy.dot(*kxy))
    if abs(alpha - math.pi / 2) > math.radians(2.5):
        logging.warning('Estimated lattice angle differs from 90 degrees by '
                        'more than 2.5 degrees. Input data could be wrong')

    return kxy
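
# --- Illustrative usage (added; not part of the original snippet) ---
# A noisy square grid with pitch 2 should yield two lattice vectors of length ~2 at
# roughly 90 degrees. Assumes the module-level imports the snippet relies on
# (numpy, scipy.cluster.vq.kmeans, scipy.spatial.distance.cdist, cKDTree as KDTree,
# math, logging).
import numpy

xx, yy = numpy.meshgrid(numpy.arange(10), numpy.arange(10))
pos = numpy.column_stack((xx.ravel() * 2.0, yy.ravel() * 2.0))
pos = pos + numpy.random.normal(scale=0.01, size=pos.shape)
print(EstimateLatticeConstant(pos))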
Example 21
def render(img, color_samples):
    print("Rendering...")
    h, w = [2*x for x in img.shape[:2]]
    xx, yy = np.meshgrid(np.arange(h), np.arange(w))
    pixel_coords = np.c_[xx.ravel(), yy.ravel()]

    colors = np.empty([h, w, 3])
    coords = []
    for color_sample in color_samples:
        coord = tuple(x*2 for x in color_sample[:2][::-1])
        colors[coord] = color_sample[2:]
        coords.append(coord)

    tree = KDTree(coords)
    idxs = tree.query(pixel_coords)[1]
    data = colors[tuple(tree.data[idxs].astype(int).T)].reshape((w, h, 3))
    data = np.transpose(data, (1, 0, 2))

    return downscale_local_mean(data, (2, 2, 1))
Example 22
def row_find_nearest_businesses(df, row, k=5, loc_cols=['latitude', 'longitude']):
    """
    Finds the k nearest neighbors of a given row.

    :param row: Row that we are interested in finding the nearest neighbors for
    :param df: pd.DataFrame with loc_cols
    :param k:
    :param loc_cols: List of columns that we are comparing to for nearest neighbors
    :return:

    Example
    ---
    >>> row_find_nearest_businesses(businesses, 1)
    """
    tree = KDTree(df[loc_cols])
    distance, indices = tree.query(df[loc_cols], k + 1)
    neighbors = df.iloc[indices[row][1:]] # Start at 1 to ignore the current index (iloc: query returns positional indices)

    return neighbors
Example 23
def FindGridSpots(image, repetition):
    """
    Find a grid of spots in an image.

    Parameters
    ----------
    image : array like
        Data array containing the greyscale image.
    repetition : tuple of ints
        Number of expected spots in (X, Y).

    Returns
    -------
    spot_coordinates : array like
        A 2D array of shape (N, 2) containing the coordinates of the spots.
    translation : tuple of two floats
    scaling : tuple of two floats
    rotation : float

    """
    spot_positions = MaximaFind(image, repetition[0] * repetition[1])
    if len(spot_positions) < repetition[0] * repetition[1]:
        logging.warning('Not enough spots found, returning only the found spots.')
        return spot_positions, None, None, None
    # Estimate transformation
    lattice_constants = EstimateLatticeConstant(spot_positions)
    transformation_matrix = numpy.transpose(lattice_constants)
    if numpy.linalg.det(lattice_constants) < 0.:
        transformation_matrix = numpy.fliplr(transformation_matrix)
    translation = numpy.mean(spot_positions, axis=0)
    transform_to_spot_positions = Transform(translation=translation)
    transform_to_spot_positions.transformation_matrix = transformation_matrix
    # Iterative closest point algorithm - single iteration, to fit a grid to the found spot positions
    grid = GridPoints(*repetition)
    spot_grid = transform_to_spot_positions.apply(grid)
    tree = KDTree(spot_positions)
    dd, ii = tree.query(spot_grid, k=1)

    pos_sorted = spot_positions[ii.ravel(), :]
    transformation = Transform.from_pointset(grid, pos_sorted)
    spot_coordinates = transformation.apply(grid)

    return spot_coordinates, translation, transformation.scaling, transformation.rotation
Example 24
def main():
    image = Image.open(sys.argv[1])
    image2 = Image.new('RGB', image.size, BACKGROUND)
    draw_image = ImageDraw.Draw(image2)

    width, height = image.size

    min_diameter = (width + height) / 200
    max_diameter = (width + height) / 75

    circle = generate_circle(width, height, min_diameter, max_diameter)
    circles = [circle]

    circle_draw(draw_image, image, circle)

    try:
        for i in xrange(TOTAL_CIRCLES):
            tries = 0
            if IMPORTED_SCIPY:
                kdtree = KDTree([(x, y) for (x, y, _) in circles])
                while True:
                    circle = generate_circle(width, height, min_diameter, max_diameter)
                    elements, indexes = kdtree.query([(circle[0], circle[1])], k=12)
                    for element, index in zip(elements[0], indexes[0]):
                        if not np.isinf(element) and circle_intersection(circle, circles[index]):
                            break
                    else:
                        break
                    tries += 1
            else:
                while any(circle_intersection(circle, circle2) for circle2 in circles):
                    tries += 1
                    circle = generate_circle(width, height, min_diameter, max_diameter)

            print '{}/{} {}'.format(i, TOTAL_CIRCLES, tries)

            circles.append(circle)
            circle_draw(draw_image, image, circle)
    except (KeyboardInterrupt, SystemExit):
        pass

    image2.show()
Example 25
def spherematch(ra1, dec1, ra2, dec2, tolerance=1/3600.):

	"""
	Uses a k-d tree to efficiently match two sets of coordinates in spherical
	geometry, with a tolerance in degrees.
	"""

	args = ra1, dec1, ra2, dec2
	ra1, dec1, ra2, dec2 = map(partial(np.array, copy=False), args)
	coords1 = radec_to_coords(ra1, dec1)
	coords2 = radec_to_coords(ra2, dec2)
	kdt = KDT(coords2)
	idx2 = kdt.query(coords1)[1]
	ds = great_circle_distance(ra1, dec1, ra2[idx2], dec2[idx2])
	idx1 = np.arange(ra1.size)
	msk = ds < tolerance
	idx1 = idx1[msk]
	idx2 = idx2[msk]
	ds = ds[msk]
	return idx1, idx2, ds
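
# --- Illustrative helper sketches (added; not from the original source) ---
# spherematch() above relies on two helpers that are not shown here. A plausible
# minimal version, assuming radec_to_coords returns 3-D unit vectors and
# great_circle_distance returns the angular separation in degrees:
import numpy as np

def radec_to_coords(ra, dec):
    ra_r, dec_r = np.radians(ra), np.radians(dec)
    return np.column_stack((np.cos(dec_r) * np.cos(ra_r),
                            np.cos(dec_r) * np.sin(ra_r),
                            np.sin(dec_r)))

def great_circle_distance(ra1, dec1, ra2, dec2):
    # angular separation in degrees via the dot product of unit vectors
    u1, u2 = radec_to_coords(ra1, dec1), radec_to_coords(ra2, dec2)
    cossep = np.clip(np.sum(u1 * u2, axis=1), -1.0, 1.0)
    return np.degrees(np.arccos(cossep))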
Example 26
 def _find_nearest_neighbors(self):
     """
     Internal function to compute the nearest neighbors of all 
     collided/unresolved galaxies
     """
     # we can double count using any uncollided galaxies (for which
     # we have a redshift)
     cond = (self._data.collided == 0)|(self._data.resolved == 1)
     uncollided_gals = self._data[cond]
 
     # initialize the kdtree for NN calculations
     tree = KDTree(uncollided_gals[self.coord_keys])
 
     # find the NN for only the collided galaxies
     cond = (self.sample.collided == 1)&(self.sample.resolved == 0)
     collided_gals = self.sample[cond]
     dists, inds = tree.query(collided_gals[self.coord_keys], k=1)
 
     self._collided_unresolved_ids = collided_gals.index
     self._nearest_neighbor_ids    = uncollided_gals.iloc[inds].index
     self._metadata += ['_collided_unresolved_ids', '_nearest_neighbor_ids']
Example 27
def lab2ind(im, colors=256):
    """convert a Lab image to indexed colors
    :param im: nparray (x,y,n) containing the image
    :param colors: int number of colors or predefined Palette
    :ref: http://scikit-learn.org/stable/auto_examples/cluster/plot_color_quantization.html
    """
    # http://stackoverflow.com/questions/10818546/finding-index-of-nearest-point-in-numpy-arrays-of-x-and-y-coordinates
    if isinstance(colors, int):
        p = palette(im, colors)  #
        pal = [Color(c, 'lab') for c in p]
    else:
        pal = colors
        p = [c.lab for c in flatten(pal)]
    w, h, d = im.shape
    s = w * h  # number of pixels
    flat = np.reshape(im, (s, d))
    from scipy.spatial import cKDTree as KDTree  # compiled is MUCH faster
    mytree = KDTree(p)
    _, indexes = mytree.query(flat)
    im = indexes.reshape(w, h)
    return im, pal
Example 28
def initial_weights(data, k, min_dist=None):
    """ Calculate a matrix of weights for the k nearest points.
    
    For N data points in D dimensions, data has shape (N, D).
    
    min_dist (c_dist in K06) is 1.0/(some known minimum distance to a nearest neighbor)
    
    k is the number of nearest neighbors for which weights (K06 eq. 1) should be calculated
    
    """
    
    knn = KDTree(data)
    # D, i contains the point itself and the associated zero distance, of course.
    # D are distances (0, d1, d2, d3, ... , dk-1)
    # i are indices (i0, i1, i2, i3, ..., ik-1) where i[:,0] increment by one
    # Both have shape (N points, k neighbors)

    D, conn = knn.query(knn.data, k)
    if min_dist is None:
        min_dist = D[D>0].min()
    W = np.exp(-min_dist * D)
    return W, conn
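
# --- Illustrative usage (added; not part of the original snippet) ---
# Weights for the 6 nearest neighbours of 200 random 3-D points. Assumes numpy as
# `np` and scipy's cKDTree bound to `KDTree`, as the snippet implies.
import numpy as np
from scipy.spatial import cKDTree as KDTree

data = np.random.rand(200, 3)
W, conn = initial_weights(data, k=6)
print(W.shape, conn.shape)   # both (200, 6); column 0 is the point itself (weight 1)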
Example 29
class Parsec(StarKitModel):

    mh = Parameter()
    mass = Parameter()
    age = Parameter()

    inputs = ()
    outputs = ('teff', 'logg', 'lum')

    def __init__(self, parsec_store, mh=0.0, mass=1.0, age=5e9):
        super(Parsec, self).__init__(mh, mass, age)
        try:
            self.parsec_store = pd.HDFStore(parsec_store)
        except TypeError:
            self.parsec_store = parsec_store

        self.evolution_data = [self.parsec_store[key]
                               for key in self.parsec_store.keys()]
        self.parsec_store.close()
        self.mh_mass = np.empty((len(self.evolution_data), 2))

        for i, ev_data in enumerate(self.evolution_data):
            mh = ev_data['MH'][0]
            mass = ev_data['MASS'][0]
            self.mh_mass[i] = mh, mass

        self.mh_mass_kd_tree = KDTree(self.mh_mass)


    def evaluate(self, mh, mass, age):
        distance, idx = self.mh_mass_kd_tree.query(
            np.array([mh, mass]).squeeze())
        ev_data = self.evolution_data[idx]
        age_ev_data = ev_data['AGE'].values
        out_ev_data = ev_data[['TEFF', 'LOG_G', 'LOG_L']].values
        teff, logg, log_l = interpolate.interp1d(
            age_ev_data, out_ev_data.T, bounds_error=False)(np.squeeze(age))
        return teff, logg, 10**log_l
def get_potential_cells(coors, cmesh, centroids=None, extrapolate=True):
    """
    Get cells that potentially contain points with the given physical
    coordinates.

    Parameters
    ----------
    coors : array
        The physical coordinates.
    cmesh : CMesh instance
        The cmesh defining the cells.
    centroids : array, optional
        The centroids of the cells.
    extrapolate : bool
        If True, even the points that are surely outside of the
        cmesh are considered and assigned potential cells.

    Returns
    -------
    potential_cells : array
        The indices of the cells that potentially contain the points.
    offsets : array
        The offsets into `potential_cells` for each point: a point ``ip`` is
        potentially in cells ``potential_cells[offsets[ip]:offsets[ip+1]]``.
    """
    from scipy.spatial import cKDTree as KDTree

    if centroids is None:
        centroids = cmesh.get_centroids(cmesh.tdim)

    kdtree = KDTree(coors)

    conn = cmesh.get_cell_conn()
    cc = conn.indices.reshape(cmesh.n_el, -1)
    cell_coors = cmesh.coors[cc]

    rays = cell_coors - centroids[:, None]
    radii = nm.linalg.norm(rays, ord=nm.inf, axis=2).max(axis=1)

    potential_cells = [[]] * coors.shape[0]
    for ic, centroid in enumerate(centroids):
        ips = kdtree.query_ball_point(centroid, radii[ic], p=nm.inf)
        if len(ips):
            for ip in ips:
                if not len(potential_cells[ip]):
                    potential_cells[ip] = []

                potential_cells[ip].append(ic)

    lens = nm.array([0] + [len(ii) for ii in potential_cells], dtype=nm.int32)

    if extrapolate:
        # Deal with the points outside of the field domain - insert elements
        # incident to the closest mesh vertex.
        iin = nm.where(lens[1:] == 0)[0]
        if len(iin):
            kdtree = KDTree(cmesh.coors)
            ics = kdtree.query(coors[iin])[1]
            cmesh.setup_connectivity(0, cmesh.tdim)
            conn = cmesh.get_conn(0, cmesh.tdim)

            oo = conn.offsets
            for ii, ip in enumerate(iin):
                ik = ics[ii]
                potential_cells[ip] = conn.indices[oo[ik]:oo[ik + 1]]
                lens[ip + 1] = len(potential_cells[ip])

    offsets = nm.cumsum(lens, dtype=nm.int32)
    potential_cells = nm.concatenate(potential_cells).astype(nm.int32)

    return potential_cells, offsets
Example 31
def xymatch(x1, y1, x2, y2, tol=None, nnearest=1):
    """
    Finds matches in one catalog to another.

    Parameters
    ----------
    x1 : array-like
        X-coordinates of first catalog
    y1 : array-like
        Y-coordinates of first catalog
    x2 : array-like
        X-coordinates of second catalog
    y2 : array-like
        Y-coordinates of second catalog
    tol : float or None, optional
        How close a match has to be to count as a match.  If None,
        all nearest neighbors for the first catalog will be returned.
    nnearest : int, optional
        The nth neighbor to find.  E.g., 1 for the nearest neighbor, 2 for the
        second nearest neighbor, etc.  Particularly useful if you want to get
        the nearest *non-self* neighbor of a catalog.  To do this, use:
        ``xymatch(x, y, x, y, nnearest=2)``

    Returns
    -------
    idx1 : int array
        Indices into the first catalog of the matches. Will never be
        larger than `x1`/`y1`.
    idx2 : int array
        Indices into the second catalog of the matches. Will never be
        larger than `x1`/`y1`.
    ds : float array
        Distance between the matches

    """

    x1 = np.array(x1, copy=False)
    y1 = np.array(y1, copy=False)
    x2 = np.array(x2, copy=False)
    y2 = np.array(y2, copy=False)

    if x1.shape != y1.shape:
        raise ValueError('x1 and y1 do not match!')
    if x2.shape != y2.shape:
        raise ValueError('x2 and y2 do not match!')

    # this is equivalent to, but faster than just doing np.array([x1, y1])
    coords1 = np.empty((x1.size, 2))
    coords1[:, 0] = x1
    coords1[:, 1] = y1

    # this is equivalent to, but faster than just doing np.array([x2, y2])
    coords2 = np.empty((x2.size, 2))
    coords2[:, 0] = x2
    coords2[:, 1] = y2

    kdt = KDT(coords2)
    if nnearest == 1:
        ds,idxs2 = kdt.query(coords1)
    elif nnearest > 1:
        retval = kdt.query(coords1, nnearest)
        ds = retval[0]
        idxs2 = retval[1][:, -1]
    else:
        raise ValueError('invalid nnearest ' + str(nnearest))

    idxs1 = np.arange(x1.size)

    if tol is not None:
        msk = ds < tol
        idxs1 = idxs1[msk]
        idxs2 = idxs2[msk]
        ds = ds[msk]

    return idxs1, idxs2, ds
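
# --- Illustrative usage (added; not part of the original snippet) ---
# Match a catalog against a slightly jittered copy of itself. Assumes numpy as `np`
# and scipy's cKDTree bound to `KDT`, as the snippet implies.
import numpy as np
from scipy.spatial import cKDTree as KDT

x1, y1 = np.random.rand(500), np.random.rand(500)
x2 = x1 + np.random.normal(scale=1e-3, size=500)
y2 = y1 + np.random.normal(scale=1e-3, size=500)
idx1, idx2, ds = xymatch(x1, y1, x2, y2, tol=0.01)
print(len(idx1), "matches within 0.01")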
Example 32
class KDicTree(dict):
    '''
    Wrapper around the scipy.spatial.KDTree for labelled points.
    Use like dict to register or update points:
    
    tree = KDicTree({'1':(0,0), 2:(2,2), '3':(45,45)})
    tree['1'] = (1, 1)
    tree['2'] = (5, 5)
    tree['3'] = (50, 50)
    
    Then use KDTree queries:
    
    tree.query_ball_point( (3, 3), 10 )
        ['1', 2, '2']

    Parameters
    ----------
    data : labelled (N,K) dict
        The data points to be indexed, labelled in a dictionary.
    leafsize : int, optional
        The number of points at which the algorithm switches over to 
        brute-force. Has to be positive.
    
    See Also
    --------
    scipy.spatial.KDTree
    scipy.spatial.cKDTree
    '''
    def __init__(self, data, leafsize=16):
        self.tree = None
        self.ids = []  # maps tree to dict keys
        self.altered = True
        self.leafsize = leafsize
        super().__init__(data)

    def __setitem__(self, key, point):
        '''Set point for self[key]'''
        super().__setitem__(key, point)
        self.altered = True

    def __delitem__(self, key):
        '''Delete self[key].'''
        super().__delitem__(key)
        self.altered = True

    def build_tree(self):
        '''Gets called automatically by a query.'''
        if not self.altered: return
        self.tree = KDTree(list(self.values()), leafsize=self.leafsize)
        self.ids = list(self.keys())
        self.altered = False

    def map_ids(self, ids):
        '''Maps the result of Querys to dict keys.'''
        if isinstance(ids, (tuple, list, ndarray)):
            return tuple(map(self.map_ids, ids))
        return self.ids[ids]

    def query(self, x, k=1, eps=0, p=2, distance_upper_bound=float("inf")):
        '''Query the kd-tree for nearest neighbors.'''
        self.build_tree()
        dists, ids = self.tree.query(x, k, eps, p, distance_upper_bound)
        return (dists, self.map_ids(ids))

    def query_ball_point(self, x, r, p=2., eps=0):
        '''Find all points within distance r of point(s) x.'''
        self.build_tree()
        return self.map_ids(self.tree.query_ball_point(x, r, p, eps))

    def query_pairs(self, r, p=2., eps=0):
        '''Find all pairs of points within a distance r.'''
        self.build_tree()
        return [
            tuple(self.map_ids(pair))
            for pair in self.tree.query_pairs(r, p=p, eps=eps)
        ]
Example 33
def mosaic_texture(humfile, sonpath, cs2cs_args = "epsg:26949", res = 99, nn = 5, weight = 1):
         
    '''
    Create mosaics of the spatially referenced sidescan echograms

    Syntax
    ----------
    [] = PyHum.mosaic_texture(humfile, sonpath, cs2cs_args, res, nn, weight)

    Parameters
    ----------
    humfile : str
       path to the .DAT file
    sonpath : str
       path where the *.SON files are
    cs2cs_args : int, *optional* [Default="epsg:26949"]
       arguments to create coordinates in a projected coordinate system
       this argument gets given to pyproj to turn wgs84 (lat/lon) coordinates
       into any projection supported by the proj.4 libraries
    res : float, *optional* [Default=99]
       grid resolution of output gridded texture map
       if res=99, res will be determined automatically from the spatial resolution of 1 pixel
    nn: int, *optional* [Default=5]
       number of nearest neighbours for gridding
    weight: int, *optional* [Default=1]
       specifies the type of pixel weighting in the gridding process
       weight = 1, based on grazing angle and inverse distance weighting
       weight = 2, based on grazing angle only
       weight = 3, inverse distance weighting only
       weight = 4, no weighting
    
    Returns
    -------

    sonpath+'GroundOverlay.kml': kml file
        contains gridded (or point cloud) sidescan intensity map for importing into google earth
        of the pth chunk

    sonpath+'map.png' : 
        image overlay associated with the kml file

    '''

    # prompt user to supply file if no input file given
    if not humfile:
       print 'An input file is required!!!!!!'
       Tk().withdraw() # we don't want a full GUI, so keep the root window from appearing
       humfile = askopenfilename(filetypes=[("DAT files","*.DAT")]) 

    # prompt user to supply directory if no input sonpath is given
    if not sonpath:
       print 'A *.SON directory is required!!!!!!'
       Tk().withdraw() # we don't want a full GUI, so keep the root window from appearing
       sonpath = askdirectory() 

    # print given arguments to screen and convert data type where necessary
    if humfile:
       print 'Input file is %s' % (humfile)

    if sonpath:
       print 'Sonar file path is %s' % (sonpath)

    if cs2cs_args:
       print 'cs2cs arguments are %s' % (cs2cs_args)    

    if res:
       res = np.asarray(res,float)
       print 'Gridding resolution: %s' % (str(res))      
       
    if nn:
       nn = int(nn)
       print 'Number of nearest neighbours for gridding: %s' % (str(nn))
                    
    if weight:
       weight = int(weight)
       print 'Weighting for gridding: %s' % (str(weight))                   


    ##nn = 5 #number of nearest neighbours in gridding
    ##noisefloor=10 # noise threshold in dB W

    # start timer
    if os.name=='posix': # true if linux/mac or cygwin on windows
       start = time.time()
    else: # windows
       start = time.clock()

    trans =  pyproj.Proj(init=cs2cs_args)

    # if son path name supplied has no separator at end, put one on
    if sonpath[-1]!=os.sep:
       sonpath = sonpath + os.sep

    base = humfile.split('.DAT') # get base of file name for output
    base = base[0].split(os.sep)[-1]

    # remove underscores, negatives and spaces from basename
    base = humutils.strip_base(base)

    meta = loadmat(os.path.normpath(os.path.join(sonpath,base+'meta.mat')))

    esi = np.squeeze(meta['e'])
    nsi = np.squeeze(meta['n']) 
    
    theta = np.squeeze(meta['heading'])/(180/np.pi)

    # load memory mapped scans
    shape_port = np.squeeze(meta['shape_port'])
    if shape_port!='':
       if os.path.isfile(os.path.normpath(os.path.join(sonpath,base+'_data_port_lar.dat'))):
          port_fp = io.get_mmap_data(sonpath, base, '_data_port_lar.dat', 'float32', tuple(shape_port))
       else:
          port_fp = io.get_mmap_data(sonpath, base, '_data_port_la.dat', 'float32', tuple(shape_port))

    shape_star = np.squeeze(meta['shape_star'])
    if shape_star!='':
       if os.path.isfile(os.path.normpath(os.path.join(sonpath,base+'_data_star_lar.dat'))):
             star_fp = io.get_mmap_data(sonpath, base, '_data_star_lar.dat', 'float32', tuple(shape_star))
       else:
          star_fp = io.get_mmap_data(sonpath, base, '_data_star_la.dat', 'float32', tuple(shape_star))

    # time varying gain
    tvg = ((8.5*10**-5)+(3/76923)+((8.5*10**-5)/4))*meta['c']
        
    # depth correction
    dist_tvg = np.squeeze(((np.tan(np.radians(25)))*np.squeeze(meta['dep_m']))-(tvg))

    # read in range data
    R_fp = io.get_mmap_data(sonpath, base, '_data_range.dat', 'float32', tuple(shape_star))

    dx = np.arcsin(meta['c']/(1000*meta['t']*meta['f']))
    pix_m = meta['pix_m']
    c = meta['c']

    if not os.path.isfile( os.path.normpath(os.path.join(sonpath,base+"S.p")) ):
    #if 2 > 1:
       inputfiles = []
       if len(shape_star)>2:    
          for p in xrange(len(star_fp)):
             e = esi[shape_port[-1]*p:shape_port[-1]*(p+1)]
             n = nsi[shape_port[-1]*p:shape_port[-1]*(p+1)]
             t = theta[shape_port[-1]*p:shape_port[-1]*(p+1)]
             d = dist_tvg[shape_port[-1]*p:shape_port[-1]*(p+1)]
             dat_port = port_fp[p]
             dat_star = star_fp[p]
             data_R = R_fp[p]
             print "writing chunk %s " % (str(p))
             write_points(e, n, t, d, dat_port, dat_star, data_R, pix_m, res, cs2cs_args, sonpath, p, c, dx)
             inputfiles.append(os.path.normpath(os.path.join(sonpath,'x_y_class'+str(p)+'.asc')))
       else:
          p=0
          print "writing chunk %s " % (str(p))
          write_points(esi, nsi, theta, dist_tvg, port_fp, star_fp, R_fp, meta['pix_m'], res, cs2cs_args, sonpath, 0, c, dx)
          inputfiles.append(os.path.normpath(os.path.join(sonpath,'x_y_class'+str(p)+'.asc')))         
          
       #trans =  pyproj.Proj(init=cs2cs_args)

       # D, R, h, t
       print "reading points from %s files" % (str(len(inputfiles)))
       X,Y,S,D,R,h,t,i = getxys(inputfiles)

       print "%s points read from %s files" % (str(len(S)), str(len(inputfiles)))

       # remove values where sidescan intensity is zero
       ind = np.where(np.logical_not(S==0))[0]

       X = X[ind]; Y = Y[ind]
       S = S[ind]; D = D[ind]
       R = R[ind]; h = h[ind]
       t = t[ind]; i = i[ind]
       del ind   
   
       # save to file for temporary storage
       pickle.dump( S, open( os.path.normpath(os.path.join(sonpath,base+"S.p")), "wb" ) ); del S
       pickle.dump( D, open( os.path.normpath(os.path.join(sonpath,base+"D.p")), "wb" ) ); del D
       pickle.dump( t, open( os.path.normpath(os.path.join(sonpath,base+"t.p")), "wb" ) ); del t
       pickle.dump( i, open( os.path.normpath(os.path.join(sonpath,base+"i.p")), "wb" ) ); del i

       pickle.dump( X, open( os.path.normpath(os.path.join(sonpath,base+"X.p")), "wb" ) ); del X
       pickle.dump( Y, open( os.path.normpath(os.path.join(sonpath,base+"Y.p")), "wb" ) ); del Y
       pickle.dump( R, open( os.path.normpath(os.path.join(sonpath,base+"R.p")), "wb" ) ); 
       pickle.dump( h, open( os.path.normpath(os.path.join(sonpath,base+"h.p")), "wb" ) ); 

       #grazing angle
       g = np.arctan(R.flatten(),h.flatten())
       pickle.dump( g, open( os.path.normpath(os.path.join(sonpath,base+"g.p")), "wb" ) ); del g, R, h
   
    print "creating grids ..."   

    if res==0:
       res=99

    if res==99:

       #### prepare grids
       R = pickle.load( open( os.path.normpath(os.path.join(sonpath,base+"R.p")), "rb" ) )

       ## actual along-track resolution is this: dx times dy = Af
       tmp = R * dx * (c*0.007 / 2)
       del R

       resg = np.min(tmp[tmp>0])
       del tmp
    else:
       resg = res

    X = pickle.load( open( os.path.normpath(os.path.join(sonpath,base+"X.p")), "rb" ) )
    Y = pickle.load( open( os.path.normpath(os.path.join(sonpath,base+"Y.p")), "rb" ) )
    
    humlon, humlat = trans(X, Y, inverse=True)

    grid_x, grid_y = np.meshgrid( np.arange(np.min(X), np.max(X), resg), np.arange(np.min(Y), np.max(Y), resg) )    
 
    shape = np.shape(grid_x)

    tree = KDTree(zip(X.flatten(), Y.flatten()))
    del X, Y

    print "mosaicking ..."   
    #k nearest neighbour
    try:
       dist, inds = tree.query(zip(grid_x.flatten(), grid_y.flatten()), k = nn, n_jobs=-1)
    except:
       #print ".... update your scipy installation to use faster kd-tree"   
       dist, inds = tree.query(zip(grid_x.flatten(), grid_y.flatten()), k = nn)    
    
    #del grid_x, grid_y
    
    if weight==1:
       g = pickle.load( open( os.path.normpath(os.path.join(sonpath,base+"g.p")), "rb" ) )
       w = g[inds] + 1.0 / dist**2
       del g
    elif weight==2:
       g = pickle.load( open( os.path.normpath(os.path.join(sonpath,base+"g.p")), "rb" ) )
       w = g[inds]
       del g
    elif weight==3:
       w = 1.0 / dist**2    
    elif weight==4:
       w = 1.0
    
    #g = pickle.load( open( os.path.normpath(os.path.join(sonpath,base+"g.p")), "rb" ) )
    #w = g[inds] + 1.0 / dist**2
    #del g

    if weight < 4:
       w[np.isinf(w)]=1
       w[np.isnan(w)]=1
       w[w>10000]=10000
       w[w<=0]=1
    
    # load in sidescan intensity
    S = pickle.load( open( os.path.normpath(os.path.join(sonpath,base+"S.p")), "rb" ) )
    # filter out noise pixels
    S[S<noisefloor] = np.nan

    if nn==1:
       Sdat_g = (w * S.flatten()[inds]).reshape(shape)
       del w
       dist = dist.reshape(shape)
    else:
       if weight < 4:
          Sdat_g = (np.nansum(w * S.flatten()[inds], axis=1) / np.nansum(w, axis=1)).reshape(shape)
       else:
          Sdat_g = (np.nansum(S.flatten()[inds], axis=1)).reshape(shape)
       del w
       dist = np.nanmean(dist,axis=1).reshape(shape)

    del S

    Sdat_g[dist>1] = np.nan
    Sdat_g[Sdat_g<noisefloor] = np.nan

    dat = Sdat_g.copy()
    dat[dist>1] = 0
    dat2 = replace_nans.RN(dat.astype('float64'),1000,0.01,2,'localmean').getdata()
    dat2[dat==0] = np.nan
    del dat

    dat2[dat2<noisefloor] = np.nan

    Sdat_g = dat2.copy()
    del dat2
   
    Sdat_g[Sdat_g==0] = np.nan
    Sdat_g[np.isinf(Sdat_g)] = np.nan
    Sdat_gm = np.ma.masked_invalid(Sdat_g)
    del Sdat_g

    glon, glat = trans(grid_x, grid_y, inverse=True)
    del grid_x, grid_y
    
    # =========================================================
    print "creating kmz file ..."
    ## new way to create kml file  
    pixels = 1024 * 10
 
    fig, ax = humutils.gearth_fig(llcrnrlon=glon.min(),
                     llcrnrlat=glat.min(),
                     urcrnrlon=glon.max(),
                     urcrnrlat=glat.max(),
                     pixels=pixels)
    cs = ax.pcolormesh(glon, glat, Sdat_gm)
    ax.set_axis_off()
    fig.savefig(os.path.normpath(os.path.join(sonpath,'class_overlay1.png')), transparent=True, format='png')    
    

    fig = plt.figure(figsize=(1.0, 4.0), facecolor=None, frameon=False)
    ax = fig.add_axes([0.0, 0.05, 0.2, 0.9])
    cb = fig.colorbar(cs, cax=ax)
    cb.set_label('Texture lengthscale [m]', rotation=-90, color='k', labelpad=20)
    fig.savefig(os.path.normpath(os.path.join(sonpath,'class_legend.png')), transparent=False, format='png')  


    humutils.make_kml(llcrnrlon=glon.min(), llcrnrlat=glat.min(),
         urcrnrlon=glon.max(), urcrnrlat=glat.max(),
         figs=[os.path.normpath(os.path.join(sonpath,'class_overlay1.png'))], 
         colorbar=os.path.normpath(os.path.join(sonpath,'class_legend.png')),
         kmzfile=os.path.normpath(os.path.join(sonpath,'class_GroundOverlay.kmz')), 
         name='Sidescan Intensity')


    # =========================================================
    print "drawing and printing map ..."
    fig = plt.figure(frameon=False)
    map = Basemap(projection='merc', epsg=cs2cs_args.split(':')[1], 
     resolution = 'i', #h #f
     llcrnrlon=np.min(humlon)-0.001, llcrnrlat=np.min(humlat)-0.001,
     urcrnrlon=np.max(humlon)+0.001, urcrnrlat=np.max(humlat)+0.001)

    gx,gy = map.projtran(glon, glat)
       
    try:
       map.arcgisimage(server='http://server.arcgisonline.com/ArcGIS', service='ESRI_Imagery_World_2D', xpixels=1000, ypixels=None, dpi=300)
    except:
       map.arcgisimage(server='http://server.arcgisonline.com/ArcGIS', service='World_Imagery', xpixels=1000, ypixels=None, dpi=300)
    #finally:
    #   print "error: map could not be created..."
      
    ax = plt.Axes(fig, [0., 0., 1., 1.], )
    ax.set_axis_off()
    fig.add_axes(ax)

    if Sdat_gm.size > 25000000:
       print "matrix size > 25,000,000 - decimating by factor of 5 for display"
       map.pcolormesh(gx[::5,::5], gy[::5,::5], Sdat_gm[::5,::5], vmin=np.nanmin(Sdat_gm), vmax=np.nanmax(Sdat_gm))
    else:
       map.pcolormesh(gx, gy, Sdat_gm, vmin=np.nanmin(Sdat_gm), vmax=np.nanmax(Sdat_gm))

    custom_save2(sonpath,'class_map_imagery')
    del fig 

   
    if os.name=='posix': # true if linux/mac
       elapsed = (time.time() - start)
    else: # windows
       elapsed = (time.clock() - start)
    print "Processing took ", elapsed , "seconds to analyse"

    print "Done!"
Example 34
def runPixMatch(outpre, filter):

    if filter == 'f606w':
        let = 'v'
    else:
        let = 'i'

    if outpre == 'lower':
        x_drc_low = drc_low['x_' + let]
        y_drc_low = drc_low['y_' + let]

        xm_flc_low = flc_all['xdrc_low_' + filter]
        ym_flc_low = flc_all['ydrc_low_' + filter]

        coords1low = np.empty((xm_flc_low.size, 2))
        coords2low = np.empty((x_drc_low.size, 2))

        coords1low[:, 0] = xm_flc_low
        coords1low[:, 1] = ym_flc_low

        coords2low[:, 0] = x_drc_low
        coords2low[:, 1] = y_drc_low

        kdt = KDT(coords2low)
        idxs2 = kdt.query(coords1low)[1]

        ds = distArr(xm_flc_low, ym_flc_low, x_drc_low[idxs2],
                     y_drc_low[idxs2])

        idxs1 = np.arange(xm_flc_low.size)

        msk = ds < matchtol
        idxs1 = idxs1[msk]
        idxs2 = idxs2[msk]
        ds = ds[msk]

    else:
        x_drc_up = drc_up['x_' + let]
        y_drc_up = drc_up['y_' + let]

        xm_flc_up = flc_all['xdrc_up_' + filter]
        ym_flc_up = flc_all['ydrc_up_' + filter]

        coords1up = np.empty((xm_flc_up.size, 2))
        coords2up = np.empty((x_drc_up.size, 2))

        coords1up[:, 0] = xm_flc_up
        coords1up[:, 1] = ym_flc_up

        coords2up[:, 0] = x_drc_up
        coords2up[:, 1] = y_drc_up

        kdt = KDT(coords2up)
        idxs2 = kdt.query(coords1up)[1]

        ds = distArr(xm_flc_up, ym_flc_up, x_drc_up[idxs2], y_drc_up[idxs2])

        idxs1 = np.arange(xm_flc_up.size)

        msk = ds < matchtol
        idxs1 = idxs1[msk]
        idxs2 = idxs2[msk]
        ds = ds[msk]

    print(len(idxs1))

    outfile = main_dir + 'hor-I-cut_drc_' + outpre + '_' + filter + '_tol{0}_magCuts.txt'.format(
        matchtol)
    np.savetxt(outfile, idxs2, fmt='%4i')

    outfile = main_dir + 'hor-I-cut_flc_' + outpre + '_' + filter + '_tol{0}_magCuts.txt'.format(
        matchtol)
    np.savetxt(outfile, idxs1, fmt='%4i')

    # outfile = main_dir+'hor-I-cut_ds_'+outpre+'_'+filter+'_tol{0}.txt'.format(matchtol)
    # np.savetxt(outfile, ds, fmt='%1.4f')

    return None
Example 35
class Invdisttree:
    """ inverse-distance-weighted interpolation using KDTree:
invdisttree = Invdisttree( X, z )  -- data points, values
interpol = invdisttree( q, nnear=3, eps=0, p=1, weights=None, stat=0 )
    interpolates z from the 3 points nearest each query point q;
    For example, interpol[ a query point q ]
    finds the 3 data points nearest q, at distances d1 d2 d3
    and returns the IDW average of the values z1 z2 z3
        (z1/d1 + z2/d2 + z3/d3)
        / (1/d1 + 1/d2 + 1/d3)
        = .55 z1 + .27 z2 + .18 z3  for distances 1 2 3

    q may be one point, or a batch of points.
    eps: approximate nearest, dist <= (1 + eps) * true nearest
    p: use 1 / distance**p
    weights: optional multipliers for 1 / distance**p, of the same shape as q
    stat: accumulate wsum, wn for average weights

How many nearest neighbors should one take ?
a) start with 8 11 14 .. 28 in 2d 3d 4d .. 10d; see Wendel's formula
b) make 3 runs with nnear= e.g. 6 8 10, and look at the results --
    |interpol 6 - interpol 8| etc., or |f - interpol*| if you have f(q).
    I find that runtimes don't increase much at all with nnear -- ymmv.

p=1, p=2 ?
    p=2 weights nearer points more, farther points less.
    In 2d, the circles around query points have areas ~ distance**2,
    so p=2 is inverse-area weighting. For example,
        (z1/area1 + z2/area2 + z3/area3)
        / (1/area1 + 1/area2 + 1/area3)
        = .74 z1 + .18 z2 + .08 z3  for distances 1 2 3
    Similarly, in 3d, p=3 is inverse-volume weighting.

Scaling:
    if different X coordinates measure different things, Euclidean distance
    can be way off.  For example, if X0 is in the range 0 to 1
    but X1 0 to 1000, the X1 distances will swamp X0;
    rescale the data, i.e. make X0.std() ~= X1.std() .

A nice property of IDW is that it's scale-free around query points:
if I have values z1 z2 z3 from 3 points at distances d1 d2 d3,
the IDW average
    (z1/d1 + z2/d2 + z3/d3)
    / (1/d1 + 1/d2 + 1/d3)
is the same for distances 1 2 3, or 10 20 30 -- only the ratios matter.
In contrast, the commonly-used Gaussian kernel exp( - (distance/h)**2 )
is exceedingly sensitive to distance and to h.

    """
    def __init__(self, X, z, leafsize=10, stat=0):
        assert len(X) == len(z), "len(X) %d != len(z) %d" % (len(X), len(z))
        self.tree = KDTree(X, leafsize=leafsize)  # build the tree
        self.z = z
        self.stat = stat
        self.wn = 0
        self.wsum = None

    def __call__(self, q, nnear=6, eps=0, p=1, weights=None):
        # nnear nearest neighbours of each query point --
        q = np.asarray(q)
        qdim = q.ndim
        if qdim == 1:
            q = np.array([q])
        if self.wsum is None:
            self.wsum = np.zeros(nnear)

        self.distances, self.ix = self.tree.query(q, k=nnear, eps=eps)
        interpol = np.zeros((len(self.distances), ) + np.shape(self.z[0]))
        jinterpol = 0
        for dist, ix in zip(self.distances, self.ix):
            if nnear == 1:
                wz = self.z[ix]
            elif dist[0] < 1e-10:
                wz = self.z[ix[0]]
            else:  # weight z s by 1/dist --
                w = 1 / dist**p
                if weights is not None:
                    w *= weights[ix]  # >= 0
                w /= np.sum(w)
                wz = np.dot(w, self.z[ix])
                if self.stat:
                    self.wn += 1
                    self.wsum += w
            interpol[jinterpol] = wz
            jinterpol += 1
        return interpol if qdim > 1 else interpol[0]
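
# A minimal usage sketch for Invdisttree (synthetic data; numpy and the scipy
# KDTree import are assumed to be in scope, as the class above requires):
# interpolate 200 scattered 2-D samples onto 5 query points with inverse-square
# distance weights.
import numpy as np
from scipy.spatial import cKDTree as KDTree

rng = np.random.default_rng(0)
X = rng.random((200, 2))             # known data points
z = np.sin(4 * X[:, 0]) + X[:, 1]    # known values at those points
q = rng.random((5, 2))               # query points
invdisttree = Invdisttree(X, z, leafsize=10, stat=1)
print(invdisttree(q, nnear=8, eps=0.1, p=2))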
Example 36
0
def get_area_avg_from_erai_data(start_year=-np.Inf,
                                end_year=np.Inf,
                                var_folder="",
                                varname="",
                                mask=None,
                                mask_lons=None,
                                mask_lats=None):
    """

    Interpolate the mask to the ERA-Interim grid using nearest neighbour approach

    :param start_year:
    :param end_year:
    :param var_folder:
    :param varname:
    :param mask:
    :return:
    """
    def _get_year(fn):
        return int(fn.split(".")[0].split("_")[1])

    flist = [
        os.path.join(var_folder, fn) for fn in os.listdir(var_folder)
        if fn.startswith(varname) and (start_year <= _get_year(fn)) and (
            _get_year(fn) <= end_year)
    ]
    print(flist)

    ktree = None
    mask_interpolated = None
    lons_target, lats_target = None, None

    ser_list = []
    for fp in flist:

        with Dataset(fp) as ds:
            time_var = ds.variables["time"]

            times = num2date(time_var[:], time_var.units)

            print(times[0], times[-1])

            # Determine nearest neighbours for interpolation (do it only once)
            if ktree is None:

                # get lons and lats from the bathymetry file
                data_folder_p = Path(var_folder).parent

                for f in data_folder_p.iterdir():
                    if f.name.lower().startswith("bathy_meter"):
                        with Dataset(str(f)) as ds_bathy:
                            lons_target, lats_target = [
                                ds_bathy.variables[k][:]
                                for k in ["nav_lon", "nav_lat"]
                            ]
                            break

                x, y, z = lat_lon.lon_lat_to_cartesian(mask_lons.flatten(),
                                                       mask_lats.flatten())
                xt, yt, zt = lat_lon.lon_lat_to_cartesian(
                    lons_target.flatten(), lats_target.flatten())
                ktree = KDTree(list(zip(x, y, z)))

                dists, inds = ktree.query(list(zip(xt, yt, zt)), k=1)

                mask_interpolated = mask.flatten()[inds]
                mask_interpolated = mask_interpolated.reshape(
                    lons_target.shape)

            vals = [
                field[mask_interpolated].mean()
                for field in ds.variables[varname][:]
            ]
            ser = pd.Series(index=times, data=vals)

            if varname == "TT":
                ser -= 273.15

            ser.sort_index(inplace=True)

            ser_list.append(ser)

    return pd.concat(ser_list)
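
# Standalone sketch of the nearest-neighbour mask interpolation performed above.
# lon_lat_to_cartesian is written out here as an assumed helper (unit-sphere
# conversion); the original imports it from a project module (lat_lon).
import numpy as np
from scipy.spatial import cKDTree


def lon_lat_to_cartesian(lon, lat, radius=1.0):
    """Convert lon/lat in degrees to 3-D Cartesian coordinates on a sphere."""
    lon_r, lat_r = np.radians(lon), np.radians(lat)
    return (radius * np.cos(lat_r) * np.cos(lon_r),
            radius * np.cos(lat_r) * np.sin(lon_r),
            radius * np.sin(lat_r))


def interpolate_mask_nn(mask, mask_lons, mask_lats, lons_target, lats_target):
    """Give each target grid point the mask value of its nearest source point."""
    x, y, z = lon_lat_to_cartesian(mask_lons.flatten(), mask_lats.flatten())
    xt, yt, zt = lon_lat_to_cartesian(lons_target.flatten(), lats_target.flatten())
    tree = cKDTree(np.column_stack((x, y, z)))
    _, inds = tree.query(np.column_stack((xt, yt, zt)), k=1)
    return mask.flatten()[inds].reshape(lons_target.shape)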
Example 37
0
class NearestNeighborFinder():
    """
    Nearest neighbor search object for NEMO netCDF output files.
    """
    def __init__(self, ncfilename):
        """
        Create new instance.

        :arg str ncfilename: NEMO netCDF file name
        """
        self.filename = ncfilename
        self.data_dim = None
        self.grid_type = None
        self._build_tree()

    def _build_tree(self):
        """
        Construct nearest neighbor tree.
        """
        def parse_grid_type(ncf):
            """
            Figure out which discretization the file contains, T, U or V

            Reads the description attribute, e.g. "ocean T grid variables"

            returns 't', 'u', or 'v'
            """
            return 't'  # HACK assume always T grid
            desc = ncf.description
            words = desc.split()
            assert words[0] == 'ocean'
            assert words[2] == 'grid'
            return words[1].lower()

        with netCDF4.Dataset(self.filename) as ncf:
            self.grid_type = parse_grid_type(ncf)
            assert self.grid_type == 't', 'Only T grid is supported currently'
            # compute land mask
            self.data_dim = 3 if 'e3t' in ncf.variables else 2
            if self.data_dim == 3:
                # NOTE does not take time-dependent wetting-drying into account
                e = ncf['e3t'][0, :, :, :]
                self.landmask = numpy.all(e.mask, axis=0)
                # 1D array of all wet points in raveled index
                self.wetmask = numpy.nonzero(~self.landmask.ravel())[0]
                # get coordinates
                self.lon = ncf['nav_lon'][:]
                self.lat = ncf['nav_lat'][:]
                depth = ncf['deptht'][:]
                self.z = -depth
                # 1D arrays of all wet points
                self.valid_lon = self.lon.ravel()[self.wetmask]
                self.valid_lat = self.lat.ravel()[self.wetmask]
            else:
                # read a field to get landmask
                for v in ncf.variables:
                    var = ncf[v]
                    if len(var.shape) == 3:
                        # 2D time dependent field
                        self.landmask = numpy.all(var[:].mask, axis=0)
                        break
                self.wetmask = numpy.nonzero(~self.landmask.ravel())[0]
                # get coordinates
                self.lon = ncf['nav_lon'][:]
                self.lat = ncf['nav_lat'][:]
                self.z = 0.0
                # 1D arrays of all wet points
                self.valid_lon = self.lon.ravel()[self.wetmask]
                self.valid_lat = self.lat.ravel()[self.wetmask]

        assert len(self.valid_lat) > 0, \
            'No valid points found in {:}'.format(self.filename)
        coords = numpy.vstack((self.valid_lon, self.valid_lat)).T
        self.tree = KDTree(coords)

    def find(self, lon, lat, z):
        """
        Finds nearest neighbor index for point (lon, lat, z)

        :arg lon: longitude coordinate
        :arg lat: latitude coordinate
        :arg z: z coordinate (negative downwards)
        :returns: i, j, k indices of nearest neighbor indices
        """
        dist, index = self.tree.query([lon, lat], k=1)
        index = self.wetmask[index]
        i, j = numpy.unravel_index(index, self.lat.shape)
        if self.data_dim == 3:
            k = numpy.abs(self.z - z).argmin()
        else:
            k = None
        return i, j, k
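
# Hedged usage sketch for NearestNeighborFinder (the file name below is
# hypothetical; any NEMO T-grid netCDF output with nav_lon/nav_lat variables
# would do):
#
#     nnf = NearestNeighborFinder('output_grid_T.nc')
#     i, j, k = nnf.find(lon=24.9, lat=60.1, z=-5.0)
#     print('nearest wet point indices:', i, j, k)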
def remove_ind(reference_pop, removal_size, removal_type):
    begin_time = time.time()

    if removal_type == 'random':
        # reference_pop is a numpy array of size (n_reference_pop, pop_dim)
        reference_pop = list(reference_pop)
        # now reference_pop is a list of numpy arrays (each defining one individual)
        random.shuffle(reference_pop)  # shuffle the list
        # pop last removal_size individuals
        for _ in range(removal_size):
            reference_pop.pop()
        # turn back to numpy array
        reference_pop = np.array(reference_pop)

    if removal_type == 'least_novel':
        # compute novelties of reference_pop inside reference_pop
        novelties = assess_novelties(reference_pop, reference_pop)
        removal_indices = np.argpartition(novelties,
                                          removal_size)[:removal_size]

        # # plot the reference pop
        # fig = plt.figure(figsize=(5, 5))
        # ax = fig.add_subplot(111)
        # ax.scatter(reference_pop[:, 0], reference_pop[:, 1], label='reference')
        # ax.scatter(reference_pop[removal_indices, 0], reference_pop[removal_indices, 1], label='removed',
        #            marker='x', color='red')
        # ax.set_facecolor("#ffebb8")
        # ax.set_title('Least novel removal', fontsize=15)
        # plt.xlim(0, 1)
        # plt.ylim(0, 1)
        # plt.legend()
        # plt.show()

        reference_pop = np.delete(reference_pop, removal_indices, 0)

    if removal_type == 'least_novel_iter':
        removal_indices = []
        temp_ref_pop = copy.deepcopy(reference_pop)
        for j in range(removal_size):
            # compute novelties of reference_pop inside reference_pop
            novelties = assess_novelties(temp_ref_pop, temp_ref_pop)
            remov_idx = np.argmin(novelties)
            remov_ind = temp_ref_pop[remov_idx]
            removal_indices.append(np.where(reference_pop == remov_ind)[0][0])
            temp_ref_pop = np.vstack(
                (temp_ref_pop[:remov_idx], temp_ref_pop[remov_idx + 1:]))

        # # plot the reference pop
        # fig = plt.figure(figsize=(5, 5))
        # ax = fig.add_subplot(111)
        # ax.scatter(reference_pop[:, 0], reference_pop[:, 1], label='reference')
        # ax.scatter(reference_pop[removal_indices, 0], reference_pop[removal_indices, 1], label='removed',
        #            marker='x', color='red')
        # ax.set_facecolor("#ffebb8")
        # ax.set_title('Least novel removal', fontsize=15)
        # plt.xlim(0, 1)
        # plt.ylim(0, 1)
        # plt.legend()
        # plt.show()

        reference_pop = np.delete(reference_pop, removal_indices, 0)

    if removal_type == 'most_novel':
        # compute novelties of reference_pop inside reference_pop
        novelties = assess_novelties(reference_pop, reference_pop)
        removal_indices = np.argpartition(novelties,
                                          -removal_size)[-removal_size:]

        # # plot the reference pop
        # fig = plt.figure(figsize=(5, 5))
        # ax = fig.add_subplot(111)
        # ax.scatter(reference_pop[:, 0], reference_pop[:, 1], label='reference')
        # ax.scatter(reference_pop[removal_indices, 0], reference_pop[removal_indices, 1], label='removed',
        #            marker='x', color='red')
        # ax.set_facecolor("#ffebb8")
        # ax.set_title('Most novel removal', fontsize=15)
        # plt.xlim(0, 1)
        # plt.ylim(0, 1)
        # plt.legend()
        # plt.show()

        reference_pop = np.delete(reference_pop, removal_indices, 0)

    if removal_type == 'most_novel_iter':
        removal_indices = []
        temp_ref_pop = copy.deepcopy(reference_pop)
        for j in range(removal_size):
            # compute novelties of reference_pop inside reference_pop
            novelties = assess_novelties(temp_ref_pop, temp_ref_pop)
            remov_idx = np.argmax(novelties)
            remov_ind = temp_ref_pop[remov_idx]
            removal_indices.append(np.where(reference_pop == remov_ind)[0][0])
            temp_ref_pop = np.vstack(
                (temp_ref_pop[:remov_idx], temp_ref_pop[remov_idx + 1:]))

        # # plot the reference pop
        # fig = plt.figure(figsize=(5, 5))
        # ax = fig.add_subplot(111)
        # ax.scatter(reference_pop[:, 0], reference_pop[:, 1], label='reference')
        # ax.scatter(reference_pop[removal_indices, 0], reference_pop[removal_indices, 1], label='removed',
        #            marker='x', color='red')
        # ax.set_facecolor("#ffebb8")
        # ax.set_title('Most novel removal', fontsize=15)
        # plt.xlim(0, 1)
        # plt.ylim(0, 1)
        # plt.legend()
        # plt.show()

        reference_pop = np.delete(reference_pop, removal_indices, 0)

    if removal_type == 'gmm_sampling':
        # hypothesis: n_components equals generative number of components
        n_comp = N
        gmix = mixture.GaussianMixture(n_components=n_comp,
                                       covariance_type='full')
        gmix.fit(reference_pop)
        nodes = gmix.sample(removal_size)[0]
        k_tree = KDTree(reference_pop)
        removal_indices = []
        for node in nodes:
            # for each node, find the closest point in the reference pop
            cond = True
            closest = 1
            # make sure the removal individual was not already chosen
            while cond:
                if closest == 1:
                    possible_removal_index = k_tree.query(node, closest)[1]
                else:
                    possible_removal_index = k_tree.query(
                        node, closest)[1][closest - 1]
                if possible_removal_index not in removal_indices:
                    removal_indices.append(possible_removal_index)
                    cond = False
                else:
                    closest += 1

        # # plot the reference pop
        # fig = plt.figure(figsize=(5, 5))
        # ax = fig.add_subplot(111)
        # ax.scatter(reference_pop[:, 0], reference_pop[:, 1], label='reference')
        # ax.scatter(reference_pop[removal_indices, 0], reference_pop[removal_indices, 1], label='removed',
        #            marker='x', color='red')
        # ax.set_facecolor("#ffebb8")
        # ax.set_title('GMM removal', fontsize=15)
        # plt.xlim(0, 1)
        # plt.ylim(0, 1)
        # plt.legend()
        # plt.show()

        reference_pop = np.delete(reference_pop, removal_indices, 0)

    if removal_type == 'grid':
        n_dim = reference_pop.shape[1]
        # compute maximums and minimums on each dimension
        maximums = np.max(reference_pop, 0)
        minimums = np.min(reference_pop, 0)
        ranges = maximums - minimums
        bins_per_dim = math.floor(math.exp(math.log(removal_size) / n_dim)) + 1
        grid_positions = []
        for i in range(n_dim):
            # important choice on how we make the grid
            grid_position = [
                minimums[i] + ((j + 1) * ranges[i] / bins_per_dim)
                for j in range(bins_per_dim)
            ]
            grid_position.pop()
            grid_positions.append(grid_position)
        mesh = np.meshgrid(*grid_positions)
        nodes = list(zip(*(dim.flat for dim in mesh)))
        nodes = np.array(nodes)

        k_tree = KDTree(reference_pop)
        removal_indices = []
        for node in nodes:
            # for each node, find the closest point in the reference pop
            cond = True
            closest = 1
            # make sure the removal individual was not already chosen
            while cond:
                if closest == 1:
                    possible_removal_index = k_tree.query(node, closest)[1]
                else:
                    possible_removal_index = k_tree.query(
                        node, closest)[1][closest - 1]
                if possible_removal_index not in removal_indices:
                    removal_indices.append(possible_removal_index)
                    cond = False
                else:
                    closest += 1
        # dealing with the missing removals
        nb_missing_removals = removal_size - len(nodes)
        for _ in range(nb_missing_removals):
            query = random.choice(nodes)
            cond = True
            # start with second closest since closest is for sure in removal indices
            closest = 2
            # make sure the removal individual was not already chosen
            while cond:
                possible_removal_index = k_tree.query(query,
                                                      closest)[1][closest - 1]
                if possible_removal_index not in removal_indices:
                    removal_indices.append(possible_removal_index)
                    cond = False
                else:
                    closest += 1

        # # plot the reference pop
        # fig = plt.figure(figsize=(5, 5))
        # ax = fig.add_subplot(111)
        # ax.scatter(reference_pop[:, 0], reference_pop[:, 1], label='reference')
        # ax.scatter(nodes[:, 0], nodes[:, 1], label='grid', marker='+', color='black')
        # ax.scatter(reference_pop[removal_indices, 0], reference_pop[removal_indices, 1], label='removed',
        #            marker='x', color='red')
        # ax.set_facecolor("#ffebb8")
        # ax.set_title('Grid removal', fontsize=15)
        # plt.xlim(0, 1)
        # plt.ylim(0, 1)
        # plt.legend()
        # plt.show()

        reference_pop = np.delete(reference_pop, removal_indices, 0)

    if removal_type == 'grid_density':
        n_dim = reference_pop.shape[1]
        # compute maximums and minimums on each dimension
        maximums = np.max(reference_pop, 0)
        minimums = np.min(reference_pop, 0)
        ranges = maximums - minimums
        bins_per_dim = math.floor(math.exp(math.log(N_CELLS) / n_dim)) + 1
        grid_positions = []
        for i in range(n_dim):
            # important choice on how we make the grid
            grid_position = [
                minimums[i] + (j * ranges[i] / (bins_per_dim - 1))
                for j in range(bins_per_dim)
            ]
            grid_positions.append(grid_position)
        mesh = np.meshgrid(*grid_positions)
        nodes = list(zip(*(dim.flat for dim in mesh)))
        nodes = np.array(nodes)

        removal_indices = []
        nb_cells = (bins_per_dim - 1)**n_dim
        grid_density = np.zeros(nb_cells)
        cells = [[] for _ in range(nb_cells)]

        for ind_idx, ind in enumerate(reference_pop):
            dim_indexs = np.zeros(n_dim)
            for i, dim in enumerate(ind):
                grid_pos = grid_positions[i]
                for j in range(bins_per_dim - 1):
                    if dim >= grid_pos[j] and dim < grid_pos[j + 1]:
                        dim_indexs[i] = j + 1
            if 0 not in dim_indexs:
                # individual is inside the grid
                dim_indexs = dim_indexs - 1
                cell_idx = 0
                for k, dim_idx in enumerate(dim_indexs):
                    cell_idx += int(dim_idx * ((bins_per_dim - 1)**k))
                grid_density[cell_idx] += 1
                cells[cell_idx].append(ind_idx)

        grid_density = grid_density / np.sum(grid_density)

        # TEST: square the grid_density to bias more towards high-density cells
        # grid_density = np.square(grid_density)

        grid_law = np.cumsum(grid_density)

        for _ in range(removal_size):
            dice = random.random() * grid_law[-1]
            cell_to_remove_from = np.searchsorted(grid_law, dice)
            cond = True
            n = 0
            while cond:
                if n < LIMIT_DENSITY_ITER:
                    removal_idx = random.choice(cells[cell_to_remove_from])
                else:
                    removal_idx = random.choice(list(range(
                        len(reference_pop))))
                if removal_idx not in removal_indices:
                    removal_indices.append(removal_idx)
                    cond = False
                n += 1

        # # plot the reference pop
        # fig = plt.figure(figsize=(5, 5))
        # ax = fig.add_subplot(111)
        # ax.scatter(reference_pop[:, 0], reference_pop[:, 1], label='reference')
        # ax.scatter(nodes[:, 0], nodes[:, 1], label='grid', marker='+', color='black')
        # ax.scatter(reference_pop[removal_indices, 0], reference_pop[removal_indices, 1], label='removed',
        #            marker='x', color='red')
        # ax.set_facecolor("#ffebb8")
        # ax.set_title('Grid density removal', fontsize=15)
        # plt.xlim(0, 1)
        # plt.ylim(0, 1)
        # plt.legend()
        # plt.show()

        reference_pop = np.delete(reference_pop, removal_indices, 0)

    end_time = time.time()
    removal_time = end_time - begin_time
    return reference_pop, removal_time
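
# Standalone sketch of the 'grid' removal idea used above: lay a regular grid of
# nodes over the population's bounding box, then drop the individual nearest to
# each node (synthetic 2-D data; clashes are simply collapsed with np.unique
# here, whereas the original falls back to the next-nearest point).
import math
import numpy as np
from scipy.spatial import cKDTree as KDTree

rng = np.random.default_rng(1)
pop = rng.random((100, 2))
removal_size = 9

n_dim = pop.shape[1]
bins_per_dim = math.floor(math.exp(math.log(removal_size) / n_dim)) + 1
mins, maxs = pop.min(0), pop.max(0)
# interior grid positions per dimension, as in the 'grid' branch above
axes = [np.linspace(mins[i], maxs[i], bins_per_dim + 1)[1:-1] for i in range(n_dim)]
nodes = np.array(list(zip(*(m.flat for m in np.meshgrid(*axes)))))

tree = KDTree(pop)
removal_indices = np.unique(tree.query(nodes)[1])  # nearest individual per node
reduced_pop = np.delete(pop, removal_indices, 0)
print(len(pop), '->', len(reduced_pop))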
Example 39
0
class Dataset:
    """
    SELFE Model Binary IO Functions

    Presently enables reading SELFE dataformat version 5.0 binary output files.
    Can read 2D & 3D scalar and vector variables.
    Usage Example:
    model = pyselfe.Dataset('1_hvel.64')
    [t,t_iter,eta,dp,data] = model.read_time_series()
    t = time in seconds
    t_iter = iteration number
    eta = water surface elevation
    dp = bathymetric depth
    data = 2D/3D variables

    @author Dharhas Pothina
    @version 0.2
    """
    def __init__(self, fname, nfiles=1):
        "Initialise by reading header information from file."

        self.fname = fname
        fid = open(fname, 'rb')
        self.read_header(fid)
        self.read_hgrid(fid)
        self.data_start_pos = fid.tell()
        self.compute_step_size()
        self.datadir = os.path.split(fname)[0]
        self.nfiles = nfiles

    def read_header(self, fid):
        """Read header information from SELFE binary output file."""

        # Read misc header info.
        self.data_format = fid.read(48)
        self.version = fid.read(48)
        self.start_time = fid.read(48)
        self.var_type = fid.read(48)
        self.var_dimension = fid.read(48)
        self.nsteps = io.fread(fid, 1, 'i')
        self.dt = io.fread(fid, 1, 'f')
        self.skip = io.fread(fid, 1, 'i')
        self.flag_sv = io.fread(fid, 1, 'i')
        self.flag_dm = io.fread(fid, 1, 'i')

        # @todo check when zDes needs to be read
        # self.zDes = io.fread(fid, 1, 'f').

        # Read vert grid info.
        self.nlevels = io.fread(fid, 1, 'i')
        self.kz = io.fread(fid, 1, 'i')
        self.h0 = io.fread(fid, 1, 'f')
        self.hs = io.fread(fid, 1, 'f')
        self.hc = io.fread(fid, 1, 'f')
        self.theta_b = io.fread(fid, 1, 'f')
        self.theta = io.fread(fid, 1, 'f')
        self.zlevels = io.fread(fid, self.kz, 'f')
        self.slevels = io.fread(fid, self.nlevels - self.kz, 'f')

    def read_hgrid(self, fid):
        """Read horizontal grid info from SELFE binary output file."""

        # Read dimensions.
        self.np = io.fread(fid, 1, 'i')
        self.ne = io.fread(fid, 1, 'i')

        # Read grid and bathymetry.
        pos = fid.tell()
        hgridtmp = io.fread(fid, 4 * self.np, 'f')
        self.x, self.y, self.dp, tmp1 = hgridtmp.reshape(self.np, 4).T

        # Read bottom index.
        fid.seek(pos)
        hgridtmp = io.fread(fid, 4 * self.np, 'i')
        tmp1, tmp2, tmp3, self.bot_idx = hgridtmp.reshape(self.np, 4).T

        # Read element connectivity list.
        self.elem = io.fread(fid, 4 * self.ne, 'i')
        self.elem = self.elem.reshape(self.ne, 4)[:, 1:4]

        # Create kdtree.
        self.kdtree = KDTree(list(zip(self.x, self.y)))

    def compute_step_size(self):
        """
        Compute the data block size to move one timestep within the file.

        """

        # Calculate grid size depending on whether dataset is 3D or 2D.
        if self.flag_dm == 3:
        # @todo check what needs to be done with bIdx (==0?) for dry nodes.
            bIdx = self.bot_idx
            bIdx[bIdx < 1] = 1
            self.grid_size = sum(self.nlevels - bIdx + 1)
        elif self.flag_dm == 2:
            self.grid_size = self.np
        # Compute step size.
        self.step_size = 2 * 4 + self.np * 4 + self.grid_size * 4 * self.flag_sv

    def read_time_series(self,
                         fname,
                         nodes=None,
                         levels=None,
                         xy=np.array([]),
                         nfiles=3,
                         sfile=1,
                         datadir=None):
        """
        Main function to extract a spatial and temporal slice of entire
        3D Time series.

        Returns [t,t_iter,eta,dp,data] where:
        t : time in seconds from simulation start
        t_iter : iteration number from simulation start
        eta : Surface water elevation time series
        dp : Bathymetry (depth of sea bed from MSL)
        data[t,nodes,levels,vars] : extracted data slice
        (i.e. Salinity, Temp, Velocity etc)

        Options:
        nodes : list of nodes to extract (default is all nodes)
        level : list of levels to extract (default is all levels)
        xy : array of x,y coordinates to extract (default is none)
        sfile : serial number of starting file (default is one)
        nfiles : number of files in data sequence (default is one)

        NOTE : node index starts at zero so add one to match up with node
        numbers in SELFE hgrid.gr3 file.

        """

        # Initialize vars.
        t = np.array([])
        t_iter = np.array([])
        eta = []
        data = []

        if nfiles is None:
            nfiles = self.nfiles

        if datadir is None:
            datadir = self.datadir

        # Convert xy points to list of nodes,
        # find parent elements &  calculate interpolation weights.
        if xy.size != 0:
            if xy.shape[1] != 2:
                sys.exit('xy array shape wrong.')
            nodes = np.array([], dtype='int32')
            arco = np.array([])
            for xy00 in xy:
                parent, tmparco, node3 = self.find_parent_element(
                    xy00[0], xy00[1])  # noqa
                nodes = np.append(nodes, node3 - 1)
                arco = np.append(arco, tmparco)

        # Set default for nodes to be all nodes.
        # Node index starts at zero.
        elif nodes is None:
            nodes = np.arange(self.np)

        # Set default for level to be all levels.
        if levels is None:
            levels = np.arange(self.nlevels)

        # Check whether 2D or 3D variable is being read.
        if self.flag_dm == 2:
            nlevs = 1
            levels = np.array([0])
        else:
            nlevs = self.nlevels

        # Read time series slice.
        for files in np.arange(sfile, sfile + nfiles):
            try:
                fname1 = datadir + '/' + str(files) + '_' + fname
                fid = open(fname1, 'rb')
                fid.seek(self.data_start_pos)
                for i in np.arange(self.nsteps):
                    t = np.append(t, io.fread(fid, 1, 'f'))
                    t_iter = np.append(t_iter, io.fread(fid, 1, 'i'))
                    eta.append(io.fread(fid, self.np, 'f'))
                    tmpdata = io.fread(fid, self.flag_sv * self.grid_size, 'f')
                    tmpdata = tmpdata.reshape(self.np, nlevs, self.flag_sv)
                    # Only keep requested slice of tmpdata.
                    # i.e. tmpdata[nodes, levels, var]
                    tmpdata = tmpdata[nodes, :, :]
                    tmpdata = tmpdata[:, levels, :]
                    data.append(tmpdata)
            except:
                continue
        # import pdb; pdb.set_trace()
        eta = np.column_stack(eta[:]).T
        eta = eta[:, nodes]
        data = np.array(data)
        dp = self.dp[nodes]

        # Convert nodal values back to xy point values if needed.
        if xy.size != 0:
            # Not sure about this. Need to look at it in more detail; put in to
            # remove a shape error.
            # try:
            tmpdata = np.zeros((data.shape[0], data.shape[1] // 3,
                                data.shape[2], data.shape[3])) / 0.  # noqa
            # except:
            #     tmpdata = np.zeros((data.shape[0], data.shape[1]//3, data.shape[2]))/0.  # noqa
            tmpeta = np.zeros((eta.shape[0], eta.shape[1] // 3)) / 0.
            tmpdp = np.zeros(dp.shape[0] // 3) / 0.
            for i in range(xy.shape[0]):
                n1 = i * 3
                n2 = n1 + 1
                n3 = n2 + 1
                tmpdata[:, i, :, :] = (data[:, n1, :, :] * arco[n1] +
                                       data[:, n2, :, :] * arco[n2] +
                                       data[:, n3, :, :] * arco[n3])
                tmpeta[:, i] = (eta[:, n1] * arco[n1] + eta[:, n2] * arco[n2] +
                                eta[:, n3] * arco[n3])
                tmpdp[i] = (dp[n1] * arco[n1] + dp[n2] * arco[n2] +
                            dp[n3] * arco[n3])
            data = tmpdata
            eta = tmpeta
            dp = tmpdp

        return t, t_iter, eta, dp, data

    def find_parent_element(self, x00, y00):
        """
        Find Parent Element of a given (x,y) point and calculate
        interpolation weights.

        Uses brute force search through all elements.
        Calculates whether point is internal/external to element by comparing
        summed area of sub triangles with area of triangle element.
        @todo implement binary tree search for efficiency

        Returns:
        parent, arco, node3 : parent element number, interp weights and element
        node numbers.

        """
        def signa(x1, x2, x3, y1, y2, y3):
            "Return signed area of triangle."
            return (((x1 - x3) * (y2 - y3) - (x2 - x3) * (y1 - y3)) / 2)

        parent = -1
        nm = self.elem.view()
        out = np.zeros(3) / 0.
        x = self.x.view()
        y = self.y.view()
        for i in np.arange(self.ne):
            aa = 0
            ar = 0  # Area.
            for j in np.arange(3):
                j1 = j + 1
                j2 = j + 2
                if (j1 > 2):
                    j1 = j1 - 3
                if (j2 > 2):
                    j2 = j2 - 3
                n0 = nm[i,
                        j] - 1  # Zero based index rather than 1 based index.
                n1 = nm[i, j1] - 1
                n2 = nm[i, j2] - 1
                # Temporary storage.
                out[j] = signa(x[n1], x[n2], x00, y[n1], y[n2], y00)
                aa = aa + abs(out[j])
                if (j == 0):
                    ar = signa(x[n1], x[n2], x[n0], y[n1], y[n2], y[n0])

            if (ar <= 0):
                sys.exit('Negative area:' + str(ar))

            ae = abs(aa - ar) / ar
            if (ae <= 1.e-5):
                parent = i
                node3 = nm[i, 0:3]
                arco = out[0:3] / ar
                arco[1] = max(0., min(1., arco[1]))
                arco[2] = max(0., min(1., arco[2]))
                if (arco[0] + arco[1] > 1):
                    arco[2] = 0
                    arco[1] = 1 - arco[0]
                else:
                    arco[2] = 1 - arco[0] - arco[1]
                break
        if (parent == -1):
            sys.exit('Cannot find a parent:' + str(x00) + ',' + str(y00))
        else:
            print('Parent Element :', parent + 1, ' ,Nodes: ', node3)
            return parent, arco, node3

    def compute_relative_rec(self, node, level):
        """
        Computes offset for extracting particular node/level.
        NOTE THIS FUNCTION NOT COMPLETE/TESTED.

        """
        count = 0
        step_size = np.zeros((self.np, self.nlevels, self.flag_sv)) / 0.
        for i in range(self.np):
            for k in range(max(1, self.bot_idx[i]), self.nlevels):
                for m in range(self.flag_sv):
                    count = count + 1
                    step_size[i, k, m] = count

    def read_time_series_xy(self,
                            variable,
                            x,
                            y,
                            sigma_level='middle',
                            return_eta=False):
        """
        Finds nearest 3 nodes to x,y and returns the average value.

        """
        xy = np.hstack((x, y))
        dist, nodes = self.kdtree.query(xy, k=3)
        data = []

        if sigma_level == 'average':
            t, t_iter, eta, dp, data = self.read_time_series(
                variable, nodes=nodes)  # noqa
            eta = eta.mean(axis=1)
            data = data[:, :, :, 0].mean(axis=2).mean(axis=1)
            # Take average of all levels and then 3 nodes for now.
            # Implement idw or area-weighted average later.
            data = data.mean(axis=1).mean(axis=1)
            if return_eta:
                return np.column_stack((t, data)), np.column_stack((t, eta))
            else:
                return np.column_stack((t, data))

        elif sigma_level == 'top':
            sigma_level = 0
        elif sigma_level == 'bottom':
            sigma_level = self.nlevels - 1
        elif sigma_level == 'middle':
            sigma_level = self.nlevels // 2

        t, t_iter, eta, dp, data = self.read_time_series(variable,
                                                         nodes=nodes,
                                                         levels=sigma_level)
        eta = eta.mean(axis=1)
        data = data[:, :, 0, :].mean(axis=1)
        # data.mean(axis=1).shape[:, 0, :]
        # Take average of all levels and then 3 nodes for now.
        # Implement idw or area-weighted average later.
        # data = data.mean(axis=1)
        # import pdb; pdb.set_trace()
        if return_eta:
            return np.column_stack((t, data)), np.column_stack((t, eta))
        else:
            return np.column_stack((t, data))
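
# Standalone sketch of the signed-area test used in find_parent_element above:
# a point lies inside a triangle exactly when its three sub-triangle areas sum
# to the element area, and the normalized sub-areas are the interpolation
# weights (synthetic triangle and point, not SELFE data).
import numpy as np

def signa(x1, x2, x3, y1, y2, y3):
    "Signed area of the triangle (x1,y1), (x2,y2), (x3,y3)."
    return ((x1 - x3) * (y2 - y3) - (x2 - x3) * (y1 - y3)) / 2.

tri_x = np.array([0.0, 1.0, 0.0])
tri_y = np.array([0.0, 0.0, 1.0])
px, py = 0.25, 0.25

area = signa(tri_x[0], tri_x[1], tri_x[2], tri_y[0], tri_y[1], tri_y[2])
# sub-triangle j is formed by the query point and edge (j+1, j+2), as in the loop above
sub = [signa(tri_x[(j + 1) % 3], tri_x[(j + 2) % 3], px,
             tri_y[(j + 1) % 3], tri_y[(j + 2) % 3], py) for j in range(3)]
inside = abs(sum(abs(s) for s in sub) - area) / area <= 1.e-5
weights = np.array(sub) / area          # barycentric interpolation weights
print(inside, weights, weights.sum())   # True, and the weights sum to 1 inside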
Example 40
0
def KLdivergence(x, y):
    """Compute the Kullback-Leibler divergence between two multivariate samples.
    Parameters
    ----------
    x : 2D array (n,d)
    Samples from distribution P, which typically represents the true
    distribution.
    y : 2D array (m,d)
    Samples from distribution Q, which typically represents the approximate
    distribution.
    Returns
    -------
    out : float
    The estimated Kullback-Leibler divergence D(P||Q).
    References
    ----------
    Pérez-Cruz, F. Kullback-Leibler divergence estimation of
    continuous distributions IEEE International Symposium on Information
    Theory, 2008.
    
    https://gist.github.com/atabakd/ed0f7581f8510c8587bc2f41a094b518
    """

    eta = 0.0000000001

    # Check the dimensions are consistent
    x = np.atleast_2d(x)
    y = np.atleast_2d(y)

    n,d = x.shape
    m,dy = y.shape

    assert d == dy
    assert n != 0
    assert n != 1

    # Build a KD tree representation of the samples and find the nearest neighbour
    # of each point in x.
    xtree = KDTree(x)
    ytree = KDTree(y)

    # Get the first two nearest neighbours for x, since the closest one is the
    # sample itself.
    r = xtree.query(x, k=2, eps=.01, p=2)[0][:,1]
    s = ytree.query(x, k=1, eps=.01, p=2)[0]
    s[s == 0] = eta
    
    #np.seterr(all='raise') 
    #try:
    #    ratio = r / s
    #    _ = np.log(ratio, where=ratio > 0).sum()
    #except Exception as ex:
    #    print(ex)
    #    print(np.sum(s==0))
    #    print(np.sum(np.isclose(s, 0)))
    #    assert False, "log(r/s) produces 'divide by zero' error or other exception."
    
    if np.any(s == 0):
        return "ERR: s=0"
    else:
        # There is a mistake in the paper. In Eq. 14, the right side misses a negative sign
        # on the first term of the right hand side.
        ratio = r/s
        return -np.log(ratio, where=ratio > 0).sum() * d / n + np.log(m / (n - 1.))
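
# Minimal self-contained check of KLdivergence on two 1-D Gaussian samples (the
# true KL between N(0,1) and N(1,1) is 0.5; the k-NN estimate is noisy but
# should land in that neighbourhood for a few thousand samples).
import numpy as np
from scipy.spatial import cKDTree as KDTree

rng = np.random.default_rng(0)
p_samples = rng.normal(0.0, 1.0, size=(5000, 1))
q_samples = rng.normal(1.0, 1.0, size=(5000, 1))
print(KLdivergence(p_samples, q_samples))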
Example 41
0
            .assign(Fueltype=lambda df: (
                    df.Fueltype
                      .where(df.Fueltype != 'Natural Gas',
                             df.Technology.replace('Steam Turbine',
                                                   'OCGT').fillna('OCGT')))))

    ppl_query = snakemake.config['electricity']['powerplants_filter']
    if isinstance(ppl_query, str):
        ppl.query(ppl_query, inplace=True)

    ppl = add_custom_powerplants(ppl) # add carriers from own powerplant files

    cntries_without_ppl = [c for c in countries if c not in ppl.Country.unique()]

    for c in countries:
        substation_i = n.buses.query('substation_lv and country == @c').index
        kdtree = KDTree(n.buses.loc[substation_i, ['x','y']].values)
        ppl_i = ppl.query('Country == @c').index

        tree_i = kdtree.query(ppl.loc[ppl_i, ['lon','lat']].values)[1]
        ppl.loc[ppl_i, 'bus'] = substation_i.append(pd.Index([np.nan]))[tree_i]

    if cntries_without_ppl:
        logging.warning(f"No powerplants known in: {', '.join(cntries_without_ppl)}")

    bus_null_b = ppl["bus"].isnull()
    if bus_null_b.any():
        logging.warning(f"Couldn't find close bus for {bus_null_b.sum()} powerplants")

    ppl.to_csv(snakemake.output[0])
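
# Standalone sketch of the nearest-bus assignment above: each powerplant gets
# the closest substation on planar lon/lat coordinates, using the same KDTree
# query pattern as the snippet (synthetic data; column names mirror the
# original and are otherwise arbitrary).
import pandas as pd
from scipy.spatial import cKDTree as KDTree

buses = pd.DataFrame({'x': [13.40, 11.58, 9.99], 'y': [52.52, 48.14, 53.55]},
                     index=['bus_berlin', 'bus_munich', 'bus_hamburg'])
plants = pd.DataFrame({'lon': [13.1, 10.0], 'lat': [52.4, 53.6]},
                      index=['plant_a', 'plant_b'])

tree = KDTree(buses[['x', 'y']].values)
nearest = tree.query(plants[['lon', 'lat']].values)[1]
plants['bus'] = buses.index[nearest]
print(plants)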
    def phase_detection(self):
        self.solids = []
        self.detect = []
        self.liquid_density = list()
        self.solid_density = list()
        self.solid_fraction = list()
        self.liquid_fraction = list()
        self.solid_molecular_order = list()
        self.liquid_molecular_order = list()
        self.solid_local_bond4 = list()
        self.solid_local_bond6 = list()
        self.solid_local_mole4 = list()
        self.solid_polar = list()
        self.solid_polar_fraction = list()
        self.radi_polar = list()
        self.radi_polar_fraction = list()
        self.xys = dict()
        self.final_id = dict()
        self.vor_area = list()
        self.vornoi_history = list()
        self.inter_boundary_number = list()
        #self.liquid_vor_area = np.empty(0)
        plot_number = 0
        sorted_keys = sorted(self.config_vdata.keys())
        for startframe in sorted_keys:                
            #pids = v_data.keys()
            v_data = self.config_vdata[startframe]
            qualify_id, order_para_mean, vr_mean, vomega_list = self.single_config_detect(v_data)
            self.detect.append(len(qualify_id))
            qualify_id_set = set(qualify_id)            
            order_mask, vr_mask, vomega_mask = self.solid_criteria(order_para_mean, vr_mean, vomega_list)
            
            fdata = helpy.load_framesets(v_data)              
                
            
            # find the frame that contains all the qualified particles
            count = 0
            while (count < 49) & (not set(fdata[startframe+count]['t']).issuperset(qualify_id_set)):
                count+=1
                if count == 49:
                    break
            startframe += count
            # for the identified frame, mark True if t is in qualify_id
            fdata_track = fdata[startframe]['t']
            track_mask = list()
            for t in fdata_track:
                track_mask.append(t in qualify_id)
            track_mask = np.asarray(track_mask)            
            # build KDTree to query the nearest neighbors
            xys = helpy.consecutive_fields_view(fdata[startframe][track_mask], 'xy')
            ors = helpy.consecutive_fields_view(fdata[startframe][track_mask], 'o')
            disp = xys - [self.x0, self.y0] # displacement to the center
            radial = np.hypot(*disp.T)
            criteria = self.R - 1.4*self.side_len
            radial_mask = radial >= criteria
            # switch the x, y coordinates into the regular orientation
            xys = xys[:,::-1]
            xys[:,1] = 1024 - xys[:,1]
            self.xys[startframe] = xys
            
            ftree = KDTree(xys, leafsize = 16)
            #####################################################################
            # 1st iteration:
            # find at least two particles within a 1.5-particle-size radius;
            # 3 of your neighbors must satisfy the vr criteria,
            # and the vomega criteria must also be met
            #####################################################################           
            final_mask = []       

            for pt_id in range(len(xys)):
                if not vr_mask[pt_id]:
                    final_mask.append(False)
                    continue
                dists, ids = ftree.query(xys[pt_id], self.nnn)
                #if np.all(dists < self.side_len * 2.0):
                if np.sum(dists < self.side_len*1.5) > 2:
                    final_mask.append(np.sum(vr_mask[ids]) > 3)
                else:
                    final_mask.append(False)
            temp_mask = np.array(final_mask) & np.array(vomega_mask)
            
                        
            ##############################################################################
            # if your neighbors qualify then you will be solid; this excludes detection errors
            # 
            # qualified_id is a True or False mask
            ##############################################################################
            qualified_solid = list()
            for pt_id in range(len(xys)):
                dists, ids = ftree.query(xys[pt_id], self.nnn)
                qualified_solid.append(temp_mask[pt_id] or np.sum(temp_mask[ids[1:]]) >= 3)
            
            self.final_id[startframe] = qualified_solid    
            solid_number = np.sum(qualified_solid)
            self.solids.append(solid_number) 
            
            
                       
            plot_vor = startframe < 550
            voronoi = self.density_calculation(solid_number, len(qualify_id), xys, ors, disp,\
                                                             qualified_solid, plot_vor, radial_mask)
            #print(qualified_solid)
            self.liquid_density.append(voronoi.liquid_density)
            self.solid_density.append(voronoi.solid_density)
            self.solid_local_bond4.append(np.nanmean(voronoi.solid_local_bond4))
            self.solid_local_bond6.append(np.nanmean(voronoi.solid_local_bond6))
            self.solid_local_mole4.append(np.nanmean(voronoi.solid_local_mole4))
            self.solid_polar.append(np.nanmean(voronoi.solid_polar))
            self.solid_polar_fraction.append(voronoi.solid_polar_fraction)
            self.radi_polar.append(np.nanmean(voronoi.radius_polar))
            self.radi_polar_fraction.append(voronoi.R_polar_fraction)
            self.inter_boundary_number.append(voronoi.interface_number)
    
            
            self.vornoi_history.append(voronoi)
            if plot_number < self.plot_check:
                xs = helpy.consecutive_fields_view(fdata[startframe][track_mask],'x')
                ys = helpy.consecutive_fields_view(fdata[startframe][track_mask],'y')
                self.plot_check_solid(xs, ys, vr_mean, vr_mask, order_para_mean, \
                                      order_mask,vomega_list, vomega_mask, final_mask,\
                                      qualified_solid)
                plot_number += 1
        self.save_phase()
        return len(qualify_id)
Example 43
0
class RGeocoder(metaclass=Singleton):
    """
    The main reverse geocoder class
    """
    def __init__(self, mode=2, verbose=True, stream=None):
        """ Class Instantiation
        Args:
        mode (int): Library supports the following two modes:
                    - 1 = Single-threaded K-D Tree
                    - 2 = Multi-threaded K-D Tree (Default)
        verbose (bool): For verbose output, set to True
        stream (io.StringIO): An in-memory stream of a custom data source
        """
        self.mode = mode
        self.verbose = verbose
        if stream:
            coordinates, self.locations = self.load(stream)
        else:
            coordinates, self.locations = self.extract(rel_path(RG_FILE))

        if mode == 1:  # Single-process
            self.tree = KDTree(coordinates)
        else:  # Multi-process
            self.tree = KDTree_MP.cKDTree_MP(coordinates)

    def query(self, coordinates):
        """
        Function to query the K-D tree to find the nearest city
        Args:
        coordinates (list): List of tuple coordinates, i.e. [(latitude, longitude)]
        """
        if self.mode == 1:
            _, indices = self.tree.query(coordinates, k=1)
        else:
            _, indices = self.tree.pquery(coordinates, k=1)
        return [self.locations[index] for index in indices]

    @staticmethod
    def load(stream):
        """
        Function that loads a custom data source
        Args:
        stream (io.StringIO): An in-memory stream of a custom data source.
                              The format of the stream must be a comma-separated file
                              with header containing the columns defined in RG_COLUMNS.
        """
        stream_reader = csv.DictReader(stream, delimiter=',')
        header = stream_reader.fieldnames

        if header != RG_COLUMNS:
            raise csv.Error('Input must be a comma-separated file with header containing ' + \
                            'the following columns - %s. For more help, visit: ' % (','.join(RG_COLUMNS)) + \
                            'https://github.com/thampiman/reverse-geocoder')

        # Load all the coordinates and locations
        geo_coords, locations = [], []
        for row in stream_reader:
            geo_coords.append((row['lat'], row['lon']))
            locations.append(row)

        return geo_coords, locations

    def extract(self, local_filename):
        """
        Function loads the already extracted GeoNames cities file or downloads and extracts it if
        it doesn't exist locally
        Args:
        local_filename (str): Path to local RG_FILE
        """
        if os.path.exists(local_filename):
            if self.verbose:
                print('Loading formatted geocoded file...')
            rows = csv.DictReader(open(local_filename, 'rt'))
        else:
            gn_cities1000_url = GN_URL + GN_CITIES1000 + '.zip'
            gn_admin1_url = GN_URL + GN_ADMIN1
            gn_admin2_url = GN_URL + GN_ADMIN2

            cities1000_zip_filename = GN_CITIES1000 + '.zip'
            cities1000_filename = GN_CITIES1000 + '.txt'

            if not os.path.exists(cities1000_zip_filename):
                if self.verbose:
                    print('Downloading files from Geoname...')

                urllib.request.urlretrieve(gn_cities1000_url,
                                           cities1000_zip_filename)
                urllib.request.urlretrieve(gn_admin1_url, GN_ADMIN1)
                urllib.request.urlretrieve(gn_admin2_url, GN_ADMIN2)

            if self.verbose:
                print('Extracting cities1000...')
            _z = zipfile.ZipFile(open(cities1000_zip_filename, 'rb'))
            open(cities1000_filename, 'wb').write(_z.read(cities1000_filename))

            if self.verbose:
                print('Loading admin1 codes...')
            admin1_map = {}
            t_rows = csv.reader(open(GN_ADMIN1, 'rt'), delimiter='\t')
            for row in t_rows:
                admin1_map[row[ADMIN_COLUMNS['concatCodes']]] = row[
                    ADMIN_COLUMNS['asciiName']]

            if self.verbose:
                print('Loading admin2 codes...')
            admin2_map = {}
            for row in csv.reader(open(GN_ADMIN2, 'rt'), delimiter='\t'):
                admin2_map[row[ADMIN_COLUMNS['concatCodes']]] = row[
                    ADMIN_COLUMNS['asciiName']]

            if self.verbose:
                print('Creating formatted geocoded file...')
            writer = csv.DictWriter(open(local_filename, 'wt'),
                                    fieldnames=RG_COLUMNS)
            rows = []
            for row in csv.reader(open(cities1000_filename, 'rt'), \
                                  delimiter='\t', quoting=csv.QUOTE_NONE):
                lat = row[GN_COLUMNS['latitude']]
                lon = row[GN_COLUMNS['longitude']]
                name = row[GN_COLUMNS['asciiName']]
                cc = row[GN_COLUMNS['countryCode']]

                admin1_c = row[GN_COLUMNS['admin1Code']]
                admin2_c = row[GN_COLUMNS['admin2Code']]

                cc_admin1 = cc + '.' + admin1_c
                cc_admin2 = cc + '.' + admin1_c + '.' + admin2_c

                admin1 = ''
                admin2 = ''

                if cc_admin1 in admin1_map:
                    admin1 = admin1_map[cc_admin1]
                if cc_admin2 in admin2_map:
                    admin2 = admin2_map[cc_admin2]

                write_row = {
                    'lat': lat,
                    'lon': lon,
                    'name': name,
                    'admin1': admin1,
                    'admin2': admin2,
                    'cc': cc
                }
                rows.append(write_row)
            writer.writeheader()
            writer.writerows(rows)

            if self.verbose:
                print('Removing extracted cities1000 to save space...')
            os.remove(cities1000_filename)

        # Load all the coordinates and locations
        geo_coords, locations = [], []
        for row in rows:
            geo_coords.append((row['lat'], row['lon']))
            locations.append(row)
        return geo_coords, locations
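
# Hedged usage sketch for RGeocoder: single-threaded K-D tree over the bundled
# GeoNames data (downloaded on first use); the coordinates below are arbitrary
# example points.
#
#     geocoder = RGeocoder(mode=1, verbose=False)
#     results = geocoder.query([(51.5074, -0.1278), (40.7128, -74.0060)])
#     for r in results:
#         print(r['name'], r['admin1'], r['cc'])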
Example 44
0
coordsP[:, 0] = x_psf
coordsP[:, 1] = y_psf
coordsP[:, 2] = z_psf

coordsF[:, 0] = x_flc
coordsF[:, 1] = y_flc
coordsF[:, 2] = z_flc

########################################################################

# kdt = KDT(coordsF)
# idxsF = kdt.query(coordsP)[1]
# ds = distArr(x_psf,y_psf,z_psf,x_flc[idxsF],y_flc[idxsF],z_flc[idxsF])

kdt = KDT(coordsP)
idxsP = kdt.query(coordsF)[1]

ds = distArr(x_flc, y_flc, z_flc, x_psf[idxsP], y_psf[idxsP], z_psf[idxsP])

# print(len(ds))

idxsF = np.arange(x_flc.size)

msk = ds < matchtol
idxsF = idxsF[msk]
idxsP = idxsP[msk]
ds = ds[msk]

# print(len(idxs1))

# outfile = magDir+'hor-I-cut_F606W_match_pix.txt'
Example 45
0
    def init_subproblems(self, conf, **kwargs):
        from sfepy.discrete.state import State
        from sfepy.discrete import Problem
        from sfepy.base.conf import ProblemConf, get_standard_keywords
        from scipy.spatial import cKDTree as KDTree

        # init subproblems
        problem = self.context
        pb_vars = problem.get_variables()
        # get "master" DofInfo and last index
        pb_adi_indx = problem.equations.variables.adi.indx
        self.adi_indx = pb_adi_indx.copy()
        last_indx = -1
        for ii in six.itervalues(self.adi_indx):
            last_indx = nm.max([last_indx, ii.stop])

        # coupling variables
        self.cvars_to_pb = {}
        for jj in conf.coupling_variables:
            self.cvars_to_pb[jj] = [None, None]
            if jj in pb_vars.names:
                if pb_vars[jj].dual_var_name is not None:
                    self.cvars_to_pb[jj][0] = -1

                else:
                    self.cvars_to_pb[jj][1] = -1

        # init subproblems
        self.subpb = []
        required, other = get_standard_keywords()
        master_prefix = output.get_output_prefix()
        for ii, ifname in enumerate(conf.others):
            sub_prefix = master_prefix[:-1] + '-sub%d:' % (ii + 1)
            output.set_output_prefix(sub_prefix)
            kwargs['master_problem'] = problem
            confi = ProblemConf.from_file(ifname, required, other,
                                          define_args=kwargs)
            pbi = Problem.from_conf(confi, init_equations=True)
            sti = State(pbi.equations.variables)
            pbi.equations.set_data(None, ignore_unknown=True)
            pbi.time_update()
            pbi.update_materials()
            sti.apply_ebc()
            pbi_vars = pbi.get_variables()
            output.set_output_prefix(master_prefix)
            self.subpb.append([pbi, sti, None])

            # append "slave" DofInfo
            for jj in pbi_vars.names:
                if not(pbi_vars[jj].is_state()):
                    continue

                didx = pbi.equations.variables.adi.indx[jj]
                ndof = didx.stop - didx.start
                if jj in self.adi_indx:
                    if ndof != \
                      (self.adi_indx[jj].stop - self.adi_indx[jj].start):
                        raise ValueError('DOFs do not match!')

                else:
                    self.adi_indx.update({
                        jj: slice(last_indx, last_indx + ndof, None)})
                    last_indx += ndof

            for jj in conf.coupling_variables:
                if jj in pbi_vars.names:
                    if pbi_vars[jj].dual_var_name is not None:
                        self.cvars_to_pb[jj][0] = ii

                    else:
                        self.cvars_to_pb[jj][1] = ii

        self.subpb.append([problem, None, None])

        self.cvars_to_pb_map = {}
        for varname, pbs in six.iteritems(self.cvars_to_pb):
            # match field nodes
            coors = []
            for ii in pbs:
                pbi = self.subpb[ii][0]
                pbi_vars = pbi.get_variables()
                fcoors = pbi_vars[varname].field.coors
                dc = nm.abs(nm.max(fcoors, axis=0)\
                            - nm.min(fcoors, axis=0))
                ax = nm.where(dc > 1e-9)[0]
                coors.append(fcoors[:,ax])

            if len(coors[0]) != len(coors[1]):
                raise ValueError('number of nodes does not match!')

            kdtree = KDTree(coors[0])
            map_12 = kdtree.query(coors[1])[1]

            pbi1 = self.subpb[pbs[0]][0]
            pbi1_vars = pbi1.get_variables()
            eq_map_1 = pbi1_vars[varname].eq_map

            pbi2 = self.subpb[pbs[1]][0]
            pbi2_vars = pbi2.get_variables()
            eq_map_2 = pbi2_vars[varname].eq_map

            dpn = eq_map_2.dpn
            nnd = map_12.shape[0]

            map_12_nd = nm.zeros((nnd * dpn,), dtype=nm.int32)
            if dpn > 1:
                for ii in range(dpn):
                    map_12_nd[ii::dpn] = map_12 * dpn + ii
            else:
                map_12_nd = map_12

            idx = nm.where(eq_map_2.eq >= 0)[0]
            self.cvars_to_pb_map[varname] = eq_map_1.eq[map_12[idx]]
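
# Standalone sketch of the node-matching step above: for two copies of the same
# nodal coordinates stored in different orderings, KDTree.query returns the
# permutation map_12 such that coords_a[map_12] equals coords_b (synthetic data).
import numpy as np
from scipy.spatial import cKDTree as KDTree

rng = np.random.default_rng(2)
coords_a = rng.random((50, 2))
perm = rng.permutation(50)
coords_b = coords_a[perm]

map_12 = KDTree(coords_a).query(coords_b)[1]
assert np.allclose(coords_a[map_12], coords_b)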
Example 46
0
def correlate_neighbourhood(calcium_signal: np.ndarray,
                            kd_tree: cKDTree,
                            center_ix: int,
                            init_radius=0.02,
                            max_radius=.08,
                            min_corr=.5,
                            step=0.01,
                            measure=correlation,
                            verbose=True):
    """
    Given a center neuron and parameters of the neighbourhood definition, tries to group neurons
    The basic idea is:
    1. Look at all neurons within a given radius of the center neurons,
    2. Correlate their calcium signal to the center's.
    3. Keep sufficiently highly correlated neurons as being part of the group.
    4. Compute the fraction correlated / all neighboring neurons
    5. Move the center to the neuron closest to the center of mass of this group
    6. Increase slightly the radius and start again.
    7. As long as the fraction of correlated neurons is not dropping significantly, keep on increasing the radius
    8. Label the neurons as being part of this group. If some were already part of another group,
       they belong to the biggest group

    Parameters
    ----------
    calcium_signal
    kd_tree
    center_ix
    init_radius
    max_radius
    min_corr
    step
    measure
    verbose

    Returns
    -------

    """
    FRAC_DEC = .95
    radii = np.arange(init_radius, max_radius, step)
    radius = radii[0]  # not necessary due to loop?
    frac_corr = 0
    w_correlated = np.array([])
    for radius in radii:
        neighbors_ix, _ = get_neighbors(kd_tree, center_ix, radius)
        if len(neighbors_ix) == 0:  # one neuron left so no neighbours
            break
        corr_neigh = measure(calcium_signal, center_ix, neighbors_ix)
        # Fraction of correlated neurons in the neighbourhood
        correlated = corr_neigh >= min_corr
        n_correlated = np.sum(correlated)
        new_frac_corr = n_correlated / len(corr_neigh)
        if verbose:
            print(
                f'Number of neurons: {len(corr_neigh)} ; fraction correlated: {new_frac_corr * 100:.2f}% ;'
                f' Correlated neurons: {np.sum(correlated)}')
        # More correlations than before
        if new_frac_corr >= FRAC_DEC * frac_corr and n_correlated > 2:  # 100
            frac_corr = new_frac_corr
            w_correlated = neighbors_ix[correlated]
            centroid = np.mean(kd_tree.data[w_correlated, :], 0)
            _, center_ix = kd_tree.query(centroid, 1)
        else:
            break
    if radius == radii[-1]:
        # print('\t >>> Reached maximum radius <<<')
        pass
    return w_correlated
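

# `get_neighbors` is defined elsewhere in the original module; the sketch below is an
# assumption about its behaviour, inferred from how it is used above: return the indices
# of (and distances to) all neurons within `radius` of the center neuron.
def get_neighbors(kd_tree, center_ix, radius):
    center = kd_tree.data[center_ix]
    neighbors_ix = np.asarray(kd_tree.query_ball_point(center, r=radius), dtype=int)
    neighbors_ix = neighbors_ix[neighbors_ix != center_ix]  # drop the center neuron itself
    dists = np.linalg.norm(kd_tree.data[neighbors_ix] - center, axis=1)
    return neighbors_ix, dists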
Example 47
def spherematch(ra1, dec1, ra2, dec2, tol=None, nnearest=1):
    """
    Finds matches in one catalog to another.

    Parameters
    ra1 : array-like
        Right Ascension in degrees of the first catalog
    dec1 : array-like
        Declination in degrees of the first catalog (shape of array must match
        `ra1`)
    ra2 : array-like
        Right Ascension in degrees of the second catalog
    dec2 : array-like
        Declination in degrees of the second catalog (shape of array must match
        `ra2`)
    tol : float or None, optional
        How close (in degrees) a match has to be to count as a match.  If None,
        all nearest neighbors for the first catalog will be returned.
    nnearest : int, optional
        The nth neighbor to find.  E.g., 1 for the nearest neighbor, 2 for the
        second nearest neighbor, etc.  Particularly useful if you want to get
        the nearest *non-self* neighbor of a catalog.  To do this, use:
        ``spherematch(ra, dec, ra, dec, nnearest=2)``

    Returns
    -------
    idx1 : int array
        Indices into the first catalog of the matches. Will never be
        larger than `ra1`/`dec1`.
    idx2 : int array
        Indices into the second catalog of the matches. Will never be
        larger than `ra1`/`dec1`.
    ds : float array
        Distance (in degrees) between the matches
    """

    ra1 = np.array(ra1, copy=False)
    dec1 = np.array(dec1, copy=False)
    ra2 = np.array(ra2, copy=False)
    dec2 = np.array(dec2, copy=False)

    if ra1.shape != dec1.shape:
        raise ValueError('ra1 and dec1 do not match!')
    if ra2.shape != dec2.shape:
        raise ValueError('ra2 and dec2 do not match!')

    x1, y1, z1 = _spherical_to_cartesian(ra1.ravel(), dec1.ravel())

    # this is equivalent to, but faster than just doing np.array([x1, y1, z1])
    coords1 = np.empty((x1.size, 3))
    coords1[:, 0] = x1
    coords1[:, 1] = y1
    coords1[:, 2] = z1

    x2, y2, z2 = _spherical_to_cartesian(ra2.ravel(), dec2.ravel())

    # this is equivalent to, but faster than just doing np.array([x2, y2, z2])
    coords2 = np.empty((x2.size, 3))
    coords2[:, 0] = x2
    coords2[:, 1] = y2
    coords2[:, 2] = z2

    kdt = KDT(coords2)
    if nnearest == 1:
        idxs2 = kdt.query(coords1)[1]
    elif nnearest > 1:
        idxs2 = kdt.query(coords1, nnearest)[1][:, -1]
    else:
        raise ValueError('invalid nnearest ' + str(nnearest))

    ds = _great_circle_distance(ra1, dec1, ra2[idxs2], dec2[idxs2])

    idxs1 = np.arange(ra1.size)

    if tol is not None:
        msk = ds < tol
        idxs1 = idxs1[msk]
        idxs2 = idxs2[msk]
        ds = ds[msk]

    return idxs1, idxs2, ds
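

# The two helpers below are defined elsewhere in the original module; these bodies are a
# hedged sketch based on the standard unit-sphere conversion and the haversine formula,
# matching how spherematch uses them (degrees in, degrees out).
def _spherical_to_cartesian(ra, dec):
    """Convert RA/Dec in degrees to cartesian coordinates on the unit sphere."""
    rar, decr = np.radians(ra), np.radians(dec)
    x = np.cos(decr) * np.cos(rar)
    y = np.cos(decr) * np.sin(rar)
    z = np.sin(decr)
    return x, y, z


def _great_circle_distance(ra1, dec1, ra2, dec2):
    """Angular separation in degrees between points (ra1, dec1) and (ra2, dec2)."""
    lam1, lam2 = np.radians(ra1), np.radians(ra2)
    phi1, phi2 = np.radians(dec1), np.radians(dec2)
    a = (np.sin((phi2 - phi1) / 2) ** 2
         + np.cos(phi1) * np.cos(phi2) * np.sin((lam2 - lam1) / 2) ** 2)
    return np.degrees(2 * np.arcsin(np.sqrt(a)))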
Example 48
                        random_state=41).fit(subset_data_unref)
                    print('Kmeans done: Time elapsed: {} seconds'.format(
                        time.time() - time_start))
                    labels_unref = kmeans_unref.labels_
                    centroids_unref = kmeans_unref.cluster_centers_

                    counting_occurence_in_patient_compare = Counter(
                        labels_unref)

                    vals_unref = np.fromiter(
                        counting_occurence_in_patient_compare.values(),
                        dtype=float)

                    # Compare clusterings using a KD-tree: for each reference centroid,
                    # find the nearest unreferenced centroid and take its cluster size.
                    k = KDTree(centroids_unref)
                    (dists, idxs) = k.query(centroids_ref)

                    reference_dataframe[f'Count_{name}'] = vals_unref[idxs]

                    print(reference_dataframe.shape,
                          reference_dataframe.columns)

    reference_dataframe.sort_values(by=['Cluster'], inplace=True)
    reference_dataframe.sort_index(axis=1, ascending=True, inplace=True)

    reference_dataframe.to_csv(
        path_to_store_frame +
        f'/Data_for_LDA_from_generate_data_with_n_{number_of_cluster}_configuration_{configuration}.csv'
    )
Example 49
class VoronoiClosestPolytope:
    def __init__(self,
                 polytopes,
                 key_vertices_count=0,
                 process_count=8,
                 max_number_key_points=None):
        '''
        Compute the closest polytope using Voronoi cells.

        :param polytopes: iterable of AH_polytope or zonotope objects to search over
        :param key_vertices_count: number of extra edge points sampled per zonotope (0 = centers only)
        :param process_count: number of worker processes used to precompute distances
        :param max_number_key_points: if given, randomly subsample the key points to this many
        '''
        self.init_start_time = default_timer()
        self.section_start_time = self.init_start_time
        self.polytopes = np.asarray(polytopes, dtype='object')
        self.type = self.polytopes[0].type
        self.process_count = process_count
        self.key_vertices_count = key_vertices_count
        if self.type == 'AH_polytope':
            self.dim = self.polytopes[0].t.shape[0]
        elif self.type == 'zonotope':
            self.dim = self.polytopes[0].x.shape[0]
        else:
            raise NotImplementedError
        if self.key_vertices_count > 0:
            self.key_points = np.zeros([
                len(self.polytopes) * (1 + 2**self.key_vertices_count),
                self.dim
            ])
        else:
            self.key_points = np.zeros([len(self.polytopes), self.dim])
        for i, z in enumerate(polytopes):
            if self.type == 'AH_polytope':
                if self.key_vertices_count > 0:
                    raise NotImplementedError
                else:
                    self.key_points[i, :] = self.polytopes[i].t[:, 0]
            elif self.type == 'zonotope':
                if self.key_vertices_count > 0:
                    self.key_points[i * (2**self.key_vertices_count +
                                         1), :] = self.polytopes[i].x[:, 0]
                    self.key_points[
                        i * (2**self.key_vertices_count + 1) + 1:(i + 1) *
                        (2**self.key_vertices_count +
                         1), :] = get_k_random_edge_points_in_zonotope(
                             self.polytopes[i], self.key_vertices_count)
                else:
                    self.key_points[i, :] = self.polytopes[i].x[:, 0]
            else:
                raise NotImplementedError
        if max_number_key_points:
            # sample the key points
            n = self.key_points.shape[0]
            chosen_key_points = np.random.choice(n,
                                                 size=min(
                                                     n, max_number_key_points),
                                                 replace=False)
            self.key_points = self.key_points[chosen_key_points, :]
            # print(self.key_points.shape)
        self.key_point_to_polytope_map = dict(
        )  # stores the candidate closest polytopes associated with each Voronoi cell (key point)
        for key_point in self.key_points:
            ds = np.zeros(self.polytopes.shape[0])
            self.key_point_to_polytope_map[str(key_point)] = np.rec.fromarrays(
                [self.polytopes, ds], names=('polytopes', 'distances'))

        self.build_cell_polytope_map_default()

        #build kd-tree for the key points
        self.key_point_tree = KDTree(self.key_points)
        print(('Completed precomputation in %f seconds' %
               (default_timer() - self.init_start_time)))

    def build_cell_polytope_map_default(self):
        polytope_key_point_indices = np.array(
            np.meshgrid(np.arange(self.polytopes.shape[0]),
                        np.arange(self.key_points.shape[0]))).T.reshape(-1, 2)
        arguments = []
        for i in polytope_key_point_indices:
            arguments.append(
                (self.key_points, self.key_point_to_polytope_map, i[0], i[1]))
        p = Pool(self.process_count)
        pca = p.map(set_polytope_pair_distance, arguments)
        polytope_key_point_arrays = np.asarray(pca).reshape(
            (self.polytopes.shape[0]), self.key_points.shape[0])
        # print(polytope_centroid_arrays)
        # compute pairwise distances of the centroids and the polytopes
        #fixme
        for key_point_index, key_point in enumerate(self.key_points):
            key_point_string = str(key_point)
            for polytope_index, polytope in enumerate(
                    self.key_point_to_polytope_map[key_point_string]
                ['polytopes']):
                self.key_point_to_polytope_map[str(key_point)].distances[
                    polytope_index] = polytope_key_point_arrays[
                        polytope_index, key_point_index]
                # print(polytope_key_point_arrays[polytope_index, key_point_index])
            self.key_point_to_polytope_map[key_point_string].sort(
                order='distances')
            # print(self.centroid_to_polytope_map[centroid_string])

    def find_closest_polytope(self,
                              query_point,
                              return_intermediate_info=False):
        #find the closest centroid
        d, i = self.key_point_tree.query(query_point)
        closest_key_point = self.key_point_tree.data[i]
        # print('closest key point', closest_key_point)
        closest_key_point_polytope = self.key_point_to_polytope_map[str(
            closest_key_point)]['polytopes'][0]
        # print('closest polytope centroid' + str(closest_key_point_polytope.x))
        dist_query_centroid_polytope = distance_point_polytope(
            closest_key_point_polytope, query_point, ball='l2')[0]
        dist_query_key_point = np.linalg.norm(query_point - closest_key_point)
        # print(dist_query_key_point, dist_query_centroid_polytope)
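        # Triangle inequality: if d(key_point, P) > d(query, key_point) + d(query, P0),
        # where P0 is the polytope attached to the closest key point, then
        # d(query, P) >= d(key_point, P) - d(query, key_point) > d(query, P0),
        # so every entry past `cutoff_index` in the distance-sorted list can be skipped.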
        cutoff_index = np.searchsorted(
            self.key_point_to_polytope_map[str(closest_key_point)].distances,
            dist_query_key_point + dist_query_centroid_polytope)
        # print(cutoff_index)
        # print(self.key_point_to_polytope_map[str(closest_key_point)]['distances'][0:cutoff_index])
        # print(self.key_point_to_polytope_map[str(closest_key_point)]['distances'][cutoff_index:])
        # print('dqc',dist_query_key_point)
        # print(self.centroid_to_polytope_map[str(closest_key_point)].distances)
        closest_polytope_candidates = self.key_point_to_polytope_map[str(
            closest_key_point)].polytopes[0:cutoff_index]
        # print(closest_polytope_candidates)
        best_polytope = None
        best_distance = np.inf
        for polytope in closest_polytope_candidates:
            if best_distance < 1e-9:
                break
            dist = distance_point_polytope(polytope, query_point, ball='l2')[0]
            if best_distance > dist:
                best_distance = dist
                best_polytope = polytope
        # print('best distance', best_distance)
        if return_intermediate_info:
            return best_polytope, best_distance, closest_polytope_candidates
        return best_polytope
Example 50
def tsne(fdarray,
         new_label='tsne',
         channels=None,
         transform='arcsinh',
         sample=6000,
         verbose=False,
         backgate=True):
    """Perform t-SNE/viSNE on the FlowData object
    
    """

    fdarray = util.make_list(fdarray)

    # If the user has not provided a list of channels to use,
    # use the intersection of all isotope channels
    if channels is None:
        channel_set = []
        for fd in fdarray:
            channel_set.append(set(fd.isotopes))
        channels = list(set.intersection(*channel_set))

    # Make a copy of the data in files that we want
    points = []
    for fd in fdarray:
        points.append(np.vstack([fd[ch] for ch in channels]).T)

    # transform
    if transform == 'arcsinh':
        for pts in points:
            # Apply the transform inplace to the data
            np.arcsinh(5 * pts, pts)

    # Randomly sample to reduce the number of points
    sample_masks = []
    for pts in points:
        if sample < pts.shape[0]:
            # If we have enough points to subsample
            sample_masks.append(
                np.random.choice(pts.shape[0], sample, replace=False))
        else:
            # Otherwise we add all the points
            sample_masks.append(np.array(range(pts.shape[0])))

    # Sample the points, and construct a large matrix
    sample_points = []
    for mask, pts in zip(sample_masks, points):
        sample_points.append(pts[mask, :])
    X = np.vstack(sample_points)

    # Perform t-SNE
    Y = lib_tsne.tsne(X, verbose=verbose)
    assert Y is not None, ('t-SNE failed to return')

    # Split Y into a matrix for each dataset
    splits = np.cumsum(
        np.array([mask.shape[0] for mask in sample_masks], dtype=int))
    Y_split = np.split(Y, splits, axis=0)

    # now expand data to reassign these points back into the dataset
    tsne_coords = []
    for (pts, mask, Yspt) in zip(points, sample_masks, Y_split):
        npoints = pts.shape[0]
        Z = np.zeros((npoints, 2)) * float('NaN')
        Z[mask, :] = Yspt
        tsne_coords.append(Z)

    # If a point didn't get sampled, place its t-SNE coordinates at its nearest
    # neighbor.
    if backgate:
        kd = KDTree(X)
        # select points not assigned values with t-SNE
        for pts, mask, coords, j in zip(points, sample_masks, tsne_coords,
                                        range(len(points))):
            nan_points = np.argwhere(np.isnan(coords[:, 0]))
            d, near = kd.query(pts[nan_points], 1)
            # convert back to coordinates on the whole dataset
            coords[nan_points, :] = Y[near, :]
            tsne_coords[j] = coords
    # add to data to FlowData structure
    for fd, coords in zip(fdarray, tsne_coords):
        fd[new_label + '1'] = coords[:, 0]
        fd[new_label + '2'] = coords[:, 1]
Example 51
def kldiv(x, y, *, k=1):
    r"""
    Compute the Kullback-Leibler divergence between two multivariate samples.

    .. math::
        D(P||Q) = \frac{d}{n} \sum_i^n \log{\frac{r_k(x_i)}{s_k(x_i)}} + \log{\frac{m}{n-1}}

    where r_k(x_i) and s_k(x_i) are, respectively, the euclidean distance
    to the kth neighbour of x_i in the x array (excepting x_i) and
    in the y array.

    Parameters
    ----------
    x : ndarray (n,d)
        Samples from distribution P, which typically represents the true
        distribution (reference).
    y : ndarray (m,d)
        Samples from distribution Q, which typically represents the
        approximate distribution (candidate)
    k : int or sequence
        The kth neighbours to look for when estimating the density of the
        distributions. Defaults to 1, which can be noisy.

    Returns
    -------
    out : float or sequence
        The estimated Kullback-Leibler divergence D(P||Q) computed from
        the distances to the kth neighbour.

    Notes
    -----
    In information theory, the Kullback–Leibler divergence is a non-symmetric
    measure of the difference between two probability distributions P and Q,
    where P is the "true" distribution and Q an approximation. This nuance is
    important because D(P||Q) is not equal to D(Q||P).

    For probability distributions P and Q of a continuous random variable,
    the K–L  divergence is defined as:

        D_{KL}(P||Q) = \int p(x) \log{\frac{p(x)}{q(x)}} dx

    This formula assumes we have a representation of the probability
    densities p(x) and q(x).  In many cases, we only have samples from the
    distribution, and most methods first estimate the densities from the
    samples and then proceed to compute the K-L divergence. In Perez-Cruz,
    the authors propose an algorithm to estimate the K-L divergence directly
    from the sample using an empirical CDF. Even though the CDFs do not
    converge to their true values, the paper proves that the K-L divergence
    almost surely does converge to its true value.

    References
    ----------
    Kullback-Leibler Divergence Estimation of Continuous Distributions (2008).
    Fernando Pérez-Cruz.
    """
    mk = np.iterable(k)
    ka = np.atleast_1d(k)

    nx, d = x.shape
    ny, d = y.shape

    # Limit the number of dimensions to 10, too slow otherwise.
    if d > 10:
        raise ValueError("Too many dimensions: {}.".format(d))

    # Not enough data to draw conclusions.
    if nx < 5 or ny < 5:
        return np.nan if not mk else [np.nan] * len(k)

    # Build a KD tree representation of the samples.
    xtree = KDTree(x)
    ytree = KDTree(y)

    # Get the k'th nearest neighbour from each point in x for both x and y.
    # We get the values for K + 1 to make sure the output is a 2D array.
    kmax = max(ka) + 1
    r, _ = xtree.query(x, k=kmax, eps=0, p=2, n_jobs=2)
    s, _ = ytree.query(x, k=kmax, eps=0, p=2, n_jobs=2)

    # There is a mistake in the paper. In Eq. 14, the right side misses a
    # negative sign on the first term of the right hand side.
    out = []
    for ki in ka:
        # The 0th nearest neighbour of x[i] in x is x[i] itself.
        # Hence we take the k'th + 1, which in 0-based indexing is given by
        # index k.
        out.append(-np.log(r[:, ki] / s[:, ki - 1]).sum() * d / nx +
                   np.log(ny / (nx - 1.0)))

    if mk:
        return out
    return out[0]
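

# Example usage (not part of the original snippet; assumes the module-level KDTree import
# used above): estimate D(P||Q) from samples of two 2-D Gaussians. With identical
# distributions the estimate should approach zero as the sample size grows.
rng = np.random.default_rng(0)
p_samples = rng.normal(0.0, 1.0, size=(2000, 2))   # samples from P
q_samples = rng.normal(0.5, 1.0, size=(2000, 2))   # samples from Q
print(kldiv(p_samples, q_samples, k=1))            # single neighbour, noisier
print(kldiv(p_samples, q_samples, k=(1, 5)))       # several neighbour counts at once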
Example 52
class GeocodeData:

    def __init__(self, geocode_filename='geocode.csv', country_filename='countries.csv'):
        coordinates, self.__locations = self.__extract(rel_path(geocode_filename))
        self.__tree = KDTree(coordinates)
        self.__load_countries(rel_path(country_filename))

    def __load_countries(self, country_filename):
        """Load a map of country code to name
        """
        self.__countries = {}
        with open(country_filename, 'r') as handler:
            for code, name in csv.reader(handler):
                self.__countries[code] = name

    def query(self, coordinates):
        """Find closest match to this list of coordinates
        """
        try:
            distances, indices = self.__tree.query(coordinates, k=1)
        except ValueError as e:
            logging.info('Unable to parse coordinates: {}'.format(coordinates))
            raise e
        else:
            results = [self.__locations[index] for index in indices]
            for result in results:
                result['country'] = self.__countries.get(result['country_code'], '')
            return results

    def __download(self):
        """Download geocode file
        """
        local_filename = os.path.abspath(os.path.basename(GEOCODE_URL))
        if not os.path.exists(local_filename):
            logging.info('Downloading: {}'.format(GEOCODE_URL))
            urlretrieve(GEOCODE_URL, local_filename)
        return local_filename

    def __extract(self, local_filename):
        """Extract geocode data from zip
        """
        if os.path.exists(local_filename):
            # open compact CSV
            rows = csv.reader(open(local_filename, 'r'))
        else:
            downloadedFile = None
            if not os.path.exists(GEOCODE_FILENAME):
                # remove GEOCODE_FILENAME to get updated data
                downloadedFile = self.__download()
                z = zipfile.ZipFile(downloadedFile)
                logging.info('Extracting: {}'.format(GEOCODE_FILENAME))
                open(GEOCODE_FILENAME, 'wb').write(z.read(GEOCODE_FILENAME))
                z.close()

            # extract coordinates into more compact CSV for faster loading
            writer = csv.writer(open(local_filename, 'w'))
            rows = []
            for row in csv.reader(open(GEOCODE_FILENAME, 'r'), delimiter='\t'):
                latitude, longitude = row[4:6]
                country_code = row[8]
                if latitude and longitude and country_code:
                    city = row[1]
                    row = latitude, longitude, country_code, city
                    writer.writerow(row)
                    rows.append(row)
            # cleanup downloaded files; only remove the zip if it was downloaded here
            if downloadedFile is not None:
                os.remove(downloadedFile)
            os.remove(GEOCODE_FILENAME)

        # load a list of known coordinates and corresponding __locations
        coordinates, __locations = [], []
        for latitude, longitude, country_code, city in rows:
            coordinates.append((float(latitude), float(longitude)))  # numeric data for the KD-tree
            __locations.append(dict(country_code=country_code, city=city))
        return coordinates, __locations
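

# Hypothetical usage (rel_path, GEOCODE_URL, GEOCODE_FILENAME and the bundled CSV files
# come from the original module, so this is only a sketch): reverse-geocode two points.
geocoder = GeocodeData()
for place in geocoder.query([(40.7128, -74.0060), (48.8566, 2.3522)]):
    print(place['city'], place['country'])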
Example 53
class ShapeMatcher(object):
    def __init__(self, ids, invariants):
        """Match other shapes based on euclidean distance.
        Constructs a KDTree in order to do nearest neighbour queries.
        For large datasets it might take a second or two to build the tree.

        Arguments:
        ids -- set names/identifiers for the shapes
        invariants -- 2D array of invariants that describe the shapes
        """
        self.ids = ids
        self.invariants = invariants
        LOG.debug('Constructing tree from %d invariants', len(invariants))
        self.tree = KDTree(invariants)

    def search_invariants(self, invariants, n=10, df=False):
        """Search for matches based on invariants.

        Arguments:
        invariants -- N length array of shape descriptors

        Keyword arguments:
        n -- number of matches to return (default 10)
        df -- return matches as a pandas DataFrame (default False)
        """
        if n == 'max':
            n = len(self.invariants)
        LOG.debug('Searching for %d closest points', n)
        distances, indexes = self.tree.query(invariants, n)
        invariants = self.invariants[indexes]
        # Need to handle the case of a single (scalar) match correctly
        if np.ndim(indexes) == 0:
            ids = self.ids[indexes].decode('utf-8')
            return SearchResult(ids, distances, invariants)
        else:
            ids = [x.decode('utf-8') for x in self.ids[indexes]]
        if df:
            return pd.DataFrame({
                'ID': ids,
                'Proximity': distances
            }).set_index('ID')
        else:
            return [
                SearchResult(n, d, i)
                for n, d, i in zip(ids, distances, invariants)
            ]

    def search_shape(self, shape, **kwargs):
        """Search for matches based on a shape object. (convenience function)

        Arguments:
        shape -- a Shape object.

        Keyword arguments:
        n -- number of matches to return (default 10)
        df -- return matches as a pandas DataFrame (default False)
        """
        LOG.debug('Searching for closest shapes to %s', shape.name)
        # delegate to search_invariants method
        return self.search_invariants(shape.invariants, **kwargs)

    @staticmethod
    def from_datafile(filename, l_max=20):
        """Construct a CSD matcher based on the bundled data

        Keyword arguments:
        l_max -- maximum angular momenta to use for invariants
        (default 20)
        use_radius -- use the mean radius as the first invariant
        (default True)
        """
        names, invariants = load_data(filename)
        return ShapeMatcher(names, invariants)

    @staticmethod
    def from_shapes(shapes, l_max=20):
        """Construct a shapematcher object from a list of shapes

        Arguments:
        shapes -- A list of Shape objects
        Keyword arguments:
        l_max -- maximum angular momenta to use for invariants
        (default 20)
        """
        invariants, names = [], []
        if isinstance(shapes, dict):
            for name, s in shapes.items():
                invariants.append(s.invariants)
                names.append(name)
        else:
            for s in shapes:
                invariants.append(s.invariants)
                names.append(s.name)
        invariants = np.array(invariants)
        names = np.array(names, dtype='|S64')
        return ShapeMatcher(names, invariants)

    @staticmethod
    def from_surface_files(files, property_name='shape'):
        """Construct a CSD matcher based on the bundled data

        Keyword arguments:
        l_max -- maximum angular momenta to use for invariants
        (default 20)
        use_radius -- use the mean radius as the first invariant
        (default True)
        """
        shapes = {}
        for f in files:
            shapes[f.stem] = surface_description(f,
                                                 property_name=property_name)

        return ShapeMatcher.from_shapes(shapes)

    def all(self):
        return self.search_invariants(self.invariants[0],
                                      n=len(self.invariants))
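

# Hypothetical usage with synthetic data; ids and invariants would normally come from
# load_data() or from Shape objects, and SearchResult / pd / LOG are assumed to be
# provided by the surrounding module.
ids = np.array([b'shape_a', b'shape_b', b'shape_c'], dtype='|S64')
invariants = np.random.rand(3, 21)
matcher = ShapeMatcher(ids, invariants)
closest_two = matcher.search_invariants(invariants[0], n=2, df=True)  # pandas DataFrame

Example 54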
def get_ref_coors_convex(field,
                         coors,
                         close_limit=0.1,
                         cache=None,
                         verbose=False):
    """
    Get reference element coordinates and elements corresponding to given
    physical coordinates.

    Parameters
    ----------
    field : Field instance
        The field defining the approximation.
    coors : array
        The physical coordinates.
    close_limit : float, optional
        The maximum limit distance of a point from the closest
        element allowed for extrapolation.
    cache : Struct, optional
        To speed up a sequence of evaluations, the field mesh and other data
        can be cached. Optionally, the cache can also contain the reference
        element coordinates as `cache.ref_coors`, `cache.cells` and
        `cache.status`, if the evaluation occurs in the same coordinates
        repeatedly. In that case the mesh related data are ignored.
    verbose : bool
        If False, reduce verbosity.

    Returns
    -------
    ref_coors : array
        The reference coordinates.
    cells : array
        The cell indices corresponding to the reference coordinates.
    status : array
        The status: 0 is success, 1 is extrapolation within `close_limit`, 2 is
        extrapolation outside `close_limit`, 3 is failure, 4 is failure due to
        non-convergence of the Newton iteration in tensor product cells.

    Notes
    -----
    Outline of the algorithm for finding xi such that X(xi) = P:

    1. make inverse connectivity - for each vertex have cells it is in.
    2. find the closest vertex V.
    3. choose initial cell: i0 = first from cells incident to V.
    4. while not P in C_i, change C_i towards P, check if P in new C_i.
    """
    timer = Timer()

    ref_coors = get_default_attr(cache, 'ref_coors', None)
    if ref_coors is None:
        extrapolate = close_limit > 0.0

        ref_coors = nm.empty_like(coors)
        cells = nm.empty((coors.shape[0], ), dtype=nm.int32)
        status = nm.empty((coors.shape[0], ), dtype=nm.int32)

        cmesh = get_default_attr(cache, 'cmesh', None)
        if cmesh is None:
            timer.start()
            mesh = field.create_mesh(extra_nodes=False)
            cmesh = mesh.cmesh

            gels = create_geometry_elements()

            cmesh.set_local_entities(gels)
            cmesh.setup_entities()

            centroids = cmesh.get_centroids(cmesh.tdim)

            if field.gel.name != '3_8':
                normals0 = cmesh.get_facet_normals()
                normals1 = None

            else:
                normals0 = cmesh.get_facet_normals(0)
                normals1 = cmesh.get_facet_normals(1)

            output('cmesh setup: %f s' % timer.stop(), verbose=verbose)

        else:
            centroids = cache.centroids
            normals0 = cache.normals0
            normals1 = cache.normals1

        kdtree = get_default_attr(cache, 'kdtree', None)
        if kdtree is None:
            from scipy.spatial import cKDTree as KDTree

            timer.start()
            kdtree = KDTree(cmesh.coors)
            output('kdtree: %f s' % timer.stop(), verbose=verbose)

        timer.start()
        ics = kdtree.query(coors)[1]
        output('kdtree query: %f s' % timer.stop(), verbose=verbose)

        ics = nm.asarray(ics, dtype=nm.int32)

        coors = nm.ascontiguousarray(coors)
        ctx = field.create_basis_context()

        timer.start()
        crc.find_ref_coors_convex(ref_coors, cells, status, coors, cmesh,
                                  centroids, normals0, normals1, ics,
                                  extrapolate, 1e-15, close_limit, ctx)
        output('ref. coordinates: %f s' % timer.stop(), verbose=verbose)

    else:
        cells = cache.cells
        status = cache.status

    return ref_coors, cells, status
Example 55
def match_arbitrary_translation_dilatation(x1,y1,x2,y2) :
    """
    Match two catalogs in different coordinate systems, 1 and 2, related by a translation, a dilatation, and possibly a "small" rotation
    The orientation of triangles is used for the match so the rotation has to be small.
    Inspired from http://articles.adsabs.harvard.edu/pdf/1986AJ.....91.1244G
    
    Args:
        x1 : float numpy array of coordinates along first axis of cartesian coordinate system 1
        y1 : float numpy array of coordinates along second axis of cartesian coordinate system 1
        x2 : float numpy array of coordinates along first axis of cartesian coordinate system 2
        y2 : float numpy array of coordinates along second axis of cartesian coordinate system 2
    
    returns:
        indices_2 : integer numpy array. If ii is an index array for entries in the first catalog,
                            indices_2[ii] is the index array of best matching entries in the second catalog
                            (one should compare x1[ii] with x2[indices_2[ii]]).
                            Negative values flag unmatched entries.
        distances : distance between pairs of matched triangles. It can be used to discard bad matches.

    """

    log = get_logger()
    
    # compute all possible triangles in both data sets
    # txyz are properties of the shape and orientation of the triangles
    log.debug("compute triangles")
    tk1,txyz1 = compute_triangles_with_fixed_orientation(x1,y1)
    tk2,txyz2 = compute_triangles_with_fixed_orientation(x2,y2)
    
    log.debug("match triangles")
    # match with kdtree triangles with same shape and orientation
    tree2=KDTree(txyz2)
    triangle_distances,triangle_indices_2 = tree2.query(txyz1,k=1)
    
    # now that we have match of triangles , need to match back catalog entries
    ranked_pairs = np.argsort(triangle_distances)
    
    indices_2 = -1*np.ones(x1.size,dtype=int)
    distances = np.zeros(x1.size)
    
    all_matched = False
    log.debug("match catalogs using pairs of triangles")
    for p in ranked_pairs :

        k1=tk1[p] # indices (in x1,y1) of vertices of this triangle (size=3)
        k2=tk2[triangle_indices_2[p]] # indices (in x2,y2) of vertices of the matched triangle
        
        # check unmatched or equal
        if np.any((indices_2[k1]>=0)&(indices_2[k1]!=k2)) :
            log.warning("skip {} <=> {}".format(k1,k2))
            continue
        indices_2[k1]=k2
        distances[k1]=triangle_distances[p]
        all_matched = (np.sum(indices_2>=0)==x1.size)
        if all_matched :
            log.debug("all matched")
            break

    # check duplicates
    for i2 in np.unique(indices_2[indices_2>=0]) :
        ii=(indices_2==i2)
        if np.sum(ii) > 1 :
            log.warning("{} duplicates for i2={}".format(np.sum(ii),i2))
            indices_2[ii]=-1
    
    return indices_2 , distances
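

# Hypothetical usage (compute_triangles_with_fixed_orientation and get_logger come from
# the surrounding module): match a catalog against a translated and dilated copy of itself.
rng = np.random.default_rng(1)
x1 = rng.uniform(0., 1., 20)
y1 = rng.uniform(0., 1., 20)
x2 = 2.0 * x1 + 0.3   # dilatation + translation along the first axis
y2 = 2.0 * y1 - 0.1   # dilatation + translation along the second axis
indices_2, distances = match_arbitrary_translation_dilatation(x1, y1, x2, y2)
# wherever indices_2[i] >= 0, (x1[i], y1[i]) matches (x2[indices_2[i]], y2[indices_2[i]])

Example 56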
    x_drc_low = low_x[ff]
    y_drc_low = low_y[ff]
    xm_flc_low = flc_all['xdrc_low_'+filter]
    ym_flc_low = flc_all['ydrc_low_'+filter]

    coords1low = np.empty((xm_flc_low.size,2))
    coords2low = np.empty((x_drc_low.size,2))

    coords1low[:,0] = xm_flc_low
    coords1low[:,1] = ym_flc_low

    coords2low[:,0] = x_drc_low
    coords2low[:,1] = y_drc_low

    kdt = KDT(coords2low)
    idxs2 = kdt.query(coords1low)[1]

    ds = distArr(xm_flc_low,ym_flc_low,x_drc_low[idxs2],y_drc_low[idxs2])

    idxs1 = np.arange(xm_flc_low.size)

    msk = ds < matchtol
    idxs1 = idxs1[msk]
    idxs2 = idxs2[msk]
    ds = ds[msk]

    outfile = outDir+'hor-I-cut_drc_low_'+filter+'_tol1.txt'
    np.savetxt(outfile, idxs2, fmt='%4i')

    outfile = outDir+'hor-I-cut_flc_low_'+filter+'_tol1.txt'
    np.savetxt(outfile, idxs1, fmt='%4i')
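

# `distArr` is defined elsewhere in the original script; the sketch below is an assumption
# based on how it is used above: elementwise Euclidean distance between paired points.
def distArr(x1, y1, x2, y2):
    return np.sqrt((x1 - x2)**2 + (y1 - y2)**2)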
Example 57
if 'snakemake' not in globals():
    from vresutils.snakemake import MockSnakemake, Dict

    snakemake = MockSnakemake(input=Dict(base_network='networks/base.nc'),
                              output=['resources/powerplants.csv'])

logging.basicConfig(level=snakemake.config['logging_level'])

n = pypsa.Network(snakemake.input.base_network)

ppl = (ppm.collection.matched_data()[lambda df: ~df.Fueltype.isin(
    ('Solar', 'Wind'))].pipe(ppm.cleaning.clean_technology).assign(
        Fueltype=lambda df: (df.Fueltype.where(
            df.Fueltype != 'Natural Gas',
            df.Technology.replace('Steam Turbine', 'OCGT').fillna('OCGT')))).
       pipe(ppm.utils.fill_geoposition, parse=True,
            only_saved_locs=True).pipe(ppm.heuristics.fill_missing_duration))

# ppl.loc[(ppl.Fueltype == 'Other') & ppl.Technology.str.contains('CCGT'), 'Fueltype'] = 'CCGT'
# ppl.loc[(ppl.Fueltype == 'Other') & ppl.Technology.str.contains('Steam Turbine'), 'Fueltype'] = 'CCGT'

ppl = ppl.loc[ppl.lon.notnull() & ppl.lat.notnull()]

substation_lv_i = n.buses.index[n.buses['substation_lv']]
kdtree = KDTree(n.buses.loc[substation_lv_i, ['x', 'y']].values)
ppl = ppl.assign(
    bus=substation_lv_i[kdtree.query(ppl[['lon', 'lat']].values)[1]])

ppl.to_csv(snakemake.output[0])
Example 58
    N = row_splits.size - 1
    consumed = np.zeros((N,), dtype=bool)
    out = []
    for i in range(N):
        if len(out) >= max_size:
            break
        if not consumed[i]:
            consumed[indices[row_splits[i]:row_splits[i + 1]]] = True
            out.append(i)
    return np.array(out, dtype=np.uint32)


np.random.seed(123)
x = np.random.uniform(size=(in_size, 2)).astype(dtype=np.float32)
tree = KDTree(x)
dists, indices = tree.query(x, k)
valid = dists < max_dist
indices = indices[valid]
row_lengths = np.count_nonzero(valid, axis=1)
row_splits = np.pad(np.cumsum(row_lengths), [[1, 0]], 'constant')

kwargs = dict(indices=indices, row_splits=row_splits, max_size=max_size)

num_runs = 100
print('cython implementation')
print(
    timeit(functools.partial(rejection_sample_ragged, **kwargs),
           number=num_runs) / num_runs)
print('python implementation')
print(
    timeit(functools.partial(rejection_sample_ragged_base, **kwargs),
           number=num_runs) / num_runs)
Example 59
    class cholesky_NN(object):

        def __init__(self,xdata,ydata):

            #Do some tests here

            #Find data covariance
            cov = np.cov(xdata.T)

            #Cholesky decompose to make new basis
            L_mat = np.linalg.cholesky(cov)
            self.L_mat = np.linalg.inv(L_mat)

            #Transform xdata into new basis
            self.xtrain = xdata
            self.transf_x = np.array([np.dot(self.L_mat,x) for x in xdata])

            #DEBUG
            #plt.plot(xdata[:,0],xdata[:,1],'.',color='r')
            #plt.plot(self.transf_x[:,0],self.transf_x[:,1],'.')
            #plt.show()
            #sys.exit()

            #Store training
            self.ytrain = ydata

            #Build KDTree for quick lookup
            self.transf_xtree = KDTree(self.transf_x)

        def __call__(self,x,k=5):

            if k<2:
                raise Exception("Need k>1")
            if x.ndim != self.xtrain[0].ndim:
                raise Exception("Requested x and training set do not have the same number of dimension.")

            #Change basis
            x0 = np.dot(self.L_mat,x)

            #Get nearest neighbors
            dist, loc = self.transf_xtree.query(x0,k=k)
            #Protect div by zero
            dist = np.array([np.max([1e-15,d]) for d in dist])
            weight = 1.0/dist
            nearest_y = self.ytrain[loc]

            #Interpolate with weighted average
            if self.ytrain.ndim > 1:
                y_predict = np.array([np.average(y0,weights=weight) for y0 in nearest_y.T])
                testgood = all([test_good(y) for y in y_predict])
            elif self.ytrain.ndim==1:
                y_predict = np.average(nearest_y,weights=weight)
                testgood = test_good(y_predict)
            else:
                raise Exception('The dimension of y training data is weird')


            if not testgood:
                raise Exception('y prediction went wrong')

            return y_predict


        def train_dist_error_model(self,xtrain,ytrain,k=5):
            """Rather than learning a non-parametric error model, we can define a parametric error model instead and learn its parameters."""

            if xtrain.shape[0]!=ytrain.shape[0]:
                raise TypeError('Xtrain and Ytrain do not have same shape.')

            dist_list = []
            for x0 in xtrain:

                #Change basis
                x0 = np.dot(self.L_mat,x0)

                #Get nearest neighbors in original training set
                dist, loc = self.transf_xtree.query(x0,k=k)
                #Weighted density in ball for NN
                #dist = np.array([np.max([1e-15,d]) for d in dist])
                #weight = 1.0/dist
                #dist_list.append(np.sum(weight))
                dist_list.append(np.mean(dist))

            dist_list = np.array(dist_list)

            def error_model(dist, a, b, c):
                return a*(dist) + b*(dist)**c

            bestfit, cov = opt.curve_fit(error_model,
                    dist_list,np.abs(ytrain),
                    #bounds=((0.0,0.0,0.0),(np.inf,np.inf,np.inf)))
                    bounds=((0.0,0.0,0.0),(1e1,1e1,1e1)))

            #print "this is bestfit:", bestfit

            def new_error_model(xval):
                xval = np.dot(self.L_mat,xval)
                #Get nearest neighbors in original training set
                dist, loc = self.transf_xtree.query(xval,k=k)
                #Mean distance to NN
                dist = np.mean(dist)

                #dist = dist/bestfit[2]

                err_guess = bestfit[0]*dist + bestfit[1]*dist**bestfit[2]
                rand_sign = np.random.rand() - 0.5
                #err_guess *= 1.0 if rand_sign>0.0 else -1.0

                return err_guess


            #DEBUG
            #plt.plot(dist_list, np.abs(ytrain),'bo')
            #plt.plot(dist_list, map(new_error_model,xtrain),'ro')
            #plt.show()


            return new_error_model
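

# Hypothetical usage of cholesky_NN (shown unindented; in the original module the class is
# defined inside a guarded block, and np / opt / test_good come from that module's imports):
xdata = np.random.rand(200, 3)
ydata = np.sin(xdata).sum(axis=1)
interp = cholesky_NN(xdata, ydata)
y_at_point = interp(np.array([0.2, 0.5, 0.7]), k=5)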
Example 60
class InvDistTree:
    """
    As seen in http://stackoverflow.com/questions/3104781/inverse-distance-weighted-idw-interpolation-with-python
    inverse-distance-weighted interpolation using KDTree:
    invdisttree = InvDistTree( X, z )  -- data points, values
    interpol = invdisttree( q, nnear=3, eps=0, p=1, weights=None, stat=0 )
        interpolates z from the 3 points nearest each query point q;
        For example, interpol[ a query point q ]
        finds the 3 data points nearest q, at distances d1 d2 d3
        and returns the IDW average of the values z1 z2 z3
            (z1/d1 + z2/d2 + z3/d3)
            / (1/d1 + 1/d2 + 1/d3)
            = .55 z1 + .27 z2 + .18 z3  for distances 1 2 3

        q may be one point, or a batch of points.
        eps: approximate nearest, dist <= (1 + eps) * true nearest
        p: use 1 / distance**p
        weights: optional multipliers for 1 / distance**p, of the same shape as q
        stat: accumulate wsum, wn for average weights

    How many nearest neighbors should one take ?
    a) start with 8 11 14 .. 28 in 2d 3d 4d .. 10d; see Wendel's formula
    b) make 3 runs with nnear= e.g. 6 8 10, and look at the results --
        |interpol 6 - interpol 8| etc., or |f - interpol*| if you have f(q).
        I find that runtimes don't increase much at all with nnear -- ymmv.

    p=1, p=2 ?
        p=2 weights nearer points more, farther points less.
        In 2d, the circles around query points have areas ~ distance**2,
        so p=2 is inverse-area weighting. For example,
            (z1/area1 + z2/area2 + z3/area3)
            / (1/area1 + 1/area2 + 1/area3)
            = .74 z1 + .18 z2 + .08 z3  for distances 1 2 3
        Similarly, in 3d, p=3 is inverse-volume weighting.

    Scaling:
        if different X coordinates measure different things, Euclidean distance
        can be way off.  For example, if X0 is in the range 0 to 1
        but X1 0 to 1000, the X1 distances will swamp X0;
        rescale the data, i.e. make X0.std() ~= X1.std() .

    A nice property of IDW is that it's scale-free around query points:
    if I have values z1 z2 z3 from 3 points at distances d1 d2 d3,
    the IDW average
        (z1/d1 + z2/d2 + z3/d3)
        / (1/d1 + 1/d2 + 1/d3)
    is the same for distances 1 2 3, or 10 20 30 -- only the ratios matter.
    In contrast, the commonly-used Gaussian kernel exp( - (distance/h)**2 )
    is exceedingly sensitive to distance and to h.

    """

    # anykernel( dj / av dj ) is also scale-free
    # error analysis, |f(x) - idw(x)| ?

    def __init__(self, measured_points, measured_values, leafsize=10, stat=0):
        """

        @param measured_points:
        @param measured_values:
        @param leafsize:
        @param stat:
        """
        assert len(measured_points) == len(
            measured_values), "len(X) %d != len(z) %d" % (len(measured_points),
                                                          len(measured_values))
        self.tree = KDTree(measured_points,
                           leafsize=leafsize)  # build the tree
        self.z = measured_values
        self.stat = stat
        self.wn = 0
        self.wsum = None

    def __call__(self, new_points, num_near=6, eps=0, p=1, weights=None):
        """
        Call an interpolation with the trained data
        @param new_points:
        @param num_near: Number of near-by points
        @param eps: Tolerance
        @param p: 1<=p<=infinity. Which Minkowski p-norm to use: 1 is the sum-of-absolute-values
                  "Manhattan" distance, 2 is the usual Euclidean distance, and infinity is the
                  maximum-coordinate-difference distance
        @param weights:
        @return:
        """

        # num_near nearest neighbours of each query point --
        new_points = np.asarray(new_points, dtype=complex)
        qdim = new_points.ndim
        if qdim == 1:
            new_points = np.array([new_points], dtype=complex)
        if self.wsum is None:
            self.wsum = np.zeros(num_near)

        # get the nearest neighbours of each point
        '''
        self.distances : array of floats. The distances to the nearest neighbors. If x has shape tuple+(self.m,), then
                         d has shape tuple+(k,). Missing neighbors are indicated with infinite distances.

        self.ix : ndarray of ints. The locations of the neighbors in self.data. If x has shape tuple+(self.m,), then i
                  has shape tuple+(k,). Missing neighbors are indicated with self.n.
        '''
        self.distances, self.ix = self.tree.query(new_points,
                                                  k=num_near,
                                                  eps=eps)

        # declare the interpolation array
        interpol = np.empty((len(self.distances), ) + np.shape(self.z[0]),
                            dtype=complex)

        # Perform the interpolation
        idx = 0
        for dist, ix in zip(self.distances, self.ix):
            if num_near == 1:
                wz = self.z[ix]
            elif dist[0] < 1e-10:
                wz = self.z[ix[0]]
            else:  # weight z s by 1/dist --
                w = 1 / np.power(dist, p)
                if weights is not None:
                    w *= weights[ix]  # >= 0
                w /= np.sum(w)
                wz = np.dot(w, self.z[ix])
                if self.stat:
                    self.wn += 1
                    self.wsum += w
            interpol[idx] = wz
            idx += 1

        return interpol if qdim > 1 else interpol[0]