Example #1
    def trim(self, points, radius):
        """
        Remove points too close to the cut curve; they don't add anything and only lead to awkward faces.
        """
        #some precomputations
        tree   = KDTree(points)
        cp     = self.vertices[self.faces]
        normal = util.normalize(np.cross(cp[:,0], cp[:,1]))
        mid    = util.normalize(cp.sum(axis=1))
        diff   = np.diff(cp, axis=1)[:,0,:]
        edge_radius = np.sqrt(util.dot(diff, diff)/4 + radius**2)

        index = np.ones(len(points), bool)

        #eliminate near edges
        def near_edge(e, p):
            return np.abs(np.dot(points[p]-mid[e], normal[e])) < radius
        for i, (p, r) in enumerate(zip(mid, edge_radius)):
            coarse = tree.query_ball_point(p, r)
            index[[c for c in coarse if near_edge(i, c)]] = 0
        #eliminate near points
        for p in self.vertices:
            coarse = tree.query_ball_point(p, radius)
            index[coarse] = 0

        return points[index]
Example #2
def find_pairs(cutoff, X, Y=None):
    """
    Find pairs with euclidean distance below C{cutoff}. Either between
    C{X} and C{Y}, or within C{X} if C{Y} is C{None}.

    Uses a KDTree and is thus memory efficient and reasonably fast.

    @type cutoff: float
    @type X: (m,n) numpy.array
    @type Y: (k,n) numpy.array
    @return: set of index tuples
    @rtype: iterable
    """
    try:
        from scipy.spatial import cKDTree as KDTree
        KDTree.query_pairs
        KDTree.query_ball_tree
    except (ImportError, AttributeError):
        from scipy.spatial import KDTree

    tree = KDTree(X, len(X))
    if Y is None:
        return tree.query_pairs(cutoff)

    other = KDTree(Y, len(Y))
    contacts = tree.query_ball_tree(other, cutoff)
    return ((i, j) for (i, js) in enumerate(contacts) for j in js)
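A quick way to exercise find_pairs is with a small random cloud; this is only a sketch, assuming the function above is in scope and scipy is installed:

import numpy as np

rng = np.random.default_rng(0)
X = rng.random((200, 3))
Y = rng.random((50, 3))

within_X = set(find_pairs(0.1, X))      # index pairs (i, j) within X
between = list(find_pairs(0.1, X, Y))   # (i in X, j in Y) pairs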
Example #3
def group_vectors(vectors, 
                  angle = np.radians(10), 
                  include_negative = False):
    '''
    Group vectors based on an angle tolerance, with the option to 
    include negative vectors. 
    
    This is very similar to group_rows(stack_negative(rows)).
    The main difference is that max_angle can be much looser, as we
    are doing actual distance queries.
    '''
    dist_max            = np.tan(angle)
    unit_vectors, valid = unitize(vectors, check_valid = True)
    valid_index         = np.nonzero(valid)[0]
    consumed            = np.zeros(len(unit_vectors), dtype=bool)
    tree                = KDTree(unit_vectors)
    unique_vectors      = deque()
    aligned_index       = deque()
    
    for index, vector in enumerate(unit_vectors):
        if consumed[index]: 
            continue
        aligned = np.array(tree.query_ball_point(vector, dist_max))
        vectors = unit_vectors[aligned]
        if include_negative:
            aligned_neg = tree.query_ball_point(-1.0*vector, dist_max)
            vectors     = np.vstack((vectors, -unit_vectors[aligned_neg]))
            aligned     = np.append(aligned, aligned_neg)
        aligned = aligned.astype(int)
        consumed[aligned] = True
        unique_vectors.append(np.median(vectors, axis=0))
        aligned_index.append(valid_index[aligned])
    return np.array(unique_vectors), np.array(aligned_index)
Example #4
    def swept_extrude(self, thickness):
        """
        outer is a copy of inner, possibly with added detail, but with an identical boundary.
        We seek to create a castable object with a constant thickness 'thickness'.
        To that end, we need to match the boundary points to make a closed extrusion.
        The extrusion is done iteratively;
        we initialize by radially shrinking the inner mesh by thickness.
        """
        assert thickness > 0
        outer = self.vertices
        tree = KDTree(outer)

        outer_radius = np.linalg.norm(outer, axis=1)
        inner = outer

        #incremental updates
        while True:
            # find nearest point for each inner point
            dist, idx = tree.query(inner, k=1)

            inner_radius = np.linalg.norm(inner, axis=1)
            radial_dist = inner_radius - outer_radius[idx]
            ortho_dist2 = dist**2 - radial_dist**2
            new_radius = outer_radius[idx] - np.sqrt(1 - ortho_dist2 / thickness ** 2) * thickness

            if np.allclose(inner_radius, new_radius):
                break
            inner = inner / (inner_radius / new_radius)[:, None]

        #return inner surface swept by thickness
        return self.extrude(inner)
Example #5
def remove_close(points, radius):
    """
    Given an (n, m) set of points where m=(2|3), return a list of points
    where no point is closer than radius.

    Parameters
    ------------
    points : (n, dimension) float
      Points in space
    radius : float
      Minimum radius between result points

    Returns
    ------------
    culled : (m, dimension) float
      Points in space
    mask : (n,) bool
      Which points from the original set were returned
    """
    from scipy.spatial import cKDTree as KDTree

    tree = KDTree(points)
    consumed = np.zeros(len(points), dtype=bool)
    unique = np.zeros(len(points), dtype=bool)
    for i in range(len(points)):
        if consumed[i]:
            continue
        neighbors = tree.query_ball_point(points[i], r=radius)
        consumed[neighbors] = True
        unique[i] = True

    return points[unique], unique
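As a usage sketch (assuming numpy and the function above are in scope): the greedy loop consumes every neighbour of a kept point, so no two surviving points lie within radius of each other.

import numpy as np

points = np.random.default_rng(1).random((500, 2))
culled, mask = remove_close(points, radius=0.05)
assert np.array_equal(culled, points[mask])   # mask marks the surviving rows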
Example #6
def main():
    # read in the file
    try:
        ifs = open(sys.argv[1])
        sample, ext = os.path.splitext(sys.argv[1])
    except IndexError:
        ifs = sys.stdin
        sample = ''
    data = np.loadtxt(ifs, delimiter=',')
    if ifs is not sys.stdin:
        ifs.close()
    # view of the com
    com = data[:,1:4]
    # construct a KD tree
    tree = KDTree(com)
    # query KD tree to find the first nearest neighbor
    dist, idx = tree.query(com, k=2)
    nn = [(i, j, d2) for ((d1, d2), (i, j)) in zip(dist, idx)]
    # histogram of the nearest neighbor distance
    hist(np.array(nn)[:,2])
         #title='{} pore-pore distances'.format(sample),
         #output='{}.pdf'.format(sample))
    # save the nearest neighbor distance to .json files
    ofile = '{}_pore-distribution.json'.format(sample)
    medianDist = np.median(np.array(nn)[:,2])
    cmp0 = lambda lhs, rhs: -1 if lhs[0] < rhs[0] else \
        (1 if lhs[0] > rhs[0] else 0)
Example #7
def binder(positions, orientations, bl, m=4, method='ball', margin=0):
    """Calculate the binder cumulant, given positions and orientations.

    bl: the binder length scale, such that
        B(bl) = 1 - .333 * S4 / S2^2
    where SN are <phibl^N> averaged over each block/cluster of size bl in frame.
    """
    if margin:
        if margin < ss:  # ss (particle side length) is assumed to come from the enclosing scope
            margin *= ss
        center = 0.5*(positions.max(0) + positions.min(0))
        # `d` was undefined in the original; assume it is each particle's distance from the centre
        d = np.hypot(*(positions - center).T)
        dmask = d < d.max() - margin
        positions = positions[dmask]
        orientations = orientations[dmask]
    if 'neigh' in method or 'ball' in method:
        tree = KDTree(positions)
        balls = tree.query_ball_tree(tree, bl)
        balls, ball_mask = helpy.pad_uneven(balls, 0, True, int)
        ball_orient = orientations[balls]
        ball_orient[ball_mask] = np.nan
        phis = np.nanmean(np.exp(m*ball_orient*1j), 1)
        phi2 = np.dot(phis, phis) / len(phis)
        phiphi = phis*phis
        phi4 = np.dot(phiphi, phiphi) / len(phiphi)
        return 1 - phi4 / (3*phi2*phi2)
    else:  # elif method=='block':
        raise ValueError("method {} not implemented".format(method))
Example #8
def get_N_ngbs(positions, radii, N=12, maxdist=3.0, edge = None):
    """N first neighbours, with a maximum relative distances, such that $r_{ij} < maxdist (R_i + R_j)$.
    If a potential neighbour is further away than the distance to the edge of the field of view, 
    the current particle of interest is considered as "on the edge" and the neighbour not taken into account.
    
    Returns neighbours, inside"""
    assert len(positions)==len(radii)
    if edge is None:
        edge = (positions.min(0), positions.max(0))
    #initialize the geometry of each particle
    to_edge = np.minimum((positions - edge[0]).min(-1), (edge[1] - positions).min(-1))**2  # squared distance to the nearest edge
    inside = np.full(len(positions), True, dtype=bool)
    neighbours = np.full([len(positions), N], -1, dtype=int)
    tree = KDTree(positions)
    rmax = radii.max()
    for i, js in enumerate(tree.query_ball_tree(tree, 2*rmax*maxdist)):
        disq = np.sum((positions[js] - positions[i])**2, -1)
        ags = np.argsort(disq)[:N]
        if disq[ags[-1]] < to_edge[i]:
            neighbours[i, :len(js)] = np.array(js)[ags]
        else:
            inside[i] = False
            N2 = np.where(disq[ags] < to_edge[i])[0][0]+1
            neighbours[i, :N2] = np.array(js)[ags[:N2]]
    return neighbours, inside
Example #9
def find_neighbor_pixels(pix_x, pix_y, rad):
    """uses a KD-Tree to quickly find nearest neighbors of the pixels in a
    camera. This function can be used to find the neighbor pixels if
    such a list is not already present in the file.

    Parameters
    ----------
    pix_x : array_like
        x position of each pixel
    pix_y : array_like
        y position of each pixel
    rad : float
        radius to consider as a neighbor; it should be slightly larger
        than the pixel diameter.

    Returns
    -------
    array of neighbor indices in a list for each pixel
    """

    points = np.array([pix_x, pix_y]).T
    indices = np.arange(len(pix_x))
    kdtree = KDTree(points)
    neighbors = [kdtree.query_ball_point(p, r=rad) for p in points]
    for nn, ii in zip(neighbors, indices):
        nn.remove(ii)  # get rid of the pixel itself
    return neighbors
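A minimal check on a 3x3 camera with unit pixel spacing (a sketch; it assumes KDTree above is scipy.spatial.cKDTree imported at module level). A radius of 1.1 keeps the four orthogonal neighbours and excludes the diagonals:

import numpy as np

xx, yy = np.meshgrid(np.arange(3.0), np.arange(3.0))
neighbors = find_neighbor_pixels(xx.ravel(), yy.ravel(), rad=1.1)
# the centre pixel (index 4) ends up with exactly four neighbours: 1, 3, 5 and 7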
Example #10
def sht_isosurface(filename, l_max=20, prop='electric_potential', 
                   test=None):
    """Given an SBF, describe the set of vertices and their esp using sht.
    Will scale the mesh to be of unit mean radius.

    Arguments:
    filename -- name of the SBF file containing a surface

    Keyword arguments:
    prop -- the name of the vertex property to describe in combination
    with the shape (or radius)
    l_max -- maximum angular momenta
    test -- use to keep the actual shape and property values for
    examination of accuracy of descriptor

    """
    name = Path(filename).stem
    LOG.debug('Describing %s surface with spherical harmonics', name)
    datafile = sbf.read_file(filename)
    pts = datafile['vertices'].data.transpose()
    LOG.debug('Loaded vertex data')
    # shift to be centered about the origin
    pts -= np.mean(pts, axis=0)

    # this is faster for some reason than np.apply_along_axis
    norms = np.sqrt(pts[:, 0] ** 2 + pts[:, 1] ** 2 + pts[:, 2] ** 2)
    mean_norm = np.mean(norms)
    pts /= mean_norm
    norms /= mean_norm
    pts_normalized = pts / np.reshape(norms, (pts.shape[0], 1))
    LOG.debug('Normalized points')
    sht = SHT(l_max)
    grid_cartesian = spherical_to_cartesian(
        np.c_[np.ones(sht.grid.shape[0]), sht.grid[:, 1], sht.grid[:, 0]])
    LOG.debug('Constructing tree')
    tree = KDTree(pts_normalized)
    LOG.debug('Done')
    LOG.debug('Interpolating values')
    nearest = tree.query(grid_cartesian, 1)
    LOG.debug('Done')
    shape = values_from_grid(norms, nearest[1])
    property_values = values_from_grid(datafile[prop].data, nearest[1])

    if test is not None:
        test['actual'] = shape

    # normalize property to be in [0,1], keep track of min and range
    prop_min = np.min(property_values)
    prop_scale = np.abs(np.max(property_values) - np.min(property_values))
    property_values -= prop_min
    if prop_scale != 0:
        property_values /= prop_scale
    others = [mean_norm, prop_min, prop_scale]
    combined = np.zeros(property_values.shape, dtype=np.complex128)
    combined.real = shape
    combined.imag = property_values

    return name, others, sht.analyse(combined)
Example #11
def cull_dataset(outdir, field_ra, field_dec, table):

	"""
	Efficiently finds all neighbors within 0.01 degrees using
	kdt.query_ball_point method to get points within radius d, where
	d is the cartesian distance equivalent to 0.01 degree separation
	resulting from calculation:

	ra1, ra2 = 0, 0.01
	dec1, dec2 = 0, 0
	c1 = spherical_to_cartesian(ra1, dec1)
	c2 = spherical_to_cartesian(ra2, dec2)
	d = np.sqrt(sum( [ (c1[i] - c2[i])**2 for i in range(3) ] ))

	If there are any neighbors within 0.01 degrees of a given source,
	and if any of these neighbors are brighter than 2 magnitudes fainter
	than the source, remove the source from the table. Also use the
	Wang et al. 2012 relations to get the angular size of each source and
	remove any sources with angular size greater than 0.01 arcsec.

	Returns a pandas DataFrame object containing the culled dataset.
	"""

	good = (table.array['V'] != 30.0) & (table.array['K'] != 30.0)
	arr = table.array[good]
	df = get_ang_size(pd.DataFrame.from_records(arr))

	# ignore sources with theta >= 0.01 arcsec
	df = df[df.theta < 0.01]
	if df.shape[0] == 0:
		return None
	ra, dec, Vmag = [np.array(i) for i in [df.RA, df.DEC, df.V]]
	kdt = KDT(radec_to_coords(ra, dec))
	d = 0.00017453292497790891
	no_neighbors = np.ones(df.shape[0])

	for i in range(df.shape[0]):

		coords = radec_to_coords(ra[i],dec[i])

		# exclude the query point itself (query_ball_point does not sort by distance,
		# so the self-match is not guaranteed to be the first index returned)
		idx = [j for j in kdt.query_ball_point(coords, d)[0] if j != i]

		if len(idx) < 1:
			continue

		ds = great_circle_distance(ra[i],dec[i],ra[idx],dec[idx])[0]
		Vmag_i = Vmag[i]
		Vmag_neighbors = Vmag[idx]

		# flag sources that have bright nearby neighbors as bad
		for Vmag_j in Vmag_neighbors:
			if Vmag_j - Vmag_i < 2:
				no_neighbors[i] = 0

	df = df[no_neighbors.astype('bool')]
	log(outdir, field_ra, field_dec, df.shape[0], arr.shape[0])
	return df
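The hard-coded d = 0.00017453292497790891 can be reproduced from the recipe in the docstring; a minimal check, assuming the usual unit-sphere convention for spherical_to_cartesian:

import numpy as np

ra1, dec1 = np.radians(0.0), np.radians(0.0)
ra2, dec2 = np.radians(0.01), np.radians(0.0)
c1 = np.array([np.cos(dec1) * np.cos(ra1), np.cos(dec1) * np.sin(ra1), np.sin(dec1)])
c2 = np.array([np.cos(dec2) * np.cos(ra2), np.cos(dec2) * np.sin(ra2), np.sin(dec2)])
d = np.linalg.norm(c1 - c2)   # chord length for 0.01 degrees, ~1.7453292497e-4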
Example #12
def closest_index(sample_points, indices):
    r"""
    Find the nearest sample_point at a given index
    (along with the distance to the point). Input is
    an array of sample_points and an array of indices to
    test at. Output is array of indices and distances.
    """
    kdtree = KDTree(sample_points)
    distance, index = kdtree.query(indices)
    return index, distance
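A small sketch of the call (assuming KDTree is scipy's cKDTree imported at module level): query points go in, nearest-sample indices and distances come out.

import numpy as np

samples = np.array([[0.0, 0.0], [1.0, 0.0], [0.0, 1.0]])
queries = np.array([[0.1, 0.1], [0.9, 0.2]])
index, distance = closest_index(samples, queries)
# index -> array([0, 1]); each query snaps to its nearest sample point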
Example #13
def KLdivTree(X1, X2):
    "fast KL estimation using KDTrees"
    n, d = X1.shape
    m, dy = X2.shape
    xtree = KDTree(X1)
    ytree = KDTree(X2)
    r = xtree.query(X1, k=2, eps=.01, p=2)[0][:, 1]
    s = ytree.query(X1, k=1, eps=.01, p=2)[0]
    diff = r/s
    return -np.log(diff).sum() * d / n + np.log(m/(n-1))
Example #14
def _fast_construct_edges(G, radius, p):
    """Construct edges for random geometric graph.

    Requires scipy to be installed.
    """
    pos = nx.get_node_attributes(G, 'pos')
    nodes, coords = list(zip(*pos.items()))
    kdtree = KDTree(coords)  # Cannot provide generator.
    edge_indexes = kdtree.query_pairs(radius, p)
    edges = ((nodes[u], nodes[v]) for u, v in edge_indexes)
    G.add_edges_from(edges)
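A hedged usage sketch (assuming networkx is imported as nx and KDTree is scipy's cKDTree): build a node-only graph, attach positions, and let the helper add the geometric edges.

import numpy as np
import networkx as nx

rng = np.random.default_rng(3)
G = nx.empty_graph(50)
pos = {node: tuple(p) for node, p in zip(G.nodes(), rng.random((50, 2)))}
nx.set_node_attributes(G, pos, 'pos')
_fast_construct_edges(G, radius=0.2, p=2)   # adds an edge for every pair closer than 0.2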
Example #15
def kdtree_clean(xx2d, yy2d, xS, yS, elevation2d):
	#REMOVE DODGY ADDED DATA FROM THE REGRIDDING BASED ON KDTREE. 
	# dist is how far away the nearest neighbours are. 
	# need to decide on this threshold.
	# ONLY DO THIS FOR POINTS THAT HAVE ALREADY BEEN CLASSIFIED AS RIDGES
	grid_points = np.c_[xx2d.ravel(), yy2d.ravel()]
	tree = KDTree(np.c_[xS, yS])
	dist, _ = tree.query(grid_points, k=1)
	dist = dist.reshape(xx2d.shape)
	elevation2d_KD=ma.masked_where(dist > 4, elevation2d)
	return elevation2d_KD
Example #16
def generate_galaxy(num_stars, spiral_arm_count, spiral_tightness, galaxy_radius, bulge_height, disk_height):
    
    #generate vertices
    star_dict = {}
    
    next_index = 0
    #spiral stars
    for i in range(int(num_stars*0.65)):
        star_dict[next_index] = create_vertex_spiral(max_radius=galaxy_radius, arm_count=spiral_arm_count, beta=spiral_tightness, disk_height=disk_height)
        next_index += 1
    
    #inner cluster stars
    for i in range(int(num_stars*0.15)):
        star_dict[next_index] = create_vertex_inner(max_radius=galaxy_radius * 0.8, bulge_height=bulge_height)
        next_index += 1
    
    #outer "spread out" stars
    while(len(star_dict) < num_stars):
        star_dict[next_index] = create_vertex_outer(max_radius=galaxy_radius * 0.9, disk_height=disk_height)
        next_index += 1
    
    #generate a KDTree from the star data in order to help with edges
    star_keys = list(star_dict.keys())
    star_values = list(star_dict.values())
    star_tree = KDTree(star_values)
    
    #compute the nearest neighbors for each vertex
    distance_data, index_data = star_tree.query(star_values, k=20, eps=0.1)
    
    #for each vertex, randomly add edges to its nearest neighbors
    edge_dict = {}
    for distances, indexes in zip(distance_data, index_data):
        v1 = star_keys[int(indexes[0])]
        
        if(v1 not in edge_dict):
            edge_dict[v1] = set()
        
        for distance, v2 in create_edges(zip(distances[1:],indexes[1:])):
            
            v2 = star_keys[int(v2)]
            
            edge_dict[v1].add(v2)
            
            if(v2 not in edge_dict):
                edge_dict[v2] = set()
            edge_dict[v2].add(v1)
    
    #remove disconnected components from the graph
    star_dict, edge_dict = remove_disconnected_stars(star_dict, edge_dict)
    
    #convert the star array to an array of dictionaries before returning, so other data can be added
    star_dict = {key:{'position':Vector3D(*p)} for key, p in star_dict.items()}
    
    return star_dict, edge_dict
Example #17
def match_model_masses(isoMasses, starMasses):
    kdt = KDTree( isoMasses.reshape((len(isoMasses), 1)) )
    q_results = kdt.query(starMasses.reshape((len(starMasses), 1)), k=1)
    indices = q_results[1]

    dm_frac = np.abs(starMasses - isoMasses[indices]) / starMasses

    idx = np.where(dm_frac > 0.1)[0]
    indices[idx] = -1
    
    return indices
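A short sketch of the matching behaviour (assuming numpy as np and scipy's cKDTree bound to KDTree): stars more than 10% in mass away from any model mass come back with index -1.

import numpy as np

isoMasses = np.linspace(0.1, 10.0, 100)
starMasses = np.array([0.95, 5.03, 42.0])
indices = match_model_masses(isoMasses, starMasses)
# 0.95 and 5.03 match nearby model masses; 42.0 is >10% away from every model, so its index is -1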
Example #18
def _fast_edges(G, radius, p):
    """Returns edge list of node pairs within `radius` of each other
       using scipy KDTree and Minkowski distance metric `p`

    Requires scipy to be installed.
    """
    pos = nx.get_node_attributes(G, 'pos')
    nodes, coords = list(zip(*pos.items()))
    kdtree = KDTree(coords)  # Cannot provide generator.
    edge_indexes = kdtree.query_pairs(radius, p)
    edges = ((nodes[u], nodes[v]) for u, v in edge_indexes)
    return edges
Example #19
def point_find_nearest_businesses(df, point, k=5, loc_cols=['latitude', 'longitude']):
    """
    Given a point (lat, long), return the k rows of df closest to it.
    :param df: DataFrame containing the location columns named in loc_cols
    :param point: (lat, long) tuple to query
    :param k: number of nearest businesses to return
    :param loc_cols: names of the latitude/longitude columns
    :return: the k nearest rows of df
    """
    tree = KDTree(df[loc_cols])
    distance, indices = tree.query(point, k)
    return df.iloc[indices]
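A small usage sketch (assuming pandas as pd and scipy's cKDTree bound to KDTree; note the query is a plain Euclidean distance on raw lat/lon values, not a great-circle distance):

import pandas as pd

df = pd.DataFrame({
    'latitude':  [40.71, 40.73, 40.75],
    'longitude': [-74.00, -73.99, -73.98],
    'name':      ['a', 'b', 'c'],
})
nearest = point_find_nearest_businesses(df, (40.72, -74.00), k=2)   # rows 'a' and 'b'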
Example #20
def spatialCorelation(points, fields, vectorColumns=None, Nbins=200, maxDist=50.0):
    """Compute the spatial corellation of each field

    points -- 2D array of points coordinates. Shape is (N,d) with d the number of spatial dimensions.
    fields -- 2D array of scalar field or of coordinates of vector fields. Shape is (N, F)
    with F the sum of the dimensions of each field.
    vectorColumns -- 1D array indexing the columns of fields into vector fields.
    for example [0, 1, 1, 1] means that the first column of fields is the scalar field 0 and
    the next 3 columns are the coordinates of a 3D vector field.
    Nbins -- The number of bins of the histogram
    maxDist -- The maximum distance between a pair of points taken into account in the histogram
    """
    #parameters parsing
    if len(points) != len(fields):
        raise ValueError(
            'You must have exactly one field value per point\n'
            + 'Here points is %i and fields is %i'%(len(points), len(fields))
            )
    if vectorColumns is None:
        vectorColumns = np.arange(fields.shape[1])
    if len(vectorColumns) != fields.shape[1]:
        vectorColumns = np.concatenate((
            vectorColumns,
            np.arange(vectorColumns.max()+1,fields.shape[1])
            ))
    slices = [np.where(vectorColumns==v)[0] for v in range(vectorColumns.max()+1)]
    #spatial query
    lowerBound = points.min(axis=0) + maxDist/2
    upperBound = points.max(axis=0) - maxDist/2
    inside_id = [
        i for i, p in enumerate(points)
        if (p>= lowerBound).all() and (p <= upperBound).all()
        ]
    tree = KDTree(points)
    inside_tree = KDTree(points[inside_id])
    pairs = inside_tree.query_ball_tree(tree, maxDist)
    #binning
    coord_bins = np.zeros((Nbins, fields.shape[1]))
    nb_bins = np.zeros((Nbins), dtype=int)
    for p, qs in zip(inside_id, pairs):
        qs.remove(p)
        rs = np.asarray(
            np.sqrt(
                ((points[qs] - points[p])**2).sum(axis=1)
                ) * Nbins / maxDist,
            dtype=int)
        nb_bins[rs] += 1 
        coord_bins[rs] += fields[qs]*fields[p]
    bins = np.column_stack([coord_bins[:,cols].sum(axis=1) for cols in slices])
    bins[np.nonzero(nb_bins)] /= nb_bins[np.nonzero(nb_bins)][:,np.newaxis]
    return np.column_stack((np.arange(Nbins, dtype=float)/maxDist,bins))
Example #21
def remove_close(points, radius):
    '''
    Given an (n, m) set of points where m=(2|3), return a list of points
    where no point is closer than radius.
    '''
    tree     = KDTree(points)
    consumed = np.zeros(len(points), dtype=bool)
    unique   = np.zeros(len(points), dtype=bool)
    for i in range(len(points)):
        if consumed[i]: continue
        neighbors = tree.query_ball_point(points[i], r=radius)
        consumed[neighbors] = True
        unique[i]           = True
    return points[unique]
Example #22
def kldivergence(x, y):
    """Compute the Kullback-Leibler divergence between two multivariate samples.
    
    Parameters
    ----------
    x : 2D array (n,d)
    Samples from distribution P, which typically represents the true
    distribution.
    y : 2D array (m,d)
    Samples from distribution Q, which typically represents the approximate
    distribution.
    
    Returns
    -------
    out : float
    The estimated Kullback-Leibler divergence D(P||Q).
    
    References
    ----------
    Perez-Cruz, F. Kullback-Leibler divergence estimation of
    continuous distributions IEEE International Symposium on Information
    Theory, 2008.
    """
    from scipy.spatial import cKDTree as KDTree
    import numpy as NP
    
    # Check the dimensions are consistent
    x = NP.atleast_2d(x)
    y = NP.atleast_2d(y)
    
    n,d = x.shape
    m,dy = y.shape
    
    assert(d == dy)
    
    
    # Build a KD tree representation of the samples and find the nearest neighbour
    # of each point in x.
    xtree = KDTree(x)
    ytree = KDTree(y)
    
    # Get the first two nearest neighbours for x, since the closest one is the
    # sample itself.
    r = xtree.query(x, k=2, eps=.01, p=2)[0][:,1]
    s = ytree.query(x, k=1, eps=.01, p=2)[0]
    
    print(r)
    print(s)
    # There is a mistake in the paper: in Eq. 14, the first term on the
    # right-hand side is missing a negative sign.
    return -NP.log(r/s).sum() * d / n + NP.log(m / (n - 1.))
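To sanity-check the estimator (a sketch, assuming numpy is available; note the debug prints above will dump the neighbour distances): for two univariate Gaussians N(0, 1) and N(0.5, 1) the analytic divergence is 0.5**2 / 2 = 0.125, and the estimate should land near that value for a few thousand samples.

import numpy as NP

rng = NP.random.default_rng(0)
x = rng.normal(0.0, 1.0, size=(5000, 1))   # samples from P
y = rng.normal(0.5, 1.0, size=(5000, 1))   # samples from Q
est = kldivergence(x, y)                   # roughly 0.125, up to estimator noise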
Example #23
def remove_close_set(points_fixed, points_reduce, radius):
    '''
    Given two sets of points and a radius, return a set of points
    that is the subset of points_reduce where no point is within 
    radius of any point in points_fixed
    '''
    tree_fixed  = KDTree(points_fixed)
    tree_reduce = KDTree(points_reduce)
    reduce_duplicates = tree_fixed.query_ball_tree(tree_reduce, r = radius)
    reduce_duplicates = np.unique(np.hstack(reduce_duplicates).astype(int))
    reduce_mask = np.ones(len(points_reduce), dtype=bool)
    reduce_mask[reduce_duplicates] = False
    points_clean = points_reduce[reduce_mask]
    return points_clean
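A tiny sketch (assuming numpy and scipy's cKDTree bound to KDTree are in scope): points of points_reduce that fall within the radius of any fixed point are dropped.

import numpy as np

fixed = np.array([[0.0, 0.0], [1.0, 1.0]])
movable = np.array([[0.05, 0.0], [0.5, 0.5], [1.02, 0.98]])
clean = remove_close_set(fixed, movable, radius=0.1)
# only [0.5, 0.5] survives; the other two sit within 0.1 of a fixed point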
Example #24
def CartMatch(coord1, coord2, tol = None, nnearest=1):
    """
    Cartesian coordinate matching
    """
    # sanitize
    coord1      =       np.array(coord1, ndmin = 1)
    coord2      =       np.array(coord2, ndmin = 1)

    # check the dimensions of the coordinate
    npairs1     =       len( coord1 )
    ndim1       =       1    if   len( np.shape(coord1) )  ==   1  else   \
                        np.shape(coord1)[1]
    npairs2     =       len( coord2 )
    ndim2       =       1    if   len( np.shape(coord2) )  ==   1  else   \
                        np.shape(coord2)[1]

    # check whether the coord1 and coord2 have the same shape
    if  ndim1   !=      ndim2:
        raise RuntimeError("The dims of coord1/2 are not the same.")
    else:
        ndim     =       ndim1

    # make proper arrays if they are 1d arrays
    if      ndim == 1:
        coord1  =       np.array([ coord1, np.zeros(len(coord1)) ]).T
        coord2  =       np.array([ coord2, np.zeros(len(coord2)) ]).T

    # kdtree the coord2
    kdt = KDT(coord2)
    if nnearest == 1:
        idxs2 = kdt.query(coord1)[1]
    elif nnearest > 1:
        idxs2 = kdt.query(coord1, nnearest)[1][:, -1]
    else:
        raise ValueError('invalid nnearest ' + str(nnearest))

    # distance - warning: this could overflow the float precision if it is not enough; we assume that case is beyond the distance of interest...
    ds  =   np.sqrt( np.sum( (coord1 - coord2[idxs2])**2, axis = 1) )

    # index of coord1 
    idxs1 = np.arange(npairs1)

    # distance filtering
    if tol is not None:
        msk = ds < tol
        idxs1 = idxs1[msk]
        idxs2 = idxs2[msk]
        ds = ds[msk]

    return idxs1, idxs2, ds
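A brief sketch of a 2-D match (assuming numpy as np and KDT bound to scipy's cKDTree as in the example): each point of coord1 is paired with its nearest point in coord2, and the tol cut drops distant pairs.

import numpy as np

coord1 = np.array([[0.0, 0.0], [5.0, 5.0]])
coord2 = np.array([[0.1, 0.0], [4.0, 4.0], [5.1, 5.0]])
idx1, idx2, ds = CartMatch(coord1, coord2, tol=0.5)
# idx1 -> [0, 1], idx2 -> [0, 2], ds -> [~0.1, ~0.1]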
Example #25
    def compute_errors(self, mag_err_lim=None, dx_lim=None):
        """Estimates errors and completeness per star.
        
        Load photometry from fake table (from same chip, ext as primary data).
        For each star in the phot table, get its magnitude.
        Use a kdtree to get the N most similar stars; compute statistics

        Parameters
        ----------

        frac : float
            Scalar fractional level of completeness. For example, 0.5 is the
            50% completeness limit.
        mag_err_lim : float
            Maximum absolute difference in magnitudes, in any band, for the
            star to be considered recovered.
        dx_lim : float
            Maximum distance between a fake star's input site and its
            observed site for the fake star to be considered recovered.
        """
        mag_errors = self._f.mag_errors()  # diffs nstars x nimages
        recovered = self._f.recovered(mag_err_lim=mag_err_lim, dx_lim=dx_lim)
        tree = KDTree(self._f.data['mag'])
        obs_mags = np.array([row['mag']
            for row in self._p.photTable.iterrows()])
        dists, indices = tree.query(obs_mags,
                k=100)
                # distance_upper_bound=mag_err_lim)
        nObs = obs_mags.shape[0]
        nImages = obs_mags.shape[1]
        sigmas = np.empty([nObs, nImages])
        comps = np.empty(nObs)
        for i in range(nObs):
            if np.any(obs_mags[i] > 50.):
                for j in range(nImages):
                    sigmas[i, j] = np.nan
                comps[i] = np.nan
                continue
            idx = indices[i, :].flatten()
            for j in range(nImages):
                # Estimate uncertainty in this band (image index)
                sigmas[i, j] = np.std(mag_errors[idx, j])
            # Estimate completeness for this star
            c = recovered[indices[i, :]]
            comps[i] = float(c.sum()) / len(c)

        # insert errors into the HDF5 table (need to make a new column)
        self._p.add_column("ast_mag_err", sigmas)
        # insert completeness for this star
        self._p.add_column("comp", comps)
Example #26
def main():
    # read in the file
    try:
        ifs = open(sys.argv[1])
        sample, ext = os.path.splitext(sys.argv[1])
    except IndexError:
        ifs = sys.stdin
        sample = ''
    data = np.loadtxt(ifs, delimiter=',')
    if ifs is not sys.stdin:
        ifs.close()
    # view of the com
    com = data[:,1:4]
    # construct a KD tree
    tree = KDTree(com)
    # query KD tree to find the first nearest neighbor
    dist, idx = tree.query(com, k=2)
    nn = [(i, j, d2) for ((d1, d2), (i, j)) in zip(dist, idx)]
    # histogram of the nearest neighbor distance
    hist(np.array(nn)[:,2],
         title='{} pore-pore distances'.format(sample),
         output='{}.pdf'.format(sample))
    # save the nearest neighbor distance to .json files
    ofile = '{}_pore-distribution.json'.format(sample)
    medianDist = np.median(np.array(nn)[:,2])
    # sort key replacing the old cmp-style comparator: order entries by their first element
    key0 = lambda entry: entry[0]
    dist = {
        'Pore ID' : list(data[:,0].astype(int)),
        'center of mass X' : {
            'units' : '$\mu$m',
            'values' : list(data[:,1])},
        'center of mass Y' : {
            'units' : '$\mu$m',
            'values' : list(data[:,2])},
        'center of mass Z' : {
            'units' : '$\mu$m',
            'values' : list(data[:,3])},
        'volume' : {
            'units' : '$\mu$m^3',
            'values' : list(data[:,4])},
        'nearest neighbor distance' : {
            'units' : '$\mu$m',
            'values' : [entry[2] for entry in sorted(nn, key=key0)]},
        'median nearest neighbor distance' : {
            'units' : '$\mu$m',
            'values' : medianDist}
    }
    json.dump(dist, open(ofile, 'w'))
Example #27
def match(s, h, fits_image, tolerance=4):
    """
    Parameters
    ----------
    s, h : obj
        Catalog objects. Each must have `ra` and `dec` attributes
        as 1-D Numpy arrays.

    fits_image : string
        FITS image for conversion of RA,DEC to X,Y.

    tolerance : number
        Match tolerance in pixels.

    Returns
    -------
    xmatch, ymatch
        Matched X,Y from first catalog.

    xhmatch, yhmatch
        Matched X,Y from second catalog.

    """
    # Now use pywcs to put these on some sort of projection. I think as
    # long as you use the same for both data sets it's not really important
    # what the projection is. In my case I read in a fits image associated
    # with the first catalog and use that header info.
    hdu = io.fits.open(fits_image)
    wcs = pywcs.WCS(hdu['PRIMARY'].header)

    # Convert sky to x,y positions
    x, y = wcs.wcs_world2pix(s.ra, s.dec, 0)
    xh, yh = wcs.wcs_world2pix(h.ra, h.dec, 0)

    # Create a KD Tree
    tree = KDTree(list(zip(x.ravel(), y.ravel())))

    # Search it for the nearest neighbor
    # d = distance of the nearest neighbor
    # i = index in x,y arrays of the nearest neighbor for each source in xh,yh
    d, i = tree.query(list(zip(xh.ravel(), yh.ravel())), k=1)

    # Give me just the matchers within a tolerance
    j = d < tolerance
    ii = i[j]  # match within N pixels; trickier to do this in ra,dec
    xmatch, ymatch = x[ii], y[ii]
    xhmatch, yhmatch = xh[j], yh[j]

    return xmatch, ymatch, xhmatch, yhmatch
Example #28
def EstimateLatticeConstant(pos):
    """
    Estimate the lattice constant of a point set that represents a square grid.

    Parameters
    ----------
    pos : array like
        A 2D array of shape (N, 2) containing the coordinates of the points.

    Returns
    -------
    kxy : array like [2x2]
        lattice constants

    """
    # Find the closest 4 neighbours (excluding itself) for each point.
    tree = KDTree(pos)
    dd, ii = tree.query(pos, k=5)
    dr = dd[:, 1:]

    # Determine the median radial distance and filter all points beyond
    # 2*sigma.
    med = numpy.median(dr)
    std = numpy.std(dr)
    outliers = numpy.abs(dr - med) > (2 * std)  # doesn't work well if std is very high

    # Determine horizontal and vertical distance (only radial distance is
    # returned by tree.query).
    dpos = pos[ii[:, 0, numpy.newaxis]] - pos[ii[:, 1:]]
    dx, dy = dpos[:, :, 0], dpos[:, :, 1]
    assert numpy.all(numpy.abs(dr - numpy.hypot(dx, dy)) < 1.0e-12)
    # Use k-means to group the points into two directions.
    X = numpy.column_stack((dx[~outliers], dy[~outliers]))
    X[X[:, 0] < -0.5 * med] *= -1
    X[X[:, 1] < -0.5 * med] *= -1

    centroids, _ = kmeans(X, 2)
    labels = numpy.argmin(cdist(X, centroids), axis=1)
    kxy = numpy.array([numpy.median(X[labels.ravel() == 0], axis=0),
                       numpy.median(X[labels.ravel() == 1], axis=0)])

    # The angle between the two directions should be close to 90 degrees.
    alpha = math.atan2(numpy.linalg.norm(numpy.cross(*kxy)), numpy.dot(*kxy))
    if abs(alpha - math.pi / 2) > math.radians(2.5):
        logging.warning('Estimated lattice angle differs from 90 degrees by '
                        'more than 2.5 degrees. Input data could be wrong')

    return kxy
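A synthetic check (a sketch; it assumes the module-level names the function relies on: numpy, math, logging, KDTree as scipy's cKDTree, kmeans from scipy.cluster.vq and cdist from scipy.spatial.distance). A slightly jittered square grid with a 2.5-unit pitch should come back as two roughly orthogonal lattice vectors of length about 2.5:

import numpy

xx, yy = numpy.meshgrid(numpy.arange(10.0), numpy.arange(10.0))
grid = 2.5 * numpy.column_stack((xx.ravel(), yy.ravel()))
grid += numpy.random.default_rng(0).normal(scale=0.01, size=grid.shape)   # break exact degeneracy
kxy = EstimateLatticeConstant(grid)
# numpy.linalg.norm(kxy, axis=1) -> approximately [2.5, 2.5], ~90 degrees apart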
Example #29
        def __init__(self,xdata,ydata):

            #Do some tests here

            #Find data covariance
            cov = np.cov(xdata.T)

            #Cholesky decompose to make new basis
            L_mat = np.linalg.cholesky(cov)
            self.L_mat = np.linalg.inv(L_mat)

            #Transform xdata into new basis
            self.xtrain = xdata
            self.transf_x = np.array([np.dot(self.L_mat,x) for x in xdata])

            #DEBUG
            #plt.plot(xdata[:,0],xdata[:,1],'.',color='r')
            #plt.plot(self.transf_x[:,0],self.transf_x[:,1],'.')
            #plt.show()
            #sys.exit()

            #Store training
            self.ytrain = ydata

            #Build KDTree for quick lookup
            self.transf_xtree = KDTree(self.transf_x)
Example #30
 def __init__( self, X, z, leafsize=10, stat=0 ):
     assert len(X) == len(z), "len(X) %d != len(z) %d" % (len(X), len(z))
     self.tree = KDTree( X, leafsize=leafsize )  # build the tree
     self.z = z
     self.stat = stat
     self.wn = 0
     self.wsum = None
Example #31
class ShapeMatcher(object):
    def __init__(self, ids, invariants):
        """Match other shapes based on euclidean distance.
        Constructs a KDTree in order to do nearest neighbour queries.
        For large datasets it might take a second or two to build the tree.

        Arguments:
        ids -- set names/identifiers for the shapes
        invariants -- 2D array of invariants that describe the shapes
        """
        self.ids = ids
        self.invariants = invariants
        LOG.debug('Constructing tree from %d invariants', len(invariants))
        self.tree = KDTree(invariants)

    def search_invariants(self, invariants, n=10, df=False):
        """Search for matches based on invariants.

        Arguments:
        invariants -- N length array of shape descriptors

        Keyword arguments:
        n -- number of matches to return (default 10)
        df -- return matches as a pandas DataFrame (default False)
        """
        if n == 'max':
            n = len(self.invariants)
        LOG.debug('Searching for %d closest points', n)
        distances, indexes = self.tree.query(invariants, n)
        invariants = self.invariants[indexes]
        # Need to handle case of n == 1 correctly
        if isinstance(indexes, int):
            ids = self.ids[indexes].decode('utf-8')
            return SearchResult(ids, distances, invariants)
        else:
            ids = [x.decode('utf-8') for x in self.ids[indexes]]
        if df:
            return pd.DataFrame({
                'ID': ids,
                'Proximity': distances
            }).set_index('ID')
        else:
            return [
                SearchResult(n, d, i)
                for n, d, i in zip(ids, distances, invariants)
            ]

    def search_shape(self, shape, **kwargs):
        """Search for matches based on a shape object. (convenience function)

        Arguments:
        shape -- a Shape object.

        Keyword arguments:
        n -- number of matches to return (default 10)
        df -- return matches as a pandas DataFrame (default False)
        """
        LOG.debug('Searching for closest shapes to %s', shape.name)
        # delegate to search_invariants method
        return self.search_invariants(shape.invariants, **kwargs)

    @staticmethod
    def from_datafile(filename, l_max=20):
        """Construct a CSD matcher based on the bundled data

        Keyword arguments:
        l_max -- maximum angular momenta to use for invariants
        (default 20)
        use_radius -- use the mean radius as the first invariant
        (default True)
        """
        names, invariants = load_data(filename)
        return ShapeMatcher(names, invariants)

    @staticmethod
    def from_shapes(shapes, l_max=20):
        """Construct a shapematcher object from a list of shapes

        Arguments:
        shapes -- A list of Shape objects
        Keyword arguments:
        l_max -- maximum angular momenta to use for invariants
        (default 20)
        """
        invariants, names = [], []
        if isinstance(shapes, dict):
            for name, s in shapes.items():
                invariants.append(s.invariants)
                names.append(name)
        else:
            for s in shapes:
                invariants.append(s.invariants)
                names.append(s.name)
        invariants = np.array(invariants)
        names = np.array(names, dtype='|S64')
        return ShapeMatcher(names, invariants)

    @staticmethod
    def from_surface_files(files, property_name='shape'):
        """Construct a CSD matcher based on the bundled data

        Keyword arguments:
        l_max -- maximum angular momenta to use for invariants
        (default 20)
        use_radius -- use the mean radius as the first invariant
        (default True)
        """
        shapes = {}
        for f in files:
            shapes[f.stem] = surface_description(f,
                                                 property_name=property_name)

        return ShapeMatcher.from_shapes(shapes)

    def all(self):
        return self.search_invariants(self.invariants[0],
                                      n=len(self.invariants))
Example #32
 def __init__(self, xgrid, ygrid, invalid_mask=None):
     print("Generating tree")
     self.tree = KDTree(np.array(list(zip(xgrid.ravel(), ygrid.ravel()))))
     self.imask = None
     if invalid_mask is not None:
         self.imask = np.asarray(invalid_mask).astype(bool)
Example #33
                        n_clusters=number_of_cluster,
                        random_state=41).fit(subset_data_unref)
                    print('Kmeans done: Time elapsed: {} seconds'.format(
                        time.time() - time_start))
                    labels_unref = kmeans_unref.labels_
                    centroids_unref = kmeans_unref.cluster_centers_

                    counting_occurence_in_patient_compare = Counter(
                        labels_unref)

                    vals_unref = np.fromiter(
                        counting_occurence_in_patient_compare.values(),
                        dtype=float)

                    #COMPARING USING KDTREE
                    k = KDTree(centroids_unref)
                    (dists, idxs) = k.query(centroids_ref)

                    vals_unref[idxs]

                    reference_dataframe[f'Count_{name}'] = vals_unref[idxs]

                    print(reference_dataframe.shape,
                          reference_dataframe.columns)

    reference_dataframe.sort_values(by=['Cluster'], inplace=True)
    reference_dataframe.sort_index(axis=1, ascending=True, inplace=True)

    reference_dataframe.to_csv(
        path_to_store_frame +
        f'/Data_for_LDA_from_generate_data_with_n_{number_of_cluster}_configuration_{configuration}.csv'
Example #34
class RGeocoder(object):
    """
    The main reverse geocoder class
    """
    def __init__(self, mode=2, verbose=False, stream=None):
        """ Class Instantiation
        Args:
        mode (int): Library supports the following two modes:
                    - 1 = Single-threaded K-D Tree
                    - 2 = Multi-threaded K-D Tree (Default)
        verbose (bool): For verbose output, set to True
        stream (io.StringIO): An in-memory stream of a custom data source
        """
        self.mode = mode
        self.verbose = verbose
        if stream:
            coordinates, self.locations = self.load(stream)
        else:
            coordinates, self.locations = self.extract(rel_path(RG_FILE))

        if mode == 1: # Single-process
            self.tree = KDTree(coordinates)
        else: # Multi-process
            self.tree = KDTree_MP.cKDTree_MP(coordinates)

    def query(self, coordinates):
        """
        Function to query the K-D tree to find the nearest city
        Args:
        coordinates (list): List of tuple coordinates, i.e. [(latitude, longitude)]
        """
        if self.mode == 1:
            _, indices = self.tree.query(coordinates, k=1)
        else:
            _, indices = self.tree.pquery(coordinates, k=1)
        return [self.locations[index] for index in indices]

    def load(self, stream):
        """
        Function that loads a custom data source
        Args:
        stream (io.StringIO): An in-memory stream of a custom data source.
                              The format of the stream must be a comma-separated file
                              with header containing the columns defined in RG_COLUMNS.
        """
        stream_reader = csv.DictReader(stream, delimiter=',')
        header = stream_reader.fieldnames

        if header != RG_COLUMNS:
            raise csv.Error('Input must be a comma-separated file with header containing ' + \
                'the following columns - %s. For more help, visit: ' % (','.join(RG_COLUMNS)) + \
                'https://github.com/thampiman/reverse-geocoder')

        # Load all the coordinates and locations
        geo_coords, locations = [], []
        for row in stream_reader:
            geo_coords.append((row['lat'], row['lon']))
            locations.append(row)

        return geo_coords, locations

    def extract(self, local_filename):
        """
        Function loads the already extracted GeoNames cities file or downloads and extracts it if
        it doesn't exist locally
        Args:
        local_filename (str): Path to local RG_FILE
        """
        if os.path.exists(local_filename):
            if self.verbose:
                print('Loading formatted geocoded file...', file=sys.stderr)
            rows = csv.DictReader(open(local_filename, 'rt'))
        else:
            gn_cities1000_url = GN_URL + GN_CITIES1000 + '.zip'
            gn_admin1_url = GN_URL + GN_ADMIN1
            gn_admin2_url = GN_URL + GN_ADMIN2

            cities1000_zipfilename = GN_CITIES1000 + '.zip'
            cities1000_filename = GN_CITIES1000 + '.txt'

            if not os.path.exists(cities1000_zipfilename):
                if self.verbose:
                    print('Downloading files from Geoname...', file=sys.stderr)
                try: # Python 3
                    import urllib.request
                    urllib.request.urlretrieve(gn_cities1000_url, cities1000_zipfilename)
                    urllib.request.urlretrieve(gn_admin1_url, GN_ADMIN1)
                    urllib.request.urlretrieve(gn_admin2_url, GN_ADMIN2)
                except ImportError: # Python 2
                    import urllib
                    urllib.urlretrieve(gn_cities1000_url, cities1000_zipfilename)
                    urllib.urlretrieve(gn_admin1_url, GN_ADMIN1)
                    urllib.urlretrieve(gn_admin2_url, GN_ADMIN2)


            if self.verbose:
                print('Extracting cities1000...', file=sys.stderr)
            _z = zipfile.ZipFile(open(cities1000_zipfilename, 'rb'))
            open(cities1000_filename, 'wb').write(_z.read(cities1000_filename))

            if self.verbose:
                print('Loading admin1 codes...', file=sys.stderr)
            admin1_map = {}
            t_rows = csv.reader(open(GN_ADMIN1, 'rt'), delimiter='\t')
            for row in t_rows:
                admin1_map[row[ADMIN_COLUMNS['concatCodes']]] = row[ADMIN_COLUMNS['asciiName']]

            if self.verbose:
                print('Loading admin2 codes...', file=sys.stderr)
            admin2_map = {}
            for row in csv.reader(open(GN_ADMIN2, 'rt'), delimiter='\t'):
                admin2_map[row[ADMIN_COLUMNS['concatCodes']]] = row[ADMIN_COLUMNS['asciiName']]

            if self.verbose:
                print('Creating formatted geocoded file...', file=sys.stderr)
            writer = csv.DictWriter(open(local_filename, 'wt'), fieldnames=RG_COLUMNS)
            rows = []
            for row in csv.reader(open(cities1000_filename, 'rt'), \
                    delimiter='\t', quoting=csv.QUOTE_NONE):
                lat = row[GN_COLUMNS['latitude']]
                lon = row[GN_COLUMNS['longitude']]
                name = row[GN_COLUMNS['asciiName']]
                cc = row[GN_COLUMNS['countryCode']]

                admin1_c = row[GN_COLUMNS['admin1Code']]
                admin2_c = row[GN_COLUMNS['admin2Code']]

                cc_admin1 = cc+'.'+admin1_c
                cc_admin2 = cc+'.'+admin1_c+'.'+admin2_c

                admin1 = ''
                admin2 = ''

                if cc_admin1 in admin1_map:
                    admin1 = admin1_map[cc_admin1]
                if cc_admin2 in admin2_map:
                    admin2 = admin2_map[cc_admin2]

                write_row = {'lat':lat,
                             'lon':lon,
                             'name':name,
                             'admin1':admin1,
                             'admin2':admin2,
                             'cc':cc}
                rows.append(write_row)
            writer.writeheader()
            writer.writerows(rows)

            if self.verbose:
                print('Removing extracted cities1000 to save space...', file=sys.stderr)
            os.remove(cities1000_filename)

        # Load all the coordinates and locations
        geo_coords, locations = [], []
        for row in rows:
            geo_coords.append((row['lat'], row['lon']))
            locations.append(row)
        return geo_coords, locations
Example #35
def gG_l(pos, qlms, is_center, Nbins, maxdist):
    """
Spatial correlation of the qlms (non normalized).

For each particle i tagged as is_center,
for each particle j closer than maxdist,
do the cross product between their qlm and count,
then bin each quantity with respect to distance.
The two first sums need to be normalised by the last one.

Periodic boundary conditions are not supported.

Parameters
----------
pos : (N, 3) array of floats
    Spatial coordinates
qlms : list
    A list of M (N, 2l+1) arrays of boo coordinates for l-fold symmetry.
    l can be different for each item.
is_center : (N) array of bool.
    For example all particles further away than maxdist from any edge of the box.
Nbins : int
    The number of bins along r
maxdist : float
    The maximum distance considered.

Returns
----------
hqQ : (Nbins, M) array of floats
    The sum of cross products for each distance and each qlm
g : (Nbins) array of ints
    The number of pairs for each distance
"""
    for qlm in qlms:
        assert len(pos) == len(qlm)
    assert len(is_center) == len(pos)
    #conversion factor between indices and bins
    l2r = Nbins / maxdist
    #result containers
    #an additional bin for the case where the distance is exactly equal to maxdist
    hqQ = np.zeros((Nbins + 1, len(qlms)))
    g = np.zeros(Nbins + 1, int)
    #compute ql for all particles
    qQ = np.array([ql(qlm) for qlm in qlms])
    nonzero = qQ.min(0) + 1.0 > 1.0
    #spatial indexing
    tree = KDTree(pos[nonzero], 12)
    centertree = KDTree(pos[is_center & nonzero], 12)
    #all pairs of points closer than maxdist with their distances in a record array
    query = centertree.sparse_distance_matrix(tree,
                                              maxdist,
                                              output_type='ndarray')
    #convert in original indices
    nonzeroindex = np.where(nonzero)[0]
    centerindex = np.where(is_center & nonzero)[0]
    query['i'] = centerindex[query['i']]
    query['j'] = nonzeroindex[query['j']]
    #keep only pairs where the points are distinct
    good = query['i'] != query['j']
    query = query[good]
    #binning of distances
    rs = (query['v'] * l2r).astype(int)
    np.add.at(g, rs, 1)
    #binning of boo cross products
    pqQs = np.empty((len(rs), len(qlms)))
    for it, qlm in enumerate(qlms):
        pqQs[:, it] = product(qlm[query['i']], qlm[query['j']])
        prodnorm = qQ[it, query['i']] * qQ[it, query['j']]
        pqQs[:, it] /= prodnorm
    np.add.at(hqQ, rs, pqQs)
    return hqQ[:-1], g[:-1]
Example #36
def get_area_avg_from_erai_data(start_year=-np.Inf,
                                end_year=np.Inf,
                                var_folder="",
                                varname="",
                                mask=None,
                                mask_lons=None,
                                mask_lats=None):
    """

    Interpolate the mask to the ERA-Interim grid using nearest neighbour approach

    :param start_year:
    :param end_year:
    :param var_folder:
    :param varname:
    :param mask:
    :return:
    """
    def _get_year(fn):
        return int(fn.split(".")[0].split("_")[1])

    flist = [
        os.path.join(var_folder, fn) for fn in os.listdir(var_folder)
        if fn.startswith(varname) and (start_year <= _get_year(fn)) and (
            _get_year(fn) <= end_year)
    ]
    print(flist)

    ktree = None
    mask_interpolated = None
    lons_target, lats_target = None, None

    ser_list = []
    for fp in flist:

        with Dataset(fp) as ds:
            time_var = ds.variables["time"]

            times = num2date(time_var[:], time_var.units)

            print(times[0], times[-1])

            # Determine nearest neighbours for interpolation (do it only once)
            if ktree is None:

                # get lons and lats from the bathymetry file
                data_folder_p = Path(var_folder).parent

                for f in data_folder_p.iterdir():
                    if f.name.lower().startswith("bathy_meter"):
                        with Dataset(str(f)) as ds_bathy:
                            lons_target, lats_target = [
                                ds_bathy.variables[k][:]
                                for k in ["nav_lon", "nav_lat"]
                            ]
                            break

                x, y, z = lat_lon.lon_lat_to_cartesian(mask_lons.flatten(),
                                                       mask_lats.flatten())
                xt, yt, zt = lat_lon.lon_lat_to_cartesian(
                    lons_target.flatten(), lats_target.flatten())
                ktree = KDTree(list(zip(x, y, z)))

                dists, inds = ktree.query(list(zip(xt, yt, zt)), k=1)

                mask_interpolated = mask.flatten()[inds]
                mask_interpolated = mask_interpolated.reshape(
                    lons_target.shape)

            vals = [
                field[mask_interpolated].mean()
                for field in ds.variables[varname][:]
            ]
            ser = pd.Series(index=times, data=vals)

            if varname == "TT":
                ser -= 273.15

            ser.sort_index(inplace=True)

            ser_list.append(ser)

    return pd.concat(ser_list)
Example #37
def mosaic_texture(humfile, sonpath, cs2cs_args = "epsg:26949", res = 99, nn = 5, weight = 1):
         
    '''
    Create mosaics of the spatially referenced sidescan echograms

    Syntax
    ----------
    [] = PyHum.mosaic_texture(humfile, sonpath, cs2cs_args, res, nn, weight)

    Parameters
    ----------
    humfile : str
       path to the .DAT file
    sonpath : str
       path where the *.SON files are
    cs2cs_args : int, *optional* [Default="epsg:26949"]
       arguments to create coordinates in a projected coordinate system
       this argument gets given to pyproj to turn wgs84 (lat/lon) coordinates
       into any projection supported by the proj.4 libraries
    res : float, *optional* [Default=0]
       grid resolution of output gridded texture map
       if res=99, res will be determined automatically from the spatial resolution of 1 pixel
    nn: int, *optional* [Default=5]
       number of nearest neighbours for gridding
    weight: int, *optional* [Default=1]
       specifies the type of pixel weighting in the gridding process
       weight = 1, based on grazing angle and inverse distance weighting
       weight = 2, based on grazing angle only
       weight = 3, inverse distance weighting only
       weight = 4, no weighting
    
    Returns
    -------

    sonpath+'GroundOverlay.kml': kml file
        contains gridded (or point cloud) sidescan intensity map for importing into google earth
        of the pth chunk

    sonpath+'map.png' : 
        image overlay associated with the kml file

    '''

    # prompt user to supply file if no input file given
    if not humfile:
       print 'An input file is required!!!!!!'
       Tk().withdraw() # we don't want a full GUI, so keep the root window from appearing
       humfile = askopenfilename(filetypes=[("DAT files","*.DAT")]) 

    # prompt user to supply directory if no input sonpath is given
    if not sonpath:
       print 'A *.SON directory is required!!!!!!'
       Tk().withdraw() # we don't want a full GUI, so keep the root window from appearing
       sonpath = askdirectory() 

    # print given arguments to screen and convert data type where necessary
    if humfile:
       print 'Input file is %s' % (humfile)

    if sonpath:
       print 'Sonar file path is %s' % (sonpath)

    if cs2cs_args:
       print 'cs2cs arguments are %s' % (cs2cs_args)    

    if res:
       res = np.asarray(res,float)
       print 'Gridding resolution: %s' % (str(res))      
       
    if nn:
       nn = int(nn)
       print 'Number of nearest neighbours for gridding: %s' % (str(nn))
                    
    if weight:
       weight = int(weight)
       print 'Weighting for gridding: %s' % (str(weight))                   


    ##nn = 5 #number of nearest neighbours in gridding
    ##noisefloor=10 # noise threshold in dB W

    # start timer
    if os.name=='posix': # true if linux/mac or cygwin on windows
       start = time.time()
    else: # windows
       start = time.clock()

    trans =  pyproj.Proj(init=cs2cs_args)

    # if son path name supplied has no separator at end, put one on
    if sonpath[-1]!=os.sep:
       sonpath = sonpath + os.sep

    base = humfile.split('.DAT') # get base of file name for output
    base = base[0].split(os.sep)[-1]

    # remove underscores, negatives and spaces from basename
    base = humutils.strip_base(base)

    meta = loadmat(os.path.normpath(os.path.join(sonpath,base+'meta.mat')))

    esi = np.squeeze(meta['e'])
    nsi = np.squeeze(meta['n']) 
    
    theta = np.squeeze(meta['heading'])/(180/np.pi)

    # load memory mapped scans
    shape_port = np.squeeze(meta['shape_port'])
    if shape_port!='':
       if os.path.isfile(os.path.normpath(os.path.join(sonpath,base+'_data_port_lar.dat'))):
          port_fp = io.get_mmap_data(sonpath, base, '_data_port_lar.dat', 'float32', tuple(shape_port))
       else:
          port_fp = io.get_mmap_data(sonpath, base, '_data_port_la.dat', 'float32', tuple(shape_port))

    shape_star = np.squeeze(meta['shape_star'])
    if shape_star!='':
       if os.path.isfile(os.path.normpath(os.path.join(sonpath,base+'_data_star_lar.dat'))):
             star_fp = io.get_mmap_data(sonpath, base, '_data_star_lar.dat', 'float32', tuple(shape_star))
       else:
          star_fp = io.get_mmap_data(sonpath, base, '_data_star_la.dat', 'float32', tuple(shape_star))

    # time varying gain
    tvg = ((8.5*10**-5)+(3/76923)+((8.5*10**-5)/4))*meta['c']
        
    # depth correction
    dist_tvg = np.squeeze(((np.tan(np.radians(25)))*np.squeeze(meta['dep_m']))-(tvg))

    # read in range data
    R_fp = io.get_mmap_data(sonpath, base, '_data_range.dat', 'float32', tuple(shape_star))

    dx = np.arcsin(meta['c']/(1000*meta['t']*meta['f']))
    pix_m = meta['pix_m']
    c = meta['c']

    if not os.path.isfile( os.path.normpath(os.path.join(sonpath,base+"S.p")) ):
    #if 2 > 1:
       inputfiles = []
       if len(shape_star)>2:    
          for p in xrange(len(star_fp)):
             e = esi[shape_port[-1]*p:shape_port[-1]*(p+1)]
             n = nsi[shape_port[-1]*p:shape_port[-1]*(p+1)]
             t = theta[shape_port[-1]*p:shape_port[-1]*(p+1)]
             d = dist_tvg[shape_port[-1]*p:shape_port[-1]*(p+1)]
             dat_port = port_fp[p]
             dat_star = star_fp[p]
             data_R = R_fp[p]
             print "writing chunk %s " % (str(p))
             write_points(e, n, t, d, dat_port, dat_star, data_R, pix_m, res, cs2cs_args, sonpath, p, c, dx)
             inputfiles.append(os.path.normpath(os.path.join(sonpath,'x_y_class'+str(p)+'.asc')))
       else:
          p=0
          print "writing chunk %s " % (str(p))
          write_points(esi, nsi, theta, dist_tvg, port_fp, star_fp, R_fp, meta['pix_m'], res, cs2cs_args, sonpath, 0, c, dx)
          inputfiles.append(os.path.normpath(os.path.join(sonpath,'x_y_class'+str(p)+'.asc')))         
          
       #trans =  pyproj.Proj(init=cs2cs_args)

       # D, R, h, t
       print "reading points from %s files" % (str(len(inputfiles)))
       X,Y,S,D,R,h,t,i = getxys(inputfiles)

       print "%s points read from %s files" % (str(len(S)), str(len(inputfiles)))

       # remove values where sidescan intensity is zero
       ind = np.where(np.logical_not(S==0))[0]

       X = X[ind]; Y = Y[ind]
       S = S[ind]; D = D[ind]
       R = R[ind]; h = h[ind]
       t = t[ind]; i = i[ind]
       del ind   
   
       # save to file for temporary storage
       pickle.dump( S, open( os.path.normpath(os.path.join(sonpath,base+"S.p")), "wb" ) ); del S
       pickle.dump( D, open( os.path.normpath(os.path.join(sonpath,base+"D.p")), "wb" ) ); del D
       pickle.dump( t, open( os.path.normpath(os.path.join(sonpath,base+"t.p")), "wb" ) ); del t
       pickle.dump( i, open( os.path.normpath(os.path.join(sonpath,base+"i.p")), "wb" ) ); del i

       pickle.dump( X, open( os.path.normpath(os.path.join(sonpath,base+"X.p")), "wb" ) ); del X
       pickle.dump( Y, open( os.path.normpath(os.path.join(sonpath,base+"Y.p")), "wb" ) ); del Y
       pickle.dump( R, open( os.path.normpath(os.path.join(sonpath,base+"R.p")), "wb" ) ); 
       pickle.dump( h, open( os.path.normpath(os.path.join(sonpath,base+"h.p")), "wb" ) ); 

       #grazing angle
       g = np.arctan2(R.flatten(), h.flatten()) # two-argument arctangent; with np.arctan the second positional argument would be treated as an output array
       pickle.dump( g, open( os.path.normpath(os.path.join(sonpath,base+"g.p")), "wb" ) ); del g, R, h
   
    print "creating grids ..."   

    if res==0:
       res=99

    if res==99:

       #### prepare grids
       R = pickle.load( open( os.path.normpath(os.path.join(sonpath,base+"R.p")), "rb" ) )

       ## actual along-track resolution is this: dx times dy = Af
       tmp = R * dx * (c*0.007 / 2)
       del R

       resg = np.min(tmp[tmp>0])
       del tmp
    else:
       resg = res

    X = pickle.load( open( os.path.normpath(os.path.join(sonpath,base+"X.p")), "rb" ) )
    Y = pickle.load( open( os.path.normpath(os.path.join(sonpath,base+"Y.p")), "rb" ) )
    
    humlon, humlat = trans(X, Y, inverse=True)

    grid_x, grid_y = np.meshgrid( np.arange(np.min(X), np.max(X), resg), np.arange(np.min(Y), np.max(Y), resg) )    
 
    shape = np.shape(grid_x)

    tree = KDTree(zip(X.flatten(), Y.flatten()))
    del X, Y

    print "mosaicking ..."   
    #k nearest neighbour
    try:
       dist, inds = tree.query(zip(grid_x.flatten(), grid_y.flatten()), k = nn, n_jobs=-1)
    except:
       #print ".... update your scipy installation to use faster kd-tree"   
       dist, inds = tree.query(zip(grid_x.flatten(), grid_y.flatten()), k = nn)    
    
    #del grid_x, grid_y
    
    if weight==1:
       g = pickle.load( open( os.path.normpath(os.path.join(sonpath,base+"g.p")), "rb" ) )
       w = g[inds] + 1.0 / dist**2
       del g
    elif weight==2:
       g = pickle.load( open( os.path.normpath(os.path.join(sonpath,base+"g.p")), "rb" ) )
       w = g[inds]
       del g
    elif weight==3:
       w = 1.0 / dist**2    
    elif weight==4:
       w = 1.0
    
    #g = pickle.load( open( os.path.normpath(os.path.join(sonpath,base+"g.p")), "rb" ) )
    #w = g[inds] + 1.0 / dist**2
    #del g

    if weight < 4:
       w[np.isinf(w)]=1
       w[np.isnan(w)]=1
       w[w>10000]=10000
       w[w<=0]=1
    
    # load in sidescan intensity
    S = pickle.load( open( os.path.normpath(os.path.join(sonpath,base+"S.p")), "rb" ) )
    # filter out noise pixels
    S[S<noisefloor] = np.nan

    if nn==1:
       Sdat_g = (w * S.flatten()[inds]).reshape(shape)
       del w
       dist = dist.reshape(shape)
    else:
       if weight < 4:
          Sdat_g = (np.nansum(w * S.flatten()[inds], axis=1) / np.nansum(w, axis=1)).reshape(shape)
       else:
          Sdat_g = (np.nansum(S.flatten()[inds], axis=1)).reshape(shape)
       del w
       dist = np.nanmean(dist,axis=1).reshape(shape)

    del S

    Sdat_g[dist>1] = np.nan
    Sdat_g[Sdat_g<noisefloor] = np.nan

    dat = Sdat_g.copy()
    dat[dist>1] = 0
    dat2 = replace_nans.RN(dat.astype('float64'),1000,0.01,2,'localmean').getdata()
    dat2[dat==0] = np.nan
    del dat

    dat2[dat2<noisefloor] = np.nan

    Sdat_g = dat2.copy()
    del dat2
   
    Sdat_g[Sdat_g==0] = np.nan
    Sdat_g[np.isinf(Sdat_g)] = np.nan
    Sdat_gm = np.ma.masked_invalid(Sdat_g)
    del Sdat_g

    glon, glat = trans(grid_x, grid_y, inverse=True)
    del grid_x, grid_y
    
    # =========================================================
    print "creating kmz file ..."
    ## new way to create kml file  
    pixels = 1024 * 10
 
    fig, ax = humutils.gearth_fig(llcrnrlon=glon.min(),
                     llcrnrlat=glat.min(),
                     urcrnrlon=glon.max(),
                     urcrnrlat=glat.max(),
                     pixels=pixels)
    cs = ax.pcolormesh(glon, glat, Sdat_gm)
    ax.set_axis_off()
    fig.savefig(os.path.normpath(os.path.join(sonpath,'class_overlay1.png')), transparent=True, format='png')    
    

    fig = plt.figure(figsize=(1.0, 4.0), facecolor=None, frameon=False)
    ax = fig.add_axes([0.0, 0.05, 0.2, 0.9])
    cb = fig.colorbar(cs, cax=ax)
    cb.set_label('Texture lengthscale [m]', rotation=-90, color='k', labelpad=20)
    fig.savefig(os.path.normpath(os.path.join(sonpath,'class_legend.png')), transparent=False, format='png')  


    humutils.make_kml(llcrnrlon=glon.min(), llcrnrlat=glat.min(),
         urcrnrlon=glon.max(), urcrnrlat=glat.max(),
         figs=[os.path.normpath(os.path.join(sonpath,'class_overlay1.png'))], 
         colorbar=os.path.normpath(os.path.join(sonpath,'class_legend.png')),
         kmzfile=os.path.normpath(os.path.join(sonpath,'class_GroundOverlay.kmz')), 
         name='Sidescan Intensity')


    # =========================================================
    print "drawing and printing map ..."
    fig = plt.figure(frameon=False)
    map = Basemap(projection='merc', epsg=cs2cs_args.split(':')[1], 
     resolution = 'i', #h #f
     llcrnrlon=np.min(humlon)-0.001, llcrnrlat=np.min(humlat)-0.001,
     urcrnrlon=np.max(humlon)+0.001, urcrnrlat=np.max(humlat)+0.001)

    gx,gy = map.projtran(glon, glat)
       
    try:
       map.arcgisimage(server='http://server.arcgisonline.com/ArcGIS', service='ESRI_Imagery_World_2D', xpixels=1000, ypixels=None, dpi=300)
    except:
       map.arcgisimage(server='http://server.arcgisonline.com/ArcGIS', service='World_Imagery', xpixels=1000, ypixels=None, dpi=300)
    #finally:
    #   print "error: map could not be created..."
      
    ax = plt.Axes(fig, [0., 0., 1., 1.], )
    ax.set_axis_off()
    fig.add_axes(ax)

    if Sdat_gm.size > 25000000:
       print "matrix size > 25,000,000 - decimating by factor of 5 for display"
       map.pcolormesh(gx[::5,::5], gy[::5,::5], Sdat_gm[::5,::5], vmin=np.nanmin(Sdat_gm), vmax=np.nanmax(Sdat_gm))
    else:
       map.pcolormesh(gx, gy, Sdat_gm, vmin=np.nanmin(Sdat_gm), vmax=np.nanmax(Sdat_gm))

    custom_save2(sonpath,'class_map_imagery')
    del fig 

   
    if os.name=='posix': # true if linux/mac
       elapsed = (time.time() - start)
    else: # windows
       elapsed = (time.clock() - start)
    print "Processing took ", elapsed , "seconds to analyse"

    print "Done!"
Ejemplo n.º 38
0
def get_ref_coors_convex(field,
                         coors,
                         close_limit=0.1,
                         cache=None,
                         verbose=False):
    """
    Get reference element coordinates and elements corresponding to given
    physical coordinates.

    Parameters
    ----------
    field : Field instance
        The field defining the approximation.
    coors : array
        The physical coordinates.
    close_limit : float, optional
        The maximum limit distance of a point from the closest
        element allowed for extrapolation.
    cache : Struct, optional
        To speed up a sequence of evaluations, the field mesh and other data
        can be cached. Optionally, the cache can also contain the reference
        element coordinates as `cache.ref_coors`, `cache.cells` and
        `cache.status`, if the evaluation occurs in the same coordinates
        repeatedly. In that case the mesh related data are ignored.
    verbose : bool
        If False, reduce verbosity.

    Returns
    -------
    ref_coors : array
        The reference coordinates.
    cells : array
        The cell indices corresponding to the reference coordinates.
    status : array
        The status: 0 is success, 1 is extrapolation within `close_limit`, 2 is
        extrapolation outside `close_limit`, 3 is failure, 4 is failure due to
        non-convergence of the Newton iteration in tensor product cells.

    Notes
    -----
    Outline of the algorithm for finding xi such that X(xi) = P:

    1. make inverse connectivity - for each vertex have cells it is in.
    2. find the closest vertex V.
    3. choose initial cell: i0 = first from cells incident to V.
    4. while not P in C_i, change C_i towards P, check if P in new C_i.
    """
    timer = Timer()

    ref_coors = get_default_attr(cache, 'ref_coors', None)
    if ref_coors is None:
        extrapolate = close_limit > 0.0

        ref_coors = nm.empty_like(coors)
        cells = nm.empty((coors.shape[0], ), dtype=nm.int32)
        status = nm.empty((coors.shape[0], ), dtype=nm.int32)

        cmesh = get_default_attr(cache, 'cmesh', None)
        if cmesh is None:
            timer.start()
            mesh = field.create_mesh(extra_nodes=False)
            cmesh = mesh.cmesh

            gels = create_geometry_elements()

            cmesh.set_local_entities(gels)
            cmesh.setup_entities()

            centroids = cmesh.get_centroids(cmesh.tdim)

            if field.gel.name != '3_8':
                normals0 = cmesh.get_facet_normals()
                normals1 = None

            else:
                normals0 = cmesh.get_facet_normals(0)
                normals1 = cmesh.get_facet_normals(1)

            output('cmesh setup: %f s' % timer.stop(), verbose=verbose)

        else:
            centroids = cache.centroids
            normals0 = cache.normals0
            normals1 = cache.normals1

        kdtree = get_default_attr(cache, 'kdtree', None)
        if kdtree is None:
            from scipy.spatial import cKDTree as KDTree

            timer.start()
            kdtree = KDTree(cmesh.coors)
            output('kdtree: %f s' % timer.stop(), verbose=verbose)

        timer.start()
        ics = kdtree.query(coors)[1]
        output('kdtree query: %f s' % timer.stop(), verbose=verbose)

        ics = nm.asarray(ics, dtype=nm.int32)

        coors = nm.ascontiguousarray(coors)
        ctx = field.create_basis_context()

        timer.start()
        crc.find_ref_coors_convex(ref_coors, cells, status, coors, cmesh,
                                  centroids, normals0, normals1, ics,
                                  extrapolate, 1e-15, close_limit, ctx)
        output('ref. coordinates: %f s' % timer.stop(), verbose=verbose)

    else:
        cells = cache.cells
        status = cache.status

    return ref_coors, cells, status
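
A minimal sketch of just the KDTree step above, in isolation: build the tree on the mesh vertex coordinates and take the nearest vertex index as the starting guess for each physical point (the arrays below are synthetic stand-ins for cmesh.coors and coors):

import numpy as np
from scipy.spatial import cKDTree as KDTree

mesh_coors = np.random.rand(1000, 3)     # stand-in for cmesh.coors
points = np.random.rand(50, 3)           # stand-in for the physical coordinates

kdtree = KDTree(mesh_coors)
ics = kdtree.query(points)[1]            # nearest mesh vertex for each point
ics = np.asarray(ics, dtype=np.int32)    # starting guesses for the cell search
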
Ejemplo n.º 39
0
def get_potential_cells(coors, cmesh, centroids=None, extrapolate=True):
    """
    Get cells that potentially contain points with the given physical
    coordinates.

    Parameters
    ----------
    coors : array
        The physical coordinates.
    cmesh : CMesh instance
        The cmesh defining the cells.
    centroids : array, optional
        The centroids of the cells.
    extrapolate : bool
        If True, even the points that are surely outside of the
        cmesh are considered and assigned potential cells.

    Returns
    -------
    potential_cells : array
        The indices of the cells that potentially contain the points.
    offsets : array
        The offsets into `potential_cells` for each point: a point ``ip`` is
        potentially in cells ``potential_cells[offsets[ip]:offsets[ip+1]]``.
    """
    from scipy.spatial import cKDTree as KDTree

    if centroids is None:
        centroids = cmesh.get_centroids(cmesh.tdim)

    kdtree = KDTree(coors)

    conn = cmesh.get_cell_conn()
    cc = conn.indices.reshape(cmesh.n_el, -1)
    cell_coors = cmesh.coors[cc]

    rays = cell_coors - centroids[:, None]
    radii = nm.linalg.norm(rays, ord=nm.inf, axis=2).max(axis=1)

    potential_cells = [[]] * coors.shape[0]
    for ic, centroid in enumerate(centroids):
        ips = kdtree.query_ball_point(centroid, radii[ic], p=nm.inf)
        if len(ips):
            for ip in ips:
                if not len(potential_cells[ip]):
                    potential_cells[ip] = []

                potential_cells[ip].append(ic)

    lens = nm.array([0] + [len(ii) for ii in potential_cells], dtype=nm.int32)

    if extrapolate:
        # Deal with the points outside of the field domain - insert elements
        # incident to the closest mesh vertex.
        iin = nm.where(lens[1:] == 0)[0]
        if len(iin):
            kdtree = KDTree(cmesh.coors)
            ics = kdtree.query(coors[iin])[1]
            cmesh.setup_connectivity(0, cmesh.tdim)
            conn = cmesh.get_conn(0, cmesh.tdim)

            oo = conn.offsets
            for ii, ip in enumerate(iin):
                ik = ics[ii]
                potential_cells[ip] = conn.indices[oo[ik]:oo[ik + 1]]
                lens[ip + 1] = len(potential_cells[ip])

    offsets = nm.cumsum(lens, dtype=nm.int32)
    potential_cells = nm.concatenate(potential_cells).astype(nm.int32)

    return potential_cells, offsets
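
A minimal sketch of the candidate search above: for each cell centroid, query_ball_point with the infinity norm collects the query points inside an axis-aligned box of half-width radii[ic], and those cells become the points' potential containers (all data below is synthetic):

import numpy as np
from scipy.spatial import cKDTree as KDTree

points = np.random.rand(200, 2)          # physical coordinates to locate
centroids = np.random.rand(20, 2)        # cell centroids
radii = np.full(len(centroids), 0.1)     # per-cell bounding-box half-widths

kdtree = KDTree(points)
potential_cells = [[] for _ in range(len(points))]
for ic, centroid in enumerate(centroids):
    # Chebyshev (p=inf) ball == axis-aligned box around the centroid
    for ip in kdtree.query_ball_point(centroid, radii[ic], p=np.inf):
        potential_cells[ip].append(ic)
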
Ejemplo n.º 40
0
class NearestNeighborFinder():
    """
    Nearest neighbor search object for NEMO netCDF output files.
    """
    def __init__(self, ncfilename):
        """
        Create new instance.

        :arg str ncfilename: NEMO netCDF file name
        """
        self.filename = ncfilename
        self.data_dim = None
        self.grid_type = None
        self._build_tree()

    def _build_tree(self):
        """
        Construct nearest neighbor tree.
        """
        def parse_grid_type(ncf):
            """
            Figure out which discretization the file contains, T, U or V

            Reads the description attribute, e.g. "ocean T grid variables"

            returns 't', 'u', or 'v'
            """
            return 't'  # HACK assume always T grid
            desc = ncf.description
            words = desc.split()
            assert words[0] == 'ocean'
            assert words[2] == 'grid'
            return words[1].lower()

        with netCDF4.Dataset(self.filename) as ncf:
            self.grid_type = parse_grid_type(ncf)
            assert self.grid_type == 't', 'Only T grid is supported currently'
            # compute land mask
            self.data_dim = 3 if 'e3t' in ncf.variables else 2
            if self.data_dim == 3:
                # NOTE does not take time-dependent wetting-drying into account
                e = ncf['e3t'][0, :, :, :]
                self.landmask = numpy.all(e.mask, axis=0)
                # 1D array of all wet points in raveled index
                self.wetmask = numpy.nonzero(~self.landmask.ravel())[0]
                # get coordinates
                self.lon = ncf['nav_lon'][:]
                self.lat = ncf['nav_lat'][:]
                depth = ncf['deptht'][:]
                self.z = -depth
                # 1D arrays of all wet points
                self.valid_lon = self.lon.ravel()[self.wetmask]
                self.valid_lat = self.lat.ravel()[self.wetmask]
            else:
                # read a field to get landmask
                for v in ncf.variables:
                    var = ncf[v]
                    if len(var.shape) == 3:
                        # 2D time dependent field
                        self.landmask = numpy.all(var[:].mask, axis=0)
                        break
                self.wetmask = numpy.nonzero(~self.landmask.ravel())[0]
                # get coordinates
                self.lon = ncf['nav_lon'][:]
                self.lat = ncf['nav_lat'][:]
                self.z = 0.0
                # 1D arrays of all wet points
                self.valid_lon = self.lon.ravel()[self.wetmask]
                self.valid_lat = self.lat.ravel()[self.wetmask]

        assert len(self.valid_lat) > 0, \
            'No valid points found in {:}'.format(self.filename)
        coords = numpy.vstack((self.valid_lon, self.valid_lat)).T
        self.tree = KDTree(coords)

    def find(self, lon, lat, z):
        """
        Finds nearest neighbor index for point (lon, lat, z)

        :arg lon: longitude coordinate
        :arg lat: latitude coordinate
        :arg z: z coordinate (negative downwards)
        :returns: i, j, k indices of nearest neighbor indices
        """
        dist, index = self.tree.query([lon, lat], k=1)
        index = self.wetmask[index]
        i, j = numpy.unravel_index(index, self.lat.shape)
        if self.data_dim == 3:
            k = numpy.abs(self.z - z).argmin()
        else:
            k = None
        return i, j, k
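
A minimal stand-alone sketch of the wet-point lookup pattern used by this class: build the tree only over unmasked grid points and map the flat tree index back to 2D (i, j) indices (the grid and mask below are synthetic):

import numpy as np
from scipy.spatial import cKDTree as KDTree

# synthetic 2D lon/lat grid with a strip of "land" masked out
lon = np.repeat(np.linspace(0, 10, 50)[None, :], 40, axis=0)
lat = np.repeat(np.linspace(50, 60, 40)[:, None], 50, axis=1)
landmask = np.zeros(lon.shape, dtype=bool)
landmask[:5, :] = True

wetmask = np.nonzero(~landmask.ravel())[0]
tree = KDTree(np.column_stack((lon.ravel()[wetmask], lat.ravel()[wetmask])))

dist, index = tree.query([3.3, 57.1], k=1)        # nearest wet point
i, j = np.unravel_index(wetmask[index], lat.shape)
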
Ejemplo n.º 41
0
if 'snakemake' not in globals():
    from vresutils.snakemake import MockSnakemake, Dict

    snakemake = MockSnakemake(input=Dict(base_network='networks/base.nc'),
                              output=['resources/powerplants.csv'])

logging.basicConfig(level=snakemake.config['logging_level'])

n = pypsa.Network(snakemake.input.base_network)

ppl = (ppm.collection.matched_data()[lambda df: ~df.Fueltype.isin(
    ('Solar', 'Wind'))].pipe(ppm.cleaning.clean_technology).assign(
        Fueltype=lambda df: (df.Fueltype.where(
            df.Fueltype != 'Natural Gas',
            df.Technology.replace('Steam Turbine', 'OCGT').fillna('OCGT')))).
       pipe(ppm.utils.fill_geoposition, parse=True,
            only_saved_locs=True).pipe(ppm.heuristics.fill_missing_duration))

# ppl.loc[(ppl.Fueltype == 'Other') & ppl.Technology.str.contains('CCGT'), 'Fueltype'] = 'CCGT'
# ppl.loc[(ppl.Fueltype == 'Other') & ppl.Technology.str.contains('Steam Turbine'), 'Fueltype'] = 'CCGT'

ppl = ppl.loc[ppl.lon.notnull() & ppl.lat.notnull()]

substation_lv_i = n.buses.index[n.buses['substation_lv']]
kdtree = KDTree(n.buses.loc[substation_lv_i, ['x', 'y']].values)
ppl = ppl.assign(
    bus=substation_lv_i[kdtree.query(ppl[['lon', 'lat']].values)[1]])

ppl.to_csv(snakemake.output[0])
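
A minimal sketch of the bus-assignment step above, with made-up coordinates instead of a PyPSA network and the powerplantmatching pipeline: each plant gets the index label of its nearest low-voltage bus:

import numpy as np
import pandas as pd
from scipy.spatial import cKDTree as KDTree

buses = pd.DataFrame({'x': [10.0, 11.5, 13.2], 'y': [50.0, 52.1, 49.7]},
                     index=['bus0', 'bus1', 'bus2'])
plants = pd.DataFrame({'lon': [10.2, 13.0], 'lat': [50.1, 49.9]})

kdtree = KDTree(buses[['x', 'y']].values)
nearest = kdtree.query(plants[['lon', 'lat']].values)[1]   # row index of nearest bus
plants = plants.assign(bus=buses.index[nearest])
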
Ejemplo n.º 42
0
def match_arbitrary_translation_dilatation(x1,y1,x2,y2) :
    """
    Match two catalogs in different coordinate systems, 1 and 2, related by a translation, a dilatation, and possibly a "small" rotation
    The orientation of triangles is used for the match so the rotation has to be small.
    Inspired from http://articles.adsabs.harvard.edu/pdf/1986AJ.....91.1244G
    
    Args:
        x1 : float numpy array of coordinates along first axis of cartesian coordinate system 1
        y1 : float numpy array of coordinates along second axis of cartesian coordinate system 1
        x2 : float numpy array of coordinates along first axis of cartesian coordinate system 2
        y2 : float numpy array of coordinates along second axis of cartesian coordinate system 2
    
    returns:
        indices_2 : integer numpy array. If ii is an index array for entries in the first catalog,
                            indices_2[ii] is the index array of the best matching entries in the second catalog
                            (one should compare x1[ii] with x2[indices_2[ii]]).
                            Negative values mark unmatched entries.
        distance : distance between pairs of triangles. It can be used to discard bad matches. 

    """

    log = get_logger()
    
    # compute all possible triangles in both data sets
    # txyz are properties of the shape and orientation of the triangles
    log.debug("compute triangles")
    tk1,txyz1 = compute_triangles_with_fixed_orientation(x1,y1)
    tk2,txyz2 = compute_triangles_with_fixed_orientation(x2,y2)
    
    log.debug("match triangles")
    # match with kdtree triangles with same shape and orientation
    tree2=KDTree(txyz2)
    triangle_distances,triangle_indices_2 = tree2.query(txyz1,k=1)
    
    # now that we have match of triangles , need to match back catalog entries
    ranked_pairs = np.argsort(triangle_distances)
    
    indices_2 = -1*np.ones(x1.size,dtype=int)
    distances = np.zeros(x1.size)
    
    all_matched = False
    log.debug("match catalogs using pairs of triangles")
    for p in ranked_pairs :

        k1=tk1[p] # indices (in x1,y1) of vertices of this triangle (size=3)
        k2=tk2[triangle_indices_2[p]] # indices (in x2,y2) of vertices of the matched triangle
        
        # check unmatched or equal
        if np.any((indices_2[k1]>=0)&(indices_2[k1]!=k2)) :
            log.warning("skip {} <=> {}".format(k1,k2))
            continue
        indices_2[k1]=k2
        distances[k1]=triangle_distances[p]
        all_matched = (np.sum(indices_2>=0)==x1.size)
        if all_matched :
            log.debug("all matched")
            break

    # check duplicates
    for i2 in np.unique(indices_2[indices_2>=0]) :
        ii=(indices_2==i2)
        if np.sum(ii) > 1 :
            log.warning("{} duplicates for i2={}".format(np.sum(ii),i2))
            indices_2[ii]=-1
    
    return indices_2 , distances
Ejemplo n.º 43
0
def extract_edges_in_block(db_name, db_host, soft_mask_container,
                           soft_mask_dataset, distance_threshold,
                           evidence_threshold, graph_number, block):

    graph_provider = MongoDbGraphProvider(
        db_name,
        db_host,
        mode='r+',
        position_attribute=['z', 'y', 'x'],
        directed=False,
        edges_collection='edges_g{}'.format(graph_number))

    if check_function(graph_provider.database, block,
                      "edges_g{}".format(graph_number)):
        return 0

    logger.debug("Finding edges in %s, reading from %s", block.write_roi,
                 block.read_roi)

    start = time.time()

    soft_mask_array = daisy.open_ds(soft_mask_container, soft_mask_dataset)

    graph = graph_provider[block.read_roi.intersect(soft_mask_array.roi)]

    if graph.number_of_nodes() == 0:
        logger.info("No nodes in roi %s. Skipping", block.read_roi)
        write_done(graph_provider.database, block,
                   'edges_g{}'.format(graph_number))
        return 0

    logger.debug("Read %d candidates in %.3fs", graph.number_of_nodes(),
                 time.time() - start)

    start = time.time()
    """
    candidates = [(candidate_id, 
                   np.array([data[d] for d in ['z', 'y', 'x']])) 
                   for candidate_id, data in graph.nodes(data=True) if 'z' in data]
    """
    candidates = np.array(
        [[candidate_id] + [data[d] for d in ['z', 'y', 'x']]
         for candidate_id, data in graph.nodes(data=True) if 'z' in data],
        dtype=np.uint64)

    kdtree_start = time.time()
    kdtree = KDTree([[candidate[1], candidate[2], candidate[3]]
                     for candidate in candidates])
    #kdtree = KDTree(candidates[])
    pairs = kdtree.query_pairs(distance_threshold, p=2.0, eps=0)
    logger.debug("Query pairs in %.3fs", time.time() - kdtree_start)

    soft_mask_array = daisy.open_ds(soft_mask_container, soft_mask_dataset)

    voxel_size = np.array(soft_mask_array.voxel_size, dtype=np.uint32)
    soft_mask_roi = block.read_roi.snap_to_grid(
        voxel_size=voxel_size).intersect(soft_mask_array.roi)
    soft_mask_array_data = soft_mask_array.to_ndarray(roi=soft_mask_roi)

    sm_dtype = soft_mask_array_data.dtype
    if sm_dtype == np.uint8:  # standard pipeline pm 0-255
        pass
    elif sm_dtype == np.float32 or sm_dtype == np.float64:
        if not (soft_mask_array_data.min() >= 0
                and soft_mask_array_data.max() <= 1):
            raise ValueError(
                "Provided soft_mask has dtype float but not in range [0,1], abort"
            )
        else:
            soft_mask_array_data *= 255
    else:
        raise ValueError("Soft mask dtype {} not understood".format(sm_dtype))

    soft_mask_array_data = soft_mask_array_data.astype(np.float64)

    if evidence_threshold is not None:
        soft_mask_array_data = (soft_mask_array_data >= evidence_threshold *
                                255).astype(np.float64) * 255

    offset = np.array(np.array(soft_mask_roi.get_offset()) / voxel_size,
                      dtype=np.uint64)
    evidence_start = time.time()

    if pairs:
        pairs = np.array(list(pairs), dtype=np.uint64)
        evidence_array = cpp_get_evidence(candidates, pairs,
                                          soft_mask_array_data, offset,
                                          voxel_size)
        graph.add_weighted_edges_from(evidence_array, weight='evidence')

        logger.debug("Accumulate evidence in %.3fs",
                     time.time() - evidence_start)

        logger.debug("Found %d edges", graph.number_of_edges())

        logger.debug("Extracted edges in %.3fs", time.time() - start)

        start = time.time()

        graph.write_edges(block.write_roi)

        logger.debug("Wrote edges in %.3fs", time.time() - start)
    else:
        logger.debug("No pairs in block, skip")

    write_done(graph_provider.database, block,
               'edges_g{}'.format(graph_number))
    return 0
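
A minimal sketch of the pair search above in isolation: query_pairs returns all index pairs (i, j), i < j, whose Euclidean distance is at most the threshold (candidate positions below are synthetic; the evidence-accumulation step is omitted):

import numpy as np
from scipy.spatial import cKDTree as KDTree

positions = np.random.rand(100, 3) * 100.0        # synthetic candidate centres (z, y, x)
kdtree = KDTree(positions)
pairs = kdtree.query_pairs(r=15.0, p=2.0, eps=0)  # set of (i, j) with i < j and distance <= 15
print(len(pairs), 'candidate edges')
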
Ejemplo n.º 44
0
 def fit(self, df, location_cols, y_col, **kwargs):
     super().fit(df, location_cols, y_col)
     self.model = KDTree(self.locations)
Ejemplo n.º 45
0
    def __init__(self,
                 polytopes,
                 key_vertices_count=0,
                 process_count=8,
                 max_number_key_points=None):
        '''
        Compute the closest polytope using Voronoi cells
        :param polytopes:
        '''
        self.init_start_time = default_timer()
        self.section_start_time = self.init_start_time
        self.polytopes = np.asarray(polytopes, dtype='object')
        self.type = self.polytopes[0].type
        self.process_count = process_count
        self.key_vertices_count = key_vertices_count
        if self.type == 'AH_polytope':
            self.dim = self.polytopes[0].t.shape[0]
        elif self.type == 'zonotope':
            self.dim = self.polytopes[0].x.shape[0]
        else:
            raise NotImplementedError
        if self.key_vertices_count > 0:
            self.key_points = np.zeros([
                len(self.polytopes) * (1 + 2**self.key_vertices_count),
                self.dim
            ])
        else:
            self.key_points = np.zeros([len(self.polytopes), self.dim])
        for i, z in enumerate(polytopes):
            if self.type == 'AH_polytope':
                if self.key_vertices_count > 0:
                    raise NotImplementedError
                else:
                    self.key_points[i, :] = self.polytopes[i].t[:, 0]
            elif self.type == 'zonotope':
                if self.key_vertices_count > 0:
                    self.key_points[i * (2**self.key_vertices_count +
                                         1), :] = self.polytopes[i].x[:, 0]
                    self.key_points[
                        i * (2**self.key_vertices_count + 1) + 1:(i + 1) *
                        (2**self.key_vertices_count +
                         1), :] = get_k_random_edge_points_in_zonotope(
                             self.polytopes[i], self.key_vertices_count)
                else:
                    self.key_points[i, :] = self.polytopes[i].x[:, 0]
            else:
                raise NotImplementedError
        if max_number_key_points:
            # sample the key points
            n = self.key_points.shape[0]
            chosen_key_points = np.random.choice(n,
                                                 size=min(
                                                     n, max_number_key_points),
                                                 replace=False)
            self.key_points = self.key_points[chosen_key_points, :]
            # print(self.key_points.shape)
        self.key_point_to_polytope_map = dict(
        )  # stores the potential closest polytopes associated with each Voronoi cell (keyed by centroid)
        for key_point in self.key_points:
            ds = np.zeros(self.polytopes.shape[0])
            self.key_point_to_polytope_map[str(key_point)] = np.rec.fromarrays(
                [self.polytopes, ds], names=('polytopes', 'distances'))

        self.build_cell_polytope_map_default()

        #build kd-tree for centroids
        self.key_point_tree = KDTree(self.key_points)
        print(('Completed precomputation in %f seconds' %
               (default_timer() - self.init_start_time)))
Ejemplo n.º 46
0
  sys.stdout.flush()
  if name is not None:
    newargs = initargs + ['--imodes','flexm-'+str(nstruc+1)+name+'.dat']
    if not os.path.exists('flexm-'+str(nstruc+1)+name+'.dat'):
      break
    collectlib.collect_iattract(newargs)

  result = collectlib.collect_next()
  if result: break
  nstruc += 1
  coor = collectlib.collect_all_coor()
  pdbsizes2 = np.cumsum([0] + pdbsizes)
  coors = [coor[pdbsizes2[n]:pdbsizes2[n+1]] for n in range(len(pdbs))]
  energy = 0
  eblock = 0
  for n1 in range(len(pdbs)):
    c1 = coors[n1]
    tree1 = KDTree(c1)
    for n2 in range(n1+1, len(pdbs)):
      c2 = coors[n2]
      tree2 = KDTree(c2)
      energyblock = energyblocks[eblock]
      pairs = tree1.query_ball_tree(tree2, 10)
      ene = sum([sum(e[p]) for e,p in zip(energyblock,pairs) if len(p)])
      energy += ene

      eblock += 1


  f1.write("%.3f\n" % energy)
Ejemplo n.º 47
0
class Invdisttree:
    """ inverse-distance-weighted interpolation using KDTree:
invdisttree = Invdisttree( X, z )  -- data points, values
interpol = invdisttree( q, nnear=3, eps=0, p=1, weights=None, stat=0 )
    interpolates z from the 3 points nearest each query point q;
    For example, interpol[ a query point q ]
    finds the 3 data points nearest q, at distances d1 d2 d3
    and returns the IDW average of the values z1 z2 z3
        (z1/d1 + z2/d2 + z3/d3)
        / (1/d1 + 1/d2 + 1/d3)
        = .55 z1 + .27 z2 + .18 z3  for distances 1 2 3

    q may be one point, or a batch of points.
    eps: approximate nearest, dist <= (1 + eps) * true nearest
    p: use 1 / distance**p
    weights: optional multipliers for 1 / distance**p, of the same shape as q
    stat: accumulate wsum, wn for average weights

How many nearest neighbors should one take ?
a) start with 8 11 14 .. 28 in 2d 3d 4d .. 10d; see Wendel's formula
b) make 3 runs with nnear= e.g. 6 8 10, and look at the results --
    |interpol 6 - interpol 8| etc., or |f - interpol*| if you have f(q).
    I find that runtimes don't increase much at all with nnear -- ymmv.

p=1, p=2 ?
    p=2 weights nearer points more, farther points less.
    In 2d, the circles around query points have areas ~ distance**2,
    so p=2 is inverse-area weighting. For example,
        (z1/area1 + z2/area2 + z3/area3)
        / (1/area1 + 1/area2 + 1/area3)
        = .74 z1 + .18 z2 + .08 z3  for distances 1 2 3
    Similarly, in 3d, p=3 is inverse-volume weighting.

Scaling:
    if different X coordinates measure different things, Euclidean distance
    can be way off.  For example, if X0 is in the range 0 to 1
    but X1 0 to 1000, the X1 distances will swamp X0;
    rescale the data, i.e. make X0.std() ~= X1.std() .

A nice property of IDW is that it's scale-free around query points:
if I have values z1 z2 z3 from 3 points at distances d1 d2 d3,
the IDW average
    (z1/d1 + z2/d2 + z3/d3)
    / (1/d1 + 1/d2 + 1/d3)
is the same for distances 1 2 3, or 10 20 30 -- only the ratios matter.
In contrast, the commonly-used Gaussian kernel exp( - (distance/h)**2 )
is exceedingly sensitive to distance and to h.

    """

    # anykernel( dj / av dj ) is also scale-free
    # error analysis, |f(x) - idw(x)| ? todo: regular grid, nnear ndim+1, 2*ndim

    def __init__(self, X, z, leafsize=10, stat=0):
        assert len(X) == len(z), "len(X) %d != len(z) %d" % (len(X), len(z))
        self.tree = KDTree(X, leafsize=leafsize)  # build the tree
        self.z = z
        self.stat = stat
        self.wn = 0
        self.wsum = None

    def __call__(self, q, nnear=6, eps=0, p=1, weights=None):
        # nnear nearest neighbours of each query point --
        q = np.asarray(q)
        qdim = q.ndim
        if qdim == 1:
            q = np.array([q])
        if self.wsum is None:
            self.wsum = np.zeros(nnear)

        self.distances, self.ix = self.tree.query(q, k=nnear, eps=eps)
        interpol = np.zeros((len(self.distances), ) + np.shape(self.z[0]))
        jinterpol = 0
        for dist, ix in zip(self.distances, self.ix):
            if nnear == 1:
                wz = self.z[ix]
            elif dist[0] < 1e-10:
                wz = self.z[ix[0]]
            else:  # weight z s by 1/dist --
                w = 1 / dist**p
                if weights is not None:
                    w *= weights[ix]  # >= 0
                w /= np.sum(w)
                wz = np.dot(w, self.z[ix])
                if self.stat:
                    self.wn += 1
                    self.wsum += w
            interpol[jinterpol] = wz
            jinterpol += 1
        return interpol if qdim > 1 else interpol[0]
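
A minimal usage sketch for the class above with synthetic data, assuming KDTree is the scipy.spatial.cKDTree alias imported at module level as in the other examples:

import numpy as np

X = np.random.uniform(0, 1, (1000, 2))           # known sample locations
z = np.sin(6 * X[:, 0]) + np.cos(6 * X[:, 1])    # known values at X
q = np.random.uniform(0, 1, (10, 2))             # query points

invdisttree = Invdisttree(X, z, leafsize=10, stat=1)
interpol = invdisttree(q, nnear=6, eps=0, p=1)   # IDW estimate at each query point
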
Ejemplo n.º 48
0
    def init_subproblems(self, conf, **kwargs):
        from sfepy.discrete.state import State
        from sfepy.discrete import Problem
        from sfepy.base.conf import ProblemConf, get_standard_keywords
        from scipy.spatial import cKDTree as KDTree

        # init subproblems
        problem = self.context
        pb_vars = problem.get_variables()
        # get "master" DofInfo and last index
        pb_adi_indx = problem.equations.variables.adi.indx
        self.adi_indx = pb_adi_indx.copy()
        last_indx = -1
        for ii in six.itervalues(self.adi_indx):
            last_indx = nm.max([last_indx, ii.stop])

        # coupling variables
        self.cvars_to_pb = {}
        for jj in conf.coupling_variables:
            self.cvars_to_pb[jj] = [None, None]
            if jj in pb_vars.names:
                if pb_vars[jj].dual_var_name is not None:
                    self.cvars_to_pb[jj][0] = -1

                else:
                    self.cvars_to_pb[jj][1] = -1

        # init subproblems
        self.subpb = []
        required, other = get_standard_keywords()
        master_prefix = output.get_output_prefix()
        for ii, ifname in enumerate(conf.others):
            sub_prefix = master_prefix[:-1] + '-sub%d:' % (ii + 1)
            output.set_output_prefix(sub_prefix)
            kwargs['master_problem'] = problem
            confi = ProblemConf.from_file(ifname, required, other,
                                          define_args=kwargs)
            pbi = Problem.from_conf(confi, init_equations=True)
            sti = State(pbi.equations.variables)
            pbi.equations.set_data(None, ignore_unknown=True)
            pbi.time_update()
            pbi.update_materials()
            sti.apply_ebc()
            pbi_vars = pbi.get_variables()
            output.set_output_prefix(master_prefix)
            self.subpb.append([pbi, sti, None])

            # append "slave" DofInfo
            for jj in pbi_vars.names:
                if not(pbi_vars[jj].is_state()):
                    continue

                didx = pbi.equations.variables.adi.indx[jj]
                ndof = didx.stop - didx.start
                if jj in self.adi_indx:
                    if ndof != \
                      (self.adi_indx[jj].stop - self.adi_indx[jj].start):
                        raise ValueError('DOFs do not match!')

                else:
                    self.adi_indx.update({
                        jj: slice(last_indx, last_indx + ndof, None)})
                    last_indx += ndof

            for jj in conf.coupling_variables:
                if jj in pbi_vars.names:
                    if pbi_vars[jj].dual_var_name is not None:
                        self.cvars_to_pb[jj][0] = ii

                    else:
                        self.cvars_to_pb[jj][1] = ii

        self.subpb.append([problem, None, None])

        self.cvars_to_pb_map = {}
        for varname, pbs in six.iteritems(self.cvars_to_pb):
            # match field nodes
            coors = []
            for ii in pbs:
                pbi = self.subpb[ii][0]
                pbi_vars = pbi.get_variables()
                fcoors = pbi_vars[varname].field.coors
                dc = nm.abs(nm.max(fcoors, axis=0)\
                            - nm.min(fcoors, axis=0))
                ax = nm.where(dc > 1e-9)[0]
                coors.append(fcoors[:,ax])

            if len(coors[0]) != len(coors[1]):
                raise ValueError('number of nodes does not match!')

            kdtree = KDTree(coors[0])
            map_12 = kdtree.query(coors[1])[1]

            pbi1 = self.subpb[pbs[0]][0]
            pbi1_vars = pbi1.get_variables()
            eq_map_1 = pbi1_vars[varname].eq_map

            pbi2 = self.subpb[pbs[1]][0]
            pbi2_vars = pbi2.get_variables()
            eq_map_2 = pbi2_vars[varname].eq_map

            dpn = eq_map_2.dpn
            nnd = map_12.shape[0]

            map_12_nd = nm.zeros((nnd * dpn,), dtype=nm.int32)
            if dpn > 1:
                for ii in range(dpn):
                    map_12_nd[ii::dpn] = map_12 * dpn + ii
            else:
                map_12_nd = map_12

            idx = nm.where(eq_map_2.eq >= 0)[0]
            self.cvars_to_pb_map[varname] = eq_map_1.eq[map_12[idx]]
Ejemplo n.º 49
0
def KLdivergence(x, y):
    """Compute the Kullback-Leibler divergence between two multivariate samples.
    Parameters
    ----------
    x : 2D array (n,d)
    Samples from distribution P, which typically represents the true
    distribution.
    y : 2D array (m,d)
    Samples from distribution Q, which typically represents the approximate
    distribution.
    Returns
    -------
    out : float
    The estimated Kullback-Leibler divergence D(P||Q).
    References
    ----------
    Pérez-Cruz, F. Kullback-Leibler divergence estimation of
    continuous distributions IEEE International Symposium on Information
    Theory, 2008.
    
    https://gist.github.com/atabakd/ed0f7581f8510c8587bc2f41a094b518
    """

    eta = 0.0000000001

    # Check the dimensions are consistent
    x = np.atleast_2d(x)
    y = np.atleast_2d(y)

    n,d = x.shape
    m,dy = y.shape

    assert d == dy
    assert n != 0
    assert n != 1

    # Build a KD tree representation of the samples and find the nearest neighbour
    # of each point in x.
    xtree = KDTree(x)
    ytree = KDTree(y)

    # Get the first two nearest neighbours for x, since the closest one is the
    # sample itself.
    r = xtree.query(x, k=2, eps=.01, p=2)[0][:,1]
    s = ytree.query(x, k=1, eps=.01, p=2)[0]
    s[s == 0] = eta
    
    #np.seterr(all='raise') 
    #try:
    #    ratio = r / s
    #    _ = np.log(ratio, where=ratio > 0).sum()
    #except Exception as ex:
    #    print(ex)
    #    print(np.sum(s==0))
    #    print(np.sum(np.isclose(s, 0)))
    #    assert False, "log(r/s) produces 'divide by zero' error or other exception."
    
    if np.any(s == 0):
        return "ERR: s=0"
    else:
        # There is a mistake in the paper. In Eq. 14, the right side misses a negative sign
        # on the first term of the right hand side.
        ratio = r/s
        return -np.log(ratio, where=ratio > 0).sum() * d / n + np.log(m / (n - 1.))
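
A quick sanity check for the estimator above on two 1-D Gaussians, where the true divergence is known in closed form, KL(N(0,1) || N(1,1)) = 0.5; this assumes np and the KDTree alias are in scope as the function requires:

import numpy as np

x = np.random.normal(0.0, 1.0, size=(20000, 1))  # samples from P = N(0, 1)
y = np.random.normal(1.0, 1.0, size=(20000, 1))  # samples from Q = N(1, 1)
print(KLdivergence(x, y))                        # analytic value is 0.5; the estimate should be close
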
Ejemplo n.º 50
0
def runPixMatch(outpre, filter):

    if filter == 'f606w':
        let = 'v'
    else:
        let = 'i'

    if outpre == 'lower':
        x_drc_low = drc_low['x_' + let]
        y_drc_low = drc_low['y_' + let]

        xm_flc_low = flc_all['xdrc_low_' + filter]
        ym_flc_low = flc_all['ydrc_low_' + filter]

        coords1low = np.empty((xm_flc_low.size, 2))
        coords2low = np.empty((x_drc_low.size, 2))

        coords1low[:, 0] = xm_flc_low
        coords1low[:, 1] = ym_flc_low

        coords2low[:, 0] = x_drc_low
        coords2low[:, 1] = y_drc_low

        kdt = KDT(coords2low)
        idxs2 = kdt.query(coords1low)[1]

        ds = distArr(xm_flc_low, ym_flc_low, x_drc_low[idxs2],
                     y_drc_low[idxs2])

        idxs1 = np.arange(xm_flc_low.size)

        msk = ds < matchtol
        idxs1 = idxs1[msk]
        idxs2 = idxs2[msk]
        ds = ds[msk]

    else:
        x_drc_up = drc_up['x_' + let]
        y_drc_up = drc_up['y_' + let]

        xm_flc_up = flc_all['xdrc_up_' + filter]
        ym_flc_up = flc_all['ydrc_up_' + filter]

        coords1up = np.empty((xm_flc_up.size, 2))
        coords2up = np.empty((x_drc_up.size, 2))

        coords1up[:, 0] = xm_flc_up
        coords1up[:, 1] = ym_flc_up

        coords2up[:, 0] = x_drc_up
        coords2up[:, 1] = y_drc_up

        kdt = KDT(coords2up)
        idxs2 = kdt.query(coords1up)[1]

        ds = distArr(xm_flc_up, ym_flc_up, x_drc_up[idxs2], y_drc_up[idxs2])

        idxs1 = np.arange(xm_flc_up.size)

        msk = ds < matchtol
        idxs1 = idxs1[msk]
        idxs2 = idxs2[msk]
        ds = ds[msk]

    print(len(idxs1))

    outfile = main_dir + 'hor-I-cut_drc_' + outpre + '_' + filter + '_tol{0}_magCuts.txt'.format(
        matchtol)
    np.savetxt(outfile, idxs2, fmt='%4i')

    outfile = main_dir + 'hor-I-cut_flc_' + outpre + '_' + filter + '_tol{0}_magCuts.txt'.format(
        matchtol)
    np.savetxt(outfile, idxs1, fmt='%4i')

    # outfile = main_dir+'hor-I-cut_ds_'+outpre+'_'+filter+'_tol{0}.txt'.format(matchtol)
    # np.savetxt(outfile, ds, fmt='%1.4f')

    return None
Ejemplo n.º 51
0
def xymatch(x1, y1, x2, y2, tol=None, nnearest=1):
    """
    Finds matches in one catalog to another.

    Parameters
    ----------
    x1 : array-like
        X-coordinates of first catalog
    y1 : array-like
        Y-coordinates of first catalog
    x2 : array-like
        X-coordinates of second catalog
    y2 : array-like
        Y-coordinates of second catalog
    tol : float or None, optional
        How close a match has to be to count as a match.  If None,
        all nearest neighbors for the first catalog will be returned.
    nnearest : int, optional
        The nth neighbor to find.  E.g., 1 for the nearest neighbor, 2 for the
        second nearest neighbor, etc.  Particularly useful if you want to get
        the nearest *non-self* neighbor of a catalog.  To do this, use:
        ``spherematch(x, y, x, y, nnearest=2)``

    Returns
    -------
    idx1 : int array
        Indices into the first catalog of the matches. Will never be
        larger than `x1`/`y1`.
    idx2 : int array
        Indices into the second catalog of the matches. Will never be
        larger than `x1`/`y1`.
    ds : float array
        Distance between the matches

    """

    x1 = np.array(x1, copy=False)
    y1 = np.array(y1, copy=False)
    x2 = np.array(x2, copy=False)
    y2 = np.array(y2, copy=False)

    if x1.shape != y1.shape:
        raise ValueError('x1 and y1 do not match!')
    if x2.shape != y2.shape:
        raise ValueError('x2 and y2 do not match!')

    # this is equivalent to, but faster than just doing np.array([x1, y1])
    coords1 = np.empty((x1.size, 2))
    coords1[:, 0] = x1
    coords1[:, 1] = y1

    # this is equivalent to, but faster than just doing np.array([x2, y2])
    coords2 = np.empty((x2.size, 2))
    coords2[:, 0] = x2
    coords2[:, 1] = y2

    kdt = KDT(coords2)
    if nnearest == 1:
        ds,idxs2 = kdt.query(coords1)
    elif nnearest > 1:
        retval = kdt.query(coords1, nnearest)
        ds = retval[0]
        idxs2 = retval[1][:, -1]
    else:
        raise ValueError('invalid nnearest ' + str(nnearest))

    idxs1 = np.arange(x1.size)

    if tol is not None:
        msk = ds < tol
        idxs1 = idxs1[msk]
        idxs2 = idxs2[msk]
        ds = ds[msk]

    return idxs1, idxs2, ds
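
A minimal usage sketch for xymatch above, matching a jittered copy of a catalog back to itself (synthetic coordinates; KDT is assumed to be the cKDTree alias used by the original module):

import numpy as np

x1 = np.random.uniform(0, 1000, 500)
y1 = np.random.uniform(0, 1000, 500)
x2 = x1 + np.random.normal(0, 0.1, 500)          # jittered copy of the first catalog
y2 = y1 + np.random.normal(0, 0.1, 500)

idx1, idx2, ds = xymatch(x1, y1, x2, y2, tol=1.0)
print(len(idx1), 'matches within tol')
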
Ejemplo n.º 52
0
class Dataset:
    """
    SELFE Model Binary IO Functions

    Presently enables reading SELFE dataformat version 5.0 binary output files.
    Can read 2D & 3D scalar and vector variables.
    Usage Example:
    model = pyselfe.Dataset('1_hvel.64')
    [t,t_iter,eta,dp,data] = model.read_time_series()
    t = time in seconds
    t_iter = iteration number
    eta = water surface elevation
    dp = bathymetric depth
    data = 2D/3D variables

    @author Dharhas Pothina
    @version 0.2
    """
    def __init__(self, fname, nfiles=1):
        "Initialise by reading header information from file."

        self.fname = fname
        fid = open(fname, 'rb')
        self.read_header(fid)
        self.read_hgrid(fid)
        self.data_start_pos = fid.tell()
        self.compute_step_size()
        self.datadir = os.path.split(fname)[0]
        self.nfiles = nfiles

    def read_header(self, fid):
        """Read header information from SELFE binary output file."""

        # Read misc header info.
        self.data_format = fid.read(48)
        self.version = fid.read(48)
        self.start_time = fid.read(48)
        self.var_type = fid.read(48)
        self.var_dimension = fid.read(48)
        self.nsteps = io.fread(fid, 1, 'i')
        self.dt = io.fread(fid, 1, 'f')
        self.skip = io.fread(fid, 1, 'i')
        self.flag_sv = io.fread(fid, 1, 'i')
        self.flag_dm = io.fread(fid, 1, 'i')

        # @todo check when zDes needs to be read
        # self.zDes = io.fread(fid, 1, 'f').

        # Read vert grid info.
        self.nlevels = io.fread(fid, 1, 'i')
        self.kz = io.fread(fid, 1, 'i')
        self.h0 = io.fread(fid, 1, 'f')
        self.hs = io.fread(fid, 1, 'f')
        self.hc = io.fread(fid, 1, 'f')
        self.theta_b = io.fread(fid, 1, 'f')
        self.theta = io.fread(fid, 1, 'f')
        self.zlevels = io.fread(fid, self.kz, 'f')
        self.slevels = io.fread(fid, self.nlevels - self.kz, 'f')

    def read_hgrid(self, fid):
        """Read horizontal grid info from SELFE binary output file."""

        # Read dimensions.
        self.np = io.fread(fid, 1, 'i')
        self.ne = io.fread(fid, 1, 'i')

        # Read grid and bathymetry.
        pos = fid.tell()
        hgridtmp = io.fread(fid, 4 * self.np, 'f')
        self.x, self.y, self.dp, tmp1 = hgridtmp.reshape(self.np, 4).T

        # Read bottom index.
        fid.seek(pos)
        hgridtmp = io.fread(fid, 4 * self.np, 'i')
        tmp1, tmp2, tmp3, self.bot_idx = hgridtmp.reshape(self.np, 4).T

        # Read element connectivity list.
        self.elem = io.fread(fid, 4 * self.ne, 'i')
        self.elem = self.elem.reshape(self.ne, 4)[:, 1:4]

        # Create kdtree.
        self.kdtree = KDTree(list(zip(self.x, self.y)))

    def compute_step_size(self):
        """
        Compute the data block size to move one timestep within the file.

        """

        # Calculate grid size depending on whether dataset is 3D or 2D.
        if self.flag_dm == 3:
            # @todo check what needs to be done with bIdx (==0?)for dry nodes.
            bIdx = self.bot_idx
            bIdx[bIdx < 1] = 1
            self.grid_size = sum(self.nlevels - bIdx + 1)
        elif self.flag_dm == 2:
            self.grid_size = self.np
        # Compute step size.
        self.step_size = 2 * 4 + self.np * 4 + self.grid_size * 4 * self.flag_sv

    def read_time_series(self,
                         fname,
                         nodes=None,
                         levels=None,
                         xy=np.array([]),
                         nfiles=3,
                         sfile=1,
                         datadir=None):
        """
        Main function to extract a spatial and temporal slice of entire
        3D Time series.

        Returns [t,t_iter,eta,dp,data] where:
        t : time in seconds from simulation start
        t_iter : iteration number from simulation start
        eta : Surface water elevation time series
        dp : Bathymetry (depth of sea bed from MSL)
        data[t,nodes,levels,vars] : extracted data slice
        (i.e. Salinity, Temp, Velocity etc)

        Options:
        nodes : list of nodes to extract (default is all nodes)
        level : list of levels to extract (default is all levels)
        xy : array of x,y coordinates to extract (default is none)
        sfile : serial number of starting file (default is one)
        nfiles : number of files in data sequence (default is one)

        NOTE : node index starts at zero so add one to match up with node
        numbers in SELFE hgrid.gr3 file.

        """

        # Initialize vars.
        t = np.array([])
        t_iter = np.array([])
        eta = []
        data = []

        if nfiles is None:
            nfiles = self.nfiles

        if datadir is None:
            datadir = self.datadir

        # Convert xy points to list of nodes,
        # find parent elements &  calculate interpolation weights.
        if xy.size != 0:
            if xy.shape[1] != 2:
                sys.exit('xy array shape wrong.')
            nodes = np.array([], dtype='int32')
            arco = np.array([])
            for xy00 in xy:
                parent, tmparco, node3 = self.find_parent_element(
                    xy00[0], xy00[1])  # noqa
                nodes = np.append(nodes, node3 - 1)
                arco = np.append(arco, tmparco)

        # Set default for nodes to be all nodes.
        # Node index starts at zero.
        elif nodes is None:
            nodes = np.arange(self.np)

        # Set default for level to be all levels.
        if levels is None:
            levels = np.arange(self.nlevels)

        # Check whether 2D or 3D variable is being read.
        if self.flag_dm == 2:
            nlevs = 1
            levels = np.array([0])
        else:
            nlevs = self.nlevels

        # Read time series slice.
        for files in np.arange(sfile, sfile + nfiles):
            try:
                fname1 = datadir + '/' + str(files) + '_' + fname
                fid = open(fname1, 'rb')
                fid.seek(self.data_start_pos)
                for i in np.arange(self.nsteps):
                    t = np.append(t, io.fread(fid, 1, 'f'))
                    t_iter = np.append(t_iter, io.fread(fid, 1, 'i'))
                    eta.append(io.fread(fid, self.np, 'f'))
                    tmpdata = io.fread(fid, self.flag_sv * self.grid_size, 'f')
                    tmpdata = tmpdata.reshape(self.np, nlevs, self.flag_sv)
                    # Only keep requested slice of tmpdata.
                    # i.e. tmpdata[nodes, levels, var]
                    tmpdata = tmpdata[nodes, :, :]
                    tmpdata = tmpdata[:, levels, :]
                    data.append(tmpdata)
            except:
                continue
        # import pdb; pdb.set_trace()
        eta = np.column_stack(eta[:]).T
        eta = eta[:, nodes]
        data = np.array(data)
        dp = self.dp[nodes]

        # Convert nodal values back to xy point values if needed.
        if xy.size != 0:
            # Not sure about this. Need to look at it in more detail; it was
            # put in to remove a shape error.
            # try:
            tmpdata = np.zeros((data.shape[0], data.shape[1] // 3,
                                data.shape[2], data.shape[3])) / 0.  # noqa
            # except:
            #     tmpdata = np.zeros((data.shape[0], data.shape[1]//3, data.shape[2]))/0.  # noqa
            tmpeta = np.zeros((eta.shape[0], eta.shape[1] // 3)) / 0.
            tmpdp = np.zeros(dp.shape[0] // 3) / 0.
            for i in range(xy.shape[0]):
                n1 = i * 3
                n2 = n1 + 1
                n3 = n2 + 1
                tmpdata[:, i, :, :] = (data[:, n1, :, :] * arco[n1] +
                                       data[:, n2, :, :] * arco[n2] +
                                       data[:, n3, :, :] * arco[n3])
                tmpeta[:, i] = (eta[:, n1] * arco[n1] + eta[:, n2] * arco[n2] +
                                eta[:, n3] * arco[n3])
                tmpdp[i] = (dp[n1] * arco[n1] + dp[n2] * arco[n2] +
                            dp[n3] * arco[n3])
            data = tmpdata
            eta = tmpeta
            dp = tmpdp

        return t, t_iter, eta, dp, data

    def find_parent_element(self, x00, y00):
        """
        Find Parent Element of a given (x,y) point and calculate
        interpolation weights.

        Uses brute force search through all elements.
        Calculates whether point is internal/external to element by comparing
        summed area of sub triangles with area of triangle element.
        @todo implement binary tree search for efficiency

        Returns:
        parent, arco, node3 : parent element number, interpolation weights and element
        node numbers.

        """
        def signa(x1, x2, x3, y1, y2, y3):
            "Return signed area of triangle."
            return (((x1 - x3) * (y2 - y3) - (x2 - x3) * (y1 - y3)) / 2)

        parent = -1
        nm = self.elem.view()
        out = np.zeros(3) / 0.
        x = self.x.view()
        y = self.y.view()
        for i in np.arange(self.ne):
            aa = 0
            ar = 0  # Area.
            for j in np.arange(3):
                j1 = j + 1
                j2 = j + 2
                if (j1 > 2):
                    j1 = j1 - 3
                if (j2 > 2):
                    j2 = j2 - 3
                n0 = nm[i,
                        j] - 1  # Zero based index rather than 1 based index.
                n1 = nm[i, j1] - 1
                n2 = nm[i, j2] - 1
                # Temporary storage.
                out[j] = signa(x[n1], x[n2], x00, y[n1], y[n2], y00)
                aa = aa + abs(out[j])
                if (j == 0):
                    ar = signa(x[n1], x[n2], x[n0], y[n1], y[n2], y[n0])

            if (ar <= 0):
                sys.exit('Negative area:' + str(ar))

            ae = abs(aa - ar) / ar
            if (ae <= 1.e-5):
                parent = i
                node3 = nm[i, 0:3]
                arco = out[0:3] / ar
                arco[1] = max(0., min(1., arco[1]))
                arco[2] = max(0., min(1., arco[2]))
                if (arco[0] + arco[1] > 1):
                    arco[2] = 0
                    arco[1] = 1 - arco[0]
                else:
                    arco[2] = 1 - arco[0] - arco[1]
                break
        if (parent == -1):
            sys.exit('Cannot find a parent:' + str(x00) + ',' + str(y00))
        else:
            print('Parent Element :', parent + 1, ' ,Nodes: ', node3)
            return parent, arco, node3

    def compute_relative_rec(self, node, level):
        """
        Computes offset for extracting particular node/level.
        NOTE THIS FUNCTION NOT COMPLETE/TESTED.

        """
        count = 0
        step_size = np.zeros((self.np, self.nlevels, self.flag_sv)) / 0.  # shape must be passed as a tuple
        for i in range(self.np):
            for k in range(max(1, self.bot_idx[i]), self.nlevels):
                for m in range(self.flag_sv):
                    count = count + 1
                    step_size[i, k, m] = count

    def read_time_series_xy(self,
                            variable,
                            x,
                            y,
                            sigma_level='middle',
                            return_eta=False):
        """
        Finds nearest 3 nodes to x,y and returns the average value.

        """
        xy = np.hstack((x, y))
        dist, nodes = self.kdtree.query(xy, k=3)
        data = []

        if sigma_level == 'average':
            t, t_iter, eta, dp, data = self.read_time_series(
                variable, nodes=nodes)  # noqa
            eta = eta.mean(axis=1)
            data = data[:, :, :, 0].mean(axis=2).mean(axis=1)
            # Take average of all levels and then 3 nodes for now.
            # Implement idw or area weighted a average later.
            data = data.mean(axis=1).mean(axis=1)
            if return_eta:
                return np.column_stack((t, data)), np.column_stack((t, eta))
            else:
                return np.column_stack((t, data))

        elif sigma_level == 'top':
            sigma_level = 0
        elif sigma_level == 'bottom':
            sigma_level = self.nlevels - 1
        elif sigma_level == 'middle':
            sigma_level = self.nlevels // 2

        t, t_iter, eta, dp, data = self.read_time_series(variable,
                                                         nodes=nodes,
                                                         levels=sigma_level)
        eta = eta.mean(axis=1)
        data = data[:, :, 0, :].mean(axis=1)
        # data.mean(axis=1).shape[:, 0, :]
        # Take average of all levels and then 3 nodes for now.
        # Implement IDW or area-weighted averaging later.
        # data = data.mean(axis=1)
        # import pdb; pdb.set_trace()
        if return_eta:
            return np.column_stack((t, data)), np.column_stack((t, eta))
        else:
            return np.column_stack((t, data))
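    # Hedged usage sketch (assumes an instance `out` of this reader class and an
    # output variable name that exists in the files, e.g. 'elev'):
    #   ts = out.read_time_series_xy('elev', x=350000.0, y=290000.0, sigma_level='top')
    #   # ts[:, 0] is time; the remaining column(s) hold the 3-node average of the variable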
    filter = filters[ff]
    x_drc_low = low_x[ff]
    y_drc_low = low_y[ff]
    xm_flc_low = flc_all['xdrc_low_'+filter]
    ym_flc_low = flc_all['ydrc_low_'+filter]

    coords1low = np.empty((xm_flc_low.size,2))
    coords2low = np.empty((x_drc_low.size,2))

    coords1low[:,0] = xm_flc_low
    coords1low[:,1] = ym_flc_low

    coords2low[:,0] = x_drc_low
    coords2low[:,1] = y_drc_low

    kdt = KDT(coords2low)
    idxs2 = kdt.query(coords1low)[1]

    ds = distArr(xm_flc_low,ym_flc_low,x_drc_low[idxs2],y_drc_low[idxs2])

    idxs1 = np.arange(xm_flc_low.size)

    msk = ds < matchtol
    idxs1 = idxs1[msk]
    idxs2 = idxs2[msk]
    ds = ds[msk]

    outfile = outDir+'hor-I-cut_drc_low_'+filter+'_tol1.txt'
    np.savetxt(outfile, idxs2, fmt='%4i')

    outfile = outDir+'hor-I-cut_flc_low_'+filter+'_tol1.txt'
Ejemplo n.º 54
0
def spherematch(ra1, dec1, ra2, dec2, tol=None, nnearest=1):
    """
    Finds matches in one catalog to another.

    Parameters
    ----------
    ra1 : array-like
        Right Ascension in degrees of the first catalog
    dec1 : array-like
        Declination in degrees of the first catalog (shape of array must match
        `ra1`)
    ra2 : array-like
        Right Ascension in degrees of the second catalog
    dec2 : array-like
        Declination in degrees of the second catalog (shape of array must match
        `ra2`)
    tol : float or None, optional
        How close (in degrees) a match has to be to count as a match.  If None,
        all nearest neighbors for the first catalog will be returned.
    nnearest : int, optional
        The nth neighbor to find.  E.g., 1 for the nearest neighbor, 2 for the
        second nearest neighbor, etc.  Particularly useful if you want to get
        the nearest *non-self* neighbor of a catalog.  To do this, use:
        ``spherematch(ra, dec, ra, dec, nnearest=2)``

    Returns
    -------
    idx1 : int array
        Indices into the first catalog of the matches. Will never be
        longer than `ra1`/`dec1`.
    idx2 : int array
        Indices into the second catalog of the matches. Will never be
        longer than `ra1`/`dec1`.
    ds : float array
        Distance (in degrees) between the matches
    """

    ra1 = np.array(ra1, copy=False)
    dec1 = np.array(dec1, copy=False)
    ra2 = np.array(ra2, copy=False)
    dec2 = np.array(dec2, copy=False)

    if ra1.shape != dec1.shape:
        raise ValueError('ra1 and dec1 do not match!')
    if ra2.shape != dec2.shape:
        raise ValueError('ra2 and dec2 do not match!')

    x1, y1, z1 = _spherical_to_cartesian(ra1.ravel(), dec1.ravel())

    # this is equivalent to, but faster than just doing np.array([x1, y1, z1])
    coords1 = np.empty((x1.size, 3))
    coords1[:, 0] = x1
    coords1[:, 1] = y1
    coords1[:, 2] = z1

    x2, y2, z2 = _spherical_to_cartesian(ra2.ravel(), dec2.ravel())

    # this is equivalent to, but faster than just doing np.array([x2, y2, z2])
    coords2 = np.empty((x2.size, 3))
    coords2[:, 0] = x2
    coords2[:, 1] = y2
    coords2[:, 2] = z2

    kdt = KDT(coords2)
    if nnearest == 1:
        idxs2 = kdt.query(coords1)[1]
    elif nnearest > 1:
        idxs2 = kdt.query(coords1, nnearest)[1][:, -1]
    else:
        raise ValueError('invalid nnearest ' + str(nnearest))

    ds = _great_circle_distance(ra1, dec1, ra2[idxs2], dec2[idxs2])

    idxs1 = np.arange(ra1.size)

    if tol is not None:
        msk = ds < tol
        idxs1 = idxs1[msk]
        idxs2 = idxs2[msk]
        ds = ds[msk]

    return idxs1, idxs2, ds
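# A minimal usage sketch, assuming ra/dec arrays in degrees from two catalogs;
# a 1-arcsecond match tolerance is 1.0 / 3600.0 degrees:
#   idx1, idx2, ds = spherematch(ra1, dec1, ra2, dec2, tol=1.0 / 3600.0)
#   matched_ra1, matched_ra2 = ra1[idx1], ra2[idx2]
# To get each source's nearest non-self neighbour within a single catalog:
#   idx1, idx2, ds = spherematch(ra1, dec1, ra1, dec1, nnearest=2)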
Ejemplo n.º 55
0
def keypoints_match_geometry(src_frame, dest_frame):
    """
	Finds keypoint matches based on scene geometry

	@return pairs, angles
	`pairs[n] = (src_pt_n, dest_pt_n)`
	`angles[n]` = angle btw rays of src_pt_n and dest_pt_n
	"""

    if hasattr(src_frame, 'kpt_proj_cloud'):
        helper_cloud = src_frame.kpt_proj_cloud
    else:
        helper_cloud = build_point_cloud_for_projection(src_frame)

    # spatial and perspective projection src_frame -> dest_frame
    spatial_mat = dest_frame.world_to_camera @ src_frame.camera_to_world
    full_projection_mat = dest_frame.intrinsic_mat @ spatial_mat[:3, :]

    # project and retrieve keypoints
    helper_projected = projection_apply_rowvec(full_projection_mat,
                                               helper_cloud)
    proj_pts, proj_sizes, proj_orientations = kptproj_interpret_projected_vectors(
        helper_projected)

    # find pairs of neighbours in radius of MATCH_DISTANCE
    tree_proj = KDTree(proj_pts)
    tree_dest = KDTree(dest_frame.kpt_locs)
    match_suggestions = tree_dest.query_ball_tree(tree_proj, r=MATCH_DISTANCE)
    matches = []

    # kpt_matched_id[n] = id of point in src_frame which matches n
    #frame.kpt_matched_id = np.zeros(frame.pt_count, dtype=np.int32)
    #frame.kpt_matched_id[:] = -1 # no match

    #print(src_frame.pt_count, dest_frame.pt_count, len(match_suggestions))

    for dest_pt_idx, suggestions in enumerate(match_suggestions):

        dest_pt_size = dest_frame.kpt_sizes[dest_pt_idx]

        for src_pt_idx in suggestions:
            proj_size = proj_sizes[src_pt_idx]

            if ((max(dest_pt_size / proj_size, proj_size / dest_pt_size) <
                 MATCH_SIZE_DIFF_RELATIVE) and (angular_distance_abs(
                     dest_frame.kpt_orientations[dest_pt_idx],
                     proj_orientations[src_pt_idx]) < MATCH_ANGLE_DIFF)):
                matches.append((src_pt_idx, dest_pt_idx))
                #frame.kpt_matched_id[pt_idx] = src_pt_idx
                break

    if len(matches) == 0:
        return AttrDict(
            pairs=np.zeros((0, 2), dtype=np.int32),
            angles=np.zeros(0, dtype=np.float32),
        )
    else:
        # store matched pairs
        match_pairs = np.array(matches, dtype=np.int32)
        # sort by src_point_id
        match_pairs = match_pairs[np.argsort(match_pairs[:, 0]), :]

        view_angle_changes = derive_keypoint_view_angle_change(
            src_frame, dest_frame, match_pairs)

        return AttrDict(
            pairs=match_pairs,
            angles=view_angle_changes,
        )
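# Hedged usage sketch (assumes `frame_a` and `frame_b` expose the keypoint and
# camera attributes referenced above, e.g. kpt_locs, kpt_sizes, intrinsic_mat):
#   result = keypoints_match_geometry(frame_a, frame_b)
#   src_ids, dest_ids = result.pairs[:, 0], result.pairs[:, 1]
#   # result.angles[n] is the view-angle change between the matched keypoint rays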
def remove_ind(reference_pop, removal_size, removal_type):
    begin_time = time.time()

    if removal_type == 'random':
        # reference_pop is a numpy array of size (n_reference_pop, pop_dim)
        reference_pop = list(reference_pop)
        # now reference_pop is a list of numpy arrays (each defining one individual)
        random.shuffle(reference_pop)  # shuffle the list
        # pop last removal_size individuals
        for _ in range(removal_size):
            reference_pop.pop()
        # turn back to numpy array
        reference_pop = np.array(reference_pop)

    if removal_type == 'least_novel':
        # compute novelties of reference_pop inside reference_pop
        novelties = assess_novelties(reference_pop, reference_pop)
        removal_indices = np.argpartition(novelties,
                                          removal_size)[:removal_size]

        # # plot the reference pop
        # fig = plt.figure(figsize=(5, 5))
        # ax = fig.add_subplot(111)
        # ax.scatter(reference_pop[:, 0], reference_pop[:, 1], label='reference')
        # ax.scatter(reference_pop[removal_indices, 0], reference_pop[removal_indices, 1], label='removed',
        #            marker='x', color='red')
        # ax.set_facecolor("#ffebb8")
        # ax.set_title('Least novel removal', fontsize=15)
        # plt.xlim(0, 1)
        # plt.ylim(0, 1)
        # plt.legend()
        # plt.show()

        reference_pop = np.delete(reference_pop, removal_indices, 0)

    if removal_type == 'least_novel_iter':
        removal_indices = []
        temp_ref_pop = copy.deepcopy(reference_pop)
        for j in range(removal_size):
            # compute novelties of reference_pop inside reference_pop
            novelties = assess_novelties(temp_ref_pop, temp_ref_pop)
            remov_idx = np.argmin(novelties)
            remov_ind = temp_ref_pop[remov_idx]
            removal_indices.append(np.where(reference_pop == remov_ind)[0][0])
            temp_ref_pop = np.vstack(
                (temp_ref_pop[:remov_idx], temp_ref_pop[remov_idx + 1:]))

        # # plot the reference pop
        # fig = plt.figure(figsize=(5, 5))
        # ax = fig.add_subplot(111)
        # ax.scatter(reference_pop[:, 0], reference_pop[:, 1], label='reference')
        # ax.scatter(reference_pop[removal_indices, 0], reference_pop[removal_indices, 1], label='removed',
        #            marker='x', color='red')
        # ax.set_facecolor("#ffebb8")
        # ax.set_title('Least novel removal', fontsize=15)
        # plt.xlim(0, 1)
        # plt.ylim(0, 1)
        # plt.legend()
        # plt.show()

        reference_pop = np.delete(reference_pop, removal_indices, 0)

    if removal_type == 'most_novel':
        # compute novelties of reference_pop inside reference_pop
        novelties = assess_novelties(reference_pop, reference_pop)
        removal_indices = np.argpartition(novelties,
                                          -removal_size)[-removal_size:]

        # # plot the reference pop
        # fig = plt.figure(figsize=(5, 5))
        # ax = fig.add_subplot(111)
        # ax.scatter(reference_pop[:, 0], reference_pop[:, 1], label='reference')
        # ax.scatter(reference_pop[removal_indices, 0], reference_pop[removal_indices, 1], label='removed',
        #            marker='x', color='red')
        # ax.set_facecolor("#ffebb8")
        # ax.set_title('Least novel removal', fontsize=15)
        # plt.xlim(0, 1)
        # plt.ylim(0, 1)
        # plt.legend()
        # plt.show()

        reference_pop = np.delete(reference_pop, removal_indices, 0)

    if removal_type == 'most_novel_iter':
        removal_indices = []
        temp_ref_pop = copy.deepcopy(reference_pop)
        for j in range(removal_size):
            # compute novelties of reference_pop inside reference_pop
            novelties = assess_novelties(temp_ref_pop, temp_ref_pop)
            remov_idx = np.argmax(novelties)
            remov_ind = temp_ref_pop[remov_idx]
            removal_indices.append(np.where(reference_pop == remov_ind)[0][0])
            temp_ref_pop = np.vstack(
                (temp_ref_pop[:remov_idx], temp_ref_pop[remov_idx + 1:]))

        # # plot the reference pop
        # fig = plt.figure(figsize=(5, 5))
        # ax = fig.add_subplot(111)
        # ax.scatter(reference_pop[:, 0], reference_pop[:, 1], label='reference')
        # ax.scatter(reference_pop[removal_indices, 0], reference_pop[removal_indices, 1], label='removed',
        #            marker='x', color='red')
        # ax.set_facecolor("#ffebb8")
        # ax.set_title('Least novel removal', fontsize=15)
        # plt.xlim(0, 1)
        # plt.ylim(0, 1)
        # plt.legend()
        # plt.show()

        reference_pop = np.delete(reference_pop, removal_indices, 0)

    if removal_type == 'gmm_sampling':
        # hypothesis: n_components equals generative number of components
        n_comp = N
        gmix = mixture.GaussianMixture(n_components=n_comp,
                                       covariance_type='full')
        gmix.fit(reference_pop)
        nodes = gmix.sample(removal_size)[0]
        k_tree = KDTree(reference_pop)
        removal_indices = []
        for node in nodes:
            # for each node, find the closest point in the reference pop
            cond = True
            closest = 1
            # make sure the removal individual was not already chosen
            while cond:
                if closest == 1:
                    possible_removal_index = k_tree.query(node, closest)[1]
                else:
                    possible_removal_index = k_tree.query(
                        node, closest)[1][closest - 1]
                if possible_removal_index not in removal_indices:
                    removal_indices.append(possible_removal_index)
                    cond = False
                else:
                    closest += 1

        # # plot the reference pop
        # fig = plt.figure(figsize=(5, 5))
        # ax = fig.add_subplot(111)
        # ax.scatter(reference_pop[:, 0], reference_pop[:, 1], label='reference')
        # ax.scatter(reference_pop[removal_indices, 0], reference_pop[removal_indices, 1], label='removed',
        #            marker='x', color='red')
        # ax.set_facecolor("#ffebb8")
        # ax.set_title('GMM removal', fontsize=15)
        # plt.xlim(0, 1)
        # plt.ylim(0, 1)
        # plt.legend()
        # plt.show()

        reference_pop = np.delete(reference_pop, removal_indices, 0)

    if removal_type == 'grid':
        n_dim = reference_pop.shape[1]
        # compute maximums and minimums on each dimension
        maximums = np.max(reference_pop, 0)
        minimums = np.min(reference_pop, 0)
        ranges = maximums - minimums
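        # exp(log(removal_size) / n_dim) is removal_size ** (1 / n_dim); flooring it
        # and adding 1 yields bins_per_dim, so the (bins_per_dim - 1) ** n_dim grid
        # nodes built below never exceed removal_size (any shortfall is handled later).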
        bins_per_dim = math.floor(math.exp(math.log(removal_size) / n_dim)) + 1
        grid_positions = []
        for i in range(n_dim):
            # important choice on how we make the grid
            grid_position = [
                minimums[i] + ((j + 1) * ranges[i] / bins_per_dim)
                for j in range(bins_per_dim)
            ]
            grid_position.pop()
            grid_positions.append(grid_position)
        mesh = np.meshgrid(*grid_positions)
        nodes = list(zip(*(dim.flat for dim in mesh)))
        nodes = np.array(nodes)

        k_tree = KDTree(reference_pop)
        removal_indices = []
        for node in nodes:
            # for each node, find the closest point in the reference pop
            cond = True
            closest = 1
            # make sure the removal individual was not already chosen
            while cond:
                if closest == 1:
                    possible_removal_index = k_tree.query(node, closest)[1]
                else:
                    possible_removal_index = k_tree.query(
                        node, closest)[1][closest - 1]
                if possible_removal_index not in removal_indices:
                    removal_indices.append(possible_removal_index)
                    cond = False
                else:
                    closest += 1
        # dealing with the missing removals
        nb_missing_removals = removal_size - len(nodes)
        for _ in range(nb_missing_removals):
            query = random.choice(nodes)
            cond = True
            # start with second closest since closest is for sure in removal indices
            closest = 2
            # make sure the removal individual was not already chosen
            while cond:
                possible_removal_index = k_tree.query(query,
                                                      closest)[1][closest - 1]
                if possible_removal_index not in removal_indices:
                    removal_indices.append(possible_removal_index)
                    cond = False
                else:
                    closest += 1

        # # plot the reference pop
        # fig = plt.figure(figsize=(5, 5))
        # ax = fig.add_subplot(111)
        # ax.scatter(reference_pop[:, 0], reference_pop[:, 1], label='reference')
        # ax.scatter(nodes[:, 0], nodes[:, 1], label='grid', marker='+', color='black')
        # ax.scatter(reference_pop[removal_indices, 0], reference_pop[removal_indices, 1], label='removed',
        #            marker='x', color='red')
        # ax.set_facecolor("#ffebb8")
        # ax.set_title('Grid removal', fontsize=15)
        # plt.xlim(0, 1)
        # plt.ylim(0, 1)
        # plt.legend()
        # plt.show()

        reference_pop = np.delete(reference_pop, removal_indices, 0)

    if removal_type == 'grid_density':
        n_dim = reference_pop.shape[1]
        # compute maximums and minimums on each dimension
        maximums = np.max(reference_pop, 0)
        minimums = np.min(reference_pop, 0)
        ranges = maximums - minimums
        bins_per_dim = math.floor(math.exp(math.log(N_CELLS) / n_dim)) + 1
        grid_positions = []
        for i in range(n_dim):
            # important choice on how we make the grid
            grid_position = [
                minimums[i] + (j * ranges[i] / (bins_per_dim - 1))
                for j in range(bins_per_dim)
            ]
            grid_positions.append(grid_position)
        mesh = np.meshgrid(*grid_positions)
        nodes = list(zip(*(dim.flat for dim in mesh)))
        nodes = np.array(nodes)

        removal_indices = []
        nb_cells = (bins_per_dim - 1)**n_dim
        grid_density = np.zeros(nb_cells)
        cells = [[] for _ in range(nb_cells)]

        for ind_idx, ind in enumerate(reference_pop):
            dim_indexs = np.zeros(n_dim)
            for i, dim in enumerate(ind):
                grid_pos = grid_positions[i]
                for j in range(bins_per_dim - 1):
                    if dim >= grid_pos[j] and dim < grid_pos[j + 1]:
                        dim_indexs[i] = j + 1
            if 0 not in dim_indexs:
                # individual is inside the grid
                dim_indexs = dim_indexs - 1
                cell_idx = 0
                for k, dim_idx in enumerate(dim_indexs):
                    cell_idx += int(dim_idx * ((bins_per_dim - 1)**k))
                grid_density[cell_idx] += 1
                cells[cell_idx].append(ind_idx)

        grid_density = grid_density / np.sum(grid_density)

        # TEST: square the grid_density to bias more towards high-density cells
        # grid_density = np.square(grid_density)

        grid_law = np.cumsum(grid_density)

        for _ in range(removal_size):
            dice = random.random() * grid_law[-1]
            cell_to_remove_from = np.searchsorted(grid_law, dice)
            cond = True
            n = 0
            while cond:
                if n < LIMIT_DENSITY_ITER:
                    removal_idx = random.choice(cells[cell_to_remove_from])
                else:
                    removal_idx = random.choice(list(range(
                        len(reference_pop))))
                if removal_idx not in removal_indices:
                    removal_indices.append(removal_idx)
                    cond = False
                n += 1

        # # plot the reference pop
        # fig = plt.figure(figsize=(5, 5))
        # ax = fig.add_subplot(111)
        # ax.scatter(reference_pop[:, 0], reference_pop[:, 1], label='reference')
        # ax.scatter(nodes[:, 0], nodes[:, 1], label='grid', marker='+', color='black')
        # ax.scatter(reference_pop[removal_indices, 0], reference_pop[removal_indices, 1], label='removed',
        #            marker='x', color='red')
        # ax.set_facecolor("#ffebb8")
        # ax.set_title('Grid density removal', fontsize=15)
        # plt.xlim(0, 1)
        # plt.ylim(0, 1)
        # plt.legend()
        # plt.show()

        reference_pop = np.delete(reference_pop, removal_indices, 0)

    end_time = time.time()
    removal_time = end_time - begin_time
    return reference_pop, removal_time
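# Hedged usage sketch (assumes `reference_pop` is an (n, d) array of behaviour
# descriptors; the 'gmm_sampling' and 'grid_density' strategies additionally rely
# on the module-level constants N, N_CELLS and LIMIT_DENSITY_ITER defined elsewhere):
#   smaller_pop, elapsed = remove_ind(reference_pop, removal_size=100,
#                                     removal_type='least_novel')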
Ejemplo n.º 57
0
def tsne(fdarray,
         new_label='tsne',
         channels=None,
         transform='arcsinh',
         sample=6000,
         verbose=False,
         backgate=True):
    """Perform t-SNE/viSNE on the FlowData object
    
    """

    fdarray = util.make_list(fdarray)

    # If the user has not provided a list of channels to use,
    # use the intersection of all isotope channels
    if channels is None:
        channel_set = []
        for fd in fdarray:
            channel_set.append(set(fd.isotopes))
        channels = list(set.intersection(*channel_set))

    # Make a copy of the data in files that we want
    points = []
    for fd in fdarray:
        points.append(np.vstack([fd[ch] for ch in channels]).T)

    # transform
    if transform == 'arcsinh':
        for pts in points:
            # Apply the transform inplace to the data
            np.arcsinh(5 * pts, pts)

    # Randomly sample to reduce the number of points
    sample_masks = []
    for pts in points:
        if sample < pts.shape[0]:
            # If we have enough points to subsample
            sample_masks.append(
                np.random.choice(pts.shape[0], sample, replace=False))
        else:
            # Otherwise we add all the points
            sample_masks.append(np.array(range(pts.shape[0])))

    # Sample the points, and construct a large matrix
    sample_points = []
    for mask, pts in zip(sample_masks, points):
        sample_points.append(pts[mask, :])
    X = np.vstack(sample_points)

    # Perform t-SNE
    Y = lib_tsne.tsne(X, verbose=verbose)
    assert Y is not None, ('t-SNE failed to return')

    # Split Y into a matrix for each dataset
    splits = np.cumsum(
        np.array([mask.shape[0] for mask in sample_masks], dtype=int))
    Y_split = np.split(Y, splits, axis=0)

    # now expand data to reassign these points back into the dataset
    tsne_coords = []
    for (pts, mask, Yspt) in zip(points, sample_masks, Y_split):
        npoints = pts.shape[0]
        Z = np.zeros((npoints, 2)) * float('NaN')
        Z[mask, :] = Yspt
        tsne_coords.append(Z)

    # If a point didn't get sampled, place its t-SNE coordinates at its nearest
    # neighbor.
    if backgate:
        kd = KDTree(X)
        # select points not assigned values with t-SNE
        for pts, mask, coords, j in zip(points, sample_masks, tsne_coords,
                                        range(len(points))):
            nan_points = np.argwhere(np.isnan(coords[:, 0]))
            d, near = kd.query(pts[nan_points], 1)
            # convert back to coordinates on the whole dataset
            coords[nan_points, :] = Y[near, :]
            tsne_coords[j] = coords
    # add to data to FlowData structure
    for fd, coords in zip(fdarray, tsne_coords):
        fd[new_label + '1'] = coords[:, 0]
        fd[new_label + '2'] = coords[:, 1]
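# Hedged usage sketch (assumes `fd` is a FlowData object with isotope channels):
#   tsne(fd, new_label='tsne', sample=6000, backgate=True)
#   embedding = np.column_stack((fd['tsne1'], fd['tsne2']))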
Ejemplo n.º 58
0
class VoronoiClosestPolytope:
    def __init__(self,
                 polytopes,
                 key_vertices_count=0,
                 process_count=8,
                 max_number_key_points=None):
        '''
        Compute the closest polytope using Voronoi cells
        :param polytopes:
        '''
        self.init_start_time = default_timer()
        self.section_start_time = self.init_start_time
        self.polytopes = np.asarray(polytopes, dtype='object')
        self.type = self.polytopes[0].type
        self.process_count = process_count
        self.key_vertices_count = key_vertices_count
        if self.type == 'AH_polytope':
            self.dim = self.polytopes[0].t.shape[0]
        elif self.type == 'zonotope':
            self.dim = self.polytopes[0].x.shape[0]
        else:
            raise NotImplementedError
        if self.key_vertices_count > 0:
            self.key_points = np.zeros([
                len(self.polytopes) * (1 + 2**self.key_vertices_count),
                self.dim
            ])
        else:
            self.key_points = np.zeros([len(self.polytopes), self.dim])
        for i, z in enumerate(polytopes):
            if self.type == 'AH_polytope':
                if self.key_vertices_count > 0:
                    raise NotImplementedError
                else:
                    self.key_points[i, :] = self.polytopes[i].t[:, 0]
            elif self.type == 'zonotope':
                if self.key_vertices_count > 0:
                    block = 2**self.key_vertices_count + 1
                    self.key_points[i * block, :] = self.polytopes[i].x[:, 0]
                    self.key_points[i * block + 1:(i + 1) * block, :] = \
                        get_k_random_edge_points_in_zonotope(
                            self.polytopes[i], self.key_vertices_count)
                else:
                    self.key_points[i, :] = self.polytopes[i].x[:, 0]
            else:
                raise NotImplementedError
        if max_number_key_points:
            # sample the key points
            n = self.key_points.shape[0]
            chosen_key_points = np.random.choice(n,
                                                 size=min(
                                                     n, max_number_key_points),
                                                 replace=False)
            self.key_points = self.key_points[chosen_key_points, :]
            # print(self.key_points.shape)
        # stores the candidate closest polytopes associated with each Voronoi cell (keyed by key point)
        self.key_point_to_polytope_map = dict()
        for key_point in self.key_points:
            ds = np.zeros(self.polytopes.shape[0])
            self.key_point_to_polytope_map[str(key_point)] = np.rec.fromarrays(
                [self.polytopes, ds], names=('polytopes', 'distances'))

        self.build_cell_polytope_map_default()

        #build kd-tree for centroids
        self.key_point_tree = KDTree(self.key_points)
        print(('Completed precomputation in %f seconds' %
               (default_timer() - self.init_start_time)))

    def build_cell_polytope_map_default(self):
        polytope_key_point_indices = np.array(
            np.meshgrid(np.arange(self.polytopes.shape[0]),
                        np.arange(self.key_points.shape[0]))).T.reshape(-1, 2)
        arguments = []
        for i in polytope_key_point_indices:
            arguments.append(
                (self.key_points, self.key_point_to_polytope_map, i[0], i[1]))
        p = Pool(self.process_count)
        pca = p.map(set_polytope_pair_distance, arguments)
        polytope_key_point_arrays = np.asarray(pca).reshape(
            (self.polytopes.shape[0]), self.key_points.shape[0])
        # print(polytope_centroid_arrays)
        # compute pairwise distances of the centroids and the polytopes
        #fixme
        for key_point_index, key_point in enumerate(self.key_points):
            key_point_string = str(key_point)
            for polytope_index, polytope in enumerate(
                    self.key_point_to_polytope_map[key_point_string]
                ['polytopes']):
                self.key_point_to_polytope_map[str(key_point)].distances[
                    polytope_index] = polytope_key_point_arrays[
                        polytope_index, key_point_index]
                # print(polytope_key_point_arrays[polytope_index, key_point_index])
            self.key_point_to_polytope_map[key_point_string].sort(
                order='distances')
            # print(self.centroid_to_polytope_map[centroid_string])

    def find_closest_polytope(self,
                              query_point,
                              return_intermediate_info=False):
        #find the closest centroid
        d, i = self.key_point_tree.query(query_point)
        closest_key_point = self.key_point_tree.data[i]
        # print('closest key point', closest_key_point)
        closest_key_point_polytope = self.key_point_to_polytope_map[str(
            closest_key_point)]['polytopes'][0]
        # print('closest polytope centroid' + str(closest_key_point_polytope.x))
        dist_query_centroid_polytope = distance_point_polytope(
            closest_key_point_polytope, query_point, ball='l2')[0]
        dist_query_key_point = np.linalg.norm(query_point - closest_key_point)
        # print(dist_query_key_point, dist_query_centroid_polytope)
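        # Triangle-inequality pruning: any polytope P closer to the query than the
        # key point's own polytope satisfies
        #   d(key_point, P) <= d(key_point, query) + d(query, P)
        #                   <= dist_query_key_point + dist_query_centroid_polytope,
        # so candidates whose stored distance exceeds this bound can be skipped.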
        cutoff_index = np.searchsorted(
            self.key_point_to_polytope_map[str(closest_key_point)].distances,
            dist_query_key_point + dist_query_centroid_polytope)
        # print(cutoff_index)
        # print(self.key_point_to_polytope_map[str(closest_key_point)]['distances'][0:cutoff_index])
        # print(self.key_point_to_polytope_map[str(closest_key_point)]['distances'][cutoff_index:])
        # print('dqc',dist_query_key_point)
        # print(self.centroid_to_polytope_map[str(closest_key_point)].distances)
        closest_polytope_candidates = self.key_point_to_polytope_map[str(
            closest_key_point)].polytopes[0:cutoff_index]
        # print(closest_polytope_candidates)
        best_polytope = None
        best_distance = np.inf
        for polytope in closest_polytope_candidates:
            if best_distance < 1e-9:
                break
            dist = distance_point_polytope(polytope, query_point, ball='l2')[0]
            if best_distance > dist:
                best_distance = dist
                best_polytope = polytope
        # print('best distance', best_distance)
        if return_intermediate_info:
            return best_polytope, best_distance, closest_polytope_candidates
        return best_polytope
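# Hedged usage sketch (assumes `zonotopes` is a list of zonotope objects of the
# type handled above):
#   vcp = VoronoiClosestPolytope(zonotopes, process_count=4)
#   nearest = vcp.find_closest_polytope(np.zeros(vcp.dim))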
Ejemplo n.º 59
0
def correlate_neighbourhood(calcium_signal: np.ndarray,
                            kd_tree: cKDTree,
                            center_ix: int,
                            init_radius=0.02,
                            max_radius=.08,
                            min_corr=.5,
                            step=0.01,
                            measure=correlation,
                            verbose=True):
    """
    Given a center neuron and parameters of the neighbourhood definition, tries to group neurons.
    The basic idea is:
    1. Look at all neurons within a given radius of the center neurons,
    2. Correlate their calcium signal to the center's.
    3. Keep sufficiently highly correlated neurons as being part of the group.
    4. Compute the fraction correlated / all neighboring neurons
    5. Move the center to the neuron closest to the center of mass of this group
    6. Increase slightly the radius and start again.
    7. As long as the fraction of correlated neurons is not dropping significantly, keep on increasing the radius
    8. Label the neurons as being part of this group. If some were already part of another group,
       they belong to the biggest group

    Parameters
    ----------
    calcium_signal
    kd_tree
    center_ix
    init_radius
    max_radius
    min_corr
    step
    measure
    verbose

    Returns
    -------

    """
    FRAC_DEC = .95
    radii = np.arange(init_radius, max_radius, step)
    radius = radii[0]  # not necessary due to loop?
    frac_corr = 0
    w_correlated = np.array([])
    for radius in radii:
        neighbors_ix, _ = get_neighbors(kd_tree, center_ix, radius)
        if len(neighbors_ix) == 0:  # one neuron left so no neighbours
            break
        corr_neigh = measure(calcium_signal, center_ix, neighbors_ix)
        # Fraction of correlated neurons in the neighbourhood
        correlated = corr_neigh >= min_corr
        n_correlated = np.sum(correlated)
        new_frac_corr = n_correlated / len(corr_neigh)
        if verbose:
            print(
                f'Number of neurons: {len(corr_neigh)} ; fraction correlated: {new_frac_corr * 100:.2f}% ;'
                f' Correlated neurons: {np.sum(correlated)}')
        # More correlations than before
        if new_frac_corr >= FRAC_DEC * frac_corr and n_correlated > 2:  # 100
            frac_corr = new_frac_corr
            w_correlated = neighbors_ix[correlated]
            centroid = np.mean(kd_tree.data[w_correlated, :], 0)
            _, center_ix = kd_tree.query(centroid, 1)
        else:
            break
    if radius == radii[-1]:
        # print('\t >>> Reached maximum radius <<<')
        pass
    return w_correlated
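# Hedged usage sketch (assumes `signals` is an (n_neurons, n_timepoints) array,
# `positions` an (n_neurons, 2) array of neuron coordinates in the same units as
# the radii above, and that the `get_neighbors` helper and `correlation` measure
# used in the function are importable):
#   from scipy.spatial import cKDTree
#   tree = cKDTree(positions)
#   group = correlate_neighbourhood(signals, tree, center_ix=0, min_corr=0.5)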
Ejemplo n.º 60
0
coordsP[:, 0] = x_psf
coordsP[:, 1] = y_psf
coordsP[:, 2] = z_psf

coordsF[:, 0] = x_flc
coordsF[:, 1] = y_flc
coordsF[:, 2] = z_flc

########################################################################

# kdt = KDT(coordsF)
# idxsF = kdt.query(coordsP)[1]
# ds = distArr(x_psf,y_psf,z_psf,x_flc[idxsF],y_flc[idxsF],z_flc[idxsF])

kdt = KDT(coordsP)
idxsP = kdt.query(coordsF)[1]

ds = distArr(x_flc, y_flc, z_flc, x_psf[idxsP], y_psf[idxsP], z_psf[idxsP])

# print(len(ds))

idxsF = np.arange(x_flc.size)

msk = ds < matchtol
idxsF = idxsF[msk]
idxsP = idxsP[msk]
ds = ds[msk]

# print(len(idxs1))