# Imports needed to run these examples. The module paths below follow the
# standard storm-analysis package layout; adjust them if your checkout
# differs.
import math
import os

import numpy
from scipy.spatial import Voronoi
from shapely.geometry import Polygon

import storm_analysis
import storm_analysis.dbscan.clusters_sa_h5py as clSAH5Py
import storm_analysis.dbscan.dbscan_analysis as dbscanAnalysis
import storm_analysis.dbscan.dbscan_c as dbscanC
import storm_analysis.sa_library.sa_h5py as saH5Py
import storm_analysis.voronoi.voronoi_analysis as voronoiAnalysis


def test_cl_sa_h5py_2():
    """
    Test basic cluster file mechanics (using tracks).
    """
    tracks = {
        "x": numpy.arange(11, dtype=numpy.float),
        "y": numpy.arange(11, dtype=numpy.float)
    }

    filename = "test_clusters_sa_h5py.hdf5"
    h5_name = storm_analysis.getPathOutputTest(filename)
    storm_analysis.removeFile(h5_name)

    # Write track data.
    with saH5Py.SAH5Py(h5_name, is_existing=False) as h5:
        h5.setMovieInformation(1, 1, 2, "")
        h5.addTracks(tracks)

    # Write clustering data for tracks.
    cluster_id = numpy.remainder(numpy.arange(11), 3)
    cluster_data = {
        "track_id": numpy.zeros(11, dtype=numpy.int),
        "loc_id": numpy.arange(11)
    }

    cl_size = [0, 4, 4, 3]
    with clSAH5Py.SAH5Clusters(h5_name) as cl_h5:
        cl_h5.addClusters(cluster_id, cluster_data)

        assert (cl_h5.getNClusters() == (len(cl_size) - 1))
        for index, cluster in cl_h5.clustersIterator(skip_unclustered=False):
            for field in cluster:
                assert (cluster[field].size == cl_size[index])


def test_cl_sa_h5py_6():
    """
    Test getting all of the tracks for clustering.
    """
    tracks = {
        "category": numpy.arange(4, dtype=numpy.int32),
        "x": numpy.arange(4, dtype=numpy.float),
        "y": numpy.arange(4, dtype=numpy.float),
        "z": numpy.arange(4, dtype=numpy.float)
    }

    filename = "test_clusters_sa_h5py.hdf5"
    h5_name = storm_analysis.getPathOutputTest(filename)
    storm_analysis.removeFile(h5_name)

    # Write tracks data.
    with saH5Py.SAH5Py(h5_name, is_existing=False) as h5:
        h5.setMovieInformation(1, 1, 2, "")
        h5.setPixelSize(100.0)
        h5.addTracks(tracks)
        h5.addTracks(tracks)

    # Test getting all the tracking data.
    with clSAH5Py.SAH5Clusters(h5_name) as cl_h5:
        [x, y, z, c, cl_dict] = cl_h5.getDataForClustering()
        assert (numpy.allclose(x, cl_dict['loc_id']))
        assert (numpy.allclose(y, cl_dict['loc_id']))
        assert (numpy.allclose(z, cl_dict['loc_id']))
        assert (numpy.allclose(c, cl_dict['loc_id']))
        assert (numpy.allclose(cl_dict['track_id'],
                               numpy.array([0, 0, 0, 0, 1, 1, 1, 1])))


def test_cl_sa_h5py_5():
    """
    Test getting all of the localizations for clustering.
    """
    locs = {
        "category": numpy.arange(4, dtype=numpy.int32),
        "x": numpy.arange(4, dtype=numpy.float),
        "y": numpy.arange(4, dtype=numpy.float)
    }

    filename = "test_clusters_sa_h5py.hdf5"
    h5_name = storm_analysis.getPathOutputTest(filename)
    storm_analysis.removeFile(h5_name)

    # Write localization data.
    with saH5Py.SAH5Py(h5_name, is_existing=False) as h5:
        h5.setMovieInformation(1, 1, 5, "")
        h5.setPixelSize(100.0)
        h5.addLocalizations(locs, 1)
        h5.addLocalizations(locs, 3)

    # Test getting all the localization data.
    with clSAH5Py.SAH5Clusters(h5_name) as cl_h5:
        [x, y, z, c, cl_dict] = cl_h5.getDataForClustering()
        assert (numpy.allclose(x, cl_dict['loc_id']))
        assert (numpy.allclose(y, cl_dict['loc_id']))
        assert (numpy.allclose(z, numpy.zeros(x.size)))
        assert (numpy.allclose(c, cl_dict['loc_id']))
        assert (numpy.allclose(cl_dict['frame'],
                               numpy.array([1, 1, 1, 1, 3, 3, 3, 3])))


def test_cl_sa_h5py_4():
    """
    Test cluster info string round trip.
    """
    locs = {
        "x": numpy.arange(10, dtype=numpy.float),
        "y": numpy.arange(10, dtype=numpy.float)
    }

    filename = "test_clusters_sa_h5py.hdf5"
    h5_name = storm_analysis.getPathOutputTest(filename)
    storm_analysis.removeFile(h5_name)

    # Write localization data.
    with saH5Py.SAH5Py(h5_name, is_existing=False) as h5:
        h5.setMovieInformation(1, 1, 2, "")
        h5.addLocalizations(locs, 1)

    # Write clustering data for localizations.
    cluster_id = numpy.remainder(numpy.arange(10), 3)
    cluster_data = {
        "frame": numpy.ones(10, dtype=numpy.int),
        "loc_id": numpy.arange(10)
    }

    info_string = "dbscan,eps,10.0,mc,5"
    with clSAH5Py.SAH5Clusters(h5_name) as cl_h5:
        cl_h5.addClusters(cluster_id, cluster_data)

        cl_h5.setClusteringInfo(info_string)
        assert (cl_h5.getClusteringInfo() == info_string)
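# The clustering info string round-tripped above is a flat comma separated
# list: the algorithm name followed by alternating parameter names and
# values. A minimal parsing sketch; parseClusteringInfo() is a hypothetical
# helper, not part of storm-analysis.
def parseClusteringInfo(info_string):
    parts = info_string.split(",")
    # parts[0] is the algorithm name, the rest alternate name / value.
    return parts[0], dict(zip(parts[1::2], parts[2::2]))

# parseClusteringInfo("dbscan,eps,10.0,mc,5")
#   -> ("dbscan", {"eps": "10.0", "mc": "5"})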
# Example #5
def findClusters(h5_name, eps, mc, ignore_z = True, ignore_category = True, z_factor = 1.0):
    """
    Perform DBSCAN clustering on an HDF5 localization file.

    h5_name - The name of the HDF5 file.
    eps - DBSCAN epsilon parameter (in nanometers).
    mc - DBSCAN minimum counts parameter, the minimum number of
         neighboring localizations required for cluster membership
         (DBSCAN's minPts).
    ignore_z - Ignore localization z position when clustering.
    ignore_category - Ignore localization category when clustering.
    z_factor - Weighting of Z versus X/Y position. A value of 0.5, for
               example, will make the clustering 1/2 as sensitive to
               Z position.

    Note: Because all of the x/y/z location information must be loaded
          into memory for the DBSCAN algorithm, there is a limit to
          the size of localization file that can be clustered.
    """
    with clSAH5Py.SAH5Clusters(h5_name) as cl_h5:
        [x, y, z, c, cl_dict] = cl_h5.getDataForClustering()

        if ignore_z:
            print("Warning! Clustering without using localization z value!")

        # Perform analysis without regard to category.
        if ignore_category:
            print("Warning! Clustering without regard to category!")
            c = numpy.zeros(c.size)

        # Convert data to nanometers
        pix_to_nm = cl_h5.getPixelSize()
        x_nm = x * pix_to_nm
        y_nm = y * pix_to_nm

        if ignore_z:
            z_nm = numpy.zeros(z.size)
        else:
            z_nm = 1000.0 * z
        
        # Cluster the data.
        labels = dbscanC.dbscan(x_nm, y_nm, z_nm, c, eps, mc, z_factor = z_factor)

        # Save the data. As an optimization we also save the x,y,z and
        # category values for each cluster with the cluster. Note that
        # these are the original units x/y in pixels and z in microns,
        # not the nanometer values used for clustering.
        #
        cl_dict["x"] = x
        cl_dict["y"] = y
        cl_dict["z"] = z
        cl_dict["category"] = c
        cl_h5.addClusters(labels, cl_dict)

        # Save clustering info.
        info = "dbscan,eps,{0:0.3f},mc,{1:d}".format(eps,mc)
        info += ",iz," + str(ignore_z)
        info += ",ic," + str(ignore_category)
        info += ",zf,{0:3f}".format(z_factor)
        cl_h5.setClusteringInfo(info)
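# A minimal usage sketch for the DBSCAN findClusters() defined above; the
# file name and the eps/mc values are illustrative assumptions, not
# recommendations. The default argument pins down this findClusters(),
# since a later example re-uses the name.
def exampleDbscanRun(h5_name="locs.hdf5", _findClusters=findClusters):
    # Cluster with an 80nm neighborhood radius and a minimum count of 10.
    _findClusters(h5_name, eps=80.0, mc=10)
    # The parameters used are saved in the file and can be read back.
    with clSAH5Py.SAH5Clusters(h5_name) as cl_h5:
        print(cl_h5.getClusteringInfo())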
# Example #6
def clusterStats(h5_name, min_size, verbose = True):
    """
    Creates a text file containing some common per-cluster statistics
    (center position, size in x/y/z, radius of gyration) and returns
    the name of the stats file.
    """
    with clSAH5Py.SAH5Clusters(h5_name) as cl_h5:
        pix_to_nm = cl_h5.getPixelSize()

        stats_name = os.path.splitext(h5_name)[0] + "_stats.txt"
        stats_fp = open(stats_name, "w")
        header = ["cluster", "cat", "size",
                  "x-center(nm)", "y-center(nm)", "z-center(nm)",
                  "size-x(nm)", "size-y(nm)", "size-z(nm)", "rg"]
        stats_fp.write(" ".join(header) + "\n")

        # Calculate cluster stats.
        for index, cluster in cl_h5.clustersIterator(min_size = min_size, fields = ["category", "x", "y", "z"]):
            c = cluster['category']
            x = pix_to_nm * cluster['x']
            y = pix_to_nm * cluster['y']
            
            if 'z' in cluster:
                z = 1000.0 * cluster['z']
            else:
                z = numpy.zeros(x.size)

            # Calculate size in x, y, z.
            sx = numpy.max(x) - numpy.min(x)
            sy = numpy.max(y) - numpy.min(y)
            sz = numpy.max(z) - numpy.min(z)

            # Calculate the radius of gyration, rg = sqrt(sum(|r_i|^2) / N),
            # where r_i is the offset of localization i from the cluster
            # center.
            cx = numpy.mean(x)
            cy = numpy.mean(y)

            rx = x - cx
            ry = y - cy

            # 3D radius of gyration if we have 'z' data.
            if 'z' in cluster:
                cz = numpy.mean(z)
                rz = z - cz
                rg = math.sqrt(numpy.sum(rx*rx + ry*ry + rz*rz) / float(x.size))

            # Otherwise 2D.
            else:
                rg = math.sqrt(numpy.sum(rx*rx + ry*ry) / float(x.size))

            if verbose:
                print("Cluster:", index, x.size, "localizations")
            stats = map(str, [index, c[0], x.size, numpy.mean(x), numpy.mean(y), numpy.mean(z), sx, sy, sz, rg])
            stats_fp.write(" ".join(stats) + "\n")
            
        stats_fp.close()

    return stats_name
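# A minimal usage sketch for clusterStats() above, assuming clustering has
# already been run on a file named "locs.hdf5". The stats file has one
# header line and one row per cluster; column 2 is the cluster size and
# columns 3-5 are the cluster center in nanometers (see the header list
# in clusterStats()).
def exampleClusterStats(h5_name="locs.hdf5"):
    stats_name = clusterStats(h5_name, min_size=10, verbose=False)
    # ndmin=2 keeps the array 2D even if there is only a single cluster.
    stats = numpy.loadtxt(stats_name, skiprows=1, ndmin=2)
    print("Largest cluster size:", int(numpy.max(stats[:, 2])))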
# Example #7
def test_voronoi_clustering_1():
    numpy.random.seed(1)

    filename = "test_clustering_sa_h5py.hdf5"
    h5_name = storm_analysis.getPathOutputTest(filename)
    storm_analysis.removeFile(h5_name)

    # Write tracks data.
    category = numpy.zeros(10, dtype=numpy.int32)
    x = 10.0 * numpy.arange(10)
    y = 10.0 * numpy.arange(10)
    z = numpy.zeros(10)

    with saH5Py.SAH5Py(h5_name, is_existing=False) as h5:
        h5.setMovieInformation(1, 1, 2, "")
        h5.setPixelSize(1.0)

        for i in range(100):
            tracks = {
                "category": category,
                "x": x + numpy.random.normal(scale=0.1, size=10),
                "y": y + numpy.random.normal(scale=0.1, size=10),
                "z": z + numpy.random.normal(scale=0.1, size=10)
            }

            h5.addTracks(tracks)

    # Cluster data with voronoi.
    voronoiAnalysis.findClusters(h5_name, 0.1, 10, verbose=False)

    # Check clustering results.
    with clSAH5Py.SAH5Clusters(h5_name) as cl_h5:
        assert (cl_h5.getNClusters() == 10)
        for index, cluster in cl_h5.clustersIterator(skip_unclustered=True,
                                                     fields=["x", "y", "z"]):
            for elt in ['x', 'y', 'z']:
                dev = numpy.std(cluster[elt])
                assert (dev > 0.07)
                assert (dev < 0.12)

    # Calculate common cluster statistics.
    stats_name = dbscanAnalysis.clusterStats(h5_name, 50, verbose=False)

    # Check statistics.
    stats = numpy.loadtxt(stats_name, skiprows=1)
    index = numpy.argsort(stats[:, 3])
    assert (stats.shape[0] == 10)
    assert (numpy.allclose(stats[:, 0], numpy.arange(10) + 1))
    assert (numpy.allclose(stats[:, 1], numpy.zeros(10)))
    assert (numpy.count_nonzero(
        numpy.greater(stats[:, 2], 80.0 * numpy.ones(10))) == 10)
    assert (numpy.allclose(stats[index, 3], x, rtol=0.2, atol=2.0))
    assert (numpy.allclose(stats[index, 4], y, rtol=0.2, atol=2.0))
    assert (numpy.allclose(stats[index, 5], z, rtol=0.2, atol=20.0))


def test_cl_sa_h5py_3():
    """
    Test that iterator behaves properly if there are no clusters.
    """
    tracks = {
        "x": numpy.arange(11, dtype=numpy.float),
        "y": numpy.arange(11, dtype=numpy.float)
    }

    filename = "test_clusters_sa_h5py.hdf5"
    h5_name = storm_analysis.getPathOutputTest(filename)
    storm_analysis.removeFile(h5_name)

    # Write track data.
    with saH5Py.SAH5Py(h5_name, is_existing=False) as h5:
        h5.setMovieInformation(1, 1, 2, "")
        h5.addTracks(tracks)

    with clSAH5Py.SAH5Clusters(h5_name) as cl_h5:
        # No clusters were added, so the iterator should yield nothing.
        for index, cluster in cl_h5.clustersIterator(skip_unclustered=False):
            assert False
# Example #9
def findClusters(h5_name, density_factor, min_size, verbose=True):
    """
    Perform Voronoi diagram based clustering on an HDF5 localization file.

    h5_name - The localizations HDF5 file.
    density_factor - Multiple of the median local density required for a
                     localization to be a cluster member.
    min_size - The minimum number of localizations a cluster can have.
    """

    with clSAH5Py.SAH5Clusters(h5_name) as cl_h5:
        [x, y, z, c, cl_dict] = cl_h5.getDataForClustering()

        n_locs = x.size
        labels = numpy.zeros(n_locs, dtype=numpy.int32) - 1
        density = numpy.zeros(n_locs)

        # Convert data to nanometers
        pix_to_nm = cl_h5.getPixelSize()
        x_nm = x * pix_to_nm
        y_nm = y * pix_to_nm
        points = numpy.column_stack((x_nm, y_nm))

        if verbose:
            print("Creating Voronoi object.")
        vor = Voronoi(points)

        if verbose:
            print("Calculating 2D region sizes.")
        for i, region_index in enumerate(vor.point_region):
            if ((i % 10000) == 0) and verbose:
                print("Processing point", i)

            vertices = []
            for vertex in vor.regions[region_index]:

                # A vertex index of -1 means the region is open (unbounded),
                # i.e. it is at the edge of the diagram and has no well
                # defined area, so skip it.
                if (vertex == -1):
                    vertices = []
                    break

                vertices.append(vor.vertices[vertex])

            if (len(vertices) > 0):
                area = Polygon(vertices).area
                density[i] = 1.0 / area

        # Use a median density based threshold.
        ave_density = numpy.median(density)
        if verbose:
            print("Min density", numpy.amin(density))
            print("Max density", numpy.amax(density))
            print("Median density", ave_density)

        # Record the neighbors of each point. The Voronoi cells are polygons
        # with one neighbor per side, so no point should have very many
        # neighbors; 40 should be more than enough.
        #
        max_neighbors = 40
        neighbors = numpy.zeros((n_locs, max_neighbors), dtype=numpy.int32) - 1
        neighbors_counts = numpy.zeros((n_locs), dtype=numpy.int32)

        if verbose:
            print("Calculating neighbors")
        for ridge_p in vor.ridge_points:

            p1 = ridge_p[0]
            p2 = ridge_p[1]

            # Add p2 to the list for p1
            neighbors[p1, neighbors_counts[p1]] = p2
            neighbors_counts[p1] += 1

            # Add p1 to the list for p2
            neighbors[p2, neighbors_counts[p2]] = p1
            neighbors_counts[p2] += 1

        # Debugging check (disabled): print the neighbor list of the first
        # point and of that point's first neighbor.
        if False:
            n1 = neighbors[0, :]
            print(n1)
            print(neighbors[n1[0], :])

        # Mark connected points that meet the minimum density criteria.
        if verbose:
            print("Marking connected regions")
        min_density = density_factor * ave_density
        visited = numpy.zeros(n_locs, dtype=numpy.int32)

        def neighborsList(index):
            nlist = []
            for i in range(neighbors_counts[index]):
                loc_index = neighbors[index, i]
                if (visited[loc_index] == 0):
                    nlist.append(neighbors[index, i])
                    visited[loc_index] = 1
            return nlist

        cluster_id = 0
        for i in range(n_locs):
            if (visited[i] == 0):
                visited[i] = 1
                if (density[i] > min_density):
                    cluster_elt = [i]
                    c_size = 1
                    to_check = neighborsList(i)
                    while (len(to_check) > 0):

                        # Remove last localization from the list.
                        loc_index = to_check[-1]
                        to_check = to_check[:-1]

                        # If the localization has sufficient density add to cluster and
                        # check neighbors.
                        if (density[loc_index] > min_density):
                            to_check += neighborsList(loc_index)
                            cluster_elt.append(loc_index)
                            c_size += 1

                        # Mark as visited.
                        visited[loc_index] = 1

                    # Mark the cluster if there are enough localizations in the cluster.
                    if (c_size > min_size):
                        if verbose:
                            print("cluster", cluster_id, "size", c_size)
                        for elt in cluster_elt:
                            labels[elt] = cluster_id
                        cluster_id += 1

        if verbose:
            print(cluster_id, "clusters")

        # Save the clustering results.
        cl_dict["x"] = x
        cl_dict["y"] = y
        cl_dict["z"] = z
        cl_dict["density"] = density
        cl_dict["category"] = c
        cl_h5.addClusters(labels, cl_dict)

        # Save clustering info.
        info = "voronoi,df,{0:0.3f},ms,{1:d}".format(density_factor, min_size)
        cl_h5.setClusteringInfo(info)
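# A minimal usage sketch for the Voronoi findClusters() above; the file
# name and parameter values are illustrative assumptions. density_factor
# of 2.0 requires twice the median density for cluster membership, and
# min_size of 30 discards small clusters.
def exampleVoronoiRun(h5_name="locs.hdf5"):
    findClusters(h5_name, density_factor=2.0, min_size=30, verbose=False)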


def test_cl_sa_h5py_7():
    """
    Test getting all fields or only the requested fields.
    """
    tracks = {
        "x": numpy.arange(11, dtype=numpy.float),
        "y": numpy.arange(11, dtype=numpy.float)
    }

    filename = "test_clusters_sa_h5py.hdf5"
    h5_name = storm_analysis.getPathOutputTest(filename)
    storm_analysis.removeFile(h5_name)

    # Write track data.
    with saH5Py.SAH5Py(h5_name, is_existing=False) as h5:
        h5.setMovieInformation(1, 1, 2, "")
        h5.addTracks(tracks)

    # Write clustering data for tracks.
    cluster_id = numpy.remainder(numpy.arange(11), 3)
    cluster_data = {
        "track_id": numpy.zeros(11, dtype=numpy.int),
        "loc_id": numpy.arange(11)
    }

    with clSAH5Py.SAH5Clusters(h5_name) as cl_h5:
        cluster_data["x"] = tracks["x"] + 1
        cl_h5.addClusters(cluster_id, cluster_data)

        # Check that we get all the fields.
        for index, cluster in cl_h5.clustersIterator():
            cl_fields = cluster.keys()
            assert (len(cl_fields) == 4)
            assert ("x" in cl_fields)
            assert ("y" in cl_fields)
            assert ("loc_id" in cl_fields)
            assert ("track_id" in cl_fields)

            # This checks that we got the original 'x', not the 'x'
            # saved with the cluster. This is a verification that
            # we did not shortcut.
            assert (index == int(cluster["x"][0]) + 1)

            # Check that values are in the right order.
            for i in range(cluster["x"].size):
                assert (cluster["x"][i] == cluster["y"][i])

        # Check getting fields that are available in the cluster.
        for index, cluster in cl_h5.clustersIterator(fields=["x", "loc_id"]):
            cl_fields = cluster.keys()
            assert (len(cl_fields) == 2)
            assert ("x" in cl_fields)
            assert (not "y" in cl_fields)
            assert ("loc_id" in cl_fields)
            assert (not "track_id" in cl_fields)

            # This checks that we got the 'x' saved with the cluster.
            # This is a verification that we did shortcut.
            assert (index == int(cluster["x"][0]))

        # Check getting fields that are not available in the cluster.
        for index, cluster in cl_h5.clustersIterator(fields=["y"]):
            cl_fields = cluster.keys()
            assert (len(cl_fields) == 1)
            assert (not "x" in cl_fields)
            assert ("y" in cl_fields)
            assert (not "loc_id" in cl_fields)
            assert (not "track_id" in cl_fields)

        # Check getting a mix of fields.
        for index, cluster in cl_h5.clustersIterator(fields=["x", "y"]):
            cl_fields = cluster.keys()
            assert (len(cl_fields) == 2)
            assert ("x" in cl_fields)
            assert ("y" in cl_fields)
            assert (not "loc_id" in cl_fields)
            assert (not "track_id" in cl_fields)

            # This checks that we got the 'x' saved with the cluster.
            assert (index == int(cluster["x"][0]))

            # Check that values are in the right order.
            for i in range(cluster["x"].size):
                assert (cluster["x"][i] == cluster["y"][i] + 1)