Ejemplo n.º 1
 def maximin_design_obj(y, vert=None):
     """Return the negated minimum distance of a candidate maximin design.

     :param ndarray y: Flattened design; N points in n dimensions are passed
         as an array of size N*n.
     :param ndarray vert: Two-dimensional array of fixed vertices with n
         columns.  Despite the ``None`` default it is required.
     :return: ``-min(d)`` where ``d`` runs over all point-to-point distances
         (self-distances excluded) and all point-to-vertex distances.
     """
     _, n = vert.shape
     # Floor division: ``y.size / n`` is a float on Python 3 and cannot be
     # used as an array shape.
     N = y.size // n
     Y = y.reshape((N, n))
     # The large diagonal offset keeps each point's zero self-distance from
     # winning the minimum.
     D0 = distance_matrix(Y, Y) + 1e4*np.eye(N)
     D1 = distance_matrix(Y, vert)
     return -np.amin(np.hstack((D0.flatten(), D1.flatten())))
Ejemplo n.º 2
def _maximin_design_obj(y, vert=None):
    """Objective function for the maximin design optimization.

    :param ndarray y: Contains the coordinates of the points in the design. If
        there are N points in n dimensions then `y` is shape ((Nn, )).
    :param ndarray vert: Contains the fixed vertices defining the zonotope.
        Despite the ``None`` default it is required.

    :return: The negated minimum squared distance.
    :rtype: float

    **Notes**

    This function returns the minimum squared distance between all points in
    the design and between points and vertices, negated so that a minimizer
    maximizes that distance.
    """
    _, n = vert.shape
    # Floor division: ``y.size / n`` is a float on Python 3 and cannot be
    # used as an array shape.
    N = y.size // n
    Y = y.reshape((N, n))

    # get minimum distance among points; the large diagonal offset hides
    # each point's zero distance to itself
    D0 = distance_matrix(Y, Y) + 1e5*np.eye(N)
    d0star = np.amin(np.power(D0.flatten(), 2))

    # get minimum distance between points and vertices
    D1 = distance_matrix(Y, vert)
    d1star = np.amin(np.power(D1.flatten(), 2))

    dstar = np.amin([d0star, d1star])
    return -dstar
Ejemplo n.º 3
def euclDist_infl(subject):
    """Save pair-wise Euclidean distances on a subject's inflated surface.

    For each hemisphere, every fsaverage5 cortex-label node is matched to
    the closest vertex of the subject's sphere surface; the distances
    between the matched vertices on the subject's inflated surface are
    written to ``<surfDir>/<subject>/distance_maps`` with ``np.save``.

    :param str subject: subject directory name below the FreeSurfer and
        surface directories hard-coded in this function.
    """
    import numpy as np
    import nibabel.freesurfer.io as fs
    from scipy.spatial import distance_matrix
    fsDir = '/afs/cbs.mpg.de/projects/mar004_lsd-lemon-preproc/freesurfer'
    surfDir = '/afs/cbs.mpg.de/projects/mar005_lsd-lemon-surf/probands'
    for hemi in ['lh', 'rh']:
        # fsaverage5 coords on sphere
        fsa5_sphere_coords = fs.read_geometry('%s/fsaverage5/surf/%s.sphere' % (fsDir, hemi))[0]
        cort = fs.read_label('%s/fsaverage5/label/%s.cortex.label' % (fsDir, hemi))
        # get corresponding nodes on subject sphere (find coords of high-dim subject surface closest to fsa5 nodes in sphere space)
        subj_sphere_coords = fs.read_geometry('%s/%s/surf/%s.sphere' % (fsDir, subject, hemi))[0]
        subj_indices = []
        for node in cort:
            dist2all = np.squeeze(distance_matrix(np.expand_dims(fsa5_sphere_coords[node], axis=0), subj_sphere_coords))
            # Keep the closest subject vertex; without this the index list
            # stays empty and the assignment below cannot work.
            subj_indices.append(np.argmin(dist2all))
        # pair-wise euclidean distance between included nodes on subject surface (midline)
        subj_surf_coords = fs.read_geometry('%s/%s/surf/%s.inflated' % (fsDir, subject, hemi))[0]
        # One row/column per fsaverage5 node; nodes outside the cortex
        # label keep a distance of zero.
        n_nodes = fsa5_sphere_coords.shape[0]
        euclDist = np.zeros((n_nodes, n_nodes))
        matched_coords = subj_surf_coords[subj_indices, :]
        euclDist[np.ix_(cort, cort)] = distance_matrix(matched_coords, matched_coords)
        np.save('%s/%s/distance_maps/%s_%s_euclDist_inflated_fsa5' % (surfDir, subject, subject, hemi), euclDist)
Ejemplo n.º 4
def test_distance_matrix_looping():
    """Check that the thresholded (looping) path matches the default one.

    A ``threshold`` of 1 is passed so that ``distance_matrix`` cannot take
    its large-temporary path; both results must agree.
    """
    m = 10
    n = 11
    k = 4
    xs = np.random.randn(m,k)
    ys = np.random.randn(n,k)
    ds = distance_matrix(xs,ys)
    dsl = distance_matrix(xs,ys,threshold=1)
    # Without this comparison the test could never fail.
    np.testing.assert_allclose(ds, dsl)
Ejemplo n.º 5
    def _update_point_movement(self, points):
        """Refresh the cached distance and kernel matrices when inputs changed.

        Nothing happens when ``points`` is the very object cached in
        ``self.last_points`` and the control points are not flagged stale.

        :param points: evaluation points, passed to ``distance_matrix``
            together with ``self.control_points``.
        """
        points_changed = points is not self.last_points
        controls_stale = self.control_points_need_update
        if not (points_changed or controls_stale):
            return

        if controls_stale:
            # Every third parameter entry, starting at the separation index,
            # fills one coordinate column of the control points.
            for column in range(3):
                start = column + self.parameter_separation_index
                self.control_points[:, column] = self.parameter[start::3]
            control_distances = distance_matrix(self.control_points, self.control_points)
            self.K = self.kernel_function(control_distances)
            self.control_points_need_update = False

        self.last_points = points
        self.last_distance_matrix = distance_matrix(points, self.control_points)
        # The kernel is evaluated on squared point-to-control distances here.
        squared_distances = self.last_distance_matrix ** 2
        self.last_kernel_matrix = self.kernel_function(squared_distances)
        self.kernel_deriv_matrix_needs_update = True
Ejemplo n.º 6
def create_dataset_artificial(size1, size2, same=True, sigma1=None,
                              sigma2=None, verbose=False):
    """This function creates two adjacency matrices graphs whose
    respective number of nodes is size1 and size2, respectively.

    The graphs refer to 2D clouds of point where the edges, i.e. the
    values of the adjacency matrices, are similarities between points
    defined as s(x1, x2) = exp(-d(x1,x2)**2 / sigma**2) where d() is
    the Euclidean distance and sigma is either provided by the user or
    defined as the median distance between the points.

    If 'same' is True, then the smaller cloud of points is a subset of
    the larger cloud, i.e. the corresponding graphs have a perfect
    subgraph match.

    Returns the tuple ``(A, B, X1, X2)``: the two similarity matrices
    followed by the two point clouds they were computed from.
    """
    print("Dateset creation.")
    if same:
        # One shared cloud: both graphs are leading sub-blocks of it.
        X = np.random.rand(max([size1, size2]), 2)
        X1 = X[:size1]
        X2 = X[:size2]
        dm = distance_matrix(X, X)
        dm1 = dm[:size1, :size1]
        dm2 = dm[:size2, :size2]
        # Median over the strict upper triangle, i.e. distinct pairs only.
        sigma = np.median(dm[np.triu_indices(dm.shape[0], 1)])
        if sigma1 is None:
            sigma1 = sigma

        if sigma2 is None:
            sigma2 = sigma

    else:
        # Two independent clouds, each with its own default bandwidth.
        X1 = np.random.rand(size1, 2)
        X2 = np.random.rand(size2, 2)
        dm1 = distance_matrix(X1, X1)
        dm2 = distance_matrix(X2, X2)
        if sigma1 is None:
            sigma1 = np.median(dm1[np.triu_indices(size1, 1)])

        if sigma2 is None:
            sigma2 = np.median(dm2[np.triu_indices(size2, 1)])

    if verbose:
        print("create_dataset_artificial: sigma1=%s ,sigma2=%s" %
              (sigma1, sigma2))

    A = np.exp(- dm1 * dm1 / (sigma1 ** 2))
    B = np.exp(- dm2 * dm2 / (sigma2 ** 2))

    return A, B, X1, X2
    def setUp(self):
        """Create a 10-point fixture and its pairwise distance matrix."""
        self._num_points, self._pop_size = 10, 5

        # The generator output doubles as both operands of the matrix.
        self._data = TSPGenerator(self._num_points).generate()
        points = self._data
        self._distances = distance_matrix(points, points)
Ejemplo n.º 8
Archivo: kmeans.py Proyecto: ezass/mltk
def _kmeans(data, threshold, centroids, verbose):
    """The *raw* version of k-means.

    :param ndarray data: samples, one per row.
    :param float threshold: iteration stops once the decrease of the cost
        ``J`` between two rounds falls below this value.
    :param ndarray centroids: initial centres, one per row.  Updated
        **in place** and also returned.
    :param bool verbose: print the cost after every iteration.
    :return: ``(centroids, labels, J)`` with ``J`` the summed squared
        distance of every sample to its assigned centre.
    """
    # initialize J
    Jprev = float('inf')
    # initialize iteration count
    n_iter = 0

    # iterations
    while True:
        # calculate the distance from x to each centroids
        dist = distance_matrix(data, centroids)
        # assign x to nearst centroids
        labels = dist.argmin(axis=1)
        # re-calculate each center
        for j in range(len(centroids)):
            members = data[labels == j]
            # An empty cluster keeps its previous centre; the mean of zero
            # rows would turn it into NaN.
            if len(members):
                centroids[j] = members.mean(axis=0)
        # calculate J
        # Note, if you would like to compare the J here to that
        # of k-medoids, here should be
        #   (((...).sum(axis=1))**0.5).sum()
        J = ((data-centroids[labels])**2).sum()

        n_iter += 1
        if verbose:
            print('[kmeans] iter %d (J=%.4f)' % (n_iter, J))

        # Converged: the cost no longer drops by at least ``threshold``.
        if Jprev-J < threshold:
            break
        Jprev = J

    return centroids, labels, J
Ejemplo n.º 9
    def predictedPoint(self, x, y, model, coords, values, invg):
        r"""Prediction of the Big Kriging for a point \o/

        Parameters
        ----------
        x, y : floats
               coordinates of the desired predicted point
        model : Model
                what model to use (and not your favorite color!)
        coords : ndarray
                 original grid coordinates
        values : ndarray
                 original grid values, ordered like coords
        invg : the resulting inverse gamma matrix based on model and coords

        Returns
        -------
        ndarray
            ``[x, y, v, e]`` where
            x, y : coordinates of the desired predicted point
            v    : the predicted value
            e    : the standard error
        """
        dist = spatial.distance_matrix(coords, [[x, y],])
        # Column of model values with a trailing 1 appended.
        gg = np.vstack([np.asarray(model.func(dist)), [1,]])
        # Plain arrays with np.dot replace the deprecated np.matrix algebra;
        # np.asarray also accepts an ``invg`` that is still an np.matrix.
        weights = np.dot(np.asarray(invg), gg)
        # The last weight pairs with the appended 1, not with a grid value.
        v = np.sum( values[:, np.newaxis]*weights[:-1] )
        e = np.sqrt( abs(np.sum(gg.ravel()*weights.ravel())) )
        return np.asarray([x, y, v, e])
Ejemplo n.º 10
def _maximin_design_grad(y, vert=None):
    """Gradient of objective function for the maximin design optimization.

    Parameters
    ----------
    y : ndarray
        contains the coordinates of the points in the design. If there are N
        points in n dimensions then `y` is shape ((Nn, )).
    vert : ndarray
        contains the fixed vertices defining the zonotope. Despite the
        ``None`` default it is required.

    Returns
    -------
    ndarray
        Array of shape ((Nn, )) holding the negated gradient of the minimum
        squared distance with respect to `y`.
    """
    Ny, n = vert.shape
    v = vert.reshape((Ny*n, ))

    # Floor division: a float N (Python 3 ``/``) cannot be used as a shape.
    N = y.size // n
    Y = y.reshape((N, n))

    # get minimum distance among points
    D0 = distance_matrix(Y, Y) + 1e5*np.eye(N)
    d0 = np.power(D0.flatten(), 2)
    d0star, k0star = np.amin(d0), np.argmin(d0)

    # get minimum distance between points and vertices
    D1 = distance_matrix(Y, vert)
    d1 = np.power(D1.flatten(), 2)
    d1star, k1star = np.amin(d1), np.argmin(d1)

    g = np.zeros((N*n, ))
    if d0star < d1star:
        # Closest pair is two design points: both of them move.
        istar, jstar = divmod(int(k0star), N)

        for k in range(n):
            g[istar*n + k] = 2*(y[istar*n + k] - y[jstar*n + k])
            g[jstar*n + k] = 2*(y[jstar*n + k] - y[istar*n + k])

    else:
        # Closest pair is a design point and a fixed vertex: only the
        # design point has a gradient.
        istar, jstar = divmod(int(k1star), Ny)

        for k in range(n):
            g[istar*n + k] = 2*(y[istar*n + k] - v[jstar*n + k])

    return -g
Ejemplo n.º 11
def covariance(X, Z, h):
    '''Gaussian-kernel covariance matrix between the rows of two matrices.

        Input: two matrices X and Z (one point per row) and the
               bandwidth h.
        Output: matrix whose (i, j) entry is
                exp(-||X[i] - Z[j]||**2 / (2*h*h)).'''
    pairwise = spatial.distance_matrix(X,Z)
    exponent = -(pairwise**2) / (2*h*h)
    return np.exp(exponent)
Ejemplo n.º 12
def _choose_bf_metering_pos(positions):
    """Return the name of the most central position.

    ``positions`` maps a name to a coordinate sequence of which only the
    first two entries (x, y) are used.  The chosen position is the one
    with the smallest summed distance to its 8 closest neighbours,
    because that position is likely right in the middle.
    """
    pos_names, pos_values = zip(*positions.items())
    xys = numpy.array(pos_values)[:,:2]
    distances = spatial.distance_matrix(xys, xys)
    # Sort every row so the nearest neighbours come first; slicing the
    # unsorted matrix would use the first 8 positions, not the 8 closest.
    # Column 0 of the sorted row is the zero self-distance.
    nearest_first = numpy.sort(distances, axis=1)
    distance_sums = nearest_first[:, 1:9].sum(axis=1)
    return pos_names[distance_sums.argmin()]
Ejemplo n.º 13
    def setUp(self):
        """Create a 10-point fixture, its distance matrix and a population."""
        self._num_points, self._pop_size = 10, 20

        self._data = TSPGenerator(self._num_points).generate()
        points = self._data
        self._distances = distance_matrix(points, points)

        # The population generator is fed the first row of the matrix.
        population_source = SimplePopulationGenerator(self._pop_size)
        first_row = self._distances[0]
        self._population = population_source.generate(first_row)
Ejemplo n.º 14
def getDistances(comparisonSet,data):
    """Return a list built from neighbour distances of the entries of ``data``.

    NOTE(review): as shown, nothing is ever added to ``answer`` and ``dist``
    is never assigned, so the function raises ``NameError`` for non-empty
    ``data`` -- the statements that compute ``dist`` and fill ``answer``
    appear to be missing.  ``searchRadius`` and ``getNeighbours`` are not
    defined in this function either; presumably they are module-level
    names -- TODO confirm.
    """
    answer = []
    for i in range(len(data)):
        # Look up neighbours for the i-th (data[0][i], data[1][i]) pair.
        searchSet = getNeighbours(data[0][i],data[1][i],comparisonSet,searchRadius)
        # NOTE(review): `dist` is undefined here; `searchSet` is unused.
        dist0 = dist[:,0]
        # Keep only the entries strictly below the search radius.
        dist0  = dist0[dist0<searchRadius]
    return answer
Ejemplo n.º 15
def test_distance_matrix():
    """Check the shape and every entry of ``distance_matrix``.

    Each entry ``ds[i, j]`` is compared with the Euclidean norm of
    ``xs[i] - ys[j]`` computed independently.
    """
    m = 10
    n = 11
    k = 4
    xs = np.random.randn(m,k)
    ys = np.random.randn(n,k)
    ds = distance_matrix(xs,ys)
    assert_equal(ds.shape, (m,n))
    for i in range(m):
        for j in range(n):
            # The default metric (p=2) is the Euclidean norm.
            np.testing.assert_almost_equal(np.linalg.norm(xs[i] - ys[j]),
                                           ds[i, j])
Ejemplo n.º 16
 def __init__(self, control_points, kernel_function):
     """Store the control points and kernel and set up the parameter state.

     :param control_points: points handed to ``distance_matrix``; three
         parameters are allocated per control point.
     :param kernel_function: callable applied to the control-point
         distance matrix; its ``support`` attribute sets the bounds.
     """
     self.control_points = control_points
     self.kernel_function = kernel_function
     control_distances = distance_matrix(self.control_points, self.control_points)
     self.K = self.kernel_function(control_distances)
     self.last_points = None
     self.kernel_deriv_matrix_needs_update = True

     # Identity parameters are all zeros.
     n_parameters = 3 * len(control_points)
     self._identity = numpy.zeros(n_parameters)
     self.parameter = self.identity.copy()

     # Two columns (lower, upper), one row per parameter, at -/+ support.
     self._bounds = numpy.c_[self.identity, self.identity]
     self._bounds[:, 0] = -self.kernel_function.support
     self._bounds[:, 1] = +self.kernel_function.support
def _sample_one_more(X, box, r):
    """Sample one more atom.

    :param ndarray X: coordinates of the atoms placed so far, one per row
        (may have zero rows).
    :param ndarray box: lower bounds in column 0 and upper bounds in
        column 1, one row per coordinate (three coordinates are drawn).
    :param float r: atom radius; the new atom must be farther than
        ``2 * r`` from every existing atom.
    :return: array of shape (1, 3) holding the accepted position.

    The function keeps drawing until a position is accepted, so it does
    not return if the box has no admissible position left.
    """
    lower = box[:, 0]
    extent = box[:, 1] - box[:, 0]
    while True:
        x = lower + extent * np.random.rand(1, 3)
        # With no atoms placed yet there is nothing to collide with.
        if X.shape[0] == 0:
            return x
        d = spt.distance_matrix(X, x)
        if (d > 2.0 * r).all():
            return x
Ejemplo n.º 18
def create_dataset_artificial(size1, size2, same=True):
    """Create two Gaussian-similarity matrices from random 2D point clouds.

    With ``same`` true, both clouds are leading subsets of one shared
    cloud, so the smaller matrix is a sub-block of the larger one.
    Otherwise the clouds are drawn independently.  The bandwidth is the
    median of the corresponding full distance matrix.

    Returns the tuple ``(A, B)`` of shapes (size1, size1) and
    (size2, size2).
    """
    print("Dateset creation.")
    if same:
        X = np.random.rand(max([size1, size2]), 2)
        X1 = X[:size1]
        X2 = X[:size2]
        dm = distance_matrix(X, X)
        dm1 = dm[:size1, :size1]
        dm2 = dm[:size2, :size2]
        sigma1 = sigma2 = np.median(dm)
    else:
        # Without this branch the shared cloud above was always
        # overwritten by two independent ones.
        X1 = np.random.rand(size1, 2)
        X2 = np.random.rand(size2, 2)
        dm1 = distance_matrix(X1, X1)
        dm2 = distance_matrix(X2, X2)
        sigma1 = np.median(dm1)
        sigma2 = np.median(dm2)

    A = np.exp(- dm1 * dm1 / (sigma1 ** 2))
    B = np.exp(- dm2 * dm2 / (sigma2 ** 2))
    return A, B
Ejemplo n.º 19
def _farthest_points(points):
    """Order point indices by greedy farthest-point traversal.

    The traversal starts at the point closest to the lower-left corner of
    the bounding box; each following index is the point whose distance to
    its nearest already-selected point is largest.

    :param points: array-like with one point per row.
    :return: list with ``len(points)`` indices into ``points``.
    """
    points = numpy.asarray(points)
    bbox_lower_left = points.min(axis=0)
    lower_left = numpy.linalg.norm(points - bbox_lower_left, axis=1).argmin()
    selected = [lower_left]
    dist = spatial.distance_matrix(points, points)
    for _ in range(len(points) - 1):
        dist_to_selected = dist[selected]
        dist_to_nearest_selected = dist_to_selected.min(axis=0)
        farthest_from_selected = dist_to_nearest_selected.argmax()
        # Record the pick; otherwise only the seed index is ever returned.
        selected.append(farthest_from_selected)
    return selected
Ejemplo n.º 20
 def __init__(self, control_points, kernel_function):
     """Store the control points and kernel and set up the parameter state.

     The identity parameter vector has six entries per control point: the
     first half is zero and the second half holds the flattened control
     point coordinates.

     :param control_points: array of control points (``ravel`` is called
         on it).
     :param kernel_function: callable applied to the control-point
         distance matrix.
     """
     n_points = len(control_points)
     self.control_points = control_points
     self.control_points_need_update = False
     # Parameters from this index onwards hold control point coordinates.
     self.parameter_separation_index = n_points * 3
     self.kernel_function = kernel_function
     control_distances = distance_matrix(self.control_points, self.control_points)
     self.K = self.kernel_function(control_distances)
     self.last_points = None
     self.kernel_deriv_matrix_needs_update = True

     self._identity = numpy.zeros(n_points * 6)
     self._identity[3 * n_points:] = self.control_points.ravel()
     # The stored parameter copy is made read-only.
     self._parameter = self.identity.copy()
     self._parameter.flags.writeable = False
    def find_outliers_all(self):
        """Return the indices of the ``self.n_of_outliers`` most remote points.

        A point's remoteness is its summed distance to all points of
        ``self.points`` that have not been picked yet.  After each pick the
        picked point's contribution is removed from the remaining sums and
        the point is masked so it cannot be picked again.

        :return: list of indices into ``self.points``, in pick order.
        """
        distances_matrix = spsp.distance_matrix(self.points, self.points)
        outliers = []

        distances_vector = ma.masked_array(np.sum(distances_matrix, axis=1))
        for out in range(self.n_of_outliers):
            outlier = distances_vector.argmax()
            logging.debug("%d of %d", out + 1, self.n_of_outliers)
            # Record the pick; otherwise the returned list stays empty.
            outliers.append(outlier)
            distances_vector -= distances_matrix[:, outlier]
            distances_vector[outlier] = ma.masked
        return outliers
Ejemplo n.º 22
    def add_periodic_connections(self, pores1, pores2, apply_label='periodic'):
        r"""
        Accepts two sets of pores and connects them with new throats.  The
        connections are determined by pairing each pore in ``pores1`` with its
        nearest pore in ``pores2``.  For cubic Networks this will create
        pairings with pores directly across the domain from each other,
        assuming the input pores are 2D co-planar sets of pores.

        Parameters
        ----------
        pores_1 and pores_2 : array_like
            Lists of pores on the opposing faces which are to be linked to
            create periodicity.

        apply_label = string
            The label to apply to the newly created throats.  The default is
            'periodic'.

        Notes
        -----
        This method will raise an exception if the input pores do not create
        fully unique pairs.  Specifically, the length of pore_1 and pores_2
        must be the same AND each pore in pores_1 must pair up with one and
        only one pore in pores_2, and vice versa.  If these conditions are
        not met then periodicity cannot be acheived, and an exception is
        raised.

        """
        logger.debug('Creating periodic pores')
        if sp.shape(pores1)[0] != sp.shape(pores2)[0]:
            raise Exception('Unequal length inputs, periodicity not possible')
        p1 = self['pore.coords'][pores1]
        p2 = self['pore.coords'][pores2]
        dist_mat = sptl.distance_matrix(p1, p2)
        dist_min = sp.amin(dist_mat, axis=1, keepdims=True)
        [a, b] = sp.where(dist_mat == dist_min)
        pairs = sp.vstack([pores1[a], pores2[b]]).T
        # A pore with two equally near partners produces extra rows that
        # the unique-count checks below cannot detect.
        if sp.shape(pairs)[0] != sp.shape(pores1)[0]:
            raise Exception('Non-unique pairs found, periodicity not met')
        # Confirm that each pore in each list is only paired up once
        temp_1 = sp.unique(pairs[:, 0])
        if sp.shape(temp_1) < sp.shape(pores1):
            raise Exception('Non-unique pairs found, periodicity not met')
        temp_2 = sp.unique(pairs[:, 1])
        if sp.shape(temp_2) < sp.shape(pores2):
            raise Exception('Non-unique pairs found, periodicity not met')
        # Add throats to the network for the periodic connections
        self.extend(throat_conns=pairs, labels=apply_label)
        # Create a list which pores are connected which
        self['pore.periodic_neighbor'] = sp.nan
        self['pore.periodic_neighbor'][pairs[:, 0]] = pairs[:, 1]
        self['pore.periodic_neighbor'][pairs[:, 1]] = pairs[:, 0]
        logger.info('Periodic boundary pores added successfully')
Ejemplo n.º 23
 def remove_outliers(self):
     """Drop tracked points that lie unusually far from all the others.

     A point is kept when its summed distance to all points is within two
     standard deviations of the mean of those sums.  The filtered points
     are written back to ``self.new_points`` with shape (-1, 1, 2); the
     module-level flag ``redetect`` is set when fewer than 20 remain.
     """
     global redetect
     points = self.new_points.reshape(-1,2)
     dist_matrix = distance_matrix(points, points, p=2)
     sum_of_dist = dist_matrix.sum(axis=0)
     good_points = np.abs(sum_of_dist - np.mean(sum_of_dist)) < 2*np.std(sum_of_dist)
     # A boolean mask removes exactly the flagged rows, also when two
     # points share the same coordinates.
     points = points[good_points]
     if len(points) < 20:
         redetect = True
     self.new_points = points.reshape(-1, 1, 2)
Ejemplo n.º 24
def cluster_normals(normals, clusters):
    """Assign every normal to its nearest cluster centre (one-hot encoded).

    :param ndarray normals: array of shape (height, width, 3).
    :param ndarray clusters: array of shape (nb_clusters, 3).
    :return: array of shape (height, width, nb_clusters) holding 1 at the
        index of the closest cluster and 0 elsewhere.
    """
    # Take the image size from the input itself rather than from names
    # defined outside this function.
    height, width = normals.shape[:2]
    nb_clusters = clusters.shape[0]
    n_pixels = height * width

    # distances[p, c]: distance between the normal of pixel p and cluster c
    distances = spatial.distance_matrix(np.reshape(normals, (n_pixels, 3)),
                                        clusters)
    nearest = np.argmin(distances, axis=1)

    classif_normals = np.zeros((n_pixels, nb_clusters))
    classif_normals[np.arange(n_pixels), nearest] = 1

    return classif_normals.reshape((height, width, nb_clusters))
Ejemplo n.º 25
	def getDSorted(self, doSort = True):
		"""Recompute the cached distance matrix and its column-sorted copy.

		``self.D`` is rebuilt from the selected feature columns of
		``self.OrigDelaySeries`` whenever the selected feature indices
		differ from ``self.lastIdx``.  ``self.DSorted`` (each column of
		``self.D`` sorted ascending) is rebuilt when it is empty and
		``doSort`` is true.  Results are stored on the instance.
		"""
		idx = self.getFeaturesIdx()
		#If the features have been changed since last time an update is needed
		if not np.array_equal(idx, self.lastIdx):
			self.lastIdx = idx
			self.D = np.array([])
			# The sorted copy belongs to the old features as well.
			self.DSorted = np.array([])
		#Find the first "densityNPoints" points in ascending order of max neighborhood point
		if len(self.D) == 0:
			tic = time.time()
			self.D = spatial.distance_matrix(self.OrigDelaySeries[:, idx], self.OrigDelaySeries[:, idx])
			toc = time.time()
			print("Elapsed distance matrix computation time = %g"%(toc - tic))
		if len(self.DSorted) == 0 and doSort:
			tic = time.time()
			self.DSorted = np.sort(self.D, 0)
			toc = time.time()
			print("Elapsed sorting time = %g"%(toc - tic))
Ejemplo n.º 26
def evaluate_emulator(x, emulator, cov, cov_args=(), cov_kwargs=None):
    """
    Evaluates emulator at given point or sequence of points

    Parameters
    ----------
    x : ndarray
      Array of length d or of dimension d x m, with each column containing a point
      at which to evaluate the emulator.
    emulator : dict
      Dictionary as output by build_emulator containing grid and v.
    cov : function
      Covariance function for Gaussian process. Must accept ndarray of distances
      as first argument and return an ndarray of the same dimension.  Called as
      cov(dm, *cov_args, **cov_kwargs).
    cov_args : tuple
      Tuple of additional positional arguments for cov.
    cov_kwargs : dict
      Dictionary of additional kw arguments for cov.

    Returns
    -------
    f_hat : ndarray
      Array of size k x m containing estimated values of function.  For a
      single point the length-k array for that point is returned.
    """
    # A fresh dict per call instead of a shared mutable default
    if cov_kwargs is None:
        cov_kwargs = {}

    # Convert x to matrix if needed
    x = np.asarray(x)
    if len(x.shape) < 2:
        x = x[:, np.newaxis]

    # Evaluate distances between x and grid
    C = spatial.distance_matrix(x.T, emulator['grid'])
    C = cov(C, *cov_args, **cov_kwargs)

    # Estimate function values at x
    f_hat = np.dot(emulator['v'].T, C.T)

    # Add linear term if needed
    if emulator['slope_mean'] is not None:
        f_hat += np.dot(emulator['slope_mean'], (x.T - emulator['center']).T)

    if x.shape[1] < 2:
        f_hat = f_hat[:, 0]

    return f_hat
Ejemplo n.º 27
Archivo: AGS.py Proyecto: fimay/hedp
def project_on_grid(points, grid):
    """
    Project points on a grid

    Parameters
    ----------
    points : ndarray (N,)
    grid :  ndarray (M,)

    Returns
    -------
    idx: ndarray (N,)
        grid indices closest to given points
    grid_val: ndarray (N,)
        grid values closest to given points
    """
    # Accept plain sequences too; ``grid[idx]`` below needs an array.
    points = np.asarray(points)
    grid = np.asarray(grid)
    # Column vectors, so the matrix holds |points[i] - grid[j]|.
    d = distance_matrix(points[:, np.newaxis], grid[:, np.newaxis])
    idx = np.argmin(d,axis=1)
    return idx, grid[idx]
Ejemplo n.º 28
def get_vertices_at_intersections(normals, offsets, ceiling_height):
    """Returns a dict of vertices and normals for each surface intersecton of walls given by the Nx3 arrays of
    normals and offsets.

    The surface whose normal has the largest y component is treated as the
    floor.  Every wall is intersected with its two nearest walls (by
    normal) at both the floor plane and the horizontal plane
    ``y = ceiling_height``.

    Returns ``(vertices, norms)``: two dicts keyed by wall index, with the
    floor stored under the key following the last wall.
    """

    from scipy import spatial

    # Calculate d in equation ax + by + cz = d
    dd = np.sum(normals * offsets, axis=1)

    # Automatically Separate out the floor from the walls.
    floor_idx = normals[:, 1].argsort()[-1]
    wall_normals, wall_d = np.delete(normals, floor_idx, axis=0), np.delete(dd, floor_idx)
    floor_normal, floor_d = normals[floor_idx, :], dd[floor_idx]

    # Get neighbors between all walls (excluding the floor, which touches everything.)
    # The offset on the diagonal keeps a wall from being its own neighbor.
    distances = spatial.distance_matrix(wall_normals, wall_normals) + (3 * np.eye(wall_normals.shape[0]))
    neighboring_walls = np.sort(distances.argsort()[:, :2])  # Get the two closest wall indices to each wall
    neighbors = {wall: el.tolist() for (wall, el) in enumerate(neighboring_walls)}

    # Solve for intersection between the floor/ceiling and adjacent walls,
    vertices = {wall: [] for wall in range(len(neighbors))}
    floor_verts = []
    for wall in neighbors:
        for adj_wall in neighbors[wall]:
            for normal, d in ((floor_normal, floor_d), (np.array([0., 1., 0.]), ceiling_height)):
                all_norms = np.vstack((wall_normals[wall], wall_normals[adj_wall], normal))
                all_d = np.array((wall_d[wall], wall_d[adj_wall], d))
                vertex = np.linalg.solve(all_norms, all_d).transpose()

                # NOTE(review): the two appends below were missing from
                # this copy of the code and are reconstructed from how
                # `vertices` and `floor_verts` are used afterwards.
                vertices[wall].append(vertex)
                if d < ceiling_height and vertex.tolist() not in floor_verts:
                    # Each floor corner is reached from both of its walls;
                    # store it once.
                    floor_verts.append(vertex.tolist())

    # Convert vertex lists to dict of NumPy arrays
    vertices = {key: np.array(value) for key, value in vertices.items()}
    vertices[len(vertices)] = np.array(floor_verts)

    norms = {key: np.array(value) for key, value in enumerate(wall_normals)}
    norms[len(norms)] = np.array(floor_normal)

    return vertices, norms
Ejemplo n.º 29
def create_mock_data(data_dir, tracab_id, params, event_telemetry, telemetry_map, video_metadata):
    """Write one mock grid file per frame into ``<data_dir>/<tracab_id>/mock``.

    For every frame a regular grid over the configured rectangle is
    created and the grid cell nearest to each position returned by
    ``get_positions`` is marked with 1 (all others 0).  The values are
    written as ``"<cell index> <value>"`` lines to ``proba-f<frame>.dat``.

    :param params: sequence ``(start_frame, end_frame, xmin, xmax, ymin,
        ymax, num_x_grid, num_y_grid)``; ``end_frame`` is exclusive.
    :param event_telemetry: object whose ``metadata['initial_frame']``
        gives the first tracking frame.
    :param telemetry_map: passed through to ``get_positions``.
    :param video_metadata: nested mapping holding the video start frame.
    """

    (start_frame, end_frame, xmin, xmax, ymin, ymax,
     num_x_grid, num_y_grid) = params[:8]

    tracking_initial_frame = event_telemetry.metadata['initial_frame']
    video_initial_frame = int(video_metadata['MediaproPanaMetaData']['match']['videofile']['start']['@iFrame'])
    global_zero_frame = max(video_initial_frame, tracking_initial_frame)

    xx = np.linspace(xmin, xmax, num_x_grid)
    yy = np.linspace(ymin, ymax, num_y_grid)

    XX, YY = np.meshgrid(xx, yy)
    S = np.array(list(zip(XX.flatten(), YY.flatten())))

    output_dir = os.path.join(data_dir, tracab_id, "mock")
    # The files below cannot be opened unless the directory exists.
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    for frame in range(start_frame, end_frame):

        adjusted_frame = frame + global_zero_frame - tracking_initial_frame

        positions = get_positions(adjusted_frame, telemetry_map)
        D = distance_matrix(S, positions)
        # Index of the nearest grid cell for every position
        NN_indices = D.argmin(axis=0)

        # Store grid values
        Z = np.zeros(len(S))
        Z[NN_indices] = 1

        # Save results
        with open(os.path.join(output_dir, "proba-f{}.dat".format(adjusted_frame)), "w") as f:
            f.write("\n".join(["{} {}".format(a, b) for a, b in zip(range(len(Z)), Z)]))
Ejemplo n.º 30
    def plot_nearest_words(word, k=20):
        """Plot the ``k`` words closest to ``word`` by cosine distance.

        Parameters
        ----------
        word : str
            Target word; must be a key of ``word2id``.
        k : int, optional
            Number of nearest words (the target included) to show.
        """
        # Get distances to target word
        target_vec = wordvecs[word2id[word]]
        dists = []
        for vec_i in wordvecs:
            dists.append(distance.cosine(target_vec, vec_i))
        idxs = np.argsort(dists)
        labels = [words[idx_i] for idx_i in idxs[:k]]
        vecs = [wordvecs[idx_i] for idx_i in idxs[:k]]
        dm = distance_matrix(vecs, vecs)

        fig, axs = plt.subplots(1, 2, figsize=(10, 4))

        # Create distance matrix
        # NOTE(review): only the tick labels are set on the left panel;
        # the call that draws ``dm`` appears to be missing here.
        axs[0].set_xticklabels(labels, rotation='vertical')

        # Center the distance matrix
        dm = dm / np.mean(dm, axis=0, keepdims=True)

        # Plot data points in reduced dimensionality using principal components
        # of the distance matrix
        res = PCA(2).fit_transform(dm)
        pc1, pc2 = res[:, 0], res[:, 1]
        axs[1].scatter(pc1, pc2)
        for i in range(len(labels)):
            axs[1].text(pc1[i], pc2[i], labels[i])
def main():
    # Command-line entry point: loads a feature list, a data file and a
    # vectors file with pandas, then walks over every subject's rows and
    # compares each row's vector with the feature centres.
    # NOTE(review): many statements below look truncated (call openers
    # such as `print(` / `parser.add_argument(` and several closing
    # arguments are missing), so this function does not parse as shown.
    # Restore them from the original script before relying on it.

    if len(sys.argv) <= 1:
            " -f ../../cross_validation/try1.csv -d ../../filtered_data_sets/CDR3_from_celiac_trim_3_4_with_labels_unique_sequences_Celiac_model_April_2020_FILTERED_DATA_1K_per_subject.csv -v ../../vectors/CDR3_from_celiac_trim_3_4_with_labels_unique_sequences_Celiac_model_April_2020_VECTORS_1K_per_subject.csv -of ../../cross_validation/ -od try1K_TRAIN_0"

    parser = argparse.ArgumentParser()

        'feature list file, contains the list of relevent features, including feature center and maximal distance from it'
                        help='the filtered data file path')
                        help='the vectors file path')
                        help='Output folder for the feature table')
                        help='description to use inside output file names')
        help='subject column name in data file, default "FILENAME"',
        help='labels column name in data file, default "labels"',
    args = parser.parse_args()

    # The three input paths must point at existing files.
    if not (os.path.isfile(args.features_list)):
            'feature list file error, make sure file path exists\nExiting...')

    if not (os.path.isfile(args.data_file_path)):
        print('feature file error, make sure file path exists\nExiting...')

    if not (os.path.isfile(args.vectors_file_path)):
        print('vectors file error, make sure file path exists\nExiting...')

    # load files
    feature_list = pd.read_csv(args.features_list, index_col=0)
    data_file = pd.read_csv(args.data_file_path)
    vectors_file = pd.read_csv(args.vectors_file_path)

    # The configured label/subject columns and the 'feature_index' column
    # must be present in the loaded tables.
    if not args.labels_col_name in data_file.columns:
            f'label "{args.labels_col_name}" column name doesnt exist in data file.\nExiting...'

    if not args.subject_col_name in data_file.columns:
            f'"{args.subject_col_name}" column name doesnt exist in data file.\nExiting...'

    if not 'feature_index' in feature_list.columns:
            f'"feature list file error, no "feature index" column. please check.\n exiting...'
        print(f'feature indexes: {feature_list.index}')
    features_table = pd.DataFrame(
    )  #define an empty matrix, each raw is a subject, each column is a feature (cluster)

    by_subject = data_file.groupby(args.subject_col_name)
    sub_num = 0

    for subject, frame in by_subject:  # for each subject
        sub_num += 1
        print(f"{str(datetime.now())}: Analysing {subject!r} #{sub_num!r}")
        for vector_index, row in frame.iterrows(
        ):  #for each vector in that subject
            #print(f"{str(datetime.now())}: Analysing {vector_index!r} vector index")
            sum_iloc = 0.0
            cnt_iloc = 0
            sum_euclidean = 0.0
            cnt_eculidean = 0
            start_time_others = time.time()
            #features_count = 0
            multiple_entries = 0

            # The data-file row index is used as a positional index into
            # the vectors file.
            vector_u = vectors_file.iloc[
                vector_index, :]  # vector in data file
            if True:
                # pavel new
                # Vectorised path: distance from this vector to every
                # feature centre (the last 100 columns of the feature
                # list), compared with each feature's own max_distance.
                features = feature_list.iloc[:, -100:]
                distances = distance_matrix(features,
                                            np.array(vector_u, ndmin=2))
                distances = distances.reshape((len(features), ))
                max_distance = feature_list.loc[:, 'max_distance']
                distance_close_enough_vec = distances <= max_distance
                # TODO: where to increment the counters?
                features_count = np.sum(distance_close_enough_vec)
                if features_count > 1:
                    multiple_entries += 1
                add_feature_index = np.where(distance_close_enough_vec == True)
                                                    'feature_index']] += 1

            # Disabled legacy path, kept for reference: per-feature loop
            # with timing counters.
            if False:  # thecode before
                for feature_index in feature_list.index:  #check distances of each vector from all features
                    tic = time.time()
                    vector_v = feature_list.iloc[
                        -100:]  #center vector is the last 100 vectors
                    sum_iloc += time.time() - tic
                    cnt_iloc += 1

                    tic = time.time()
                    distance = euclidean(vector_u, vector_v)
                    sum_euclidean += time.time() - tic
                    cnt_eculidean += 1

                    if distance <= feature_list.loc[feature_index,
                        # print(f'feature {feature} answers condition')
                        features_table.loc[subject, feature_list.loc[
                            feature_index, 'feature_index']] += 1
                        features_count += 1

                    "first iloc time = {}ms cnt={}\t eculedian time={}ms\t all={}"
                    .format(1000 * sum_iloc / cnt_iloc, cnt_iloc,
                            1000 * sum_euclidean / cnt_eculidean,
                            time.time() - start_time_others))
        # print(f'===> A total of {features_count} answered the conditions, out of {len(frame)} raws')

        # Normlize by raw

    # Each row of the table is divided by that row's sum.
    normlized_features_table = features_table.div(features_table.sum(axis=1),
                     args.output_description + '_feature_table.csv'))

        'file saved to ',
                     args.output_description + '_feature_table.csv'))
Ejemplo n.º 32
    def get_unique_vectors(self,
                           distance_threshold=0.01,
                           method='distance_comparison',
                           min_samples=1,
                           return_clusters=False):
        """Returns diffraction vectors considered unique by:
        strict comparison, distance comparison with a specified
        threshold, or by clustering using DBSCAN [1].

        Parameters
        ----------
        distance_threshold : float
            The minimum distance between diffraction vectors for them to
            be considered unique diffraction vectors. If
            distance_threshold==0, the unique vectors will be determined
            by strict comparison.
        method : str
            The method to use to determine unique vectors. Valid methods
            are 'strict', 'distance_comparison' and 'DBSCAN'.
            'strict' returns all vectors that are strictly unique and
            corresponds to distance_threshold=0.
            'distance_comparison' checks the distance between vectors to
            determine if some should belong to the same unique vector,
            and if so, the unique vector is iteratively updated to the
            average value.
            'DBSCAN' relies on the DBSCAN [1] clustering algorithm, and
            uses the Eucledian distance metric.
        min_samples : int, optional
            The minimum number of not strictly identical vectors within
            one cluster for the cluster to be considered a core sample,
            i.e. to not be considered noise. Only used for method='DBSCAN'.
        return_clusters : bool, optional
            If True (False is default), the DBSCAN clustering result is
            returned. Only used for method='DBSCAN'.

        References
        ----------
        [1] https://scikit-learn.org/stable/modules/generated/sklearn.
            cluster.DBSCAN.html

        Returns
        -------
        unique_peaks : DiffractionVectors
            The unique diffraction vectors.
        clusters : DBSCAN
            The results from the clustering, given as class DBSCAN.
            Only returned if method='DBSCAN' and return_clusters=True.

        NOTE(review): the signature defaults and the arguments of several
        calls were truncated in this copy of the code and have been
        reconstructed from the docstring and the surrounding usage;
        verify them against the upstream implementation.
        """
        # Flatten the array of peaks to reach dimension (n, 2), where n
        # is the number of peaks.
        peaks_all = np.concatenate([peaks.ravel() for peaks in self.data.flat
                                    ]).reshape(-1, 2)

        # A distance_threshold of 0 implies a strict comparison. So in that
        # case, a warning is raised unless the specified method is 'strict'.
        if distance_threshold == 0:
            # Compare by value: `is not` on a string literal tests object
            # identity and is not a reliable inequality check.
            if method != 'strict':
                warn(message='distance_threshold=0 was given, and therefore ' +
                     'a strict comparison is used, even though the ' +
                     'specified method was ' + method + '.')
                method = 'strict'

        if method == 'strict':
            unique_peaks = np.unique(peaks_all, axis=0)

        elif method == 'distance_comparison':
            unique_vectors, unique_counts = np.unique(peaks_all,
                                                      axis=0,
                                                      return_counts=True)

            # Dummy first row so np.append has something to extend; it is
            # deleted again after the loop.
            unique_peaks = np.array([[0, 0]])
            unique_peaks_counts = np.array([0])

            while unique_vectors.shape[0] > 0:
                unique_vector = unique_vectors[0]
                distances = distance_matrix(np.array([unique_vector]),
                                            unique_vectors)
                # The vector itself is at distance 0, so it is always
                # selected and the loop consumes at least one row.
                indices = np.where(distances < distance_threshold)[1]

                new_count = indices.size
                new_unique_peak = np.array([
                    np.average(unique_vectors[indices],
                               weights=unique_counts[indices],
                               axis=0)
                ])

                unique_peaks = np.append(unique_peaks, new_unique_peak, axis=0)

                unique_peaks_counts = np.append(unique_peaks_counts, new_count)
                unique_vectors = np.delete(unique_vectors, indices, axis=0)
                unique_counts = np.delete(unique_counts, indices, axis=0)
            unique_peaks = np.delete(unique_peaks, [0], axis=0)

        elif method == 'DBSCAN':
            # All peaks are clustered by DBSCAN so that peaks within
            # one cluster are separated by distance_threshold or less.
            unique_vectors, unique_vectors_counts = np.unique(
                peaks_all, axis=0, return_counts=True)
            clusters = DBSCAN(eps=distance_threshold,
                              min_samples=min_samples,
                              metric='euclidean').fit(
                                  unique_vectors,
                                  sample_weight=unique_vectors_counts)
            unique_labels, unique_labels_count = np.unique(clusters.labels_,
                                                           return_counts=True)
            unique_peaks = np.zeros((unique_labels.max() + 1, 2))

            # For each cluster, a center of mass is calculated based
            # on all the peaks within the cluster, and the center of
            # mass is taken as the final unique vector position.
            for n in np.arange(unique_labels.max() + 1):
                peaks_n_temp = unique_vectors[clusters.labels_ == n]
                peaks_n_counts_temp = unique_vectors_counts[clusters.labels_ ==
                                                            n]
                unique_peaks[n] = np.average(peaks_n_temp,
                                             weights=peaks_n_counts_temp,
                                             axis=0)

        # Manipulate into DiffractionVectors class
        if unique_peaks.size > 0:
            unique_peaks = DiffractionVectors(unique_peaks)
        if return_clusters and method == 'DBSCAN':
            return unique_peaks, clusters
        else:
            return unique_peaks
Ejemplo n.º 33
    def from_num_cities(self, n=20, length=100, seed=1):
        """Create ``n`` random cities and their pairwise distance matrix.

        :param int n: number of cities; stored in ``self.num_cities``.
        :param float length: every coordinate is drawn uniformly from
            ``[-length, length)``.
        :param seed: seed of the random generator, so that the same
            arguments always give the same cities.

        Sets ``self.coords`` (list of ``[x, y]`` pairs) and
        ``self.dist_mat`` (nested list of distances).
        """
        self.num_cities = n
        # A private generator honours ``seed`` without touching NumPy's
        # global random state.
        rng = np.random.RandomState(seed)
        self.coords = rng.uniform(-length, length, size=(n, 2)).tolist()

        self.dist_mat = distance_matrix(self.coords, self.coords).tolist()
V1_subsub = V1[:, sub_ind1[:N_SUB2]]
# now subsample V2
# The builtin ``int`` replaces the ``np.int`` alias, which newer NumPy
# releases no longer provide.
sub_ind2 = np.array(SubSample(V2, N_SUB1), dtype=int)
V2_sub = V2[:, sub_ind2]
# centre the subsample on its mean and scale it so the largest column
# norm is 1
a2 = np.mean(V2_sub, axis=1)
b2 = np.matlib.repmat(a2, N_SUB1, 1)
V2_sub = V2_sub - b2.T
V2_sub = V2_sub / np.max(np.linalg.norm(V2_sub, axis=0))
V2_subsub = V2[:, sub_ind2[:N_SUB2]]

## step 1 - Align and Register
R = PrincipalComponentAlignment(V1_sub, V2_sub, ref=False)
min_cost = np.ones(len(R)) * np.inf
permutations = []
for i, rot in enumerate(R):
    cost = distance_matrix(V1_sub.T, np.dot(rot, V2_sub).T)
    V1_ind, V2_ind = Hungary(cost)
    # Keep the assignment of every candidate so the best one can be
    # looked up below; without this the list stays empty.
    # NOTE(review): assumes V2_ind is the column order of V2 matched to
    # V1 -- confirm against Hungary().
    permutations.append(V2_ind)
    min_cost[i] = np.sqrt(np.sum(
        cost[V1_ind, V2_ind]))  # the actual cost of the permutation found

best_rot_ind = np.argmin(min_cost)
best_permutation = permutations[best_rot_ind]
best_rot = R[best_rot_ind]

newV2_sub = np.dot(best_rot.T, V2_sub)
i = 0
while True:
    newV2_sub = newV2_sub[:, best_permutation]
    # Do Kabsch
    cur_rot = Kabsch(newV2_sub.T, V1_sub.T)
Ejemplo n.º 35
pdf = pdf.dropna()
pdf = pdf.reset_index(drop=True)

#Select features
featureset = pdf[[
    'engine_s', 'horsepow', 'wheelbas', 'width', 'length', 'curb_wgt',
    'fuel_cap', 'mpg'
]]

#Normalize data
from sklearn.preprocessing import MinMaxScaler
x = featureset.values
min_max_scaler = MinMaxScaler()
feature_mtx = min_max_scaler.fit_transform(x)

dist_matrix = distance_matrix(feature_mtx, feature_mtx)
agglom = AgglomerativeClustering(n_clusters=6, linkage='complete')
# The model has to be fitted before ``labels_`` exists.
# NOTE(review): fitted on the scaled features here; confirm whether the
# original fitted on ``dist_matrix`` instead.
agglom.fit(feature_mtx)
pdf['cluster_'] = agglom.labels_

# One rainbow colour per cluster label
import matplotlib.cm as cm
n_clusters = max(agglom.labels_) + 1
colors = cm.rainbow(np.linspace(0, 1, n_clusters))
cluster_labels = list(range(0, n_clusters))

# Create a figure of size 6 inches by 4 inches.
Ejemplo n.º 36
def calc_distance(data_matrix):
    """Return the pairwise distances between all rows of ``data_matrix``.

    The result is a square matrix whose entry ``[i, j]`` is the distance
    between row ``i`` and row ``j`` as computed by ``distance_matrix``.
    """
    pairwise = distance_matrix(data_matrix, data_matrix)
    return pairwise
Ejemplo n.º 37
 def predict(self, y):
     """Find the stored rows closest to the query ``y``.

     The ``'lat'``, ``'lon'`` and ``'mag'`` entries of ``y`` form the query
     point, which is compared against every row of ``self.matching``.

     Returns a tuple ``(indices, distances)`` for the ``self.neighbors``
     closest rows, ordered from nearest to farthest.
     """
     query = y[['lat', 'lon', 'mag']].values
     # One column of distances: every stored row versus the single query.
     distances = spatial.distance_matrix(self.matching, [query])
     ranking = np.argsort(distances, axis=0)
     nearest = ranking[:self.neighbors]
     return nearest, distances[nearest]
Ejemplo n.º 38
                cells[:, i] = (cells[:, i] - means[i]) / stds[i]  #point 1
                      i + 3] = (cells[:, i + 3] - means[i]) / stds[i]  #point 2
                      i + 6] = (cells[:, i + 6] - means[i]) / stds[i]  #point 3
                barycenters[:, i] = (barycenters[:, i] - mins[i]) / (maxs[i] -
                normals[:, i] = (normals[:, i] - nmeans[i]) / nstds[i]

            X = np.column_stack((cells, barycenters, normals))
            #X = (X-np.ones((X.shape[0], 1))*np.mean(X, axis=0)) / (np.ones((X.shape[0], 1))*np.std(X, axis=0))

            # computing A_S and A_L
            A_S = np.zeros([X.shape[0], X.shape[0]], dtype='float32')
            A_L = np.zeros([X.shape[0], X.shape[0]], dtype='float32')
            D = distance_matrix(X[:, 9:12], X[:, 9:12])
            A_S[D < 0.1] = 1.0
            A_S = A_S / np.dot(np.sum(A_S, axis=1, keepdims=True),
                               np.ones((1, X.shape[0])))

            A_L[D < 0.2] = 1.0
            A_L = A_L / np.dot(np.sum(A_L, axis=1, keepdims=True),
                               np.ones((1, X.shape[0])))

            # numpy -> torch.tensor
            X = X.transpose(1, 0)
            X = X.reshape([1, X.shape[0], X.shape[1]])
            X = torch.from_numpy(X).to(device, dtype=torch.float)
            A_S = A_S.reshape([1, A_S.shape[0], A_S.shape[1]])
            A_L = A_L.reshape([1, A_L.shape[0], A_L.shape[1]])
            A_S = torch.from_numpy(A_S).to(device, dtype=torch.float)
Ejemplo n.º 39
lbl_t3 = np.random.randint(-15, 15, size=(1000,2))+lbl_t2

max_displacement = 20
max_discontinuity = 3

timepoints = [1,2,3]
consecutive_tp_pairs = [(timepoints[i], timepoints[i+1])
                        for i in range(len(timepoints)-1)]
lbls = {1: lbl_t1, 2: lbl_t2, 3: lbl_t3}
tp2idx = {tp:i for i, tp in enumerate(timepoints)}
tracks = []
segment_list = []
track_id = 1
for ti, tj in consecutive_tp_pairs:
    lbl_i, lbl_j = lbls[ti], lbls[tj] # Assuming these are centroids
    cost_matrix = distance_matrix(lbl_i, lbl_j)
    total_cost, column2row, row2column = lap.lapjv(cost_matrix,
    for col, row in enumerate(column2row):
        if col == -1:
            tracks.append(([ti], [lbl_i[col]])) # time and xy
            tracks.append(([ti, tj], [lbl_i[col], lbl_j[row]]))
        track_id += 1
track_starts = np.array([i[0][0] for i in tracks])
track_ends = np.array([i[0][1] for i in tracks])

track_xy_start = np.array([i[1][0] for i in tracks])
Ejemplo n.º 40
def coordinates_are_resonable(coords):
    """Check that the closest pair of points is not unreasonably close.

    Returns a truthy value when the smallest entry of the pairwise distance
    matrix, after 1.0 has been added to its diagonal, lies strictly between
    0.8 and 5.0.

    NOTE(review): because the diagonal is padded with 1.0, the minimum can
    never exceed 1.0, so the ``< 5.0`` bound cannot fail and very long
    distances are not actually rejected -- confirm whether that is intended.
    """
    n_points = len(coords)
    # Self-distances are 0.0; lift them to 1.0 so they do not trip the
    # lower bound.
    padded = distance_matrix(coords, coords) + np.identity(n_points)
    shortest = np.min(padded)
    return 0.8 < shortest < 5.0
Ejemplo n.º 41
    def align_and_rotate(self):
        """Locally align query and target, then superimpose the aligned parts.

        Steps:

        1. Build a pairwise score matrix from ``self.query.er`` and
           ``self.target.er`` with ``dm_ndtr`` and run ``nlocalalign`` on it.
        2. Take the non-gap positions of the traceback, centre both
           coordinate sets on their centroids and compute the rotation that
           fits the query onto the target (Kabsch via SVD).
        3. Derive RMSD, GDT_TS, a GDT-based similarity score and TM-scores
           from the superimposed coordinates.

        Results are stored on the instance (``i_list``, ``j_list``,
        ``is_gap``, ``score``, ``traceback_len``, ``len_wo_gaps``,
        ``nrgaps``, ``query_centroid``, ``target_centroid``, ``rmsd``,
        ``rotation_matrix``, ``query_aligned``, ``target_aligned``,
        ``dists``, ``gdt_ts``, ``gdt_sim``, ``tmq``, ``tmt``, ``tm``);
        nothing is returned.
        """
        # Score matrix for the alignment. A plain Euclidean distance matrix
        # used to be computed here as well, but it was overwritten
        # immediately and never used, so that call has been dropped.
        ab = dm_ndtr(self.query.er, self.target.er)

        # Actual local alignment
        self.i_list, self.j_list, self.is_gap, self.score = nlocalalign(
            ab, self.gap, self.factor, self.limit)

        self.traceback_len = len(self.is_gap)

        # Keep only the traceback positions that are not gaps
        i_list = [i for i, g in zip(self.i_list, self.is_gap) if g == 0]
        j_list = [j for j, g in zip(self.j_list, self.is_gap) if g == 0]
        self.len_wo_gaps = len(i_list)
        self.nrgaps = np.count_nonzero(self.is_gap)

        # Kabsch superposition of the aligned coordinates
        a_pre = self.query.coordinates
        b_pre = self.target.coordinates
        a = a_pre[i_list, :]
        b = b_pre[j_list, :]
        self.query_centroid = np.mean(a, axis=0)
        self.target_centroid = np.mean(b, axis=0)
        a -= self.query_centroid
        b -= self.target_centroid
        h = a.T @ b
        u, s, v = np.linalg.svd(h.T)
        # Sign of the determinant guards against an improper rotation
        # (reflection).
        d = np.linalg.det(v.T @ u.T)
        r = v.T @ np.diag([1, 1, d]) @ u.T
        a = a @ r
        self.rmsd = rmsd(a, b)
        self.rotation_matrix = r
        self.query_aligned = a
        self.target_aligned = b
        self.dists = np.linalg.norm(a - b, axis=1)

        # GDT_TS: count the aligned positions within each distance cutoff.
        # Count the boolean mask directly; counting the non-zero entries of
        # np.where(...) counts *indices* and therefore misses position 0.
        f1 = np.count_nonzero(self.dists < 1)
        f2 = np.count_nonzero(self.dists < 2)
        f4 = np.count_nonzero(self.dists < 4)
        f8 = np.count_nonzero(self.dists < 8)
        self.gdt_ts = 25 * sum(
            [f1, f2, f4, f8]) / self.len_wo_gaps if self.len_wo_gaps > 0 else 0
        # FATCAT-inspired similarity score
        # GDT-sim "improved", needs further tinkering...
        self.gdt_sim = self.score * self.len_wo_gaps * self.gdt_ts

        # TMscore, normalised once by query length and once by target length
        d0 = 1.24 * np.cbrt(self.target.l - 15) - 1.8
        di = np.sqrt(np.sum((a - b)**2, axis=1))
        tm_sum = np.sum(1 / (1 + (di / d0)**2))
        self.tmq = tm_sum / self.query.l
        self.tmt = tm_sum / self.target.l
        self.tm = (self.tmq + self.tmt) / 2
Ejemplo n.º 42
 # *******************************************************************************
 # *******************************************************************************
 # create empty list to hold the list of data points assigned to a each center
 center_data_list = [[] for j in range(CLIST_kmcmc.shape[0])]
 # create empty list to hold the list of distances of data points assigned to a each center
 center_data_dist_list = [[] for j in range(CLIST_kmcmc.shape[0])]
 # distance matrix having distance of each data point to each center
 dist_matrix = spatial.distance_matrix(X_mini_batch, CLIST_kmcmc, p = 2)
 # get index of center assigned to each of the corresponding data point
 c_j_index = [np.argmin(dist) for dist in dist_matrix]
 # list of tuples of data point index and corresponding nearest center index         
 zipped1 = zip(c_j_index, np.arange(0, mini_batch_size))
 # list of distance between center and its assigned data point
 center_data_dist = [np.amin(di) for di in dist_matrix]
 count1 = 0
 # loop over all data points and corresponding centers
 for (k1, v1) in zipped1:
cluster_dorsal,centroid_dorsal = kmeans(df_dorsal,c)
cluster_palmar,centroid_palmar = kmeans(df_palmar,c)

# Cluster details command line and html format

# Compute feature descriptors for unlabelled data
csv_file =model + '_unlabeled_set' + str(unlabeled_set) + '.csv'
if os.path.exists(csv_file):


df = pd.read_csv(csv_file, sep=',', header=None)

dist_dorsal = distance_matrix(df.values[:,1:],centroid_dorsal)
dist_palmar = distance_matrix(df.values[:,1:],centroid_palmar)

total_count = len(df.values)

for i in range(len(df.values)):
    if min(dist_dorsal[i]) < min(dist_palmar[i]):

def testing_accuracy(result,unlabeled_set):
    positive = 0
    negative = 0
Ejemplo n.º 44
def generateDisMatrix(df):
    """Build a labelled pairwise distance table for the rows of ``df``.

    The returned DataFrame is square; both its index and its columns carry
    the index labels of ``df``.
    """
    points = df.values
    labels = df.index
    pairwise = distance_matrix(points, points)
    return pd.DataFrame(pairwise, index=labels, columns=labels)
Ejemplo n.º 45
 def _spdistance_matrix(self, x, y, threshold=None):
     dist = distance_matrix(x, y)
     if threshold is not None:
         zeros = dist > threshold
         dist[zeros] = 0
     return sp.csr_matrix(dist)
Ejemplo n.º 46
    def im_callback(self, msg):
        if self.first_spin:
            self.old_image = self.bridge.imgmsg_to_cv2(msg,
            # old coords
            self.old_gray = self.buildMask(self.old_image)
            self.old_coords = self.detectBalls(self.old_gray)
            for c in self.old_coords:
                x, y = c[0], c[1]
                self.balls.append(Balle(x, y, self.nb_ball_spawn, 1, 1))
                self.nb_ball_spawn += 1
            self.first_spin = False
            # read new image
            new_frame = self.bridge.imgmsg_to_cv2(msg, desired_encoding='bgr8')
            lk_params = dict(winSize=(15, 15),
                                       | cv2.TERM_CRITERIA_COUNT, 10, 0.03))

            # calculate optical flow
            frame_gray = self.buildMask(new_frame)

            coords1, st, err = cv2.calcOpticalFlowPyrLK(
                self.old_gray, frame_gray, self.old_coords, None,

            good_new = coords1[(st == 1).flatten()]
            good_old = self.old_coords[(st == 1).flatten()]

            self.old_gray = frame_gray.copy()
            self.old_coords = good_new.reshape(-1, 1, 2)

            # Check for new balls
            newcoords = self.detectBalls(frame_gray)
            distance = distance_matrix(self.old_coords.reshape((-1, 2)),

            if (newcoords.shape[0] == self.old_coords.shape[0]):
                #print("same number")
                for k in range(self.old_coords.shape[0]):
                    v = np.min(distance[k, :])
                    ind = np.argmin(distance[k, :])
                    x, y = self.old_coords[k][0][0], self.old_coords[k][0][1]
                    self.balls[k].coords = [x, y]
                    self.balls[k].num = ind
                    self.balls[k].is_visible = 1
            # If any new :
            elif (newcoords.shape[0] > self.old_coords.shape[0]):
                matched = []
                for k in range(self.old_coords.shape[0]):
                    ind = np.argmin(distance[k, :])
                    distance[k, ind] = 100000
                if (newcoords.shape[0] > self.old_coords.shape[0]):
                    for l in range(newcoords.shape[0]):
                        if (np.max(distance[:, l]) != 100000):
                #print("nc : ", newcoords.shape[0])
                #print("nc : ", self.old_coords.shape[0])
                #print("mtchd : ", len(matched))
                self.old_coords = np.asarray(matched).reshape(
                    (newcoords.shape[0], 1, 2))

            # Create Balles Objects
            for j in range(len(self.old_coords)):
                x, y = self.old_coords[j][0][0], self.old_coords[j][0][1]
                if (len(self.balls) < len(self.old_coords)):
                    self.balls.append(Balle(x, y, self.nb_ball_spawn, 1, 1))
                self.balls[j].coords = [x, y]
                self.balls[j].num = j
                self.balls[j].is_visible = 1
                self.balls[j].detected = 1
                self.nb_ball_spawn += 1

            if (newcoords.shape[0] < self.old_coords.shape[0]):
                matched = []
                dis = []
                for k in range(self.old_coords.shape[0]):
                    v = np.min(distance[k, :])
                    ind = np.argmin(distance[k, :])
                    if v > 30:
                        #distance[k,ind] = 10000
                        zebbi = 1
                #print("dis : ", dis)
                #self.old_coords = np.asarray(matched).reshape((self.old_coords.shape[0],1,2))
                #print("self.old_coords : ", self.old_coords)
                for j in range(len(self.balls)):
                    #self.balls[j].coords = [self.old_coords[j][0][0], self.old_coords[j][0][1]]
                    if (j in dis):
                        for b in self.balls:
                            #print("ball ", b.num, " : ", b.coords[0], ", ", b.coords[1])
                            if (b.num == j):
                                x, y = self.old_coords[j][0][
                                    0], self.old_coords[j][0][1]
                                self.balls[j].coords = [x, y]
                                self.balls[j].is_visible = 0
            if self.visualize:
                frame_show = new_frame.copy()
                for b in self.balls:
                    x, y = b.coords[0], b.coords[1]
                    #print("x, y", x, ",", y)
                    j = b.num
                    if (b.is_visible):
                        frame_show = cv2.circle(frame_show, (int(x), int(y)),
                                                5, (0, 200, 0), -1)
                        frame_show = cv2.putText(frame_show, str(j),
                                                 (int(x) + 20, int(y) + 20),
                                                 cv2.FONT_HERSHEY_SIMPLEX, 0.5,
                                                 (255, 0, 0), 2)
                        frame_show = cv2.circle(frame_show, (int(x), int(y)),
                                                5, (0, 0, 200), -1)
                        frame_show = cv2.putText(frame_show, str(j),
                                                 (int(x) + 20, int(y) + 20),
                                                 cv2.FONT_HERSHEY_SIMPLEX, 0.5,
                                                 (0, 0, 200), 2)

                cv2.imshow("tracking", frame_show)
            lst = []
            for b in self.balls:
                if (b.detected and b.is_visible):
                    w = self.imgToWorld(b.coords[0], b.coords[1])

            lst_coords = Float32MultiArray()
            lst_coords.data = [1.0, 2.0, 3.0]
            #print("lst : ", lst)
            lst_coords.data = lst
            #print("lst_coords.data : ", len(lst_coords.data))
in_file = pd.read_csv('/media/miri-o/Documents/AA_triplets_with_embedding_and_clusters.csv')
data = pd.DataFrame(in_file, columns = ['Ngram', 'cluster', 'dim1', 'dim2'])

props = ['CDR3_AA_GRAVY', 'CDR3_AA_BULK',

property_data = pd.DataFrame(in_file)
property_data = property_data.drop(['Ngram', 'dim1', 'dim2'], axis=1)
property_data_clusterized = clusterize_properties(property_data , props)

amino_acid_logo = build_clust_logo(data)

amino_acid_logo_values = amino_acid_logo.drop(['length', 'center_x', 'center_y'], axis = 1)
# compute distance matrix
logo_cluster_dist_mat = pd.DataFrame(distance_matrix(amino_acid_logo_values.values, amino_acid_logo_values.values))
plt.figure(figsize=(12, 12))
sns.heatmap(logo_cluster_dist_mat, cmap = "RdBu")


fig2 = plt.figure(figsize=(10,10))
ax2 = plt.scatter(data['dim1'], data['dim2'], s=3, marker = 'D')
ax2 = plt.scatter(amino_acid_logo['center_x'], amino_acid_logo['center_y'], s=3, marker = 'x', color = 'r')

amino_acid_coords = amino_acid_logo[['center_x', 'center_y']]
CM_cluster_dist_mat = pd.DataFrame(distance_matrix(amino_acid_coords.values, amino_acid_coords.values))
plt.figure(figsize=(12, 12))
# -*- coding: utf-8 -*-
Created on Tue Dec 29 13:52:12 2020

@author: Pedro Ayres

import numpy as np
import pandas as pd
from scipy.spatial import distance_matrix

# Original code from OP, slightly reformatted
DF_var = pd.DataFrame.from_dict({
    "s1": [1.2, 3.4, 10.2],
    "s2": [1.4, 3.1, 10.7],
    "s3": [2.1, 3.7, 11.3],
    "s4": [1.5, 3.2, 10.9]
DF_var.columns = ["g1", "g2", "g3"]

# Whole similarity algorithm in one line
df_euclid = pd.DataFrame(1 / (1 + distance_matrix(DF_var.T, DF_var.T)),


#           g1        g2        g3
# g1  1.000000  0.215963  0.051408
# g2  0.215963  1.000000  0.063021
# g3  0.051408  0.063021  1.000000
Ejemplo n.º 49
# Script: build two lattices that differ by a rotation, find the points they
# share (distance below 1e-5) and plot both lattices plus the shared points.
# NOTE(review): gf / gl / ld are project modules not visible here; the
# descriptions of their results below are inferred from names -- confirm.
a1 = 4.05  # lattice parameter
a2 = a1 * np.sqrt(3)  # periodic cell repeat multiple
# Cell extents passed to ExtrudedRectangle (length, height, width)
l = 4
h = 4 * np.sqrt(3)
w = 4
# File names; not used further in the visible part of the script
strDataFile = 'new.data'
strDumpFile = 'dump.eam'
strPMFile = strDumpFile + 'PM'
# presumably a table of CSL rotations about [1, 1, 1]; element [0, 1] is used
# below as the rotation argument -- TODO confirm layout
arrSigma = gf.CubicCSLGenerator(np.array([1, 1, 1]), 25)
# Rotation relating the [1, 1, 1] direction and the z axis
fltAngle, arrVector = gf.FindRotationVectorAndAngle(np.array([1, 1, 1]),
                                                    np.array([0, 0, 1]))
arrBasisVectors = gf.RotatedBasisVectors(fltAngle, arrVector)
# First lattice: built directly on the rotated basis
objFirstLattice = gl.ExtrudedRectangle(l, w, h, arrBasisVectors, ld.FCCCell,
                                       np.ones(3), np.zeros(3))
# Second lattice: same cell, basis additionally rotated about z by
# arrSigma[0, 1]
objSecondLattice = gl.ExtrudedRectangle(
    l, w, h,
    gf.RotateVectors(arrSigma[0, 1], np.array([0, 0, 1]), arrBasisVectors),
    ld.FCCCell, np.ones(3), np.zeros(3))
arrPoints1 = objFirstLattice.GetRealPoints()
arrPoints2 = objSecondLattice.GetRealPoints()
# All pairwise distances between points of the first and second lattice
arrDistanceMatrix = spatial.distance_matrix(arrPoints1, arrPoints2)
# Row indices (points of the first lattice) that coincide, within 1e-5, with
# some point of the second lattice
lstPoints = np.where(arrDistanceMatrix < 1e-5)[0]
arrCSLPoints = arrPoints1[lstPoints]
# Plot first lattice (blue), second lattice (red) and coincident points (black)
plt.plot(*tuple(zip(*arrPoints1)), 'bo', c='b')
plt.plot(*tuple(zip(*arrPoints2)), 'bo', c='r')
plt.plot(*tuple(zip(*arrCSLPoints)), 'bo', c='black')
Ejemplo n.º 50
def lp_distance(x):
    """Return the matrix of pairwise distances between the rows of ``x``."""
    points = x
    pairwise = distance_matrix(points, points)
    return pairwise
Ejemplo n.º 51
 def predict(self, y):
     dist = spatial.distance_matrix(self.data, y)
     kmin_index = np.argsort(dist, axis=0)
     return self.data[kmin_index[:self.neighbors]], dist[
Ejemplo n.º 52
def pearson_affinity(M):
    """Return pairwise distances between rows of a covariance-derived matrix.

    The covariance matrix ``C`` of ``M`` is mapped element-wise to
    ``sqrt(1 - C / 2)``; the result is the matrix of pairwise distances
    between the rows of that transformed matrix.

    NOTE(review): despite the name, this uses ``np.cov`` rather than a
    correlation coefficient, so entries with covariance above 2 become NaN
    -- confirm that covariance is intended.
    """
    covariance = np.cov(M)
    transformed = (1 - covariance / 2) ** 0.5
    return distance_matrix(transformed, transformed)
Ejemplo n.º 53
                                max_len -= 1
                            timesteps = n_timesteps[:max_len]
                # Downsample if needed
                for trial_idx, n_timesteps in enumerate(merged_timesteps):
                    # We assume they are the same, or they will be discarded in the next step
                    if len(n_timesteps
                           ) == min_ or n_timesteps[-1] < args.min_timesteps:
                        # Discard
                        # merged_mean[trial_idx] = []

                        new_merged_mean, new_merged_std = [], []
                        # Nearest neighbour
                        distance_mat = distance_matrix(
                            n_timesteps.reshape(-1, 1),
                            timesteps.reshape(-1, 1))
                        closest_indices = distance_mat.argmin(axis=0)
                        for closest_idx in closest_indices:
                        merged_mean[trial_idx] = new_merged_mean
                        merged_std[trial_idx] = new_merged_std
                        last_eval[trial_idx] = merged_results[trial_idx][

            # Remove incomplete runs
            mean_tmp, std_tmp, last_eval_tmp = [], [], []
            for idx in range(len(merged_mean)):
Ejemplo n.º 54
from matplotlib import pyplot as plt 
from sklearn import manifold, datasets 
from sklearn.cluster import AgglomerativeClustering 
from sklearn.datasets.samples_generator import make_blobs

#Make the blobs
X2, y2 = make_blobs(n_samples=50, centers=[[4,4], [-2, -1], [1, 1], [10,4]], cluster_std=0.9)
#Create the model and train it
agglom = AgglomerativeClustering(n_clusters = 4, linkage = 'average')
# Create a minimum and maximum range of X2.
x_min, x_max = np.min(X2, axis=0), np.max(X2, axis=0)
# Min-max scale X2 so that every feature lies in [0, 1].
X2 = (X2 - x_min) / (x_max - x_min)
#Create the distance matrix
dist_matrix = distance_matrix(X2,X2)
#Create the training data
Z = hierarchy.linkage(dist_matrix, 'complete')
#Create the dendrogram
dendro = hierarchy.dendrogram(Z)

# Create a figure of size 6 inches by 4 inches.
# These two lines of code are used to scale the data points down,
# Or else the data points will be scattered very far apart.
# Create a minimum and maximum range of X2.
x_min, x_max = np.min(X2, axis=0), np.max(X2, axis=0)
# Min-max scale X2 so that every feature lies in [0, 1].
X2 = (X2 - x_min) / (x_max - x_min)
# This loop displays all of the datapoints.
for i in range(X2.shape[0]):
Ejemplo n.º 55
 def collision_detection(self):
     """Find pairs of nodes that are close enough to be colliding.

     Uses the first two columns of ``self.nodes`` as positions and collects
     the index pairs whose distance is non-zero and below twice
     ``self.node_radius``.

     NOTE(review): the visible code only builds ``collision_pairs``; the
     hand-off to the interact function mentioned originally is not shown
     here -- confirm the rest of the method.
     """
     # Keep only the lower triangle so each pair appears once; the upper
     # triangle and the diagonal become 0.0.
     dm = np.tril(distance_matrix(self.nodes[:, :2], self.nodes[:, :2]))
     # `dm != 0.0` drops the zeroed upper triangle/diagonal (and any nodes
     # at exactly the same position).
     collision_pairs = list(zip(*np.where((dm < self.node_radius*2) & (dm != 0.0))))
Ejemplo n.º 56
test = flag_1_data.loc[["S000713", "S000715"]]

# test2 = flag_1_data[flag_1_data["APPLICATION_NUMBER_1"] =="S000713"]

# Calculate distance matrices between PODs within each huc 8 with this package:
from scipy.spatial import distance_matrix
#n is just a counter
n = 0
for huc in flag_1_data["HUC_8_NUMBER"].unique():
    n = n + 1
    print(n, huc)
    data = flag_1_data[["HUC_8_NUMBER", "LATITUDE", "LONGITUDE"]][flag_1_data["HUC_8_NUMBER"]==huc]
    dist = pd.DataFrame(distance_matrix(data.values, data.values), index=data.index, columns=data.index)
    cols = dist.index
    lst.append((pd.DataFrame(np.triu(dist, k = 1), index = cols, columns = cols)).replace(0, 999999999))

threshold = 1000000
app_list = []
x = 0
for i, list_ in enumerate(lst):
    x = x + 1
    df = lst[i]
    for j, app in enumerate(df.index):
        df1 = df[df.loc[df.index[j]] < threshold]
        if len(df1) > 0:
            # app_list.append(tuple((df.index[j], df1.index.values[0])))
Ejemplo n.º 57
    def plot_diffraction_vectors(
            distance_threshold_all=0.005):  # pragma: no cover
        """Plot the unique diffraction vectors.

        xlim : float
            The maximum x coordinate in reciprocal Angstroms to be plotted.
        ylim : float
            The maximum y coordinate in reciprocal Angstroms to be plotted.
        unique_vectors : DiffractionVectors, optional
            The unique vectors to be plotted (optional). If not given, the
            unique vectors will be found by get_unique_vectors.
        distance_threshold : float, optional
            The minimum distance in reciprocal Angstroms between diffraction
            vectors for them to be considered unique diffraction vectors.
            Will be passed to get_unique_vectors if no unique vectors are
        method : str
            The method to use to determine unique vectors, if not given.
            Valid methods are 'strict', 'distance_comparison' and 'DBSCAN'.
            'strict' returns all vectors that are strictly unique and
            corresponds to distance_threshold=0.
            'distance_comparison' checks the distance between vectors to
            determine if some should belong to the same unique vector,
            and if so, the unique vector is iteratively updated to the
            average value.
            'DBSCAN' relies on the DBSCAN [1] clustering algorithm, and
            uses the Euclidean distance metric.
        min_samples : int, optional
            The minimum number of not identical vectors within one cluster
            for it to be considered a core sample, i.e. to not be considered
            noise. Will be passed to get_unique_vectors if no unique vectors
            are given. Only used if method=='DBSCAN'.
        image_to_plot_on : BaseSignal, optional
            If provided, the vectors will be plotted on top of this image.
            The image must be calibrated in terms of offset and scale.
        image_cmap : str, optional
            The colormap to plot the image in.
        plot_label_colors : bool, optional
            If True (default is False), also the vectors contained within each
            cluster will be plotted, with colors according to their
            cluster membership. If True, the unique vectors will be
            calculated by get_unique_vectors. Requires method=='DBSCAN'.
        distance_threshold_all : float, optional
            The minimum distance, in calibrated units, between diffraction
            vectors inside one cluster for them to be plotted. Only used if
            plot_label_colors is True and requires method=='DBSCAN'.

        fig : matplotlib figure
            The plot as a matplotlib figure.


        fig = plt.figure()
        ax = fig.add_subplot(111)
        offset, scale = 0., 1.
        if image_to_plot_on is not None:
            offset = image_to_plot_on.axes_manager[-1].offset
            scale = image_to_plot_on.axes_manager[-1].scale
            ax.imshow(image_to_plot_on, cmap=image_cmap)
            ax.set_xlim(-xlim, xlim)
            ax.set_ylim(ylim, -ylim)

        if plot_label_colors is True and method == 'DBSCAN':
            clusters = self.get_unique_vectors(distance_threshold,
            labs = clusters.labels_[clusters.core_sample_indices_]
            # Get all vectors from the clustering not considered noise
            cores = clusters.components_
            if cores.size == 0:
                warn('No clusters were found. Check parameters, or '
                     'use plot_label_colors=False.')
                peaks = DiffractionVectors(cores)
                # Since this original number of vectors can be huge, we
                # find a reduced number of vectors that should be plotted, by
                # running a new clustering on all the vectors not considered
                # noise, considering distance_threshold_all.
                peaks = peaks.get_unique_vectors(distance_threshold_all,
                peaks_all_len = peaks.data.shape[0]
                labels_to_plot = np.zeros(peaks_all_len)
                peaks_to_plot = np.zeros((peaks_all_len, 2))
                # Find the labels of each of the peaks to plot by referring back
                # to the list of labels for the original vectors.
                for n, peak in zip(np.arange(peaks_all_len), peaks):
                    index = distance_matrix([peak.data], cores).argmin()
                    peaks_to_plot[n] = cores[index]
                    labels_to_plot[n] = labs[index]
                # Assign a color value to each label, and shuffle these so that
                # adjacent clusters hopefully get distinct colors.
                cmap_lab = get_cmap('gist_rainbow')
                lab_values_shuffled = np.arange(np.max(labels_to_plot) + 1)
                labels_steps = np.array(
                        map(lambda n: lab_values_shuffled[int(n)],
                labels_steps = labels_steps / (np.max(labels_to_plot) + 1)
                # Plot all peaks
                for lab, peak in zip(labels_steps, peaks_to_plot):
                    ax.plot((peak[0] - offset) / scale,
                            (peak[1] - offset) / scale,
        if unique_vectors is None:
            unique_vectors = self.get_unique_vectors(distance_threshold,
        # Plot the unique vectors
        ax.plot((unique_vectors.data.T[0] - offset) / scale,
                (unique_vectors.data.T[1] - offset) / scale, 'kx')
        return fig
Ejemplo n.º 58
import pandas as pd
import numpy as np
from scipy.spatial import distance_matrix
from scipy.spatial import KDTree

df = pd.read_csv(r"C:\Users\Asus\Documents\GitHub\Gisele_MILP\cluster3_PS.csv")

for i in df.index:
    if df.loc[i]['Population'] == 0:
        df.drop(i, inplace=True)

coords = pd.DataFrame()
coords['X'] = df['X']
coords['Y'] = df['Y']

Dist_matrix = pd.DataFrame(distance_matrix(coords.values, coords.values),
Weight = pd.DataFrame()
PS = pd.DataFrame()
df.index = df['id']

k = 0
#for i, row in df.iterrows():
#   if df.loc[i,'Population'] > 10:
#      df[i, 'Weight']= 0

#create new column with absorbed power

df = df.assign(Power=0.1)
df['Power'] = df['PS'].apply(lambda x: '0' if x == 1 else '0.1')
Ejemplo n.º 59
    [5, 3],
    [10, 15],
    [15, 12],
    [24, 10],
    [30, 30],
    [85, 70],
    [71, 80],
    [60, 78],
    [70, 55],
    [80, 91],
print('X =')
df = X
dist = pd.DataFrame(distance_matrix(df, df))

labels = range(1, 11)

plt.figure(figsize=(10, 7))
plt.scatter(X[:, 0], X[:, 1], label='True Position')

for label, x, y in zip(labels, X[:, 0], X[:, 1]):
                 xy=(x, y),
                 xytext=(-3, 3),
                 textcoords='offset points',
    # Replace the data points with their respective cluster value
    # (ex. 0) and is color coded with a colormap (plt.cm.spectral)
        X1[i, 0],
        X1[i, 1],
        str(y1[i]),  #places the cluster # at data points, colors them
            agglom.labels_[i] / 10.
        ),  #gets proper label so that each data point in a cluster is the same color
            'weight': 'bold',
            'size': 9

# Display the plot of the original data before clustering
plt.scatter(X1[:, 0], X1[:, 1], marker='.')
# Display the plot

#---------------Dendrogram/phylogenetic tree--------------------------------

#create a distance matrix between every point
dist_matrix = distance_matrix(X1, X1)

#define type of hierarchy
Z = hierarchy.linkage(dist_matrix, 'complete')

#display dendrogram
dendro = hierarchy.dendrogram(Z)