Esempio n. 1
0
    def test_3_filter_galaxies(self):
        """Check the wall/field split produced by filter_galaxies.

        Update 3/14/2022 - filter_galaxies no longer returns the grid_shape
        and coords_min parameters; they are calculated inside the main body
        of find_voids() so that the 3 mask_mode types stay consistent.

        """
        # filter_galaxies is given the shuffled galaxy table, the survey
        # name, the output directory and the distance metric; its result is
        # unpacked into the wall and the field galaxy coordinates.
        f_wall, f_field = filter_galaxies(self.galaxies_shuffled,
                                          'test_',
                                          '',
                                          dist_metric='redshift')

        # Distance from each galaxy to its third nearest neighbour.  k=4 is
        # requested because the tree is queried with its own points.
        tree = neighbors.KDTree(self.gal)
        neighbor_dist = tree.query(self.gal, k=4)[0]
        dist3 = neighbor_dist[:, 3]

        # Separation used by this test: mean + 1.5 standard deviations.
        cutoff = np.mean(dist3) + 1.5*np.std(dist3)

        # Check the wall galaxy coordinates (the expected wall sample is
        # stored on the test class as well).
        TestVoidFinder.wall = self.gal[dist3 < cutoff]
        wall_matches = np.isclose(f_wall, TestVoidFinder.wall)
        self.assertTrue(wall_matches.all())

        # Check the field galaxy coordinates
        field = self.gal[dist3 >= cutoff]
        field_matches = np.isclose(f_field, field)
        self.assertTrue(field_matches.all())
Esempio n. 2
0
def kdd_Neigbors_2(dta, index_ano):
    """Grow a set of anomaly indices with a KD-tree search and plot the result.

    Parameters
    ----------
    dta : pandas DataFrame
        Input data; column 1 of ``dta.values`` supplies the second tree
        coordinate and ``dta.value`` is the series that gets plotted.
    index_ano : sequence of int
        Indices of the initial anomaly points.

    Returns
    -------
    The last index set returned before ``find_anomaly_point`` stopped
    producing a larger one.
    """
    start_anp = index_ano

    # Every sample becomes the 2-D point [row position, value in column 1].
    rows = dta.values
    X = [[pos, rows[pos][1]] for pos in np.arange(len(rows))]
    tree = nb.KDTree(X, leaf_size=20)

    # Keep expanding while each pass yields more points than the last one.
    initial_index = []
    while True:
        candidates = find_anomaly_point(tree, X, index_ano, initial_index)
        if len(index_ano) >= len(candidates):
            break
        initial_index = index_ano
        index_ano = np.array(candidates, dtype=np.int32)

    inverse_neighboor = index_ano
    print("Nearest Neighboor of Anomaly point ", len(inverse_neighboor))

    plt.figure(1)
    plt.subplot(211)
    series = dta.value.values
    plt.plot(series)
    # Circles mark the grown set, crosses mark the points we started from.
    grown = np.array(inverse_neighboor, dtype=np.int32)
    plt.plot(grown, series[grown], 'o')
    seeds = np.array(start_anp, dtype=np.int32)
    plt.plot(seeds, series[seeds], 'x')
    plt.show()
    return inverse_neighboor
Esempio n. 3
0
 def build(self, embs, labels):
     """Store labels and embeddings and index the embeddings in a KD-tree.

     Parameters
     ----------
     embs : numpy.ndarray or sequence of arrays
         An ndarray is stored as given; anything else is stacked row-wise
         with ``np.vstack`` first.
     labels
         Stored unchanged on ``self.labels``.
     """
     self.labels = labels
     # isinstance (instead of comparing exact types) is the idiomatic check
     # and also lets ndarray subclasses through without re-stacking them.
     if isinstance(embs, np.ndarray):
         self.embs = embs
     else:
         self.embs = np.vstack(embs)
     self.tree = neighbors.KDTree(self.embs)
Esempio n. 4
0
def build_kdt(X_norm, **kwargs):
    """Build a KD-tree over ``X_norm``.

    ``leaf_size=40`` and ``metric="minkowski"`` are used unless overridden
    through ``kwargs``; every keyword is forwarded to the tree constructor.
    """
    # Caller-supplied keywords win over the defaults listed first.
    options = {"leaf_size": 40, "metric": "minkowski", **kwargs}
    return neighbours.KDTree(X_norm, **options)
Esempio n. 5
0
def leiden_clustering(umap_res,
                      resolution_range=(0, 1),
                      random_state=2,
                      kdtree_dist='euclidean',
                      n_neighbors=25):
    """Compute a Leiden resolution profile on a k-nearest-neighbour graph.

    Parameters
    ----------
    umap_res : numpy.ndarray
        One row per point; ``umap_res.shape[0]`` is the number of vertices.
    resolution_range : tuple
        Passed to ``Optimiser.resolution_profile``.
    random_state : int
        Seed given to the Leiden optimiser.
    kdtree_dist : str
        Metric name used for the KD-tree.
    n_neighbors : int
        Number of neighbours requested for every point (previously the
        fixed value 25, which remains the default).

    Returns
    -------
    The profile returned by ``Optimiser.resolution_profile``.
    """
    n_points = umap_res.shape[0]
    tree = neighbors.KDTree(umap_res, metric=kdtree_dist)

    # A single batch query replaces one tree query per point; row r of the
    # result holds the neighbours of point r.
    dist, ind = tree.query(umap_res, k=n_neighbors)
    vals = dist.ravel()
    i = numpy.repeat(numpy.arange(n_points), ind.shape[1])
    j = ind.ravel()
    print(len(vals))

    # Distances are the matrix entries, so pairs at distance 0 (for example
    # a point matched with itself) are dropped by nonzero() below.
    ginput = sps.csc_matrix((vals, (i, j)), shape=(n_points, n_points))
    sources, targets = ginput.nonzero()
    G = ig.Graph(edges=list(zip(sources.tolist(), targets.tolist())))

    optimiser = leidenalg.Optimiser()
    optimiser.set_rng_seed(random_state)
    profile = optimiser.resolution_profile(G,
                                           leidenalg.CPMVertexPartition,
                                           resolution_range=resolution_range,
                                           number_iterations=0)
    print([len(elt) for elt in profile])
    return profile
Esempio n. 6
0
    def test_add_tracker(self, mock_write, mock_color, mock_mkdir):
        """Add an already known tracker to a history holding 12 trackers."""
        mock_mkdir.side_effect = fake_os
        print("\ntest add tracker")

        n_trackers = 12
        matcher = FakeMatcher()
        # One-hot embeddings: row i is the unit vector of tracker i.
        embs = np.eye(n_trackers)
        labels = list(range(1, n_trackers + 1))
        matcher.build(embs, labels)

        # total 12 tracker, each made of 40 references to its unit vector
        trackers = {}
        for i in range(n_trackers):
            one_hot = embs[i].copy()
            trackers[i] = create_tracker([one_hot] * 40)

        tracker_history = TrackersHistory()
        tracker_history.trackers = trackers
        tracker_history.current_id = 13
        tracker_history.start_time = 0
        tracker_history.labels = labels
        tracker_history.embs = list(embs)
        tracker_history.history_matcher = neighbors.KDTree(
            embs, leaf_size=Config.Matcher.INDEX_LEAF_SIZE, metric='euclidean')

        # Directory creation is patched out while the tracker is added.
        mkdir_mock = MagicMock(side_effect=fake_os)
        with patch('os.mkdir', mkdir_mock):
            tracker_history.add_tracker(trackers[0], matcher, Mock())
Esempio n. 7
0
    def __predict_proba(self, X):
        """ __predict_proba

        Private implementation of the predict_proba method: looks up the
        ``self.k`` nearest stored samples of every row in ``X``.

        Parameters
        ----------
        X: Numpy.ndarray of shape (n_samples, n_features)

        Returns
        -------
        tuple
            The k-nearest neighbors' distances followed by their indexes,
            exactly as returned by the tree query.

        Notes
        -----
        The tree is rebuilt from the current window contents on every call.
        To use the project's own KDTree implementation instead, build the
        tree as follows:

            KDTree(self.window.get_attributes_matrix(), metric='euclidean',
                   categorical_list=self.categorical_list, return_distance=True)

        """
        stored_samples = self.window.get_attributes_matrix()
        tree = sk.KDTree(stored_samples, self.leaf_size, metric='euclidean')
        queries = np.asarray(X)
        return tree.query(queries, k=self.k)
Esempio n. 8
0
def _compute_connectivity(positions, radius, add_self_edges):
    """Return sender/receiver index arrays for all node pairs within a radius.

    Args:
      positions: Positions of nodes in the graph. Shape:
        [num_nodes_in_graph, num_dims].
      radius: Radius of connectivity.
      add_self_edges: Whether to keep edges from a node to itself.

    Returns:
      senders indices [num_edges_in_graph]
      receiver indices [num_edges_in_graph]
    """
    kd_tree = neighbors.KDTree(positions)
    # One array of neighbour indices per node.
    found_per_node = kd_tree.query_radius(positions, r=radius)

    # Node i is repeated once for every neighbour found around it, so the
    # two arrays line up edge by edge.
    degrees = [len(found) for found in found_per_node]
    senders = np.repeat(np.arange(len(positions)), degrees)
    receivers = np.concatenate(found_per_node, axis=0)

    if add_self_edges:
        return senders, receivers

    # Remove self edges.
    keep = senders != receivers
    return senders[keep], receivers[keep]
Esempio n. 9
0
def comparison_kd_tree_library():
    """Print the neighbours found by the local KDTree and by scikit-learn.

    Six fixed 2-D points are indexed, the point (10, 10) is used as the
    query, and all six neighbours are requested from both implementations.
    The two neighbour lists and their difference are printed.
    """
    # data
    n_attributes = 2
    data = np.column_stack(([9, 4, 5, 7, 8, 2], [6, 7, 4, 2, 1, 3]))

    # target: the random draw is immediately replaced by a fixed point; the
    # call is kept so the global NumPy random state advances as before.
    np.random.uniform(0, 1, n_attributes)
    target = np.array([10, 10])

    # kdtrees initialization
    # NOTE(review): the local tree is bound to the name `KDTree` itself and
    # never constructed from `data` - confirm whether a call is missing.
    kdtree_lucas = KDTree
    kdtree_scikit = neighbors.KDTree(data, metric='euclidean')

    # kdtrees query, asking for every indexed point
    k = data.shape[0]
    distances_lucas, indices_lucas = kdtree_lucas.query(target, k)
    # NOTE(review): `target` is one-dimensional here - confirm that the
    # scikit-learn tree accepts it without reshaping to (1, n_features).
    distances_scikit, indices_scikit = kdtree_scikit.query(target, k=k)
    nn_lucas = data[indices_lucas]
    nn_scikit = data[indices_scikit]

    # difference
    for title, values in (('nearest neighbors lucas', nn_lucas),
                          ('nearest neighbors scikit', nn_scikit)):
        print(title)
        print(values)
        print('')
    print('difference')
    print(nn_lucas - nn_scikit)
Esempio n. 10
0
    def __predict_proba(self, X):
        """ __predict_proba

        Private implementation of the predict_proba method: looks up the
        ``self.n_neighbors`` nearest stored samples of every row in ``X``.

        Parameters
        ----------
        X: Numpy.ndarray of shape (n_samples, n_features)

        Returns
        -------
        tuple
            The k-nearest neighbors' distances followed by their indexes,
            exactly as returned by the tree query.

        """
        # To use our own KDTree implementation please replace it as follows
        # tree = KDTree(self.window.get_attributes_matrix(), metric='euclidean',
        #              nominal_attributes=self._nominal_attributes, return_distance=True)

        # The tree is rebuilt from the current window contents on each call.
        stored_samples = self.window.get_attributes_matrix()
        tree = sk.KDTree(stored_samples, self.leaf_size, metric='euclidean')
        queries = np.asarray(X)
        return tree.query(queries, k=self.n_neighbors)
Esempio n. 11
0
def k_dist(X, metric, k=3):
    """Return each sample's distance to its k-th nearest neighbour.

    Parameters
    ----------
    X : array-like of shape (n_samples, n_features)
        Points that are both indexed and queried, so the first neighbour of
        a sample is normally the sample itself at distance 0.
    metric
        Accepted for interface compatibility; it is not used and the tree
        is built with its default metric.
    k : int
        Rank of the neighbour whose distance is reported.

    Returns
    -------
    list of float
        One distance per row of ``X``, in input order.
    """
    tree = sk.KDTree(X, leaf_size=30)
    # One batch query instead of a separate tree query per sample.
    dist, _ = tree.query(X, k=k)
    return dist[:, k - 1].tolist()
def quantize(img, representative_points):
    """Replace every pixel by its nearest representative point.

    Parameters
    ----------
    img : numpy.ndarray
        Image indexed as ``img[row][col]`` -> feature vector, i.e. an array
        of shape (rows, cols, channels).
    representative_points : array-like of shape (n_points, channels)
        Candidate values (the palette).

    Returns
    -------
    numpy.ndarray
        Array with the shape and dtype of ``img`` holding, for every pixel,
        the closest representative point (cast to the dtype of ``img``).
    """
    palette = np.asarray(representative_points)
    tree = neighbors.KDTree(palette, leaf_size=30)
    quantized_img = np.zeros_like(img)
    if img.size == 0:
        # Nothing to look up for an empty image.
        return quantized_img

    # All pixels are looked up in one batch query rather than one Python
    # level query per pixel.
    pixels = img.reshape(-1, img.shape[-1])
    _, nearest = tree.query(pixels, k=1)
    # Assigning into the pre-allocated array keeps the dtype of the input.
    quantized_img[...] = palette[nearest[:, 0]].reshape(img.shape)
    return quantized_img
Esempio n. 13
0
def kdtree(df_reduced, lat_col, long_col, leaf_size, k):
    """Index two coordinate columns in a KD-tree and query every row.

    Takes in:
        df_reduced: table indexed with ``[[lat_col, long_col]]``
            (presumably a pandas DataFrame - confirm against callers).
        lat_col, long_col: names of the two coordinate columns.
        leaf_size: leaf size handed to the KD-tree.
        k: number of neighbours requested per row.
    Returns:
        (tree, dist, ind): the tree plus the distances and indices of the
        k nearest rows of every row (the rows are queried against
        themselves).
    """
    coords = df_reduced[[lat_col, long_col]]
    tree = neighbors.KDTree(coords, leaf_size=leaf_size)
    distances, indices = tree.query(coords, k=k)
    return tree, distances, indices
Esempio n. 14
0
    def store_best_threshold(self, stored_embeddings, dir):
        """Estimate the distance threshold that best separates the classes.

        Every stored embedding is compared with the farther of its two
        nearest neighbours in a KD-tree built from all embeddings.  Because
        the tree contains the query itself, that is normally the closest
        *other* embedding.  The embedding counts as recognized when that
        neighbour belongs to the same class.

        Parameters
        ----------
        stored_embeddings : mapping
            ``{class: {key: [embedding, ...]}}``; each embedding must fit a
            row of length 512.
        dir : str
            Directory used to build the ``threshold.txt`` path.  The file is
            not written; the result is only reported through ``print_fun``.
        """
        thresholds = np.arange(0, 1, 0.0025)
        embedding_size = 512

        # Flatten the nested mapping into one list of embeddings plus a
        # parallel list holding the class of each embedding.
        flat_embeddings = []
        class_index = []
        for cls in stored_embeddings:
            for embs in stored_embeddings[cls].values():
                for emb in embs:
                    flat_embeddings.append(emb)
                    class_index.append(cls)

        size = len(flat_embeddings)
        plain_embeddings = np.zeros([size, embedding_size])
        for row, emb in enumerate(flat_embeddings):
            plain_embeddings[row] = emb

        embeddings = (plain_embeddings + 1.) / 2.
        kd_tree = neighbors.KDTree(embeddings, metric='euclidean')

        dists = np.zeros([size], dtype=np.float32)
        # The builtin bool is used: the np.bool alias no longer exists in
        # current NumPy releases.
        recognized = np.zeros([size], dtype=bool)
        for i, emb in enumerate(embeddings):
            dist, idx = kd_tree.query(emb.reshape([1, embedding_size]), k=2)

            # Select the farther neighbour ONCE and use that position for
            # both the distance and the index.  Previously the position was
            # recomputed after `dist` had become a scalar, which always
            # selected the query point itself.
            farthest = np.argmax(dist[0])
            detected_class = class_index[idx[0][farthest]]

            recognized[i] = detected_class == class_index[i]
            dists[i] = dist[0][farthest]

        best_threshold = 0
        max_detect = 0
        if recognized.all():
            best_threshold = np.max(dists)
        else:
            # Keep the first threshold that covers the most correct matches.
            for threshold in thresholds:
                detected = np.count_nonzero(recognized & (dists < threshold))
                if detected > max_detect:
                    max_detect = detected
                    best_threshold = threshold

        # Path of the threshold file; writing it is currently disabled.
        threshold_file = os.path.join(dir, 'threshold.txt')

        print_fun('=' * 50)
        print_fun('Found best threshold = %s' % best_threshold)
        print_fun('=' * 50)
Esempio n. 15
0
def getKDTrees(places):
    """Build one KD-tree per place type.

    ``places`` maps a place type to a list of entries that each provide
    ``'lat'`` and ``'lng'``.  The result maps the same place types to a
    KD-tree (leaf size 2) over the ``[lat, lng]`` pairs of its entries.
    """
    return {
        place_type: neighbors.KDTree(
            np.array([[entry['lat'], entry['lng']] for entry in entries]),
            leaf_size=2)
        for place_type, entries in places.items()
    }
Esempio n. 16
0
def which_points_inside_source(source, target):
    """Find the target points lying inside the source point cloud.

    The source cloud is cut into horizontal slabs along z.  In every slab
    holding more than two source points, those points are chained into a
    polygon in the x/y plane and the target points of the same slab are
    tested against it.

    Returns
    -------
    (indices, points)
        Indices into the target point array of the points found inside,
        and the corresponding rows of that array.
    """
    # Convert the point clouds to NumPy arrays.
    s_points = np.asarray(source.pcd.points)
    t_points = np.asarray(target.pcd.points)
    s_z = s_points[:, 2]
    t_z = t_points[:, 2]

    # Cut planes from the highest to the lowest source z in steps of 0.2.
    cut_surface = list(np.arange(s_z.max(), s_z.min(), -0.2))

    # Inside/outside test, one slab (pair of consecutive planes) at a time.
    points_inside_idx = []
    for upper, lower in zip(cut_surface[:-1], cut_surface[1:]):
        # Indices of the points strictly between the two planes.
        s_c = np.flatnonzero((s_z > lower) & (s_z < upper)).tolist()
        t_c = np.flatnonzero((t_z > lower) & (t_z < upper)).tolist()

        # Keep only the x/y coordinates of the slab.
        layer = s_points[s_c][:, 0:2]
        points = t_points[t_c][:, 0:2]

        # A polygon needs at least three vertices.
        if len(layer) <= 2:
            continue

        # Order the vertices by walking from point 0 to the nearest point
        # that has not been visited yet.
        tree = neighbors.KDTree(layer)
        order = []
        q = 0  # current query point
        k = 2  # neighbour rank to try (rank 1 is the query itself)
        while len(order) != len(layer):
            _, idx = tree.query([layer[q]], k=k)
            candidate = idx[0, k - 1]
            if candidate in order:
                # Already visited: try the next closest point.
                k += 1
                continue
            order.append(candidate)
            q = candidate
            k = 2

        # Build the polygon from the ordered vertices.
        polygon = Polygon(layer[order])

        # Record the target indices of the points inside the polygon.
        points_inside_idx.extend(
            t_c[pos] for pos, p in enumerate(points)
            if Point(p).within(polygon))

    return points_inside_idx, t_points[points_inside_idx]
def classify_nearest_neighbor_kd_tree_sk(k):
    """Classify songs by genre with a k-nearest-neighbour vote and print the score.

    For every genre the first half of its songs is indexed sample by
    sample in a KD-tree; the second half is then classified by letting the
    ``k`` nearest indexed samples of each song sample vote for a genre.
    Progress and the final match rate are printed.
    """
    print('k = {}'.format(k))
    labels = load_labels()

    # Indexing pass: song_samples[n] belongs to genre indexed_genres[n].
    song_samples = []
    indexed_genres = []
    for genre, song_genres_ids in labels.groupby('category'):
        print('Indexing genre: {}'.format(genre))
        rows = song_genres_ids.values
        for row in rows[:len(rows) // 2]:
            song_id = row[0]
            song = pd.read_csv('song_data/training/{}'.format(song_id),
                               header=None)
            samples = song.values
            song_samples.extend(samples)
            indexed_genres.extend([genre] * len(samples))

    kd_tree = nb.KDTree(np.vstack(song_samples))

    # Evaluation pass over the remaining half of every genre.
    total_count = 0
    match_count = 0
    for genre, song_genres_ids in labels.groupby('category'):
        print('Expected genre: {}'.format(genre))
        rows = song_genres_ids.values
        for row in rows[len(rows) // 2:]:
            song_id = row[0]
            song = pd.read_csv('song_data/training/{}'.format(song_id),
                               header=None)

            # Count one vote per neighbour of every sample of the song.
            genre_freqs = {}
            for s in song.values:
                genre_indices = kd_tree.query([s], k, return_distance=False)
                logging.debug('Length of indexed genres: {}'.format(
                    len(indexed_genres)))
                logging.debug('genre_indices: {}'.format(genre_indices))
                for index in genre_indices[0]:
                    voted = indexed_genres[index]
                    genre_freqs[voted] = genre_freqs.get(voted, 0) + 1

            # The genre with the most votes is the prediction.
            actual_genre = max(genre_freqs, key=genre_freqs.get)
            print('Predicted genre: {}'.format(actual_genre))
            total_count += 1
            if genre == actual_genre:
                match_count += 1

    print('Matched {} out of {} songs: {}%'.format(
        match_count, total_count, (match_count / total_count) * 100))
Esempio n. 18
0
def main():
    """Plot a 2000-event window of an event stream as a 3-D scatter.

    The CSV named on the command line must provide ``x``, ``y`` and
    ``timestamp`` columns.  Events whose 5th nearest neighbour in the x/y
    plane (the event itself included) is more than 3 units away are
    dropped, and the figure is saved to ``hand.pdf``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("events", help="Path to events in CSV format")
    args = parser.parse_args()

    events = pd.read_csv(args.events)

    # Fixed window of 2000 events.  The explicit copy makes the in-place
    # timestamp shift below act on an independent frame instead of a slice
    # of the full table.
    i = 19750
    events = events.iloc[i:i + 2000].copy()

    events["timestamp"] -= events["timestamp"].min()

    # Density filter on the distance to the 5th nearest neighbour.
    coordinates = events[["x", "y"]].values
    kdtree = skn.KDTree(coordinates, metric="euclidean")
    nn_distance = kdtree.query(coordinates, k=5)[0][:, 4]
    events = events.loc[nn_distance <= 3]

    fig = pp.figure(figsize=(8, 8))
    ax = fig.add_subplot(111, projection="3d")

    # xaxis/yaxis/zaxis replace the w_xaxis/w_yaxis/w_zaxis aliases, which
    # recent Matplotlib releases no longer provide.
    axes3d = (ax.xaxis, ax.yaxis, ax.zaxis)
    for axis in axes3d:
        axis.line.set_visible(False)

    ax.set_xlabel("$x$")
    ax.set_ylabel("$t$")
    ax.set_zlabel("$y$")

    for axis in axes3d:
        axis.set_ticklabels([])

    ax.set_ylim(bottom=-1000, top=events["timestamp"].max())

    ax.view_init(10, 60)
    # Projection of all events onto the plane t = -1000 ...
    ax.scatter3D(events["x"],
                 np.full(len(events), -1000),
                 events["y"],
                 s=10,
                 depthshade=False)
    # ... and the events themselves, coloured by time.
    ax.scatter3D(events["x"],
                 events["timestamp"],
                 events["y"],
                 s=8,
                 c=events["timestamp"],
                 cmap="Reds")

    fig.savefig("hand.pdf", bbox_inches=mpt.Bbox.from_bounds(1, 1.4, 6.0, 4.7))
Esempio n. 19
0
 def __init__(self, resolution, width, height, landmarks, robot):
     """Set up a uniform belief grid and copy the sensor model of the robot.

     The grid has int(height / resolution) rows and int(width / resolution)
     columns, every cell starting with the same probability.  The landmark
     (x, y) positions are indexed in a KD-tree.
     """
     rows = int(height / resolution)
     cols = int(width / resolution)
     # Uniform prior: all cells share the probability mass equally.
     self.belief = np.zeros((rows, cols)) + 1.0 / (rows * cols)
     self.landmarks = landmarks
     self.landmarks_tree = neighbors.KDTree(
         [(landmark.x, landmark.y) for landmark in landmarks])
     self.resolution = resolution
     # Noise and sensor parameters are taken over from the robot as is.
     for name in ('z_noise', 'x_noise', 'y_noise',
                  'phit', 'pfalse', 'max_sense_dist'):
         setattr(self, name, getattr(robot, name))
     self.correspondence_hash = self.setup_correspondence_hash(rows, cols)
Esempio n. 20
0
def perform_test(x_set, x_test, y_set, y_test, k, dist_met, approach):
    """Run a k-nearest-neighbour regression and report run time and RMSE.

    Parameters
    ----------
    x_set, y_set : training points and their target values.
    x_test, y_test : test points and their target values.
    k : number of neighbours averaged for each prediction.
    dist_met : callable ``dist_met(train_point, test_point)``; only used by
        the brute-force approach.
    approach : ``'Brute_Force'`` or ``'kd_tree'``.

    Returns
    -------
    (run_time, error_test)
        Elapsed seconds and the RMSE between predictions and ``y_test``.

    Raises
    ------
    ValueError
        If ``approach`` is not one of the two supported names.
    """
    # Reject unknown approaches up front; previously they fell through both
    # branches and failed on an unbound result variable.
    if approach not in ('Brute_Force', 'kd_tree'):
        raise ValueError(
            "approach must be 'Brute_Force' or 'kd_tree', got %r" % (approach,))

    start_time = time.time()

    if approach == 'Brute_Force':
        pred = []
        for pt_test in x_test:
            # Distance from the test point to every training point, paired
            # with the target value of that training point.
            x_dist_list = [(dist_met(x_pt, pt_test), y_val)
                           for x_pt, y_val in zip(x_set, y_set)]
            x_dist_list.sort(key=lambda pair: pair[0])

            # Average the targets of the k closest training points.
            y_tot = [y_val for _, y_val in x_dist_list[:k]]
            pred.append(sum(y_tot) / len(y_tot))
    else:
        kd_t = neighbors.KDTree(x_set)
        # Indices of the k nearest training points of every test point.
        _, ind = kd_t.query(x_test, k=k)
        pred = np.sum(y_set[ind], axis=1) / k

    # RMSE between the predictions and the testing set
    error_test = rmse(pred, y_test)

    run_time = time.time() - start_time

    return run_time, error_test
Esempio n. 21
0
def process(inputs, ctx, **kwargs):
    """Detect boxes, embed every crop and compare it with the stored tree.

    With ``detect='false'`` in ``kwargs`` or only one driver in
    ``ctx.drivers`` the whole image is treated as a single box; otherwise
    the first driver detects boxes and the second one computes embeddings.

    The module-level ``kd_tree`` is created from the first embedding seen.
    Later embeddings are only queried against it (and the distance is
    printed); they are not added to the tree.

    Returns
    -------
    dict
        ``{'output': ...}`` holding the annotated image for video input,
        or the JPEG-encoded bytes of it otherwise.
    """
    global kd_tree

    original, is_video = helpers.load_image(inputs, 'input')
    image = original.copy()
    if kwargs.get('detect') == 'false' or len(ctx.drivers) == 1:
        detect_driver = None
        reid_driver = ctx.drivers[0]
    else:
        detect_driver = ctx.drivers[0]
        reid_driver = ctx.drivers[1]

    input_name = list(reid_driver.inputs.keys())[0]

    if detect_driver is not None:
        boxes = get_boxes(detect_driver, image, threshold=0.3)
    else:
        # No detector: one box covering the full image.
        boxes = np.array([[0, 0, image.shape[1], image.shape[0]]])
    print(f'boxes={len(boxes)}')
    for box in boxes:
        box = box.astype(int)
        img = crop_by_box(image, box)
        img = cv2.resize(img,
                         tuple(PARAMS['input_shape'][::-1]),
                         interpolation=cv2.INTER_AREA)

        prepared = norm(img, need_transpose=PARAMS['driver_type'] == 'pytorch')
        prepared = np.expand_dims(prepared, axis=0)
        outputs = reid_driver.predict({input_name: prepared})
        embedding = list(outputs.values())[0]
        # Same shift/scale as applied to the stored embeddings: (e + 1) / 2.
        embedding = (embedding + 1.) / 2.
        if not kd_tree:
            kd_tree = neighbors.KDTree(embedding, metric='euclidean')
        else:
            dist, idx = kd_tree.query(embedding, k=1)
            print(f'distance={dist}')

        cv2.rectangle(image, (box[0], box[1]), (box[2], box[3]),
                      color=(0, 250, 0),
                      thickness=2,
                      lineType=cv2.LINE_AA)

    if is_video:
        output = image
    else:
        # The channel order is reversed before encoding.
        _, buf = cv2.imencode('.jpg', image[:, :, ::-1])
        # tobytes() is the supported spelling of the deprecated tostring().
        output = buf.tobytes()

    return {'output': output}
Esempio n. 22
0
def over_sample_smote(x, n, k):
    """Create ``n`` synthetic minority samples in a SMOTE-like fashion.

    Each synthetic sample starts from a randomly chosen row of ``x`` and
    moves a random fraction of the way towards the mean of that row's
    ``k`` nearest neighbours (the row itself is among them).

    Parameters
    ----------
    x : 2-D array-like of shape (n_samples, n_features)
        Minority-class samples.
    n : int
        Number of synthetic samples to generate.
    k : int
        Number of nearest neighbours that are averaged.

    Returns
    -------
    numpy.matrix of shape (n, n_features)
        The synthetic samples, one per row.
    """
    points = np.asarray(x, dtype=float)
    minority, n_features = points.shape

    tree = neighbors.KDTree(points)
    new_x = []
    for _ in range(n):
        # choose x_i randomly
        index = random.choice(range(minority))
        x_i = points[index]
        # The tree is queried with the complete sample as one row, and the
        # neighbours' coordinates (not their indices) are averaged.
        _, neighbor_idx = tree.query(x_i.reshape(1, -1), k=k)
        x_mean = points[neighbor_idx[0]].mean(axis=0)
        # new sample = x_i + sigma * (k-neighbour mean - x_i), sigma in [0, 1)
        new_x.append(x_i + random.random() * (x_mean - x_i))

    stacked = np.asarray(new_x, dtype=float).reshape(-1, n_features)
    return np.asmatrix(stacked)
Esempio n. 23
0
def test_unsupervised_inputs():
    """NearestNeighbors must accept an estimator, a BallTree or a KDTree in fit."""
    X = rng.random_sample((10, 3))

    # Reference result: an estimator fitted directly on the data.
    reference = neighbors.NearestNeighbors(n_neighbors=1)
    reference.fit(X)
    dist1, ind1 = reference.kneighbors(X)

    # The same estimator is refitted on each kind of valid input and must
    # reproduce the reference neighbours every time.
    nbrs = neighbors.NearestNeighbors(n_neighbors=1)
    fit_inputs = (reference, neighbors.BallTree(X), neighbors.KDTree(X))
    for fit_input in fit_inputs:
        nbrs.fit(fit_input)
        dist2, ind2 = nbrs.kneighbors(X)

        assert_array_almost_equal(dist1, dist2)
        assert_array_almost_equal(ind1, ind2)
Esempio n. 24
0
async def calculate_z_value(alpha, limit_size, normal_point, result_dta, Z):
    """Raise the Z score of the inverse neighbours of ``normal_point``.

    A KD-tree is built over the points [row position, value in column 1]
    of ``result_dta``.  For every pair returned by
    ``cmfunc.find_inverneghboor_of_point`` the entry ``Z[pair[1]]`` is
    increased by ``(1 - anomaly_score[normal_point]) - pair[0] * alpha``
    whenever that amount is positive.  ``Z`` is updated in place.

    Returns
    -------
    bool
        Always True.
    """
    rows = result_dta.values
    X = [[pos, rows[pos][1]] for pos in np.arange(len(rows))]
    tree = nb.KDTree(X, leaf_size=50)

    found = cmfunc.find_inverneghboor_of_point(tree, X, normal_point,
                                               limit_size)
    # NOTE(review): the pairs are cast to int32, so a fractional first
    # element is truncated - confirm this is intended.
    nomaly_neighboor = np.array(found, dtype=np.int32)

    for NN_pair in nomaly_neighboor:
        target = NN_pair[1]
        confidence = 1 - result_dta['anomaly_score'][normal_point]
        penalty = NN_pair[0] * alpha
        if confidence - penalty > 0:
            Z[target] = Z[target] + confidence - penalty
        else:
            # Non-positive contribution: the score is written back as is.
            Z[target] = Z[target]
    return True
Esempio n. 25
0
def create_graph(nodes, k):
    """Connect every node to those of its k nearest nodes it can reach.

    An edge is added between a node and a neighbour when ``can_connect``
    accepts the pair; its weight is the Euclidean distance between them.
    Neighbours equal to the node itself are skipped.

    Returns
    -------
    The resulting undirected graph.
    """
    g = nx.Graph()
    tree = neighbors.KDTree(nodes)
    for n1 in nodes:
        nearest = tree.query([n1], k, return_distance=False)[0]
        for n2 in (nodes[j] for j in nearest):
            # Skip the node itself and pairs that cannot be linked.
            if n2 == n1 or not can_connect(n1, n2):
                continue
            weight = np.linalg.norm(np.array(n1) - np.array(n2))
            g.add_edge(n1, n2, weight=weight)
    return g
Esempio n. 26
0
    def fit_predict(self, X):
        """
        Performs clustering on data X and returns cluster labels.

        Points with fewer than ``self.min_samples`` neighbours within
        ``self.eps`` are first marked as noise (-1); they are relabelled if
        a later cluster reaches them.  Clusters are numbered from 1.

        :param X: array with data
        :return: array of cluster labels
        :raises ValueError: if ``self.metric`` is not 'euclidean', the only
            metric implemented for the neighbour search
        """
        # Fail early with a clear message; other metrics used to crash later
        # on an unbound neighbour list.
        if self.metric != 'euclidean':
            raise ValueError(
                "unsupported metric %r: only 'euclidean' is implemented"
                % (self.metric,))

        print("# Fit predict ( eps = ", self.eps, ", min_samples = ",
              self.min_samples, ") ...")
        start_time = time.time()

        clusters = 0
        labels = [0] * len(X)
        tree = sknei.KDTree(X)
        for i in range(len(X)):
            if labels[i] > 0:  # point is already marked
                continue

            # indexes of X[i] neighbours within the distance eps
            seeds = tree.query_radius([X[i]], r=self.eps)[0].tolist()
            if len(seeds) < self.min_samples:  # mark as a noise
                labels[i] = -1
                continue

            clusters += 1
            labels[i] = clusters

            # Work list of points to visit plus a set for O(1) "already
            # queued" checks; this replaces repeated array scans/appends.
            queue = seeds
            queued = set(queue)
            position = 0
            while position < len(queue):
                point = queue[position]
                position += 1
                if labels[point] > 0:  # neighbour is already in a cluster
                    continue
                labels[point] = clusters

                # A core point passes its own neighbourhood on to the
                # cluster being grown.
                reachable = tree.query_radius([X[point]], r=self.eps)[0]
                if len(reachable) >= self.min_samples:
                    for n in reachable.tolist():
                        if n not in queued:
                            queued.add(n)
                            queue.append(n)

        print("#", clusters, " clusters found.")
        print("# Fit predict finished in ", (time.time() - start_time),
              " sec.")
        return np.array(labels)
async def calculate_z_value(alpha, limit_size, normal_point, result_dta, Z):
    """Raise the Z score of the inverse neighbours found near ``normal_point``.

    Only a window of at most 100 rows around ``normal_point`` is indexed in
    the KD-tree, as points [row position, value in column 1].  For every
    pair returned by ``cmfunc.find_inverneghboor_of_point`` the entry
    ``Z[pair[1]]`` is increased by
    ``(1 - anomaly_score[normal_point]) - pair[0] * alpha`` whenever that
    amount is positive.  ``Z`` is updated in place.

    Returns
    -------
    bool
        Always True.
    """
    rows = result_dta.values
    n_rows = len(rows)
    half_window = 50

    # Choose the window [start, stop).  At the tail it now reaches the last
    # row (it used to stop one row short), and both edge windows are
    # clamped so series shorter than 100 rows neither run past the end nor
    # wrap around through negative positions.
    if half_window < normal_point < n_rows - half_window:
        start, stop = normal_point - half_window, normal_point + half_window
    elif normal_point <= half_window:
        start, stop = 0, min(2 * half_window, n_rows)
    else:
        start, stop = max(n_rows - 2 * half_window, 0), n_rows
    point_range = np.arange(start, stop)

    X = [[pos, rows[pos][1]] for pos in point_range]
    tree = nb.KDTree(X, leaf_size=50)

    found = cmfunc.find_inverneghboor_of_point(tree, X, normal_point,
                                               limit_size)
    # NOTE(review): `normal_point` is an absolute row position while X only
    # covers the window - confirm the helper expects that.
    nomaly_neighboor = np.array(found, dtype=np.int32)

    for NN_pair in nomaly_neighboor:
        target = NN_pair[1]
        confidence = 1 - result_dta['anomaly_score'][normal_point]
        penalty = NN_pair[0] * alpha
        if confidence - penalty > 0:
            Z[target] = Z[target] + confidence - penalty
        else:
            # Non-positive contribution: the score is written back as is.
            Z[target] = Z[target]
    return True
Esempio n. 28
0
    def _kd_match(self, treated_group, control_group, observation_count):
        """Match every treated unit with its nearest control unit.

        The values of ``control_group`` are indexed as 1-D points in a
        KD-tree (Minkowski metric with p=2) and each value of
        ``treated_group`` is matched with its single nearest control value.

        Parameters
        ----------
        treated_group, control_group
            Indexed value containers (presumably pandas Series - confirm
            against callers).
        observation_count
            Not used by this method.

        Returns
        -------
        The array produced by ``self._make_match_array`` in which the entry
        of every treated index holds the index label of its matched control
        unit.
        """
        tree = sk.KDTree([[x] for x in control_group],
                         leaf_size=1,
                         metric='minkowski',
                         p=2)

        matches = self._make_match_array(treated_group, control_group)

        queries = treated_group[treated_group.index]
        _, ind = tree.query([[x] for x in queries], k=1, breadth_first=True)
        # `ind` has one column per requested neighbour; flatten it before
        # indexing, because an Index cannot be indexed with a 2-D key.
        matches[treated_group.index] = control_group.index[ind.ravel()].values
        return matches
def DBSCAN(D, eps, MinPts):
    """Label the points of ``D`` with a density-based clustering pass.

    Labels start at 0 (unvisited).  A point with fewer than ``MinPts``
    neighbours within ``eps`` is labelled -1; otherwise a new cluster id is
    opened and ``growCluster`` is asked to expand it.

    Returns
    -------
    list of int
        One label per point of ``D``.
    """
    labels = [0] * len(D)
    kdTree = neighbors.KDTree(D)

    C = 0  # id of the most recently opened cluster
    for P, point in enumerate(D):
        # Skip points that already carry a label.
        if labels[P] != 0:
            continue

        NeighborPts = kdTree.query_radius(point.reshape(1, -1), r=eps)[0]
        if NeighborPts.shape[0] >= MinPts:
            C += 1
            growCluster(D, kdTree, labels, P, NeighborPts, C, eps, MinPts)
        else:
            labels[P] = -1

    return labels
Esempio n. 30
0
def build_kdtree(X, relative_scales=None, **kwargs):
    """
    Build a KD-tree from a centred and rescaled copy of the given array.

    Parameters
    ----------
    X : array-like of shape (n_samples, n_features)
    relative_scales : array-like, optional
        Per-column divisors.  When omitted, the peak-to-peak range of each
        column is used, with 1 substituted for constant columns.
    **kwargs
        Forwarded to the tree constructor; they override the defaults
        ``leaf_size=40`` and ``metric="minkowski"``.

    Returns
    -------
    (kdt, relative_scales, offset)
        The tree, the divisors actually applied and the column means that
        were subtracted.  Queries must be normalised the same way.
    """
    offset = np.mean(X, axis=0)

    if relative_scales is None:
        # Scale every column by its range.
        relative_scales = np.ptp(X, axis=0)
        # A constant column has zero range; dividing by it would put
        # NaN/inf into the tree input, so such columns are left unscaled.
        relative_scales = np.where(relative_scales == 0, 1, relative_scales)

    X_norm = (X - offset) / relative_scales

    kdt_kwds = dict(leaf_size=40, metric="minkowski")
    kdt_kwds.update(kwargs)
    kdt = neighbours.KDTree(X_norm, **kdt_kwds)

    return (kdt, relative_scales, offset)