예제 #1
0
파일: hdbscan.py 프로젝트: zhihuidu/arkouda
    def extract_clusters(self):
        """Assign every node the label of the selected cluster it falls in.

        Walks the keys of ``self.cluster_data`` from last to first (skipping
        the first key) and, at each one, copies the labels of the selected
        clusters that appear there into a per-node label array.

        Reads:
            self.cluster_data: mapping of delta -> record with ``'index'``
                and ``'labels'`` entries.
            self.selection_data: record with ``'index'`` and ``'selected'``
                entries; ``'selected'`` is used as a mask over ``'index'``.

        Writes:
            self.extracted_clusters: per-node labels, 0 for nodes that were
                not placed in any selected cluster.
            self.unextracted: the selected labels that were never found;
                only set when at least one such label remains.

        Raises:
            IndexError: if ``self.cluster_data`` has fewer than two keys, so
                that no delta is left after the first key is dropped.
        """
        # List all the time window keys
        deltas = list(self.cluster_data.keys())
        # Reverse them so we can start with the last clustering data
        deltas.reverse()
        # Ignore delta 0 as it is an artifact of the clustering that isn't
        # used (after the reversal it is the last element, assuming it was
        # the first key inserted).
        deltas = deltas[:-1]

        # The original message had no placeholder, so the deltas were
        # silently dropped from the output.
        print("Extracting clusters from each time delta: {}".format(deltas))

        # This is list of cluster labels which we will update at each time
        # delta where a value of 0 should indicate an unclustered node
        final_cluster_info = ak.zeros_like(
            self.cluster_data[deltas[0]]['index'])
        # A list of cluster labels that are selected
        selected_clusters = self.selection_data['index'][
            self.selection_data['selected']]
        selected_clusters = selected_clusters[selected_clusters > 0]

        for delta in tqdm(deltas):
            cluster = self.cluster_data[delta]['labels']
            # Negative labels are flipped to positive; everything else
            # becomes 0 and is therefore ignored below.
            cluster_positive = ak.where(cluster < 0, -cluster, 0)

            # The cluster labels found in this delta
            labels_this_delta = cluster_positive[cluster_positive > 0]

            # A boolean array to indicate which "selected" clusters are
            # labels this delta
            found_mask = ak.in1d(selected_clusters, labels_this_delta)

            # A list of clusters selected for this delta
            extract_this_delta = selected_clusters[found_mask]

            # A boolean array indicating which nodes are in clusters that
            # are extracted this delta
            node_mask = ak.in1d(cluster_positive, extract_this_delta)

            # Indicate the clusters for all the nodes in clusters that we
            # extracted this delta
            final_cluster_info[node_mask] = cluster_positive[node_mask]

            # Labels found at this delta are done; keep only the rest.
            selected_clusters = selected_clusters[~found_mask]

        self.extracted_clusters = final_cluster_info

        if selected_clusters.size > 0:
            print("Failed. {} of the selected clusters remain.".format(
                selected_clusters.size))
            print("Failing cluster labels: {}".format(selected_clusters))

            # We can refer to this list here:
            self.unextracted = selected_clusters
        else:
            print("Extraction completed succesfully.")
예제 #2
0
    def value_counts(self, sort=True):
        """Count the occurrences of each unique value.

        The counts come from ``ak.value_counts(self.values)`` and are
        wrapped in a new Series.

        Parameters
        ----------
        sort : bool, default True
            When true, the result is passed through
            ``sort_values(ascending=False)`` so that the most frequent
            value comes first. When false, the Series is returned as
            built, without sorting.

        Returns
        -------
        Series
            The counts of the unique values.
        """
        counts = Series(ak.value_counts(self.values))
        return counts.sort_values(ascending=False) if sort else counts
예제 #3
0
# Demo script exercising Arkouda indexing, value counting and iteration.

# Assign an all-zero bool array to `a` at the positions listed in `iv`.
# NOTE(review): `a` is not defined in the visible part of this script;
# presumably it was created earlier -- confirm before running standalone.
iv = ak.arange(0,5,1)
b = ak.zeros(iv.size,dtype=ak.bool)
a[iv] = b
print(a)

# Same scatter-style assignment, this time with randomly generated integer
# indices and int64 zeros.
# NOTE(review): assumes ak.randint takes (low, high, size) -- confirm.
a = ak.randint(10,20,10)
print(a)
iv = ak.randint(0,10,5)
print(iv)
b = ak.zeros(iv.size,dtype=ak.int64)
a[iv] = b
print(a)

# Count values in a random array; the result is indexed as a pair, and the
# size and contents of each element are printed.
# NOTE(review): `ak.v` is presumably a verbosity flag -- confirm.
ak.v = False
a = ak.randint(10,30,40)
vc = ak.value_counts(a)
print(vc[0].size,vc[0])
print(vc[1].size,vc[1])

ak.v = False

# Boolean-mask indexing, first on an arange result and then on a linspace
# result. Only the second `b` is printed; the first is overwritten.
a = ak.arange(0,10,1)
b = a[a<5]
a = ak.linspace(0,9,10)
b = a[a<5]
print(b)

# Convert an array to a Python list by iterating over it.
# NOTE(review): `ak.pdarrayIterThresh` presumably caps the array size that
# may be iterated client-side -- confirm.
ak.v = True
ak.pdarrayIterThresh = 1000
a = ak.arange(0,10,1)
print(list(a))