Ejemplo n.º 1
0
# Demo script: cluster a handful of time series and draw one subplot per
# cluster. `X_train`, `cluster` and `plt` are expected to be in scope already.

# Only the first 50 series are used.
X_train = X_train[:50]

# Series length; reused further down as the right-hand x-axis limit.
sz = X_train.shape[1]

# Keep exactly two axes: (series, time step).
# NOTE(review): presumably X_train arrives as (n_series, length, 1) - confirm.
X_train = X_train.reshape(X_train.shape[0], sz)

# Parameters handed to cluster.ClusterSimilarityMatrix below.
eps, min_samples = 20, 2

# Set LOAD to True to reuse a similarity matrix saved by an earlier run
# instead of recomputing it.
LOAD = False
if not LOAD:
    SimilarityMatrix = cluster.GenerateSimilarityMatrix(X_train)
    cluster.SaveSimilarityMatrix(SimilarityMatrix)
else:
    SimilarityMatrix = cluster.LoadSimilarityMatrix()

nclusters, labels, cnt = cluster.ClusterSimilarityMatrix(
    SimilarityMatrix, eps, min_samples)

print("DEBUG::number of clusters found =", nclusters, sep="\n")

# One stacked subplot per cluster, every member series drawn in black.
plt.figure()
for yi in range(nclusters):
    plt.subplot(nclusters, 1, yi + 1)
    # Boolean-mask selection of the series assigned to cluster `yi`
    # (assumes `labels` is array-like and aligned with X_train rows).
    cluster_members = X_train[labels == yi]
    for xx in cluster_members:
        plt.plot(xx.ravel(), "k-")
    plt.xlim(0, sz)
    plt.ylim(-4, 4)
    plt.title("Cluster %d" % (yi + 1))

plt.tight_layout()
plt.show()
Ejemplo n.º 2
0
    def produce(self,
                *,
                inputs: Inputs,
                timeout: float = None,
                iterations: int = None) -> CallResult[Outputs]:
        """
        Cluster the time series found in ``inputs`` and return one label per series.

        Parameters
        ----------
        inputs : handed to ``TimeSeriesFormatterPrimitive`` when the
            ``long_format`` hyperparameter is falsy, otherwise to
            ``DatasetToDataFramePrimitive`` (resource ``learningData``).
            The resulting frame must expose ``d3mIndex`` and ``value`` columns.
        timeout : float
            Not read by this method.
        iterations : int
            Not read by this method.

        Returns
        ----------
        Outputs
            A ``CallResult`` wrapping a two-column dataframe: ``d3mIndex``
            (the unique index values of the input) and ``label`` (the cluster
            number assigned to the corresponding series).
        """

        # temporary (until Uncharted adds conversion primitive to repo)
        if self.hyperparams['long_format']:
            ds2df_hyperparams = DatasetToDataFrame.DatasetToDataFramePrimitive.metadata.query(
            )['primitive_code']['class_type_arguments']['Hyperparams']
            ds2df_client = DatasetToDataFrame.DatasetToDataFramePrimitive(
                hyperparams=ds2df_hyperparams.defaults().replace(
                    {"dataframe_resource": "learningData"}))
            inputs = d3m_DataFrame(ds2df_client.produce(inputs=inputs).value)
        else:
            formatter = TimeSeriesFormatterPrimitive(hyperparams=self._hp)
            inputs = formatter.produce(inputs=inputs).value['0']

        # One row of the value matrix per distinct d3mIndex.
        # NOTE(review): the reshape presumes every series contributes the same
        # number of rows and that rows are grouped by series - confirm.
        series_ids = inputs.d3mIndex.unique()
        n_ts = len(series_ids)
        ts_sz = int(inputs.shape[0] / n_ts)
        input_vals = np.array(inputs.value).reshape(n_ts, ts_sz)

        # Pick the clustering routine and the name of its first hyperparameter;
        # both routines also receive 'min_samples'.
        if self.hyperparams['algorithm'] == 'DBSCAN':
            cluster_fn, first_hp = cluster.ClusterSimilarityMatrix, 'eps'
        else:
            cluster_fn, first_hp = cluster.HClusterSimilarityMatrix, 'min_cluster_size'
        _, raw_labels, _ = cluster_fn(
            input_vals, self.hyperparams[first_hp],
            self.hyperparams['min_samples'])

        # transform labels for D3M classification task: non-negative labels
        # are shifted by +1, negative ones by +2.
        # NOTE(review): this sends both -1 and 0 to 1 - confirm that is intended.
        shifted_labels = [lbl + 1 if lbl >= 0 else lbl + 2 for lbl in raw_labels]

        # Assemble the (d3mIndex, label) output frame.
        out_df = pandas.concat(
            [pandas.DataFrame(series_ids), pandas.DataFrame(shifted_labels)],
            axis=1)
        out_df.columns = ['d3mIndex', 'label']
        result_df = d3m_DataFrame(out_df)

        # Column metadata, in column order. Names are hard-coded here rather
        # than looked up from the input metadata.
        column_specs = (
            ('d3mIndex', (
                'http://schema.org/Integer',
                'https://metadata.datadrivendiscovery.org/types/PrimaryKey',
            )),
            ('label', (
                'http://schema.org/Integer',
                'https://metadata.datadrivendiscovery.org/types/SuggestedTarget',
                'https://metadata.datadrivendiscovery.org/types/TrueTarget',
                'https://metadata.datadrivendiscovery.org/types/Target',
            )),
        )
        for col_pos, (col_name, semantic_types) in enumerate(column_specs):
            selector = (metadata_base.ALL_ELEMENTS, col_pos)
            col_meta = dict(result_df.metadata.query(selector))
            col_meta['structural_type'] = str
            col_meta['name'] = col_name
            col_meta['semantic_types'] = semantic_types
            result_df.metadata = result_df.metadata.update(selector, col_meta)

        return CallResult(result_df)