def produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> CallResult[Outputs]:
        sk_inputs, columns_to_use = self._get_columns_to_fit(inputs, self.hyperparams)
        output = []
        if len(sk_inputs.columns):
            try:
                sk_output = self._clf.predict(sk_inputs)
            except sklearn.exceptions.NotFittedError as error:
                raise PrimitiveNotFittedError("Primitive not fitted.") from error
            # Some estimators (e.g. GaussianProcessRegressor) can predict without
            # having been fitted, so enforce the fitted check explicitly.
            if not self._fitted:
                raise PrimitiveNotFittedError("Primitive not fitted.")
            if sparse.issparse(sk_output):
                sk_output = pandas.DataFrame.sparse.from_spmatrix(sk_output)
            output = self._wrap_predictions(inputs, sk_output)
            output.columns = self._target_names
            output = [output]
        else:
            if self.hyperparams['error_on_no_input']:
                raise RuntimeError("No input columns were selected")
            self.logger.warning("No input columns were selected")
        outputs = base_utils.combine_columns(return_result=self.hyperparams['return_result'],
                                               add_index_columns=self.hyperparams['add_index_columns'],
                                               inputs=inputs, column_indices=self._target_column_indices,
                                               columns_list=output)

        return CallResult(outputs)
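
Most of the examples below close with base_utils.combine_columns, which merges the produced columns back into the input frame according to the 'return_result' hyperparameter. A minimal pandas-only sketch of those semantics (illustrative; the real d3m helper also merges metadata and handles index columns, and combine_columns_sketch is a hypothetical name):

import pandas as pd

def combine_columns_sketch(inputs: pd.DataFrame, column_indices, columns_list,
                           return_result: str = 'replace') -> pd.DataFrame:
    # Illustrative only: the real base_utils.combine_columns also merges metadata.
    produced = pd.concat(columns_list, axis=1) if columns_list else pd.DataFrame(index=inputs.index)
    if return_result == 'new':
        return produced
    if return_result == 'append':
        return pd.concat([inputs, produced], axis=1)
    # 'replace': drop the transformed originals, then append the results
    kept = inputs.drop(columns=[inputs.columns[i] for i in column_indices])
    return pd.concat([kept, produced], axis=1)
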
Example #2
    def produce(self,
                *,
                inputs: Inputs,
                timeout: float = None,
                iterations: int = None) -> CallResult[Outputs]:
        if not self._fitted:
            raise PrimitiveNotFittedError("Primitive not fitted.")

        es = self._make_entityset(inputs)

        fm = ft.calculate_feature_matrix(entityset=es, features=self.features)

        # make sure the feature matrix is ordered the same as the input
        fm = fm.reindex(es[TARGET_ENTITY].df.index)
        fm = fm.reset_index(drop=True)  # d3m wants index to increment by 1

        # treat inf as null like fit step
        fm = fm.replace([np.inf, -np.inf], np.nan)

        fm = add_metadata(fm, self.features)

        pk_index = find_primary_key(inputs, return_index=True)
        # if a pk is found
        if pk_index is not None:
            pk_col = inputs.select_columns([pk_index])
            fm = fm.append_columns(pk_col)

        target_index = find_target_column(inputs, return_index=True)
        # if a target is found,
        if target_index is not None:
            labels = inputs.select_columns([target_index])
            fm = fm.append_columns(labels)

        return CallResult(fm)
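
The reindex / reset_index / inf-replacement sequence above is a generic post-processing step for feature matrices. A standalone sketch of the same pattern with plain pandas, assuming the original row order is available as an index (align_feature_matrix is a hypothetical helper):

import numpy as np
import pandas as pd

def align_feature_matrix(fm: pd.DataFrame, original_index: pd.Index) -> pd.DataFrame:
    fm = fm.reindex(original_index)               # restore the input row order
    fm = fm.reset_index(drop=True)                # d3m wants a fresh 0..n-1 index
    return fm.replace([np.inf, -np.inf], np.nan)  # treat inf as null, as in fit
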
Example #3
    def produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> CallResult[Outputs]:
        sk_inputs, columns_to_use, _ = self._get_columns_to_fit(inputs, self.hyperparams)
        output = []
        if len(sk_inputs.columns):
            try:
                sk_output = self._clf.transform(sk_inputs)
            except sklearn.exceptions.NotFittedError as error:
                raise PrimitiveNotFittedError("Primitive not fitted.") from error
            if sparse.issparse(sk_output):
                sk_output = sk_output.toarray()
            target_columns_metadata = self._copy_columns_metadata(inputs.metadata, self._training_indices, self.hyperparams)
            output = self._wrap_predictions(inputs, sk_output, target_columns_metadata)

            output.columns = [inputs.columns[idx] for idx in range(len(inputs.columns)) if idx in self._training_indices]
            output = [output]
        else:
            if self.hyperparams['error_on_no_input']:
                raise RuntimeError("No input columns were selected")
            self.logger.warning("No input columns were selected")
        _, _, dropped_cols = self._get_columns_to_fit(inputs, self.hyperparams)
        outputs = base_utils.combine_columns(return_result=self.hyperparams['return_result'],
                                               add_index_columns=self.hyperparams['add_index_columns'],
                                               inputs=inputs, column_indices=self._training_indices + dropped_cols,
                                               columns_list=output)
        return CallResult(outputs)
Example #4
    def produce(self,
                *,
                inputs: Inputs,
                iterations: int = None,
                timeout: float = None) -> base.CallResult[Outputs]:
        """
        Inputs:  ndarray of features
        Returns: Pandas DataFrame Containing predictions
        """
        # Inference
        if not self._fitted:
            raise PrimitiveNotFittedError("Primitive not fitted.")

        XTest, columns_to_use = self._select_inputs_columns(inputs)

        output_columns = []
        if len(XTest.columns):
            # Prediction
            YpredCCF, _, _ = predictFromCCF(self._CCF, XTest)

            output_columns = [self._wrap_predictions(YpredCCF)]

        outputs = base_utils.combine_columns(
            inputs,
            columns_to_use,
            output_columns,
            return_result=self.hyperparams['return_result'],
            add_index_columns=self.hyperparams['add_index_columns'])

        return base.CallResult(outputs)
Example #5
    def produce(self,
                *,
                inputs: Inputs,
                timeout: float = None,
                iterations: int = None) -> CallResult[Inputs]:
        """ Add SIMON annotations if manual annotations do not exist. Hyperparameter overwrite controls 
            whether SIMON annotations should overwrite manual annotations or merely augment them

            Arguments:
                inputs {Inputs} -- full D3M dataframe, containing attributes, key, and target
            
            Keyword Arguments:
                timeout {float} -- timeout, not considered (default: {None})
                iterations {int} -- iterations, not considered (default: {None})

            Raises:
                PrimitiveNotFittedError: if primitive not fit

            Returns:
                CallResult[Outputs] -- Input pd frame with metadata augmented and optionally overwritten

        """
        if not self._is_fit:
            raise PrimitiveNotFittedError("Primitive not fitted.")

        # update columns with new metadata annotations
        for i in range(0, inputs.shape[1]):
            inputs.metadata = inputs.metadata.update_column(
                i, self.training_metadata_annotations[i])
        return CallResult(inputs, has_finished=self._is_fit)
Example #6
    def produce(self,
                *,
                inputs: Inputs,
                timeout: float = None,
                iterations: int = None) -> CallResult[Outputs]:
        """produce corrected and smoothed predictions

        Arguments:
            inputs {Inputs} -- D3M dataframe containing images

        Keyword Arguments:
            timeout {float} -- timeout, not considered (default: {None})
            iterations {int} -- iterations, not considered (default: {None})
        """

        if not self._is_fit:
            raise PrimitiveNotFittedError("Primitive not fitted.")

        X = inputs.astype(np.float32).values
        if self.hyperparams["normalize_features"]:
            X = X / np.sqrt((X**2).sum(axis=-1, keepdims=True))
        X, idx_train = self._compare_train_rows(X)
        X = np.ascontiguousarray(X)

        S, AD = self._make_adj_matrix(X)
        n_class = len(self.label_encoder.classes_)

        Z_orig = self._get_initial_predictions(X, n_class)
        Y_resid = self._get_residuals(Z_orig, idx_train, n_class)
        Z_corrected = self._spread_residuals(Z_orig, Y_resid, AD, idx_train)
        Z_smoothed = self._smooth_predictions(Z_corrected, S, idx_train)

        preds_df = self._prepare_d3m_df(Z_smoothed, n_class)
        return CallResult(preds_df)
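
The correct-then-smooth flow above follows the 'Correct and Smooth' recipe for graph-based post-processing of classifier outputs. The helpers are internal to the primitive, so the following is only an assumption-labeled sketch of what _smooth_predictions could look like: clamped propagation over the normalized adjacency S.

import numpy as np

def smooth_predictions(Z0: np.ndarray, S: np.ndarray, idx_train: np.ndarray,
                       alpha: float = 0.8, n_iter: int = 50) -> np.ndarray:
    # Hypothetical implementation: propagate predictions over the graph,
    # clamping training rows to their known values after every step.
    Z = Z0.copy()
    for _ in range(n_iter):
        Z = alpha * (S @ Z) + (1.0 - alpha) * Z0
        Z[idx_train] = Z0[idx_train]
    return Z
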
Example #7
    def produce_score(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> CallResult[Outputs]:
        """
        Process the testing data.
        Args:
            inputs: Container DataFrame. Time series data on which to run outlier detection.

        Returns:
            Container DataFrame where 1 marks outliers and 0 marks normal points.
        """

        if not self._fitted:
            raise PrimitiveNotFittedError("Primitive not fitted.")
        sk_inputs = inputs
        if self.hyperparams['use_semantic_types']:
            sk_inputs = inputs.iloc[:, self._training_indices]
        output_columns = []
        if len(self._training_indices) > 0:

            if self.hyperparams['return_subseq_inds']:

                if getattr(self._clf, 'left_inds_', None) is None or getattr(self._clf, 'right_inds_', None) is None: # point OD
                    pred_score = self._clf.decision_function(sk_inputs.values).ravel()
                    left_inds_ = numpy.arange(0, len(pred_score), self.step_size)
                    right_inds_ = left_inds_ + self.window_size
                    right_inds_[right_inds_ > len(pred_score)] = len(pred_score)

                else:
                    pred_score, left_inds_, right_inds_ = self._clf.decision_function(sk_inputs.values)

                sk_output = numpy.concatenate((numpy.expand_dims(pred_score, axis=1),
                                               numpy.expand_dims(left_inds_, axis=1),
                                               numpy.expand_dims(right_inds_, axis=1)), axis=1)

            else:
                if getattr(self._clf, 'left_inds_', None) is None or getattr(self._clf, 'right_inds_', None) is None: # point OD
                    sk_output = self._clf.decision_function(sk_inputs.values)

                else:
                    sk_output, _, _ = self._clf.decision_function(sk_inputs.values)

            if sparse.issparse(sk_output):
                sk_output = sk_output.toarray()
            outputs = self._wrap_predictions(inputs, sk_output)
            if len(outputs.columns) == len(self._input_column_names):
                outputs.columns = self._input_column_names
            output_columns = [outputs]
        else:
            if self.hyperparams['error_on_no_input']:
                raise RuntimeError("No input columns were selected")
            self.logger.warning("No input columns were selected")

        outputs = base_utils.combine_columns(return_result=self.hyperparams['return_result'],
                                             add_index_columns=self.hyperparams['add_index_columns'],
                                             inputs=inputs, column_indices=self._training_indices,
                                             columns_list=output_columns)
        return CallResult(outputs)
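
The point-OD fallback above synthesizes window bounds when the detector scores individual points rather than subsequences. The same index arithmetic in isolation, as a hypothetical helper:

import numpy as np

def window_bounds(n_scores: int, window_size: int, step_size: int):
    # Left (inclusive) and right (exclusive) bounds of each scored window,
    # clipped to the series length exactly as in the fallback branch above.
    left = np.arange(0, n_scores, step_size)
    right = np.minimum(left + window_size, n_scores)
    return left, right
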
Example #8
    def produce(self,
                *,
                inputs: Inputs,
                timeout: float = None,
                iterations: int = None) -> CallResult[Outputs]:
        """Produce primitive's classifications for new time series data

        Arguments:
            inputs {Inputs} -- full D3M dataframe, containing attributes, key, and target

        Keyword Arguments:
            timeout {float} -- timeout, not considered (default: {None})
            iterations {int} -- iterations, not considered (default: {None})

        Raises:
            PrimitiveNotFittedError: if primitive not fit

        Returns:
            CallResult[Outputs] -- dataframe with a column containing a predicted class
                for each input time series
        """

        if not self._is_fit:
            raise PrimitiveNotFittedError("Primitive not fitted.")

        # instantiate classifier and load saved weights
        clf = generate_lstmfcn(
            self._ts_sz,
            self._n_classes,
            lstm_dim=self.hyperparams["lstm_dim"],
            attention=self.hyperparams["attention_lstm"],
            dropout=self.hyperparams["dropout_rate"],
        )
        clf.load_weights(self.hyperparams["weights_filepath"])

        # find column with ts value through metadata
        grouping_column = self._get_cols(inputs.metadata)

        n_ts = inputs.iloc[:, grouping_column[0]].nunique()
        ts_sz = inputs.shape[0] // n_ts
        attribute_col = self._get_value_col(inputs.metadata)
        x_vals = inputs.iloc[:, attribute_col].values.reshape(n_ts, 1, ts_sz)
        x_vals = tf.cast(x_vals, tf.float32)
        test_dataset = LSTMSequenceTest(x_vals, self.hyperparams["batch_size"])

        # make predictions
        preds = clf.predict(test_dataset)
        preds = self._label_encoder.inverse_transform(np.argmax(preds, axis=1))

        # create output frame
        result_df = container.DataFrame({self._output_columns[0]: preds},
                                        generate_metadata=True)
        result_df.metadata = result_df.metadata.add_semantic_type(
            (metadata_base.ALL_ELEMENTS, 0),
            ("https://metadata.datadrivendiscovery.org/types/PredictedTarget"),
        )

        # ok to set to True because we have checked that primitive has been fit
        return CallResult(result_df, has_finished=True)
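
Note that the reshape to (n_ts, 1, ts_sz) silently truncates values when the series are not all the same length, because ts_sz comes from integer division. A small guard, written as a hypothetical helper:

def infer_series_length(n_rows: int, n_series: int) -> int:
    # ts_sz = n_rows // n_series misaligns values when series lengths differ,
    # so fail loudly instead.
    if n_rows % n_series != 0:
        raise ValueError("series have unequal lengths; reshape would misalign values")
    return n_rows // n_series
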
Example #9
    def produce(self,
                *,
                inputs: Inputs,
                timeout: float = None,
                iterations: int = None) -> CallResult[Outputs]:
        """
        Process the testing data.
        Args:
            inputs: Container DataFrame.

        Returns:
            Container DataFrame after HPFilter.
        """
        # Get cols to fit.
        self._fitted = False
        self._training_inputs, self._training_indices = self._get_columns_to_fit(
            inputs, self.hyperparams)
        self._input_column_names = self._training_inputs.columns

        if len(self._training_indices) > 0:
            # self._clf.fit(self._training_inputs)
            self._fitted = True
        else:
            if self.hyperparams['error_on_no_input']:
                raise RuntimeError("No input columns were selected")
            self.logger.warning("No input columns were selected")

        if not self._fitted:
            raise PrimitiveNotFittedError("Primitive not fitted.")
        sk_inputs = inputs
        if self.hyperparams['use_semantic_types']:
            sk_inputs = inputs.iloc[:, self._training_indices]
        output_columns = []
        if len(self._training_indices) > 0:
            sk_output = self._hpfilter(sk_inputs,
                                       lamb=self.hyperparams['lamb'])
            if sparse.issparse(sk_output):
                sk_output = sk_output.toarray()
            outputs = self._wrap_predictions(inputs, sk_output)

            if len(outputs.columns) == len(self._input_column_names):
                outputs.columns = self._input_column_names
            output_columns = [outputs]

        else:
            if self.hyperparams['error_on_no_input']:
                raise RuntimeError("No input columns were selected")
            self.logger.warning("No input columns were selected")
        outputs = base_utils.combine_columns(
            return_result=self.hyperparams['return_result'],
            add_index_columns=self.hyperparams['add_index_columns'],
            inputs=inputs,
            column_indices=self._training_indices,
            columns_list=output_columns)

        return CallResult(outputs)
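
_hpfilter is internal to the primitive, but presumably wraps a standard Hodrick-Prescott filter such as the statsmodels implementation; a usage sketch on synthetic data:

import numpy as np
import pandas as pd
from statsmodels.tsa.filters.hp_filter import hpfilter

series = pd.Series(np.random.default_rng(0).normal(size=100).cumsum())
cycle, trend = hpfilter(series, lamb=1600)  # lamb=1600 is the quarterly-data convention
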
Example #10
    def produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> base.CallResult[Outputs]:
        """

        Args:
            inputs: Container DataFrame
            timeout: Default
            iterations: Default

        Returns:
            Container DataFrame containing the harmonic mean of the time series
        """
        self.logger.info('Statistical Hmean Primitive called')

        # Get cols to fit.
        self._fitted = False
        self._training_inputs, self._training_indices = self._get_columns_to_fit(inputs, self.hyperparams)
        self._input_column_names = self._training_inputs.columns

        if len(self._training_indices) > 0:
            # self._clf.fit(self._training_inputs)
            self._fitted = True
        else:
            if self.hyperparams['error_on_no_input']:
                raise RuntimeError("No input columns were selected")
            self.logger.warning("No input columns were selected")

        if not self._fitted:
            raise PrimitiveNotFittedError("Primitive not fitted.")
        statistical_hmean_input = inputs
        if self.hyperparams['use_semantic_types']:
            statistical_hmean_input = inputs.iloc[:, self._training_indices]
        output_columns = []
        if len(self._training_indices) > 0:
            statistical_hmean_output = self._hmean(statistical_hmean_input, self.hyperparams["window_size"])

            if sparse.issparse(statistical_hmean_output):
                statistical_hmean_output = statistical_hmean_output.toarray()
            outputs = self._wrap_predictions(inputs, statistical_hmean_output)

            # if len(outputs.columns) == len(self._input_column_names):
            #     outputs.columns = self._input_column_names

            output_columns = [outputs]

        else:
            if self.hyperparams['error_on_no_input']:
                raise RuntimeError("No input columns were selected")
            self.logger.warning("No input columns were selected")
        outputs = base_utils.combine_columns(return_result=self.hyperparams['return_result'],
                                             add_index_columns=self.hyperparams['add_index_columns'],
                                             inputs=inputs, column_indices=self._training_indices,
                                             columns_list=output_columns)

        self.logger.info('Statistical Hmean Primitive returned')

        return base.CallResult(outputs)
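
_hmean is likewise internal; for strictly positive series it plausibly reduces to a rolling harmonic mean over 'window_size' values, e.g. via scipy (a sketch under that assumption):

import pandas as pd
from scipy.stats import hmean

def rolling_hmean(s: pd.Series, window_size: int) -> pd.Series:
    # NaN for the first window_size - 1 positions, harmonic mean afterwards;
    # hmean requires strictly positive values.
    return s.rolling(window_size).apply(hmean, raw=True)
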
Example #11
    def produce_metafeatures(self,
                             *,
                             inputs: Inputs,
                             timeout: float = None,
                             iterations: int = None) -> CallResult[Outputs]:
        """ Produce primitive's best guess for the structural type of each input column.

            Arguments:
                inputs {Inputs} -- full D3M dataframe, containing attributes, key, and target
            
            Keyword Arguments:
                timeout {float} -- timeout, not considered (default: {None})
                iterations {int} -- iterations, not considered (default: {None})

            Raises:
                PrimitiveNotFittedError: if primitive not fit
            
            Returns:
                CallResult[Outputs] -- dataframe with two columns: "semantic type classifications" and "probabilities"
                    Each row represents a column in the original dataframe. The column "semantic type 
                    classifications" contains a list of all semantic type labels and the column
                    "probabilities" contains a list of the model's confidence in assigning each 
                    respective semantic type label  
        """

        if not self._is_fit:
            raise PrimitiveNotFittedError("Primitive not fitted.")

        out_df = self._produce_annotations(inputs=inputs)

        # add metadata to output data frame
        simon_df = d3m_DataFrame(out_df)
        # first column ('semantic types')
        col_dict = dict(
            simon_df.metadata.query((metadata_base.ALL_ELEMENTS, 0)))
        col_dict["structural_type"] = typing.List[str]
        col_dict["name"] = "semantic types"
        col_dict["semantic_types"] = (
            "http://schema.org/Text",
            "https://metadata.datadrivendiscovery.org/types/Attribute",
        )
        simon_df.metadata = simon_df.metadata.update(
            (metadata_base.ALL_ELEMENTS, 0), col_dict)
        # second column ('probabilities')
        col_dict = dict(
            simon_df.metadata.query((metadata_base.ALL_ELEMENTS, 1)))
        col_dict["structural_type"] = typing.List[float]
        col_dict["name"] = "probabilities"
        col_dict["semantic_types"] = (
            "http://schema.org/Text",
            "https://metadata.datadrivendiscovery.org/types/Attribute",
        )
        simon_df.metadata = simon_df.metadata.update(
            (metadata_base.ALL_ELEMENTS, 1), col_dict)

        return CallResult(simon_df, has_finished=self._is_fit)
Example #12
    def produce(self,
                *,
                inputs: Inputs,
                timeout: float = None,
                iterations: int = None) -> CallResult[Outputs]:
        if not self._is_fitted:
            raise PrimitiveNotFittedError("Primitive not fitted.")

        inputs_copy = inputs.copy()

        # if datetime columns are integers, parse as # of days
        if self._integer_time:
            inputs_copy[self._time_column] = pd.to_datetime(
                inputs_copy[self._time_column] - 1, unit="D")
        else:
            inputs_copy[self._time_column] = pd.to_datetime(
                inputs_copy[self._time_column], unit="s")

        # find marked 'GroupingKey' or 'SuggestedGroupingKey'
        grouping_keys = inputs_copy.metadata.get_columns_with_semantic_type(
            "https://metadata.datadrivendiscovery.org/types/GroupingKey")
        suggested_grouping_keys = inputs_copy.metadata.get_columns_with_semantic_type(
            "https://metadata.datadrivendiscovery.org/types/SuggestedGroupingKey"
        )
        if len(grouping_keys) == 0:
            grouping_keys = suggested_grouping_keys
        else:
            inputs_copy = inputs_copy.drop(columns=[
                list(inputs_copy)[i] for i in suggested_grouping_keys
            ])

        # check whether no grouping keys are labeled
        if len(grouping_keys) == 0:
            # TODO: support ungrouped series; `concat` below is undefined in this case
            raise ValueError("No grouping keys found; ungrouped series are not supported yet")
        else:
            # create year column and add it to the grouping_keys
            inputs_copy[self._year_column] = inputs_copy[
                self._time_column].dt.year

            # concatenate columns in `grouping_keys` to unique_id column
            concat = inputs_copy.loc[:, self.filter_idxs].apply(
                lambda x: '-'.join([str(v) for v in x]), axis=1)
            concat = pd.concat([concat, inputs_copy[self._time_column]],
                               axis=1)
            concat.columns = ['unique_id', 'ds']

        X_test = concat[['unique_id', 'ds']]

        predictions = self._esrnn.predict(X_test)
        predictions = predictions['y_hat']
        output = container.DataFrame(predictions, generate_metadata=True)
        return base.CallResult(output)
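
The unique_id / ds construction above is the input format ESRNN-style forecasting APIs typically expect. The same join, factored into a hypothetical standalone helper:

import pandas as pd

def build_esrnn_index(df: pd.DataFrame, group_cols: list, time_col: str) -> pd.DataFrame:
    # Concatenate the grouping columns into one series identifier,
    # mirroring the row-wise '-'.join above.
    unique_id = df[group_cols].astype(str).agg('-'.join, axis=1)
    return pd.DataFrame({'unique_id': unique_id, 'ds': df[time_col]})
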
Example #13
    def produce(self,
                *,
                inputs: Inputs,
                timeout: float = None,
                iterations: int = None) -> CallResult[Outputs]:
        sk_inputs, columns_to_use = self._get_columns_to_fit(
            inputs, self.hyperparams)
        output = []
        if len(sk_inputs.columns):
            try:
                af_inputs = af.from_ndarray(sk_inputs.values)
                weight_by_dist = self._weights == 'distance'
                dist_type = self._get_dist_type(self.hyperparams['dist_type'])
                af_output = self._predict(af_inputs, self._data, self._labels,
                                          self.hyperparams['n_neighbors'], dist_type,
                                          weight_by_dist)
                af_ndarray_output = af_output.to_ndarray().astype('int32')
            except sklearn.exceptions.NotFittedError as error:
                raise PrimitiveNotFittedError(
                    "Primitive not fitted.") from error
            # Some estimators (e.g. GaussianProcessRegressor) can predict without
            # having been fitted, so enforce the fitted check explicitly.
            if not self._fitted:
                raise PrimitiveNotFittedError("Primitive not fitted.")
            if sparse.issparse(af_ndarray_output):
                af_ndarray_output = af_ndarray_output.toarray()
            output = self._wrap_predictions(inputs, af_ndarray_output)
            output.columns = self._target_names
            output = [output]
        else:
            if self.hyperparams['error_on_no_input']:
                raise RuntimeError("No input columns were selected")
            self.logger.warning("No input columns were selected")
        outputs = base_utils.combine_columns(
            return_result=self.hyperparams['return_result'],
            add_index_columns=self.hyperparams['add_index_columns'],
            inputs=inputs,
            column_indices=self._target_column_indices,
            columns_list=output)

        return CallResult(outputs)
Example #14
    def produce(self,
                *,
                inputs: Inputs,
                timeout: float = None,
                iterations: int = None) -> CallResult[Outputs]:
        """ Produce primitive's classifications for new time series data

            Arguments:
                inputs {Inputs} -- full D3M dataframe, containing attributes, key, and target
            
            Keyword Arguments:
                timeout {float} -- timeout, not considered (default: {None})
                iterations {int} -- iterations, not considered (default: {None})

            Raises:
                PrimitiveNotFittedError: if primitive not fit

            Returns:
                CallResult[Outputs] -- dataframe with a column containing a predicted class 
                    for each input time series
        """

        if not self._is_fit:
            raise PrimitiveNotFittedError("Primitive not fitted.")

        # find column with ts value through metadata
        grouping_column = self._get_cols(inputs.metadata)

        n_ts = inputs.iloc[:, grouping_column[0]].nunique()
        ts_sz = inputs.shape[0] // n_ts
        attribute_col = self._get_value_col(inputs.metadata)
        x_vals = inputs.iloc[:, attribute_col].values.reshape(n_ts, ts_sz)

        # make predictions
        scaled = self._scaler.transform(x_vals)
        preds = self._knn.predict(scaled)

        # create output frame
        result_df = container.DataFrame({self._output_columns[0]: preds},
                                        generate_metadata=True)
        result_df.metadata = result_df.metadata.add_semantic_type(
            (metadata_base.ALL_ELEMENTS, 0),
            ("https://metadata.datadrivendiscovery.org/types/PredictedTarget"),
        )

        return CallResult(result_df, has_finished=True)
Example #15
    def _produce(self, inputs: Inputs):
        """ internal produce method to support produce() and produce_confidence_intervals() methods """

        if not self._is_fit:
            raise PrimitiveNotFittedError("Primitive not fitted.")

        test_frame = inputs.copy()
        nbeats_forecast = NBEATSForecast(self._nbeats_dataset,
                                         self.hyperparams['weights_dir'],
                                         self.hyperparams['interpretable'],
                                         self.hyperparams['output_mean'],
                                         self.hyperparams['nan_padding'])
        test_frame, _, _, original_times = self._reindex(test_frame)
        pred_intervals = self._get_pred_intervals(original_times)

        st = time.time()
        preds = nbeats_forecast.predict(test_frame, pred_intervals)
        logger.info(f'Making predictions took {time.time() - st}s')
        return preds, pred_intervals
Example #16
    def produce(self, *, inputs: Inputs, iterations: int = None, timeout: float = None) -> base.CallResult[Outputs]:
        """
        Inputs:  DataFrame of features or numerical inputs
        Returns: Pandas DataFrame Containing predictions
        """
        # Inference
        if not self._fitted:
            raise PrimitiveNotFittedError("Primitive not fitted.")

        # Curate data
        XTest, _, feature_columns, label_name_columns = self._curate_data(training_inputs=inputs, get_labels=True)

        # Delete columns with inputs
        outputs = inputs.remove_columns(feature_columns)

        # Predictions
        predictions = self._kmeans.predict(XTest)

        # Convert from ndarray to DataFrame
        predictions = container.DataFrame(predictions, generate_metadata=True)

        # Update Metadata for each feature vector column
        if len(label_name_columns) != 0:
            for col in range(predictions.shape[1]):
                col_dict = dict(predictions.metadata.query((metadata_base.ALL_ELEMENTS, col)))
                col_dict['structural_type'] = float
                col_dict['name']            = label_name_columns[col]
                col_dict["semantic_types"]  = ("http://schema.org/Float", "https://metadata.datadrivendiscovery.org/types/PredictedTarget",)
                predictions.metadata        = predictions.metadata.update((metadata_base.ALL_ELEMENTS, col), col_dict)
            # Rename Columns to match label columns
            predictions.columns = label_name_columns
        else:
            for col in range(predictions.shape[1]):
                col_dict = dict(predictions.metadata.query((metadata_base.ALL_ELEMENTS, col)))
                col_dict['structural_type'] = float
                col_dict['name']            = "KMeansPredictions"
                col_dict["semantic_types"]  = ("http://schema.org/Float", "https://metadata.datadrivendiscovery.org/types/PredictedTarget",)
                predictions.metadata        = predictions.metadata.update((metadata_base.ALL_ELEMENTS, col), col_dict)

        # Append predictions to outputs
        outputs = outputs.append_columns(predictions)

        return base.CallResult(outputs)
Example #17
    def produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> CallResult[Outputs]:

        """
        Process the testing data.
        Args:
            inputs: Container DataFrame. Time series data up to standardlize.

        Returns:
            Container DataFrame after standardlization.
        """

        if not self._fitted:
            raise PrimitiveNotFittedError("Primitive not fitted.")
        sk_inputs = inputs
        if self.hyperparams['use_semantic_types']:
            sk_inputs = inputs.iloc[:, self._training_indices]
        output_columns = []
        if len(self._training_indices) > 0:
            sk_output = self._clf.transform(sk_inputs)
            if sparse.issparse(sk_output):
                sk_output = sk_output.toarray()
            outputs = self._wrap_predictions(inputs, sk_output)
            if len(outputs.columns) == len(self._input_column_names):
                outputs.columns = self._input_column_names
            output_columns = [outputs]
        else:
            if self.hyperparams['error_on_no_input']:
                raise RuntimeError("No input columns were selected")
            self.logger.warning("No input columns were selected")

        outputs = base_utils.combine_columns(return_result=self.hyperparams['return_result'],
                                             add_index_columns=self.hyperparams['add_index_columns'],
                                             inputs=inputs, column_indices=self._training_indices,
                                             columns_list=output_columns)

        return CallResult(outputs)
Example #18
    def _produce(self, inputs: Inputs):
        """ internal produce method to support produce() and produce_confidence_intervals() methods """

        if not self._is_fit:
            raise PrimitiveNotFittedError("Primitive not fitted.")

        test_frame = inputs.copy()
        deepar_forecast = DeepARForecast(
            self._deepar_dataset,
            self.hyperparams["weights_dir"],
            self.hyperparams["output_mean"],
            self.hyperparams["number_samples"],
            self.hyperparams["quantiles"],
            self.hyperparams["nan_padding"],
        )
        test_frame, _, _, original_times = self._reindex(test_frame)
        pred_intervals = self._get_pred_intervals(original_times)

        st = time.time()
        preds = deepar_forecast.predict(test_frame, pred_intervals)
        logger.info(f"Making predictions took {time.time() - st}s")
        return preds, pred_intervals
Example #19

    def produce(self,
                *,
                inputs: Inputs,
                timeout: float = None,
                iterations: int = None) -> CallResult[Outputs]:
        if not self._fitted:
            raise PrimitiveNotFittedError("Primitive not fitted.")

        sk_inputs, columns_to_use = self._get_columns_to_fit(
            inputs, self.hyperparams)

        output = []
        if len(sk_inputs.columns):
            af_inputs = af.from_ndarray(sk_inputs.values.astype('float32'))

            # Normalize feature values; fall back to the maximum of the current
            # batch when no training-time maximum was recorded
            if not self._max_feature_value_defined:
                self._max_feature_value = af.max(af_inputs)
            af_inputs = af_inputs / self._max_feature_value

            af_output = self._predict(af_inputs, self._weights)
            ndarray_output = af_output.to_ndarray()

            output = self._wrap_predictions(inputs, ndarray_output)
            output.columns = self._target_names
            output = [output]
        else:
            if self.hyperparams['error_on_no_input']:
                raise RuntimeError("No input columns were selected")
            self.logger.warning("No input columns were selected")

        outputs = base_utils.combine_columns(
            return_result=self.hyperparams['return_result'],
            add_index_columns=self.hyperparams['add_index_columns'],
            inputs=inputs,
            column_indices=self._target_column_indices,
            columns_list=output)

        return CallResult(outputs)
Example #20
    def produce(self,
                *,
                inputs: Inputs,
                timeout: float = None,
                iterations: int = None) -> CallResult[Outputs]:
        """produce predictions

        Arguments:
            inputs {Inputs} -- D3M dataframe containing images

        Keyword Arguments:
            timeout {float} -- timeout, not considered (default: {None})
            iterations {int} -- iterations, not considered (default: {None})
        """

        if not self._is_fit:
            raise PrimitiveNotFittedError("Primitive not fitted.")

        X = inputs.astype(np.float32).values
        X = (X - X.mean(0, keepdims=True)) / X.std(0, keepdims=True)

        n_class = len(self.label_encoder.classes_)
        mlp_model = Mlp(X.shape[1], n_class, 128).to(self.device)
        mlp_model.load_state_dict(
            torch.load(self.hyperparams["weights_filepath"]))
        mlp_model.eval()

        dataset = TensorDataset(torch.Tensor(X))
        loader = DataLoader(dataset, 64, shuffle=False, num_workers=10)

        all_logits = []
        for data in loader:
            logits = mlp_model(data[0].to(self.device))
            all_logits.append(logits)
        all_logits = torch.cat(all_logits).detach().cpu().numpy()

        preds_df = self._prepare_d3m_df(all_logits, n_class)
        return CallResult(preds_df)
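
The loop above relies on .detach() to drop the autograd graph after the fact; wrapping inference in torch.no_grad() avoids building the graph at all. A minimal sketch (device handling elided; predict_logits is a hypothetical helper):

import torch

@torch.no_grad()
def predict_logits(model: torch.nn.Module, loader) -> torch.Tensor:
    model.eval()  # disable dropout / batch-norm updates
    return torch.cat([model(batch[0]) for batch in loader]).cpu()
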
Example #21
    def _prepare_test_inputs(self, inputs):
        """ prepare test inputs and model to produce either predictions or explanations"""
        if not self._is_fit:
            raise PrimitiveNotFittedError("Primitive not fitted.")

        spatial_dim = int(
            math.sqrt(inputs.values.shape[1] /
                      self.hyperparams['feature_dim']))
        features = inputs.values.reshape(-1, self.hyperparams['feature_dim'],
                                         spatial_dim, spatial_dim)
        features = torch.Tensor(features)
        test_dataset = TensorDataset(features)

        test_loader = DataLoader(test_dataset,
                                 batch_size=self.hyperparams['batch_size'],
                                 shuffle=False)

        model = self._build_clf_model(self.hyperparams['feature_dim'],
                                      self._nclasses).to(self._device)
        model.load_state_dict(torch.load(self.hyperparams['weights_filepath']))
        model = model.eval()

        return model, test_loader
Example #22
    def produce(
        self, *, inputs: Inputs, timeout: float = None, iterations: int = None
    ) -> CallResult[Outputs]:
        """produce segmentation masks

        Arguments:
            inputs {Inputs} -- D3M dataframe containing images

        Keyword Arguments:
            timeout {float} -- timeout, not considered (default: {None})
            iterations {int} -- iterations, not considered (default: {None})
        """

        if not self._is_fit:
            raise PrimitiveNotFittedError("Primitive not fitted.")

        imgs = self._get_imgs(inputs)
        test_loader = self._prepare_loader(imgs, shuffle=False)

        model = Unet(encoder_freeze=False, device=self.device).to(self.device)

        model.load_state_dict(torch.load(self.hyperparams["weights_filepath"]))

        model.segmentation_head = SegmentationHeadImageLabelEval(
            model.segmentation_head
        )

        all_preds = []
        for batch in tqdm(test_loader):
            batch_imgs = batch[0].to(self.device)  # avoid shadowing `inputs`
            preds = model.predict(batch_imgs).squeeze()
            preds = preds[:, self.pad : -self.pad, self.pad : -self.pad]
            all_preds.append(preds.detach().cpu().numpy())
        all_preds = np.vstack(all_preds)

        preds_df = self._prepare_d3m_df(all_preds)
        return CallResult(preds_df)
Example #23
    def produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> CallResult[Outputs]:
        if not self._fitted:
            raise PrimitiveNotFittedError("Primitive not fitted.")

        es = self._make_entityset(inputs.copy())

        fm = ft.calculate_feature_matrix(
            entityset=es,
            features=self.features,
            chunk_size=self.chunk_size
        )

        # make sure the feature matrix is ordered the same as the input
        fm = fm.reindex(es[self._target_resource_id].df.index)
        fm = fm.reset_index(drop=True)  # d3m wants index to increment by 1

        # treat inf as null like fit step
        fm = fm.replace([np.inf, -np.inf], np.nan)

        # TODO: add proper metadata handling here
        fm = add_metadata(fm, self.features)
        fm = self._add_labels(fm, inputs)

        return CallResult(fm)
Example #24
    def produce(self,
                *,
                inputs: Inputs,
                timeout: float = None,
                iterations: int = None) -> CallResult[Outputs]:
        """
        Process the testing data.
        Args:
            inputs: Container DataFrame.

        Returns:
            Container DataFrame reconstructed by TRMF (temporal regularized matrix factorization).
        """
        self._clf = trmf(
            lags=self.hyperparams['lags'],
            K=self.hyperparams['K'],
            lambda_f=self.hyperparams['lambda_f'],
            lambda_x=self.hyperparams['lambda_x'],
            lambda_w=self.hyperparams['lambda_w'],
            alpha=self.hyperparams['alpha'],
            eta=self.hyperparams['eta'],
            max_iter=self.hyperparams['max_iter'],
            F_step=self.hyperparams['F_step'],
            X_step=self.hyperparams['X_step'],
            W_step=self.hyperparams['W_step'],
        )

        tmp = inputs.copy()
        for col in inputs.columns:
            tmp[col] = inputs[col] / inputs[col].max()

        self._inputs = tmp
        self._fitted = False

        # Get cols to fit.
        self._training_inputs, self._training_indices = self._get_columns_to_fit(
            self._inputs, self.hyperparams)
        self._input_column_names = self._training_inputs.columns

        if len(self._training_indices) > 0:
            self._clf.fit(self._training_inputs)
            self._fitted = True
        else:
            if self.hyperparams['error_on_no_input']:
                raise RuntimeError("No input columns were selected")
            self.logger.warning("No input columns were selected")

        if not self._fitted:
            raise PrimitiveNotFittedError("Primitive not fitted.")

        sk_inputs = inputs
        if self.hyperparams['use_semantic_types']:
            sk_inputs = inputs.iloc[:, self._training_indices]
        output_columns = []
        if len(self._training_indices) > 0:

            sk_output = self._clf.get_X()

            if sparse.issparse(sk_output):
                sk_output = sk_output.toarray()
            outputs = self._wrap_predictions(inputs, sk_output)
            if len(outputs.columns) == len(self._input_column_names):
                outputs.columns = self._input_column_names
            output_columns = [outputs]
        else:
            if self.hyperparams['error_on_no_input']:
                raise RuntimeError("No input columns were selected")
            self.logger.warning("No input columns were selected")
        outputs = base_utils.combine_columns(
            return_result=self.hyperparams['return_result'],
            add_index_columns=self.hyperparams['add_index_columns'],
            inputs=inputs,
            column_indices=self._training_indices,
            columns_list=output_columns)

        return CallResult(outputs)
Example #25
    def produce_confidence_intervals(self,
                                     *,
                                     inputs: Inputs,
                                     timeout: float = None,
                                     iterations: int = None
                                     ) -> CallResult[Outputs]:
        """ produce confidence intervals for each series 'confidence_interval_horizon' periods into
                the future
        
        Arguments:
            inputs {Inputs} -- full D3M dataframe, containing attributes, key, and target
        
        Keyword Arguments:
            timeout {float} -- timeout, not considered (default: {None})
            iterations {int} -- iterations, considered (default: {None})
        
        Raises:
            PrimitiveNotFittedError: if primitive not fit

        Returns:
            CallResult[Outputs] -- mean and interval predictions for each series

            Ex.
                series | timestep | mean | 0.05 | 0.95
                --------------------------------------
                a      |    0     |  5   |   3  |   7
                a      |    1     |  6   |   4  |   8
                b      |    0     |  5   |   3  |   7
                b      |    1     |  6   |   4  |   8
        """

        if not self._is_fit:
            raise PrimitiveNotFittedError("Primitive not fitted.")

        alpha = self.hyperparams["confidence_interval_alpha"]

        if (len(self._drop_cols_no_tgt) > 0
                and inputs.shape[1] != self._cols_after_drop):
            test_frame = inputs.remove_columns(self._drop_cols_no_tgt)
        else:
            test_frame = inputs.copy()

        # Create TimeSeriesTest object
        if self._train_data.equals(inputs):
            ts_test_object = TimeSeriesTest(self._ts_object)
            include_all_training = True
            horizon = 0
        # test
        else:
            ts_test_object = TimeSeriesTest(self._ts_object, test_frame)
            include_all_training = self.hyperparams[
                'seed_predictions_with_all_data']
            horizon = self.hyperparams["confidence_interval_horizon"]

        # make predictions with learner
        start_time = time.time()
        logger.info(f"Making predictions...")
        preds = self._learner.predict(
            ts_test_object,
            horizon=horizon,
            samples=self.hyperparams["confidence_interval_samples"],
            include_all_training=include_all_training,
            point_estimate=False)
        logger.info(
            f"Prediction took {time.time() - start_time}s. Predictions array shape: {preds.shape}"
        )

        # convert samples to percentiles
        means = np.percentile(preds, 50, axis=2).reshape(-1, 1)
        lowers = np.percentile(preds, alpha / 2 * 100, axis=2).reshape(-1, 1)
        uppers = np.percentile(preds, (1 - alpha / 2) * 100,
                               axis=2).reshape(-1, 1)

        assert (lowers < means).all()
        assert (means < uppers).all()

        # convert to df
        if self._grouping_column is None:
            indices = np.repeat(self._output_columns[0], preds.shape[1])
        else:
            indices = np.repeat(
                test_frame[test_frame.columns[self._grouping_column]].unique(),
                preds.shape[1])
        interval_df = pd.DataFrame(
            np.concatenate((means, lowers, uppers), axis=1),
            columns=["mean", str(alpha / 2),
                     str(1 - alpha / 2)],
            index=indices,
        )

        # add index column
        interval_df["horizon_index"] = np.tile(np.arange(preds.shape[1]),
                                               len(interval_df.index.unique()))

        logger.debug(interval_df.head())

        # structure return df
        return CallResult(
            container.DataFrame(interval_df, generate_metadata=True),
            has_finished=self._is_fit,
        )
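
The samples-to-percentiles conversion above generalizes to any (series, horizon, samples) prediction array. The same computation as a hypothetical standalone helper:

import numpy as np

def intervals_from_samples(samples: np.ndarray, alpha: float = 0.05):
    # Median point estimate plus equal-tailed (alpha/2, 1 - alpha/2) bounds.
    point = np.percentile(samples, 50, axis=2)
    lower = np.percentile(samples, alpha / 2 * 100, axis=2)
    upper = np.percentile(samples, (1 - alpha / 2) * 100, axis=2)
    return point, lower, upper
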
Example #26
    def produce(self,
                *,
                inputs: Inputs,
                timeout: float = None,
                iterations: int = None) -> CallResult[Outputs]:
        """ Produce primitive's predictions for specific time series at specific future time instances
            * these specific timesteps / series are specified implicitly by input dataset

            Arguments:
                inputs {Inputs} -- full D3M dataframe, containing attributes, key, and target
            
            Keyword Arguments:
                timeout {float} -- timeout, not considered (default: {None})
                iterations {int} -- iterations, not considered (default: {None})

            Raises:
                PrimitiveNotFittedError: if primitive not fit
            
            Returns:
                CallResult[Outputs] -- (N, 2) dataframe with d3m_index and value for each prediction slice requested.
                    prediction slice = specific horizon idx for specific series in specific regression 
        """

        if not self._is_fit:
            raise PrimitiveNotFittedError("Primitive not fitted.")

        if (len(self._drop_cols_no_tgt) > 0
                and inputs.shape[1] != self._cols_after_drop):
            test_frame = inputs.remove_columns(self._drop_cols_no_tgt)
        else:
            test_frame = inputs.copy()

        # Create TimeSeriesTest object
        if self._train_data.equals(inputs):
            ts_test_object = TimeSeriesTest(self._ts_object)
        # test
        else:
            ts_test_object = TimeSeriesTest(self._ts_object, test_frame)

        # get prediction slices
        pred_intervals = self._get_pred_intervals(test_frame)

        # make predictions with learner
        learner = DeepARLearner(
            self._ts_object,
            emb_dim=self.hyperparams["emb_dim"],
            lstm_dim=self.hyperparams["lstm_dim"],
            dropout=self.hyperparams["dropout_rate"],
            lr=self.hyperparams["learning_rate"],
            batch_size=self.hyperparams["batch_size"],
            train_window=self.hyperparams["window_size"],
            verbose=0,
        )
        learner.load_weights(self.hyperparams['weights_filepath'])
        start_time = time.time()
        logger.info(f"Making predictions...")
        preds = learner.predict(ts_test_object, include_all_training=True)
        logger.info(
            f"Prediction took {time.time() - start_time}s. Predictions array shape: {preds.shape}"
        )

        # slice predictions with learned intervals
        all_preds = []
        for p, idxs in zip(preds, pred_intervals.values):
            # all_preds.extend(p[: len(idxs)])  # this takes first n predictions
            all_preds.extend([p[i] for i in idxs
                              ])  # this takes predictions at actual indices
        flat_list = np.array([p for pred_list in all_preds for p in pred_list])


        # fill NaNs with 0s in case the model predicted some (it shouldn't; this guards an edge case)
        flat_list = np.nan_to_num(flat_list)

        # create output frame
        result_df = container.DataFrame(
            {self._ts_frame.columns[self._target_column]: flat_list},
            generate_metadata=True,
        )
        result_df.metadata = result_df.metadata.add_semantic_type(
            (metadata_base.ALL_ELEMENTS, 0),
            ("https://metadata.datadrivendiscovery.org/types/PredictedTarget"),
        )

        return CallResult(result_df, has_finished=self._is_fit)
Example #27
    def _produce(self,
                 inputs: Inputs,
                 return_conf_int: bool = False) -> CallResult[Outputs]:
        """ prediction for future time series data
        """
        if not self._is_fit:
            raise PrimitiveNotFittedError("Primitive not fitted.")

        # make copy of input data!
        inputs_copy = inputs.copy()

        # if datetime columns are integers, parse as # of days
        if self._integer_time:
            inputs_copy[self._time_column] = pd.to_datetime(
                inputs_copy[self._time_column] - 1, unit="D")
        else:
            inputs_copy[self._time_column] = pd.to_datetime(
                inputs_copy[self._time_column], unit="s")

        # find marked 'GroupingKey' or 'SuggestedGroupingKey'
        grouping_keys = inputs_copy.metadata.get_columns_with_semantic_type(
            "https://metadata.datadrivendiscovery.org/types/GroupingKey")
        suggested_grouping_keys = inputs_copy.metadata.get_columns_with_semantic_type(
            "https://metadata.datadrivendiscovery.org/types/SuggestedGroupingKey"
        )
        if len(grouping_keys) == 0:
            grouping_keys = suggested_grouping_keys
        else:
            inputs_copy = inputs_copy.drop(columns=[
                list(inputs_copy)[i] for i in suggested_grouping_keys
            ])

        # extract n_periods, interval
        n_periods, forecast_idxs, all_intervals = self._calculate_prediction_intervals(
            inputs_copy, len(grouping_keys))
        forecasts = self._forecast(n_periods, return_conf_int=return_conf_int)

        a = self.hyperparams['confidence_interval_alpha']
        if return_conf_int:
            columns = [[t, f'{t}-{a/2}', f'{t}-{1-a/2}']
                       for t in self._targets]
            # flatten to [t, t-lower, t-upper, ...] so the list can be
            # assigned to data.columns below
            columns = [col for cols in columns for col in cols]
        else:
            columns = self._targets

        var_df = []
        for (grp_idx, col_idx), intervals in zip(forecast_idxs, all_intervals):
            forecast = forecasts[grp_idx]
            if col_idx == -1:

                nan_array = np.empty(
                    (len(intervals),
                     len(forecast) * len(self._target_indices)))
                nan_array[:] = np.nan
                data = pd.DataFrame(nan_array)
            else:
                col_idxs = [
                    col_idx + target for target in self._target_indices
                ]
                data = [f.iloc[intervals, col_idxs] for f in forecast]
                data = pd.concat(data, axis=1)
            data.columns = columns
            var_df.append(data)
        var_df = pd.concat(var_df, axis=0, ignore_index=True)
        var_df = d3m_DataFrame(var_df)

        # assign target metadata and round appropriately
        idx = 0
        for tgt_name in self._targets:
            col_dict = dict(
                var_df.metadata.query((metadata_base.ALL_ELEMENTS, idx)))
            col_dict["structural_type"] = type("1")
            col_dict["name"] = tgt_name
            col_dict["semantic_types"] = (
                "https://metadata.datadrivendiscovery.org/types/PredictedTarget",
                "http://schema.org/Float")
            var_df.metadata = var_df.metadata.update(
                (metadata_base.ALL_ELEMENTS, idx), col_dict)

            if return_conf_int:
                col_dict_lower = dict(
                    var_df.metadata.query(
                        (metadata_base.ALL_ELEMENTS, idx + 1)))
                col_dict_upper = dict(
                    var_df.metadata.query(
                        (metadata_base.ALL_ELEMENTS, idx + 2)))
                col_dict_lower["structural_type"] = type("1")
                col_dict_upper["structural_type"] = type("1")
                col_dict_lower["semantic_types"] = (
                    "http://schema.org/Float", )
                col_dict_upper["semantic_types"] = (
                    "http://schema.org/Float", )
                col_dict_lower["name"] = f'{tgt_name}-{a/2}'
                col_dict_upper["name"] = f'{tgt_name}-{1-a/2}'
                var_df.metadata = var_df.metadata.update(
                    (metadata_base.ALL_ELEMENTS, idx + 1), col_dict_lower)
                var_df.metadata = var_df.metadata.update(
                    (metadata_base.ALL_ELEMENTS, idx + 2), col_dict_upper)

                idx += 3
            else:
                idx += 1

        return CallResult(var_df, has_finished=self._is_fit)
Example #28
    def produce_weights(self,
                        *,
                        inputs: Inputs,
                        timeout: float = None,
                        iterations: int = None) -> CallResult[Outputs]:
        """ Produce absolute values of correlation coefficients (weights) for each of the terms used in each regression model. 
            Terms must be aggregated by series or by lag order (thus the need for absolute value). Pooling operation can be maximum 
            or average (controlled by 'interpret_pooling' HP).
        
        Arguments:
            inputs {Inputs} -- full D3M dataframe, containing attributes, key, and target
        
        Keyword Arguments:
            timeout {float} -- timeout, not considered (default: {None})
            iterations {int} -- iterations, considered (default: {None})

        Raises:
            PrimitiveNotFittedError: if primitive not fit
        
        Returns:
            CallResult[Outputs] -- pandas df where each row represents a unique series from one of the regressions that was fit. 
                The columns contain the coefficients for each term in the regression, potentially aggregated by series or lag order. 
                Column names will represent the lag order or series to which that column refers. 
                If the regression is an ARIMA model, the set of column names will also contain AR_i (autoregressive terms) and 
                    MA_i (moving average terms)
                Columns that are not included in the regression for a specific series will have NaN values in those
                    respective columns. 
        """

        if not self._is_fit:
            raise PrimitiveNotFittedError("Primitive not fitted.")

        if self.hyperparams['interpret_value'] == 'series':
            logger.info(
                "You should interpret a row of the returned matrix like this: "
                "Each row represents an endogenous variable for which the VAR process learned an equation. "
                "Each column represents all of the endogenous variables used in the regression equation. "
                "Each matrix entry represents the weight of the column endogenous variable in the equation for the "
                "row endogenous variable.")

        # get correlation coefficients
        coefficients = [
            np.absolute(fit.coefs)
            if lags is not None else fit.get_absolute_value_params()
            for fit, lags in zip(self._fits, self._lag_order)
        ]
        trends = [
            np.absolute(fit.params[0, :].reshape(-1, 1))
            if lags is not None else None
            for fit, lags in zip(self._fits, self._lag_order)
        ]

        # combine coefficient vectors into a single df
        coef_df = None
        for coef, trend, names in zip(coefficients, trends,
                                      self._X_train_names):
            # aggregate VAR coefficients based on HPs
            if trend is not None:
                if self.hyperparams["interpret_value"] == "series":
                    if self.hyperparams["interpret_pooling"] == "avg":
                        coef = np.mean(coef, axis=0)  # K x K
                    else:
                        coef = np.max(coef, axis=0)  # K x K
                    colnames = names
                else:
                    # pool over axis 1 (axis 2 is believed to give the same result by symmetry)
                    if self.hyperparams["interpret_pooling"] == "avg":
                        coef = np.mean(coef, axis=1).T  # K x p + 1
                    else:
                        coef = np.max(coef, axis=1).T  # K x p + 1
                    coef = np.concatenate((trend, coef), axis=1)
                    colnames = ["trend_0"] + [
                        "ar_" + str(i + 1) for i in range(coef.shape[1] - 1)
                    ]
                new_df = pd.DataFrame(coef, columns=colnames, index=names)
                coef_df = pd.concat([coef_df, new_df], sort=True)

            # add index to ARIMA params
            else:
                coef.index = names
                if self.hyperparams["interpret_value"] == "lag_order":
                    coef_df = pd.concat([coef_df, coef], sort=True)

        if coef_df is None:
            logger.info(
                "There was only one variable in each grouping of time series, "
                "therefore only ARIMA models were fit. Additionally, because the 'interpret_value' "
                "hyperparameter is set to series, this will return an empty dataframe."
            )

        return CallResult(
            container.DataFrame(coef_df, generate_metadata=True),
            has_finished=self._is_fit,
        )
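
A minimal sketch of the pooling step above, assuming coefs has the (lags, K, K) shape that statsmodels' VARResults.coefs returns, where coefs[l, i, j] is the weight of variable j at lag l + 1 in the equation for variable i:

import numpy as np

p, K = 2, 3  # hypothetical lag order and number of endogenous variables
coefs = np.abs(np.random.randn(p, K, K))

# interpret_value == 'series': pool over the lag axis -> K x K matrix
by_series = coefs.mean(axis=0)

# interpret_value == 'lag_order': pool over the equation axis, then
# transpose so rows are series and columns are lag orders -> K x p
by_lag_order = coefs.mean(axis=1).T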
Example No. 29
    def produce(self,
                *,
                inputs: Inputs,
                timeout: float = None,
                iterations: int = None) -> CallResult[Inputs]:
        """ Add SIMON annotations 

            Arguments:
                inputs {Inputs} -- full D3M dataframe, containing attributes, key, and target

            Keyword Arguments:
                timeout {float} -- timeout, not considered (default: {None})
                iterations {int} -- iterations, not considered (default: {None})

            Raises:
                PrimitiveNotFittedError: if primitive not fit

            Returns:
                CallResult[Inputs] -- input D3M dataframe with augmented metadata

        """
        if not self._is_fit:
            raise PrimitiveNotFittedError("Primitive not fitted.")

        ## BEGIN originally from d3m.primitives.schema_discovery.profiler.Common
        assert self._add_semantic_types is not None
        assert self._remove_semantic_types is not None

        columns_to_use, output_columns = self._produce_columns(
            inputs, self._add_semantic_types, self._remove_semantic_types)

        if self.hyperparams['replace_index_columns'] and self.hyperparams[
                'return_result'] == 'append':
            assert len(columns_to_use) == len(output_columns)

            index_columns = inputs.metadata.get_index_columns()

            index_columns_to_use = []
            other_columns_to_use = []
            index_output_columns = []
            other_output_columns = []
            for column_to_use, output_column in zip(columns_to_use,
                                                    output_columns):
                if column_to_use in index_columns:
                    index_columns_to_use.append(column_to_use)
                    index_output_columns.append(output_column)
                else:
                    other_columns_to_use.append(column_to_use)
                    other_output_columns.append(output_column)

            outputs = base_utils.combine_columns(
                inputs,
                index_columns_to_use,
                index_output_columns,
                return_result='replace',
                add_index_columns=self.hyperparams['add_index_columns'])
            outputs = base_utils.combine_columns(
                outputs,
                other_columns_to_use,
                other_output_columns,
                return_result='append',
                add_index_columns=self.hyperparams['add_index_columns'])
        else:
            outputs = base_utils.combine_columns(
                inputs,
                columns_to_use,
                output_columns,
                return_result=self.hyperparams['return_result'],
                add_index_columns=self.hyperparams['add_index_columns'])
        ## END originally from d3m.primitives.schema_discovery.profiler.Common

        return CallResult(outputs, has_finished=self._is_fit)
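
The replace-then-append split above keeps recognized index columns in their original positions while new attribute columns land at the end. A toy sketch of the same combine_columns call, assuming the d3m.base.utils API used throughout these examples and a hypothetical two-column frame:

import pandas as pd
from d3m import container
from d3m.base import utils as base_utils

inputs = container.DataFrame(
    pd.DataFrame({"d3mIndex": [0, 1], "value": ["a", "b"]}),
    generate_metadata=True)
# hypothetical replacement for column 0 (e.g. with updated semantic types)
new_col = inputs.select_columns([0])

out = base_utils.combine_columns(
    inputs=inputs, column_indices=[0], columns_list=[new_col],
    return_result='replace', add_index_columns=False)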
Example No. 30
	def produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> base.CallResult[Outputs]:
		"""
		Args:
			inputs: Container DataFrame

			timeout: Default

			iterations: Default

		Returns:
			Container DataFrame containing the Matrix Profile of the selected columns
		"""

		# Get cols to fit.
		self._fitted = False
		self._training_inputs, self._training_indices = self._get_columns_to_fit(inputs, self.hyperparams)
		self._input_column_names = self._training_inputs.columns

		if len(self._training_indices) > 0:
			self._fitted = True
		else:
			if self.hyperparams['error_on_no_input']:
				raise RuntimeError("No input columns were selected")
			self.logger.warn("No input columns were selected")

		if not self._fitted:
			raise PrimitiveNotFittedError("Primitive not fitted.")
		
		sk_inputs = inputs
		if self.hyperparams['use_semantic_types']:
			sk_inputs = inputs.iloc[:, self._training_indices]
		output_columns = []
		if len(self._training_indices) > 0:
			sk_output = self._clf.produce(sk_inputs)
			if sparse.issparse(sk_output):
				sk_output = sk_output.toarray()
			outputs = self._wrap_predictions(inputs, sk_output)
			
			if len(outputs.columns) == len(self._input_column_names):
				outputs.columns = self._input_column_names
			output_columns = [outputs]

		else:
			if self.hyperparams['error_on_no_input']:
				raise RuntimeError("No input columns were selected")
			self.logger.warn("No input columns were selected")

		outputs = base_utils.combine_columns(return_result=self.hyperparams['return_result'],
							   add_index_columns=self.hyperparams['add_index_columns'],
							   inputs=inputs, column_indices=self._training_indices,
							   columns_list=output_columns)

		return CallResult(outputs)
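
For illustration only, a standalone matrix profile computed with the stumpy library; this is a swapped-in backend for the sketch (the primitive's internal _clf is not shown above), not necessarily what this primitive uses:

import numpy as np
import stumpy

ts = np.random.rand(100).astype(np.float64)  # hypothetical univariate series
m = 10  # subsequence window length

# stumpy.stump returns an (n - m + 1, 4) array; column 0 holds the
# matrix profile distances, column 1 the nearest-neighbor indices
mp = stumpy.stump(ts, m)
profile = mp[:, 0]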