コード例 #1
0
ファイル: model.py プロジェクト: ojjiojji/cognite-sdk-python
    def train(file_io, data_spec, api_key, project, **kwargs):
        """
        Fit a random forest regressor on the data described by ``data_spec``
        and persist it as ``regressor.pickle`` through ``file_io``.

        file_io:
            Required by the training contract. It is used like the builtin
            open(), but reads from and writes to the root of a dedicated
            cloud storage location belonging to the current model version.
        data_spec:
            The data spec we supply ourselves when starting the training;
            it is handed straight to DataTransferService.
        api_key, project:
            Passed in automatically by Model Hosting for convenience and
            forwarded to DataTransferService. The API key is the one that
            was used to initiate this training routine through the Model
            Hosting HTTP API.
        **kwargs:
            Accepted but not used by this method.
        """
        service = DataTransferService(data_spec, api_key=api_key, project=project)
        # Rows containing any missing value are discarded before fitting.
        frame = service.get_dataframe()
        frame = frame.dropna()

        feature_columns = ["temp", "pressure", "rpm"]
        features = frame[feature_columns].values
        target = frame["production_rate"].values

        # Apart from these two settings the estimator keeps its defaults.
        model = RandomForestRegressor(n_estimators=10, min_samples_split=100)
        model.fit(features, target)

        # Store the fitted model so it can be loaded again later.
        with file_io("regressor.pickle", "wb") as handle:
            pickle.dump(model, handle)
コード例 #2
0
ファイル: model.py プロジェクト: ojjiojji/cognite-sdk-python
    def predict(self, instance, api_key, project, **kwargs):
        """
        Predict ``production_rate`` for the data described by ``instance``.

        instance:
            Because this is scheduled prediction, this is a data spec
            describing the data to run the prediction on.
        api_key, project:
            Forwarded to DataTransferService, the same way as in train().
        **kwargs:
            Accepted but not used by this method.

        Returns a dict of lists on the format scheduled prediction expects:
            {
              "timestamp": [t0, t1, t2, ...],
              "production_rate": [p0, p1, p2, ...]
            }
        """
        service = DataTransferService(instance, api_key=api_key, project=project)
        # Rows containing any missing value are discarded before predicting.
        frame = service.get_dataframe().dropna()

        features = frame[["temp", "pressure", "rpm"]].values
        frame["production_rate"] = self.regressor.predict(features)

        # to_dict(orient="list") yields one list per selected column, which
        # is exactly the output format described in the docstring.
        output_columns = ["timestamp", "production_rate"]
        return frame[output_columns].to_dict(orient="list")
コード例 #3
0
 def test_get_dataframe(self, ts_data_spec_dtos):
     """Check that get_dataframe("ds1") returns a pandas DataFrame."""
     spec = DataSpec(time_series_data_specs=ts_data_spec_dtos)
     result = DataTransferService(spec).get_dataframe("ds1")
     assert isinstance(result, pd.DataFrame)