import numpy as np
import pandas as pd

from cognite.data_transfer_service import DataTransferService  # assumed import path for DataTransferService


def train(file_io, data_spec, api_key, project, **kwargs):
    """
    file_io: The train method must accept a file_io argument. This is a function that
        works the same way as the built-in open(), except it reads from and writes to
        the root of a special storage location in the cloud that belongs to the
        current model version.
    data_spec: An argument we pass in ourselves when we initiate the training.
    api_key, project: Optional arguments that are passed in automatically from Model
        Hosting for your convenience. The API key is the one that was used to initiate
        this training routine through the Model Hosting HTTP API.
    """
    dts = DataTransferService(data_spec, api_key=api_key, project=project)
    X = pd.read_csv(dts.get_file("data"))
    y = pd.read_csv(dts.get_file("target"))

    # Add a feature of constant value 1
    X.insert(0, "f0", 1)

    # Least squares: beta_hat = (X^T X)^-1 X^T y
    coefficients = pd.DataFrame(np.linalg.inv(X.T.dot(X)).dot(X.T).dot(y), columns=["beta_hat"])

    # Persist our result
    with file_io("coefficients.csv", "w") as f:
        coefficients.to_csv(f, index=False)
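
# To show the other half of the file_io round trip described in the docstring above,
# here is a minimal sketch of how the persisted coefficients could be read back and
# used for prediction. The predict(instance, file_io, **kwargs) signature below is an
# assumption for illustration only; it is not taken from the Model Hosting docs.
def predict(instance, file_io, **kwargs):
    # Read the coefficients that train() wrote to the model version's storage
    with file_io("coefficients.csv", "r") as f:
        coefficients = pd.read_csv(f)
    beta_hat = coefficients["beta_hat"].values

    # instance is assumed to be a list of feature values; prepend the constant
    # feature f0 = 1 so it matches the design matrix used in train()
    features = np.array([1] + list(instance))
    return float(features.dot(beta_hat))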
def test_get_files(self):
    data_spec = DataSpec(files_data_spec=FilesDataSpec(file_ids={"test": 7725800487412823}))
    dts = DataTransferService(data_spec)
    data = dts.get_file("test")
    assert isinstance(data, BytesIO)
    assert (
        data.getvalue()
        == b'import os\n\nfrom cognite.config import configure_session\nfrom cognite.v05 import files\n\nconfigure_session(os.getenv("COGNITE_TEST_API_KEY"), "mltest")\n\n\nres = files.upload_file("test.py", "./test.py")\n\nprint(res)\n'
    )
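
# For context, the data_spec passed to train() above can be built the same way as in
# this test: a DataSpec wrapping a FilesDataSpec that maps names to file IDs. A minimal
# sketch, assuming two already-uploaded CSV files; the file IDs below are placeholders,
# not real IDs.
data_spec = DataSpec(
    files_data_spec=FilesDataSpec(
        file_ids={
            "data": 1234567890123456,    # ID of the uploaded feature CSV (placeholder)
            "target": 6543210987654321,  # ID of the uploaded target CSV (placeholder)
        }
    )
)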