def test_ml_model(self):
    """Train, evaluate, and persist the model, then reload it and re-check the metric."""
    h1.init(MODEL_REPO_PATH=".models")

    model = MLModel()
    raw = model.get_data()
    prepped = model.prep(raw)
    model.train(prepped)

    score = model.evaluate(prepped)
    print("metric = ", str(score))
    self.assertGreaterEqual(score, 0.9)

    vid = model.persist()
    print("Persisted to version_id = %s" % vid)

    # Round-trip: a freshly loaded model must carry the persisted metric.
    reloaded = MLModel().load(vid)
    self.assertGreaterEqual(reloaded.metrics, 0.9)
def setup(self, config):
    """Tuning-trainable setup: build an H1st model from sampled hyperparameters.

    Coerces each sampled value in ``config`` to its declared type
    (``"int"`` -> rounded int, ``"float"`` -> float, anything else passed
    through), instantiates ``model_class`` with the result, then loads and
    preps the dataset once under a file lock so concurrent tuning workers
    do not prepare the data simultaneously.

    :param config: mapping of hyperparameter name -> sampled value
    """
    h1st.init()
    self.config = config
    # `parameters` is presumably the module-level search-space spec
    # (list of {"name": ..., "type": ...} dicts) — TODO confirm.
    self.hyperparameter = parameters
    self.timestep = 0

    self.kwargs = {}
    for param in self.hyperparameter:
        k = param["name"]
        if param["type"] == "int":
            self.kwargs[k] = int(round(self.config[k]))
        elif param["type"] == "float":
            self.kwargs[k] = float(self.config[k])
        else:
            self.kwargs[k] = self.config[k]
    self.h1_ml_model = model_class(**self.kwargs)

    DATA_LOCK_DIR = "./data_lock"
    lock_file = f"{DATA_LOCK_DIR}/data.lock"
    # exist_ok avoids the check-then-create race of the original
    # `if not os.path.exists(...): os.makedirs(...)` pair, which could
    # raise FileExistsError when several workers start at once.
    os.makedirs(DATA_LOCK_DIR, exist_ok=True)
    # Only one worker at a time loads and preps the shared dataset.
    with FileLock(os.path.expanduser(lock_file)):
        data = self.h1_ml_model.load_data()
        self.prepared_data = self.h1_ml_model.prep(data)
def evaluate(self, data):
    """Compute accuracy on the test split, cache it on the model, and return it.

    self.metrics can also be persisted automatically by H1st.
    """
    predictions = self.predict({"x": data["test_x"]})
    self.metrics = metrics.accuracy_score(data["test_y"], predictions)
    return self.metrics

def predict(self, input_data: dict) -> dict:
    """
    We expect an array of input data rows in the "x" field of the
    input_data dict
    """
    return self.model.predict(input_data["x"])

if __name__ == "__main__":
    h1.init(MODEL_REPO_PATH=".models")

    model = MLModel()
    raw = model.get_data()
    print(raw)

    prepared = model.prep(raw)
    print(prepared["train_x"].shape)
    print(prepared["test_x"].shape)

    model.train(prepared)
    model.evaluate(prepared)
    print("accuracy_score = %.4f" % model.metrics)

    version_id = model.persist()
    print("Persisted to version_id = %s" % version_id)
## NOTE: execution of this notebook requires private access to the dataset. We're making the data available soon. In the mean time, hang on tight! import h1st as h1 h1.init() import pandas as pd import numpy as np import sklearn.metrics from AutomotiveCybersecurity.models.msg_freq_event_detector import MsgFreqEventDetectorModel m = MsgFreqEventDetectorModel() data = m.load_data(num_files=5) m.train(data) m.stats # Don't run automatically this easily overwite latest version in AHT's computer, # I need to use correct version in the tutorial notebooks # m.persist() from AutomotiveCybersecurity.graph import WindowGenerator df = pd.read_csv(data["test_attack_files"][0]) df.columns = [ 'Timestamp', 'Label', 'CarSpeed', 'SteeringAngle', 'YawRate', 'Gx', 'Gy' ] graph = h1.Graph() graph.start()\ .add(WindowGenerator())\
ypred = gbc.predict(df2[FEATURES])
cf = sklearn.metrics.confusion_matrix(df2.Label == "Attack", ypred)
print(sklearn.metrics.accuracy_score(df2.Label == "Attack", ypred))
print(cf)
print("Accuracy = %s " % sklearn.metrics.accuracy_score(df2.Label == "Attack", ypred))

### 2c. Deep Learning and using a H1ST Model API, organizing, importing,
### saving & loading
#
# We can bring out larger guns like Bidirectional LSTM or CNN or Transformers
# which can work well on pattern-recognition problems over sequential data
# such as this one. One such model is available in the full tutorial source
# code package, and it can reach quite impressive accuracy. Let's see how we
# could use it!

import h1st as h1

h1.init()

from AutomotiveCybersecurity.models.blstm_injection_msg_classifier import BlstmInjectionMsgClassifier

m = BlstmInjectionMsgClassifier()

# A data-science project in H1ST.AI is designed to be a Python-importable
# package; the `h1` command-line tool can create such a project. Organizing
# model code this way makes it easy to use, as we will see. The Model API
# provides a unified workflow so that models can be used interactively in
# notebooks as well as in structured, complex projects.
#
# Calling `h1.init()` ensures we can import the package in our notebooks even
# when it is not installed (as long as the notebooks live within the project
# folder structure). It is then a simple matter to import and train such an
# organized `h1st.Model`, say on a small fraction of the data.

data = m.load_data(num_files=100)