def test_model_drift():
    """
    catch model drift by comparing the target mean of two datasets.

    The "new" dataset is a copy of the preprocessed data whose target column
    is overwritten with zeros. Drift is reported when the two target means are
    not within three standard deviations (of the "previous" data) of each
    other. Meant to show when the check fails and returns an error msg, but
    could be set to break instead.

    :return: error message, or success message
    """
    target_feature = conf_object.project_conf["target_feature"]

    # fake data of the "previous" data
    data_previous_dummy = preprocess_data()

    # fake data of the incoming "new" data that has drifted
    data_drifted_dummy = deepcopy(data_previous_dummy)
    data_drifted_dummy[target_feature] = np.zeros(len(data_previous_dummy))

    mu1 = data_previous_dummy[target_feature].mean()
    mu2 = data_drifted_dummy[target_feature].mean()
    std = data_previous_dummy[target_feature].std()
    drift_threshold = std * 3

    # Explicit comparison instead of assert-inside-try: an assert is removed
    # when Python runs with -O, which would make this always report success.
    # Written as "not (a < b)" so a NaN threshold still counts as drift.
    within_threshold = abs(mu1 - mu2) < drift_threshold
    if not within_threshold:
        return "This is supposed to break since dummy data mean is 0 BTC. " \
               "Returning error message for testing purposes."

    return 'Successfully ran model drift-volume anomaly tests.'
def process_request(observation: dict):
    """
    turn an API request into the form taken by the model

    :param observation: request from API; its "bitcoin_last_minute" entry is
        loaded into a DataFrame with index [0]
    :return: the DataFrame after it has been passed through preprocess_data()
    """
    raw_frame = pd.DataFrame(observation["bitcoin_last_minute"], index=[0])
    return preprocess_data(raw_frame)
def test_preprocessing_train():
    """
    make sure preprocessing and training steps work

    Runs preprocess_data(), wraps the result in the "API" request format and
    passes it to train_model(); both results must be non-None.

    :return:
    """
    data = preprocess_data()
    # Check the raw result before wrapping it: once it is inside the request
    # dict the "is not None" test can never fail.
    assert data is not None, "Error in preprocess_data()."

    request = {'bitcoin_last_minute': data.to_dict()}  # convert to "API" request format

    mae = train_model(request_dict=request)
    assert mae is not None, "Error in train_model()."
def test_post(self, mock_post):
    """
    post the training payload to the /train endpoint and check the mock's last call

    :param mock_post: mock whose most recent call is compared against the
        request sent here (presumably a patch of requests.post; the decorator
        is not visible in this block)
    """
    self.data = preprocess_data()
    self.data = {'bitcoin_last_minute': self.data.to_dict()}  # convert to "API" request format

    endpoint = 'http://0.0.0.0:5000/train'
    body = json.dumps(self.data)
    json_headers = {'Content-Type': 'application/json'}

    requests.post(endpoint, data=body, headers=json_headers)

    mock_post.assert_called_with(endpoint, data=body, headers=json_headers)
def __init__(self):
    """
    load the dataset and set up the attributes derived from it

    Reads data.pkl from the directory of this file; if that fails for any
    reason the data is regenerated with preprocess_data() instead.
    """
    try:
        # path is built inside the try so any failure here also falls back
        pickle_path = os.path.join(os.path.dirname(__file__), "data.pkl")
        self.data = pd.read_pickle(pickle_path)
    except Exception:
        logging.info(
            "Couldn't load data.pkl, so preprocessing data to generate.")
        self.data = preprocess_data()

    conf = conf_object.project_conf
    self.project_conf = conf
    self.model = None

    target = conf["target_feature"]
    self.target_feature = target

    # every column except the target is a feature
    feature_names = list(self.data.columns)
    feature_names.remove(target)
    self.features = feature_names

    self.actuals = self.data[target].values
def setUp(self) -> None:
    """
    store the preprocessed data on self.data in "API" request format
    """
    frame = preprocess_data()
    # convert to "API" request format
    request_payload = {'bitcoin_last_minute': frame.to_dict()}
    self.data = request_payload
import logging

from data.data_preprocessing import preprocess_data
from data.data_lookback import lookback
from conf.conf_loader import conf_object

# Preprocess the data, pass it through lookback() using the configured
# lookback features and time window, then save the result.
data = lookback(
    dataset=preprocess_data(),
    features=conf_object.project_conf["lookback_features"],
    timesteps=conf_object.project_conf["time_window"],
)

# relative path: the pickle is written to the current working directory
data.to_pickle('data.pkl')
logging.info("Finished preprocessing.")