class MLModelTestCase(unittest.TestCase):

    def setUp(self):
        #
        # Get dataset from MI API
        #
        print("Loading data...")
        mi = MarketInsights(cred)
        fun = Functions(cred)
        self.miassembly = MIAssembly(mi, fun)

        TRAINING_RUN["id"] = cos.generateKey(
            [str(TRAINING_RUN["datasets"]), str(TRAINING_RUN["model_id"])])
        mi.put_training_run(TRAINING_RUN)

        self.CONFIG = mi.get_model(MODEL_ID)
        TRN_CNF = self.CONFIG["training"]

        print("Creating model...")

        # Create ML model
        self.ffnn = Model(NUM_FEATURES, NUM_LABELS, self.CONFIG)

        mkt1, mkt1_desc = mi.get_dataset_by_id(DATASET_ID1)
        mkt2, mkt2_desc = mi.get_dataset_by_id(DATASET_ID2)

        # Crop training dates
        if "training_end_date" in TRN_CNF:
            mkt1 = mkt1[
                TRN_CNF["training_start_date"]:TRN_CNF["training_end_date"]]
            mkt2 = mkt2[
                TRN_CNF["training_start_date"]:TRN_CNF["training_end_date"]]

        # Interleave (part of the "added insight" for this model)
        self.mkt1, self.mkt2, self.isect = ppl.intersect(mkt1, mkt2)
        self.dataset = ppl.interleave(self.mkt1, self.mkt2)

        self.TRAINING_SET_SIZE = TRN_CNF["training_window_size"]
        self.TEST_SET_SIZE = len(self.dataset) - self.TRAINING_SET_SIZE
        self.WINDOW_SIZE = self.TRAINING_SET_SIZE

        _, self.test_y = ppl.splitCol(self.dataset[self.TRAINING_SET_SIZE:],
                                      NUM_FEATURES)

    def testFFNN_BootstrapTrain(self):
        ###############
        # Test Training
        ###############
        print("testFFNN_BootstrapTrain")
        TRN_CNF = self.CONFIG['training']

        print("Training", end='')
        results = mlutils.bootstrapTrain(self.ffnn,
                                         self.dataset[:self.TRAINING_SET_SIZE],
                                         self.dataset[self.TRAINING_SET_SIZE:],
                                         TRN_CNF['lamda'],
                                         TRN_CNF['iterations'],
                                         TRN_CNF['threshold'], False)

        predictions = np.nanmean(results["test_predictions"], axis=0)
        result = mlutils.evaluate(ppl.onehot(predictions),
                                  ppl.onehot(self.test_y), .0)
        print(result)
        print("".join(["Received : ", str(result)]))
        print("Expected : 0.47906977")
        self.assertTrue(np.allclose(result, 0.47906977))  # Local results

        ##################
        # Test weights API
        ##################

        # Save weights to Cloud Object Store
        newWeights = pd.DataFrame(results["weights"])
        newWeights.insert(
            0, 'timestamp',
            [self.isect[self.TRAINING_SET_SIZE // 2].value // 10**9] *
            len(newWeights))
        cos.put_csv(COS_BUCKET, TRAINING_RUN["id"], newWeights)
        loadedWeights = cos.get_csv(COS_BUCKET, TRAINING_RUN["id"])
        self.assertTrue(np.allclose(newWeights.values, loadedWeights.values))

        #####################################
        # Test prediction from loaded weights
        #####################################
        dataset = self.mkt1.iloc[:, :-NUM_LABELS][-50:]
        timestamps = dataset.index.astype(np.int64) // 10**9
        dataset = dataset.reset_index(drop=True)
        newPredictions = self.predict(timestamps, dataset, loadedWeights)
        self.assertTrue(
            np.allclose(newPredictions,
                        results["test_predictions"][0][-100:][::2]))

        #####################################
        # Test prediction from Assembly
        #####################################
        dataset = self.mkt1.iloc[:, :-NUM_LABELS][-50:]
        assemblyPredictions = self.miassembly.get_predictions_with_dataset(
            dataset, TRAINING_RUN["id"])
        self.assertTrue(
            np.allclose(newPredictions.flatten(),
                        assemblyPredictions[0].values.flatten(),
                        rtol=1e-03))

    def testFFNN_BoostingTrain(self):
        ###############
        # Test Training
        ###############
        print("testFFNN_BoostingTrain")
        TRN_CNF = self.CONFIG['training']

        print("Training", end='')
        results = mlutils.boostingTrain(self.ffnn,
                                        self.dataset[:self.TRAINING_SET_SIZE],
                                        self.dataset[self.TRAINING_SET_SIZE:],
                                        TRN_CNF['lamda'],
                                        TRN_CNF['iterations'], False)

        predictions = np.nanmean(results["test_predictions"], axis=0)
        result = mlutils.evaluate(ppl.onehot(predictions),
                                  ppl.onehot(self.test_y), .0)
        print("".join(["Received : ", str(result)]))
        print("Expected : 0.51627904")
        self.assertTrue(np.allclose(result, 0.51627904))  # Local results

        ##################
        # Test weights API
        ##################

        # Save weights to Cloud Object Store
        newWeights = pd.DataFrame(results["weights"])
        newWeights.insert(
            0, 'timestamp',
            [self.isect[self.TRAINING_SET_SIZE // 2].value // 10**9] *
            len(newWeights))
        cos.put_csv(COS_BUCKET, TRAINING_RUN["id"], newWeights)
        loadedWeights = cos.get_csv(COS_BUCKET, TRAINING_RUN["id"])
        self.assertTrue(np.allclose(newWeights.values, loadedWeights.values))

        #####################################
        # Test prediction from loaded weights
        #####################################
        dataset = self.mkt2.iloc[:, :-NUM_LABELS][-50:]
        timestamps = dataset.index.astype(np.int64) // 10**9
        dataset = dataset.reset_index(drop=True)
        newPredictions = self.predict(timestamps, dataset, loadedWeights)
        self.assertTrue(
            np.allclose(newPredictions,
                        results["test_predictions"][0][-100:][1::2]))

        #####################################
        # Test prediction from Assembly
        #####################################
        dataset = self.mkt2.iloc[:, :-NUM_LABELS][-50:]
        assemblyPredictions = self.miassembly.get_predictions_with_dataset(
            dataset, TRAINING_RUN["id"])
        self.assertTrue(
            np.allclose(newPredictions.flatten(),
                        assemblyPredictions[0].values.flatten(),
                        rtol=1e-03))

    # Function to take dates and dataset info for those dates
    def predict(self, timestamps, dataset, weights=None):
        # Load timestamps from weights db (or load all weights data)
        if (weights is None):
            weights = cos.get_csv(COS_BUCKET, TRAINING_RUN["id"])
        wPeriods = weights["timestamp"].values

        # For each dataset timestamp, match the latest available weight timestamp
        latestPeriods = np.zeros(len(timestamps))
        uniqueWPeriods = np.unique(wPeriods)
        mask = timestamps >= np.min(uniqueWPeriods)
        latestPeriods[mask] = [
            uniqueWPeriods[uniqueWPeriods <= s][-1] for s in timestamps[mask]
        ]

        # For each non-duplicate timestamp, load weights into the model for that timestamp
        results = np.empty((len(dataset), NUM_LABELS))
        for x in np.unique(latestPeriods):
            # Run dataset entries matching that timestamp through the model,
            # saving results against the original timestamps
            mask = latestPeriods == x
            results[mask] = np.nanmean(self.ffnn.predict(
                weights[wPeriods == x].values[:, 1:], dataset[mask]),
                                       axis=0)

        return results
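
# Illustrative sketch (not part of the original test suite): the "latest
# available weight timestamp" lookup that MLModelTestCase.predict() performs
# above, using hypothetical epoch-second values. The helper name and the
# numbers are invented for demonstration only.
def _example_latest_weight_lookup():
    weight_periods = np.array([100, 200, 300])       # timestamps stored with each weights row
    data_timestamps = np.array([50, 150, 250, 350])  # timestamps of the rows to predict

    latest = np.zeros(len(data_timestamps))
    valid = data_timestamps >= weight_periods.min()  # rows older than any weights stay at 0
    latest[valid] = [weight_periods[weight_periods <= t][-1]
                     for t in data_timestamps[valid]]
    # latest -> [0., 100., 200., 300.]: each row is matched to the most recent
    # weights available at or before its own timestamp.
    return latest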
class MIAggregateModel(Model):

    def __init__(self,
                 name,
                 env,
                 credstore,
                 mi_models,
                 aggMethod,
                 threshold=0,
                 barOnly=False,
                 debug=False):
        Model.__init__(self, name, env)
        self.miassembly = MIAssembly(MarketInsights(credstore),
                                     Functions(credstore))
        self.modelConfig = mi_models
        self.aggMethod = aggMethod
        self.threshold = threshold
        self.barOnly = barOnly
        self.debug = debug
        return

    # Generate signals and use them with asset values to calculate allocations
    def getSignals(self, idx=0):
        # Extract window from the data
        # TODO : Handle list of assetInfos
        window = self.getWindow(idx)
        signals = pd.DataFrame(np.zeros((len(window), 2)),
                               index=window.index,
                               columns=["bar", "gap"])

        # Obtain the signals for the next n steps from the Market Insights API
        predictions = self.getPredictions(
            window.index[0].isoformat(),
            (window.index[-1] + datetime.timedelta(seconds=1)).isoformat())

        if predictions is not None:
            signals.update(predictions)

        return signals[idx:]

    def getPredictions(self, start, end):
        predictions_list = []
        for training_run in self.modelConfig:
            for dataset_id in training_run["datasets"]:
                print("Retrieving predictions for training id {}, dataset {}".
                      format(training_run["training_run_id"], dataset_id))
                predictions = self.miassembly.get_predictions_with_dataset_id(
                    dataset_id,
                    training_run["training_run_id"],
                    start=start,
                    end=end,
                    debug=self.debug)
                if (predictions is None):
                    return None
                predictions_list.append(predictions)

        predictions = mlutils.aggregatePredictions(predictions_list,
                                                   self.aggMethod)

        signals = mlutils.getPredictionSignals(predictions.values,
                                               self.threshold)
        signals = pd.DataFrame(np.array([signals, signals]).T,
                               index=predictions.index,
                               columns=["bar", "gap"])
        if (self.barOnly):
            signals["gap"] = 0

        return signals
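
# Minimal usage sketch (hypothetical ids; `env` and `credstore` are whatever the
# Model base class and MarketInsights client expect): MIAggregateModel pools
# predictions from several training runs and datasets, aggregates them, and
# converts the aggregate into "bar"/"gap" signals. The helper name and config
# values below are invented for illustration only.
def _example_aggregate_model(env, credstore):
    mi_models = [{
        "training_run_id": "<training-run-id>",
        "datasets": ["<dataset-id-1>", "<dataset-id-2>"],
    }]
    model = MIAggregateModel("aggExample", env, credstore, mi_models,
                             aggMethod="mean_all", threshold=0, barOnly=True)
    # DataFrame indexed by date with "bar" and "gap" signal columns
    return model.getSignals()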
class MIBasicModel(Model):

    def __init__(self,
                 name,
                 env,
                 credstore,
                 dataset_id,
                 training_run_id,
                 threshold=0,
                 barOnly=False,
                 debug=False):
        Model.__init__(self, name, env)
        self.miassembly = MIAssembly(MarketInsights(credstore),
                                     Functions(credstore))
        self.dataset_id = dataset_id
        self.training_run_id = training_run_id
        self.threshold = threshold
        self.debug = debug
        self.barOnly = barOnly
        return

    # Generate signals and use them with asset values to calculate allocations
    def getSignals(self, idx=0):
        # Extract window from the data
        # TODO : Handle list of assetInfos
        window = self.getWindow(idx)

        # Obtain the signals for the next n steps from the Market Insights API
        signals = pd.DataFrame(np.zeros((len(window), 2)),
                               index=window.index,
                               columns=["bar", "gap"])
        predictions = self.getPredictions(
            window.index[0].isoformat(),
            (window.index[-1] + datetime.timedelta(seconds=1)).isoformat())

        if predictions is not None:
            signals.update(predictions)

        return signals[idx:]

    def getPredictions(self, start, end):
        predictions = self.miassembly.get_predictions_with_dataset_id(
            self.dataset_id,
            self.training_run_id,
            start=start,
            end=end,
            debug=self.debug)
        if predictions is None:
            return predictions

        predictions = mlutils.aggregatePredictions([predictions],
                                                   method="mean_all")

        signals = mlutils.getPredictionSignals(predictions.values,
                                               self.threshold)
        signals = pd.DataFrame(np.array([signals, signals]).T,
                               index=predictions.index,
                               columns=["bar", "gap"])
        if (self.barOnly):
            signals["gap"] = 0

        return signals
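
# Minimal usage sketch (hypothetical ids; `env` and `credstore` are whatever the
# Model base class and MarketInsights client expect): MIBasicModel draws
# predictions for a single dataset/training-run pair and maps them onto
# "bar"/"gap" signals. The helper name and ids are invented for illustration only.
def _example_basic_model(env, credstore):
    model = MIBasicModel("basicExample", env, credstore,
                         dataset_id="<dataset-id>",
                         training_run_id="<training-run-id>",
                         threshold=0, barOnly=False)
    return model.getSignals()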
class APITest(unittest.TestCase):

    def setUp(self):
        self.cred = CredentialsStore()
        self.mi = MarketInsights(self.cred)
        fun = Functions(self.cred)
        self.miassembly = MIAssembly(self.mi, fun)

    def testEndToEndPredictionFromDataset(self):
        TRAINING_RUN_ID = "94b227b9d7b22c920333aa36d23669c8"
        DATASET_ID = "4234f0f1b6fcc17f6458696a6cdf5101"

        # mc = MIModelClient(self.cred)
        # results = self.miassembly.get_local_predictions_with_dataset_id(
        #     mc, DATASET_ID, TRAINING_RUN_ID,
        #     start="2016-07-01", end="2016-07-15", debug=True)
        # results = pd.DataFrame(results["data"], results["index"])
        results = self.miassembly.get_predictions_with_dataset_id(
            DATASET_ID,
            TRAINING_RUN_ID,
            start="2016-07-01",
            end="2016-07-15",
            debug=True)
        results = mlutils.aggregatePredictions([results], "mean_all")
        '''
        # Results should look like this
        Date_Time
        2016-07-01 15:00:00-04:00    0.000000e+00
        2016-07-05 15:00:00-04:00    0.000000e+00
        2016-07-06 15:00:00-04:00    0.000000e+00
        2016-07-07 15:00:00-04:00    6.174025e-03
        2016-07-08 15:00:00-04:00    8.180070e-01
        2016-07-11 15:00:00-04:00    1.000000e+00
        2016-07-12 15:00:00-04:00    3.874419e-06
        2016-07-13 15:00:00-04:00    9.999999e-01
        2016-07-14 15:00:00-04:00    3.974110e-11
        2016-07-15 15:00:00-04:00    3.007612e-01
        '''
        self.assertEqual(np.nansum(results), 3.124945995554477)

    def testEndToEndPredictionFromRawData(self):
        TRAINING_RUN_ID = "94b227b9d7b22c920333aa36d23669c8"

        with open(root_dir + "data/testRawData.json") as data_file:
            testRawData = json.load(data_file)

        data = Dataset.jsontocsv(testRawData)
        data.columns = ["Open", "High", "Low", "Close"]

        results = self.miassembly.get_predictions_with_raw_data(
            data, TRAINING_RUN_ID)
        results = mlutils.aggregatePredictions([results], "mean_all")
        '''
        Date_Time
        2016-07-01 15:00:00-04:00    0.000000e+00
        2016-07-05 15:00:00-04:00    0.000000e+00
        2016-07-06 15:00:00-04:00    0.000000e+00
        2016-07-07 15:00:00-04:00    6.174025e-03
        2016-07-08 15:00:00-04:00    8.180070e-01
        2016-07-11 15:00:00-04:00    1.000000e+00
        2016-07-12 15:00:00-04:00    3.874419e-06
        2016-07-13 15:00:00-04:00    9.999999e-01
        2016-07-14 15:00:00-04:00    3.974110e-11
        2016-07-15 15:00:00-04:00    3.007612e-01
        '''
        self.assertEqual(np.nansum(results), 3.124945995554477)

    @DeprecationWarning
    def _test_predictions(self):
        predictions = pd.read_csv(root_dir + 'data/testPredictions.csv',
                                  index_col=0,
                                  parse_dates=True,
                                  header=None)

        # Clean up
        print("Cleaning up")
        resp = self.mi.delete_predictions("testMkt", "testModelId",
                                          debug=False)

        print("Posting predictions")
        resp = self.mi.put_predictions(predictions,
                                       "testMkt",
                                       "testModelId",
                                       debug=False)
        self.assertTrue('success' in resp)

        resp = self.mi.get_predictions("testMkt", "testModelId")
        self.assertTrue(predictions.index.equals(resp.index))
        self.assertTrue(np.allclose(predictions.values, resp.values))

        # Shuffle values and update stored predictions
        predictions2 = ppl.shuffle(predictions)
        predictions2.index = predictions.index
        predictions = predictions2

        print("Updating predictions")
        resp = self.mi.put_predictions(predictions,
                                       "testMkt",
                                       "testModelId",
                                       update=True)
        self.assertTrue('success' in resp)

        resp = self.mi.get_predictions("testMkt", "testModelId")
        self.assertTrue(predictions.index.equals(resp.index))
        self.assertTrue(np.allclose(predictions.values, resp.values))

        print("Cleaning up")
        resp = self.mi.delete_predictions("testMkt", "testModelId")
        resp = self.mi.get_predictions("testMkt", "testModelId")
        self.assertTrue(resp.empty)
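
# Quick sanity check (not part of the original tests): summing the per-day
# predictions listed in the expected-results docstrings above reproduces, to
# the printed precision, the total asserted by assertEqual. The helper name is
# invented for illustration only.
def _example_expected_total():
    expected = [0.0, 0.0, 0.0, 6.174025e-03, 8.180070e-01, 1.0,
                3.874419e-06, 9.999999e-01, 3.974110e-11, 3.007612e-01]
    return np.nansum(expected)  # ~= 3.124946, close to 3.124945995554477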