Example #1
class MLModelTestCase(unittest.TestCase):
    def setUp(self):

        #
        # Get dataset from MI API
        #

        print("Loading data...")

        mi = MarketInsights(cred)
        fun = Functions(cred)
        self.miassembly = MIAssembly(mi, fun)

        TRAINING_RUN["id"] = cos.generateKey(
            [str(TRAINING_RUN["datasets"]),
             str(TRAINING_RUN["model_id"])])

        mi.put_training_run(TRAINING_RUN)

        self.CONFIG = mi.get_model(MODEL_ID)
        TRN_CNF = self.CONFIG["training"]

        print("Creating model...")
        # Create ML model
        self.ffnn = Model(NUM_FEATURES, NUM_LABELS, self.CONFIG)

        mkt1, mkt1_desc = mi.get_dataset_by_id(DATASET_ID1)
        mkt2, mkt2_desc = mi.get_dataset_by_id(DATASET_ID2)

        # Crop training dates
        if "training_end_date" in TRN_CNF:
            mkt1 = mkt1[
                TRN_CNF["training_start_date"]:TRN_CNF["training_end_date"]]
            mkt2 = mkt2[
                TRN_CNF["training_start_date"]:TRN_CNF["training_end_date"]]

        # Interleave (part of the "added insight" for this model)
        self.mkt1, self.mkt2, self.isect = ppl.intersect(mkt1, mkt2)
        self.dataset = ppl.interleave(self.mkt1, self.mkt2)
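        # e.g. if ppl.interleave alternates rows, the combined set is
        # mkt1[0], mkt2[0], mkt1[1], mkt2[1], ... which is why the per-market
        # predictions are recovered later with [::2] and [1::2] slices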

        self.TRAINING_SET_SIZE = TRN_CNF["training_window_size"]
        self.TEST_SET_SIZE = len(self.dataset) - self.TRAINING_SET_SIZE
        self.WINDOW_SIZE = self.TRAINING_SET_SIZE

        _, self.test_y = ppl.splitCol(self.dataset[self.TRAINING_SET_SIZE:],
                                      NUM_FEATURES)

    def testFFNN_BootstrapTrain(self):

        ###############
        # Test Training
        ###############

        print("testFFNN_BootstrapTrain")

        TRN_CNF = self.CONFIG['training']
        print("Training", end='')

        results = mlutils.bootstrapTrain(self.ffnn,
                                         self.dataset[:self.TRAINING_SET_SIZE],
                                         self.dataset[self.TRAINING_SET_SIZE:],
                                         TRN_CNF['lamda'],
                                         TRN_CNF['iterations'],
                                         TRN_CNF['threshold'], False)
        predictions = np.nanmean(results["test_predictions"], axis=0)
        result = mlutils.evaluate(ppl.onehot(predictions),
                                  ppl.onehot(self.test_y), .0)
        print(result)

        print("".join(["Received : ", str(result)]))
        print("Expected : 0.47906977")
        self.assertTrue(np.allclose(result, 0.47906977))  # Local results

        ##################
        # Test weights API
        ##################

        # Save weights to Cloud Object Store
        newWeights = pd.DataFrame(results["weights"])
        newWeights.insert(
            0, 'timestamp',
            [self.isect[self.TRAINING_SET_SIZE // 2].value // 10**9] *
            len(newWeights))
        cos.put_csv(COS_BUCKET, TRAINING_RUN["id"], newWeights)

        loadedWeights = cos.get_csv(COS_BUCKET, TRAINING_RUN["id"])
        self.assertTrue(np.allclose(newWeights.values, loadedWeights.values))

        #####################################
        # Test prediction from loaded weights
        #####################################

        dataset = self.mkt1.iloc[:, :-NUM_LABELS][-50:]
        timestamps = dataset.index.astype(np.int64) // 10**9
        dataset = dataset.reset_index(drop=True)
        newPredictions = self.predict(timestamps, dataset, loadedWeights)
        self.assertTrue(
            np.allclose(newPredictions,
                        results["test_predictions"][0][-100:][::2]))

        #####################################
        # Test prediction from Assembly
        #####################################
        dataset = self.mkt1.iloc[:, :-NUM_LABELS][-50:]
        assemblyPredictions = self.miassembly.get_predictions_with_dataset(
            dataset, TRAINING_RUN["id"])
        self.assertTrue(
            np.allclose(newPredictions.flatten(),
                        assemblyPredictions[0].values.flatten(),
                        rtol=1e-03))

    def testFFNN_BoostingTrain(self):

        ###############
        # Test Training
        ###############

        print("testFFNN_BoostingTrain")

        TRN_CNF = self.CONFIG['training']
        print("Training", end='')

        results = mlutils.boostingTrain(self.ffnn,
                                        self.dataset[:self.TRAINING_SET_SIZE],
                                        self.dataset[self.TRAINING_SET_SIZE:],
                                        TRN_CNF['lamda'],
                                        TRN_CNF['iterations'], False)
        predictions = np.nanmean(results["test_predictions"], axis=0)
        result = mlutils.evaluate(ppl.onehot(predictions),
                                  ppl.onehot(self.test_y), .0)

        print("".join(["Received : ", str(result)]))
        print("Expected : 0.51627904")
        self.assertTrue(np.allclose(result, 0.51627904))  # Local results

        ##################
        # Test weights API
        ##################

        # Save weights to Cloud Object Store
        newWeights = pd.DataFrame(results["weights"])
        newWeights.insert(
            0, 'timestamp',
            [self.isect[self.TRAINING_SET_SIZE // 2].value // 10**9] *
            len(newWeights))
        cos.put_csv(COS_BUCKET, TRAINING_RUN["id"], newWeights)

        loadedWeights = cos.get_csv(COS_BUCKET, TRAINING_RUN["id"])
        self.assertTrue(np.allclose(newWeights.values, loadedWeights.values))

        #####################################
        # Test prediction from loaded weights
        #####################################

        dataset = self.mkt2.iloc[:, :-NUM_LABELS][-50:]
        timestamps = dataset.index.astype(np.int64) // 10**9
        dataset = dataset.reset_index(drop=True)
        newPredictions = self.predict(timestamps, dataset, loadedWeights)
        self.assertTrue(
            np.allclose(newPredictions,
                        results["test_predictions"][0][-100:][1::2]))

        #####################################
        # Test prediction from Assembly
        #####################################
        dataset = self.mkt2.iloc[:, :-NUM_LABELS][-50:]
        assemblyPredictions = self.miassembly.get_predictions_with_dataset(
            dataset, TRAINING_RUN["id"])
        self.assertTrue(
            np.allclose(newPredictions.flatten(),
                        assemblyPredictions[0].values.flatten(),
                        rtol=1e-03))

    # Given timestamps and the dataset rows for those dates, predict using
    # the latest weights available at each timestamp
    def predict(self, timestamps, dataset, weights=None):

        # Load the stored weights unless they were passed in
        if weights is None:
            weights = cos.get_csv(COS_BUCKET, TRAINING_RUN["id"])
        wPeriods = weights["timestamp"].values

        # For each dataset timestamp, find the latest weight timestamp at or
        # before it; timestamps earlier than all weights keep the 0 placeholder
        latestPeriods = np.zeros(len(timestamps))
        uniqueWPeriods = np.unique(wPeriods)
        mask = timestamps >= np.min(uniqueWPeriods)
        latestPeriods[mask] = [
            uniqueWPeriods[uniqueWPeriods <= s][-1] for s in timestamps[mask]
        ]

        # For each distinct weight timestamp, run the dataset rows assigned to
        # it through the model with the matching weight rows, averaging the
        # predictions across weight sets
        results = np.empty((len(dataset), NUM_LABELS))
        for x in np.unique(latestPeriods):
            mask = latestPeriods == x
            results[mask] = np.nanmean(self.ffnn.predict(
                weights[wPeriods == x].values[:, 1:], dataset[mask]),
                                       axis=0)

        return results
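
The heart of predict above is an as-of join: each observation is paired with the most recent stored weight snapshot taken at or before it. A self-contained sketch of just that matching step, with made-up timestamps (nothing here comes from the MI API):

import numpy as np

# One stored weight set per row; several rows may share a timestamp,
# e.g. bootstrap runs saved together.
w_periods = np.array([100, 100, 200])
timestamps = np.array([90, 150, 250])  # observation times

latest = np.zeros(len(timestamps))
unique_w = np.unique(w_periods)        # [100, 200]
mask = timestamps >= unique_w.min()    # [False, True, True]
latest[mask] = [unique_w[unique_w <= t][-1] for t in timestamps[mask]]
print(latest)                          # [  0. 100. 200.]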
Example #2
class MIAggregateModel(Model):
    def __init__(self,
                 name,
                 env,
                 credstore,
                 mi_models,
                 aggMethod,
                 threshold=0,
                 barOnly=False,
                 debug=False):
        Model.__init__(self, name, env)

        self.miassembly = MIAssembly(MarketInsights(credstore),
                                     Functions(credstore))
        self.modelConfig = mi_models
        self.aggMethod = aggMethod
        self.threshold = threshold
        self.barOnly = barOnly
        self.debug = debug
        return

    # Generate Signals and use them with asset values to calculate allocations
    def getSignals(self, idx=0):

        # Extract window from the data
        # TODO : Handle list of assetInfos
        window = self.getWindow(idx)

        signals = pd.DataFrame(np.zeros((len(window), 2)),
                               index=window.index,
                               columns=["bar", "gap"])

        # Obtain the signals for the next n steps from the Market Insights API
        predictions = self.getPredictions(
            window.index[0].isoformat(),
            (window.index[-1] + datetime.timedelta(seconds=1)).isoformat())

        if predictions is not None:
            signals.update(predictions)

        return signals[idx:]

    def getPredictions(self, start, end):

        predictions_list = []

        for training_run in self.modelConfig:
            for dataset_id in training_run["datasets"]:
                print("Retrieving predictions for training id {}, dataset {}".
                      format(training_run["training_run_id"], dataset_id))
                predictions = self.miassembly.get_predictions_with_dataset_id(
                    dataset_id,
                    training_run["training_run_id"],
                    start=start,
                    end=end,
                    debug=self.debug)
                if predictions is None:
                    return None
                predictions_list.append(predictions)

        predictions = mlutils.aggregatePredictions(predictions_list,
                                                   self.aggMethod)
        signals = mlutils.getPredictionSignals(predictions.values,
                                               self.threshold)
        signals = pd.DataFrame(np.array([signals, signals]).T,
                               index=predictions.index,
                               columns=["bar", "gap"])
        if self.barOnly:
            signals["gap"] = 0
        return signals
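
For reference, a hypothetical way to wire this class up; the config shape (a training_run_id plus its dataset ids) is inferred from getPredictions above, while the ids and env are placeholders, not real values:

# Sketch only: ids are placeholders, env comes from the surrounding
# framework, and "mean_all" is the aggregation method MIBasicModel
# below uses.
mi_models = [{
    "training_run_id": "<training-run-id>",
    "datasets": ["<dataset-id-1>", "<dataset-id-2>"],
}]
model = MIAggregateModel("aggModel", env, CredentialsStore(), mi_models,
                         aggMethod="mean_all", threshold=0, barOnly=True)
signals = model.getPredictions("2016-07-01", "2016-07-15")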
Example #3
class MIBasicModel(Model):
    def __init__(self,
                 name,
                 env,
                 credstore,
                 dataset_id,
                 training_run_id,
                 threshold=0,
                 barOnly=False,
                 debug=False):
        Model.__init__(self, name, env)

        self.miassembly = MIAssembly(MarketInsights(credstore),
                                     Functions(credstore))
        self.dataset_id = dataset_id
        self.training_run_id = training_run_id
        self.threshold = threshold
        self.debug = debug
        self.barOnly = barOnly

        return

    # Generate Signals and use them with asset values to calculate allocations
    def getSignals(self, idx=0):

        # Extract window from the data
        # TODO : Handle list of assetInfos
        window = self.getWindow(idx)

        # Obtain the signals for the next n steps from the Market Insights API
        signals = pd.DataFrame(np.zeros((len(window), 2)),
                               index=window.index,
                               columns=["bar", "gap"])
        predictions = self.getPredictions(
            window.index[0].isoformat(),
            (window.index[-1] + datetime.timedelta(seconds=1)).isoformat())

        if predictions is not None:
            signals.update(predictions)

        return signals[idx:]

    def getPredictions(self, start, end):
        predictions = self.miassembly.get_predictions_with_dataset_id(
            self.dataset_id,
            self.training_run_id,
            start=start,
            end=end,
            debug=self.debug)
        if predictions is None:
            return predictions
        predictions = mlutils.aggregatePredictions([predictions],
                                                   method="mean_all")
        signals = mlutils.getPredictionSignals(predictions.values,
                                               self.threshold)
        signals = pd.DataFrame(np.array([signals, signals]).T,
                               index=predictions.index,
                               columns=["bar", "gap"])
        if self.barOnly:
            signals["gap"] = 0
        return signals
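
Similarly, a minimal hypothetical construction of MIBasicModel, which binds a single dataset/training-run pair (again, ids and env are placeholders):

# Sketch only: ids are placeholders.
model = MIBasicModel("basicModel", env, CredentialsStore(), "<dataset-id>",
                     "<training-run-id>", threshold=0, barOnly=True)
signals = model.getPredictions("2016-07-01", "2016-07-15")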
Example #4
class APITest(unittest.TestCase):
    def setUp(self):

        self.cred = CredentialsStore()
        self.mi = MarketInsights(self.cred)
        fun = Functions(self.cred)
        self.miassembly = MIAssembly(self.mi, fun)

    def testEndToEndPredictionFromDataset(self):

        TRAINING_RUN_ID = "94b227b9d7b22c920333aa36d23669c8"
        DATASET_ID = "4234f0f1b6fcc17f6458696a6cdf5101"

        # Alternative: run the model locally instead of via the service
        # mc = MIModelClient(self.cred)
        # results = self.miassembly.get_local_predictions_with_dataset_id(
        #     mc, DATASET_ID, TRAINING_RUN_ID, start="2016-07-01",
        #     end="2016-07-15", debug=True)
        # results = pd.DataFrame(results["data"], results["index"])
        results = self.miassembly.get_predictions_with_dataset_id(
            DATASET_ID,
            TRAINING_RUN_ID,
            start="2016-07-01",
            end="2016-07-15",
            debug=True)
        results = mlutils.aggregatePredictions([results], "mean_all")
        '''
        Results should look like this:

        Date_Time
        2016-07-01 15:00:00-04:00  0.000000e+00
        2016-07-05 15:00:00-04:00  0.000000e+00
        2016-07-06 15:00:00-04:00  0.000000e+00
        2016-07-07 15:00:00-04:00  6.174025e-03
        2016-07-08 15:00:00-04:00  8.180070e-01
        2016-07-11 15:00:00-04:00  1.000000e+00
        2016-07-12 15:00:00-04:00  3.874419e-06
        2016-07-13 15:00:00-04:00  9.999999e-01
        2016-07-14 15:00:00-04:00  3.974110e-11
        2016-07-15 15:00:00-04:00  3.007612e-01
        '''
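
        # Note: the asserted value is, to display precision, the nansum of
        # the expected column above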

        self.assertEqual(np.nansum(results), 3.124945995554477)

    def testEndToEndPredictionFromRawData(self):

        TRAINING_RUN_ID = "94b227b9d7b22c920333aa36d23669c8"

        with open(root_dir + "data/testRawData.json") as data_file:
            testRawData = json.load(data_file)

        data = Dataset.jsontocsv(testRawData)
        data.columns = ["Open", "High", "Low", "Close"]

        results = self.miassembly.get_predictions_with_raw_data(
            data, TRAINING_RUN_ID)

        results = mlutils.aggregatePredictions([results], "mean_all")
        '''
        Results should look like this:

        Date_Time
        2016-07-01 15:00:00-04:00  0.000000e+00
        2016-07-05 15:00:00-04:00  0.000000e+00
        2016-07-06 15:00:00-04:00  0.000000e+00
        2016-07-07 15:00:00-04:00  6.174025e-03
        2016-07-08 15:00:00-04:00  8.180070e-01
        2016-07-11 15:00:00-04:00  1.000000e+00
        2016-07-12 15:00:00-04:00  3.874419e-06
        2016-07-13 15:00:00-04:00  9.999999e-01
        2016-07-14 15:00:00-04:00  3.974110e-11
        2016-07-15 15:00:00-04:00  3.007612e-01
        '''

        self.assertEqual(np.nansum(results), 3.124945995554477)

    # Deprecated: kept for reference; the leading underscore keeps unittest
    # from collecting it (the original @DeprecationWarning decorator would
    # have replaced the method with a warning instance)
    def _test_predictions(self):
        predictions = pd.read_csv(root_dir + 'data/testPredictions.csv',
                                  index_col=0,
                                  parse_dates=True,
                                  header=None)

        # Clean up
        print("Cleaning up")
        resp = self.mi.delete_predictions("testMkt",
                                          "testModelId",
                                          debug=False)

        print("Posting predictions")
        resp = self.mi.put_predictions(predictions,
                                       "testMkt",
                                       "testModelId",
                                       debug=False)
        self.assertTrue('success' in resp)

        resp = self.mi.get_predictions("testMkt", "testModelId")
        self.assertTrue(predictions.index.equals(resp.index))
        self.assertTrue(np.allclose(predictions.values, resp.values))

        # Shuffle values and update stored predictions
        predictions2 = ppl.shuffle(predictions)
        predictions2.index = predictions.index
        predictions = predictions2

        print("Updating predictions")
        resp = self.mi.put_predictions(predictions,
                                       "testMkt",
                                       "testModelId",
                                       update=True)
        self.assertTrue('success' in resp)

        resp = self.mi.get_predictions("testMkt", "testModelId")
        self.assertTrue(predictions.index.equals(resp.index))
        self.assertTrue(np.allclose(predictions.values, resp.values))

        print("Cleaning up")
        resp = self.mi.delete_predictions("testMkt", "testModelId")

        resp = self.mi.get_predictions("testMkt", "testModelId")
        self.assertTrue(resp.empty)