def fitness(self, organism):
    """Return the fitness score of ``organism``, memoised on its ``repr``.

    Cache hit: the stored ``(score, predictions)`` pair is reused, a
    ``FoundForest`` built from it is attached to ``organism.found_forest``
    and the stored score is returned.  Nothing is trained and
    ``self.history`` is left untouched on this path.

    Cache miss: a ``MachineLearner`` is trained over the configured learning
    period, predictions are produced for the prediction-cache period, and
    the scoring function named by ``self.settings.fitness_method`` is looked
    up on ``FitnessScoring`` and applied over the validation period.  The
    result is attached to the organism, a copy of the organism is appended
    to ``self.history`` and ``(score, predictions)`` is stored in
    ``self.cache`` under the organism's ``repr``.

    :param organism: candidate to evaluate.  It is read through ``repr()``,
        ``organism["threshold"]``, ``organism["days_return"]``,
        ``organism.features_hash()`` and ``organism.copy()``; its
        ``found_forest`` attribute is overwritten.
    :returns: the score for this organism (cached or freshly computed).
    """
    cache_key = repr(organism)

    # ``in`` replaces ``dict.has_key``, which is deprecated in Python 2 and
    # no longer exists in Python 3.
    if cache_key in self.cache:
        # Single-argument print calls produce the same text on Python 2 and 3.
        print("cache hit %s" % (cache_key,))
        cached_score, cached_predictions = self.cache[cache_key]
        organism.found_forest = FoundForest(
            cache_key, cached_score, cached_predictions
        )
        print("cache score %s" % (cached_score,))
        return cached_score

    def feature_generator(sp):
        # The argument supplied by the caller is ignored: features are always
        # built from this object's own stock period, restricted to the
        # organism's feature description.  (Previously named ``__sp``, which
        # is subject to private-name mangling inside a class body.)
        return TechnicalFeatures(
            self.stock_period, feature_desc=organism.features_hash()
        )

    # Random-forest tuning is currently switched off; the disabled code below
    # shows how the options were derived from the organism.
    forest_options = False
    # if organism["n_estimators"]:
    #     max = None
    #     if organism["max_features"] != "None":
    #         max = organism["max_features"]
    #     forest_options = {"n_estimators": organism["n_estimators"],
    #                       "min_samples_split": organism["min_samples_split"],
    #                       "max_features": max
    #                       }

    mlearn = MachineLearner(
        self.settings.symbol,
        classifier=cl.BuySellClassifier(
            threshold=organism["threshold"],
            days=organism["days_return"],
        ),
        feature_gen=feature_generator,
        stock_period=self.stock_period,
        random_forest_options=forest_options,
    )
    mlearn.learn_period(
        self.settings.learn_start_time, self.settings.learn_end_time
    )
    cache_predictions = mlearn.predict_period(
        self.settings.predict_cache_start_time,
        self.settings.predict_cache_end_time,
    )
    print(organism)

    # The scoring strategy is selected by name from the settings.
    scoring_function = getattr(FitnessScoring, self.settings.fitness_method)
    score = scoring_function(
        mlearn,
        cache_predictions,
        self.settings.val_start_time,
        self.settings.val_end_time,
    )

    # cache_test_predictions = False
    # if self.settings.precache_tests():
    #     cache_test_predictions = mlearn.predict_period(
    #         self.settings.test_start_time, self.settings.test_end_time)

    organism.found_forest = FoundForest(cache_key, score, cache_predictions)
    self.history.append(organism.copy())
    self.cache[cache_key] = (score, cache_predictions)
    return score
def setUp(self):
    """Create a truncated and a longer GOOG fixture, each with a learner.

    The truncated fixture loads ``'GOOG_SHORT'`` from 2004-09-01 to
    2010-01-01; the longer one loads ``'GOOG'`` from the same start to
    2011-01-01.  Each gets its own ``TechnicalFeatures`` and a
    ``MachineLearner`` whose feature generator hands back those features.
    """
    self.start_date = dt.datetime(2004, 9, 1)
    self.end_date = dt.datetime(2010, 1, 1)

    # --- truncated data set -------------------------------------------
    limited_period = StockPeriod('GOOG_SHORT', self.start_date, self.end_date)
    self.sp_limited = limited_period
    self.tf_limited = TechnicalFeatures(limited_period)

    # Timestamps at 16:00 on the first and last day of the truncated set
    # (presumably the first/last index entries -- confirm against the data).
    self.lim_start = dt.datetime(2004, 9, 1, 16)
    self.lim_end = dt.datetime(2009, 12, 31, 16)

    def limited_features(sp):
        # Looked up at call time, ignoring the stock period passed in.
        return self.tf_limited

    self.mlearn_lim = MachineLearner(
        'GOOG_SHORT',
        feature_gen=limited_features,
        stock_period=limited_period,
    )

    # --- longer data set ----------------------------------------------
    full_period = StockPeriod('GOOG', self.start_date, dt.datetime(2011, 1, 1))
    self.sp = full_period
    self.tf = TechnicalFeatures(full_period)

    def full_features(sp):
        # Looked up at call time, ignoring the stock period passed in.
        return self.tf

    # NOTE(review): the symbol here is 'GOOG_' while the stock period above
    # was loaded as 'GOOG' -- confirm this is intentional.
    self.mlearn_verify = MachineLearner(
        'GOOG_',
        feature_gen=full_features,
        stock_period=full_period,
    )
class TestVerifyThatPredictionsArePrecient(unittest.TestCase):
    """Compare a learner built on truncated data with one built on more data.

    The tests check that the truncated fixture has the expected date
    boundaries, that the features it shares with the longer fixture are
    equal, and that both learners make the same predictions for rows that
    lie after the truncated fixture's last date.
    """

    def setUp(self):
        """Create the truncated ('GOOG_SHORT') and longer ('GOOG') fixtures."""
        self.start_date = dt.datetime(2004, 9, 1)
        self.end_date = dt.datetime(2010, 1, 1)

        # Truncated data set and its learner.
        limited_period = StockPeriod(
            'GOOG_SHORT', self.start_date, self.end_date
        )
        self.sp_limited = limited_period
        self.tf_limited = TechnicalFeatures(limited_period)

        # First and last timestamps expected in the truncated data set
        # (asserted by test_correct_boundaries).
        self.lim_start = dt.datetime(2004, 9, 1, 16)
        self.lim_end = dt.datetime(2009, 12, 31, 16)

        def limited_features(sp):
            # Looked up at call time, ignoring the stock period passed in.
            return self.tf_limited

        self.mlearn_lim = MachineLearner(
            'GOOG_SHORT',
            feature_gen=limited_features,
            stock_period=limited_period,
        )

        # Longer data set and its learner.
        full_period = StockPeriod(
            'GOOG', self.start_date, dt.datetime(2011, 1, 1)
        )
        self.sp = full_period
        self.tf = TechnicalFeatures(full_period)

        def full_features(sp):
            # Looked up at call time, ignoring the stock period passed in.
            return self.tf

        # NOTE(review): symbol is 'GOOG_' although the stock period was
        # loaded as 'GOOG' -- confirm this is intentional.
        self.mlearn_verify = MachineLearner(
            'GOOG_',
            feature_gen=full_features,
            stock_period=full_period,
        )

    def assert_limits(self, sp, tf, start_d, end_d):
        """Assert that ``sp`` and ``tf`` both span ``start_d`` .. ``end_d``.

        Checks the first and last index entries of ``sp.close_data`` and of
        ``tf.relative_data['close']``.
        """
        close_index = sp.close_data.index
        self.assertEqual(close_index[0], start_d)
        self.assertEqual(close_index[-1], end_d)

        relative_close_index = tf.relative_data['close'].index
        self.assertEqual(relative_close_index[0], start_d)
        self.assertEqual(relative_close_index[-1], end_d)

    def test_correct_boundaries(self):
        """The truncated fixture and its learner stop at ``lim_end``."""
        expected_span = (self.lim_start, self.lim_end)
        self.assert_limits(self.sp_limited, self.tf_limited, *expected_span)
        self.assert_limits(
            self.mlearn_lim.sp, self.mlearn_lim.feats, *expected_span
        )

    def test_predictions_are_the_same(self):
        """Both learners predict identically for rows after ``lim_end``."""
        train_start = dt.datetime(2006, 1, 1)
        train_end = dt.datetime(2008, 1, 1)

        # Feature rows taken from the longer data set, after the end date
        # of the truncated one.
        all_features = self.tf.get_features()[:]
        from_limit_on = all_features[self.lim_end:]
        features = from_limit_on[1:100]
        self.assertTrue(features.index[0] > self.lim_end)

        # Train the truncated learner; training must not extend its data.
        self.mlearn_lim.learn_period(train_start, train_end)
        self.assert_limits(
            self.mlearn_lim.sp,
            self.mlearn_lim.feats,
            self.lim_start,
            self.lim_end,
        )

        # Predict one row at a time with the truncated learner.
        row_predictions = [
            self.mlearn_lim.predict([feat_row.values])[0]
            for _, feat_row in features.iterrows()
        ]
        df_test_result = pd.DataFrame(
            index=features.index, data=row_predictions
        )
        print(df_test_result)

        # Predict the same date range in one call with the longer learner.
        self.mlearn_verify.learn_period(train_start, train_end)
        df_verify_result = self.mlearn_verify.predict_period(
            features.index[0], features.index[-1]
        )
        print(df_verify_result)

        same_index = np.equal(df_test_result.index, df_verify_result.index)
        self.assertTrue(same_index.all())
        same_values = np.equal(df_test_result.values, df_verify_result.values)
        self.assertTrue(same_values.all())

    def test_features_equal(self):
        """The last 50 truncated feature rows match the longer data set."""
        limited_feats = self.tf_limited.get_features()
        feats = self.tf.get_features()

        sub_set_limited = limited_feats[:][-50:]
        self.assertEqual(sub_set_limited.index[-1], self.lim_end)

        # Same date window, cut out of the longer feature frame.
        window_start = sub_set_limited.index[0]
        window_end = sub_set_limited.index[-1]
        sub_set_full = feats[:][window_start:window_end]

        # The longer data set must really extend past the truncated one.
        self.assertTrue(feats.index[-1] > self.lim_end)

        matches = np.equal(sub_set_limited.values, sub_set_full.values)
        self.assertTrue(matches.all())