def test_callbacks(self):
    # a test with an early stopping callback, passing restore_best_weights=True as a kwarg
    df = pd.DataFrame({
        "a": [1, 0, 1, 0, 1, 0, 1, 0],
        "b": [0, 1, 0, 1, 1, 0, 1, 0],
    })

    model = PytorchModel(
        FeaturesAndLabels(["a", "b"], ["b"]),
        ClassificationModule,
        nn.MSELoss,
        lambda params: SGD(params, lr=0.1, momentum=0.9)
    )

    fit = df.model.fit(
        model,
        on_epoch=[Callbacks.early_stopping(patience=3, tolerance=-100)],
        restore_best_weights=True
    )

    self.assertEqual(4, len(fit.model.history["loss"]))
def test_regularized_loss(self):
    df = pd.DataFrame({
        "f": np.sin(np.linspace(0, 12, 40)),
        "l": np.sin(np.linspace(5, 17, 40))
    })

    class TestModel(PytorchNN):
        def __init__(self):
            super().__init__()
            self.net = nn.Sequential(
                nn.Linear(1, 3), nn.ReLU(),
                nn.Linear(3, 2), nn.ReLU(),
                nn.Linear(2, 1), nn.Sigmoid(),
            )

        def forward_training(self, x):
            return self.net(x)

        def L2(self) -> Dict[str, float]:
            # penalize the weights of the layer at index 2 so hard that they shrink towards zero
            return {'**/2/**/weight': 99999999999.99}

    fit = df.model.fit(
        PytorchModel(TestModel, FeaturesAndLabels(["f"], ["l"]), nn.MSELoss, Adam),
        FittingParameter(epochs=1000, splitter=naive_splitter(0.5))
    )

    print(fit.model._current_model.net.net[2].weight.detach().numpy())
    print(fit.model._current_model.net.net[2].weight.norm().detach().item())

    self.assertLess(fit.model._current_model.net.net[2].weight.norm().detach().item(), 0.1)
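# The L2 dict in TestModel maps glob-like patterns over parameter paths (e.g.
# '**/2/**/weight' for the weights of the submodule at index 2) to penalty
# factors. Below is a minimal, hypothetical sketch of how such pattern-keyed
# penalties could be resolved against named parameters; this is an assumption
# about the mechanism, not pandas-ml-utils-torch's actual implementation:

import re

import torch
from torch import nn


def l2_penalty(module: nn.Module, patterns: dict) -> torch.Tensor:
    def to_regex(pattern: str) -> str:
        p = pattern.replace("**/", "\0")     # protect the multi-segment wildcard
        p = p.replace("*", "[^/]*")          # '*' matches within one path segment
        p = p.replace("\0", "(?:[^/]+/)*")   # '**/' spans any number of segments
        return "^" + p + "$"

    penalty = torch.zeros(())
    for name, param in module.named_parameters():
        path = name.replace(".", "/")        # e.g. "net.2.weight" -> "net/2/weight"
        for pattern, factor in patterns.items():
            if re.match(to_regex(pattern), path):
                penalty = penalty + factor * (param ** 2).sum()
    return penalty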
def test_no_training_data(self):
    """given"""
    df = pd.DataFrame({
        "featureA": [1, 2, 3, 4, 5],
        "labelA": [1, 2, 3, 4, 5]
    })

    """when"""
    train_ix, test_ix = RandomSplits(0).train_test_split(df.index)

    """then"""
    np.testing.assert_array_almost_equal(train_ix.values, df.index.values)
    self.assertEqual(0, len(test_ix))
def test_youngest_portion(self):
    """given"""
    df = pd.DataFrame({
        "featureA": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
        "labelA": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
    })

    """when"""
    train_ix, test_ix = RandomSplits(test_size=0.6, youngest_size=0.25).train_test_split(df.index)

    """then"""
    self.assertEqual(6, len(test_ix))
    np.testing.assert_array_equal(test_ix[-2:], np.array([8, 9]))
def test_make_training_data(self):
    """given"""
    df = pd.DataFrame({
        "featureA": [1, 2, 3, 4, 5],
        "labelA": [1, 2, 3, 4, 5]
    })

    """when"""
    train_ix, test_ix = RandomSplits(test_size=0.5).train_test_split(df.index)

    """then"""
    self.assertEqual(2, len(train_ix))
    self.assertEqual(3, len(test_ix))
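# The three RandomSplits tests above pin down the splitting contract: test_size=0
# yields no test data, the test set size is rounded up (5 rows * 0.5 -> 3), and
# youngest_size reserves the given fraction of the test set for the newest rows.
# A minimal, hypothetical re-implementation consistent with these tests (not the
# library's actual code):

import numpy as np
import pandas as pd


def sketch_train_test_split(index: pd.Index, test_size: float = 0.3,
                            youngest_size: float = None, seed: int = 42):
    if test_size <= 0:
        # no test data at all: everything is training data
        return index, index[:0]

    n_test = int(np.ceil(len(index) * test_size))
    n_young = int(np.ceil(n_test * youngest_size)) if youngest_size else 0

    young = index[len(index) - n_young:]   # the newest rows go straight into the test set
    rest = index[:len(index) - n_young]

    shuffled = np.random.RandomState(seed).permutation(rest.values)
    test = np.concatenate([np.sort(shuffled[:n_test - n_young]), young.values])
    train = np.sort(shuffled[n_test - n_young:])
    return pd.Index(train), pd.Index(test)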
def test_callbacks(self):
    # a test with an early stopping callback, passing restore_best_weights=True as a kwarg
    df = pd.DataFrame({
        "a": [1, 0, 1, 0, 1, 0, 1, 0],
        "b": [0, 1, 0, 1, 1, 0, 1, 0],
    })

    def module_provider():
        class ClassificationModule(nn.Module):
            def __init__(self):
                super().__init__()
                self.classifier = nn.Sequential(
                    nn.Linear(2, 5),
                    nn.ReLU(),
                    nn.Linear(5, 1),
                    nn.Sigmoid(),
                )

            def forward(self, x):
                return self.classifier(x)

        return ClassificationModule()

    model = PytorchModel(
        FeaturesAndLabels(["a", "b"], ["b"]),
        module_provider,
        nn.MSELoss,
        lambda params: SGD(params, lr=0.1, momentum=0.9)
    )

    fit = df.model.fit(
        model,
        on_epoch=[PytorchModel.Callbacks.early_stopping(patience=3, tolerance=-100)],
        restore_best_weights=True
    )

    print(fit.model._history)
    self.assertEqual(4, len(fit.model._history[0][0]))
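# One plausible reading of the early stopping semantics exercised above (a
# hypothetical sketch, not the library's implementation): track the best loss
# and stop once `patience` consecutive epochs fail to improve it by more than
# `tolerance`. With tolerance=-100 no epoch after the first can count as an
# improvement, so training halts after 4 recorded epochs, which would match
# the assertion.

class EarlyStoppingSketch:
    def __init__(self, patience: int = 1, tolerance: float = 0.001):
        self.patience = patience
        self.tolerance = tolerance
        self.best_loss = float("inf")
        self.counter = 0

    def __call__(self, epoch: int, loss: float):
        if loss < self.best_loss + self.tolerance:
            self.best_loss = loss
            self.counter = 0
        else:
            self.counter += 1
            if self.counter >= self.patience:
                raise StopIteration()  # signal the fit loop to stop training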
def test_probabilistic(self):
    def create_sine_data(n=300):
        np.random.seed(32)
        x = np.linspace(0, 1 * 2 * np.pi, n)
        y1 = 3 * np.sin(x)
        y1 = np.concatenate((
            np.zeros(60),
            y1 + np.random.normal(0, 0.15 * np.abs(y1), n),
            np.zeros(60)
        ))
        x = np.concatenate((
            np.linspace(-3, 0, 60),
            np.linspace(0, 3 * 2 * np.pi, n),
            np.linspace(3 * 2 * np.pi, 3 * 2 * np.pi + 3, 60)
        ))
        y2 = 0.1 * x + 1
        y = y1 + y2
        return x, y

    df = pd.DataFrame(np.array(create_sine_data(300)).T, columns=["x", "y"])

    with df.model() as m:
        from pandas_ml_utils import FeaturesAndLabels
        from pandas_ml_utils_torch import PytorchNN, PytorchModel
        from pandas_ml_utils_torch.loss import HeteroscedasticityLoss
        from pandas_ml_common.sampling.splitter import duplicate_data
        from torch.optim import Adam
        from torch import nn

        class Net(PytorchNN):
            def __init__(self):
                super().__init__()
                self.l = nn.Sequential(
                    nn.Linear(1, 20), nn.ReLU(),
                    nn.Linear(20, 50), nn.ReLU(),
                    nn.Linear(50, 20), nn.ReLU(),
                    nn.Linear(20, 2),
                )

            def forward_training(self, x):
                return self.l(x)

        fit = m.fit(
            PytorchModel(Net, FeaturesAndLabels(["x"], ["y"]),
                         HeteroscedasticityLoss, Adam,
                         restore_best_weights=True),
            FittingParameter(batch_size=128, epochs=10, splitter=duplicate_data())
        )
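# The Net above emits two outputs per sample, which a heteroscedasticity loss
# interprets as the mean and scale of a Gaussian and scores by negative
# log-likelihood, so the model learns input-dependent uncertainty. A minimal
# sketch of such a criterion (an illustration of the idea, not the actual
# pandas_ml_utils_torch.loss.HeteroscedasticityLoss):

import torch
from torch import nn


class GaussianNLLSketch(nn.Module):
    def forward(self, y_pred: torch.Tensor, y_true: torch.Tensor) -> torch.Tensor:
        mu, log_sigma = y_pred[..., 0], y_pred[..., 1]
        sigma = torch.exp(log_sigma)  # predict the log-scale to keep sigma positive
        # negative log-likelihood of y_true under N(mu, sigma^2), up to a constant
        return (log_sigma + 0.5 * ((y_true.squeeze(-1) - mu) / sigma) ** 2).mean()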
def test_feature_selection(self):
    df = pd.DataFrame({
        "featureA": [1, 2, 3, 4, 5],
        "featureB": [5, 4, 3, 2, 1],
        "featureC": [1, 2, 1, 2, 1],
        "labelA": [1, 2, 3, 4, 5],
        "labelB": [5, 4, 3, 2, 1]
    })

    analysis = df.model.feature_selection(
        FeaturesAndLabels(["featureA", "featureB", "featureC"], ["labelA"]),
        lags=[2],
        show_plots=False
    )
    print(analysis)

    # top features are A, B, C
    self.assertListEqual(["featureA", "featureB", "featureC"], analysis[0])
    self.assertListEqual([0, 1], analysis[1])
def test_mult_epoch_cross_validation(self):
    df = pd.DataFrame({
        "a": [1, 0, 1, 0, 1, 0, 1, 0],
        "b": [0, 1, 0, 1, 1, 0, 1, 0],
    })

    with df.model() as m:
        class NN(PytorchNN):
            def __init__(self, *args, **kwargs):
                super().__init__(*args, **kwargs)
                self.nn = nn.Sequential(
                    nn.Linear(1, 2),
                    nn.ReLU(),
                    nn.Linear(2, 1),
                )

            def forward_training(self, x):
                return self.nn(x)

        fit = m.fit(
            PytorchModel(NN, FeaturesAndLabels(["a"], ["b"]), nn.MSELoss, Adam),
            FittingParameter(splitter=naive_splitter(0.5),
                             epochs=2,
                             fold_epochs=10,
                             batch_size=2)
        )

    print(fit)
def test_feature_selection_classification(self):
    data = make_classification(n_samples=20, n_features=5, n_informative=4,
                               n_redundant=1, n_classes=2)
    df = pd.DataFrame(data[0])
    df["label"] = data[1]

    report = df.model.feature_selection(
        features_and_labels=FeaturesAndLabels(features=list(range(5)),
                                              labels=["label"],
                                              label_type=int),
        training_data_splitter=stratified_random_splitter(0.5),
        rfecv_splits=2,
        forest_splits=2
    )
    print(report)
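# The feature_selection report presumably combines cross-validated recursive
# feature elimination with forest-based importances. For orientation, here is
# a plain scikit-learn sketch of that combination (my assumption about the
# approach, not the library's implementation):

from sklearn.datasets import make_classification
from sklearn.ensemble import RandomForestClassifier
from sklearn.feature_selection import RFECV
from sklearn.model_selection import StratifiedKFold

X, y = make_classification(n_samples=20, n_features=5, n_informative=4,
                           n_redundant=1, n_classes=2)
selector = RFECV(RandomForestClassifier(n_estimators=50),
                 cv=StratifiedKFold(2), scoring="accuracy")
selector.fit(X, y)
print(selector.support_)                          # mask of the selected features
print(selector.estimator_.feature_importances_)   # importances on the selected subset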
def test_multi_objective_loss(self):
    df = pd.DataFrame(np.array([
        # train
        [0, 0, 0],
        [0, 1, 1],
        [1, 0, 1],
        [1, 1, 1],
        # test
        [0, 0, 0],
        [0, 1, 1],
        [1, 0, 1],
        [1, 1, 1],
    ]), columns=["f1", "f2", "l"])

    class XorModule(nn.Module):
        def __init__(self):
            super().__init__()
            self.x1 = nn.Linear(2, 1)
            self.s1 = nn.Sigmoid()
            self.x2 = nn.Linear(2, 1)
            self.s2 = nn.Sigmoid()
            self.s = nn.Softmax()

        def forward(self, x):
            if self.training:
                return self.s1(self.x1(x)), self.s2(self.x2(x))
            else:
                return self.s1(self.x1(x))

    fit = df.model.fit(
        PytorchModel(
            FeaturesAndLabels(["f1", "f2"], ["l"]),
            XorModule,
            lambda: MultiObjectiveLoss(
                (1, nn.MSELoss(reduction='none')),
                (1, nn.L1Loss(reduction='none')),
                on_epoch=lambda criterion, epoch: criterion.update_weights((0, 1.1))
            ),
            Adam
        ),
        NaiveSplitter(0.5)
    )

    print(fit.test_summary.df)
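# MultiObjectiveLoss blends several criteria into one training signal with
# weights that can be adjusted from an epoch callback. A minimal sketch of
# that idea (a hypothetical stand-in, not the library's actual implementation;
# it assumes update_weights takes (index, factor) pairs that scale the current
# weights, one reading of the (0, 1.1) call above):

import torch
from torch import nn


class WeightedMultiLossSketch(nn.Module):
    def __init__(self, *weighted_criteria, on_epoch=None):
        super().__init__()
        self.weights = [w for w, _ in weighted_criteria]
        self.criteria = nn.ModuleList([c for _, c in weighted_criteria])
        self.on_epoch = on_epoch  # callback: (criterion, epoch) -> None

    def update_weights(self, *updates):
        # updates are (index, factor) pairs scaling the current weights
        for i, factor in updates:
            self.weights[i] *= factor

    def forward(self, y_pred, y_true):
        # during training the module may return one prediction head per criterion
        preds = y_pred if isinstance(y_pred, tuple) else [y_pred] * len(self.criteria)
        losses = [w * c(p, y_true) for w, c, p in zip(self.weights, self.criteria, preds)]
        return torch.stack(losses).sum(dim=0).mean()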
def test_pytorch_mfs(self):
    df = pd.DataFrame({
        "a": [1, 0, 1, 0, 1, 0, 1, 0],
        "b": [[0, 0], [0, 0], [1, 1], [1, 1], [0, 0], [0, 0], [1, 1], [1, 1]],
        "c": [1, 0, 0, 1, 1, 0, 0, 1],
    })

    def module_provider():
        class ClassificationModule(PytorchNN):
            def __init__(self):
                super().__init__()
                self.net0 = nn.Sequential(nn.Linear(1, 5), nn.ReLU(), nn.Linear(5, 1), nn.Sigmoid())
                self.net1 = nn.Sequential(nn.Linear(2, 5), nn.ReLU(), nn.Linear(5, 1), nn.Sigmoid())

            def forward_training(self, x) -> t.Tensor:
                x0, x1 = x
                return self.net0(x0) + self.net1(x1)

        return ClassificationModule()

    model = PytorchModel(
        module_provider,
        FeaturesAndLabels(features=(["a"], ["b"]), labels=["c"]),
        nn.MSELoss,
        lambda params: Adam(params, lr=0.03)
    )

    fl: FeaturesWithLabels = df._.extract(model.features_and_labels)
    self.assertIsInstance(fl.features_with_required_samples.features, MultiFrameDecorator)
    print(fl.features_with_required_samples.features)

    fit = df.model.fit(model, fold_epochs=10)
    print(fit.test_summary.df)

    self.assertIn(FEATURE_COLUMN_NAME, fit.test_summary.df)
    np.testing.assert_almost_equal(
        np.array([0, 0, 1]),
        fit.test_summary.df["label"].values.squeeze()
    )
import logging
import traceback

import numpy as np
import pandas as pd
from statsmodels.stats.correlation_tools import cov_nearest
from qpsolvers import solve_qp

# ta_ewma_covariance and _default_returns_estimator are defined elsewhere in this module
_log = logging.getLogger(__name__)


def ta_markowitz(df: pd.DataFrame,
                 covariances=None,
                 risk_aversion=5,
                 return_period=60,
                 prices='Close',
                 expected_returns=None,
                 rebalance_trigger=None,
                 solver='cvxopt',
                 tail=None):
    assert isinstance(df.columns, pd.MultiIndex), \
        "expected multi index columns for 'prices', 'expected returns' and 'rebalance trigger'"

    # risk
    if covariances is None:
        if isinstance(df.columns, pd.MultiIndex) and prices in df.columns.get_level_values(1):
            # we need to flip levels
            cov = ta_ewma_covariance(df.cloc2(prices))
        else:
            cov = ta_ewma_covariance(df[prices])
    elif isinstance(covariances, str):
        cov = df[covariances]
    else:
        cov = covariances
    cov = cov.dropna()

    # return
    exp_ret = _default_returns_estimator(df, prices, expected_returns, return_period, len(cov.columns))

    # re-balance
    trigger = (pd.Series(np.ones(len(df)), index=df.index)
               if rebalance_trigger is None else df[rebalance_trigger]).dropna()

    # non-negative weight constraint and weights sum to 1
    h = np.zeros(len(cov.columns)).reshape((-1, 1))
    G = -np.eye(len(h))
    A = np.ones(len(h)).reshape((1, -1))
    b = np.ones(1)

    # magic solutions
    keep_solution = (np.empty(len(h)) * np.nan)
    uninvest = np.zeros(len(h))

    # keep last solution
    last_solution = None

    def optimize(t, sigma, pi):
        nonlocal last_solution
        nr_of_assets = len(sigma)

        # only optimize if we have a re-balance trigger (early exit)
        if last_solution is not None and last_solution.sum() > 0.99:
            # we had at least one valid solution in the past; we can early exit
            # if we do not have any signal, or no signal for any currently held asset
            if len(t.shape) > 1 and t.shape[1] == nr_of_assets:
                if t[:, last_solution >= 0.01].sum().any() < 1:
                    return keep_solution
            else:
                if t.sum().any() < 1:
                    return keep_solution

        # make sure the covariance matrix is positive definite
        sigma = cov_nearest(sigma)

        # we perform the optimization except when all expected returns are < 0,
        # then we early exit with an un-invest command
        if (pi[0] < 0).all():
            return uninvest
        else:
            try:
                sol = solve_qp(risk_aversion * sigma, -pi.T, G=G, h=h, A=A, b=b, solver=solver)
                if sol is None:
                    _log.error("no solution found")
                    return uninvest
                else:
                    return sol
            except Exception:
                _log.error(traceback.format_exc())
                return uninvest

    index = sorted(set(df.index
                       .intersection(cov.index.get_level_values(0))
                       .intersection(exp_ret.index)
                       .intersection(trigger.index)))
    if tail is not None:
        index = index[-abs(tail):]

    weights = [optimize(trigger.loc[[i]].values,
                        cov.loc[[i]].values,
                        exp_ret[cov.columns].loc[[i]].values)
               for i in index]

    # turn the weights into a data frame
    return pd.DataFrame(weights, index=index, columns=cov.columns)
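# Hypothetical usage sketch for ta_markowitz; the tickers and data are made up,
# assuming an (asset, field) MultiIndex with the price level named 'Close':

import numpy as np
import pandas as pd

dates = pd.date_range("2020-01-01", periods=250)
prices = pd.DataFrame(
    100 * np.exp(np.random.normal(0, 0.01, (250, 2)).cumsum(axis=0)),
    index=dates,
    columns=pd.MultiIndex.from_product([["AAA", "BBB"], ["Close"]])
)

# one row of portfolio weights per date, columns = assets;
# tail=60 limits the optimization to the 60 most recent dates
weights = ta_markowitz(prices, risk_aversion=5, tail=60)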