def calculate_initial_weight(self, input_dc: DataCollection):
    """Compute and store the initial weights from ``input_dc``.

    ``self.optimizer`` is called with the collection's data frame (rows
    containing missing values dropped) and its result is stored in
    ``self.initial_weight``. The collection's frequency and ticker list
    are recorded in ``self.input_freq`` and ``self.tickers``.

    Args:
        input_dc: Collection providing ``get_freq()``, ``ticker_list()``
            and ``to_df()``. ``get_freq()`` must return a string.

    Raises:
        ValueError: If ``input_dc.get_freq()`` does not return a string.
        RuntimeError: If the initial weight was already calculated. No
            attribute is modified in that case.
    """
    freq = input_dc.get_freq()
    if not isinstance(freq, str):
        raise ValueError(
            "Optimization failed due to inconsistent series frequencies "
            "within input_dc.")
    if self.initial_weight is not None:
        # Reject before touching any state, so a refused call cannot leave
        # ``input_freq`` out of sync with the weights already stored.
        raise RuntimeError("initial weight was already calculated")

    self.input_freq = freq
    self.tickers = input_dc.ticker_list()
    self.initial_weight = self.optimizer(input_dc.to_df().dropna())
def calculate_initial_weight(self,
                             input_dc: DataCollection,
                             weight_bounds=(0, 1),
                             risk_aversion=1,
                             market_neutral=False,
                             risk_free_rate=0.0,
                             target_volatility=0.01,
                             target_return=0.11,
                             returns_data=True,
                             compounding=False):
    """Compute and store the initial weights from ``input_dc``.

    ``self.optimizer`` is called with the collection's data frame (rows
    containing missing values dropped), the collection's frequency,
    ``self.solution`` and then every optional argument of this method, in
    signature order. Its result is stored in ``self.initial_weight``; the
    frequency and ticker list are recorded in ``self.input_freq`` and
    ``self.tickers``.

    Args:
        input_dc: Collection providing ``get_freq()``, ``ticker_list()``
            and ``to_df()``. ``get_freq()`` must return a string.
        weight_bounds: Forwarded unchanged to the optimizer.
        risk_aversion: Forwarded unchanged to the optimizer.
        market_neutral: Forwarded unchanged to the optimizer.
        risk_free_rate: Forwarded unchanged to the optimizer.
        target_volatility: Forwarded unchanged to the optimizer.
        target_return: Forwarded unchanged to the optimizer.
        returns_data: Forwarded unchanged to the optimizer.
        compounding: Forwarded unchanged to the optimizer.

    Raises:
        ValueError: If ``input_dc.get_freq()`` does not return a string.
        RuntimeError: If the initial weight was already calculated. No
            attribute is modified in that case.
    """
    freq = input_dc.get_freq()
    if not isinstance(freq, str):
        raise ValueError(
            "Optimization failed due to inconsistent series frequencies "
            "within input_dc.")
    if self.initial_weight is not None:
        # Reject before touching any state, so a refused call cannot leave
        # ``input_freq`` out of sync with the weights already stored.
        raise RuntimeError("initial weight was already calculated")

    self.input_freq = freq
    self.tickers = input_dc.ticker_list()
    self.initial_weight = self.optimizer(
        input_dc.to_df().dropna(),
        self.input_freq,
        self.solution,
        weight_bounds,
        risk_aversion,
        market_neutral,
        risk_free_rate,
        target_volatility,
        target_return,
        returns_data,
        compounding,
    )
def __init__(self, portfolio: Portfolio, evaluate_dc: DataCollection):
    """Bind a portfolio to the data collection it is evaluated on.

    Args:
        portfolio: Object providing ``get_tickers()``, ``get_solution()``
            and ``get_freq()``.
        evaluate_dc: Evaluation data providing ``ticker_list()``,
            ``get_freq()`` and ``to_df()``. Its tickers and frequency must
            equal the portfolio's.

    Raises:
        ValueError: If the tickers or the frequencies differ.
    """
    portfolio_tickers = portfolio.get_tickers()
    data_tickers = evaluate_dc.ticker_list()
    if portfolio_tickers != data_tickers:
        raise ValueError("Tickers in portfolio and evaluate data do not match")

    self.portfolio = portfolio
    # NOTE(review): the original author left a "Check this" marker at this
    # point; confirm the solution name is the intended label.
    self.label = portfolio.get_solution()

    portfolio_freq = portfolio.get_freq()
    data_freq = evaluate_dc.get_freq()
    if portfolio_freq != data_freq:
        raise ValueError("The frequency of the data and portfolio do not match")

    # Rows with any missing value are dropped from the evaluation frame.
    self.price_df = evaluate_dc.to_df().dropna()
    self.freq = evaluate_dc.get_freq()
    self.evaluate_dc = evaluate_dc
    # Starts empty; nothing is computed at construction time.
    self.metrics = {}
def __init__(self, *args, **kwargs):
    """Build the daily and monthly fixtures used by the test methods."""
    super().__init__(*args, **kwargs)

    # Fake daily data (by ZZ): two short series of unequal length.
    abc_frame = pd.DataFrame(
        [10.0, 15.0, 20.0, 30.0],
        columns=['ABC'],
        index=pd.to_datetime(
            ['2020-01-01', '2020-01-02', '2020-01-03', '2020-01-04']))
    self.a_series = DataSeries('ETF', 'daily', abc_frame)
    kkk_frame = pd.DataFrame(
        [1.0, 3.5, 4.5],
        columns=['KKK'],
        index=pd.to_datetime(['2020-01-01', '2020-01-02', '2020-01-03']))
    self.b_series = DataSeries('Bond', 'daily', kkk_frame)
    self.collect = DataCollection('trial', [self.a_series, self.b_series])

    # One-row weight frame labelled 'Initial weights', columns per ticker.
    raw_weights = pd.DataFrame(data={'Initial weights': [0.6, 0.4]}).T
    self.weights = raw_weights.rename(columns={0: 'ABC', 1: 'KKK'})

    self.p = port.EqualPort("test equal port")
    self.p.calculate_initial_weight(self.collect)

    # Monthly data: the first four entries returned by read_file.
    monthly_dir = os.path.join('test', 'Data', 'Monthly')
    monthly_entries = DataPreprocessing.read_file(monthly_dir)
    n_assets = 4
    entry_keys = list(monthly_entries.keys())
    monthly_series = []
    for position in range(n_assets):
        entry = monthly_entries[entry_keys[position]]
        monthly_series.append(DataSeries(entry[1], 'monthly', entry[0]))

    self.input_dc = DataCollection(label='Test Collection',
                                   time_series_group=monthly_series)
    self.input_freq = self.input_dc.get_freq()
    self.input_df = self.input_dc.to_df()
    self.n_asset = len(self.input_df.columns)

    # Equal weight for every column of the monthly frame.
    equal_row = [1 / self.n_asset] * self.n_asset
    self.input_weights_df = pd.DataFrame([equal_row],
                                         columns=self.input_df.columns,
                                         index=['Initial weights'])
def __init__(self, *args, **kwargs):
    """Build the monthly collection and the small fake series fixtures."""
    super().__init__(*args, **kwargs)

    # Monthly data: the first four entries returned by read_file.
    monthly_dir = os.path.join('test', 'Data', 'Monthly')
    monthly_entries = DataPreprocessing.read_file(monthly_dir)
    n_assets = 4
    entry_keys = list(monthly_entries.keys())
    monthly_series = []
    for position in range(n_assets):
        entry = monthly_entries[entry_keys[position]]
        monthly_series.append(DataSeries('ETF', 'monthly', entry[0]))

    self.input_dc = DataCollection(label='Test Collection',
                                   time_series_group=monthly_series)
    self.input_freq = self.input_dc.get_freq()
    self.input_df = self.input_dc.to_df().dropna()

    # Two fake series whose date indexes only partly overlap.
    spy_dates = pd.to_datetime([
        '2020-01-01', '2020-02-01', '2020-03-01', '2020-04-01',
        '2020-05-01', '2020-06-01'
    ])
    self.a = pd.DataFrame([10, 12, 32, 9, 11, 9],
                          columns=['fakeSPY'],
                          index=spy_dates)
    self.a_series = DataSeries('ETF', self.input_freq, self.a)

    treasury_dates = pd.to_datetime(
        ['2019-12-01', '2020-02-01', '2020-03-01', '2020-04-01'])
    self.b = pd.DataFrame([1, 1.2, 3.2, 0.9],
                          columns=['fakeTreasury'],
                          index=treasury_dates)
    self.b_series = DataSeries('Bond', self.input_freq, self.b)

    self.c_collection = DataCollection('trial',
                                       [self.a_series, self.b_series])
    # Gaps in the combined frame are filled by linear interpolation
    # along the rows.
    combined = self.c_collection.to_df()
    self.c_df = combined.interpolate(method='linear', axis=0)
class Test_Portfolio_Performance(unittest.TestCase):
    """Tests for the ``PP`` performance functions and ``PortfolioPerformance``.

    Scalars are read from pandas objects with explicit positional indexing
    (``.iloc[0]`` / ``.iloc[row, col]``). An integer key in ``obj[0]`` on a
    label-indexed Series is deprecated by pandas.
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # Fake daily data (by ZZ): two short series of unequal length.
        self.a_series = DataSeries(
            'ETF', 'daily',
            pd.DataFrame([10.0, 15.0, 20.0, 30.0],
                         columns=['ABC'],
                         index=pd.to_datetime([
                             '2020-01-01', '2020-01-02', '2020-01-03',
                             '2020-01-04'
                         ])))
        self.b_series = DataSeries(
            'Bond', 'daily',
            pd.DataFrame([1.0, 3.5, 4.5],
                         columns=['KKK'],
                         index=pd.to_datetime([
                             '2020-01-01',
                             '2020-01-02',
                             '2020-01-03',
                         ])))
        self.collect = DataCollection('trial',
                                      [self.a_series, self.b_series])
        d = {'Initial weights': [0.6, 0.4]}
        self.weights = pd.DataFrame(data=d).T
        self.weights = self.weights.rename(columns={0: 'ABC', 1: 'KKK'})

        self.p = port.EqualPort("test equal port")
        self.p.calculate_initial_weight(self.collect)

        # Monthly data: the first four entries returned by read_file.
        path_monthly = os.path.join('test', 'Data', 'Monthly')
        dic_monthly = DataPreprocessing.read_file(path_monthly)

        n_assets = 4
        keys = list(dic_monthly.keys())
        time_series_group = []
        for i in range(n_assets):
            df = dic_monthly[keys[i]]
            ds = DataSeries(df[1], 'monthly', df[0])
            time_series_group.append(ds)
        input_dc_test = DataCollection(label='Test Collection',
                                       time_series_group=time_series_group)
        self.input_dc = input_dc_test
        self.input_freq = input_dc_test.get_freq()
        self.input_df = self.input_dc.to_df()
        self.n_asset = len(self.input_df.columns)
        # Equal weight for every column of the monthly frame.
        input_weights = [[1 / self.n_asset] * self.n_asset]
        input_weights_df = pd.DataFrame(input_weights,
                                        columns=self.input_df.columns,
                                        index=['Initial weights'])
        self.input_weights_df = input_weights_df

    def test_annualized_return(self):
        ans = PP.annualized_return(self.weights,
                                   self.collect.to_df().dropna(), 'daily')

        output_return_df = self.collect.to_df().dropna().pct_change().dropna()
        annual_return = output_return_df.mean() * 252
        # Series @ DataFrame gives a one-element Series labelled
        # 'Initial weights', so the value is read by position.
        ans_new = annual_return @ self.weights.T
        self.assertAlmostEqual(ans_new.iloc[0], 203.4)
        self.assertAlmostEqual(ans, ans_new.iloc[0])

        res2 = PP.annualized_return(self.input_weights_df, self.input_df,
                                    self.input_freq)
        expected2 = np.dot(self.input_weights_df,
                           self.input_df.pct_change().mean() * 12).item()
        self.assertEqual(res2, expected2)

    def test_annualized_volatility(self):
        ans = PP.annualized_volatility(self.weights,
                                       self.collect.to_df().dropna(), 'daily')
        cov = self.collect.to_df().dropna().pct_change().dropna().cov() * 252
        ans_new = (self.weights @ cov @ self.weights.T).iloc[0, 0]**0.5
        self.assertAlmostEqual(ans, ans_new)

        res2 = PP.annualized_volatility(self.input_weights_df, self.input_df,
                                        self.input_freq)
        expected2 = np.sqrt(
            np.dot(
                self.input_weights_df,
                np.dot(self.input_df.pct_change().cov() * 12,
                       self.input_weights_df.T)).item())
        self.assertAlmostEqual(res2, expected2)

    def test_sharpe_ratio(self):
        ans = PP.sharpe_ratio(0.6, 0.2, 0.03)
        ans_new = (0.6 - 0.03) / 0.2
        self.assertAlmostEqual(ans, ans_new)

    def test_PnL(self):
        ans = PP.PnL(self.weights, self.collect.to_df().dropna())

        output_return_df = self.collect.to_df().dropna().pct_change().dropna()
        # Expected value per period: weighted sum of the two asset returns.
        ans_1 = (output_return_df.iloc[0, 0] * self.weights.iloc[0, 0] +
                 output_return_df.iloc[0, 1] * self.weights.iloc[0, 1])
        ans_2 = (output_return_df.iloc[1, 0] * self.weights.iloc[0, 0] +
                 output_return_df.iloc[1, 1] * self.weights.iloc[0, 1])

        self.assertAlmostEqual(ans.iloc[0, 0], ans_1)
        self.assertAlmostEqual(ans.iloc[1, 0], ans_2)

    def test_max_drawdown(self):
        price = {
            'PnL': [75, 33, 35, 25, 80, 100, 95, 78, 72, 62, 65, 60, 42, 50]
        }
        pnl = pd.DataFrame(data=price).pct_change().dropna()

        ans = PP.max_drawdown(pnl)
        ans_new = (25 - 75) / 75
        self.assertAlmostEqual(ans, ans_new)

    def test_partial_moment(self):
        pnl = PP.PnL(self.weights, self.collect.to_df().dropna())
        pm = PP.partial_moment(pnl, threshold=0.6)

        # Recompute by hand: mean squared shortfall below the threshold,
        # averaged over all periods.
        length = pnl.shape[0]
        threshold = 0.6
        diff_df = threshold - pnl
        drop_minus = diff_df[diff_df >= 0].dropna()
        pm_new = ((drop_minus**2).sum() / length).item()
        self.assertAlmostEqual(pm, 0.0408163265)
        self.assertAlmostEqual(pm, pm_new)

    def test_PP_class(self):
        self.assertEqual(self.p.get_freq(), self.collect.get_freq())
        pp = PP.PortfolioPerformance(self.p, self.collect)
        pp.annualized_return()
        pp.annualized_volatility()
        pp.annualized_sharpe_ratio()
        pp.PnL()
        # 0 since test data always increasing, but function tested
        pp.max_drawdown()

        self.assertEqual(pp.get_metrics("annualized_return"), 228)
        self.assertEqual(pp.get_metrics("PnL").iloc[0, 0], 1.5)
        pp.print_metrics()
        pp.get_metrics('PnL')

        self.assertEqual(pp.metrics['annualized_return'], 228)
        self.assertAlmostEqual(pp.metrics['annualized_volatility'],
                               13.3630621)
        self.assertAlmostEqual(pp.metrics['sharpe_ratio'], 228 / 13.3630621)
        self.assertAlmostEqual(pp.metrics['PnL'].iloc[0, 0], 1.5)
        self.assertAlmostEqual(pp.metrics['PnL'].iloc[1, 0],
                               0.30952380952380953)
        self.assertEqual(pp.metrics['max_drawdown'], 0)

        # sortino
        pp.sortino_ratio(threshold=0.6)
        d = {'Initial weights': [0.5, 0.5]}
        self.weights2 = pd.DataFrame(data=d).T
        self.weights2 = self.weights2.rename(columns={0: 'ABC', 1: 'KKK'})
        pnl = PP.PnL(self.weights2, self.collect.to_df().dropna())
        threshold = 0.6
        expected = pp.metrics['annualized_return']
        lpm_sortino = PP.partial_moment(pnl, threshold, order=2,
                                        lower=True)**0.5
        ans_sortino = (expected - threshold) / lpm_sortino
        self.assertAlmostEqual(pp.metrics['sortino_ratio'], ans_sortino)

        # omega
        pp.omega_ratio(threshold=0.6)
        lpm_omega = PP.partial_moment(pnl, threshold, order=1, lower=True)
        ans_omega = ((expected - threshold) / lpm_omega) + 1
        self.assertAlmostEqual(pp.metrics['omega_ratio'], ans_omega)
class Test_Data(unittest.TestCase):
    """Tests for ``DataSeries`` and ``DataCollection`` on small fake series.

    Checks use ``unittest`` assertion methods rather than bare ``assert``
    statements, so they are still executed when Python runs with ``-O``.
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.a = pd.DataFrame([10.2, 12, 32.1, 9.32],
                              columns=['fakeSPY'],
                              index=pd.to_datetime([
                                  '2020-01-01', '2020-02-01', '2020-03-01',
                                  '2020-04-01'
                              ]))
        self.a_series = DataSeries('ETF', 'monthly', self.a)
        self.b = pd.DataFrame([2.3, 3.6, 4.5],
                              columns=['fakeTreasury'],
                              index=pd.to_datetime(
                                  ['2019-12-12', '2020-02-05', '2020-09-13']))
        self.b_series = DataSeries('Bond', 'monthly', self.b)
        self.c_collection = DataCollection('trial',
                                           [self.a_series, self.b_series])

        # Longer "entire" versions of the series above, plus the remainder
        # expected once the shorter ones are subtracted (add/sub tests).
        self.a_entire = pd.DataFrame([10.2, 12, 32.1, 9.32, 11.5, 9.7],
                                     columns=['fakeSPY'],
                                     index=pd.to_datetime([
                                         '2020-01-01', '2020-02-01',
                                         '2020-03-01', '2020-04-01',
                                         '2020-05-01', '2020-06-01'
                                     ]))
        self.a_series_entire = DataSeries('ETF', 'monthly', self.a_entire)
        self.b_entire = pd.DataFrame([2.3, 3.6, 4.5, 5.5],
                                     columns=['fakeTreasury'],
                                     index=pd.to_datetime([
                                         '2019-12-12', '2020-02-05',
                                         '2020-09-13', '2020-10-13'
                                     ]))
        self.b_series_entire = DataSeries('Bond', 'monthly', self.b_entire)
        self.c_collection_entire = DataCollection(
            'trial', [self.a_series_entire, self.b_series_entire])

        self.a_exp = pd.DataFrame([11.5, 9.7],
                                  columns=['fakeSPY'],
                                  index=pd.to_datetime(
                                      ['2020-05-01', '2020-06-01']))
        self.a_series_exp = DataSeries('ETF', 'monthly', self.a_exp)
        self.b_exp = pd.DataFrame([5.5],
                                  columns=['fakeTreasury'],
                                  index=pd.to_datetime(['2020-10-13']))
        self.b_series_exp = DataSeries('Bond', 'monthly', self.b_exp)
        self.c_collection_exp = DataCollection(
            'trial', [self.a_series_exp, self.b_series_exp])

    def test_DataSeries_basic(self):
        a = self.a
        a_series = self.a_series
        self.assertEqual(len(a_series), 4)
        self.assertEqual(str(a_series), 'monthly fakeSPY')
        self.assertEqual(a_series.get_ticker(), 'fakeSPY')
        self.assertEqual(a_series.get_category(), 'ETF')
        self.assertEqual(a_series.get_freq(), 'monthly')
        self.assertTrue(a.equals(a_series.get_ts()))
        # test deep copy: a distinct object holding an equal time series
        a_copy = a_series.copy()
        self.assertIsNot(a_copy, a_series)
        self.assertTrue(a_copy.get_ts().equals(a_series.get_ts()))

        self.assertIsInstance(a_series.to_Series(), pd.Series)

    def test_DataSeries_add_sub(self):
        diff = self.a_series_entire - self.a_series
        self.assertTrue(self.compareSeries(diff, self.a_series_exp))
        a_plus = diff + self.a_series
        self.assertTrue(self.compareSeries(a_plus, self.a_series_entire))

    def test_DataSeries_to_list(self):
        lst = self.a_series.to_list()
        self.assertEqual(lst, [10.2, 12, 32.1, 9.32])

    def test_last_index(self):
        self.assertEqual(self.a_series.get_last_date(),
                         pd.to_datetime('2020-04-01'))

    def test_DataSeries_split_and_trim(self):
        # test split
        a_train, a_test = self.a_series.split(pct=0.75)
        self.assertIsInstance(a_train, DataSeries)
        self.assertIsInstance(a_test, DataSeries)
        self.assertEqual(len(a_train), 3)
        self.assertEqual(len(a_test), 1)
        self.assertTrue(self.a.iloc[:3].equals(a_train.get_ts()))
        self.assertTrue(self.a.iloc[3:].equals(a_test.get_ts()))

        # test trim
        trimed = self.a_series.trim('2020-02-01', '2020-03-01')
        self.assertEqual(len(trimed), 2)
        self.assertTrue(
            self.a.loc['2020-02-01':'2020-03-01'].equals(trimed.get_ts()))

    @staticmethod
    def compareSeries(a, b):
        """Return True when ``a`` and ``b`` are distinct but matching series.

        Both arguments must be ``DataSeries`` instances, otherwise False is
        returned immediately. The result is also False, with one message
        printed per failed check, when they are the same object or differ
        in length, string form, category, time series or frequency.
        """
        flag = True
        if not isinstance(a, DataSeries):
            print("\n The first item is not a DataSeries object")
            return False
        if not isinstance(b, DataSeries):
            print("\n The Second item is not a DataSeries object")
            return False

        # Identity, not equality: the check is about both names pointing at
        # one object, as the message says.
        if a is b:
            print("\n The two items are the same object")
            flag = False

        if len(a) != len(b):
            print("\n The two items does not have the same length")
            flag = False

        if str(a) != str(b):
            print("\n The two items does not have the same ticker")
            flag = False

        if a.get_category() != b.get_category():
            print("\n The two items does not have the same category")
            flag = False

        if not a.get_ts().equals(b.get_ts()):
            print("\n The two items does not have the same time series")
            flag = False

        if not a.get_freq() == b.get_freq():
            print("\n The two items does not have the same frequency")
            flag = False

        return flag

    def test_DataCollection_basic(self):
        self.assertEqual(len(self.c_collection), 2)
        self.assertEqual(self.c_collection.get_freq(), 'monthly')
        for item, compare in zip(self.c_collection,
                                 [self.a_series, self.b_series]):
            self.assertTrue(self.compareSeries(item, compare))

    def test_DataCollection_add_sub(self):
        res = self.c_collection_entire - self.c_collection
        expected = self.c_collection_exp
        for r, e in zip(res, expected):
            self.assertTrue(self.compareSeries(r, e))

        res_plus = res + self.c_collection
        for r, e in zip(res_plus, self.c_collection_entire):
            self.assertTrue(self.compareSeries(r, e))

    def test_DataCollection_get_series(self):
        item1 = self.c_collection[1]
        self.assertTrue(self.compareSeries(item1, self.b_series))

        item2 = self.c_collection.get_series('fakeSPY')
        self.assertTrue(self.compareSeries(item2, self.a_series))

    def test_DataCollection_copy(self):
        c = self.c_collection.copy()
        self.assertIsNot(c, self.c_collection)
        self.assertEqual(c.label, self.c_collection.label)
        self.assertEqual(c.get_freq(), self.c_collection.get_freq())
        for one, two in zip(c, self.c_collection):
            self.assertTrue(self.compareSeries(one, two))

    def test_DataCollection_summary(self):
        # TODO: no checks implemented yet.
        pass

    def test_DataCollection_split(self):
        train, test = self.c_collection.split(pct=0.75)
        self.assertEqual(str(train), 'trial')
        self.assertEqual(train.freq, 'monthly')
        self.assertEqual(str(test), 'trial')
        self.assertEqual(test.freq, 'monthly')

        # Split each member series the same way and compare pairwise.
        compare = [self.a_series.split(0.75), self.b_series.split(0.75)]
        compare_train, compare_test = zip(*compare)
        train_col, test_col = list(compare_train), list(compare_test)
        for i, item in enumerate(train):
            self.assertTrue(self.compareSeries(item, train_col[i]))

        for i, item in enumerate(test):
            self.assertTrue(self.compareSeries(item, test_col[i]))

    def test_DataCollection_list(self):
        self.assertEqual(self.c_collection.ticker_list(),
                         ['fakeSPY', 'fakeTreasury'])
        self.assertEqual(self.c_collection.category_list(), ['ETF', 'Bond'])
        self.assertEqual(
            self.c_collection.last_date_list(),
            pd.to_datetime(['2020-04-01', '2020-09-13']).to_list())
        self.assertEqual(self.c_collection.to_list(),
                         [[10.2, 12, 32.1, 9.32], [2.3, 3.6, 4.5]])

    def test_DataCollection_add(self):
        d = pd.DataFrame([11, 22],
                         columns=['fakeZZZ'],
                         index=pd.to_datetime(['2019-1-12', '2019-02-05']))
        d_series = DataSeries('Bond', 'monthly', d)
        c_plus = self.c_collection.copy()
        c_plus.add(d_series)

        compare = [self.a_series, self.b_series, d_series]
        for i, item in enumerate(c_plus):
            self.assertTrue(self.compareSeries(item, compare[i]))

    def test_DataCollection_df(self):
        df = self.c_collection.to_df()
        compare = pd.concat([self.a, self.b], axis=1)
        self.assertTrue(df.equals(compare))

    def test_price_to_return(self):
        # TODO: no checks implemented yet.
        pass