def random_bulk_invest(random_date):
    """Backtest investing the whole `capital` in one 1000-unit order on `random_date`.

    Builds an order book indexed over the same trading days as `sp500`,
    places a single order on `random_date`, and backtests the strategy.

    Returns:
        tuple: (IRR, total return) as reported by `t.backtest_strategy`.
    """
    bulk2 = pd.DataFrame()
    bulk2["date"] = sp500["date"]
    bulk2["order_size"] = 0
    # Fix: look the date up in this function's own frame (`bulk2`), not the
    # module-level `bulk1` from Case 1. The original only worked because
    # bulk1 and bulk2 share an identical date column/index — a silent
    # dependency on outer state.
    bulk2.loc[t.where(bulk2, "date", random_date, op.eq).index[0], "order_size"] = 1000
    out2 = t.backtest_strategy(sp500, bulk2, capital)
    return (out2["IRR"], out2["return"])
def random_dca_invest(early=False):
    """Dollar-cost average `capital` into the index on randomly chosen days.

    Repeatedly picks a day that has no order yet and invests a random amount
    (at most 50 per order) until the capital is exhausted.

    Args:
        early: when True, restrict the random day choice to the first 60
            still-empty days (front-loads the investing).

    Returns:
        tuple: (IRR, total return) as reported by `t.backtest_strategy`.
    """
    dca = pd.DataFrame()
    dca["date"] = sp500["date"]
    dca["order_size"] = 0
    remaining = capital
    while remaining > 0:
        # Last order takes whatever is left; otherwise 1..50 at random.
        if remaining > 50:
            order_amount = random.randint(1, 50)
        else:
            order_amount = remaining
        # Candidate days are those without an order placed yet.
        open_days = t.where(dca, "order_size", 0)
        if early:
            open_days = open_days.head(60)
        chosen_day = open_days.sample()["date"].values[0]
        dca.loc[t.where(dca, "date", chosen_day, op.eq).index[0], "order_size"] = order_amount
        remaining = remaining - order_amount
    out3 = t.backtest_strategy(sp500, dca, capital)
    return (out3["IRR"], out3["return"])
#te_ary = te.fit(dt).transform(dt) #df = pd.DataFrame(te_ary, columns=te.columns_) df = pd.get_dummies(Dataset, prefix_sep=":") # frequent item sets, in same basket (with a min support) frequent_itemsets = apriori(df, min_support=0.2, use_colnames=True) frequent_itemsets['length'] = frequent_itemsets['itemsets'].apply(lambda x: len(x)) frequent_itemsets rules = association_rules(frequent_itemsets, metric="lift", min_threshold=0.01) cola_rules = t.where(rules, "consequents", {cola}) # Count occurences for index, row in cola_rules.iterrows(): #print(list(row['antecedents']), list(row['consequents'])) valu = t.where(df, cola, True) #print(valu.shape[0]) for ant in list(row['antecedents']): valu = t.where(valu, ant, True) cola_rules.at[index,'A_cnt'] = valu.shape[0] valu2 = t.where(df, colb, True) for ant in list(row['antecedents']): valu2 = t.where(valu2, ant, True) cola_rules.at[index,'B_cnt'] = valu2.shape[0]
""" ## Consistent bucket? - Another property required, is that we need to apply the function above and keep putting the same user into same bucket. - So we can assure we continue capture events from the same user over time - This needs to happen without having to save the already sampled users into a database and then retrieve them when we are deciding who to include them in sampling. This operation would be too expensive. So here we check that user always goes into same bucket i.e. run again same function and see if previously selected user goes into same bucket """ results = [] for i in range(0, 10000): random_user = users.loc[random.randint(0, len(users.index)-1), "user_id"] random_user_previous_bucket = t.select(t.where(users, "user_id", random_user), "sample_bucket").values[0][0] results.append(random_user_previous_bucket == sample_bucket(random_user)) r = pd.DataFrame(results, columns=["result"]) t.group(r, "result") """ # Testing that GUIDs numbers have a truly random distribution - We can see this already when we do the counts above by bucket, where we expect 100 into each bucket - But another way, is to run a Shapiro–Wilk test is a test of normality. - The null hypothesis is: the set of numbers originate from a normal distribution (not a random one) results: - if the p-value<0.05 then is a random distribution
def test_group(table1):
    """Group by "user" and check the row whose count is 1 is labelled 't'."""
    grouped = t.group(table1, "user")
    singletons = t.where(grouped, "user", 1).reset_index()
    assert singletons['index'].values[0] == 't'
def test_where3(table1):
    """Filter rows with kpi > 9; first value of column 0 must be 'j'."""
    filtered = t.where(table1, "kpi", 9, op.gt)
    first_cell = t.column(filtered, 0)[0]
    assert first_cell == 'j'
def test_where2(table1):
    """Filter rows with kpi != 11; first value of column 0 must be 'k'."""
    filtered = t.where(table1, "kpi", 11, op.ne)
    first_cell = t.column(filtered, 0)[0]
    assert first_cell == 'k'
# Lets have a quick look at the index value over this period import plotly.express as px import plotly.io as pio pio.renderers.default = 'png' # svg, png fig = px.line(sp500, x="date", y="^GSPC", title='S&P 500') fig.show() # Lets say we have 1000 to invest capital = 1000 # Case 1. What happens if we invest everything on the 1st day ? bulk1 = pd.DataFrame() bulk1["date"] = sp500["date"] bulk1["order_size"] = 0 bulk1.loc[t.where(bulk1, "date", datetime.strptime('2018-03-01', '%Y-%m-%d' ), op.eq).index[0], "order_size"] = 1000 out1 = t.backtest_strategy(sp500, bulk1, capital) print(out1["ROI"]) print(out1["IRR"]) print(out1["return"]) # Case 2. What happens if we change investment based on trend def random_bulk_invest(random_date): bulk2 = pd.DataFrame() bulk2["date"] = sp500["date"] bulk2["order_size"] = 0 bulk2.loc[t.where(bulk1, "date", random_date, op.eq).index[0], "order_size"] = 1000
# Lets have a quick look at the index value over this period import plotly.express as px import plotly.io as pio pio.renderers.default = 'png' # svg, png fig = px.line(sp500, x="date", y="^GSPC", title='S&P 500') fig.show() # Lets say we have 1000 to invest capital = 1000 # Case 1. What happens if we invest everything on the 1st day ? bulk1 = pd.DataFrame() bulk1["date"] = sp500["date"] bulk1["order_size"] = 0 bulk1.loc[t.where(bulk1, "date", datetime.strptime('2018-03-01', '%Y-%m-%d' ), op.eq).index[0], "order_size"] = 1000 out1 = t.backtest_strategy(sp500, bulk1, capital) print(out1["ROI"]) print(out1["IRR"]) print(out1["return"]) # Case 2. What happens if we pick a random date to invest everything ? def random_bulk_invest(random_date): bulk2 = pd.DataFrame() bulk2["date"] = sp500["date"] bulk2["order_size"] = 0 bulk2.loc[t.where(bulk1, "date", random_date, op.eq).index[0], "order_size"] = 1000 out2 = t.backtest_strategy(sp500, bulk2, capital)