def random_bulk_invest(random_date):
    """Invest the whole capital in a single order on ``random_date`` and backtest.

    Parameters
    ----------
    random_date : a date present in ``sp500["date"]`` on which the full
        1000 order is placed.

    Returns
    -------
    tuple
        ``(IRR, return)`` of the backtested strategy.
    """
    bulk2 = pd.DataFrame()
    bulk2["date"] = sp500["date"]
    bulk2["order_size"] = 0
    # BUG FIX: look the date up in bulk2 (the frame being mutated), not in the
    # unrelated module-level bulk1. It only worked before because both frames
    # happen to share the same sp500 date column.
    bulk2.loc[t.where(bulk2, "date", random_date, op.eq).index[0],
              "order_size"] = 1000
    out2 = t.backtest_strategy(sp500, bulk2, capital)
    return (out2["IRR"], out2["return"])
Esempio n. 2
0
def random_dca_invest(early=False):
    """Dollar-cost-average the capital on randomly chosen, distinct days.

    Repeatedly invests a random amount (up to 50) on a random day that has
    no order yet, until the whole capital is spent, then backtests.

    Parameters
    ----------
    early : bool, optional
        When True, restrict the random day choice to the first 60
        not-yet-invested days.

    Returns
    -------
    tuple
        ``(IRR, return)`` of the backtested strategy.
    """
    dca = pd.DataFrame()
    dca["date"] = sp500["date"]
    dca["order_size"] = 0
    money = capital
    while money > 0:
        # Invest up to 50 per order; dump the remainder once less than 50 is left.
        random_invest_value = random.randint(1, 50) if money > 50 else money
        # Candidate days are those without an order yet (single lookup instead
        # of the duplicated t.where call in each branch).
        candidates = t.where(dca, "order_size", 0)
        if early:
            candidates = candidates.head(60)
        random_day = candidates.sample()["date"].values[0]
        dca.loc[t.where(dca, "date", random_day, op.eq).index[0],
                "order_size"] = random_invest_value
        money -= random_invest_value

    out3 = t.backtest_strategy(sp500, dca, capital)
    return (out3["IRR"], out3["return"])
Esempio n. 3
0
# Alternative encoding via mlxtend's TransactionEncoder (kept for reference):
# te_ary = te.fit(dt).transform(dt)
# df     = pd.DataFrame(te_ary, columns=te.columns_)


# One-hot encode the dataset so each column becomes a "column:value" flag,
# the boolean basket format apriori expects.
df = pd.get_dummies(Dataset, prefix_sep=":")


# Frequent item sets, i.e. items appearing together in a basket
# (with a minimum support of 0.2).
frequent_itemsets = apriori(df, min_support=0.2, use_colnames=True)
# apply(len) — no need to wrap the builtin in a lambda.
frequent_itemsets['length'] = frequent_itemsets['itemsets'].apply(len)
frequent_itemsets


# Derive association rules, keeping anything whose lift exceeds the threshold.
rules = association_rules(frequent_itemsets, metric="lift", min_threshold=0.01)
# Rules whose consequent is exactly {cola}.
cola_rules = t.where(rules, "consequents", {cola})


def _count_with_antecedents(base_column, antecedents):
    """Count rows where base_column is True together with every antecedent."""
    subset = t.where(df, base_column, True)
    for item in antecedents:
        subset = t.where(subset, item, True)
    return subset.shape[0]


# Count occurrences: for each rule, how often the antecedents co-occur with
# cola (A_cnt) and with colb (B_cnt).
for idx, rule_row in cola_rules.iterrows():
    antecedent_items = list(rule_row['antecedents'])
    cola_rules.at[idx, 'A_cnt'] = _count_with_antecedents(cola, antecedent_items)
    cola_rules.at[idx, 'B_cnt'] = _count_with_antecedents(colb, antecedent_items)
    
"""
## Consistent bucket?
    
- Another property required is that applying the function above must keep putting the same user into the same bucket.
- So we can assure we continue capture events from the same user over time
- This needs to happen without having to save the already-sampled users into a database and then retrieve them when deciding whom to include in the sample. That operation would be too expensive.
  

So here we check that a user always goes into the same bucket,
i.e. we run the same function again and verify that a previously selected user lands in the same bucket.
"""

# Re-run the bucketing function on 10 000 randomly drawn users and check each
# one lands in the bucket recorded earlier in the `users` frame.
results = []
for _ in range(10000):
    uid = users.loc[random.randint(0, len(users.index) - 1), "user_id"]
    previous_bucket = t.select(
        t.where(users, "user_id", uid), "sample_bucket"
    ).values[0][0]
    results.append(previous_bucket == sample_bucket(uid))

# Summarise how many checks passed vs failed.
r = pd.DataFrame(results, columns=["result"])
t.group(r, "result")


"""
# Testing that GUIDs numbers have a truly  random distribution

- We can see this already when we do the counts above by bucket, where we expect 100 into each bucket
- But another way is to run a Shapiro–Wilk test, which is a test of normality.
- The null hypothesis is: the set of numbers originate from a normal distribution (not a random one)

results: 
- if the p-value < 0.05 then the distribution is random (the normality hypothesis is rejected)
Esempio n. 5
0
def test_group(table1):
    """Tests row and column by name"""
    grouped = t.group(table1, "user")
    matched = t.where(grouped, "user", 1).reset_index()
    assert matched['index'].values[0] == 't'
Esempio n. 6
0
def test_where3(table1):
    """Tests where and column by index"""
    filtered = t.where(table1, "kpi", 9, op.gt)
    first_column = t.column(filtered, 0)
    assert first_column[0] == 'j'
Esempio n. 7
0
def test_where2(table1):
    """Tests where and column by index"""
    filtered = t.where(table1, "kpi", 11, op.ne)
    first_column = t.column(filtered, 0)
    assert first_column[0] == 'k'
# Quick visual check of the index value over this period
import plotly.express as px
import plotly.io as pio

pio.renderers.default = 'png'  # svg, png
fig = px.line(sp500, x="date", y="^GSPC", title='S&P 500')
fig.show()

# Starting capital of 1000 to invest
capital = 1000

# Case 1. Invest everything on the first day (2018-03-01).
bulk1 = pd.DataFrame()
bulk1["date"] = sp500["date"]
bulk1["order_size"] = 0
first_day = datetime.strptime('2018-03-01', '%Y-%m-%d')
first_day_row = t.where(bulk1, "date", first_day, op.eq).index[0]
bulk1.loc[first_day_row, "order_size"] = 1000
out1 = t.backtest_strategy(sp500, bulk1, capital)
print(out1["ROI"])
print(out1["IRR"])
print(out1["return"])

# Case 2. What happens if we change investment based on trend


def random_bulk_invest(random_date):
    """Build an order book that invests the full 1000 on ``random_date``.

    NOTE(review): this snippet appears truncated — it builds the order frame
    but never backtests or returns it; confirm against the complete version
    of this function elsewhere in the file.
    """
    bulk2 = pd.DataFrame()
    bulk2["date"] = sp500["date"]
    bulk2["order_size"] = 0
    # BUG FIX: filter on the local bulk2, not the module-level bulk1.
    bulk2.loc[t.where(bulk2, "date", random_date, op.eq).index[0],
              "order_size"] = 1000
Esempio n. 9
0
# Have a quick look at the index value over this period
import plotly.express as px
import plotly.io as pio

pio.renderers.default = 'png'  # svg, png
fig = px.line(sp500, x="date", y="^GSPC", title='S&P 500')
fig.show()

# We have 1000 to invest
capital = 1000

# Case 1. What happens if we invest everything on the 1st day ?
bulk1 = pd.DataFrame()
bulk1["date"] = sp500["date"]
bulk1["order_size"] = 0
start_date = datetime.strptime('2018-03-01', '%Y-%m-%d')
start_row = t.where(bulk1, "date", start_date, op.eq).index[0]
bulk1.loc[start_row, "order_size"] = 1000
out1 = t.backtest_strategy(sp500, bulk1, capital)
for key in ("ROI", "IRR", "return"):
    print(out1[key])


# Case 2. What happens if we pick a random date to invest everything ?
def random_bulk_invest(random_date):
    bulk2 = pd.DataFrame()
    bulk2["date"] = sp500["date"]
    bulk2["order_size"] = 0
    bulk2.loc[t.where(bulk1, "date", random_date, op.eq).index[0],
              "order_size"] = 1000
    out2 = t.backtest_strategy(sp500, bulk2, capital)