Ejemplo n.º 1
0
def plot_stock_with_sma():
    """Plot SPY closing prices alongside their 50-row simple moving average.

    Loads ``StockData/SPY.csv`` through ``read_csv``, adds 50-row and
    100-row rolling means of the close column to the loaded data, and passes
    the close and 50-row columns to ``plot_graph`` with "Time"/"Price" labels.
    """
    csv_path = path.join("StockData", "SPY.csv")
    prices = read_csv(csv_path, dateKey, [dateKey, closeKey])

    # Add one rolling-mean column per window length.
    for column, window in ((sma50Key, 50), (sma100Key, 100)):
        prices[column] = prices[closeKey].rolling(window).mean()

    # Only the close and the 50-row average are drawn; the 100-row column is
    # computed above but is not part of the plotted selection.
    plotted_keys = [closeKey, sma50Key]
    plot_graph(prices[plotted_keys], plotted_keys, "Time", "Price")
Ejemplo n.º 2
0
def plot_correlation(stock_one_ticker, stock_two_ticker):
    """Plot the correlation between the closing prices of two tickers.

    Each ticker is loaded from ``StockData/<ticker>.csv`` via ``read_csv``.
    The two data sets are passed to ``calculate_corr`` with the argument 50
    (presumably a window length -- confirm against that helper), and the last
    100 entries of its result are handed to ``plot_graph``.

    Args:
        stock_one_ticker: Ticker whose CSV file is loaded first.
        stock_two_ticker: Ticker whose CSV file is loaded second.
    """

    def load_closes(ticker):
        # Build the per-ticker file name and read the date and close columns.
        csv_path = path.join("StockData", "%s.csv" % ticker)
        return read_csv(csv_path, dateKey, [dateKey, closeKey])

    first_prices = load_closes(stock_one_ticker)
    second_prices = load_closes(stock_two_ticker)

    # Keep only the trailing 100 entries of the computed correlation.
    correlation = calculate_corr(first_prices, second_prices, 50)
    recent_correlation = correlation[-100:]

    series_label = "%s and %s cor" % (stock_one_ticker, stock_two_ticker)
    plot_graph(recent_correlation, [series_label])
def fp_growth_retail(TOP_PERCENTAGE, file_name, no_of_trx):
    """Run FP-Growth on a retail CSV and print the resulting association rules.

    The file ``../Datasets/<file_name>.csv`` is read without a header row.
    The first ``no_of_trx`` rows are treated as transactions, using the first
    20 columns of each row. Item frequencies are counted, the top items are
    selected with ``top_x_per_products`` and plotted with ``plot_graph``, and
    only transactions containing at least one top item are mined.

    Args:
        TOP_PERCENTAGE: Forwarded to ``top_x_per_products`` and ``plot_graph``.
        file_name: Base name (without ``.csv``) of the dataset file.
        no_of_trx: Number of leading rows to read as transactions.
    """
    data = pd.read_csv('../Datasets/' + str(file_name) + '.csv', header=None)

    print("\n --- FP Growth on File " + str(file_name) +
          " : and Top Percentage: " + str(TOP_PERCENTAGE))

    # Convert every cell to a string so TransactionEncoder receives uniform
    # items. The underlying array is fetched once rather than per cell.
    values = data.values
    trans = [[str(values[i, j]) for j in range(20)]
             for i in range(no_of_trx)]

    item_counts = dict(
        collections.Counter(x for sublist in trans for x in sublist))
    # Empty cells stringify to 'nan'; zero their count so the placeholder
    # does not compete with real items by frequency.
    item_counts['nan'] = 0
    print("Frequencies of Each Item:")
    print(item_counts)

    top_items = top_x_per_products(item_counts, TOP_PERCENTAGE)
    print("Top Items:")
    print(top_items)

    plot_graph(top_items, 'fp_growth', TOP_PERCENTAGE)

    # Keep only the transactions that mention at least one top item.
    selected = [b for b in trans if any(a in b for a in top_items.keys())]

    # One-hot encode the selected transactions.
    encoder = TransactionEncoder()
    encoded = encoder.fit_transform(np.array(selected))
    data = pd.DataFrame(encoded, columns=encoder.columns_, dtype=int)

    # The 'nan' placeholder shows up as a column when any row was padded;
    # errors='ignore' keeps this from raising when no such column exists.
    data.drop('nan', axis=1, inplace=True, errors='ignore')

    # Running the fpgrowth algorithm.
    res = fpgrowth(data, min_support=0.01, use_colnames=True)
    print("Number of Frequent Item sets:" + str(len(res)))
    res = association_rules(res, metric="confidence", min_threshold=0.5)
    print("\n=============== ASOCIATION RULES ======================")

    # Columns are picked by position.
    # NOTE(review): positions 0, 1, 4, 5 are presumably antecedents,
    # consequents, support and confidence -- confirm against the installed
    # mlxtend version.
    cols = [0, 1, 4, 5]
    res = res[res.columns[cols]]
    print(res)
def start_apriori(top_percentage):
    """Run Apriori on the BreadBasket dataset and save the rules to a file.

    Reads ``../Datasets/BreadBasket.csv``, counts how often each item occurs,
    keeps only the rows whose item is among those returned by
    ``top_x_per_products``, one-hot encodes the transactions, prints the
    frequent item sets and association rules, and writes the tabulated rules
    to ``table.txt``.

    Args:
        top_percentage: Forwarded to ``top_x_per_products``.
    """
    store_data = pd.read_csv('../Datasets/BreadBasket.csv')

    # Count occurrences per item, in order of first appearance.
    Items = {}
    for item in store_data['Item']:
        Items[item] = Items.get(item, 0) + 1

    print(Items)
    print(len(Items))
    top_items = top_x_per_products(Items, top_percentage)
    print(top_items)
    top_item_set = set(top_items.keys())
    print(top_item_set)

    plot_graph(top_items)

    # Flag rows whose item is a top item, then keep only those rows. The
    # explicit copy makes the filtered frame independent of the original, so
    # the column assignment below does not write into a slice.
    store_data['D'] = store_data.Item.isin(top_item_set).astype(int)
    store_data = store_data[store_data['D'] == 1].copy()
    print(store_data)

    store_data['Quantity'] = 1

    # One row per transaction, one column per item, cell = purchase count.
    basket = (store_data.groupby(['Transaction', 'Item'])['Quantity']
              .sum().unstack().fillna(0))

    # One-hot encode: any count of at least 1 becomes 1, everything else 0.
    basket_sets = (basket >= 1).astype(int)
    print("Basket Data Shape:"+str(basket_sets.shape))
    frequent_itemsets = apriori(basket_sets, min_support=0.02, use_colnames=True)
    print(frequent_itemsets)

    rules = association_rules(frequent_itemsets, metric="lift", min_threshold=0.5)
    print(rules)
    with open('table.txt', 'w') as f:
        f.write(tabulate(rules))
Ejemplo n.º 5
0
def plot_sma_crossover_vs_buy_and_hold_strategy_comparison():
    """Plot cumulative returns of an SMA-crossover strategy next to buy-and-hold.

    Loads ``StockData/SPY.csv`` through ``read_csv`` and adds 50-row and
    100-row rolling means of the close. The position is 1 where the 50-row
    mean is above the 100-row mean and 0 otherwise, shifted down by one row.
    The cumulative product of (1 + return) is then plotted for both the
    strategy and for simply holding, via ``plot_graph``.
    """
    csv_path = path.join("StockData", "SPY.csv")
    frame = read_csv(csv_path, dateKey, [dateKey, closeKey])

    close = frame[closeKey]
    frame[sma50Key] = close.rolling(50).mean()
    frame[sma100Key] = close.rolling(100).mean()

    # 1 while the 50-row average is above the 100-row one, else 0. The shift
    # moves each signal one row later, so a row's position comes from the
    # previous row's averages.
    crossover_signal = np.where(frame[sma50Key] > frame[sma100Key], 1, 0)
    frame[positionKey] = crossover_signal
    frame[positionKey] = frame[positionKey].shift(1)

    # Row-over-row percentage change of the close, shared by both curves.
    daily_change = close.pct_change(1)
    frame[strategyPctKey] = daily_change * frame[positionKey]
    frame[strategyKey] = (frame[strategyPctKey] + 1).cumprod()
    frame[buyHoldKey] = (daily_change + 1).cumprod()

    compared_keys = [strategyKey, buyHoldKey]
    plot_graph(frame[compared_keys], compared_keys, "Time", "Returns")
Ejemplo n.º 6
0
def apriori_retail_dataset(TOP_PERCENTAGE, no_of_trx=7501):
    """Run Apriori on the Market Basket Optimisation dataset and print rules.

    Reads ``../Datasets/Market_Basket_Optimisation.csv`` without a header
    row. Up to ``no_of_trx`` leading rows are treated as transactions, using
    the first 20 columns of each row. Item frequencies are counted, the top
    items are selected with ``top_x_per_products`` and plotted with
    ``plot_graph``, and Apriori is run on the transactions that contain at
    least one top item.

    Args:
        TOP_PERCENTAGE: Forwarded to ``top_x_per_products`` and ``plot_graph``.
        no_of_trx: Maximum number of leading rows to read. Defaults to 7501,
            the row count previously hard-coded here; it is capped at the
            number of rows actually present in the file.
    """
    store_data = pd.read_csv('../Datasets/Market_Basket_Optimisation.csv',
                             header=None)

    # Fetch the underlying array once rather than per cell, and never index
    # past the rows the file really has.
    values = store_data.values
    row_count = min(no_of_trx, len(values))
    records = [[str(values[i, j]) for j in range(20)]
               for i in range(row_count)]

    Items = dict(
        collections.Counter(x for sublist in records for x in sublist))
    # Empty cells stringify to 'nan'; drop that placeholder if present
    # (it is absent when every row fills all 20 columns).
    Items.pop('nan', None)
    print("Frequencies of Each Item:")
    print(Items)

    top_items = top_x_per_products(Items, TOP_PERCENTAGE)
    print("Top Items:")
    print(top_items)

    plot_graph(top_items, 'apriori', TOP_PERCENTAGE)

    # Keep only the transactions that mention at least one top item.
    Output = [b for b in records if any(a in b for a in top_items.keys())]

    # Named `rules` rather than `association_rules` so the local does not
    # shadow a function of that name.
    rules = apriori(Output,
                    min_support=0.01,
                    min_confidence=0.5,
                    min_lift=2,
                    min_length=1)
    association_results = list(rules)
    print("\n=============== ASOCIATION RULES ======================")

    for item in association_results:
        # item[1] is printed as the support; item[2][0] holds, in order, the
        # base items, the added items, the confidence and the lift.
        # NOTE(review): only the first entry of item[2] is reported; any
        # further entries for the same item set are not printed.
        first_stat = item[2][0]
        base_items = list(first_stat[0])
        added_items = list(first_stat[1])

        # Skip rules that involve the 'nan' padding placeholder.
        if 'nan' in base_items or 'nan' in added_items:
            continue

        print("Rule: " + str(base_items) + " -> " + str(added_items))
        print("Support: " + str(item[1]))
        print("Confidence: " + str(first_stat[2]))
        print("Lift: " + str(first_stat[3]))
        print("=====================================")