Ejemplo n.º 1
0
To make the comparison fair, we take only four patterns

thus
((0, (3,)), (1, (3,)))
((0, (4,)), (1, (4,)))
((0, (3,)), (1, (4,)))
((0, (2,)), (1, (2,)))

These patterns all have an immediate and simple trend.
They move almost identically (with one slight exception).

We now visualize the results as we always do.
'''
#%%
# import_dat returns the table the code table starts from and the dataset
st, d = import_dat('./output/UNHTRV/UNH_TRV_5y_close.dat')

# NOTE: d_og is a second name for the same object as d, not a copy
d_og = d

# Work on a copy so st itself is left untouched by the re-binding below
ct = st.copy()

# Together these four patterns give 48% coverage of line 0 and 1
for _pattern in (
        ((0, (3,)), (1, (3,))),
        ((0, (4,)), (1, (4,))),
        ((0, (3,)), (1, (4,))),
        ((0, (2,)), (1, (2,))),
):
    ct = add(_pattern, ct, d)


patterns = {}
for key, value in ct.items():
    if value[1]>1:
Ejemplo n.º 2
0
import numpy as np
import pandas as pd
from bokeh.io import curdoc, show
from bokeh.layouts import column
from bokeh.models import ColumnDataSource, Grid, HoverTool, LinearAxis, Range1d
from bokeh.models.glyphs import MultiLine
from bokeh.plotting import figure, output_file, show

from add_rem import add
from cover import cov_order
from import_data import import_dat
from output_gen import load_dictionary, painter
from pattern_finder import finder

#%%
# import_dat returns the table the code table starts from and the dataset
st, d = import_dat('./output/UNHMCD/UNH_MCD_5y_close.dat')

# NOTE: d_og is a second name for the same object as d, not a copy
d_og = d

# Work on a copy so st itself is left untouched by the re-binding below
ct = st.copy()

# Index patterns, added in this exact order
for _pattern in (
        ((0, (4, )), (1, (4, ))),
        ((0, (4, )), (1, (3, ))),
        ((0, (2, )), (1, (4, ))),
        ((0, (5, 4)), (1, (4, ))),
        ((0, (4, )), (1, (5, 3))),
):
    ct = add(_pattern, ct, d)

# Stock patterns
for _pattern in (
        ((0, (3, )), (1, (3, ))),
        ((0, (3, )), (1, (4, ))),
):
    ct = add(_pattern, ct, d)
Ejemplo n.º 3
0
from add_rem import add
from cover import cov_order
from import_data import import_dat
from output_gen import load_dictionary, painter

#%%

# Load the stored code table and keep the entries whose second field is > 1
d = load_dictionary('./output/dowj_5y_close.dict')
ct = {pattern: stats for pattern, stats in d.items() if stats[1] > 1}

# One row per remaining pattern; the three tuple fields become the columns
_ct_columns = ['Support', 'Length', 'Time']
ct_df = pd.DataFrame.from_dict(ct, orient='index', columns=_ct_columns)

#%%
# From here on d is the dataset returned by import_dat, not the dictionary
st, d = import_dat('./output/dowj_5y_close.dat')
#%%
# NOTE: d_og is a second name for the same object as d, not a copy
d_og = d

# Restart ct from a copy of st; the filtered dictionary above lives on in ct_df
ct = st.copy()

# Together these patterns give 26% coverage of line 1 and 2
for _pattern in (
        ((1, (4, )), (2, (4, ))),
        ((1, (4, )), (2, (3, ))),
        ((1, (2, )), (2, (4, ))),
        ((1, (5, 4)), (2, (4, ))),
        ((1, (4, )), (2, (5, 3))),
):
    ct = add(_pattern, ct, d)

patterns = {}
for key, value in ct.items():
Ejemplo n.º 4
0
def create_codetables(data,codetable):
    """Recount pattern supports after covering the dataset.

    The support stored in a codetable counts every occurrence of a
    pattern on its own, including occurrences that overlap with another
    pattern. This function covers the dataset with the multi-element
    patterns in cover order, counts what each one actually covers, and
    returns both views so they can be compared.

    Arguments:
        data -- passed unchanged to ``import_dat``
        codetable -- passed unchanged to ``load_dictionary``; each loaded
            value is indexed as ``(support, length, time)``

    Returns:
        tuple -- ``(original_ct, covered_ct)``, two DataFrames with the
        columns ``support``, ``length`` and ``time``, each ordered by
        ``support`` from highest to lowest
    """
    # Only the dataset half of the import is needed here
    _, dataset = import_dat(data)
    codetable = load_dictionary(codetable)

    # Entries with a length above 1 are the patterns that take part in the cover
    multi_patterns = {
        key: entry for key, entry in codetable.items() if entry[1] > 1
    }

    covered = Counter()

    # p2 hands back the dataset with the covered cells set to 0, plus the
    # number of times the pattern was used
    for pattern in cov_order(multi_patterns):
        dataset, usage = p2(pattern, dataset)
        covered[pattern] = usage

    # Whatever is still positive was not covered: count it as a singleton
    # keyed by (row index, (element,))
    for row_idx, row in enumerate(dataset):
        for element in row:
            if element > 0:
                covered[((row_idx, (element,)),)] += 1

    # Turn every count into (support, length, time); codetable entries that
    # never showed up in the cover get a support of 0
    for key, entry in codetable.items():
        usage = covered[key] if key in covered else 0
        covered[key] = (usage, entry[1], entry[2])

    # covered is now the covered codetable, codetable the original one.
    # A quick check is to compare codetable[((0,(2,)),)] with
    # covered[((0,(2,)),)].
    column_names = ['support', 'length', 'time']

    original_ct = pd.DataFrame.from_dict(
        codetable, orient='index', columns=column_names)
    original_ct = original_ct.sort_values('support')[::-1]

    covered_ct = pd.DataFrame.from_dict(
        covered, orient='index', columns=column_names)
    covered_ct = covered_ct.sort_values('support')[::-1]

    return original_ct, covered_ct
Ejemplo n.º 5
0
ct = load_dictionary('./output/workbook9/PEP_KO_5y_close.dict')
# Drop the singletons: keep entries whose second field is greater than 1
ct = {pattern: stats for pattern, stats in ct.items() if stats[1] > 1}

# Build the frame column by column so each pattern key stays one tuple value
ct_df = pd.DataFrame()
ct_df['index'] = list(ct.keys())
ct_df['value'] = list(ct.values())
# Spread the three tuple fields over their own columns, in tuple order
for _pos, _name in enumerate(('Support', 'Length', 'Time')):
    ct_df[_name] = [stats[_pos] for stats in ct_df['value']]
del ct_df['value']
# Index by pattern, then order by support from highest to lowest
ct_df = ct_df.set_index('index').sort_values('Support')[::-1]

st, d = import_dat('./output/workbook9/PEP_KO_5y_close.dat')

# NOTE: d_og is a second name for the same object as d, not a copy
d_og = d

# From here on ct is rebuilt from a copy of st
ct = st.copy()

# The rows at positions 0, 2, 4 and 5 of the sorted frame give 33% coverage
# of line 0 and 1
_ranked = list(ct_df.index)
for _pos in (0, 2, 4, 5):
    ct = add(_ranked[_pos], ct, d)

# Collect every entry of the new code table whose length is above 1
patterns = {}
for key, value in ct.items():
    if not value[1] > 1:
        continue
    patterns[key] = value
Ejemplo n.º 6
0
def make_plot():
    """Draw the Bitcoin price and volume charts coloured by pattern.

    Steps:
      1. Load ``./bitcoin2016.dict``, drop entries whose length is not
         above 1 and rank the rest by support, highest first.
      2. Add the ten top-ranked patterns to a copy of the table returned
         by ``import_dat('./bitcoin2016.dat')``.
      3. Call ``painter`` once per pattern (in ``cov_order`` order) with a
         distinct negative marker, then map every marker in the dataset
         back to its pattern; all other cells are labelled ``'None'``.
      4. Read ``./bitcoin2016.xlsx``, attach per-row pattern and colour
         columns and build the coloured line segments with ``xyc``.
      5. Write ``bitcoindaily.html`` and show a price figure stacked on a
         volume figure that shares its x range.

    Returns:
        None
    """
    # Load the dictionary to see the patterns
    btc_dict = load_dictionary('./bitcoin2016.dict')
    # Remove the singletons
    btc_dict = {k: v for k, v in btc_dict.items() if v[1] > 1}
    # Turn it into a dataframe
    df = pd.DataFrame.from_dict(btc_dict,
                                orient='Index',
                                columns=['Support', 'Length', 'Time'])
    # Sort them by support, highest first
    df = df.sort_values(by='Support')[::-1]
    st, d = import_dat('./bitcoin2016.dat')
    ct = st.copy()

    # Only the ten best-supported patterns are added to the code table
    for key in df.index[:10]:
        ct = add(key, ct, d)

    patterns = {key: value for key, value in ct.items() if value[1] > 1}
    ordered_p = cov_order(patterns)

    # Each pattern gets its own negative marker (-100, -200, -400, ...) so
    # the cells it covers can be traced back to it afterwards
    val_d = {}
    sign = -100
    for pattern in ordered_p:
        d = painter(pattern, d, sign)
        val_d[sign] = pattern
        sign *= 2

    # Replace every marker by its pattern; anything else becomes 'None'
    d2 = [[val_d.get(cell, 'None') for cell in row] for row in d]

    df2 = pd.read_excel('./bitcoin2016.xlsx')
    df2['Date'] = pd.to_datetime(df2['<DATE>'])
    df2['ToolTipDates'] = df2.Date.map(lambda x: x.strftime("%d %b %y"))

    colors = [
        '#e6194B', '#3cb44b', '#ffe119', '#4363d8', '#f58231', '#42d4f4',
        '#f032e6', '#e6beff', '#9A6324', '#800000', '#000075'
    ]
    # Wrap around the palette instead of raising IndexError when there are
    # more patterns than colours
    P_TO_COLOR = {
        pattern: colors[i % len(colors)]
        for i, pattern in enumerate(patterns)
    }
    P_TO_COLOR['None'] = '#f1f1f1'

    # Row i of d2 belongs to the i-th of these spreadsheet columns
    series = ('<OPEN>', '<HIGH>', '<LOW>', '<CLOSE>', '<VOL>')
    for i, col in enumerate(series):
        df2[f'pattern{col}'] = [str(cell) for cell in d2[i]]
        df2[f'color{col}'] = [P_TO_COLOR[cell] for cell in d2[i]]

    # For every column generate the (xs, ys, colors) line segments we need
    segments = {
        col: xyc(df2, 'Date', f'og{col}', f'color{col}') for col in series
    }

    # Tooltip fields cannot easily reference names containing '<' and '>',
    # so expose the pattern labels under plain column names as well
    for col in series:
        df2[f'lab{col.strip("<>")}'] = df2[f'pattern{col}']

    source2 = ColumnDataSource(df2)

    output_file('bitcoindaily.html')

    p = figure(x_axis_type='datetime',
               plot_width=1440,
               plot_height=600,
               title="Bitcoin Stock Price")

    # Invisible markers: they only exist so the hover tools have something
    # to attach to. Each name is added exactly once so that a hover tool
    # filtering on it matches a single renderer.
    for col, name in (('<OPEN>', 'open'), ('<CLOSE>', 'close'),
                      ('<HIGH>', 'high'), ('<LOW>', 'low')):
        p.circle(x='Date',
                 y=f'og{col}',
                 name=name,
                 alpha=0,
                 source=source2,
                 size=3)

    # The visible, pattern-coloured price lines
    for col, name in (('<OPEN>', 'q'), ('<HIGH>', 'e'), ('<LOW>', 'ee'),
                      ('<CLOSE>', 'w')):
        line = segments[col]
        p.multi_line(name=name,
                     xs=line[0],
                     ys=line[1],
                     color=line[2],
                     line_width=3)

    # Volume gets its own, shorter figure that pans/zooms with the prices
    q = figure(x_range=p.x_range,
               x_axis_type='datetime',
               plot_width=1440,
               plot_height=200,
               title="Stock Volume",
               y_axis_type='linear')

    q.circle(x='Date',
             y='og<VOL>',
             name='VOL',
             alpha=0,
             source=source2,
             size=3)

    vol_line = segments['<VOL>']
    q.multi_line(name='qw',
                 xs=vol_line[0],
                 ys=vol_line[1],
                 color=vol_line[2],
                 line_width=3)

    hover_options = dict(mode="vline",
                         line_policy='nearest',
                         point_policy='snap_to_data')

    # Full price summary, attached to the 'low' markers
    p.add_tools(
        HoverTool(names=['low'],
                  tooltips=[
                      ('Date : ', '@ToolTipDates'),
                      ('Low Price : ', '@{og<LOW>}{0.2f}'),
                      ('Low Pattern : ', '@labLOW'),
                      ('High Price : ', '@{og<HIGH>}{0.2f}'),
                      ('High Pattern : ', '@labHIGH'),
                      ('Open Price : ', '@{og<OPEN>}{0.2f}'),
                      ('Open Pattern : ', '@labOPEN'),
                      ('Close Price : ', '@{og<CLOSE>}{0.2f}'),
                      ('Close Pattern : ', '@labCLOSE'),
                  ],
                  **hover_options))

    # One small tool per price series that just names the series
    for name, label in (('open', 'Open'), ('close', 'Close'),
                        ('high', 'High'), ('low', 'Low')):
        p.add_tools(
            HoverTool(names=[name],
                      tooltips=[
                          ('Name', label),
                      ],
                      **hover_options))

    # The values shown here come from the volume columns, so label them so
    q.add_tools(
        HoverTool(names=['VOL'],
                  tooltips=[
                      ('Date : ', '@ToolTipDates'),
                      ('Volume : ', '@{og<VOL>}{0.2f}'),
                      ('Volume Pattern : ', '@labVOL'),
                  ],
                  **hover_options))
    show(column(p, q))
Ejemplo n.º 7
0
def pyccoli(filename, cpu=0, *, output_generation=False):
    """Alternate ``ditto_plus`` and ``ditto_min`` until the MDL stops dropping.

    The code table starts as a copy of the first value returned by
    ``import_dat(filename)`` and the first candidates are the product of
    that table with itself. Every round calls ``ditto_plus`` and then
    ``ditto_min``, and the next candidates are ``product(ct, used)``.

    Arguments:
        filename -- passed to ``import_dat``; also embedded verbatim in
            the names of the files written to the working directory
        cpu -- forwarded to ``ditto_plus`` and ``ditto_min``; per the
            original note it sets the number of cores, 0 meaning all

    Keyword Arguments:
        output_generation -- ``False`` (default): compare the MDL once per
            round, after ``ditto_min``; when it is no longer lower than
            the previous one, write ``./ct_{filename}.txt`` and pickle the
            table to ``./{filename}.dict``, then stop.
            ``True``: compare the MDL after each of the two steps, writing
            ``./plus_{filename}_{gen}.txt`` / ``./min_{filename}_{gen}.txt``
            whenever it dropped, and stop at the first step that did not
            lower it. No ``.dict`` file is written in this mode.

    Returns:
        None -- results are only written to disk and printed
    """
    st, d = import_dat(filename)

    cand = product(st, st)
    ct = st.copy()
    mdl = mdl_calc(ct, d, st)
    print(f'Original MDL:\t\t\t{mdl}')
    gen = 0
    while True:
        gen += 1
        ct, used = ditto_plus(cand, st, ct, d, mdl, cpu)

        if output_generation:
            # Compute the MDL once and reuse it for both the comparison
            # and the update
            plus_mdl = mdl_calc(ct, d, st)
            if not plus_mdl < mdl:
                print('Finished')
                break
            mdl = plus_mdl
            _write_codetable(f'./plus_{filename}_{gen}.txt', ct)

        ct = ditto_min(st, ct, d, cpu)

        min_mdl = mdl_calc(ct, d, st)
        if not min_mdl < mdl:
            if not output_generation:
                # Final result: human-readable dump plus a pickle
                _write_codetable(f'./ct_{filename}.txt', ct)
                with open(f'./{filename}.dict', 'wb') as out_pickle:
                    pickle.dump(ct, out_pickle,
                                protocol=pickle.HIGHEST_PROTOCOL)
            print('Finished')
            break

        mdl = min_mdl
        if output_generation:
            _write_codetable(f'./min_{filename}_{gen}.txt', ct)

        cand = product(ct, used)


def _write_codetable(path, ct):
    """Write each entry of ``ct`` to ``path`` as ``key \\t value``, one per line."""
    with open(path, 'w') as o:
        for key, value in ct.items():
            print(f'{key} \t {value}', file=o)