def useCurrentCSV(self):
    self.dataFile = pullData.read_csv('{0} ohlc.csv'.format(self.symbol),
                                      index_col='Date',
                                      parse_dates=True)
    # print('self.dataFile print:', self.dataFile[2:3])
    print("Entered stxSetFile1b.py to create new file")
    return self.dataFile
Example #2
def createCSV(self):
    # Write the OHLC series to '<symbol> ohlc.csv', then read it back with a Date index
    self.timeSeries0.to_csv('{0} ohlc.csv'.format(self.symbol))
    self.dataFile = pullData.read_csv('{0} ohlc.csv'.format(self.symbol),
                                      index_col='Date',
                                      parse_dates=True)
    return self.dataFile
Example #3
def createCSV(self):
    # Write the OHLC series to the parent directory, then read it back with a Date index
    self.timeSeries0.to_csv('../{0} ohlc.csv'.format(self.symbol))
    self.dataFile = pullData.read_csv('../{0} ohlc.csv'.format(self.symbol),
                                      index_col='Date',
                                      parse_dates=True)
    return self.dataFile
def createCSV(self):
    self.timeSeries0.to_csv('{0} ohlc.csv'.format(self.symbol))
    self.dataFile = pullData.read_csv('{0} ohlc.csv'.format(self.symbol),
                                      index_col='Date',
                                      parse_dates=True)
    return self.dataFile
Example #6
def useCurrentCSV(self):
    self.dataFile = pullData.read_csv('../{0} ohlc.csv'.format(self.symbol),
                                      index_col='Date',
                                      parse_dates=True)
    # print('self.dataFile print:', self.dataFile[2:3])
    print("Entered stxSetFile1b.py to create new file")
    return self.dataFile
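Examples #1 through #6 all follow the same cache-to-CSV pattern: write the downloaded OHLC series to '<symbol> ohlc.csv' once, then reload it with a parsed Date index. The sketch below is a minimal, self-contained illustration of that pattern; the StockData class name, the tiny hand-built DataFrame, and the 'AAPL' symbol are assumptions for demonstration, not part of any example above.

import pandas as pd


class StockData:
    """Hypothetical container mirroring the cache-to-CSV pattern above."""

    def __init__(self, symbol, time_series):
        self.symbol = symbol            # e.g. 'AAPL'
        self.timeSeries0 = time_series  # OHLC DataFrame indexed by Date
        self.dataFile = None

    def createCSV(self):
        # Write the series to '<symbol> ohlc.csv', then read it back with a Date index.
        path = '{0} ohlc.csv'.format(self.symbol)
        self.timeSeries0.to_csv(path)
        self.dataFile = pd.read_csv(path, index_col='Date', parse_dates=True)
        return self.dataFile

    def useCurrentCSV(self):
        # Reuse a CSV written earlier instead of re-downloading the data.
        path = '{0} ohlc.csv'.format(self.symbol)
        self.dataFile = pd.read_csv(path, index_col='Date', parse_dates=True)
        return self.dataFile


if __name__ == '__main__':
    # Tiny stand-in for a downloaded OHLC time series.
    ts = pd.DataFrame(
        {'Open': [1.0, 2.0], 'High': [1.5, 2.5],
         'Low': [0.5, 1.5], 'Close': [1.2, 2.2]},
        index=pd.to_datetime(['2014-01-02', '2014-01-03']))
    ts.index.name = 'Date'
    stock = StockData('AAPL', ts)
    print(stock.createCSV().head())
    print(stock.useCurrentCSV().head())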
Example #7
import zmq
from pandas import read_csv

# setup zmq push
CONTEXT = zmq.Context()

TWITTER_PUB_ADDR = 'tcp://127.0.0.1:6050'
FR_RCV_ADDR = 'tcp://127.0.0.1:6051'

# twitter access codes
# need these in real life :)

CONSUMER_KEY = ''
CONSUMER_SECRET = ''
ACCESS_TOKEN = ''
ACCESS_TOKEN_SECRET = ''

TWITTER_SOURCES_FILE = "nasdaq_finance_400_2014_05_20.csv"
TWITTER_SOURCES = read_csv(TWITTER_SOURCES_FILE)


def gen_follow_symbols():
    """ Create a list of symbols to follow from the csv file """
    follow_syms = []
    for ticker in TWITTER_SOURCES.Symbol.values:
        follow_syms.append("$" + str(ticker))
    return follow_syms
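

# A minimal sketch (not from the original example) of how the PUSH side
# might be wired up: bind a pyzmq PUSH socket to TWITTER_PUB_ADDR and
# stream each followed symbol. The function name and the send loop are
# assumptions; only the constants defined above come from the source.
def push_follow_symbols():
    context = zmq.Context()
    sender = context.socket(zmq.PUSH)
    sender.bind(TWITTER_PUB_ADDR)
    for sym in gen_follow_symbols():
        sender.send_string(sym)  # e.g. "$AAPL"
    sender.close()
    context.term()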


def get_ticker_info(data):
    """ Lookup symbol's sector in Nasdaq """
    m_class = TWITTER_SOURCES[
                TWITTER_SOURCES['Symbol'].str.contains(
Example #8
import pandas as pd
import numpy as np
from sklearn import svm, linear_model
import decisiontree_lzl
import naivebayes_lc

train_data = np.array(pd.read_csv("train_data.csv").values.tolist(), dtype=np.int_)
test_data = np.array(pd.read_csv("test_data.csv").values.tolist(), dtype=np.int_)

train_data_b = np.array(pd.read_csv("train_data_b.csv").values.tolist(), dtype=np.int_)
test_data_b = np.array(pd.read_csv("test_data_b.csv").values.tolist(), dtype=np.int_)

train_X = train_data[:,:10]
train_Y = train_data[:,10]
test_X = test_data[:,:10]
test_Y = test_data[:,10]

train_X_b = train_data_b[:,:10]
train_Y_b = train_data_b[:,10]
test_X_b = test_data_b[:,:10]
test_Y_b = test_data_b[:,10]

clf_svm = svm.SVC()
clf_svm.fit(train_X, train_Y)
svm_score = clf_svm.score(test_X, test_Y)
clf_svm.fit(train_X_b, train_Y_b)
svm_score_b = clf_svm.score(test_X_b, test_Y_b)

clf_lr = linear_model.LogisticRegression()
clf_lr.fit(train_X, train_Y)
lr_score = clf_lr.score(test_X, test_Y)
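
# A possible continuation, mirroring the SVM lines above; the
# logistic-regression fit on the second dataset and the print layout
# are assumptions, not code from the source.
clf_lr.fit(train_X_b, train_Y_b)
lr_score_b = clf_lr.score(test_X_b, test_Y_b)

print("SVM accuracy:                 {:.3f} / {:.3f}".format(svm_score, svm_score_b))
print("Logistic regression accuracy: {:.3f} / {:.3f}".format(lr_score, lr_score_b))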
Example #9
def iterlines(x, window, fromdate='1900-01-01', todate=None, charts=True,
              log_scale=False, directory=None):
    """
    Turn minitrends to iterative process more easily adaptable to
    implementation in simple trading systems; allows backtesting functionality.

    x - ticker symbol or data set
    window - float defining how far back the algorithm checks for critical
             values
    fromdate - start date for the stock data
    todate - end date for the stock data
    charts - boolean value saying whether to print chart to screen
    log_scale - converts imported data to logarithmic scale
    directory - directory in which data may be found to save on import speed
    """
    # Import packages
    import pandas as pd
    import pandas.io.data as web  # DataReader (moved to the pandas_datareader package in newer pandas)
    import numpy as np
    from matplotlib.pyplot import subplot, plot, show, title, grid

    # Check inputs and get data
    if type(x) == str:
        if directory is None:
            if todate is None:
                y = web.DataReader(x, 'yahoo', fromdate)
                y = np.array(y['Adj Close'])
            else:
                y = web.DataReader(x, 'yahoo', fromdate, todate)
                y = np.array(y['Adj Close'])
        else:
            y = pd.read_csv(directory + x + '.csv')
            if (fromdate == '1900-01-01') & (todate is None):
                y = np.array(y['Adj Close'])
            elif (fromdate == '1900-01-01') & (todate is not None):
                todate = np.where(y.Date == str(todate)[0:10])[0][0]
                y = np.array(y['Adj Close'])[0:todate]
            elif (fromdate != '1900-01-01') & (todate is None):
                fromdate = np.where(y.Date == str(fromdate)[0:10])[0][0]
                y = y['Adj Close'][fromdate:]
            elif (fromdate != '1900-01-01') & (todate is not None):
                fromdate = np.where(y.Date == str(fromdate)[0:10])[0][0]
                todate = np.where(y.Date == str(todate)[0:10])[0][0]
                y = y['Adj Close'][fromdate:todate]
    else:
        y = x

    # Turn to log scale if desired
    if log_scale is True:
        y = np.log(y)

    # A window below 1 is treated as a fraction of the series length
    if window < 1:
        window = int(window * len(y))
    x = np.arange(0, len(y))
    xmax = np.array(x[0])
    xmin = np.array(x[0])
    ymax = np.array(y[0])
    ymin = np.array(y[0])

    for i in x[window:]:
        if y[i] > max(y[i-window:i]):
            ymax = np.append(ymax, y[i])
            xmax = np.append(xmax, x[i])
        if y[i] < min(y[i-window:i]):
            ymin = np.append(ymin, y[i])
            xmin = np.append(xmin, x[i])

    # Plot results if desired
    if charts is True:
        plot(x, y)
        plot(xmax, ymax, 'o')
        plot(xmin, ymin, 'o')
        grid(True)
        show()

    return xmax, ymax, xmin, ymin
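
Because iterlines accepts either a ticker string or a ready-made data set, it can be exercised without a network download; the synthetic random-walk series below is purely illustrative.

import numpy as np

# Synthetic price-like series (a random walk) -- illustrative input only.
prices = 100 + np.cumsum(np.random.randn(500))

# 20-bar lookback window; charts=False keeps the run non-interactive.
xmax, ymax, xmin, ymin = iterlines(prices, window=20, charts=False)
print(len(xmax), "local maxima and", len(xmin), "local minima found")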
Example #10
def segtrend(x, segments=2.0, charts=True, fromdate='1900-01-01', todate=None,
             log_scale=False, directory=None):
    """
    Trendline algorithm that segments data into pieces and finds trendlines
    using those subsets.

    x - ticker symbol or data set
    segments - number of segments into which the data is divided before fitting trendlines
    charts - boolean, whether or not to print charts to screen
    fromdate - when to start pulling stock data (defaults to all data)
    todate - when to stop pulling stock data (if none, defaults to most recent)
    log_scale - converts data to logarithmic scale
    directory - directory in which data may be found to save on import speed
    """
    # IMPORT PACKAGES
    import pandas as pd
    import pandas.io.data as web  # DataReader (moved to the pandas_datareader package in newer pandas)
    from matplotlib.pyplot import plot, grid, show, title
    import numpy as np

    # Check inputs and get data
    if type(x) == str:
        if directory is None:
            if todate is None:
                y = web.DataReader(x, 'yahoo', fromdate)
                y = np.array(y['Adj Close'])
            else:
                y = web.DataReader(x, 'yahoo', fromdate, todate)
                y = np.array(y['Adj Close'])
        else:
            y = pd.read_csv(directory + x + '.csv')
            if (fromdate == '1900-01-01') & (todate is None):
                y = np.array(y['Adj Close'])
            elif (fromdate == '1900-01-01') & (todate is not None):
                todate = np.where(y.Date == str(todate)[0:10])[0][0]
                y = np.array(y['Adj Close'])[0:todate]
            elif (fromdate != '1900-01-01') & (todate is None):
                fromdate = np.where(y.Date == str(fromdate)[0:10])[0][0]
                y = y['Adj Close'][fromdate:]
            elif (fromdate != '1900-01-01') & (todate is not None):
                fromdate = np.where(y.Date == str(fromdate)[0:10])[0][0]
                todate = np.where(y.Date == str(todate)[0:10])[0][0]
                y = y['Adj Close'][fromdate:todate]
    else:
        y = x
    if log_scale:
        y = np.log(y)  # change to log scale if desired
    # Implement trendlines

    segments = int(segments)
    maxima = np.ones(segments)
    minima = np.ones(segments)
    segsize = int(len(y)/(segments))
    for i in range(1, segments+1):
        ind2 = i*segsize
        ind1 = ind2 - segsize
        maxima[i-1] = max(y[ind1:ind2])
        minima[i-1] = min(y[ind1:ind2])
    
    # Find the indexes of these maxima in the data
    x_maxima = np.ones(segments)
    x_minima = np.ones(segments)
    for i in range(0, segments):
        x_maxima[i] = np.where(y == maxima[i])[0][0]
        x_minima[i] = np.where(y == minima[i])[0][0]
    
    # Return some output
    if charts:
        plot(y)
    for i in range(0, segments-1):
        maxslope = (maxima[i+1] - maxima[i]) / (x_maxima[i+1] - x_maxima[i])
        a_max = maxima[i] - (maxslope * x_maxima[i])
        b_max = maxima[i] + (maxslope * (len(y) - x_maxima[i]))
        maxline = np.linspace(a_max, b_max, len(y))

        minslope = (minima[i+1] - minima[i]) / (x_minima[i+1] - x_minima[i])
        a_min = minima[i] - (minslope * x_minima[i])
        b_min = minima[i] + (minslope * (len(y) - x_minima[i]))
        minline = np.linspace(a_min, b_min, len(y)) 

        if charts:
            plot(maxline, 'g')
            plot(minline, 'r')

    # OUTPUT
    if charts:
        grid(True)
        show()
    return x_maxima, maxima, x_minima, minima

# Run it
# segtrend(x='goog',
#          segments=5,
#          charts=True,
#          fromdate='1900-01-01',
#          todate=None,
#          directory='/Users/JAmos/Dropbox/Research/Trading/Data Dump/')