def useCurrentCSV(self):
    """Load the existing '<symbol> ohlc.csv' file into self.dataFile and return it.

    Assumes the CSV has a 'Date' column usable as a parsed date index.
    """
    self.dataFile = pullData.read_csv('{0} ohlc.csv'.format(self.symbol),
                                      index_col='Date', parse_dates=True)
    # NOTE(review): message says "create new file" but this method loads an
    # existing CSV -- confirm the intended wording with the caller.
    print("Entered stxSetFile1b.py to create new file")
    return self.dataFile
def createCSV(self):
    """Write self.timeSeries0 to '<symbol> ohlc.csv', reload it into
    self.dataFile, and return the reloaded frame.
    """
    # to_csv returns None when given a path; the old code printed that None
    # via a Python-2 print statement (a syntax error under Python 3), so the
    # print is dropped and the write is kept.
    self.timeSeries0.to_csv('{0} ohlc.csv'.format(self.symbol))
    self.dataFile = pullData.read_csv('{0} ohlc.csv'.format(self.symbol),
                                      index_col='Date', parse_dates=True)
    return self.dataFile
def createCSV(self):
    """Write self.timeSeries0 to '../<symbol> ohlc.csv' (parent directory),
    reload it into self.dataFile, and return the reloaded frame.
    """
    # Stray bare `print` statement and commented-out dead code removed.
    self.timeSeries0.to_csv('../{0} ohlc.csv'.format(self.symbol))
    self.dataFile = pullData.read_csv('../{0} ohlc.csv'.format(self.symbol),
                                      index_col='Date', parse_dates=True)
    return self.dataFile
def createCSV(self):
    """Write self.timeSeries0 to '<symbol> ohlc.csv', reload it into
    self.dataFile, and return the reloaded frame.
    """
    # Dead commented-out Tk/date-prompt experiment removed.
    self.timeSeries0.to_csv('{0} ohlc.csv'.format(self.symbol))
    self.dataFile = pullData.read_csv('{0} ohlc.csv'.format(self.symbol),
                                      index_col='Date', parse_dates=True)
    return self.dataFile
def createCSV(self):
    """Persist self.timeSeries0 as '<symbol> ohlc.csv' and return the frame
    re-read from disk with a parsed 'Date' index (stored on self.dataFile).
    """
    csv_path = '{0} ohlc.csv'.format(self.symbol)
    self.timeSeries0.to_csv(csv_path)
    self.dataFile = pullData.read_csv(csv_path, index_col='Date',
                                      parse_dates=True)
    return self.dataFile
def useCurrentCSV(self):
    """Load '../<symbol> ohlc.csv' (parent directory) into self.dataFile
    and return it.
    """
    # Dead commented-out variant (non-'../' path) removed.
    self.dataFile = pullData.read_csv('../{0} ohlc.csv'.format(self.symbol),
                                      index_col='Date', parse_dates=True)
    # NOTE(review): message mentions creating a new file but this loads an
    # existing CSV -- confirm the intended wording.
    print("Entered stxSetFile1b.py to create new file")
    return self.dataFile
# setup zmq push CONTEXT = zmq.CONTEXT() TWITTER_PUB_ADDR = 'tcp://127.0.0.1:6050' FR_RCV_ADDR = 'tcp://127.0.0.1:6051' # twitter access codes # need these in real life :) CONSUMER_KEY = '' CONSUMER_SECRET = '' ACCESS_TOKEN = '' ACCESS_TOKEN_SECRET = '' TWITTER_SOURCES_FILE = "nasdaq_finance_400_2014_05_20.csv" TWITTER_SOURCES = read_csv(TWITTER_SOURCES_FILE) def gen_follow_symbols(): """ Create a list of symbols to follow from the csv file """ follow_syms = [] for i, ticker in enumerate(TWITTER_SOURCES.Symbol.values): ticker = "$" + str(ticker) follow_syms.append(ticker) return follow_syms def get_ticker_info(data): """ Lookup symbol's sector in Nasdaq """ m_class = TWITTER_SOURCES[ TWITTER_SOURCES['Symbol'].str.contains(
# Compare SVM and logistic-regression accuracy on two train/test CSV pairs.
# Fixed: `import pandas.io.data as pd` pulled in the deprecated remote-data
# module (removed from modern pandas) only to reach `read_csv`; plain
# `import pandas as pd` provides it in every pandas version.
import pandas as pd
import numpy as np
from sklearn import svm, linear_model
import decisiontree_lzl
import naivebayes_lc


def _load_int_csv(path):
    """Read a CSV of integer feature/label rows into an int ndarray."""
    return np.array(pd.read_csv(path).values.tolist(), dtype=np.int_)


# Primary and "_b" variants of the train/test sets.
train_data = _load_int_csv("train_data.csv")
test_data = _load_int_csv("test_data.csv")
train_data_b = _load_int_csv("train_data_b.csv")
test_data_b = _load_int_csv("test_data_b.csv")

# Columns 0-9 are features, column 10 is the label.
train_X = train_data[:, :10]
train_Y = train_data[:, 10]
test_X = test_data[:, :10]
test_Y = test_data[:, 10]
train_X_b = train_data_b[:, :10]
train_Y_b = train_data_b[:, 10]
test_X_b = test_data_b[:, :10]
test_Y_b = test_data_b[:, 10]

# SVM accuracy on both data sets.
# NOTE(review): the same estimator is refit for the "_b" set, so svm_score
# is captured before the second fit replaces the first model.
clf_svm = svm.SVC()
clf_svm.fit(train_X, train_Y)
svm_score = clf_svm.score(test_X, test_Y)
clf_svm.fit(train_X_b, train_Y_b)
svm_score_b = clf_svm.score(test_X_b, test_Y_b)

# Logistic-regression accuracy on the primary set.
clf_lr = linear_model.LogisticRegression()
clf_lr.fit(train_X, train_Y)
lr_score = clf_lr.score(test_X, test_Y)
def iterlines(x, window, fromdate='1900-01-01', todate=None, charts=True,
              log_scale=False, directory=None):
    """
    Iterative minitrend detector: walk the series and record every point that
    exceeds the max (or undercuts the min) of the preceding `window` points,
    yielding running extrema suitable for simple trading systems/backtests.

    x         - ticker symbol (str) or data set (array-like of prices)
    window    - lookback length; values < 1 are treated as a fraction of the
                series length
    fromdate  - start date for the stock data
    todate    - end date for the stock data (None = most recent)
    charts    - whether to plot results (requires matplotlib)
    log_scale - convert the data to logarithmic scale before analysis
    log/bug fixes vs original:
      * np.log was applied twice when log_scale was True (duplicated block)
      * `return R[-1]` referenced an undefined name (guaranteed NameError)
      * pandas/matplotlib were imported even when not needed
    directory - local directory holding '<ticker>.csv' files to skip download

    Returns (xmax, ymax, xmin, ymin): indexes and values of the detected
    running maxima and minima (mirrors segtrend's tuple-style output).
    """
    import numpy as np

    # Resolve the input into a 1-D price series `y`.
    if type(x) == str:
        # Lazy import: the legacy remote-data module is only needed when a
        # ticker symbol is passed.
        import pandas.io.data as pd
        if directory is None:
            if todate is None:
                y = pd.DataReader(x, 'yahoo', fromdate)
                y = np.array(y['Adj Close'])
            else:
                y = pd.DataReader(x, 'yahoo', fromdate, todate)
                y = np.array(y['Adj Close'])
        else:
            y = pd.read_csv(directory + x + '.csv')
            if (fromdate == '1900-01-01') & (todate is None):
                y = np.array(y['Adj Close'])
            elif (fromdate == '1900-01-01') & (todate is not None):
                todate = np.where(y.Date == str(todate)[0:10])[0][0]
                y = np.array(y['Adj Close'])[0:todate]
            elif (fromdate != '1900-01-01') & (todate is None):
                fromdate = np.where(y.Date == str(fromdate)[0:10])[0][0]
                y = y['Adj Close'][fromdate:]
            elif (fromdate != '1900-01-01') & (todate is not None):
                fromdate = np.where(y.Date == str(fromdate)[0:10])[0][0]
                todate = np.where(y.Date == str(todate)[0:10])[0][0]
                y = y['Adj Close'][fromdate:todate]
    else:
        y = x

    # Convert to log scale exactly once if desired.
    if log_scale is True:
        y = np.log(y)

    # Fractional windows are interpreted relative to the series length.
    if window < 1:
        window = int(window * len(y))

    x = np.arange(0, len(y))
    xmax = np.array(x[0])
    xmin = np.array(x[0])
    ymax = np.array(y[0])
    ymin = np.array(y[0])
    for i in x[window:]:
        if y[i] > max(y[i - window:i]):
            ymax = np.append(ymax, y[i])
            xmax = np.append(xmax, x[i])
        if y[i] < min(y[i - window:i]):
            ymin = np.append(ymin, y[i])
            xmin = np.append(xmin, x[i])

    # Plot results if desired.
    if charts is True:
        # Lazy import so the numeric path works without matplotlib installed.
        from matplotlib.pyplot import plot, show, grid
        plot(x, y)
        plot(xmax, ymax, 'o')
        plot(xmin, ymin, 'o')
        grid(True)
        show()

    return xmax, ymax, xmin, ymin
def segtrend(x, segments=2.0, charts=True, fromdate='1900-01-01', todate=None,
             log_scale=False, directory=None):
    """
    Segmented trendline algorithm: split the series into `segments` equal
    pieces, find each piece's extrema, and draw trendlines between
    consecutive extrema.

    x         - ticker symbol (str) or data set (array-like of prices)
    segments  - number of segments to split the data into (coerced to int)
    charts    - whether to plot results (requires matplotlib)
    fromdate  - when to start pulling stock data (defaults to all data)
    todate    - when to stop pulling stock data (None = most recent)
    log_scale - convert data to logarithmic scale before analysis
    directory - local directory holding '<ticker>.csv' files to skip download

    Fixes vs original: grid(True)/show() ran even when charts was False, and
    pandas/matplotlib were imported even for pure numeric calls; both are now
    conditional. Numeric logic is unchanged.

    Returns (x_maxima, maxima, x_minima, minima).
    """
    import numpy as np

    # Resolve the input into a 1-D price series `y`.
    if type(x) == str:
        # Lazy import: legacy remote-data module only needed for symbols.
        import pandas.io.data as pd
        if directory is None:
            if todate is None:
                y = pd.DataReader(x, 'yahoo', fromdate)
                y = np.array(y['Adj Close'])
            else:
                y = pd.DataReader(x, 'yahoo', fromdate, todate)
                y = np.array(y['Adj Close'])
        else:
            y = pd.read_csv(directory + x + '.csv')
            if (fromdate == '1900-01-01') & (todate is None):
                y = np.array(y['Adj Close'])
            elif (fromdate == '1900-01-01') & (todate is not None):
                todate = np.where(y.Date == str(todate)[0:10])[0][0]
                y = np.array(y['Adj Close'])[0:todate]
            elif (fromdate != '1900-01-01') & (todate is None):
                fromdate = np.where(y.Date == str(fromdate)[0:10])[0][0]
                y = y['Adj Close'][fromdate:]
            elif (fromdate != '1900-01-01') & (todate is not None):
                fromdate = np.where(y.Date == str(fromdate)[0:10])[0][0]
                todate = np.where(y.Date == str(todate)[0:10])[0][0]
                y = y['Adj Close'][fromdate:todate]
    else:
        y = x

    if log_scale:
        y = np.log(y)  # change to log scale if desired

    # Per-segment extrema.
    segments = int(segments)
    maxima = np.ones(segments)
    minima = np.ones(segments)
    segsize = int(len(y) / segments)
    for i in range(1, segments + 1):
        ind2 = i * segsize
        ind1 = ind2 - segsize
        maxima[i - 1] = max(y[ind1:ind2])
        minima[i - 1] = min(y[ind1:ind2])

    # Indexes of those extrema in the data (first occurrence wins).
    x_maxima = np.ones(segments)
    x_minima = np.ones(segments)
    for i in range(0, segments):
        x_maxima[i] = np.where(y == maxima[i])[0][0]
        x_minima[i] = np.where(y == minima[i])[0][0]

    if charts:
        # Lazy import so the numeric path works without matplotlib installed.
        from matplotlib.pyplot import plot, grid, show
        plot(y)

    # Trendlines between consecutive segment extrema.
    for i in range(0, segments - 1):
        maxslope = (maxima[i + 1] - maxima[i]) / (x_maxima[i + 1] - x_maxima[i])
        a_max = maxima[i] - (maxslope * x_maxima[i])
        b_max = maxima[i] + (maxslope * (len(y) - x_maxima[i]))
        maxline = np.linspace(a_max, b_max, len(y))

        minslope = (minima[i + 1] - minima[i]) / (x_minima[i + 1] - x_minima[i])
        a_min = minima[i] - (minslope * x_minima[i])
        b_min = minima[i] + (minslope * (len(y) - x_minima[i]))
        minline = np.linspace(a_min, b_min, len(y))

        if charts:
            plot(maxline, 'g')
            plot(minline, 'r')

    if charts:
        grid(True)
        show()

    # OUTPUT
    return x_maxima, maxima, x_minima, minima

# Example:
# segtrend(x='goog', segments=5, charts=True, fromdate='1900-01-01',
#          todate=None, directory='/path/to/Data Dump/')