def mrTest(filename):
    """Score each stock's saved "mr" models against every other stock.

    For every ticker ``s`` in the hard-coded list, rows are appended to the
    CSV file ``s + filename``: a title row holding the ticker, a
    ``t, R^2, AIC`` header, then one row per time window 2, 4, ..., 200.
    Each model row loaded from ``<s>/<s>mrValidation.csv`` is passed to
    ``anova.ols`` together with the features of each of the other tickers,
    and the resulting ``R2`` and ``ll()[1]`` values are averaged over those
    tickers.

    Args:
        filename: Suffix appended to each ticker to build the output path.

    Raises:
        KeyError: If no result was accumulated for one of the even windows
            2..200 when the rows are written.
    """
    stocks = ['C', 'FDX', 'KO', 'MSFT', 'SBUX', 'NFLX', 'LUV']
    for s in stocks:
        # Every ticker except the one whose models are being scored.
        remainingStocks = [ticker for ticker in stocks if ticker != s]
        divisor = float(len(remainingStocks))
        table = {}
        # The context manager closes the file even if loading or scoring
        # raises; previously the handle was never closed.
        with open(s + filename, 'a') as csvFile:
            f = csv.writer(csvFile)
            f.writerow([s])
            f.writerow(['t', 'R^2', 'AIC'])
            path = createPath(s + '/' + s + 'mrValidation.csv')
            models = loadModels(path, "mr")
            for line in models:
                i = line['timewindow']
                for rs in remainingStocks:
                    featurePath = createPath(rs + "/")
                    features = loadFeatures(featurePath, i)
                    prices, ema, rsi, macd = [], [], [], []
                    # Single pass, in case `features` can only be iterated once.
                    for row in features:
                        prices.append(row['price'])
                        ema.append(row['ema'])
                        rsi.append(row['rsi'])
                        macd.append(row['macd'])
                    y = np.array(prices)
                    x = np.vstack([ema, rsi, macd]).T
                    coefficients = [line['constant'], line['ema'],
                                    line['rsi'], line['macd']]
                    validationStats = anova.ols(y, x, coefficients)
                    r2 = validationStats.R2
                    # NOTE(review): ll()[1] is reported under the 'AIC'
                    # header; confirm against anova that index 1 is the AIC.
                    aic = validationStats.ll()[1]
                    # Pre-divide by the number of other stocks so that the
                    # running sum in `table` ends up being their mean.
                    stats = [r2 / divisor, aic / divisor]
                    if i in table:
                        table[i] = [sum(pair) for pair in zip(table[i], stats)]
                    else:
                        table[i] = stats
                    # Single-argument print(...) behaves the same as the
                    # Python 2 print statement.
                    print(i)
                    print(stats)
            # Write the averaged results.
            # NOTE(review): assumes `models` holds an integer 'timewindow'
            # for every even window 2..200 - confirm against loadModels.
            for i in range(2, 202, 2):
                table[i].insert(0, i)
                print(table[i])
                f.writerow(table[i])
def crossValidationFeatures(featureLocation, filename, algorithm, k=10):
    """Cross-validate ``algorithm`` on stored features for each time window.

    For every even window ``i`` from 2 to 200 the features returned by
    ``loadFeatures(featureLocation, i)`` are split by
    ``k_fold_cross_validation``. On each fold the algorithm is given the
    training part, ``findSol(i)`` supplies the coefficients, and
    ``anova.ols`` is called with those coefficients on the validation part.
    One row per window is appended to ``filename``: the window, the
    coefficients of the fold with the highest R^2, and the R^2 / AIC values
    summed over the folds after dividing each by ``k``.

    Args:
        featureLocation: Location passed through to ``loadFeatures``.
        filename: CSV file to append to.
        algorithm: Object providing ``setFeatures(training)`` and
            ``findSol(window)``; the solution is indexed by 'constant',
            'EMA', 'RSI' and 'MACD'.
        k: Fold count passed to ``k_fold_cross_validation`` and used as the
            divisor of the averages (assumes exactly ``k`` folds are
            produced - TODO confirm).

    Raises:
        KeyError: If the splitter yields no folds for a window.
    """
    # The context manager closes the file even if a fold raises; previously
    # the handle was never closed.
    with open(filename, 'a') as csvFile:
        f = csv.writer(csvFile)
        f.writerow(['t', 'constant', 'EMA', 'RSI', 'MACD', 'R^2', 'AIC'])
        table = {}
        divisor = float(k)
        for i in range(2, 202, 2):
            features = loadFeatures(featureLocation, i)
            bestSol = []
            # None (instead of 0) so the first fold always seeds bestSol.
            # With a 0 start, a window whose folds all score R^2 <= 0 was
            # written without its four coefficient columns.
            bestR2 = None
            for training, validation in k_fold_cross_validation(features, k):
                algorithm.setFeatures(training)
                sol = algorithm.findSol(i)
                prices, ema, rsi, macd = [], [], [], []
                for row in validation:
                    prices.append(row['price'])
                    ema.append(row['ema'])
                    rsi.append(row['rsi'])
                    macd.append(row['macd'])
                y = np.array(prices)
                x = np.vstack([ema, rsi, macd]).T
                coefficients = [sol['constant'], sol['EMA'],
                                sol['RSI'], sol['MACD']]
                validationStats = anova.ols(y, x, coefficients)
                r2 = validationStats.R2
                # NOTE(review): ll()[1] is reported under the 'AIC' header;
                # confirm against anova that index 1 is the AIC.
                aic = validationStats.ll()[1]
                if bestR2 is None or r2 > bestR2:
                    bestR2 = r2
                    bestSol = list(coefficients)
                # Pre-divide by k so the running sum becomes the fold mean.
                stats = [r2 / divisor, aic / divisor]
                if i in table:
                    table[i] = [sum(pair) for pair in zip(table[i], stats)]
                else:
                    table[i] = stats
            # Row layout matches the header: window, best coefficients,
            # averaged statistics.
            table[i] = [i] + bestSol + table[i]
            # Single-argument print(...) behaves the same as the Python 2
            # print statement.
            print(i)
            print(table[i])
            f.writerow(table[i])
            # Push each finished window to disk before the next one starts.
            csvFile.flush()
def emaTest(filename):
    """Score each stock's saved "emaOnly" models against every other stock.

    For every ticker ``s`` in the hard-coded list, rows are appended to the
    CSV file ``s + filename``: a title row holding the ticker, a
    ``t, R^2, AIC`` header, then one row per time window 2, 4, ..., 200.
    Each model row loaded from ``<s>/<s>emaValidation.csv`` is passed to
    ``anova.ols`` together with the price/EMA features of each of the other
    tickers, and the resulting ``R2`` and ``ll()[1]`` values are averaged
    over those tickers.

    Args:
        filename: Suffix appended to each ticker to build the output path.

    Raises:
        KeyError: If no result was accumulated for one of the even windows
            2..200 when the rows are written.
    """
    stocks = ['C', 'FDX', 'KO', 'MSFT', 'SBUX', 'NFLX', 'LUV']
    for s in stocks:
        # Every ticker except the one whose models are being scored.
        remainingStocks = [ticker for ticker in stocks if ticker != s]
        divisor = float(len(remainingStocks))
        table = {}
        # The context manager closes the file even if loading or scoring
        # raises; previously the handle was never closed.
        with open(s + filename, 'a') as csvFile:
            f = csv.writer(csvFile)
            f.writerow([s])
            f.writerow(['t', 'R^2', 'AIC'])
            path = createPath(s + '/' + s + 'emaValidation.csv')
            models = loadModels(path, "emaOnly")
            for line in models:
                i = line['timewindow']
                for rs in remainingStocks:
                    featurePath = createPath(rs + "/")
                    features = loadFeatures(featurePath, i)
                    prices, ema = [], []
                    # Single pass, in case `features` can only be iterated once.
                    for row in features:
                        prices.append(row['price'])
                        ema.append(row['ema'])
                    y = np.array(prices)
                    # One regressor: vstack + transpose gives a single column.
                    x = np.vstack([ema]).T
                    validationStats = anova.ols(
                        y, x, [line['constant'], line['ema']])
                    r2 = validationStats.R2
                    # NOTE(review): ll()[1] is reported under the 'AIC'
                    # header; confirm against anova that index 1 is the AIC.
                    aic = validationStats.ll()[1]
                    # Pre-divide by the number of other stocks so that the
                    # running sum in `table` ends up being their mean.
                    stats = [r2 / divisor, aic / divisor]
                    if i in table:
                        table[i] = [sum(pair) for pair in zip(table[i], stats)]
                    else:
                        table[i] = stats
                    # Single-argument print(...) behaves the same as the
                    # Python 2 print statement.
                    print(i)
                    print(stats)
            # Write the averaged results.
            # NOTE(review): assumes `models` holds an integer 'timewindow'
            # for every even window 2..200 - confirm against loadModels.
            for i in range(2, 202, 2):
                table[i].insert(0, i)
                print(table[i])
                f.writerow(table[i])
def simpleCrossValidationFeatures(featureLocation, filename, algorithm, k=10):
    """Cross-validate an EMA-only ``algorithm`` on stored features.

    For every even window ``i`` from 2 to 200 the features returned by
    ``loadFeatures(featureLocation, i)`` are split by
    ``k_fold_cross_validation``. On each fold the algorithm is given the
    training part, ``findSol(i)`` supplies the coefficients, and
    ``anova.ols`` is called with those coefficients on the validation part.
    After all windows are processed, one row per window is appended to
    ``filename``: the window, the coefficients of the fold with the highest
    R^2, and the R^2 / AIC values summed over the folds after dividing each
    by ``k``.

    Args:
        featureLocation: Location passed through to ``loadFeatures``.
        filename: CSV file to append to.
        algorithm: Object providing ``setFeatures(training)`` and
            ``findSol(window)``; the solution is indexed by 'constant' and
            'EMA'.
        k: Fold count passed to ``k_fold_cross_validation`` and used as the
            divisor of the averages (assumes exactly ``k`` folds are
            produced - TODO confirm).

    Raises:
        KeyError: If the splitter yields no folds for a window.
    """
    # The context manager closes the file even if a fold raises; previously
    # the handle was never closed.
    with open(filename, 'a') as csvFile:
        f = csv.writer(csvFile)
        f.writerow(['t', 'constant', 'EMA', 'R^2', 'AIC'])
        table = {}
        divisor = float(k)
        for i in range(2, 202, 2):
            features = loadFeatures(featureLocation, i)
            bestSol = []
            # None (instead of 0) so the first fold always seeds bestSol.
            # With a 0 start, a window whose folds all score R^2 <= 0 was
            # written without its coefficient columns.
            bestR2 = None
            for training, validation in k_fold_cross_validation(features, k):
                algorithm.setFeatures(training)
                sol = algorithm.findSol(i)
                prices, ema = [], []
                for row in validation:
                    prices.append(row['price'])
                    ema.append(row['ema'])
                y = np.array(prices)
                # One regressor: vstack + transpose gives a single column.
                x = np.vstack([ema]).T
                coefficients = [sol['constant'], sol['EMA']]
                validationStats = anova.ols(y, x, coefficients)
                r2 = validationStats.R2
                # NOTE(review): ll()[1] is reported under the 'AIC' header;
                # confirm against anova that index 1 is the AIC.
                aic = validationStats.ll()[1]
                if bestR2 is None or r2 > bestR2:
                    bestR2 = r2
                    bestSol = list(coefficients)
                # Pre-divide by k so the running sum becomes the fold mean.
                stats = [r2 / divisor, aic / divisor]
                if i in table:
                    table[i] = [sum(pair) for pair in zip(table[i], stats)]
                else:
                    table[i] = stats
                # Single-argument print(...) behaves the same as the
                # Python 2 print statement.
                print(i)
                print(stats)
            # Put the best fold's coefficients in front of the averages.
            table[i] = bestSol + table[i]
        # Write the averaged results.
        for i in range(2, 202, 2):
            table[i].insert(0, i)
            print(table[i])
            f.writerow(table[i])
def crossValidation(filename, timeseries, algorithm, k=10):
    """Cross-validate ``algorithm`` on a raw time series, fold by fold.

    The ``(key, value)`` items of ``timeseries`` are split by
    ``k_fold_cross_validation``. Each fold's training and validation items
    are rebuilt into ``OrderedDict`` objects and wrapped in
    ``tf.TechnicalFeatures``. The algorithm receives the training features,
    and for every even window from 2 to 200 the coefficients returned by
    ``findSol`` are passed to ``anova.ols`` with the validation data of
    that window. The coefficients, R^2 and AIC are each divided by ``k``
    and summed over the folds; one row per window is then appended to
    ``filename``.

    Args:
        filename: CSV file to append to.
        timeseries: Mapping whose ``items()`` are split into folds.
        algorithm: Object providing ``setFeatures(features)`` and
            ``findSol(window)``; the solution is indexed by 'constant',
            'EMA', 'RSI' and 'MACD'.
        k: Fold count passed to ``k_fold_cross_validation`` and used as the
            divisor of the averages (assumes exactly ``k`` folds are
            produced - TODO confirm).

    Raises:
        KeyError: If the splitter yields no folds, so nothing was
            accumulated for the windows being written.
    """
    # The context manager closes the file even if a fold raises; previously
    # the handle was never closed.
    with open(filename, 'a') as csvFile:
        f = csv.writer(csvFile)
        f.writerow(['t', 'constant', 'EMA', 'RSI', 'MACD', 'R^2', 'AIC'])
        # list(...) is a no-op copy on Python 2 and keeps the items a real
        # sequence where items() returns a view.
        timeseries = list(timeseries.items())
        table = {}
        divisor = float(k)
        for training, validation in k_fold_cross_validation(timeseries, k):
            # Rebuild ordered mappings from the (key, value) pairs of the fold.
            trainingDict = OrderedDict(training)
            validationDict = OrderedDict(validation)
            tr = tf.TechnicalFeatures(trainingDict)
            vd = tf.TechnicalFeatures(validationDict)
            algorithm.setFeatures(tr)
            for i in range(2, 202, 2):
                sol = algorithm.findSol(i)
                prices, ema, rsi, macd = vd.getTimewindow(i)
                y = np.array(prices)
                x = np.vstack([ema, rsi, macd]).T
                validationStats = anova.ols(
                    y, x,
                    [sol['constant'], sol['EMA'], sol['RSI'], sol['MACD']])
                r2 = validationStats.R2
                # NOTE(review): ll()[1] is reported under the 'AIC' header;
                # confirm against anova that index 1 is the AIC.
                aic = validationStats.ll()[1]
                # Pre-divide by k so the running sum becomes the fold mean;
                # the coefficients are averaged along with the statistics.
                stats = [
                    val / divisor
                    for val in (sol['constant'], sol['EMA'], sol['RSI'],
                                sol['MACD'], r2, aic)
                ]
                if i in table:
                    table[i] = [sum(pair) for pair in zip(table[i], stats)]
                else:
                    table[i] = stats
                # Single-argument print(...) behaves the same as the
                # Python 2 print statement.
                print(i)
                print(stats)
        # Write the averaged results.
        for i in range(2, 202, 2):
            table[i].insert(0, i)
            print(table[i])
            f.writerow(table[i])
def simpleCrossValidation(filename, timeseries, algorithm, k=10):
    """Cross-validate an EMA-only ``algorithm`` on a raw time series.

    The ``(key, value)`` items of ``timeseries`` are split by
    ``k_fold_cross_validation``. Each fold's training and validation items
    are rebuilt into ``OrderedDict`` objects and wrapped in
    ``tf.TechnicalFeatures``. The algorithm receives the training features,
    and for every even window from 2 to 200 the coefficients returned by
    ``findSol`` are passed to ``anova.ols`` with the validation prices and
    EMA of that window. The coefficients, R^2 and AIC are each divided by
    ``k`` and summed over the folds; one row per window is then appended to
    ``filename``.

    Args:
        filename: CSV file to append to.
        timeseries: Mapping whose ``items()`` are split into folds.
        algorithm: Object providing ``setFeatures(features)`` and
            ``findSol(window)``; the solution is indexed by 'constant' and
            'EMA'.
        k: Fold count passed to ``k_fold_cross_validation`` and used as the
            divisor of the averages (assumes exactly ``k`` folds are
            produced - TODO confirm).

    Raises:
        KeyError: If the splitter yields no folds, so nothing was
            accumulated for the windows being written.
    """
    # The context manager closes the file even if a fold raises; previously
    # the handle was never closed.
    with open(filename, 'a') as csvFile:
        f = csv.writer(csvFile)
        f.writerow(['t', 'constant', 'EMA', 'R^2', 'AIC'])
        # list(...) is a no-op copy on Python 2 and keeps the items a real
        # sequence where items() returns a view.
        timeseries = list(timeseries.items())
        table = {}
        divisor = float(k)
        for training, validation in k_fold_cross_validation(timeseries, k):
            # Rebuild ordered mappings from the (key, value) pairs of the fold.
            trainingDict = OrderedDict(training)
            validationDict = OrderedDict(validation)
            tr = tf.TechnicalFeatures(trainingDict)
            vd = tf.TechnicalFeatures(validationDict)
            algorithm.setFeatures(tr)
            for i in range(2, 202, 2):
                sol = algorithm.findSol(i)
                # getTimewindow returns four series; only prices and EMA
                # are used by this EMA-only variant.
                prices, ema, _, _ = vd.getTimewindow(i)
                y = np.array(prices)
                # One regressor: vstack + transpose gives a single column.
                x = np.vstack([ema]).T
                validationStats = anova.ols(
                    y, x, [sol['constant'], sol['EMA']])
                r2 = validationStats.R2
                # NOTE(review): ll()[1] is reported under the 'AIC' header;
                # confirm against anova that index 1 is the AIC.
                aic = validationStats.ll()[1]
                # Pre-divide by k so the running sum becomes the fold mean;
                # the coefficients are averaged along with the statistics.
                stats = [
                    val / divisor
                    for val in (sol['constant'], sol['EMA'], r2, aic)
                ]
                if i in table:
                    table[i] = [sum(pair) for pair in zip(table[i], stats)]
                else:
                    table[i] = stats
                # Single-argument print(...) behaves the same as the
                # Python 2 print statement.
                print(i)
                print(stats)
        # Write the averaged results.
        for i in range(2, 202, 2):
            table[i].insert(0, i)
            print(table[i])
            f.writerow(table[i])