Ejemplo n.º 1
0
def mrTest(filename):
    """Validate each stock's "mr" models against the other stocks' features.

    For every ticker in the hard-coded stock list, the models loaded via
    ``loadModels(path, "mr")`` are evaluated with ``anova.ols`` on the
    features of each of the remaining tickers.  For every time window the
    R^2 value and the second element of ``ll()`` (written under the 'AIC'
    header) are averaged over the remaining tickers and appended to the CSV
    file named ``<ticker> + filename``.

    Args:
        filename: Suffix appended to each ticker symbol to form the output
            file name.  The file is opened in append mode.

    Raises:
        KeyError: If the accumulated results lack an entry for one of the
            time windows 2, 4, ..., 200.
    """
    stocks = ['C', 'FDX', 'KO', 'MSFT', 'SBUX', 'NFLX', 'LUV']

    for s in stocks:
        # Every ticker except the one whose models are being validated.
        remainingStocks = [other for other in stocks if other != s]

        # The context manager guarantees the file is closed (and flushed)
        # for every stock, even when validation raises part-way through.
        with open(s + filename, 'a') as csvFile:
            f = csv.writer(csvFile)

            table = {}

            f.writerow([s])
            f.writerow(['t', 'R^2', 'AIC'])
            path = createPath(s + '/' + s + 'mrValidation.csv')
            models = loadModels(path, "mr")

            for line in models:
                # The time window depends only on the model line.
                i = line['timewindow']

                for rs in remainingStocks:
                    featurePath = createPath(rs + "/")
                    features = loadFeatures(featurePath, i)

                    prices = []
                    ema = []
                    rsi = []
                    macd = []

                    # Single pass, so this also works if loadFeatures
                    # returns a one-shot iterator.
                    for row in features:
                        prices.append(row['price'])
                        ema.append(row['ema'])
                        rsi.append(row['rsi'])
                        macd.append(row['macd'])

                    y = np.array(prices)
                    x = np.vstack([ema, rsi, macd]).T

                    validationStats = anova.ols(
                        y, x,
                        [line['constant'], line['ema'],
                         line['rsi'], line['macd']])
                    r2 = validationStats.R2
                    aic = validationStats.ll()[1]

                    # Pre-divide by the number of remaining stocks so that
                    # summing the contributions yields their average.
                    share = float(len(remainingStocks))
                    stats = [val / share for val in [r2, aic]]

                    if i in table:
                        table[i] = [sum(pair)
                                    for pair in zip(table[i], stats)]
                    else:
                        table[i] = stats

                    print(i)
                    print(stats)

            # Write the averaged results, one row per time window.
            for i in range(2, 202, 2):
                row = [i] + table[i]
                print(row)
                f.writerow(row)
Ejemplo n.º 2
0
def crossValidationFeatures(featureLocation, filename, algorithm, k=10):
    """Cross-validate a three-feature model for time windows 2, 4, ..., 200.

    For each time window the features returned by ``loadFeatures`` are split
    by ``k_fold_cross_validation``.  The algorithm is fitted on each training
    part and the resulting coefficients are scored on the validation part
    with ``anova.ols``.  One CSV row is appended per window: the window, the
    coefficients of the fold with the highest R^2, and the R^2 and 'AIC'
    values (second element of ``ll()``) averaged over ``k``.

    Args:
        featureLocation: Passed unchanged to ``loadFeatures``.
        filename: Output CSV file, opened in append mode.
        algorithm: Object providing ``setFeatures(training)`` and
            ``findSol(i)``.  ``findSol`` must return a mapping with the keys
            'constant', 'EMA', 'RSI' and 'MACD'.
        k: Number of folds; also the divisor used for averaging.

    Raises:
        KeyError: If ``k_fold_cross_validation`` yields no folds for a
            window, so there are no statistics to write.
    """
    # The context manager closes the file even if a fold raises.
    with open(filename, 'a') as csvFile:
        f = csv.writer(csvFile)
        f.writerow(['t', 'constant', 'EMA', 'RSI', 'MACD', 'R^2', 'AIC'])

        table = {}

        for i in range(2, 202, 2):
            features = loadFeatures(featureLocation, i)
            bestSol = []
            # None (rather than 0) so the first fold always seeds the best
            # solution.  Otherwise a window whose folds all score R^2 <= 0
            # would be written without its coefficient columns.
            bestR2 = None

            for training, validation in k_fold_cross_validation(features, k):
                algorithm.setFeatures(training)
                sol = algorithm.findSol(i)
                coefficients = [sol['constant'], sol['EMA'],
                                sol['RSI'], sol['MACD']]

                prices = []
                ema = []
                rsi = []
                macd = []

                # Single pass over the validation rows.
                for row in validation:
                    prices.append(row['price'])
                    ema.append(row['ema'])
                    rsi.append(row['rsi'])
                    macd.append(row['macd'])

                y = np.array(prices)
                x = np.vstack([ema, rsi, macd]).T

                validationStats = anova.ols(y, x, list(coefficients))
                r2 = validationStats.R2
                aic = validationStats.ll()[1]

                if bestR2 is None or r2 > bestR2:
                    bestR2 = r2
                    bestSol = coefficients

                # Pre-divide by k so the per-fold sum is the average.
                stats = [val / float(k) for val in [r2, aic]]

                if i in table:
                    table[i] = [sum(pair) for pair in zip(table[i], stats)]
                else:
                    table[i] = stats

            print(i)
            # Row layout: window, best coefficients, averaged statistics.
            table[i] = [i] + bestSol + table[i]
            print(table[i])
            f.writerow(table[i])
            # Flush per window so partial results survive a long run.
            csvFile.flush()
Ejemplo n.º 3
0
def emaTest(filename):
    """Validate each stock's "emaOnly" models against the other stocks.

    For every ticker in the hard-coded stock list, the models loaded via
    ``loadModels(path, "emaOnly")`` are evaluated with ``anova.ols`` on the
    'ema' feature of each of the remaining tickers.  For every time window
    the R^2 value and the second element of ``ll()`` (written under the
    'AIC' header) are averaged over the remaining tickers and appended to
    the CSV file named ``<ticker> + filename``.

    Args:
        filename: Suffix appended to each ticker symbol to form the output
            file name.  The file is opened in append mode.

    Raises:
        KeyError: If the accumulated results lack an entry for one of the
            time windows 2, 4, ..., 200.
    """
    stocks = ['C', 'FDX', 'KO', 'MSFT', 'SBUX', 'NFLX', 'LUV']

    for s in stocks:
        # Every ticker except the one whose models are being validated.
        remainingStocks = [other for other in stocks if other != s]

        # The context manager guarantees the file is closed (and flushed)
        # for every stock, even when validation raises part-way through.
        with open(s + filename, 'a') as csvFile:
            f = csv.writer(csvFile)

            table = {}

            f.writerow([s])
            f.writerow(['t', 'R^2', 'AIC'])
            path = createPath(s + '/' + s + 'emaValidation.csv')
            models = loadModels(path, "emaOnly")

            for line in models:
                # The time window depends only on the model line.
                i = line['timewindow']

                for rs in remainingStocks:
                    featurePath = createPath(rs + "/")
                    features = loadFeatures(featurePath, i)

                    prices = []
                    ema = []

                    # Single pass, so this also works if loadFeatures
                    # returns a one-shot iterator.
                    for row in features:
                        prices.append(row['price'])
                        ema.append(row['ema'])

                    y = np.array(prices)
                    x = np.vstack([ema]).T

                    validationStats = anova.ols(
                        y, x, [line['constant'], line['ema']])
                    r2 = validationStats.R2
                    aic = validationStats.ll()[1]

                    # Pre-divide by the number of remaining stocks so that
                    # summing the contributions yields their average.
                    share = float(len(remainingStocks))
                    stats = [val / share for val in [r2, aic]]

                    if i in table:
                        table[i] = [sum(pair)
                                    for pair in zip(table[i], stats)]
                    else:
                        table[i] = stats

                    print(i)
                    print(stats)

            # Write the averaged results, one row per time window.
            for i in range(2, 202, 2):
                row = [i] + table[i]
                print(row)
                f.writerow(row)
Ejemplo n.º 4
0
def simpleCrossValidationFeatures(featureLocation, filename, algorithm, k=10):
    """Cross-validate an EMA-only model for time windows 2, 4, ..., 200.

    For each time window the features returned by ``loadFeatures`` are split
    by ``k_fold_cross_validation``.  The algorithm is fitted on each training
    part and the resulting coefficients are scored on the validation part
    with ``anova.ols``.  After all windows are processed, one CSV row per
    window is appended: the window, the coefficients of the fold with the
    highest R^2, and the R^2 and 'AIC' values (second element of ``ll()``)
    averaged over ``k``.

    Args:
        featureLocation: Passed unchanged to ``loadFeatures``.
        filename: Output CSV file, opened in append mode.
        algorithm: Object providing ``setFeatures(training)`` and
            ``findSol(i)``.  ``findSol`` must return a mapping with the keys
            'constant' and 'EMA'.
        k: Number of folds; also the divisor used for averaging.

    Raises:
        KeyError: If ``k_fold_cross_validation`` yields no folds for a
            window, so there are no statistics to write.
    """
    # The context manager closes the file even if a fold raises.
    with open(filename, 'a') as csvFile:
        f = csv.writer(csvFile)
        f.writerow(['t', 'constant', 'EMA', 'R^2', 'AIC'])

        table = {}

        for i in range(2, 202, 2):
            features = loadFeatures(featureLocation, i)
            bestSol = []
            # None (rather than 0) so the first fold always seeds the best
            # solution.  Otherwise a window whose folds all score R^2 <= 0
            # would be written without its coefficient columns.
            bestR2 = None

            for training, validation in k_fold_cross_validation(features, k):
                algorithm.setFeatures(training)
                sol = algorithm.findSol(i)
                coefficients = [sol['constant'], sol['EMA']]

                prices = []
                ema = []

                # Single pass over the validation rows.
                for row in validation:
                    prices.append(row['price'])
                    ema.append(row['ema'])

                y = np.array(prices)
                x = np.vstack([ema]).T

                validationStats = anova.ols(y, x, list(coefficients))
                r2 = validationStats.R2
                aic = validationStats.ll()[1]

                if bestR2 is None or r2 > bestR2:
                    bestR2 = r2
                    bestSol = coefficients

                # Pre-divide by k so the per-fold sum is the average.
                stats = [val / float(k) for val in [r2, aic]]

                if i in table:
                    table[i] = [sum(pair) for pair in zip(table[i], stats)]
                else:
                    table[i] = stats

                print(i)
                print(stats)

            # Put the best coefficients in front of the averaged statistics.
            table[i] = bestSol + table[i]

        # Write the averaged results, one row per time window.
        for i in range(2, 202, 2):
            row = [i] + table[i]
            print(row)
            f.writerow(row)
Ejemplo n.º 5
0
def crossValidation(filename, timeseries, algorithm, k=10):
    """Cross-validate a three-feature model directly on a time series.

    The items of ``timeseries`` are split by ``k_fold_cross_validation``.
    For each fold, ``tf.TechnicalFeatures`` objects are built from the
    training and validation items.  The algorithm is fitted on the training
    features, and for every time window 2, 4, ..., 200 its solution is
    scored on the validation features with ``anova.ols``.  The coefficients,
    R^2 and 'AIC' values (second element of ``ll()``) are averaged over
    ``k`` and appended to the CSV file, one row per window.

    Args:
        filename: Output CSV file, opened in append mode.
        timeseries: Mapping whose ``items()`` are the samples to split.
            Insertion order of each split is preserved in an ``OrderedDict``.
        algorithm: Object providing ``setFeatures(features)`` and
            ``findSol(i)``.  ``findSol`` must return a mapping with the keys
            'constant', 'EMA', 'RSI' and 'MACD'.
        k: Number of folds; also the divisor used for averaging.

    Raises:
        KeyError: If ``k_fold_cross_validation`` yields no folds, so there
            are no statistics to write.
    """
    # The context manager closes the file even if a fold raises.
    with open(filename, 'a') as csvFile:
        f = csv.writer(csvFile)
        f.writerow(['t', 'constant', 'EMA', 'RSI', 'MACD', 'R^2', 'AIC'])

        # Materialise the items so the splitter always receives a list,
        # including when items() returns a view.
        samples = list(timeseries.items())
        table = {}

        for training, validation in k_fold_cross_validation(samples, k):
            trainingDict = OrderedDict(training)
            validationDict = OrderedDict(validation)

            tr = tf.TechnicalFeatures(trainingDict)
            vd = tf.TechnicalFeatures(validationDict)
            algorithm.setFeatures(tr)

            for i in range(2, 202, 2):
                sol = algorithm.findSol(i)
                prices, ema, rsi, macd = vd.getTimewindow(i)
                y = np.array(prices)
                x = np.vstack([ema, rsi, macd]).T

                validationStats = anova.ols(
                    y, x,
                    [sol['constant'], sol['EMA'], sol['RSI'], sol['MACD']])
                r2 = validationStats.R2
                aic = validationStats.ll()[1]

                # Pre-divide by k so the per-fold sum is the average of
                # both the coefficients and the statistics.
                stats = [
                    sol['constant'], sol['EMA'], sol['RSI'], sol['MACD'],
                    r2, aic
                ]
                stats = [val / float(k) for val in stats]

                if i in table:
                    table[i] = [sum(pair) for pair in zip(table[i], stats)]
                else:
                    table[i] = stats

                print(i)
                print(stats)

        # Write the averaged results, one row per time window.
        for i in range(2, 202, 2):
            row = [i] + table[i]
            print(row)
            f.writerow(row)
Ejemplo n.º 6
0
def simpleCrossValidation(filename, timeseries, algorithm, k=10):
    """Cross-validate an EMA-only model directly on a time series.

    The items of ``timeseries`` are split by ``k_fold_cross_validation``.
    For each fold, ``tf.TechnicalFeatures`` objects are built from the
    training and validation items.  The algorithm is fitted on the training
    features, and for every time window 2, 4, ..., 200 its solution is
    scored on the validation EMA values with ``anova.ols``.  The
    coefficients, R^2 and 'AIC' values (second element of ``ll()``) are
    averaged over ``k`` and appended to the CSV file, one row per window.

    Args:
        filename: Output CSV file, opened in append mode.
        timeseries: Mapping whose ``items()`` are the samples to split.
            Insertion order of each split is preserved in an ``OrderedDict``.
        algorithm: Object providing ``setFeatures(features)`` and
            ``findSol(i)``.  ``findSol`` must return a mapping with the keys
            'constant' and 'EMA'.
        k: Number of folds; also the divisor used for averaging.

    Raises:
        KeyError: If ``k_fold_cross_validation`` yields no folds, so there
            are no statistics to write.
    """
    # The context manager closes the file even if a fold raises.
    with open(filename, 'a') as csvFile:
        f = csv.writer(csvFile)
        f.writerow(['t', 'constant', 'EMA', 'R^2', 'AIC'])

        # Materialise the items so the splitter always receives a list,
        # including when items() returns a view.
        samples = list(timeseries.items())
        table = {}

        for training, validation in k_fold_cross_validation(samples, k):
            trainingDict = OrderedDict(training)
            validationDict = OrderedDict(validation)

            tr = tf.TechnicalFeatures(trainingDict)
            vd = tf.TechnicalFeatures(validationDict)
            algorithm.setFeatures(tr)

            for i in range(2, 202, 2):
                sol = algorithm.findSol(i)
                # getTimewindow returns four series; only prices and EMA
                # are used by this model.
                prices, ema, _rsi, _macd = vd.getTimewindow(i)

                y = np.array(prices)
                x = np.vstack([ema]).T

                validationStats = anova.ols(
                    y, x, [sol['constant'], sol['EMA']])
                r2 = validationStats.R2
                aic = validationStats.ll()[1]

                # Pre-divide by k so the per-fold sum is the average of
                # both the coefficients and the statistics.
                stats = [sol['constant'], sol['EMA'], r2, aic]
                stats = [val / float(k) for val in stats]

                if i in table:
                    table[i] = [sum(pair) for pair in zip(table[i], stats)]
                else:
                    table[i] = stats

                print(i)
                print(stats)

        # Write the averaged results, one row per time window.
        for i in range(2, 202, 2):
            row = [i] + table[i]
            print(row)
            f.writerow(row)