Ejemplo n.º 1
0
def main(datafile = 'data_by_cookie_slim.json', outputFolder = 'testAnalysis', outputSuffix = 'test', iterations = 10, epochmult = 4):
    """Run the "alternative, 1st order" analysis on observed and bootstrapped data.

    Loads the data set, then hands it to ``os.runObs`` and ``bs.runBoot``
    together with a ``processX`` callback that reduces a series to the mean
    sign of its successive differences.

    Args:
        datafile: Path passed to ``funcs.loadData``.
        outputFolder: Base folder; ``'/altBoot'`` is appended to it and the
            resulting path is passed to ``funcs.ensurePath``.
        outputSuffix: Unused in this function.
        iterations: Iteration count forwarded to ``bs.runBoot``.
        epochmult: Unused in this function.

    Returns:
        The sub-folder suffix ``'/altBoot'`` as a string.
    """
    print('Running analysis of Alternative thing, 1st order')
    outextension = '/altBoot'
    outputFolder = outputFolder + outextension
    funcs.ensurePath(outputFolder)
    data = funcs.loadData(datafile)
    print('iterations: {}\noutput folder: {}\n'.format(iterations,outputFolder))

    def processX(x, x_plays):
        """Return the mean direction (+1 up, -1 otherwise) of steps along axis 0.

        ``x_plays`` is accepted but not used by this callback.
        """
        steps = np.diff(x, axis=0)
        # Strictly positive steps count as +1, everything else (including
        # zero) as -1. np.where replaces the former np.vectorize lambda that
        # relied on the removed ``np.float`` alias; results are the same.
        directions = np.where(steps > 0, 1.0, -1.0)
        return np.mean(directions, axis=0)

    print("OBSERVED DATA")
    os.runObs(data, outputFolder, processX = processX)

    print("BOOTSTRAP")
    bs.runBoot(data, iterations, outputFolder, processX = processX)

    # Two blank lines separate this run's console output from whatever follows.
    print('')
    print('')

    return outextension
Ejemplo n.º 2
0
def main(datafile = '../data_by_cookie_slim.json', outputFolder = '.', iterations = 10, epochmult = 4):
    """Train an MLP on the first 10 values of each series and report d' scores.

    Each sample pairs the normalized first 10 values of a series with a label
    that is 0 when the series has fewer than 20 entries and 1 otherwise.
    The results returned by ``network.learnLoop`` are pickled to
    ``<outputFolder>/runMLP.p``, read back, and summarised on stdout.

    Args:
        datafile: Path passed to ``funcs.loadData``.
        outputFolder: Folder the pickle file is written to.
        iterations: Forwarded to ``network.learnLoop``.
        epochmult: Multiplied by the number of samples to get the epoch count.
    """
    filename = 'runMLP'
    outputFile = '{}/{}.p'.format(outputFolder,filename)
    data = funcs.loadData(datafile)

    #Filter away bottom 75%
    data = funcs.filterByPercRank(data, 75)

    print('iterations: {}\nMultiplier Samplesize Epochs: {}\n output file: {}'.format(iterations,epochmult,outputFile))

    #Get first 10 values and try to decide whether people will keep on playing past 20 games
    # NOTE(review): the (float, 1) field spelling triggers a FutureWarning on
    # newer NumPy releases; left as-is because MLP's expected layout is not
    # visible from here.
    samples = np.fromiter(((funcs.normalize(np.array(k[:10])),0 if len(k) < 20 else 1) for k in data if len(k) >= 10),
                    dtype=[('input',  float, 10), ('output', float, 1)])
    print('Learning from {} samples...'.format(samples.size))
    network = MLP(10,10,10,1)

    def processResults(network,results):
        """Turn raw (input, expected, output) rows into (hits, false alarms, d', weights)."""
        # Threshold the network output at 0.5 to get a binary response.
        stepf = lambda x: 0 if x < .5 else 1
        test_data = [(t[0], t[1], stepf(t[2])) for t in results]
        percHits = np.mean([1 if t[2] == 1 else 0 for t in test_data if t[1] == 1]) # Percentage right hits
        falseAlarm = np.mean([1 if t[2] == 1 else 0 for t in test_data if t[1] == 0]) # Percentage false positives

        dPrime = funcs.dprime(percHits, falseAlarm)
        return (percHits, falseAlarm, dPrime, network.weights)

    out = network.learnLoop(samples, iterations = iterations, epochs = epochmult * samples.size, processResults = processResults) #40 million epochs for full dataset.. Too many?

    # Use context managers so the file handles are closed deterministically
    # (the write is flushed before the file is read back).
    with open(outputFile, 'wb') as handle:
        pickle.dump(out, handle)

    with open(outputFile, 'rb') as handle:
        dprimes = pickle.load(handle)

    #set nan and inf d' values to 0 so they do not poison the summary statistics
    dprimes = [[0 if np.isnan(i) or np.isinf(i)  else i for i in k[2]] for k in dprimes]

    print('')
    print('Results:')
    print('Mean d\' score for each quit opportunity: {}'.format([np.mean([k[i] for k in dprimes]) for i in xrange(1)]))
    print('Std : {}'.format([np.std([k[i] for k in dprimes]) for i in xrange(1)]))
    print('Max : {}'.format([np.max([k[i] for k in dprimes]) for i in xrange(1)]))
    print('')
    print('')
Ejemplo n.º 3
0
def main(datafile = '../data_by_cookie_slim.json', outputFolder = '.', outputSuffix = '', iterations = 10, epochmult = 4):
    """Run the variance-vs-mean correlation analysis with ``rankFilter=90``.

    The data set is loaded, the output folder ``<outputFolder>/varMeanTop10``
    is passed to ``funcs.ensurePath``, and the data is handed first to
    ``os.runObs`` and then to ``bs.runBoot``.

    Args:
        datafile: Path passed to ``funcs.loadData``.
        outputFolder: Base folder that ``'/varMeanTop10'`` is appended to.
        outputSuffix: Unused in this function.
        iterations: Iteration count forwarded to ``bs.runBoot``.
        epochmult: Unused in this function.

    Returns:
        The sub-folder suffix ``'/varMeanTop10'`` as a string.
    """
    outextension = '/varMeanTop10'
    # Both the observed and the bootstrap pass get the same rank filter.
    sharedOptions = {'rankFilter': 90}

    print('Running analysis of variance vs mean correlation top10')
    targetFolder = outputFolder + outextension
    data = funcs.loadData(datafile)
    funcs.ensurePath(targetFolder)
    summary = 'iterations: {}\noutput folder: {}\n'.format(iterations, targetFolder)
    print(summary)

    print("OBSERVED DATA")
    os.runObs(data, targetFolder, **sharedOptions)

    print("BOOTSTRAP")
    bs.runBoot(data, iterations, targetFolder, **sharedOptions)

    # Trailing blank lines on the console.
    for _ in (0, 1):
        print('')

    return outextension
Ejemplo n.º 4
0
def main(datafile = '../data_by_cookie_slim.json', outputFolder = '.', outputSuffix = '', iterations = 10, epochmult = 4):
    """Run the log variance-vs-mean analysis on observed and bootstrapped data.

    Every element handed to the ``preprocess`` callback is passed through
    ``np.log`` before ``os.runObs`` / ``bs.runBoot`` work with it.

    Args:
        datafile: Path passed to ``funcs.loadData``.
        outputFolder: Base folder that ``'/logVarMean'`` is appended to.
        outputSuffix: Unused in this function.
        iterations: Iteration count forwarded to ``bs.runBoot``.
        epochmult: Unused in this function.

    Returns:
        A one-element list containing the sub-folder suffix ``'/logVarMean'``.
    """
    def preprocess(x):
        # Apply the natural logarithm to each item of x, returning a list.
        return list(map(np.log, x))

    outextension = '/logVarMean'
    print('Running log variance vs mean')
    targetFolder = outputFolder + outextension
    funcs.ensurePath(targetFolder)
    summary = 'iterations: {}\noutput folder: {}\n'.format(iterations, targetFolder)
    print(summary)
    data = funcs.loadData(datafile)

    print("OBSERVED DATA")
    os.runObs(data, targetFolder, preprocess = preprocess)

    print("BOOTSTRAP")
    bs.runBoot(data, iterations, targetFolder, preprocess = preprocess)

    # Trailing blank lines on the console.
    for _ in (0, 1):
        print('')

    return [outextension]
Ejemplo n.º 5
0
def main(datafile="../data_by_cookie_slim.json", outputFolder=".", outputSuffix="", iterations=10, epochmult=4):
    """Run the first-order polynomial-fit analysis on observed and bootstrapped data.

    The ``processX`` callback fits a degree-1 polynomial of ``x`` against
    ``x_plays`` and returns the residuals entry of ``np.polyfit(..., full=True)``.

    Args:
        datafile: Path passed to ``funcs.loadData``.
        outputFolder: Base folder that ``"/regressMean"`` is appended to.
        outputSuffix: Unused in this function.
        iterations: Iteration count forwarded to ``bs.runBoot``.
        epochmult: Unused in this function.

    Returns:
        A one-element list containing the sub-folder suffix ``"/regressMean"``.
    """
    def processX(x, x_plays):
        # With full=True polyfit returns a tuple; index 1 holds the residuals.
        fitResult = np.polyfit(x_plays, x, 1, full=True)
        return fitResult[1]

    outextension = "/regressMean"
    print("Running log regression analysis of variance vs mean, 1st order")
    targetFolder = outputFolder + outextension

    funcs.ensurePath(targetFolder)
    summary = "iterations: {}\noutput folder: {}\n".format(iterations, targetFolder)
    print(summary)
    data = funcs.loadData(datafile)

    print("OBSERVED DATA")
    os.runObs(data, targetFolder, processX=processX)

    print("BOOTSTRAP")
    bs.runBoot(data, iterations, targetFolder, processX=processX)

    # Trailing blank lines on the console.
    for _ in (0, 1):
        print("")

    return [outextension]
Ejemplo n.º 6
0
def main(datafile = '../data_by_cookie_slim.json', outputFolder = '.', iterations = 10, epochmult = 4):
    """Train a perceptron on the first 10 values of each series and report d' scores.

    Each training row pairs the first 10 values of a series with a label that
    is 0 when the series has fewer than 20 entries and 1 otherwise. For every
    iteration a perceptron with random initial weights is trained on randomly
    drawn rows, then scored on the full training set. The per-iteration
    tuples ``(percHits, falseAlarm, dPrime, w)`` are pickled to
    ``<outputFolder>/runPerceptronReal.p`` and the d' values are summarised
    on stdout.

    Args:
        datafile: Path passed to ``funcs.loadData``.
        outputFolder: Folder the pickle file is written to.
        iterations: Number of independently initialised training runs.
        epochmult: Multiplied by the number of training rows to get the
            number of weight updates per run.
    """
    filename = 'runPerceptronReal'
    outputFile = '{}/{}.p'.format(outputFolder,filename)

    print('Perceptron quit after 20 from 10 games with Perceptron')

    data = funcs.loadData(datafile)

    #Set up perceptron
    s = 10 # games
    eta = 0.2 # learning rate

    def stepf(x):
        # Binary activation: negative activations map to 0, everything else to 1.
        return 0 if x < 0 else 1

    out = []
    training_data = [(np.array(k[:s]),0 if len(k) < 2*s else 1) for k in data if len(k) >= s]
    n = len(training_data) * epochmult

    print('iterations: {}\nMultiplier Samplesize Epochs: {}\noutput file: {}\n'.format(iterations,epochmult,outputFile))

    print('Overall plays over 20 plays: {}'.format(np.mean([t[1] for t in training_data])))
    print('Learning from {} samples...'.format(len(training_data)))
    for i in xrange(iterations):
        # Start each run from weights drawn uniformly from [-1, 1).
        w = 2 * np.random.rand(s) - 1

        for j in xrange(n):
            x, expected = choice(training_data)
            error = expected - stepf(np.dot(w,x))
            # NOTE(review): the prediction uses raw x while the update uses
            # funcs.normalize(x); kept as in the original - confirm intent.
            w += eta * error * funcs.normalize(x)

        #test_data consists of rows (x, expected, result) or (x, signal, response)
        test_data = [(t[0], t[1], stepf(np.dot(w,t[0]))) for t in training_data]
        percHits = np.mean([1 if t[2] == 1 else 0 for t in test_data if t[1] == 1]) # Percentage right hits
        falseAlarm = np.mean([1 if t[2] == 1 else 0 for t in test_data if t[1] == 0]) # Percentage false positives
        dPrime = funcs.dprime(percHits, falseAlarm)
        out.append((percHits, falseAlarm, dPrime, w))

    # Context managers close the handles deterministically, so the data is
    # flushed to disk before it is read back.
    with open(outputFile, 'wb') as handle:
        pickle.dump(out, handle)

    with open(outputFile, 'rb') as handle:
        results = pickle.load(handle)

    # Pull the d' value (index 2 of each result tuple) and set nan/inf to 0.
    # The summary previously indexed element 0, i.e. it reported the hit rate
    # under the d' label.
    dprimes = [0 if np.isnan(r[2]) or np.isinf(r[2]) else r[2] for r in results]

    print('')
    print('Results:')
    print('Mean d\' score for each quit opportunity: {}'.format([np.mean(dprimes)]))
    print('Std : {}'.format([np.std(dprimes)]))
    print('Max : {}'.format([np.max(dprimes)]))
    print('')
    print('')