# Shared imports for the analysis functions below. numpy and pickle are
# standard; the remaining modules are project-local helpers, so the import
# paths given here are assumptions inferred from how they are called.
import pickle
from random import choice   # assumed source of the choice() used in the perceptron script

import numpy as np

import funcs                # project helpers: loadData, ensurePath, normalize, dprime, ...
import observed as os       # assumed alias -- `os` below is the observed-data runner, not the stdlib os
import bootstrap as bs      # assumed import path for the bootstrap runner
from mlp import MLP         # assumed import path for the MLP network used in runMLP


def main(datafile='data_by_cookie_slim.json', outputFolder='testAnalysis',
         outputSuffix='test', iterations=10, epochmult=4):
    # Load data
    print 'Running analysis of Alternative thing, 1st order'
    outextension = '/altBoot'
    outputFolder = outputFolder + outextension
    funcs.ensurePath(outputFolder)
    data = funcs.loadData(datafile)
    print 'iterations: {}\noutput folder: {}\n'.format(iterations, outputFolder)

    def processX(x, x_plays):
        # Sign of each first-order difference (+1 for an increase, -1
        # otherwise), averaged over rows to give one value per column.
        f = np.vectorize(lambda v: 1 if v > 0 else -1, otypes=[float])
        return np.mean(f(np.diff(x, axis=0)), axis=0)

    print "OBSERVED DATA"
    os.runObs(data, outputFolder, processX=processX)
    print "BOOTSTRAP"
    bs.runBoot(data, iterations, outputFolder, processX=processX)
    print
    print
    return outextension
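# A quick worked example of what processX computes (toy matrix made up for
# illustration): np.diff along axis 0 takes successive row differences, the
# sign map sends increases to +1 and everything else to -1, and the column
# means give the average direction of change.
#
#   >>> x = np.array([[1., 2.], [3., 1.], [2., 5.]])
#   >>> np.diff(x, axis=0)
#   array([[ 2., -1.],
#          [-1.,  4.]])
#   >>> # after the sign map: [[ 1., -1.], [-1.,  1.]]; column means: [ 0.,  0.]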
def main(datafile='../data_by_cookie_slim.json', outputFolder='.',
         iterations=10, epochmult=4):
    filename = 'runMLP'
    outputFile = '{}/{}.p'.format(outputFolder, filename)
    data = funcs.loadData(datafile)

    # Filter away the bottom 75% of players
    data = funcs.filterByPercRank(data, 75)
    print 'iterations: {}\nMultiplier Samplesize Epochs: {}\noutput file: {}'.format(
        iterations, epochmult, outputFile)

    # Take each player's first 10 plays and try to predict whether they will
    # keep playing past 20 games.
    samples = np.fromiter(
        ((funcs.normalize(np.array(k[:10])), 0 if len(k) < 20 else 1)
         for k in data if len(k) >= 10),
        dtype=[('input', float, 10), ('output', float, 1)])
    print 'Learning from {} samples...'.format(samples.size)

    network = MLP(10, 10, 10, 1)

    def processResults(network, results):
        # results rows are (input, expected, response); threshold the network
        # response at .5 to get a binary decision.
        stepf = lambda x: 0 if x < .5 else 1
        test_data = [(t[0], t[1], stepf(t[2])) for t in results]
        percHits = np.mean([1 if t[2] == 1 else 0 for t in test_data if t[1] == 1])    # hit rate
        falseAlarm = np.mean([1 if t[2] == 1 else 0 for t in test_data if t[1] == 0])  # false-alarm rate
        dPrime = funcs.dprime(percHits, falseAlarm)
        return (percHits, falseAlarm, dPrime, network.weights)

    # epochmult * samples.size epochs -- 40 million for the full dataset,
    # which may be too many.
    out = network.learnLoop(samples, iterations=iterations,
                            epochs=epochmult * samples.size,
                            processResults=processResults)
    pickle.dump(out, open(outputFile, 'wb'))

    dprimes = pickle.load(open(outputFile, 'rb'))
    # Each entry is (percHits, falseAlarm, dPrime, weights); keep the d'
    # values, setting NaN/inf to 0.
    dprimes = [[0 if np.isnan(k[2]) or np.isinf(k[2]) else k[2]] for k in dprimes]

    print
    print 'Results:'
    print 'Mean d\' score for each quit opportunity: {}'.format(
        [np.mean([k[i] for k in dprimes]) for i in xrange(1)])
    print 'Std : {}'.format([np.std([k[i] for k in dprimes]) for i in xrange(1)])
    print 'Max : {}'.format([np.max([k[i] for k in dprimes]) for i in xrange(1)])
    print
    print
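# funcs.dprime is project-local and not shown here. A minimal sketch of the
# standard signal-detection definition it presumably implements, d' =
# Z(hit rate) - Z(false-alarm rate) with Z the inverse standard normal CDF:
from scipy.stats import norm

def dprime_sketch(hit_rate, false_alarm_rate):
    # norm.ppf is the inverse normal CDF; it returns +/-inf when a rate is
    # exactly 0 or 1, which is why the callers above zero out inf values.
    return norm.ppf(hit_rate) - norm.ppf(false_alarm_rate)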
def main(datafile='../data_by_cookie_slim.json', outputFolder='.', outputSuffix='',
         iterations=10, epochmult=4):
    # Load data
    print 'Running analysis of variance vs mean correlation, top 10%'
    outextension = '/varMeanTop10'
    outputFolder = outputFolder + outextension
    data = funcs.loadData(datafile)
    funcs.ensurePath(outputFolder)
    print 'iterations: {}\noutput folder: {}\n'.format(iterations, outputFolder)

    print "OBSERVED DATA"
    os.runObs(data, outputFolder, rankFilter=90)   # keep players above the 90th percentile
    print "BOOTSTRAP"
    bs.runBoot(data, iterations, outputFolder, rankFilter=90)
    print
    print
    return outextension
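# funcs.filterByPercRank (used in runMLP above) and the rankFilter arguments
# here both restrict the analysis to high-scoring players. A plausible
# minimal version of the former, assuming it keeps players whose total score
# is at or above the given percentile:
def filterByPercRank_sketch(data, perc):
    totals = np.array([np.sum(k) for k in data])
    cutoff = np.percentile(totals, perc)
    return [k for k in data if np.sum(k) >= cutoff]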
def main(datafile='../data_by_cookie_slim.json', outputFolder='.', outputSuffix='',
         iterations=10, epochmult=4):
    # Load data
    print 'Running log variance vs mean'
    outextension = '/logVarMean'
    outputFolder = outputFolder + outextension
    funcs.ensurePath(outputFolder)
    print 'iterations: {}\noutput folder: {}\n'.format(iterations, outputFolder)
    data = funcs.loadData(datafile)

    # Preprocessing = taking the logarithm of all data points
    preprocess = lambda x: [np.log(k) for k in x]

    print "OBSERVED DATA"
    os.runObs(data, outputFolder, preprocess=preprocess)
    print "BOOTSTRAP"
    bs.runBoot(data, iterations, outputFolder, preprocess=preprocess)
    print
    print
    return [outextension]
def main(datafile="../data_by_cookie_slim.json", outputFolder=".", outputSuffix="", iterations=10, epochmult=4): # Load data print "Running log regression analysis of variance vs mean, 1st order" outextension = "/regressMean" outputFolder = outputFolder + outextension funcs.ensurePath(outputFolder) print "iterations: {}\noutput folder: {}\n".format(iterations, outputFolder) data = funcs.loadData(datafile) # Process X = polyfit regression test processX = lambda x, x_plays: np.polyfit(x_plays, x, 1, full=True)[1] # Preprocessing = taking logarithm of all data points # preprocess = lambda x: [np.log(k) for k in x] print "OBSERVED DATA" os.runObs(data, outputFolder, processX=processX) print "BOOTSTRAP" bs.runBoot(data, iterations, outputFolder, processX=processX) print print return [outextension]
def main(datafile='../data_by_cookie_slim.json', outputFolder='.',
         iterations=10, epochmult=4):
    filename = 'runPerceptronReal'
    outputFile = '{}/{}.p'.format(outputFolder, filename)

    # Load data
    print 'Perceptron: predict playing past 20 games from the first 10'
    data = funcs.loadData(datafile)

    # Set up the perceptron
    s = 10     # number of games used as input
    eta = 0.2  # learning rate
    dPrimes = [0] * iterations
    out = []

    # Each sample: the first s plays, labelled 1 if the player reached 2*s plays.
    training_data = [(np.array(k[:s]), 0 if len(k) < 2 * s else 1)
                     for k in data if len(k) >= s]
    n = len(training_data) * epochmult
    print 'iterations: {}\nMultiplier Samplesize Epochs: {}\noutput file: {}\n'.format(
        iterations, epochmult, outputFile)
    print 'Proportion playing past 20 plays: {}'.format(np.mean([t[1] for t in training_data]))
    print 'Learning from {} samples...'.format(len(training_data))

    for i in xrange(iterations):
        # Random initial weights in [-1, 1)
        w = 2 * np.random.rand(s) - 1
        stepf = lambda x: 0 if x < 0 else 1

        # Train with n single-sample updates. Note the asymmetry: the decision
        # uses the raw input, while the weight update uses the normalized one.
        for j in xrange(n):
            x, expected = choice(training_data)
            result = np.dot(w, x)
            error = expected - stepf(result)
            w += eta * error * funcs.normalize(x)

        # Test on the training set; test_data rows are (x, signal, response)
        test_data = [(t[0], t[1], stepf(np.dot(w, t[0]))) for t in training_data]
        percHits = np.mean([1 if t[2] == 1 else 0 for t in test_data if t[1] == 1])    # hit rate
        falseAlarm = np.mean([1 if t[2] == 1 else 0 for t in test_data if t[1] == 0])  # false-alarm rate
        dPrime = funcs.dprime(percHits, falseAlarm)
        dPrimes[i] = dPrime
        out.append((percHits, falseAlarm, dPrime, w))

    pickle.dump(out, open(outputFile, 'wb'))

    dprimes = pickle.load(open(outputFile, 'rb'))
    # Each entry is (percHits, falseAlarm, dPrime, weights); keep the d'
    # values, setting NaN/inf to 0.
    dscores = [0 if np.isnan(k[2]) or np.isinf(k[2]) else k[2] for k in dprimes]

    print
    print 'Results:'
    print 'Mean d\' score for each quit opportunity: {}'.format(np.mean(dscores))
    print 'Std : {}'.format(np.std(dscores))
    print 'Max : {}'.format(np.max(dscores))
    print
    print
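# funcs.normalize is also project-local. Given how it is used above (scaling
# raw play scores to a comparable range before the weight update), a plausible
# minimal version would rescale a vector to unit length:
def normalize_sketch(x):
    length = np.linalg.norm(x)
    return x / length if length > 0 else x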