def main():
    """Play games forever, logging running statistics and checkpointing.

    Two candidate pickle filenames are handed to ``get_params``; the result
    is wrapped by ``load.get_parameters`` and used to build the prediction
    and update functions that are passed to ``game``. The learning rate
    starts at 1e-2 and shrinks by a factor of ``e`` for every 86400 seconds
    of wall-clock time since start-up.

    ``game`` may return ``None``, in which case the round is not counted.
    Otherwise it returns a 3-tuple whose first element weights the other two
    in exponentially decayed (factor 0.999) running averages that are printed
    after every counted round.
    NOTE(review): the tuple presumably holds (sample count, loss, accuracy)
    -- confirm against ``game``.

    Every 100 counted rounds the parameters are passed to ``dump``. This
    function never returns.
    """
    initial_Ws, initial_bs = get_params(
        ['model_reinforcement.pickle', 'model.pickle'])
    Ws_s, bs_s = load.get_parameters(Ws=initial_Ws, bs=initial_bs)
    f_pred = get_predict(Ws_s, bs_s)
    f_train = get_update(Ws_s, bs_s)

    rounds = 0
    weight_total, first_total, second_total = 0.0, 0.0, 0.0
    base_learning_rate = 1e-2
    started_at = time.time()

    while True:
        # Wall-clock based decay: one e-fold per day of running time.
        elapsed = time.time() - started_at
        learning_rate = base_learning_rate * math.exp(-elapsed / 86400)

        outcome = game(f_pred, f_train, learning_rate)
        if outcome is None:
            continue

        rounds += 1
        weight, first, second = outcome

        # Decayed sums; dividing by the decayed weight gives weighted means.
        weight_total = weight_total * 0.999 + weight
        first_total = first_total * 0.999 + first * weight
        second_total = second_total * 0.999 + second * weight

        report = (rounds, learning_rate,
                  first_total / weight_total, second_total / weight_total)
        print('%6d %9.5f %9.5f %9.5f' % report)

        if rounds % 100 == 0:
            print('dumping model...')
            dump(Ws_s, bs_s)
def train():
    """Train the network on random minibatches, checkpointing on improvement.

    Loads three train/test dataset pairs through ``get_data`` (keys ``'x'``,
    ``'xr'`` and ``'xp'``), prints the first sample of two of them as 8x8
    grids, creates fresh parameters for a 12 * 64 input network with three
    hidden layers of 2048 units, and then loops forever:

    * the learning rate starts at 0.03 and decays by a factor of ``e`` per
      86400 seconds of wall-clock time;
    * each iteration runs the update function on one randomly chosen
      minibatch and prints the returned losses;
    * every 200 iterations the test set is evaluated and, when the test loss
      beats the best seen so far, the parameter values are pickled to
      ``cupy_model.pickle``.

    This function never returns; stop it by interrupting the process.
    """
    Xc_train, Xc_test, Xr_train, Xr_test, Xp_train, Xp_test = get_data(
        ['x', 'xr', 'xp'])

    # Sanity check: dump one sample from two of the datasets as 8x8 grids.
    for board in [Xc_train[0], Xp_train[0]]:
        for row in range(8):
            print(' '.join('%2d' % x for x in board[(row * 8):((row + 1) * 8)]))
        # A bare ``print`` is a no-op expression in Python 3; call it so the
        # blank separator line between boards is actually emitted.
        print()

    n_in = 12 * 64
    Ws_s, bs_s = load.get_parameters(n_in=n_in, n_hidden_units=[2048] * 3)

    minibatch_size = min(MINIBATCH_SIZE, Xc_train.shape[0])
    # Only whole minibatches are sampled; a trailing partial batch is ignored.
    n_minibatches = Xc_train.shape[0] // minibatch_size

    # Local names deliberately differ from this function's own name.
    train_fn = get_function(Ws_s, bs_s, update=True, dropout=False)
    test_fn = get_function(Ws_s, bs_s, update=False, dropout=False)

    def values(zs):
        # Extract the current value of every shared variable.
        return [z.get_value(borrow=True) for z in zs]

    best_test_loss = float('inf')
    base_learning_rate = 0.03
    t0 = time.time()

    i = 0
    while True:
        i += 1
        learning_rate = floatX(
            base_learning_rate * math.exp(-(time.time() - t0) / 86400))

        minibatch_index = random.randint(0, n_minibatches - 1)
        lo = minibatch_index * minibatch_size
        hi = (minibatch_index + 1) * minibatch_size

        loss, reg, loss_a, loss_b, loss_c = train_fn(
            Xc_train[lo:hi], Xr_train[lo:hi], Xp_train[lo:hi], learning_rate)

        zs = [loss, loss_a, loss_b, loss_c, reg]
        print('iteration %6d learning rate %12.9f: %s' %
              (i, learning_rate, '\t'.join(['%12.9f' % z for z in zs])))

        if i % 200 == 0:
            test_loss, test_reg, _, _, _ = test_fn(
                Xc_test, Xr_test, Xp_test, learning_rate)
            print('test loss %12.9f' % test_loss)

            if test_loss < best_test_loss:
                print('new record!')
                best_test_loss = test_loss

                print('dumping pickled model')
                with open('cupy_model.pickle', 'wb') as f:
                    pickle.dump((values(Ws_s), values(bs_s)), f)
def get_model_from_pickle(fn):
    """Build a prediction function from pickled weights and biases.

    :param fn: path of a pickle file containing a ``(Ws, bs)`` pair.
    :return: the ``theano.function`` mapping the model input to its output.
    :raises ValueError: if the pickled object is not a 2-item sequence.
    """
    # NOTE: unpickling executes arbitrary code; only load trusted files.
    # The context manager closes the file even when unpickling fails.
    with open(fn, 'rb') as f:
        # latin1 lets Python 3 read byte strings pickled under Python 2.
        Ws, bs = pickle.load(f, encoding='latin1')

    Ws_s, bs_s = load.get_parameters(Ws=Ws, bs=bs)
    x, p = load.get_model(Ws_s, bs_s)

    predict = theano.function(inputs=[x], outputs=p)
    return predict
def getModel(fn):
    """Load pickled weights and biases and turn them into a function.

    :param fn: path of a pickle file containing a ``(Ws, bs)`` pair.
    :return: the ``theano.function`` mapping the model input to its output.
    :raises ValueError: if the pickled object is not a 2-item sequence.
    """
    # NOTE: unpickling executes arbitrary code; only load trusted files.
    # The context manager closes the file even when unpickling fails.
    with open(fn, 'rb') as f:
        Ws, bs = pickle.load(f)

    Ws_s, bs_s = load.get_parameters(Ws=Ws, bs=bs)
    x, p = load.get_model(Ws_s, bs_s)

    predict = theano.function(inputs=[x], outputs=p)
    return predict
def get_model_from_pickle(fn):
    """Build a prediction function from pickled weights and biases.

    :param fn: path of a pickle file containing a ``(Ws, bs)`` pair.
    :return: the ``theano.function`` mapping the model input to its output.
    :raises ValueError: if the pickled object is not a 2-item sequence.
    """
    # NOTE: unpickling executes arbitrary code; only load trusted files.
    # Pickles are binary data: text mode breaks loading on Python 3 and can
    # corrupt reads on Windows, so the file is opened with 'rb'. The context
    # manager closes it even when unpickling fails.
    with open(fn, 'rb') as f:
        Ws, bs = pickle.load(f)

    Ws_s, bs_s = load.get_parameters(Ws=Ws, bs=bs)
    x, p = load.get_model(Ws_s, bs_s)

    predict = theano.function(inputs=[x], outputs=p)
    return predict
def train():
    """Train the network on random minibatches, checkpointing on improvement.

    Loads three train/test dataset pairs through ``get_data`` (keys ``'x'``,
    ``'xr'`` and ``'xp'``), prints the first sample of two of them as 8x8
    grids, creates fresh parameters for a 12 * 64 input network with three
    hidden layers of 2048 units, and then loops forever:

    * the learning rate starts at 0.03 and decays by a factor of ``e`` per
      86400 seconds of wall-clock time;
    * each iteration runs the update function on one randomly chosen
      minibatch and prints the returned losses;
    * every 200 iterations the test set is evaluated and, when the test loss
      beats the best seen so far, the parameter values are pickled to
      ``model.pickle``.

    All output uses single-argument ``print(...)`` calls, which behave the
    same under Python 2 and Python 3. This function never returns.
    """
    Xc_train, Xc_test, Xr_train, Xr_test, Xp_train, Xp_test = get_data(
        ['x', 'xr', 'xp'])

    # Sanity check: dump one sample from two of the datasets as 8x8 grids.
    for board in [Xc_train[0], Xp_train[0]]:
        for row in range(8):
            print(' '.join('%2d' % x for x in board[(row * 8):((row + 1) * 8)]))
        # Blank separator line; print('') works on both Python 2 and 3.
        print('')

    n_in = 12 * 64
    Ws_s, bs_s = load.get_parameters(n_in=n_in, n_hidden_units=[2048] * 3)

    minibatch_size = min(MINIBATCH_SIZE, Xc_train.shape[0])
    # Only whole minibatches are sampled; a trailing partial batch is ignored.
    n_minibatches = Xc_train.shape[0] // minibatch_size

    # Local names deliberately differ from this function's own name.
    train_fn = get_function(Ws_s, bs_s, update=True, dropout=False)
    test_fn = get_function(Ws_s, bs_s, update=False, dropout=False)

    def values(zs):
        # Extract the current value of every shared variable.
        return [z.get_value(borrow=True) for z in zs]

    best_test_loss = float('inf')
    base_learning_rate = 0.03
    t0 = time.time()

    i = 0
    while True:
        i += 1
        learning_rate = floatX(
            base_learning_rate * math.exp(-(time.time() - t0) / 86400))

        minibatch_index = random.randint(0, n_minibatches - 1)
        lo = minibatch_index * minibatch_size
        hi = (minibatch_index + 1) * minibatch_size

        loss, reg, loss_a, loss_b, loss_c = train_fn(
            Xc_train[lo:hi], Xr_train[lo:hi], Xp_train[lo:hi], learning_rate)

        zs = [loss, loss_a, loss_b, loss_c, reg]
        print('iteration %6d learning rate %12.9f: %s' %
              (i, learning_rate, '\t'.join(['%12.9f' % z for z in zs])))

        if i % 200 == 0:
            test_loss, test_reg, _, _, _ = test_fn(
                Xc_test, Xr_test, Xp_test, learning_rate)
            print('test loss %12.9f' % test_loss)

            if test_loss < best_test_loss:
                print('new record!')
                best_test_loss = test_loss

                print('dumping pickled model')
                # Pickle data is binary; 'wb' is required on Python 3 and
                # avoids newline translation elsewhere. The context manager
                # closes the file even if dumping fails.
                with open('model.pickle', 'wb') as f:
                    pickle.dump((values(Ws_s), values(bs_s)), f)
def get_model_from_pickle(fn):
    """
    Loads and returns model from pickled file which stores weights and biases.

    :param fn: filename
    :return: the theano function object representing the model.
    :raises ValueError: if the pickled object is not a 2-item sequence.
    """
    # NOTE: unpickling executes arbitrary code; only load trusted files.
    # The context manager closes the file even when unpickling fails.
    with open(fn, 'rb') as f:
        # Load weights Ws and biases bs.
        # NOTE(review): the original notes describe Ws as a list of nd-arrays
        # of shapes 768 x 2048, 2048 x 2048, and a 2048-dimensional vector
        # producing the single output value -- shapes are not checked here;
        # confirm against the file being loaded.
        Ws, bs = pickle.load(f, encoding='bytes')

    Ws_s, bs_s = load.get_parameters(Ws=Ws, bs=bs)
    x, p = load.get_model(Ws_s, bs_s)

    # predict is a function object computing outputs from inputs, based on
    # the graph returned by load.get_model.
    predict = theano.function(inputs=[x], outputs=p)
    return predict
def train():
    """Train the network on random minibatches, checkpointing on improvement.

    Runs forever: each iteration updates on one random minibatch and prints
    the losses; every 200 iterations the test set is evaluated and, when the
    test loss beats the best seen so far, the parameter values are pickled to
    ``model.pickle``.
    """
    # Get test and train data for each dataset key.
    # NOTE(review): the original notes describe these as parent, observed and
    # random moves -- confirm which key maps to which against get_data.
    Xc_train, Xc_test, Xr_train, Xr_test, Xp_train, Xp_test = get_data(
        ['x', 'xr', 'xp'])

    # Print the board representation to be passed in.
    for board in [Xc_train[0], Xp_train[0]]:
        for row in range(8):
            print(' '.join('%2d' % x for x in board[(row * 8):((row + 1) * 8)]))
        print("\n")

    n_in = 12 * 64

    # Get the parameters for training.
    Ws_s, bs_s = load.get_parameters(n_in=n_in, n_hidden_units=[2048] * 3)

    minibatch_size = min(MINIBATCH_SIZE, Xc_train.shape[0])
    # Only whole minibatches are sampled; a trailing partial batch is ignored.
    n_minibatches = Xc_train.shape[0] // minibatch_size

    # Build the updating (training) and non-updating (test) functions.
    # Local names deliberately differ from this function's own name.
    train_fn = get_function(Ws_s, bs_s, update=True, dropout=False)
    test_fn = get_function(Ws_s, bs_s, update=False, dropout=False)

    def values(zs):
        # The expression must sit on the ``return`` line: a bare ``return``
        # followed by the list on the next line would yield None and the
        # checkpoint would contain no parameters.
        return [z.get_value(borrow=True) for z in zs]

    # Set initial values for the best test loss, the base learning rate and
    # the iteration counter.
    best_test_loss = float('inf')
    base_learning_rate = 0.03
    t0 = time.time()

    i = 0
    # Train loop.
    while True:
        i += 1
        # Learning rate decays by a factor of e per 86400 s of wall-clock time.
        learning_rate = floatX(
            base_learning_rate * math.exp(-(time.time() - t0) / 86400))

        # Calculate the training loss on one random minibatch.
        minibatch_index = random.randint(0, n_minibatches - 1)
        lo = minibatch_index * minibatch_size
        hi = (minibatch_index + 1) * minibatch_size
        loss, reg, loss_a, loss_b, loss_c = train_fn(
            Xc_train[lo:hi], Xr_train[lo:hi], Xp_train[lo:hi], learning_rate)

        zs = [loss, loss_a, loss_b, loss_c, reg]
        # Print the learning rate and current loss.
        print("iteration %6d learning rate %12.9f: %s" %
              (i, learning_rate, '\t'.join(["%12.9f" % z for z in zs])))

        # Every 200 iterations check whether the test loss is better than the
        # best loss recorded.
        if i % 200 == 0:
            test_loss, test_reg, _, _, _ = test_fn(
                Xc_test, Xr_test, Xp_test, learning_rate)
            # Print the test loss.
            print("test loss %12.9f" % test_loss)

            # If the test loss improved, dump model parameters to model.pickle.
            if test_loss < best_test_loss:
                print("new record!")
                best_test_loss = test_loss

                print("dumping pickled model")
                # The context manager closes the file even if dumping fails.
                with open('model.pickle', 'wb') as f:
                    pickle.dump((values(Ws_s), values(bs_s)), f)
def training():
    """Train on four datasets with random minibatches, checkpointing on improvement.

    Loads four train/test dataset pairs through ``get_data`` (keys ``'x'``,
    ``'xp'``, ``'xd'`` and ``'xa'``), flattens every sample of the first two,
    creates fresh parameters for a 12 * 64 input network with three hidden
    layers of 2048 units, and then loops forever:

    * the learning rate starts at 0.03 and decays by a factor of ``e`` per
      86400 seconds of wall-clock time;
    * each iteration runs the update function on one randomly chosen
      minibatch, printing progress values and the returned losses;
    * every 200 iterations the test set is evaluated and, when the test loss
      beats the best seen so far, the parameter values are pickled to
      ``model.pickle``.

    Side effect: rebinds the module-level names ``train`` and ``test`` to the
    compiled update and evaluation functions. This function never returns.
    """
    global train, test

    (Xc_train, Xc_test, Xp_train, Xp_test,
     Xd_train, Xd_test, Xa_train, Xa_test) = get_data(['x', 'xp', 'xd', 'xa'])

    Xc_train = [x.flatten() for x in Xc_train]
    Xc_test = [x.flatten() for x in Xc_test]
    Xp_train = [x.flatten() for x in Xp_train]
    Xp_test = [x.flatten() for x in Xp_test]

    n_in = 12 * 64
    Ws_s, bs_s = load.get_parameters(n_in=n_in, n_hidden_units=[2048] * 3)

    minibatch_size = min(MINIBATCH_SIZE, len(Xp_train))
    # Only whole minibatches are sampled; a trailing partial batch is ignored.
    n_minibatches = len(Xp_train) // minibatch_size

    train = get_function(Ws_s, bs_s, update=True, dropout=False)
    test = get_function(Ws_s, bs_s, update=False, dropout=False)

    def values(zs):
        # Extract the current value of every shared variable.
        return [z.get_value(borrow=True) for z in zs]

    best_test_loss = float('inf')
    base_learning_rate = 0.03
    t0 = time.time()

    i = 0
    while True:
        i += 1
        learning_rate = floatX(
            base_learning_rate * math.exp(-(time.time() - t0) / 86400))
        print(learning_rate)

        minibatch_index = random.randint(0, n_minibatches - 1)
        print(minibatch_index)

        lo = minibatch_index * minibatch_size
        hi = (minibatch_index + 1) * minibatch_size
        print('%s %s' % (lo, hi))

        loss, reg, loss_a, loss_b, loss_c = train(
            Xc_train[lo:hi], Xp_train[lo:hi], Xd_train[lo:hi],
            Xa_train[lo:hi], learning_rate)
        print(loss)

        zs = [loss, loss_a, loss_b, loss_c, reg]
        print('iteration %6d learning rate %12.9f: %s' %
              (i, learning_rate, '\t'.join(['%12.9f' % z for z in zs])))

        if i % 200 == 0:
            # Evaluate with the same four inputs, in the same order, that the
            # update function receives.
            test_loss, test_reg, _, _, _ = test(
                Xc_test, Xp_test, Xd_test, Xa_test, learning_rate)
            print('test loss %12.9f' % test_loss)

            if test_loss < best_test_loss:
                print('new record!')
                best_test_loss = test_loss

                print('dumping pickled model')
                # Pickle data is binary; the context manager closes the file
                # even if dumping fails.
                with open('model.pickle', 'wb') as f:
                    pickle.dump((values(Ws_s), values(bs_s)), f)