def do_training(self, sess, cases, steps, continued=False):
    """Run ``steps`` training steps, feeding one minibatch per step.

    Minibatches are consecutive slices of ``cases`` of length
    ``self.minibatch_size``. Once the slice window reaches the end of
    ``cases`` the window restarts at index 0 and ``cases`` is shuffled
    in place, so the caller's sequence is reordered.

    Args:
        sess: Session forwarded to ``run_one_step`` and
            ``consider_validation_testing``.
        cases: Mutable sequence of cases; element 0 of each case is fed to
            ``self.input`` and element 1 to ``self.target``.
        steps: Number of minibatch steps to run.
        continued: When false, ``self.error_history`` is cleared first. The
            negated value is also passed to the plot as ``fig``.
    """
    if not continued:
        self.error_history = []

    gvars = [self.error] + self.grabvars
    mbs = self.minibatch_size
    ncases = len(cases)
    start_index = 0
    end_index = mbs

    # The way we select minibatches might be subject to change.
    for offset in range(steps):
        step = self.global_training_step + offset
        minibatch = cases[start_index:end_index]

        # Advance the window for the next step; wrap around and reshuffle
        # once the current window has reached the end of the case list.
        if end_index >= ncases:
            start_index = 0
            end_index = mbs
            np.random.shuffle(cases)
        else:
            start_index = end_index
            end_index += mbs

        inputs = [c[0] for c in minibatch]
        targets = [c[1] for c in minibatch]
        feeder = {self.input: inputs, self.target: targets}
        _, grabvals, _ = self.run_one_step(
            [self.trainer], gvars, self.probes, session=sess,
            feed_dict=feeder, step=step, show_interval=self.show_interval)

        # grabvals[0] corresponds to self.error, the first grabbed variable.
        self.error_history.append((step, grabvals[0]))
        self.consider_validation_testing(step, sess)

    self.global_training_step += steps
    TFT.plot_training_history(
        self.error_history, self.validation_history, xtitle="Steps",
        ytitle="Error", title="TRAINING HISTORY", fig=not continued)
def do_training(self, sess, cases, epochs=100, continued=False):
    """Train for ``epochs`` full passes over ``cases``.

    Every epoch walks ``cases`` front to back in slices of
    ``self.minibatch_size``, runs one training step per slice, and records
    the mean of the per-minibatch errors in ``self.error_history``.

    Args:
        sess: Session forwarded to ``run_one_step`` and
            ``consider_validation_testing``.
        cases: Sequence of cases; element 0 is the input, element 1 the target.
        epochs: Number of passes over ``cases``.
        continued: When false, ``self.error_history`` is cleared first. The
            negated value is also passed to the plot as ``fig``.
    """
    if not continued:
        self.error_history = []

    for epoch_offset in range(epochs):
        epoch_error = 0
        step = self.global_training_step + epoch_offset
        fetch_vars = [self.error] + self.grabvars
        batch_size = self.minibatch_size
        total_cases = len(cases)
        batch_count = math.ceil(total_cases / batch_size)

        # Loop through cases, one minibatch at a time.
        for lo in range(0, total_cases, batch_size):
            hi = min(total_cases, lo + batch_size)
            batch = cases[lo:hi]
            feeder = {
                self.input: [case[0] for case in batch],
                self.target: [case[1] for case in batch],
            }
            _, grabvals, _ = self.run_one_step(
                [self.trainer],
                fetch_vars,
                self.probes,
                session=sess,
                feed_dict=feeder,
                step=step,
                show_interval=self.show_interval,
            )
            epoch_error += grabvals[0]

        self.error_history.append((step, epoch_error / batch_count))
        self.consider_validation_testing(step, sess)

    self.global_training_step += epochs
    TFT.plot_training_history(
        self.error_history,
        self.validation_history,
        xtitle="Epoch",
        ytitle="Error",
        title="",
        fig=not continued,
    )
def do_training(self, sess, cases, steps, continued=False):
    """Run ``steps`` training steps, each on a freshly sampled minibatch.

    Each step draws ``self.minibatch_size`` distinct cases with
    ``random.sample`` (which raises ``ValueError`` when the minibatch size
    exceeds the number of cases), runs one training step, and appends
    ``(step, error)`` to ``self.error_history``.

    Args:
        sess: Session forwarded to ``run_one_step`` and
            ``consider_validation_testing``.
        cases: Iterable of cases; element 0 is the input, element 1 the target.
        steps: Number of minibatch steps to run.
        continued: When false, ``self.error_history`` is cleared first. The
            negated value is also passed to the plot as ``fig``.
    """
    if not continued:
        self.error_history = []

    for offset in range(steps):
        step_error = 0
        step = self.global_training_step + offset
        fetch_vars = [self.error] + self.grabvars
        batch_size = self.minibatch_size

        # Random selection of batch_size cases, without replacement.
        batch = random.sample(list(cases), batch_size)
        feeder = {
            self.input: [case[0] for case in batch],
            self.target: [case[1] for case in batch],
        }
        _, grabvals, _ = self.run_one_step(
            [self.trainer],
            fetch_vars,
            self.probes,
            session=sess,
            feed_dict=feeder,
            step=step,
            show_interval=self.show_interval,
        )
        step_error += grabvals[0]

        self.error_history.append((step, step_error))
        self.consider_validation_testing(step, sess)

    self.global_training_step += steps
    TFT.plot_training_history(
        self.error_history,
        self.validation_history,
        xtitle="Step",
        ytitle="Error",
        title="",
        fig=not continued,
    )
def do_training(self, epochs=100, test_interval=10, show_interval=50, mbs=100):
    """Train on ``self.training_cases`` in a freshly initialized session.

    A new session is created with ``TFT.gen_initialized_session`` and stored
    in ``self.current_session``. Each epoch feeds the training cases in
    order, ``mbs`` cases at a time. Every ``test_interval`` epochs the
    training set, and the validation set when it is non-empty, are run
    through ``do_testing``. After the last epoch the test set is run when it
    is non-empty, and ``self.train_error`` / ``self.val_error`` are plotted.

    Args:
        epochs: Number of passes over ``self.training_cases``.
        test_interval: Run ``do_testing`` every this many epochs; a falsy
            value disables the periodic testing.
        show_interval: Forwarded to ``run_one_step``.
        mbs: Minibatch size for training and for the final test-set run.
    """
    # Both lists are only reset here; presumably do_testing fills them --
    # TODO confirm.
    self.val_error = []
    self.train_error = []
    if test_interval:
        self.avg_vector_distances = []
    self.current_session = sess = TFT.gen_initialized_session()

    for i in range(epochs):
        self.current_epoch = i
        grabvars = [self.error]
        step = self.global_step + i
        ncases = len(self.training_cases)

        for cstart in range(0, ncases, mbs):
            cend = min(ncases, cstart + mbs)
            minibatch = self.training_cases[cstart:cend]
            feeder = {
                self.input: [c[0] for c in minibatch],
                self.target: [c[1] for c in minibatch],
            }
            # The grabbed error is not needed here; this method only plots
            # the histories in self.train_error / self.val_error.
            self.run_one_step(
                [self.trainer], grabvars, step=step,
                show_interval=show_interval, session=sess, feed_dict=feeder)

        if test_interval and i % test_interval == 0:
            self.do_testing(sess, scatter=False,
                            mbs=len(self.training_cases), testset="training")
            if len(self.validation_cases) != 0:
                self.do_testing(sess, scatter=False,
                                mbs=len(self.validation_cases),
                                testset="validation")

    if len(self.testing_cases) != 0:
        self.do_testing(sess, scatter=False, mbs=mbs, testset="testing")
    TFT.plot_training_history(self.train_error, self.val_error)
def do_training(self, sess, cases, epochs=100, continued=False):
    """Train for ``epochs`` full passes over ``cases``.

    Every epoch walks ``cases`` in slices of ``self.minibatch_size``. The
    error grabbed for a minibatch may be a scalar or a per-case sequence; a
    sequence is reduced to its mean before being added to the epoch total.
    The epoch's mean minibatch error is appended to ``self.error_history``.

    Args:
        sess: Session forwarded to ``run_one_step`` and
            ``consider_validation_testing``.
        cases: Sequence of cases; element 0 is the input, element 1 the target.
        epochs: Number of passes over ``cases``.
        continued: When false, ``self.error_history`` is cleared first. The
            negated value is also passed to the plot as ``fig``.
    """
    if not continued:
        self.error_history = []

    for i in range(epochs):
        error = 0
        step = self.global_training_step + i
        gvars = [self.error] + self.grabvars
        mbs = self.minibatch_size
        ncases = len(cases)
        nmb = math.ceil(ncases / mbs)

        # Loop through cases, one minibatch at a time.
        for cstart in range(0, ncases, mbs):
            cend = min(ncases, cstart + mbs)
            minibatch = cases[cstart:cend]
            inputs = [c[0] for c in minibatch]
            targets = [c[1] for c in minibatch]
            feeder = {self.input: inputs, self.target: targets}
            _, grabvals, _ = self.run_one_step(
                [self.trainer], gvars, self.probes, session=sess,
                feed_dict=feeder, step=step,
                show_interval=self.show_interval)

            # Need to account for all errors in a batch. Any zero-dimensional
            # value (np.float64, np.float32, Python float, ...) is a single
            # error; anything else is treated as one error per case and
            # averaged. An exact type check against np.float64 would push
            # other scalar types into the iteration branch, where they fail.
            batch_error = grabvals[0]
            if np.ndim(batch_error) == 0:
                error += batch_error
            else:
                error += float(sum(batch_error) / len(batch_error))

        if self.validation_interval > 0 and step % self.validation_interval == 0:
            print("\n--- Training ---\n")
            print("Epoch: ", step)
            print('%s Set Error = %f ' % ("Training", float(error / nmb)))

        self.error_history.append((step, error / nmb))
        self.consider_validation_testing(step, sess)

    self.global_training_step += epochs
    TFT.plot_training_history(
        self.error_history, self.validation_history, xtitle="Epoch",
        ytitle="Error", title="", fig=not continued)
def do_training(self, sess, cases, epochs=100, continued=False):
    """Run ``epochs`` training steps, each on one random minibatch.

    Despite the parameter name, each iteration trains on a single minibatch
    of ``self.minibatch_size`` distinct cases drawn with
    ``np.random.choice(..., replace=False)``, not on a full pass over the
    data. ``np.random.choice`` raises ``ValueError`` when the minibatch size
    exceeds the number of cases.

    Args:
        sess: Session forwarded to ``run_one_step`` and
            ``consider_validation_testing``.
        cases: Sequence of cases; element 0 is the input, element 1 the target.
        epochs: Number of single-minibatch iterations to run.
        continued: When false, ``self.error_history`` is cleared first. The
            plot is always opened with ``fig=True`` regardless of this flag.
    """
    if not continued:
        self.error_history = []

    # Convert once so index arrays can be used for selection; the conversion
    # does not depend on the loop variable.
    ncases = len(cases)
    cases = np.array(cases)

    for i in range(epochs):
        step = self.global_training_step + i
        # TODO: do we need this (the extra grabvars)?
        gvars = [self.error] + self.grabvars
        batch_size = self.minibatch_size

        idx = np.random.choice(ncases, batch_size, replace=False)
        minibatch = cases[idx]
        inputs = [c[0] for c in minibatch]
        targets = [c[1] for c in minibatch]
        feeder = {self.input: inputs, self.target: targets}
        _, grabvals, _ = self.run_one_step(
            [self.trainer], gvars, self.probes, session=sess,
            feed_dict=feeder, step=step, show_interval=self.show_interval)

        # grabvals[0] corresponds to self.error, the first grabbed variable.
        self.error_history.append((step, grabvals[0]))
        self.consider_validation_testing(step, sess)

    self.global_training_step += epochs
    TFT.plot_training_history(
        self.error_history, self.validation_history, xtitle="Epoch",
        ytitle="Error", title="History", fig=True)
def do_training(self, sess, cases, steps=100, continued=False):
    """Run ``steps`` training steps on minibatches sampled with replacement.

    Each minibatch is built by repeatedly picking a random index into
    ``cases`` until ``self.minibatch_size`` cases are collected, so a case
    can occur more than once. Steps are numbered from 0 on every call; no
    global step counter is read or updated here.

    Args:
        sess: Session forwarded to ``run_one_step`` and
            ``consider_validation_testing``.
        cases: Indexable sequence of cases; element 0 is the input, element 1
            the target.
        steps: Number of minibatch steps to run.
        continued: When false, ``self.error_history`` is cleared first. The
            negated value is also passed to the plot as ``fig``.
    """
    if not continued:
        self.error_history = []

    for step in range(steps):
        fetch_vars = [self.error] + self.grabvars
        batch_size = self.minibatch_size

        batch = []
        while len(batch) < batch_size:
            pick = random.randint(0, len(cases) - 1)
            batch.append(cases[pick])

        feeder = {
            self.input: [case[0] for case in batch],
            self.target: [case[1] for case in batch],
        }
        _, grabvals, _ = self.run_one_step(
            [self.trainer],
            fetch_vars,
            self.probes,
            session=sess,
            feed_dict=feeder,
            step=step,
        )
        self.error_history.append((step, grabvals[0]))
        self.consider_validation_testing(step, sess)

    TFT.plot_training_history(
        self.error_history,
        self.validation_history,
        xtitle="Steps",
        ytitle="Error",
        title="",
        fig=not continued,
    )
def do_training(self, sess, cases, epochs=100, continued=False, bestk=None):
    """Train for up to ``epochs`` passes over ``cases`` with optional schedules.

    Per epoch:
      * every ``self.lr_freq`` epochs (except epoch 0) ``self.learning_rate``
        is halved;
      * every ``self.bs_freq`` epochs (except epoch 0) ``self.minibatch_size``
        is doubled; the change persists on the instance;
      * ``cases`` is shuffled in place, then fed minibatch by minibatch;
      * the mean minibatch error is printed and stored in
        ``self.error_history``.
    When ``self.early_stopping`` is set, ``consider_early_stopping`` is
    consulted every 100 epochs (except epoch 0) and may end training early.

    Args:
        sess: Session forwarded to ``run_one_step`` and the validation /
            early-stopping hooks.
        cases: Mutable sequence of cases; element 0 is the input, element 1
            the target. It is reordered in place.
        epochs: Maximum number of passes over ``cases``.
        continued: When false, ``self.error_history`` is cleared first. The
            negated value is also passed to the plot as ``fig``.
        bestk: Forwarded to ``consider_early_stopping``.
    """
    if not continued:
        self.error_history = []

    # Number of epochs actually completed; differs from ``epochs`` when
    # early stopping breaks out of the loop.
    epochs_run = 0

    for i in range(epochs):
        # Decrease the learning rate after every self.lr_freq epochs.
        # NOTE(review): this only rebinds the attribute; whether an optimizer
        # that was already built picks up the new value is not visible here.
        if (self.lr_freq is not None) and (i % self.lr_freq == 0) and (i != 0):
            print("\n\n\n halving learning rate..! \n\n\n")
            self.learning_rate = self.learning_rate / 2
        if (self.bs_freq is not None) and (i % self.bs_freq == 0) and (i != 0):
            print("\n\n\n doubling batch size..! \n\n\n")
            self.minibatch_size = self.minibatch_size * 2

        error = 0
        step = self.global_training_step + i
        gvars = [self.error] + self.grabvars
        minibatch_size = self.minibatch_size
        num_cases = len(cases)
        num_minibatches = math.ceil(num_cases / minibatch_size)

        # Randomize before each epoch.
        np.random.shuffle(cases)

        # Loop through cases, one minibatch at a time.
        for c_start in range(0, num_cases, minibatch_size):
            c_end = min(num_cases, c_start + minibatch_size)
            minibatch = cases[c_start:c_end]
            inputs = [case[0] for case in minibatch]
            targets = [case[1] for case in minibatch]
            feeder = {self.input: inputs, self.target: targets}
            _, grabvals, _ = self.run_one_step(
                [self.trainer], gvars, self.probes, session=sess,
                feed_dict=feeder, step=step,
                display_interval=self.display_interval)
            error += grabvals[0]

        average_error = error / num_minibatches
        print("---Epoch: " + str(i))
        print("---Average error: " + str(average_error) + "\n")
        self.error_history.append((step, average_error))
        self.consider_validation_testing(step, sess)
        epochs_run = i + 1

        if self.early_stopping and i % 100 == 0 and i != 0:
            if self.consider_early_stopping(
                    sess, cases, bestk=bestk,
                    target_accuracy=self.target_accuracy):
                break

    # Advance by the epochs that were really run so a continued call resumes
    # at the next unused step, also after an early stop.
    self.global_training_step += epochs_run
    TFT.plot_training_history(
        self.error_history, self.validation_history, xtitle="Epoch",
        ytitle="Error", title="", fig=not continued)
def doTraining(self, sess, cases, epochs=100, continued=False):
    """Train for ``epochs`` full passes over ``cases``.

    Every epoch walks ``cases`` in slices of ``self.miniBatchSize``, runs one
    training step per slice, and appends the mean minibatch error to
    ``self.errorHistory``.

    Args:
        sess: Session forwarded to ``run_one_step`` and
            ``considerValidationTesting``.
        cases: Sequence of cases; element 0 is the input, element 1 the target.
        epochs: Number of passes over ``cases``.
        continued: When false, ``self.errorHistory`` is cleared first. The
            negated value is also passed to the plot as ``fig``.
    """
    if not continued:
        self.errorHistory = []

    for i in range(epochs):
        error = 0
        # Offset by the epoch index so every epoch gets its own step number.
        step = self.globalTrainingStep + i
        gvars = [self.error] + self.grabvars
        mbs = self.miniBatchSize
        nCases = len(cases)
        nmb = math.ceil(nCases / mbs)

        # Loop through cases, one minibatch at a time.
        for cstart in range(0, nCases, mbs):
            cend = min(nCases, cstart + mbs)
            minibatch = cases[cstart:cend]
            inputs = [c[0] for c in minibatch]
            targets = [c[1] for c in minibatch]
            feeder = {self.input: inputs, self.target: targets}
            _, grabvals, _ = self.run_one_step(
                [self.trainer], gvars, self.probes, session=sess,
                feed_dict=feeder, step=step, show_interval=self.showInterval)
            error += grabvals[0]

        self.errorHistory.append((step, error / nmb))
        self.considerValidationTesting(step, sess)

    self.globalTrainingStep += epochs
    # Plot the history this method just filled.
    # NOTE(review): self.validation_history is snake_case unlike the other
    # attributes used here -- confirm the attribute name on the class.
    tft.plot_training_history(
        self.errorHistory, self.validation_history, xtitle="Epoch",
        ytitle="Error", title="", fig=not continued)
def run(self):
    """Play ``self.numGames`` games with MCTS and train the actor net after each.

    For every game the replay buffer is emptied, moves are chosen by
    ``mcts.findNextMove`` until the state reports the game is over, and one
    training step is then run on the whole (shuffled) replay buffer. Win and
    start counts per player are printed at the end, the error history is
    plotted, the session is closed, and the method blocks on console input
    until something non-empty is entered.
    """
    print("Starting up.. Playing " + str(self.numGames) + " games:")
    # set save interval for actor network parameters
    # clear the replayBuffer
    # randomly init weights and biases for Actor network
    self.Anet.setupSession()
    self.Anet.error_history = []
    self.Anet.validation_history = []
    startNode = Node.Node(
        state=State.State(player=self.player, hexSize=self.hexSize))
    mcts = MCTS.MCTS(numberOfSimulationsPerMove=self.numSimulations,
                     hexTrainer=self,
                     Anet=self.Anet)
    player = startNode.getState().getPlayer()
    # NOTE(review): this binds a second name to the same node object; it is
    # not a copy. It only restores the start position if nothing mutates
    # that node during play -- confirm.
    startNodeCopy = startNode
    player1Wins = 0
    player2Wins = 0
    player1Starts = 0
    player2Starts = 0
    gc = 1  # game counter, used for printing and as x-value in the history
    # for a game in numberOfGames
    for game in range(0, self.numGames):
        # Start of a game
        # clear replay buffer
        # NOTE(review): nothing in this method appends to replayBuffer;
        # presumably MCTS does so through the hexTrainer=self reference --
        # confirm.
        self.replayBuffer = []
        # initialize gameboard to empty board
        startNode = startNodeCopy
        startingPlayer = startNode.getState().getPlayer()
        if startingPlayer == 1:
            player1Starts += 1
        else:
            player2Starts += 1
        print("\n\n\n --- Game number " + str(gc))
        # print starting state
        startNode.getState().getBoard().printBoard()
        while not startNode.getState().gameIsOver():
            player = startNode.getState().getPlayer()
            # use tree policy to search from root to leaf
            # use ANET to choose rollout actions from L to final state
            # perform mcts-backpropagation
            # next gamestate
            print("Player " + str(player) + "'s turn")
            print("legal moves:")
            print(startNode.getState().getBoard().getLegalMoves())
            nextNode = mcts.findNextMove(startNode, player, startingPlayer)
            # D = distribution of visitCounts along all arcs emanating from root
            # add case (root, D) to replayBuffer
            # choose actual move (action*) based on D
            # perform action* on root to produce successor state s*
            # update currentstate to s*
            # in mcts - retain subtree rooted at s*, discard everything else
            # rootnode = s*
            # TODO change this ?
            if self.verbose:
                nextNode.getState().getBoard().printBoard()
            if nextNode.getState().gameIsOver():
                # The player who made the move that ended the game is
                # counted as the winner.
                if self.verbose:
                    print("\nPlayer " + str(player) + " won! \n")
                if player == 1:
                    player1Wins += 1
                else:
                    player2Wins += 1
            startNode = nextNode
            if nextNode.getState().gameIsOver():
                break
        # Incremented before the history append below, so the first game is
        # logged with x-value 2.
        gc += 1
        # train ANET on random minibatch of cases from replayBuffer
        np.random.shuffle(self.replayBuffer)
        # TODO write a custom do_training method
        inputs = [case[0] for case in self.replayBuffer]
        targets = [case[1] for case in self.replayBuffer]
        print("inputs:")
        print(inputs)
        print("targets")
        print(targets)
        feeder = {self.Anet.input: inputs, self.Anet.target: targets}
        gvars = [self.Anet.error] + self.Anet.grabvars
        _, grabvals, _ = self.Anet.run_one_step(
            [self.Anet.trainer],
            gvars,
            session=self.Anet.current_session,
            feed_dict=feeder)
        error = grabvals[0]
        self.Anet.error_history.append((gc, error))
        # if gameNum %modulo saveinterval: save ANET parameters for later use in TOPP
        # next game
    # print result of all games
    # The percentages divide by self.numGames, so numGames == 0 raises
    # ZeroDivisionError here.
    print("\nPlayer 1 started " + str(player1Starts) + " games and won " +
          str(player1Wins) + " of " + str(self.numGames) + " games! " +
          str((player1Wins / self.numGames * 100)) + " % ")
    print("Player 2 started " + str(player2Starts) + " games and won " +
          str(player2Wins) + " of " + str(self.numGames) + " games! " +
          str((player2Wins / self.numGames * 100)) + " % ")
    print("\n")
    TFT.plot_training_history(self.Anet.error_history,
                              self.Anet.validation_history,
                              xtitle="Game",
                              ytitle="Error",
                              title="",
                              fig=True)
    self.Anet.close_current_session(view=False)
    # loop to keep program from closing at the end so we can view the graph;
    # an empty entry (just Enter) keeps it waiting
    x = ""
    while x == "":
        x = str(input("enter any key to quit"))
def train(dims=[11,40,20,6], activation_func='tanh', softmax=True, cost_func=CE,
          lr= 0.5, vint = 10, bint = 10, acc_lim = 0.95,
          initial_weight_range=[-0.1,0.1], data_source='gen_wine_cases',
          case_count=1, vfrac=0.1, tfrac=0.1, mbs=1277, map_bs=20, epochs=10000,
          show_layers=None, dendogram_layers=None, show=True, map_layers = [1, 2]):
    """Build a fully connected network, train it, and display diagnostics.

    The dataset is produced by the ``TFT`` function named ``data_source`` and
    wrapped in ``case_holder``. One layer is created per consecutive pair in
    ``dims``; the loss comes from ``error_funcs[cost_func]`` and is minimized
    with Adam at learning rate ``lr``. Training runs for at most ``epochs``
    iterations and stops early once the accuracy measured every ``bint``
    iterations reaches ``acc_lim``. Afterwards the accuracy history and the
    requested layer mappings, weights/biases and dendrograms are displayed.

    NOTE(review), all visible in this body:
      * ``activation_func`` only sets the default ``act`` of ``nn_layer``;
        every call below passes ``act=tf.nn.relu``, so it has no effect.
      * ``softmax`` and ``vint`` are never read.
      * ``val_acc`` is never appended to, so the plot receives an empty
        validation history.
      * ``dims``, ``initial_weight_range`` and ``map_layers`` use mutable
        default values; they are only read here.
    """
    # Training and validation accuracies
    train_acc= []
    val_acc = []

    # Import data
    dataset = getattr(TFT,data_source)(case_count=case_count)
    mnist = case_holder(dataset,tfrac=tfrac,vfrac=vfrac)

    sess = tf.InteractiveSession()
    # Create a multilayer model.

    # Input placeholders
    with tf.name_scope('input'):
        x = tf.placeholder(tf.float32, [None, dims[0]], name='x-input')
        y_ = tf.placeholder(tf.float32, [None, dims[-1]], name='y-input')

    # We can't initialize these variables to 0 - the network will get stuck.
    def weight_variable(shape):
        """Create a weight variable of ``shape``.

        Uses a truncated normal (stddev 0.1) when ``initial_weight_range`` is
        the string "scaled", otherwise a uniform draw between the two bounds.
        """
        if initial_weight_range == "scaled":
            initial = tf.truncated_normal(shape, stddev=0.1)
        else:
            # NOTE(review): the initial value is itself a tf.Variable, which
            # is wrapped in a second tf.Variable below, and maxval is passed
            # as a one-element list rather than a scalar -- confirm both are
            # intended.
            initial = tf.Variable(tf.random_uniform(shape=shape,minval=initial_weight_range[0],maxval=[initial_weight_range[1]]))
        return tf.Variable(initial)

    def bias_variable(shape):
        """Create a bias variable of ``shape`` initialized to 0.1."""
        initial = tf.constant(0.1, shape=shape)
        return tf.Variable(initial)

    def variable_summaries(var):
        """Attach mean, stddev, max, min and histogram summaries to ``var``."""
        with tf.name_scope('summaries'):
            mean = tf.reduce_mean(var)
            tf.summary.scalar('mean', mean)
            with tf.name_scope('stddev'):
                stddev = tf.sqrt(tf.reduce_mean(tf.square(var - mean)))
            tf.summary.scalar('stddev', stddev)
            tf.summary.scalar('max', tf.reduce_max(var))
            tf.summary.scalar('min', tf.reduce_min(var))
            tf.summary.histogram('histogram', var)

    def nn_layer(input_tensor, input_dim, output_dim, layer_name, act=getattr(tf.nn,activation_func)):
        """Create one dense layer and return its activations.

        Computes ``act(input_tensor @ weights + biases)``. It also sets up
        name scoping so that the resultant graph is easy to read, and adds
        summary ops for the weights, biases, pre-activations and activations.
        """
        # Adding a name scope ensures logical grouping of the layers in the graph.
        with tf.name_scope(layer_name):
            # This Variable will hold the state of the weights for the layer
            with tf.name_scope('weights'):
                weights = weight_variable([input_dim, output_dim])
                variable_summaries(weights)
            with tf.name_scope('biases'):
                biases = bias_variable([output_dim])
                variable_summaries(biases)
            with tf.name_scope('Wx_plus_b'):
                preactivate = tf.matmul(input_tensor, weights) + biases
                tf.summary.histogram('pre_activations', preactivate)
            activations = act(preactivate, name='activation')
            tf.summary.histogram('activations', activations)
            return activations

    # Chain the layers: layers[k] maps dims[k] -> dims[k + 1].
    previous_layer = x
    layers = []
    for i in range(1,len(dims)):
        layers.append(nn_layer(previous_layer,dims[i-1],dims[i],'layer'+str(i),act=tf.nn.relu))
        previous_layer = layers[-1]
    y = layers[-1]

    with tf.name_scope('error_func'):
        # The raw formulation of cross-entropy,
        #
        # tf.reduce_mean(-tf.reduce_sum(y_ * tf.log(tf.softmax(y)),
        #                               reduction_indices=[1]))
        #
        # can be numerically unstable.
        #
        # The original example used tf.nn.softmax_cross_entropy_with_logits on
        # the raw outputs of the nn_layer above; here the loss is looked up in
        # error_funcs by cost_func, and then averaged across the batch.
        diff = error_funcs[cost_func](y_,y)
        #diff = tf.nn.softmax_cross_entropy_with_logits(labels=y_, logits=y)
        with tf.name_scope('total'):
            cross_entropy = tf.reduce_mean(diff)
    tf.summary.scalar('cross_entropy', cross_entropy)

    with tf.name_scope('train'):
        train_step = tf.train.AdamOptimizer(learning_rate=lr).minimize(
            cross_entropy)

    with tf.name_scope('accuracy'):
        with tf.name_scope('correct_prediction'):
            correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(y_, 1))
        with tf.name_scope('accuracy'):
            accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
    tf.summary.scalar('accuracy', accuracy)

    # Merge all the summaries and write them out to netsaver_test/train and
    # netsaver_test/test.
    merged = tf.summary.merge_all()
    train_writer = tf.summary.FileWriter('netsaver_test'+ '/train', sess.graph)
    test_writer = tf.summary.FileWriter('netsaver_test' + '/test')
    tf.global_variables_initializer().run()

    # Train the model, and also write summaries.
    # Every bint-th step, measure accuracy on a training batch and write it
    # through test_writer.
    # All other steps, run train_step on training data, & add training summaries

    def feed_dict(train):
        """Make a TensorFlow feed_dict for the data split named by ``train``.

        Accepts "train" (next training minibatch of size ``mbs``), "test",
        "val", or "map" (the first ``map_bs`` training cases); any other
        value raises ``Exception``.
        """
        if train == "train":
            xs, ys = mnist.train_next_batch(size=mbs)
        elif train == 'test':
            xs, ys = mnist.test_features, mnist.test_labels
        elif train == 'val':
            xs, ys = mnist.validation_features, mnist.validation_labels
        elif train == 'map':
            xs, ys = mnist.train_features[:map_bs], mnist.train_labels[:map_bs]
        else:
            raise Exception
        return {x: xs, y_: ys}

    for i in range(epochs):
        if i % bint == 0:  # Record summaries and accuracy
            # NOTE(review): accuracy is evaluated on feed_dict('train') but
            # the summary is written with test_writer -- confirm intent.
            summary, acc = sess.run([merged, accuracy], feed_dict=feed_dict('train'))
            test_writer.add_summary(summary, i)
            print('Accuracy at step %s: %s' % (i, acc))
            # Own code for pulling training accuracy to matplot graph
            train_acc.append([i,acc])
            if acc >= acc_lim:
                break
        else:  # Record train set summaries, and train
            if i % 100 == 99:  # Record execution stats
                run_options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)
                run_metadata = tf.RunMetadata()
                summary, _ = sess.run([merged, train_step],
                                      feed_dict=feed_dict('train'),
                                      options=run_options,
                                      run_metadata=run_metadata)
                train_writer.add_run_metadata(run_metadata, 'step%03d' % i)
                train_writer.add_summary(summary, i)
                print('Adding run metadata for', i)
            else:  # Record a summary
                summary, _ = sess.run([merged, train_step], feed_dict=feed_dict('train'))
                train_writer.add_summary(summary, i)
    train_writer.close()
    test_writer.close()

    # Display final scores (measured on a training batch)
    summary, acc = sess.run([merged, accuracy], feed_dict=feed_dict('train'))
    print('Final training set accuracy: %s' % ( acc))

    # Code for displaying graphs
    if show:
        TFT.plot_training_history(train_acc,val_acc)

    if map_layers:
        # Indices refer to positions in `layers`, i.e. 0 is the first layer
        # after the input.
        for l in map_layers:
            _, activation = sess.run([merged,layers[l]],feed_dict=feed_dict('map'))
            TFT.display_matrix(activation, title="mapping of layer: "+ str(l))

    # for variable in tf.trainable_variables():
    #     if variable.name in real-map_layer:
    #         _,values = sess.run([merged,variable],feed_dict=feed_dict('map'))
    #         if 'weigths' in variable.name:
    #             TFT.display_matrix(values)
    #         elif 'biases' in variable.name:
    #             TFT.display_vector(values)
    #         else:
    #             raise Exception("wrong dimensionality on show layers")

    if show_layers:
        # show_layers holds variable names; 2-D values are shown as weight
        # matrices and 1-D values as bias vectors.
        for variable in tf.trainable_variables():
            if variable.name in show_layers:
                _,values = sess.run([merged,variable],feed_dict=feed_dict('map'))
                if len(values.shape) == 2:
                    TFT.display_matrix(values, title="weights of: "+variable.name)
                elif len(values.shape) == 1:
                    TFT.display_vector(values, title="biases of: "+variable.name)
                else:
                    raise Exception("wrong dimensionality on map layers")

    if dendogram_layers:
        for l in dendogram_layers:
            _, activation = sess.run([merged,layers[l]],feed_dict=feed_dict('map'))
            y_s = []  # only used by the commented-out labelling code below
            #for y in feed_dict('map')[x]:
            #    y_s.append(TFT.segmented_vector_string(y))
            TFT.dendrogram(activation,feed_dict('map')[y_], title="Dendogram, layer: "+str(l))
    PLT.show()
def run(self):
    """Play ``self.numberOfGames`` games, training the network after each one.

    For every game a fresh copy of ``self.rootNode`` is played to the end
    with moves from ``MCST.findNextMove``. Afterwards the replay buffer is
    shuffled in place and one training step is run on its first
    ``self.batchSize`` cases. When ``self.savedGames`` is not None the
    network parameters are saved before the first game and then at regular
    intervals. Timing information is printed after every game. Finally the
    win statistics are printed, the error history is plotted when
    ``self.graph`` is set, the session is closed, and the method blocks on
    console input until something non-empty is entered.
    """
    print("Starting up playing "+str(self.numberOfGames)+" games: ")
    winsPlayer1 = 0
    winsPlayer2 = 0
    self.anet.error_history = []
    self.anet.validation_history = []
    self.replayBuffer = []

    # Save the policy after 0 episodes.
    if self.savedGames is not None:
        self.anet.save_session_params(self.savePath, self.anet.current_session, 0)
        print("saving game after "+str(0)+" episodes as "+ str(0))

    totalTime = 0
    for i in range(self.numberOfGames):
        t0 = time.time()
        currentNode = deepcopy(self.rootNode)
        mcst = MCST(currentNode, self.anet, self.replayBuffer, self.numberOfSimulations)

        # Drop the first 400 cases once the buffer holds more than 4000.
        # The buffer is shuffled after every game, so these are not
        # necessarily the oldest cases. The list is trimmed in place so that
        # the MCST created above and self.replayBuffer keep referring to the
        # same list object.
        if len(self.replayBuffer) > 4000:
            del self.replayBuffer[:400]

        print("\nGame "+str(i))
        while not currentNode.state.isOver():
            playerToMove = currentNode.state.player
            nextNode = mcst.findNextMove(currentNode)
            if self.verbose:
                nextNode.state.printBoard()
            if nextNode.state.isOver():
                # The player who made the final move is counted as winner.
                if self.verbose:
                    print("\nPlayer " + str(playerToMove) + " wins \n")
                if playerToMove == 1:
                    winsPlayer1 += 1
                else:
                    winsPlayer2 += 1
            currentNode = nextNode
            # Detach the new root from its parent.
            currentNode.parent = None

        # ************** training of anet ************
        np.random.shuffle(self.replayBuffer)
        inputs = [case[0] for case in self.replayBuffer]
        targets = [case[1] for case in self.replayBuffer]
        feeder = {self.anet.input: inputs[:self.batchSize],
                  self.anet.target: targets[:self.batchSize]}
        gvars = [self.anet.error]
        _, error, _ = self.anet.run_one_step(
            [self.anet.trainer],
            grabbed_vars=gvars,
            session=self.anet.current_session,
            feed_dict=feeder,
            display_interval=0
        )
        self.anet.error_history.append((i, error[0]))

        # ************* Saving session params **********
        # NOTE(review): saveInterval is a float; the modulo test below only
        # fires when numberOfGames is an exact multiple of (savedGames - 1),
        # and savedGames == 1 raises ZeroDivisionError -- confirm the
        # accepted values of savedGames.
        if self.savedGames is not None:
            saveInterval = self.numberOfGames/(self.savedGames-1)
            if (i+1) % saveInterval == 0:
                savedGameNum = int((i+1)/saveInterval)
                print("saving game after "+str(i+1)+" episodes as "+ str(savedGameNum))
                self.anet.save_session_params(self.savePath, self.anet.current_session, savedGameNum)

        t1 = time.time()
        gameTime = t1-t0
        totalTime += gameTime
        # Estimate based on the duration of the most recent game only.
        estimatedTimeLeft = (gameTime) * (self.numberOfGames-1-i)
        print("\nLast game time: "+str(datetime.timedelta(seconds=int(gameTime))))
        print("Total time: "+str(datetime.timedelta(seconds=int(totalTime))))
        print("Estimated time left: "+str(datetime.timedelta(seconds=int(estimatedTimeLeft))))

    print("player 1 wins {} out of {} games: {} percent".format(winsPlayer1, self.numberOfGames, 100*winsPlayer1/self.numberOfGames))
    print("player 2 wins {} out of {} games: {} percent".format(winsPlayer2, self.numberOfGames, 100*winsPlayer2/self.numberOfGames))
    if self.graph:
        TFT.plot_training_history(self.anet.error_history, self.anet.validation_history,xtitle="Game",ytitle="Error", title="",fig=True)
    self.anet.close_current_session(view=False)

    # Loop to keep the program from closing at the end so we can view the
    # graph; an empty entry (just Enter) keeps it waiting.
    x = ""
    while x == "":
        x = str(input("enter any key to quit"))
def run(self):
    """Play ``self.numberOfGames`` games, training the network after each one.

    A session is set up first and the untrained parameters are saved as
    checkpoint 0. Each game is played from a fresh copy of ``self.rootNode``
    with moves from ``MCST.findNextMove``; afterwards the replay buffer is
    shuffled in place and one training step is run on its first
    ``self.batchSize`` cases. When ``self.savedGames`` is not None the
    parameters are also saved at regular intervals. Finally the win
    statistics are printed, the error history is plotted, and the session is
    closed.
    """
    print("Starting up playing "+str(self.numberOfGames)+" games: ")
    wins = {1: 0, 2: 0}  # wins[2] collects every winner that is not player 1
    self.anet.setupSession()
    self.anet.error_history = []
    self.anet.validation_history = []
    self.replayBuffer = []

    # Saving the policy for 0 episodes.
    self.anet.save_session_params(self.savePath, self.anet.current_session, 0)
    print("saving game after "+str(0)+" episodes as "+ str(0))

    for game in range(self.numberOfGames):
        node = deepcopy(self.rootNode)
        search = MCST(node, self.anet, self.replayBuffer, self.numberOfSimulations)
        print("\nGame "+str(game))

        while not node.state.isOver():
            mover = node.state.player
            successor = search.findNextMove(node)
            if self.verbose:
                successor.state.printBoard()
            if successor.state.isOver():
                # Whoever made the final move is counted as the winner.
                if self.verbose:
                    print("\nPlayer " + str(mover) + " wins \n")
                wins[1 if mover == 1 else 2] += 1
            node = successor
            node.parent = None

        # ---- training of anet ----
        np.random.shuffle(self.replayBuffer)
        all_inputs = [case[0] for case in self.replayBuffer]
        all_targets = [case[1] for case in self.replayBuffer]
        feeder = {
            self.anet.input: all_inputs[:self.batchSize],
            self.anet.target: all_targets[:self.batchSize],
        }
        grabbed = [self.anet.error]
        _, error, _ = self.anet.run_one_step(
            [self.anet.trainer],
            grabbed_vars=grabbed,
            session=self.anet.current_session,
            feed_dict=feeder,
            display_interval=0,
        )
        self.anet.error_history.append((game, error[0]))

        # ---- saving session params ----
        if self.savedGames is None:
            continue
        saveInterval = self.numberOfGames/(self.savedGames-1)
        played = game + 1
        if played % saveInterval == 0:
            savedGameNum = int(played/saveInterval)
            print("saving game after "+str(played)+" episodes as "+ str(savedGameNum))
            self.anet.save_session_params(self.savePath, self.anet.current_session, savedGameNum)

    print("player 1 wins {} out of {} games: {} percent".format(wins[1], self.numberOfGames, 100*wins[1]/self.numberOfGames))
    print("player 2 wins {} out of {} games: {} percent".format(wins[2], self.numberOfGames, 100*wins[2]/self.numberOfGames))
    TFT.plot_training_history(
        self.anet.error_history,
        self.anet.validation_history,
        xtitle="Game",
        ytitle="Error",
        title="",
        fig=True,
    )
    self.anet.close_current_session(view=False)
def run(self):
    """Play ``self.batch`` games, training the ANET after each one.

    Every move, ``self.find_move`` is run from the current root and the child
    with the most visits becomes the next root. The children's visit counts
    are spread over the positions flagged in ``indexes``, normalized, and
    stored together with the simplified board as one replay-buffer case.
    After each game the buffer is shuffled in place and one training step is
    run on its first ``self.minibatch_size`` cases. Parameters are saved
    before the first game and, when ``self.number_of_saved_agents`` is
    truthy, every ``self.save_interval`` games. At the end the win statistics
    are printed, the error history is plotted and the session is closed.
    """
    total_wins_player1 = 0
    total_wins_player2 = 0
    mix = False
    self.ANET.setupSession()
    self.ANET.error_history = []
    self.ANET.validation_history = []
    # 'mix' means the starting player is drawn at random for every game.
    # The draw below overwrites self.starting_player, so the 'mix' setting is
    # only remembered in the local flag.
    if self.starting_player == 'mix':
        mix = True
    # Save ANET state before training
    self.ANET.save_session_params(self.saved_folder,
                                  self.ANET.current_session, 0)
    print("Saved game after 0 episodes")
    # clear replay buffer
    self.replay_buffer = []
    for i in range(0, self.batch):
        # Drop the first 1000 cases once the buffer holds more than 2000.
        if len(self.replay_buffer) > 2000:
            self.replay_buffer = self.replay_buffer[1000:]
        if mix:
            self.starting_player = random.randint(1, 2)
        # print("Starting player is: ", self.starting_player)
        root_node = node.Node(parent=None,
                              state=gamestate.GameState(
                                  player=self.starting_player,
                                  dimensions=self.hex_dimensions))
        # empty board
        root_node.state.initialize_hexboard()
        batch_player = self.starting_player
        game_over = False
        while not game_over:
            # presumably a 0/1 flag per board position marking which
            # positions have a child state -- verify against
            # next_node_states
            indexes = root_node.state.next_node_states()[1]
            # initialize to same state as root
            batch_node = self.find_move(root_node, self.simulations,
                                        batch_player)
            next_move = None
            highest_visits = -float('inf')
            visit_counts = []
            # Collect every child's visit count and remember the child with
            # the most visits (the first one wins ties).
            for child in batch_node.child_nodes:
                visits = child.get_visits()
                visit_counts.append(child.get_visits())
                # Printing: flattens the child's board; the result is only
                # used by the commented-out print below.
                children = [x for x in child.state.hexBoard]
                new_children = []
                for thing in children:
                    for another in thing:
                        new_children.append(another)
                # print("Child " + str([x.value for x in new_children]) + " had ratio " + str(
                #     visits) + " with wins/visits " + str(
                #     child.get_wins()) + " / " + str(child.get_visits()))
                if visits > highest_visits:
                    highest_visits = visits
                    next_move = child
            if self.verbose:
                next_move.state.print_hexboard()
            # add case
            case = []
            visit_distribution = []
            case.append(
                batch_node.state.complex_to_simple_hexboard(
                    batch_node.state.hexBoard))
            # Consume the visit counts in child order for every flagged
            # position; unflagged positions get 0. One is subtracted from
            # each count (reason not visible here).
            for index in indexes:
                if index == 1:
                    visit_distribution.append(visit_counts.pop(0) - 1)
                else:
                    visit_distribution.append(0)
            # one_hot_visit_distribution = [0] * len(visit_distribution)
            normalized_visit_distribution = []
            # generates one_hot list
            # (max_value is only used by the commented-out one-hot code)
            max_value = max(visit_distribution)
            # max_index = visit_distribution.index(max_value)
            # one_hot_visit_distribution[max_index] = 1
            # generates normalized list
            # NOTE(review): the sum is recomputed for every element, and a
            # sum of 0 raises ZeroDivisionError.
            for value in visit_distribution:
                normalized_visit_distribution.append(
                    value / sum(visit_distribution))
            # add case root, D to replay buffer
            case.append(normalized_visit_distribution)
            self.replay_buffer.append(case)
            # choose actual move
            root_node = next_move
            # Already switching when generating kids
            # root_node.state.switch_player(root_node.state.get_player())
            # root_node.state.print_hexboard()
            if root_node.get_state().game_over():
                # The state's player is the one who would move next, so the
                # winner is the other player (players are numbered 1 and 2).
                winner = 3 - root_node.get_state().get_player()
                if self.verbose:
                    print("Player " + str(winner) + " wins.")
                    print("")
                if winner == 1:
                    total_wins_player1 += 1
                if winner == 2:
                    total_wins_player2 += 1
                game_over = True
        # do training
        np.random.shuffle(self.replay_buffer)
        inputs = [c[0] for c in self.replay_buffer]
        targets = [c[1] for c in self.replay_buffer]
        feeder = {
            self.ANET.input: inputs[:self.minibatch_size],
            self.ANET.target: targets[:self.minibatch_size]
        }
        # The error variable is passed on its own (not in a list), and the
        # grabbed result is stored in the history as returned.
        gvars = self.ANET.error
        _, grabvals, _ = self.ANET.run_one_step(
            [self.ANET.trainer],
            gvars,
            self.ANET.probes,
            session=self.ANET.current_session,
            feed_dict=feeder,
            show_interval=0)
        self.ANET.error_history.append((i, grabvals))
        # Save ANET-params
        if self.number_of_saved_agents:
            if (i + 1) % self.save_interval == 0:
                self.ANET.save_session_params(self.saved_folder,
                                              self.ANET.current_session,
                                              (i + 1))
                print("Saved game after ", i + 1, " episodes")
        self.ANET_CM.cases = self.replay_buffer
        print("Game {} of {} is over".format(i + 1, self.batch))
        print("")
    # The percentages divide by self.batch, so batch == 0 raises
    # ZeroDivisionError here.
    print("Player 1" + " won " + str(total_wins_player1) + " times out of " +
          str(self.batch) + " batches." + " (" +
          str(100 * total_wins_player1 / self.batch) + "%)")
    print("Player 2" + " won " + str(total_wins_player2) + " times out of " +
          str(self.batch) + " batches." + " (" +
          str(100 * total_wins_player2 / self.batch) + "%)")
    TFT.plot_training_history(self.ANET.error_history,
                              self.ANET.validation_history,
                              xtitle="Game",
                              ytitle="Error",
                              title="",
                              fig=True)
    self.ANET.close_current_session(view=False)