Example no. 1
 def do_training(self, sess, cases, steps, continued=False):
     if not continued: self.error_history = []
     error = 0
     gvars = [self.error] + self.grabvars
     mbs = self.minibatch_size
     ncases = len(cases)
     nmb = math.ceil(ncases / mbs)
     start_index = 0
     end_index = mbs
     # The way we select minibatches might be subject to change.
     for cstart in range(0, steps):  # Loops through steps and sends one minibatch through per iteration
         step = self.global_training_step + cstart
         minibatch = cases[start_index:end_index]
         if end_index >= ncases:
             start_index = 0
             end_index = mbs
             np.random.shuffle(cases)
         else:
             start_index = end_index
             end_index += mbs
         inputs = [c[0] for c in minibatch]
         targets = [c[1] for c in minibatch]
         feeder = {self.input: inputs, self.target: targets}
         _, grabvals, _ = self.run_one_step([self.trainer], gvars, self.probes, session=sess,
                                            feed_dict=feeder, step=step, show_interval=self.show_interval)
         error += grabvals[0]
         self.error_history.append((step, grabvals[0]))
         self.consider_validation_testing(step, sess)
     self.global_training_step += steps
     TFT.plot_training_history(self.error_history, self.validation_history, xtitle="Steps", ytitle="Error",
                               title="TRAINING HISTORY", fig=not continued)
Example no. 2
 def do_training(self, sess, cases, epochs=100, continued=False):
     if not (continued): self.error_history = []
     for i in range(epochs):
         error = 0
         step = self.global_training_step + i
         gvars = [self.error] + self.grabvars
         mbs = self.minibatch_size
         ncases = len(cases)
         nmb = math.ceil(ncases / mbs)
         for cstart in range(
                 0, ncases,
                 mbs):  # Loop through cases, one minibatch at a time.
             cend = min(ncases, cstart + mbs)
             minibatch = cases[cstart:cend]
             inputs = [c[0] for c in minibatch]
             targets = [c[1] for c in minibatch]
             feeder = {self.input: inputs, self.target: targets}
             _, grabvals, _ = self.run_one_step(
                 [self.trainer],
                 gvars,
                 self.probes,
                 session=sess,
                 feed_dict=feeder,
                 step=step,
                 show_interval=self.show_interval)
             error += grabvals[0]
         self.error_history.append((step, error / nmb))
         self.consider_validation_testing(step, sess)
     self.global_training_step += epochs
     TFT.plot_training_history(self.error_history,
                               self.validation_history,
                               xtitle="Epoch",
                               ytitle="Error",
                               title="",
                               fig=not (continued))
Example no. 3
 def do_training(self, sess, cases, steps, continued=False):
     if not (continued): self.error_history = []
     for i in range(steps):
         error = 0
         step = self.global_training_step + i
         gvars = [self.error] + self.grabvars
         mbs = self.minibatch_size
         minibatch = random.sample(list(cases),
                                   mbs)  # randomly selected of size mbs
         inputs = [c[0] for c in minibatch]
         targets = [c[1] for c in minibatch]
         feeder = {self.input: inputs, self.target: targets}
         _, grabvals, _ = self.run_one_step(
             [self.trainer],
             gvars,
             self.probes,
             session=sess,
             feed_dict=feeder,
             step=step,
             show_interval=self.show_interval)
         error += grabvals[0]
         self.error_history.append((step, error))
         self.consider_validation_testing(step, sess)
     self.global_training_step += steps
     TFT.plot_training_history(self.error_history,
                               self.validation_history,
                               xtitle="Step",
                               ytitle="Error",
                               title="",
                               fig=not (continued))
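Example no. 3 draws each step's minibatch with random.sample, so there are no duplicates inside a single batch (cases can still repeat across steps); Example no. 7 below samples with replacement instead. A tiny sketch of both draws on dummy data (illustrative only):

import random

cases = [([i], [i % 2]) for i in range(100)]    # dummy (input, target) pairs
mbs = 8

batch_without_replacement = random.sample(cases, mbs)     # as in Example no. 3
batch_with_replacement = random.choices(cases, k=mbs)     # equivalent to Example no. 7's loop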
Example no. 4
    def do_training(self,
                    epochs=100,
                    test_interval=10,
                    show_interval=50,
                    mbs=100):
        errors = []
        self.val_error = []
        self.train_error = []
        if test_interval: self.avg_vector_distances = []
        self.current_session = sess = TFT.gen_initialized_session()
        for i in range(epochs):
            self.current_epoch = i
            error = 0
            grabvars = [self.error]
            step = self.global_step + i
            ncases = len(self.training_cases)
            nmb = math.ceil(ncases / mbs)
            for cstart in range(0, ncases, mbs):
                cend = min(ncases, cstart + mbs)
                minibatch = self.training_cases[cstart:cend]
                feeder = {
                    self.input: [c[0] for c in minibatch],
                    self.target: [c[1] for c in minibatch]
                }
                _, grabvals, _ = self.run_one_step([self.trainer],
                                                   grabvars,
                                                   step=step,
                                                   show_interval=show_interval,
                                                   session=sess,
                                                   feed_dict=feeder)
                error += grabvals[0]

            errors.append([i, error])
            if (test_interval and i % test_interval == 0):
                self.do_testing(sess,
                                scatter=False,
                                mbs=len(self.training_cases),
                                testset="training")
                if (len(self.validation_cases) != 0):
                    self.do_testing(sess,
                                    scatter=False,
                                    mbs=len(self.validation_cases),
                                    testset="validation")

        if (len(self.testing_cases) != 0):
            self.do_testing(sess, scatter=False, mbs=mbs, testset="testing")

        #TFT.simple_plot(errors,xtitle="Epoch",ytitle="Error",title="")

        TFT.plot_training_history(self.train_error, self.val_error)
Example no. 5
 def do_training(self, sess, cases, epochs=100, continued=False):
     if not (continued):
         self.error_history = []
     for i in range(epochs):
         error = 0
         step = self.global_training_step + i
         gvars = [self.error] + self.grabvars
         mbs = self.minibatch_size
         ncases = len(cases)
         nmb = math.ceil(ncases / mbs)
         for cstart in range(
                 0, ncases,
                 mbs):  # Loop through cases, one minibatch at a time.
             #print("Training on case: [" + str(cstart) + "/" + str(ncases) + "]")
             cend = min(ncases, cstart + mbs)
             minibatch = cases[cstart:cend]
             inputs = [c[0] for c in minibatch]
             targets = [c[1] for c in minibatch]
             feeder = {self.input: inputs, self.target: targets}
             _, grabvals, _ = self.run_one_step(
                 [self.trainer],
                 gvars,
                 self.probes,
                 session=sess,
                 feed_dict=feeder,
                 step=step,
                 show_interval=self.show_interval)
             # Need to account for all errors in a batch:
             batch_error = 0
             if np.ndim(grabvals[0]) == 0:  # scalar error (works for float32 and float64)
                 error += grabvals[0]
             else:
                 for e in grabvals[0]:
                     batch_error += e
                 error += float(batch_error / len(grabvals[0]))
         if ((self.validation_interval > 0)
                 and step % self.validation_interval == 0):
             print("\n--- Training ---\n")
             print("Epoch: ", step)
             print('%s Set Error = %f ' % ("Training", float(error / nmb)))
         self.error_history.append((step, error / nmb))
         self.consider_validation_testing(step, sess)
     self.global_training_step += epochs
     TFT.plot_training_history(self.error_history,
                               self.validation_history,
                               xtitle="Epoch",
                               ytitle="Error",
                               title="",
                               fig=not (continued))
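Example no. 5 has to handle run_one_step returning either a scalar error or a per-case vector of errors. A small helper expressing the same reduction with NumPy (a sketch; the function name is illustrative):

import numpy as np

def as_mean_error(grabbed_error):
    """Reduce the grabbed error to one scalar, whether the session returned a
    scalar or a per-case vector of errors."""
    arr = np.asarray(grabbed_error, dtype=float)
    return float(arr) if arr.ndim == 0 else float(arr.mean())

# inside the minibatch loop this would replace the type check:
# error += as_mean_error(grabvals[0])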
Example no. 6
    def do_training(self, sess, cases, epochs=100, continued=False):

        if not continued:
            self.error_history = []

        for i in range(epochs):
            # shuffle dataset each epoch
            #np.random.shuffle(cases)

            error = 0
            step = self.global_training_step + i
            gvars = [self.error] + self.grabvars  # DO WE NEED THIS?
            batch_size = self.minibatch_size
            ncases = len(cases)
            nmb = math.ceil(ncases / batch_size)

            #for cstart in range(0, ncases, batch_size):  # Loop through cases, one minibatch at a time.
            #    cend = min(ncases, cstart+batch_size)
            #    minibatch = cases[cstart:cend]
            idx = np.random.choice(ncases, batch_size, replace=False)

            cases = np.array(cases)
            minibatch = cases[idx]

            inputs = [c[0] for c in minibatch]
            targets = [c[1] for c in minibatch]
            feeder = {self.input: inputs, self.target: targets}
            _, grabvals, _ = self.run_one_step(
                [self.trainer],
                gvars,
                self.probes,
                session=sess,
                feed_dict=feeder,
                step=step,
                show_interval=self.show_interval)

            error += grabvals[0]

            self.error_history.append((step, error))
            self.consider_validation_testing(step, sess)

        self.global_training_step += epochs

        TFT.plot_training_history(self.error_history,
                                  self.validation_history,
                                  xtitle="Epoch",
                                  ytitle="Error",
                                  title="History",
                                  fig=True)
Example no. 7
 def do_training(self, sess, cases, steps=100, continued=False):
     if not continued:
         self.error_history = []
     for step in range(steps):
         gvars = [self.error] + self.grabvars
         mbs = self.minibatch_size
         # Sample mbs cases uniformly at random, with replacement (duplicates possible).
         minibatch = [cases[random.randint(0, len(cases) - 1)] for _ in range(mbs)]
         inputs = [c[0] for c in minibatch]
         targets = [c[1] for c in minibatch]
         feeder = {self.input: inputs, self.target: targets}
         _, grabvals, _ = self.run_one_step([self.trainer], gvars, self.probes, session=sess,
                                            feed_dict=feeder, step=step)
         self.error_history.append((step, grabvals[0]))
         self.consider_validation_testing(step, sess)
     TFT.plot_training_history(self.error_history, self.validation_history,
                               xtitle="Steps", ytitle="Error", title="", fig=not continued)
Example no. 8
    def do_training(self, sess, cases, epochs=100, continued=False, bestk=None):
        if not(continued): self.error_history = []
        for i in range(epochs):

            #decrease learning rate after every self.lr_freq epochs
            if((self.lr_freq is not None) and ((i % self.lr_freq) == 0) and ( i != 0)):
                print("\n\n\n halving learning rate..! \n\n\n")
                self.learning_rate = self.learning_rate / 2
            
            if((self.bs_freq is not None) and ((i % self.bs_freq) == 0) and ( i != 0)):
                print("\n\n\n doubling batch size..! \n\n\n")
                self.minibatch_size = self.minibatch_size * 2

            ## add functionality for increasing the batch size every epoch?

            error = 0; step = self.global_training_step + i
            gvars = [self.error] + self.grabvars
            minibatch_size = self.minibatch_size; num_cases = len(cases); num_minibatches = math.ceil(num_cases/minibatch_size)
            
            #randomize before each epoch
            np.random.shuffle(cases)
            for c_start in range(0, num_cases, minibatch_size): # Loop through cases, one minibatch at a time.
                c_end = min(num_cases, c_start + minibatch_size)
                minibatch = cases[c_start:c_end]
                inputs = [case[0] for case in minibatch]; targets = [case[1] for case in minibatch] 
                feeder = {self.input: inputs, self.target: targets}
                _,grabvals,_ = self.run_one_step([self.trainer], gvars, self.probes, session=sess, 
                                feed_dict=feeder, step=step, display_interval=self.display_interval)
                error += grabvals[0]
            print("---Epoch: " + str(i))
            print("---Average error: " + str(error/num_minibatches) + "\n")
            self.error_history.append((step, error/num_minibatches))

            self.consider_validation_testing(step, sess)
            if self.early_stopping and i % 100 == 0 and i != 0:
                if self.consider_early_stopping(sess, cases, bestk=bestk, target_accuracy=self.target_accuracy):
                    break
        self.global_training_step += epochs   
        TFT.plot_training_history(self.error_history, self.validation_history,xtitle="Epoch",ytitle="Error",
                                   title="",fig=not(continued))
Example no. 9
	def doTraining(self, sess, cases, epochs = 100, continued = False):
		if not(continued):
			self.errorHistory = []

		for i in range(epochs):
			error = 0; step = self.globalTrainingStep + i
			gvars = [self.error] + self.grabvars
			mbs = self.miniBatchSize; nCases = len(cases); nmb = math.ceil(nCases/mbs)

			for cstart in range(0, nCases, mbs):
				cend = min(nCases, cstart+mbs)
				minibatch = cases[cstart:cend]
				inputs = [c[0] for c in minibatch]; targets = [c[1] for c in minibatch]
				feeder = {self.input: inputs, self.target: targets}
				_, grabvals, _ = self.run_one_step([self.trainer], gvars, self.probes, session = sess,
										feed_dict = feeder, step = step, show_interval = self.showInterval)
				error += grabvals[0]

			self.errorHistory.append((step, error/nmb))
			self.considerValidationTesting(step, sess)

		self.globalTrainingStep += epochs
		tft.plot_training_history(self.errorHistory, self.validationHistory, xtitle = "Epoch",
						ytitle = "Error", title = "", fig = not(continued))
Example no. 10
    def run(self):

        print("Starting up..  Playing " + str(self.numGames) + " games:")

        # set save interval for actor network parameters
        # clear the replayBuffer
        # randomly init weights and biases for Actor network

        self.Anet.setupSession()
        self.Anet.error_history = []
        self.Anet.validation_history = []

        startNode = Node.Node(
            state=State.State(player=self.player, hexSize=self.hexSize))
        mcts = MCTS.MCTS(numberOfSimulationsPerMove=self.numSimulations,
                         hexTrainer=self,
                         Anet=self.Anet)

        player = startNode.getState().getPlayer()

        startNodeCopy = startNode

        player1Wins = 0
        player2Wins = 0
        player1Starts = 0
        player2Starts = 0

        gc = 1

        #for a game in numberOfGames
        for game in range(0, self.numGames):
            #Start of a game

            #clear replay buffer
            self.replayBuffer = []

            #initialize gameboard to empty board
            startNode = startNodeCopy
            startingPlayer = startNode.getState().getPlayer()

            if startingPlayer == 1:
                player1Starts += 1
            else:
                player2Starts += 1

            print("\n\n\n --- Game number " + str(gc))

            #print starting state
            startNode.getState().getBoard().printBoard()

            while not startNode.getState().gameIsOver():

                player = startNode.getState().getPlayer()

                #use tree policy to search from root to leaf
                #use ANET to choose rollout actions from L to final state
                #perform MCTS backpropagation
                #next gamestate
                print("Player " + str(player) + "'s turn")
                print("legal moves:")
                print(startNode.getState().getBoard().getLegalMoves())

                nextNode = mcts.findNextMove(startNode, player, startingPlayer)

                # D = distribution of visitCounts along all arcs emanating from root
                # add case (root, D) to replayBuffer
                #choose actual move (action*) based on D
                #perform action* on root to produce successor state s*
                #update currentstate to s*
                # in mcts - retain subtree rooted at s*, discard everything else
                # rootnode = s*

                #TODO change this ?
                if self.verbose: nextNode.getState().getBoard().printBoard()

                if nextNode.getState().gameIsOver():

                    if self.verbose:
                        print("\nPlayer " + str(player) + " won! \n")

                    if player == 1:
                        player1Wins += 1
                    else:
                        player2Wins += 1

                startNode = nextNode
                if nextNode.getState().gameIsOver():
                    break
            gc += 1

            # train ANET on random minibatch of cases from replayBuffer
            np.random.shuffle(self.replayBuffer)

            #TODO write a custom do_training method

            inputs = [case[0] for case in self.replayBuffer]
            targets = [case[1] for case in self.replayBuffer]
            print("inputs:")
            print(inputs)
            print("targets")
            print(targets)
            feeder = {self.Anet.input: inputs, self.Anet.target: targets}
            gvars = [self.Anet.error] + self.Anet.grabvars
            _, grabvals, _ = self.Anet.run_one_step(
                [self.Anet.trainer],
                gvars,
                session=self.Anet.current_session,
                feed_dict=feeder)
            error = grabvals[0]
            self.Anet.error_history.append((gc, error))

            # if gameNum %modulo saveinterval: save ANET parameters for later use in TOPP

            #next game

        #print result of all games
        print("\nPlayer 1 started " + str(player1Starts) + " games and won " +
              str(player1Wins) + " of " + str(self.numGames) + " games!   " +
              str((player1Wins / self.numGames * 100)) + " % ")
        print("Player 2 started " + str(player2Starts) + " games and won " +
              str(player2Wins) + " of " + str(self.numGames) + " games!   " +
              str((player2Wins / self.numGames * 100)) + " % ")
        print("\n")
        TFT.plot_training_history(self.Anet.error_history,
                                  self.Anet.validation_history,
                                  xtitle="Game",
                                  ytitle="Error",
                                  title="",
                                  fig=True)

        self.Anet.close_current_session(view=False)

        #loop to keep program from closing at the end so we can view the graph
        x = ""
        while x == "":
            x = str(input("enter any key to quit"))
Example no. 11
def train(dims=[11,40,20,6],
          activation_func='tanh',
          softmax=True,
          cost_func=CE,
          lr= 0.5,
          vint = 10,
          bint = 10,
          acc_lim = 0.95,
          initial_weight_range=[-0.1,0.1],
          data_source='gen_wine_cases',
          case_count=1,
          vfrac=0.1,
          tfrac=0.1,
          mbs=1277,
          map_bs=20,
          epochs=10000,
          show_layers=None,
          dendogram_layers=None,
          show=True,
          map_layers = [1, 2]):


  #Training and validation accuracies
  train_acc= []
  val_acc = []
    
  # Import data
  dataset = getattr(TFT,data_source)(case_count=case_count)
  mnist = case_holder(dataset,tfrac=tfrac,vfrac=vfrac)

  sess = tf.InteractiveSession()
  # Create a multilayer model.

  # Input placeholders
  with tf.name_scope('input'):
    x = tf.placeholder(tf.float32, [None, dims[0]], name='x-input')
    y_ = tf.placeholder(tf.float32, [None, dims[-1]], name='y-input')


  # We can't initialize these variables to 0 - the network will get stuck.
  def weight_variable(shape):
    """Create a weight variable with appropriate initialization."""
    if initial_weight_range == "scaled":
        initial = tf.truncated_normal(shape, stddev=0.1)
    else:
        initial = tf.random_uniform(shape=shape, minval=initial_weight_range[0], maxval=initial_weight_range[1])
    return tf.Variable(initial)

  def bias_variable(shape):
    """Create a bias variable with appropriate initialization."""
    initial = tf.constant(0.1, shape=shape)
    return tf.Variable(initial)

  def variable_summaries(var):
    """Attach a lot of summaries to a Tensor (for TensorBoard visualization)."""
    with tf.name_scope('summaries'):
      mean = tf.reduce_mean(var)
      tf.summary.scalar('mean', mean)
      with tf.name_scope('stddev'):
        stddev = tf.sqrt(tf.reduce_mean(tf.square(var - mean)))
      tf.summary.scalar('stddev', stddev)
      tf.summary.scalar('max', tf.reduce_max(var))
      tf.summary.scalar('min', tf.reduce_min(var))
      tf.summary.histogram('histogram', var)

  def nn_layer(input_tensor, input_dim, output_dim, layer_name, act=getattr(tf.nn,activation_func)):
    """Reusable code for making a simple neural net layer.
    It does a matrix multiply, a bias add, and then applies the given activation to nonlinearize.
    It also sets up name scoping so that the resultant graph is easy to read,
    and adds a number of summary ops.
    """
    # Adding a name scope ensures logical grouping of the layers in the graph.
    with tf.name_scope(layer_name):
      # This Variable will hold the state of the weights for the layer
      with tf.name_scope('weights'):
        weights = weight_variable([input_dim, output_dim])
        variable_summaries(weights)
      with tf.name_scope('biases'):
        biases = bias_variable([output_dim])
        variable_summaries(biases)
      with tf.name_scope('Wx_plus_b'):
        preactivate = tf.matmul(input_tensor, weights) + biases
        tf.summary.histogram('pre_activations', preactivate)
      activations = act(preactivate, name='activation')
      tf.summary.histogram('activations', activations)
      return activations



  previous_layer = x
  layers = []
  for i in range(1,len(dims)):
      # NOTE: act=tf.nn.relu here overrides the activation_func argument for every layer.
      layers.append(nn_layer(previous_layer,dims[i-1],dims[i],'layer'+str(i),act=tf.nn.relu))
      previous_layer = layers[-1]
  y = layers[-1]



  with tf.name_scope('error_func'):
    # The raw formulation of cross-entropy,
    #
    # tf.reduce_mean(-tf.reduce_sum(y_ * tf.log(tf.softmax(y)),
    #                               reduction_indices=[1]))
    #
    # can be numerically unstable.
    #
    # So here we use tf.nn.softmax_cross_entropy_with_logits on the
    # raw outputs of the nn_layer above, and then average across
    # the batch.
    diff = error_funcs[cost_func](y_,y)
    #diff = tf.nn.softmax_cross_entropy_with_logits(labels=y_, logits=y)
    with tf.name_scope('total'):
      cross_entropy = tf.reduce_mean(diff)
  tf.summary.scalar('cross_entropy', cross_entropy)

  with tf.name_scope('train'):
    train_step = tf.train.AdamOptimizer(learning_rate=lr).minimize(
        cross_entropy)

  with tf.name_scope('accuracy'):
    with tf.name_scope('correct_prediction'):
      correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(y_, 1))
    with tf.name_scope('accuracy'):
      accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
  tf.summary.scalar('accuracy', accuracy)

  # Merge all the summaries and write them out to
  # /tmp/tensorflow/mnist/logs/mnist_with_summaries (by default)
  merged = tf.summary.merge_all()
  train_writer = tf.summary.FileWriter('netsaver_test'+ '/train', sess.graph)
  test_writer = tf.summary.FileWriter('netsaver_test' + '/test')
  tf.global_variables_initializer().run()

  # Train the model, and also write summaries.
  # Every bint-th step, measure accuracy on a training batch and write a test summary.
  # All other steps, run train_step on training data, & add training summaries

  def feed_dict(train):
    """Make a TensorFlow feed_dict: maps data onto Tensor placeholders."""
    if train == "train":
      xs, ys = mnist.train_next_batch(size=mbs)
    elif train == 'test':
      xs, ys = mnist.test_features, mnist.test_labels
    elif train == 'val':
        xs, ys = mnist.validation_features, mnist.validation_labels
    elif train == 'map':
        xs, ys = mnist.train_features[:map_bs], mnist.train_labels[:map_bs]
    else:
        raise Exception
    return {x: xs, y_: ys}

  for i in range(epochs):
    if i % bint == 0:  # Record summaries and accuracy on a training batch
      summary, acc = sess.run([merged, accuracy], feed_dict=feed_dict('train'))
      test_writer.add_summary(summary, i)
      print('Accuracy at step %s: %s' % (i, acc))

      # Own code for pulling training accuracy to matplot graph
      train_acc.append([i,acc])

      if acc >= acc_lim: break
    else:  # Record train set summaries, and train
      if i % 100 == 99:  # Record execution stats
        run_options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)
        run_metadata = tf.RunMetadata()
        summary, _ = sess.run([merged, train_step],
                              feed_dict=feed_dict('train'),
                              options=run_options,
                              run_metadata=run_metadata)
        train_writer.add_run_metadata(run_metadata, 'step%03d' % i)
        train_writer.add_summary(summary, i)
        print('Adding run metadata for', i)
      else:  # Record a summary
        summary, _ = sess.run([merged, train_step], feed_dict=feed_dict('train'))
        train_writer.add_summary(summary, i)



  train_writer.close()
  test_writer.close()


  # Display final training-set accuracy
  summary, acc = sess.run([merged, accuracy], feed_dict=feed_dict('train'))
  print('Final training set accuracy: %s' % ( acc))


  # Code for displaying graphs

  if show:
    TFT.plot_training_history(train_acc,val_acc)

  if map_layers:
      for l in map_layers:
          _, activation = sess.run([merged,layers[l]],feed_dict=feed_dict('map'))
          TFT.display_matrix(activation, title="mapping of layer: "+ str(l))
      # for variable in tf.trainable_variables():
      #     if variable.name in real-map_layer:
              # _,values = sess.run([merged,variable],feed_dict=feed_dict('map'))
              # if 'weigths' in variable.name:
              #     TFT.display_matrix(values)
              # elif 'biases' in variable.name:
              #     TFT.display_vector(values)
              # else:
              #     raise Exception("wrong dimensionality on show layers")

  if show_layers:
      for variable in tf.trainable_variables():
          if variable.name in show_layers:
              _,values = sess.run([merged,variable],feed_dict=feed_dict('map'))
              if len(values.shape) == 2:
                  TFT.display_matrix(values, title="weights of: "+variable.name)
              elif len(values.shape) == 1:
                  TFT.display_vector(values, title="biases of: "+variable.name)
              else:
                  raise Exception("wrong dimensionality on map layers")

  if dendogram_layers:
      for l in dendogram_layers:
          _, activation = sess.run([merged,layers[l]],feed_dict=feed_dict('map'))
          y_s = []
          #for y in feed_dict('map')[x]:
          #    y_s.append(TFT.segmented_vector_string(y))
          TFT.dendrogram(activation,feed_dict('map')[y_], title="Dendrogram, layer: "+str(l))



  PLT.show()
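Example no. 11 collects train_acc but never appends to val_acc, even though both are passed to TFT.plot_training_history and a vint parameter is accepted. A minimal sketch of how validation accuracy could be recorded, reusing the feed_dict('val') branch defined above; this is an assumption about intent, meant for the body of the `for i in range(epochs):` loop, not the author's code:

    if vint and i % vint == 0:
        _, v_acc = sess.run([merged, accuracy], feed_dict=feed_dict('val'))
        val_acc.append([i, v_acc])
        print('Validation accuracy at step %s: %s' % (i, v_acc))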
Example no. 12
    def run(self):
        """Runs the batch"""
        print("Starting up playing "+str(self.numberOfGames)+" games: ")
        winsPlayer1 = 0
        winsPlayer2 = 0
        self.anet.error_history = []
        self.anet.validation_history = []
        self.replayBuffer = []
        
        # saving the policy after 0 episodes
        if self.savedGames is not None:
            self.anet.save_session_params(self.savePath, self.anet.current_session, 0)
            print("saving game after "+str(0)+" episodes as "+ str(0))
        
        totalTime = 0
        for i in range(self.numberOfGames):

            
            t0 = time.time()

            currentNode = deepcopy(self.rootNode)

            mcst = MCST(currentNode, self.anet, self.replayBuffer, self.numberOfSimulations)

            # removes the 400 oldest cases if the replay buffer grows beyond 4000
            if len(self.replayBuffer) > 4000:
                self.replayBuffer = self.replayBuffer[400:]


            print("\nGame "+str(i))
            while not currentNode.state.isOver():

                playerToMove = currentNode.state.player

                nextNode = mcst.findNextMove(currentNode)
                if self.verbose: nextNode.state.printBoard()
                
                if nextNode.state.isOver():
                    if self.verbose:
                        print("\nPlayer " + str(playerToMove) + " wins \n")
                    if playerToMove == 1:
                        winsPlayer1 += 1
                    else:
                        winsPlayer2 += 1

                currentNode = nextNode
                currentNode.parent = None

            #************** training of anet ************
            np.random.shuffle(self.replayBuffer)
            inputs = [case[0] for case in self.replayBuffer]; targets = [case[1] for case in self.replayBuffer] 
            feeder = {self.anet.input: inputs[:self.batchSize], self.anet.target: targets[:self.batchSize]}
            gvars = [self.anet.error]

            _, error, _ = self.anet.run_one_step(
                [self.anet.trainer],
                grabbed_vars = gvars,
                session=self.anet.current_session,
                feed_dict=feeder,
                display_interval=0
                )
            #if self.verbose: 
            print("error: "+str(error[0]))
            self.anet.error_history.append((i, error[0]))
            #*********************************************

            #************* Saving session params **********
            if self.savedGames is not None:
                saveInterval = self.numberOfGames/(self.savedGames-1)
                if (i+1) % saveInterval == 0:

                    savedGameNum = int((i+1)/saveInterval)
                    print("saving game after "+str(i+1)+" episodes as "+ str(savedGameNum))
                    self.anet.save_session_params(self.savePath, self.anet.current_session, savedGameNum)

            t1 = time.time()
            gameTime = t1-t0
            totalTime += gameTime

            estimatedTimeLeft = (gameTime) * (self.numberOfGames-1-i)

            print("\nLast game time:      "+str(datetime.timedelta(seconds=int(gameTime))))
            print("Total time:          "+str(datetime.timedelta(seconds=int(totalTime))))
            print("Estimated time left: "+str(datetime.timedelta(seconds=int(estimatedTimeLeft))))

                    
                
        print("player 1 wins {} out of {} games: {} percent".format(winsPlayer1, self.numberOfGames, 100*winsPlayer1/self.numberOfGames))
        print("player 2 wins {} out of {} games: {} percent".format(winsPlayer2, self.numberOfGames, 100*winsPlayer2/self.numberOfGames))

        if self.graph:
            TFT.plot_training_history(self.anet.error_history, self.anet.validation_history,xtitle="Game",ytitle="Error",
                                    title="",fig=True)

            self.anet.close_current_session(view=False)

            #loop to keep program from closing at the end so we can view the graph
        
            x = ""
            while x == "":
                x = str(input("enter any key to quit"))
Example no. 13
    def run(self):
        """Runs the batch"""
        print("Starting up playing "+str(self.numberOfGames)+" games: ")
        winsPlayer1 = 0
        winsPlayer2 = 0

        self.anet.setupSession()
        self.anet.error_history = []
        self.anet.validation_history = []
        self.replayBuffer = []  # fixed size
        
        # saving the policy after 0 episodes
        self.anet.save_session_params(self.savePath, self.anet.current_session, 0)
        print("saving game after "+str(0)+" episodes as "+ str(0))
        for i in range(self.numberOfGames):


            currentNode = deepcopy(self.rootNode)

            mcst = MCST(currentNode, self.anet, self.replayBuffer, self.numberOfSimulations)

            print("\nGame "+str(i))
            while not currentNode.state.isOver():

                playerToMove = currentNode.state.player

                nextNode = mcst.findNextMove(currentNode)
                if self.verbose: nextNode.state.printBoard()
                
                if nextNode.state.isOver():
                    if self.verbose:
                        print("\nPlayer " + str(playerToMove) + " wins \n")
                    if playerToMove == 1:
                        winsPlayer1 += 1
                    else:
                        winsPlayer2 += 1

                currentNode = nextNode
                currentNode.parent = None

            #************** training of anet ************
            np.random.shuffle(self.replayBuffer)
            inputs = [case[0] for case in self.replayBuffer]; targets = [case[1] for case in self.replayBuffer] 
            feeder = {self.anet.input: inputs[:self.batchSize], self.anet.target: targets[:self.batchSize]}
            gvars = [self.anet.error]

            _, error, _ = self.anet.run_one_step(
                [self.anet.trainer],
                grabbed_vars = gvars,
                session=self.anet.current_session,
                feed_dict=feeder,
                display_interval=0
                )
            #if self.verbose: 
            print("error: "+str(error[0]))
            self.anet.error_history.append((i, error[0]))
            #*********************************************

            #************* Saving session params **********
            if self.savedGames is not None:
                saveInterval = self.numberOfGames/(self.savedGames-1)
                if (i+1) % saveInterval == 0:

                    savedGameNum = int((i+1)/saveInterval)
                    print("saving game after "+str(i+1)+" episodes as "+ str(savedGameNum))
                    self.anet.save_session_params(self.savePath, self.anet.current_session, savedGameNum)
                    
                
        print("player 1 wins {} out of {} games: {} percent".format(winsPlayer1, self.numberOfGames, 100*winsPlayer1/self.numberOfGames))
        print("player 2 wins {} out of {} games: {} percent".format(winsPlayer2, self.numberOfGames, 100*winsPlayer2/self.numberOfGames))

        TFT.plot_training_history(self.anet.error_history, self.anet.validation_history,xtitle="Game",ytitle="Error",
                                   title="",fig=True)

        self.anet.close_current_session(view=False)
Example no. 14
    def run(self):

        total_wins_player1 = 0
        total_wins_player2 = 0
        mix = False
        self.ANET.setupSession()
        self.ANET.error_history = []
        self.ANET.validation_history = []

        if self.starting_player == 'mix':
            mix = True

        # Save ANET state before training
        self.ANET.save_session_params(self.saved_folder,
                                      self.ANET.current_session, 0)
        print("Saved game after 0 episodes")

        # clear replay buffer
        self.replay_buffer = []

        for i in range(0, self.batch):

            if len(self.replay_buffer) > 2000:
                self.replay_buffer = self.replay_buffer[1000:]
            if mix:
                self.starting_player = random.randint(1, 2)
                # print("Starting player is: ", self.starting_player)

            root_node = node.Node(parent=None,
                                  state=gamestate.GameState(
                                      player=self.starting_player,
                                      dimensions=self.hex_dimensions))
            # empty board
            root_node.state.initialize_hexboard()

            batch_player = self.starting_player

            game_over = False

            while not game_over:
                indexes = root_node.state.next_node_states()[1]
                # initialize to same state as root
                batch_node = self.find_move(root_node, self.simulations,
                                            batch_player)

                next_move = None
                highest_visits = -float('inf')

                visit_counts = []

                for child in batch_node.child_nodes:
                    visits = child.get_visits()
                    visit_counts.append(child.get_visits())

                    # Printing
                    children = [x for x in child.state.hexBoard]
                    new_children = []
                    for thing in children:
                        for another in thing:
                            new_children.append(another)

                    # print("Child " + str([x.value for x in new_children]) + " had ratio " + str(
                    #    visits) + " with wins/visits " + str(
                    #    child.get_wins()) + " / " + str(child.get_visits()))

                    if visits > highest_visits:
                        highest_visits = visits
                        next_move = child
                if self.verbose:
                    next_move.state.print_hexboard()

                # add case
                case = []
                visit_distribution = []
                case.append(
                    batch_node.state.complex_to_simple_hexboard(
                        batch_node.state.hexBoard))
                for index in indexes:
                    if index == 1:
                        visit_distribution.append(visit_counts.pop(0) - 1)
                    else:
                        visit_distribution.append(0)

                # one_hot_visit_distribution = [0] * len(visit_distribution)
                normalized_visit_distribution = []

                # generates one_hot list
                max_value = max(visit_distribution)
                # max_index = visit_distribution.index(max_value)
                # one_hot_visit_distribution[max_index] = 1

                # generates normalized list
                for value in visit_distribution:
                    normalized_visit_distribution.append(
                        value / sum(visit_distribution))

                # add case root, D to replay buffer
                case.append(normalized_visit_distribution)
                self.replay_buffer.append(case)

                # choose actual move
                root_node = next_move

                # Already switching when generating kids
                # root_node.state.switch_player(root_node.state.get_player())
                # root_node.state.print_hexboard()

                if root_node.get_state().game_over():
                    winner = 3 - root_node.get_state().get_player()
                    if self.verbose:
                        print("Player " + str(winner) + " wins.")
                        print("")
                    if winner == 1:
                        total_wins_player1 += 1
                    if winner == 2:
                        total_wins_player2 += 1
                    game_over = True

            # do training
            np.random.shuffle(self.replay_buffer)
            inputs = [c[0] for c in self.replay_buffer]
            targets = [c[1] for c in self.replay_buffer]
            feeder = {
                self.ANET.input: inputs[:self.minibatch_size],
                self.ANET.target: targets[:self.minibatch_size]
            }

            gvars = self.ANET.error

            _, grabvals, _ = self.ANET.run_one_step(
                [self.ANET.trainer],
                gvars,
                self.ANET.probes,
                session=self.ANET.current_session,
                feed_dict=feeder,
                show_interval=0)

            self.ANET.error_history.append((i, grabvals))

            # Save ANET-params
            if self.number_of_saved_agents:
                if (i + 1) % self.save_interval == 0:
                    self.ANET.save_session_params(self.saved_folder,
                                                  self.ANET.current_session,
                                                  (i + 1))
                    print("Saved game after ", i + 1, " episodes")

            self.ANET_CM.cases = self.replay_buffer
            print("Game {} of {} is over".format(i + 1, self.batch))
        print("")
        print("Player 1" + " won " + str(total_wins_player1) +
              " times out of " + str(self.batch) + " batches." + " (" +
              str(100 * total_wins_player1 / self.batch) + "%)")
        print("Player 2" + " won " + str(total_wins_player2) +
              " times out of " + str(self.batch) + " batches." + " (" +
              str(100 * total_wins_player2 / self.batch) + "%)")
        TFT.plot_training_history(self.ANET.error_history,
                                  self.ANET.validation_history,
                                  xtitle="Game",
                                  ytitle="Error",
                                  title="",
                                  fig=True)
        self.ANET.close_current_session(view=False)
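Example no. 14 builds each training target by spreading the children's visit counts over the legal-move slots and normalizing them into a distribution (it also subtracts one visit from each count first). A compact sketch of that target construction without the subtraction (names are illustrative):

def visit_distribution(legal_mask, visit_counts):
    """Spread per-child visit counts over the full move vector (0 for illegal
    moves) and normalize them to a probability distribution."""
    counts = list(visit_counts)
    spread = [counts.pop(0) if legal else 0 for legal in legal_mask]
    total = sum(spread)
    return [v / total for v in spread] if total else spread

# visit_distribution([1, 0, 1, 1], [10, 25, 5])  ->  [0.25, 0.0, 0.625, 0.125]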