Example #1
def computeCost(inputs, labels, qPos, qPri, taskId, numSamples, alpha=1):
    inputSize = inputs.size()[0]
    monteCarlo = MonteCarlo(qPos, numSamples)
    kl = KL()
    mcTerm = monteCarlo.logPred(inputs, labels, taskId)
    klTerm = torch.div(kl.computeKL(qPos, qPri, taskId), inputSize)
    # With the default alpha = 1 this reduces to the negative ELBO per
    # datapoint: KL(qPos, qPri) / inputSize - E_q[log p(y | x, w)].
    return -((2 - alpha) * mcTerm - alpha * klTerm)
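Note: a tiny self-contained check of that identity (the term values below are made up, not from the source):

import torch

mcTerm = torch.tensor(-0.7)  # made-up Monte Carlo estimate of E_q[log p(y|x,w)]
klTerm = torch.tensor(0.05)  # made-up KL(qPos, qPri) / inputSize
alpha = 1
cost = -((2 - alpha) * mcTerm - alpha * klTerm)
assert torch.isclose(cost, klTerm - mcTerm)  # negative ELBO per datapoint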
Example #2
def MC_OffPolicy_Prediction_Results():
    mc_obj = MonteCarlo(POLICY_PLAYER, POLICY_DEALER)

    true_value = -0.27726
    episodes = 10000
    runs = 100
    error_ordinary = np.zeros(episodes)  #MSE of ordinary importance sampling
    error_weighted = np.zeros(episodes)  #MSE of weighted importance sampling
    for i in tqdm(range(0, runs)):
        ordinary_sampling_, weighted_sampling_ = mc_obj.monte_carlo_off_policy(
            episodes)
        # get the squared error
        error_ordinary += np.power(ordinary_sampling_ - true_value, 2)
        error_weighted += np.power(weighted_sampling_ - true_value, 2)
    error_ordinary /= runs
    error_weighted /= runs

    plt.plot(error_weighted, label='Weighted Importance Sampling')
    plt.plot(error_ordinary, label='Ordinary Importance Sampling')
    plt.xlabel('Episodes (log scale)')
    plt.ylabel('Mean square error')
    plt.xscale('log')
    plt.legend()

    plt.savefig('MC_OffPolicy_Prediction.png')
    plt.close()
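Note: for orientation, a minimal sketch of the two estimators this example compares; the real monte_carlo_off_policy is not shown, so the per-episode ratios-and-returns interface below is an assumption.

import numpy as np

def off_policy_estimates(rhos, returns):
    # Sketch: running ordinary vs. weighted importance-sampling estimates of a
    # target-policy value from behaviour-policy episodes.
    #   rhos    -- per-episode products of pi(a|s) / b(a|s)
    #   returns -- per-episode returns observed under the behaviour policy
    rhos = np.asarray(rhos, dtype=float)
    returns = np.asarray(returns, dtype=float)
    weighted_sums = np.cumsum(rhos * returns)
    rho_sums = np.cumsum(rhos)
    episodes = np.arange(1, len(rhos) + 1)
    ordinary = weighted_sums / episodes  # unbiased, but can have high variance
    with np.errstate(divide='ignore', invalid='ignore'):
        # biased but lower-variance; defined as 0 before any weight arrives
        weighted = np.where(rho_sums != 0, weighted_sums / rho_sums, 0.0)
    return ordinary, weighted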
Example #3
def MC_OnPolicy_Prediction_Results():
    mc_obj = MonteCarlo(POLICY_PLAYER2, POLICY_DEALER)

    states_usable_ace_1, states_no_usable_ace_1 = mc_obj.monte_carlo_on_policy(
        10000)
    states_usable_ace_2, states_no_usable_ace_2 = mc_obj.monte_carlo_on_policy(
        1000000)

    states = [
        states_usable_ace_1, states_usable_ace_2, states_no_usable_ace_1,
        states_no_usable_ace_2
    ]

    titles = [
        'Usable Ace, 10000 Episodes', 'Usable Ace, 1000000 Episodes',
        'No Usable Ace, 10000 Episodes', 'No Usable Ace, 1000000 Episodes'
    ]

    _, axes = plt.subplots(2, 2, figsize=(40, 30))
    plt.subplots_adjust(wspace=0.1, hspace=0.2)
    axes = axes.flatten()

    sns.set(font_scale=3)
    for state, title, axis in zip(states, titles, axes):
        fig = sns.heatmap(np.flipud(state), cmap="YlGnBu", ax=axis)
        fig.set_yticklabels(list(reversed(range(12, 22))), fontsize=35)
        fig.set_xticklabels(range(1, 11), fontsize=35)
        fig.set_ylabel('Player sum', fontsize=40)
        fig.set_xlabel('Dealer showing', fontsize=40)
        fig.set_title(title, fontsize=40)

    plt.savefig('MC_OnPolicy_Prediction.png')
    plt.close()
Example #4
 def test_eu_call_opt_with_mp(self):
     '''Run the same test but in multiprocess mode
     '''
     mc = MonteCarlo(50, 52, 0.05, 2, 0.3)
     self.assertAlmostEqual(
         6.7601, mc.run(OptionType.PUT, 300000, 4),
         1)  # 4 processes seems to be the fastest on a quad-core pc
Example #5
 def test_eu_call_opt(self):
     '''2-year European put option, spot price 50, strike 52
     risk-free rate 5%, volatility 30%
     '''
     mc = MonteCarlo(50, 52, 0.05, 2, 0.3)
     self.assertAlmostEqual(6.7601, mc.run(OptionType.PUT, 300000, 0),
                            1)  # single process
Example #6
def test_density_input():

    mc = MonteCarlo()
    with assert_raises(ValueError) as exception: mc([-1, 2, 3, 4], lambda x: 0)
    with assert_raises(TypeError) as exception: mc([1.1, 2, 3, 4], lambda x: 0)

    densities = [[0],[0,0,0,0]]
    for density in densities:
       with assert_raises(ValueError) as exception: mc.random_move(density)
Example #7
 def testAccuracy(self, x_test, y_test, q_pred, headId):
     acc = 0
     num_pred_samples = 100
     for x_test_batch, y_test_batch in self.getBatch(x_test, y_test):
         monteCarlo = MonteCarlo(q_pred, num_pred_samples)
         y_pred_batch = monteCarlo.computeMonteCarlo(x_test_batch, headId)
         _, y_pred_batch = torch.max(y_pred_batch.data, 1)
         y_pred_batch = torch.eye(
             self.dataGen.get_dims()[1])[y_pred_batch].type(FloatTensor)
         acc += torch.sum(torch.mul(y_pred_batch, y_test_batch)).item()
     return acc / y_test.shape[0]
Example #8
def test_equal_probability():
    """ Check particles have equal probability of movement. """
    from numpy import array, sqrt, count_nonzero

    mc = MonteCarlo()
    density = array([1, 0, 99])
    changes_at_zero = [(density - mc.change_density(density))[0] != 0
                       for i in range(10000)]
    assert_almost_equal(count_nonzero(changes_at_zero),
                        0.01 * len(changes_at_zero),
                        delta=0.5 * sqrt(len(changes_at_zero)))
Example #9
def test_input_sanity():
  """ Check incorrect input do fail """

  with assert_raises(NotImplementedError) as exception: MonteCarlo(temperature=0e0)
  with assert_raises(ValueError) as exception: MonteCarlo(temperature=-1e0)
  
  mc = MonteCarlo()
  with assert_raises(TypeError) as exception: mc(lambda x: 0, [1.0, 2, 3])
  with assert_raises(ValueError) as exception: mc(lambda x: 0, [-1, 2, 3])
  with assert_raises(ValueError) as exception: mc(lambda x: 0, [[1, 2, 3], [3, 4, 5]])
  with assert_raises(ValueError) as exception: mc(lambda x: 0, [3])
  with assert_raises(ValueError) as exception: mc(lambda x: 0, [0, 0])
Example #10
def test_equal_probability():
  """ Check particles have equal probability of movement. """
  from numpy import array, sqrt, count_nonzero

  mc = MonteCarlo()
  density = array([1, 0, 99])
  changes_at_zero = [(density - mc.change_density(density))[0] != 0 for i in range(10000)]
  assert_almost_equal(
      count_nonzero(changes_at_zero), 
      0.01 * len(changes_at_zero), 
      delta = 0.5 * sqrt(len(changes_at_zero))
  )
Example #11
 def setUp(self):
     """
     Initialize Monte-Carlo algorithm tests.
     """
     func = lambda x: 100*np.sum((x[1:]-x[:-1]**2)**2)+np.sum((1-x[:-1])**2)
     n_dim = 2
     lower = np.full(n_dim, -5.12)
     upper = np.full(n_dim, 5.12)
     max_iter = 50
     random_state = 42
     self.sampler = MonteCarlo(func, lower = lower, upper = upper,
                               max_iter = max_iter, random_state = random_state)
     self.n_dim = n_dim
Example #12
def test_stop_simulation():
  """ Checks that if observe returns False, iteration stops. """
  from mock import Mock
  mc = MonteCarlo(temperature=100.0, itermax=8)

  # Make a fake observer 
  mc.observe = Mock(side_effect=[True, False, True])
  # Fake energy method
  energies = [0.1, -0.1, -0.2, -0.15, -0.25]
  energy = Mock(side_effect=energies)
  # Call simulation 
  mc(energy, [0, 1, 2, 3])

  assert_equal(len(mc.observe.mock_calls), 2)
  assert_equal(len(energy.mock_calls), 3) # one extra call to get first energy
Example #13
  def play(self):
    node = Node(self.board, self.current_player.piece)
    print(self.current_player.name, 'goes first')
    self.board.print_board()

    while True:
      if self.current_player.name != 'Computer':
        column = self.get_move()

        while not self.board.add_piece(self.current_player.piece, column):
          print('That is not a valid move. Please select a different column')
          column = self.get_move()

      else:
        node, column = MonteCarlo(self.board.make_copy(), 'O', ITERATIONS, last_node=node).get_move()
        print('Computer chooses column', column)
        self.board.add_piece(self.current_player.piece, column)

      self.board.print_board()
      node = self.__navigate_to_node_for_move(node, column, self.board)
      if self.board.winner_found():
        print('***** ' + self.current_player.name + ' wins!')
        break

      if self.board.spaces_left() == 0:
        print('***** Tie game')
        break

      self.__advance_turn()
Example #14
def test_main_iteration_particle_number_is_conserved():

    from mock import Mock

    mc = MonteCarlo()

    # Mock the energy function
    energies = [1, 2, 3, 4]
    energy = Mock(side_effect=energies)

    density = random_integers(100, size=100)
    n = sum(density)
    result = mc.iteration(density, energy)
    new_density = result[0]
    n_new = sum(new_density)
    assert_equal(n, n_new, "particle number not conserved")
Example #15
def test_accept_change():
    """ Check that move is accepted if second energy is lower """
    from numpy import sqrt, count_nonzero, exp

    mc = MonteCarlo(temperature=100.0)
    # Should always be true. But do more than one draw, in case randomness
    # incorrectly crept into the implementation.
    for i in range(10):
        assert_true(mc.accept_change(0.5, 0.4))
        assert_true(mc.accept_change(0.5, 0.5))

    # This should be accepted only part of the time, depending on exponential distribution
    prior, successor = 0.4, 0.5
    accepted = [mc.accept_change(prior, successor) for i in range(10000)]
    assert_almost_equal(count_nonzero(accepted) / float(len(accepted)),
                        exp(-(successor - prior) / mc.temperature),
                        delta=3e0 / sqrt(len(accepted)))
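Note: a minimal sketch of the acceptance rule these assertions imply, assuming the Boltzmann constant is folded into temperature; the project's actual MonteCarlo class is not shown here.

import numpy as np

class MetropolisSketch:
    # Sketch of the Metropolis acceptance rule implied by the test above.
    def __init__(self, temperature=100.0):
        if temperature < 0:
            raise ValueError("temperature must be non-negative")
        self.temperature = temperature

    def accept_change(self, prior, successor):
        # Always accept a move that does not raise the energy; otherwise
        # accept with Boltzmann probability exp(-dE / T).
        if successor <= prior:
            return True
        return np.random.rand() < np.exp(-(successor - prior) / self.temperature)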
Example #16
def monte_carlo_demo():
    np.random.seed(101)
    env = SnakeEnv(10, [3, 6])
    agent = ModelFreeAgent(env)
    mc = MonteCarlo(0.5)
    with timer('Timer Monte Carlo Iter'):
        mc.monte_carlo_opt(agent, env)
    print('return_pi={}'.format(eval_game(env, agent)))
    print(agent.pi)

    np.random.seed(101)
    agent2 = TableAgent(env)
    pi_algo = PolicyIteration()
    with timer('Timer PolicyIter'):
        pi_algo.policy_iteration(agent2)
    print('return_pi={}'.format(eval_game(env, agent2)))
    print(agent2.pi)

    np.random.seed(101)
    agent3 = ModelFreeAgent(env)
    mc = SARSA(0.5)
    with timer('Timer Monte Carlo Iter'):
        mc.sarsa(agent3, env)
    print('return_pi={}'.format(eval_game(env, agent3)))
    print(agent3.pi)
Example #17
def test_accept_change():
  from numpy import sqrt, count_nonzero, exp

  mc = MonteCarlo(temperature=100.0)
  # Should always be true. But do more than one draw, in case randomness
  # incorrectly crept into the implementation.
  for i in range(10):
    assert_true(mc.accept_change(0.5, 0.4))
    assert_true(mc.accept_change(0.5, 0.5))

  # This should be accepted only part of the time, depending on exponential distribution
  prior, successor = 0.4, 0.5
  accepted = [mc.accept_change(prior, successor) for i in range(10000)]
  assert_almost_equal( 
      count_nonzero(accepted) / float(len(accepted)),
      exp(-(successor - prior) / mc.temperature),
      delta = 3e0 / sqrt(len(accepted))
  )
Example #18
def test_move_particle_one_over():
    """ Check density is change by a particle hopping left or right. """
    from numpy import nonzero, multiply
    from numpy.random import randint

    mc = MonteCarlo()

    for i in range(100):  # Do this n times, to avoid issues with random numbers
        # Create density
        density = randint(50, size=randint(2, 6))
        # Change it
        new_density = mc.change_density(density)

        # Make sure any movement is by one
        indices = nonzero(density - new_density)[0]
        assert_equal(len(indices), 2, "densities differ in two places")
        assert_equal(multiply.reduce((density - new_density)[indices]), -1,
                     "densities differ by + and - 1")
Example #19
def test_main_algorithm():
    """ Check set path through main algorithm """
    from mock import Mock, call

    mc = MonteCarlo(temperature=100.0, itermax=4)

    # Patch mc so that it takes a pre-determined path through
    acceptance = [True, True, False, True]
    mc.accept_change = Mock(side_effect=acceptance)
    densities = (
        [0, 0, 1, 0],
        [0, 1, 1, 0],
        [2, 2, 2, 2],
        [2, 3, 3, 2],
        [5, 3, 3, 5],
    )
    mc.change_density = Mock(side_effect=densities[1:])
    mc.observe = Mock(return_value=True)

    # Fake energy method
    energies = [0.1, -0.1, -0.2, -0.15, -0.25]
    energy = Mock(side_effect=energies)

    # Call simulation
    mc(energy, densities[0])

    # Now, analyze path. First check length.
    assert_equal(len(mc.accept_change.mock_calls), 4)
    assert_equal(len(mc.change_density.mock_calls), 4)
    assert_equal(len(mc.observe.mock_calls), 4)
    assert_equal(len(energy.mock_calls),
                 5)  # one extra call to get first energy

    # Easiest to look at observe, since it should have all the info about the step
    observe_path = [
        call(0, acceptance[0], densities[1], energies[1]),
        call(1, acceptance[1], densities[2], energies[2]),
        call(2, acceptance[2], densities[2], energies[2]),
        call(3, acceptance[3], densities[4], energies[4]),
    ]
    assert_equal(observe_path, mc.observe.call_args_list)
Example #20
def test_move_particle_one_over():
  """ Check density is change by a particle hopping left or right. """
  from numpy import nonzero, multiply
  from numpy.random import randint

  mc = MonteCarlo()

  for i in range(100): # Do this n times, to avoid issues with random numbers
    # Create density
    density = randint(50, size=randint(2, 6))
    # Change it
    new_density = mc.change_density(density)

    # Make sure any movement is by one
    indices = nonzero(density - new_density)[0]
    assert_equal(len(indices), 2, "densities differ in two places")
    assert_equal( 
        multiply.reduce((density - new_density)[indices]), 
        -1,
        "densities differ by + and - 1"
    )
Example #21
def test_main_algorithm():
  """ Check set path through main algorithm """
  from mock import Mock, call

  mc = MonteCarlo(temperature=100.0, itermax=4)

  # Patch mc so that it takes a pre-determined path through 
  acceptance = [True, True, False, True]
  mc.accept_change = Mock(side_effect=acceptance)
  densities = ( 
      [0, 0, 1, 0],
      [0, 1, 1, 0],
      [2, 2, 2, 2],
      [2, 3, 3, 2],
      [5, 3, 3, 5],
  )
  mc.change_density = Mock(side_effect=densities[1:])
  mc.observe = Mock(return_value=True)

  # Fake energy method
  energies = [0.1, -0.1, -0.2, -0.15, -0.25]
  energy = Mock(side_effect=energies)

  # Call simulation 
  mc(energy, densities[0])

  # Now, analyze path. First check length.
  assert_equal(len(mc.accept_change.mock_calls), 4)
  assert_equal(len(mc.change_density.mock_calls), 4)
  assert_equal(len(mc.observe.mock_calls), 4)
  assert_equal(len(energy.mock_calls), 5) # one extra call to get first energy

  # Easiest to look at observe, since it should have all the info about the step
  observe_path = [
      call(0, acceptance[0], densities[1], energies[1]),
      call(1, acceptance[1], densities[2], energies[2]),
      call(2, acceptance[2], densities[2], energies[2]),
      call(3, acceptance[3], densities[4], energies[4])
  ]
  assert_equal(observe_path, mc.observe.call_args_list)
Example #22
def MC_OnPolicy_Control_Results():
    mc_obj = MonteCarlo(POLICY_PLAYER, POLICY_DEALER)

    state_action_values = mc_obj.monte_carlo_es_control(500000)

    state_value_no_usable_ace = np.max(state_action_values[:, :, 0, :],
                                       axis=-1)
    state_value_usable_ace = np.max(state_action_values[:, :, 1, :], axis=-1)

    # get the optimal policy
    action_no_usable_ace = np.argmax(state_action_values[:, :, 0, :], axis=-1)
    action_usable_ace = np.argmax(state_action_values[:, :, 1, :], axis=-1)

    images = [
        action_usable_ace, state_value_usable_ace, action_no_usable_ace,
        state_value_no_usable_ace
    ]

    titles = [
        'Optimal policy with usable Ace', 'Optimal value with usable Ace',
        'Optimal policy without usable Ace', 'Optimal value without usable Ace'
    ]

    _, axes = plt.subplots(2, 2, figsize=(40, 30))
    plt.subplots_adjust(wspace=0.1, hspace=0.2)
    axes = axes.flatten()

    sns.set(font_scale=3)
    for image, title, axis in zip(images, titles, axes):
        fig = sns.heatmap(np.flipud(image), cmap="YlGnBu", ax=axis)
        fig.set_yticklabels(list(reversed(range(12, 22))), fontsize=35)
        fig.set_xticklabels(range(1, 11), fontsize=35)
        fig.set_ylabel('Player sum', fontsize=40)
        fig.set_xlabel('Dealer showing', fontsize=40)
        fig.set_title(title, fontsize=40)

    plt.savefig('MC_OnPolicy_Control.png')
    plt.close()
Example #23
class MonteCarloTest(unittest.TestCase):
    """
    Monte-Carlo algorithms unit tests.
    """
    
    def setUp(self):
        """
        Initialize Monte-Carlo algorithm tests.
        """
        func = lambda x: 100*np.sum((x[1:]-x[:-1]**2)**2)+np.sum((1-x[:-1])**2)
        n_dim = 2
        lower = np.full(n_dim, -5.12)
        upper = np.full(n_dim, 5.12)
        max_iter = 50
        random_state = 42
        self.sampler = MonteCarlo(func, lower = lower, upper = upper,
                                  max_iter = max_iter, random_state = random_state)
        self.n_dim = n_dim
        
    def tearDown(self):
        """
        Cleaning after each test.
        """
        del self.sampler
        
    def test_pure(self):
        """
        Pure Monte-Carlo test.
        """
        self.sampler.sample(sampler = "pure")
        mean = np.mean(self.sampler.models, axis = 0)
        mean_true = np.array([ -0.6070602, -0.00363818 ])
        for i, val in enumerate(mean):
            self.assertAlmostEqual(val, mean_true[i])
            
    def test_hastings(self):
        """
        Metropolis-Hastings algorithm test.
        """
        stepsize = 0.1409
        self.sampler.sample(sampler = "hastings", stepsize = stepsize)
        mean = np.mean(self.sampler.models, axis = 0)
        mean_true = np.array([ -1.61141558, 2.73788443 ])
        for i, val in enumerate(mean):
            self.assertAlmostEqual(val, mean_true[i])
            
    def test_hamiltonian(self):
        """
        Hamiltonian Monte-Carlo algorithm test.
        """
        stepsize = 0.0091991
        n_leap = 14
        self.sampler.sample(sampler = "hamiltonian", stepsize = stepsize,
                            n_leap = n_leap)
        mean = np.mean(self.sampler.models, axis = 0)
        mean_true = np.array([ 0.89343405, 1.18474131 ])
        for i, val in enumerate(mean):
            self.assertAlmostEqual(val, mean_true[i])
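Note: for orientation, a minimal random-walk Metropolis-Hastings loop of the kind the "hastings" sampler above presumably runs, with func treated as a negative log-density; the step scaling and box handling below are assumptions, not this library's code.

import numpy as np

def metropolis_hastings_sketch(func, lower, upper, max_iter=50,
                               stepsize=0.1, random_state=None):
    rng = np.random.RandomState(random_state)
    x = rng.uniform(lower, upper)  # random start inside the box
    models = [x]
    for _ in range(max_iter - 1):
        # Gaussian random-walk proposal, scaled by the box width.
        proposal = x + stepsize * (upper - lower) * rng.randn(len(x))
        inside = np.all((proposal >= lower) & (proposal <= upper))
        # Metropolis rule: accept with probability min(1, exp(f(x) - f(x'))).
        if inside and rng.rand() < np.exp(min(0.0, func(x) - func(proposal))):
            x = proposal
        models.append(x)
    return np.array(models)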
Example #24
def main(args):
    # Get configuration from the cConfig.py file
    conf = cConfig()

    if not os.path.exists('results'):
        os.makedirs('results')

    # Start an output file where all the results will be stored
    ofile = open("results/summary.csv", "w+")
    WriteHeader(ofile, conf)

    if conf.MODE == 0:
        MonteCarlo(conf, ofile)
    elif conf.MODE == 1:
        SimulatedAnneal(conf, ofile)
Example #25
 def test_monte_carlo(self):
     inputs = {
         'years': 30,
         'savings': 100000,
         'withdrawalRate': 0.45,
         'stocks': 0.5,
         'bonds': 0.3,
         'cash': 0.20,
         'total_trials': 1000
     }
     print("years = " + str(inputs["years"]))
     print("savings = " + str(inputs["savings"]))
     print("withdrawalRate = " + str(inputs["withdrawalRate"]))
     print("stocks = " + str(inputs["stocks"]))
     print("bonds = " + str(inputs["bonds"]))
     print("cash = " + str(inputs["cash"]))
     monte_carlo = mc.MonteCarlo('a', inputs)
     print(monte_carlo.name)
     self.assertEqual(True, True)
Example #26
                eligibility_trace[index1] += 1
                self.state_count[index1] += 1

                self.action_value_matrix += 1/self.state_count[index1] * delta * eligibility_trace
                eligibility_trace *= self.gamma * self.lamb

                state1 = state2
                action1 = action2

            if self.save_mse_vals: self.mse_vals.append(self.mse(self.action_value_matrix, self.mc.action_value_matrix))

        return self.mse(self.action_value_matrix, self.mc.action_value_matrix)

if __name__ == "__main__":
    iterations = 50000
    mc = MonteCarlo(Environment(), iterations)
    lambda_values = [round(x * 0.1, 2) for x in range(11)]
    mse_values = []
    lambda_0_and_1 = []
    for l in lambda_values:
        save_mse_vals = True if l == 0.0 or l == 1.0 else False
        sl = SarsaLambda(Environment(), iterations, l, mc, save_mse_vals)
        if save_mse_vals: lambda_0_and_1.append(sl)
        mse_values.append(sl.train())
        plot_3d(11, 22, sl.action_value_matrix, 'sarsa_lambda' + str(l) +'.png')

    line_plot([lambda_values], [mse_values], 'Lambda', 'MSE', 'lambda_vs_mse.png')

    episodes = [i + 1 for i in range(iterations)]

    line_plot([episodes, episodes], [sl.mse_vals for sl in lambda_0_and_1], 'MSE', 'Episodes', 'mse_vs_episodes.png', ['lambda = 0', 'lambda = 1'])
Example #27
def test_fails_for_non_integer_densities():
    mc = MonteCarlo()
    with assert_raises(TypeError) as exception: mc.random_move([1.0, 2, 3, 4])
Example #28
def test_handles_zero_densities():
    mc = MonteCarlo()
    densities = [[0],[0,0,0,0]]
    for density in densities:
       with assert_raises(ValueError) as exception: mc.random_move(density)
Example #29
def test_particle_number_is_conserved():
    mc = MonteCarlo()
    density = random_integers(100, size=100)
    n = sum(density)
    n_new = sum(mc.random_move(density))
    assert_equal(n,n_new,"particle number not conserved")
Example #30
def test_compare_energies():
    mc = MonteCarlo()
    assert_true(mc.compare_energy(2,1))
    assert_false(mc.compare_energy(1,2))
    assert_false(mc.compare_energy(1,1))
Example #31
 def test_eu_call_opt_with_mp(self):
     '''Run the same test but in multiprocess mode
     '''
     mc = MonteCarlo(50, 52, 0.05, 2, 0.3)
     self.assertAlmostEqual(6.7601, mc.run(OptionType.PUT, 300000, 4), 1) # 4 processes seems to be the fastest on a quad-core pc
Example #32
 def test_eu_call_opt(self):
     '''2-year European put option, spot price 50, strike 52
     risk-free rate 5%, volatility 30%
     '''
     mc = MonteCarlo(50, 52, 0.05, 2, 0.3)
     self.assertAlmostEqual(6.7601, mc.run(OptionType.PUT, 300000, 0), 1) # single process
Example #33
def test_fails_for_negative_densities():
    mc = MonteCarlo()
    with assert_raises(ValueError) as exception: mc.random_move([-1, 2, 3, 4])
Example #34
    def play_human(self, alg, player):
        """
        Main game loop. Waits for human input.
        """

        if player == 1:
            opp = -1
            player = 1
        elif player == 2:
            opp = 1
            player = -1
        else:
            print("Player options: 1, 2")
            return

        if alg == "QL":
            o = QLearner(opp, self)
        elif alg == "MonteCarlo":
            o = MonteCarlo(opp, self)
        elif alg == "NN":
            o = NN_Player(opp, self)
        else:
            print("Algorithm options: [MonteCarlo, QL, NN]")
            return

        print(self.__str__())
        print("1 2 3 4 5 6 7")

        i = 0
        while self.has_winner() == 0:

            if self.full():
                print("It's a draw!")
                return

            if self.turn == player:
                human_move = int(input(">>> "))
                self.place_with_print(human_move - 1)

                if alg == "QL":
                    o.check_if_lost()

            else:
                if alg == "MonteCarlo":
                    o = MonteCarlo(opp, self, depth=100, rollouts=1000)
                    move = o.choose_col()
                    self.place_with_print(move)
                elif alg == "QL":
                    o.learn()
                    print(self.__str__())
                elif alg == "NN":
                    move = o.choose_col()
                    self.place_with_print(move)

            print("1 2 3 4 5 6 7")


            i += 1

        if self.has_winner() == player:
            print("You won!")
        else:
            print("The winner is Bot!")
        self.clear_board()
Example #35
parameters = vanillaNN.getParameters()
qPrior = ParametersDistribution(sharedDim, headDim, headCount)
qPrior.setParameters(parameters, 1)
parameters = qPrior.getFlattenedParameters(1)

for i, (images, labels) in enumerate(trainLoader):
    images = images.reshape(-1, 28 * 28).to(Device)
    yOnehot = _onehot(labels)
    qPosterior = maximizeVariationalLowerBound(images,
                                               yOnehot,
                                               qPrior,
                                               taskId=1)

print("Prediction Time :-) ")

with torch.no_grad():
    correct = 0
    total = 0

    for images, labels in testLoader:
        images = images.reshape(-1, 28 * 28).to(Device)
        labels = labels.to(Device)
        monteCarlo = MonteCarlo(qPrior, numSamples)
        predicted = monteCarlo.computeMonteCarlo(images, 1)
        _, predicted = torch.max(predicted.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()
    print('Accuracy of the network on the 10000 test images: {} %'.format(
        100 * correct / total))
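Note: computeMonteCarlo is not shown in these snippets; a plausible self-contained sketch is the usual Monte Carlo predictive average over sampled weights (the sample_model callable below is an assumption standing in for drawing a network from the weight distribution).

import torch

def monte_carlo_predict_sketch(sample_model, inputs, num_samples):
    # Average class probabilities over independent weight draws: the standard
    # Monte Carlo estimate of a Bayesian network's predictive distribution.
    with torch.no_grad():
        draws = [torch.softmax(sample_model(inputs), dim=1)
                 for _ in range(num_samples)]
    return torch.stack(draws).mean(dim=0)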
Example #36
def test_compare_boltzmann_factor_high_t():
    mc = MonteCarlo(10000000000)
    assert_true(mc.compare_boltzmann_factor(100,1))
Example #37
    def play(self, games=1, traindata_flag=False, saveresults_flag=True, save_filename=str(int(time.time()))):
        """
        Main game loop. Plays full game iterations.
        """

        p1 = None
        p2 = None

        traindata_feature = []
        traindata_target = []

        iter_n = games

        # Select player1 outside of game loop
        if self.player1 == "Random":
            p1 = RandomPlayer(self)
        elif self.player1 == "QL":
            p1 = QLearner(1, self)
        elif self.player1 == "MonteCarlo":
            p1 = MonteCarlo(1, self)
        elif self.player1 == "NN":
            p1 = NN_Player(1, self)
            # p1 = NN_Player(1, self.board, self.available_columns())

        # Select player2 outside of game loop
        if self.player2 == "Random":
            p2 = RandomPlayer(self)
        elif self.player2 == "QL":
            p2 = QLearner(-1, self)
        elif self.player2 == "MonteCarlo":
            p2 = MonteCarlo(-1, self)
        elif self.player2 == "NN":
            p2 = NN_Player(-1, self)
            # p2 = NN_Player(-1, self.board, self.available_columns())

        # record the total time each player uses in each game

        total_time_player1 = []
        total_time_player2 = []

        while games > 0:
            print("Play iteration = ", games)

            # record total move in each game
            total_move = 0

            # record the total time each player uses in each game
            player1_time = 0
            player2_time = 0

            while self.has_winner() == 0:

                # print(self.board)

                if self.full():
                    print("It's a draw!")
                    print("Player 1 uses: ", player1_time, "s")
                    print("player 2 uses: ", player2_time, "s")

                    break

                if self.turn == 1:

                    start_time = time.time()

                    # Which Strategy for Player 1
                    if self.player1 == "Random":
                        self.place(p1.choose_col())

                    elif self.player1 == "QL":
                        # p1 = QLearner(1)
                        p1.learn()

                    elif self.player1 == "MonteCarlo":
                        self.place(p1.choose_col())

                    elif self.player1 == "NN":
                        self.place(p1.choose_col())

                    if self.player2 == "QL":
                        p2.check_if_lost()

                    end_time = time.time()

                    player1_time = player1_time + (end_time - start_time)

                else:

                    start_time = time.time()

                    # Which Strategy for Player 2
                    if self.player2 == "Random":
                        self.place(p2.choose_col())

                    elif self.player2 == "QL":
                        p2.learn()

                    elif self.player2 == "MonteCarlo":
                        p2 = MonteCarlo(-1, self)
                        self.place(p2.choose_col())

                    elif self.player2 == "NN":
                        self.place(p2.choose_col())

                    if self.player1 == "QL":
                        p1.check_if_lost()

                    end_time = time.time()

                    player2_time = player2_time + (end_time - start_time)

                if traindata_flag:
                    # add features for training data for NN
                    traindata_feature.append(np.array(self.board).reshape(42))

                total_move = total_move + 1

            # add targets for training data for NN
            if traindata_flag:
                for m in range(total_move):
                    traindata_target.append(self.target())

            # complete results
            if saveresults_flag:
                traindata_target.append(self.target())
                total_time_player1.append(player1_time)
                total_time_player2.append(player2_time)

            print("The winner is player ", self.has_winner())
            print("Player 1 uses: ", player1_time, "s")
            print("player 2 uses: ", player2_time, "s")

            self.clear_board()
            games -= 1

        # save training data for NN
        if traindata_flag:
            np.savetxt('TrainingData/features_' + str(iter_n) + '_' + save_filename + '.csv',
                       traindata_feature, delimiter=',', fmt='%10.0f')
            np.savetxt('TrainingData/targets_' + str(iter_n) + '_' + save_filename + '.csv',
                       traindata_target, delimiter=',', fmt='%10.0f')

        # save game results for comparison
        if saveresults_flag:
            results = np.array([traindata_target, total_time_player1, total_time_player2]).T
            fmt = '%10.0f', '%10.10f', '%10.10f'
            np.savetxt(
                'Game_results/' + self.player1 + '_' + self.player2 + '_' + str(iter_n) + '_' + save_filename + '.csv',
                results, delimiter=',', fmt=fmt, header="win,Player1_time,Player2_time")

        print("-------------Among all the games----------------- ")
        print("Player 1 is ", self.player1)
        print("Player 2 is ", self.player2)
        print("Total games Player 1 wins: ", sum([i for i in traindata_target if i == 1]))
        print("Total games Player 2 wins: ", sum([i for i in traindata_target if i == -1]) * (-1))
        print("Total Time Player 1 takes: ", sum(total_time_player1), "s")
        print("Total Time Player 2 takes: ", sum(total_time_player2), "s")
Example #38
def test_compare_boltzmann_factor_equal():
    mc = MonteCarlo()
    assert_true(mc.compare_boltzmann_factor(1,1))
Example #39
from get_init import GetInit  # assumed module path: GetInit is used below but was not imported in the original snippet
from update_graph import UpdateGraph
from monte_carlo import MonteCarlo
from draw import DrawGraph

# Set parameters
k = 8
r = 1
T = 300
nsteps = 300

# calculate number of all possible edges
etot = k * (k - 1) // 2
# aliases of my own modules
gi = GetInit()
ug = UpdateGraph()
mc = MonteCarlo()
pg = DrawGraph()

# Generate initial graph and all the initial properties needed for future use
# tmp is the initial graph and pos is the matrix storing positions
# and later 'tmp' will also be the name of all current graphs
tmp, pos = gi.init_graph(k)
# calculate weights of all possible edges
w = gi.calc_weight(pos, k)
# initialize the list that will store each edge list as a string
edge_list = []
# write initial edge list into edge_list[0] as a string
edge_list.append(str(tmp.edges()))
# get the number of neighbors of node 0
neighbor_0 = len(list(tmp.neighbors(0)))  # list() keeps this working on networkx >= 2, where neighbors() returns an iterator
# get the number of edges in the whole graph