def computeCost(inputs, labels, qPos, qPri, taskId, numSamples, alpha=1):
    inputSize = inputs.size()[0]
    monteCarlo = MonteCarlo(qPos, numSamples)
    kl = KL()
    mcTerm = monteCarlo.logPred(inputs, labels, taskId)
    klTerm = torch.div(kl.computeKL(qPos, qPri, taskId), inputSize)
    return -((2 - alpha) * mcTerm - alpha * klTerm)
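# A minimal, self-contained sketch of the loss structure computeCost returns, kept
# separate from the code above: mc_term stands in for the Monte Carlo estimate of
# E_q[log p(y|x)] and kl_term for KL(q_posterior || q_prior) / batch_size; the
# tensor values below are placeholders, not outputs of the real model.
import torch

def alpha_weighted_elbo_loss(mc_term, kl_term, alpha=1.0):
    # negative variational lower bound, matching the return expression above
    return -((2 - alpha) * mc_term - alpha * kl_term)

loss = alpha_weighted_elbo_loss(torch.tensor(-0.7), torch.tensor(0.05))
print(loss)  # tensor(0.7500)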
def MC_OffPolicy_Prediction_Results():
    mc_obj = MonteCarlo(POLICY_PLAYER, POLICY_DEALER)
    true_value = -0.27726
    episodes = 10000
    runs = 100
    error_ordinary = np.zeros(episodes)  # MSE of ordinary importance sampling
    error_weighted = np.zeros(episodes)  # MSE of weighted importance sampling
    for i in tqdm(range(0, runs)):
        ordinary_sampling_, weighted_sampling_ = mc_obj.monte_carlo_off_policy(episodes)
        # get the squared error
        error_ordinary += np.power(ordinary_sampling_ - true_value, 2)
        error_weighted += np.power(weighted_sampling_ - true_value, 2)
    error_ordinary /= runs
    error_weighted /= runs

    plt.plot(error_weighted, label='Weighted Importance Sampling')
    plt.plot(error_ordinary, label='Ordinary Importance Sampling')
    plt.xlabel('Episodes (log scale)')
    plt.ylabel('Mean square error')
    plt.xscale('log')
    plt.legend()
    plt.savefig('MC_OffPolicy_Prediction.png')
    plt.close()
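# A self-contained sketch (not part of mc_obj) of the two estimators the plot above
# compares, assuming per-episode returns `g` and importance-sampling ratios `rho`
# (products of target/behaviour policy probabilities) have already been collected:
#   ordinary importance sampling:  V ~ sum(rho * g) / N
#   weighted importance sampling:  V ~ sum(rho * g) / sum(rho)
import numpy as np

def off_policy_estimates(rho, g):
    rho, g = np.asarray(rho, dtype=float), np.asarray(g, dtype=float)
    ordinary = np.sum(rho * g) / len(g)
    weighted = np.sum(rho * g) / np.sum(rho) if np.sum(rho) > 0 else 0.0
    return ordinary, weighted

# e.g. off_policy_estimates([2.0, 0.0, 0.5], [-1, 1, -1]) -> (-0.8333..., -1.0)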
def MC_OnPolicy_Prediction_Results():
    mc_obj = MonteCarlo(POLICY_PLAYER2, POLICY_DEALER)
    states_usable_ace_1, states_no_usable_ace_1 = mc_obj.monte_carlo_on_policy(10000)
    states_usable_ace_2, states_no_usable_ace_2 = mc_obj.monte_carlo_on_policy(1000000)

    states = [
        states_usable_ace_1, states_usable_ace_2,
        states_no_usable_ace_1, states_no_usable_ace_2
    ]
    titles = [
        'Usable Ace, 10000 Episodes', 'Usable Ace, 1000000 Episodes',
        'No Usable Ace, 10000 Episodes', 'No Usable Ace, 1000000 Episodes'
    ]

    _, axes = plt.subplots(2, 2, figsize=(40, 30))
    plt.subplots_adjust(wspace=0.1, hspace=0.2)
    axes = axes.flatten()
    sns.set(font_scale=3)

    for state, title, axis in zip(states, titles, axes):
        fig = sns.heatmap(np.flipud(state), cmap="YlGnBu", ax=axis)
        fig.set_yticklabels(list(reversed(range(12, 22))), fontsize=35)
        fig.set_xticklabels(range(1, 11), fontsize=35)
        fig.set_ylabel('Player sum', fontsize=40)
        fig.set_xlabel('Dealer showing', fontsize=40)
        fig.set_title(title, fontsize=40)

    plt.savefig('MC_OnPolicy_Prediction.png')
    plt.close()
def test_eu_call_opt_with_mp(self):
    '''Run the same test but in multiprocess mode '''
    mc = MonteCarlo(50, 52, 0.05, 2, 0.3)
    self.assertAlmostEqual(
        6.7601, mc.run(OptionType.PUT, 300000, 4),
        1)  # 4 processes seems to be the fastest on a quad-core pc
def test_eu_call_opt(self):
    '''2-year European put option, spot price 50, strike 52,
    risk-free rate 5%, volatility 30%
    '''
    mc = MonteCarlo(50, 52, 0.05, 2, 0.3)
    self.assertAlmostEqual(6.7601, mc.run(OptionType.PUT, 300000, 0), 1)  # single process
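# A minimal, self-contained sketch (not the MonteCarlo class under test) of the
# estimator such a pricer typically uses: average the discounted terminal payoff of
# risk-neutral GBM samples. With spot=50, strike=52, r=0.05, T=2, sigma=0.3 this
# converges to the Black-Scholes put value of roughly 6.76 that the tests assert.
import numpy as np

def mc_european_put(spot, strike, r, t, sigma, n_paths, seed=0):
    rng = np.random.default_rng(seed)
    z = rng.standard_normal(n_paths)
    # terminal price under risk-neutral geometric Brownian motion
    s_t = spot * np.exp((r - 0.5 * sigma ** 2) * t + sigma * np.sqrt(t) * z)
    payoff = np.maximum(strike - s_t, 0.0)
    return np.exp(-r * t) * payoff.mean()

# print(mc_european_put(50, 52, 0.05, 2, 0.3, 300000))  # ~6.76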
def test_density_input():
    mc = MonteCarlo()
    with assert_raises(ValueError) as exception:
        mc([-1, 2, 3, 4], lambda x: 0)
    with assert_raises(TypeError) as exception:
        mc([1.1, 2, 3, 4], lambda x: 0)
    densities = [[0], [0, 0, 0, 0]]
    for density in densities:
        with assert_raises(ValueError) as exception:
            mc.random_move(density)
def testAccuracy(self, x_test, y_test, q_pred, headId):
    acc = 0
    num_pred_samples = 100
    for x_test_batch, y_test_batch in self.getBatch(x_test, y_test):
        monteCarlo = MonteCarlo(q_pred, num_pred_samples)
        y_pred_batch = monteCarlo.computeMonteCarlo(x_test_batch, headId)
        _, y_pred_batch = torch.max(y_pred_batch.data, 1)
        y_pred_batch = torch.eye(
            self.dataGen.get_dims()[1])[y_pred_batch].type(FloatTensor)
        acc += torch.sum(torch.mul(y_pred_batch, y_test_batch)).item()
    return acc / y_test.shape[0]
def test_equal_probability():
    """ Check particles have equal probability of movement. """
    from numpy import array, sqrt, count_nonzero

    mc = MonteCarlo()
    density = array([1, 0, 99])
    changes_at_zero = [
        (density - mc.change_density(density))[0] != 0 for i in range(10000)
    ]
    assert_almost_equal(count_nonzero(changes_at_zero),
                        0.01 * len(changes_at_zero),
                        delta=0.5 * sqrt(len(changes_at_zero)))
def test_input_sanity():
    """ Check that incorrect inputs fail. """
    with assert_raises(NotImplementedError) as exception:
        MonteCarlo(temperature=0e0)
    with assert_raises(ValueError) as exception:
        MonteCarlo(temperature=-1e0)

    mc = MonteCarlo()
    with assert_raises(TypeError) as exception:
        mc(lambda x: 0, [1.0, 2, 3])
    with assert_raises(ValueError) as exception:
        mc(lambda x: 0, [-1, 2, 3])
    with assert_raises(ValueError) as exception:
        mc(lambda x: 0, [[1, 2, 3], [3, 4, 5]])
    with assert_raises(ValueError) as exception:
        mc(lambda x: 0, [3])
    with assert_raises(ValueError) as exception:
        mc(lambda x: 0, [0, 0])
def setUp(self):
    """ Initialize Monte-Carlo algorithm tests. """
    func = lambda x: 100*np.sum((x[1:]-x[:-1]**2)**2) + np.sum((1-x[:-1])**2)
    n_dim = 2
    lower = np.full(n_dim, -5.12)
    upper = np.full(n_dim, 5.12)
    max_iter = 50
    random_state = 42
    self.sampler = MonteCarlo(func, lower=lower, upper=upper,
                              max_iter=max_iter, random_state=random_state)
    self.n_dim = n_dim
def test_stop_simulation():
    """ Checks that if observe returns False, iteration stops. """
    from mock import Mock

    mc = MonteCarlo(temperature=100.0, itermax=8)
    # Make a fake observer
    mc.observe = Mock(side_effect=[True, False, True])

    # Fake energy method
    energies = [0.1, -0.1, -0.2, -0.15, -0.25]
    energy = Mock(side_effect=energies)

    # Call simulation
    mc(energy, [0, 1, 2, 3])
    assert_equal(len(mc.observe.mock_calls), 2)
    assert_equal(len(energy.mock_calls), 3)  # one extra call to get first energy
def play(self):
    node = Node(self.board, self.current_player.piece)
    print(self.current_player.name, 'goes first')
    self.board.print_board()
    while True:
        if self.current_player.name != 'Computer':
            column = self.get_move()
            while not self.board.add_piece(self.current_player.piece, column):
                print('That is not a valid move. Please select a different column')
                column = self.get_move()
        else:
            node, column = MonteCarlo(self.board.make_copy(), 'O', ITERATIONS,
                                      last_node=node).get_move()
            print('Computer chooses column', column)
            self.board.add_piece(self.current_player.piece, column)
        self.board.print_board()
        node = self.__navigate_to_node_for_move(node, column, self.board)
        if self.board.winner_found():
            print('***** ' + self.current_player.name + ' wins!')
            break
        if self.board.spaces_left() == 0:
            print('***** Tie game')
            break
        self.__advance_turn()
def test_main_iteration_particle_number_is_conserved():
    from mock import Mock
    from numpy.random import random_integers

    mc = MonteCarlo()

    # Mock the energy function
    energies = [1, 2, 3, 4]
    energy = Mock(side_effect=energies)

    density = random_integers(100, size=100)
    n = sum(density)

    result = mc.iteration(density, energy)
    new_density = result[0]
    n_new = sum(new_density)
    assert_equal(n, n_new, "particle number not conserved")
def test_accept_change():
    """ Check that move is accepted if second energy is lower """
    from numpy import sqrt, count_nonzero, exp

    mc = MonteCarlo(temperature=100.0)
    # Should always be true. But do more than one draw, in case random incorrectly crept into
    # implementation
    for i in range(10):
        assert_true(mc.accept_change(0.5, 0.4))
        assert_true(mc.accept_change(0.5, 0.5))

    # This should be accepted only part of the time, depending on exponential distribution
    prior, successor = 0.4, 0.5
    accepted = [mc.accept_change(prior, successor) for i in range(10000)]
    assert_almost_equal(count_nonzero(accepted) / float(len(accepted)),
                        exp(-(successor - prior) / mc.temperature),
                        delta=3e0 / sqrt(len(accepted)))
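# A sketch of the Metropolis criterion the test above checks (an assumption about
# what accept_change implements, not the tested class itself): always accept a
# non-increasing energy, otherwise accept with probability exp(-dE / temperature).
import numpy as np

def metropolis_accept(e_old, e_new, temperature, rng=np.random.default_rng()):
    if e_new <= e_old:
        return True
    return rng.random() < np.exp(-(e_new - e_old) / temperature)

# e.g. for temperature=100 and energies (0.4, 0.5), the acceptance rate is
# exp(-0.001) ~ 0.999, which is what the statistical assertion above measures.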
def monte_carlo_demo():
    np.random.seed(101)
    env = SnakeEnv(10, [3, 6])
    agent = ModelFreeAgent(env)
    mc = MonteCarlo(0.5)
    with timer('Timer Monte Carlo Iter'):
        mc.monte_carlo_opt(agent, env)
    print('return_pi={}'.format(eval_game(env, agent)))
    print(agent.pi)

    np.random.seed(101)
    agent2 = TableAgent(env)
    pi_algo = PolicyIteration()
    with timer('Timer PolicyIter'):
        pi_algo.policy_iteration(agent2)
    print('return_pi={}'.format(eval_game(env, agent2)))
    print(agent2.pi)

    np.random.seed(101)
    agent3 = ModelFreeAgent(env)
    mc = SARSA(0.5)
    with timer('Timer SARSA Iter'):
        mc.sarsa(agent3, env)
    print('return_pi={}'.format(eval_game(env, agent3)))
    print(agent3.pi)
def test_move_particle_one_over():
    """ Check density is changed by a particle hopping left or right. """
    from numpy import nonzero, multiply
    from numpy.random import randint

    mc = MonteCarlo()

    for i in range(100):  # Do this n times, to avoid issues with random numbers
        # Create density
        density = randint(50, size=randint(2, 6))
        # Change it
        new_density = mc.change_density(density)

        # Make sure any movement is by one
        indices = nonzero(density - new_density)[0]
        assert_equal(len(indices), 2, "densities differ in two places")
        assert_equal(multiply.reduce((density - new_density)[indices]), -1,
                     "densities differ by + and - 1")
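# A sketch of a move satisfying the properties the tests above check (an assumption,
# not the implementation under test): hop one particle from a randomly chosen
# occupied bin to an adjacent bin, so exactly two bins change by +1/-1 and the
# total particle number is conserved. Edge handling (wrapping here) may differ
# from the real change_density.
import numpy as np

def hop_one_particle(density, rng=np.random.default_rng()):
    density = np.array(density, copy=True)
    occupied = np.flatnonzero(density > 0)   # bins with at least one particle
    src = rng.choice(occupied)
    dst = (src + rng.choice([-1, 1])) % len(density)  # neighbour, wrapping at edges
    density[src] -= 1
    density[dst] += 1
    return density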
def test_main_algorithm():
    """ Check set path through main algorithm """
    from mock import Mock, call

    mc = MonteCarlo(temperature=100.0, itermax=4)

    # Patch mc so that it takes a pre-determined path through
    acceptance = [True, True, False, True]
    mc.accept_change = Mock(side_effect=acceptance)
    densities = (
        [0, 0, 1, 0],
        [0, 1, 1, 0],
        [2, 2, 2, 2],
        [2, 3, 3, 2],
        [5, 3, 3, 5],
    )
    mc.change_density = Mock(side_effect=densities[1:])
    mc.observe = Mock(return_value=True)

    # Fake energy method
    energies = [0.1, -0.1, -0.2, -0.15, -0.25]
    energy = Mock(side_effect=energies)

    # Call simulation
    mc(energy, densities[0])

    # Now, analyze path. First check length.
    assert_equal(len(mc.accept_change.mock_calls), 4)
    assert_equal(len(mc.change_density.mock_calls), 4)
    assert_equal(len(mc.observe.mock_calls), 4)
    assert_equal(len(energy.mock_calls), 5)  # one extra call to get first energy

    # Easiest to look at observe, since it should have all the info about the step
    observe_path = [
        call(0, acceptance[0], densities[1], energies[1]),
        call(1, acceptance[1], densities[2], energies[2]),
        call(2, acceptance[2], densities[2], energies[2]),
        call(3, acceptance[3], densities[4], energies[4]),
    ]
    assert_equal(observe_path, mc.observe.call_args_list)
def MC_OnPolicy_Control_Results():
    mc_obj = MonteCarlo(POLICY_PLAYER, POLICY_DEALER)
    state_action_values = mc_obj.monte_carlo_es_control(500000)

    state_value_no_usable_ace = np.max(state_action_values[:, :, 0, :], axis=-1)
    state_value_usable_ace = np.max(state_action_values[:, :, 1, :], axis=-1)

    # get the optimal policy
    action_no_usable_ace = np.argmax(state_action_values[:, :, 0, :], axis=-1)
    action_usable_ace = np.argmax(state_action_values[:, :, 1, :], axis=-1)

    images = [
        action_usable_ace, state_value_usable_ace,
        action_no_usable_ace, state_value_no_usable_ace
    ]
    titles = [
        'Optimal policy with usable Ace', 'Optimal value with usable Ace',
        'Optimal policy without usable Ace', 'Optimal value without usable Ace'
    ]

    _, axes = plt.subplots(2, 2, figsize=(40, 30))
    plt.subplots_adjust(wspace=0.1, hspace=0.2)
    axes = axes.flatten()
    sns.set(font_scale=3)

    for image, title, axis in zip(images, titles, axes):
        fig = sns.heatmap(np.flipud(image), cmap="YlGnBu", ax=axis)
        fig.set_yticklabels(list(reversed(range(12, 22))), fontsize=35)
        fig.set_xticklabels(range(1, 11), fontsize=35)
        fig.set_ylabel('Player sum', fontsize=40)
        fig.set_xlabel('Dealer showing', fontsize=40)
        fig.set_title(title, fontsize=40)

    plt.savefig('MC_OnPolicy_Control.png')
    plt.close()
class MonteCarloTest(unittest.TestCase):
    """ Monte-Carlo algorithms unit tests. """

    def setUp(self):
        """ Initialize Monte-Carlo algorithm tests. """
        func = lambda x: 100*np.sum((x[1:]-x[:-1]**2)**2) + np.sum((1-x[:-1])**2)
        n_dim = 2
        lower = np.full(n_dim, -5.12)
        upper = np.full(n_dim, 5.12)
        max_iter = 50
        random_state = 42
        self.sampler = MonteCarlo(func, lower=lower, upper=upper,
                                  max_iter=max_iter, random_state=random_state)
        self.n_dim = n_dim

    def tearDown(self):
        """ Cleaning after each test. """
        del self.sampler

    def test_pure(self):
        """ Pure Monte-Carlo test. """
        self.sampler.sample(sampler="pure")
        mean = np.mean(self.sampler.models, axis=0)
        mean_true = np.array([-0.6070602, -0.00363818])
        for i, val in enumerate(mean):
            self.assertAlmostEqual(val, mean_true[i])

    def test_hastings(self):
        """ Metropolis-Hastings algorithm test. """
        stepsize = 0.1409
        self.sampler.sample(sampler="hastings", stepsize=stepsize)
        mean = np.mean(self.sampler.models, axis=0)
        mean_true = np.array([-1.61141558, 2.73788443])
        for i, val in enumerate(mean):
            self.assertAlmostEqual(val, mean_true[i])

    def test_hamiltonian(self):
        """ Hamiltonian Monte-Carlo algorithm test. """
        stepsize = 0.0091991
        n_leap = 14
        self.sampler.sample(sampler="hamiltonian", stepsize=stepsize, n_leap=n_leap)
        mean = np.mean(self.sampler.models, axis=0)
        mean_true = np.array([0.89343405, 1.18474131])
        for i, val in enumerate(mean):
            self.assertAlmostEqual(val, mean_true[i])
def main(args):
    # Get configuration from the cConfig.py file
    conf = cConfig()

    if not os.path.exists('results'):
        os.makedirs('results')

    # Start an output file where all the results will be stored
    ofile = open("results/summary.csv", "w+")
    WriteHeader(ofile, conf)

    if conf.MODE == 0:
        MonteCarlo(conf, ofile)
    elif conf.MODE == 1:
        SimulatedAnneal(conf, ofile)
def test_monte_carlo(self):
    inputs = {
        'years': 30,
        'savings': 100000,
        'withdrawalRate': 0.45,
        'stocks': 0.5,
        'bonds': 0.3,
        'cash': 0.20,
        'total_trials': 1000
    }
    print("years = " + str(inputs["years"]))
    print("savings = " + str(inputs["savings"]))
    print("withdrawalRate = " + str(inputs["withdrawalRate"]))
    print("stocks = " + str(inputs["stocks"]))
    print("bonds = " + str(inputs["bonds"]))
    print("cash = " + str(inputs["cash"]))
    monte_carlo = mc.MonteCarlo('a', inputs)
    print(monte_carlo.name)
    self.assertEqual(True, True)
            eligibility_trace[index1] += 1
            self.state_count[index1] += 1
            self.action_value_matrix += 1 / self.state_count[index1] * delta * eligibility_trace
            eligibility_trace *= self.gamma * self.lamb
            state1 = state2
            action1 = action2
        if self.save_mse_vals:
            self.mse_vals.append(self.mse(self.action_value_matrix, self.mc.action_value_matrix))
    return self.mse(self.action_value_matrix, self.mc.action_value_matrix)


if __name__ == "__main__":
    iterations = 50000
    mc = MonteCarlo(Environment(), iterations)
    lambda_values = [round(x * 0.1, 2) for x in range(11)]
    mse_values = []
    lambda_0_and_1 = []
    for l in lambda_values:
        save_mse_vals = True if l == 0.0 or l == 1.0 else False
        sl = SarsaLambda(Environment(), iterations, l, mc, save_mse_vals)
        if save_mse_vals:
            lambda_0_and_1.append(sl)
        mse_values.append(sl.train())
        plot_3d(11, 22, sl.action_value_matrix, 'sarsa_lambda' + str(l) + '.png')
    line_plot([lambda_values], [mse_values], 'Lambda', 'MSE', 'lambda_vs_mse.png')
    episodes = [i + 1 for i in range(iterations)]
    line_plot([episodes, episodes], [sl.mse_vals for sl in lambda_0_and_1],
              'MSE', 'Episodes', 'mse_vs_episodes.png',
              ['lambda = 0', 'lambda = 1'])
def test_fails_for_non_integer_densities():
    mc = MonteCarlo()
    with assert_raises(TypeError) as exception:
        mc.random_move([1.0, 2, 3, 4])
def test_handles_zero_densities():
    mc = MonteCarlo()
    densities = [[0], [0, 0, 0, 0]]
    for density in densities:
        with assert_raises(ValueError) as exception:
            mc.random_move(density)
def test_particle_number_is_conserved():
    from numpy.random import random_integers

    mc = MonteCarlo()
    density = random_integers(100, size=100)
    n = sum(density)
    n_new = sum(mc.random_move(density))
    assert_equal(n, n_new, "particle number not conserved")
def test_compare_energies():
    mc = MonteCarlo()
    assert_true(mc.compare_energy(2, 1))
    assert_false(mc.compare_energy(1, 2))
    assert_false(mc.compare_energy(1, 1))
def test_fails_for_negative_densities():
    mc = MonteCarlo()
    with assert_raises(ValueError) as exception:
        mc.random_move([-1, 2, 3, 4])
def play_human(self, alg, player):
    """ Main game loop. Waits for human input. """
    if player == 1:
        opp = -1
        player = 1
    elif player == 2:
        opp = 1
        player = -1
    else:
        print("Player options: 1, 2")
        return

    if alg == "QL":
        o = QLearner(opp, self)
    elif alg == "MonteCarlo":
        o = MonteCarlo(opp, self)
    elif alg == "NN":
        o = NN_Player(opp, self)
    else:
        print("Algorithm options: [MonteCarlo, QL, NN]")
        return

    print(self.__str__())
    print("1 2 3 4 5 6 7")
    i = 0
    while self.has_winner() == 0:
        if self.full():
            print("It's a draw!")
            return
        if self.turn == player:
            human_move = int(input(">>> "))
            self.place_with_print(human_move - 1)
            if alg == "QL":
                o.check_if_lost()
        else:
            if alg == "MonteCarlo":
                o = MonteCarlo(opp, self, depth=100, rollouts=1000)
                move = o.choose_col()
                self.place_with_print(move)
            elif alg == "QL":
                o.learn()
                print(self.__str__())
            elif alg == "NN":
                move = o.choose_col()
                self.place_with_print(move)
        print("1 2 3 4 5 6 7")
        i += 1

    if self.has_winner() == player:
        print("You won!")
    else:
        print("The winner is Bot!")
    self.clear_board()
parameters = vanillaNN.getParameters()
qPrior = ParametersDistribution(sharedDim, headDim, headCount)
qPrior.setParameters(parameters, 1)
parameters = qPrior.getFlattenedParameters(1)

for i, (images, labels) in enumerate(trainLoader):
    images = images.reshape(-1, 28 * 28).to(Device)
    yOnehot = _onehot(labels)
    qPosterior = maximizeVariationalLowerBound(images, yOnehot, qPrior, taskId=1)

print("Prediction Time :-) ")
with torch.no_grad():
    correct = 0
    total = 0
    for images, labels in testLoader:
        images = images.reshape(-1, 28 * 28).to(Device)
        labels = labels.to(Device)
        monteCarlo = MonteCarlo(qPrior, numSamples)
        predicted = monteCarlo.computeMonteCarlo(images, 1)
        _, predicted = torch.max(predicted.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()
    print('Accuracy of the network on the 10000 test images: {} %'.format(
        100 * correct / total))
def test_compare_boltzmann_factor_high_t():
    mc = MonteCarlo(10000000000)
    assert_true(mc.compare_boltzmann_factor(100, 1))
def play(self, games=1, traindata_flag=False, saveresults_flag=True,
         save_filename=str(int(time.time()))):
    """ Main game loop. Plays full game iterations. """
    p1 = None
    p2 = None
    traindata_feature = []
    traindata_target = []
    iter_n = games

    # Select player1 outside of game loop
    if self.player1 == "Random":
        p1 = RandomPlayer(self)
    elif self.player1 == "QL":
        p1 = QLearner(1, self)
    elif self.player1 == "MonteCarlo":
        p1 = MonteCarlo(1, self)
    elif self.player1 == "NN":
        p1 = NN_Player(1, self)
        # p1 = NN_Player(1, self.board, self.available_columns())

    # Select player2 outside of game loop
    if self.player2 == "Random":
        p2 = RandomPlayer(self)
    elif self.player2 == "QL":
        p2 = QLearner(-1, self)
    elif self.player2 == "MonteCarlo":
        p2 = MonteCarlo(-1, self)
    elif self.player2 == "NN":
        p2 = NN_Player(-1, self)
        # p2 = NN_Player(-1, self.board, self.available_columns())

    # record the total time each player uses in each game
    total_time_player1 = []
    total_time_player2 = []

    while games > 0:
        print("Play iteration = ", games)
        # record total moves in each game
        total_move = 0
        # record the total time each player uses in each game
        player1_time = 0
        player2_time = 0

        while self.has_winner() == 0:
            # print(self.board)
            if self.full():
                print("It's a draw!")
                print("Player 1 uses: ", player1_time, "s")
                print("Player 2 uses: ", player2_time, "s")
                break
            if self.turn == 1:
                start_time = time.time()
                # Which strategy for Player 1
                if self.player1 == "Random":
                    self.place(p1.choose_col())
                elif self.player1 == "QL":
                    # p1 = QLearner(1)
                    p1.learn()
                elif self.player1 == "MonteCarlo":
                    self.place(p1.choose_col())
                elif self.player1 == "NN":
                    self.place(p1.choose_col())
                if self.player2 == "QL":
                    p2.check_if_lost()
                end_time = time.time()
                player1_time = player1_time + (end_time - start_time)
            else:
                start_time = time.time()
                # Which strategy for Player 2
                if self.player2 == "Random":
                    self.place(p2.choose_col())
                elif self.player2 == "QL":
                    p2.learn()
                elif self.player2 == "MonteCarlo":
                    p2 = MonteCarlo(-1, self)
                    self.place(p2.choose_col())
                elif self.player2 == "NN":
                    self.place(p2.choose_col())
                if self.player1 == "QL":
                    p1.check_if_lost()
                end_time = time.time()
                player2_time = player2_time + (end_time - start_time)
            if traindata_flag:
                # add features for training data for NN
                traindata_feature.append(np.array(self.board).reshape(42))
            total_move = total_move + 1

        # add targets for training data for NN
        if traindata_flag:
            for m in range(total_move):
                traindata_target.append(self.target())
        # complete results
        if saveresults_flag:
            traindata_target.append(self.target())
            total_time_player1.append(player1_time)
            total_time_player2.append(player2_time)

        print("The winner is player ", self.has_winner())
        print("Player 1 uses: ", player1_time, "s")
        print("Player 2 uses: ", player2_time, "s")
        self.clear_board()
        games -= 1

    # save training data for NN
    if traindata_flag:
        np.savetxt('TrainingData/features_' + str(iter_n) + '_' + save_filename + '.csv',
                   traindata_feature, delimiter=',', fmt='%10.0f')
        np.savetxt('TrainingData/targets_' + str(iter_n) + '_' + save_filename + '.csv',
                   traindata_target, delimiter=',', fmt='%10.0f')

    # save game results for comparison
    if saveresults_flag:
        results = np.array([traindata_target, total_time_player1, total_time_player2]).T
        fmt = '%10.0f', '%10.10f', '%10.10f'
        np.savetxt('Game_results/' + self.player1 + '_' + self.player2 + '_'
                   + str(iter_n) + '_' + save_filename + '.csv',
                   results, delimiter=',', fmt=fmt,
                   header="win,Player1_time, Player2_time")

    print("-------------Among all the games----------------- ")
    print("Player 1 is ", self.player1)
    print("Player 2 is ", self.player2)
    print("Total games Player 1 wins: ", sum([i for i in traindata_target if i == 1]))
    print("Total games Player 2 wins: ", sum([i for i in traindata_target if i == -1]) * (-1))
    print("Total Time Player 1 takes: ", sum(total_time_player1), "s")
    print("Total Time Player 2 takes: ", sum(total_time_player2), "s")
def test_compare_boltzmann_factor_equal():
    mc = MonteCarlo()
    assert_true(mc.compare_boltzmann_factor(1, 1))
from update_graph import UpdateGraph
from monte_carlo import MonteCarlo
from draw import DrawGraph

# Set parameters
k = 8
r = 1
T = 300
nsteps = 300

# calculate number of all possible edges
etot = k * (k - 1) / 2

# aliases of my own modules
gi = GetInit()
ug = UpdateGraph()
mc = MonteCarlo()
pg = DrawGraph()

# Generate initial graph and all the initial properties needed for future use.
# tmp is the initial graph and pos is the matrix storing positions,
# and later 'tmp' will also be the name of all current graphs
tmp, pos = gi.init_graph(k)
# calculate weights of all possible edges
w = gi.calc_weight(pos, k)
# initialize the list that stores all the edge lists as strings
edge_list = [None] * 0
# write initial edge list into edge_list[0] as a string
edge_list.append(str(tmp.edges()))
# get the number of neighbors of node 0
neighbor_0 = len(tmp.neighbors(0))
# get the number of edges in the whole graph