def main():
    """Run self-play games for 60 seconds and report win/loss/draw statistics.

    Uses a MonteCarlo player with a 4-second think time per move; relies on
    the module-level Board, MonteCarlo and self_play definitions.
    """
    begin = datetime.utcnow()
    timelimit = timedelta(seconds=60)
    b = Board()
    mc = MonteCarlo(b, seconds=4)
    player_one_wins = 0
    player_two_wins = 0
    draws = 0
    while datetime.utcnow() - begin < timelimit:
        winner, _ = self_play(mc, b)
        if winner == 1:
            player_one_wins += 1
        elif winner == -1:
            player_two_wins += 1
        elif winner == 0:
            draws += 1
        else:
            print("Error, unknown winner returned:", winner)
    total_played = player_one_wins + player_two_wins + draws
    print("Total games played:", total_played)
    if total_played == 0:
        # Bug fix: avoid ZeroDivisionError if no game finished in time.
        return
    print("Player one wins:", player_one_wins, " or ",
          (player_one_wins / total_played) * 100, "%")
    print("Player two wins:", player_two_wins, " or ",
          (player_two_wins / total_played) * 100, "%")
    print("Draws:", draws, " or ", (draws / total_played) * 100, "%")
def testMonteCarlo1(self):
    """Fit a constant model to pure noise at increasing noise levels and
    check the Monte-Carlo error estimate against the fitted stdev."""
    print("====== MonteCarlo 1 ===================")
    npt = 100
    rng = numpy.random
    rng.seed(12345)
    noise = rng.standard_normal(npt)
    xdata = numpy.arange(npt, dtype=float) - 3
    noise_scale = 0.1
    for _ in range(5):
        ydata = noise * noise_scale
        model = PolynomialModel(0)
        fitter = Fitter(xdata, model)
        params = fitter.fit(ydata)
        stdevs = fitter.getStandardDeviations()
        chisq = fitter.chisq
        mc = MonteCarlo(xdata, model, fitter.covariance)
        mc.mcycles = 1000
        mcerror = fitter.monteCarloError(monteCarlo=mc)
        print("noise : ", fmt(noise_scale),
              "===========================================")
        print("params : ", fmt(params, format="%8.5f"))
        print("stdevs : ", fmt(stdevs, format="%8.5f"))
        print("scale : ", fmt(fitter.scale, format="%8.5f"), fmt(noise_scale))
        print("chisq : ", fmt(chisq, format="%8.5f"),
              fmt(mc._eigenvalues, format="%8.5f"),
              fmt(mc._eigenvectors, format="%8.5f"))
        print("covar : ", fmt(fitter.covariance, format="%8.5f"))
        print("mcerr : ", fmt(mcerror[0], format="%8.5f"))
        # The MC error of the constant term should agree with the fitted
        # standard deviation to within 10%.
        self.assertTrue(abs(stdevs[0] - mcerror[0]) < 0.1 * stdevs[0])
        self.assertTrue(params[0] < 0.05 * noise_scale)
        noise_scale *= 10
def monteCarloError(self, xdata=None, monteCarlo=None):
    r"""
    Return :math:`\sigma`-confidence regions of the model over xdata.

    Random samples are drawn from the full covariance matrix (the inverse
    of the Hessian) and added to the parameters; the model is evaluated
    for each perturbed parameter set (25 times by default). The standard
    deviation of those model evaluations is returned as the error bar.

    The sampling itself is delegated to the MonteCarlo class; pass a
    pre-configured instance via ``monteCarlo`` to tweak its behaviour
    outside BaseFitter.

    Parameters
    ----------
    xdata : array_like
        input data over which to calculate the error bars.
    monteCarlo : MonteCarlo
        a ready-made MonteCarlo class.
    """
    if xdata is None:
        xdata = self.xdata
    if monteCarlo is None:
        monteCarlo = MonteCarlo(xdata, self.model, self.covariance,
                                index=self.fitIndex)
    return monteCarlo.getError(xdata)
def run_trial(planning_horizon):
    """Train one mean-based Monte-Carlo agent (no planning factor) and
    return its per-episode observed returns as a DataFrame.

    Reads the module-level blocks_world_size and number_of_episodes.
    """
    world_builder = BlocksWorldBuilder(blocks_world_size)
    control = SimpleMonteCarloControl()
    agent = MonteCarlo(world_builder, Planner(planning_horizon),
                       control=control,
                       max_episode_length=blocks_world_size * 2,
                       planning_factor=0, plan_on_empty_policy=True,
                       exploring_starts=True, exploring_factor=0)
    agent.learn_policy(number_episodes=number_of_episodes,
                       show_progress_bar=True,
                       evaluate_return_ratio=False)
    # 'return_ratio' / 'optimal_returns' columns deliberately left out,
    # as in the original experiment definition.
    return pd.DataFrame({'episode': range(len(agent.returns)),
                         'observed_returns': agent.returns})
def main():
    """Play an interactive game of Human vs a Monte-Carlo AI.

    The human chooses to move first or second; players are keyed by the
    board's player markers (1 and -1).
    """
    print("Would you like to go 1st or 2nd?\n Go 1st: 1\n Go 2nd: 2")
    if int(input()) == 2:
        players = {-1: "Human", 1: 'AI'}
    else:
        players = {1: "Human", -1: 'AI'}
    board = Board()
    mc = MonteCarlo(board, seconds=3)
    game_history = []
    game_state = board.start()
    game_history.append(game_state)
    mc.update(game_state)
    legals = board.legal_plays(game_history)
    winner = board.winner(game_history)
    board.show(game_history[-1])
    while legals and winner == 0:
        current_player = board.current_player(game_state)
        if players[current_player] == 'Human':
            print("Please enter the square you'd like to play: ")
            pos = int(input())
            game_state = board.next_state(game_state, (pos, current_player))
        elif players[board.current_player(game_state)] == 'AI':
            print("AI is thinking....")
            game_state = board.next_state(game_state, mc.get_play())
        mc.update(game_state)
        game_history.append(game_state)
        # NOTE(review): only the latest state is passed here, while the
        # pre-loop calls pass the full history — confirm Board treats
        # both forms identically.
        legals = board.legal_plays([game_state])
        winner = board.winner([game_state])
        board.show(game_history[-1])
    # Bug fix: corrected user-facing typo "Plauer" -> "Player".
    print("The game is over!\n Player: ", winner, "has won")
def testMonteCarlo3(self, doplot=False):
    """Compare the fitter's Monte-Carlo error with MonteCarlo.getError and
    check sample statistics of randomVariant against the fitted curve."""
    print("====== MonteCarlo 3 ===================")
    npt = 101
    x = numpy.arange(npt, dtype=float) * 0.1
    rng = numpy.random
    rng.seed(1235)
    noise = rng.standard_normal(npt)
    ym = x * x + 0.03 * x + 0.05
    y1 = ym + 10 * noise
    pm = PolynomialModel(2)
    ftr = Fitter(x, pm)
    pars1 = ftr.fit(y1)
    stdv1 = ftr.getStandardDeviations()
    print("parameters : ", pars1)
    print("std devs : ", stdv1)
    print("chisquared : ", ftr.chisq)
    lmce = ftr.monteCarloError()
    chisq = ftr.chisq
    mce = MonteCarlo(x, pm, ftr.covariance)
    mce1 = mce.getError()
    # Both error-estimation paths must agree.
    assertAAE(lmce, mce1)
    yfit = pm.result(x)
    s2 = numpy.sum(numpy.square((yfit - ym) / lmce))
    print(s2, math.sqrt(s2 / npt))
    integral = numpy.sum(yfit)
    sum_rv = 0
    sum_sq = 0
    k = 0
    for k in range(1, 100001):
        rv = mce.randomVariant(x)
        sum_rv += numpy.sum(rv)
        sum_sq += numpy.sum(numpy.square(rv - yfit))
        if k % 10000 == 0:
            print("%6d %10.3f %10.3f %10.3f" %
                  (k, integral, sum_rv / k, math.sqrt(sum_sq / k)))
    # TBC: it is unclear why the factor 1000 appears in this tolerance.
    print(abs(integral - sum_rv / k), math.sqrt(sum_sq / (k * 1000)))
    self.assertTrue(abs(integral - sum_rv / k) < math.sqrt(sum_sq / (k * 1000)))
    if doplot:
        pyplot.plot(x, y1, 'b.')
        pyplot.plot(x, ym, 'k-')
        pyplot.plot(x, yfit, 'g-')
        pyplot.plot(x, yfit + lmce, 'r-')
        pyplot.plot(x, yfit - lmce, 'r-')
        pyplot.show()
def test_02_MonteCarlo(self):
    """ Varying the time step size """
    Asset = 100.0
    Strike = 100.0
    InterestRate = 0.05
    Volatility = 0.2
    Expiration = 1.0
    NumberAssetStep = 100
    TimeStep = Expiration / NumberAssetStep
    NumbreOfSimulation = 10000
    listOfValuesList = []
    for _ in range(10):
        # Price with Black-Scholes and Monte Carlo, record the difference.
        BS = BlackScholes(Asset, Strike, InterestRate, Volatility, Expiration)
        BS.price()
        MC = MonteCarlo(Asset, Strike, InterestRate, Volatility, Expiration,
                        TimeStep, NumberAssetStep, NumbreOfSimulation)
        MC.price()
        err = BS.getPrice() - MC.getPrice()
        self.assertGreater(BS.getPrice(), 0.0)
        listOfValuesList.append([
            Asset, Strike, InterestRate, Volatility, Expiration, TimeStep,
            NumberAssetStep, NumbreOfSimulation, BS.getPrice(), MC.getPrice(),
            err, math.fabs(err / BS.getPrice())
        ])
        # Refine the asset grid for the next run.
        NumberAssetStep += 100
        TimeStep = Expiration / NumberAssetStep
    # NOTE(review): 'NumbreOfSimulation' typo kept in the CSV header for
    # compatibility with existing consumers of the file.
    headerList = [
        'Asset', 'Strike', 'IntRate', 'Vol', 'Expiration', 'TimeStep',
        'NumberAssetStep', 'NumbreOfSimulation', 'BSPrice', 'MCPrice',
        'ErrorWithBS', 'ErrWithBSPer'
    ]
    writeToFile("binaryCallMCPriceTest2.csv", headerList)
    for valuesList in listOfValuesList:
        writeToFile("binaryCallMCPriceTest2.csv", valuesList)
def ReachPrecisionMonteCarlo(eps, n):
    """For each of n trials, count how many Monte-Carlo iterations are needed
    before the estimate of pi is within eps, writing each count to a
    spreadsheet.

    Parameters
    ----------
    eps : float
        target absolute precision on pi.
    n : int
        number of independent trials.

    Returns
    -------
    list of int
        iteration counts, one per trial.
    """
    # NOTE(review): 'ExcelFileRoot' looks like a placeholder filename —
    # confirm the intended output path.
    workbook = xlsxwriter.Workbook('ExcelFileRoot')
    worksheet = workbook.add_worksheet('Sheet1')
    listResult = []
    for k in range(0, n):
        i = 1
        while abs(math.pi - MonteCarlo(i)) > eps:
            i += 1
        worksheet.write_number(k, 2, i)
        # Bug fix: listResult was returned but never populated.
        listResult.append(i)
    workbook.close()
    return listResult
def test_equal_probability():
    """ Check particles have equal probability of movement. """
    from numpy import array, sqrt, count_nonzero
    energy = MagicMock()
    density = array([1, 0, 99])
    mc = MonteCarlo(energy, density)
    # Sample many single moves and count how often site 0 is affected;
    # with equal per-particle probability that should be ~1% of draws.
    changes_at_zero = [
        (density - mc.change_density(density))[0] != 0
        for _ in range(10000)
    ]
    assert count_nonzero(changes_at_zero) == approx(
        0.01 * len(changes_at_zero), 0.5 * sqrt(len(changes_at_zero)))
def drawMonteCarlo():
    """Train Monte-Carlo agents for several iteration counts and save a 3D
    value plot for each as 'MonteCarlo<N>.png'."""
    for iteration in (10, 100, 1000, 10000, 100000, 500000, 1000000):
        print('Creating Monte Carlo Agent...')
        agent = MonteCarlo(100)
        print('Monte Carlo created')
        print('Training Monte Carlo for', iteration, 'iterations.')
        agent.train(iteration)
        print('Training completed, plotting image')
        figure = plt.figure('Monte' + str(iteration))
        axes = figure.add_subplot(111, projection='3d')
        plotMonte(axes, agent)
        figure.savefig('MonteCarlo' + str(iteration) + '.png')
        plt.show()
def test_main_algorithm():
    """Drive one fully deterministic step and check the resulting density."""
    import numpy as np
    from unittest.mock import Mock
    energy = MagicMock()
    mc = MonteCarlo(energy, [1, 1, 1, 1, 1], itermax=5)
    # Force determinism: every change accepted, agents 0..4 chosen in
    # order, moving right four times then left once.
    mc.accept_change = Mock(side_effect=[True] * 5)
    mc.random_agent = Mock(side_effect=[0, 1, 2, 3, 4])
    mc.random_direction = Mock(side_effect=[1, 1, 1, 1, -1])
    np.testing.assert_equal(mc.step()[1], [0, 1, 1, 2, 1])
def main():
    """Sort documents by noisy pairwise comparisons and report error stats.

    Each comparison is answered truthfully with probability PERCENT_CORRECT,
    otherwise inverted. Ctrl-C prints the current ordering and exits.
    """
    all_docs = get_docs(NUMBER_OF_DOCS)
    monte_carlo = MonteCarlo(all_docs, p=.9, N=100, epsilon=.1)
    comparison_counter = 0
    try:
        print(f'Comparisons: {comparison_counter}', end='\r')
        while not monte_carlo.all_same:
            pair = monte_carlo.next_pair()
            truthful = PERCENT_CORRECT > random.random()
            # Report the true ordering when truthful, the inverse otherwise.
            monte_carlo.compare(pair, (pair[0] > pair[1]) == truthful)
            comparison_counter += 1
            print(f'Comparisons: {comparison_counter}', end='\r')
    except KeyboardInterrupt:
        print(monte_carlo.get_sorted())
        exit()
    sorted_list = monte_carlo.get_sorted()
    distances = []
    for i in range(0, NUMBER_OF_DOCS):
        all_docs = monte_carlo.get_sorted()
        try:
            distances.append(abs(i - all_docs[i]))
        except IndexError:
            print(f'i={i}')
            print(monte_carlo.get_sorted())
    print(f'Number of docs: {NUMBER_OF_DOCS}\n'
          f'Comparisons: {comparison_counter}\n'
          f'Average error: {statistics.mean(distances)}\n'
          f'SD of errors: {statistics.stdev(distances)}\n'
          f'Max error: {max(distances)}')
def drawForAllLambdas():
    """Train a long Monte-Carlo baseline, then SARSA(lambda) for a range of
    lambdas, and plot mean squared Q-difference per lambda."""
    montecarlo = MonteCarlo(100)
    print('Training Monte Carlo')
    montecarlo.train(500000)
    print('Training of Monte Carlo Completed')
    lambdas = [0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0]
    squareMean = []
    numberElements = montecarlo.Q.shape[0] * montecarlo.Q.shape[1] * 2
    for lambdaValue in lambdas:
        sarsa = SARSA(100, lambdaValue)
        print('Training SARSA', lambdaValue)
        sarsa.train(1000)
        print('Training of SARSA Completed')
        squareMean.append(
            np.sum(np.square(sarsa.Q - montecarlo.Q)) / float(numberElements))
    fig = plt.figure("SARSA")
    # Only the interior lambdas (0.1 .. 0.9) are plotted.
    plt.plot(lambdas[1:10], squareMean[1:10])
    fig.savefig('lambdaALL.png')
    plt.show()
def test_accept_change():
    """ Check that move is accepted if second energy is lower """
    from numpy import sqrt, count_nonzero, exp
    # Bug fix: instantiate the mock — the original passed the MagicMock
    # class itself, unlike the sibling tests which use MagicMock().
    energy = MagicMock()
    mc = MonteCarlo(energy, [1, 1, 1], temperature=100.0)
    # Downhill or equal-energy moves must always be accepted.
    # Do more than one draw, in case randomness incorrectly crept into
    # the implementation.
    for i in range(10):
        assert mc.accept_change(0.5, 0.4)
        assert mc.accept_change(0.5, 0.5)
    # Uphill moves should be accepted with Boltzmann probability
    # exp(-(E_new - E_old) / T).
    prior, successor = 0.4, 0.5
    accepted = [mc.accept_change(prior, successor) for i in range(10000)]
    assert count_nonzero(accepted) / float(len(accepted)) == approx(
        exp(-(successor - prior) / mc.temperature),
        3e0 / sqrt(len(accepted)))
def drawForLambdaZero():
    """Plot the per-episode learning curve of SARSA(0) against a long-trained
    Monte-Carlo baseline, saving it as 'lambdaZero.png'."""
    montecarlo = MonteCarlo(100)
    print('Training Monte Carlo')
    montecarlo.train(500000)
    print('Training of Monte Carlo Completed')
    sarsa = SARSA(100, 0)
    learningRate = []
    learningRateIndex = []
    print('Training SARSA and plotting graph')
    for episode in range(1000):
        learningRateIndex.append(episode)
        sarsa.train(1)
        # NOTE(review): normalizes by 1000 (the episode count), not by the
        # number of Q entries — confirm this is intentional.
        learningRate.append(
            np.sum(np.square(sarsa.Q - montecarlo.Q)) / float(1000))
    fig = plt.figure("SARSAZERO")
    plt.plot(learningRateIndex, learningRate)
    fig.savefig('lambdaZero.png')
    plt.show()
def test_input_sanity():
    """ Check incorrect input do fail """
    energy = MagicMock()
    # Unsupported temperature argument form.
    with raises(NotImplementedError):
        MonteCarlo(sum, [1, 1, 1], 0e0)
    # Negative temperature.
    with raises(ValueError):
        MonteCarlo(energy, [1, 1, 1], temperature=-1e0)
    # Non-integer densities.
    with raises(TypeError):
        MonteCarlo(energy, [1.0, 2, 3])
    # Negative, multi-dimensional, too-short, or empty densities.
    with raises(ValueError):
        MonteCarlo(energy, [-1, 2, 3])
    with raises(ValueError):
        MonteCarlo(energy, [[1, 2, 3], [3, 4, 5]])
    with raises(ValueError):
        MonteCarlo(energy, [3])
    with raises(ValueError):
        MonteCarlo(energy, [0, 0])
def testMonteCarlo2(self, doplot=False):
    """Fit a straight line to a step-like 7-point dataset and inspect the
    Monte-Carlo error estimate against known fit results."""
    print("====== MonteCarlo 2 ===================")
    x = numpy.arange(7, dtype=float) - 3
    y = numpy.asarray([-1, -1, -1, 0, 1, 1, 1], dtype=float)
    m = PolynomialModel(1)
    ftr = Fitter(x, m)
    par = ftr.fit(y)
    std = ftr.getStandardDeviations()
    yfit = m.result(x)
    chisq = ftr.chisq
    hes = ftr.hessian
    # alternative construction, kept for reference:
    # mc = MonteCarlo( x, m, chisq, hessian=hes )
    mc = MonteCarlo(x, m, ftr.covariance)
    mc.mcycles = 1000
    lmce = ftr.monteCarloError(monteCarlo=mc)
    print("params : ", par)
    print("stdevs : ", std)
    print("scale : ", ftr.getScale())
    print("chisq : ", chisq)
    print("evals : ", mc._eigenvalues)
    print(mc._eigenvectors)
    print("hessi :\n", ftr.hessian)
    print("covar :\n", ftr.covariance)
    print("mcerr : ", lmce)
    numpy.testing.assert_array_almost_equal(
        par, numpy.asarray([0.0, 0.42857142857142855]))
    # numpy.testing.assert_array_almost_equal( std, numpy.asarray([0.1564921592871903,0.07824607964359515]) )
    self.assertAlmostEqual(chisq, 0.857142857143)
    if doplot:
        pyplot.plot(x, y, 'k*')
        pyplot.plot(x, yfit, 'g-')
        pyplot.plot(x, yfit + lmce, 'r-')
        pyplot.plot(x, yfit - lmce, 'r-')
        pyplot.show()
def test_move_particle_one_over():
    """ Check density is change by a particle hopping left or right. """
    from numpy import nonzero, multiply
    from numpy.random import randint
    energy = MagicMock()
    # Repeat many times to guard against flukes in the random draws.
    for _ in range(100):
        density = randint(50, size=randint(2, 6))
        new_density = MonteCarlo(energy, density).change_density(density)
        # Exactly two sites change, by +1 and -1 respectively.
        changed = nonzero(density - new_density)[0]
        assert len(changed) == 2, "densities differ in two places"
        assert multiply.reduce(
            (density - new_density)[changed]) == -1, \
            "densities differ by + and - 1"
# Experiment driver: runs the Monte-Carlo agent. The TD-lambda and linear
# function approximation experiments below are currently disabled
# (commented out) but kept for reference.
from MonteCarlo import MonteCarlo
from TDLambda import TDLambda
from LinFuncApprox import LinFuncApprox
import matplotlib.pyplot as plt
from plotQValues import plotQValues

'''Monte Carlo'''
mc = MonteCarlo()
q_mc, n_mc = mc.run_episodes(10000)
# plotQValues(q_mc)

'''TD Lambda'''
# td = TDLambda(0.5)
# td.run_episodes(100000)
# q_td = td.q_values()
# plotQValues(q_td)
#
# mse = []
# lmbda = []
# for i in range(0, 10):
#     lmbda.append(i / 10.)
#     td = TDLambda(i / 10.)
#     q_td, n_td = td.run_episodes(10000)
#
#     error = (q_td - q_mc) ** 2
#     mse.append(sum(sum(sum(error * 1./ (2 * 21 * 10)))))
#
# plt.plot(lmbda, mse)
#
# plt.show()

'''Linear Function Approximation'''
# lin = LinFuncApprox()
# lin.run_episodes(10000)
import os
import sys

# The framework lives two directories up; it must be on sys.path before
# the project imports below are attempted.
sys.path.insert(
    0, os.path.abspath(os.path.join(os.path.dirname(__file__), '../..')))

from tests import test_policy
from MonteCarlo import MonteCarlo
from mdp import BlocksWorldBuilder
from control import SimpleMonteCarloControl, SgdMonteCarloControl
from planner import Planner
from matplotlib import pyplot as plt

# Mean-based Monte-Carlo control on a 7-block world, no planning factor.
mdp_builder = BlocksWorldBuilder(blocks_world_size=7)
planner = Planner(planning_horizon=5)
ctrl = SimpleMonteCarloControl()
mc = MonteCarlo(mdp_builder, planner, control=ctrl,
                max_episode_length=14, planning_factor=0,
                plan_on_empty_policy=True, exploring_starts=True,
                exploring_factor=0.0)

learned_policy = mc.learn_policy(number_episodes=150, show_progress_bar=True)
def monteCarloAIPlay(self):
    """Pick a card via Monte-Carlo search over the current game state."""
    searcher = MonteCarlo(self.gameState, self.name)
    searcher.update(self.gameState.cardsPlayed)
    return searcher.getPlay()
# import Monte Carlo function
from MonteCarlo import MonteCarlo

# Experiment parameters: discount factor, learning rate, a sweep of
# exploration rates, and run/step/episode sizes.
gamma = 0.99
lr = 0.1
epsilon = [0.01, 0.1, 0.25]
runs = 2
step_number = 100
episode_length = 100

# Run the experiment.
MonteCarlo(gamma, lr, epsilon, runs, step_number, episode_length)
def main2():
    """Run a single 20-second Monte-Carlo search from the initial position."""
    board = Board()
    searcher = MonteCarlo(board, seconds=20)
    state = board.start()
    searcher.update(state)
    searcher.get_play()
# Step sizes swept for the gradient-based agents.
step_size_parameters = [1, 0.8, 0.3, 0.03]
""" SETUP EXPERIMENT """
experiments = []
for _ in range(number_of_trials):
    # Control case: Monte carlo control such as in the bachelor's project, without planning.
    blocks_world_builder = BlocksWorldBuilder(blocks_world_size)
    planner = Planner(planning_horizon)
    ctrl = SimpleMonteCarloControl()
    mc = MonteCarlo(blocks_world_builder, planner, control=ctrl,
                    max_episode_length=blocks_world_size * 2,
                    planning_factor=0, plan_on_empty_policy=True,
                    exploring_starts=True, exploring_factor=0)
    experiments.append(('Mean-based', None, mc))
for step_size_parameter in step_size_parameters * number_of_trials:
    # Other cases: Gradient-based agents with different step size parameter values
    blocks_world_builder = BlocksWorldBuilder(blocks_world_size)
    planner = Planner(planning_horizon)
    ctrl = SgdMonteCarloControl(step_size_parameter)
    # NOTE(review): this chunk is cut off mid-call below — the remaining
    # MonteCarlo arguments are outside the visible source.
    mc = MonteCarlo(blocks_world_builder, planner,
    # NOTE(review): this 'return function' is the tail of a definition that
    # begins outside the visible source.
    return function


if __name__ == "__main__":
    # Load the outbound/initial-inventory plan and, for the first product
    # code only (note the 'break'), plot its demand KDE and print
    # Monte-Carlo estimates of the mean and the 80%-quantile function.
    outbound_filename = "outbound.xlsx"
    initinv_filename = "init_inventory.xlsx"
    plan = Plan(outbound_filename, initinv_filename)
    dict_code2name = plan.code_name
    monthly_demand_all = plan.get_monthly_demand()[0]
    for code, monthly_demand in monthly_demand_all.items():
        kde = KDE(code, monthly_demand)
        fig, ax = plt.subplots(2)
        print(monthly_demand)
        print(sum(monthly_demand) / len(monthly_demand))
        pdf = kde.pdf
        x_p = np.linspace(np.min(kde.demand), np.max(kde.demand), 300)
        y_p = [pdf(x_i) for x_i in x_p]
        ax[0].plot(x_p, y_p)
        kde.plot()
        # print(kde.estimate.score(kde.demand))
        print(
            MonteCarlo(kde.pdf, lowerbound=0,
                       upperbound=1.5 * max(monthly_demand)).get_mu())
        print(
            MonteCarlo(kde.pdf, lowerbound=0,
                       upperbound=1.5 * max(monthly_demand)).get_F_eq_alpha(
                           .8, 3))
        break
# grab map information from the mission XML
mission_xml = BeautifulSoup(env.params['mission_xml'], features="xml")
map_spec = mission_xml.find('specification')
placement = mission_xml.find('Placement')
map_dimension = [int(map_spec.contents[1].text),
                 int(map_spec.contents[2].text),
                 int(map_spec.contents[3].text)]
mission_available_moves = env.params['comp_all_commands']

num_episodes = 300
gamma = [1, .6, .3]
alpha = [1, .6, .3]
max_simulation_time = 120

# Input learning method: MC - monte carlo, Q - Q learning
algorithm = 'Q'

# Sweep every (gamma, alpha) combination with the selected learner.
for g in gamma:
    for a in alpha:
        if algorithm == 'MC':
            # instantiate an Agent object
            mc = MonteCarlo(mission_name, env, num_episodes, g,
                            max_simulation_time, a)
            mc.mc_prediction(filename='', iteration_number=0)
        elif algorithm == 'Q':
            # instantiate an Agent object
            q = Q(mission_name, env, num_episodes, g, a,
                  max_simulation_time)
            q.q_prediction()
def __init__(self, board, color, turn_time, dificulty=0):
    """Monte-Carlo-driven player: ignores mouse input and picks its own moves.

    'dificulty' spelling is kept — callers may pass it by keyword.
    """
    super().__init__(color)
    self.dificulty = dificulty
    self.ignore_mouse = True
    self.turn_time = turn_time
    self.monte = MonteCarlo(board, self, self.turn_time)
# Report accept/reject probabilities for female-to-male applicants.
# NOTE(review): female_to_male_accept / _reject, orig_stdout and f are
# defined earlier in the file, outside the visible source.
print(female_to_male_reject)
female_to_male_accept_prob = (female_to_male_accept /
                              (female_to_male_accept + female_to_male_reject))
female_to_male_reject_prob = (female_to_male_reject /
                              (female_to_male_accept + female_to_male_reject))
# no_change_prob = (no_change/X.shape[0])
print("The probability of female_to_male_accept_prob = ",
      female_to_male_accept_prob)
print("The probability of female_to_male_reject_prob = ",
      female_to_male_reject_prob)
# Restore stdout after the prints above were redirected to the report file.
sys.stdout = orig_stdout
f.close()
##########################
""" MonteCarlo() """
# plt.hist(X['applicant_sex'])
# sns.catplot(x=[1,2,3,4], y=X.groupby("applicant_sex").count().accden, data=X)
# plt.show()
# plt.hist(need)
# plt.show()
# plt.hist(reason)
# plt.show()
# tune_model(X,y,n_it = 50,models = ['RandomForest','xgb','Logistic'])
# NOTE(review): these two calls are the tail of a definition that begins
# outside the visible source.
mc.update(game_state)
mc.get_play()


def self_play(mc, b):
    """Play one complete self-play game on board b using Monte-Carlo
    player mc; return (winner, list of visited states)."""
    game_history = []
    game_state = b.start()
    game_history.append(game_state)
    #print(b.start())
    mc.update(game_state)
    legals = b.legal_plays([game_state])
    winner = b.winner([game_state])
    # Keep playing until no legal moves remain or a winner is declared.
    while legals and winner == 0:
        game_state = b.next_state(game_state, mc.get_play())
        mc.update(game_state)
        game_history.append(game_state)
        legals = b.legal_plays([game_state])
        winner = b.winner([game_state])
    return winner, game_history


if __name__ == '__main__':
    # Quick demo: one fast (0.3 s per move) self-play game, printing
    # every board state and the winner.
    b = Board()
    mc = MonteCarlo(b, seconds=0.3)
    winner, hist = self_play(mc, b)
    for state in hist:
        print("")
        Board().show(state)
    print("\nWinner: ", winner)