from timings import Timings t1 = Timings(title = "collections", setup = "import collections; s = collections.deque()", statement = "s.appendleft(100)") t2 = Timings(title = "lists", setup = "s = []", statement = "s.insert(0,100)") Timings.titles() t1.run(100000) t2.run(100000) t1.run(200000) t2.run(200000)
from timings import Timings

Timings.titles()
t1 = Timings(title="class",
             setup="from myprogram import Person",
             statement="p = Person('Susan',25,'London')")
t2 = Timings(title="slots",
             setup="from myprogram import PersonSlots",
             statement="p = PersonSlots('Susan',25,'London')")
t3 = Timings(title="dict",
             setup="pass",
             statement="p = {'name' : 'Susan', 'age' : 25, 'address' : 'London' }")
t4 = Timings(title="class",
             setup="from myprogram import Person\np = Person('Susan',25,'London')",
             statement="s = p.getDetails()")
t5 = Timings(title="slots",
             setup="from myprogram import PersonSlots\np = PersonSlots('Susan',25,'London')",
             statement="s = p.getDetails()")
t6 = Timings(title="dict",
             setup="p = {'name' : 'Susan', 'age' : 25, 'address' : 'London' }",
             statement="s = p['name'] + ',' + str(p['age']) + ',' + p['address']")

print("\n*** Creating instances ***")
t1.run(10000000)
t2.run(10000000)
t3.run(10000000)

print("\n*** Accessing attributes ***")
t4.run(10000000)
t5.run(10000000)
t6.run(10000000)
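# myprogram.py is not included in this section. Below is a minimal sketch of
# what the benchmark above appears to import, assuming the obvious
# definitions: a plain class, and an equivalent class that declares __slots__
# so its instances carry no per-instance __dict__. The constructor arguments
# and getDetails() are taken from the statements being timed; everything else
# is an assumption.
class Person:
    def __init__(self, name, age, address):
        self.name = name
        self.age = age
        self.address = address

    def getDetails(self):
        return self.name + ',' + str(self.age) + ',' + self.address


class PersonSlots:
    __slots__ = ('name', 'age', 'address')

    def __init__(self, name, age, address):
        self.name = name
        self.age = age
        self.address = address

    def getDetails(self):
        return self.name + ',' + str(self.age) + ',' + self.address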
from timings import Timings t1 = Timings(title="compiled regex", setup=('import re' '\n' 'text = "This line contains the numbers 8.73 and 4.67"' '\n' 'numberPattern = r"\d+\.\d+"' '\n' 'pattern = re.compile(numberPattern)'), statement="result = pattern.search(text)") t2 = Timings(title="uncompiled regex", setup=('import re' '\n' 'text = "This line contains the numbers 8.73 and 4.67"' '\n' 'numberPattern = r"\d+\.\d+"'), statement="result = re.search(numberPattern, text)") Timings.titles() t1.run(1000000) t2.run(1000000)
from timings import Timings t1 = Timings(title="collections", setup="import collections; s = collections.deque()", statement="s.appendleft(100)") t2 = Timings(title="lists", setup="s = []", statement="s.insert(0,100)") Timings.titles() t1.run(100000) t2.run(100000) t1.run(200000) t2.run(200000)
from timings import Timings t1 = Timings(title = "using xrange", setup = "total = 0", statement = "for x in xrange(100 * 1000 * 1000): total = total + x") t2 = Timings(title = "using range", setup = "total = 0", statement = "for x in range(100 * 1000 * 1000): total = total + x") Timings.titles() t1.run(1) t2.run(1)
import collections
import os
import random
import sys
import time
from math import pi
from random import randrange

import tensorflow as tf

# These classes come from elsewhere in the project; the module paths below
# are assumptions, since the defining files are not shown in this section.
from game_objects import Apple, SensorMap, Ship, XYPoint
from model import PolicyGradientOptimizer, QModel
from timings import Timings


class ApplesGame:
    def __init__(self, parameters: dict, session: tf.Session):
        self.session = session
        self.model_type = parameters.get('model_type', 3)
        self.width = parameters.get('width', 1000)
        self.height = parameters.get('height', 1000)
        self.n_apples = parameters.get('n_apples', 100)
        self.n_eyes = parameters.get('n_eyes', 9)
        self.stop = parameters.get('stop', 10000)
        self.speed = parameters.get('speed', 3)
        self.gamma = parameters.get('gamma', .9)
        self.action_buffer = parameters.get('action_buffer', 128)
        self.memory_size = parameters.get('memory_size', 1024 * 10)
        self.eye_length = parameters.get('eye_length', 200)
        self.sensor_levels = parameters.get('sensor_levels', 10)
        self.auto = parameters.get('auto', True)
        self.score_buffer = parameters.get('score_buffer', 10240)
        self.exploration = parameters.get('exploration', .10)
        self.label = parameters.get('label', 'no_label')
        self.repeat = False
        self.layer_size = parameters.get('layer_size')
        self.n_layers = parameters.get('n_layers')
        self.block_size = parameters.get('block_size', 128)
        self.learn_rate = parameters.get('learn_rate', 0.00001)
        self.finished = False
        self.res_blocks = parameters.get('res_blocks', 4)
        self.res_layers = parameters.get('res_layers', 3)
        self.dropout = parameters.get('dropout', False)
        self.layer_norm = parameters.get('layer_norm', False)
        self.tensorboard_dir = parameters.get('tensorboard_dir', "tf-logs")
        self.sensor_width = parameters.get('sensor_width', pi / 2)
        self.board_width = self.width
        self.board_height = self.height / 2
        self.timings = Timings()
        self.status = 'starting'
        self.snap()
        self.ship = Ship(XYPoint(self.board_width / 2, self.board_height / 2), 0,
                         self.board_width, self.board_height, self.n_eyes,
                         self.sensor_levels, self.eye_length, self.sensor_width)
        self.red = 'red'
        self.green = 'green'
        self.up = False
        self.right = False
        self.left = False
        self.score = 0
        self.peak_score = 0
        self.sensor = SensorMap(self.ship)
        self.step = 0
        self.n_actions = 3
        self.model = self.build_model()
        self.policy = PolicyGradientOptimizer(self.model, self.sensor.size, 3,
                                              self.memory_size, self.action_buffer,
                                              self.gamma, self.exploration,
                                              self.timings)
        self.scores = collections.deque()
        self.reset_apples()
        self.adjust_score(0)
        self.sensor.update(self.apples.values())
        self.next_state = []

    def snap(self):
        # Copy the current source files into a per-run snapshot directory.
        snapdir = "snapshots/" + self.label
        print("snapdir is %s" % snapdir)
        os.system("mkdir -p " + snapdir)
        os.system("cp *.py %s" % snapdir)

    def reset_apples(self):
        self.apples = {}
        for i in range(self.n_apples):
            self.add_apple(True)
            self.add_apple(False)

    def build_model(self) -> QModel:
        return QModel(self.session, self.label, self.n_actions, self.sensor.size,
                      self.layer_size, self.dropout, self.res_blocks,
                      self.res_layers, self.n_layers, self.layer_norm,
                      self.learn_rate, self.tensorboard_dir)

    def add_apple(self, is_red: bool):
        # Find a free cell; apples are keyed by their (x, y) position.
        xy = XYPoint(randrange(self.board_width), randrange(self.board_height))
        while (xy.x, xy.y) in self.apples:
            xy = XYPoint(randrange(self.board_width), randrange(self.board_height))
        apple = Apple(xy, 10, is_red)
        self.apples[(apple.xy.x, apple.xy.y)] = apple

    def update(self, dt):
        pass

    def manual_step(self):
        action = -1
        if self.up:
            action = 0
        if self.right:
            action = 1
        if self.left:
            action = 2
        if not self.repeat:
            self.up = self.right = self.left = False
        if action != -1:
            self.take_action(action)
        state = self.sensor.as_input()
        action, next_state = self.policy.get_action(state)
        self.next_state = next_state

    def take_action(self, action: int) -> float:
        reward = 0
        if action == 0:
            reward += self.ship.move_forward()
        elif action == 1:
            reward += self.ship.move_right()
        elif action == 2:
            reward += self.ship.move_left()
        t = time.time()
        reward += self.check_for_collision()
        self.timings.add("check_for_collision", time.time() - t)
        t = time.time()
        self.sensor.update(self.apples.values())
        self.timings.add("ship.update_screen", time.time() - t)
        self.step += 1
        self.adjust_score(reward)
        self.model.add_score(self.score_delta(), self.step)
        self.move_random_apple()
        sd = self.score_delta()
        self.status = "score_delta %5.0f peak_score %4.0f memory_size %6d step %6d speed %3d" % (
            sd, self.peak_score, len(self.policy.memory), self.step, self.speed)
        return reward

    def move_random_apple(self):
        # Iterate over a copy: the loop deletes from self.apples, and mutating
        # a dict while iterating over it raises a RuntimeError.
        for apple in list(self.apples.values()):
            if random.randrange(0, 10000) == 4:
                del self.apples[(apple.xy.x, apple.xy.y)]
                self.add_apple(apple.ripe())

    def auto_step(self):
        state = self.sensor.as_input()
        # the policy used to return a random action if it was not ready but that
        # does not work well when recovering from a saved game.
        if self.policy.ready():
            action, next_state = self.policy.get_action(state)
        else:
            # randint() is inclusive on both ends, unlike range() and
            # randrange(), hence the n_actions - 1 here.
            action, next_state = random.randint(0, self.n_actions - 1), []
        self.next_state = next_state
        reward = self.take_action(action)
        self.policy.add_action(state, action, reward)
        self.policy.train(1, self.block_size)

    def adjust_score(self, amount: float):
        self.score += amount
        self.scores.appendleft(self.score)
        while len(self.scores) > self.score_buffer:
            self.scores.pop()
        sd = self.score_delta()
        if sd > self.peak_score:
            self.peak_score = sd
        self.status = ("score_delta %5.0f peak_score %4.0f score %6.0f, "
                       "memory_size %6d step %6d speed %3d" % (
                           sd, self.peak_score, self.score,
                           len(self.policy.memory), self.step, self.speed))

    def check_for_collision(self) -> int:
        reward = 0
        # Iterate over a copy for the same reason as in move_random_apple().
        for apple in list(self.apples.values()):
            if self.ship.can_eat(apple):
                del self.apples[(apple.xy.x, apple.xy.y)]
                if apple.red:
                    reward += 1
                else:
                    reward -= 1
                self.add_apple(apple.ripe())
        return reward

    def score_delta(self) -> int:
        if len(self.scores) == 0:
            return self.score
        else:
            return self.score - self.scores[-1]

    def finish(self):
        self.model.close_session()
        score_delta = self.score_delta()
        self.status = "%s: final_score %6.0f score_delta %6.0f peak %6.0f step %6.0f" % (
            self.label, self.score, score_delta, self.peak_score, self.step)
        print(self.status)
        self.finished = True
        self.policy.timings.print()
        sys.stdout.flush()

    def run_no_window(self):
        start = time.time()
        for i in range(self.stop):
            self.auto_step()
            if i > 0 and i % 1000 == 0:
                now = time.time()
                print("%6.2f %s" % (now - start, self.status))
                start = now
        self.finish()
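# Hypothetical entry point, shown only as a sketch: nothing in this section
# actually constructs the game, so the parameter values below are guesses.
# Note that 'layer_size' and 'n_layers' have no defaults in __init__ and
# would be None unless supplied here.
if __name__ == '__main__':
    session = tf.Session()
    game = ApplesGame({'label': 'headless-demo',
                       'stop': 5000,
                       'layer_size': 256,
                       'n_layers': 3}, session)
    game.run_no_window()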
from timings import Timings t1 = Timings(title="attribute lookup", setup="from myprogram import Point; p = Point(15, 33)", statement="distance = p.x * p.y") t2 = Timings( title="using locals", setup="from myprogram import Point; p = Point(15, 33); x = p.x; y = p.y", statement="distance = x * y") Timings.titles() t1.run(1000000) t2.run(1000000) t1.run(5000000) t2.run(5000000)
import os

import gym
import tensorflow as tf

# QModel and PolicyGradientOptimizer live elsewhere in the project; the
# import paths here are assumptions.
from model import PolicyGradientOptimizer, QModel
from timings import Timings


def main():
    # this makes a simple atari pong game. the -ram- means that we are seeing
    # the 128 bytes of RAM in the atari computer rather than the actual pixels.
    p = gym.make('Pong-ram-v0')
    p._max_episode_steps = 1000000

    # set up a few things: we need the initial state so we can figure out its
    # size and make the neural network match it properly.
    initial_state = p.reset()
    state_size: int = initial_state.size
    n_actions: int = p.action_space.n

    # set up a few initial things about tensorflow.
    session = tf.Session()
    global_step_tensor = tf.Variable(0, trainable=False, name='global_step')
    increment_global_step = global_step_tensor.assign_add(1)

    # find an empty place to store output for tensorboard (a web app to look
    # at results).
    run_counter = 0
    tensorboard_dir = "../tensorboard/pong-simple-%03d" % run_counter
    while os.path.exists(tensorboard_dir):
        run_counter += 1
        tensorboard_dir = "../tensorboard/pong-simple-%03d" % run_counter
    print("tensorboard_dir: %s" % tensorboard_dir)

    # build a simple model.
    model = QModel(session, "pong-model", n_actions=n_actions,
                   state_size=state_size, layer_size=512, dropout=True,
                   res_blocks=0, res_layers=0, layers=3, layer_norm=True,
                   learn_rate=.00001, tensorboard_dir=tensorboard_dir)

    # this is a little widget to keep track of how much time we spend doing things.
    timings = Timings()

    # this is the thing that tracks the State-Action-Reward tuples and trains
    # the network.
    policy_optimizer = PolicyGradientOptimizer(model=model,
                                               input_size=state_size,
                                               n_actions=n_actions,
                                               max_memory_size=1024 * 1024,
                                               action_buffer_size=102400,
                                               gamma=.999, exploration=.05,
                                               timings=timings)

    # this allows us to restart from wherever we left off. These checkpoints
    # can also be used by "saved_pong.py" to watch how the game is currently
    # playing.
    saver = tf.train.Saver()
    checkpoint_file = "../checkpoints/pong-simple/ckpt"
    saved_checkpoint_path = tf.train.latest_checkpoint("../checkpoints/pong-simple/")
    from_saved_model = False
    if saved_checkpoint_path is not None:
        print("restoring from %s" % saved_checkpoint_path)
        saver.restore(session, saved_checkpoint_path)
        from_saved_model = True

    # this global_step tensor is really just a counter for the current step
    # but I want to be able to recover it after a restart so I store it and
    # increment it within tensorflow.
    global_step = tf.train.global_step(session, global_step_tensor)

    # this is the outermost loop. it will never exit. each new game will start
    # at the top of this loop.
    while True:
        initial_state = p.reset()
        done = False
        summed_rewards = 0
        raw_score = 0
        steps = 0.0
        # the state starts out as a vector and needs to be a [1 x 128] matrix.
        state = initial_state.reshape([1, 128])

        # this loop runs one game. at the end of the game the step function
        # returns done=True.
        while not done:
            steps += 1.0
            # uncomment this line to watch the pong games play as it learns.
            # this will slow things down, a lot.
            # p.render()

            # if we have loaded a saved model or if we have begun training
            # then we can use the model to choose an action. otherwise, pick
            # one at random.
            if from_saved_model or policy_optimizer.ready():
                action, _ = policy_optimizer.get_action(state)
            else:
                action = p.action_space.sample()
            new_state, reward, done, info = p.step(action)
            raw_score += reward

            # Using just the reward from the game wasn't very effective; it
            # did not learn to play well. Consider a long point that is lost
            # at the very end: the agent may have properly returned the ball
            # 10 or 20 times, yet every one of those actions gets told it
            # messed up. Only the actions just before missing the ball should
            # get a negative reward; everything else should be positive.
            # Since it takes about 50 frames (steps) for the ball to cross
            # from the left edge to the right edge, the rule is: on a lost
            # point the last 50 frames get a negative reward and everything
            # earlier gets +.5. Winning points get +1 for all frames. This
            # worked great.
            if reward < 0:
                count = 0
                for sar in policy_optimizer.action_buffer:
                    if count < 50:
                        sar.add_reward(-1)
                    else:
                        sar.add_reward(.5)
                    policy_optimizer.add_to_memory(sar)
                    count += 1
                policy_optimizer.action_buffer.clear()
            elif reward > 0:
                policy_optimizer.add_action(state, action, reward + steps / 400)
                policy_optimizer.flush_action_buffer()
            elif reward == 0:
                policy_optimizer.add_action(state, action, reward)

            # This is where the magic happens: randomly select 32 of the
            # actions we've taken and see what we can learn from them.
            policy_optimizer.train(1, 32)
            state = new_state.reshape([1, 128])
            summed_rewards += reward

        # increment the global step and save our progress after each complete
        # game (not each point).
        session.run(increment_global_step)
        global_step = tf.train.global_step(session, global_step_tensor)
        print("iteration %6d steps %6d, training_score: % 6.2f raw_score % 6.2f" % (
            global_step, steps, summed_rewards, raw_score))
        saver.save(session, checkpoint_file, global_step=global_step)

        # this logs things to tensorboard.
        model.add_score(steps, global_step)

        # every 10 games print out the timings. I know what these are by now
        # but it's interesting to see.
        if (global_step + 1) % 10 == 0:
            timings.print()
            timings.reset()
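# main() is defined above but nothing in this file invokes it; assuming the
# standard entry-point guard was intended.
if __name__ == '__main__':
    main()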
from timings import Timings t1 = Timings(title="+=", setup="s = ''", statement="for i in range(100000000): s += 'a'") t2 = Timings(title="array", setup="import array", statement="s = ''.join(array.array('c', ['a']*100000000))") t3 = Timings(title="comprehensions", setup="pass", statement="s = ''.join(['a' for n in range(100000000)])") Timings.titles() t1.run(1) t2.run(1) t3.run(1)
from timings import Timings


def listComprehension(n):
    # builds the full n-element list in memory before iterating
    total = 0
    X = [x * x for x in range(n)]
    for x in X:
        total += x


def generatorExpression(n):
    # produces one value at a time; nothing is materialised up front
    total = 0
    Y = (y * y for y in range(n))
    for y in Y:
        total += y


t1 = Timings(title="lists",
             setup="from __main__ import listComprehension",
             statement="listComprehension(100000000)")
t2 = Timings(title="generators",
             setup="from __main__ import generatorExpression",
             statement="generatorExpression(100000000)")

Timings.titles()
t1.run(1)
t2.run(1)
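# Side note, not part of the benchmark above: the memory difference between
# the two approaches is easy to see directly. A generator object stays the
# same tiny size no matter how long the sequence is, while the list pays for
# every element it holds.
import sys

X = [x * x for x in range(100000)]
Y = (y * y for y in range(100000))
print(sys.getsizeof(X))  # roughly 800 KB of pointers alone (the ints are extra)
print(sys.getsizeof(Y))  # on the order of 100 bytes, independent of range size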
from timings import Timings t1 = Timings(title = "import", setup = "import math", statement = "math.sqrt(50.0)") t2 = Timings(title = "from", setup = "from math import sqrt", statement = "sqrt(50.0)") Timings.titles() t1.run(100000) t2.run(100000) t1.run(500000) t2.run(500000)