def start_pong(self):
    """Run one Pong session on the app's IP connection, clearing the handle afterwards."""
    game = Pong(self.app.ipcon)
    self.pong = game
    if game.okay:
        game.run_game_loop()
    self.pong = None
class PongWidget(QWidget, Ui_Pong):
    """Qt widget hosting a Pong game; five buttons feed keypresses into the game."""

    # Class-level defaults; instances overwrite these in start()/start_pong().
    pong = None    # active Pong instance, or None while no game is running
    thread = None  # daemon thread running the game loop

    def __init__(self, parent, app):
        # NOTE(review): super(QWidget, self) skips QWidget itself in the MRO —
        # the conventional call would be super(PongWidget, self).__init__();
        # confirm this is intentional for this uic setup.
        super(QWidget, self).__init__()
        self.app = app
        self.setupUi(self)
        # Each button forwards its letter to the running game's key queue.
        self.button_a.pressed.connect(lambda: self.button_press('a'))
        self.button_s.pressed.connect(lambda: self.button_press('s'))
        self.button_k.pressed.connect(lambda: self.button_press('k'))
        self.button_l.pressed.connect(lambda: self.button_press('l'))
        self.button_r.pressed.connect(lambda: self.button_press('r'))

    def start_pong(self):
        """Thread target: create the game, run its loop, then clear the handle."""
        self.pong = Pong(self.app.ipcon)
        if self.pong.okay:
            self.pong.run_game_loop()
        self.pong = None

    def button_press(self, button):
        """Forward a button letter to the game's key queue, if a game is running."""
        if self.pong:
            self.pong.kp.key_queue.put(button)

    def start(self):
        """Launch the game loop on a background daemon thread."""
        self.thread = Thread(target=self.start_pong)
        self.thread.daemon = True
        self.thread.start()

    def stop(self):
        """Ask the running game to terminate ('q' unblocks the key reader)."""
        if self.pong:
            self.pong.loop = False
            self.pong.kp.key_queue.put('q')
def test_move_ball(self):
    """A freshly served ball advances by its serve velocity after one tick."""
    game = Pong()
    game.move_ball()
    expected_x = Config.ball_left_start_x + Config.ball_serve_left_vx
    expected_y = Config.ball_left_start_y + Config.ball_serve_left_vy
    self.assertEqual(game.ball.x, expected_x)
    self.assertEqual(game.ball.y, expected_y)
def test_bounce_ball_against_top_wall(self):
    """Hitting the top wall flips the vertical velocity and keeps the horizontal."""
    game = Pong()
    game.ball = Ball(Config.ball_left_start_x, Config.paddle_upper_limit, 5, -5)
    game.move_ball()
    self.assertEqual(game.ball.vx, 5)
    self.assertEqual(game.ball.vy, 5)
def test_ball_serve_right(self):
    """serve_right() places the ball at the right start with the right-serve velocity."""
    game = Pong()
    game.serve_right()
    ball = game.ball
    self.assertEqual(ball.x, Config.ball_right_start_x)
    self.assertEqual(ball.y, Config.ball_right_start_y)
    self.assertEqual(ball.vx, Config.ball_serve_right_vx)
    self.assertEqual(ball.vy, Config.ball_serve_right_vy)
def test_ball_missed_on_right(self):
    """A ball past the right edge is reset to the right start, stationary."""
    game = Pong()
    game.ball = Ball(1005, 250, 5, 5)
    game.move_ball()
    ball = game.ball
    self.assertEqual(ball.x, Config.ball_right_start_x)
    self.assertEqual(ball.y, Config.ball_right_start_y)
    self.assertEqual(ball.vx, 0)
    self.assertEqual(ball.vy, 0)
def test_ball_missed_on_left(self):
    """A ball past the left edge is reset to the left start, stationary."""
    game = Pong()
    game.ball = Ball(-5, 250, -5, -5)
    game.move_ball()
    ball = game.ball
    self.assertEqual(ball.x, Config.ball_left_start_x)
    self.assertEqual(ball.y, Config.ball_left_start_y)
    self.assertEqual(ball.vx, 0)
    self.assertEqual(ball.vy, 0)
def __init__(self):
    """Set up the pygame window, the game objects, and per-episode bookkeeping."""
    super().__init__()
    self.hits =0   # paddle hits scored this episode
    self.miss =0   # balls dropped past the paddle
    self.screensize = (640,480)
    self.screen = pygame.display.set_mode(self.screensize)
    self.pong = Pong(self.screensize)      # the ball
    self.paddle = Paddle(self.screensize)
    self.done = False  # set when the ball reaches the bottom edge
    self.reward=0      # reward accumulated during the current step
class pongGame():
    """Gym-style environment wrapper around the pygame Pong: reset/step/update."""

    def __init__(self):
        """Set up the pygame window, the game objects, and per-episode bookkeeping."""
        super().__init__()
        self.hits =0   # paddle hits scored this episode
        self.miss =0   # balls dropped past the paddle
        self.screensize = (640,480)
        self.screen = pygame.display.set_mode(self.screensize)
        self.pong = Pong(self.screensize)      # the ball
        self.paddle = Paddle(self.screensize)
        self.done = False
        self.reward=0

    def reset(self):
        """Start a new episode and return the initial observation.

        Observation layout: [paddle x, ball x, ball y (positions scaled by
        0.01), ball vx, ball vy].
        """
        self.pong=Pong(self.screensize)
        self.paddle=Paddle(self.screensize)
        return [self.paddle.centerx*0.01, self.pong.centerx*0.01, self.pong.centery*0.01, self.pong.speedx, self.pong.speedy]

    def step(self,action):
        """Apply an action (0 = move left, 2 = move right, otherwise no-op).

        Returns (reward, state, done); movement costs -0.1 reward.
        """
        self.reward = 0
        self.done = 0
        if action == 0:
            self.paddle.direction=-1
            # NOTE(review): only this branch calls paddle.update() here, and
            # update() below calls it again — so action 0 moves the paddle
            # twice per step while action 2 moves it once. Confirm whether
            # this asymmetry is intentional.
            self.paddle.update()
            self.reward -= .1
        if action == 2:
            self.paddle.direction=1
            self.reward -= .1
        self.update()
        state = [self.paddle.centerx*0.01, self.pong.centerx*0.01, self.pong.centery*0.01, self.pong.speedx, self.pong.speedy]
        return self.reward, state, self.done

    def update(self):
        """Advance one frame: move paddle and ball, score (+3 hit / -3 miss), redraw."""
        self.paddle.update()
        newhits=self.pong.update(self.paddle,self.hits)
        if self.hits < newhits:
            self.reward +=3
            self.hits=newhits
        if self.pong.hit_edge_bottom:
            # Ball fell past the paddle: penalize and end the episode.
            self.miss +=1
            self.reward -= 3
            self.done=True
        self.screen.fill((0,0,0))
        self.paddle.render(self.screen)
        self.pong.render(self.screen)
        #Display scores:
        font = pygame.font.Font(None, 30)
        text = font.render("Hit: "+str(self.hits), 1, white)
        self.screen.blit(text, (250,10))
        text = font.render("Miss: "+str(self.miss), 1, white)
        self.screen.blit(text, (350,10))
        pygame.display.flip()
def __init__(self, screen_size):
    """Build the pygame front-end for a Pong game rendered at 10x scale.

    screen_size: (width, height) of the display window in pixels; the
    underlying game runs on a grid one tenth of that size.
    """
    self.player_color = [255,255,255]
    self.screen_size = (screen_size)
    # BUG FIX: this was a generator expression, so self.game_size was not a
    # real (w, h) pair and could be consumed only once; materialize a tuple.
    self.game_size = tuple(x//10 for x in self.screen_size)
    self.speed = .01
    self.inactive = [0,0,0]
    self.border = 0
    screen = pygame.display.set_mode(self.screen_size)
    game = Pong(self.game_size)
    screen.fill([0,0,0])
    # Draw ball and both paddles, scaling game-grid rects up to screen pixels.
    x, y, height, width = game.ball.info()
    ball = pygame.Rect(x*10,y*10,height*10,width*10)
    screen.fill(self.player_color, ball)
    x, y, height, width = game.player1.info()
    player1 = pygame.Rect(x*10,y*10,height*10,width*10)
    screen.fill(self.player_color, player1)
    x, y, height, width = game.player2.info()
    player2 = pygame.Rect(x*10,y*10,height*10,width*10)
    screen.fill(self.player_color, player2)
    self.game = game
    self.ball = ball
    self.player1 = player1
    self.player2 = player2
    self.screen = screen
def __init__(self,FA="LSReg",domain="50chain",N=100,loss="ls",trees=500,type="max",depth=2):
    '''class constructor'''
    self.domain = domain
    # Map each domain name to a factory for its simulator; unknown names
    # leave self.domObj unset, exactly as the original elif chain did.
    domain_factories = {
        "50chain": lambda: Chain(),
        "blackjack": lambda: Game(),
        "wumpus": lambda: Grid(4),
        "blocksworld": lambda: BW(4),
        "traffic": lambda: TrafficSignal(),
        "pong": lambda: Pong(),
        "tetris": lambda: Tetris(),
    }
    if domain in domain_factories:
        self.domObj = domain_factories[domain]()
    # Function-approximator selection.
    if FA == "LSReg":
        self.FA = LSReg()
    elif FA == "NN":
        self.FA = NeuralNetwork()
    elif FA == "GB":
        self.FA = GradientBooster(loss=loss,trees=trees,depth=depth)
    self.value,self.count = {},{}
    self.values = [{} for i in range(N)]
    self.approxValues = [{} for i in range(N)]
    self.BE = []
    self.type = type
    # Kick off temporal-difference learning immediately.
    self.TD(self.FA,N)
def getSample(self):
    '''gets a sample trajectory from the specified domain '''
    # Default simulator is the 50-state chain; a matching domain name
    # replaces it (Chain() is still constructed first, as before).
    d = Chain()
    factories = {
        "blackjack": lambda: Game(),
        "wumpus": lambda: Grid(4),
        "blocksworld": lambda: BW(4),
        "traffic": lambda: TrafficSignal(),
        "pong": lambda: Pong(),
        "tetris": lambda: Tetris(),
    }
    if self.domain in factories:
        d = factories[self.domain]()
    sample = []
    state = d
    actions = d.actions
    # Random walk until a terminal marker is reached; record each state.
    while True:
        if state == "winner" or state == "loser":
            sample += [deepcopy(state)]
            break
        sample += [deepcopy(str(state))]
        action = actions[randint(0,len(actions)-1)]
        state = d.takeAction(state,action)
    return sample
def main():
    """Hand-tracking Pong: read webcam frames, run hand detection in a worker
    pool, and drive the paddle from the detections until 'q' or game over."""
    input_q = Queue(maxsize=5)   # frames waiting for detection
    output_q = Queue(maxsize=5)  # detected frames + box/score results
    cap = cv2.VideoCapture(0)
    ret, frame = cap.read()
    height, width, _ = frame.shape
    # Work at half the camera resolution for speed.
    frame = cv2.resize(frame, (width // 2, height // 2))
    height, width, _ = frame.shape
    # Scale ball/paddle speed and paddle size to the frame dimensions.
    pong = Pong(h=height, w=width,
                default_ball_dx=width // 100,
                default_ball_dy=height // 100,
                default_paddle_speed=height // 100,
                default_half_paddle_height=height // 10)
    i = 0
    # parallelize
    cap_params = {}
    frame_processed = 0
    cap_params['im_width'], cap_params['im_height'] = (width, height)
    cap_params['score_thresh'] = 0.5   # minimum detection confidence
    cap_params['num_hands_detect'] = 1
    cap_params['pong'] = pong
    pool = Pool(2, worker, (input_q, output_q, cap_params, frame_processed))
    while True:
        i += 1
        ret, frame = cap.read()
        frame = cv2.resize(frame, (width, height))
        frame = cv2.flip(frame, 1)  # flip across vertical axis (mirror view)
        # wait for keys
        key = cv2.waitKey(100)
        pong.on_key(key)
        # Workers expect RGB; OpenCV captures BGR.
        input_q.put(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
        frame, box, score = output_q.get()
        frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
        update_pong_with_boxes_scores([box], [score], pong, height)
        # update game
        ended = pong.update()
        pong.draw(frame)
        # Display the resulting frame
        cv2.imshow('frame', frame)
        if pong.is_key(key, 'q') or ended:
            break
def __init__(self):
    """Bring up the OSC server, the LED-matrix driver, and the display modes."""
    self.server = OSCServer(("192.168.2.122", 5005))
    self.server.timeout = 0
    self.driver = DriverAdaMatrix(rows=32, chain=2)
    self.driver.SetPWMBits(6)
    self.led = LEDMatrix(self.driver, 64, 32, serpentine=False)
    # The /mode message index selects one of these renderers.
    self.modes = [
        self.mode_presets,
        self.color_presets,
        self.color_gradient,
        self.white_gradient,
        self.direct_control,
        self.arcade,
        self.mindfuck,
    ]
    self.color = colors.Salmon
    self.wheel = ColorWheel()
    self.running = True
    self.joysticks = [0] * 5
    self.pong = Pong(self.led)
    self.scope = Scope(self.led, self.wheel)
    self.scheme = []
    # Route each OSC address to its handler.
    for path, callback in [
        ("/mode", self.mode_callback),
        ("/coin", self.coin_callback),
        ("/js", self.joystick_callback),
        ("/pot", self.pot_callback),
        ("/fad", self.fader_callback),
        ("/beam", self.beam_callback),
        ("/scheme", self.scheme_callback),
        ("/sleep", self.sleep_callback),
    ]:
        self.server.addMsgHandler(path, callback)
def main():
    """Parse CLI arguments, wire up the tornado pong service, and run it."""
    log = logging.getLogger("main")

    # parse arguments
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--pong-manager-port", required=False, default="18889",
        help="port number for pong")
    parser.add_argument(
        "--worker-count", required=False, default=5,
        help="ip address of pong")
    arguments = parser.parse_args()

    # setup application
    log.debug("setup application")
    pong_instance = Pong(arguments.worker_count)
    pong_application_arguments = {'pong_instance': pong_instance}
    pong_application = tornado.web.Application([
        (r"/version", VersionHandler, pong_application_arguments),
        (r"/api/v1/pong/stats", PongStatsHandler, pong_application_arguments),
        (r"/api/v1/pong/server/?([0-9a-z\.]*)", PongServerHandler,
         pong_application_arguments),
        (r"/api/v1/pong/adminstatus/([a-z]+)", PongAdminStatusHandler,
         pong_application_arguments)
    ])
    pong_server = tornado.httpserver.HTTPServer(pong_application)

    # setup SIGINT handler: shut everything down cleanly on Ctrl-C
    log.debug("setup SIGINT handler")
    def signal_handler(signal, frame):
        print("")  # print newline to clear user input
        log.info("Exiting")
        pong_instance.stop()
        pong_server.stop()
        log.info("Sayonara!")
        quit()
    signal.signal(signal.SIGINT, signal_handler)

    # start
    log.debug("pong application listening on %s" % arguments.pong_manager_port)
    try:
        pong_server.listen(arguments.pong_manager_port)
    except OSError:
        # BUG FIX: this previously read arguments.ping_manager_port (a typo),
        # which raised AttributeError instead of reporting the busy port.
        print("port %s is already is use, exiting" % arguments.pong_manager_port)
        return
    tornado.ioloop.IOLoop.instance().start()
def main():
    """Run the single-player Pong loop at 60 FPS until quit or a missed ball."""
    pygame.init()
    pointcounter =0  # running score (paddle hits)
    screensize = (640,480)
    screen = pygame.display.set_mode(screensize)
    clock = pygame.time.Clock()
    pong = Pong(screensize)
    paddle = Paddle(screensize)
    running = True
    while running:
        # Input: arrow keys set paddle direction; releasing a key stops the
        # paddle only if it is still moving in that key's direction.
        for event in pygame.event.get():
            if event.type == QUIT:
                running = False
            if event.type == KEYDOWN:
                if event.key == K_LEFT:
                    paddle.direction = -1
                elif event.key == K_RIGHT:
                    paddle.direction = 1
            if event.type == KEYUP:
                if event.key == K_LEFT and paddle.direction == -1:
                    paddle.direction = 0
                elif event.key == K_RIGHT and paddle.direction == 1:
                    paddle.direction = 0
        paddle.update()
        pointcounter= pong.update(paddle,pointcounter)
        if pong.hit_edge_bottom:
            # Ball fell past the paddle: game over.
            print ('Your Score ' + str(pointcounter))
            running = False
        screen.fill((0,0,0))
        paddle.render(screen)
        pong.render(screen)
        #Display scores:
        font = pygame.font.Font(None, 74)
        text = font.render(str(pointcounter), 1, white)
        screen.blit(text, (310,10))
        pygame.display.flip()
        clock.tick(60)  # cap at 60 frames per second
    pygame.quit()
def main():
    """Train two DQN agents to play Pong against each other, checkpointing
    whenever the 30-episode rolling average of rally length improves."""
    description = 'lr: {}, exp_prob: {}'.format(config.LEARNING_RATE, config.EXPLORE_PROB)
    pong = Pong(description)
    agents = [Agent(3), Agent(3)]
    target_agents = [Agent(3), Agent(3)]  # frozen copies used for TD targets
    optimizer = tf.keras.optimizers.RMSprop(learning_rate = config.LEARNING_RATE)
    # Logger instances:
    import os
    os.system('rm -rf ./logs && mkdir ckpt')
    log_dir = './logs'
    summary_writer = tf.summary.create_file_writer(log_dir)
    os.system('tensorboard --logdir=./logs --port={} &'.format(config.PORT))
    # to initialize, we run a random episode
    run(agents, pong)
    run(target_agents, pong)
    import sys
    if len(sys.argv) > 1:
        # Resume from a pickled checkpoint given on the command line.
        agents = load_vars(sys.argv[1])
    update_target_network(agents, target_agents)
    episodes = 0
    hit_log = []    # rally lengths of the last 30 training episodes
    last_best = 0   # best rolling-average rally length seen so far
    while True:
        with tf.GradientTape() as tape:
            reward_l, reward_r, time, loss = train_episode(agents, target_agents, pong)
        # Joint update over both agents' variables.
        trainable_variables = agents[0].trainable_variables + agents[1].trainable_variables
        grads = tape.gradient(loss, trainable_variables)
        optimizer.apply_gradients(zip(grads, trainable_variables))
        episodes += 1
        hit_log.append(time)
        hit_log = hit_log[-30: ]
        if last_best < sum(hit_log) * 1.0 / len(hit_log):
            # New best rolling average: save a checkpoint named after it.
            last_best = sum(hit_log) * 1.0 / len(hit_log)
            pickle_vars(agents, './ckpt/' + str(episodes) + '_' + str(last_best) + '.txt')
        with summary_writer.as_default():
            tf.summary.scalar('average-hits', sum(hit_log) * 1.0 / len(hit_log), episodes)
        print('Train Episode ({}) {:7d}: Hits: {:4d}'.format(description, episodes, time))
        if episodes % config.EPISODES_TO_TRAIN == 0:
            # Periodic greedy evaluation, then refresh the target networks.
            reward_l, reward_r, time = run(agents, pong)
            print('Test Episode {:7d}: Reward_l: {:7.4f} Reward_r: {:7.4f} Hits: {:4d}'.format(episodes, reward_l, reward_r, time))
            update_target_network(agents, target_agents)
def main():
    """Hand-tracking Pong (single process): detect a hand per frame with a
    TensorFlow graph and drive the paddle until 'q' or game over."""
    cap = cv2.VideoCapture(0)
    detection_graph, sess = detector_utils.load_inference_graph()
    ret, frame = cap.read()
    height, width, _ = frame.shape
    # Work at half the camera resolution for speed.
    frame = cv2.resize(frame, (width // 2, height // 2))
    height, width, _ = frame.shape
    # Scale ball/paddle speed and paddle size to the frame dimensions.
    pong = Pong(h=height, w=width,
                default_ball_dx=width // 100,
                default_ball_dy=height // 100,
                default_paddle_speed=height // 100,
                default_half_paddle_height=height // 10)
    i = 0
    while True:
        i += 1
        ret, frame = cap.read()
        frame = cv2.resize(frame, (width, height))
        frame = cv2.flip(frame, 1)  # flip across vertical axis (mirror view)
        # wait for keys
        key = cv2.waitKey(100)
        pong.on_key(key)
        # Detection runs on a further-downscaled copy of the frame.
        boxes, scores = detector_utils.detect_objects(
            cv2.resize(frame, (320, 180)), detection_graph, sess)
        if boxes is not None and scores is not None:
            # draw bounding boxes
            detector_utils.draw_box_on_image(1, 0.5, scores, boxes, width, height, frame)
            update_pong_with_boxes_scores(boxes, scores, pong, height)
        # update game
        ended = pong.update()
        pong.draw(frame)
        # Display the resulting frame
        cv2.imshow('frame', frame)
        if pong.is_key(key, 'q') or ended:
            break
def main():
    """Build the tutorial's scene list and start the director on the first one."""
    # initialize the director
    director.init(800, 600, resizable=True)
    # create the scene switch layer
    switch_layer = SwitchScene()
    # define the scene switch layer
    red = ColorLayer(255, 0, 0, 255)
    green = ColorLayer(0, 255, 0, 255)
    blue = ColorLayer(0, 0, 255, 255)
    # Goal: adding class name and docstring
    # 1 using on_enter:
    #   get parent().__name__
    #   get parent().__doc__
    # place all scenes in a scene list; each pairs a demo layer with the
    # shared switch layer so the user can cycle between scenes
    scenes = [
        Scene(Title('Cocos2D tutorial'), switch_layer),
        Scene(HelloWorld(), switch_layer),
        Scene(AddActor(), switch_layer),
        Scene(AddAction(), switch_layer),
        Scene(Mouse(), switch_layer),
        Scene(Cat(), switch_layer),
        Scene(Pong(), switch_layer),
        Scene(Flappy(), switch_layer),
        Scene(BirdLayer(), switch_layer),
        Scene(WallLayer(), switch_layer),
        Scene(SwitchLayer(red, green, blue), switch_layer),
        Scene(PythonInterpreterLayer(), switch_layer),
        Scene(ColorLayer(150, 0, 0, 255), switch_layer),
        Scene(OptionsMenu(), switch_layer),
        Scene(ColorLayer(0, 150, 0, 255), switch_layer),
        Scene(ActionMenu(actions), switch_layer),
        Scene(EffectMenu(effects), switch_layer),
    ]
    # give the scene list to the switch layer
    switch_layer.scenes = scenes
    # run the first scene
    cocos.director.director.run(scenes[0])
def main():
    """Solve both puzzle parts from ../data/input.csv, timing each in ms."""
    # Part one
    t0 = time.time()
    pong = Pong("../data/input.csv")
    part_one = pong.render_map()
    time_part_one = round((time.time()-t0)*1e3)
    print("Solution to part one: %s (time taken %s[ms])" % (
        part_one, time_part_one))

    # Part two
    t0 = time.time()
    pong = Pong("../data/input.csv", play=True)
    part_two = pong.play()
    # BUG FIX: part two previously reused the name time_part_one and printed
    # a "part one" label for its result.
    time_part_two = round((time.time()-t0)*1e3)
    print("Solution to part two: %s (time taken %s[ms])" % (
        part_two, time_part_two))
def __init__(self):
    """Bring up the OSC server, the LED-matrix driver, and the display modes."""
    self.server = OSCServer( ("192.168.2.122", 5005) )
    self.server.timeout = 0
    self.driver = DriverAdaMatrix(rows=32, chain=2)
    self.driver.SetPWMBits(6)
    self.led = LEDMatrix(self.driver, 64, 32, serpentine=False)
    # The /mode message index selects one of these renderers.
    self.modes = [self.mode_presets, self.color_presets, self.color_gradient,
                  self.white_gradient, self.direct_control, self.arcade,
                  self.mindfuck]
    self.color = colors.Salmon
    self.wheel = ColorWheel()
    self.running = True
    self.joysticks = [0] * 5
    self.pong = Pong(self.led)
    self.scope = Scope(self.led, self.wheel)
    self.scheme = []
    # funny python's way to add a method to an instance of a class
    # import types
    # self.server.handle_timeout = types.MethodType(lambda: self.handle_timeout(self), self.server)
    # Route each OSC address to its handler.
    self.server.addMsgHandler("/mode", self.mode_callback)
    self.server.addMsgHandler("/coin", self.coin_callback)
    self.server.addMsgHandler("/js", self.joystick_callback)
    self.server.addMsgHandler("/pot", self.pot_callback)
    self.server.addMsgHandler("/fad", self.fader_callback)
    self.server.addMsgHandler("/beam", self.beam_callback)
    self.server.addMsgHandler("/scheme", self.scheme_callback)
    self.server.addMsgHandler("/sleep", self.sleep_callback)
def __init__(self, action_dim):
    """Build the Keras Q-network and replay/exploration settings.

    action_dim: number of discrete actions the agent can choose from.
    """
    self.action_dim = action_dim
    self.size = (60, 40)  # Pong board size used only to probe the state length
    # Create a throwaway game to measure the state-vector dimension.
    self.state_dim = len(Pong(self.size).player1_state())
    model = Sequential([
        #Dense(self.action_dim),
        Dense(5, activation='relu', input_dim=self.state_dim),
        Dense(self.action_dim),   # one linear Q-value output per action
        # Dense(self.action_dim, input_dim = self.state_dim),
    ])
    model.compile(optimizer=tf.keras.optimizers.Adam(lr=0.001),
                  loss=tf.keras.losses.MSE)
    model.summary()
    self.model = model
    self.batch_size = 50       # minibatch size sampled per training step
    self.stored_batch = 2000   # NOTE(review): looks like an intended replay-buffer
                               # cap, but self.memory below is an unbounded
                               # deque — confirm against the training loop.
    self.epsilon = 1           # exploration rate
    self.epsilon_decay = 1.0005
    self.memory = deque()      # experience replay buffer
    self.n_games = 300
    self.n_frames = 300
    self.model_name = 'ponglayer.h5'
def main():
    """Solve both puzzle parts from ../data/input.txt, timing each in ms."""
    dir_path = dirname(realpath(__file__))
    file_location = join(dir_path, "../data/input.txt")

    # Part one
    t0 = time.time()
    pong = Pong(file_location)
    part_one = pong.render_map()
    time_part_one = round((time.time() - t0) * 1e3)
    print("Solution to part one: %s (time taken %s[ms])" % (part_one, time_part_one))

    # Part two
    t0 = time.time()
    pong = Pong(file_location, play=True)
    part_two = pong.play()
    # BUG FIX: part two previously reused the name time_part_one and printed
    # a "part one" label for its result.
    time_part_two = round((time.time() - t0) * 1e3)
    print("Solution to part two: %s (time taken %s[ms])" % (part_two, time_part_two))
def AVI(self):
    """Approximate value iteration: each iteration collects a batch of random
    trajectories, backs up their values, measures Bellman error against the
    current model, and refits the regression model."""
    for i in range(self.number_of_iterations):
        j = 0
        X,Y,bk = [],[],[]
        values = {}         # (state number, state facts) -> backed-up value
        fitted_values = {}  # same keys -> current model prediction
        while j < self.batch_size:
            # Start a fresh episode in the configured simulator, grabbing its
            # background knowledge the first time through.
            if self.simulator == "logistics":
                state = Logistics(number = self.state_number,start=True)
                if not bk:
                    bk = Logistics.bk
            elif self.simulator == "pong":
                state = Pong(number = self.state_number,start=True)
                if not bk:
                    bk = Pong.bk
            elif self.simulator == "tetris":
                state = Tetris(number = self.state_number,start=True)
                if not bk:
                    bk = Tetris.bk
            elif self.simulator == "wumpus":
                state = Wumpus(number = self.state_number,start=True)
                if not bk:
                    bk = Wumpus.bk
            elif self.simulator == "blocks":
                state = Blocks_world(number = self.state_number,start=True)
                if not bk:
                    bk = Blocks_world.bk
            elif self.simulator == "blackjack":
                state = Game(number = self.state_number,start=True)
                if not bk:
                    bk = Game.bk
            elif self.simulator == "50chain":
                state = Chain(number = self.state_number,start=True)
                if not bk:
                    bk = Chain.bk
            elif self.simulator == "net_admin":
                state = Admin(number = self.state_number,start=True)
                if not bk:
                    bk = Admin.bk
            with open(self.simulator+"_FVI_out.txt","a") as fp:
                fp.write("*"*80+"\nstart state: "+str(state.get_state_facts())+"\n")
                time_elapsed = 0
                within_time = True  # episodes over the time budget are discarded
                start = clock()
                trajectory = [(state.state_number,state.get_state_facts())]
                # Random walk until a goal state, logging every step.
                while not state.goal():
                    fp.write("="*80+"\n")
                    state_action_pair = state.execute_random_action()
                    state = state_action_pair[0]
                    fp.write(str(state.get_state_facts())+"\n")
                    trajectory.append((state.state_number,state.get_state_facts()))
                    end = clock()
                    time_elapsed = abs(end-start)
                    # Per-domain wall-clock budgets for a single episode.
                    if self.simulator == "logistics" and time_elapsed > 0.5:
                        within_time = False
                        break
                    elif self.simulator == "pong" and time_elapsed > 1000:
                        within_time = False
                        break
                    elif self.simulator == "tetris" and time_elapsed > 10:
                        within_time = False
                        break
                    elif self.simulator == "wumpus" and time_elapsed > 1:
                        within_time = False
                        break
                    elif self.simulator == "blocks" and time_elapsed > 1:
                        within_time = False
                        break
                    elif self.simulator == "blackjack" and time_elapsed > 1:
                        within_time = False
                        break
                    elif self.simulator == "50chain" and time_elapsed > 1:
                        within_time = False
                        break
                    elif self.simulator == "net_id" and time_elapsed > 1:
                        # NOTE(review): unlike every other branch there is no
                        # break here, and "net_id" does not match the
                        # "net_admin" name used above — confirm both are
                        # intended.
                        within_time = False
            if within_time:
                if i > 0:
                    # Later iterations bootstrap targets from the fitted model.
                    self.compute_value_of_trajectory(values,trajectory,AVI=True)
                else:
                    self.compute_value_of_trajectory(values,trajectory,AVI=False)
                self.state_number += 1
                for key in values:
                    state = list(key[1])
                    value = values[key]
                    #X.append(state)
                    fitted_values[key] = self.model.predict(np.array([state]))
                    #Y.append([value])
            '''
            for key in values:
                facts += list(key[1])
                example_predicate = "value(s"+str(key[0])+") "+str(values[key])
                examples.append(example_predicate)
            '''
            j += 1
        #fitted_values = self.model.predict(np.array(X))
        bellman_error = self.compute_bellman_error(values,fitted_values)
        with open(self.simulator+"_BEs.txt","a") as f:
            f.write("iteration: "+str(i)+" average bellman error: "+str(bellman_error)+"\n")
        # Refit the model on all backed-up (state facts, value) pairs.
        for key in values:
            X.append(list(key[1]))
            Y.append(values[key])
        npX = np.array(X)
        npY = np.array(Y)
        self.model.fit(npX,npY)
import _thread
import time
from pong import Pong
from mainmodel import PolicyGradient

# Launch a headless two-player Pong server in a background thread, give it a
# few seconds to come up, then start the (resumed) policy-gradient learner.
game = Pong(2, server=True, sync=False, headless=True) ##,debug=True)
_thread.start_new_thread(game.start, ())
time.sleep(5)  # wait for the game server thread to finish initialising
model = PolicyGradient(resume=True)
model.start()

#####################################################################################
### Dont Use this as this has a very high latency due to there being a lot of    ####
### threads running and even threads under other threads which doesnt make sense ####
### But this is only true for one system I tested on, Others seem to benefit     ####
### from this approach                                                           ####
#####################################################################################
from turtle import Turtle, Screen
from pong import Pong
from ball import Ball
import time
from scoreboard import Scoreboard

# Window setup: 800x600 black playfield; tracer(0) disables auto-redraw so
# paddle/ball moves only appear on explicit screen updates.
screen = Screen()
screen.bgcolor("black")
screen.setup(800, 600)
screen.title("PONG")
screen.tracer(0)

left_pos = (-375, 0)
right_pos = (375, 0)
l_pong = Pong((left_pos))   # left paddle
r_pong = Pong((right_pos))  # right paddle
ball = Ball()
score = Scoreboard()

# Key bindings: arrow keys drive the right paddle, w/s the left.
screen.listen()
screen.onkey(r_pong.up, "Up")
screen.onkey(r_pong.down, "Down")
screen.onkey(l_pong.up, "w")
screen.onkey(l_pong.down, "s")

game = True
while game:
    # NOTE(review): only the frame-pacing sleep is visible here — the rest of
    # the game loop appears truncated in this chunk.
    time.sleep(ball.ball_speed)
def train(shared_model, shared_optimizer, rank, args, info):
    """A3C worker: roll out the local policy on a local Pong environment and
    push clipped gradients into the shared model after every rnn_steps steps.

    shared_model/shared_optimizer live in shared memory across processes;
    info holds shared counters (frames, episodes, running reward/loss).
    """
    #env = gym.make(args.env) # make a local (unshared) environment
    env = Pong({})#{args}
    #env = Breakout({'paddle_width': 10})
    #env.seed(args.seed + rank)
    torch.manual_seed(args.seed + rank) # seed everything
    model = NNPolicy(channels=1, memsize=args.hidden, num_actions=args.num_actions) # a local/unshared model
    state = torch.tensor(prepro(env.reset())) # get first state
    start_time = last_disp_time = time.time()
    episode_length, epr, eploss, done = 0, 0, 0, True # bookkeeping
    while info['frames'][0] <= 8e7 or args.test: # openai baselines uses 40M frames...we'll use 80M
        model.load_state_dict(shared_model.state_dict()) # sync with shared model
        hx = torch.zeros(1, 256) if done else hx.detach() # rnn activation vector
        values, logps, actions, rewards = [], [], [], [] # save values for computing gradientss
        for step in range(args.rnn_steps):
            episode_length += 1
            value, logit, hx = model((state.view(1,1,80,80), hx))
            logp = F.log_softmax(logit, dim=-1)
            # Sample an action from the policy distribution.
            action = torch.exp(logp).multinomial(num_samples=1).data[0]#logp.max(1)[1].data if args.test else
            state, reward, done, _ = env.step(action.numpy()[0])
            if args.render:
                #env.render()
                imshow('state',state)
                waitKey(1)
                #vid.write(state)
            state = torch.tensor(prepro(state)) ; epr += reward
            reward = np.clip(reward, -1, 1) # reward
            done = done or episode_length >= 1e4 # don't playing one ep for too long
            info['frames'].add_(1) ; num_frames = int(info['frames'].item())
            if num_frames % 2e6 == 0: # save every 2M frames
                printlog(args, '\n\t{:.0f}M frames: saved model\n'.format(num_frames/1e6))
                torch.save(shared_model.state_dict(), args.save_dir+'model.{:.0f}.tar'.format(num_frames/1e6))
            if done: # update shared data
                info['episodes'] += 1
                # Exponential moving averages of episode reward and loss.
                interp = 1 if info['episodes'][0] == 1 else 1 - args.horizon
                info['run_epr'].mul_(1-interp).add_(interp * epr)
                info['run_loss'].mul_(1-interp).add_(interp * eploss)
            if rank == 0 and time.time() - last_disp_time > 20: # print info ~ every minute
                elapsed = time.strftime("%Hh %Mm %Ss", time.gmtime(time.time() - start_time))
                printlog(args, 'time {}, episodes {:.0f}, frames {:.1f}M, mean epr {:.2f}, run loss {:.2f}'
                    .format(elapsed, info['episodes'].item(), num_frames/1e6, info['run_epr'].item(), info['run_loss'].item()))
                last_disp_time = time.time()
            if done: # maybe print info.
                episode_length, epr, eploss = 0, 0, 0
                state = torch.tensor(prepro(env.reset()))
            values.append(value) ; logps.append(logp) ; actions.append(action) ; rewards.append(reward)
        # Bootstrap value for the truncated rollout (zero if the episode ended).
        next_value = torch.zeros(1,1) if done else model((state.unsqueeze(0), hx))[0]
        values.append(next_value.detach())
        loss = cost_func(args, torch.cat(values), torch.cat(logps), torch.cat(actions), np.asarray(rewards))
        eploss += loss.item()
        shared_optimizer.zero_grad() ; loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), 40)
        for param, shared_param in zip(model.parameters(), shared_model.parameters()):
            if shared_param.grad is None: shared_param._grad = param.grad # sync gradients with shared model
        shared_optimizer.step()
for param, shared_param in zip(model.parameters(), shared_model.parameters()): if shared_param.grad is None: shared_param._grad = param.grad # sync gradients with shared model shared_optimizer.step() if __name__ == "__main__": if sys.version_info[0] > 2: mp.set_start_method('spawn') # this must not be in global scope elif sys.platform == 'linux' or sys.platform == 'linux2': raise "Must be using Python 3 with linux!" # or else you get a deadlock in conv2d args = get_args() args.save_dir = '{}/'.format(args.env.lower()) # keep the directory structure simple if args.render: args.processes = 1 ; args.test = True # render mode -> test mode w one process if args.test: args.lr = 0 # don't train in render mode args.num_actions = Pong().actions # get the action space of this game os.makedirs(args.save_dir) if not os.path.exists(args.save_dir) else None # make dir to save models etc. torch.manual_seed(args.seed) shared_model = NNPolicy(channels=1, memsize=args.hidden, num_actions=args.num_actions).share_memory() shared_optimizer = SharedAdam(shared_model.parameters(), lr=args.lr) info = {k: torch.DoubleTensor([0]).share_memory_() for k in ['run_epr', 'run_loss', 'episodes', 'frames']} info['frames'] += shared_model.try_load(args.save_dir) * 1e6 if int(info['frames'].item()) == 0: printlog(args,'', end='', mode='w') # clear log file processes = [] for rank in range(args.processes): p = mp.Process(target=train, args=(shared_model, shared_optimizer, rank, args, info)) p.start() ; processes.append(p) for p in processes: p.join()
def pong_eval(name):
    """Train and repeatedly evaluate a student's Pong agent (Python 2).

    Imports the solution module named by `name`, trains its agent over
    several epochs, and logs evaluation results under pong_evaluation/.
    Returns the evaluator's result-file name.
    """
    # import the solution name into the global namespace as 'exercise'
    exercise = __import__(convert(name))
    name = convert('{0}'.format(name))
    # game and agent setup code
    game = Pong(do_render=args.render)
    # game.render_time = 0.00001 # immediate rendering (visual debugging)
    original_game = copy.copy(game)
    # fetch the agent from the provided solution
    agent = exercise.get_agent(game)
    folder = 'pong_evaluation'
    configuration = setting_configuration()
    target_path = os.path.join(folder, configuration[0])
    target_filename = os.path.join(target_path, name)
    try:
        os.makedirs(target_path)
    except OSError as e:
        # silently ignore any errors, other errors _will_ appear if this fails
        import errno
        if e.errno == errno.EEXIST and os.path.isdir(target_path):
            pass
        else:
            raise
    with open(os.path.join(target_path, 'settings.txt'), 'w') as fh:
        fh.write(configuration[1])
    # Remove stale results from a previous run, if any.
    try:
        os.remove(target_filename + '.txt')
    except OSError:
        pass
    # Warm-up training rounds on unrendered copies of the game.
    for x in xrange(args.initial_training - 1):
        game_copy = copy.copy(original_game)
        game_copy.do_render = False # don't render initial training
        agent.game = game_copy
        agent.learning = True
        exercise.train(agent)
    winning = 0  # consecutive-ish count of rounds where wins dominate losses
    try:
        for x in xrange(args.training_epochs):
            game_copy = copy.copy(original_game)
            game_copy.do_render = False # don't render training
            agent.game = game_copy
            # train the agent using the provided solution
            agent.learning = True
            exercise.train(agent)
            # agent.learner.dump_policy(str(x))
            # clean up after training
            agent.accumulated = 0 # reset accumulated rewards
            agent.set_epsilon(0.0) # turn off exploration
            agent.game.reset() # reset the game
            agent.game = original_game # if the training modifies the game, it is fixed here
            # evaluate the training results
            agent.game.do_render = args.render
            print 'evaluating'
            file_name, wins, loss = evaluator.pong_evaluate(agent, runs=args.eval_games, name=target_filename, max_count=1000)
            if wins > loss * 10:
                winning += 1
                if winning > 10:
                    print 'solution succeeds'
            else:
                winning = max(0, winning - 1)
            print 'W {0} | {1} L | Round: {2}'.format(wins, loss, x)
    except KeyboardInterrupt:
        print '\rEARLY INTERRUPT!'
    return file_name
def compute_transfer_model(self):
    """Collect random trajectories from the configured domain, fit an MLP to
    their backed-up values (zero targets when not transferring), store it as
    self.model, then run AVI()."""
    X,Y,bk = [],[],[]
    i = 0
    values = {}  # (state number, state facts) -> backed-up value
    while i < self.transfer+1: #at least one iteration burn in time
        # Start a fresh episode in the configured simulator, grabbing its
        # background knowledge the first time through.
        if self.simulator == "logistics":
            state = Logistics(number = self.state_number,start=True)
            if not bk:
                bk = Logistics.bk
        elif self.simulator == "pong":
            state = Pong(number = self.state_number,start=True)
            if not bk:
                bk = Pong.bk
        elif self.simulator == "tetris":
            state = Tetris(number = self.state_number,start=True)
            if not bk:
                bk = Tetris.bk
        elif self.simulator == "wumpus":
            state = Wumpus(number = self.state_number,start=True)
            if not bk:
                bk = Wumpus.bk
        elif self.simulator == "blocks":
            state = Blocks_world(number = self.state_number,start=True)
            if not bk:
                bk = Blocks_world.bk
        elif self.simulator == "blackjack":
            state = Game(number = self.state_number,start=True)
            if not bk:
                bk = Game.bk
        elif self.simulator == "50chain":
            state = Chain(number = self.state_number,start=True)
            if not bk:
                bk = Chain.bk
        elif self.simulator == "net_admin":
            state = Admin(number = self.state_number,start=True)
            if not bk:
                bk = Admin.bk
        with open(self.simulator+"_transfer_out.txt","a") as f:
            # Trajectory logging happens only when transfer is enabled.
            if self.transfer:
                f.write("start state: "+str(state.get_state_facts())+"\n")
            time_elapsed = 0
            within_time = True  # episodes over the time budget are discarded
            start = clock()
            trajectory = [(state.state_number,state.get_state_facts())]
            # Random walk until a goal state.
            while not state.goal():
                if self.transfer:
                    f.write("="*80+"\n")
                state_action_pair = state.execute_random_action()
                state = state_action_pair[0] #state
                if self.transfer:
                    f.write(str(state.get_state_facts())+"\n")
                trajectory.append((state.state_number,state.get_state_facts()))
                end = clock()
                time_elapsed = abs(end-start)
                # Per-domain wall-clock budgets for a single episode.
                if self.simulator == "logistics" and time_elapsed > 0.5:
                    within_time = False
                    break
                elif self.simulator == "pong" and time_elapsed > 1000:
                    within_time = False
                    break
                elif self.simulator == "tetris" and time_elapsed > 1000:
                    within_time = False
                    break
                elif self.simulator == "wumpus" and time_elapsed > 1:
                    within_time = False
                    break
                elif self.simulator == "blocks" and time_elapsed > 1:
                    within_time = False
                    break
                elif self.simulator == "blackjack" and time_elapsed > 1:
                    within_time = False
                    break
                elif self.simulator == "50chain" and time_elapsed > 2:
                    within_time = False
                    break
                elif self.simulator == "net_admin" and time_elapsed > 1:
                    within_time = False
                    break
        if within_time:
            self.compute_value_of_trajectory(values,trajectory)
            self.state_number += len(trajectory)+1
            # Accumulate (state facts, value) training pairs.
            for key in values:
                state = list(key[1])
                value = values[key]
                X.append(state)
                Y.append(value)
        '''
        for key in values:
            facts += list(key[1])
            example_predicate = "value(s"+str(key[0])+") "+str(values[key])
            examples.append(example_predicate)
        '''
        i += 1
    npX = np.array(X)
    npY = np.array(Y)
    if not self.transfer:
        # Burn-in without transfer: regress toward all-zero values.
        npY = np.zeros(len(npY))
    model = MLPRegressor(hidden_layer_sizes=(25,), activation="logistic", solver="lbfgs", alpha=0.0001, batch_size="auto", learning_rate="constant", learning_rate_init=0.001, power_t=0.5, max_iter=200, shuffle=True, random_state=None, tol=0.0001, verbose=False, warm_start=False, momentum=0.9, nesterovs_momentum=True, early_stopping=False, validation_fraction=0.1, beta_1=0.9, beta_2=0.999, epsilon=1e-08)
    print (npX)
    model.fit(npX,npY)
    #reg = GradientBoosting(regression = True,treeDepth=2,trees=self.trees,sampling_rate=0.7,loss=self.loss)
    #reg.setTargets(["value"])
    #reg.learn(facts,examples,bk)
    self.model = model
    self.AVI()
class Matrix(object):
    """OSC-driven controller for a 64x32 LED matrix.

    Listens for OSC messages from a control surface and switches between
    display modes (preset caption screens, pong, oscilloscope).
    """

    server = None
    driver = None
    led = None
    modes = []
    running = False
    color = None
    wheel = None
    pong = None
    scope = None
    mode = 0
    joysticks = []
    pong_running = False
    scope_running = False

    def __init__(self):
        # OSC endpoint the control surface talks to; zero timeout so
        # handle_request() never blocks the frame loop.
        self.server = OSCServer(("192.168.2.122", 5005))
        self.server.timeout = 0
        self.driver = DriverAdaMatrix(rows=32, chain=2)
        self.driver.SetPWMBits(6)
        self.led = LEDMatrix(self.driver, 64, 32, serpentine=False)
        # Index in this list == the mode number sent over /mode.
        self.modes = [
            self.mode_presets,
            self.color_presets,
            self.color_gradient,
            self.white_gradient,
            self.direct_control,
            self.arcade,
            self.mindfuck,
        ]
        self.color = colors.Salmon
        self.wheel = ColorWheel()
        self.running = True
        self.joysticks = [0] * 5
        self.pong = Pong(self.led)
        self.scope = Scope(self.led, self.wheel)
        self.scheme = []
        # Route each OSC address to its handler (order preserved).
        for path, handler in [
            ("/mode", self.mode_callback),
            ("/coin", self.coin_callback),
            ("/js", self.joystick_callback),
            ("/pot", self.pot_callback),
            ("/fad", self.fader_callback),
            ("/beam", self.beam_callback),
            ("/scheme", self.scheme_callback),
            ("/sleep", self.sleep_callback),
        ]:
            self.server.addMsgHandler(path, handler)

    def handle_timeout(self):
        # By convention handle_request() clears .timed_out before calling us,
        # so setting it here signals "no more pending requests".
        self.timed_out = True

    # TODO: update this to take all the joysticks and send them off selectively
    def joystick_callback(self, path, tags, args, source):
        if self.pong_running:
            self.pong.update_sticks(args[0], args[4])
        elif self.scope_running:
            self.scope.update_sticks(args)

    def pot_callback(self, path, tags, args, source):
        if self.scope_running:
            self.scope.update_pots(args)

    def fader_callback(self, path, tags, args, source):
        if self.scope_running:
            self.scope.update_faders(args)

    def beam_callback(self, path, tags, args, source):
        if self.scope_running:
            self.scope.update_beam(args[0])

    def mode_callback(self, path, tags, args, source):
        self.scope_running = True
        self.led.all_off()
        new_mode = int(args[0])
        # Leaving mode 5 (arcade) shuts pong down.
        if self.mode == 5 and new_mode != 5:
            self.pong_running = False
        self.mode = new_mode
        self.modes[self.mode]()
        self.led.update()
        sleep(1)

    def coin_callback(self, path, tags, args, source):
        self.led.all_off()
        self.led.update()
        # Blink a thank-you message five times, then restore the screen.
        for _ in range(5):
            self.led.drawText("Thank You!!", 4, 10, size=1, color=colors.Lavender)
            self.led.update()
            sleep(1)
            self.led.all_off()
            self.led.update()
            sleep(0.5)
        self.modes[self.mode]()
        self.led.update()

    def scheme_callback(self, path, tags, args, source):
        # args arrives as a flat list of r,g,b triples.
        self.scheme = [args[i:i + 3] for i in xrange(0, len(args), 3)]
        self.wheel.setScheme(self.scheme)

    def sleep_callback(self, path, tags, args, source):
        print("sleep plz")
        self.scope_running = False
        self.pong_running = False
        self.led.all_off()
        self.led.update()

    def each_frame(self):
        """Engine hook called every frame: drain pending OSC requests, then
        advance whichever animation (pong / scope) is active."""
        self.server.timed_out = False
        while not self.server.timed_out:
            self.server.handle_request()
        if self.pong_running:
            self.led.all_off()
            self.pong.step()
            self.led.update()
            sleep(0.05)
        elif self.scope_running:
            self.led.all_off()
            self.scope.step()
            self.led.update()
            sleep(0.05)

    def _banner(self, top, bottom):
        # Two-line caption shared by all preset screens.
        self.led.drawText(top, 6, 5, size=1, color=self.color)
        self.led.drawText(bottom, 6, 15, size=1, color=self.color)

    def mode_presets(self):
        self._banner("Mode", "Presets")

    def color_presets(self):
        self._banner("Color", "Presets")

    def color_gradient(self):
        self._banner("Color", "Gradients")

    def white_gradient(self):
        self._banner("White", "Gradients")

    def direct_control(self):
        self._banner("Direct", "Control")

    def arcade(self):
        # Scroll the banner once, then hand control over to pong.
        anim = ScrollText(self.led, "Arcade Mode!!!!!", 64, 13, size=1,
                          color=self.color)
        anim.run(fps=30, untilComplete=True, max_cycles=1)
        self.pong.reset()
        self.scope_running = False
        self.pong_running = True

    def mindfuck(self):
        self._banner("Y U NO", "LISTEN?!")
        self.scope_running = False

    def run(self):
        while self.running:
            sleep(1)
            self.each_frame()
def connect(sid, environ):
    """Connection handler: register a fresh Pong game for this client and
    kick off the game-state broadcast loop.

    sid     -- session id of the connecting client (key into GAMES)
    environ -- WSGI-style environment dict (unused here)
    """
    GAMES[sid] = Pong()
    print("connect ", sid)
    loop = asyncio.get_event_loop()
    # BUG FIX: run_until_complete() requires a coroutine or future, not a bare
    # function object, so the callee must be invoked.  Assumes
    # send_game_states is an `async def` — confirm at its definition.
    loop.run_until_complete(send_game_states())
def main():
    """Entry point: run single-player pong in an 800x600 window."""
    game = Pong(800, 600)
    game.mainloop_sp()