def setUp(self):
    """Build a fresh poker environment and the two agents under test."""
    self.env = DQN.env
    (self.player_states,
     (self.community_infos, self.community_cards)) = self.env.reset()
    (self.player_infos, self.player_hands) = zip(*self.player_states)
    self.current_state = ((self.player_infos, self.player_hands),
                          (self.community_infos, self.community_cards))
    self.state = DQN.create_np_array(self.player_infos, self.player_hands,
                                     self.community_cards, self.community_infos)
    self.state_set = utilities.convert_list_to_tupleA(
        self.player_states[self.env.learner_bot.get_seat()],
        self.current_state[1])
    self._round = utilities.which_round(self.community_cards)
    self.current_player = self.community_infos[-3]
    self.learner_bot, self.villain = self.env.learner_bot, self.env.villain
    # Tabular Q function backing the villain's epsilon-greedy policy
    Q = defaultdict(lambda: np.zeros(self.env.action_space.n))
    self.agent = DQN.DQNAgent(DQN.state_size, DQN.action_size)  # initialise agent
    self.policy = DQN.make_epsilon_greedy_policy(
        Q, self.agent.epsilon, self.env.action_space.n)
    self.villain_action = DQN.get_action_policy(
        self.player_infos, self.community_infos, self.community_cards,
        self.env, self._round, self.env.n_seats, self.state_set,
        self.policy, self.villain)
    self.learner_action = self.agent.act(
        self.state, self.player_infos, self.community_infos,
        self.community_cards, self.env, self._round, self.env.n_seats,
        self.state_set, self.policy)
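
# Hedged sketch: make_epsilon_greedy_policy is called in setUp() above but not
# defined in this snippet. The version below is an assumption based on the
# standard tabular epsilon-greedy pattern its call site suggests (a defaultdict
# Q table, an epsilon, and the action-space size nA); it is not necessarily
# DQN's actual implementation.
def make_epsilon_greedy_policy(Q, epsilon, nA):
    """Return a function mapping an observation to epsilon-greedy action probabilities."""
    def policy_fn(observation):
        # Spread probability mass epsilon uniformly over all nA actions...
        action_probs = np.ones(nA, dtype=float) * epsilon / nA
        # ...then put the remaining 1 - epsilon mass on the greedy action.
        best_action = np.argmax(Q[observation])
        action_probs[best_action] += 1.0 - epsilon
        return action_probs
    return policy_fn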
# Imports this snippet needs; Game, isCollision, test, and plot_seaborn are
# assumed to be defined elsewhere in this module.
import os
import random
import statistics

import numpy as np
import pygame
import keras  # or: from tensorflow import keras, depending on the project setup

import DQN


def run(params):
    # Initialize pygame and pin the window position
    x = 100
    y = 50
    os.environ['SDL_VIDEO_WINDOW_POS'] = "%d,%d" % (x, y)
    record = 0
    pygame.init()
    # FPS = 60
    FPS = 120
    fpsClock = pygame.time.Clock()
    gametime = 1

    # Create the screen
    xMax = 1600
    yMax = 1000

    # Background
    if params['display']:
        screen = pygame.display.set_mode((xMax, yMax))
        background = pygame.image.load('background.jpg')

        # Sound
        # mixer.music.load("darude.wav")
        # mixer.music.play(-1)

        # Caption and icon
        pygame.display.set_caption("Rainy Day")
        icon = pygame.image.load('player.png')
        pygame.display.set_icon(icon)

    counter_games = 0
    games_move_list = []
    agent = DQN.DQNAgent(params)
    weights_filepath = params['weights_path']
    if params['load_weights']:
        agent.model.load_weights(weights_filepath)
        print("weights loaded")
    score_plot = []
    counter_plot = []

    while counter_games < params['episodes']:
        at_edge = False
        # If not training, do not allow random actions
        if not params['train']:
            agent.epsilon = 0.00
        else:
            # agent.epsilon gives randomness to actions, decayed linearly per game
            agent.epsilon = 1 - (counter_games * params['epsilon_decay_linear'])
        counter_games += 1

        # Player
        playerSizeX = 177
        playerSizeY = 239
        playerImgR = pygame.image.load('player.png')
        playerImgL = pygame.image.load('playerL.png')
        curImage = playerImgR
        # playerX = 800
        playerX = 600
        playerY = yMax - playerSizeY
        playerX_change = 0

        # Enemies
        images = ['chromeBall.png', 'edgeBall.png', 'firefoxBall.png']
        enemyImg = []
        player_move_list = []
        enemyX = []
        enemyY = []
        enemyXVel = []
        enemyYVel = []
        enemyX_change = []
        enemyY_change = []
        num_of_enemies = 15
        cur_enemies = 1
        last_move = 0
        # pygame.init()
        total_score = 0
        for i in range(num_of_enemies):
            enemyImg.append(pygame.image.load(images[random.randint(0, 2)]))
            # enemyX.append(-100)
            enemyX.append(random.randint(-300, 100))
            enemyY.append(random.randint(0, 150))
            enemyXVel.append(random.randint(5, 9))
            enemyYVel.append(5)

        # Score
        score_value = 0
        font = pygame.font.Font('freesansbold.ttf', 32)
        textX = 100
        textY = 100

        # Game over
        over_font = pygame.font.Font('freesansbold.ttf', 64)

        # Game loop
        running = True
        game_over = False
        sprint = False
        while running:
            if score_value > record:
                record = score_value
            game = Game(440, 440)
            player = game.player
            if params['display']:
                # RGB = Red, Green, Blue
                screen.fill((0, 0, 0))
                # Background image
                screen.blit(background, (0, 0))

            if params['computer_player']:
                state_old = agent.get_state(playerX, playerY, enemyX, enemyXVel,
                                            enemyY, playerX_change, enemyYVel,
                                            cur_enemies)
                # Perform a random action based on agent.epsilon, or choose the action
                if random.uniform(0, 1) < agent.epsilon or params['random']:
                    final_move = keras.utils.to_categorical(random.randint(0, 2),
                                                            num_classes=5)
                else:
                    # Predict the action based on the old state
                    prediction = agent.model.predict(
                        state_old.reshape((1, DQN.num_inputs)),
                        use_multiprocessing=True)
                    final_move = keras.utils.to_categorical(np.argmax(prediction[0]),
                                                            num_classes=5)
                last_move = playerX_change
                playerX_change = player.do_move(final_move, playerX, playerY)
                # player_move_list.append(playerX_change)
            else:
                for event in pygame.event.get():
                    if event.type == pygame.QUIT:
                        running = False
                # If a keystroke is pressed, check whether it is right or left
                keys = pygame.key.get_pressed()
                playerX_change = 0
                if keys[pygame.K_LSHIFT]:
                    sprint = True
                else:
                    sprint = False
                if keys[pygame.K_LEFT]:
                    playerX_change = -7 - int(sprint) * 7
                    curImage = playerImgL
                if keys[pygame.K_RIGHT]:
                    playerX_change = 7 + int(sprint) * 7
                    curImage = playerImgR

            # Move the player and clamp to the screen edges
            playerX += playerX_change
            if playerX <= 0:
                playerX = 0
                at_edge = True
            elif playerX >= xMax - playerSizeX:
                playerX = xMax - playerSizeX
                at_edge = True
            else:
                at_edge = False

            # Enemy movement; one more ball joins every 460 ticks
            gametime = gametime + 1
            if gametime % 460 == 0 and cur_enemies < num_of_enemies:
                cur_enemies = cur_enemies + 1
            for i in range(cur_enemies):
                # Ball movement with gravity
                enemyX[i] = enemyX[i] + enemyXVel[i]
                enemyY[i] = enemyY[i] + enemyYVel[i]
                enemyYVel[i] = enemyYVel[i] + 0.25
                if enemyY[i] > yMax - 128:
                    # Bounce off the floor, losing some energy
                    enemyYVel[i] = -enemyYVel[i] * 0.85
                    enemyY[i] = yMax - 128
                if enemyX[i] > xMax or enemyX[i] < -310:
                    # Ball left the screen: respawn it coming back and score a point
                    enemyY[i] = random.randint(0, 150)
                    enemyYVel[i] = 5
                    enemyXVel[i] = -enemyXVel[i]
                    score_value = score_value + 1
                if params['display']:
                    screen.blit(enemyImg[i], (enemyX[i], enemyY[i]))
                    # enemy(enemyX[i], enemyY[i], i)

                # Collision
                collision = isCollision(enemyX[i] + 64, enemyY[i] + 64,
                                        playerX + 85, playerY + 120)
                if collision:
                    running = False

            # if gametime % 3 == 0:
            state_new = agent.get_state(playerX, playerY, enemyX, enemyXVel,
                                        enemyY, playerX_change, enemyYVel,
                                        cur_enemies)
            reward = agent.set_reward(playerX, running, enemyX, enemyY,
                                      playerX_change, at_edge, playerY,
                                      enemyXVel, enemyYVel, last_move,
                                      cur_enemies)
            # Note: state_old and final_move are only set on the
            # computer_player path, so training assumes that mode.
            if params['train'] and not params['random']:
                # Train short-term memory on the new action and state
                agent.train_short_memory(state_old, final_move, reward,
                                         state_new, game.crash)
                # Store the new data in long-term memory
                agent.remember(state_old, final_move, reward, state_new,
                               game.crash)

            # player(curImage, playerX, playerY)
            if params['display']:
                screen.blit(curImage, (playerX, playerY))

            score_text = font.render("Score : " + str(score_value), True, (0, 0, 0))
            # Renamed from "game" so the Game instance above is not clobbered
            game_text = font.render("Game Number: " + str(counter_games), True, (0, 0, 0))
            high_score_text = font.render("High Score: " + str(record), True, (0, 0, 0))
            if params['display']:
                screen.blit(score_text, (textX, textY))
                screen.blit(game_text, (textX, textY - 40))
                screen.blit(high_score_text, (textX, textY - 80))
                # show_score(textX, textY)
                pygame.display.update()
                fpsClock.tick(FPS)

        if params['train'] and not params['random']:
            agent.replay_new(agent.memory, params['batch_size'])
        print('Game ' + str(counter_games) + ' Score ' + str(score_value))
        if score_value == 69:
            print('nice')
        score_plot.append(score_value)
        # games_move_list.append(player_move_list)
        counter_plot.append(counter_games)

    mean = statistics.mean(score_plot)
    stddev = statistics.stdev(score_plot)
    if params['train']:
        agent.model.save_weights(params['weights_path'])
        mean, stddev = test(params)
    if params['plot_score']:
        plot_seaborn(counter_plot, score_plot, params['train'])
    return mean, stddev
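
# Hedged sketch: isCollision is called in run() above but not defined in this
# snippet. The version below is an assumption -- the classic pygame tutorial
# pattern of thresholding the Euclidean distance between the ball centre and a
# point on the player sprite. The 100 px threshold is hypothetical; the real
# value would be tuned to the sprite sizes used above.
import math

def isCollision(ballCenterX, ballCenterY, playerPointX, playerPointY):
    """Return True when the ball centre is within a fixed radius of the player point."""
    distance = math.hypot(ballCenterX - playerPointX, ballCenterY - playerPointY)
    return distance < 100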
def assign(self):
    number_feature = 1
    batch_size = initiation.job_counter * initiation.drone_counter * 200
    agent = DQN.DQNAgent(initiation.job_counter, number_feature)
    reward = 0
    best = 1000
    for m in range(EPISODES):
        # Total processing time of all jobs bounds the scheduling horizon
        max_time = 0
        for i in range(initiation.job_counter):
            max_time += initiation.Job[i][3]
        # Reset the schedule: mark every job unassigned and clear the tables
        for i in range(initiation.job_counter):
            initiation.Job[i][4] = -1
        initiation.Drone = np.zeros(
            (initiation.drone_counter, 20 * max_time.astype(int)))
        initiation.result = np.zeros(
            (initiation.job_counter, initiation.drone_counter))
        done = False
        arr = [0]
        # State per job: 0 (waiting) or 1 (assigned)
        state = np.array(arr * initiation.job_counter)
        state = [np.reshape(state[i], (1, 1))
                 for i in range(initiation.job_counter)]
        action_list = []  # list of predicted jobs
        # Total time = waiting time of the job plus the drone's going and
        # returning time
        total_time = 0
        average_wtime = 0
        assigned_job = 0
        # Because of the random assignment there have to be extra iterations.
        # REVIEW: should find a better way
        for i in range(1000):
            action = agent.act(state)  # act() decides on the action
            if initiation.Job[action][4].astype(int) == -1:
                # The selected job has not been assigned before
                time = initiation.Job[action][3]
                work_time = (Functions.g(action, i % initiation.drone_counter)
                             + Functions.r(action, i % initiation.drone_counter)
                             + time.astype(int))
                state[action] = 1  # mark the chosen job as assigned
                # Reshape the state into the array-of-arrays form act() expects
                state = [np.reshape(state[k], (1, 1))
                         for k in range(initiation.job_counter)]
                for j in range(initiation.Job[action][5].astype(int),
                               10 * initiation.max_time.astype(int)):
                    # Check the drone's availability for this job at the first
                    # possible time slot
                    if Functions.check_availability(
                            i % initiation.drone_counter, j,
                            j + work_time) == 1:
                        initiation.Job[action][4] = j
                        break
                method.assign_operation(
                    self, action, i % initiation.drone_counter,
                    initiation.Job[action][4].astype(int))  # assign
                total_time = (initiation.Job[action][4]
                              - initiation.Job[action][5])
                assigned_job = assigned_job + 1
                average_wtime = (average_wtime + total_time) / assigned_job
                # The reward is given according to the total time: the smaller
                # the current total time relative to the running average, the
                # bigger the reward
                reward = reward + (average_wtime - total_time) * 500
                agent.remember(state, action, reward, state, done)  # save the transition
                action_list.append(action)  # append the action to the list
                done = True
            else:
                reward = reward - 100  # punish selecting an already-assigned job
                agent.remember(state, action, reward, state, done)  # save the transition
                done = True
            if len(agent.memory) > batch_size:
                agent.replay(batch_size)
        # Calculate the total waiting time, the quantity to be minimised;
        # any job left unassigned incurs a flat penalty of 1000
        total_waiting = 0
        for i in range(initiation.job_counter):
            if initiation.Job[i][4] == -1:
                total_waiting = 1000
                break
            total_waiting += initiation.Job[i][4] - initiation.Job[i][5]
        # print(initiation.Job)
        # print(initiation.result)
        for n in range(initiation.job_counter):
            initiation.Job[n][4] = -1
        initiation.Drone = np.zeros(
            (initiation.drone_counter,
             20 * initiation.max_time.astype(int)))
        initiation.result = np.zeros(
            (initiation.job_counter, initiation.drone_counter))
        flag = 0
        test = total_waiting / initiation.job_counter
        if best > test:
            best = test
        print(test)
        print(best)
    return 0
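
# Hedged sketch: DQNAgent's remember() and replay() are not shown in this
# snippet. The class below is an assumption based on the standard Keras DQN
# experience-replay pattern the call sites suggest (remember(state, action,
# reward, next_state, done) and replay(batch_size)); the name ReplaySketch and
# the gamma/memory_size defaults are hypothetical.
import random
from collections import deque
import numpy as np

class ReplaySketch:
    def __init__(self, model, gamma=0.95, memory_size=2000):
        self.model = model              # Keras model mapping a state to Q-values
        self.gamma = gamma              # discount factor
        self.memory = deque(maxlen=memory_size)  # old transitions fall off the end

    def remember(self, state, action, reward, next_state, done):
        self.memory.append((state, action, reward, next_state, done))

    def replay(self, batch_size):
        # Fit the network toward the Bellman target on a random minibatch
        minibatch = random.sample(self.memory, batch_size)
        for state, action, reward, next_state, done in minibatch:
            target = reward
            if not done:
                # Bootstrap from the best predicted Q-value of the next state
                target = reward + self.gamma * np.amax(
                    self.model.predict(next_state)[0])
            target_f = self.model.predict(state)
            target_f[0][action] = target  # only the taken action's target changes
            self.model.fit(state, target_f, epochs=1, verbose=0)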
import numpy as np
import gym
import tensorflow as tf

import DQN
from models import AttackModel

# Adversarial training
env = gym.make('PongNoFrameskip-v4')
tf.reset_default_graph()
sess = tf.Session()
dqnAgent = DQN.DQNAgent(env, sess)
# dqnAgent = DQN.DQNAgent(env, sess, "../ckpts/dqn/pong_adv_training/dqn_episode1050.ckpt")

adversary = AttackModel(sess, dqnAgent)
adversary.setupAttack("gauss", eps=0.015)
adversary.setAttackProb(0.5)
testRewards, testLengths, _, _ = dqnAgent.test(2)
# lengths, rewards, losses = dqnAgent.train(
#     adversary=adversary,
#     checkpointFolder="../ckpts/dqn/pong_adv_training/",
#     epNum=2000)  # , epStart=1051, eps=MIN_EPS)

dqnAttack = AttackModel(dqnAgent)
dqnAttack.setupAttack("fgsm", eps=0.01)
dqnAttack.setAttackProb(1.0)
testRewards, testLengths, attNums, _ = dqnAgent.test(1, adversary=dqnAttack,
                                                     render=True)
print("Mean reward = " + str(np.mean(testRewards)))
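
# Hedged sketch: AttackModel's internals are not shown here. This is an
# assumption of what setupAttack("fgsm", eps=...) might build -- the standard
# fast gradient sign method applied to the agent's Q-network outputs, in the
# same TF1 graph style as the script above. Treating the Q-values as logits
# with the greedy action as the label is a common heuristic for attacking DQN
# policies; stateInput, qValues, and this wiring are hypothetical.
def build_fgsm_op(stateInput, qValues, eps):
    """Return a TF op that produces an adversarial observation via FGSM."""
    # Use the greedy action as the label and push the input away from it
    bestAction = tf.argmax(qValues, axis=1)
    loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
        labels=bestAction, logits=qValues)
    grad = tf.gradients(loss, stateInput)[0]
    # One signed-gradient step of size eps is the whole attack
    return stateInput + eps * tf.sign(grad)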