def Start(self):
    filePath = self.Config.FilePath.BestModel
    cnf = "BestLog/BestLog180126082522.cnf"
    wgt = "BestLog/BestLog180126082522.wgt"
    timeLimit = 0.9

    net = Network()
    net.Load(cnf, wgt)
    net.TimeLimit = timeLimit

    model = Model()
    taskName = "TaskEval/EvalTask114.task"
    task = MujocoTask(model, taskName)
    #task = MujocoTask.LoadRandom(model, self.Config.Task.EvalDir)
    env = MujocoEnv(model)

    agentConfig = self.Config.ViewerAgent
    agent = Agent(agentConfig, net, model, task)

    bestAction = agent.SearchBestAction()

    while True:
        env.SetSimState(task.StartState)
        for action in bestAction:
            env.Step(action)
            #print(env.GetObservation(task))
            env.Render()
def init_environment(self):
    """Initialize the environment."""
    self.graph = nx.barabasi_albert_graph(n=self.agent_num, m=self.attach)
    for node in self.graph.nodes(data=True):
        idx, data = node
        age = self.get_agent_age()
        data["agent"] = Agent(
            agent_setting=self.agent_setting,
            id=idx,
            age=age,
            hometown=self.name,
            status=Status.SUSCEPTABLE,
            infection_model=self.infection_model,
        )

    # Determine the civil servants
    # (wrap the node view in list(): random.sample() requires a sequence)
    cs_num = math.ceil(self.agent_num * self.economy_setting["civil_servants_rate"])
    civil_servants = random.sample(list(self.graph.nodes(data=True)), cs_num)
    for _, data in civil_servants:
        data["agent"].is_civil_servant = True

    # Determine the initially infected agents
    init_infected = random.sample(list(self.graph.nodes(data=True)), self.init_infection)
    for _, data in init_infected:
        data["agent"].status = Status.INFECTED

    # Initialize the economic parameters
    self.finance = self.economy_setting["init_gdp"]
    self.tax_rate = self.economy_setting["tax_rate"]
    self.tmp_tax_revenue = 0

    logger.info('Initialized environment "{}". Population: {}, initial infections: {}'.format(
        self.name.upper(), self.agent_num, self.init_infection))
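#---------------------------------------------------------------------------
# A minimal, self-contained sketch of the node-attachment pattern used in
# init_environment(), assuming only networkx. DummyAgent is a hypothetical
# stand-in for the project's Agent class; the sketch also shows why
# random.sample() needs list(...) around the node view (a NodeDataView is
# not a sequence).
import math
import random
from dataclasses import dataclass

import networkx as nx


@dataclass
class DummyAgent:  # hypothetical stand-in, not the project's Agent
    id: int
    is_civil_servant: bool = False


graph = nx.barabasi_albert_graph(n=100, m=3)
for idx, data in graph.nodes(data=True):
    data["agent"] = DummyAgent(id=idx)

cs_num = math.ceil(100 * 0.05)
for _, data in random.sample(list(graph.nodes(data=True)), cs_num):
    data["agent"].is_civil_servant = True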
def test_is_in_range_for_fourth_percent_out_range(self):
    a = Agent("1", "", 0.7, 0.5, 0.9, 0.2)
    assert not a.is_in_range(3, 0.4)
#---------------------------------------------------------------------------
def inflow(self, inflow_agent: Agent, stay_period: int):
    """Handle an agent flowing in from an external environment."""
    # Update the inflowing agent's information
    inflow_agent.current_location = self.name
    inflow_agent.stay_period = stay_period

    # If the agent has flowed in before, skip creating a new node
    if inflow_agent.code in self.code_list:
        return

    # Otherwise, add a new node for the agent
    self._add_new_node(inflow_agent)
def CalcScore(self, net, filePath):
    bestModel = MujocoModelHumanoid()
    bestTask = MujocoTask(bestModel, filePath)
    bestEnv = MujocoEnv(bestModel)

    bestAgent = Agent(self.Config.CheckerAgent, net, bestModel, bestTask)
    bestAction = bestAgent.SearchBestAction()
    bestScore = self.GetScore(bestEnv, bestTask, bestAction)

    return bestScore
def test_agents_play_a_game():
    agent1 = Agent("1", get_random_name(), 0, 0, 0, 0)
    agent2 = Agent("2", get_random_name(), 0, 0, 0, 0)
    players = [agent1, agent2]
    game = Connect4('cls')
    game.level = 4
    game.default()
    game.game_mode = game.game_modes[0]
    [agent1, agent2] = game.logic_play(players=players)
    assert (
        (agent1.record[agent1.WINS] == 1
         or agent1.record[agent1.DRAWS] == 1
         or agent1.record[agent1.LOSSES] == 1)
        and (agent2.record[agent2.WINS] == 1
             or agent2.record[agent2.DRAWS] == 1
             or agent2.record[agent2.LOSSES] == 1))
def c_vs_c(self, player_type1, player_type2):
    """
    input   : player_type1 and player_type2 are strings
    function: sets the game to play computer vs computer
    output  : a list with two computer agents
    """
    players = list()
    p1_name = self.view.input_option("Enter " + player_type1 + "'s name: ")
    p = self.get_computer_parameters()
    players.append(Agent('1', p1_name, p[0], p[1], p[2], p[3]))
    p2_name = self.view.input_option("Enter " + player_type2 + "'s name: ")
    p = self.get_computer_parameters()
    players.append(Agent('2', p2_name, p[0], p[1], p[2], p[3]))
    return players
def main(args):
    oldAgentDir = ""
    experienceDir = ""
    newAgentDir = ""
    trainingIterations = ""
    usage = ("trainNewAgent.py -i <inputDirectory> -o <outputDirectory> "
             "-e <experienceDirectory> -l <trainingIterations>")
    try:
        options, arguments = getopt.getopt(args, "hi:o:e:l:")
    except getopt.GetoptError:
        print(usage)
        sys.exit(2)
    for opt, arg in options:
        if opt == "-h":
            print(usage)
            sys.exit()
        elif opt == "-i":
            oldAgentDir = arg
        elif opt == "-o":
            newAgentDir = arg
        elif opt == "-e":
            experienceDir = arg
        elif opt == "-l":
            trainingIterations = arg

    # Main code: rebuild the project root path two levels up
    pathList = os.path.abspath("").split("\\")
    pathList = pathList[:-2]
    path = "\\".join(pathList)

    agent = Agent(path + "/gameData/agentConfig.json")
    agent.importAgent(path + "/" + oldAgentDir)
    agent.loadTrainingData(path + "/" + experienceDir)

    iterations = 1
    try:
        iterations = int(trainingIterations)
    except ValueError:
        pass
    for i in range(iterations):
        print("Training:", i)
        agent.trainAgent()

    # Export agent
    agent.export(path + "/" + newAgentDir)
    sys.exit(0)
def Start(self):
    filePath = self.Config.FilePath.NextGeneration

    net = Network()
    net.Load(filePath.Config, filePath.Weight)

    model = Model()
    task = MujocoTask(model, self.GetRandomFile())
    env = MujocoEnv(model)

    agentConfig = self.Config.SelfPlayAgent
    agent = Agent(agentConfig, net, model, task)

    bestAction = agent.SearchBestAction()
    print(bestAction)

    agent.SaveTrainData(self.Config.GetTrainPath())
def test_reproduce():
    trainer = GeneticTrainer(10, 100)
    parent1 = Agent("1", get_random_name(), 15, 30, 60, 90)
    parent2 = Agent("2", get_random_name(), 5, 20, 50, 80)
    child1, child2 = trainer.reproduce(parent1, parent2)
    parent1_params, parent2_params = [15, 30, 60, 90], [5, 20, 50, 80]
    child1_params = [
        child1.percent_first_move,
        child1.percent_second_move,
        child1.percent_third_move,
        child1.percent_fourth_move,
    ]
    child2_params = [
        child2.percent_first_move,
        child2.percent_second_move,
        child2.percent_third_move,
        child2.percent_fourth_move,
    ]
    assert (parent1_params != child1_params
            and parent2_params != child2_params)
def reproduce(self, parent1: Agent, parent2: Agent) -> list:
    """
    :param parent1: an Agent used to generate a couple of children with another Agent
    :param parent2: an Agent used to generate a couple of children with another Agent
    :return: a list of 2 children (new Agents) with a new combination of the
             parents' (Agents') genes (params)
    """
    random.seed()
    parent1_params = (parent1.percent_first_move,
                      parent1.percent_second_move,
                      parent1.percent_third_move,
                      parent1.percent_fourth_move)
    parent2_params = (parent2.percent_first_move,
                      parent2.percent_second_move,
                      parent2.percent_third_move,
                      parent2.percent_fourth_move)
    # Single-point crossover: swap the tails of the parents' parameter tuples
    index = random.randint(1, 3)
    child1_params = parent1_params[:index] + parent2_params[index:]
    child2_params = parent2_params[:index] + parent1_params[index:]
    child1 = Agent("1", get_random_name(), 0, 0, 0, 0)
    child2 = Agent("2", get_random_name(), 0, 0, 0, 0)
    (child1.percent_first_move,
     child1.percent_second_move,
     child1.percent_third_move,
     child1.percent_fourth_move) = child1_params
    (child2.percent_first_move,
     child2.percent_second_move,
     child2.percent_third_move,
     child2.percent_fourth_move) = child2_params
    return [child1, child2]
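#---------------------------------------------------------------------------
# A minimal, standalone sketch of the single-point crossover that reproduce()
# performs, using plain tuples instead of Agent objects. The names here are
# illustrative only, not part of the project.
import random


def single_point_crossover(genes_a: tuple, genes_b: tuple) -> tuple:
    """Swap the tails of two gene tuples at a random cut point."""
    index = random.randint(1, len(genes_a) - 1)
    return (genes_a[:index] + genes_b[index:],
            genes_b[:index] + genes_a[index:])


child_a, child_b = single_point_crossover((15, 30, 60, 90), (5, 20, 50, 80))
# e.g. index == 2 gives child_a == (15, 30, 50, 80) and child_b == (5, 20, 60, 90)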
def test_mutate():
    trainer = GeneticTrainer(10, 100)
    individual = Agent("1", get_random_name(), 0.14, 0.33, 0.67, 0.96)
    original_params = [
        individual.percent_first_move,
        individual.percent_second_move,
        individual.percent_third_move,
        individual.percent_fourth_move,
    ]
    mutated = trainer.mutate(individual, 100)
    mutated_params = [
        mutated.percent_first_move,
        mutated.percent_second_move,
        mutated.percent_third_move,
        mutated.percent_fourth_move,
    ]
    assert original_params != mutated_params
def generate_population(self, population_size) -> list:
    """
    :param population_size: number of individuals to be created for the new population
    :return: the randomly generated population, so the caller can use it if
             required; it is also kept in the local attribute self.population
    """
    for i in range(0, population_size):
        random.seed()
        # Draw four ordered thresholds in [0, 1]
        first_percent = round(random.uniform(0, 0.25), 2)
        second_percent = round(random.uniform(first_percent, 0.5), 2)
        third_percent = round(random.uniform(second_percent, 0.75), 2)
        fourth_percent = round(random.uniform(third_percent, 1), 2)
        self.population.append(
            Agent("1", get_random_name(), first_percent, second_percent,
                  third_percent, fourth_percent))
    return self.population
def train(self, individual: Agent, opponents: list):
    """
    :param individual: an Agent that will play against a list of Agents
                       (opponents) to measure the number of wins it gets
    :param opponents: a list of Agents that will play one by one against the
                      individual being trained
    :return: the same individual (Agent) from the input, but 'trained'
    """
    individual.character = "1"
    for opponent in opponents:
        opponent.character = "2"
        players = [individual, opponent]
        game = Connect4('cls')
        game.default()
        game.game_mode = game.game_modes[0]
        [individual, _] = game.logic_play(players=players)
    return individual
def test_make_pairs():
    agent1 = Agent("1", get_random_name(), 0, 0, 0, 0)
    agent2 = Agent("2", get_random_name(), 0, 0, 0, 0)
    agent3 = Agent("1", get_random_name(), 0, 0, 0, 0)
    agent4 = Agent("2", get_random_name(), 0, 0, 0, 0)
    agent5 = Agent("1", get_random_name(), 0, 0, 0, 0)
    agent6 = Agent("2", get_random_name(), 0, 0, 0, 0)
    agents = [agent1, agent2, agent3, agent4, agent5, agent6]
    couples_in_a_row = []
    for agent01, agent02 in make_pairs(agents):
        couples_in_a_row.append(agent01)
        couples_in_a_row.append(agent02)
    assert agents == couples_in_a_row
def CalcScores(self, best, next, filePath):
    bestModel = MujocoModelHumanoid()
    bestTask = MujocoTask(bestModel, filePath)
    bestEnv = MujocoEnv(bestModel)

    nextModel = MujocoModelHumanoid()
    nextTask = MujocoTask(nextModel, filePath)
    nextEnv = MujocoEnv(nextModel)

    bestAgent = Agent(self.Config.EvaluateAgent, best, bestModel, bestTask)
    nextAgent = Agent(self.Config.EvaluateAgent, next, nextModel, nextTask)

    bestAction = bestAgent.SearchBestAction()
    nextAction = nextAgent.SearchBestAction()

    bestScore = self.GetScore(bestEnv, bestTask, bestAction)
    nextScore = self.GetScore(nextEnv, nextTask, nextAction)

    #nextAgent.SaveTrainData(self.Config.GetTrainPath("next"))

    return bestScore, nextScore
def test_throw_die_26_57(self):
    a = Agent("1", "", 0.98, 0.7, 0.5, 0.1)
    die = a.throw_die(0.26, 0.57)
    assert 0.26 <= die <= 0.57
def test_is_in_range_for_first_percent_between_range(self):
    a = Agent("1", "", 0.7, 0.5, 0.9, 0.2)
    assert a.is_in_range(0, 0.63)
def test_is_in_range_for_first_percent_out_range(self):
    a = Agent("1", "", 0.7, 0.5, 0.9, 0.2)
    assert not a.is_in_range(0, 0.75)
def test_is_in_range_for_second_percent_limit2(self):
    a = Agent("1", "", 0.7, 0.5, 0.9, 0.2)
    assert a.is_in_range(1, 0.5)
def test_is_in_range_for_second_percent_between_range(self):
    a = Agent("1", "", 0.7, 0.5, 0.9, 0.2)
    assert a.is_in_range(1, 0.30)
def test_is_in_range_for_third_percent_limit2(self):
    a = Agent("1", "", 0.7, 0.5, 0.9, 0.2)
    assert a.is_in_range(2, 0.9)
def test_create_ranges_disordered(self):
    a = Agent("1", "", 0.7, 0.5, 0.9, 0.2)
    dic = a.create_ranges(0.7, 0.5, 0.9, 0.2)
    assert {3: [0, 0.2], 1: [0.21, 0.5], 0: [0.51, 0.7], 2: [0.71, 0.9]} == dic
def test_is_in_range_for_third_percent_between_range(self):
    a = Agent("1", "", 0.7, 0.5, 0.9, 0.2)
    assert a.is_in_range(2, 0.85)
from Agent.Agent import Agent
from Network.Exceptions import *
import os.path

# Rebuild the project root path two levels up (Windows-style separators)
pathList = os.path.abspath("").split("\\")
pathList = pathList[:-2]
path = "\\".join(pathList)

agent = Agent(path + "/gameData/agentConfig.json")
agent.createNewAgent()
agent.export(path + "/playData/agents/a0B")
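#---------------------------------------------------------------------------
# A hedged, portable alternative sketch for the path rebuilding above, using
# pathlib instead of Windows-specific "\\" splitting; parents[1] assumes the
# same two-levels-up project layout. Not the project's code.
from pathlib import Path

project_root = str(Path.cwd().parents[1])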
def test_is_in_range_for_third_percent_out_range(self):
    a = Agent("1", "", 0.7, 0.5, 0.9, 0.2)
    assert not a.is_in_range(2, 0.5)
def test_create_ranges_set_biggest_percent(self):
    a = Agent("1", "", 0.98, 0.7, 0.5, 0.1)
    dic = a.create_ranges(0.88, 0.7, 0.5, 0.1)
    assert a.biggest_percent == 0.88
def compute_path(self):
    count = 0
    from Agent.Agent import Agent
    self.__agents = []
    for drone in program.get_drones():
        # Create Reinforcement Learning Agents
        self.__agents.append(
            Agent(drone.get_name(), count, drone.get_battery_time(), drone.get_speed(),
                  program.compute_minimum_area(self.__drones), (0, 0),
                  self.__original_environment))
        count += 1

    # Get number of observation episodes
    number_episodes = Config.SIMULATIONS

    import time
    global_execution_start_time = time.time()

    start_number = 0
    done_count = 0  # Number of times problem has been solved

    # Get epsilon
    epsilon = Config.EPSILON
    # Save epsilon for plotting
    epsilons = [epsilon]

    # Total repetitions in all episodes
    total_unchanged_environment_episodes_count = 0

    # Maximum coverage overall
    max_coverage = 0.0
    # Max coverage lists for plotting for the whole experiment
    max_coverages = []

    # Simulations' times
    episodes_time = []
    # Simulations' total rewards
    rewards_episodes = []

    # Store total actions taken per observation
    episode_total_actions = []
    episode_total_valid_actions = []
    valid_actions_taken_agent = []

    # Compute episodes
    for episode_number in range(start_number, number_episodes):
        # Reset agents and environment
        program.reset()

        # Update heatmap
        heatmap = self.get_environment() * 0.0
        for element in self.__agents:
            (x, y) = element.get_position()
            heatmap[x][y] += 1.0

        # Add minimum max coverage
        max_coverages.append(0.0)
        # Add max coverage observation
        coverages_episode = [0.0]

        # Reset unchanged environments count
        unchanged_environment_episodes_count = 0

        # Create ANN if necessary
        if Config.GLOBAL_MODEL:
            from numpy import dstack
            input_matrix = dstack((self.get_environment(), self.get_environment(),
                                   self.get_environment()))
            from Model.Model import create_model
            model = create_model(input_matrix.shape)

        # Get initial environment for starting observation
        actual_environment = program.get_environment()

        # Get visited positions map and agent position map
        import numpy as np
        actual_visited_map = np.array(actual_environment * 0.0, dtype=bool)  # Changed to bool for first experiments
        drone_map = np.array(actual_environment * 0.0, dtype=bool)  # Changed to bool for first experiments

        # Rewards, kept for plotting
        rewards_episodes.append(0.0)
        rewards = []
        action_rewards = []
        for _ in self.__agents:
            rewards.append([0])
            action_rewards.append([0])

        # Mark agents' positions as true
        for agent in self.__agents:
            (i, j) = agent.get_position()
            drone_map[i, j] = True
            actual_visited_map[i, j] = True

        # Print a trace every SIMULATIONS_CHECKPOINT episodes
        if episode_number % Config.SIMULATIONS_CHECKPOINT == 0 and Config.PRINT_SIMULATIONS:
            print("Episode {} of {}".format(episode_number + 1, number_episodes))

        # Compute paths
        done = False
        episode_counter = 0
        visited_list = []  # store each agent's visited squares
        visited_list.append(actual_visited_map)

        # Add new values to actions lists
        episode_total_actions.append(0.0)
        episode_total_valid_actions.append(0.0)
        if len(valid_actions_taken_agent):
            for element in self.get_agents():
                valid_actions_taken_agent[element.get_number()].append(0.0)
        else:
            for _ in self.get_agents():
                valid_actions_taken_agent.append([0.0])

        # Store trendline_slope
        trendline_slope = -1.0

        start_time = time.time()

        while not done:
            # Get previous environment (this way all agents act at the same time)
            prev_visited_map = np.array(np.ceil(np.sum(visited_list, axis=0)), dtype=bool).copy()
            prev_drone_map = drone_map.copy()
            drone_position_list = []  # store each agent's position

            # For each agent compute 1 action
            for agent in program.get_agents():
                # Make decision (epsilon-greedy)
                rand_number = np.random.random()
                if rand_number < epsilon:
                    random_action = True
                    # Get random action
                    chosen_action = np.random.randint(0, len(Config.ACTIONS_DICT.keys()))
                else:
                    random_action = False
                    # Decide one action
                    if not Config.GLOBAL_MODEL:
                        chosen_action = np.argmax(agent.predict(
                            np.array(prev_visited_map, dtype=int),
                            np.array(prev_drone_map, dtype=int),
                            self.get_environment()))
                    else:
                        chosen_action = np.argmax(agent.predict_global_model(
                            np.array(prev_visited_map, dtype=int),
                            np.array(prev_drone_map, dtype=int),
                            self.get_environment(),
                            model))
                episode_total_actions[episode_number] += 1.0

                # Get agent's position before doing the action, for printing it in a file
                prev_position = agent.get_position()

                # Update environment according to action
                actual_visited_map, actual_drone_map, reward = agent.do_action(
                    chosen_action, self.__original_environment, prev_visited_map, prev_drone_map)
                (r, c) = agent.get_position()
                heatmap[r][c] += 1.0

                # Plot heatmap
                import matplotlib
                matplotlib.use('Agg')  # For running on an OS without a graphical environment
                import matplotlib.pyplot as plt
                plt.plot(rewards[agent.get_number()])
                fig, ax = plt.subplots()
                im = ax.imshow(heatmap)
                for r in range(Config.ENVIRONMENT_ROWS):
                    for c in range(Config.ENVIRONMENT_COLUMNS):
                        text = ax.text(c, r, heatmap[r, c], ha="center", va="center", color="w")
                fig.tight_layout()
                plt.savefig('heatmap_episode_' + str(episode_number) + '.png')
                plt.clf()

                # Plot agent's reward graph
                from numpy import sum
                rewards[agent.get_number()].append(sum(rewards[agent.get_number()]) + agent.get_reward())
                action_rewards[agent.get_number()].append(agent.get_reward())
                rewards_episodes[episode_number] += agent.get_reward()
                plt.plot(rewards[agent.get_number()])
                plt.savefig('total_reward_evolution_drone_' + str(agent.get_number()) + '.png')
                plt.clf()
                plt.plot(action_rewards[agent.get_number()])
                plt.savefig('action_reward_evolution_drone_' + str(agent.get_number()) + '.png')
                plt.clf()

                if (prev_visited_map != actual_visited_map).any():
                    agent.increase_valid_taken_actions()
                    episode_total_valid_actions[episode_number] += 1.0

                # Store the number of times in a row that the environment does not change
                if (prev_visited_map == actual_visited_map).all():
                    unchanged_environment_episodes_count += 1
                else:
                    unchanged_environment_episodes_count = 0

                # Save the taken action in a file
                with open(Config.BASE_ROUTE + 'actions_' + str(agent.get_number()) + '_'
                          + agent.get_name() + '.csv', 'a+') as f:
                    if not episode_counter:
                        agent.set_status('flying')
                        f.write('action_code, action_name, prev_position, actual_position, valid, '
                                'visited, random_action, environment_shape, actions_taken, '
                                'valid_taken_actions, unchanged_episodes\n')
                    f.write(str(chosen_action) + ', ' + Config.ACTIONS_DICT[chosen_action] + ', '
                            + str(prev_position) + ', ' + str(agent.get_position()) + ', '
                            + str(prev_position != agent.get_position()) + ', '
                            + str((prev_position != agent.get_position())
                                  and (prev_visited_map[agent.get_position()[0],
                                                        agent.get_position()[1]])) + ', '
                            + str(random_action) + ', ' + str(self.__original_environment.shape) + ', '
                            + str(agent.get_actions_taken()) + ', '
                            + str(agent.get_valid_taken_actions()) + ', '
                            + str(unchanged_environment_episodes_count) + '\n')

                # Memorize new memory observation
                observation = (prev_visited_map, actual_visited_map, prev_drone_map,
                               actual_drone_map, chosen_action, reward, agent.get_status())
                agent.memorize(observation)

                # Save agent results for merging with the remaining agents
                visited_list.append(actual_visited_map + (1.0 - self.get_environment()))
                plt.imshow(np.array(np.ceil(np.sum(visited_list, axis=0)), dtype=bool),
                           cmap='Greys', interpolation='nearest')
                plt.savefig(Config.BASE_ROUTE + 'combined_visited_list.png')
                plt.clf()
                drone_position_list.append(actual_drone_map)

                # Train
                if not Config.GLOBAL_MODEL:
                    agent_history = agent.learn(self.get_environment())
                    agent.get_model().save(str(agent.get_number()) + '_local_model.h5')
                else:
                    agent_history = agent.learn_global_model(self.get_environment(), model)
                    model.save('global_model.h5')

                # Check experiment stopping
                waiting_hours = float(time.time() - start_time) / 60.0 / 60.0  # Convert seconds to hours
                borders_matrix = 1.0 - np.ceil(self.get_environment())
                visited_matrix = np.array(np.ceil(np.sum(visited_list, axis=0)), dtype=float)
                visited_matrix = np.where(visited_matrix >= 1.0, 1.0, visited_matrix)
                only_visited_cells_matrix = visited_matrix - borders_matrix
                visited_cells_count = float(np.count_nonzero(only_visited_cells_matrix == 1.0))
                visitable_cells_count = float(np.count_nonzero(self.get_environment() == 1.0))
                coverage = visited_cells_count / visitable_cells_count
                max_coverage = max(coverage, max_coverage)
                max_coverages[episode_number] = max(coverage, max_coverages[episode_number])
                coverages_episode.append(coverage)
                valid_actions_taken_agent[agent.get_number()][episode_number] = \
                    agent.get_valid_taken_actions()

                if unchanged_environment_episodes_count >= Config.MAXIMUM_UNCHANGED_ENVIRONMENT_EPISODES:
                    total_unchanged_environment_episodes_count += unchanged_environment_episodes_count
                    done = True
                    break
                elif waiting_hours >= Config.MAXIMUM_WAIT_HOURS and coverage < Config.COMPLETENESS_COVERAGE:
                    total_unchanged_environment_episodes_count += unchanged_environment_episodes_count
                    done = True
                    break

                # Check if the agents have finished
                if False not in np.array(np.ceil(np.sum(visited_list, axis=0)), dtype=bool):
                    with open(Config.BASE_ROUTE + 'solution_times.txt', 'a+') as f:
                        f.write('solution time ' + str(done_count) + ': '
                                + time.strftime('%H:%M:%S', time.gmtime(time.time() - start_time))
                                + ' epsilon: ' + str(epsilon) + '\n')
                    done_count += 1
                    done = True
                    break

            episode_counter += 1

            # Combine agents' results
            drone_map = np.array(np.sum(drone_position_list, axis=0), dtype=bool)

        # Plot coverages for each observation graph
        if len(coverages_episode) > 1:
            import matplotlib
            matplotlib.use('Agg')  # For running on an OS without a graphical environment
            import matplotlib.pyplot as plt
            ax = plt.figure().gca()
            ax.set_ylim([0.0, 1.0])
            x = list(range(len(coverages_episode)))
            y = coverages_episode
            from numpy import polyfit
            fit = polyfit(x, y, 1)
            yfit = [n * fit[0] for n in x] + fit[1]
            ax.plot(x, y)
            ax.plot(yfit, 'r--')
            plt.savefig('coverages_episode_' + str(episode_number) + '.png')
            plt.clf()

        # Store and plot the observation's time
        episodes_time.append((time.time() - start_time) / 3600.0)
        import numpy as np
        average_episode_time = np.average(episodes_time)
        import matplotlib
        matplotlib.use('Agg')  # For running on an OS without a graphical environment
        import matplotlib.pyplot as plt
        ax = plt.figure().gca()
        ax.plot(episodes_time)
        from matplotlib.ticker import MaxNLocator
        ax.xaxis.set_major_locator(MaxNLocator(integer=True))
        plt.savefig('episode_time_hours.png')
        plt.clf()

        # Plot valid action percentage per observation graph
        if len(episode_total_valid_actions) > 1:
            ax = plt.figure().gca()
            division = np.divide(episode_total_valid_actions, episode_total_actions)
            ax.set_ylim([0.0, 1.0])
            x = list(range(len(division)))
            y = division
            from numpy import polyfit
            fit = polyfit(x, y, 1)
            yfit = [n * fit[0] for n in x] + fit[1]
            ax.plot(x, y)
            ax.plot(yfit, 'r--')
            plt.savefig('actions_percentages_episodes.png')
            plt.clf()

            ax = plt.figure().gca()
            ax.set_ylim([0.0, 1.0])
            for element in self.get_agents():
                division = np.divide(valid_actions_taken_agent[element.get_number()],
                                     episode_total_actions)
                x = list(range(len(division)))
                y = division
                ax.plot(x, y)
            plt.savefig('percentage_work_per_agent.png')
            plt.clf()

        # Plot coverages graph
        if len(max_coverages) > 1:
            ax = plt.figure().gca()
            ax.set_ylim(bottom=0.0)
            x = list(range(len(max_coverages)))
            y = max_coverages
            from scipy.stats import linregress
            trend = linregress(x, y)
            trendline_slope = trend.slope  # or fit[0]
            from numpy import polyfit
            fit = polyfit(x, y, 1)
            yfit = [n * fit[0] for n in x] + fit[1]
            ax.plot(x, y)
            ax.plot(yfit, 'r--')
            plt.savefig('coverages.png')
            plt.clf()

        # Plot epsilon graph
        ax = plt.figure().gca()
        ax.plot(epsilons)
        from matplotlib.ticker import MaxNLocator
        ax.xaxis.set_major_locator(MaxNLocator(integer=True))
        plt.savefig('epsilons.png')
        plt.clf()

        # Update epsilon
        # The lower the epsilon, the fewer random actions are taken
        epsilon = max(Config.MIN_EPSILON, epsilon * Config.EPSILON_DECAY)
        epsilons.append(epsilon)
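#---------------------------------------------------------------------------
# A minimal, self-contained sketch of the epsilon-greedy action selection with
# multiplicative decay that compute_path() uses above. ACTIONS, MIN_EPSILON and
# EPSILON_DECAY are illustrative stand-ins for the Config values, and the
# zeroed q_values stand in for agent.predict(...); this is not the project's
# actual training loop.
import numpy as np

ACTIONS = ("up", "down", "left", "right")
MIN_EPSILON, EPSILON_DECAY = 0.05, 0.99

epsilon = 1.0
for episode in range(10):
    if np.random.random() < epsilon:
        chosen_action = np.random.randint(0, len(ACTIONS))  # explore
    else:
        q_values = np.zeros(len(ACTIONS))  # stand-in for agent.predict(...)
        chosen_action = int(np.argmax(q_values))  # exploit
    # Decay once per episode: the lower the epsilon, the fewer random actions
    epsilon = max(MIN_EPSILON, epsilon * EPSILON_DECAY)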
def test_is_in_range_for_first_percent_limit2(self):
    a = Agent("1", "", 0.7, 0.5, 0.9, 0.2)
    assert a.is_in_range(0, 0.70)
def test_throw_die_0_100(self):
    a = Agent("1", "", 0.98, 0.7, 0.5, 0.1)
    die = a.throw_die(0, 1)
    assert 0 <= die <= 1
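#---------------------------------------------------------------------------
# A hedged sketch of the Agent.create_ranges behavior implied by the tests
# above: sort the four move percentages ascending and map each move index to a
# contiguous [lower, upper] slice of [0, 1]. This is a reconstruction from the
# asserted values, not the project's actual implementation.
def create_ranges(p0, p1, p2, p3):
    """Map each move index to a contiguous [lower, upper] slice of [0, 1]."""
    ranges = {}
    lower = 0
    for idx, percent in sorted(enumerate((p0, p1, p2, p3)), key=lambda t: t[1]):
        ranges[idx] = [lower, percent]
        lower = round(percent + 0.01, 2)
    return ranges


# Reproduces the expectation in test_create_ranges_disordered
assert create_ranges(0.7, 0.5, 0.9, 0.2) == {
    3: [0, 0.2], 1: [0.21, 0.5], 0: [0.51, 0.7], 2: [0.71, 0.9]}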