def addAFactory(address):
    if random.random() < 0.5:
        # with a given probability, create a new factory cloning
        # an existing one, randomly chosen (can also be a clone)
        factoryTmpList = []
        # agentList contains also recipes
        for i in range(len(address.agentList)):
            if address.agentList[i].agType == 'factories':
                factoryTmpList.append(address.agentList[i])
        # print factoryTmpList
        random.shuffle(factoryTmpList)
        toBeCloned = factoryTmpList[0]
        # print toBeCloned.number

        # cloning (the agent constructor interacts with the graph)
        common.clonedN += 1
        anAgent = Agent(toBeCloned.number * 100 + common.clonedN,
                        address.worldState,
                        toBeCloned.xPos + modPosition(),
                        toBeCloned.yPos + modPosition(),
                        agType=toBeCloned.agType,
                        sector=toBeCloned.sector)
        address.agentList.append(anAgent)
        anAgent.setAgentList(address.agentList)

        if common.verbose:
            print("Created factory #", anAgent.number,
                  "in sector", anAgent.sector)
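# A minimal sketch of the modPosition() helper assumed above: the cloning code
# expects it to return a small random offset so the clone does not land exactly
# on its parent. The +/-1.0 range is an assumption, not the original value.
import random

def modPosition():
    return random.uniform(-1.0, 1.0)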
def main(args):
    env = gym.make(args.env)
    solved_reward = args.solved_reward
    if isinstance(env.action_space, gym.spaces.Box):
        latent_dim = [64, 32]
        agent = Agent.PPOAgent(env,
                               "./",
                               seed=args.seed,
                               latent_dim=latent_dim,
                               lr=3e-4,
                               learning_epoch=80,
                               max_episodes=10000,
                               max_timestep=1500,
                               learning_freq=4000,
                               solved_reward=230,
                               )
    elif isinstance(env.action_space, gym.spaces.Discrete):
        config = {"solved_reward": 145}
        # latent_dim = [64, 64]
        agent = Agent.PPOAgent(env, 12, "./",)
    agent.train(is_render=args.render)
def GenerateAgents(self, num_agents):
    """
    Initiates n Agent objects within the Engine. The agents are
    initialized with an affinity to consume and eco-consciousness.
    :param num_agents:
    :return:
    """
    friendList = [j for j in range(num_agents)]
    for i in range(num_agents):
        a = beta.rvs(self.A_params[0], self.A_params[1])  # draw from beta distribution
        b = 1 - a
        mu = beta.rvs(self.Mu_params[0], self.Mu_params[1])
        if round(mu, 5) == 0:
            mu = 0.0001
        # log-income drawn from a normal distribution, then exponentiated,
        # so income itself is log-normally distributed
        income = np.exp(norm.rvs(self.Income_int[0], self.Income_int[1]))
        delta = rand.uniform(self.Delta_int[0], self.Delta_int[1])
        agent = Agent(i, a, b, mu, income, self.Price, delta)

        # Assign friends
        friendList.remove(i)  # Agents cannot be friends with themselves
        agent.Friends = rand.sample(
            friendList,
            rand.choice(range(self.Friend_int[0], self.Friend_int[1])))
        friendList.append(i)
        self.Agents[i] = agent
def test_deliveries_arrived(self):
    # Already tested so now safely return
    return
    date = '2017-12-02 14:40:00'
    Agent.deliveries_arrived(date)
def test_agents_set_order(self, MockRandomNumber, MockGetTrackingNumbers):
    # Already tested so now safely return
    return
    pickupTime = '2014-10-22 03:32:43'
    orderId = 110
    dropoffTime = '2014-10-22 03:32:43'
    MockRandomNumber.side_effect = [7886, 7886, 7889, 8998]
    MockGetTrackingNumbers.return_value = [7777, 8989, 7886]

    # connect and set order
    Agent.connect(DBpath)
    Agent.agents_set_order(orderId, pickupTime, dropoffTime)

    connectToDataBase(DBpath)
    rows = cursor.execute('''select * from deliveries where trackingNo=7889;''')
    row = rows.fetchone()
    actualOutput = str(row[0]) + "|" + str(row[1]) + "|" + str(row[2])\
        + "|" + str(row[3])
    expectedOutput = "7889|110|2014-10-22 03:32:43|2014-10-22 03:32:43"
    assert expectedOutput == actualOutput
def main():
    env = GridWorld()
    agent = Agent()
    data = [[0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0]]  # initialize the value table
    gamma = 1.0
    alpha = 0.001

    for k in range(50000):  # run a total of 50,000 episodes
        done = False
        history = []
        while not done:
            action = agent.select_action()
            (y, x), reward, done = env.step(action)
            history.append((y, x, reward))
        env.reset()

        # once each episode ends, immediately update the table with its data
        cum_reward = 0  # the return G_t
        for transition in history[::-1]:
            # walk the visited states backward, computing the return step by step
            y, x, reward = transition
            data[y][x] = data[y][x] + alpha * (cum_reward - data[y][x])
            cum_reward = reward + gamma * cum_reward

    # print the learned table once training is finished
    for row in data:
        print(row)
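# Note on the backward sweep above: after processing step t, cum_reward holds
# r_t + gamma * (return of the later steps), i.e. G_t = r_t + gamma * G_{t+1},
# so one reverse pass over the episode yields the return at every visited cell.
# For example, with gamma = 1.0 and rewards [-1, -1, 0], the accumulated values
# reading the episode backward are 0, -1, -2.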
def run(self):
    global stop
    while not stop:
        time.sleep(5)
        command = input("commande : ").split()
        if len(command) > 0:
            if command[0] == "order":
                start = float(time.time()) + float(command[2])
                end = start + float(command[3])
                order = Order(float(command[1]), start, end, time.time())
                lock.acquire(1)
                Agent.send({"order": Agregate(AgType.COM, order)})
                lock.release()
            if command[0] == "go":
                for i in range(60):
                    print("top ", i)
                    start = float(time.time()) + float(command[2])
                    end = start + float(command[3])
                    order = Order(float(command[1]), start, end, time.time())
                    lock.acquire(1)
                    Agent.send({"ORDER": Agregate(AgType.COM, order)})
                    lock.release()
                    time.sleep(float(command[2]) + float(command[3]) + 3)
                stop = True
            if command[0] == "stop":
                stop = True
def target_update_analysis(self):
    target_updates = [(4, 0.001), (30, 1), (8, 0.01), (15, 0.1)]
    target_updates_str = [
        "t" + str(a[0]) + "_tau" + str(a[1]) for a in target_updates
    ]
    filename = "results/target_update_tau.png"
    fig = plt.figure(figsize=(12, 8), tight_layout=True)
    for idx, target_update_tau in enumerate(target_updates):
        target_update, tau = target_update_tau
        agent = Agent(self.gamma, self.epsilon_start, self.epsilon_end,
                      self.epsilon_decay, self.alpha, target_update,
                      self.max_iter, tau, self.batch_size, self.dropout_ratio)
        if agent.reward_exist():
            rewards = agent.load_rewards()
            print("load value for {}".format(agent.tag))
        else:
            rewards = agent.train()
        rewards = self.moving_average(rewards)
        epochs = [(i + 1) for i in range(len(rewards))]
        plt.plot(epochs, rewards, color=self.colors[idx], linestyle='-')
    plt.xlabel("Epochs")
    plt.ylabel("Rewards")
    plt.xlim(0, self.max_iter)
    plt.ylim(-1200, 300)
    plt.legend(target_updates_str, loc='best')
    fig.savefig(filename, dpi=fig.dpi)
    return
def main():
    conversation_id = str(uuid.uuid4())
    bot_id = botVersion()
    user_id = getpass.getuser()
    file_logger = IOAdapter.FileLogger("../data/output/log.jsonl")
    sql_logger = IOAdapter.SQLiteLogger("../data/output/log.db")
    io_adapter = sql_logger.makeIO(
        conversation_id, user_id, bot_id,
        file_logger.makeIO(conversation_id, user_id, bot_id,
                           IOAdapter.CommandLineIO()))
    agent = Agent(
        io_adapter,
        [
            TFIDFDiscussAction(
                ml.load_tfidf_model_from_image("../models/vectoriser_img.pk")),
            # EmbeddingDiscussAction(ml.load_embeddings_model_from_image(
            #     "../data/qa_pairs_embedded.pk",
            #     "https://storage.googleapis.com/tfhub-modules/google/universal-sentence-encoder/4.tar.gz")),
            # LdaDiscussAction("../models/lda_n20.pickle", "../models/tf_n20.pickle",
            #                  "../data/docs_preprocessed_topicDistribution_match_20.csv"),
            FarewellAction(),
            StaticQuestionAction(),
            GreetAction(),
            TopicSuggestionAction("../data/suggestions.json", 3),
            FallbackAction(),
            RecapAction(),
            RegexStaticQuestionAction()
        ])
    try:
        asyncio.run(agent.run())
    finally:
        sql_logger.close()
        file_logger.close()
def __init__(self, map, valDim=1):
    self.map = map
    self.x_num = map.x_num
    self.y_num = map.y_num
    self.defaultEdgeColor = QtGui.QColor(0, 0, 0)
    self.defaultEdgeWidth = 1
    self.defaultActiveCellColor = QtGui.QColor(0, 255, 255)
    self.defaultObstacleColor = QtGui.QColor(255, 165, 0)
    self.defaultHumanColor = QtGui.QColor(0, 204, 0)
    self.defaultRobotColor = QtGui.QColor(0, 0, 204)
    self.refStartHexIdx = None
    self.refEndHexIdx = None
    self.refStartColor = QtGui.QColor(102, 102, 255)
    self.refEndColor = QtGui.QColor(153, 153, 0)
    self.activeCells = []
    self.currentHexValDim = 0
    self.hexValDim = valDim
    self.hexVals = []
    for d in range(self.hexValDim):
        hexVal = np.ones((self.x_num, self.y_num))
        self.hexVals.append(hexVal)
    self.accessible = np.ones((self.x_num, self.y_num))
    self.humanPath = []
    self.robotPath = []
    self.human = Agent()
    self.robot = Agent()
def New_Generation(self, Methode=0, Indiv=50, Mute=10):
    # create a new generation of individuals from an old population sorted
    # by fitness, according to the chosen method
    if Methode == 0:
        Father = self.Pop[0]
        self.Pop = []
        self.AddAgent(Father)
        for i in range(0, Indiv):
            G = Genome.Genome(25, 3)
            G.Set_Map(self.Pop[0].Genome_.Map_[:, :])
            A = Agent.Agent(self.Pop[0].posX_, self.Pop[0].posY_, G, self.Grid)
            A.Mutate(Mute, 0.95)
            self.AddAgent(A)
    if Methode == 1:
        j = 0
        PopBis = []
        for agent in self.Pop:
            # each agent may reproduce while slots remain; the best
            # performers reproduce first
            if j < Indiv:
                PopBis.append(agent)
                j += 1
                if j < Indiv:
                    G = Genome.Genome(25, 3)
                    G.Set_Map(agent.Genome_.Map_[:, :])
                    A = Agent.Agent(agent.posX_, agent.posY_, G, self.Grid)
                    A.Mutate(Mute, 0.95)
                    PopBis.append(A)
                    j += 1
            else:
                break
        self.Pop = PopBis
def main(argv):
    level = '1'
    method = '1'
    try:
        opts, args = getopt.getopt(argv, "hl:m:", ["level=", "method="])
    except getopt.GetoptError:
        help()
        sys.exit(2)
    for opt, arg in opts:
        if opt == '-h':
            help()
            sys.exit()
        elif opt in ("-l", "--level"):
            level = arg
        elif opt in ("-m", "--method"):
            method = arg
    if len(level) > 0 and len(method) > 0:
        print('Level ', level)
        print('Method ', method)
        start, grid = env.get_maze_data('Game/Levels/level' + str(level) + '.txt')
        ag.act(start, grid, method=int(method))
    else:
        help()
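# A minimal sketch of the help() function called above (an assumption; the
# original usage text is not shown in this snippet):
def help():
    print('usage: main.py -l <level> -m <method>')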
def initial(self, Form):
    self.clear(Form, self.tableWidget)
    self.clear(Form, self.tableWidget_2)
    self.textEdit_5.setHtml(_translate("Form", '', None))
    self.textEdit_6.setHtml(_translate("Form", "0", None))
    player1 = self.textEdit.toPlainText()
    player2 = self.textEdit_2.toPlainText()
    agent = []
    for i in range(2):
        if i == 0:
            x = player1
        else:
            x = player2
        if x == 's1':
            a = Agent.StrategicAgent1(chr(65 + i) + " (Strate method 1) ")
        elif x == 's2':
            a = Agent.StrategicAgent2(chr(65 + i) + " (Strate method 2) ")
        elif x == 'mc':
            a = Agent.MonteCarloAgent(chr(65 + i) + " (Monte Carlo) ")
        elif x == 'dp':
            a = Agent.DynamicProgrammingAgent(chr(65 + i) + " (Dynamic Programming) ")
        elif x == 'rd':
            a = Agent.Agent(chr(65 + i) + " (Random) ")
        else:
            print("Illegal command!!")
        agent.append(a)
    global agentA
    global agentB
    agentA = agent[0]
    agentB = agent[1]
def play(model_file_name, config):
    print('load pretrained model file: ' + model_file_name)
    agent = Agent(config)
    load_checkpoint(model_file_name, agent.model)
    bird_game = game.GameState()
    total_reward = 0.
    time_count = 0.

    # 1. init S
    action = [1, 0]  # do nothing
    state = init_state()
    obs, reward, terminal = bird_game.frame_step(action)
    obs = preprocess(obs)
    state = np.append(state[1:, :, :], obs.reshape((1, ) + obs.shape), axis=0)

    while not terminal:
        action = agent.optimal_action(state)
        next_obs, reward, terminal = bird_game.frame_step(action)
        next_obs = preprocess(next_obs)
        next_state = np.append(state[1:, :, :],
                               next_obs.reshape((1, ) + next_obs.shape),
                               axis=0)
        state = next_state
        total_reward += reward
        time_count += 1
    print('total time step is {}'.format(time_count))
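# A hedged sketch of the preprocess() helper assumed above. Flappy Bird DQN
# implementations commonly shrink each frame to 80x80, convert it to grayscale,
# and binarize it before stacking; the sizes and threshold here are
# assumptions, not the original code.
import cv2
import numpy as np

def preprocess(observation):
    gray = cv2.cvtColor(cv2.resize(observation, (80, 80)), cv2.COLOR_BGR2GRAY)
    _, binary = cv2.threshold(gray, 1, 255, cv2.THRESH_BINARY)
    return binary.astype(np.float32) / 255.0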
def hybrid_agent():
    kb = KB()
    ag = Agent()
    matrix = [[0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0]]
    print("Starting Location = ", ag.FindCurrentLocation())
    while (1):
        if (ag.FindCurrentLocation() == [4, 4]):
            print("Game won!!\n")
            break
        x, y = _FindIndicesForLocation(ag.FindCurrentLocation())
        if ([x, y] not in visited):
            visited.append([x, y])  # append current location
        matrix[x][y] = 1  # current location is valid
        kb.tell(ag)  # perceive
        # updating safe rooms
        for room in unvisited:
            val = 4 * room[0] + room[1] + 1
            if ((kb.ask(-val) == True) and room not in safe):
                safe.append(room)
        goals = intersection(safe, unvisited)
        if (len(goals) == 0):
            print("No safe path ahead is possible!")
            break
        # matrix gets modified in the function itself
        Plan_route(ag, matrix, goals)
def __init__(self, parent):
    GGrule.__init__(self, 30)
    self.TimeDelay = ATOM3Integer(2)
    self.exactMatch = 1
    self.LHS = ASG_omacs(parent)

    self.obj2423 = Agent(parent)
    self.obj2423.preAction(self.LHS.CREATE)
    self.obj2423.isGraphObjectVisual = True

    if(hasattr(self.obj2423, '_setHierarchicalLink')):
        self.obj2423._setHierarchicalLink(False)

    # price
    self.obj2423.price.setNone()

    # name
    self.obj2423.name.setValue('')
    self.obj2423.name.setNone()

    self.obj2423.GGLabel.setValue(1)
    self.obj2423.graphClass_ = graph_Agent
    if parent.genGraphics:
        new_obj = graph_Agent(160.0, 60.0, self.obj2423)
        new_obj.layConstraints = dict()  # Graphical Layout Constraints
        new_obj.layConstraints['scale'] = [1.0, 1.0]
    else:
        new_obj = None
    self.obj2423.graphObject_ = new_obj

    # Add node to the root: self.LHS
    self.LHS.addNode(self.obj2423)
    self.obj2423.postAction(self.LHS.CREATE)

    self.RHS = ASG_omacs(parent)
def createAgent(self, agentlist, x, y, name):
    agent = Agent(self.environment, x, y, name, data["torus"], data["trace"])
    agentlist.append(agent)
    self.environment.setInCell(x, y, agent)
    if data["trace"]:
        agent.printTrace()
def agent_login(failed_attempt, database):
    failed = failed_attempt
    validate = Agent(database)  # initializes Agent object
    Valid_bool = False
    # the credential prompts were scrubbed in the source; the two lines below
    # are a reconstruction from the surrounding strings
    aid = input('Username: ')
    pwd = getpass.getpass('Password(hidden):')
    Valid_bool = validate.validate_agent(aid, pwd)  # validates that aid and pwd are correct
    # The remainder notifies the user that their aid or pwd is incorrect, then
    # lets them know that if they do not have an account they should contact
    # the System Admin to create one.
    if Valid_bool == False:
        print('Username or Password was incorrect')
        print('If you do not have an account, please see System Admin or Login as Customer')
        failed += 1
        agent_login(failed, database)
    else:
        agent_session = Agent_Session(database)
        agent_session.start_session(aid)
        agent_session.close()
        os.system('clear')
        main()
def getFitness(self, matrix):
    ev1_1 = ev.Evaluator(1, matrix)
    ev2_1 = ev.Evaluator(2)
    ag1_1 = ag.AlphaBetaAgent(1, 2, ev1_1)
    ag2_1 = ag.AlphaBetaAgent(2, 2, ev2_1)
    result1, game1 = self.simulateGame(ag1_1, ag2_1)
    white, black = game1.countColors()
    if black == 0:
        fitness1 = 64
    elif white == 0:
        fitness1 = -64
    else:
        fitness1 = white - black

    ev1_2 = ev.Evaluator(1, matrix)
    ev2_2 = ev.Evaluator(2)
    ag2_2 = ag.AlphaBetaAgent(1, 2, ev2_2)
    ag1_2 = ag.AlphaBetaAgent(2, 2, ev1_2)
    result2, game2 = self.simulateGame(ag2_2, ag1_2)
    white, black = game2.countColors()
    if black == 0:
        fitness2 = -64
    elif white == 0:
        fitness2 = 64
    else:
        fitness2 = black - white

    fitness = (fitness1 + fitness2)
    return fitness
def Game(agent_name, STOP):
    agent_list = []
    for i in range(len(agent_name)):
        x = agent_name[i]
        if x == 's1':
            a = Agent.StrategicAgent1(chr(65 + i) + " (Strate method 1) ")
            agent_list.append(a)
        elif x == 's2':
            a = Agent.StrategicAgent2(chr(65 + i) + " (Strate method 2) ")
            agent_list.append(a)
        elif x == 'mc':
            a = Agent.MonteCarloAgent(chr(65 + i) + " (Monte Carlo) ")
            agent_list.append(a)
        elif x == 'dp':
            a = Agent.DynamicProgrammingAgent(chr(65 + i) + " (Dynamic Programming) ")
            agent_list.append(a)
        elif x == 'rd':
            a = Agent.Agent(chr(65 + i) + " (Random) ")
            agent_list.append(a)

    agentA = agent_list[0]
    agentB = agent_list[1]
    agent1 = agentB
    agent2 = agentA
    round = 0
    while (1):
        round += 1
        agent1, agent2 = agent2, agent1
        while (1):
            while (1):
                pos = agent1.position()
                result, sink_ship, sink_pos = agent2.Hit_Or_Not(pos)
                if result != ERROR:
                    break
            agent1.Update(pos, result, sink_ship, sink_pos)
            if STOP:
                print("PlayerA PlayerB")
                agentA.Print()
                print("Player " + agent1.Name() + " shoot (" +
                      chr(65 + pos[0]) + "," + str(pos[1]) + ")!")
                r = 'MISS' if result == MISS else 'HIT'
                print("It is " + r)
                input()
            if result != HIT:
                break
        if result == LOSE:
            print("Player" + agent1.Name() + " wins!")
            break
    agentA.Print()
    print("Take round " + str((round + 1) // 2) + " to WIN\n")
    if agent1 == agentA:
        return np.array([[1, (round + 1) // 2], [0, 0]])
    else:
        return np.array([[0, 0], [1, (round + 1) // 2]])
def simulateEvaluator():
    # alpha-beta vs alpha-beta, varying the evaluator
    for i in range(1, 7):
        for j in range(1, 7):
            if i != j:
                sim = Simulation(ag.AlphaBetaAgent(1, 4, ev.Evaluator(i)),
                                 ag.AlphaBetaAgent(2, 4, ev.Evaluator(j)),
                                 1, i, j)
def main():
    ag = Agent()
    knowledge_base(ag)
    # print(kb)
    print('curLoc', ag.FindCurrentLocation())
    navigate(ag)
    print(call)
    print("original path", safe)
    path_set = set()
def simulateBestAlphaModifyDepth(evaluator):
    ev1 = ev.Evaluator(evaluator)
    ev2 = ev.Evaluator(evaluator)
    # Deterministic: alpha-beta vs minimax
    for i in range(1, 3):
        ag1 = ag.MinimaxAgent(1, i, ev1)
        ag2 = ag.MinimaxAgent(1, 4, ev2)
        sim = Simulation(ag1, ag2, 1, evaluator, evaluator)
def Predict_action_value(main_agent, Agent_Set, V_pred, W_pred, base_network):
    Other_Set, Value_list = [], []
    network = Network_Dict[str(base_network)]
    VO_flag = False
    for agent in Agent_Set:
        if main_agent.name != agent.name:
            Other_Set.append(agent)
    Comb_Set = Combination.Combination_list(Other_Set, base_network - 1)
    pred_state = main_agent.Predit_state(V_pred, W_pred, dt=deltaT)
    obs_gx, obs_gy, obs_gth = main_agent.Relative_observed_goal(
        pred_state.Px, pred_state.Py, pred_state.Pth)
    for Comb_item in Comb_Set:
        other_state = [V_pred, W_pred, main_agent.state.r,
                       obs_gx, obs_gy, obs_gth, V_max]
        for agent in Comb_item:
            obs_state = agent.Relative_observed_state(
                pred_state.Px, pred_state.Py, pred_state.Pth)
            m11, m12, m13 = 0, 0, 0
            if main_agent.rank > agent.rank:
                m11 = 1
            elif main_agent.rank < agent.rank:
                m13 = 1
            else:
                m12 = 1
            VO_flag = VO_flag or Agent.If_in_VO(pred_state, obs_state,
                                                time_factor='INF')
            other_state += [m11, m12, m13, obs_state.x, obs_state.y,
                            obs_state.Vx, obs_state.Vy, obs_state.r]
        value_matrix = network.get_value(np.array(other_state))
        Value_list.append(value_matrix[0][0])
    Value = min(Value_list)

    VO_R = 0
    if not VO_flag:
        VO_R = 0.5
    else:
        VO_R = 0
        # print('in VO')

    R = 0
    main_agent_pred = Agent.Agent('Pred', pred_state.Px, pred_state.Py,
                                  pred_state.Pth, pred_state.V, pred_state.W,
                                  pred_state.r, main_agent.gx, main_agent.gy,
                                  main_agent.gth, main_agent.rank)
    if Check_Goal(main_agent_pred, Calculate_distance(resX, resY, 0, 0), resTH):
        R = Arrived_reward
    for item in Agent_Set:
        if main_agent.name != item.name:
            if Check_Collision(main_agent, item):
                if main_agent.rank > item.rank:
                    R = Collision_high_penalty
                elif main_agent.rank < item.rank:
                    R = Collision_low_penalty
                else:
                    R = Collision_equ_penalty
                break
    action_value = R + Value + VO_R
    return action_value
def simulateDepthM():
    ev1 = ev.Evaluator(1)
    ev2 = ev.Evaluator(1)
    for i in range(1, 9):
        sim = Simulation(ag.MinimaxAgent(1, i, ev1),
                         ag.AlphaBetaAgent(2, 4, ev2), 1, "", 1)
        sim = Simulation(ag.AlphaBetaAgent(1, 4, ev2),
                         ag.MinimaxAgent(2, i, ev1), 1, 1, "")
def __init__(self, deck, game_mode):
    # make this player's own copy of the deck
    self.game_deck = list()
    for cards in deck:
        self.game_deck.append(cards)
    self.hand = list()
    self.game_mode = game_mode
    # initializes the Agent (the other player)
    self.opponent = Agent(self.game_deck)
def playMinesweeperWithNum(self):
    self.agent = Agent(self.dimension, self.numberOfMines)
    currentBox = self.agent.selectABox()
    while self.agent.solvedBoxes < self.dimension**2:
        print(self.agent.unseenBoxes, self.agent.solvedBoxes)
        print("Query: ", currentBox.row, currentBox.col)
        queriedBox = self.env.queryBox(currentBox)
        self.agent.updateBox(queriedBox)
        currentBox = self.agent.selectABox()
    self.drawMineField(self.env.mineField)
def playMinesweeperGivenNumber(self):
    self.agent = Agent(self.dimension, self.numberOfMines)
    current_box = self.agent.pickABox()
    while self.agent.solvedBoxes < self.dimension**2:
        print(self.agent.unseenBoxes, self.agent.solvedBoxes)
        print("Query: ", current_box.row, current_box.col)
        queried_box = self.environment.QueryMethodBox(current_box)
        self.agent.updateBoxInfo(queried_box)
        current_box = self.agent.pickABox()
    self.mineFieldSketch(self.environment.mineField)
def train(data: ndarray, window_size: int, agent: Agent, batch_size: int,
          epochs: int = 3):
    l = len(data) - 1
    states_buy = []
    states_sell = []
    for e in range(epochs + 1):
        print(f"Episode: {e} / {epochs}")
        state = getState(data, 0, window_size + 1)
        total_profit = 0
        agent.inventory = []
        for t in range(l):
            action = agent.act(state)
            reward = 0
            if action == 1:  # buy
                agent.inventory.append(data[t])
                states_buy.append(t)
                print(f"Buy: {formatPrice(data[t])}")
            elif action == 2 and len(agent.inventory) > 0:  # sell
                bought_price = agent.inventory.pop(0)
                reward = max(data[t] - bought_price, 0)
                total_profit += data[t] - bought_price
                states_sell.append(t)
                print(f"Sell: {formatPrice(data[t])} | "
                      f"Profit: {formatPrice(data[t] - bought_price)}")
            done = True if t == l - 1 else False
            next_state = getState(data, t + 1, window_size + 1)
            agent.memory.append((state, action, reward, next_state, done))
            state = next_state
            if done:
                print("-----------------------------------")
                print(f"Total Profit: {formatPrice(total_profit)}")
                print("-----------------------------------")
                plot_decisions(data, states_buy, states_sell, total_profit)
            if len(agent.memory) > batch_size:
                agent.expReplay(batch_size)
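# A possible shape for the getState() helper used above, following the common
# "q-trader" pattern: an n-point window of sigmoid-squashed day-over-day price
# differences, padded at the start of the series. This is an assumption about
# the helper, not the original implementation.
import numpy as np

def getState(data, t, n):
    d = t - n + 1
    # pad with the first price when the window extends before the series start
    block = data[d:t + 1] if d >= 0 else np.concatenate((np.full(-d, data[0]), data[0:t + 1]))
    diffs = np.diff(block)
    return (1.0 / (1.0 + np.exp(-diffs))).reshape(1, -1)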
def __init__(self):
    self.max_length = 100
    self.towards_ref = None
    self.mean_ref = None
    self.away_ref = None
    self.me = Agent(200, 200)
    self.agents = [
        Agent(random.randint(10, 990), random.randint(10, 590))
        for i in range(50)
    ]
    self.me.velocity = Vector2(0.1, 0)
def new_agent(self):
    x = gauss(self.mean_x, self.sd_x)
    y = gauss(self.mean_y, self.sd_y)
    speak = expovariate(1.0 / self.mean_speak)
    learn = expovariate(1.0 / self.mean_learn)
    innovation = expovariate(1.0 / self.mean_innovation)
    agent = Agent(x, y, speak, learn, innovation)
    self.agents.append(agent)
    Sim.activate(agent, agent.go())
    return agent
def setup(dd, behavID, preyStart, hunter1Start, hunter2Start):
    sim = s.AgentSimulation()
    preyBehav = a.runAwayBehaviour(distanceTrapped=-1.0, distanceDanger=dd)
    basepath = "/home/i7674211/DifferentProjects/PI_code/simulator/behaviourGeneration/group/behav"
    huntBehav1 = a.scriptedBehaviour(basepath + str(behavID) + ".py")
    huntBehav2 = a.scriptedBehaviour(basepath + str(behavID) + ".py")
    prey = a.Agent(maxVelocity=2.0, startPos=preyStart, drag=0.1, behav=preyBehav)
    hunter1 = a.Agent(behav=huntBehav1, startPos=hunter1Start)
    hunter2 = a.Agent(behav=huntBehav2, startPos=hunter2Start)
    sim.addAgent(prey)
    sim.addAgent(hunter1)
    sim.addAgent(hunter2)
    return sim
def __init__(self, agent_count, smart_agent_count):
    self.agents = []
    self.bids = []
    self.asks = []
    self.price_history = []
    self.price_history_high = []
    self.price_history_low = []
    for i in range(agent_count):
        self.agents.append(Agent(False))
    for i in range(smart_agent_count):
        model_agent = self.agents[i]
        agent = Agent(True)
        agent.consumption_value_low = model_agent.consumption_value_low
        agent.consumption_value_high = model_agent.consumption_value_high
        agent.production_value = model_agent.production_value
        self.agents.append(agent)
def main():
    p1Gui = Gui("Player 1", 1)
    p2Gui = Gui("Player 2", 2)
    pos1 = getRandPos()
    pos2 = getRandPos()
    agent = Agent()
    opAction = [0, 0, 0]
    result2 = None
    p1Gui.drawStar(pos1)
    while True:
        print(pos2)
        type1, param1 = opponent(p1Gui, p2Gui)
        result1 = executeHumanAction(p1Gui, p2Gui, type1, param1, pos1, pos2)
        if type1 == move:
            direction = p1Gui.getDirection(pos1, param1)
            pos1 = param1
            param1 = direction
            p1Gui.clean()
            p1Gui.drawStar(pos1)
        opAction[0] = type1
        opAction[1] = param1
        opAction[2] = result1
        type2, param2 = agent.play(2, result2, opAction, pos2)
        result2 = executeCpuAction(p1Gui, type2, param2, pos2, pos1)
        if type2 == move:
            pos2 = param2
for line in reader:
    data = dict(zip(header, line))
    data['received_pizza'] = json.loads(data['received_pizza'].lower())
    kaggleRequests.append(data)
fh.close()

fh = open(logfile, 'r')
reader = csv.reader(fh)
header = reader.__next__()
for field in ['ID', 'Gen', 'Score']:
    header.remove(field)

bestScore = 0.0
bestAgent = None
for i, agent in enumerate(reader):
    if i >= numAgents:
        break
    agentObj = Agent.deserializeAgent(header, agent)
    score = GeneticAlgorithm.runAgentAgainstTest(agentObj, kaggleRequests, True)
    if score > bestScore:
        bestScore = score
        bestAgent = agentObj
    if bestScore > goal:
        GeneticAlgorithm.runAgentAgainstTest(agentObj, kaggleRequests, False)
        break
fh.close()
import random
import sys
import copy
import operator
from Observation import *
from Reward import *
from Action import *
from Environment import *
from Agent import *
from random import Random

# Make an agent
gridEnvironment = Environment()
gridAgent = Agent(gridEnvironment)
gridAgent.agent_reset()

# A sequence of actions to take
actions = [1, 3, 3, 0, 1]

# The last state
workingObservation = gridAgent.copyObservation(gridAgent.initialObs)

# Make sure there is an entry for the last state in the v table
gridAgent.initializeVtableStateEntry(workingObservation.worldState)

# Report the initial v table
print("Initial V Table:")
print(gridAgent.v_table)
print("---")

# Execute the sequence of actions
for a in actions:
bookers = []
times = []
seed = random.randrange(10, 30000, 1)
for iteration in range(0, plays):
    start = datetime.datetime.now()
    seed += 1
    logger.debug("Creating {} random adventures: ".format(numAdv))
    advList = Adventure.createAdvList(numAdv, seed)
    for adv in advList:
        logger.debug("Adventure ID {}: {} gold reward, needs: {}".format(
            id(adv), adv.reward, adv.skillMap))
    logger.debug("\n Creating {} random adventurers: ".format(numAgents))
    agentList = Agent.createAgentList(numAgents, advList, seed)
    for a in agentList:
        logger.debug("Adventurer ID {}: Skills {}, Costs {}".format(
            id(a), a.skillList, [(id(x), y) for x, y in a.costs.items()]))
    logger.debug("\n Creating booker: ")
    booker = Booker(agentList, advList)
    upperBound = booker.upperBound
    greedyBound = booker.greedyBound
    logger.debug("Upper Bound for this game is: {} gold".format(upperBound))
    logger.debug("Greedy Bound for this game is: {} gold".format(greedyBound))
    closedAdventures = booker.completedAdventures
    openAdventures = booker.adventures
    booker.run(iters, True)
    logger.debug(booker.reward)
detail = numpy.empty((nb_ite, nb_agents))
detail2 = numpy.empty((nb_ite, nb_agents))
tab_runs = numpy.empty((nb_ite, nb_runs))
tab_ev_qual = numpy.zeros((nb_events, nb_runs))
tab_fail = numpy.zeros((nb_ite, nb_runs))

print(nb_agents, " agents with ", mean_flex, " avg flexibility.")
print(nb_events, "events of ", dur_shed, " with ", dur_delay, " delay and ",
      dur_recover, " recover.")
print("Shedding capacity ordered : ", capacity)
print("Starting for ", nb_runs, " x ", nb_ite, " iterations.")

rounds = [6, 5, 10, 15, 18]
cap = 0
for j in range(nb_runs):
    # random.seed(seed)
    Ag.clear()
    connect = 6  # rounds[j]  # round(nb_agents / (j+1))
    for i in range(nb_agents):
        Ag.Agent(b_flex[i % len(b_flex)], probs[i % len(probs)], connect)
    event_cnt = 0
    start = None
    end = None
    for i in range(nb_ite):
        if (i - dur_prep) % dur_total_event == 0:
            start = i + dur_delay
            end = start + dur_shed
            order = Order(capacity, start, end)
            Ag.send({Ag.ORDER: Agregate(AgType.COM, order, i)}, connect)
        total_flex = total_flex_w = total_x = 0
        total_conso = total_order = mode0 = mode1 = mode2 = mode3 = total_fail = 0
        for a in Ag.Agent.agentList:
from Reward import *
from Action import *
from Agent import *
from Environment import *
from UnawareEnvironment import *
from HelplessEnvironment import *
from MatrixEnvironment import *
import numpy

# Set up environment
gridEnvironment = Environment()
gridEnvironment.randomStart = False
gridEnvironment.humanWander = False

# Set up agent
gridAgent = Agent(gridEnvironment)

# Training episodes
episodes = 10000

# This is where learning happens
for i in range(episodes):
    gridAgent.qLearn(gridAgent.initialObs)
    if i % 1000 == 0:
        print(i)

# Use this to prompt user for the initial state (agent x,y and human x,y)
'''
print "agent x?"
ax = sys.stdin.readline()
import random
import sys
import copy
import operator
from Observation import *
from Reward import *
from Action import *
from Environment import *
from Agent import *
from random import Random

# Make an agent
gridEnvironment = Environment()
gridAgent = Agent(gridEnvironment)

# How many states to make?
numStates = 10
states = []

# Make some states
for i in range(numStates):
    # Make a state
    state = [random.randint(1, gridEnvironment.width - 1),
             random.randint(1, gridEnvironment.height - 1),
             True,
             random.randint(1, gridEnvironment.width - 1),
             random.randint(1, gridEnvironment.height - 1),
             False, False, False]
    states.append(state)
    # Create an entry in v_table for the state
    entry = []
    for j in range(gridAgent.numActions):
        entry.append((random.random() - 0.5) * 100.0)
    gridAgent.v_table[gridAgent.calculateFlatState(state)] = entry

print("v table:")
Agent.Agent.cdic(d, ce, 27)
print(d)
sys.exit()

a = Agent.Agent(3, 4)
print(a.lvl())
sys.exit()


class x:
    a = 5

    def b(self):
        print(self.a)


c = x()
c.b()
sys.exit()

a = Agent.Agent(3)
b = Agent.Agent(3)
c = Agent.Agent(3)
d = Agent.Agent(3)
Agent.placeall(20, 20)
""" Created on Nov 25, 2011 @author: tlmaloney """ import unittest import Agent import Asset as Asset # Create asset asset = Asset.make(1, "Asset") # Create amount of asset, in this case one unit of asset amount = 1 # Create two agents agent1 = Agent.make(1, "Agent1") agent2 = Agent.make(2, "Agent2") class TestAgent(unittest.TestCase): def test_change_asset_ownership(self): agent1.change_asset_ownership(asset, amount) self.assertEqual(agent1.owned_assets[asset], 1) def test_change_asset_possession(self): agent1.change_asset_possession(asset, amount) self.assertEqual(agent1.possessed_assets[asset], 1) def test_transfer_ownership(self): agent1.transfer_ownership(asset, agent2, amount) self.assertEqual(agent2.owned_assets[asset], 1) self.assertEqual(agent1.owned_assets[asset], 0)
# 0: human does not play
# 1: human plays as the bot
# 2: human plays as the enemy
play = 2

# Max reward received in any iteration
maxr = None

# Set up environment for initial training
gridEnvironment = Environment()
gridEnvironment.randomStart = False
gridEnvironment.enemyMode = 2
gridEnvironment.verbose = 0

# Set up agent
gridAgent = Agent(gridEnvironment)
gridAgent.verbose = False

# This is where learning happens
for i in range(episodes):
    # Train
    gridAgent.agent_reset()
    gridAgent.qLearn(gridAgent.initialObs)

    # Test
    gridAgent.agent_reset()
    gridAgent.executePolicy(gridAgent.initialObs)

    # Report
    totalr = gridAgent.totalReward
    if maxr == None or totalr > maxr:
        maxr = totalr