def get_agents(db, arg):
    listObj = {
        "total": 0,
        "list": [],
    }
    start = arg["paras"].get("start") or 0
    limit = arg["paras"].get("limit") or 100
    cond = "WHERE 1=1 "
    ret = db.select(TB_AGENT, cond=cond, limit=int(limit), offset=int(start))
    if ret == -1:
        ERROR("get agent list error")
        return (DB_ERR, None)
    # Build one Agent object per returned row
    for row in db.cur:
        obj = dbmysql.row_to_dict(TB_AGENT, row)
        item = Agent(db, dbObj=obj)
        item.loadFromObj()
        listObj["list"].append(item.toObj())
    listObj["total"] = getAgentCount(db)
    return (OCT_SUCCESS, listObj)
def add_agent(db, arg):
    paras = arg["paras"]
    agent = Agent(db)
    agent.name = paras["name"]
    agent.address = paras["address"]
    ret = agent.add()
    return (ret, None)
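# A minimal, hypothetical caller for the two handlers above, assuming `db` is an open
# dbmysql connection and that DB_ERR / OCT_SUCCESS come from the surrounding module.
# The parameter values are illustrative only.
def example_agent_calls(db):
    code, payload = get_agents(db, {"paras": {"start": 0, "limit": 20}})
    if code == OCT_SUCCESS:
        print("total agents:", payload["total"])
    code, _ = add_agent(db, {"paras": {"name": "agent-1", "address": "10.0.0.1"}})
    return code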
def agent_create():
    # Create a new agent
    agent_fields = agent_schema.load(request.json)
    new_agent = Agent()
    new_agent.name = agent_fields["name"]
    db.session.add(new_agent)
    db.session.commit()
    return jsonify(agent_schema.dump(new_agent))
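# A sketch of how agent_create() might be registered, assuming a Flask app with a
# blueprint; the blueprint name and URL prefix are assumptions, not from the original.
from flask import Blueprint

agents = Blueprint("agents", __name__, url_prefix="/agents")
agents.route("/", methods=["POST"])(agent_create)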
def main():
    # ============= Initialize variables and objects ===========
    agent = Agent(AGENT_PARAMS)

    # ============= Inspect input-layer weights =============#
    w = agent.Q_eval[1].input.weight.data.numpy()
    w1 = w[:, 0]  # level
    w2 = w[:, 1]  # gradient
    w3 = w[:, 2]  # is above 0.5
    w4 = w[:, 3]  # previous valve position
    print(w)
    print(f"w1 {sum(w1)}")
    print(f"w2 {sum(w2)}")
    print(f"w3 {sum(w3)}")
    print(f"w4 {sum(w4)}")

    # Create bar charts, one subplot per input feature
    plt.subplot(2, 2, 1)
    plt.bar(range(len(w1)), w1)
    plt.subplot(2, 2, 2)
    plt.bar(range(len(w2)), w2)
    plt.subplot(2, 2, 3)
    plt.bar(range(len(w3)), w3)
    plt.subplot(2, 2, 4)
    plt.bar(range(len(w4)), w4)

    # Show graphic
    plt.show()
def addAgent(db, agentId, name, addr):
    agent = Agent(db, agentId)
    agent.name = name
    agent.address = addr
    agent.add()
    return agent
def seed_db():
    from models.Office import Office
    from models.Agent import Agent
    from models.Region import Region
    from models.Salesperson import Salesperson
    from faker import Faker
    faker = Faker()

    # Mcgee Real Estate Seeds
    for i in range(1, 9):
        office = Office()
        office.name = REGION_NAMES[i]
        db.session.add(office)
    db.session.commit()

    for i in range(20):
        agent = Agent()
        namesplit = faker.name().split(" ")
        agent.first_name = namesplit[0]
        agent.last_name = namesplit[1]
        agent.email = f"{namesplit[0]}{namesplit[1]}@mcgee.com"
        agent.office_id = random.randint(1, 8)
        db.session.add(agent)

    # Decided to use set region ids instead
    # May Black Real Estate Seeds
    # for i in range(20):
    #     states = ["NSW", "QLD", "SA", "TAS", "VIC", "WA", "ACT", "NT"]
    #     region = Region()
    #     region.name = random.choice(states)
    #     db.session.add(region)
    for i in range(1, 9):
        region = Region()
        region.name = REGION_NAMES[i]
        db.session.add(region)
    db.session.commit()

    for i in range(20):
        salesperson = Salesperson()
        # Faker sometimes returns more than two words for a name - keep only the first two
        namesplit = faker.name().split(" ")
        salesperson.name = f"{namesplit[0]} {namesplit[1]}"
        salesperson.email = f"{namesplit[0]}{namesplit[1]}@mcgee.com"
        salesperson.region_id = random.randint(1, 8)
        db.session.add(salesperson)
    db.session.commit()
    print("Tables seeded")
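# Hypothetical wiring for seed_db(): it is typically exposed as a custom Flask CLI
# command so the tables can be populated from the shell. The command name and the
# `app` object are assumptions, not from the original code.
@app.cli.command("db-seed")
def db_seed_command():
    seed_db()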
def main():
    # ============= Initialize variables ===========#
    environment = Environment()
    agent = Agent()

    # ================= Running episodes =================#
    all_rewards = []
    batch_size = BATCH_SIZE
    for e in range(EPISODES):
        state, action, next_state, episode_reward = environment.reset()  # Reset level in tank
        # Running through states in the episode
        for t in range(MAX_TIME):
            action = agent.act(state)
            z = agent.action_choices[action]
            terminated, next_state = environment.get_next_state(z, state)
            reward = environment.get_reward(next_state, terminated, t)
            agent.remember(state, action, next_state, reward, terminated)
            episode_reward += reward
            state = next_state  # advance to the new state before the next action
            if terminated:
                break
            if environment.show_rendering:
                environment.render(z, next_state[-1])
            if len(agent.memory) > batch_size:
                agent.replay(batch_size)
            if keyboard.is_pressed('ctrl+c'):
                break
        # agent.decay_exploration()
        all_rewards.append(episode_reward)
        if keyboard.is_pressed('ctrl+c'):
            break
        # Live plot rewards
        if LIVE_REWARD_PLOT:
            environment.plot(all_rewards, agent.epsilon)
        if not environment.running:
            break

    print("##### {} EPISODES DONE #####".format(e))
    print("Max rewards for all episodes: {}".format(np.max(all_rewards)))
    print("Mean rewards for the last 10 episodes: {}".format(np.mean(all_rewards[-10:])))
    if SAVE_ANN_MODEL:
        print("ANN_Model was saved")
def background_update():
    global commonDataStruct
    global bg_worker
    with dataLock:
        with app.app_context():
            try:
                # Find all salespersons whose region id is 2 (QLD)
                salesperson_query = db.session.query(Salesperson).filter(Salesperson.region_id == 2)
                updated = False
                print('Comparing McGee and Mayblack databases...')
                for salesperson in salesperson_query:
                    # Look for an agent with the same email address
                    agent_query = db.session.query(Agent).filter(Agent.email == salesperson.email)
                    # An empty result means the salesperson is not an agent yet, so add them
                    if len(list(agent_query)) == 0:
                        first_name, last_name = salesperson.name.split(" ")
                        agent = Agent(
                            first_name=first_name,
                            last_name=last_name,
                            email=salesperson.email,
                            office_id=salesperson.region_id,
                        )
                        db.session.add(agent)
                        updated = True
                db.session.commit()
                if updated:
                    print('Differences in databases found, update completed, sleeping...')
                else:
                    print('Nothing found to update, sleeping...')
                # Schedule the next run
                bg_worker = threading.Timer(POOL_TIME, background_update, ())
                bg_worker.start()
            except (NameError, exc.ProgrammingError, NoResultFound):
                return
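# A sketch of how the polling job above is usually bootstrapped, assuming POOL_TIME,
# dataLock and bg_worker are module-level globals used by background_update(). The
# interval value is an assumption.
import threading

POOL_TIME = 60  # seconds between runs (assumed)
dataLock = threading.Lock()

def start_background_worker():
    global bg_worker
    # First run is scheduled here; background_update() re-arms the timer itself.
    bg_worker = threading.Timer(POOL_TIME, background_update, ())
    bg_worker.start()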
def main():
    # ============= Initialize variables and objects ===========#
    max_mean_reward = MAIN_PARAMS["MAX_MEAN_REWARD"]
    environment = Environment(TANK_PARAMS, TANK_DIST, MAIN_PARAMS)
    agent = Agent(AGENT_PARAMS)
    mean_episode = MAIN_PARAMS["MEAN_EPISODE"]
    episodes = MAIN_PARAMS["EPISODES"]
    all_rewards = []
    all_mean_rewards = []
    t_mean = []

    # ================= Running episodes =================#
    try:
        for e in range(episodes):
            states, episode_reward = environment.reset()  # Reset level in tank
            for t in range(MAIN_PARAMS["MAX_TIME"]):
                actions = agent.act(states[-1])  # get action choice from state
                z = agent.get_z(actions)
                terminated, next_state = environment.get_next_state(
                    z, states[-1], t
                )  # Calculate next state with action
                rewards = sum_rewards(
                    next_state, terminated, get_reward
                )  # get reward from transition to next state

                # Store data
                rewards.append(np.sum(rewards))
                episode_reward.append(rewards)
                states.append(next_state)
                agent.remember(states, rewards, terminated, t)

                if environment.show_rendering:
                    environment.render(z)
                if True in terminated:
                    break

            episode_reward = np.array(episode_reward)
            episode_total_reward = []
            t_mean.append(t)
            for i in range(environment.n_tanks + 1):
                episode_total_reward.append(sum(episode_reward[:, i]))
            all_rewards.append(episode_total_reward)

            # Print mean reward and save better models
            if e % mean_episode == 0 and e != 0:
                mean_reward = np.array(all_rewards[-mean_episode:])
                mean_r = []
                t_mean = int(np.mean(t_mean))
                for i in range(environment.n_tanks + 1):
                    mean_r.append(np.mean(mean_reward[:, i]))
                all_mean_rewards.append(mean_r)
                print(
                    f"Mean {mean_episode} of {e}/{episodes} episodes "
                    f"### timestep {t_mean+1} ### tot reward: {mean_r[-1]} "
                    f"r1: {mean_r[0]} ex1: {round(agent.epsilon[0],2)} "
                    f"r2: {mean_r[1]} ex2: {round(agent.epsilon[1],2)}"
                )
                t_mean = []
                if mean_r[-1] >= max_mean_reward:
                    agent.save_trained_model()
                    max_mean_reward = mean_r[-1]

            # Train model
            if agent.is_ready():
                agent.Qreplay(e)

            if not environment.running:
                break
            # if agent.epsilon <= agent.epsilon_min:
            #     break
    except KeyboardInterrupt:
        pass

    print("Memory length: {}".format(len(agent.memory)))
    print("##### {} EPISODES DONE #####".format(e + 1))
    print("Max rewards for all episodes: {}".format(np.max(all_rewards)))

    all_mean_rewards = np.array(all_mean_rewards)
    labels = ["Tank 1", "Tank 2"]
    for i in range(environment.n_tanks):
        plt.plot(all_mean_rewards[:, i], label=labels[i])
    plt.legend()
    plt.show()
    plt.plot(all_mean_rewards[:, -1], label="Total rewards")
    plt.ylabel("Mean rewards of last {} episodes".format(mean_episode))
    plt.legend()
    plt.show()
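# A guess at the shape of MAIN_PARAMS read by the training loops in this file; only
# the keys are taken from the code above, the values are illustrative assumptions.
MAIN_PARAMS = {
    "EPISODES": 10000,        # number of training episodes
    "MAX_TIME": 200,          # timesteps per episode
    "MEAN_EPISODE": 50,       # window for the printed mean reward
    "MAX_MEAN_REWARD": 200,   # threshold for saving an improved model
}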
def main():
    # ============= Initialize variables and objects ===========#
    max_mean_reward = 50 * len(TANK_PARAMS)
    environment = Environment(TANK_PARAMS, TANK_DIST, MAIN_PARAMS)
    agent = Agent(AGENT_PARAMS)
    mean_episode = MAIN_PARAMS["MEAN_EPISODE"]
    episodes = MAIN_PARAMS["EPISODES"]
    all_rewards = []
    all_mean_rewards = []

    # ================= Running episodes =================#
    try:
        for e in range(episodes):
            states, episode_reward = environment.reset()  # Reset level in tank
            for t in range(MAIN_PARAMS["MAX_TIME"]):
                actions = agent.act(states[-1])  # get action choice from state
                z = agent.get_z(actions)
                terminated, next_state = environment.get_next_state(
                    z, states[-1], t
                )  # Calculate next state with action
                rewards = sum_rewards(
                    next_state, terminated, get_reward
                )  # get reward from transition to next state

                # Store data
                episode_reward.append(np.sum(rewards))
                states.append(next_state)
                agent.remember(states, rewards, terminated, t)

                if environment.show_rendering:
                    environment.render(z)
                if True in terminated:
                    break

            all_rewards.append(np.sum(np.array(episode_reward)))

            # Print mean reward and save better models
            if e % mean_episode == 0 and e != 0:
                mean_reward = np.mean(all_rewards[-mean_episode:])
                all_mean_rewards.append(mean_reward)
                print(
                    "{} of {}/{} episodes reward: {} exp_1: {} exp_2: {}".format(
                        mean_episode,
                        e,
                        episodes,
                        round(mean_reward, 2),
                        round(agent.epsilon[0], 2),
                        round(agent.epsilon[1], 2),
                    )
                )
                if agent.save_model_bool:
                    max_mean_reward = agent.save_model(mean_reward, max_mean_reward)

            # Train model
            if agent.is_ready():
                agent.Qreplay(e)

            if keyboard.is_pressed("ctrl+x"):
                break
            if environment.live_plot:
                environment.plot(all_rewards, agent.epsilon)
            if not environment.running:
                break
            # if agent.epsilon <= agent.epsilon_min:
            #     break
    except KeyboardInterrupt:
        pass

    print("Memory length: {}".format(len(agent.memory)))
    print("##### {} EPISODES DONE #####".format(e + 1))
    print("Max rewards for all episodes: {}".format(np.max(all_rewards)))

    plt.ioff()
    plt.clf()
    x_range = np.arange(0, e - e % mean_episode, mean_episode)
    plt.plot(x_range, all_mean_rewards)
    plt.ylabel("Mean rewards of last {} episodes".format(mean_episode))
    plt.show()
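# A plausible sketch of the sum_rewards() helper called by the loops above: it appears
# to return one reward per tank by applying the supplied get_reward() function to each
# tank's state. The per-tank signature of get_reward() is an assumption.
def sum_rewards(next_state, terminated, get_reward):
    rewards = []
    for tank_state, tank_done in zip(next_state, terminated):
        rewards.append(get_reward(tank_state, tank_done))
    return rewards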
def main():
    # ============= Initialize variables and objects ===========#
    environment = Environment(TANK_PARAMS, TANK_DIST, MAIN_PARAMS)
    agent = Agent(AGENT_PARAMS)
    z = []
    h = []
    d = []

    # ================= Running episodes =================#
    state, episode_reward = environment.reset()
    h_ = np.array([state[0][0][0], state[0][1][0]])
    h.append(h_)
    for t in range(MAIN_PARAMS["MAX_TIME"]):
        action = agent.act(state[-1])  # get action choice from state
        z_ = agent.action_choices[action]  # convert action choice into valve position
        z.append(np.array(z_))
        terminated, next_state = environment.get_next_state(
            z[-1], state[-1], t
        )  # Calculate next state with action
        reward = get_reward(
            next_state, terminated
        )  # get reward from transition to next state

        # Store data
        episode_reward.append(reward)
        state.append(next_state)
        h_ = []
        d_ = []
        for i in range(agent.n_tanks):
            d_.append(environment.tanks[i].dist.flow[t] + environment.q_inn[i])
            h_.append(np.array(next_state[i][0]))
        d.append(d_)
        h.append(h_)

        if environment.show_rendering:
            environment.render(z[-1])
        if True in terminated:
            break
        if keyboard.is_pressed("ctrl+x"):
            break
        if not environment.running:
            break

    print(np.sum(episode_reward))

    _, (ax1, ax2, ax3) = plt.subplots(3, sharex=False, sharey=False)
    d = np.array(d)
    h = np.array(h[:-1])
    z = np.array(z)
    h *= 10

    ax1.plot(h[:-1, 0], color="peru", label="Tank 1")
    ax1.plot(h[:-1, 1], color="firebrick", label="Tank 2")
    ax1.set_ylabel("Level")
    ax1.legend(loc="upper right")
    ax1.set_ylim(0, 10)

    ax2.plot(z[1:, 0], color="peru", label="Tank 1")
    ax2.plot(z[1:, 1], color="firebrick", label="Tank 2")
    ax2.legend(loc="upper right")
    ax2.set_ylabel("Valve")
    ax2.set_ylim(0, 1.01)

    ax3.plot(d[:, 0], color="peru", label="Tank 1")
    ax3.plot(d[:, 1], color="firebrick", label="Tank 2")
    ax3.set_ylabel("Disturbance")
    ax3.legend(loc="upper right")

    # plt.legend([l1, l2, l3], ["Tank height", "Valve position", "Disturbance"])
    plt.tight_layout()
    plt.xlabel("Time")
    plt.show()
def main():
    # ============= Initialize variables and objects ===========#
    environment = Environment(TANK_PARAMS, TANK_DIST, MAIN_PARAMS)
    agent = Agent(AGENT_PARAMS)
    z = []
    h = []
    d = []

    # ================= Running episodes =================#
    state, episode_reward = environment.reset()
    h_ = np.array([state[0][i][0] for i in range(6)])
    h.append(h_)
    for t in range(MAIN_PARAMS["MAX_TIME"]):
        action = agent.act(state[-1])  # get action choice from state
        z_ = agent.action_choices[action]  # convert action choice into valve position
        z.append(np.array(z_))
        terminated, next_state = environment.get_next_state(
            z[-1], state[-1], t
        )  # Calculate next state with action
        reward = sum_rewards(
            next_state, terminated, get_reward
        )  # get reward from transition to next state

        # Store data
        episode_reward.append(reward)
        state.append(next_state)
        h_ = []
        d_ = []
        for i in range(agent.n_tanks):
            d_.append(environment.tanks[i].dist.flow[t - 1] + environment.q_inn[i])
            h_.append(np.array(next_state[i][0]))
        d.append(d_)
        h.append(h_)

        if environment.show_rendering:
            environment.render(z[-1])
        if True in terminated:
            break
        if not environment.running:
            break

    colors = [
        "peru",
        "firebrick",
        "darkslategray",
        "darkviolet",
        "mediumseagreen",
        "darkcyan",
    ]
    print(f"reward: {np.sum(episode_reward)}")
    h = np.array(h) * 10
    d = np.array(d)
    z = np.array(z)

    # One figure per group of three tanks
    for i in range(2):
        _, (ax1, ax2, ax3) = plt.subplots(3, sharex=False, sharey=False)
        ax1.plot(h[1:-1, 0 + i * 3], color=colors[0 + i * 3], label="Tank {}".format(1 + i * 3))
        ax1.plot(h[1:-1, 1 + i * 3], color=colors[1 + i * 3], label="Tank {}".format(2 + i * 3))
        ax1.plot(h[1:-1, 2 + i * 3], color=colors[2 + i * 3], label="Tank {}".format(3 + i * 3))
        ax1.set_ylabel("Level")
        ax1.legend(loc="upper left")
        ax1.set_ylim(2.5, 7.5)

        ax2.plot(z[1:, 0 + i * 3], color=colors[0 + i * 3], label="Tank {}".format(1 + i * 3))
        ax2.plot(z[1:, 1 + i * 3], color=colors[1 + i * 3], label="Tank {}".format(2 + i * 3))
        ax2.plot(z[1:, 2 + i * 3], color=colors[2 + i * 3], label="Tank {}".format(3 + i * 3))
        ax2.set_ylabel("Valve")
        ax2.legend(loc="upper left")
        ax2.set_ylim(-0.01, 1.01)

        ax3.plot(d[1:-1, 0 + i * 3], color=colors[0 + i * 3], label="Tank {}".format(1 + i * 3))
        ax3.plot(d[1:-1, 1 + i * 3], color=colors[1 + i * 3], label="Tank {}".format(2 + i * 3))
        ax3.plot(d[1:-1, 2 + i * 3], color=colors[2 + i * 3], label="Tank {}".format(3 + i * 3))
        ax3.set_ylabel("Disturbance")
        ax3.legend(loc="upper left")
        ax3.set_ylim(-0.01, 4)

        plt.tight_layout()
        plt.xlabel("Time")
        plt.show()
def main():
    # ============= Initialize variables and objects ===========#
    max_mean_reward = MAIN_PARAMS["MAX_MEAN_REWARD"]
    environment = Environment(TANK_PARAMS, TANK_DIST, MAIN_PARAMS)
    agent = Agent(AGENT_PARAMS)
    mean_episode = MAIN_PARAMS["MEAN_EPISODE"]
    episodes = MAIN_PARAMS["EPISODES"]
    all_rewards = []
    all_mean_rewards = []
    t_mean = []

    # ================= Running episodes =================#
    try:
        for e in range(episodes):
            states, episode_reward = environment.reset()  # Reset level in tank
            for t in range(MAIN_PARAMS["MAX_TIME"]):
                z = agent.act(states[-1])  # get action choice from state
                terminated, next_state = environment.get_next_state(z, states[-1], t)
                states.append(next_state)
                rewards = sum_rewards(next_state, terminated, get_reward)
                rewards.append(np.sum(rewards))
                episode_reward.append(rewards)
                agent.remember(states, rewards, terminated, t)
                if environment.show_rendering:
                    environment.render(z)
                if True in terminated:
                    break

            # Collect summary of episode
            episode_reward = np.array(episode_reward)
            episode_total_reward = []
            t_mean.append(t)
            for i in range(environment.n_tanks + 1):
                episode_total_reward.append(sum(episode_reward[:, i]))
            all_rewards.append(episode_total_reward)

            # Print mean reward and save better models
            if e % mean_episode == 0 and e != 0:
                mean_reward = np.array(all_rewards[-mean_episode:])
                mean_r = []
                t_mean = int(np.mean(t_mean))
                for i in range(environment.n_tanks + 1):
                    mean_r.append(np.mean(mean_reward[:, i]))
                all_mean_rewards.append(mean_r)
                print(
                    f"Mean {mean_episode} of {e}/{episodes} episodes "
                    f"### timestep {t_mean+1} ### tot reward: {mean_r[-1]}"
                )
                t_mean = []
                if mean_r[-1] >= max_mean_reward:
                    agent.save_trained_model()
                    max_mean_reward = mean_r[-1]

            agent.PolicyGradientReplay(e)

            if not environment.running:
                break
    except KeyboardInterrupt:
        pass

    print("Memory length: {}".format(len(agent.memory)))
    print("##### {} EPISODES DONE #####".format(e + 1))
    print("Max rewards for all episodes: {}".format(np.max(all_rewards)))

    all_mean_rewards = np.array(all_mean_rewards)
    plt.plot(all_mean_rewards[:, -1], label="Total rewards")
    plt.ylabel("Mean rewards of last {} episodes".format(mean_episode))
    plt.legend()
    plt.show()