def init(self, arglist, env):
    num_thread = 1
    tf_config = tf.ConfigProto(inter_op_parallelism_threads=num_thread,
                               intra_op_parallelism_threads=num_thread)
    self.sess = tf.InteractiveSession(config=tf_config)

    # Make sure that training and testing are based on different seeds
    if arglist.restore:
        create_seed(np.random.randint(2))
    else:
        create_seed(arglist.seed)

    # Create agent trainers
    self.obs_shape_n = [env.observation_space[i].shape for i in range(env.n)]
    self.num_adversaries = min(env.n, arglist.num_adversaries)
    self.trainers = get_trainers(env, self.num_adversaries, self.obs_shape_n, arglist)
    print('Using good policy {} and adv policy {}'.format(
        arglist.good_policy, arglist.adv_policy))

    # Initialize
    U.initialize()

    # Load previous results, if necessary
    if arglist.load_dir == "":
        arglist.load_dir = arglist.save_dir
    if arglist.restore or arglist.benchmark:
        print('Loading previous state...')
        U.load_state(arglist.load_dir)

    self.episode_rewards = [0.0]  # sum of rewards for all agents
    self.agent_rewards = [[0.0] for _ in range(env.n)]  # individual agent reward
    self.final_ep_rewards = []  # sum of rewards for training curve
    self.final_ep_ag_rewards = []  # agent rewards for training curve
    self.agent_info = [[[]]]  # placeholder for benchmarking info
    self.saver = tf.train.Saver()
    self.obs_n = env.reset()
    self.train_step = 0
    self.t_start = time.time()
    self.new_episode = True  # start of a new episode (used for replay buffer)
    self.start_saving_comm = False

    if arglist.graph:
        print("Setting up graph writer!")
        # was `sess.graph`, which is undefined here; the session is stored on self
        self.writer = tf.summary.FileWriter("learning_curves/graph", self.sess.graph)

    if arglist.analysis:
        print("Starting analysis on {}...".format(arglist.analysis))
        if arglist.analysis != 'video':
            analyze.run_analysis(arglist, env, self.trainers)
        return  # should be a single run
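# The create_seed() helper called above (and in train() below) is not shown in
# this listing. A minimal sketch of what it presumably does -- seeding every RNG
# the training code touches so runs are reproducible -- is given here; the body
# is an assumption inferred from how it is called, not the actual implementation.
import random

import numpy as np
import tensorflow as tf


def create_seed(seed):
    # Hypothetical: seed Python, NumPy, and TensorFlow in one place.
    random.seed(seed)
    np.random.seed(seed)
    tf.set_random_seed(seed)  # TF1-style API, matching tf.ConfigProto above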
def post(self):
    im = self.__validate_post_args()
    if im is None:
        return
    result = analyze.run_analysis(im)
    self.write(utils.serialize(result))
    self.set_header('Content-Type', 'application/json; charset=UTF-8')
    self.set_status(200)
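# A minimal sketch of how a Tornado handler like the one above might be wired
# into an application. The class name (AnalysisHandler) and the route are
# assumptions; only the post() method itself comes from the source.
import tornado.ioloop
import tornado.web


def make_app():
    return tornado.web.Application([
        (r"/analysis", AnalysisHandler),  # hypothetical route and class name
    ])


if __name__ == "__main__":
    make_app().listen(8888)
    tornado.ioloop.IOLoop.current().start()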
def add():
    # pdb.set_trace()
    if request.method == 'POST':
        # download html, parse text into individual words
        # print("generating response for URL: " + request.form['site'])
        article_bag = bag.get_bag(request.form['site'])
        # analysis is done as part of generating output
        data = {
            'input': request.form,
            'output': analyze.run_analysis(article_bag),
        }
        js = json.dumps(data)
        return Response(js, status=200, mimetype='application/json')
    else:
        return "Must POST for this method to work"
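# add() checks request.method, which suggests it is registered for both GET and
# POST. A hedged sketch of the registration and a client call; the '/add' path
# and port are assumptions not present in the source.
#
#   app.add_url_rule('/add', 'add', add, methods=['GET', 'POST'])
#
#   curl -X POST -d "site=https://example.com/article" http://localhost:5000/add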
def test_analyze():
    prefix = "results/single_trial_test"
    res = a.run_analysis(prefix)
    print(res)
def train(arglist):
    # Make sure that training and testing are based on different seeds
    if arglist.restore:
        create_seed(np.random.randint(2))
    else:
        create_seed(arglist.seed)

    with U.single_threaded_session() as sess:
        # Create environment
        env = make_env(arglist.scenario, arglist, arglist.benchmark)

        # Create agent trainers
        obs_shape_n = [env.observation_space[i].shape for i in range(env.n)]
        num_adversaries = min(env.n, arglist.num_adversaries)
        trainers = get_trainers(env, num_adversaries, obs_shape_n, arglist)
        print('Using good policy {} and adv policy {}'.format(
            arglist.good_policy, arglist.adv_policy))

        # Initialize
        U.initialize()

        # Load previous results, if necessary
        if arglist.load_dir == "":
            arglist.load_dir = arglist.save_dir
        if arglist.restore or arglist.benchmark:
            print('Loading previous state...')
            U.load_state(arglist.load_dir)

        if arglist.analysis:
            print("Starting analysis on {}...".format(arglist.analysis))
            if arglist.analysis != 'video':
                analyze.run_analysis(arglist, env, trainers)
            return  # should be a single run

        episode_rewards = [0.0]  # sum of rewards for all agents
        agent_rewards = [[0.0] for _ in range(env.n)]  # individual agent reward
        final_ep_rewards = []  # sum of rewards for training curve
        final_ep_ag_rewards = []  # agent rewards for training curve
        agent_info = [[[]]]  # placeholder for benchmarking info
        saver = tf.train.Saver()
        obs_n = env.reset()
        episode_step = 0
        train_step = 0
        t_start = time.time()
        new_episode = True  # start of a new episode (used for replay buffer)
        start_saving_comm = False

        if arglist.graph:
            print("Setting up graph writer!")
            writer = tf.summary.FileWriter("learning_curves/graph", sess.graph)

        print('Starting iterations...')
        while True:
            if arglist.actor_lstm:
                # get actor input states
                p_in_c_n, p_in_h_n = get_lstm_states('p', trainers)  # num_trainers x 1 x 1 x 64
            if arglist.critic_lstm:
                # get critic input states
                q_in_c_n, q_in_h_n = get_lstm_states('q', trainers)  # num_trainers x 1 x 1 x 64

            # get action
            action_n = [agent.action(obs) for agent, obs in zip(trainers, obs_n)]

            if arglist.critic_lstm:
                # get critic output states
                p_states = [p_in_c_n, p_in_h_n] if arglist.actor_lstm else []
                update_critic_lstm(trainers, obs_n, action_n, p_states)
                q_out_c_n, q_out_h_n = get_lstm_states('q', trainers)  # num_trainers x 1 x 1 x 64
            if arglist.actor_lstm:
                p_out_c_n, p_out_h_n = get_lstm_states('p', trainers)  # num_trainers x 1 x 1 x 64

            # environment step
            new_obs_n, rew_n, done_n, info_n = env.step(action_n)
            episode_step += 1
            done = all(done_n)
            terminal = (episode_step >= arglist.max_episode_len)

            # collect experience
            for i, agent in enumerate(trainers):
                num_episodes = len(episode_rewards)  # do this every iteration
                if arglist.critic_lstm and arglist.actor_lstm:
                    agent.experience(obs_n[i], action_n[i], rew_n[i], new_obs_n[i],
                                     done_n[i],  # terminal,
                                     p_in_c_n[i][0], p_in_h_n[i][0],
                                     p_out_c_n[i][0], p_out_h_n[i][0],
                                     q_in_c_n[i][0], q_in_h_n[i][0],
                                     q_out_c_n[i][0], q_out_h_n[i][0],
                                     new_episode)
                elif arglist.critic_lstm:
                    agent.experience(obs_n[i], action_n[i], rew_n[i], new_obs_n[i],
                                     done_n[i],  # terminal,
                                     q_in_c_n[i][0], q_in_h_n[i][0],
                                     q_out_c_n[i][0], q_out_h_n[i][0],
                                     new_episode)
                elif arglist.actor_lstm:
                    agent.experience(obs_n[i], action_n[i], rew_n[i], new_obs_n[i],
                                     done_n[i],  # terminal,
                                     p_in_c_n[i][0], p_in_h_n[i][0],
                                     p_out_c_n[i][0], p_out_h_n[i][0],
                                     new_episode)
                else:
                    agent.experience(obs_n[i], action_n[i], rew_n[i], new_obs_n[i],
                                     done_n[i],  # terminal,
                                     new_episode)
            obs_n = new_obs_n

            # Adding rewards
            if arglist.tracking:
                for i, a in enumerate(trainers):
                    if arglist.num_episodes - len(episode_rewards) <= 1000:
                        a.tracker.record_information("goal", np.array(env.world.landmarks[0].state.p_pos))
                        a.tracker.record_information("position", np.array(env.world.agents[i].state.p_pos))
                        a.tracker.record_information("ag_reward", rew_n[i])
                        a.tracker.record_information("team_dist_reward", info_n["team_dist"][i])
                        a.tracker.record_information("team_diff_reward", info_n["team_diff"][i])

            # Closing graph writer
            if arglist.graph:
                writer.close()

            for i, rew in enumerate(rew_n):
                episode_rewards[-1] += rew
                agent_rewards[i][-1] += rew

            if done or terminal:
                new_episode = True
                num_episodes = len(episode_rewards)
                obs_n = env.reset()
                # reset trainers
                if arglist.actor_lstm or arglist.critic_lstm:
                    for agent in trainers:
                        agent.reset_lstm()
                if arglist.tracking:
                    for agent in trainers:
                        agent.tracker.reset()
                episode_step = 0
                episode_rewards.append(0)
                for a in agent_rewards:
                    a.append(0)
                agent_info.append([[]])
            else:
                new_episode = False

            # increment global step counter
            train_step += 1

            # for benchmarking learned policies
            if arglist.benchmark:
                for i, info in enumerate(info_n):
                    agent_info[-1][i].append(info_n['n'])
                if train_step > arglist.benchmark_iters and (done or terminal):
                    file_name = arglist.benchmark_dir + arglist.exp_name + '.pkl'
                    print('Finished benchmarking, now saving...')
                    with open(file_name, 'wb') as fp:
                        pickle.dump(agent_info[:-1], fp)
                    break
                continue

            # update all trainers, if not in display or benchmark mode
            loss = None
            # get same episode sampling
            if arglist.sync_sampling:
                inds = [random.randint(0, len(trainers[0].replay_buffer._storage) - 1)
                        for i in range(arglist.batch_size)]
            else:
                inds = None
            for agent in trainers:
                # if arglist.lstm:
                #     agent.preupdate(inds=inds)
                # else:
                agent.preupdate(inds)
            for agent in trainers:
                loss = agent.update(trainers, train_step)
                if loss is None:
                    continue

            # for displaying learned policies
            if arglist.display:
                env.render()
                # continue

            # save model, display training output
            if terminal and (len(episode_rewards) % arglist.save_rate == 0):
                U.save_state(arglist.save_dir, saver=saver)
                # print statement depends on whether or not there are adversaries
                if num_adversaries == 0:
                    print("steps: {}, episodes: {}, mean episode reward: {}, time: {}".format(
                        train_step, len(episode_rewards),
                        np.mean(episode_rewards[-arglist.save_rate:]),
                        round(time.time() - t_start, 3)))
                else:
                    print("steps: {}, episodes: {}, mean episode reward: {}, agent episode reward: {}, time: {}".format(
                        train_step, len(episode_rewards),
                        np.mean(episode_rewards[-arglist.save_rate:]),
                        [np.mean(rew[-arglist.save_rate:]) for rew in agent_rewards],
                        round(time.time() - t_start, 3)))
                t_start = time.time()
                # Keep track of final episode reward
                final_ep_rewards.append(np.mean(episode_rewards[-arglist.save_rate:]))
                for rew in agent_rewards:
                    final_ep_ag_rewards.append(np.mean(rew[-arglist.save_rate:]))

            # saves final episode reward for plotting training curve later
            if len(episode_rewards) > arglist.num_episodes:
                # U.save_state(arglist.save_dir, saver=saver)
                if arglist.tracking:
                    for agent in trainers:
                        agent.tracker.save()
                rew_file_name = "rewards/" + arglist.commit_num + "_rewards.pkl"
                with open(rew_file_name, 'wb') as fp:
                    pickle.dump(final_ep_rewards, fp)
                agrew_file_name = "rewards/" + arglist.commit_num + "_agrewards.pkl"
                # agrew_file_name = arglist.plots_dir + arglist.exp_name + '_agrewards.pkl'
                with open(agrew_file_name, 'wb') as fp:
                    pickle.dump(final_ep_ag_rewards, fp)
                print('...Finished total of {} episodes.'.format(len(episode_rewards)))
                break
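# A minimal entry point for the train() loop above, mirroring the usual
# MADDPG-style train.py layout. parse_args() is assumed to define the flags
# referenced above (scenario, max_episode_len, num_episodes, num_adversaries,
# good_policy, adv_policy, batch_size, save_rate, save_dir, load_dir, restore,
# display, benchmark, benchmark_iters, benchmark_dir, actor_lstm, critic_lstm,
# sync_sampling, tracking, graph, analysis, commit_num, seed, ...); its exact
# definition is not shown in this listing.
if __name__ == '__main__':
    arglist = parse_args()
    train(arglist)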
def kickoff():
    run_analysis(fbid, start_date, end_date)
    summary = pd.read_csv('%s_facebook_analysis.csv' % fbid)
    return render_template('index.html.jinja', summary=summary)
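# kickoff() assumes run_analysis() has already written
# '<fbid>_facebook_analysis.csv' to the working directory before the CSV is
# read back. A hedged sketch of registering it as a Flask view; the app
# object and URL are assumptions not present in the source.
from flask import Flask, render_template

app = Flask(__name__)
app.add_url_rule('/', 'kickoff', kickoff)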