def make_mujoco_env(env_id, seed):
    """
    Create a wrapped, monitored gym.Env for MuJoCo.
    """
    rank = MPI.COMM_WORLD.Get_rank()
    set_global_seeds(seed + 10000 * rank)
    env = gym.make(env_id)
    logger.configure()
    env = Monitor(env, os.path.join(logger.get_dir(), str(rank)))
    env.seed(seed)
    return env
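# The factory above offsets each MPI rank's seed by 10000 * rank so parallel
# workers draw independent random streams. A minimal usage sketch (assuming an
# old-style gym env whose step() returns a 4-tuple, as the env.seed() call
# above implies); `make_mujoco_env` is the factory defined above:
def _random_rollout_sketch(env, steps=100):
    total = 0.0
    env.reset()
    for _ in range(steps):
        _, reward, done, _ = env.step(env.action_space.sample())
        total += reward
        if done:
            env.reset()
    return total
# env = make_mujoco_env("HalfCheetah-v2", seed=0)
# print(_random_rollout_sketch(env))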
def _thunk():
    env = make_atari(env_id)
    env.seed(seed + rank)
    # If no logger directory is configured, logger.get_dir() is None and the
    # Monitor is created without a log file.
    env = Monitor(
        env,
        logger.get_dir() and os.path.join(logger.get_dir(), str(rank)))
    return wrap_deepmind(env, **wrapper_kwargs)
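# The env construction is wrapped in a zero-argument thunk so a vectorized
# wrapper can build each env inside its own worker process. A sketch of how a
# list of such thunks might be assembled (names here are illustrative; a
# baselines-style SubprocVecEnv would consume the list):
def _make_thunks_sketch(env_id, seed, num_envs, make_env):
    """make_env(env_id, seed, rank) is assumed to return one wrapped env."""
    def bind(rank):
        def _thunk():
            # Construction is deferred until the worker calls the thunk.
            return make_env(env_id, seed, rank)
        return _thunk
    return [bind(rank) for rank in range(num_envs)]
# thunks = _make_thunks_sketch("PongNoFrameskip-v4", 0, 8, make_env=...)
# vec_env = SubprocVecEnv(thunks)  # assumed baselines-style constructor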
def main():
    statsd_client = StatsClient(statsd_host, statsd_port, prefix="wifi.parse.data")
    statsd_client.gauge("WA_SOURCE_FJ_1001.success", 0)
    statsd_client.gauge("WA_SOURCE_FJ_1001.failed", 0)
    statsd_client.gauge("WA_BASIC_FJ_1003.success", 0)
    statsd_client.gauge("WA_BASIC_FJ_1003.failed", 0)
    statsd_client.gauge("file.failed", 0)
    # List every file and directory under the monitored path and process the
    # existing entries once before starting the watcher.
    entries = os.listdir(config["monitor_path"])
    for name in entries:
        com_path = os.path.join(config["monitor_path"], name)
        Monitor(stastd=statsd_client, zipinfo="True").operate_change(com_path)
    event_handler = Monitor(stastd=statsd_client)
    observer = Observer()
    # recursive=True also watches subdirectories
    observer.schedule(event_handler, path=config["monitor_path"], recursive=True)
    observer.start()
    observer.join()
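# observer.join() above blocks until the observer thread exits, which it never
# does on its own. A common watchdog idiom (a sketch reusing the Observer
# import and a handler like the Monitor above) is to idle in the main thread
# and stop the observer cleanly on interrupt:
import time

def _watch_forever_sketch(event_handler, path):
    observer = Observer()
    observer.schedule(event_handler, path=path, recursive=True)
    observer.start()
    try:
        while True:
            time.sleep(1)
    except KeyboardInterrupt:
        observer.stop()
    observer.join()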
def make_robotics_env(env_id, seed, rank=0):
    """
    Create a wrapped, monitored gym.Env for goal-based robotics tasks.
    """
    set_global_seeds(seed)
    env = gym.make(env_id)
    env = FlattenDictWrapper(env, ['observation', 'desired_goal'])
    env = Monitor(
        env,
        logger.get_dir() and os.path.join(logger.get_dir(), str(rank)),
        info_keywords=('is_success',))
    env.seed(seed)
    return env
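# FlattenDictWrapper collapses the goal env's dict observation into one flat
# vector before monitoring. A conceptual numpy sketch of that flattening
# (illustrative only, not the wrapper's actual implementation):
import numpy as np

def _flatten_dict_obs_sketch(obs, keys=('observation', 'desired_goal')):
    return np.concatenate([np.ravel(obs[k]) for k in keys])
# _flatten_dict_obs_sketch({'observation': np.zeros(10),
#                           'desired_goal': np.zeros(3),
#                           'achieved_goal': np.zeros(3)}).shape  # -> (13,)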
def test(self):
    mon = Gdk.Display.get_monitor(Gdk.Display.get_default(), 0)
    monitor = Monitor.from_monitor(mon)
    hwinfo = HWinfo()
    hwinfo_data = hwinfo.dmi_load()
    manufacturer = hwinfo_data['sys_vendor'].lower()
    model = hwinfo_data['product_name'].lower()
    expected_width = self.expected_width
    expected_height = self.expected_height
    if monitor.width != expected_width or monitor.height != expected_height:
        self.fail(
            "Internal display did not match expected resolution, expected: "
            "{0}x{1} got: {2}x{3}".format(expected_width, expected_height,
                                          monitor.width, monitor.height))
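# The test reads the DMI vendor/product strings but compares only against
# self.expected_width/height. A hypothetical lookup that would let the
# expected resolution depend on (manufacturer, model); table contents are
# illustrative only:
_EXPECTED_RESOLUTIONS_SKETCH = {
    ('lenovo', 'thinkpad x1 carbon'): (1920, 1080),
}

def _expected_resolution_sketch(manufacturer, model, default=(1920, 1080)):
    return _EXPECTED_RESOLUTIONS_SKETCH.get((manufacturer, model), default)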
def main(_): # create visualizer #visualizer = TensorboardVisualizer() monitor = Monitor(FLAGS) #log_dir = monitor.log_dir #visualizer.initialize(log_dir, None) saved_mean_reward = None # openAI logger L.configure(monitor.log_dir, format_strs=['stdout', 'csv']) # initialize env atari_env = AtariEnv(monitor) #screen_shot_subgoal(atari_env) # we should probably follow deepmind style env # stack 4 frames and scale float env = wrapper.wrap_deepmind(atari_env, frame_stack=True, scale=True) # get default tf_session sess = U.get_session() # create q networks for controller controller_optimizer = tf.train.AdamOptimizer(learning_rate=LEARNING_RATE) controller_network = Q_network(env.observation_space, env.action_space.n, controller_optimizer, scope='controller') controller = Controller(controller_network, env.action_space.n) # create q networks for meta-controller num_goals = env.unwrapped.goals_space.n metacontroller_optimizer = tf.train.AdamOptimizer(learning_rate=LEARNING_RATE) metacontroller_network = Q_network(env.observation_space, num_goals, metacontroller_optimizer, scope='metacontroller') metacontroller = MetaController(metacontroller_network, num_goals) # Create the schedule for exploration starting from 1. exploration2 = LinearSchedule(schedule_timesteps=int(EXPLORATION_FRACTION * monitor.num_timesteps), initial_p=1.0, final_p=EXPLORATION_FINAL_EPS) # initialize experience replay controller_replay_buffer = ReplayBuffer(D1_MEMORY_SIZE) metacontroller_replay_buffer = ReplayBuffer(D2_MEMORY_SIZE) # initialize critic critic = Critic(env.unwrapped) total_extrinsic_reward = [] # for success rate total_goal_reached = np.zeros(num_goals, dtype=np.int32) total_goal_sampled = np.zeros(num_goals, dtype=np.int32) total_goal_epsilon = np.ones(num_goals, dtype=np.float32) ep = 0 total_step = 0 init_ob = env.reset() U.initialize() # initialize target network in both controller and meta sess.run(metacontroller.network.update_target_op) sess.run(controller.network.update_target_op) # load ckpt if presence model_path = tf.train.latest_checkpoint(monitor.ckpt_dir) model_saved = False model_file = os.path.join(monitor.ckpt_dir, 'model') if model_path is not None: U.load_variables(model_file) L.log('loaded model from %s' % model_file) model_saved = True while ep < MAX_EPISODE: # count number of steps # init environment game play variables init_ob = env.reset() observation = np.reshape(init_ob['observation'], (1, )+init_ob['observation'].shape) desired_goal = metacontroller.sample_act(sess, observation, update_eps=1.0)[0] env.unwrapped.desired_goal = desired_goal total_goal_sampled[desired_goal] += 1 # given predicted goal, we encode this goal bounding mask to the observation np array ob_with_g = env.unwrapped._add_goal_mask(init_ob['observation'], desired_goal) # NOTE: Below code verify added mask correctly # for i in range(ob_with_g.shape[-1]): # ob = ob_with_g[:,:,i] # image = Image.fromarray(ob) # image = image.convert('RGB') # image.save('test_%i.png' % i) done = False reached_goal = False while not done: extrinsic_rewards = 0 s0 = init_ob['observation'] while not (done or reached_goal): update_eps1_with_respect_to_g = get_epsilon(total_goal_epsilon, total_goal_reached, total_goal_sampled, desired_goal, total_step, EXPLORATION_WARM_UP) ob_with_g_reshaped = np.reshape(ob_with_g, (1, )+ob_with_g.shape) primitive_action_t = controller.sample_act(sess, ob_with_g_reshaped, update_eps=update_eps1_with_respect_to_g)[0] # obtain extrinsic reward from environment ob_tp1, extrinsic_reward_t, done_t, info = 
env.step(primitive_action_t) reached_goal = env.unwrapped.reached_goal(desired_goal) ob_with_g_tp1 = env.unwrapped._add_goal_mask(ob_tp1['observation'], desired_goal) intrinsic_reward_t = critic.criticize(desired_goal, reached_goal, primitive_action_t, done_t) controller_replay_buffer.add(ob_with_g, primitive_action_t, intrinsic_reward_t, ob_with_g_tp1, done_t) # sample from replay_buffer1 to train controller obs_with_g_t, primitive_actions_t, intrinsic_rewards_t, obs_with_g_tp1, dones_t = controller_replay_buffer.sample(TRAIN_BATCH_SIZE) weights, batch_idxes = np.ones_like(intrinsic_rewards_t), None # get q estimate for tp1 as 'supervised' ob_with_g_tp1_reshaped = np.reshape(ob_with_g_tp1, (1, )+ob_with_g.shape) q_tp1 = controller.get_q(sess, ob_with_g_tp1_reshaped)[0] td_error = controller.train(sess, obs_with_g_t, primitive_actions_t, intrinsic_rewards_t, obs_with_g_tp1, dones_t, weights, q_tp1) # join train meta-controller only sample from replay_buffer2 to train meta-controller if total_step >= WARMUP_STEPS: L.log('join train has started ----- step %d', total_step) # sample from replay_buffer2 to train meta-controller init_obs, goals_t, extrinsic_rewards_t, obs_terminate_in_g, dones_t = metacontroller_replay_buffer.sample(TRAIN_BATCH_SIZE) weights, batch_idxes = np.ones_like(extrinsic_rewards_t), None # get q estimate for tp1 as 'supervised' obs_terminate_in_g_reshaped = np.reshape(obs_terminate_in_g, (1, )+obs_terminate_in_g.shape) q_tp1 = metacontroller.get_q(sess, obs_terminate_in_g_reshaped)[0] td_error = metacontroller.train(sess, init_obs, goals_t, extrinsic_rewards_t, obs_terminate_in_g, dones_t, weights, q_tp1) if total_step % UPDATE_TARGET_NETWORK_FREQ == 0: #L.log('UPDATE BOTH CONTROLLER Q NETWORKS ----- step %d', step) sess.run(controller.network.update_target_op) # its fine, we aren't really training meta dqn until after certain steps. 
sess.run(metacontroller.network.update_target_op) extrinsic_rewards += extrinsic_reward_t ob_with_g = ob_with_g_tp1 done = done_t total_step += 1 # we are done / reached_goal # store transitions of init_ob, goal, all the extrinsic rewards, current ob in D2 # print("ep %d : step %d, goal extrinsic total %d" % (ep, step, extrinsic_rewards)) # clean observation without goal encoded metacontroller_replay_buffer.add(init_ob['observation'], desired_goal, extrinsic_rewards, ob_tp1['observation'], done) # if we are here then we have finished the desired goal if not done: #print("ep %d : goal %d reached, not yet done, extrinsic %d" % (ep, desired_goal, extrinsic_rewards)) exploration_ep = 1.0 total_goal_reached[env.unwrapped.achieved_goal] += 1 if total_step >= WARMUP_STEPS: t = total_step - WARMUP_STEPS exploration_ep = exploration2.value(t) ob_with_g_reshaped = np.reshape(ob_with_g, (1, )+ob_with_g.shape) while env.unwrapped.achieved_goal == desired_goal: desired_goal = metacontroller.sample_act(sess, ob_with_g_reshaped, update_eps=exploration_ep)[0] env.unwrapped.desired_goal = desired_goal total_goal_sampled[desired_goal] += 1 L.log('ep %d : achieved goal was %d ----- new goal --- %d' % (ep, env.unwrapped.achieved_goal, desired_goal)) # start again reached_goal = False # finish an episode total_extrinsic_reward.append(extrinsic_rewards) ep += 1 mean_100ep_reward = round(np.mean(total_extrinsic_reward[-101:-1]), 1) if ep % monitor.print_freq == 0 : L.record_tabular("steps", total_step) L.record_tabular("episodes", ep) L.record_tabular("mean 100 episode reward", mean_100ep_reward) L.dump_tabular() if total_step % monitor.ckpt_freq == 0: if saved_mean_reward is None or mean_100ep_reward > saved_mean_reward: L.log("Saving model due to mean reward increase: {} -> {}".format( saved_mean_reward, mean_100ep_reward)) U.save_variables(model_file) model_saved = True saved_mean_reward = mean_100ep_reward # verified our model was saved if model_saved: L.log('restored model with mean reward: %d' % saved_mean_reward) U.load_variables(model_file)
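# The controller's exploration rate above comes from get_epsilon(...), keyed on
# how often the current goal has been reached. That helper is not shown, so
# this is only a plausible sketch of the idea: full exploration during warm-up,
# then an epsilon that decays with the goal's empirical success rate.
def _get_epsilon_sketch(goal_epsilon, goal_reached, goal_sampled, goal,
                        step, warmup_steps, floor=0.1):
    if step < warmup_steps or goal_sampled[goal] == 0:
        return 1.0
    success_rate = goal_reached[goal] / float(goal_sampled[goal])
    goal_epsilon[goal] = max(1.0 - success_rate, floor)
    return goal_epsilon[goal]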
def train(env, agent, args): monitor = Monitor(train=True, spec="-{}".format(args.method)) monitor.init_log(args.log, "roi_m.{}_e.{}_n.{}".format(args.model, args.env_name, args.name)) env.reset() S = set() corWs = queue.Queue() # add two extreme points corWs.put(FloatTensor([1.0, 0.0])) corWs.put(FloatTensor([0.0, 1.0])) # outer_loop! for _ in range(args.ws): print(colored("size of corWs: {}".format(corWs.qsize()), "green")) if corWs.qsize() == 0: corWs.put(FloatTensor([1.0, 0.0])) corWs.put(FloatTensor([0.0, 1.0])) corner_w = corWs.get_nowait() while not is_corner(corner_w, S) and corWs.qsize()>0: corner_w = corWs.get_nowait() print(colored("{} left....".format(corWs.qsize()), "green")) if not is_corner(corner_w, S): print(colored("no more corner w...", "green")) print(colored("Final S contains", "green")) for s in S: print(colored(s, "green")) break print(colored("solve for w: {}".format(corner_w), "green")) for num_eps in range(int(args.episode_num / args.ws)): terminal = False env.reset() loss = 0 cnt = 0 tot_reward = 0 tot_reward_mo = 0 probe = None if args.env_name == "dst": probe = corner_w elif args.env_name in ['ft', 'ft5', 'ft7']: probe = FloatTensor([0.8, 0.2, 0.0, 0.0, 0.0, 0.0]) while not terminal: state = env.observe() action = agent.act(state, corner_w) agent.w_kept = corner_w next_state, reward, terminal = env.step(action) if args.log: monitor.add_log(state, action, reward, terminal, agent.w_kept) agent.memorize(state, action, next_state, reward, terminal, roi=True) loss += agent.learn(corner_w) if cnt > 100: terminal = True agent.reset() tot_reward = tot_reward + (probe.cpu().numpy().dot(reward)) * np.power(args.gamma, cnt) tot_reward_mo = tot_reward_mo + reward * np.power(args.gamma, cnt) cnt = cnt + 1 _, q = agent.predict(probe) if args.env_name == "dst": act_1 = q[0, 3] act_2 = q[0, 1] elif args.env_name in ['ft', 'ft5', 'ft7']: act_1 = q[0, 1] act_2 = q[0, 0] if args.method == "crl-naive": act_1 = act_1.data.cpu() act_2 = act_2.data.cpu() elif args.method == "crl-envelope": act_1 = probe.dot(act_1.data) act_2 = probe.dot(act_2.data) elif args.method == "crl-energy": act_1 = probe.dot(act_1.data) act_2 = probe.dot(act_2.data) print("end of eps %d with total reward (1) %0.2f (%0.2f, %0.2f), the Q is %0.2f | %0.2f; loss: %0.4f" % ( num_eps, tot_reward, tot_reward_mo[0], tot_reward_mo[1], act_1, act_2, # q__max, loss / cnt)) monitor.update(num_eps, tot_reward, act_1, act_2, # q__max, loss / cnt) # agent.is_train=False terminal = False env.reset() cnt = 0 tot_reward_mo = 0 while not terminal: state = env.observe() action = agent.act(state, corner_w) agent.w_kept = corner_w next_state, reward, terminal = env.step(action) if cnt > 100: terminal = True agent.reset() tot_reward_mo = tot_reward_mo + reward * np.power(args.gamma, cnt) cnt = cnt + 1 agent.is_train=True S, corWs = update_ccs(S, corWs, tot_reward_mo) print(colored("----------------\n", "red")) print(colored("Current S contains", "red")) for s in S: print(colored(s, "red")) print(colored("----------------\n", "red")) # if num_eps+1 % 100 == 0: # agent.save(args.save, args.model+args.name+"_tmp_{}".format(number)) agent.save(args.save, "roi_m.{}_e.{}_n.{}".format(args.model, args.env_name, args.name))
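# Inside the episode loop above, tot_reward accumulates the preference-weighted
# (probe . reward) return and tot_reward_mo the vector-valued return, both
# discounted by gamma**cnt. The same bookkeeping in isolation (names are
# illustrative):
import numpy as np

def _accumulate_returns_sketch(rewards, probe, gamma):
    """rewards: list of reward vectors for one episode; probe: preference weights."""
    tot_scalar = 0.0
    tot_vector = np.zeros_like(np.asarray(rewards[0], dtype=float))
    for t, r in enumerate(rewards):
        tot_scalar += float(np.dot(probe, r)) * gamma ** t
        tot_vector += np.asarray(r, dtype=float) * gamma ** t
    return tot_scalar, tot_vector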
def train(self): ''' call this function when the episode ends ''' self.episodecount += 1 if self.monitor is None: self.monitor = Monitor("-" + self.algorithm) if not self.is_training: logger.info("Not in training mode") return else: logger.info("Update naive morl policy parameters.") logger.info("Episode Num so far: %s" % (self.episodecount)) if len(self.trans_mem) > self.batch_size * 10: self.update_count += 1 minibatch = self.sample(self.trans_mem, self.priority_mem, self.batch_size) batchify = lambda x: list(x) * self.weight_num state_batch = batchify(map(lambda x: x.s, minibatch)) action_batch = batchify(map(lambda x: LongTensor([x.a]), minibatch)) reward_batch = batchify(map(lambda x: x.r.unsqueeze(0), minibatch)) next_state_batch = batchify(map(lambda x: x.s_, minibatch)) terminal_batch = batchify(map(lambda x: x.d, minibatch)) mask_batch = batchify(map(lambda x: x.ms.unsqueeze(0), minibatch)) next_mask_batch = batchify( map(lambda x: x.ms_.unsqueeze(0), minibatch)) w_batch = np.random.randn(self.weight_num, self.model_.reward_size) w_batch = np.abs(w_batch) / \ np.linalg.norm(w_batch, ord=1, axis=1, keepdims=True) w_batch = torch.from_numpy(w_batch.repeat( self.batch_size, axis=0)).type(FloatTensor) if self.algorithm == 'naive': __, Q = self.model_(Variable(torch.cat(state_batch, dim=0)), Variable(w_batch), Variable(torch.cat(mask_batch, dim=0))) # detach since we don't want gradients to propagate # HQ, _ = self.model_(Variable(torch.cat(next_state_batch, dim=0), volatile=True), # Variable(w_batch, volatile=True)) _, DQ = self.model( Variable(torch.cat(next_state_batch, dim=0), requires_grad=False), Variable(w_batch, requires_grad=False), Variable(torch.cat(next_mask_batch, dim=0), requires_grad=False)) _, act = self.model_( Variable(torch.cat(next_state_batch, dim=0), requires_grad=False), Variable(w_batch, requires_grad=False), Variable(torch.cat(next_mask_batch, dim=0), requires_grad=False))[1].max(1) HQ = DQ.gather(1, act.unsqueeze(dim=1)).squeeze() w_reward_batch = torch.bmm( w_batch.unsqueeze(1), torch.cat(reward_batch, dim=0).unsqueeze(2)).squeeze() nontmlmask = self.nontmlinds(terminal_batch) with torch.no_grad(): Tau_Q = Variable( torch.zeros(self.batch_size * self.weight_num).type(FloatTensor)) Tau_Q[nontmlmask] = self.gamma * HQ[nontmlmask] Tau_Q += Variable(w_reward_batch) actions = Variable(torch.cat(action_batch, dim=0)) # Compute Huber loss loss = F.smooth_l1_loss(Q.gather(1, actions.unsqueeze(dim=1)), Tau_Q.unsqueeze(dim=1)) elif self.algorithm == 'envelope': action_size = self.model_.action_size reward_size = self.model_.reward_size __, Q = self.model_(Variable(torch.cat(state_batch, dim=0)), Variable(w_batch), w_num=self.weight_num, execmask=Variable( torch.cat(mask_batch, dim=0))) # detach since we don't want gradients to propagate # HQ, _ = self.model_(Variable(torch.cat(next_state_batch, dim=0), volatile=True), # Variable(w_batch, volatile=True), w_num=self.weight_num) _, DQ = self.model(Variable(torch.cat(next_state_batch, dim=0), requires_grad=False), Variable(w_batch, requires_grad=False), execmask=Variable(torch.cat(next_mask_batch, dim=0), requires_grad=False)) w_ext = w_batch.unsqueeze(2).repeat(1, action_size, 1) w_ext = w_ext.view(-1, self.model.reward_size) _, tmpQ = self.model_(Variable(torch.cat(next_state_batch, dim=0), requires_grad=False), Variable(w_batch, requires_grad=False), execmask=Variable(torch.cat( next_mask_batch, dim=0), requires_grad=False)) tmpQ = tmpQ.view(-1, reward_size) # print(torch.bmm(w_ext.unsqueeze(1), # 
tmpQ.data.unsqueeze(2)).view(-1, action_size)) act = torch.bmm( Variable(w_ext.unsqueeze(1), requires_grad=False), tmpQ.unsqueeze(2)).view(-1, action_size).max(1)[1] HQ = DQ.gather( 1, act.view(-1, 1, 1).expand(DQ.size(0), 1, DQ.size(2))).squeeze() nontmlmask = self.nontmlinds(terminal_batch) with torch.no_grad(): Tau_Q = Variable( torch.zeros(self.batch_size * self.weight_num, reward_size).type(FloatTensor)) Tau_Q[nontmlmask] = self.gamma * HQ[nontmlmask] # Tau_Q.volatile = False Tau_Q += Variable(torch.cat(reward_batch, dim=0)) actions = Variable(torch.cat(action_batch, dim=0)) Q = Q.gather( 1, actions.view(-1, 1, 1).expand(Q.size(0), 1, Q.size(2))).view(-1, reward_size) Tau_Q = Tau_Q.view(-1, reward_size) wQ = torch.bmm(Variable(w_batch.unsqueeze(1)), Q.unsqueeze(2)).squeeze() wTQ = torch.bmm(Variable(w_batch.unsqueeze(1)), Tau_Q.unsqueeze(2)).squeeze() # loss = F.mse_loss(Q.view(-1), Tau_Q.view(-1)) # print self.beta loss = self.beta * F.mse_loss(wQ.view(-1), wTQ.view(-1)) loss += (1 - self.beta) * F.mse_loss(Q.view(-1), Tau_Q.view(-1)) self.optimizer.zero_grad() loss.backward() for param in self.model_.parameters(): param.grad.data.clamp_(-1, 1) self.optimizer.step() if self.update_count % self.update_freq == 0: self.model.load_state_dict(self.model_.state_dict()) self.monitor.update(self.episodecount, loss=loss.data) self.savePolicyInc() # self.out_policy_file)
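# The envelope branch above mixes a scalarized TD error with a vector-valued
# one, weighted by beta (the homotopy coefficient). The same loss in isolation,
# assuming PyTorch tensors of shape (batch, reward_size):
import torch
import torch.nn.functional as F

def _envelope_loss_sketch(Q, Tau_Q, w, beta):
    wQ = torch.bmm(w.unsqueeze(1), Q.unsqueeze(2)).squeeze()
    wTQ = torch.bmm(w.unsqueeze(1), Tau_Q.unsqueeze(2)).squeeze()
    return beta * F.mse_loss(wQ, wTQ) + (1 - beta) * F.mse_loss(Q, Tau_Q)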
class MORLPolicy(Policy.Policy): '''Derived from :class:`Policy` ''' def __init__(self, in_policy_file, out_policy_file, domainString='CamRestaurants', is_training=False, action_names=None): super(MORLPolicy, self).__init__(domainString, is_training) self.domainString = domainString self.domainUtil = FlatOnt.FlatDomainOntology(self.domainString) self.in_policy_file = in_policy_file self.out_policy_file = out_policy_file self.is_training = is_training self.accum_belief = [] self.domainUtil = FlatOnt.FlatDomainOntology(self.domainString) self.prev_state_check = None # parameter settings if 0: # cfg.has_option('morlpolicy', 'n_in'): #ic304: this was giving me a weird error, disabled it until i can check it deeper self.n_in = cfg.getint('morlpolicy', 'n_in') else: self.n_in = self.get_n_in(domainString) self.n_rew = 1 if cfg.has_option('morlpolicy', 'n_rew'): self.n_rew = cfg.getint('morlpolicy', 'n_rew') self.lr = 0.001 if cfg.has_option('morlpolicy', 'learning_rate'): self.lr = cfg.getfloat('morlpolicy', 'learning_rate') self.epsilon = 0.5 if cfg.has_option('morlpolicy', 'epsilon'): self.epsilon = cfg.getfloat('morlpolicy', 'epsilon') self.epsilon_decay = True if cfg.has_option('morlpolicy', 'epsilon_decay'): self.epsilon_decay = cfg.getboolean('morlpolicy', 'epsilon_decay') self.randomseed = 1234 if cfg.has_option('GENERAL', 'seed'): self.randomseed = cfg.getint('GENERAL', 'seed') self.gamma = 1.0 if cfg.has_option('morlpolicy', 'gamma'): self.gamma = cfg.getfloat('morlpolicy', 'gamma') self.weight_num = 32 if cfg.has_option('morlpolicy', 'weight_num'): self.weight_num = cfg.getint('morlpolicy', 'weight_num') self.episode_num = 1000 if cfg.has_option('morlpolicy', 'episode_num'): self.episode_num = cfg.getfloat('morlpolicy', 'episode_num') self.optimizer = "Adam" if cfg.has_option('morlpolicy', 'optimizer'): self.optimizer = cfg.get('morlpolicy', 'optimizer') self.save_step = 100 if cfg.has_option('policy', 'save_step'): self.save_step = cfg.getint('policy', 'save_step') self.update_freq = 50 if cfg.has_option('morlpolicy', 'update_freq'): self.update_freq = cfg.getint('morlpolicy', 'update_freq') self.policyfeatures = [] if cfg.has_option('morlpolicy', 'features'): logger.info('Features: ' + str(cfg.get('morlpolicy', 'features'))) self.policyfeatures = json.loads(cfg.get('morlpolicy', 'features')) self.algorithm = 'naive' if cfg.has_option('morlpolicy', 'algorithm'): self.algorithm = cfg.get('morlpolicy', 'algorithm') logger.info('Learning algorithm: ' + self.algorithm) self.batch_size = 32 if cfg.has_option('morlpolicy', 'batch_size'): self.batch_size = cfg.getint('morlpolicy', 'batch_size') self.mem_size = 1000 if cfg.has_option('morlpolicy', 'mem_size'): self.mem_size = cfg.getint('morlpolicy', 'mem_size') self.training_freq = 1 if cfg.has_option('morlpolicy', 'training_freq'): self.training_freq = cfg.getint('morlpolicy', 'training_freq') # set beta for envelope algorithm self.beta = 0.1 if cfg.has_option('morlpolicy', 'beta'): self.beta = cfg.getfloat('morlpolicy', 'beta') self.beta_init = self.beta self.beta_uplim = 1.00 self.tau = 1000. self.beta_expbase = float( np.power(self.tau * (self.beta_uplim - self.beta), 1. 
/ (self.episode_num + 1))) self.beta_delta = self.beta_expbase / self.tau self.beta -= self.beta_delta # using homotopy method for optimization self.homotopy = False if cfg.has_option('morlpolicy', 'homotopy'): self.homotopy = cfg.getboolean('morlpolicy', 'homotopy') self.epsilon_delta = (self.epsilon - 0.05) / self.episode_num self.episodecount = 0 # construct the models self.state_dim = self.n_in self.summaryaction = SummaryAction.SummaryAction(domainString) if action_names is None: self.action_names = self.summaryaction.action_names else: self.action_names = action_names self.action_dim = len(self.action_names) self.stats = [0 for _ in range(self.action_dim)] self.reward_dim = self.n_rew model = None if self.algorithm == 'naive': model = naive.NaiveLinearCQN(self.state_dim, self.action_dim, self.reward_dim) elif self.algorithm == 'envelope': model = envelope.EnvelopeLinearCQN(self.state_dim, self.action_dim, self.reward_dim) self.model_ = model self.model = copy.deepcopy(model) # initialize memory self.trans_mem = deque() self.trans = namedtuple('trans', ['s', 'a', 's_', 'r', 'd', 'ms', 'ms_']) self.priority_mem = deque() self.mem_last_state = None self.mem_last_action = None self.mem_last_mask = None self.mem_cur_state = None self.mem_cur_action = None self.mem_cur_mask = None if self.optimizer == 'Adam': self.optimizer = optim.Adam(self.model_.parameters(), lr=self.lr) elif self.optimizer == 'RMSprop': self.optimizer = optim.RMSprop(self.model_.parameters(), lr=self.lr) try: self.loadPolicy(self.in_policy_file) except: logger.info("No previous model found...") self.w_kept = None self.update_count = 0 if self.is_training: self.model_.train() if use_cuda: self.model.cuda() self.model_.cuda() self.monitor = None def get_n_in(self, domain_string): if domain_string == 'CamRestaurants': return 268 elif domain_string == 'CamHotels': return 111 elif domain_string == 'SFRestaurants': return 636 elif domain_string == 'SFHotels': return 438 elif domain_string == 'Laptops6': return 268 # ic340: this is wrong elif domain_string == 'Laptops11': return 257 elif domain_string is 'TV': return 188 else: print 'DOMAIN {} SIZE NOT SPECIFIED, PLEASE DEFINE n_in'.format( domain_string) def act_on(self, state, preference=None): if self.lastSystemAction is None and self.startwithhello: systemAct, nextaIdex = 'hello()', -1 else: systemAct, nextaIdex = self.nextAction(state, preference) self.lastSystemAction = systemAct self.summaryAct = nextaIdex self.prevbelief = state systemAct = DiaAct.DiaAct(systemAct) return systemAct def record(self, reward, domainInControl=None, weight=None, state=None, action=None): if domainInControl is None: domainInControl = self.domainString if self.actToBeRecorded is None: self.actToBeRecorded = self.summaryAct if state is None: state = self.prevbelief if action is None: action = self.actToBeRecorded cState, cAction = self.convertStateAction(state, action) execMask = self.summaryaction.getExecutableMask(state, cAction) execMask = torch.Tensor(execMask).type(FloatTensor) # # normalising total return to -1~1 # reward /= 20.0 self.mem_last_state = self.mem_cur_state self.mem_last_action = self.mem_cur_action self.mem_last_mask = self.mem_cur_mask self.mem_cur_state = np.vstack( [np.expand_dims(x, 0) for x in [cState]]) # self.mem_cur_action = np.eye(self.action_dim, self.action_dim)[[cAction]] self.mem_cur_action = cAction self.mem_cur_mask = execMask state = self.mem_last_state action = self.mem_last_action next_state = self.mem_cur_state terminal = False if state is not None and 
action is not None: self.trans_mem.append( self.trans( torch.from_numpy(state).type(FloatTensor), # state action, # action torch.from_numpy(next_state).type( FloatTensor), # next state torch.from_numpy(reward).type(FloatTensor), # reward terminal, # terminal self.mem_last_mask, # action mask self.mem_cur_mask)) # next action mask # randomly produce a preference for calculating priority # preference = self.w_kept preference = torch.randn(self.model_.reward_size) preference = (torch.abs(preference) / torch.norm(preference, p=1)).type(FloatTensor) state = torch.from_numpy(state).type(FloatTensor) _, q = self.model_(Variable(state, requires_grad=False), Variable(preference.unsqueeze(0), requires_grad=False), execmask=Variable( self.mem_last_mask.unsqueeze(0), requires_grad=False)) q = q[0, action].data if self.algorithm == 'naive': wr = preference.dot(torch.from_numpy(reward).type(FloatTensor)) if not terminal: next_state = torch.from_numpy(next_state).type(FloatTensor) hq, _ = self.model_(Variable(next_state, requires_grad=False), Variable(preference.unsqueeze(0), requires_grad=False), execmask=Variable( self.mem_cur_mask.unsqueeze(0), requires_grad=False)) hq = hq.data[0] p = abs(wr + self.gamma * hq - q) else: self.w_kept = None # if self.epsilon_decay: # self.epsilon -= self.epsilon_delta p = abs(wr - q) elif self.algorithm == 'envelope': wq = preference.dot(q) wr = preference.dot(torch.from_numpy(reward).type(FloatTensor)) if not terminal: next_state = torch.from_numpy(next_state).type(FloatTensor) hq, _ = self.model_(Variable(next_state, requires_grad=False), Variable(preference.unsqueeze(0), requires_grad=False), execmask=Variable( self.mem_cur_mask.unsqueeze(0), requires_grad=False)) hq = hq.data[0] whq = preference.dot(hq) p = abs(wr + self.gamma * whq - wq) else: self.w_kept = None # if self.epsilon_decay: # self.epsilon -= self.epsilon_delta # if self.homotopy: # self.beta += self.beta_delta # self.beta_delta = (self.beta - self.beta_init) * self.beta_expbase + self.beta_init - self.beta p = abs(wr - wq) p += 1e-5 self.priority_mem.append(p) if len(self.trans_mem) > self.mem_size: self.trans_mem.popleft() self.priority_mem.popleft() self.actToBeRecorded = None def finalizeRecord(self, reward, domainInControl=None): if domainInControl is None: domainInControl = self.domainString if self.episodes[domainInControl] is None: logger.warning( "record attempted to be finalized for domain where nothing has been recorded before" ) return # # normalising total return to -1~1 # reward /= 20.0 terminal_state, terminal_action = self.convertStateAction( TerminalState(), TerminalAction()) # # normalising total return to -1~1 # reward /= 20.0 self.mem_last_state = self.mem_cur_state self.mem_last_action = self.mem_cur_action self.mem_last_mask = self.mem_cur_mask self.mem_cur_state = np.vstack( [np.expand_dims(x, 0) for x in [terminal_state]]) self.mem_cur_action = None self.mem_cur_mask = torch.zeros(self.action_dim).type(FloatTensor) state = self.mem_last_state action = self.mem_last_action next_state = self.mem_cur_state terminal = True if state is not None: self.trans_mem.append( self.trans( torch.from_numpy(state).type(FloatTensor), # state action, # action torch.from_numpy(next_state).type( FloatTensor), # next state torch.from_numpy(reward).type(FloatTensor), # reward terminal, # terminal self.mem_last_mask, # action mask self.mem_cur_mask)) # next action mask # randomly produce a preference for calculating priority # preference = self.w_kept preference = torch.randn(self.model_.reward_size) 
preference = (torch.abs(preference) / torch.norm(preference, p=1)).type(FloatTensor) state = torch.from_numpy(state).type(FloatTensor) _, q = self.model_( Variable(state, requires_grad=False), Variable(preference.unsqueeze(0), requires_grad=False)) q = q.data[0, action] if self.algorithm == 'naive': wr = preference.dot(torch.from_numpy(reward).type(FloatTensor)) if not terminal: next_state = torch.from_numpy(next_state).type(FloatTensor) hq, _ = self.model_( Variable(next_state, requires_grad=False), Variable(preference.unsqueeze(0), requires_grad=False)) hq = hq.data[0] p = abs(wr + self.gamma * hq - q) else: self.w_kept = None # if self.epsilon_decay: # self.epsilon -= self.epsilon_delta p = abs(wr - q) elif self.algorithm == 'envelope': wq = preference.dot(q) wr = preference.dot(torch.from_numpy(reward).type(FloatTensor)) if not terminal: next_state = torch.from_numpy(next_state).type(FloatTensor) hq, _ = self.model_( Variable(next_state, requires_grad=False), Variable(preference.unsqueeze(0), requires_grad=False)) hq = hq.data[0] whq = preference.dot(hq) p = abs(wr + self.gamma * whq - wq) else: self.w_kept = None # if self.epsilon_decay: # self.epsilon -= self.epsilon_delta # if self.homotopy: # self.beta += self.beta_delta # self.beta_delta = (self.beta - self.beta_init) * self.beta_expbase + self.beta_init - self.beta p = abs(wr - wq) p += 1e-5 self.priority_mem.append(p) if len(self.trans_mem) > self.mem_size: self.trans_mem.popleft() self.priority_mem.popleft() def convertStateAction(self, state, action): ''' nnType = 'dnn' #nnType = 'rnn' # expand one dimension to match the batch size of 1 at axis 0 if nnType == 'rnn': belief = np.expand_dims(belief,axis=0) ''' if isinstance(state, TerminalState): if self.domainUtil.domainString == 'CamRestaurants': return [0] * 268, action elif self.domainUtil.domainString == 'CamHotels': return [0] * 111, action elif self.domainUtil.domainString == 'SFRestaurants': return [0] * 633, action elif self.domainUtil.domainString == 'SFHotels': return [0] * 438, action elif self.domainUtil.domainString == 'Laptops11': return [0] * 257, action elif self.domainUtil.domainString == 'TV': return [0] * 188, action else: flat_belief = flatten_belief(state, self.domainUtil) self.prev_state_check = flat_belief return flat_belief, action def convertDIPStateAction(self, state, action): ''' ''' if isinstance(state, TerminalState): return [0] * 89, action else: dip_state = DIP_state(state.domainStates[state.currentdomain], self.domainString) action_name = self.actions.action_names[action] act_slot = 'general' for slot in dip_state.slots: if slot in action_name: act_slot = slot flat_belief = dip_state.get_beliefStateVec(act_slot) self.prev_state_check = flat_belief return flat_belief, action def nextAction(self, beliefstate, preference=None): ''' select next action :param beliefstate: :param preference: :returns: (int) next summary action ''' beliefVec = flatten_belief(beliefstate, self.domainUtil) execMask = self.summaryaction.getExecutableMask( beliefstate, self.lastSystemAction) execMask = torch.Tensor(execMask).type(FloatTensor) if preference is None: if self.w_kept is None: self.w_kept = torch.randn(self.model_.reward_size) self.w_kept = (torch.abs(self.w_kept) / torch.norm(self.w_kept, p=1)).type(FloatTensor) preference = self.w_kept if self.is_training and (len(self.trans_mem) < self.batch_size * 10 or torch.rand(1)[0] < self.epsilon): admissible = [i for i, x in enumerate(execMask) if x == 0.0] random.shuffle(admissible) nextaIdex = admissible[0] else: 
state = np.reshape(beliefVec, (1, len(beliefVec))) state = torch.from_numpy(state).type(FloatTensor) if self.algorithm == 'naive': _, Q = self.model_( Variable(state, requires_grad=False), Variable(preference.unsqueeze(0), requires_grad=False), Variable(execMask.unsqueeze(0), requires_grad=False)) nextaIdex = np.argmax(Q.detach().cpu().numpy()) elif self.algorithm == 'envelope': _, Q = self.model_(Variable(state, requires_grad=False), Variable(preference.unsqueeze(0), requires_grad=False), execmask=Variable(execMask.unsqueeze(0), requires_grad=False)) Q = Q.view(-1, self.model_.reward_size) Q = torch.mv(Q.data, preference) action = Q.max(0)[1].cpu().numpy() nextaIdex = int(action) self.stats[nextaIdex] += 1 summaryAct = self.action_names[nextaIdex] beliefstate = beliefstate.getDomainState(self.domainUtil.domainString) masterAct = self.summaryaction.Convert(beliefstate, summaryAct, self.lastSystemAction) return masterAct, nextaIdex def sample(self, pop, pri, k): pri = np.array(pri).astype(np.float) inds = np.random.choice(range(len(pop)), k, replace=False, p=pri / pri.sum()) return [pop[i] for i in inds] def actmsk(self, num_dim, index): mask = ByteTensor(num_dim).zero_() mask[index] = 1 return mask.unsqueeze(0) def nontmlinds(self, terminal_batch): mask = ByteTensor(terminal_batch) inds = torch.arange(0, len(terminal_batch)).type(LongTensor) inds = inds[mask.eq(0)] return inds def train(self): ''' call this function when the episode ends ''' self.episodecount += 1 if self.monitor is None: self.monitor = Monitor("-" + self.algorithm) if not self.is_training: logger.info("Not in training mode") return else: logger.info("Update naive morl policy parameters.") logger.info("Episode Num so far: %s" % (self.episodecount)) if len(self.trans_mem) > self.batch_size * 10: self.update_count += 1 minibatch = self.sample(self.trans_mem, self.priority_mem, self.batch_size) batchify = lambda x: list(x) * self.weight_num state_batch = batchify(map(lambda x: x.s, minibatch)) action_batch = batchify(map(lambda x: LongTensor([x.a]), minibatch)) reward_batch = batchify(map(lambda x: x.r.unsqueeze(0), minibatch)) next_state_batch = batchify(map(lambda x: x.s_, minibatch)) terminal_batch = batchify(map(lambda x: x.d, minibatch)) mask_batch = batchify(map(lambda x: x.ms.unsqueeze(0), minibatch)) next_mask_batch = batchify( map(lambda x: x.ms_.unsqueeze(0), minibatch)) w_batch = np.random.randn(self.weight_num, self.model_.reward_size) w_batch = np.abs(w_batch) / \ np.linalg.norm(w_batch, ord=1, axis=1, keepdims=True) w_batch = torch.from_numpy(w_batch.repeat( self.batch_size, axis=0)).type(FloatTensor) if self.algorithm == 'naive': __, Q = self.model_(Variable(torch.cat(state_batch, dim=0)), Variable(w_batch), Variable(torch.cat(mask_batch, dim=0))) # detach since we don't want gradients to propagate # HQ, _ = self.model_(Variable(torch.cat(next_state_batch, dim=0), volatile=True), # Variable(w_batch, volatile=True)) _, DQ = self.model( Variable(torch.cat(next_state_batch, dim=0), requires_grad=False), Variable(w_batch, requires_grad=False), Variable(torch.cat(next_mask_batch, dim=0), requires_grad=False)) _, act = self.model_( Variable(torch.cat(next_state_batch, dim=0), requires_grad=False), Variable(w_batch, requires_grad=False), Variable(torch.cat(next_mask_batch, dim=0), requires_grad=False))[1].max(1) HQ = DQ.gather(1, act.unsqueeze(dim=1)).squeeze() w_reward_batch = torch.bmm( w_batch.unsqueeze(1), torch.cat(reward_batch, dim=0).unsqueeze(2)).squeeze() nontmlmask = self.nontmlinds(terminal_batch) with 
torch.no_grad(): Tau_Q = Variable( torch.zeros(self.batch_size * self.weight_num).type(FloatTensor)) Tau_Q[nontmlmask] = self.gamma * HQ[nontmlmask] Tau_Q += Variable(w_reward_batch) actions = Variable(torch.cat(action_batch, dim=0)) # Compute Huber loss loss = F.smooth_l1_loss(Q.gather(1, actions.unsqueeze(dim=1)), Tau_Q.unsqueeze(dim=1)) elif self.algorithm == 'envelope': action_size = self.model_.action_size reward_size = self.model_.reward_size __, Q = self.model_(Variable(torch.cat(state_batch, dim=0)), Variable(w_batch), w_num=self.weight_num, execmask=Variable( torch.cat(mask_batch, dim=0))) # detach since we don't want gradients to propagate # HQ, _ = self.model_(Variable(torch.cat(next_state_batch, dim=0), volatile=True), # Variable(w_batch, volatile=True), w_num=self.weight_num) _, DQ = self.model(Variable(torch.cat(next_state_batch, dim=0), requires_grad=False), Variable(w_batch, requires_grad=False), execmask=Variable(torch.cat(next_mask_batch, dim=0), requires_grad=False)) w_ext = w_batch.unsqueeze(2).repeat(1, action_size, 1) w_ext = w_ext.view(-1, self.model.reward_size) _, tmpQ = self.model_(Variable(torch.cat(next_state_batch, dim=0), requires_grad=False), Variable(w_batch, requires_grad=False), execmask=Variable(torch.cat( next_mask_batch, dim=0), requires_grad=False)) tmpQ = tmpQ.view(-1, reward_size) # print(torch.bmm(w_ext.unsqueeze(1), # tmpQ.data.unsqueeze(2)).view(-1, action_size)) act = torch.bmm( Variable(w_ext.unsqueeze(1), requires_grad=False), tmpQ.unsqueeze(2)).view(-1, action_size).max(1)[1] HQ = DQ.gather( 1, act.view(-1, 1, 1).expand(DQ.size(0), 1, DQ.size(2))).squeeze() nontmlmask = self.nontmlinds(terminal_batch) with torch.no_grad(): Tau_Q = Variable( torch.zeros(self.batch_size * self.weight_num, reward_size).type(FloatTensor)) Tau_Q[nontmlmask] = self.gamma * HQ[nontmlmask] # Tau_Q.volatile = False Tau_Q += Variable(torch.cat(reward_batch, dim=0)) actions = Variable(torch.cat(action_batch, dim=0)) Q = Q.gather( 1, actions.view(-1, 1, 1).expand(Q.size(0), 1, Q.size(2))).view(-1, reward_size) Tau_Q = Tau_Q.view(-1, reward_size) wQ = torch.bmm(Variable(w_batch.unsqueeze(1)), Q.unsqueeze(2)).squeeze() wTQ = torch.bmm(Variable(w_batch.unsqueeze(1)), Tau_Q.unsqueeze(2)).squeeze() # loss = F.mse_loss(Q.view(-1), Tau_Q.view(-1)) # print self.beta loss = self.beta * F.mse_loss(wQ.view(-1), wTQ.view(-1)) loss += (1 - self.beta) * F.mse_loss(Q.view(-1), Tau_Q.view(-1)) self.optimizer.zero_grad() loss.backward() for param in self.model_.parameters(): param.grad.data.clamp_(-1, 1) self.optimizer.step() if self.update_count % self.update_freq == 0: self.model.load_state_dict(self.model_.state_dict()) self.monitor.update(self.episodecount, loss=loss.data) self.savePolicyInc() # self.out_policy_file) def savePolicy(self, FORCE_SAVE=False): """ Does not use this, cause it will be called from agent after every episode. we want to save the policy only periodically. 
""" pass def savePolicyInc(self, FORCE_SAVE=False): """ save model and replay buffer """ if self.episodecount % self.save_step == 0: torch.save( self.model, "{}.{}.pkl".format(self.out_policy_file, self.algorithm)) def loadPolicy(self, filename): """ load model and replay buffer """ # load models self.model_ = torch.load("{}.{}.pkl".format(filename, self.algorithm)) self.model = copy.deepcopy(self.model_) def restart(self): self.summaryAct = None self.lastSystemAction = None self.prevbelief = None self.actToBeRecorded = None self.w_kept = None if self.epsilon_decay: self.epsilon -= self.epsilon_delta if self.homotopy: self.beta += self.beta_delta self.beta_delta = ( self.beta - self.beta_init ) * self.beta_expbase + self.beta_init - self.beta
def fit(self, x, y_true, x_test, y_test, loss, epochs, batch_size, learning_rate=1e-3, momentum=0.9, weight_decay=0.0002, zeta=0.3, dropoutrate=0., testing=True, save_filename="", monitor=False): """ :param x: (array) Containing parameters :param y_true: (array) Containing one hot encoded labels. :return (array) A 2D array of metrics (epochs, 3). """ if not x.shape[0] == y_true.shape[0]: raise ValueError("Length of x and y arrays don't match") self.monitor = Monitor( save_filename=save_filename) if monitor else None # Initiate the loss object with the final activation function self.loss = loss() self.learning_rate = learning_rate self.momentum = momentum self.weight_decay = weight_decay self.zeta = zeta self.dropout_rate = dropoutrate self.save_filename = save_filename self.input_layer_connections.append(self.get_core_input_connections()) np.savez_compressed(self.save_filename + "_input_connections.npz", inputLayerConnections=self.input_layer_connections) maximum_accuracy = 0 metrics = np.zeros((epochs, 4)) for i in range(epochs): # Shuffle the data seed = np.arange(x.shape[0]) np.random.shuffle(seed) x_ = x[seed] y_ = y_true[seed] if self.monitor: self.monitor.start_monitor() # training t1 = datetime.datetime.now() for j in range(x.shape[0] // batch_size): k = j * batch_size l = (j + 1) * batch_size z, a, masks = self._feed_forward(x_[k:l], True) self._back_prop(z, a, masks, y_[k:l]) t2 = datetime.datetime.now() if self.monitor: self.monitor.stop_monitor() print("\nSET-MLP Epoch ", i) print("Training time: ", t2 - t1) # test model performance on the test data at each epoch # this part is useful to understand model performance and can be commented for production settings if testing: t3 = datetime.datetime.now() accuracy_test, activations_test = self.predict(x_test, y_test) accuracy_train, activations_train = self.predict(x, y_true) t4 = datetime.datetime.now() maximum_accuracy = max(maximum_accuracy, accuracy_test) loss_test = self.loss.loss(y_test, activations_test) loss_train = self.loss.loss(y_true, activations_train) metrics[i, 0] = loss_train metrics[i, 1] = loss_test metrics[i, 2] = accuracy_train metrics[i, 3] = accuracy_test print(f"Testing time: {t4 - t3}\n; Loss test: {loss_test}; \n" f"Accuracy test: {accuracy_test}; \n" f"Maximum accuracy val: {maximum_accuracy}") t5 = datetime.datetime.now() if i < epochs - 1: # do not change connectivity pattern after the last epoch # self.weights_evolution_I() # this implementation is more didactic, but slow. self.weights_evolution_II( ) # this implementation has the same behaviour as the one above, but it is much faster. t6 = datetime.datetime.now() print("Weights evolution time ", t6 - t5) # save performance metrics values in a file if self.save_filename != "": np.savetxt(self.save_filename + ".txt", metrics) if self.save_filename != "" and self.monitor: with open(self.save_filename + "_monitor.json", 'w') as file: file.write( json.dumps(self.monitor.get_stats(), indent=4, sort_keys=True, default=str)) return metrics
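# fit() shuffles the whole training set once per epoch and then walks it in
# contiguous slices of batch_size. The same pattern as a standalone generator
# (a sketch, not the class's own method):
import numpy as np

def _iter_minibatches_sketch(x, y, batch_size):
    order = np.random.permutation(x.shape[0])
    x, y = x[order], y[order]
    for j in range(x.shape[0] // batch_size):
        k, l = j * batch_size, (j + 1) * batch_size
        yield x[k:l], y[k:l]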
def main(args): # process config c = Configs(args.config) ROOT = os.environ['TENSOROFLOW'] model_directory = '%s/examples/model/multi_layer_nmt' % ROOT model_path = '%s/model' % model_directory dictionary_path = { 'source': '%s/source_dictionary.pickle' % model_directory, 'source_reverse': '%s/source_reverse_dictionary.pickle' % model_directory, 'target': '%s/target_dictionary.pickle' % model_directory, 'target_reverse': '%s/target_reverse_dictionary.pickle' % model_directory } PAD = c.const['PAD'] EOS = c.const['EOS'] train_step = c.option['train_step'] max_time = c.option['max_time'] batch_size = c.option['batch_size'] vocabulary_size = c.option['vocabulary_size'] input_embedding_size = c.option['embedding_size'] hidden_units = c.option['hidden_units'] layers = c.option['layers'] source_train_data_path = c.data['source_train_data'] target_train_data_path = c.data['target_train_data'] source_valid_data_path = c.data['source_valid_data'] target_valid_data_path = c.data['target_valid_data'] source_test_data_path = c.data['source_test_data'] target_test_data_path = c.data['target_test_data'] # read data if args.mode == 'train': source_dictionary, source_reverse_dictionary = build_dictionary( read_words(source_train_data_path), vocabulary_size) source_train_datas = [ sentence_to_onehot(lines, source_dictionary) for lines in read_data(source_train_data_path) ] target_dictionary, target_reverse_dictionary = build_dictionary( read_words(target_train_data_path), vocabulary_size) target_train_datas = [ sentence_to_onehot(lines, target_dictionary) for lines in read_data(target_train_data_path) ] source_valid_datas = [ sentence_to_onehot(lines, source_dictionary) for lines in read_data(source_valid_data_path) ] target_valid_datas = [ sentence_to_onehot(lines, target_dictionary) for lines in read_data(target_valid_data_path) ] if args.debug: source_train_datas = source_train_datas[:1000] target_train_datas = source_train_datas[:1000] else: with open(dictionary_path['source'], 'rb') as f1, \ open(dictionary_path['source_reverse'], 'rb') as f2, \ open(dictionary_path['target'], 'rb') as f3, \ open(dictionary_path['target_reverse'], 'rb') as f4: source_dictionary = pickle.load(f1) source_reverse_dictionary = pickle.load(f2) target_dictionary = pickle.load(f3) target_reverse_dictionary = pickle.load(f4) source_test_datas = [ sentence_to_onehot(lines, source_dictionary) for lines in read_data(source_test_data_path) ] target_test_datas = [ sentence_to_onehot(lines, target_dictionary) for lines in read_data(target_test_data_path) ] # placeholder encoder_inputs = tf.placeholder(shape=(None, None), dtype=tf.int32, name='encoder_inputs') decoder_inputs = tf.placeholder(shape=(None, None), dtype=tf.int32, name='decoder_inputs') decoder_labels = tf.placeholder(shape=(None, None), dtype=tf.int32, name='decoder_labels') # embed embeddings = tf.Variable(tf.random_uniform( [vocabulary_size, input_embedding_size], -1.0, 1.0), dtype=tf.float32, name='embeddings') encoder_inputs_embedded = tf.nn.embedding_lookup(embeddings, encoder_inputs) decoder_inputs_embedded = tf.nn.embedding_lookup(embeddings, decoder_inputs) # encoder encoder_units = hidden_units encoder_layers = [ tf.contrib.rnn.LSTMCell(size) for size in [encoder_units] * layers ] encoder_cell = tf.contrib.rnn.MultiRNNCell(encoder_layers) encoder_output, encoder_final_state = tf.nn.dynamic_rnn( encoder_cell, encoder_inputs_embedded, dtype=tf.float32, time_major=True) del encoder_output # decoder decoder_units = encoder_units decoder_layers = [ 
tf.contrib.rnn.LSTMCell(size) for size in [decoder_units] * layers ] decoder_cell = tf.contrib.rnn.MultiRNNCell(decoder_layers) decoder_output, decoder_final_state = tf.nn.dynamic_rnn( decoder_cell, decoder_inputs_embedded, initial_state=encoder_final_state, scope="plain_decoder", dtype=tf.float32, time_major=True) decoder_logits = tf.contrib.layers.linear(decoder_output, vocabulary_size) decoder_prediction = tf.argmax( decoder_logits, 2) # max_time: axis=0, batch: axis=1, vocab: axis=2 # optimizer stepwise_cross_entropy = tf.nn.softmax_cross_entropy_with_logits( labels=tf.one_hot(decoder_labels, depth=vocabulary_size, dtype=tf.float32), logits=decoder_logits, ) loss = tf.reduce_mean(stepwise_cross_entropy) train_op = tf.train.AdamOptimizer().minimize(loss) saver = tf.train.Saver() minibatch_idx = {'train': 0, 'valid': 0, 'test': 0} with tf.Session() as sess: if args.mode == 'train': # train global_max_step = train_step * ( len(source_train_datas) // batch_size + 1) loss_freq = global_max_step // 100 if global_max_step > 100 else 1 loss_log = [] batch_loss_log = [] loss_suffix = '' es = EarlyStopper(max_size=5, edge_threshold=0.1) m = Monitor(global_max_step) sess.run(tf.global_variables_initializer()) global_step = 0 stop_flag = False for batch in range(train_step): if stop_flag: break current_batch_loss_log = [] while True: # minibatch process m.monitor(global_step, loss_suffix) source_train_batch, _ = batchnize(source_train_datas, batch_size, minibatch_idx['train']) target_train_batch, minibatch_idx['train'] = batchnize( target_train_datas, batch_size, minibatch_idx['train']) batch_data = seq2seq(source_train_batch, target_train_batch, max_time, vocabulary_size) feed_dict = { encoder_inputs: batch_data['encoder_inputs'], decoder_inputs: batch_data['decoder_inputs'], decoder_labels: batch_data['decoder_labels'] } sess.run(fetches=[train_op, loss], feed_dict=feed_dict) if global_step % loss_freq == 0: source_valid_batch, _ = batchnize( source_valid_datas, batch_size, minibatch_idx['valid']) target_valid_batch, minibatch_idx['valid'] = batchnize( target_valid_datas, batch_size, minibatch_idx['valid']) batch_data = seq2seq(source_valid_batch, target_valid_batch, max_time, vocabulary_size) feed_dict = { encoder_inputs: batch_data['encoder_inputs'], decoder_inputs: batch_data['decoder_inputs'], decoder_labels: batch_data['decoder_labels'] } loss_val = sess.run(fetches=loss, feed_dict=feed_dict) loss_log.append(loss_val) current_batch_loss_log.append(loss_val) loss_suffix = 'loss: %f' % loss_val es_status = es(loss_val) if batch > train_step // 2 and es_status: print('early stopping at step: %d' % global_step) stop_flag = True break global_step += 1 if minibatch_idx['train'] == 0: batch_loss = np.mean(current_batch_loss_log) batch_loss_log.append(batch_loss) print('Batch: {}/{}, batch loss: {}'.format( batch + 1, train_step, batch_loss)) break # save tf.graph and variables saver.save(sess, model_path) print('save at %s' % model_path) # save plot of loss plt.plot(np.arange(len(loss_log)) * loss_freq, loss_log) plt.savefig('%s_global_loss.png' % model_path) plt.figure() plt.plot(np.arange(len(batch_loss_log)), batch_loss_log) plt.savefig('%s_batch_loss.png' % model_path) # save dictionary with open(dictionary_path['source'], 'wb') as f1, \ open(dictionary_path['source_reverse'], 'wb') as f2, \ open(dictionary_path['target'], 'wb') as f3, \ open(dictionary_path['target_reverse'], 'wb') as f4: pickle.dump(source_dictionary, f1) pickle.dump(source_reverse_dictionary, f2) 
pickle.dump(target_dictionary, f3) pickle.dump(target_reverse_dictionary, f4) elif args.mode == 'eval': saver.restore(sess, model_path) print('load from %s' % model_path) else: raise # args.mode should be train or eval # evaluate loss_val = [] input_vectors = None predict_vectors = None for i in range(len(source_test_datas) // batch_size + 1): source_test_batch, _ = batchnize(source_test_datas, batch_size, minibatch_idx['test']) target_test_batch, minibatch_idx['test'] = batchnize( target_test_datas, batch_size, minibatch_idx['test']) batch_data = seq2seq(source_test_batch, target_test_batch, max_time, vocabulary_size) feed_dict = { encoder_inputs: batch_data['encoder_inputs'], decoder_inputs: batch_data['decoder_inputs'], decoder_labels: batch_data['decoder_labels'] } pred = sess.run(fetches=decoder_prediction, feed_dict=feed_dict) if predict_vectors is None: predict_vectors = pred.T else: predict_vectors = np.vstack((predict_vectors, pred.T)) input_ = batch_data['encoder_inputs'] if input_vectors is None: input_vectors = input_.T else: input_vectors = np.vstack((input_vectors, input_.T)) loss_val.append(sess.run(fetches=loss, feed_dict=feed_dict)) input_sentences = '' predict_sentences = '' for i, (input_vector, predict_vector) in enumerate( zip(input_vectors[:len(source_test_datas)], predict_vectors[:len(target_test_datas)])): input_sentences += ' '.join([ source_reverse_dictionary[vector] for vector in input_vector if not vector == PAD ]) predict_sentences += ' '.join([ target_reverse_dictionary[vector] for vector in predict_vector if not vector == PAD ]) if i < len(source_test_datas) - 1: input_sentences += '\n' predict_sentences += '\n' evaluate_input_path = '%s.evaluate_input' % model_path evaluate_predict_path = '%s.evaluate_predict' % model_path with open(evaluate_input_path, 'w') as f1, \ open(evaluate_predict_path, 'w') as f2: f1.write(input_sentences) f2.write(predict_sentences) print('input sequences at {}'.format(evaluate_input_path)) print('predict sequences at {}'.format(evaluate_predict_path)) print('mean of loss: %f' % np.mean(loss_val)) print('finish.')
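# Training above stops early once EarlyStopper(max_size=5, edge_threshold=0.1)
# reports that validation loss has stalled. Its implementation is not shown;
# one plausible sketch keeps the last max_size losses and signals a stop when
# the window no longer improves by more than edge_threshold:
from collections import deque

class _EarlyStopperSketch(object):
    def __init__(self, max_size=5, edge_threshold=0.1):
        self.history = deque(maxlen=max_size)
        self.edge_threshold = edge_threshold

    def __call__(self, loss):
        self.history.append(loss)
        if len(self.history) < self.history.maxlen:
            return False
        return (self.history[0] - self.history[-1]) < self.edge_threshold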
def main(args): # process config c = Configs(args.config) ROOT = os.environ['TENSOROFLOW'] model_path = '%s/examples/model/basic_nmt/model' % ROOT PAD = c.const['PAD'] EOS = c.const['EOS'] train_step = c.option['train_step'] max_time = c.option['max_time'] batch_size = c.option['batch_size'] vocabulary_size = c.option['vocabulary_size'] input_embedding_size = c.option['embedding_size'] hidden_units = c.option['hidden_units'] source_train_data_path = c.data['source_train_data'] target_train_data_path = c.data['target_train_data'] source_valid_data_path = c.data['source_valid_data'] target_valid_data_path = c.data['target_valid_data'] source_test_data_path = c.data['source_test_data'] target_test_data_path = c.data['target_test_data'] # read data source_dictionary, source_reverse_dictionary = build_dictionary( read_words(source_train_data_path), vocabulary_size) source_train_datas = [ sentence_to_onehot(lines, source_dictionary) for lines in read_data(source_train_data_path) ] target_dictionary, target_reverse_dictionary = build_dictionary( read_words(target_train_data_path), vocabulary_size) target_train_datas = [ sentence_to_onehot(lines, target_dictionary) for lines in read_data(target_train_data_path) ] source_valid_datas = [ sentence_to_onehot(lines, source_dictionary) for lines in read_data(source_valid_data_path) ] target_valid_datas = [ sentence_to_onehot(lines, target_dictionary) for lines in read_data(target_valid_data_path) ] source_test_datas = [ sentence_to_onehot(lines, source_dictionary) for lines in read_data(source_test_data_path) ] target_test_datas = [ sentence_to_onehot(lines, target_dictionary) for lines in read_data(target_test_data_path) ] # placeholder encoder_inputs = tf.placeholder(shape=(None, None), dtype=tf.int32, name='encoder_inputs') decoder_inputs = tf.placeholder(shape=(None, None), dtype=tf.int32, name='decoder_inputs') decoder_labels = tf.placeholder(shape=(None, None), dtype=tf.int32, name='decoder_labels') # embed embeddings = tf.Variable(tf.random_uniform( [vocabulary_size, input_embedding_size], -1.0, 1.0), dtype=tf.float32, name='embeddings') encoder_inputs_embedded = tf.nn.embedding_lookup(embeddings, encoder_inputs) decoder_inputs_embedded = tf.nn.embedding_lookup(embeddings, decoder_inputs) # encoder encoder_units = hidden_units encoder_cell = tf.contrib.rnn.LSTMCell(encoder_units) _, encoder_final_state = tf.nn.dynamic_rnn(encoder_cell, encoder_inputs_embedded, dtype=tf.float32, time_major=True) # decoder decoder_units = encoder_units decoder_cell = tf.contrib.rnn.LSTMCell(decoder_units) decoder_output, decoder_final_state = tf.nn.dynamic_rnn( decoder_cell, decoder_inputs_embedded, initial_state=encoder_final_state, scope="plain_decoder", dtype=tf.float32, time_major=True) decoder_logits = tf.contrib.layers.linear(decoder_output, vocabulary_size) decoder_prediction = tf.argmax( decoder_logits, 2) # max_time: axis=0, batch: axis=1, vocab: axis=2 # optimizer stepwise_cross_entropy = tf.nn.softmax_cross_entropy_with_logits( labels=tf.one_hot(decoder_labels, depth=vocabulary_size, dtype=tf.float32), logits=decoder_logits, ) loss = tf.reduce_mean(stepwise_cross_entropy) train_op = tf.train.AdamOptimizer().minimize(loss) saver = tf.train.Saver() batch_idx = {'train': 0, 'valid': 0, 'test': 0} with tf.Session() as sess: if args.mode == 'train': # train loss_freq = train_step // 100 loss_log = [] loss_suffix = '' es = EarlyStopper(max_size=5, edge_threshold=0.1) m = Monitor(train_step) sess.run(tf.global_variables_initializer()) for i in 
range(train_step): m.monitor(i, loss_suffix) source_train_batch, _ = batchnize(source_train_datas, batch_size, batch_idx['train']) target_train_batch, batch_idx['train'] = batchnize( target_train_datas, batch_size, batch_idx['train']) batch_data = seq2seq(source_train_batch, target_train_batch, max_time, vocabulary_size) feed_dict = { encoder_inputs: batch_data['encoder_inputs'], decoder_inputs: batch_data['decoder_inputs'], decoder_labels: batch_data['decoder_labels'] } sess.run(fetches=[train_op, loss], feed_dict=feed_dict) if i % loss_freq == 0: source_valid_batch, _ = batchnize(source_valid_datas, batch_size, batch_idx['valid']) target_valid_batch, batch_idx['valid'] = batchnize( target_valid_datas, batch_size, batch_idx['valid']) batch_data = seq2seq(source_valid_batch, target_valid_batch, max_time, vocabulary_size) feed_dict = { encoder_inputs: batch_data['encoder_inputs'], decoder_inputs: batch_data['decoder_inputs'], decoder_labels: batch_data['decoder_labels'] } loss_val = sess.run(fetches=loss, feed_dict=feed_dict) loss_log.append(loss_val) loss_suffix = 'loss: %f' % loss_val es_status = es(loss_val) if i > train_step // 2 and es_status: print('early stopping at step: %d' % i) break saver.save(sess, model_path) print('save at %s' % model_path) plt.plot(np.arange(len(loss_log)) * loss_freq, loss_log) plt.savefig('%s_loss.png' % model_path) elif args.mode == 'eval': saver.restore(sess, model_path) print('load from %s' % model_path) else: raise # evaluate loss_val = [] input_vectors = None predict_vectors = None for i in range(len(source_test_datas) // batch_size + 1): source_test_batch, _ = batchnize(source_test_datas, batch_size, batch_idx['test']) target_test_batch, batch_idx['test'] = batchnize( target_test_datas, batch_size, batch_idx['test']) batch_data = seq2seq(source_test_batch, target_test_batch, max_time, vocabulary_size) feed_dict = { encoder_inputs: batch_data['encoder_inputs'], decoder_inputs: batch_data['decoder_inputs'], decoder_labels: batch_data['decoder_labels'] } pred = sess.run(fetches=decoder_prediction, feed_dict=feed_dict) if predict_vectors is None: predict_vectors = pred.T else: predict_vectors = np.vstack((predict_vectors, pred.T)) input_ = batch_data['encoder_inputs'] if input_vectors is None: input_vectors = input_.T else: input_vectors = np.vstack((input_vectors, input_.T)) loss_val.append(sess.run(fetches=loss, feed_dict=feed_dict)) input_sentences = '' predict_sentences = '' for i, (input_vector, predict_vector) in enumerate( zip(input_vectors[:len(source_test_datas)], predict_vectors[:len(target_test_datas)])): input_sentences += ' '.join([ source_reverse_dictionary[vector] for vector in input_vector if not vector == PAD ]) predict_sentences += ' '.join([ target_reverse_dictionary[vector] for vector in predict_vector if not vector == PAD ]) if i < len(source_test_datas) - 1: input_sentences += '\n' predict_sentences += '\n' evaluate_input_path = '%s.evaluate_input' % model_path evaluate_predict_path = '%s.evaluate_predict' % model_path with open(evaluate_input_path, 'w') as f1, \ open(evaluate_predict_path, 'w') as f2: f1.write(input_sentences) f2.write(predict_sentences) print('input sequences at {}'.format(evaluate_input_path)) print('predict sequences at {}'.format(evaluate_predict_path)) print('mean of loss: %f' % np.mean(loss_val)) print('finish.')
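# batchnize(data, batch_size, idx) is called throughout with the pattern
# "batch, next_idx = batchnize(...)", and the index wrapping back to 0 marks a
# full pass over the data. A sketch with that call shape (the real helper may
# differ, e.g. in how it handles the final partial batch):
def _batchnize_sketch(data, batch_size, idx):
    batch = data[idx * batch_size:(idx + 1) * batch_size]
    next_idx = idx + 1
    if next_idx * batch_size >= len(data):
        next_idx = 0  # wrap around; callers detect the wrap via idx == 0
    return batch, next_idx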
def main(args): tf.reset_default_graph() # process config c = Configs(args.config) ROOT = os.environ['TENSOROFLOW'] model_path = '%s/examples/model/multi_layer_seq2seq/model' % ROOT PAD = c.const['PAD'] EOS = c.const['EOS'] train_step = c.option['train_step'] max_time = c.option['max_time'] batch_size = c.option['batch_size'] vocabulary_size = c.option['vocabulary_size'] input_embedding_size = c.option['embedding_size'] hidden_units = c.option['hidden_units'] layers = c.option['layers'] datas = [] # placeholder encoder_inputs = tf.placeholder(shape=(None, None), dtype=tf.int32, name='encoder_inputs') decoder_inputs = tf.placeholder(shape=(None, None), dtype=tf.int32, name='decoder_inputs') decoder_labels = tf.placeholder(shape=(None, None), dtype=tf.int32, name='decoder_labels') # embed embeddings = tf.Variable(tf.random_uniform( [vocabulary_size, input_embedding_size], -1.0, 1.0), dtype=tf.float32, name='embeddings') encoder_inputs_embedded = tf.nn.embedding_lookup(embeddings, encoder_inputs) decoder_inputs_embedded = tf.nn.embedding_lookup(embeddings, decoder_inputs) # encoder encoder_units = hidden_units encoder_layers = [ tf.contrib.rnn.LSTMCell(size) for size in [encoder_units] * layers ] encoder_cell = tf.contrib.rnn.MultiRNNCell(encoder_layers) encoder_output, encoder_final_state = tf.nn.dynamic_rnn( encoder_cell, encoder_inputs_embedded, dtype=tf.float32, time_major=True) del encoder_output # decoder decoder_units = encoder_units decoder_layers = [ tf.contrib.rnn.LSTMCell(size) for size in [decoder_units] * layers ] decoder_cell = tf.contrib.rnn.MultiRNNCell(decoder_layers) decoder_output, decoder_final_state = tf.nn.dynamic_rnn( decoder_cell, decoder_inputs_embedded, initial_state=encoder_final_state, scope="plain_decoder", dtype=tf.float32, time_major=True) decoder_logits = tf.contrib.layers.linear(decoder_output, vocabulary_size) decoder_prediction = tf.argmax( decoder_logits, 2) # max_time: axis=0, batch: axis=1, vocab: axis=2 # optimizer stepwise_cross_entropy = tf.nn.softmax_cross_entropy_with_logits( labels=tf.one_hot(decoder_labels, depth=vocabulary_size, dtype=tf.float32), logits=decoder_logits, ) loss = tf.reduce_mean(stepwise_cross_entropy) train_op = tf.train.AdamOptimizer().minimize(loss) saver = tf.train.Saver() with tf.Session() as sess: if args.mode == 'train': # train loss_freq = train_step // 100 loss_log = [] loss_suffix = '' es = EarlyStopper(max_size=5, edge_threshold=0.1) m = Monitor(train_step) sess.run(tf.global_variables_initializer()) for i in range(train_step): m.monitor(i, loss_suffix) batch_data = through(datas, max_time, batch_size, vocabulary_size) feed_dict = { encoder_inputs: batch_data['encoder_inputs'], decoder_inputs: batch_data['decoder_inputs'], decoder_labels: batch_data['decoder_labels'] } sess.run(fetches=[train_op, loss], feed_dict=feed_dict) if i % loss_freq == 0: batch_data = through(datas, max_time, batch_size, vocabulary_size) feed_dict = { encoder_inputs: batch_data['encoder_inputs'], decoder_inputs: batch_data['decoder_inputs'], decoder_labels: batch_data['decoder_labels'] } loss_val = sess.run(fetches=loss, feed_dict=feed_dict) loss_log.append(loss_val) loss_suffix = 'loss: %f' % loss_val es_status = es(loss_val) if i > train_step // 2 and es_status: print('early stopping at step: %d' % i) break saver.save(sess, model_path) print('save at %s' % model_path) plt.plot(np.arange(len(loss_log)) * loss_freq, loss_log) plt.savefig('%s_loss.png' % model_path) elif args.mode == 'eval': saver.restore(sess, model_path) print('load from %s' % 
model_path) else: raise # evaluate batch_data = through(datas, max_time, batch_size, vocabulary_size) feed_dict = { encoder_inputs: batch_data['encoder_inputs'], decoder_inputs: batch_data['decoder_inputs'], decoder_labels: batch_data['decoder_labels'] } pred = sess.run(fetches=decoder_prediction, feed_dict=feed_dict) input_ = batch_data['encoder_inputs'] loss_val = sess.run(fetches=loss, feed_dict=feed_dict) print('input sequences...\n{}'.format(input_)) print('predict sequences...\n{}'.format(pred)) print('loss: %f' % loss_val) print('finish.')
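# Added sketch: `EarlyStopper(max_size=5, edge_threshold=0.1)` is instantiated above but not
# defined in this file. A minimal, assumed version that signals "stop" once the validation
# loss has stopped improving by more than `edge_threshold` over the last `max_size`
# observations could look like this:
class EarlyStopper:
    def __init__(self, max_size=5, edge_threshold=0.1):
        self.max_size = max_size
        self.edge_threshold = edge_threshold
        self.history = []

    def __call__(self, loss_value):
        """Record a validation loss; return True when training should stop."""
        self.history.append(loss_value)
        if len(self.history) < self.max_size:
            return False
        window = self.history[-self.max_size:]
        return (window[0] - min(window)) < self.edge_threshold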
def run(**kwargs): ''' Setup TF, gym environment, etc. ''' logdir = kwargs['logdir'] seed = kwargs['seed'] headless = kwargs['headless'] if headless: import matplotlib ################################################################ # SEEDS ################################################################ tf.set_random_seed(seed * 20) np.random.seed(seed * 20) ################################################################ # SETUP GYM + RL ALGO ################################################################ env = gym.make('snake-v0') # Make the gym environment ################################################################ # TF BOILERPLATE ################################################################ tf_config = tf.ConfigProto(inter_op_parallelism_threads=1, intra_op_parallelism_threads=1) sess = tf.Session(config=tf_config) def rgb2gray(rgb): return np.dot(rgb[..., :3], [0.299, 0.587, 0.114]) with tf.Session() as sess: network = DQN( sess, create_basic([64, 64, 256], transpose=True), [(env.world.number_of_snakes) * 2 + 1, env.world.screen_width, env.world.screen_height], None, n_actions=4, batch_size=None, gamma=.99, update_freq=None, ddqn=True, # double dqn buffer_size=None, clip_grad=None, batches_per_epoch=None, is_sparse=False, use_priority=False) monitor = Monitor(os.path.join(logdir, 'test_gifs')) # summary_writer = tf.summary.FileWriter(logdir) ## Load model from where you left off ## Does not play nice w/ plots in tensorboard at the moment ## TODO: FIX saver = tf.train.Saver(max_to_keep=2) if True: try: print('Loading Model...') ckpt = tf.train.get_checkpoint_state( os.path.join(os.getcwd(), logdir)) saver.restore(sess, ckpt.model_checkpoint_path) iteration_offset = int( ckpt.model_checkpoint_path.split('-')[-1].split('.')[0]) except: print('Failed to load. Starting from scratch') sess.run(tf.global_variables_initializer()) sess.run(tf.local_variables_initializer()) iteration_offset = 0 else: sess.run(tf.global_variables_initializer()) sess.run(tf.local_variables_initializer()) iteration_offset = 0 ################################################################ # Fill Buffer ################################################################ tic = time.time() total_timesteps = 0 for iteration in range(5): obs = env.reset() # obs = env.render('rgb_array', headless = headless).astype(float) # obs /= obs.max() # obs = rgb2gray(obs) done_n = np.array([False] * env.n_actors) steps = 0 viewer = None while not done_n.all(): if True: if (not viewer) and (not headless): from gym.envs.classic_control import rendering viewer = rendering.SimpleImageViewer() rgb = env.render('rgb_array', headless=headless) scaler = 10 rgb = repeat_upsample(rgb, scaler, scaler) if not headless: viewer.imshow(rgb) time.sleep(.01) monitor.add(rgb, iteration, iteration) last_obs = np.array([[x.A for x in obs]]) acts = network.greedy_select( last_obs, 0) #network.greedy_select([[last_obs]], 0) acts = [str(x) for x in acts] # Next step obs, reward_n, done_n = env.step(acts[-1]) # obs = env.render('rgb_array', headless = headless).astype(float) # obs /= obs.max() # obs = rgb2gray(obs) steps += 1 if steps > 300: break monitor.make_gifs(iteration, fps=12) pdb.set_trace()
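# Added sketch: `repeat_upsample(rgb, scaler, scaler)` is used above to enlarge the rendered
# frame before display but is not defined in this file. A common nearest-neighbour
# implementation based on np.repeat is assumed below.
import numpy as np

def repeat_upsample(rgb_array, k=1, l=1):
    """Upsample an (H, W, 3) image k times along the height and l times along the width."""
    if k <= 1 and l <= 1:
        return rgb_array
    return np.repeat(np.repeat(rgb_array, k, axis=0), l, axis=1)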
def run(**kwargs): ''' Setup TF, gym environment, etc. ''' iterations=kwargs['iterations'] discount=kwargs['discount'] batch_size=kwargs['batch_size'] num_batches=kwargs['num_batches'] max_seq_length=kwargs['max_seq_length'] learning_rate=kwargs['learning_rate'] animate=kwargs['animate'] logdir=kwargs['logdir'] seed=kwargs['seed'] games_played_per_epoch=kwargs['games_played_per_epoch'] load_model = False mcts_iterations=kwargs['mcts_iterations'] batches_per_epoch=kwargs['batches_per_epoch'] headless=kwargs['headless'] update_freq=kwargs['update_freq'] buffer_size=kwargs['buffer_size'] if headless: import matplotlib ################################################################ # SEEDS ################################################################ tf.set_random_seed(seed) np.random.seed(seed) ################################################################ # SETUP GYM + RL ALGO ################################################################ env = gym.make('snake-v0') # Make the gym environment maximum_number_of_steps = max_seq_length #or env.max_episode_steps # Maximum length for episodes ################################################################ # TF BOILERPLATE ################################################################ tf_config = tf.ConfigProto(inter_op_parallelism_threads=1, intra_op_parallelism_threads=1) sess = tf.Session(config=tf_config) summary_writers = [] for idx in np.arange(env.n_actors): summary_writers.append(tf.summary.FileWriter(os.path.join(logdir,'tensorboard','snake_%s' % idx) )) summary_writers.append(tf.summary.FileWriter(os.path.join(logdir,'tensorboard','training_stats') )) def rgb2gray(rgb): return np.dot(rgb[...,:3], [0.299, 0.587, 0.114]) with tf.Session() as sess: network = DQN( sess, create_basic([16,16,64], transpose=True), [1,env.world.screen_width,env.world.screen_height], summary_writers[-1], n_actions=4, batch_size=batch_size, gamma=.99, update_freq=update_freq, ddqn=True, # double dqn buffer_size = buffer_size, clip_grad = None, batches_per_epoch = batches_per_epoch, is_sparse = False ) monitor = Monitor(os.path.join(logdir,'gifs')) epsilon_schedule = LinearSchedule(iterations*9/10, 1.0, 0.01) learning_rate_schedule = PiecewiseSchedule([(0,1e-3),(20000,5e-4),(50000,1e-4)], outside_value=1e-4) saver = tf.train.Saver(max_to_keep=2) # summary_writer = tf.summary.FileWriter(logdir) ## Load model from where you left off ## Does not play nice w/ plots in tensorboard at the moment ## TODO: FIX if load_model == True: try: print ('Loading Model...') ckpt = tf.train.get_checkpoint_state(logdir) saver.restore(sess,ckpt.model_checkpoint_path) iteration_offset = int(ckpt.model_checkpoint_path.split('-')[-1].split('.')[0]) except: print ('Failed to load. Starting from scratch') sess.run(tf.global_variables_initializer()) sess.run(tf.local_variables_initializer()) iteration_offset = 0 else: sess.run(tf.global_variables_initializer()) sess.run(tf.local_variables_initializer()) iteration_offset = 0 summary_writers[0].add_graph(sess.graph) ################################################################ # Fill Buffer ################################################################ tic = time.time() total_timesteps = 0 while not network.buffer.full(N=buffer_size/2): network.buffer.games_played += 1 print 'Game number: %s. 
Buffer_size: %s' % (network.buffer.games_played, network.buffer.buffer_size) _ = env.reset() obs = env.render('rgb_array', headless = headless).astype(float) obs /= obs.max() obs = rgb2gray(obs) done_n = np.array([False]*env.n_actors) steps = 0 while not done_n.all(): last_obs = obs acts = network.greedy_select([[last_obs]], 1.) acts = [str(x) for x in acts] # Next step _, reward_n, done_n = env.step(acts[-1]) obs = env.render('rgb_array', headless = headless).astype(float) obs /= obs.max() obs = rgb2gray(obs) steps += 1 network.store(np.array([[last_obs]]), # state np.array(acts), # action np.array(reward_n), #rewards np.array([[obs]]), #new state np.array(done_n) #done ) if steps > maximum_number_of_steps: done_n[:] = True print 'Filled Buffer' ################################################################ # Train Loop ################################################################ network.buffer.soft_reset() total_number_of_steps_in_iteration = 0 for iteration in range(iteration_offset, iteration_offset + iterations): print('{0} Iteration {1} {0}'.format('*'*10, iteration)) timesteps_in_iteration = 0 if (iteration % update_freq == 0): saver.save(sess,os.path.join(logdir,'model-'+str(iteration)+'.cptk')) print "Saved Model. Timestep count: %s" % iteration total_reward = np.array([0]*env.n_actors) while True: network.buffer.games_played += 1 if (((network.buffer.games_played) % 10) == 0): print 'Epoch: %s. Game number: %s' % (iteration, network.buffer.games_played) _ = env.reset() rgb = obs = env.render('rgb_array', headless = headless).astype(float) obs /= obs.max() obs = rgb2gray(obs) animate_episode = (iteration % (update_freq) == 0) and animate done_n = np.array([False]*env.n_actors) steps = 0 # Runs policy, collects observations and rewards viewer = None while not done_n.all(): if animate_episode: if (not viewer) and (not headless): from gym.envs.classic_control import rendering viewer = rendering.SimpleImageViewer() rgb = env.render('rgb_array', headless = headless) scaler = 10 rgb=repeat_upsample(rgb,scaler,scaler) if not headless: viewer.imshow(rgb) time.sleep(.01) monitor.add(rgb, iteration, network.buffer.games_played) # ob = get_data(np.array(raw_observations)[-2:]) last_obs = obs # Control the exploration acts = network.greedy_select([[last_obs]], epsilon_schedule.value(network.epoch)) # epsilon greedy acts = [str(x) for x in acts] # Next step _, reward_n, done_n = env.step(acts[-1]) obs = env.render('rgb_array', headless = headless).astype(float) obs /= obs.max() obs = rgb2gray(obs) total_reward += np.array(reward_n) if total_number_of_steps_in_iteration % 4 == 0: network.train_step(learning_rate_schedule) total_number_of_steps_in_iteration += 1 steps += 1 network.store(np.array([[last_obs]]), # state np.array(acts), # action np.array(reward_n), #rewards np.array([[obs]]), #new state np.array(done_n) #done ) # terminate the collection of data if the controller shows stability # for a long time. This is a good thing. if steps > maximum_number_of_steps: done_n[:] = True if viewer: viewer.close() if network.buffer.games_played >= 1: break monitor.make_gifs(iteration) for count, writer in enumerate(summary_writers): if count < (len(summary_writers) - 1): summary = tf.Summary() summary.value.add(tag='Average Reward', simple_value=(total_reward[count])) summary.value.add(tag='Steps Taken', simple_value=(steps)) writer.add_summary(summary, iteration) writer.flush()
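# Added sketch: the epsilon schedule above is built with `LinearSchedule(iterations*9/10, 1.0, 0.01)`
# and queried with `.value(t)`, but the class itself is not shown. The assumed signature
# (schedule_timesteps, initial_p, final_p) with linear interpolation, clamped at final_p,
# would be:
class LinearSchedule:
    def __init__(self, schedule_timesteps, initial_p, final_p):
        self.schedule_timesteps = float(schedule_timesteps)
        self.initial_p = initial_p
        self.final_p = final_p

    def value(self, t):
        """Linearly anneal from initial_p to final_p over schedule_timesteps steps."""
        fraction = min(float(t) / self.schedule_timesteps, 1.0)
        return self.initial_p + fraction * (self.final_p - self.initial_p)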
class Dense_MLP: def __init__(self, dimensions, activations): """ :param dimensions: (tpl/ list) Dimensions of the neural net. (input, hidden layer, output) :param activations: (tpl/ list) Activations functions. Example of three hidden layer with - 3312 input features - 3000 hidden neurons - 3000 hidden neurons - 3000 hidden neurons - 5 output classes layers --> [1, 2, 3, 4, 5] ---------------------------------------- dimensions = (3312, 3000, 3000, 3000, 5) activations = ( Relu, Relu, Relu, Sigmoid) """ self.n_layers = len(dimensions) self.loss = None self.learning_rate = None self.momentum = None self.weight_decay = None self.dropout_rate = 0. # dropout rate self.dimensions = dimensions self.save_filename = "" self.monitor = None # Weights and biases are initiated by index. For a one hidden layer net you will have a w[1] and w[2] self.w = {} self.b = {} self.pdw = {} self.pdd = {} # Activations are also initiated by index. For the example we will have activations[2] and activations[3] self.activations = {} for i in range(len(dimensions) - 1): # He uniform initialization limit = np.sqrt(6. / float(dimensions[i])) self.w[i + 1] = np.random.uniform( -limit, limit, (dimensions[i], dimensions[i + 1])) self.b[i + 1] = np.zeros(dimensions[i + 1]) self.activations[i + 2] = activations[i] def _feed_forward(self, x, drop=False): """ Execute a forward feed through the network. :param x: (array) Batch of input data vectors. :return: (tpl) Node outputs and activations per layer. The numbering of the output is equivalent to the layer numbers. """ # w(x) + b z = {} # activations: f(z) a = { 1: x } # First layer has no activations as input. The input x is the input. masks = {} for i in range(1, self.n_layers): z[i + 1] = a[i] @ self.w[i] + self.b[i] a[i + 1] = self.activations[i + 1].activation(z[i + 1]) if drop: if i < self.n_layers - 1: # apply dropout a[i + 1], keep_mask = dropout(a[i + 1], self.dropout_rate) masks[i + 1] = keep_mask return z, a, masks def _back_prop(self, z, a, masks, y_true): """ The input dicts keys represent the layers of the net. a = { 1: x, 2: f(w1(x) + b1) 3: f(w2(a2) + b2) 4: f(w3(a3) + b3) 5: f(w4(a4) + b4) } :param z: (dict) w(x) + b :param a: (dict) f(z) :param y_true: (array) One hot encoded truth vector. :return: """ keep_prob = 1. if self.dropout_rate > 0: keep_prob = np.float32(1. - self.dropout_rate) # Determine partial derivative and delta for the output layer. # delta output layer delta = self.loss.delta(y_true, a[self.n_layers]) dw = np.dot(a[self.n_layers - 1].T, delta) update_params = {self.n_layers - 1: (dw, np.mean(delta, axis=0))} # In case of three layer net will iterate over i = 2 and i = 1 # Determine partial derivative and delta for the rest of the layers. # Each iteration requires the delta from the previous layer, propagating backwards. for i in reversed(range(2, self.n_layers)): # dropout for the backpropagation step if keep_prob != 1: delta = (delta @ self.w[i].transpose() ) * self.activations[i].prime(z[i]) delta = delta * masks[i] delta /= keep_prob else: delta = (delta @ self.w[i].transpose() ) * self.activations[i].prime(z[i]) dw = np.dot(a[i - 1].T, delta) update_params[i - 1] = (dw, np.mean(delta, axis=0)) for k, v in update_params.items(): self._update_w_b(k, v[0], v[1]) def _update_w_b(self, index, dw, delta): """ Update weights and biases. :param index: (int) Number of the layer :param dw: (array) Partial derivatives :param delta: (array) Delta error. 
""" # perform the update with momentum if index not in self.pdw: self.pdw[index] = -self.learning_rate * dw self.pdd[index] = -self.learning_rate * delta else: self.pdw[index] = self.momentum * self.pdw[ index] - self.learning_rate * dw self.pdd[index] = self.momentum * self.pdd[ index] - self.learning_rate * delta self.w[index] += self.pdw[index] - self.weight_decay * self.w[index] self.b[index] += self.pdd[index] - self.weight_decay * self.b[index] def fit(self, x, y_true, x_test, y_test, loss, epochs, batch_size, learning_rate=1e-3, momentum=0.9, weight_decay=0.0002, dropoutrate=0., testing=True, save_filename="", monitor=False): """ :param x: (array) Containing parameters :param y_true: (array) Containing one hot encoded labels. :return (array) A 2D array of metrics (epochs, 3). """ if not x.shape[0] == y_true.shape[0]: raise ValueError("Length of x and y arrays don't match") self.monitor = Monitor( save_filename=save_filename) if monitor else None # Initiate the loss object with the final activation function self.loss = loss() self.learning_rate = learning_rate self.momentum = momentum self.weight_decay = weight_decay self.dropout_rate = dropoutrate self.save_filename = save_filename maximum_accuracy = 0 metrics = np.zeros((epochs, 4)) for i in range(epochs): # Shuffle the data seed = np.arange(x.shape[0]) np.random.shuffle(seed) x_ = x[seed] y_ = y_true[seed] if self.monitor: self.monitor.start_monitor() # training t1 = datetime.datetime.now() for j in range(x.shape[0] // batch_size): k = j * batch_size l = (j + 1) * batch_size z, a, masks = self._feed_forward(x_[k:l], True) self._back_prop(z, a, masks, y_[k:l]) t2 = datetime.datetime.now() if self.monitor: self.monitor.stop_monitor() print("\nDense-MLP Epoch ", i) print("Training time: ", t2 - t1) # test model performance on the test data at each epoch # this part is useful to understand model performance and can be commented for production settings if (testing): t3 = datetime.datetime.now() accuracy_test, activations_test = self.predict( x_test, y_test, batch_size) accuracy_train, activations_train = self.predict( x, y_true, batch_size) t4 = datetime.datetime.now() maximum_accuracy = max(maximum_accuracy, accuracy_test) loss_test = self.loss.loss(y_test, activations_test) loss_train = self.loss.loss(y_true, activations_train) metrics[i, 0] = loss_train metrics[i, 1] = loss_test metrics[i, 2] = accuracy_train metrics[i, 3] = accuracy_test print(f"Testing time: {t4 - t3}\n; Loss test: {loss_test}; \n" f"Accuracy test: {accuracy_test}; \n" f"Maximum accuracy val: {maximum_accuracy}") # save performance metrics values in a file if save_filename != "": np.savetxt(save_filename + ".txt", metrics) if self.save_filename != "" and self.monitor: with open(self.save_filename + "_monitor.json", 'w') as file: file.write( json.dumps(self.monitor.get_stats(), indent=4, sort_keys=True, default=str)) return metrics def predict(self, x_test, y_test, batch_size=100): """ :param x_test: (array) Test input :param y_test: (array) Correct test output :param batch_size: :return: (flt) Classification accuracy :return: (array) A 2D array of shape (n_cases, n_classes). """ activations = np.zeros((y_test.shape[0], y_test.shape[1])) for j in range(x_test.shape[0] // batch_size): k = j * batch_size l = (j + 1) * batch_size _, a_test, _ = self._feed_forward(x_test[k:l], drop=False) activations[k:l] = a_test[self.n_layers] accuracy = compute_accuracy(activations, y_test) return accuracy, activations
def make_env():
    env = make_mujoco_env(args.env, args.seed)
    # env = gym.make(env_id)
    env = Monitor(env, logger.get_dir(), allow_early_resets=True)
    return env
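# Added usage sketch (assumption): `make_env` is a zero-argument constructor of the kind
# usually handed to a vectorized-environment wrapper. If this script follows the common
# OpenAI baselines pattern, the call site might look like the line below; `venv` is a
# hypothetical name and the import path is the standard baselines one, not shown here.
from baselines.common.vec_env.dummy_vec_env import DummyVecEnv

venv = DummyVecEnv([make_env])  # single-process vectorized wrapper around one monitored env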
def run(**kwargs): ''' Setup TF, gym environment, etc. ''' iterations = kwargs['iterations'] discount = kwargs['discount'] batch_size = kwargs['batch_size'] num_batches = kwargs['num_batches'] max_seq_length = kwargs['max_seq_length'] learning_rate = kwargs['learning_rate'] animate = kwargs['animate'] logdir = kwargs['logdir'] seed = kwargs['seed'] games_played_per_epoch = kwargs['games_played_per_epoch'] load_model = False mcts_iterations = kwargs['mcts_iterations'] batches_per_epoch = kwargs['batches_per_epoch'] headless = kwargs['headless'] update_freq = kwargs['update_freq'] buffer_size = kwargs['buffer_size'] use_priority = kwargs['use_priority'] policy_batch_size = kwargs['policy_batch_size'] reservoir_buffer_size = kwargs['reservoir_buffer_size'] if headless: import matplotlib ################################################################ # SEEDS ################################################################ tf.set_random_seed(seed) np.random.seed(seed) ################################################################ # SETUP GYM + RL ALGO ################################################################ env = gym.make('snake-v1') # Make the gym environment maximum_number_of_steps = max_seq_length #or env.max_episode_steps # Maximum length for episodes ################################################################ # TF BOILERPLATE ################################################################ tf_config = tf.ConfigProto(inter_op_parallelism_threads=1, intra_op_parallelism_threads=1) sess = tf.Session(config=tf_config) summary_writers = [] for idx in np.arange(env.n_actors): summary_writers.append( tf.summary.FileWriter( os.path.join(logdir, 'tensorboard', 'snake_%s' % idx))) summary_writers.append( tf.summary.FileWriter( os.path.join(logdir, 'tensorboard', 'training_stats'))) with tf.Session() as sess: networks = [] for i in range(env.n_actors): networks.append( SelfPlay( sess, create_basic([64, 64, 256], transpose=True), [(env.n_actors) * 2 + 1, env.world.screen_width, env.world.screen_height], summary_writers[-1], n_actions=4, batch_size=batch_size, gamma=.99, update_freq=update_freq, ddqn=True, # double dqn buffer_size=buffer_size, clip_grad=None, batches_per_epoch=batches_per_epoch, is_sparse=True, use_priority=use_priority, _id=i, policy_batch_size=policy_batch_size, reservoir_buffer_size=reservoir_buffer_size)) monitor = Monitor(os.path.join(logdir, 'gifs')) epsilon_schedule = PiecewiseSchedule( [(0, .2), (50000, .05), (75000, .01)], outside_value=.01) #LinearSchedule(iterations*60/100, 1., 0.001) eta_schedule = PiecewiseSchedule( [(0, .8), (60000, .4)], outside_value=.4) #LinearSchedule(iterations*60/100, 0.2, 0.1) if use_priority: beta_schedule = LinearSchedule(iterations, 0.4, 1.) learning_rate_schedule = PiecewiseSchedule([(0, 1e-3), (30000, 5e-4), (60000, 1e-4)], outside_value=1e-4) policy_learning_rate_schedule = PiecewiseSchedule([(0, 1e-3), (4000, 5e-4), (20000, 1e-4)], outside_value=1e-4) saver = tf.train.Saver(max_to_keep=2) # summary_writer = tf.summary.FileWriter(logdir) ## Load model from where you left off ## Does not play nice w/ plots in tensorboard at the moment ## TODO: FIX if load_model == True: try: print('Loading Model...') ckpt = tf.train.get_checkpoint_state(logdir) saver.restore(sess, ckpt.model_checkpoint_path) iteration_offset = int( ckpt.model_checkpoint_path.split('-')[-1].split('.')[0]) except: print('Failed to load. 
Starting from scratch') sess.run(tf.global_variables_initializer()) sess.run(tf.local_variables_initializer()) iteration_offset = 0 else: sess.run(tf.global_variables_initializer()) sess.run(tf.local_variables_initializer()) iteration_offset = 0 summary_writers[0].add_graph(sess.graph) ################################################################ # Train Loop ################################################################ tic = time.time() total_timesteps = 0 while not all([ network.buffer.full(N=int(buffer_size / 2.)) for network in networks ]): networks[0].buffer.games_played += 1 print 'Game number: %s. Buffer_sizes: %s' % ( networks[0].buffer.games_played, [network.buffer.buffer_size for network in networks]) obs = env.reset() done_n = np.array([False] * env.n_actors) steps = 0 length_alive = np.array([0] * env.n_actors) viewer = None while not done_n.all(): length_alive[env.world.idxs_of_alive_snakes] += 1 last_obs = obs acts = [] for i, network in enumerate(networks): act = network.greedy_select( np.array([[x.A for x in get_data(last_obs, i)]]), 1.) acts += [str(act[0])] # Next step obs, reward_n, done_n = env.step(acts) steps += 1 for i in env.world.idxs_of_alive_snakes: priority = networks[i].get_error( np.array(get_data(last_obs, i)), np.array(acts[i]), np.array(reward_n[i]), np.array(get_data(obs, i)), np.array(done_n[i])) networks[i].store( np.array(get_data(last_obs, i)), # state np.array(acts[i]), # action np.array(reward_n[i]), #rewards np.array(get_data(obs, i)), #new state np.array(done_n[i]), #done priority=priority) # networks[i].store_reservoir(np.array(get_data(last_obs, i)), # state # np.array(int(acts[i]))) # terminate the collection of data if the controller shows stability # for a long time. This is a good thing. if steps > maximum_number_of_steps: done_n[:] = True print 'Filled Buffer' to_learn = np.array([0] * env.n_actors) frames_seen = np.array([0] * env.n_actors) for iteration in range(iteration_offset, iteration_offset + iterations + 1): print('{0} Iteration {1} {0}'.format('*' * 10, iteration)) networks[0].buffer.soft_reset() timesteps_in_iteration = 0 if (iteration % update_freq == 0): saver.save( sess, os.path.join(logdir, 'model-' + str(iteration) + '.cptk')) print "Saved Model. Timestep count: %s" % iteration total_number_of_steps_in_iteration = 0 total_reward = np.array([0] * env.n_actors) while True: networks[0].buffer.games_played += 1 if (((networks[0].buffer.games_played) % 10) == 0): print 'Epoch: %s. 
Game number: %s' % ( iteration, networks[0].buffer.games_played) obs = env.reset() # raw_observations = [] # raw_observations.append(np.array(obs)) animate_episode = ((networks[0].buffer.games_played - 1) == 0) and (iteration % update_freq == 0) and animate done_n = np.array([False] * env.n_actors) steps = 0 # Runs policy, collects observations and rewards viewer = None length_alive = np.array([0] * env.n_actors) game_time = time.time() action_times = [] learn_times = [] select_from_average = np.array([True] * env.n_actors) for idx in range(select_from_average.shape[0]): r = np.random.uniform() eta = eta_schedule.value(iteration) if (eta > 0) and (r <= eta): select_from_average[idx] = False # Sample from greedy while not done_n.all(): if animate_episode: if (not viewer) and (not headless): from gym.envs.classic_control import rendering viewer = rendering.SimpleImageViewer() rgb = env.render('rgb_array', headless=headless) scaler = 10 rgb = repeat_upsample(rgb, scaler, scaler) if not headless: viewer.imshow(rgb) time.sleep(.01) monitor.add(rgb, iteration, networks[0].buffer.games_played) length_alive[env.world.idxs_of_alive_snakes] += 1 to_learn[env.world.idxs_of_alive_snakes] += 1 # ob = get_data(np.array(raw_observations)[-2:]) last_obs = obs # Control the exploration acts = [] action_time = time.time() for i, network in enumerate(networks): if env.world.snakes[i].alive: act = network.select_from_policy( np.array([[x.A for x in get_data(last_obs, i)]]), epsilon_schedule.value(iteration), select_from_average[i]) acts += [str(act[0])] else: acts += [str(0)] action_times.append(time.time() - action_time) # Next step obs, reward_n, done_n = env.step(acts) total_reward += np.array(reward_n) total_number_of_steps_in_iteration += 1 steps += 1 for i in env.world.idxs_of_alive_snakes: priority = networks[i].get_error( np.array(get_data(last_obs, i)), np.array(acts[i]), np.array(reward_n[i]), np.array(get_data(obs, i)), np.array(done_n[i])) networks[i].store( np.array(get_data(last_obs, i)), # state np.array(acts[i]), # action np.array(reward_n[i]), #rewards np.array(get_data(obs, i)), #new state np.array(done_n[i]), #done priority=priority) if not select_from_average[i]: networks[i].store_reservoir( np.array(get_data(last_obs, i)), # state np.array(int(acts[i]))) # max: to cover all new steps added to buffer, min: to not overdo too much learn_time = time.time() for network_id in [ x for x in range(len(to_learn)) if to_learn[x] >= max( networks[x].batch_size, networks[x].avg_policy_batch_size) ]: to_learn[network_id] = 0 network = networks[network_id] for _ in range(5): frames_seen[network_id] += networks[ network_id].batch_size if use_priority: network.train_step(learning_rate_schedule, beta_schedule) else: network.train_step(learning_rate_schedule) for _ in range(5): if network.reservoir.buffer_size > 0: network.avg_policy_train_step( policy_learning_rate_schedule) learn_times.append(time.time() - learn_time) # terminate the collection of data if the controller shows stability # for a long time. This is a good thing. 
if steps > maximum_number_of_steps: done_n[:] = True if viewer: viewer.close() if networks[0].buffer.games_played >= 1: break game_time = time.time() - game_time monitor.make_gifs(iteration) for count, writer in enumerate(summary_writers[:-1]): summary = tf.Summary() summary.value.add(tag='Average Reward', simple_value=(total_reward[count])) summary.value.add(tag='Steps Taken', simple_value=(length_alive[count])) summary.value.add(tag='Frames Seen', simple_value=frames_seen[count]) writer.add_summary(summary, iteration) writer.flush() summary = tf.Summary() summary.value.add(tag='Time Elapsed/Game', simple_value=game_time) summary.value.add(tag='Time Elapsed/Total Actions', simple_value=np.sum(action_times)) summary.value.add(tag='Time Elapsed/Mean Actions', simple_value=np.mean(action_times)) summary.value.add(tag='Time Elapsed/Max Actions', simple_value=np.max(action_times)) summary.value.add(tag='Time Elapsed/Min Actions', simple_value=np.min(action_times)) summary.value.add(tag='Time Elapsed/Total Learn', simple_value=np.sum(learn_times)) summary.value.add(tag='Time Elapsed/Mean Learn', simple_value=np.mean(learn_times)) summary.value.add(tag='Time Elapsed/Max Learn', simple_value=np.max(learn_times)) summary.value.add(tag='Time Elapsed/Min Learn', simple_value=np.min(learn_times)) summary_writers[-1].add_summary(summary, iteration) summary_writers[-1].flush() print game_time, sum(action_times), sum(learn_times)
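# Added sketch: `PiecewiseSchedule` drives epsilon, eta and both learning rates above but is
# not defined in this file. A minimal version matching the observed call signature -- a list
# of (timestep, value) endpoints, linear interpolation between them, and `outside_value`
# once t leaves the listed range -- is assumed below.
class PiecewiseSchedule:
    def __init__(self, endpoints, outside_value=None):
        self.endpoints = sorted(endpoints)
        self.outside_value = outside_value

    def value(self, t):
        """Piecewise-linear interpolation of the endpoint values at time t."""
        for (left_t, left_v), (right_t, right_v) in zip(self.endpoints[:-1], self.endpoints[1:]):
            if left_t <= t < right_t:
                alpha = float(t - left_t) / (right_t - left_t)
                return left_v + alpha * (right_v - left_v)
        return self.outside_value  # t falls outside the listed endpoints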
class SET_MLP: def __init__(self, dimensions, activations, epsilon=20): """ :param dimensions: (tpl/ list) Dimensions of the neural net. (input, hidden layer, output) :param activations: (tpl/ list) Activations functions. Example of three hidden layer with - 3312 input features - 3000 hidden neurons - 3000 hidden neurons - 3000 hidden neurons - 5 output classes layers --> [1, 2, 3, 4, 5] ---------------------------------------- dimensions = (3312, 3000, 3000, 3000, 5) activations = ( Relu, Relu, Relu, Sigmoid) """ self.n_layers = len(dimensions) self.loss = None self.dropout_rate = 0. # dropout rate self.learning_rate = None self.momentum = None self.weight_decay = None self.epsilon = epsilon # control the sparsity level as discussed in the paper self.zeta = None # the fraction of the weights removed self.dimensions = dimensions self.save_filename = "" self.input_layer_connections = [] self.monitor = None # Weights and biases are initiated by index. For a one hidden layer net you will have a w[1] and w[2] self.w = {} self.b = {} self.pdw = {} self.pdd = {} # Activations are also initiated by index. For the example we will have activations[2] and activations[3] self.activations = {} for i in range(len(dimensions) - 1): self.w[i + 1] = create_sparse_weights( self.epsilon, dimensions[i], dimensions[i + 1]) # create sparse weight matrices self.b[i + 1] = np.zeros(dimensions[i + 1], dtype='float32') self.activations[i + 2] = activations[i] def _feed_forward(self, x, drop=False): """ Execute a forward feed through the network. :param x: (array) Batch of input data vectors. :return: (tpl) Node outputs and activations per layer. The numbering of the output is equivalent to the layer numbers. """ # w(x) + b z = {} # activations: f(z) a = { 1: x } # First layer has no activations as input. The input x is the input. masks = {} for i in range(1, self.n_layers): z[i + 1] = a[i] @ self.w[i] + self.b[i] a[i + 1] = self.activations[i + 1].activation(z[i + 1]) if drop: if i < self.n_layers - 1: # apply dropout a[i + 1], keep_mask = dropout(a[i + 1], self.dropout_rate) masks[i + 1] = keep_mask return z, a, masks def _back_prop(self, z, a, masks, y_true): """ The input dicts keys represent the layers of the net. a = { 1: x, 2: f(w1(x) + b1) 3: f(w2(a2) + b2) 4: f(w3(a3) + b3) 5: f(w4(a4) + b4) } :param z: (dict) w(x) + b :param a: (dict) f(z) :param y_true: (array) One hot encoded truth vector. :return: """ keep_prob = 1. if self.dropout_rate > 0: keep_prob = np.float32(1. - self.dropout_rate) # Determine partial derivative and delta for the output layer. # delta output layer delta = self.loss.delta(y_true, a[self.n_layers]) dw = coo_matrix(self.w[self.n_layers - 1], dtype='float32') # compute backpropagation updates backpropagation_updates_numpy(a[self.n_layers - 1], delta, dw.row, dw.col, dw.data) update_params = { self.n_layers - 1: (dw.tocsr(), np.mean(delta, axis=0)) } # In case of three layer net will iterate over i = 2 and i = 1 # Determine partial derivative and delta for the rest of the layers. # Each iteration requires the delta from the previous layer, propagating backwards. 
for i in reversed(range(2, self.n_layers)): # dropout for the backpropagation step if keep_prob != 1: delta = (delta @ self.w[i].transpose() ) * self.activations[i].prime(z[i]) delta = delta * masks[i] delta /= keep_prob else: delta = (delta @ self.w[i].transpose() ) * self.activations[i].prime(z[i]) dw = coo_matrix(self.w[i - 1], dtype='float32') # compute backpropagation updates backpropagation_updates_numpy(a[i - 1], delta, dw.row, dw.col, dw.data) update_params[i - 1] = (dw.tocsr(), np.mean(delta, axis=0)) for k, v in update_params.items(): self._update_w_b(k, v[0], v[1]) def _update_w_b(self, index, dw, delta): """ Update weights and biases. :param index: (int) Number of the layer :param dw: (array) Partial derivatives :param delta: (array) Delta error. """ # perform the update with momentum if index not in self.pdw: self.pdw[index] = -self.learning_rate * dw self.pdd[index] = -self.learning_rate * delta else: self.pdw[index] = self.momentum * self.pdw[ index] - self.learning_rate * dw self.pdd[index] = self.momentum * self.pdd[ index] - self.learning_rate * delta self.w[index] += self.pdw[index] - self.weight_decay * self.w[index] self.b[index] += self.pdd[index] - self.weight_decay * self.b[index] def fit(self, x, y_true, x_test, y_test, loss, epochs, batch_size, learning_rate=1e-3, momentum=0.9, weight_decay=0.0002, zeta=0.3, dropoutrate=0., testing=True, save_filename="", monitor=False): """ :param x: (array) Containing parameters :param y_true: (array) Containing one hot encoded labels. :return (array) A 2D array of metrics (epochs, 3). """ if not x.shape[0] == y_true.shape[0]: raise ValueError("Length of x and y arrays don't match") self.monitor = Monitor( save_filename=save_filename) if monitor else None # Initiate the loss object with the final activation function self.loss = loss() self.learning_rate = learning_rate self.momentum = momentum self.weight_decay = weight_decay self.zeta = zeta self.dropout_rate = dropoutrate self.save_filename = save_filename self.input_layer_connections.append(self.get_core_input_connections()) np.savez_compressed(self.save_filename + "_input_connections.npz", inputLayerConnections=self.input_layer_connections) maximum_accuracy = 0 metrics = np.zeros((epochs, 4)) for i in range(epochs): # Shuffle the data seed = np.arange(x.shape[0]) np.random.shuffle(seed) x_ = x[seed] y_ = y_true[seed] if self.monitor: self.monitor.start_monitor() # training t1 = datetime.datetime.now() for j in range(x.shape[0] // batch_size): k = j * batch_size l = (j + 1) * batch_size z, a, masks = self._feed_forward(x_[k:l], True) self._back_prop(z, a, masks, y_[k:l]) t2 = datetime.datetime.now() if self.monitor: self.monitor.stop_monitor() print("\nSET-MLP Epoch ", i) print("Training time: ", t2 - t1) # test model performance on the test data at each epoch # this part is useful to understand model performance and can be commented for production settings if testing: t3 = datetime.datetime.now() accuracy_test, activations_test = self.predict(x_test, y_test) accuracy_train, activations_train = self.predict(x, y_true) t4 = datetime.datetime.now() maximum_accuracy = max(maximum_accuracy, accuracy_test) loss_test = self.loss.loss(y_test, activations_test) loss_train = self.loss.loss(y_true, activations_train) metrics[i, 0] = loss_train metrics[i, 1] = loss_test metrics[i, 2] = accuracy_train metrics[i, 3] = accuracy_test print(f"Testing time: {t4 - t3}\n; Loss test: {loss_test}; \n" f"Accuracy test: {accuracy_test}; \n" f"Maximum accuracy val: {maximum_accuracy}") t5 = 
datetime.datetime.now() if i < epochs - 1: # do not change connectivity pattern after the last epoch # self.weights_evolution_I() # this implementation is more didactic, but slow. self.weights_evolution_II( ) # this implementation has the same behaviour as the one above, but it is much faster. t6 = datetime.datetime.now() print("Weights evolution time ", t6 - t5) # save performance metrics values in a file if self.save_filename != "": np.savetxt(self.save_filename + ".txt", metrics) if self.save_filename != "" and self.monitor: with open(self.save_filename + "_monitor.json", 'w') as file: file.write( json.dumps(self.monitor.get_stats(), indent=4, sort_keys=True, default=str)) return metrics def get_core_input_connections(self): values = np.sort(self.w[1].data) first_zero_pos = find_first_pos(values, 0) last_zero_pos = find_last_pos(values, 0) largest_negative = values[int((1 - self.zeta) * first_zero_pos)] smallest_positive = values[int( min(values.shape[0] - 1, last_zero_pos + self.zeta * (values.shape[0] - last_zero_pos)))] wlil = self.w[1].tolil() wdok = dok_matrix((self.dimensions[0], self.dimensions[1]), dtype="float32") # remove the weights closest to zero keep_connections = 0 for ik, (row, data) in enumerate(zip(wlil.rows, wlil.data)): for jk, val in zip(row, data): if (val < largest_negative) or (val > smallest_positive): wdok[ik, jk] = val keep_connections += 1 return wdok.tocsr().getnnz(axis=1) def weights_evolution_I(self): # this represents the core of the SET procedure. It removes the weights closest to zero in each layer and add new random weights for i in range(1, self.n_layers - 1): values = np.sort(self.w[i].data) first_zero_pos = find_first_pos(values, 0) last_zero_pos = find_last_pos(values, 0) largest_negative = values[int((1 - self.zeta) * first_zero_pos)] smallest_positive = values[int( min( values.shape[0] - 1, last_zero_pos + self.zeta * (values.shape[0] - last_zero_pos)))] wlil = self.w[i].tolil() pdwlil = self.pdw[i].tolil() wdok = dok_matrix((self.dimensions[i - 1], self.dimensions[i]), dtype="float32") pdwdok = dok_matrix((self.dimensions[i - 1], self.dimensions[i]), dtype="float32") # remove the weights closest to zero keep_connections = 0 for ik, (row, data) in enumerate(zip(wlil.rows, wlil.data)): for jk, val in zip(row, data): if (val < largest_negative) or (val > smallest_positive): wdok[ik, jk] = val pdwdok[ik, jk] = pdwlil[ik, jk] keep_connections += 1 limit = np.sqrt(6. / float(self.dimensions[i])) # add new random connections for kk in range(self.w[i].data.shape[0] - keep_connections): ik = np.random.randint(0, self.dimensions[i - 1]) jk = np.random.randint(0, self.dimensions[i]) while (wdok[ik, jk] != 0): ik = np.random.randint(0, self.dimensions[i - 1]) jk = np.random.randint(0, self.dimensions[i]) wdok[ik, jk] = np.random.uniform(-limit, limit) pdwdok[ik, jk] = 0 self.pdw[i] = pdwdok.tocsr() self.w[i] = wdok.tocsr() def weights_evolution_II(self): # this represents the core of the SET procedure. 
It removes the weights closest to zero in each layer and add new random weights # improved running time using numpy routines - Amarsagar Reddy Ramapuram Matavalam ([email protected]) for i in range(1, self.n_layers - 1): # uncomment line below to stop evolution of dense weights more than 80% non-zeros # if self.w[i].count_nonzero() / (self.w[i].get_shape()[0]*self.w[i].get_shape()[1]) < 0.8: t_ev_1 = datetime.datetime.now() # converting to COO form - Added by Amar wcoo = self.w[i].tocoo() vals_w = wcoo.data rows_w = wcoo.row cols_w = wcoo.col pdcoo = self.pdw[i].tocoo() vals_pd = pdcoo.data rows_pd = pdcoo.row cols_pd = pdcoo.col # print("Number of non zeros in W and PD matrix before evolution in layer",i,[np.size(valsW), np.size(valsPD)]) values = np.sort(self.w[i].data) first_zero_pos = find_first_pos(values, 0) last_zero_pos = find_last_pos(values, 0) largest_negative = values[int((1 - self.zeta) * first_zero_pos)] smallest_positive = values[int( min( values.shape[0] - 1, last_zero_pos + self.zeta * (values.shape[0] - last_zero_pos)))] #remove the weights (W) closest to zero and modify PD as well vals_w_new = vals_w[(vals_w > smallest_positive) | (vals_w < largest_negative)] rows_w_new = rows_w[(vals_w > smallest_positive) | (vals_w < largest_negative)] cols_w_new = cols_w[(vals_w > smallest_positive) | (vals_w < largest_negative)] new_w_row_col_index = np.stack((rows_w_new, cols_w_new), axis=-1) old_pd_row_col_index = np.stack((rows_pd, cols_pd), axis=-1) new_pd_row_col_index_flag = array_intersect( old_pd_row_col_index, new_w_row_col_index) # careful about order vals_pd_new = vals_pd[new_pd_row_col_index_flag] rows_pd_new = rows_pd[new_pd_row_col_index_flag] cols_pd_new = cols_pd[new_pd_row_col_index_flag] self.pdw[i] = coo_matrix( (vals_pd_new, (rows_pd_new, cols_pd_new)), (self.dimensions[i - 1], self.dimensions[i])).tocsr() if i == 1: self.input_layer_connections.append( coo_matrix( (vals_w_new, (rows_w_new, cols_w_new)), (self.dimensions[i - 1], self.dimensions[i])).getnnz( axis=1)) np.savez_compressed( self.save_filename + "_input_connections.npz", inputLayerConnections=self.input_layer_connections) # add new random connections keep_connections = np.size(rows_w_new) length_random = vals_w.shape[0] - keep_connections limit = np.sqrt(6. / float(self.dimensions[i - 1])) random_vals = np.random.uniform(-limit, limit, length_random) zero_vals = 0 * random_vals # explicit zeros # adding (wdok[ik,jk]!=0): condition while length_random > 0: ik = np.random.randint(0, self.dimensions[i - 1], size=length_random, dtype='int32') jk = np.random.randint(0, self.dimensions[i], size=length_random, dtype='int32') random_w_row_col_index = np.stack((ik, jk), axis=-1) random_w_row_col_index = np.unique( random_w_row_col_index, axis=0) # removing duplicates in new rows&cols oldW_row_col_index = np.stack((rows_w_new, cols_w_new), axis=-1) unique_flag = ~array_intersect( random_w_row_col_index, oldW_row_col_index) # careful about order & tilda ik_new = random_w_row_col_index[unique_flag][:, 0] jk_new = random_w_row_col_index[unique_flag][:, 1] # be careful - row size and col size needs to be verified rows_w_new = np.append(rows_w_new, ik_new) cols_w_new = np.append(cols_w_new, jk_new) length_random = vals_w.shape[0] - np.size( rows_w_new) # this will constantly reduce lengthRandom # adding all the values along with corresponding row and column indices - Added by Amar vals_w_new = np.append( vals_w_new, random_vals) # be careful - we can add to an existing link ? 
# vals_pd_new = np.append(vals_pd_new, zero_vals) # be careful - adding explicit zeros - any reason?? if vals_w_new.shape[0] != rows_w_new.shape[0]: print("not good") self.w[i] = coo_matrix( (vals_w_new, (rows_w_new, cols_w_new)), (self.dimensions[i - 1], self.dimensions[i])).tocsr() # print("Number of non zeros in W and PD matrix after evolution in layer",i,[(self.w[i].data.shape[0]), (self.pdw[i].data.shape[0])]) t_ev_2 = datetime.datetime.now() print("Weights evolution time for layer", i, "is", t_ev_2 - t_ev_1) def predict(self, x_test, y_test, batch_size=100): """ :param x_test: (array) Test input :param y_test: (array) Correct test output :param batch_size: :return: (flt) Classification accuracy :return: (array) A 2D array of shape (n_cases, n_classes). """ activations = np.zeros((y_test.shape[0], y_test.shape[1])) for j in range(x_test.shape[0] // batch_size): k = j * batch_size l = (j + 1) * batch_size _, a_test, _ = self._feed_forward(x_test[k:l], drop=False) activations[k:l] = a_test[self.n_layers] accuracy = compute_accuracy(activations, y_test) return accuracy, activations
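# Added sketch: SET_MLP depends on `create_sparse_weights`, `find_first_pos` and
# `find_last_pos`, none of which appear in this file. The versions below follow the usual
# Erdos-Renyi sparse initialisation (density controlled by epsilon) and the
# "index closest to a value" helpers implied by the pruning code; they are assumptions,
# not the original helpers.
import numpy as np
from scipy.sparse import coo_matrix

def create_sparse_weights(epsilon, n_rows, n_cols):
    """Sparse (n_rows, n_cols) weight matrix; larger epsilon gives a denser matrix."""
    limit = np.sqrt(6. / float(n_rows))
    prob_nonzero = (epsilon * (n_rows + n_cols)) / float(n_rows * n_cols)
    mask = np.random.rand(n_rows, n_cols) < prob_nonzero
    rows, cols = np.nonzero(mask)
    vals = np.random.uniform(-limit, limit, rows.shape[0]).astype('float32')
    return coo_matrix((vals, (rows, cols)), shape=(n_rows, n_cols), dtype='float32').tocsr()

def find_first_pos(sorted_values, value):
    """Index of the first element in `sorted_values` closest to `value`."""
    return int(np.abs(sorted_values - value).argmin())

def find_last_pos(sorted_values, value):
    """Index of the last element in `sorted_values` closest to `value`."""
    return int(sorted_values.shape[0] - 1 - np.abs(sorted_values[::-1] - value).argmin())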
def train(env, agent, args): monitor = Monitor(train=True, spec="-{}".format(args.method)) monitor.init_log( args.log, "m.{}_e.{}_n.{}".format(args.model, args.env_name, args.name)) env.reset() initial_state = env.observe() for num_eps in range(args.episode_num): terminal = False env.reset() loss = 0 cnt = 0 act1 = 0 act2 = 0 tot_reward = 0 tot_reward_nc = 0 tot_reward_dist = 0 mask = None next_mask = None probe = None if args.env_name == "dst": probe = FloatTensor([0.8, 0.2]) elif args.env_name == "crp": probe = FloatTensor([0.5, 0.5]) elif args.env_name in ['ft', 'ft5', 'ft7']: probe = FloatTensor([0.8, 0.2, 0.0, 0.0, 0.0, 0.0]) while not terminal: t_now = time.time() state = env.observe() t_obs = time.time() - t_now t_now = time.time() if args.env_name == "crp": mask = env.env.get_action_out_mask() action = agent.act(state, mask=mask) t_policy = time.time() - t_now t_now = time.time() next_state, reward, terminal = env.step(action, step=0.5) t_step = time.time() - t_now if args.env_name == "crp": next_mask = env.env.get_action_out_mask() if args.log: monitor.add_log(state, action, reward, terminal, agent.w_kept) t_now = time.time() agent.memorize(state, action, next_state, reward, terminal, mask, next_mask) t_mem = time.time() - t_now t_now = time.time() loss += agent.learn() t_learn = time.time() - t_now if terminal: # terminal = True t_now = time.time() agent.reset() t_reset = time.time() - t_now tot_reward = tot_reward + (probe.cpu().numpy().dot(reward)) act1 += reward[0] act2 += reward[1] tot_reward_nc = tot_reward_nc + 1 - reward[0] tot_reward_dist = tot_reward_dist + env.env.get_distortion( absolute=True, tollerance=0) / 10 cnt = cnt + 1 # _, q = agent.predict(probe, initial_state=initial_state) # if args.env_name == "dst": # act_1 = q[0, 3] # act_2 = q[0, 1] if args.env_name == "crp": act_1 = act1 act_2 = act2 # elif args.env_name in ['ft', 'ft5', 'ft7']: # act_1 = q[0, 1] # act_2 = q[0, 0] # if args.method == "crl-naive": # act_1 = act_1.data.cpu() # act_2 = act_2.data.cpu() # elif args.method == "crl-envelope": # act_1 = probe.dot(act_1.data) # act_2 = probe.dot(act_2.data) # elif args.method == "crl-energy": # act_1 = probe.dot(act_1.data) # act_2 = probe.dot(act_2.data) print( "end of eps %d with total reward (1) %0.2f, the Q is %0.2f | %0.2f; loss: %0.4f; total_nc: %0.2f; total_dist: %0.2f;beta : %0.2f;eps : %0.2f;" % ( num_eps, tot_reward, act_1, act_2, # q__max, loss / cnt, tot_reward_nc, tot_reward_dist, agent.beta, agent.epsilon)) # print("t_obs : %0.2f;t_policy : %0.2f;t_step : %0.2f;t_mem : %0.2f;t_learn : %0.2f;t_reset : %0.2f" % ( # t_obs, # t_policy, # t_step, # t_mem, # t_learn, # t_reset,)) monitor.update( num_eps, tot_reward, act_1, act_2, # q__max, loss / cnt) if (num_eps) % 10 == 0: agent.save( args.save, "m.{}_e.{}_n.{}".format(args.model, args.env_name, args.name)) agent.save( args.save, "m.{}_e.{}_n.{}.ep{}".format(args.model, args.env_name, args.name, num_eps // 100))
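# Added illustration: the loop above scalarises the multi-objective reward with the fixed
# preference vector `probe` before accumulating it into tot_reward. A minimal numpy
# equivalent of that bookkeeping, with made-up reward values:
import numpy as np

probe = np.array([0.8, 0.2])      # the "dst" preference over the two objectives
reward = np.array([-1.0, 10.0])   # hypothetical per-step multi-objective reward
scalarised = probe.dot(reward)    # the quantity added to tot_reward each step
print(scalarised)                 # -0.8 + 2.0 = 1.2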
def train(env, agent, args):
    monitor = Monitor(train=True, spec="-{}".format(args.method))
    monitor.init_log(
        args.log, "m.{}_e.{}_n.{}".format(args.model, args.env_name, args.name))
    env.reset()
    for num_eps in range(args.episode_num):
        terminal = False
        env.reset()
        loss = 0
        cnt = 0
        tot_reward = 0
        probe = None
        if args.env_name == "dst":
            probe = FloatTensor([0.8, 0.2])
        elif args.env_name in ['ft', 'ft5', 'ft7']:
            probe = FloatTensor([0.8, 0.2, 0.0, 0.0, 0.0, 0.0])
        while not terminal:
            state = env.observe()
            action = agent.act(state)
            next_state, reward, terminal = env.step(action)
            if args.log:
                monitor.add_log(state, action, reward, terminal, agent.w_kept)
            agent.memorize(state, action, next_state, reward, terminal)
            loss += agent.learn()
            if cnt > 100:
                terminal = True
                agent.reset()
            tot_reward = tot_reward + (
                probe.cpu().numpy().dot(reward)) * np.power(args.gamma, cnt)
            cnt = cnt + 1
        _, q = agent.predict(probe)
        if args.env_name == "dst":
            act_1 = q[0, 3]
            act_2 = q[0, 1]
        elif args.env_name in ['ft', 'ft5', 'ft7']:
            act_1 = q[0, 1]
            act_2 = q[0, 0]
        if args.method == "crl-naive":
            act_1 = act_1.data.cpu()
            act_2 = act_2.data.cpu()
        elif args.method == "crl-envelope":
            act_1 = probe.dot(act_1.data)
            act_2 = probe.dot(act_2.data)
        elif args.method == "crl-energy":
            act_1 = probe.dot(act_1.data)
            act_2 = probe.dot(act_2.data)
        print(
            "end of eps %d with total reward (1) %0.2f, the Q is %0.2f | %0.2f; loss: %0.4f" % (
                num_eps,
                tot_reward,
                act_1,
                act_2,
                # q__max,
                loss / cnt))
        monitor.update(
            num_eps,
            tot_reward,
            act_1,
            act_2,
            # q__max,
            loss / cnt)
        if (num_eps + 1) % 500 == 0:  # save a checkpoint every 500 episodes
            agent.save(
                args.save,
                "m.{}_e.{}_n.{}".format(args.model, args.env_name, args.name))
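# Added illustration: unlike the previous training loop, this one discounts the scalarised
# reward by gamma**cnt before accumulating it. The accumulation over a whole (hypothetical)
# episode is equivalent to:
import numpy as np

gamma = 0.99
probe = np.array([0.8, 0.2, 0.0, 0.0, 0.0, 0.0])  # the 'ft' preference vector used above
rewards = [np.zeros(6), np.zeros(6), np.array([5.0, 1.0, 0.0, 0.0, 0.0, 0.0])]  # made-up episode
tot_reward = sum(probe.dot(r) * gamma ** t for t, r in enumerate(rewards))
print(tot_reward)  # (0.8*5 + 0.2*1) * 0.99**2 = 4.2 * 0.9801 = 4.11642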
def main(args): # process config c = Configs(args.config) ROOT = os.environ['TENSOROFLOW'] output = c.option.get('output', 'examples/model/buf') model_directory = '%s/%s' % (ROOT, output) model_path = '%s/model' % model_directory dictionary_path = { 'source': '%s/source_dictionary.pickle' % model_directory, 'source_reverse': '%s/source_reverse_dictionary.pickle' % model_directory, 'target': '%s/target_dictionary.pickle' % model_directory, 'target_reverse': '%s/target_reverse_dictionary.pickle' % model_directory } PAD = c.const['PAD'] BOS = c.const['BOS'] EOS = c.const['EOS'] train_step = c.option['train_step'] max_time = c.option['max_time'] batch_size = c.option['batch_size'] vocabulary_size = c.option['vocabulary_size'] input_embedding_size = c.option['embedding_size'] hidden_units = c.option['hidden_units'] layers = c.option['layers'] source_train_data_path = c.data['source_train_data'] target_train_data_path = c.data['target_train_data'] source_valid_data_path = c.data['source_valid_data'] target_valid_data_path = c.data['target_valid_data'] source_test_data_path = c.data['source_test_data'] target_test_data_path = c.data['target_test_data'] # initialize output directory if pathlib.Path(model_directory).exists(): print('Warning: model %s is exists.') print('Old model will be overwritten.') while True: print('Do you wanna continue? [yes|no]') command = input('> ') if command == 'yes': shutil.rmtree(model_directory) break elif command == 'no': sys.exit() else: print('You can only input "yes" or "no".') print('Make new model: %s' % model_directory) pathlib.Path(model_directory).mkdir() # read data if args.mode == 'train': source_dictionary, source_reverse_dictionary = build_dictionary( read_words(source_train_data_path), vocabulary_size) source_train_datas = [ sentence_to_onehot(lines, source_dictionary) for lines in read_data(source_train_data_path) ] target_dictionary, target_reverse_dictionary = build_dictionary( read_words(target_train_data_path), vocabulary_size) target_train_datas = [ sentence_to_onehot(lines, target_dictionary) for lines in read_data(target_train_data_path) ] source_valid_datas = [ sentence_to_onehot(lines, source_dictionary) for lines in read_data(source_valid_data_path) ] target_valid_datas = [ sentence_to_onehot(lines, target_dictionary) for lines in read_data(target_valid_data_path) ] if args.debug: source_train_datas = source_train_datas[:1000] target_train_datas = source_train_datas[:1000] else: with open(dictionary_path['source'], 'rb') as f1, \ open(dictionary_path['source_reverse'], 'rb') as f2, \ open(dictionary_path['target'], 'rb') as f3, \ open(dictionary_path['target_reverse'], 'rb') as f4: source_dictionary = pickle.load(f1) source_reverse_dictionary = pickle.load(f2) target_dictionary = pickle.load(f3) target_reverse_dictionary = pickle.load(f4) source_test_datas = [ sentence_to_onehot(lines, source_dictionary) for lines in read_data(source_test_data_path) ] target_test_datas = [ sentence_to_onehot(lines, target_dictionary) for lines in read_data(target_test_data_path) ] # placeholder encoder_inputs = tf.placeholder(shape=(None, None), dtype=tf.int32, name='encoder_inputs') decoder_inputs = tf.placeholder(shape=(None, None), dtype=tf.int32, name='decoder_inputs') decoder_labels = tf.placeholder(shape=(None, None), dtype=tf.int32, name='decoder_labels') # embed embeddings = tf.Variable(tf.random_uniform( [vocabulary_size, input_embedding_size], -1.0, 1.0), dtype=tf.float32, name='embeddings') encoder_inputs_embedded = 
tf.nn.embedding_lookup(embeddings, encoder_inputs) decoder_inputs_embedded = tf.nn.embedding_lookup(embeddings, decoder_inputs) # encoder with bidirection encoder_units = hidden_units encoder_layers_fw = [ tf.contrib.rnn.LSTMCell(size) for size in [encoder_units] * layers ] encoder_cell_fw = tf.contrib.rnn.MultiRNNCell(encoder_layers_fw) encoder_layers_bw = [ tf.contrib.rnn.LSTMCell(size) for size in [encoder_units] * layers ] encoder_cell_bw = tf.contrib.rnn.MultiRNNCell(encoder_layers_bw) (encoder_output_fw, encoder_output_bw), encoder_state = tf.nn.bidirectional_dynamic_rnn( encoder_cell_fw, encoder_cell_bw, encoder_inputs_embedded, dtype=tf.float32, time_major=True) encoder_outputs = tf.concat((encoder_output_fw, encoder_output_bw), 2) encoder_state = tuple( tf.contrib.rnn.LSTMStateTuple( tf.concat((encoder_state[0][layer].c, encoder_state[1][layer].c), 1), tf.concat((encoder_state[0][layer].h, encoder_state[1][layer].h), 1)) for layer in range(layers)) # decoder with attention decoder_units = encoder_units * 2 attention_units = decoder_units decoder_layers = [ tf.contrib.rnn.LSTMCell(size) for size in [decoder_units] * layers ] cell = tf.contrib.rnn.MultiRNNCell(decoder_layers) sequence_length = tf.cast([max_time] * batch_size, dtype=tf.int32) beam_width = 1 tiled_encoder_outputs = tf.contrib.seq2seq.tile_batch( encoder_outputs, multiplier=beam_width) tiled_encoder_final_state = tf.contrib.seq2seq.tile_batch( encoder_state, multiplier=beam_width) tiled_sequence_length = tf.contrib.seq2seq.tile_batch( sequence_length, multiplier=beam_width) attention_mechanism = tf.contrib.seq2seq.LuongAttention( num_units=attention_units, memory=tiled_encoder_outputs, memory_sequence_length=tiled_sequence_length) attention_cell = tf.contrib.seq2seq.AttentionWrapper( cell, attention_mechanism, attention_layer_size=256) decoder_initial_state = attention_cell.zero_state(dtype=tf.float32, batch_size=batch_size * beam_width) decoder_initial_state = decoder_initial_state.clone( cell_state=tiled_encoder_final_state) if args.mode == 'train': helper = tf.contrib.seq2seq.TrainingHelper( inputs=decoder_inputs_embedded, sequence_length=tf.cast([max_time] * batch_size, dtype=tf.int32), time_major=True) elif args.mode == 'eval': """ helper = tf.contrib.seq2seq.TrainingHelper( inputs=decoder_inputs_embedded, sequence_length=tf.cast([max_time] * batch_size, dtype=tf.int32), time_major=True) """ helper = tf.contrib.seq2seq.GreedyEmbeddingHelper( embedding=embeddings, start_tokens=tf.tile([BOS], [batch_size]), end_token=EOS) decoder = tf.contrib.seq2seq.BasicDecoder( cell=attention_cell, helper=helper, initial_state=decoder_initial_state) decoder_outputs = tf.contrib.seq2seq.dynamic_decode( decoder=decoder, output_time_major=True, impute_finished=False, maximum_iterations=max_time) decoder_logits = tf.contrib.layers.linear(decoder_outputs[0][0], vocabulary_size) decoder_prediction = tf.argmax( decoder_logits, 2) # max_time: axis=0, batch: axis=1, vocab: axis=2 # optimizer stepwise_cross_entropy = tf.nn.softmax_cross_entropy_with_logits( labels=tf.one_hot(decoder_labels, depth=vocabulary_size, dtype=tf.float32), logits=decoder_logits, ) loss = tf.reduce_mean(stepwise_cross_entropy) regularizer = 0.0 * tf.nn.l2_loss(decoder_outputs[0][0]) train_op = tf.train.AdamOptimizer().minimize(loss + regularizer) saver = tf.train.Saver() minibatch_idx = {'train': 0, 'valid': 0, 'test': 0} with tf.Session() as sess: if args.mode == 'train': # train global_max_step = train_step * ( len(source_train_datas) // batch_size + 1) loss_freq 
saver = tf.train.Saver()
minibatch_idx = {'train': 0, 'valid': 0, 'test': 0}
with tf.Session() as sess:
    if args.mode == 'train':
        # train
        global_max_step = train_step * (
            len(source_train_datas) // batch_size + 1)
        loss_freq = global_max_step // 100 if global_max_step > 100 else 1
        loss_log = []
        batch_loss_log = []
        loss_suffix = ''
        es = EarlyStopper(max_size=5, edge_threshold=0.1)
        m = Monitor(global_max_step)
        log = Logger('%s/log' % model_directory)
        sess.run(tf.global_variables_initializer())
        global_step = 0
        stop_flag = False
        for batch in range(train_step):
            if stop_flag:
                break
            current_batch_loss_log = []
            while True:
                # minibatch process
                m.monitor(global_step, loss_suffix)
                source_train_batch, _ = batchnize(source_train_datas,
                                                  batch_size,
                                                  minibatch_idx['train'])
                target_train_batch, minibatch_idx['train'] = batchnize(
                    target_train_datas, batch_size, minibatch_idx['train'])
                batch_data = seq2seq(source_train_batch,
                                     target_train_batch,
                                     max_time,
                                     vocabulary_size,
                                     reverse=True)
                feed_dict = {
                    encoder_inputs: batch_data['encoder_inputs'],
                    decoder_inputs: batch_data['decoder_inputs'],
                    decoder_labels: batch_data['decoder_labels']
                }
                sess.run(fetches=[train_op, loss], feed_dict=feed_dict)

                if global_step % loss_freq == 0:
                    source_valid_batch, _ = batchnize(
                        source_valid_datas, batch_size, minibatch_idx['valid'])
                    target_valid_batch, minibatch_idx['valid'] = batchnize(
                        target_valid_datas, batch_size, minibatch_idx['valid'])
                    batch_data = seq2seq(source_valid_batch,
                                         target_valid_batch,
                                         max_time,
                                         vocabulary_size,
                                         reverse=True)
                    feed_dict = {
                        encoder_inputs: batch_data['encoder_inputs'],
                        decoder_inputs: batch_data['decoder_inputs'],
                        decoder_labels: batch_data['decoder_labels']
                    }
                    loss_val = sess.run(fetches=loss, feed_dict=feed_dict)
                    loss_log.append(loss_val)
                    current_batch_loss_log.append(loss_val)
                    loss_suffix = 'loss: %f' % loss_val
                global_step += 1

                if minibatch_idx['train'] == 0:
                    # one full pass over the training data is finished
                    batch_loss = np.mean(current_batch_loss_log)
                    batch_loss_log.append(batch_loss)
                    loss_msg = 'Batch: {}/{}, batch loss: {}'.format(
                        batch + 1, train_step, batch_loss)
                    print(loss_msg)
                    log(loss_msg)
                    es_status = es(batch_loss)
                    if batch > train_step // 2 and es_status:
                        print('early stopping at step: %d' % global_step)
                        stop_flag = True
                    break

        # save tf.graph and variables
        saver.save(sess, model_path)
        print('save at %s' % model_path)

        # save plot of loss
        plt.plot(np.arange(len(loss_log)) * loss_freq, loss_log)
        plt.savefig('%s_global_loss.png' % model_path)
        plt.figure()
        plt.plot(np.arange(len(batch_loss_log)), batch_loss_log)
        plt.savefig('%s_batch_loss.png' % model_path)

        # save dictionary
        with open(dictionary_path['source'], 'wb') as f1, \
                open(dictionary_path['source_reverse'], 'wb') as f2, \
                open(dictionary_path['target'], 'wb') as f3, \
                open(dictionary_path['target_reverse'], 'wb') as f4:
            pickle.dump(source_dictionary, f1)
            pickle.dump(source_reverse_dictionary, f2)
            pickle.dump(target_dictionary, f3)
            pickle.dump(target_reverse_dictionary, f4)

    elif args.mode == 'eval':
        saver.restore(sess, model_path)
        print('load from %s' % model_path)

    else:
        raise ValueError('args.mode should be train or eval')
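    # The evaluation pass below is shared by both modes: it decodes the test set
    # batch by batch, transposes the time-major predictions and encoder inputs to
    # batch-major before stacking them, and accumulates the per-batch loss so the
    # mean can be reported at the end.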
    # evaluate
    loss_val = []
    input_vectors = None
    predict_vectors = None
    for i in range(len(source_test_datas) // batch_size + 1):
        source_test_batch, _ = batchnize(source_test_datas, batch_size,
                                         minibatch_idx['test'])
        target_test_batch, minibatch_idx['test'] = batchnize(
            target_test_datas, batch_size, minibatch_idx['test'])
        batch_data = seq2seq(source_test_batch,
                             target_test_batch,
                             max_time,
                             vocabulary_size,
                             reverse=True)
        feed_dict = {
            encoder_inputs: batch_data['encoder_inputs'],
            decoder_inputs: batch_data['decoder_inputs'],
            decoder_labels: batch_data['decoder_labels']
        }
        pred = sess.run(fetches=decoder_prediction, feed_dict=feed_dict)
        if predict_vectors is None:
            predict_vectors = pred.T
        else:
            predict_vectors = np.vstack((predict_vectors, pred.T))
        input_ = batch_data['encoder_inputs']
        if input_vectors is None:
            input_vectors = input_.T
        else:
            input_vectors = np.vstack((input_vectors, input_.T))
        loss_val.append(sess.run(fetches=loss, feed_dict=feed_dict))

    input_sentences = ''
    predict_sentences = ''
    ignore_token = EOS
    for i, (input_vector, predict_vector) in enumerate(
            zip(input_vectors[:len(source_test_datas)],
                predict_vectors[:len(target_test_datas)])):
        input_sentences += ' '.join([
            source_reverse_dictionary[vector] for vector in input_vector
            if not vector == ignore_token
        ])
        predict_sentences += ' '.join([
            target_reverse_dictionary[vector] for vector in predict_vector
            if not vector == ignore_token
        ])
        if i < len(source_test_datas) - 1:
            input_sentences += '\n'
            predict_sentences += '\n'

    evaluate_input_path = '%s.evaluate_input' % model_path
    evaluate_predict_path = '%s.evaluate_predict' % model_path
    with open(evaluate_input_path, 'w') as f1, \
            open(evaluate_predict_path, 'w') as f2:
        f1.write(input_sentences)
        f2.write(predict_sentences)
    print('input sequences at {}'.format(evaluate_input_path))
    print('predict sequences at {}'.format(evaluate_predict_path))
    print('mean of loss: %f' % np.mean(loss_val))

print('finish.')
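
# Illustrative helper (not in the original script): the id-to-text conversion used when
# writing the evaluation files above, factored into a function. The name is hypothetical;
# reverse_dictionary is assumed to map token ids to strings and ignore_token to be the
# EOS id, mirroring the variables used in the evaluation block.
def ids_to_sentence(id_vector, reverse_dictionary, ignore_token):
    """Join the looked-up tokens of id_vector with spaces, skipping ignore_token."""
    return ' '.join(reverse_dictionary[token_id] for token_id in id_vector
                    if token_id != ignore_token)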