Example #1
class Preprocessor:
    def __init__(self, data_dir, start_index=0):
        self.normalizer = Normalizer()
        self.data_dir = data_dir
        self.docs_number = len(glob(os.path.join(self.data_dir, '**', '*.nxml')))
        self.inserter = Inserter()
        self.start_index = start_index


    def preprocess(self):
        print('Start time: {0}'.format(datetime.now()))
        for index, file_name in enumerate(sorted(glob(os.path.join(self.data_dir, '**', '*.nxml')))):
            if index >= self.start_index:
                terms = self.normalizer.normalize(file_name)

                for term in set(terms):
                    self.inserter.insert(term, self.doc_id(file_name), terms.count(term))

                if index % 100 == 0:
                    print('processing doc {0}/{1}'.format(index + 1, self.docs_number))

        print('Finished at: {0}'.format(datetime.now()))


    def doc_id(self, file_name):
        return os.path.splitext(os.path.basename(file_name))[0]
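
A minimal usage sketch for the class above (hypothetical: it assumes a local `data/` directory of `.nxml` files and that `Normalizer` and `Inserter` are importable from the same project):

if __name__ == '__main__':
    # Hypothetical entry point: index every .nxml document under ./data,
    # starting from the first one.
    preprocessor = Preprocessor('data', start_index=0)
    preprocessor.preprocess()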
Example #2
    def test_compare_to_scikit_learn_changing_k(self):
        normalizer = Normalizer(self.data)
        data = normalizer.normalize()

        testSize = 100
        trainSize = len(data.data) - testSize
        for i in range(1, 12):
            with self.subTest(i=i):
                print("k: ", i)
                neighbours = i

                trainData = {}
                testData = {}

                trainData['data'] = data.data[:trainSize]
                trainData['target'] = data.target[:trainSize]

                testData['data'] = data.data[trainSize:]
                testData['target'] = data.target[trainSize:]
                knn = KNN(trainData)

                #scikit-learn model:
                model = KNeighborsClassifier(n_neighbors=neighbours)
                model.fit(trainData['data'], trainData['target'])

                ourCounter = 0
                sciCounter = 0
                for j, e in enumerate(testData['data']):
                    if knn.makeGuess(e, neighbours) == testData['target'][j]:
                        ourCounter += 1

                    if model.predict([e]) == testData['target'][j]:
                        sciCounter += 1

                self.assertAlmostEqual(ourCounter/(testSize), sciCounter/(testSize), 3)
Example #3
def wrangle(path, out_path):
    """
    An example to show how to use wrangler

    :param path: path to input data file
    :param out_path: path to store normalized data

    """

    spark = SparkSession.builder.getOrCreate()

    data = spark.read.csv(path, header=True, encoding='utf-8')

    functions = [lowercase, trim]

    # hospital cols
    columns = data.columns

    transformer = Transformer(functions, columns)

    data = transformer.transform(data)

    cols_info = list()

    # hospital cols
    for col in data.columns:
        cols_info.append(ColNormInfo(col))

    normalizer = Normalizer(cols_info)

    data = normalizer.normalize(data)

    data.toPandas().to_csv(out_path, index=False, header=True)
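
A hedged invocation sketch for `wrangle` (the CSV paths below are placeholders, and `lowercase`, `trim`, `Transformer`, `ColNormInfo`, and `Normalizer` are assumed to come from the same wrangler package as the function above):

if __name__ == '__main__':
    # Hypothetical input/output locations; adjust to your data.
    wrangle('data/hospital.csv', 'data/hospital_normalized.csv')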
Example #4
class classifier(AffineModel):
    def __init__(self, **kwargs):
        self.normalizer = None
        AffineModel.__init__(self, **kwargs)

    def fit(self, data, *args, **kwargs):
        self.normalizer = Normalizer(data)
        return AffineModel.fit(self, data, *args, **kwargs)

    def score(self, data, *args, **kwargs):
        d = self.normalizer.normalize(data)
        return AffineModel.score(self, d, *args, **kwargs)
Example #5
	def clean(self, tweets):
		for tw in tweets:
			count = 0
			for t in tweets[tw]:
				norm = Normalizer()
				stp = StpRemoval()
				t['text_clean'] = t['text'].encode('utf-8', errors='ignore')
				t['text_clean'] = t['text_clean'].translate(string.maketrans(string.punctuation, ' ' * len(string.punctuation)))
				text = norm.normalize(t['text_clean'])
				text = stp.removeStp(text)
				tweets[tw][count]['text_clean'] = text.lower()
				count = count + 1
		return tweets
Example #6
def main(in_subt, out_subt):
    assert in_subt != ""
    assert out_subt != ""

    parser = Parser()
    normalizer = Normalizer()
    lemma_filter = Filter()

    try:
        f = codecs.open(in_subt, 'r', encoding='utf8')
        text = f.read()
        f.close()
    except IOError:
        sys.exit("The subtitle could not be found in the path you provided.")

    parser.parse(text)
    normalizer.normalize(parser.get_text())
    lemma_filter.clean_lemmas(normalizer.get_lemmas())

    new_sub = Subtitle(parser.get_indexes(), parser.get_times(),
                       parser.get_text(), lemma_filter.get_final_lemmas(),
                       lemma_filter.get_dict(), out_subt)
    new_sub.create_subtitle()
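
A hedged entry-point sketch for `main` above (the subtitle filenames are placeholders; `Parser`, `Normalizer`, `Filter`, and `Subtitle` are assumed to ship with the same project):

if __name__ == '__main__':
    # Hypothetical subtitle paths: the original file and the filtered output.
    main('movie.srt', 'movie_filtered.srt')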
Example #7
class Agent:
    def __init__(self,
                 n_states,
                 n_actions,
                 n_goals,
                 action_bounds,
                 capacity,
                 env,
                 k_future,
                 batch_size,
                 action_size=1,
                 tau=0.05,
                 actor_lr=1e-3,
                 critic_lr=1e-3,
                 gamma=0.98):
        self.device = device("cpu")
        self.n_states = n_states
        self.n_actions = n_actions
        self.n_goals = n_goals
        self.k_future = k_future
        self.action_bounds = action_bounds
        self.action_size = action_size
        self.env = env

        self.actor = Actor(self.n_states,
                           n_actions=self.n_actions,
                           n_goals=self.n_goals).to(self.device)
        self.critic = Critic(self.n_states,
                             action_size=self.action_size,
                             n_goals=self.n_goals).to(self.device)
        self.sync_networks(self.actor)
        self.sync_networks(self.critic)
        self.actor_target = Actor(self.n_states,
                                  n_actions=self.n_actions,
                                  n_goals=self.n_goals).to(self.device)
        self.critic_target = Critic(self.n_states,
                                    action_size=self.action_size,
                                    n_goals=self.n_goals).to(self.device)
        self.init_target_networks()
        self.tau = tau
        self.gamma = gamma

        self.capacity = capacity
        self.memory = Memory(self.capacity, self.k_future, self.env)

        self.batch_size = batch_size
        self.actor_lr = actor_lr
        self.critic_lr = critic_lr
        self.actor_optim = Adam(self.actor.parameters(), self.actor_lr)
        self.critic_optim = Adam(self.critic.parameters(), self.critic_lr)

        self.state_normalizer = Normalizer(self.n_states[0],
                                           default_clip_range=5)
        self.goal_normalizer = Normalizer(self.n_goals, default_clip_range=5)

    def choose_action(self, state, goal, train_mode=True):
        # takes the state and goal, concatenates them, and passes the result to the actor network;
        # the actor returns an action, to which exploration noise is added before it is returned
        state = self.state_normalizer.normalize(state)
        goal = self.goal_normalizer.normalize(goal)
        state = np.expand_dims(state, axis=0)
        goal = np.expand_dims(goal, axis=0)

        with torch.no_grad():
            x = np.concatenate([state, goal], axis=1)
            x = from_numpy(x).float().to(self.device)
            action = self.actor(x)[0].cpu().data.numpy()

        if train_mode:
            action += 0.2 * np.random.randn(self.n_actions)
            action = np.clip(action, self.action_bounds[0],
                             self.action_bounds[1])

            random_actions = np.random.uniform(low=self.action_bounds[0],
                                               high=self.action_bounds[1],
                                               size=self.n_actions)
            action += np.random.binomial(1, 0.3,
                                         1)[0] * (random_actions - action)

        return action

    def store(self, mini_batch):
        for batch in mini_batch:
            self.memory.add(batch)
        self._update_normalizer(mini_batch)

    def init_target_networks(self):
        self.hard_update_networks(self.actor, self.actor_target)
        self.hard_update_networks(self.critic, self.critic_target)

    @staticmethod
    def hard_update_networks(local_model, target_model):
        target_model.load_state_dict(local_model.state_dict())

    @staticmethod
    def soft_update_networks(local_model, target_model, tau=0.05):
        for t_params, e_params in zip(target_model.parameters(),
                                      local_model.parameters()):
            t_params.data.copy_(tau * e_params.data +
                                (1 - tau) * t_params.data)

    def train(self):
        states, actions, rewards, next_states, goals = self.memory.sample(
            self.batch_size)

        states = self.state_normalizer.normalize(states)
        next_states = self.state_normalizer.normalize(next_states)
        goals = self.goal_normalizer.normalize(goals)
        inputs = np.concatenate([states, goals], axis=1)
        next_inputs = np.concatenate([next_states, goals], axis=1)

        inputs = torch.Tensor(inputs).to(self.device)
        rewards = torch.Tensor(rewards).to(self.device)
        next_inputs = torch.Tensor(next_inputs).to(self.device)
        actions = torch.Tensor(actions).to(self.device)

        with torch.no_grad():
            #get Qmax
            target_q = self.critic_target(next_inputs,
                                          self.actor_target(next_inputs))
            # apply the Bellman equation to the target Q to obtain the target return for the (state, action) pair
            target_returns = rewards + self.gamma * target_q.detach()
            target_returns = torch.clamp(target_returns, -1 / (1 - self.gamma),
                                         0)

        # use the critic to compute the actual Q values for the sampled (state, action) pairs
        q_eval = self.critic(inputs, actions)
        critic_loss = (target_returns - q_eval).pow(2).mean()

        a = self.actor(inputs)
        actor_loss = -self.critic(inputs, a).mean()
        actor_loss += a.pow(2).mean()

        self.actor_optim.zero_grad()
        actor_loss.backward()
        self.sync_grads(self.actor)
        self.actor_optim.step()

        self.critic_optim.zero_grad()
        critic_loss.backward()
        self.sync_grads(self.critic)
        self.critic_optim.step()

        return actor_loss.item(), critic_loss.item()

    def save_weights(self):
        torch.save(
            {
                "actor_state_dict": self.actor.state_dict(),
                "state_normalizer_mean": self.state_normalizer.mean,
                "state_normalizer_std": self.state_normalizer.std,
                "goal_normalizer_mean": self.goal_normalizer.mean,
                "goal_normalizer_std": self.goal_normalizer.std
            }, "NBM_FetchPickAndPlace_v2.pth")

    def load_weights(self):

        checkpoint = torch.load("NBM_FetchPickAndPlace_v2.pth")
        actor_state_dict = checkpoint["actor_state_dict"]
        self.actor.load_state_dict(actor_state_dict)
        state_normalizer_mean = checkpoint["state_normalizer_mean"]
        self.state_normalizer.mean = state_normalizer_mean
        state_normalizer_std = checkpoint["state_normalizer_std"]
        self.state_normalizer.std = state_normalizer_std
        goal_normalizer_mean = checkpoint["goal_normalizer_mean"]
        self.goal_normalizer.mean = goal_normalizer_mean
        goal_normalizer_std = checkpoint["goal_normalizer_std"]
        self.goal_normalizer.std = goal_normalizer_std

    def set_to_eval_mode(self):
        self.actor.eval()
        # self.critic.eval()

    def update_networks(self):
        self.soft_update_networks(self.actor, self.actor_target, self.tau)
        self.soft_update_networks(self.critic, self.critic_target, self.tau)

    def _update_normalizer(self, mini_batch):
        states, goals = self.memory.sample_for_normalization(mini_batch)

        self.state_normalizer.update(states)
        self.goal_normalizer.update(goals)
        self.state_normalizer.recompute_stats()
        self.goal_normalizer.recompute_stats()

    @staticmethod
    def sync_networks(network):
        comm = MPI.COMM_WORLD
        flat_params = _get_flat_params_or_grads(network, mode='params')
        comm.Bcast(flat_params, root=0)
        _set_flat_params_or_grads(network, flat_params, mode='params')

    @staticmethod
    def sync_grads(network):
        flat_grads = _get_flat_params_or_grads(network, mode='grads')
        comm = MPI.COMM_WORLD
        global_grads = np.zeros_like(flat_grads)
        comm.Allreduce(flat_grads, global_grads, op=MPI.SUM)
        _set_flat_params_or_grads(network, global_grads, mode='grads')
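
A rough construction sketch for the agent above, assuming the classic gym API and a goal-conditioned environment; the environment id and every numeric value below are placeholders rather than values taken from the snippet, and the project's `Actor`, `Critic`, `Memory`, and `Normalizer` classes plus an initialized MPI runtime are assumed to be available:

import gym

# Hypothetical goal-conditioned environment (observation dict with
# 'observation', 'achieved_goal', and 'desired_goal' keys).
env = gym.make("FetchPickAndPlace-v1")
obs = env.reset()

agent = Agent(n_states=obs["observation"].shape,
              n_actions=env.action_space.shape[0],
              n_goals=obs["desired_goal"].shape[0],
              action_bounds=[env.action_space.low[0], env.action_space.high[0]],
              capacity=int(1e6),
              env=env,
              k_future=4,
              batch_size=256)

# One environment step driven by the (noisy) policy.
action = agent.choose_action(obs["observation"], obs["desired_goal"])
next_obs, reward, done, info = env.step(action)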
Example #8
    if np_train_f_ic is None:
        np_train_f_ic = np_ic
    else:
        np_train_f_ic = np.append(np_train_f_ic, np_ic)

# Normalizers
t_normalizer = Normalizer()
v_normalizer = Normalizer()
i_normalizer = Normalizer()

t_normalizer.parametrize(np_t)
v_normalizer.parametrize(np.array(train_vs))
i_normalizer.parametrize(np.array(train_ics))

# Train data normalization
np_norm_train_u_t = t_normalizer.normalize(np_train_u_t)
np_norm_train_u_v = v_normalizer.normalize(np_train_u_v)
np_norm_train_u_ic = i_normalizer.normalize(np_train_u_ic)

np_norm_train_f_t = t_normalizer.normalize(np_train_f_t)
np_norm_train_f_v = v_normalizer.normalize(np_train_f_v)
np_norm_train_f_ic = i_normalizer.normalize(np_train_f_ic)

# PINN instancing
hidden_layers = [9, 9]
learning_rate = 0.001

# Model for normalized data
model = CircuitPINN(R=R,
                    L=L,
                    hidden_layers=hidden_layers,
Example #9
#score the tweet
score = []

#create normalizer object
norm = Normalizer()

#create Stop word Removal object
st = StpRemoval()

#create sentiment analysis object
s = Sentianal()

for i in range(0, len(prab_data)):
    #normalize
    line = norm.normalize(prab_data['tweet'][i])

    #remove stopword
    line = st.removeStp(line)

    #score sentiment
    score.append(s.compute(line))

#join the dataframe
score_data = DataFrame(score)
prab_data = prab_data.join(score_data)
prab_data.columns = ['tweet_processed', 'score']

#write to csv file
prab_data.to_csv("score_prabowo2.csv")
Example #10
def write_tsv(records, output_path):
    normalizer = Normalizer(hira_kata=True, kogaki_dakuon=True)
    ret = []
    check = []
    for rec in records:
        try:
            # [[Category:教育漢字 第1学年|せい]]
            joyo, joyo_yomi = _extract_joyo(rec)
            edu_year, edu_yomi = _extract_edu(rec, joyo)
            sokaku = _extract_sokaku(rec['sokaku'])
            busyu, busyu_kaku = _extract_busyu(rec['busyu'])
            jiscode = _extract_jiscode(rec)
            kuten = _extract_kuten(rec)

            ''' nai -> listed in the joyo kanji table, gai -> outside the joyo kanji table, other -> everything else '''
            kunyomi_nai, kunyomi_gai, kunyomi_other = _extract_kunyomi(rec)
            onyomi_go, onyomi_kan, onyomi_other = _extract_onyomi(rec)
        except Exception as e:
            # skip records whose fields could not be extracted
            #pprint(rec)
            #print(e)
            continue

        if joyo_yomi and len(joyo_yomi) > 0:
            tmp = ','.join([onyomi_go, onyomi_kan, onyomi_other,
                kunyomi_nai, kunyomi_gai, kunyomi_other])
            tmp = normalizer.normalize(tmp)
            tmp = tmp.replace('-', '')
            for y in joyo_yomi + edu_yomi:
                y = normalizer.normalize(y)
                if y not in tmp:
                    ## TODO: FIX wiktionary
                    #print(rec['pageid'], rec['title'], y, tmp) # => yomi lacks
                    pass
                    break

        ret.append({
            'pageid': rec['pageid'], # https://ja.wiktionary.org/wiki/?curid={pageid}
            'title': rec['title'],
            'is_kanji': 1 if char_utils.is_kanji(rec['title'][0]) else 0,
            'joyo': joyo,
            'edu_year': edu_year,
            'sokaku': sokaku,
            'busyu' : busyu,
            'busyu_kaku': busyu_kaku,
            'joyo_yomi': ','.join(joyo_yomi + edu_yomi),
            'onyomi_go' : onyomi_go,
            'onyomi_kan': onyomi_kan,
            'onyomi_other': onyomi_other,
            'kunyomi_nai': kunyomi_nai,
            'kunyomi_gai': kunyomi_gai,
            'kunyomi_other': kunyomi_other,
            'jis': jiscode,
            'kuten': kuten,
        })
        if joyo == 0 and edu_year > 0:
            pprint(rec)

        #char_code = hex(ord(kanji))
        #fsutils.write_file(content, '{}/{}.txt'.format('check', char_code))

    #check = sorted(list(set(check)))
    #pprint(check)

    fsutils.write_csv(ret, output_path)
    print(' ********* COMPLETED ********* ')
    return ret
Example #11
class DDPG:
    """ Deep Deterministic Policy Gradient (DDPG) Helper Class
    """
    def __init__(self,
                 env,
                 act_dim,
                 state_dim,
                 goal_dim,
                 act_range,
                 buffer_size=int(1e6),
                 gamma=0.98,
                 lr=0.001,
                 tau=0.95):
        """ Initialization
        """
        # Environment and A2C parameters
        self.act_dim = act_dim
        self.act_range = act_range
        self.env_dim = state_dim + goal_dim
        self.gamma = gamma
        self.lr = lr
        self.tau = tau
        self.env = env

        # Create actor and critic networks
        self.actor_network = Actor(self.env_dim, act_dim, act_range)
        self.actor_target_network = Actor(self.env_dim, act_dim, act_range)
        self.actor_target_network.load_state_dict(
            self.actor_network.state_dict())

        self.critic_network = Critic(self.env_dim, act_dim, act_range)
        self.critic_target_network = Critic(self.env_dim, act_dim, act_range)
        self.critic_target_network.load_state_dict(
            self.critic_network.state_dict())

        sync_networks(self.actor_network)
        sync_networks(self.critic_network)

        # Optimizer
        self.actor_optim = torch.optim.Adam(self.actor_network.parameters(),
                                            lr=lr)
        self.critic_optim = torch.optim.Adam(self.critic_network.parameters(),
                                             lr=lr)

        # Replay buffer
        # self.buffer = MemoryBuffer(buffer_size)
        self.buffer = ReplayMemory(buffer_size)

        # Normalizers
        self.goal_normalizer = Normalizer(
            goal_dim, default_clip_range=5)  # Clip between [-5, 5]
        self.state_normalizer = Normalizer(state_dim, default_clip_range=5)

    def policy_action(self, s, g):
        """ Use the actor to predict value
        """
        input = self.preprocess_inputs(s, g)
        return self.actor_network(input)

    def memorize(self, experiences):
        """ Store experience in memory buffer
        """
        for exp in experiences:
            self.buffer.push(exp)

    def sample_batch(self, batch_size):
        return deepcopy(self.buffer.sample(batch_size))

    def clip_states_goals(self, state, goal):
        state = np.clip(state, -200, 200)
        goal = np.clip(goal, -200, 200)
        return state, goal

    def preprocess_inputs(self, state, goal):
        """Normalize and concatenate state and goal"""
        #state, goal = self.clip_states_goals(state, goal)
        state_norm = self.state_normalizer.normalize(state)
        goal_norm = self.goal_normalizer.normalize(goal)
        inputs = np.concatenate([state_norm, goal_norm])
        return torch.tensor(inputs, dtype=torch.float32).unsqueeze(0)

    def select_actions(self, pi):
        # add the gaussian
        action = pi.cpu().numpy().squeeze()
        action += 0.2 * self.act_range * np.random.randn(*action.shape)
        action = np.clip(action, -self.act_range, self.act_range)
        # random actions...
        random_actions = np.random.uniform(low=-self.act_range,
                                           high=self.act_range,
                                           size=self.act_dim)
        # choose if use the random actions
        action += np.random.binomial(1, 0.3, 1)[0] * (random_actions - action)
        action = np.clip(action, -self.act_range, self.act_range)

        return action

    def update_network(self, batch_size):
        s, actions, rewards, ns, _, g = self.sample_batch(batch_size)

        states, goals = self.clip_states_goals(s, g)
        new_states, new_goals = self.clip_states_goals(ns, g)

        norm_states = self.state_normalizer.normalize(states)
        norm_goals = self.goal_normalizer.normalize(goals)
        inputs_norm = np.concatenate([norm_states, norm_goals], axis=1)

        norm_new_states = self.state_normalizer.normalize(new_states)
        norm_new_goals = self.goal_normalizer.normalize(new_goals)
        inputs_next_norm = np.concatenate([norm_new_states, norm_new_goals],
                                          axis=1)

        # To tensor
        inputs_norm_tensor = torch.tensor(inputs_norm, dtype=torch.float32)
        inputs_next_norm_tensor = torch.tensor(inputs_next_norm,
                                               dtype=torch.float32)
        actions_tensor = torch.tensor(actions, dtype=torch.float32)
        r_tensor = torch.tensor(rewards, dtype=torch.float32)

        with torch.no_grad():
            # do the normalization
            # concatenate the stuffs
            actions_next = self.actor_target_network(inputs_next_norm_tensor)
            q_next_value = self.critic_target_network(inputs_next_norm_tensor,
                                                      actions_next)
            q_next_value = q_next_value.detach()
            target_q_value = r_tensor + self.gamma * q_next_value
            target_q_value = target_q_value.detach()
            # clip the q value
            clip_return = 1 / (1 - self.gamma)
            target_q_value = torch.clamp(target_q_value, -clip_return, 0)
        # the q loss
        real_q_value = self.critic_network(inputs_norm_tensor, actions_tensor)
        critic_loss = (target_q_value - real_q_value).pow(2).mean()
        # the actor loss
        actions_real = self.actor_network(inputs_norm_tensor)
        actor_loss = -self.critic_network(inputs_norm_tensor,
                                          actions_real).mean()
        actor_loss += 1.0 * (actions_real / self.act_range).pow(2).mean()
        # start to update the network
        self.actor_optim.zero_grad()
        actor_loss.backward()
        sync_grads(self.actor_network)
        self.actor_optim.step()
        # update the critic_network
        self.critic_optim.zero_grad()
        critic_loss.backward()
        sync_grads(self.critic_network)
        self.critic_optim.step()

    def soft_update_target_network(self, target, source):
        for target_param, param in zip(target.parameters(),
                                       source.parameters()):
            target_param.data.copy_((1 - self.tau) * param.data +
                                    self.tau * target_param.data)

    def train(self, args):
        if MPI.COMM_WORLD.Get_rank() == 0:
            self.create_save_dir(args["save_dir"], args["env_name"],
                                 args["HER_strat"])

        success_rates = []
        for ep_num in range(NUM_EPOCHS):
            start = time.time()
            for _ in range(NUM_CYCLES):
                for _ in range(ROLLOUT_PER_WORKER):
                    # Reset episode
                    observation = self.env.reset()
                    current_state = observation['observation']
                    goal = observation['desired_goal']
                    old_achieved_goal = observation['achieved_goal']
                    episode_exp = []
                    episode_exp_her = []
                    for _ in range(self.env._max_episode_steps):
                        if args['render']: self.env.render()
                        with torch.no_grad():
                            pi = self.policy_action(current_state, goal)
                            action = self.select_actions(pi)
                        obs, reward, _, _ = self.env.step(action)
                        new_state = obs['observation']
                        new_achieved_goal = obs['achieved_goal']
                        # Add outputs to memory buffer
                        episode_exp.append([
                            current_state, action, reward, new_state,
                            old_achieved_goal, goal
                        ])
                        if reward == 0: break

                        old_achieved_goal = new_achieved_goal
                        current_state = new_state

                    if args["HER_strat"] == "final":
                        experience = episode_exp[-1]
                        # set g' to achieved goal
                        experience[-1] = np.copy(experience[-2])
                        reward = self.env.compute_reward(
                            experience[-2], experience[-1],
                            None)  # set reward of success
                        experience[2] = reward
                        episode_exp_her.append(experience)

                    elif args["HER_strat"] in ["future", "episode"]:
                        # For each transition of the episode trajectory
                        for t in range(len(episode_exp)):
                            # Add K random states which come from the same episode as the transition
                            for _ in range(args["HER_k"]):
                                if args["HER_strat"] == "future":
                                    # Select a future exp from the same episode
                                    selected = np.random.randint(
                                        t, len(episode_exp))
                                elif args["HER_strat"] == "episode":
                                    # Select an exp from the same episode
                                    selected = np.random.randint(
                                        0, len(episode_exp))
                                # Take the achieved goal of the selected
                                ag_selected = np.copy(episode_exp[selected][5])
                                s, a, _, ns, ag, _ = episode_exp[t]
                                r = self.env.compute_reward(
                                    ag_selected, ag, None)
                                # New transition where the achieved goal of the selected is the new goal
                                her_transition = [s, a, r, ns, ag, ag_selected]
                                episode_exp_her.append(her_transition)

                    self.memorize(deepcopy(episode_exp))
                    self.memorize(deepcopy(episode_exp_her))

                    # Update Normalizers with the observations of this episode
                    self.update_normalizers(deepcopy(episode_exp),
                                            deepcopy(episode_exp_her))

                for _ in range(OPTIMIZATION_STEPS):
                    # Sample experience from buffer
                    self.update_network(args["batch_size"])

                # Soft update the target networks
                self.soft_update_target_network(self.actor_target_network,
                                                self.actor_network)
                self.soft_update_target_network(self.critic_target_network,
                                                self.critic_network)

            success_rate = self.eval()
            success_rates.append(success_rate)
            if MPI.COMM_WORLD.Get_rank() == 0:
                print("Epoch:", ep_num + 1, " -- success rate:",
                      success_rates[-1], " -- duration:",
                      time.time() - start)
                torch.save([
                    self.state_normalizer.mean, self.state_normalizer.std,
                    self.goal_normalizer.mean, self.goal_normalizer.std,
                    self.actor_network.state_dict()
                ], self.model_path + '/model.pt')

        return success_rates

    def create_save_dir(self, save_dir, env_name, her_strat):
        if not os.path.exists(save_dir):
            os.mkdir(save_dir)
        # path to save the model
        subdir = os.path.join(save_dir, env_name)
        if not os.path.exists(subdir):
            os.mkdir(subdir)
        self.model_path = os.path.join(save_dir, env_name, her_strat)
        if not os.path.exists(self.model_path):
            os.mkdir(self.model_path)

    def update_normalizers(self, episode_exp, episode_exp_her):
        # Update Normalizers
        episode_exp_states = np.vstack(np.array(episode_exp)[:, 0])
        episode_exp_goals = np.vstack(np.array(episode_exp)[:, 5])
        if len(episode_exp_her) != 0:
            episode_exp_her_states = np.vstack(np.array(episode_exp_her)[:, 0])
            episode_exp_her_goals = np.vstack(np.array(episode_exp_her)[:, 5])
            states = np.concatenate(
                [episode_exp_states, episode_exp_her_states])
            goals = np.concatenate([episode_exp_goals, episode_exp_her_goals])
        else:
            states = np.copy(episode_exp_states)
            goals = np.copy(episode_exp_goals)

        states, goals = self.clip_states_goals(states, goals)

        self.state_normalizer.update(deepcopy(states))
        self.goal_normalizer.update(deepcopy(goals))
        self.state_normalizer.recompute_stats()
        self.goal_normalizer.recompute_stats()

    def eval(self):
        total_success_rate = []
        for _ in range(NUM_TEST):
            per_success_rate = []
            observation = self.env.reset()
            state = observation['observation']
            goal = observation['desired_goal']
            for _ in range(self.env._max_episode_steps):
                # self.env.render()
                with torch.no_grad():
                    input = self.preprocess_inputs(state, goal)
                    pi = self.actor_network(input)
                    action = pi.detach().cpu().numpy().squeeze()
                new_observation, _, _, info = self.env.step(action)
                state = new_observation['observation']
                per_success_rate.append(info['is_success'])
            total_success_rate.append(per_success_rate)

        total_success_rate = np.array(total_success_rate)
        local_success_rate = np.mean(total_success_rate[:, -1])
        global_success_rate = MPI.COMM_WORLD.allreduce(local_success_rate,
                                                       op=MPI.SUM)
        return global_success_rate / MPI.COMM_WORLD.Get_size()
Example #13
class DdpgHer(object):

    _default_config = {
        'n_epochs': 50,
        'n_cycles': 50,
        'n_batches': 40,
        'checkpoint_freq': 5,
        'seed': 123,
        'num_workers': 1,
        'replay_strategy': 'future',
        'clip_return': 50.,
        'noise_eps': 0.2,
        'random_eps': 0.3,
        'buffer_size': int(1e6),
        'replay_k': 4,
        'clip_obs': 200.,
        'batch_size': 256,
        'hidden_units': 256,
        'gamma': 0.98,
        'action_l2': 1.,
        'lr_actor': 0.001,
        'lr_critic': 0.001,
        'polyak': 0.95,
        'n_test_rollouts': 10,
        'clip_range': 5.,
        'demo_length': 20,
        'local_dir': None,
        'cuda': None,
        'max_gpus': None,
        'rollouts_per_worker': 2,
        'goal_space_bins': None,
        'archer_params': None,
        'q_filter': False,
        'prm_loss_weight': 0.001,
        'aux_loss_weight': 0.0078,
        'demo_batch_size': None,
        'demo_file': None,
        'num_demo': 100,
    }

    def __init__(self, env, config, reporter=None):
        super().__init__()

        self.env = env
        self.config = {**DdpgHer._default_config, **config}
        self.seed(self.config['seed'])

        a_space, obs_space = self.env.action_space, self.env.observation_space
        obs_size = obs_space.spaces['observation'].shape[0]
        goal_size = obs_space.spaces['desired_goal'].shape[0]
        self.env_params = get_env_params(self.env)
        self.reporter = reporter

        if self.config['cuda'] is None:
            self.config['cuda'] = torch.cuda.is_available()

        if self.config['cuda']:
            n_gpus = torch.cuda.device_count()
            assert n_gpus > 0
            max_gpus = self.config['max_gpus']
            if max_gpus is None:
                max_gpus = n_gpus
            n_gpus = min(n_gpus, max_gpus)
            n_workers = MPI.COMM_WORLD.size
            rank = MPI.COMM_WORLD.rank
            w_per_gpu = int(np.ceil(n_workers / n_gpus))
            gpu_i = rank // w_per_gpu
            print(f'Worker with rank {rank} assigned GPU {gpu_i}.')
            torch.cuda.set_device(gpu_i)

        self.bc_loss = self.config.get('demo_file') is not None
        self.q_filter = self.config['q_filter']

        # create the network
        self.actor_network = ActorNetwork(
            action_space=a_space,
            observation_space=obs_space,
            hidden_units=self.config['hidden_units'])
        self.critic_network = CriticNetwork(
            action_space=a_space,
            observation_space=obs_space,
            hidden_units=self.config['hidden_units'])

        # sync the networks across the cpus
        sync_networks(self.actor_network)
        sync_networks(self.critic_network)

        # build up the target network
        self.actor_target_network = ActorNetwork(
            action_space=a_space,
            observation_space=obs_space,
            hidden_units=self.config['hidden_units'])
        self.critic_target_network = CriticNetwork(
            action_space=a_space,
            observation_space=obs_space,
            hidden_units=self.config['hidden_units'])

        # load the weights into the target networks
        self.actor_target_network.load_state_dict(
            self.actor_network.state_dict())
        self.critic_target_network.load_state_dict(
            self.critic_network.state_dict())

        # if use gpu
        if self.config['cuda']:
            self.actor_network.cuda()
            self.critic_network.cuda()
            self.actor_target_network.cuda()
            self.critic_target_network.cuda()

        # create the optimizer
        self.actor_optim = torch.optim.Adam(self.actor_network.parameters(),
                                            lr=self.config['lr_actor'])
        self.critic_optim = torch.optim.Adam(self.critic_network.parameters(),
                                             lr=self.config['lr_critic'])

        # goal_space_bins should be of the form:
        # [dict(axis=0, box=np.linspace(0.0, 2.0, 15)), dict(axis=1, box=np.linspace(0.0, 2.0, 15)), ...]
        weight_her_sampling = False
        self._num_reached_goals_in_bin = None
        self._num_visited_goals_in_bin = None
        self._num_observed_goals_in_bin = None
        self._goal_space_bins = self.config['goal_space_bins']
        if self._goal_space_bins is not None:
            weight_her_sampling = True
            self._num_reached_goals_in_bin = np.zeros(
                tuple(1 + b['box'].size for b in self._goal_space_bins))
            self._num_visited_goals_in_bin = self._num_reached_goals_in_bin.copy()
            self._num_observed_goals_in_bin = self._num_reached_goals_in_bin.copy()

        # her sampler
        self.her_module = HerSampler(
            self.config['replay_strategy'],
            self.config['replay_k'],
            self.env.compute_reward,
            weight_sampling=weight_her_sampling,
            archer_params=self.config['archer_params'])

        # create the normalizer
        self.o_norm = Normalizer(size=obs_size,
                                 default_clip_range=self.config['clip_range'])
        self.g_norm = Normalizer(size=goal_size,
                                 default_clip_range=self.config['clip_range'])

        # create the replay and demo buffers
        self.buffer = ReplayBuffer(self.env_params, self.config['buffer_size'],
                                   self.her_module.sample_her_transitions)
        self.demo_buffer = None
        if self.bc_loss:
            self._init_demo_buffer(update_stats=True)

        self._trained = False

    def _bin_idx_for_goals(self, goals: np.ndarray):
        assert self._goal_space_bins is not None
        return tuple(
            np.digitize(goals[..., b['axis']], b['box'], right=False)
            for b in self._goal_space_bins)

    def _get_info_for_goals(self, goals: np.ndarray):
        assert self._goal_space_bins is not None
        idx = self._bin_idx_for_goals(goals)
        times_success = self._num_reached_goals_in_bin[idx]
        times_visited = self._num_visited_goals_in_bin[idx]
        times_observed = self._num_observed_goals_in_bin[idx]
        tot_success = self._num_reached_goals_in_bin.sum()
        tot_visited = self._num_visited_goals_in_bin.sum()
        tot_observed = self._num_observed_goals_in_bin.sum()
        return (
            times_success,
            tot_success,
            times_visited,
            tot_visited,
            times_observed,
            tot_observed,
        )

    def seed(self, value):
        import random
        np.random.seed(value)
        random.seed(value)
        torch.manual_seed(value)
        self.env.seed(value)

    def _training_step(self):
        rollout_times = []
        update_times = []
        update_results = []
        taken_steps = 0
        failed_steps = 0
        sampling_tot_time = 0.0
        sampling_calls = 0
        step_tic = datetime.now()
        for _ in range(self.config['n_cycles']):
            mb_obs, mb_ag, mb_g, mb_actions = [], [], [], []
            while len(mb_obs) < self.config["rollouts_per_worker"]:
                tic = datetime.now()
                step_failure = False
                # reset the rollouts
                ep_obs, ep_ag, ep_g, ep_actions = [], [], [], []
                # reset the environment
                observation = self.env.reset()
                obs = observation['observation']
                ag = observation['achieved_goal']
                g = observation['desired_goal']

                if self._goal_space_bins is not None:
                    goal_idx = self._bin_idx_for_goals(g)
                    self._num_observed_goals_in_bin[goal_idx] += 1

                # start to collect samples
                for t in range(self.env_params['max_timesteps']):
                    with torch.no_grad():
                        input_tensor = self._preproc_inputs(obs, g)
                        pi = self.actor_network(input_tensor)
                        action = self._select_actions(pi)

                    try:
                        observation_new, _, _, info = self.env.step(action)
                    except MujocoException:
                        step_failure = True
                        break

                    obs_new = observation_new['observation']
                    ag_new = observation_new['achieved_goal']

                    if self._goal_space_bins is not None:
                        goal_idx = self._bin_idx_for_goals(ag_new)
                        self._num_visited_goals_in_bin[goal_idx] += 1
                        if bool(info['is_success']):
                            self._num_reached_goals_in_bin[goal_idx] += 1

                    # append rollouts
                    ep_obs.append(obs.copy())
                    ep_ag.append(ag.copy())
                    ep_g.append(g.copy())
                    ep_actions.append(action.copy())
                    # re-assign the observation
                    obs = obs_new
                    ag = ag_new
                ep_obs.append(obs.copy())
                ep_ag.append(ag.copy())

                if step_failure:
                    failed_steps += 1
                    continue

                taken_steps += self.env_params['max_timesteps']
                mb_obs.append(ep_obs)
                mb_ag.append(ep_ag)
                mb_g.append(ep_g)
                mb_actions.append(ep_actions)
                rollout_times.append((datetime.now() - tic).total_seconds())

            # convert them into arrays
            mb_obs = np.array(mb_obs)
            mb_ag = np.array(mb_ag)
            mb_g = np.array(mb_g)
            mb_actions = np.array(mb_actions)
            # store the episodes
            self.buffer.store_episode([mb_obs, mb_ag, mb_g, mb_actions])
            self._update_normalizer([mb_obs, mb_ag, mb_g, mb_actions])

            tic = datetime.now()
            # train the network
            for _ in range(self.config['n_batches']):
                # sample the episodes
                sampling_tic = datetime.now()
                sampled_transitions = self._sample_batch()
                sampling_tot_time += (datetime.now() -
                                      sampling_tic).total_seconds()
                sampling_calls += 1
                res = self._update_network(sampled_transitions)
                update_results.append(res)
            # soft update
            self._soft_update_target_network(self.actor_target_network,
                                             self.actor_network)
            self._soft_update_target_network(self.critic_target_network,
                                             self.critic_network)
            update_times.append((datetime.now() - tic).total_seconds())
        step_time = (datetime.now() - step_tic).total_seconds()

        tic = datetime.now()
        success_rate, avg_ep_reward = self._eval_agent()
        eval_time = (datetime.now() - tic).total_seconds()

        update_results_dict = dict()
        for k in update_results[0].keys():
            update_results_dict['avg_' + k] = np.mean(
                [r[k] for r in update_results])

        return {
            "test_success_rate": success_rate,
            "test_mean_ep_reward": avg_ep_reward,
            "avg_her_sampling_time": sampling_tot_time / sampling_calls,
            "avg_rollout_time": np.mean(rollout_times),
            "avg_network_update_time": np.mean(update_times),
            "evaluation_time": eval_time,
            "step_time": step_time,
            "env_steps": taken_steps,
            "failed_steps": failed_steps,
            **update_results_dict,
        }

    def _init_demo_buffer(self, update_stats=True):
        assert self.bc_loss
        file_path = self.config['demo_file']
        num_demo = self.config['num_demo']
        self.demo_buffer = ReplayBuffer(self.env_params,
                                        self.config['buffer_size'],
                                        self.her_module.sample_her_transitions)

        # data must be a dictionary of (at least) 4 lists; each list contains partial information for each episode.
        data = pickle.load(open(file_path, 'rb'))
        assert isinstance(data, dict)

        ordered_data = []
        for k in ['mb_obs', 'mb_ag', 'mb_g', 'mb_actions']:
            mb_data = np.asarray(data[k])
            assert len(mb_data) >= num_demo
            ordered_data.append(mb_data[:num_demo])

        self.demo_buffer.store_episode(ordered_data)
        if update_stats:
            self._update_normalizer(ordered_data)

    def _sample_batch(self):
        batch_size = self.config['batch_size']
        sample_kwargs = dict()
        if self._goal_space_bins is not None:
            sample_kwargs['get_info_for_goals'] = self._get_info_for_goals
        if self.bc_loss:
            demo_batch_size = self.config['demo_batch_size']
            transitions = self.buffer.sample(batch_size - demo_batch_size,
                                             **sample_kwargs)
            transitions_demo = self.demo_buffer.sample(demo_batch_size)
            for k, values in transitions_demo.items():
                rollout_vec = transitions[k].tolist()
                for v in values:
                    rollout_vec.append(v.tolist())
                transitions[k] = np.array(rollout_vec)
        else:
            transitions = self.buffer.sample(batch_size, **sample_kwargs)
        return transitions

    def save_checkpoint(self, epoch=0):
        local_dir = self.config.get('local_dir')
        if local_dir is not None:
            local_dir = local_dir + '/checkpoints'
            os.makedirs(local_dir, exist_ok=True)
            model_path = f'{local_dir}/model_{epoch}.pt'
            status_path = f'{local_dir}/status_{epoch}.pkl'
            torch.save([
                self.o_norm.mean, self.o_norm.std, self.g_norm.mean,
                self.g_norm.std,
                self.actor_network.state_dict()
            ], model_path)
            with open(status_path, 'wb') as f:
                pickle.dump(dict(config=self.config), f)

    @staticmethod
    def load(env, local_dir, epoch=None):
        epoch = epoch or '*[0-9]'
        models = glob.glob(f'{local_dir}/model_{epoch}.pt')
        assert len(models) > 0, "No checkpoints found!"

        model_path = sorted(models, key=os.path.getmtime)[-1]
        epoch = model_path.split("_")[-1].split(".")[0]
        status_path = f'{local_dir}/status_{epoch}.pkl'

        with open(status_path, 'rb') as f:
            status = pickle.load(f)
        status['config']['cuda'] = torch.cuda.is_available()
        agent = DdpgHer(env, status['config'])
        agent._trained = True

        o_mean, o_std, g_mean, g_std, actor_state = torch.load(
            model_path, map_location=lambda storage, loc: storage)

        agent.o_norm.mean = o_mean
        agent.o_norm.std = o_std
        agent.g_norm.mean = g_mean
        agent.g_norm.std = g_std

        agent.actor_network.load_state_dict(actor_state)
        agent.actor_network.eval()
        print(f'Loaded model for epoch {epoch}.')
        return agent

    def predict(self, obs):
        if not self._trained:
            raise RuntimeError
        g = obs['desired_goal']
        obs = obs['observation']
        with torch.no_grad():
            inputs = self._preproc_inputs(obs, g)
            pi = self.actor_network(inputs)
            action = pi.cpu().numpy().squeeze()
        return action

    def train(self):
        if self._trained:
            raise RuntimeError

        # make sure that different workers have different seeds
        # (from baselines' original implementation)
        local_uniform = np.random.uniform(size=(1, ))
        root_uniform = local_uniform.copy()
        MPI.COMM_WORLD.Bcast(root_uniform, root=0)
        if MPI.COMM_WORLD.Get_rank() != 0:
            assert local_uniform[0] != root_uniform[0]

        tic = datetime.now()
        n_epochs = self.config.get('n_epochs')
        saved_checkpoints = 0
        total_env_steps = 0

        for iter_i in it.count():
            if n_epochs is not None and iter_i >= n_epochs:
                break
            res = self._training_step()
            total_env_steps += res['env_steps']

            if MPI.COMM_WORLD.Get_rank() == 0:
                if (iter_i + 1) % self.config['checkpoint_freq'] == 0:
                    self.save_checkpoint(epoch=(iter_i + 1))
                    saved_checkpoints += 1
                if callable(self.reporter):
                    self.reporter(
                        **{
                            **res,
                            "training_iteration": iter_i + 1,
                            "total_time": (datetime.now() -
                                           tic).total_seconds(),
                            "checkpoints": saved_checkpoints,
                            "total_env_steps": total_env_steps,
                            "current_buffer_size": self.buffer.current_size,
                        })

    # pre_process the inputs
    def _preproc_inputs(self, obs, g):
        obs_norm = self.o_norm.normalize(obs)
        g_norm = self.g_norm.normalize(g)
        # concatenate the stuffs
        inputs = np.concatenate([obs_norm, g_norm])
        inputs = torch.tensor(inputs, dtype=torch.float32).unsqueeze(0)
        if self.config['cuda']:
            inputs = inputs.cuda()
        return inputs

    # this function will choose action for the agent and do the exploration
    def _select_actions(self, pi):
        action = pi.cpu().numpy().squeeze()
        # add the gaussian
        action += self.config['noise_eps'] * self.env_params[
            'action_max'] * np.random.randn(*action.shape)
        action = np.clip(action, -self.env_params['action_max'],
                         self.env_params['action_max'])
        # random actions...
        random_actions = np.random.uniform(low=-self.env_params['action_max'],
                                           high=self.env_params['action_max'],
                                           size=self.env_params['action'])
        # choose if use the random actions
        action += np.random.binomial(1, self.config['random_eps'],
                                     1)[0] * (random_actions - action)
        return action

    # update the normalizer
    def _update_normalizer(self, episode_batch):
        mb_obs, mb_ag, mb_g, mb_actions = episode_batch
        mb_obs_next = mb_obs[:, 1:, :]
        mb_ag_next = mb_ag[:, 1:, :]
        # get the number of normalization transitions
        num_transitions = mb_actions.shape[1]
        # create the new buffer to store them
        buffer_temp = {
            'obs': mb_obs,
            'ag': mb_ag,
            'g': mb_g,
            'actions': mb_actions,
            'obs_next': mb_obs_next,
            'ag_next': mb_ag_next,
        }
        transitions = self.her_module.sample_her_transitions(
            buffer_temp, num_transitions)
        obs, g = transitions['obs'], transitions['g']
        # pre process the obs and g
        transitions['obs'], transitions['g'] = self._preproc_og(obs, g)
        # update
        self.o_norm.update(transitions['obs'])
        self.g_norm.update(transitions['g'])
        # recompute the stats
        self.o_norm.recompute_stats()
        self.g_norm.recompute_stats()

    def _preproc_og(self, o, g):
        o = np.clip(o, -self.config['clip_obs'], self.config['clip_obs'])
        g = np.clip(g, -self.config['clip_obs'], self.config['clip_obs'])
        return o, g

    # soft update
    def _soft_update_target_network(self, target, source):
        for target_param, param in zip(target.parameters(),
                                       source.parameters()):
            target_param.data.copy_((1 - self.config['polyak']) * param.data +
                                    self.config['polyak'] * target_param.data)

    # update the network
    def _update_network(self, transitions):

        # pre-process the observation and goal
        o, o_next, g = transitions['obs'], transitions[
            'obs_next'], transitions['g']
        transitions['obs'], transitions['g'] = self._preproc_og(o, g)
        transitions['obs_next'], transitions['g_next'] = self._preproc_og(
            o_next, g)

        # start to do the update
        obs_norm = self.o_norm.normalize(transitions['obs'])
        g_norm = self.g_norm.normalize(transitions['g'])
        inputs_norm = np.concatenate([obs_norm, g_norm], axis=1)
        obs_next_norm = self.o_norm.normalize(transitions['obs_next'])
        g_next_norm = self.g_norm.normalize(transitions['g_next'])
        inputs_next_norm = np.concatenate([obs_next_norm, g_next_norm], axis=1)

        # transfer them into the tensor
        inputs_norm_tensor = torch.tensor(inputs_norm, dtype=torch.float32)
        inputs_next_norm_tensor = torch.tensor(inputs_next_norm,
                                               dtype=torch.float32)
        actions_tensor = torch.tensor(transitions['actions'],
                                      dtype=torch.float32)
        r_tensor = torch.tensor(transitions['r'], dtype=torch.float32)

        if self.config['cuda']:
            inputs_norm_tensor = inputs_norm_tensor.cuda()
            inputs_next_norm_tensor = inputs_next_norm_tensor.cuda()
            actions_tensor = actions_tensor.cuda()
            r_tensor = r_tensor.cuda()

        # calculate the target Q value function
        with torch.no_grad():
            # do the normalization
            # concatenate the stuffs
            actions_next = self.actor_target_network(inputs_next_norm_tensor)
            q_next_value = self.critic_target_network(inputs_next_norm_tensor,
                                                      actions_next)
            q_next_value = q_next_value.detach()
            target_q_value = r_tensor + self.config['gamma'] * q_next_value
            target_q_value = target_q_value.detach()
            # clip the q value
            clip_return = 1 / (1 - self.config['gamma'])
            target_q_value = torch.clamp(target_q_value, -clip_return, 0)

        # the q loss
        real_q_value = self.critic_network(inputs_norm_tensor, actions_tensor)
        critic_loss = (target_q_value - real_q_value).pow(2).mean()

        # self.main.Q_tf ==> real_q_value
        # self.main.Q_pi_tf ==> self.critic_network(inputs_norm_tensor, actions_real) ==> approx_q_value

        # the actor loss
        action_l2 = self.config['action_l2']
        actions_real = self.actor_network(inputs_norm_tensor)
        approx_q_value = self.critic_network(inputs_norm_tensor, actions_real)

        if self.bc_loss:
            # train with demonstrations using behavior cloning

            # choose only the demo buffer samples
            b_size = self.config['batch_size']
            demo_b_size = self.config['demo_batch_size']
            mask = np.concatenate(
                (np.zeros(b_size - demo_b_size), np.ones(demo_b_size)), axis=0)
            mask = torch.tensor(mask,
                                dtype=torch.uint8,
                                device=actions_real.device)

            if self.q_filter:
                # use Q-filter trick to perform BC only when needed
                with torch.no_grad():
                    mask &= (real_q_value > approx_q_value).squeeze()

            prm_loss_weight = self.config['prm_loss_weight']
            cloning_loss = self.config['aux_loss_weight'] * (
                actions_real[mask] - actions_tensor[mask]).pow(2).sum()
        else:
            # train without demonstrations
            prm_loss_weight = 1.0
            cloning_loss = None

        actor_loss = -prm_loss_weight * approx_q_value.mean()
        actor_loss += prm_loss_weight * action_l2 * (
            actions_real / self.env_params['action_max']).pow(2).mean()

        if cloning_loss is not None:
            actor_loss += cloning_loss

        # update actor network
        self.actor_optim.zero_grad()
        actor_loss.backward()
        sync_grads(self.actor_network)
        self.actor_optim.step()

        # update critic network
        self.critic_optim.zero_grad()
        critic_loss.backward()
        sync_grads(self.critic_network)
        self.critic_optim.step()

        res = dict(actor_loss=actor_loss.item(),
                   critic_loss=critic_loss.item())
        if cloning_loss is not None:
            res['cloning_loss'] = cloning_loss.item()
        return res

    # do the evaluation
    def _eval_agent(self):
        total_success_rate = []
        ep_rewards = []
        for _ in range(self.config['n_test_rollouts']):
            per_success_rate = []
            ep_reward = 0.0
            observation = self.env.reset()
            obs = observation['observation']
            g = observation['desired_goal']
            for _ in range(self.env_params['max_timesteps']):
                with torch.no_grad():
                    input_tensor = self._preproc_inputs(obs, g)
                    pi = self.actor_network(input_tensor)
                    # convert the actions
                    actions = pi.detach().cpu().numpy().squeeze()
                observation_new, rew, _, info = self.env.step(actions)
                obs = observation_new['observation']
                g = observation_new['desired_goal']
                per_success_rate.append(info['is_success'])
                ep_reward += rew
            ep_rewards.append(ep_reward)
            total_success_rate.append(per_success_rate)
        total_success_rate = np.array(total_success_rate)
        local_success_rate = np.mean(total_success_rate[:, -1])
        global_success_rate = MPI.COMM_WORLD.allreduce(local_success_rate,
                                                       op=MPI.SUM)
        global_success_rate /= MPI.COMM_WORLD.Get_size()

        avg_ep_reward = np.array(ep_rewards).mean()
        global_avg_ep_reward = MPI.COMM_WORLD.allreduce(avg_ep_reward,
                                                        op=MPI.SUM)
        global_avg_ep_reward /= MPI.COMM_WORLD.Get_size()

        return global_success_rate, global_avg_ep_reward
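
The reinforcement-learning examples in this listing (the DDPG/HER agents above and below) all pass observations and goals through a Normalizer before they reach the networks. The exact interface differs from repository to repository (some observe single states, some update from batches, and the TensorFlow variant keeps its statistics inside the session), so the snippet below is only a minimal sketch of the shared idea: accumulate sums and squared sums, derive a mean and standard deviation, and clip the normalized values. The class name and defaults are illustrative assumptions, not taken from any of the repositories.

import numpy as np

class RunningNormalizer:
    """Minimal running mean/std normalizer with clipping (illustrative only)."""

    def __init__(self, size, eps=1e-2, clip_range=5.0):
        self.eps = eps
        self.clip_range = clip_range
        self.sum = np.zeros(size, dtype=np.float64)
        self.sumsq = np.zeros(size, dtype=np.float64)
        self.count = 0

    def update(self, batch):
        # accumulate statistics from a single sample or an (n, size) batch
        batch = np.atleast_2d(batch)
        self.sum += batch.sum(axis=0)
        self.sumsq += np.square(batch).sum(axis=0)
        self.count += batch.shape[0]

    def normalize(self, x):
        mean = self.sum / max(self.count, 1)
        var = self.sumsq / max(self.count, 1) - np.square(mean)
        std = np.sqrt(np.maximum(var, np.square(self.eps)))
        return np.clip((x - mean) / std, -self.clip_range, self.clip_range)

A call sequence such as norm = RunningNormalizer(obs_len); norm.update(obs); obs_n = norm.normalize(obs) mirrors the observe/normalize pattern used in the agent code below.
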
Beispiel #14
0
#score the tweet
score = []

#create normalizer object
norm = Normalizer()

#create Stop word Removal object
st = StpRemoval()

#create sentiment analysis object
s = Sentianal()

for i in range(0, len(jkw_data)):
    #normalize
    line = norm.normalize(jkw_data['tweet'][i])

    #remove stopword
    line = st.removeStp(line)

    #score sentiment
    score.append(s.compute(line))

#join the dataframe
score_data = DataFrame(score)
jkw_data = jkw_data.join(score_data)
jkw_data.columns = ['tweet_processed', 'score']

#write to csv file
jkw_data.to_csv("score_jokowi2.csv")
    def post(self):
        """
        compare
        ---
        tag: compare
        description: Compare two answers.
        requestBody:
            description: Input message
            required: true
            content:
                application/json:
                    schema:
                        $ref: '#/components/schemas/DiffRequest'
        responses:
            '200':
                description: Success
                content:
                    text/plain:
                        schema:
                            type: string
                            example: "Successfully validated"
            '400':
                description: Malformed message
                content:
                    text/plain:
                        schema:
                            type: string
        """
        self.validate(request)
        #print (json.dumps(request.json, indent=2))

        normalizer = Normalizer()
        answer_1 = request.json['answer_1']
        answer_2 = request.json['answer_2']

        if 'query_graph' in answer_1:
            answer_1['question_graph'] = answer_1['query_graph']
            del answer_1['query_graph']

        if 'query_graph' in answer_2:
            answer_2['question_graph'] = answer_2['query_graph']
            del answer_2['query_graph']

        if 'results' in answer_1:
            answer_1['answers'] = answer_1['results']
            del answer_1['results']

        if 'results' in answer_2:
            answer_2['answers'] = answer_2['results']
            del answer_2['results']

        if 'question_graph' not in answer_1 and 'reasoner_id' in answer_1 and 'query_type_id' in answer_1:
            if answer_1['reasoner_id'] == 'RTX' and answer_1[
                    'query_type_id'] in q_map:
                answer_1['question_graph'] = q_map[
                    answer_1['query_type_id']]['question_graph']
                answer_1['question_graph']['nodes'][0]['curie'] = answer_1[
                    'terms'][answer_1['question_graph']['nodes'][0]['type']]

        if 'question_graph' not in answer_2 and 'reasoner_id' in answer_2 and 'query_type_id' in answer_2:
            if answer_2['reasoner_id'] == 'RTX' and answer_2[
                    'query_type_id'] in q_map:
                answer_2['question_graph'] = q_map[
                    answer_2['query_type_id']]['question_graph']
                answer_2['question_graph']['nodes'][0]['curie'] = answer_2[
                    'terms'][answer_2['question_graph']['nodes'][0]['type']]

        answer_1_norm = normalizer.normalize(answer_1)
        answer_2_norm = normalizer.normalize(answer_2)

        if isinstance(answer_1_norm, str):
            raise Exception(answer_1_norm)
        if isinstance(answer_2_norm, str):
            raise Exception(answer_2_norm)

        node_diff = NodeDiff(answer_1_norm, answer_2_norm)
        graph_comparator = GraphComparator()

        return {
            'node_diff': node_diff.node_diff(),
            'graph_diff': graph_comparator.compare(answer_1_norm,
                                                   answer_2_norm)
        }
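
The handler above accepts two answers in either the newer query_graph/results key convention or the older question_graph/answers one, renames the keys, normalizes both answers, and returns a node diff and a graph diff. A request might look roughly like the sketch below; the endpoint URL, port, and the empty graph contents are assumptions for illustration and are not taken from the source.

import requests  # assumption: the service is reachable over HTTP

payload = {
    'answer_1': {
        'query_graph': {'nodes': [], 'edges': []},  # renamed to question_graph by the handler
        'results': [],                              # renamed to answers by the handler
    },
    'answer_2': {
        'query_graph': {'nodes': [], 'edges': []},
        'results': [],
    },
}

# URL and route are hypothetical; adjust host, port and path to the actual deployment.
response = requests.post('http://localhost:5000/compare', json=payload)
print(response.json())  # expected keys: 'node_diff' and 'graph_diff'
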
Beispiel #16
0
                        help='path to manifest file')
    parser.add_argument('--input', default=None, help='path to input file')
    parser.add_argument('--section',
                        default=None,
                        help='section input (for testing)')
    parser.add_argument('--row', default=None, help='row input (for testing)')

    args = parser.parse_args()

    assert args.manifest

    normalizer = Normalizer()
    normalizer.read_manifest(args.manifest)

    if args.section and args.row:
        section_id, row_id, valid = normalizer.normalize(
            args.section, args.row)
        print """
        Input:
            [section] {}\t[row] {}
        Output:
            [section_id] {}\t[row_id] {}
        Valid?:
            {}
        """.format(args.section, args.row, section_id, row_id, valid)

    elif args.input:
        samples = read_input(args.input)
        matched = normalize_samples(normalizer, samples, verbose=False)
        output_samples(matched)
Beispiel #17
0
class Agent:
    def __init__(self, env):
        """Args:
            env (gym.core.Env): environment
        """

        with open('./configuration.json') as config_file:
            self.config = json.load(config_file)['agent']

        self.env = env
        self.state = None
        self.epsilon = None
        self.epsilon_decay = None
        self.state_size = None
        self.actions_size = None
        self.actor = None
        self.actor_target = None
        self.critic = None
        self.critic_target = None
        self.actor_optim = None
        self.critic_optim = None
        self.gamma = None
        self.memory = None
        self.batch_size = None
        self.action_space = None
        self.normalizer = None

    def __str__(self):
        return 'RL_Agent Object'

    def reset(self):
        self.action_space = self.env.action_space
        obs_space = self.env.observation_space.spaces
        obs_len = obs_space['observation'].shape[0]
        goal_len = obs_space['desired_goal'].shape[0]
        self.state_size = obs_len + goal_len
        self.actions_size = self.action_space.shape[0]
        max_action = float(self.env.action_space.high[0])

        self.actor = ActorNet(self.state_size, *self.config['net_sizes'],
                              self.actions_size, max_action)
        self.critic = CriticNet(self.state_size, *self.config['net_sizes'],
                                self.actions_size)
        self.actor_target = ActorNet(self.state_size,
                                     *self.config['net_sizes'],
                                     self.actions_size, max_action)
        self.critic_target = CriticNet(self.state_size,
                                       *self.config['net_sizes'],
                                       self.actions_size)
        self.actor_optim = Adam(self.actor.parameters(),
                                lr=self.config['learning_rate'])
        self.critic_optim = Adam(self.critic.parameters(),
                                 lr=self.config['learning_rate'])

        self.update(self.critic_target, self.critic, 1)
        self.update(self.actor_target, self.actor, 1)

        self.epsilon = self.config['epsilon']
        self.epsilon_decay = self.config['epsilon_decay']
        self.gamma = self.config['gamma']

        if self.config['PER']:
            self.memory = PrioritizedMemory(
                self.config['memory_size'], self.config["memory_alpha"],
                self.config["memory_epsilon"], self.config["memory_beta"],
                self.config["memory_beta_increment"])
        else:
            self.memory = ReplayBuffer(self.config['memory_size'])

        self.batch_size = self.config['batch_size']
        self.normalizer = Normalizer(obs_len, goal_len)
        # warm up the normalizer
        self.normalizer.observe(self.env.reset())

    def run(self, train):
        total_reward = 0
        done = False
        self.state = self.env.reset()
        self.normalizer.observe(self.state)
        self.state = self.normalizer.normalize(self.state)
        ep_transitions = []

        # start episode
        while not done:
            if self.config['render']:
                self.env.render()

            # act and observe
            action = self._get_action_epsilon_greedy(self.state)
            obs, reward, done, info = self.env.step(action)
            total_reward += reward

            # normalize the state
            self.normalizer.observe(obs)
            obs = self.normalizer.normalize(obs)

            # save the transition for later HER processing
            transition = [self.state, reward, action, obs, not done]
            ep_transitions.append(transition)

            # save to memory
            self.append_sample_to_memory(
                *copy.deepcopy((flatten_state_dict_for_model(self.state),
                                reward, action,
                                flatten_state_dict_for_model(obs), not done)))

            self.state = obs

        if random.random() < self.config["her-probability"]:
            her_trs = generate_her_transitions(ep_transitions,
                                               self.env.compute_reward,
                                               self.config['her-type'],
                                               self.config['her-k_value'])
            for t in her_trs:
                self.append_sample_to_memory(*t)

        if len(self.memory) > self.batch_size * 5 and train:
            for i in range(40):
                self._train()
            self.soft_update_networks()

        if self.epsilon > self.config['epsilon_min']:
            self.epsilon *= self.epsilon_decay

        return total_reward

    def _train(self):
        batch, indexes, importance_sampling_weights = None, None, None
        if self.config['PER']:
            batch, indexes, importance_sampling_weights = \
                self.sample_from_per_memory(self.batch_size)
            importance_sampling_weights = torch.Tensor(
                importance_sampling_weights)
        else:
            batch = self.memory.get_random_batch(self.batch_size)

        state_batch = torch.Tensor(batch[0])
        reward_batch = torch.Tensor(batch[1])
        action_batch = torch.Tensor(batch[2])
        next_state_batch = torch.Tensor(batch[3])
        # unused - see additional info in the Readme
        # mask_batch = torch.Tensor(batch[4] * 1)

        next_q_values = self.critic_target(next_state_batch,
                                           self.actor_target(next_state_batch))
        expected_q_values = reward_batch + (self.gamma * next_q_values)
        expected_q_values = expected_q_values.clamp_(-50., 0.).detach()

        self.critic_optim.zero_grad()
        q_values = self.critic(state_batch, action_batch)

        if self.config['PER']:
            errors = torch.abs(q_values - expected_q_values)
            critic_loss = (importance_sampling_weights * errors**2).sum()
            for i in range(self.batch_size):
                index = indexes[i]
                self.memory.update(index, errors[i].detach().numpy())
        else:
            critic_loss = mse_loss(q_values, expected_q_values)
        critic_loss.backward()

        self.critic_optim.step()

        self.actor_optim.zero_grad()
        policy_loss = self.critic(state_batch, self.actor(state_batch))
        action_reg = (self.actor.action_preact**2).mean()
        policy_loss = -policy_loss.mean() + action_reg
        policy_loss.backward()

        self.actor_optim.step()

    def get_action_greedy(self, state):
        """Hey, actor - act!... plus detach().numpy() ..."""
        return self.actor(flatten_state_dict_for_model(state)).detach().numpy()

    def _get_action_epsilon_greedy(self, state):
        """Returns an action for given state by using the actor network.
        With epsilon probability, it returns a fully random action.
        In both cases, there is a OU noise added as well.
        Parameters can be specified in the configuration file.
        """

        if random.random() > self.epsilon:
            action = self.get_action_greedy(state) + \
                     np.random.normal(scale=0.2, size=self.actions_size)
        else:
            action = self.env.action_space.sample()
        return np.clip(action, -1., 1.)

    def append_sample_to_memory(self, state, reward, action, next_state, done):
        """Adds given transition to the memory. In case of using Prioritized
        Experience Replay, it calculates the TD error."""
        if not self.config['PER']:
            self.memory.append((state, reward, action, next_state, done))
        else:
            q = self.critic(
                torch.Tensor(state).unsqueeze(0),
                torch.Tensor(action).unsqueeze(0))

            target_val = self.critic_target(
                torch.Tensor(next_state).unsqueeze(0),
                self.actor_target(torch.Tensor(next_state).unsqueeze(0)))

            target = reward + (self.gamma * target_val * (done * 1)).detach()
            error = abs(q - target).detach().numpy()
            self.memory.add((state, reward, action, next_state, done), error)

    def soft_update_networks(self):
        self.update(self.critic_target, self.critic,
                    self.config['network_update_amount'])
        self.update(self.actor_target, self.actor,
                    self.config['network_update_amount'])

    def update(self, target, src, amount):
        for target_param, param in zip(target.parameters(), src.parameters()):
            target_param.data.copy_(target_param.data * (1.0 - amount) +
                                    param.data * amount)

    def sample_from_per_memory(self, batch_size):
        transition_batch, indexes, importance_sampling_weights = \
            self.memory.sample(batch_size)

        x, r, u, y, d = [], [], [], [], []
        for i in transition_batch:
            X, R, U, Y, D = i
            x.append(np.array(X, copy=False))
            y.append(np.array(Y, copy=False))
            u.append(np.array(U, copy=False))
            r.append(np.array(R, copy=False))
            d.append(np.array(D, copy=False))

        return ((np.array(x), np.array(r).reshape(-1, 1), np.array(u),
                 np.array(y), np.array(d).reshape(-1, 1)),
                indexes, importance_sampling_weights)
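
The Agent example above relabels each finished episode with generate_her_transitions before appending the extra samples to memory, but that helper is not shown. The sketch below illustrates the common "future" relabelling strategy, assuming dict states with 'achieved_goal' and 'desired_goal' keys and the [state, reward, action, next_state, not_done] transition layout used in run(); the repository's actual helper (which also flattens the state dicts before storage) may differ.

import random

def her_future_transitions(transitions, compute_reward, k=4):
    """Relabel transitions with goals achieved later in the same episode."""
    relabelled = []
    for t, (state, _, action, next_state, not_done) in enumerate(transitions):
        future_idx = random.sample(range(t, len(transitions)),
                                   min(k, len(transitions) - t))
        for f in future_idx:
            new_goal = transitions[f][3]['achieved_goal']
            # recompute the reward against the substituted goal
            reward = compute_reward(next_state['achieved_goal'], new_goal, None)
            s = dict(state, desired_goal=new_goal)
            ns = dict(next_state, desired_goal=new_goal)
            relabelled.append((s, reward, action, ns, not_done))
    return relabelled

Each relabelled tuple can then be appended to the replay memory exactly like an ordinary transition.
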
Beispiel #18
0
class HER(pl.LightningModule):
    def __init__(self, hparams):
        super(HER, self).__init__()

        self.hparams = hparams

        self.test_env = make_env(hparams, render=self.hparams.render_test)
        sample_obs = self.test_env.observation_space['observation'].sample()
        sample_goal = self.test_env.observation_space['achieved_goal'].sample()

        # HARD CODED VALUES FOR Bullet-HRL
        action_limits, state_limits = get_env_boundaries()
        action_offset, action_bounds, action_clip_low, action_clip_high = action_limits

        state_shape = sample_obs.shape[0]
        action_shape = self.test_env.action_space.shape[0]
        goal_shape = sample_goal.shape[0]
        self.action_clips = (action_clip_low, action_clip_high)

        self.model = DDPG(params=self.hparams,
                          obs_size=state_shape,
                          goal_size=goal_shape,
                          act_size=action_shape,
                          action_clips=(action_clip_low, action_clip_high),
                          action_bounds=action_bounds,
                          action_offset=action_offset)

        self.model.actor.share_memory()
        self.model.critic.share_memory()

        self.state_normalizer = Normalizer(
            state_shape, default_clip_range=self.hparams.clip_range)
        self.goal_normalizer = Normalizer(
            goal_shape, default_clip_range=self.hparams.clip_range)

        self.replay_buffer = SharedReplayBuffer(self.hparams.buffer_size,
                                                state_shape, action_shape,
                                                goal_shape)

    def log_func(self, d):
        self.log_dict(d, on_step=True, prog_bar=True)

    def collate_fn(self, batch):
        return collate.default_convert(batch)

    def __dataloader(self) -> DataLoader:
        dataset = RLDataset(self.replay_buffer, self.hparams.batch_size,
                            self.hparams.n_batches,
                            self.hparams.replay_initial)
        dataloader = DataLoader(dataset=dataset,
                                collate_fn=self.collate_fn,
                                batch_size=1,
                                num_workers=1,
                                pin_memory=True)
        return dataloader

    def train_dataloader(self):
        return self.__dataloader()

    def __testloader(self):
        testset = TestDataset(hparams=self.hparams,
                              test_env=self.test_env,
                              model=self.model,
                              state_normalizer=self.state_normalizer,
                              goal_normalizer=self.goal_normalizer)
        testloader = DataLoader(dataset=testset, batch_size=1)

        return testloader

    def val_dataloader(self):
        return self.__testloader()

    def configure_optimizers(self):
        return [self.model.crt_opt, self.model.act_opt], []

    def training_step(self, batch, batch_idx, optimizer_idx):
        states_v, actions_v, next_states_v, rewards_v, dones_mask, goals_v = batch[
            0]
        norm_states_v = self.state_normalizer.normalize(states_v)
        norm_goals_v = self.goal_normalizer.normalize(goals_v)
        if optimizer_idx == 0:
            norm_next_states_v = self.state_normalizer.normalize(next_states_v)
            # train critic
            q_v = self.model.critic(norm_states_v, norm_goals_v, actions_v)
            with torch.no_grad():
                next_act_v = self.model.tgt_act_net(norm_next_states_v,
                                                    norm_goals_v)
                q_next_v = self.model.tgt_crt_net(norm_next_states_v,
                                                  norm_goals_v, next_act_v)
                q_next_v[dones_mask] = 0.0
                q_ref_v = rewards_v.unsqueeze(
                    dim=-1) + q_next_v * self.hparams.gamma
                # clip the q value
                clip_return = 1 / (1 - self.hparams.gamma)
                q_ref_v = torch.clamp(q_ref_v, -clip_return, 0)
            critic_loss_v = F.mse_loss(q_v, q_ref_v.detach())
            tqdm_dict = {'critic_loss': critic_loss_v}
            self.log_dict(tqdm_dict, prog_bar=True)

            return critic_loss_v

        elif optimizer_idx == 1:
            # train actor
            self.model.actor.offset.requires_grad = False
            self.model.actor.action_bounds.requires_grad = False

            cur_actions_v = self.model.actor(norm_states_v, norm_goals_v)
            actor_loss_v = -self.model.critic(norm_states_v, norm_goals_v,
                                              cur_actions_v).mean()
            actor_loss_v += ((cur_actions_v - self.model.actor.offset) /
                             self.model.actor.action_bounds).pow(2).mean()
            tqdm_dict = {'actor_loss': actor_loss_v}
            self.log_dict(tqdm_dict, prog_bar=True)

            if batch_idx % self.hparams.sync_batches == 0:
                self.model.alpha_sync(self.hparams.polyak)

            return actor_loss_v

    def validation_step(self, batch, batch_idx):
        to_log = dict()
        for k, v in batch.items():
            to_log[k] = v.detach().cpu().numpy()
        to_log['epoch_nr'] = int(self.current_epoch)
        if self.logger is not None:
            self.logger.experiment.log(to_log)
Beispiel #19
0
inp = inp[1:]
correct = correct[1:]
assert len(inp) == len(correct)

i1 = inp[0]
c1 = correct[0]

right = 0
wrong = 0
count = 0
cc = 0

for i, c in zip(inp, correct):
    count += 1
    # try:
    section_id, row_id, valid = normalizer.normalize(i['section'], i['row'])
    # if not valid:
    # print(count, section_id, row_id)
    if section_id != c['section_id'] or row_id != c['row_id'] or valid != c[
            'valid']:
        cc += 1
        wrong += 1
        if cc == 3:
            print(i, c)
            print('count: ', count, 'section id: ', section_id, 'row id: ',
                  row_id, 'valid: ', valid)
    else:
        right += 1
    # except:
    #     wrong += 1
    # print(section_id, c['section_id'])
Beispiel #21
0
# Author : Alfan F. Wicaksono
# IR Lab, FASILKOM, UI

# Script for pre-processing twitter corpus

from normalizer import Normalizer
from stpremoval import StpRemoval

##################### you can modify this part ######################

corpusFile = "debatcapres_2014_sesi1.txt"
preprocessedFile = "debatcapres_2014_sesi1_processed.txt"

#####################################################################

nm = Normalizer()
sw = StpRemoval()

fin = open(corpusFile, "r")
fout = open(preprocessedFile, "w")

for line in fin:
    line = line.strip()  # remove carriage return
    line = nm.normalize(line)  # normalization
    line = sw.removeStp(line)  # remove stop word
    fout.write(line)  # put preprocessed tweet on the new file
    fout.write("\n")

fin.close()
fout.close()
Beispiel #22
0
class Planner(object):
    @store_args
    def __init__(self,
                 inp_dim,
                 hid_size,
                 seq_len,
                 out_dim,
                 buffer_size,
                 batch_size=64,
                 optim_stepsize=1e-3,
                 sample_func=None,
                 norm_eps=1e-2,
                 norm_clip=5,
                 scope='planner',
                 layerNorm=False,
                 **kwargs):
        '''
        Implemention of LSTM Planner that produces given number of subgoals between src and dest.
        Args:
            inp_dim : dimension for the LSTM
            hid_size : cell_state_size
            seq_len : max_timesteps
            out_dim : dimension for LSTM output
        '''
        # self.main = lstm(hid_size, layerNorm)

        self.adamepsilon = 1e-6

        self.mode = tf.contrib.learn.ModeKeys.TRAIN  # TRAIN for training, INFER for prediction, EVAL for evaluation
        self.infer_outputs = None
        with tf.variable_scope(self.scope):
            self._create_network()

        buffer_shape = [
            seq_len + 2, out_dim
        ]  # plus 2: the [0] is 'src', [1] is 'dest', [2:] are 'labels',
        if self.sample_func is None:
            from sampler import make_sample_plans
            self.sample_func = make_sample_plans()
        self.buffer = PlanReplayBuffer(buffer_shape, buffer_size,
                                       self.sample_func)

    def _create_network(self):
        self.sess = U.get_session()

        self.inp_src = tf.placeholder(shape=[None, 1, self.inp_dim],
                                      dtype=tf.float32,
                                      name='input_src')
        self.inp_dest = tf.placeholder(shape=[None, 1, self.out_dim],
                                       dtype=tf.float32,
                                       name='input_dest')
        self.labels = tf.placeholder(shape=[None, self.seq_len, self.out_dim],
                                     dtype=tf.float32,
                                     name='label')
        self.src_seq_len = tf.placeholder(tf.int32, (None, ),
                                          name='source_sequence_length')
        self.tar_seq_len = tf.placeholder(tf.int32, (None, ),
                                          name='target_sequence_length')
        # running averages
        # with tf.variable_scope('goal_stats_src'):
        #     self.goal_stats_src = Normalizer(self.inp_dim, self.norm_eps, self.norm_clip, sess=self.sess)
        with tf.variable_scope('goal_stats_dest'):
            self.goal_stats_dest = Normalizer(self.out_dim,
                                              self.norm_eps,
                                              self.norm_clip,
                                              sess=self.sess,
                                              PLN=True)

        # normalize inp_src, and goals labels
        inp_src = self.goal_stats_dest.normalize(self.inp_src)
        inp_dest = self.goal_stats_dest.normalize(self.inp_dest)
        goal_labels = self.goal_stats_dest.normalize(self.labels)
        with tf.variable_scope('goal_gen'):
            encoder_cell = tf.nn.rnn_cell.LSTMCell(self.hid_size)
            encoder_outputs, encoder_state = tf.nn.dynamic_rnn(
                encoder_cell,
                inp_src,
                sequence_length=self.src_seq_len,
                dtype=tf.float32)

            decoder_cell = tf.nn.rnn_cell.LSTMCell(self.hid_size)
            project_layer = tf.layers.Dense(self.out_dim)

            with tf.variable_scope("decode"):
                train_inp = tf.concat([inp_dest, goal_labels[:, :-1, :]],
                                      axis=-2)
                train_helper = tf.contrib.seq2seq.TrainingHelper(
                    train_inp, sequence_length=self.tar_seq_len)
                train_decoder = tf.contrib.seq2seq.BasicDecoder(
                    decoder_cell,
                    train_helper,
                    encoder_state,
                    output_layer=project_layer)
                train_outputs, _, final_seq_len = tf.contrib.seq2seq.dynamic_decode(
                    train_decoder, maximum_iterations=self.seq_len)
                self.train_outputs = train_outputs.rnn_output

            with tf.variable_scope("decode", reuse=True):
                infer_helper = ContinousInferHelper(inp_dest[:, 0, :],
                                                    self.tar_seq_len)
                infer_decoder = tf.contrib.seq2seq.BasicDecoder(
                    decoder_cell,
                    infer_helper,
                    encoder_state,
                    output_layer=project_layer)
                infer_outputs, _, final_seq_len = tf.contrib.seq2seq.dynamic_decode(
                    infer_decoder, maximum_iterations=self.seq_len)
                self.infer_outputs = self.goal_stats_dest.denormalize(
                    infer_outputs.rnn_output)

            log_sigma = tf.get_variable(name="logstd",
                                        shape=[1, self.out_dim],
                                        initializer=U.normc_initializer(0.1))

            goals = train_outputs.rnn_output
            loss = 0.5 * tf.reduce_sum(
                tf.square((goal_labels - goals) / tf.exp(log_sigma)), axis=-1) \
                + 0.5 * np.log(2 * np.pi) * tf.to_float(tf.shape(self.labels)[-1]) \
                + tf.reduce_sum(log_sigma, axis=-1)
            self.loss = tf.reduce_mean(loss)
            self.tr_outputs = self.goal_stats_dest.denormalize(
                self.train_outputs
            )  # just for inspect the correctness of training

        var_list = self._vars('')
        self.grads = U.flatgrad(self.loss, var_list)
        self.adam = MpiAdam(var_list, epsilon=self.adamepsilon)

        tf.variables_initializer(self._global_vars('')).run()
        self.adam.sync()

    def train(self, use_buffer=False, justEval=False, **kwargs):
        self.mode = tf.contrib.learn.ModeKeys.TRAIN
        if not use_buffer:
            src = np.reshape(kwargs['src'], [-1, 1, self.inp_dim])
            dest = np.reshape(kwargs['dest'], [-1, 1, self.out_dim])
            lbl = kwargs['lbl']
        else:
            episode_batch = self.buffer.sample(self.batch_size)
            src = np.reshape(episode_batch[:, 0, :], [-1, 1, self.inp_dim])
            lbl = episode_batch[:, 2:, :]
            dest = np.reshape(episode_batch[:, 1, :], [-1, 1, self.out_dim])
        src_seq_len = [1] * src.shape[0]
        tar_seq_len = [self.seq_len] * dest.shape[0]
        # compute grads
        loss, g, tr_sub_goals, te_sub_goals = self.sess.run(
            [self.loss, self.grads, self.tr_outputs, self.infer_outputs],
            feed_dict={
                self.inp_src: src,
                self.inp_dest: dest,
                self.labels: lbl,
                self.src_seq_len: src_seq_len,
                self.tar_seq_len: tar_seq_len
            })
        if not justEval:
            self.adam.update(g, stepsize=self.optim_stepsize)
        return loss, tr_sub_goals[-1], te_sub_goals[-1]

    def plan(self, src, dest):
        src = np.reshape(src, [-1, 1, self.inp_dim])
        dest = np.reshape(dest, [-1, 1, self.out_dim])
        src_seq_len = [1] * src.shape[0]
        tar_seq_len = [self.seq_len] * dest.shape[0]
        plan_goals = self.sess.run(self.infer_outputs,
                                   feed_dict={
                                       self.inp_src: src,
                                       self.inp_dest: dest,
                                       self.src_seq_len: src_seq_len,
                                       self.tar_seq_len: tar_seq_len
                                   })

        assert plan_goals.shape[0] == src.shape[0] and plan_goals.shape[
            1] == self.seq_len
        plan_goals = np.flip(plan_goals, axis=-2)
        plan_goals = np.concatenate([plan_goals, dest],
                                    axis=-2)  # append the ultimate goal
        return plan_goals

    def store_episode(self, episode_batch, update_stats=True):
        """ episode_batch : [batch_size * (subgoal_num+1) * subgoal_dim]
        """
        isNull = episode_batch.shape[0] < 1
        if not isNull:
            self.buffer.store_episode(episode_batch)
        # logger.info("buffer store_episode done. updating statistics.")
        if update_stats:
            subgoals = episode_batch[:, 1:, :]
            self.goal_stats_dest.update(subgoals, isNull=isNull)
            # logger.info("ready to recomput_stats")
            # print(subgoals)
            self.goal_stats_dest.recompute_stats(inc=episode_batch.shape[0])

    def update_normalizer_stats(self, batch):
        # self.goal_stats_src.update(batch['src'])
        self.goal_stats_dest.update(batch['dest'])
        # self.goal_stats_src.recompute_stats()
        self.goal_stats_dest.recompute_stats()

    def _vars(self, scope):
        res = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                                scope=self.scope + '/' + scope)
        assert len(res) > 0
        return res

    def _global_vars(self, scope):
        res = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES,
                                scope=self.scope + '/' + scope)
        return res

    def save(self, save_path):
        assert self.infer_outputs is not None
        var_list = self._global_vars('')
        U.save_variables(save_path, variables=var_list, sess=self.sess)

    def load(self, load_path):
        if self.infer_outputs is None:
            self._create_network()
        var_list = self._global_vars('')
        U.load_variables(load_path, variables=var_list)

    def logs(self, prefix=''):
        logs = []
        logs += [('subgoals/buff_size',
                  self.buffer.get_current_episode_size())]
        logs += [('goals/mean',
                  np.mean(self.sess.run([self.goal_stats_dest.mean])))]
        logs += [('goals/std',
                  np.mean(self.sess.run([self.goal_stats_dest.std])))]

        if prefix != '':
            prefix = prefix.strip('/')
            return [(prefix + '/' + key, val) for key, val in logs]
        else:
            return logs
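
The planner loss assembled in _create_network above is the negative log-likelihood of the normalized subgoal labels under a Gaussian whose log standard deviation is the learned logstd variable. The short NumPy check below (function name and test shapes are illustrative assumptions) mirrors that expression and shows the value it takes for perfectly predicted goals with unit variance.

import numpy as np

def gaussian_nll(labels, preds, log_sigma):
    # -log N(labels | preds, exp(log_sigma)^2), summed over the goal dimension
    return (0.5 * np.sum(np.square((labels - preds) / np.exp(log_sigma)), axis=-1)
            + 0.5 * np.log(2 * np.pi) * labels.shape[-1]
            + np.sum(log_sigma, axis=-1))

labels = np.zeros((2, 5, 3))   # batch of 2 plans, seq_len 5, out_dim 3
preds = np.zeros((2, 5, 3))    # perfect predictions
log_sigma = np.zeros((1, 3))   # unit standard deviation
print(gaussian_nll(labels, preds, log_sigma).mean())  # 1.5 * ln(2*pi) ~= 2.76
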