def fit(self, neps, tri, perm):
        """Train the sentence-generation agent with Q-learning.

        Runs ``tri`` independent trials; each trial creates a fresh agent
        and runs ``neps`` episodes of epsilon-greedy Q-learning with a
        linear function approximator (weights updated via
        ``agent.updateWeights``).

        Parameters
        ----------
        neps : int
            Number of episodes per trial.
        tri : int
            Number of independent trials.
        perm : tuple
            ``(epsilon, alpha, gamma)`` hyper-parameter setting; unpacked
            onto ``self.epsilon``, ``self.alpha``, ``self.gamma``.

        Returns
        -------
        tuple
            ``(returns, elapsed)`` where ``returns`` is a ``(neps, tri)``
            array of discounted episode returns and ``elapsed`` is a
            ``datetime.timedelta`` of wall-clock training time.
        """
        start = datetime.now()
        self.epsilon, self.alpha, self.gamma = perm

        returns = np.zeros((neps, tri))
        for t in range(tri):
            # Fresh agent per trial so trials are statistically independent.
            self.agent = SGA.sentGenAgent(self.epsilon, self.alpha, self.gamma)
            print('Running trial: ', t)
            for n in range(neps):
                env = ENV.Environment(self.rewards, self.reward_func)
                state = env.prev_state_id
                run = 'run'
                step = 0
                while run == 'run':
                    action = self.agent.getAction(state)
                    run, reward = env.getNextState(action)
                    # Accumulate the discounted return of this episode.
                    returns[n, t] += (self.gamma ** step) * reward
                    step += 1
                    qsa, phisa = self.agent.qValue(state, action)
                    next_state = env.next_state_id
                    if run == 'terminate':
                        # Terminal transition: bootstrap target is zero.
                        qsa_prime = 0.0
                    else:
                        # Q-learning target: max Q over successor-state actions.
                        # (Hoisted the terminal check out of the action loop so
                        # qValue is not called pointlessly on terminal steps.)
                        qsa_prime = max(
                            self.agent.qValue(next_state, act)[0]
                            for act in self.agent.actions.keys()
                        )
                    self.agent.updateWeights(reward, qsa_prime, qsa, phisa)
                    state = next_state
        end = datetime.now()
        return returns, end - start
    def predict(self, n_samples=1000):
        """Sample sentences with the trained agent and score them.

        Each sentence starts from the environment's initial state and is
        extended greedily/epsilon-greedily by ``self.agent`` until the
        environment signals termination.

        Parameters
        ----------
        n_samples : int, optional
            Number of sentences to generate. Defaults to 1000, preserving
            the previously hard-coded sample count.

        Returns
        -------
        The result of ``self.score(sentences)`` on the generated list of
        sentences (each sentence is a list of states/words).
        """
        sentences = []
        for _ in range(n_samples):
            env = ENV.Environment(self.rewards, self.reward_func)
            state = env.prev_state_id
            # Seed the sentence with the environment's initial state/word.
            sent = [env.prev_state]
            run = 'run'
            while run == 'run':
                action = self.agent.getAction(state)
                # Reward is irrelevant at generation time; only the
                # run/terminate signal and the next state matter here.
                run, _reward = env.getNextState(action)
                sent.append(env.next_state)
                state = env.next_state_id
            sentences.append(sent)

        return self.score(sentences)