Example #1
    def run(self, data_loader, batch_size, beam_size=3):  # data is either a list of lists or a dataset_loader
        self.encoder.eval()
        self.decoder.eval()
        self.vae.eval()

        pbar = ProgressBar()
        pbar.set(total_steps=len(data_loader))

        total_loss = 0.
        with torch.no_grad():
            for counter, (x, y) in enumerate(data_loader):
                pbar.update(progress=counter, text="Epoch {:d}, progress {}/{}, eval average loss \033[93m{:.6f}\033[0m ... ".format(self.epoch, counter, len(data_loader), total_loss/(counter+1)))

                if x.size(0) != batch_size:
                    print("\t Incomplete batch, skipping.")
                    continue

                if self.train_on_gpu:
                    x, y = x.cuda(), y.cuda()

                # decode only the first instance of the batch
                x = x[0:1, :]
                y = y[0:1, :]
                results, scores, loss = self._run_instance(x, y, beam_size)
                total_loss += float(loss)  # accumulate so the reported average is meaningful (assumes _run_instance returns a scalar loss)

        pbar.update(text="Epoch {:d}, eval done, average loss \033[93m{:.6f}\033[0m".format(self.epoch, total_loss/len(data_loader)))
        return total_loss/len(data_loader)
Example #2
def samples_distribution_from_states(mymdp,
                                     policy,
                                     phi,
                                     states,
                                     n_next=20,
                                     seed=1,
                                     verbose=True):
    n = states.shape[0]
    states_next = np.ones([n, mymdp.dim_S])
    feat = np.zeros((n, phi.dim))
    feat_next = np.zeros_like(feat)
    rewards = np.ones(n)
    np.random.seed(seed)

    with ProgressBar(enabled=verbose) as p:
        for k in xrange(n):
            p.update(k, n, "Sampling MDP Distribution")
            s = states[k, :]
            s0, a, s1, r = mymdp.sample_step(s,
                                             policy=policy,
                                             n_samples=n_next)
            states[k, :] = s0
            feat[k, :] = phi(s0)
            fn = apply_rowise(phi, s1)
            feat_next[k, :] = np.mean(fn, axis=0)
            states_next[k, :] = np.mean(s1, axis=0)
            rewards[k] = np.mean(r)

    return states, rewards, states_next, feat, feat_next
Example #3
    def avg_error_data_budget(self, methods, n_indep, verbose=False, n_jobs=1, **kwargs):

        res = []
        if n_jobs == 1:
            with ProgressBar(enabled=verbose) as p:

                for seed in range(n_indep):
                    p.update(
                        seed, n_indep, "{} of {} seeds".format(seed, n_indep))
                    kwargs['seed'] = seed
                    res.append(self.error_data_budget(methods, **kwargs))

        else:
            jobs = []
            for seed in range(n_indep):
                kwargs = kwargs.copy()
                kwargs['seed'] = seed
                self.projection_operator()
                jobs.append((tmp3, [self, methods], kwargs))

            res = Parallel(n_jobs=n_jobs, verbose=verbose)(jobs)
        errors, times = zip(*res)

        errors = np.array(errors).swapaxes(0, 1)
        return np.mean(errors, axis=1), np.std(errors, axis=1), np.mean(times, axis=0)
Example #4
def samples_cached(mymdp,
                   policy,
                   n_iter=1000,
                   n_restarts=100,
                   no_next_noise=False,
                   seed=1,  # use an integer seed for np.random.seed
                   verbose=0):
    assert (seed is not None)
    states = np.ones([n_restarts * n_iter, mymdp.dim_S])
    states_next = np.ones([n_restarts * n_iter, mymdp.dim_S])
    actions = np.ones([n_restarts * n_iter, mymdp.dim_A])
    rewards = np.ones(n_restarts * n_iter)
    np.random.seed(seed)

    restarts = np.zeros(n_restarts * n_iter, dtype="bool")
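    # restarts[k] marks whether transition k is the first step of a fresh trajectory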
    k = 0

    with ProgressBar(enabled=(verbose > 2.)) as p:
        while k < n_restarts * n_iter:
            restarts[k] = True
            for s, a, s_n, r in mymdp.sample_transition(n_iter,
                                                        policy,
                                                        with_restart=False,
                                                        seed=None):
                states[k, :] = s
                states_next[k, :] = s_n
                rewards[k] = r
                actions[k, :] = a

                k += 1
                p.update(k, n_restarts * n_iter)
                if k >= n_restarts * n_iter:
                    break
    return states, actions, rewards, states_next, restarts
Example #5
class EZNCoderC(object):    
    def __init__(self):
        self._encoder = EZNCoder()
        self._encoder.include_observer(self)
        self._p_bar = ProgressBar('green', width=70, block='▣', empty='□')
        print 'EZNCoder  v0.2'
        self._encoder.avixvid()
            
    def update(self):
        print 'Converting: ' + self._encoder.get_cur_conv()
        print '\n'
        print 'Remaining ' + str(self._encoder.get_rem_count()) + ' file(s).'
        
    def update_percent(self):
        i = self._encoder.get_percent()
        self._p_bar.render(i,'')        
    
    def cleanup(self):    
        pass
Example #6
    def regularization_paths(self, methods, n_samples=1000, n_eps=1,
                             seed=1, criteria=["RMSBE"], verbose=0):

        # Initialization
        self._init_methods(methods)
        err_f = [self._init_error_fun(criterion) for criterion in criteria]

        errors = dict([(crit, [[] for m in methods]) for crit in criteria])
        for m in methods:
            m.reset_trace()

        # Generate trajectories
        s, a, r, s_n, restarts = self.mdp.samples_cached(n_iter=n_samples,
                                                         n_restarts=n_eps,
                                                         policy=self.behavior_policy,
                                                         seed=seed)
        if self.off_policy:
            m_a_beh = policies.mean_action_trajectory(self.behavior_policy, s)
            m_a_tar = policies.mean_action_trajectory(self.target_policy, s)
            rhos = np.zeros_like(r)
            self.rhos = rhos

        # Method learning
        with ProgressBar(enabled=(verbose > 2.)) as p:
            for i in xrange(n_samples * n_eps):
                p.update(i, n_samples * n_eps)
                f0 = self.phi(s[i])
                f1 = self.phi(s_n[i])
                if restarts[i]:
                    for k, m in enumerate(methods):
                        m.reset_trace()

                for k, m in enumerate(methods):
                    if self.off_policy:
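                        # importance weight: probability of the taken action under the target policy divided by its probability under the behavior policy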
                        rhos[i] = self.target_policy.p(s[i], a[i], mean=m_a_tar[i]) / self.behavior_policy.p(s[i], a[i], mean=m_a_beh[i])
                        m.update_V(s[i], s_n[i], r[i],
                                    rho=rhos[i],
                                    f0=f0, f1=f1)
                    else:
                        m.update_V(s[i], s_n[i], r[i], f0=f0, f1=f1)


        for i, m in enumerate(methods):
            v = m.regularization_path()
            for tau, theta in v:
                for i_e, crit in enumerate(criteria):
                    errors[crit][i].append((tau, theta, err_f[i_e](theta)))

        return errors
Example #7
def samples_distribution(mymdp,
                         policy,
                         phi,
                         policy_traj=None,
                         n_subsample=1,
                         n_iter=1000,
                         n_restarts=100,
                         n_next=20,
                         seed=1,
                         verbose=True):
    assert (n_subsample == 1)  # not implemented, do that if you need it
    states = np.ones([n_restarts * n_iter, mymdp.dim_S])
    if policy_traj is None:
        policy_traj = policy
    states_next = np.ones([n_restarts * n_iter, mymdp.dim_S])
    feat = np.zeros((n_restarts * n_iter, phi.dim))
    feat_next = np.zeros_like(feat)
    rewards = np.ones(n_restarts * n_iter)
    np.random.seed(seed)

    k = 0
    s = mymdp.start()
    c = 0
    with ProgressBar(enabled=verbose) as p:
        for k in xrange(n_restarts * n_iter):
            if mymdp.terminal_f(s) or c >= n_iter:
                s = mymdp.start()
                c = 0
            p.update(k, n_restarts * n_iter, "Sampling MDP Distribution")
            s0, a, s1, r = mymdp.sample_step(s,
                                             policy=policy,
                                             n_samples=n_next)
            states[k, :] = s0
            feat[k, :] = phi(s0)
            fn = apply_rowise(phi, s1)
            feat_next[k, :] = np.mean(fn, axis=0)
            states_next[k, :] = np.mean(s1, axis=0)
            rewards[k] = np.mean(r)
            _, _, s, _ = mymdp.sample_step(s, policy=policy_traj, n_samples=1)
            c += 1

    return states, rewards, states_next, feat, feat_next
Example #8
    def avg_error_traces(self, methods, n_indep, verbose=0, n_jobs=1, **kwargs):
        res = []
        if n_jobs == 1:
            with ProgressBar(enabled=(verbose > 0)) as p:

                for seed in range(n_indep):
                    p.update(
                        seed, n_indep, "{} of {} seeds".format(seed, n_indep))
                    kwargs['seed'] = seed
                    res.append(
                        self.error_traces(methods, verbose=verbose, **kwargs))
        else:
            jobs = []
            for seed in range(n_indep):
                kwargs = kwargs.copy()
                kwargs['seed'] = seed
                #self.projection_operator()
                jobs.append((tmp, [self, methods], kwargs))
            res = Parallel(n_jobs=n_jobs, verbose=verbose)(jobs)
            res = np.array(res)
        return np.mean(res, axis=0), np.std(res, axis=0), res
Example #9
def accum_reward_for_states(mymdp,
                            policy,
                            states,
                            gamma,
                            n_eps,
                            l_eps,
                            seed,
                            verbose=3,
                            n_jobs=24):
    n = states.shape[0]
    rewards = np.ones(n)
    if n_jobs == 1:
        with ProgressBar(enabled=(verbose >= 1)) as p:
            for k in xrange(n):
                p.update(k, n, "Sampling acc. reward")
                np.random.seed(seed)
                r = mymdp.sample_accum_reward(states[k],
                                              gamma,
                                              policy,
                                              n_eps=n_eps,
                                              l_eps=l_eps)
                rewards[k] = np.mean(r)
    else:
        jobs = []
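        # split the states into chunks of roughly n / n_jobs and dispatch one job per chunk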
        b = int(n / n_jobs) + 1
        k = 0
        while k < n:
            kp = min(k + b, n)
            jobs.append(
                (run1,
                 [mymdp, policy, states[k:kp], gamma, n_eps, l_eps, seed], {
                     "verbose": verbose - 1,
                     "n_jobs": 1
                 }))
            k = kp
        res = Parallel(n_jobs=n_jobs, verbose=verbose)(jobs)
        rewards = np.concatenate(res, axis=0)
    return rewards
Example #10
 def _eval(self, valid_loader, batch_size):                
     self.encoder.eval()
     self.decoder.eval()
     self.vae.eval()            
     
     pbar = ProgressBar()
     pbar.set(total_steps=len(valid_loader)) 
    
     counter = 0 
     total_loss = 0.
     with torch.no_grad():
         for counter, (x, y) in enumerate(valid_loader):                
             #if counter > 5:
             #    break
             pbar.update(progress=counter, text="Epoch {:d}, progress {}/{}, eval average loss \033[93m{:.6f}\033[0m ... ".format(self.epoch, counter, len(valid_loader), total_loss/(counter+1)))   
             batch_size = x.size(0)
             max_seq_len_x = x.size(1)
             max_seq_len_y = y.size(1)
             loss = 0
             #print("  Epoch {}, batch: {}/{}, max_seq_len_x: {}, max_seq_len_y: {}".format(self.epoch, counter, len(valid_loader), max_seq_len_x, max_seq_len_y))
             if x.size(0) != batch_size:
                 print("\t Incomplete batch, skipping.")
                 continue
             
             if(self.train_on_gpu):
                 x, y = x.cuda(), y.cuda()
             
             encoder_hidden = self.encoder.init_hidden(batch_size)
             decoder_hidden = self.decoder.init_hidden(batch_size)
     
             encoder_output, encoder_hidden = self.encoder(x, encoder_hidden)                 
             encoder_last_output = torch.zeros(batch_size, self.encoder_hidden_dim*2, device=self.device)
             for j in range(batch_size):
                 encoder_last_output[j] = encoder_output[j][-1]
             
             # VAE
             z, mu, logvar = self.vae(encoder_last_output)
             
             word_softmax_projection = torch.zeros(batch_size, 5, dtype = torch.float, device=self.device)
             word_softmax_projection[:,2] = 1. # beginning of sentence value is 2, set it  #XXX
             
             decoder_output = decoder_hidden[0].view(self.decoder_n_layers, 1, batch_size, self.decoder_hidden_dim) #torch.Size([2, 1, 64, 512])
             decoder_output = decoder_output[-1].permute(1,0,2) 
                             
             loss = 0             
             print_example = True
             example_array = [2]
             
             for i in range(max_seq_len_y): 
                 #print("\t Decoder step {}/{}".format(i, max_seq_len_y))                        
                 _, decoder_input = word_softmax_projection.max(1) # no need for values, just indexes 
                 decoder_input = decoder_input.unsqueeze(1)          
                 
                 decoder_output, decoder_hidden, word_softmax_projection = self.decoder.forward_step(decoder_input, decoder_hidden, z)                    
                 word_softmax_projection = word_softmax_projection.squeeze(1) # eliminate dim 1
                 if print_example:                        
                     _, mi = word_softmax_projection[0].max(0)
                     example_array.append(mi.item())
                     
                 target_y = y[:,i] # select from y the ith column and shape as an array                    
                 loss += self.criterion(word_softmax_projection, target_y) 
             
             loss /= batch_size
             KLD = -0.5 * torch.sum(1 + logvar - mu.pow(2) - logvar.exp())
             loss += KLD            
             
             total_loss += loss.data.item()    
             
             #print("\t\t\t Eval Loss: {}".format(loss.data.item()))
             if print_example:
                 print_example = False 
                 print()    
                 print("\n\n----- X:")
                 print(" ".join([self.src_i2w[str(wi.data.item())] for wi in x[0]]))                                            
                 print("----- Y:")
                 print(" ".join([self.tgt_i2w[str(wi.data.item())] for wi in y[0]]))                    
                 print("----- OUR PREDICTION:")
                 print(" ".join([self.tgt_i2w[str(wi)] for wi in example_array]))
                 print()
                 print(" ".join([str(wi.data.item()) for wi in y[0]]))
                 print(" ".join([str(wi) for wi in example_array]))
                 print()
                 #self.writer.add_text('EvalText', " ".join([self.i2w[str(wi.data.item())] for wi in y[0]]) + " --vs-- "+" ".join([self.i2w[str(wi)] for wi in example_array]), self.epoch)                    
     
     pbar.update(text="Epoch {:d}, eval done, average loss \033[93m{:.6f}\033[0m".format(self.epoch, total_loss/len(valid_loader))) 
 
     return total_loss/len(valid_loader)
Example #11
    def _train_epoch(self, train_loader):                       
        self.epoch += 1
        self.encoder.train()
        self.decoder.train()
        self.vae.train()        
        
        #encoder_hidden = self.encoder.init_hidden(batch_size)
        #decoder_hidden = self.decoder.init_hidden(batch_size)
        total_loss = 0.
        pbar = ProgressBar()
        pbar.set(total_steps=len(train_loader)) 
        
        for counter, (x, y) in enumerate(train_loader):
            batch_size = x.size(0)
            max_seq_len_x = x.size(1) # x is 64 x 399 (variable)
            max_seq_len_y = y.size(1) # y is 64 x variable
            
            pbar.update(progress=counter, text="Epoch {:d}, progress {}/{}, train average loss \033[93m{:.6f}\033[0m (bs/mx/my = {}/{}/{}) ... ".format(self.epoch, counter, len(train_loader), total_loss/(counter+1), batch_size, max_seq_len_x, max_seq_len_y))                         
                        
            #if counter > 1:               
            #    break                
            if counter % 500 == 0 and counter > 0:
                self.save_checkpoint("last")
                
            loss = 0   
            """            
            if x.size(0) != batch_size:
                print("\t Incomplete batch, skipping.")
                continue
            """
            # print(x.size()) # x is a 64 * 399 tensor (batch*max_seq_len_x)               

            if(self.train_on_gpu):
                x, y = x.cuda(), y.cuda()

            encoder_hidden = self.encoder.init_hidden(batch_size)
            decoder_hidden = self.decoder.init_hidden(batch_size)        
            #print(decoder_hidden[0].size())
            
            # zero grads in optimizer
            self.optimizer.zero_grad()                
            
            # encoder
            # x is batch_size x max_seq_len_x            
            encoder_output, encoder_hidden = self.encoder(x, encoder_hidden)             
            # encoder_output is batch_size x max_seq_len_x x encoder_hidden (where encoder_hidden is double because it is bidirectional)
            # print(encoder_output.size())
            
            # take last state of encoder as encoder_last_output # not necessary when using attention                
            encoder_last_output = torch.zeros(batch_size, self.encoder_hidden_dim*2, device=self.device) # was with ,1, in middle ?
            for j in range(batch_size):
                encoder_last_output[j] = encoder_output[j][-1]
            # encoder_last_output is last state of the encoder batch_size * encoder_hidden_dim
        
            # VAE
            z, mu, logvar = self.vae(encoder_last_output) # all are (batch_size, encoder_hidden_dim)
            
            # create first decoder output for initial attention call, extract from decoder_hidden
            decoder_output = decoder_hidden[0].view(self.decoder_n_layers, 1, batch_size, self.decoder_hidden_dim) #torch.Size([2, 1, 64, 512])
            # it should look like batch_size x 1 x decoder_hidden_size, so transform it
            decoder_output = decoder_output[-1].permute(1,0,2) 
            #print(decoder_output.size())
                
            recon_loss = 0                 
            for i in range(max_seq_len_y): 
                #print("\t Decoder step {}/{}".format(i, max_seq_len_y))    
                
                # teacher forcing (or this is the first word, which is always the start-of-sentence token)
                if random.random()<=self.teacher_forcing_ratio or i==0:
                    decoder_input = torch.zeros(batch_size, 1, dtype = torch.long, device=self.device) # 1 in middle is because lstm expects (batch, seq_len, input_size): 
                    for j in range(batch_size):
                        decoder_input[j]=y[j][i]                
                    #print(decoder_input.size()) # batch_size x 1                            
                else: # feed own previous prediction extracted from word_softmax_projection
                    _, decoder_input = word_softmax_projection.max(1) # no need for values, just indexes 
                    decoder_input = decoder_input.unsqueeze(1) # from batch_size to batch_size x 1                    
                    #print(decoder_input.size()) # batch_size x 1                            

                
                # z context is batch_size * encoder_hidden_dim            
                decoder_output, decoder_hidden, word_softmax_projection = self.decoder.forward_step(decoder_input, decoder_hidden, z)
                # first, reduce word_softmax_projection which is torch.Size([64, 1, 50004]) to 64 * 50004
                word_softmax_projection = word_softmax_projection.squeeze(1) # eliminate dim 1
                
                # now, select target y
                # y looks like batch_size * max_seq_len_y : tensor([[    2, 10890, 48108,  ...,     0,     0,     0], ... ... ..
                target_y = y[:,i] # select from y the ith column and shape as an array 
                # target_y now looks like [ 10, 2323, 5739, 24, 9785 ... ] of size 64 (batch_size)
                #print(word_softmax_projection.size())
                #print(target_y.size())
                recon_loss += self.criterion(word_softmax_projection, target_y)
                # end decoder individual step
                
            global_minibatch_step = (self.epoch-1)*len(train_loader)+counter   
            #print("epoch {}, counter {}, global_minibatch_step {}".format(self.epoch, counter, global_minibatch_step))        
            
            self.log.var("train_loss|Total loss|Recon loss|Weighted KLD loss", global_minibatch_step, recon_loss.data.item(), y_index=1)
            
            # KL annealing: ramp the weight of the KLD term up gradually over training steps, following the configured schedule
            KL_weight = self.vae.kl_anneal_function(step=global_minibatch_step, k=self.vae_kld_anneal_k, x0=self.vae_kld_anneal_x0, anneal_function=self.vae_kld_anneal_function)
            self.log.var("KLD weight", global_minibatch_step, KL_weight, y_index=0)
            
            # KL divergence between the approximate posterior N(mu, sigma^2) and the standard normal prior, summed over batch and latent dimensions
            KLD = -0.5 * torch.sum(1 + logvar - mu.pow(2) - logvar.exp())
            self.log.var("KLD", global_minibatch_step, KLD.data.item(), y_index=0)
            
            KLD *= KL_weight
            self.log.var("train_loss|Total loss|Recon loss|Weighted KLD loss", global_minibatch_step, KLD.data.item(), y_index=2)
            
            loss = recon_loss + KLD
            self.log.var("train_loss|Total loss|Recon loss|Weighted KLD loss", global_minibatch_step, loss.data.item(), y_index=0)
                        
            total_loss += loss.data.item() / batch_size 
            loss.backward() # calculate the loss and perform backprop
            
            # `clip_grad_norm` helps prevent the exploding gradient problem in RNNs / LSTMs.
            nn.utils.clip_grad_norm_(self.encoder.parameters(), self.gradient_clip)
            nn.utils.clip_grad_norm_(self.decoder.parameters(), self.gradient_clip)
            nn.utils.clip_grad_norm_(self.vae.parameters(), self.gradient_clip)
            self.optimizer.step()
            
            #self.writer.add_scalar('Train/Loss', loss.data.item())            
            #break
            self.log.draw()
            self.log.draw(last_quarter = True)
            # end batch
        
        #end epoch
        pbar.update(text="Epoch {:d}, train done, average loss \033[93m{:.6f}\033[0m".format(self.epoch, total_loss))  #/len(train_loader)
        
        
        return total_loss #/len(train_loader)
Example #12
    def error_traces(self, methods, n_samples=1000, n_eps=1, verbose=0.,
                     seed=1, criteria=["RMSBE"], error_every=1, episodic=False,
                     eval_on_traces=False, n_samples_eval=None):

        # Initialization
        self._init_methods(methods)
        err_f = [self._init_error_fun(criterion) for criterion in criteria]
        err_f_gen = [self._init_error_fun(
            criterion, general=True) for criterion in criteria]

        if episodic:
            n_e = n_eps
            k_e = 0
        else:
            n_e = int(np.ceil(float(n_samples * n_eps) / error_every))

        errors = np.ones((len(methods), len(criteria), n_e)) * np.inf
        for m in methods:
            m.reset_trace()

        # Generate trajectories
        with Timer("Generate Samples", active=(verbose > 1.)):
            s, a, r, s_n, restarts = self.mdp.samples_cached(n_iter=n_samples,
                                                             n_restarts=n_eps,
                                                             policy=self.behavior_policy,
                                                             seed=seed, verbose=verbose)
        with Timer("Generate Double Samples", active=(verbose > 1.)):
            a2, r2, s_n2 = self.mdp.samples_cached_transitions(
                policy=self.behavior_policy,
                states=s, seed=seed)
        if eval_on_traces:
            print "Evaluation of traces samples"
            self.set_mu_from_states(
                seed=self.mu_seed, s=s, n_samples_eval=n_samples_eval)

        if self.off_policy:
            with Timer("Generate off-policy weights", active=(verbose > 1.)):
                m_a_beh = policies.mean_action_trajectory(
                    self.behavior_policy, s)
                m_a_tar = policies.mean_action_trajectory(
                    self.target_policy, s)
                rhos = np.zeros_like(r)
                rhos2 = np.zeros_like(r2)
                self.rhos = rhos

        # Method learning
        with ProgressBar(enabled=(verbose > 2.)) as p:
            for i in xrange(n_samples * n_eps):
                p.update(i, n_samples * n_eps)
                f0 = self.phi(s[i])
                f1 = self.phi(s_n[i])
                f1t = self.phi(s_n2[i])
                if restarts[i]:
                    for k, m in enumerate(methods):
                        m.reset_trace()
                        if episodic:
                            cur_theta = m.theta
                            if not np.isfinite(np.sum(cur_theta)):
                                errors[k,:, k_e] = np.nan
                                continue
                            for i_e in range(len(criteria)):
                                if isinstance(m, td.LinearValueFunctionPredictor):
                                    errors[k, i_e, k_e] = err_f[i_e](cur_theta)
                                else:
                                    errors[k, i_e, k_e] = err_f_gen[i_e](m.V)

                    if episodic:
                        k_e += 1
                        if k_e >= n_e:
                            break

                for k, m in enumerate(methods):
                    if self.off_policy:
                        rhos[i] = self.target_policy.p(s[i], a[i], mean=m_a_tar[i]) / self.behavior_policy.p(s[i], a[i], mean=m_a_beh[i])
                        rhos2[i] = self.target_policy.p(s[i], a2[i], mean=m_a_tar[i]) / self.behavior_policy.p(s[i], a2[i], mean=m_a_beh[i])
                        m.update_V(s[i], s_n[i], r[i],
                                   rho=rhos[i], rhot=rhos2[i],
                                   f0=f0, f1=f1, f1t=f1t, s1t=s_n[i], rt=r2[i])
                    else:
                        m.update_V(s[i], s_n[i], r[i],
                                   f0=f0, f1=f1, s1t=s_n2[i], f1t=f1t, rt=r2[i])
                    if i % error_every == 0 and not episodic:
                        cur_theta = m.theta
                        if not np.isfinite(np.sum(cur_theta)):
                            errors[k,:, int(i / error_every)] = np.nan
                            continue
                        for i_e in range(len(criteria)):
                            if isinstance(m, td.LinearValueFunctionPredictor):
                                errors[k, i_e, int(
                                    i / error_every)] = err_f[i_e](cur_theta)
                            else:
                                errors[k, i_e, int(
                                    i / error_every)] = err_f_gen[i_e](m.V)

        return errors
Example #13
    def error_traces_cpu_time(self, method, max_t=600, max_passes=None, min_diff=0.1, n_samples=1000, n_eps=1, verbose=0.,
                     seed=1, criteria=["RMSBE"], error_every=1,
                     eval_on_traces=False, n_samples_eval=None, eval_once=False):

        # Initialization
        self._init_methods([method])
        err_f = [self._init_error_fun(criterion) for criterion in criteria]
        err_f_gen = [self._init_error_fun(
            criterion, general=True) for criterion in criteria]

        times = []
        errors = []
        processed = []

        method.reset_trace()
        if hasattr(method, "lam") and method.lam > 0.:
            print "WARNING: reuse of samples only works without e-traces"

        # Generate trajectories
        with Timer("Generate Samples", active=(verbose > 1.)):
            s, a, r, s_n, restarts = self.mdp.samples_cached(n_iter=n_samples,
                                                             n_restarts=n_eps,
                                                             policy=self.behavior_policy,
                                                             seed=seed, verbose=verbose)
        with Timer("Generate Double Samples", active=(verbose > 1.)):
            a2, r2, s_n2 = self.mdp.samples_cached_transitions(
                policy=self.behavior_policy,
                states=s, seed=seed)
        if eval_on_traces:
            print "Evaluation of traces samples"
            self.set_mu_from_states(
                seed=self.mu_seed, s=s, n_samples_eval=n_samples_eval)

        if self.off_policy:
            with Timer("Generate off-policy weights", active=(verbose > 1.)):
                m_a_beh = policies.mean_action_trajectory(
                    self.behavior_policy, s)
                m_a_tar = policies.mean_action_trajectory(
                    self.target_policy, s)
                rhos = np.zeros_like(r)
                rhos2 = np.zeros_like(r2)
                self.rhos = rhos

        # Method learning
        i = 0
        last_t = 0.
        passes = 0
        u = 0
        with ProgressBar(enabled=(verbose > 2.)) as p:
            while method.time < max_t:


                f0 = self.phi(s[i])
                f1 = self.phi(s_n[i])
                f1t = self.phi(s_n2[i])
                #assert not np.any(np.isnan(f0))
                #assert not np.any(np.isnan(f1))
                #assert not np.any(np.isnan(f1t))
                if restarts[i]:
                    method.reset_trace()
                if self.off_policy:
                    rhos[i] = self.target_policy.p(s[i], a[i], mean=m_a_tar[i]) / self.behavior_policy.p(s[i], a[i], mean=m_a_beh[i])
                    rhos2[i] = self.target_policy.p(s[i], a2[i], mean=m_a_tar[i]) / self.behavior_policy.p(s[i], a2[i], mean=m_a_beh[i])
                    method.update_V(s[i], s_n[i], r[i],
                                rho=rhos[i], rhot=rhos2[i],
                                f0=f0, f1=f1, f1t=f1t, s1t=s_n[i], rt=r2[i])
                else:
                    method.update_V(s[i], s_n[i], r[i],
                                f0=f0, f1=f1, s1t=s_n2[i], f1t=f1t, rt=r2[i])
                u+=1
                assert(method.time > last_t)
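                # record an error measurement only when the method's cpu time has advanced by at least min_diff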
                if method.time - last_t > min_diff:
                    p.update(method.time, max_t)
                    last_t = method.time
                    if not eval_once:
                        cur_theta = method.theta
                        e = np.empty(len(criteria))
                        for i_e in range(len(criteria)):
                            e[i_e] = err_f[i_e](cur_theta)
                        errors.append(e)
                        processed.append(u)
                        times.append(method.time)
                i += 1
                if i >= n_samples * n_eps:
                    passes += 1
                    if max_passes is not None and passes >= max_passes:
                        break
                i = i % (n_samples * n_eps)
        if eval_once:
            cur_theta = method.theta
            e = np.empty(len(criteria))
            for i_e in range(len(criteria)):
                e[i_e] = err_f[i_e](cur_theta)
            return e, method.time

        return errors, processed, times
Example #14
 def __init__(self):
     self._encoder = EZNCoder()
     self._encoder.include_observer(self)
     self._p_bar = ProgressBar('green', width=70, block='▣', empty='□')
     print 'EZNCoder  v0.2'
     self._encoder.avixvid()
Example #15
    def _eval(self, valid_loader):                
        self.encoder.eval()
        self.decoder.eval()
        self.attention.eval()            
         
        pbar = ProgressBar()
        pbar.set(total_steps=len(valid_loader)) 
       
        counter = 0 
        total_loss = 0.
        with torch.no_grad():
            for counter, (x, y) in enumerate(valid_loader):                
                #if counter > 5:
                #    break
                pbar.update(progress=counter, text="Epoch {:d}, progress {}/{}, eval average loss \033[93m{:.6f}\033[0m ... ".format(self.epoch, counter, len(valid_loader), total_loss/(counter+1)))   
                
                batch_size = x.size(0)
                max_seq_len_x = x.size(1)
                max_seq_len_y = y.size(1)

                loss = 0
                                
                if(self.train_on_gpu):
                    x, y = x.cuda(), y.cuda()
                
                encoder_hidden = self.encoder.init_hidden(batch_size)
                decoder_hidden = self.decoder.init_hidden(batch_size)
       
                encoder_output, encoder_hidden = self.encoder(x, encoder_hidden) 
                word_softmax_projection = torch.zeros(batch_size, 5, dtype = torch.float, device=self.device)
                word_softmax_projection[:,2] = 1. # beginning of sentence value is 2, set it  #XXX
                
                decoder_output = decoder_hidden[0].view(self.decoder_n_layers, 1, batch_size, self.decoder_hidden_dim) #torch.Size([2, 1, 64, 512])
                decoder_output = decoder_output[-1].permute(1,0,2) 
                                
                loss = 0             
                print_example = True
                example_array = []
                
                for i in range(max_seq_len_y): 
                    #print("\t Decoder step {}/{}".format(i, max_seq_len_y))                        
                    _, decoder_input = word_softmax_projection.max(1) # no need for values, just indexes 
                    decoder_input = decoder_input.unsqueeze(1)                                           
                    context = self.attention(encoder_output, decoder_output)
                    
                    decoder_output, decoder_hidden, word_softmax_projection = self.decoder.forward_step(decoder_input, decoder_hidden, context)                    
                    word_softmax_projection = word_softmax_projection.squeeze(1) # eliminate dim 1
                    if print_example:                        
                        _, mi = word_softmax_projection[0].max(0)
                        example_array.append(mi.item())
                        
                    target_y = y[:,i] # select from y the ith column and shape as an array                    
                    loss += self.criterion(word_softmax_projection, target_y) 
                
                total_loss += loss.data.item() / batch_size    
                
                #print("\t\t\t Eval Loss: {}".format(loss.data.item()))
                if print_example:
                    print_example = False 
                    print()    
                    print("\n\n----- X:")
                    print(" ".join([self.src_i2w[str(wi.data.item())] for wi in x[0]]))                                            
                    print("----- Y:")
                    print(" ".join([self.tgt_i2w[str(wi.data.item())] for wi in y[0]]))                    
                    print("----- OUR PREDICTION:")
                    print(" ".join([self.tgt_i2w[str(wi)] for wi in example_array]))
                    print()
                    print(" ".join([str(wi.data.item()) for wi in y[0]]))
                    print(" ".join([str(wi) for wi in example_array]))
                    print()
            
        self.log.var("Loss|Train loss|Validation loss", self.epoch, total_loss, y_index=1)
        self.log.draw()        
        
        pbar.update(text="Epoch {:d}, eval done, average loss \033[93m{:.6f}\033[0m".format(self.epoch, total_loss/len(valid_loader))) 
    
        return total_loss/len(valid_loader)
Example #16
    def _train_epoch(self, train_loader):                       
        self.epoch += 1
        self.encoder.train()
        self.decoder.train()
        self.attention.train()        
        
        total_loss = 0.
        pbar = ProgressBar()
        pbar.set(total_steps=len(train_loader)) 
        
        for counter, (x, y) in enumerate(train_loader):
            batch_size = x.size(0)
            max_seq_len_x = x.size(1) # x is 64 x 399 (variable)
            max_seq_len_y = y.size(1) # y is 64 x variable

            
            pbar.update(progress=counter, text="Epoch {:d}, progress {}/{}, train average loss \033[93m{:.6f}\033[0m (mx/my = {}/{}) ... ".format(self.epoch, counter, len(train_loader), total_loss/(counter+1), max_seq_len_x, max_seq_len_y))                         
                        
            #if counter > 1:               
            #    break                
            if counter % 1000 == 0 and counter > 0:
                self.save_checkpoint("last")
            
            
            loss = 0            
            # print(x.size()) # x is a 64 * 399 tensor (batch*max_seq_len_x)               

            if(self.train_on_gpu):
                x, y = x.cuda(), y.cuda()
            
            encoder_hidden = self.encoder.init_hidden(batch_size)
            decoder_hidden = self.decoder.init_hidden(batch_size)
            #print(decoder_hidden[0].size())
            
            # zero grads in optimizer
            self.optimizer.zero_grad()                
            
            # encoder
            # x is batch_size x max_seq_len_x            
            encoder_output, encoder_hidden = self.encoder(x, encoder_hidden)             
            # encoder_output is batch_size x max_seq_len_x x encoder_hidden
            #print(encoder_output.size())
            
            # create first decoder output for initial attention call, extract from decoder_hidden
            decoder_output = decoder_hidden[0].view(self.decoder_n_layers, 1, batch_size, self.decoder_hidden_dim) #torch.Size([2, 1, 64, 512])
            # it should look like batch_size x 1 x decoder_hidden_size, so transform it
            decoder_output = decoder_output[-1].permute(1,0,2) 
            #print(decoder_output.size())
                
            loss = 0                 
            for i in range(max_seq_len_y): # why decoder_hidden is initialized in epoch and not in batch??
                #print("\t Decoder step {}/{}".format(i, max_seq_len_y))    
                
                # teacher forcing (or this is the first word, which is always the start-of-sentence token)
                if random.random()<=self.teacher_forcing_ratio or i==0:
                    decoder_input = torch.zeros(batch_size, 1, dtype = torch.long, device=self.device) # 1 in middle is because lstm expects (batch, seq_len, input_size): 
                    for j in range(batch_size):
                        decoder_input[j]=y[j][i]                
                    #print(decoder_input.size()) # batch_size x 1                            
                else: # feed own previous prediction extracted from word_softmax_projection
                    _, decoder_input = word_softmax_projection.max(1) # no need for values, just indexes 
                    decoder_input = decoder_input.unsqueeze(1) # from batch_size to batch_size x 1                    
                    #print(decoder_input.size()) # batch_size x 1                            

                # remove me, for printing attention
                if counter == 1:
                    self.attention.should_print = False#True
                    #print("\t Decoder step {}/{}".format(i, max_seq_len_y))    
                else:
                    self.attention.should_print = False
                    self.attention.att_mat = []
                context = self.attention(encoder_output, decoder_output)
                
                # context is batch_size * encoder_hidden_dim            
                decoder_output, decoder_hidden, word_softmax_projection = self.decoder.forward_step(decoder_input, decoder_hidden, context)
                # first, reduce word_softmax_projection which is torch.Size([64, 1, 50004]) to 64 * 50004
                word_softmax_projection = word_softmax_projection.squeeze(1) # eliminate dim 1
                
                # now, select target y
                # y looks like batch_size * max_seq_len_y : tensor([[    2, 10890, 48108,  ...,     0,     0,     0], ... ... ..
                target_y = y[:,i] # select from y the ith column and shape as an array 
                # target_y now looks like [ 10, 2323, 5739, 24, 9785 ... ] of size 64 (batch_size)
                #print(word_softmax_projection.size())
                #print(target_y.size())
                loss += self.criterion(word_softmax_projection, target_y) # ignore_index is not set because we want padding (0) to count toward the error too
            
            # remove me, attention printing
            """if counter == 1:
                fig = plt.figure(figsize=(12, 10))
                sns.heatmap(self.attention.att_mat,cmap="gist_heat")                
                plt.tight_layout()            
                fig.savefig('img/__'+str(self.epoch)+'.png')
                plt.clf()
            """    
            total_loss += loss.data.item()/batch_size
            loss.backward() # calculate the loss and perform backprop
            
            # `clip_grad_norm` helps prevent the exploding gradient problem in RNNs / LSTMs.
            nn.utils.clip_grad_norm_(self.encoder.parameters(), self.gradient_clip)
            nn.utils.clip_grad_norm_(self.decoder.parameters(), self.gradient_clip)
            nn.utils.clip_grad_norm_(self.attention.parameters(), self.gradient_clip)
            self.optimizer.step()
            # end batch
            
        # end current epoch
        pbar.update(text="Epoch {:d}, train done, average loss \033[93m{:.6f}\033[0m".format(self.epoch, total_loss)) 
        self.log.var("Loss|Train loss|Validation loss", self.epoch, total_loss, y_index=0)
        self.log.draw()
        
        return total_loss