def run_epoch(self, session, data, train_op=None, verbose=10):
   config = self.config
   dp = config.dropout
   if not train_op:
     train_op = tf.no_op()
     dp = 1
   total_steps = sum(1 for x in ptb_iterator(data, config.batch_size, config.num_steps))
   total_loss = []
   state = self.initial_state.eval()
   for step, (x, y) in enumerate(
     ptb_iterator(data, config.batch_size, config.num_steps)):
     # We need to pass in the initial state and retrieve the final state to give
     # the RNN proper history
     feed = {self.input_placeholder: x,
             self.labels_placeholder: y,
             self.initial_state: state,
             self.dropout_placeholder: dp}
     loss, state, _ = session.run(
         [self.calculate_loss, self.final_state, train_op], feed_dict=feed)
     total_loss.append(loss)
     if verbose and step % verbose == 0:
         sys.stdout.write('\r{} / {} : pp = {}'.format(
             step, total_steps, np.exp(np.mean(total_loss))))
         sys.stdout.flush()
   if verbose:
     sys.stdout.write('\r')
   return np.exp(np.mean(total_loss))
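For reference, here is a minimal sketch of how a run_epoch method like the one above is typically driven from an outer training loop. The Config, RNNLM_Model, encoded_train, encoded_valid, max_epochs and train_step names are assumptions modeled on this style of assignment code, not definitions taken from the snippets on this page.

import tensorflow as tf

config = Config()                      # hypothetical configuration object
model = RNNLM_Model(config)            # hypothetical model exposing run_epoch

init = tf.global_variables_initializer()
with tf.Session() as session:
    session.run(init)
    for epoch in range(config.max_epochs):
        # Training pass: dropout is active and parameters are updated by train_step.
        train_pp = model.run_epoch(session, model.encoded_train,
                                   train_op=model.train_step)
        # Evaluation pass: train_op falls back to tf.no_op() and dropout is set to 1.
        valid_pp = model.run_epoch(session, model.encoded_valid)
        print('Epoch {}: train pp {:.2f}, valid pp {:.2f}'.format(
            epoch, train_pp, valid_pp))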
def run_epoch(session, data, train_op=None, verbose=10):
    dp = dropout
    if not train_op:
        train_op = tf.no_op()
        dp = 1
    total_steps = sum(1 for x in ptb_iterator(data, batch_size, num_steps))
    total_loss = []
    state = initial_state[0].eval(), initial_state[1].eval()
    for step, (x, y) in enumerate(ptb_iterator(data, batch_size, num_steps)):
        feed = {
            input_placeholder: x,
            labels_placeholder: y,
            initial_state: state,
            dropout_placeholder: dp
        }
        loss, state, _ = session.run([loss_op, final_state, train_op],
                                     feed_dict=feed)
        total_loss.append(loss)
        if verbose and step % verbose == 0:
            sys.stdout.write('\r{} / {} : pp = {}'.format(
                step, total_steps, np.exp(np.mean(total_loss))))
            sys.stdout.flush()
    if verbose:
        sys.stdout.write('\r')
    return np.exp(np.mean(total_loss))
 def run_epoch(self, session, data, train_op=None, verbose=10):
   config = self.config
   dp = config.dropout
   if not train_op:
     train_op = tf.no_op()
     dp = 1.0
   total_steps = sum(1 for x in ptb_iterator(data, config.batch_size, config.num_steps))
   total_loss = []
   state = self.initial_state.eval()
   for step, (x, y) in enumerate(
     ptb_iterator(data, config.batch_size, config.num_steps)):
     # We need to pass in the initial state and retrieve the final state to give
     # the RNN proper history
     feed = {self.input_placeholder: x,
             self.labels_placeholder: y,
             self.initial_state: state,
             self.dropout_placeholder: dp}
     loss, state, _ = session.run(
         [self.calculate_loss, self.final_state, train_op], feed_dict=feed)
     total_loss.append(loss)
     if verbose and step % verbose == 0:
         sys.stdout.write('\r{} / {} : pp = {}'.format(
             step, total_steps, np.exp(np.mean(total_loss))))
         sys.stdout.flush()
   if verbose:
     sys.stdout.write('\r')
   return np.exp(np.mean(total_loss))
Example #4
def compute_loss(model, model_info, device, data, loss_fn):

    model.eval()
    all_losses = np.empty((0, 35))

    # LOOP THROUGH MINIBATCHES
    for step, (x, y) in tqdm.tqdm(enumerate(ptb_iterator(data, model.batch_size, model.seq_len)),
                                  total=(len(data)//model.batch_size - 1)//model.seq_len):
        if model_info.model == 'TRANSFORMER':
            batch = Batch(torch.from_numpy(x).long().to(device))
            model.zero_grad()
            outputs = model.forward(batch.data, batch.mask).transpose(1,0)
        else:
            inputs = torch.from_numpy(x.astype(np.int64)).transpose(0, 1).contiguous().to(device)#.cuda()
            model.zero_grad()
            hidden = model.init_hidden().to(device)
            outputs, hidden = model(inputs, hidden)

        # Target
        targets = torch.from_numpy(y.astype(np.int64)).transpose(0, 1).contiguous().to(device)

        # Loss computation
        outputs = outputs.contiguous()
        losses_in_batch = []
        for output_t, target_t in zip(outputs, targets):
            losses_in_batch.append(loss_fn(output_t, target_t).data.item())
        all_losses = np.vstack((all_losses, losses_in_batch))
    # Return
    return np.mean(all_losses, axis=0)
Example #5
    def run_epoch(self, session, data, train_op=None, verbose=10, writer=None):

        config = self.config
        dp = config.dropout
        is_training = 1
        if not train_op:
            train_op = tf.no_op()
            dp = 1
            is_training = 0
        total_steps = sum(
            1 for x in ptb_iterator(data, config.batch_size, config.num_steps))
        total_loss = []
        state = self.initial_state.eval()
        merged_summary = tf.summary.merge_all()
        # merged_summary = tf.summary.merge([self.loss_summary])
        for step, (x, y) in enumerate(
                ptb_iterator(data, config.batch_size, config.num_steps)):
            if is_training == 1:
                self.total_train_step += 1
            # We need to pass in the initial state and retrieve the final state to give
            # the RNN proper history
            feed = {
                self.input_placeholder: x,
                self.labels_placeholder: y,
                self.initial_state: state,
                self.dropout_placeholder: dp
            }
            if step % 5 == 0:
                loss, state, _, summary_str = session.run([
                    self.calculate_loss, self.final_state, train_op,
                    merged_summary
                ],
                                                          feed_dict=feed)
                writer.add_summary(summary_str, self.total_train_step)
            else:
                loss, state, _ = session.run(
                    [self.calculate_loss, self.final_state, train_op],
                    feed_dict=feed)

            total_loss.append(loss)
            if verbose and step % verbose == 0:
                sys.stdout.write('\r{} / {} : pp = {}'.format(
                    step, total_steps, np.exp(np.mean(total_loss))))
                sys.stdout.flush()
        if verbose:
            sys.stdout.write('\r')
        return np.exp(np.mean(total_loss))
Example #6
 def run_epoch(self, session, data, train_op=None, verbose=10, epoch=0):
   config = self.config
   dp = config.dropout
   if not train_op:
     train_op = tf.no_op()
     dp = 1
   total_steps = sum(1 for x in ptb_iterator(data, config.batch_size, config.num_steps))
   total_loss = []
   state = self.initial_state.eval()
   for step, (x, y) in enumerate(
     ptb_iterator(data, config.batch_size, config.num_steps)):
     # We need to pass in the initial state and retrieve the final state to give
     # the RNN proper history
     feed = {self.input_placeholder: x,
             self.labels_placeholder: y,
             self.initial_state: state,
              self.dropout_placeholder: dp}
      loss, state, _ = session.run(
          [self.calculate_loss, self.final_state, train_op], feed_dict=feed)
      total_loss.append(loss)
      if verbose and step % verbose == 0:
          sys.stdout.write('\r{} / {} : pp = {}'.format(
              step, total_steps, np.exp(np.mean(total_loss))))
          sys.stdout.flush()
    if verbose:
      sys.stdout.write('\r')
    return np.exp(np.mean(total_loss))
Example #7
def run_epoch(our_model,
              config,
              model_optimizer,
              criterion,
              data,
              mode='train',
              verbose=10):
    """
  Run for one epoch. Operations are determined by the mode. 

  """
    if mode == 'train':
        our_model.zero_grad()
    else:
        our_model.eval()
    ### Use init_hidden as the state input for the first step.
    state = our_model.init_hidden()
    total_steps = sum(
        1 for x in ptb_iterator(data, config.batch_size, config.num_steps))
    total_loss = []
    for step, (x, y) in enumerate(
            ptb_iterator(data, config.batch_size, config.num_steps)):
        x = torch.from_numpy(x).type(torch.LongTensor)
        y = torch.from_numpy(y).type(torch.LongTensor)
        ## If you are running on CPU, do not move x and y to CUDA.
        x = x.cuda()
        y = y.cuda()
        outputs, state = our_model(x, state)
        loss = compute_loss(outputs, y, criterion)
        if mode == 'train':
            our_model.zero_grad()  # clear gradients from the previous step before backprop
            loss.backward()
            model_optimizer.step()
            state = state.detach()
        ## If you are running on CPU, there is no need to move loss.data back to the CPU.
        total_loss.append(loss.data.cpu().numpy())
        if verbose and step % verbose == 0:
            sys.stdout.write('\r{} / {} : pp = {}'.format(
                step, total_steps, np.exp(np.mean(total_loss))))
            sys.stdout.flush()
    if verbose:
        sys.stdout.write('\r')
    return np.exp(np.mean(total_loss))
Example #8
def generate_text(session, model, config, starting_text='<eos>',
                  stop_length=5, stop_tokens=None, temp=0.2):
  """Generate text from the model.

  Hint: Create a feed-dictionary and use sess.run() to execute the model. Note
        that you will need to use model.initial_state as a key to feed_dict
  Hint: Fetch model.final_state and model.predictions[-1]. (You set
        model.final_state in add_model() and model.predictions is set in
        __init__)
  Hint: Store the outputs of running the model in local variables state and
        y_pred (used in the pre-implemented parts of this function.)

  Args:
    session: tf.Session() object
    model: Object of type RNNLM_Model
    config: A Config() object
    starting_text: Initial text passed to model.
  Returns:
    output: List of word idxs
  """
  state = model.initial_state.eval()
  # Imagine tokens as a batch size of one, length of len(tokens[0])
  tokens = [model.vocab.encode(word) for word in starting_text.split()]
  for i in xrange(stop_length):
    ### YOUR CODE HERE
    for step, (x, y) in enumerate(
      ptb_iterator(tokens, config.batch_size, config.num_steps)):
        #train_pp = model.run_epoch(
        #      session, tokens,
        #      train_op=model.train_step, verbose=100)
        #import pdb; pdb.set_trace()

        train_op = tf.no_op()
        feed = {model.input_placeholder: x,
                model.labels_placeholder: y,
                model.initial_state: state,
                model.dropout_placeholder: 1}

        loss, state, summary = session.run(
              [model.calculate_loss, model.final_state, train_op], feed_dict=feed)


        y_pred = state
    ### END YOUR CODE
    try:
        next_word_idx = sample(y_pred[0], temperature=temp)
        #print(model.vocab.decode(next_word_idx)) # for checking
        tokens.append(next_word_idx)
    except ValueError: 
        print("Exception on sum(y_pred[0][:-1])>1 {}".format(sum(y_pred[0][:-1])))
    if stop_tokens and model.vocab.decode(tokens[-1]) in stop_tokens:
      break
  output = [model.vocab.decode(word_idx) for word_idx in tokens]
  return output
def generate_text(session, model, config, starting_text='<eos>',
                  stop_length=100, stop_tokens=None, temp=1.0):
  """Generate text from the model.

  Hint: Create a feed-dictionary and use sess.run() to execute the model. Note
        that you will need to use model.initial_state as a key to feed_dict
  Hint: Fetch model.final_state and model.predictions[-1]. (You set
        model.final_state in add_model() and model.predictions is set in
        __init__)
  Hint: Store the outputs of running the model in local variables state and
        y_pred (used in the pre-implemented parts of this function.)

  Args:
    session: tf.Session() object
    model: Object of type RNNLM_Model
    config: A Config() object
    starting_text: Initial text passed to model.
  Returns:
    output: List of word idxs
  """
  state = model.initial_state.eval()
  # Imagine tokens as a batch size of one, length of len(tokens[0])
  tokens = [model.vocab.encode(word) for word in starting_text.split()]
  print ptb_iterator(tokens, config.batch_size, config.num_steps)
  for i in xrange(stop_length):
    ### YOUR CODE HERE
    for (x,y) in ptb_iterator(tokens, config.batch_size,config.num_steps): 
      feed = {model.initial_state: state, 
            model.input_placeholder: x,
            model.labels_placeholder:y, 
            model.dropout_placeholder:1}
      state, y_pred = session.run([model.final_state, model.predictions[-1]],feed_dict = feed)
    ### END YOUR CODE
    next_word_idx = sample(y_pred[0], temperature=temp)
    tokens.append(next_word_idx)
    if stop_tokens and model.vocab.decode(tokens[-1]) in stop_tokens:
      break
  output = [model.vocab.decode(word_idx) for word_idx in tokens]
  return output
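Both generate_text variants rely on a sample() helper imported from utils that is not shown on this page. Below is a minimal sketch of temperature sampling, assuming sample() takes a probability vector and draws a single index after re-weighting it at the given temperature; the actual utils implementation may differ.

import numpy as np

def sample_sketch(probs, temperature=1.0):
    # Re-weight a probability vector at the given temperature and draw one index.
    # temperature < 1 sharpens the distribution, temperature > 1 flattens it.
    logits = np.log(np.asarray(probs, dtype=np.float64) + 1e-10) / temperature
    probs = np.exp(logits) / np.sum(np.exp(logits))
    return int(np.argmax(np.random.multinomial(1, probs)))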
Example #10
    def run_epoch(self, session, data, train_op=None, verbose=10):
        config = self.config
        dp = config.dropout
        # No train_op means we are not training
        if not train_op:
            train_op = tf.no_op()
            dp = 1
        # Total number of iteration steps
        total_steps = sum(
            1 for x in ptb_iterator(data, config.batch_size, config.num_steps))
        # List of all losses
        total_loss = []
        # The state is re-initialized for each full pass over the data
        state = self.initial_state.eval()
        # For each mini-batch
        for step, (x, y) in enumerate(
                ptb_iterator(data, config.batch_size, config.num_steps)):
            # Build the feed_dict
            feed = {
                self.input_placeholder: x,
                self.labels_placeholder: y,
                self.initial_state: state,
                self.dropout_placeholder: dp
            }
            # Get the loss and the final state
            loss, state, _ = session.run(
                [self.calculate_loss, self.final_state, train_op],
                feed_dict=feed)
            total_loss.append(loss)

            # If progress output is enabled
            if verbose and step % verbose == 0:
                sys.stdout.write('\r{} / {} : pp = {}'.format(
                    step, total_steps, np.exp(np.mean(total_loss))))
                sys.stdout.flush()
        if verbose:
            sys.stdout.write('\r')
        # Exponentiate the loss to get perplexity; exponentiation is optional, but it makes changes easier to see
        return np.exp(np.mean(total_loss))
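As the final comment notes, the returned value is the perplexity: the exponential of the mean per-step cross-entropy. A small numerical illustration (the loss values below are made up):

import numpy as np

step_losses = [4.2, 3.9, 4.0, 3.8]         # made-up cross-entropy values in nats
perplexity = np.exp(np.mean(step_losses))  # exp(3.975) ≈ 53.3
print(perplexity)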
    def run_epoch(model, data):
        """
        One epoch of training/validation (depending on flag is_train).
        """
        model.eval()
        seq_losses = np.zeros(model.seq_len)
        # LOOP THROUGH MINIBATCHES
        for step, (x, y) in enumerate(
                utils.ptb_iterator(data, model.batch_size, model.seq_len)):
            if step % 10 == 0:
                print('step', step)
            step_seq_losses = []
            if args.model != 'TRANSFORMER':
                hidden = model.init_hidden()
                hidden = hidden.to(device)

            if args.model == 'TRANSFORMER':
                batch = utils.Batch(torch.from_numpy(x).long().to(device))
                model.zero_grad()
                outputs = model.forward(batch.data, batch.mask).transpose(1, 0)
                # print ("outputs.shape", outputs.shape)
            else:
                inputs = torch.from_numpy(x.astype(np.int64)).transpose(
                    0, 1).contiguous().to(device)  # .cuda()
                model.zero_grad()
                hidden = utils.repackage_hidden(hidden)
                if task == '5.2':
                    model.init_hidden_state_list()
                outputs, hidden = model(inputs, hidden)

            targets = torch.from_numpy(y.astype(np.int64)).transpose(
                0, 1).contiguous().to(device)  # .cuda()
            # LOSS COMPUTATION
            # This line currently averages across all the sequences in a mini-batch
            # and all time-steps of the sequences.
            # For problem 5.3, you will (instead) need to compute the average loss
            # at each time-step separately.
            if task == '5.1':
                with torch.no_grad():
                    for output, target in zip(outputs, targets):
                        l = loss_fn(output, target)
                        step_seq_losses.append(l.data.item())
                    seq_losses = np.sum(
                        [seq_losses, np.array(step_seq_losses)], axis=0)
            elif task == '5.2':
                loss = loss_fn(outputs[-1], targets[-1])
                layer_grads = []
                for unit in model.hidden_stack:
                    ret = torch.autograd.grad(loss,
                                              unit.hiddens,
                                              retain_graph=True)
                    # mean over examples in the minibatch 35x[20x1500] -> 35x[1500]
                    layer_grads.append([r.mean(dim=0) for r in ret])
                stacked = []
                # iterate over each time-step
                for i in range(len(layer_grads[0])):
                    stacked.append(
                        torch.stack([
                            layer_grads[j][i] for j in range(len(layer_grads))
                        ]))
                # 34 x 2 x 1500 -> 34 x 1
                ts_grads = [s.norm() for s in stacked]
                print('norms: ', ts_grads)
                ts_path = os.path.join(args['experiment_path'],
                                       'timestep_grads.npy')
                print('\nDONE\n\nSaving timestep_grads to ' + ts_path)
                np.save(ts_path, ts_grads)
                break

        if task == '5.1':
            seq_loss = seq_losses / (step + 1)
            log_str = '\nseq_losses (len={}, sum={}): {}'.format(
                len(seq_loss), sum(seq_loss), seq_loss)
            print(log_str)

            sl_path = os.path.join(args['experiment_path'], 'seq_loss.npy')
            print('\nDONE\n\nSaving seq_loss to ' + sl_path)
            np.save(sl_path, seq_loss)
Example #12
from utils import ptb_iterator, sample

train_data = [i for i in range(1024)]
# num_steps is how many items per sequence we grab from the data.
# If you want to grab 10 words at a time, num_steps is 10 (see the sketch after this loop).
for batch in ptb_iterator(train_data, batch_size=2, num_steps=1):
    print("Batch")
    x, y = batch
    print(x, y)
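Assuming utils.ptb_iterator follows the standard PTB batching scheme (split the data into batch_size contiguous rows, then slide a window of num_steps columns with y one position ahead of x), a minimal sketch of such an iterator looks like the following; the real utils implementation may differ in details.

import numpy as np

def ptb_iterator_sketch(raw_data, batch_size, num_steps):
    # Reshape the data into batch_size contiguous rows, then yield
    # (x, y) windows where y is x shifted one position to the right.
    raw_data = np.array(raw_data, dtype=np.int32)
    batch_len = len(raw_data) // batch_size
    data = raw_data[:batch_size * batch_len].reshape(batch_size, batch_len)
    for i in range((batch_len - 1) // num_steps):
        x = data[:, i * num_steps:(i + 1) * num_steps]
        y = data[:, i * num_steps + 1:(i + 1) * num_steps + 1]
        yield x, y

Under that assumption, the first batch printed by the loop above (batch_size=2, num_steps=1, data 0..1023) would be x = [[0], [512]] and y = [[1], [513]].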