Example #1
def run_epoch(session, m, data, eval_op, verbose=False):
  """Runs the model on the given data."""
  if len(data) <= 1:
    return np.inf

  epoch_size = ((len(data) // m.batch_size) - 1) // m.num_steps
  start_time = time.time()
  costs = 0.0
  iters = 0
  state = m.initial_state.eval()
  for step, (x, y) in enumerate(reader.data_iterator(data, m.batch_size,
                                                    m.num_steps)):
    cost, state, _ = session.run([m.cost, m.final_state, eval_op],
                                 {m.input_data: x,
                                  m.targets: y,
                                  m.initial_state: state})
    costs += cost
    iters += m.num_steps

    if verbose and step % (epoch_size // 10) == 10:
      print("%.3f perplexity: %.3f speed: %.0f wps" %
            (step * 1.0 / epoch_size, np.exp(costs / iters),
             iters * m.batch_size / (time.time() - start_time)))

  return np.exp(costs / iters)
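All the examples assume a data_iterator helper that yields (x, y) minibatches of shape (batch_size, num_steps), with targets shifted one token ahead of the inputs. A minimal sketch consistent with the epoch_size formula above (a hypothetical stand-in for the actual reader module, not its real source):

import numpy as np

def data_iterator(raw_data, batch_size, num_steps):
    """Yield (inputs, targets) batches from a flat token stream."""
    raw_data = np.array(raw_data, dtype=np.int32)
    batch_len = len(raw_data) // batch_size
    # Lay the stream out as batch_size parallel sequences.
    data = raw_data[:batch_size * batch_len].reshape((batch_size, batch_len))
    epoch_size = (batch_len - 1) // num_steps
    for i in range(epoch_size):
        x = data[:, i * num_steps:(i + 1) * num_steps]
        y = data[:, i * num_steps + 1:(i + 1) * num_steps + 1]  # shifted by one
        yield x, y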
Example #2
def run_epoch(session, m, data, eval_op, verbose=False):
    """Runs the model on the given data."""
    posts, labels = data
    n_docs, n_words = posts.shape
    epoch_size = n_docs // m.batch_size
    costs = 0.0
    accs = 0.0
    iters = 0
    state = m.initial_state.eval()

    for step, (texts, label) in enumerate(
            reader.data_iterator(data, m.batch_size, m.num_steps)):
        cost, state, acc, summary, _ = session.run(
            [m.cost, m.final_state, m.accuracy, m.summary, eval_op],
            {m.input_data: texts,
             m.targets: label,
             m.initial_state: state})
        costs += cost
        accs += acc
        iters += 1

        if verbose and step % (epoch_size // 10) == 0:
            print("%.3f xentropy: %.3f " %
                  (step * 1.0 / epoch_size, costs / iters))

    return costs / iters, accs / iters, summary
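A caller would typically pass a training op as eval_op and log the returned summary. A minimal sketch of such a driver loop under the same TF 0.x API the snippet uses (logdir, num_epochs, train_data, and m.train_op are assumptions):

writer = tf.train.SummaryWriter(logdir)  # TF 0.x; later renamed tf.summary.FileWriter
for epoch in range(num_epochs):
    cost, acc, summary = run_epoch(session, m, train_data, m.train_op, verbose=True)
    writer.add_summary(summary, epoch)
    print("epoch %d: xentropy %.3f accuracy %.3f" % (epoch, cost, acc))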
Example #3
def eval(data, is_test=False, use_bn_trainstat=False):
    model.eval()
    if use_bn_trainstat:
        model.apply(set_bn_train)
    tperp = 0
    tbpc = 0
    count = 0
    start_time = time.time()
    hidden = Variable(torch.zeros(args.num_layers, batch_size,
                                  args.hidden_size).cuda(),
                      requires_grad=False)
    for batchi, (x, y) in enumerate(
            data_iterator(data, args.batch_size, args.seq_len)):
        inputs = x
        targets = y
        targets = targets.transpose(1, 0)
        targets = targets.reshape((-1))
        inputs = inputs.transpose(1, 0)
        inputs = Variable(torch.from_numpy(np.int64(inputs)).cuda(),
                          requires_grad=False)
        targets = Variable(torch.from_numpy(np.int64(targets)).cuda(),
                           requires_grad=False)

        output, hidden = model(inputs, hidden)
        hidden = hidden.detach()
        # Detach to free graph memory, in case two graphs stay alive due to
        # Python's scoping rules.
        output = output.detach()
        loss = criterion(output, targets)
        perp = torch.exp(loss)
        bpc = (loss / np.log(2.0))

        tperp = tperp + perp.data.cpu().numpy()  # accumulate per-batch perplexity
        tbpc = tbpc + bpc.data.cpu().numpy()
        count += 1
    elapsed = time.time() - start_time
    tag = "test" if is_test else "eval"
    print(tag, "perp and bpc: ", tperp / count, tbpc / count)
    # print('eval time per batch: ', elapsed / count)
    return tperp / count
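set_bn_train is not shown in the snippet; since it is passed to model.apply, it presumably re-enables training mode only on BatchNorm layers so they normalize with batch statistics. A sketch under that assumption:

import torch.nn as nn

def set_bn_train(m):
    # model.apply() calls this on every submodule; flip only BatchNorm
    # layers back to train mode so they use batch statistics.
    if isinstance(m, nn.modules.batchnorm._BatchNorm):
        m.train()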
Example #4
def train():
    model.train()
    tperp = 0
    tbpc = 0
    count = 0
    start_time = time.time()
    hidden = Variable(torch.zeros(args.num_layers, batch_size,
                                  args.hidden_size).cuda(),
                      requires_grad=False)
    # Random start offset: cheap data augmentation over sequence boundaries.
    dropindex = np.random.randint(seq_len * 5)
    for batchi, (x, y) in enumerate(
            data_iterator(train_data[dropindex:], batch_size, seq_len)):
        inputs = x
        targets = y
        targets = targets.transpose(1, 0)
        targets = targets.reshape((-1))
        inputs = inputs.transpose(1, 0)
        inputs = Variable(torch.from_numpy(np.int64(inputs)).cuda(),
                          requires_grad=False)
        targets = Variable(torch.from_numpy(np.int64(targets)).cuda(),
                           requires_grad=False)

        model.zero_grad()
        output, hidden = model(inputs, hidden)
        hidden = hidden.detach()
        loss = criterion(output, targets)
        perp = torch.exp(loss)
        bpc = (loss / np.log(2.0))

        loss.backward()
        clip_gradient(model, gradientclip_value)
        optimizer.step()
        clip_weight(model, U_bound)

        tperp = tperp + perp.data.cpu().numpy()  # accumulate per-batch perplexity
        tbpc = tbpc + bpc.data.cpu().numpy()
        count += 1
    elapsed = time.time() - start_time
    print("train perp and bpc: ", tperp / (count + 0.0), tbpc / (count + 0.0))
Example #5
learning_rate = np.float32(lr)

t_prep = 0
t_bpc = 0
count = 0
lastbpc = 100
patience = 0
patienceThre = 5

for epoci in range(1, 10000):
    hid_init = np.zeros((batch_size, sum_units), dtype='float32')
    dropindex = 0
    if args.data_aug:
        dropindex = np.random.randint(seq_len * 5)
    for batchi, (x, y) in enumerate(
            data_iterator(train_data[dropindex:], batch_size, seq_len)):
        # IndRNN stability constraint: clip recurrent weights to [-U_bound, U_bound].
        if rnnmodel == indrnn_onlyrecurrent:
            for para in params:
                if para.name == 'hidden_to_hidden.W':
                    para.set_value(
                        np.clip(para.get_value(), -1 * U_bound, U_bound))
        perp, bpc, hid_init = train_fn(x, y, hid_init, learning_rate)

        if np.isnan(perp):
            raise ValueError('NaN detected in cost')
        if np.isinf(perp):
            raise ValueError('INF detected in cost')
        t_prep += perp
        t_bpc += bpc
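The excerpt stops before lastbpc, patience, and patienceThre are used; a plausible end-of-epoch continuation (a sketch only, not the author's code) would decay the learning rate once bpc stops improving:

    # Hypothetical end-of-epoch check using the counters declared above.
    avg_bpc = t_bpc / (batchi + 1.0)
    if avg_bpc >= lastbpc:
        patience += 1
        if patience >= patienceThre:
            learning_rate = np.float32(learning_rate * 0.2)  # decay factor is an assumption
            patience = 0
    else:
        patience = 0
        lastbpc = avg_bpc
    t_prep = 0
    t_bpc = 0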
Example #6
  # TF 0.x initializer; later releases use tf.global_variables_initializer().
  session.run(tf.initialize_all_variables())
  saver = tf.train.Saver()
  # reload_file = './checkpoints/SimpleMatchLSTM_50.ckpt'
  # if reload_file is not None:
  #   saver.restore(session, reload_file)
  batch_size = 256

  for epoch in range(1, 81):
    print "epoch ", epoch
    _train_loss = 0.0
    _train_hit = 0
    count = 0.0
    for start in range(0, len(reader.train_set) - batch_size, batch_size):
      end = min(len(reader.train_set), start + batch_size)
      p, plen, pmask, h, hlen, hmask, y = reader.data_iterator(reader.train_set, start, end)
      _loss, _hit,  _ = session.run([model.loss, model.hit, model.train_op], feed_dict={
              model.premise: p,
              model.premise_length: plen,
              model.premise_mask: pmask,
              model.hypothesis: h,
              model.hypothesis_length: hlen,
              model.hypothesis_mask: hmask,
              model.target: y
          })
      _train_loss += _loss * (end - start)
      _train_hit += _hit
    print "train_loss: ", _train_loss / len(reader.train_set)
    print "train_hit  ", _train_hit , " over ", len(reader.train_set)
    print "train_hit rate: ", float(_train_hit) / len(reader.train_set)
    print ""

Example #7
learning_rate = np.float32(lr)

t_prep = 0
t_bpc = 0
count = 0
lastbpc = 100
patience = 0
patienceThre = 2

for epoci in range(1, 10000):
  hid_init = np.zeros((batch_size, sum_units), dtype='float32')
  dropindex = 0  # default to no offset; randomized below when data_aug is on
  if args.data_aug:
    dropindex = np.random.randint(seq_len * 5)
  for batchi, (x, y) in enumerate(data_iterator(train_data[dropindex:], batch_size, seq_len)):
    # IndRNN stability constraint: clip recurrent weights to [-U_bound, U_bound].
    if rnnmodel == indrnn_onlyrecurrent:
      for para in params:
        if para.name == 'hidden_to_hidden.W':
          para.set_value(np.clip(para.get_value(), -1 * U_bound, U_bound))
    # Randomly mask hidden units in the initial state as an extra regularizer.
    if args.use_drophiddeninput and np.random.randint(2) == 1:
      temp = np.float32(np.random.randint(2, size=(sum_units,)))
      temp = temp[np.newaxis, :]
      hid_init = hid_init * temp
    perp, bpc, hid_init = train_fn(x, y, hid_init, learning_rate)

    if np.isnan(perp):
      raise ValueError('NaN detected in cost')
    if np.isinf(perp):
      raise ValueError('INF detected in cost')