Example #1
          # Earlier variant, kept for reference (masks with torch.where and
          # normalises by batch_size here rather than later):
          #   dep_loss_masked = torch.where(deps > 0, dep_loss, zero)
          #   dep_loss_tot = dep_loss_masked.sum() / batch_size
          dep_loss_tot = dep_loss.masked_fill_(deps == 0, 0.).sum()  # zero positions where deps == 0 (in-place), then sum
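          # Sanity check (hypothetical shapes, not from the original code): the
          # masking itself is equivalent to the torch.where form, normalisation aside.
          #   dl = torch.rand(8, 40); deps = torch.randint(0, 5, (8, 40))
          #   a = torch.where(deps > 0, dl, torch.zeros_like(dl)).sum()
          #   b = dl.masked_fill(deps == 0, 0.).sum()
          #   assert torch.allclose(a, b)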
          
          factor_hints="Factor hints (class_loss=%8.4f, deps_loss=%10.4f, fac=%.8f)" % ( 
                    class_loss_tot.item()/batch_size*100., 
                    dep_loss_tot.item()/batch_size*100., 
                    class_loss_tot.item()/dep_loss_tot.item(), )
                    
          #factor hints :  (231.14927673339844, 225.23297119140625, 1.0262674932124587)

          batch_loss = class_loss_tot + args.dep_fac * dep_loss_tot  # dependency loss weighted by args.dep_fac
          
          batch_loss.backward()
          
          model_opt.step()  # gradient zeroing is presumably handled elsewhere in the loop
          
          loss_this = batch_loss.item()
          loss_recent_tot += loss_this  # accumulated for the periodic per-sentence average below
          
          if idx % 10 == 0:
            print('%.1f%% of epoch %d' % (idx / float(len(train_loader)) * 100, epoch,), end='\r')  # Python 3 FTW!

          if idx % 100 == 0:
            print(epoch, idx, factor_hints)

          sentences_since_last_check = (idx - idx_loss_check) * batch_size
          #if sentences_since_last_check >  50000:  # Potentially save every  50000 sentences (~30mins on TitanX)
          if sentences_since_last_check > 200000:   # Potentially save every 200000 sentences (~2hrs on TitanX)
            loss_recent = loss_recent_tot / float(sentences_since_last_check)  # loss per sentence
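
The example is cut off just inside the checkpoint condition. A minimal sketch of the
periodic-checkpoint pattern it sets up, with a hypothetical save path and helper
(assumptions for illustration, not the original author's code):

import torch

def maybe_checkpoint(model, idx, idx_loss_check, batch_size, loss_recent_tot,
                     every_n_sentences=200000, path='checkpoint.pt'):
    # Save periodically, measured in sentences seen rather than optimiser steps.
    sentences_since_last_check = (idx - idx_loss_check) * batch_size
    if sentences_since_last_check > every_n_sentences:
        loss_recent = loss_recent_tot / float(sentences_since_last_check)  # loss per sentence
        torch.save(model.state_dict(), path)  # hypothetical save location
        return idx, 0.0, loss_recent  # new checkpoint index, reset running loss
    return idx_loss_check, loss_recent_tot, None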
          
Example #2
        batch = [x_ids, x_perm_ids, s_ids, s_perm_ids, y_a, y_b]
        if args.feature_mix:
            ce_loss_x, ce_loss_s, scl_loss = model.forward_feature_mix(batch)
        else:
            ce_loss_x, ce_loss_s, scl_loss = model(batch)
        if args.with_summary:
            ce_loss = (ce_loss_x + ce_loss_s) / 2  # average the two cross-entropy terms
        else:
            ce_loss = ce_loss_x  # summary branch unused
        loss = args.lambd * ce_loss + (1 - args.lambd) * scl_loss  # convex mix of CE and scl_loss, weighted by args.lambd

        # print(ce_loss_x, ce_loss_s, scl_loss)
        loss.backward()

        count += 1
        if count % args.num_accum == 0:  # apply an update only every num_accum micro-batches
            optimizer.step()
            recoder.log_train(ce_loss_x, ce_loss_s, scl_loss, loss)
            step += 1
            optimizer.zero_grad()

            if step >= args.steps:
                break

            if step % args.log_step == 0:
                begin_eval = True

            if step % 10 == 0:
                bar.update(10)

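
The count % args.num_accum bookkeeping above is a standard gradient-accumulation
pattern. A self-contained sketch of the same idea with toy data (all names here are
illustrative, not from the example); note that scaling the loss by the accumulation
factor, which the example omits, makes the accumulated gradient match one large batch:

import torch
import torch.nn as nn
import torch.nn.functional as F

model = nn.Linear(16, 4)
optimizer = torch.optim.SGD(model.parameters(), lr=0.1)
num_accum = 4  # one optimiser step per 4 micro-batches

optimizer.zero_grad()
for count in range(1, 101):
    x = torch.randn(8, 16)
    y = torch.randint(0, 4, (8,))
    loss = F.cross_entropy(model(x), y) / num_accum  # scale so gradients average
    loss.backward()                                  # gradients accumulate in .grad
    if count % num_accum == 0:
        optimizer.step()       # apply the accumulated update
        optimizer.zero_grad()  # clear for the next accumulation window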