def greedyUntied(st):
    A = st.Psi
    M, N = A.shape
    mul = np.matmul
    ls = st.loaded_state
    We = ls['We']
    S = ls['S']
    theta = ls['theta']
    L = 5000

    while len(S) < st.T:
        Tm1 = len(S)
        T = Tm1 + 1

        x, y = bg_gen(A, L)
        B = mul(We, y)
        xhat = lista_run(y, We, S, theta, Tm1)

        S_ = np.identity(N) - mul(We, A)
        theta_ = theta[Tm1 - 1]

        stepsize = 1.0
        nmsePrev = 999
        for steps in range(10):
            rhat_ = B + mul(S_, xhat)
            xhat_ = ut.eta(rhat_, theta_)
            nmse = 20 * math.log10(
                la.norm(x - xhat_, 'fro') / la.norm(x, 'fro'))
            print(' %d nmse=%.4fdB ' % (steps, nmse))
            if nmse > nmsePrev:
                stepsize = stepsize * .5
            else:
                stepsize = stepsize * 1.1
            nmsePrev = nmse
            S_ = S_ - stepsize * mul(
                ((xhat_ - x) * abs(np.sign(xhat_))), rhat_.T) / L
            theta_ = theta_ - stepsize * np.mean(
                (xhat_ - x) * (-np.sign(xhat_)))
        xhat = ut.eta(B + mul(S_, xhat), theta_)
        S = np.concatenate((S, np.reshape(S_, (1, N, N))))
        theta = np.concatenate((theta, np.reshape(theta_, (1, ))))
        print('t=%d nmse=%.2fdB ' % (
            Tm1, 20 * math.log10(la.norm(xhat - x) / la.norm(x))))
    x, y = bg_gen(A, L)
    xhat = lista_run(y, We, S, theta, st.T)
    print('fresh nmse=%.2fdB' % (
        20 * math.log10(la.norm(xhat - x) / la.norm(x))))

    return S, theta
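Note: throughout the LISTA-style snippets on this page, `ut.eta` is the layer nonlinearity and `bg_gen` supplies training data. Below is only a hedged sketch of the forms they are assumed to take (soft thresholding and a Bernoulli-Gaussian generator); the names, sparsity rate, and noise level are illustrative, not the project's actual code.

import numpy as np

def soft_threshold(r, theta):
    # Elementwise shrinkage: sign(r) * max(|r| - theta, 0).
    return np.sign(r) * np.maximum(np.abs(r) - theta, 0.0)

def bg_gen_sketch(A, L, p=0.1, snr_db=40.0):
    # Draw a Bernoulli-Gaussian sparse x of shape (N, L) and noisy
    # measurements y = A x + w of shape (M, L).
    M, N = A.shape
    x = np.random.randn(N, L) * (np.random.rand(N, L) < p)
    y = A @ x
    noise_std = np.linalg.norm(y) / np.sqrt(y.size) * 10 ** (-snr_db / 20.0)
    return x, y + noise_std * np.random.randn(M, L)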
Example #2
    def printTracks(self,
                    eventVars=None,
                    params=None,
                    coords=None,
                    nMax=None,
                    tracks=None,
                    color=r.kBlack):
        self.prepareText(params, coords)

        self.printText(tracks)
        headers = "  name  pdgId   pT  eta  phi"
        self.printText(headers)
        self.printText("-" * len(headers))

        nTracks = utils.size(eventVars, tracks)
        for iTrack in range(nTracks):
            if nMax <= iTrack:
                self.printText("[%d more not listed]" % (nTracks - nMax))
                break

            track = eventVars[tracks][iTrack]
            name = pdgLookup.pdgid_to_name(
                track.PID) if pdgLookupExists else ""
            self.printText("%6s %6d%5.0f %s %4.1f" % (
                name[-6:],
                track.PID,
                track.PT,
                utils.eta(track),
                track.Phi,
            ),
                           color=color)
        return
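In these Delphes-style event printers, `utils.eta(...)` fills a `%s` field, so it presumably returns the object's pseudorapidity already formatted as a short string. A purely hypothetical sketch (the `Eta` attribute name and the field width are assumptions, not the real utils API):

def eta(particle):
    # Hypothetical: format the particle's pseudorapidity for a fixed-width column.
    return "%4.1f" % particle.Eta if hasattr(particle, "Eta") else "    "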
Example #3
    def printTracks(self, eventVars=None, params=None, coords=None,
                    nMax=None, tracks=None, color=r.kBlack):
        self.prepareText(params, coords)

        self.printText(tracks)
        headers = "  name  pdgId   pT  eta  phi"
        self.printText(headers)
        self.printText("-" * len(headers))

        nTracks = utils.size(eventVars, tracks)
        for iTrack in range(nTracks):
            if nMax <= iTrack:
                self.printText("[%d more not listed]" % (nTracks - nMax))
                break

            track = eventVars[tracks][iTrack]
            name = pdgLookup.pdgid_to_name(track.PID) if pdgLookupExists else ""
            self.printText("%6s %6d%5.0f %s %4.1f" % (name[-6:],
                                                      track.PID,
                                                      track.PT,
                                                      utils.eta(track),
                                                      track.Phi,
                                                      ),
                           color=color)
        return
Example #4
    def printGenParticles(self, eventVars=None, params=None, coords=None,
                          nMax=None, particles=None, color=r.kBlack):
        self.prepareText(params, coords)
        
        self.printText(particles)
        headers = "  name  pdgId   pT  eta  phi st PU"
        self.printText(headers)
        self.printText("-" * len(headers))

        nParticles = utils.size(eventVars, particles)
        for iParticle in range(nParticles):
            if nMax <= iParticle:
                self.printText("[%d more not listed]" % (nParticles - nMax))
                break
            particle = eventVars[particles].At(iParticle)
            name = pdgLookup.pdgid_to_name(particle.PID) if pdgLookupExists else ""
            self.printText("%6s %6d%5.0f %s %4.1f  %1d  %1d" % (name[-6:],
                                                                particle.PID,
                                                                particle.PT,
                                                                utils.eta(particle),
                                                                particle.Phi,
                                                                particle.Status,
                                                                particle.IsPU,
                                                                ),
                           color=color)
        return
Example #5
    def printLeptons(self,
                     eventVars=None,
                     params=None,
                     coords=None,
                     nMax=None,
                     leptons=None,
                     color=r.kBlack,
                     ptMin=None):
        self.prepareText(params, coords)

        self.printText(leptons)
        headers = "   pT  eta  phi  iso"
        self.printText(headers)
        self.printText("-" * len(headers))

        nLeptons = utils.size(eventVars, leptons)
        for iLepton in range(nLeptons):
            if nMax <= iLepton:
                self.printText("[%d more not listed]" % (nLeptons - nMax))
                break

            lepton = eventVars[leptons][iLepton]
            iso = "%4.1f" % lepton.IsolationVar if hasattr(
                lepton, "IsolationVar") else "    "
            self.printText("%5.0f %s %4.1f %s" % (
                lepton.PT,
                utils.eta(lepton),
                lepton.Phi,
                iso,
            ),
                           color=color)
        return
Example #6
    def printTowers(self,
                    eventVars=None,
                    params=None,
                    coords=None,
                    nMax=None,
                    towers=None,
                    color=r.kBlack):
        self.prepareText(params, coords)

        self.printText(towers)
        headers = "   ET  eta  phi"
        self.printText(headers)
        self.printText("-" * len(headers))

        nTowers = utils.size(eventVars, towers)
        for iTower in range(nTowers):
            if nMax <= iTower:
                self.printText("[%d more not listed]" % (nTowers - nMax))
                break

            tower = eventVars[towers][iTower]
            self.printText("%5.0f %s %4.1f" % (
                tower.ET,
                utils.eta(tower),
                tower.Phi,
            ),
                           color=color)
        return
Example #7
    def printGenParticles(self,
                          eventVars=None,
                          params=None,
                          coords=None,
                          nMax=None,
                          particles=None,
                          color=r.kBlack):
        self.prepareText(params, coords)

        self.printText(particles)
        headers = "  name  pdgId   pT  eta  phi st PU"
        self.printText(headers)
        self.printText("-" * len(headers))

        nParticles = utils.size(eventVars, particles)
        for iParticle in range(nParticles):
            if nMax <= iParticle:
                self.printText("[%d more not listed]" % (nParticles - nMax))
                break
            particle = eventVars[particles].At(iParticle)
            name = pdgLookup.pdgid_to_name(
                particle.PID) if pdgLookupExists else ""
            self.printText("%6s %6d%5.0f %s %4.1f  %1d  %1d" % (
                name[-6:],
                particle.PID,
                particle.PT,
                utils.eta(particle),
                particle.Phi,
                particle.Status,
                particle.IsPU,
            ),
                           color=color)
        return
Example #8
def eta_opt_lambda(rhat, x, **kwargs):
    'find the MSE-optimal lambda'

    minlam = 0
    maxlam = np.abs(rhat).max()

    for k in range(5):
        lamvec = np.linspace(minlam, maxlam, 11)
        dlam = lamvec[1] - lamvec[0]
        err = [la.norm(x - ut.eta(rhat, lam)) for lam in lamvec]
        minidx = np.argmin(err)
        bestlam = lamvec[minidx]
        minlam = max(0, bestlam - dlam)
        maxlam = bestlam + dlam
    xhat = ut.eta(rhat, bestlam)
    #print 'lambda = %.3f nmse=%.3fdB' % (bestlam,20*math.log10( la.norm(err[minidx])/la.norm(x) ) )
    return (xhat, bestlam)
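A hedged usage sketch of the grid-refinement search above, on synthetic data (shapes, sparsity, and noise level are arbitrary illustrative choices; assumes the module's own np/la/ut imports are in scope):

rng = np.random.RandomState(0)
x_true = rng.randn(500, 64) * (rng.rand(500, 64) < 0.1)  # sparse ground truth
rhat = x_true + 0.1 * rng.randn(500, 64)                  # noisy estimate of x_true
xhat, best_lam = eta_opt_lambda(rhat, x_true)             # MSE-optimal threshold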
Example #9
def train(model, generated_image, initial_image):
    """ Train your model."""
    with tf.Session() as sess:
        saver = tf.train.Saver()
        #Initialize variables
        sess.run(tf.global_variables_initializer())
        sess.run(generated_image.assign(initial_image))
        ckpt = tf.train.get_checkpoint_state(
            os.path.dirname('checkpointsTransfer/checkpoint/'))
        if ckpt and ckpt.model_checkpoint_path:
            saver.restore(sess, ckpt.model_checkpoint_path)
        initial_step = model['global_step'].eval()

        start_time = time.time()

        skip_step = 1
        for index in range(initial_step, ITERS):
            if index >= 5 and index < 20:
                skip_step = 10
            elif index >= 20:
                skip_step = 20

            sess.run(model['optimizer'])
            if (index + 1) % skip_step == 0:
                #Generated image and loss
                gen_image, total_loss, summary = sess.run([
                    generated_image, model['total_loss'], model['summary_op']
                ])
                elapsed_time = time.time() - start_time
                gen_image = gen_image + MEAN_PIXELS
                print('Step {}\n   Sum: {:5.1f}'.format(
                    index + 1, np.sum(gen_image)))
                print('   Loss: {:5.1f}'.format(total_loss))
                print('   Time: {}'.format(elapsed_time))

                utils.eta(index, skip_step, elapsed_time, ITERS)

                start_time = time.time()
                filename = 'outputs/%d.png' % (index)
                utils.save_image(filename, gen_image)

                if (index + 1) % 20 == 0:
                    saver.save(sess, 'checkpointsTransfer/style_transfer/',
                               index)
Example #10
def Setup(st, **kwargs):
    A = st.Psi
    M, N = A.shape
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        (y, x) = sess.run(st.generators)

    mul = np.matmul

    xhat = np.zeros_like(x)

    ls = st.loaded_state

    for key in ('We', 'S', 'theta'):
        print('%s.shape = %s' % (key, repr(ls[key].shape)))

    if st.untieS:
        S, theta = greedyUntied(st)
    else:
        We = ls['We']
        S = ls['S']
        theta = ls['theta']
        Tprev = len(theta)
        theta = np.concatenate((theta, np.zeros(st.T - Tprev)))
        bt = 0
        v = 0

        B = mul(We, y)

        for t in range(st.T):
            # basic recursion:
            # xhat = eta(We*y + S*xhat;theta_t)
            rhat = B + mul(S, xhat)
            if t < Tprev:
                xhat = ut.eta(rhat, theta[t])
            else:
                (xhat, theta[t]) = eta_opt_lambda(rhat, x)
            print('t=%d lambda=%.3f nmse=%.3fdB' % (
                t, theta[t], 20 * math.log10(la.norm(xhat - x) / la.norm(x))))

    ls['theta'] = np.float32(theta)
    ls['S'] = np.float32(S)
    return theta, S
Example #11
    def printTowers(self, eventVars=None, params=None, coords=None,
                    nMax=None, towers=None, color=r.kBlack):
        self.prepareText(params, coords)

        self.printText(towers)
        headers = "   ET  eta  phi"
        self.printText(headers)
        self.printText("-" * len(headers))

        nTowers = utils.size(eventVars, towers)
        for iTower in range(nTowers):
            if nMax <= iTower:
                self.printText("[%d more not listed]" % (nTowers - nMax))
                break

            tower = eventVars[towers][iTower]
            self.printText("%5.0f %s %4.1f" % (tower.ET,
                                               utils.eta(tower),
                                               tower.Phi,
                                               ),
                           color=color)
        return
Example #12
    def printLeptons(self, eventVars=None, params=None, coords=None,
                     nMax=None, leptons=None, color=r.kBlack, ptMin=None):
        self.prepareText(params, coords)

        self.printText(leptons)
        headers = "   pT  eta  phi  iso"
        self.printText(headers)
        self.printText("-" * len(headers))

        nLeptons = utils.size(eventVars, leptons)
        for iLepton in range(nLeptons):
            if nMax <= iLepton:
                self.printText("[%d more not listed]" % (nLeptons - nMax))
                break

            lepton = eventVars[leptons][iLepton]
            iso = "%4.1f" % lepton.IsolationVar if hasattr(lepton, "IsolationVar") else "    "
            self.printText("%5.0f %s %4.1f %s" % (lepton.PT,
                                                  utils.eta(lepton),
                                                  lepton.Phi,
                                                  iso,
                                                  ),
                           color=color)
        return
Example #13
    def test(self, e=1):
        correct = torch.zeros((6), dtype=torch.float)
        tp = torch.zeros((6), dtype=torch.float)
        fp = torch.zeros((6), dtype=torch.float)
        fn = torch.zeros((6), dtype=torch.float)
        correct_total = 0
        step = 1
        data_len = 0
        iter_lst = [self.get_iter(self.test_features_lst, self.args)]
        num_batches = sum([len(iterator[0]) for iterator in iter_lst])
        start = time.time()
        with torch.no_grad():
            for data_loader, sampler in iter_lst:
                for i, batch in enumerate(data_loader, start=1):
                    input_ids, input_mask, seg_ids, start_positions, end_positions, labels = batch

                    # remove unnecessary pad token
                    seq_len = torch.sum(torch.sign(input_ids), 1)
                    max_len = torch.max(seq_len)

                    input_ids = input_ids[:, :max_len].clone()
                    input_mask = input_mask[:, :max_len].clone()
                    seg_ids = seg_ids[:, :max_len].clone()
                    start_positions = start_positions.clone()
                    end_positions = end_positions.clone()

                    if self.args.use_cuda:
                        input_ids = input_ids.cuda(self.args.gpu,
                                                   non_blocking=True)
                        input_mask = input_mask.cuda(self.args.gpu,
                                                     non_blocking=True)
                        seg_ids = seg_ids.cuda(self.args.gpu,
                                               non_blocking=True)
                        start_positions = start_positions.cuda(
                            self.args.gpu, non_blocking=True)
                        end_positions = end_positions.cuda(self.args.gpu,
                                                           non_blocking=True)

                    dis_loss, log_prob = self.model(input_ids,
                                                    seg_ids,
                                                    input_mask,
                                                    start_positions,
                                                    end_positions,
                                                    labels,
                                                    dtype="dis",
                                                    global_step=step)

                    #print(log_prob.shape, labels.shape)
                    data_len += labels.shape[0]
                    onehot_labels = torch.nn.functional.one_hot(
                        labels, num_classes=6).float()
                    onehot_pred = torch.nn.functional.one_hot(
                        (log_prob.argmax(dim=1).detach().cpu()),
                        num_classes=6).float()
                    correct_total += ((log_prob.argmax(
                        dim=1).detach().cpu()) == labels.detach().cpu()
                                      ).float().sum()
                    correct += (onehot_pred == onehot_labels).sum(
                        dim=0).float()
                    tp += ((onehot_pred.float() == 1) &
                           (onehot_labels.float() == 1)).sum(dim=0).float()
                    fp += ((onehot_pred.float() == 1) &
                           (onehot_labels.float() == 0)).sum(dim=0).float()
                    fn += ((onehot_pred.float() == 0) &
                           (onehot_labels.float() == 1)).sum(dim=0).float()

                    msg = "Test {}/{} {} - ETA : {}".format(
                        i, num_batches, progress_bar(i, num_batches),
                        eta(start, i, num_batches))
                    print(msg, end="\r")

        writer.add_scalar("Test/Total_accuracy", correct_total / data_len, e)
        writer.add_scalars("Test/By_class_accuracy",
                           summary_map(self.num_to_name, correct / data_len),
                           e)
        writer.add_scalars("Test/By_class_true_positives",
                           summary_map(self.num_to_name, tp / data_len), e)
        writer.add_scalars("Test/By_class_false_negatives",
                           summary_map(self.num_to_name, fn / data_len), e)
        writer.add_scalars("Test/By_class_false_positives",
                           summary_map(self.num_to_name, fp / data_len), e)

        print(
            "Test accuracy total {}, by class {}, tp {}, fp {}, fn {}".format(
                correct_total / data_len, correct / data_len, tp / data_len,
                fp / data_len, fn / data_len),
            end="\n")
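Several of the training and evaluation loops on this page report progress through `eta(start, i, num_batches)` alongside `progress_bar`. A plausible minimal sketch of such an ETA helper, with the signature inferred from the call sites and the body an assumption:

import time

def eta(start, current, total):
    # Estimate remaining wall-clock time from the average time per step so far.
    elapsed = time.time() - start
    remaining = elapsed / max(current, 1) * (total - current)
    m, s = divmod(int(remaining), 60)
    h, m = divmod(m, 60)
    return "%02d:%02d:%02d" % (h, m, s)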
Example #14
    def get_embeddings(self, e=1):
        step = 1
        data_len = 0
        iter_lst = [self.get_iter(self.test_features_lst, self.args)]
        num_batches = sum([len(iterator[0]) for iterator in iter_lst])
        start = time.time()
        ds = []
        cls = []
        st_emb = []
        end_emb = []
        ent_emb = []
        labels_ent = []
        with torch.no_grad():
            for data_loader, sampler in iter_lst:
                for i, batch in enumerate(data_loader, start=1):
                    input_ids, input_mask, seg_ids, start_positions, end_positions, entity_mask, labels = batch

                    # remove unnecessary pad token
                    seq_len = torch.sum(torch.sign(input_ids), 1)
                    max_len = torch.max(seq_len)

                    input_ids = input_ids[:, :max_len].clone()
                    input_mask = input_mask[:, :max_len].clone()
                    seg_ids = seg_ids[:, :max_len].clone()
                    start_positions = start_positions.clone()
                    end_positions = end_positions.clone()
                    entity_mask = entity_mask.clone()

                    if self.args.use_cuda:
                        input_ids = input_ids.cuda(self.args.gpu,
                                                   non_blocking=True)
                        input_mask = input_mask.cuda(self.args.gpu,
                                                     non_blocking=True)
                        seg_ids = seg_ids.cuda(self.args.gpu,
                                               non_blocking=True)
                        start_positions = start_positions.cuda(
                            self.args.gpu, non_blocking=True)
                        end_positions = end_positions.cuda(self.args.gpu,
                                                           non_blocking=True)
                        entity_mask = entity_mask.cuda(self.args.gpu,
                                                       non_blocking=True)

                    sequence_output, _ = self.model(input_ids,
                                                    seg_ids,
                                                    input_mask,
                                                    dtype="bert")
                    cls.append(sequence_output[:, 0].detach().cpu().numpy())
                    st_emb.append(
                        sequence_output[:, start_positions[start_positions.nonzero()]]
                        .detach().cpu().numpy().mean(1).squeeze(1))
                    end_emb.append(
                        sequence_output[:, end_positions[end_positions.nonzero()]]
                        .detach().cpu().numpy().mean(1).squeeze(1))
                    ent_emb.append(sequence_output[
                        entity_mask > 0].detach().cpu().numpy())
                    ds.append(labels.detach().cpu().numpy())
                    labels_ent.append(
                        torch.unsqueeze(labels, 1).repeat(
                            1, 384)[entity_mask > 0].detach().cpu().numpy())
                    data_len += labels.shape[0]
                    if data_len > 10000:
                        break

                    msg = "Test {}/{} {} - ETA : {}".format(
                        i, num_batches, progress_bar(i, num_batches),
                        eta(start, i, num_batches))
                    print(msg, end="\r")

        print(
            np.concatenate(ds).shape,
            np.concatenate(cls).shape,
            np.concatenate(st_emb).shape)
        np.savetxt('labels.out', np.concatenate(ds), delimiter=',')
        np.savetxt('cls.out', np.concatenate(cls), delimiter=',')
        np.savetxt('start_emb.out', np.concatenate(st_emb), delimiter=',')
        np.savetxt('end_emb.out', np.concatenate(end_emb), delimiter=',')
        np.savetxt('ent_emb.out', np.concatenate(ent_emb), delimiter=',')
        np.savetxt('labels_ent.out', np.concatenate(labels_ent), delimiter=',')
Example #15
    def train(self):
        step = 1
        avg_qa_loss = 0
        avg_dis_loss = 0
        iter_lst = [self.get_iter(self.train_features_lst, self.args)]
        num_batches = sum([len(iterator[0]) for iterator in iter_lst])

        correct = torch.zeros((6), dtype=torch.float)
        tp = torch.zeros((6), dtype=torch.float)
        fp = torch.zeros((6), dtype=torch.float)
        fn = torch.zeros((6), dtype=torch.float)
        correct_total = 0

        for epoch in range(self.args.start_epoch,
                           self.args.start_epoch + self.args.epochs):
            start = time.time()
            self.model.train()
            batch_step = 1
            data_len = 0
            for data_loader, sampler in iter_lst:
                if self.args.distributed:
                    sampler.set_epoch(epoch)

                for i, batch in enumerate(data_loader, start=1):
                    input_ids, input_mask, seg_ids, start_positions, end_positions, labels = batch

                    # remove unnecessary pad token
                    seq_len = torch.sum(torch.sign(input_ids), 1)
                    max_len = torch.max(seq_len)

                    input_ids = input_ids[:, :max_len].clone()
                    input_mask = input_mask[:, :max_len].clone()
                    seg_ids = seg_ids[:, :max_len].clone()
                    start_positions = start_positions.clone()
                    end_positions = end_positions.clone()

                    if self.args.use_cuda:
                        input_ids = input_ids.cuda(self.args.gpu,
                                                   non_blocking=True)
                        input_mask = input_mask.cuda(self.args.gpu,
                                                     non_blocking=True)
                        seg_ids = seg_ids.cuda(self.args.gpu,
                                               non_blocking=True)
                        start_positions = start_positions.cuda(
                            self.args.gpu, non_blocking=True)
                        end_positions = end_positions.cuda(self.args.gpu,
                                                           non_blocking=True)

                    qa_loss = self.model(input_ids,
                                         seg_ids,
                                         input_mask,
                                         start_positions,
                                         end_positions,
                                         labels,
                                         dtype="qa",
                                         global_step=step)
                    qa_loss = qa_loss.mean()
                    qa_loss.backward()

                    if self.args.train_qa:
                        # update qa model
                        avg_qa_loss = self.cal_running_avg_loss(
                            qa_loss.item(), avg_qa_loss)
                        self.qa_optimizer.step()
                        self.qa_optimizer.zero_grad()

                    # update discriminator
                    dis_loss, log_prob = self.model(input_ids,
                                                    seg_ids,
                                                    input_mask,
                                                    start_positions,
                                                    end_positions,
                                                    labels,
                                                    dtype="dis",
                                                    global_step=step)
                    dis_loss = dis_loss.mean()
                    dis_loss.backward()
                    avg_dis_loss = self.cal_running_avg_loss(
                        dis_loss.item(), avg_dis_loss)
                    self.dis_optimizer.step()
                    self.dis_optimizer.zero_grad()
                    step += 1

                    if self.do_test_every > 0 and i % self.do_test_every == 0:
                        self.test(step)

                    if i % 2000 == 0:
                        result_dict = self.evaluate_model(i)
                        for dev_file, f1 in result_dict.items():
                            print("GPU/CPU {} evaluated {}: {:.2f}".format(
                                self.args.gpu, dev_file, f1),
                                  end="\n")

                        writer.add_scalar("Train/Total_accuracy",
                                          correct_total / data_len, i)
                        writer.add_scalars(
                            "Train/By_class_accuracy",
                            summary_map(self.num_to_name, correct / data_len),
                            i)
                        writer.add_scalars(
                            "Train/By_class_true_positives",
                            summary_map(self.num_to_name, tp / data_len), i)
                        writer.add_scalars(
                            "Train/By_class_false_negatives",
                            summary_map(self.num_to_name, fn / data_len), i)
                        writer.add_scalars(
                            "Train/By_class_false_positives",
                            summary_map(self.num_to_name, fp / data_len), i)

                        correct = torch.zeros((6), dtype=torch.float)
                        tp = torch.zeros((6), dtype=torch.float)
                        fp = torch.zeros((6), dtype=torch.float)
                        fn = torch.zeros((6), dtype=torch.float)
                        correct_total = 0

                    batch_step += 1

                    msg = ""
                    if self.args.train_qa:
                        msg = "Train {}/{} {} - ETA : {} - QA loss: {:.4f}, DIS loss: {:.4f}" \
                            .format(batch_step, num_batches, progress_bar(batch_step, num_batches),
                                    eta(start, batch_step, num_batches),
                                    avg_qa_loss, avg_dis_loss)
                    else:
                        msg = "Train {}/{} {} - ETA : {} - DIS loss: {:.4f}" \
                            .format(batch_step, num_batches, progress_bar(batch_step, num_batches),
                                    eta(start, batch_step, num_batches),
                                    avg_dis_loss)

                    writer.add_scalar("Loss/QA", avg_qa_loss, i)
                    writer.add_scalar("Loss/Discriminator", avg_dis_loss, i)

                    print(msg, end="\r")

                    data_len += labels.shape[0]
                    onehot_labels = torch.nn.functional.one_hot(
                        labels, num_classes=6).float()
                    onehot_pred = torch.nn.functional.one_hot(
                        (log_prob.argmax(dim=1).detach().cpu()),
                        num_classes=6).float()
                    correct_total += ((log_prob.argmax(
                        dim=1).detach().cpu()) == labels.detach().cpu()
                                      ).float().sum()
                    correct += (onehot_pred == onehot_labels).sum(
                        dim=0).float()
                    tp += ((onehot_pred.float() == 1) &
                           (onehot_labels.float() == 1)).sum(dim=0).float()
                    fp += ((onehot_pred.float() == 1) &
                           (onehot_labels.float() == 0)).sum(dim=0).float()
                    fn += ((onehot_pred.float() == 0) &
                           (onehot_labels.float() == 1)).sum(dim=0).float()
                    if i % 1000 == 0:
                        print(
                            "Accuracy total {}, by class {}, tp {}, fp {}, fn {}"
                            .format(correct_total / data_len,
                                    correct / data_len, tp / data_len,
                                    fp / data_len, fn / data_len),
                            end="\n")

            print(
                "[GPU Num: {}, Epoch: {}, Final QA loss: {:.4f}, Final DIS loss: {:.4f}]"
                .format(self.args.gpu, epoch, avg_qa_loss, avg_dis_loss))

            print("Train accuracy total {}, by class {}, tp {}, fp {}, fn {}".
                  format(correct_total / data_len, correct / data_len,
                         tp / data_len, fp / data_len, fn / data_len),
                  end="\n")
            # save model
            if not self.args.distributed or self.args.rank == 0:
                self.save_model(epoch, avg_qa_loss)

            if self.args.do_valid:
                result_dict = self.evaluate_model(epoch)
                for dev_file, f1 in result_dict.items():
                    print("GPU/CPU {} evaluated {}: {:.2f}".format(
                        self.args.gpu, dev_file, f1),
                          end="\n")
Example #16
def main(args):
    save_dir = os.path.join("./save", time.strftime("%m%d%H%M%S"))
    if not os.path.exists(save_dir):
        os.makedirs(save_dir)
    tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")
    if args.all_data:
        data_loader = get_ext_data_loader(tokenizer,
                                          "./data/train/",
                                          shuffle=True,
                                          args=args)
    else:
        data_loader, _, _ = get_data_loader(tokenizer,
                                            "./data/train-v1.1.json",
                                            shuffle=True,
                                            args=args)
    vocab_size = len(tokenizer.vocab)
    if args.bidaf:
        print("train bidaf")
        model = BiDAF(embedding_size=args.embedding_size,
                      vocab_size=vocab_size,
                      hidden_size=args.hidden_size,
                      drop_prob=args.dropout)
    else:
        ntokens = len(tokenizer.vocab)
        model = QANet(ntokens,
                      embedding=args.embedding,
                      embedding_size=args.embedding_size,
                      hidden_size=args.hidden_size,
                      num_head=args.num_head)
    if args.load_model:
        state_dict = torch.load(args.model_path, map_location="cpu")
        model.load_state_dict(state_dict)
        print("load pre-trained model")
    device = torch.device("cuda")
    model = model.to(device)
    model.train()
    ema = EMA(model, args.decay)

    base_lr = 1
    parameters = filter(lambda param: param.requires_grad, model.parameters())
    optimizer = optim.Adam(lr=base_lr,
                           betas=(0.9, 0.999),
                           eps=1e-7,
                           weight_decay=5e-8,
                           params=parameters)
    cr = args.lr / math.log2(args.lr_warm_up_num)
    scheduler = optim.lr_scheduler.LambdaLR(
        optimizer,
        lr_lambda=lambda ee: cr * math.log2(ee + 1)
        if ee < args.lr_warm_up_num else args.lr)
    step = 0
    num_batches = len(data_loader)
    avg_loss = 0
    best_f1 = 0
    for epoch in range(1, args.num_epochs + 1):
        step += 1
        start = time.time()
        model.train()
        for i, batch in enumerate(data_loader, start=1):
            c_ids, q_ids, start_positions, end_positions = batch
            c_len = torch.sum(torch.sign(c_ids), 1)
            max_c_len = torch.max(c_len)
            c_ids = c_ids[:, :max_c_len].to(device)
            q_len = torch.sum(torch.sign(q_ids), 1)
            max_q_len = torch.max(q_len)
            q_ids = q_ids[:, :max_q_len].to(device)

            start_positions = start_positions.to(device)
            end_positions = end_positions.to(device)

            optimizer.zero_grad()
            loss = model(c_ids,
                         q_ids,
                         start_positions=start_positions,
                         end_positions=end_positions)
            loss.backward()
            avg_loss = cal_running_avg_loss(loss.item(), avg_loss)
            nn.utils.clip_grad_norm_(model.parameters(), args.max_grad_norm)
            optimizer.step()
            scheduler.step(step)
            ema(model, step // args.batch_size)

            batch_size = c_ids.size(0)
            step += batch_size

            msg = "{}/{} {} - ETA : {} - qa_loss: {:.4f}" \
                .format(i, num_batches, progress_bar(i, num_batches),
                        eta(start, i, num_batches),
                        avg_loss)
            print(msg, end="\r")
        if not args.debug:
            metric_dict = eval_qa(args, model)
            f1 = metric_dict["f1"]
            em = metric_dict["exact_match"]
            print("epoch: {}, final loss: {:.4f}, F1:{:.2f}, EM:{:.2f}".format(
                epoch, avg_loss, f1, em))

            if args.bidaf:
                model_name = "bidaf"
            else:
                model_name = "qanet"
            if f1 > best_f1:
                best_f1 = f1
                state_dict = model.state_dict()
                save_file = "{}_{:.2f}_{:.2f}".format(model_name, f1, em)
                path = os.path.join(save_dir, save_file)
                torch.save(state_dict, path)
Example #17
    def train(self):
        step = 1
        avg_loss = 0
        global_step = 1
        iter_lst = [self.get_iter(self.features_lst, self.args)]
        num_batches = sum([len(iterator) for iterator in iter_lst])
        for epoch in range(self.args.start_epoch,
                           self.args.start_epoch + self.args.epochs):
            self.model.train()
            start = time.time()
            batch_step = 1
            for data_loader in iter_lst:

                for i, batch in enumerate(data_loader, start=1):
                    input_ids, input_mask, seg_ids, start_positions, end_positions, _ = batch

                    # remove unnecessary pad token
                    seq_len = torch.sum(torch.sign(input_ids), 1)
                    max_len = torch.max(seq_len)

                    input_ids = input_ids[:, :max_len].clone()
                    input_mask = input_mask[:, :max_len].clone()
                    seg_ids = seg_ids[:, :max_len].clone()
                    start_positions = start_positions.clone()
                    end_positions = end_positions.clone()

                    if self.args.use_cuda:
                        input_ids = input_ids.cuda(self.args.gpu,
                                                   non_blocking=True)
                        input_mask = input_mask.cuda(self.args.gpu,
                                                     non_blocking=True)
                        seg_ids = seg_ids.cuda(self.args.gpu,
                                               non_blocking=True)
                        start_positions = start_positions.cuda(
                            self.args.gpu, non_blocking=True)
                        end_positions = end_positions.cuda(self.args.gpu,
                                                           non_blocking=True)

                    loss = self.model(input_ids, seg_ids, input_mask,
                                      start_positions, end_positions)
                    loss = loss.mean()
                    loss = loss / self.args.gradient_accumulation_steps
                    loss.backward()

                    avg_loss = self.cal_running_avg_loss(
                        loss.item() * self.args.gradient_accumulation_steps,
                        avg_loss)
                    if step % self.args.gradient_accumulation_steps == 0:
                        self.optimizer.step()
                        self.optimizer.zero_grad()

                    if epoch != 0 and i % 2000 == 0:
                        result_dict = self.evaluate_model(i)
                        for dev_file, f1 in result_dict.items():
                            print("GPU/CPU {} evaluated {}: {:.2f}".format(
                                self.args.gpu, dev_file, f1),
                                  end="\n")

                    global_step += 1
                    batch_step += 1
                    msg = "{}/{} {} - ETA : {} - loss: {:.4f}" \
                        .format(batch_step, num_batches, progress_bar(batch_step, num_batches),
                                eta(start, batch_step, num_batches),
                                avg_loss)
                    print(msg, end="\r")

            print("[GPU Num: {}, epoch: {}, Final loss: {:.4f}]".format(
                self.args.gpu, epoch, avg_loss))

            # save model
            if self.args.rank == 0:
                self.save_model(epoch, avg_loss)

            if self.args.do_valid:
                result_dict = self.evaluate_model(epoch)
                for dev_file, f1 in result_dict.items():
                    print("GPU/CPU {} evaluated {}: {:.2f}".format(
                        self.args.gpu, dev_file, f1),
                          end="\n")
Example #18
    def train(self):

        self.model.zero_grad()

        for epoch in range(0, self.args.pretrain_epochs):

            num_batches = len(self.pretrain_loader)
            self.pretrain_sampler.set_epoch(epoch)
            start = time.time()

            # pretrain with unsupervised dataset
            for step, batch in enumerate(self.pretrain_loader, start=1):
                self.model.train()
                input_ids, input_mask, seg_ids, start_positions, end_positions = batch

                seq_len = torch.sum(torch.sign(input_ids), 1)
                max_len = torch.max(seq_len)
                input_ids = input_ids[:, :max_len].clone().cuda(
                    self.args.gpu, non_blocking=True)
                input_mask = input_mask[:, :max_len].clone().cuda(
                    self.args.gpu, non_blocking=True)
                seg_ids = seg_ids[:, :max_len].clone().cuda(self.args.gpu,
                                                            non_blocking=True)
                start_positions = start_positions.clone().cuda(
                    self.args.gpu, non_blocking=True)
                end_positions = end_positions.clone().cuda(self.args.gpu,
                                                           non_blocking=True)

                inputs = {
                    "input_ids": input_ids,
                    "attention_mask": input_mask,
                    "token_type_ids": seg_ids,
                    "start_positions": start_positions,
                    "end_positions": end_positions
                }
                loss = self.model(**inputs)[0]
                loss.backward()

                clip_grad_norm_(self.model.parameters(),
                                self.args.max_grad_norm)
                self.optimizer.step()
                self.scheduler.step()
                self.model.zero_grad()

                if self.args.rank == 0:
                    msg = "PRETRAIN {}/{} {} - ETA : {} - LOSS : {:.4f}".format(
                        step, num_batches, progress_bar(step, num_batches),
                        eta(start, step, num_batches), float(loss.item()))
                    print(msg, end="\r")

                if self.args.debug:
                    break

            # save model
            if self.args.rank == 0:
                result_dict = self.evaluate_model(msg)
                em = result_dict["exact_match"]
                f1 = result_dict["f1"]
                print(
                    "\nPRETRAIN took {} DEV - F1: {:.4f}, EM: {:.4f}\n".format(
                        user_friendly_time(time_since(start)), f1, em))

        if self.args.rank == 0:

            result_dict = self.evaluate_model("TEST", False)
            em = result_dict["exact_match"]
            f1 = result_dict["f1"]
            print("\nFINAL TEST - F1: {:.4f}, EM: {:.4f}\n".format(f1, em))
Example #19
    def train(self, consolidate=True, fisher_estimation_sample_size=1024):
        step = 1
        avg_loss = 0
        global_step = 1
        iter_lst = [self.get_iter(self.features_lst, self.args)]
        num_batches = sum([len(iterator[0]) for iterator in iter_lst])
        for epoch in range(self.args.start_epoch,
                           self.args.start_epoch + self.args.epochs):
            self.model.train()
            start = time.time()
            batch_step = 1
            for data_loader, sampler in iter_lst:
                if self.args.distributed:
                    sampler.set_epoch(epoch)

                for i, batch in enumerate(data_loader, start=1):
                    input_ids, input_mask, seg_ids, start_positions, end_positions, _ = batch

                    # remove unnecessary pad token
                    seq_len = torch.sum(torch.sign(input_ids), 1)
                    max_len = torch.max(seq_len)

                    input_ids = input_ids[:, :max_len].clone()
                    input_mask = input_mask[:, :max_len].clone()
                    seg_ids = seg_ids[:, :max_len].clone()
                    start_positions = start_positions.clone()
                    end_positions = end_positions.clone()

                    if self.args.use_cuda:
                        input_ids = input_ids.cuda(self.args.gpu,
                                                   non_blocking=True)
                        input_mask = input_mask.cuda(self.args.gpu,
                                                     non_blocking=True)
                        seg_ids = seg_ids.cuda(self.args.gpu,
                                               non_blocking=True)
                        start_positions = start_positions.cuda(
                            self.args.gpu, non_blocking=True)
                        end_positions = end_positions.cuda(self.args.gpu,
                                                           non_blocking=True)

                    loss = self.model(input_ids, seg_ids, input_mask,
                                      start_positions, end_positions)
                    loss = loss.mean()
                    loss = loss / self.args.gradient_accumulation_steps

                    ewc_loss = self.ewc_loss(cuda=True)
                    loss = loss + ewc_loss

                    loss.backward()

                    avg_loss = self.cal_running_avg_loss(
                        loss.item() * self.args.gradient_accumulation_steps,
                        avg_loss)
                    if step % self.args.gradient_accumulation_steps == 0:
                        self.optimizer.step()
                        self.optimizer.zero_grad()

                    if epoch != 0 and i % 2000 == 0:
                        result_dict = self.evaluate_model(i)
                        for dev_file, f1 in result_dict.items():
                            print("GPU/CPU {} evaluated {}: {:.2f}".format(
                                self.args.gpu, dev_file, f1),
                                  end="\n")

                    global_step += 1
                    batch_step += 1
                    msg = "{}/{} {} - ETA : {} - loss: {:.4f}" \
                        .format(batch_step, num_batches, progress_bar(batch_step, num_batches),
                                eta(start, batch_step, num_batches),
                                avg_loss)
                    print(msg, end="\r")

            print("[GPU Num: {}, epoch: {}, Final loss: {:.4f}]".format(
                self.args.gpu, epoch, avg_loss))

            # save model
            if self.args.rank == 0:
                self.save_model(epoch, avg_loss)

            if self.args.do_valid:
                result_dict = self.evaluate_model(epoch)
                for dev_file, f1 in result_dict.items():
                    print("GPU/CPU {} evaluated {}: {:.2f}".format(
                        self.args.gpu, dev_file, f1),
                          end="\n")

        if consolidate:
            # estimate the fisher information of the parameters and consolidate
            # them in the network.
            print(
                '=> Estimating diagonals of the fisher information matrix...',
                flush=True,
                end='',
            )
            # ATTENTION!!! the data_loader should entire training set!!!!
            self.consolidate(
                self.estimate_fisher(
                    self.get_data_loader(self.features_lst, self.args),
                    fisher_estimation_sample_size))
            print('EWC Loaded!')
Example #20
    def train(self):
        step = 1
        avg_qa_loss = 0
        avg_dis_loss = 0
        iter_lst = [self.get_iter(self.features_lst, self.args)]
        num_batches = sum([len(iterator[0]) for iterator in iter_lst])
        for epoch in range(self.args.start_epoch,
                           self.args.start_epoch + self.args.epochs):
            start = time.time()
            self.model.train()
            batch_step = 1
            for data_loader, sampler in iter_lst:
                if self.args.distributed:
                    sampler.set_epoch(epoch)

                for i, batch in enumerate(data_loader, start=1):
                    input_ids, input_mask, seg_ids, start_positions, end_positions, labels = batch

                    # remove unnecessary pad token
                    seq_len = torch.sum(torch.sign(input_ids), 1)
                    max_len = torch.max(seq_len)

                    input_ids = input_ids[:, :max_len].clone()
                    input_mask = input_mask[:, :max_len].clone()
                    seg_ids = seg_ids[:, :max_len].clone()
                    start_positions = start_positions.clone()
                    end_positions = end_positions.clone()

                    if self.args.use_cuda:
                        input_ids = input_ids.cuda(self.args.gpu,
                                                   non_blocking=True)
                        input_mask = input_mask.cuda(self.args.gpu,
                                                     non_blocking=True)
                        seg_ids = seg_ids.cuda(self.args.gpu,
                                               non_blocking=True)
                        start_positions = start_positions.cuda(
                            self.args.gpu, non_blocking=True)
                        end_positions = end_positions.cuda(self.args.gpu,
                                                           non_blocking=True)

                    qa_loss = self.model(input_ids,
                                         seg_ids,
                                         input_mask,
                                         start_positions,
                                         end_positions,
                                         labels,
                                         dtype="qa",
                                         global_step=step)
                    qa_loss = qa_loss.mean()
                    qa_loss.backward()

                    # update qa model
                    avg_qa_loss = self.cal_running_avg_loss(
                        qa_loss.item(), avg_qa_loss)
                    self.qa_optimizer.step()
                    self.qa_optimizer.zero_grad()

                    # update discriminator
                    dis_loss = self.model(input_ids,
                                          seg_ids,
                                          input_mask,
                                          start_positions,
                                          end_positions,
                                          labels,
                                          dtype="dis",
                                          global_step=step)
                    dis_loss = dis_loss.mean()
                    dis_loss.backward()
                    avg_dis_loss = self.cal_running_avg_loss(
                        dis_loss.item(), avg_dis_loss)
                    self.dis_optimizer.step()
                    self.dis_optimizer.zero_grad()
                    step += 1
                    if epoch != 0 and i % 2000 == 0:
                        result_dict = self.evaluate_model(i)
                        for dev_file, f1 in result_dict.items():
                            print("GPU/CPU {} evaluated {}: {:.2f}".format(
                                self.args.gpu, dev_file, f1),
                                  end="\n")

                    batch_step += 1
                    msg = "{}/{} {} - ETA : {} - QA loss: {:.4f}, DIS loss: {:.4f}" \
                        .format(batch_step, num_batches, progress_bar(batch_step, num_batches),
                                eta(start, batch_step, num_batches),
                                avg_qa_loss, avg_dis_loss)
                    print(msg, end="\r")

            print(
                "[GPU Num: {}, Epoch: {}, Final QA loss: {:.4f}, Final DIS loss: {:.4f}]"
                .format(self.args.gpu, epoch, avg_qa_loss, avg_dis_loss))

            # save model
            if not self.args.distributed or self.args.rank == 0:
                self.save_model(epoch, avg_qa_loss)

            if self.args.do_valid:
                result_dict = self.evaluate_model(epoch)
                for dev_file, f1 in result_dict.items():
                    print("GPU/CPU {} evaluated {}: {:.2f}".format(
                        self.args.gpu, dev_file, f1),
                          end="\n")
def lista_run(y, We, S, theta, T, **kwargs):
    B = np.matmul(We, y)
    xhat = ut.eta(B, theta[0])
    for t in range(1, T):
        xhat = ut.eta(B + np.matmul(S[t], xhat), theta[t])
    return xhat
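For context, `lista_run` unrolls T layers of the learned ISTA recursion xhat = eta(We*y + S[t]*xhat, theta[t]). A hypothetical smoke test showing the expected shapes (the random weights and dimensions are assumptions, and the module's np/ut imports are presumed available):

M, N, T, L = 20, 50, 4, 100
y = np.random.randn(M, L)             # batch of L measurement vectors
We = 0.1 * np.random.randn(N, M)      # learned encoder
S = 0.01 * np.random.randn(T, N, N)   # per-layer (untied) feedback matrices
theta = np.full(T, 0.1)               # per-layer thresholds
xhat = lista_run(y, We, S, theta, T)  # -> (N, L) sparse estimates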