Example 1
    def sample_data(self, data_num=100):
        z = Variable(FloatTensor(np.random.normal(0, 1, (data_num, 3))))
        labels = 1.558 * np.random.random_sample(size=(data_num, 1))
        labels = Variable(FloatTensor(labels))
        gen_coords = to_cpu(self.G(z, labels)).detach().numpy()
        labels = to_cpu(labels).detach().numpy()
        np.savez("wgan_gp/results/final", labels, self.rev_standardize(gen_coords))
Example 2
    def predict(self, doc, batchsize=None):
        """Prediction method for use at test time. Returns a lower-triangular score matrix."""
        t_phi_a = self.factory.to_device(
            self._adjust_features(doc.anaphoricity_features.long(),
                                  self.eps_model))
        t_phi_a_offsets = self.factory.to_device(
            doc.anaphoricity_offsets.long())
        t_phi_p = self.factory.to_device(
            self._adjust_features(doc.pairwise_features.long(),
                                  self.ana_model))
        t_phi_p_offsets = self.factory.to_device(doc.pairwise_offsets.long())

        phi_a = Variable(t_phi_a, volatile=True)
        phi_a_offsets = Variable(t_phi_a_offsets, volatile=True)
        phi_p = Variable(t_phi_p, volatile=True)
        phi_p_offsets = Variable(t_phi_p_offsets, volatile=True)

        eps_scores, h_a = self.eps_model(phi_a,
                                         phi_a_offsets,
                                         batchsize=batchsize)
        ana_scores = self.ana_model(h_a,
                                    phi_p,
                                    phi_p_offsets,
                                    batchsize=batchsize)

        scores = self._create_score_matrix(eps_scores.data, ana_scores.data)

        return to_cpu(scores)
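
A minimal usage sketch for the method above (hedged: `ranker`, `doc`, and the batch size are stand-ins, not names from the original code):

# `ranker` is an instance of the class defining predict(); `doc` supplies the
# anaphoricity/pairwise feature and offset tensors used above.
scores = ranker.predict(doc, batchsize=200)
# The result is a lower-triangular score matrix already moved to the CPU,
# presumably with one row per mention of the document.
print(scores.size())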
Example 3
    def create_successive_coords(self):
        """0.01から1.50まで151個のC_L^cと翼形状を生成"""
        cl_r = []
        cl_c = []
        gen_coords = []
        for cl in range(151):
            cl /= 100
            cl_c.append(cl)
            labels = Variable(torch.reshape(FloatTensor([cl]), (1, 1)))
            calc_num = 0
            while True:
                calc_num += 1
                z = Variable(
                    FloatTensor(np.random.normal(0, 1, (1, self.latent_dim))))
                gen_coord = self.rev_standardize(
                    to_cpu(self.G(z, labels)).detach().numpy())
                clr = get_cl(gen_coord)
                # cl = 0.1
                if not np.isnan(clr):
                    print(cl)
                    cl_r.append(clr)
                    gen_coords.append(gen_coord)
                    break
                if calc_num == 5:
                    print('not calculated {0}'.format(cl))
                    cl_r.append(-1)
                    gen_coords.append(gen_coord)
                    break

        np.savez("normal/results/successive_label", cl_c, cl_r, gen_coords)
Example 4
def sample_image(epoch=None, data_num=12):
    # Sample noise
    z = Variable(
        FloatTensor(np.random.normal(0, 1, (data_num, opt.latent_dim))))
    labels = max_cl * np.random.random_sample(size=(data_num, opt.n_classes))
    labels = Variable(FloatTensor(labels))
    gen_coords = to_cpu(generator(z, labels)).detach().numpy()
    labels = to_cpu(labels).detach().numpy()
    if epoch is not None:
        save_coords(gen_coords * coord_std + coord_mean, labels,
                    "wgan_gp/coords/epoch_{0}".format(str(epoch).zfill(3)))
    else:
        np.savez("wgan_gp/results/final", labels,
                 gen_coords * coord_std + coord_mean)
        save_coords(gen_coords * coord_std + coord_mean, labels,
                    "wgan_gp/coords/final.png")
Example 5
    def create_coords_by_cl(self, cl_c, data_num=20):
        z = Variable(
            FloatTensor(np.random.normal(0, 1, (data_num, self.latent_dim))))
        labels = np.array([cl_c] * data_num)
        labels = Variable(torch.reshape(FloatTensor([labels]), (data_num, 1)))
        gen_coords = self.rev_standardize(
            to_cpu(self.G(z, labels)).detach().numpy())
        return gen_coords
Example 6
    def save_params(self, file_name=None):
        if file_name is None:
            file_name = self.__class__.__name__ + '.pkl'

        params = [p.astype(np.float16) for p in self.params]
        if GPU:
            params = [to_cpu(p) for p in params]

        with open(file_name, 'wb') as f:
            pickle.dump(params, f)
Example 7
    def forward(self, phi_p, solutions, sizes):
        h_p = self.hp_model(phi_p)
        ana_scores = to_cpu(self.ana_scoring_model(h_p))

        loss = Variable(torch.zeros(1))
        idx = 0
        for sol, sz in zip(solutions, sizes):
            m_scores = ana_scores[idx:(idx + sz)]
            idx = idx + sz

            best_score, best_idx = torch.max(m_scores, 0)
            if not sol[best_idx].data[0]:
                best_correct = torch.max(m_scores[sol])
                loss += 1.0 + best_correct - best_score

        return loss
Example 8
    def run(self, schedule, epochs):
        self.running = True
        lossMeter = LossMeter()
        for cb in schedule.callbacks: cb.on_train_begin(self)
        for epoch in tqdm(range(epochs), desc="Epochs"):
            if not self.running: break
            for cb in schedule.callbacks: cb.on_epoch_begin(self)
            running_loss = 0
            for input, label, *_ in tqdm(schedule.data, desc="Steps", leave=False):
                if not self.running: break
                for cb in schedule.callbacks: cb.on_batch_begin(self)
                step_loss, outputs = self.step(input, label)
                if self.log:
                    lossMeter.update(util.to_cpu(step_loss), input.shape[0])
                for cb in schedule.callbacks: cb.on_batch_end(self, lossMeter, outputs, label)
            for cb in schedule.callbacks: cb.on_epoch_end(self, lossMeter)
        for cb in schedule.callbacks: cb.on_train_end(self)
Example 9
def train(model,
          train_config,
          training_set,
          dev_set,
          checkpoint=None,
          cuda=False):
    """Main training loop."""
    epoch_size = len(training_set)
    dot_interval = max(epoch_size // 80, 1)
    logging.info('%d documents per epoch' % epoch_size)

    if cuda:
        model = model.cuda()

    embedding_lr, deep_lr = train_config['learning_rate']
    embedding_layers = []
    deep_layers = []
    logging.info('Learning rates:')
    for name, p in model.named_parameters():
        if name.startswith('eps_model.ha_model.') or name.startswith(
                'ana_model.hp_model.'):
            logging.info('%g  %s  (embedding)' % (embedding_lr, name))
            embedding_layers.append(p)
        else:
            logging.info('%g  %s' % (deep_lr, name))
            deep_layers.append(p)

    opt_params = [{
        'params': embedding_layers,
        'lr': embedding_lr
    }, {
        'params': deep_layers,
        'lr': deep_lr
    }]

    opt = torch.optim.Adagrad(params=opt_params)

    # training_set, truncated = training_set.truncate_docs(train_config['maxsize_gpu'])
    # logging.info('Truncated %d/%d documents.' % (truncated, len(training_set)))

    model.set_error_costs(train_config['error_costs'])

    logging.info('Starting training...')
    for epoch in range(train_config['nepochs']):
        model.train()
        train_loss_reg = 0.0
        train_loss_unreg = 0.0
        for i, idx in enumerate(numpy.random.permutation(epoch_size)):
            if (i + 1) % dot_interval == 0:
                print('.', end='', flush=True)

            if training_set[idx].nmentions == 1:
                logging.info('Skipping document with only one mention.')
                continue

            opt.zero_grad()

            model_loss = model.compute_loss(
                training_set[idx], batchsize=train_config['batchsize'])

            reg_loss = to_cpu(sum(p.abs().sum() for p in model.parameters()))
            loss = model_loss + train_config['l1reg'] * reg_loss

            train_loss_unreg += model_loss.data[0] / training_set[idx].nmentions
            train_loss_reg += loss.data[0] / training_set[idx].nmentions

            loss.backward()
            opt.step()

            del loss
            del model_loss
            del reg_loss

        print(flush=True)

        if checkpoint:
            logging.info('Saving checkpoint...')
            with h5py.File('%s-%03d' % (checkpoint, epoch), 'w') as h5:
                util.save_model(h5, model)

        logging.info('Computing devset performance...')
        model.eval()
        dev_loss = 0.0
        dev_correct = 0
        dev_total = 0
        for doc in dev_set:
            loss, ncorrect = model.compute_dev_scores(
                doc, batchsize=train_config['batchsize'])

            dev_loss += loss
            dev_correct += ncorrect
            dev_total += doc.nmentions

        dev_acc = dev_correct / dev_total
        logging.info(
            'Epoch %d: train_loss_reg %g / train_loss_unreg %g / dev_loss %g / dev_acc %g'
            % (epoch, train_loss_reg, train_loss_unreg, dev_loss, dev_acc))
Example 10
    def compute_loss(self, doc, batchsize=None):
        """Compute the training loss.

        The loss is computed in a two-step procedure that exploits the structure of the objective function,
        whose value only ever depends on two scores per mention (the highest-scoring predicted and the
        highest-scoring correct). In the first step, we run the whole network without computing gradients
        to identify the scores contributing to the loss function. In the second step, we recompute the
        scores for those items only and do backpropagation."""
        t_phi_a = self.factory.to_device(
            self._adjust_features(doc.anaphoricity_features.long(),
                                  self.eps_model))
        t_phi_a_offsets = self.factory.to_device(
            doc.anaphoricity_offsets.long())
        t_phi_p = self.factory.to_device(
            self._adjust_features(doc.pairwise_features.long(),
                                  self.ana_model))
        t_phi_p_offsets = self.factory.to_device(doc.pairwise_offsets.long())
        solution_mask = self.factory.to_device(doc.solution_mask)

        # First do the full computation without gradients
        phi_a = Variable(t_phi_a, volatile=True)
        phi_a_offsets = Variable(t_phi_a_offsets, volatile=True)
        phi_p = Variable(t_phi_p, volatile=True)
        phi_p_offsets = Variable(t_phi_p_offsets, volatile=True)
        all_eps_scores, h_a = self.eps_model(phi_a,
                                             phi_a_offsets,
                                             batchsize=batchsize)
        all_ana_scores = self.ana_model(h_a,
                                        phi_p,
                                        phi_p_offsets,
                                        batchsize=batchsize)
        margin_info = self._find_margin(all_eps_scores.data,
                                        all_ana_scores.data, solution_mask)

        best_correct_idx = margin_info['best_correct_idx']
        loss_idx = margin_info['loss_idx']
        cost_values = margin_info['cost_values']
        loss_per_example = margin_info['loss_per_example']

        # Then turn on gradients and run on loss-contributing elements only
        loss_contributing = torch.gt(loss_per_example, 0.0).unsqueeze(1)
        if torch.sum(loss_contributing) == 0:
            return Variable(torch.zeros(1), requires_grad=False)
        loss_contributing_idx = loss_contributing.nonzero()[:, 0]
        n_loss_contributing = loss_contributing_idx.size()[0]

        # In the second run, we just compute the scores for the two elements per example
        # that contribute to the margin loss. At most one of them can be an epsilon score.
        # The scores will be put in an nmentions x 2 matrix. The following code determines
        # which of the entries in this matrix come from the eps and the ana scorer, respectively,
        # and which examples must be fed to each of the scorers.
        cand_idx = torch.stack([best_correct_idx, loss_idx], dim=1)
        example_no = self.factory.long_arange(
            0, doc.nmentions).unsqueeze(1).expand_as(cand_idx)
        is_epsilon = torch.eq(cand_idx, example_no)
        sub_is_epsilon = is_epsilon[loss_contributing_idx]
        cand_mask = (1 - is_epsilon) * loss_contributing.expand_as(is_epsilon)
        sub_cand_mask = cand_mask[loss_contributing_idx]
        cand_subset = Variable(
            example_no[:sub_cand_mask.size()[0], :].masked_select(
                sub_cand_mask))
        example_offsets = torch.cumsum(
            torch.cat([
                self.factory.long_zeros(1, 2),
                example_no[:(doc.nmentions - 1), :]
            ]), 0)
        cand_idx_in_doc = cand_idx + example_offsets
        relevant_cands = cand_idx_in_doc[cand_mask]

        # Next, we compute the required scores.
        phi_a = Variable(t_phi_a, volatile=False, requires_grad=False)
        phi_a_offsets = Variable(t_phi_a_offsets,
                                 volatile=False,
                                 requires_grad=False)
        phi_p = Variable(t_phi_p, volatile=False, requires_grad=False)
        phi_p_offsets = Variable(t_phi_p_offsets,
                                 volatile=False,
                                 requires_grad=False)

        sub_phi_a, sub_phi_a_offsets = self._select_features(
            phi_a, phi_a_offsets, loss_contributing_idx)
        sub_phi_p, sub_phi_p_offsets = self._select_features(
            phi_p, phi_p_offsets, relevant_cands)
        sub_eps_scores, sub_h_a = self.eps_model(sub_phi_a,
                                                 sub_phi_a_offsets,
                                                 batchsize=batchsize)
        sub_ana_scores = self.ana_model(sub_h_a,
                                        sub_phi_p,
                                        sub_phi_p_offsets,
                                        cand_subset=cand_subset,
                                        batchsize=batchsize)

        # Then we store them in the right components of the scores matrix.
        scores = Variable(self.factory.zeros(n_loss_contributing, 2))
        scores[sub_cand_mask] = sub_ana_scores
        needs_eps = torch.gt(torch.sum(sub_is_epsilon, dim=1), 0)
        if self.factory.get_single(torch.sum(needs_eps)) > 0:
            eps_idx = Variable(
                example_no[:sub_cand_mask.size()[0], :].masked_select(
                    1 - sub_cand_mask))
            scores[1 - sub_cand_mask] = sub_eps_scores[eps_idx]

        # The applicable rescaling weights can be taken from the first run. We now compute the scores.
        var_cost_values = Variable(cost_values, requires_grad=False)
        sub_loss_per_example = var_cost_values[loss_contributing_idx].squeeze(
        ) * (1.0 - scores[:, 0] + scores[:, 1])
        model_loss = to_cpu(torch.sum(sub_loss_per_example))

        # The loss values computed in the first and the second run should be equal, since the second
        # run only serves to obtain the gradients. In rare cases, there seems to be a discrepancy
        # between the scores. This needs more investigation.
        # The warning is silenced for nets with dropout until we've implemented consistent dropout masks
        # in the two-stage scoring process.
        score_diff = abs(
            self.factory.get_single(model_loss) -
            self.factory.get_single(margin_info['loss']))
        if score_diff > 1e-4 and self.net_config['dropout_h_comb'] is None:
            logging.warning('Unexpected score difference: %g' % score_diff)

        return model_loss
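
The two-step procedure described in the docstring can be condensed into a short sketch. This uses current PyTorch idioms (torch.no_grad) instead of the Variable/volatile API above; score_fn, inputs, gold_mask and costs are hypothetical stand-ins, not the authors' API:

import torch

def two_pass_margin_loss(score_fn, inputs, gold_mask, costs):
    # Pass 1: no gradients; per example, find the highest-scoring correct
    # candidate and the highest-scoring incorrect one. (The original additionally
    # folds per-candidate error costs into this selection.)
    with torch.no_grad():
        scores = score_fn(inputs)              # (n_examples, n_candidates)
        best_correct = scores.masked_fill(~gold_mask, float('-inf')).argmax(dim=1)
        best_wrong = scores.masked_fill(gold_mask, float('-inf')).argmax(dim=1)
        margin = (1.0 - scores.gather(1, best_correct.unsqueeze(1))
                  + scores.gather(1, best_wrong.unsqueeze(1))).squeeze(1)
        contributing = (margin > 0).nonzero(as_tuple=True)[0]
    if contributing.numel() == 0:
        return torch.zeros(1)
    # Pass 2: recompute scores with gradients enabled, but only for the examples
    # that contribute to the loss (the original goes further and rescores just
    # the two relevant candidates per example).
    sub_scores = score_fn(inputs[contributing])
    rows = torch.arange(sub_scores.size(0), device=sub_scores.device)
    sub_margin = (1.0 - sub_scores[rows, best_correct[contributing]]
                  + sub_scores[rows, best_wrong[contributing]])
    weights = costs[contributing, best_wrong[contributing]]
    return (weights * sub_margin).sum()

Here gold_mask is assumed to be a bool tensor marking the correct candidates, and costs a per-candidate error-cost matrix with the same shape as the scores.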
Example 11
    def update(self, outputs, labels):
        preds = torch.clamp(torch.round(util.to_cpu(outputs).data), 0, 1).numpy().astype(int)
        labels = util.to_cpu(labels).data.numpy().astype(int)

        self.update_from_numpy(preds, labels)       
Example 12
def pretrain_hp(model,
                train_config,
                training_set,
                dev_set,
                checkpoint=None,
                cuda=False):
    epoch_size = len(training_set)

    dot_interval = max(epoch_size // 80, 1)
    logging.info('%d documents per epoch' % epoch_size)

    opt = torch.optim.Adagrad(params=model.parameters(),
                              lr=train_config['learning_rate'][1])

    logging.info('Filtering corpora for pretraining...')
    train_features, train_sizes, train_solutions = filter_for_pretrain_hp(
        training_set)
    dev_features, dev_sizes, dev_solutions = filter_for_pretrain_hp(dev_set)

    logging.info('Starting training...')
    for epoch in range(train_config['nepochs']):
        train_loss_reg = 0.0
        train_loss_unreg = 0.0
        for i, idx in enumerate(numpy.random.permutation(epoch_size)):
            if (i + 1) % dot_interval == 0:
                print('.', end='', flush=True)

            if len(train_sizes[idx]) == 0:
                # no anaphoric mentions in document
                continue

            opt.zero_grad()

            if cuda:
                phi_p = Variable(
                    train_features[idx].pin_memory()).cuda(async=True)
            else:
                phi_p = Variable(train_features[idx])

            solutions = [Variable(sol) for sol in train_solutions[idx]]
            model_loss = model(phi_p, solutions, train_sizes[idx])

            reg_loss = to_cpu(sum(p.abs().sum() for p in model.parameters()))
            loss = model_loss + train_config['l1reg'] * reg_loss

            train_loss_unreg += model_loss.data[0] / len(train_sizes[idx])
            train_loss_reg += loss.data[0] / len(train_sizes[idx])

            loss.backward()
            opt.step()

            del loss
            del model_loss
            del reg_loss

        print(flush=True)

        if cuda:
            cpu_model = copy.deepcopy(model).cpu()
        else:
            cpu_model = model

        if checkpoint:
            logging.info('Saving checkpoint...')
            with open('%s-%03d' % (checkpoint, epoch), 'wb') as f:
                torch.save(cpu_model.state_dict(), f)

        logging.info('Computing devset performance...')
        dev_loss = 0.0
        for docft, docsz, docsol in zip(dev_features, dev_sizes,
                                        dev_solutions):
            if cuda:
                phi_p = Variable(docft.pin_memory(),
                                 volatile=True).cuda(async=True)
            else:
                phi_p = Variable(docft, volatile=True)

            solutions = [Variable(sol, volatile=True) for sol in docsol]
            dev_loss += model(phi_p, solutions, docsz).data[0]

        logging.info(
            'Epoch %d: train_loss_reg %g / train_loss_unreg %g / dev_loss %g' %
            (epoch, train_loss_reg, train_loss_unreg, dev_loss))
Example 13
    def update(self, output, label):
        _, preds = torch.max(output, 1)
        batch_correct = util.to_cpu(torch.sum(preds == label).data)
        self.num_correct += batch_correct
        self.count += label.shape[0]
        return (batch_correct.double() / label.shape[0]).item()