Example #1
    def loss_consistency(self, sample):
        x_augment = sample["x_augment"]
        n_samples = len(x_augment)

        # x_augment = [(A, [A_1, A_2, ..., A_n]), (B, [B_1, B_2, ..., B_m])]
        lengths = [1 + len(augments) for sentence, augments in x_augment]

        x = list()
        for sentence, augs in x_augment:
            x.append(sentence)
            x += augs

        z = self.encoder.embed_sentences(x)
        assert len(z) == sum(lengths)

        i = 0
        original_embeddings = list()
        augmented_embeddings = list()
        for length in lengths:
            original_embeddings.append(z[i])
            # Augmentations of this sentence occupy positions i+1 .. i+length-1
            augmented_embeddings.append(z[i + 1:i + length])
            i += length

        # Stack into (n_samples, dim) tensors; average each sentence's augmentations
        original_embeddings = torch.stack(original_embeddings)
        augmented_embeddings = torch.stack(
            [a.mean(0) for a in augmented_embeddings])
        if self.metric == "euclidean":
            dists = euclidean_dist(original_embeddings, augmented_embeddings)
        elif self.metric == "cosine":
            dists = (-cosine_similarity(original_embeddings,
                                        augmented_embeddings) + 1) * 5
        else:
            raise NotImplementedError

        log_p_y = torch_functional.log_softmax(-dists, dim=1).view(
            n_samples, n_samples, -1)
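All of these snippets call euclidean_dist and cosine_similarity without defining them. The following is a minimal sketch of pairwise helpers with the assumed (n, d) x (m, d) -> (n, m) signature; the original project's implementations may differ.

import torch


def euclidean_dist(x, y):
    # x: (n, d), y: (m, d) -> (n, m) pairwise squared Euclidean distances
    n, m = x.size(0), y.size(0)
    x = x.unsqueeze(1).expand(n, m, -1)
    y = y.unsqueeze(0).expand(n, m, -1)
    return torch.pow(x - y, 2).sum(2)


def cosine_similarity(x, y):
    # x: (n, d), y: (m, d) -> (n, m) pairwise cosine similarities
    x = torch.nn.functional.normalize(x, dim=-1)
    y = torch.nn.functional.normalize(y, dim=-1)
    return x @ y.t()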
Example #2
    def loss(self, sample):
        """
        :param sample: {
            "xs": [
                [support_A_1, support_A_2, ...],
                [support_B_1, support_B_2, ...],
                [support_C_1, support_C_2, ...],
                ...
            ],
            "xq": [
                [query_A_1, query_A_2, ...],
                [query_B_1, query_B_2, ...],
                [query_C_1, query_C_2, ...],
                ...
            ]
        } 
        :return: 
        """
        xs = sample['xs']  # support
        xq = sample['xq']  # query

        n_class = len(xs)
        assert len(xq) == n_class
        n_support = len(xs[0])
        n_query = len(xq[0])

        target_inds = torch.arange(0, n_class).view(n_class, 1, 1).expand(
            n_class, n_query, 1).long()
        target_inds = Variable(target_inds, requires_grad=False).to(device)

        x = [item for xs_ in xs
             for item in xs_] + [item for xq_ in xq for item in xq_]
        z = self.encoder.forward(x)
        z_dim = z.size(-1)

        z_proto = z[:n_class * n_support].view(n_class, n_support,
                                               z_dim).mean(1)
        zq = z[n_class * n_support:]

        if self.metric == "euclidean":
            dists = euclidean_dist(zq, z_proto)
        elif self.metric == "cosine":
            dists = (-cosine_similarity(zq, z_proto) + 1) * 5
        else:
            raise NotImplementedError

        log_p_y = torch_functional.log_softmax(-dists, dim=1).view(
            n_class, n_query, -1)
        dists.view(n_class, n_query, -1)
        loss_val = -log_p_y.gather(2, target_inds).squeeze().view(-1).mean()
        _, y_hat = log_p_y.max(2)
        acc_val = torch.eq(y_hat, target_inds.squeeze()).float().mean()

        return loss_val, {
            'loss': loss_val.item(),
            'acc': acc_val.item(),
            'dists': dists,
            'target': target_inds
        }
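The core of this loss is the prototypical-network step: class prototypes are the mean support embeddings, and each query is scored by its negative distance to every prototype. Below is a self-contained sketch of that step on random tensors (shapes and data are made up, not taken from the source).

import torch
import torch.nn.functional as F

n_class, n_support, n_query, z_dim = 3, 5, 2, 8
z_support = torch.randn(n_class * n_support, z_dim)   # stacked support embeddings
z_query = torch.randn(n_class * n_query, z_dim)       # stacked query embeddings

z_proto = z_support.view(n_class, n_support, z_dim).mean(1)   # (n_class, z_dim)
dists = torch.cdist(z_query, z_proto)                         # (n_class * n_query, n_class)
target = torch.arange(n_class).repeat_interleave(n_query)     # true class of each query
loss = F.cross_entropy(-dists, target)
acc = ((-dists).argmax(1) == target).float().mean()
print(loss.item(), acc.item())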
Example #3
    def loss(self, sample, supervised_loss_share: float = 0):
        """
        :param supervised_loss_share: share of supervised loss in total loss
        :param sample: {
            "xs": [
                [support_A_1, support_A_2, ...],
                [support_B_1, support_B_2, ...],
                [support_C_1, support_C_2, ...],
                ...
            ],
            "xq": [
                [query_A_1, query_A_2, ...],
                [query_B_1, query_B_2, ...],
                [query_C_1, query_C_2, ...],
                ...
            ]
        }
        :return:
        """
        xs = sample['xs']  # support
        xq = sample['xq']  # query

        n_class = len(xs)
        assert len(xq) == n_class
        n_support = len(xs[0])
        n_query = len(xq[0])

        target_inds = torch.arange(0, n_class).view(n_class, 1, 1).expand(
            n_class, n_query, 1).long()
        target_inds = Variable(target_inds, requires_grad=False).to(device)

        has_augment = "x_augment" in sample
        if has_augment:
            augmentations = sample["x_augment"]

            n_augmentations_samples = len(augmentations)
            n_augmentations_per_sample = [
                len(item['tgt_texts']) for item in augmentations
            ]
            assert len(set(n_augmentations_per_sample)) == 1
            n_augmentations_per_sample = n_augmentations_per_sample[0]

            supports = [item["sentence"] for xs_ in xs for item in xs_]
            queries = [item["sentence"] for xq_ in xq for item in xq_]
            augmentations_supports = [
                list(item["tgt_texts"]) for item in augmentations
            ]
            augmentation_queries = [
                item["src_text"] for item in augmentations
            ]

            # Encode
            x = supports + queries + [
                item2 for item1 in augmentations_supports for item2 in item1
            ] + augmentation_queries
            z = self.encoder.embed_sentences(x)
            z_dim = z.size(-1)

            # Dispatch
            z_support = z[:len(supports)].view(n_class, n_support,
                                               z_dim).mean(dim=[1])
            z_query = z[len(supports):len(supports) + len(queries)]
            z_aug_support = (
                z[len(supports) + len(queries):len(supports) + len(queries) +
                  n_augmentations_per_sample * n_augmentations_samples].view(
                      n_augmentations_samples, n_augmentations_per_sample,
                      z_dim).mean(dim=[1]))
            z_aug_query = z[-len(augmentation_queries):]
        else:
            # When not using augmentations
            supports = [item["sentence"] for xs_ in xs for item in xs_]
            queries = [item["sentence"] for xq_ in xq for item in xq_]

            # Encode
            x = supports + queries
            z = self.encoder.embed_sentences(x)
            z_dim = z.size(-1)

            # Dispatch
            z_support = z[:len(supports)].view(n_class, n_support,
                                               z_dim).mean(dim=[1])
            z_query = z[len(supports):len(supports) + len(queries)]

        if self.metric == "euclidean":
            supervised_dists = euclidean_dist(z_query, z_support)
            if has_augment:
                unsupervised_dists = euclidean_dist(z_aug_query, z_aug_support)
        elif self.metric == "cosine":
            supervised_dists = (-cosine_similarity(z_query, z_support) + 1) * 5
            if has_augment:
                unsupervised_dists = (
                    -cosine_similarity(z_aug_query, z_aug_support) + 1) * 5
        else:
            raise NotImplementedError

        from torch.nn import CrossEntropyLoss
        supervised_loss = CrossEntropyLoss()(-supervised_dists,
                                             target_inds.reshape(-1))
        _, y_hat_supervised = (-supervised_dists).max(1)
        acc_val_supervised = torch.eq(y_hat_supervised,
                                      target_inds.reshape(-1)).float().mean()

        if has_augment:
            # Unsupervised loss
            unsupervised_target_inds = torch.arange(
                n_augmentations_samples).to(device).long()
            unsupervised_loss = CrossEntropyLoss()(-unsupervised_dists,
                                                   unsupervised_target_inds)
            _, y_hat_unsupervised = (-unsupervised_dists).max(1)
            acc_val_unsupervised = torch.eq(
                y_hat_unsupervised,
                unsupervised_target_inds.reshape(-1)).float().mean()

            # Final loss
            assert 0 <= supervised_loss_share <= 1
            final_loss = (supervised_loss_share * supervised_loss +
                          (1 - supervised_loss_share) * unsupervised_loss)

            return final_loss, {
                "metrics": {
                    "supervised_acc": acc_val_supervised.item(),
                    "unsupervised_acc": acc_val_unsupervised.item(),
                    "supervised_loss": supervised_loss.item(),
                    "unsupervised_loss": unsupervised_loss.item(),
                    "supervised_loss_share": supervised_loss_share,
                    "final_loss": final_loss.item(),
                },
                "supervised_dists": supervised_dists,
                "unsupervised_dists": unsupervised_dists,
                "target": target_inds
            }

        return supervised_loss, {
            "metrics": {
                "acc": acc_val_supervised.item(),
                "loss": supervised_loss.item(),
            },
            "dists": supervised_dists,
            "target": target_inds
        }
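The final loss in the augmented branch is a convex combination of the two terms, controlled by supervised_loss_share. A tiny worked example with placeholder values:

import torch

supervised_loss = torch.tensor(0.7)
unsupervised_loss = torch.tensor(1.3)
supervised_loss_share = 0.25

final_loss = (supervised_loss_share * supervised_loss
              + (1 - supervised_loss_share) * unsupervised_loss)
print(final_loss.item())  # 0.25 * 0.7 + 0.75 * 1.3 = 1.15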
Example #4
    def loss_softkmeans(self, sample):
        xs = sample['xs']  # support
        xq = sample['xq']  # query
        xu = sample['xu']  # unlabeled

        n_class = len(xs)
        assert len(xq) == n_class
        n_support = len(xs[0])
        n_query = len(xq[0])

        target_inds = torch.arange(0, n_class).view(n_class, 1, 1).expand(
            n_class, n_query, 1).long()
        target_inds = Variable(target_inds, requires_grad=False).to(device)

        x = [item["sentence"] for xs_ in xs for item in xs_
             ] + [item["sentence"] for xq_ in xq
                  for item in xq_] + [item["sentence"] for item in xu]
        z = self.encoder.embed_sentences(x)
        z_dim = z.size(-1)

        zs = z[:n_class * n_support]
        z_proto = z[:n_class * n_support].view(n_class, n_support,
                                               z_dim).mean(1)
        zq = z[n_class * n_support:(n_class * n_support) + (n_class * n_query)]
        zu = z[(n_class * n_support) + (n_class * n_query):]

        distances_to_proto = euclidean_dist(torch.cat((zs, zu)), z_proto)

        distances_to_proto_normed = torch.nn.Softmax(
            dim=-1)(-distances_to_proto)

        refined_protos = list()
        for class_ix in range(n_class):
            # Class support embeddings plus all unlabeled embeddings
            z_class = torch.cat(
                (zs[class_ix * n_support:(class_ix + 1) * n_support], zu))
            # Support points get weight 1; unlabeled points get their soft
            # assignment to this class
            d = torch.cat((torch.ones(n_support).to(device),
                           distances_to_proto_normed[(n_class * n_support):,
                                                     class_ix]))
            refined_proto = (z_class.t() * d).sum(1) / d.sum()
            refined_protos.append(refined_proto.view(1, -1))
        refined_protos = torch.cat(refined_protos)

        if self.metric == "euclidean":
            dists = euclidean_dist(zq, refined_protos)
        elif self.metric == "cosine":
            dists = (-cosine_similarity(zq, refined_protos) + 1) * 5
        else:
            raise NotImplementedError

        log_p_y = torch_functional.log_softmax(-dists, dim=1).view(
            n_class, n_query, -1)
        dists.view(n_class, n_query, -1)
        loss_val = -log_p_y.gather(2, target_inds).squeeze().view(-1).mean()
        _, y_hat = log_p_y.max(2)
        acc_val = torch.eq(y_hat, target_inds.squeeze()).float().mean()

        return loss_val, {
            'loss': loss_val.item(),
            "metrics": {
                "acc": acc_val.item(),
                "loss": loss_val.item(),
            },
            'dists': dists,
            'target': target_inds
        }
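The refinement loop above is a single soft k-means step: each unlabeled point contributes to every class prototype with a weight given by its softmax-normalized negative distance to the initial prototypes, while support points keep weight 1. A compact sketch of just that step on random tensors (data and shapes are illustrative only):

import torch

n_class, n_support, n_unlabeled, z_dim = 2, 3, 4, 5
zs = torch.randn(n_class * n_support, z_dim)            # support embeddings
zu = torch.randn(n_unlabeled, z_dim)                    # unlabeled embeddings
z_proto = zs.view(n_class, n_support, z_dim).mean(1)    # initial prototypes

soft = torch.softmax(-torch.cdist(zu, z_proto), dim=-1)  # (n_unlabeled, n_class)
refined = []
for c in range(n_class):
    z_c = torch.cat((zs[c * n_support:(c + 1) * n_support], zu))
    w = torch.cat((torch.ones(n_support), soft[:, c]))
    refined.append((z_c.t() * w).sum(1) / w.sum())
refined_protos = torch.stack(refined)                    # (n_class, z_dim)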
Example #5
    def train_ARSC_one_episode(
        self,
        data_path: str,
        n_iter: int = 100,
    ):
        self.train()
        episode = create_ARSC_train_episode(prefix=data_path,
                                            n_support=5,
                                            n_query=0,
                                            n_unlabeled=0)
        n_episode_classes = len(episode["xs"])
        loss_fn = nn.CrossEntropyLoss()
        episode_matrix = None
        episode_classifier = None
        if self.is_pp:
            with torch.no_grad():
                init_matrix = np.array([[
                    self.encoder.forward([sentence
                                          ]).squeeze().cpu().detach().numpy()
                    for sentence in episode["xs"][c]
                ] for c in range(n_episode_classes)]).mean(1)

            episode_matrix = torch.Tensor(init_matrix).to(device)
            episode_matrix.requires_grad = True
            optimizer = torch.optim.Adam(list(self.parameters()) +
                                         [episode_matrix],
                                         lr=2e-5)
        else:
            episode_classifier = nn.Linear(
                in_features=self.hidden_dim,
                out_features=n_episode_classes).to(device)
            optimizer = torch.optim.Adam(list(self.parameters()) +
                                         list(episode_classifier.parameters()),
                                         lr=2e-5)

        # Train on support
        iter_bar = tqdm.tqdm(range(n_iter))
        losses = list()
        accuracies = list()

        for _ in iter_bar:
            optimizer.zero_grad()

            sentences = [
                sentence for sentence_list in episode["xs"]
                for sentence in sentence_list
            ]
            labels = torch.Tensor([
                ix for ix, sl in enumerate(episode["xs"]) for _ in sl
            ]).long().to(device)
            z = self.encoder(sentences)

            # z = batch_embeddings

            if self.is_pp:
                if self.metric == "cosine":
                    z = cosine_similarity(z, episode_matrix) * 5
                elif self.metric == "euclidean":
                    z = -euclidean_dist(z, episode_matrix)
                else:
                    raise NotImplementedError
            else:
                z = self.dropout(z)
                z = episode_classifier(z)

            loss = loss_fn(input=z, target=labels)
            acc = (z.argmax(1) == labels).float().mean()
            loss.backward()
            optimizer.step()
            iter_bar.set_description(f"{loss.item():.3f} | {acc.item():.3f}")
            losses.append(loss.item())
            accuracies.append(acc.item())
        return {"loss": np.mean(losses), "acc": np.mean(accuracies)}
Example #6
    def test_one_episode(
        self,
        support_data_dict: Dict[str, List[str]],
        query_data_dict: Dict[str, List[str]],
        sentence_to_embedding_dict: Dict,
        batch_size: int = 4,
        n_iter: int = 1000,
        summary_writer: SummaryWriter = None,
        summary_tag_prefix: str = None,
    ):

        # Check data integrity
        assert set(support_data_dict.keys()) == set(query_data_dict.keys())

        # Freeze encoder
        self.encoder.eval()

        episode_classes = sorted(set(support_data_dict.keys()))
        n_episode_classes = len(episode_classes)
        class_to_ix = {c: ix for ix, c in enumerate(episode_classes)}
        ix_to_class = {ix: c for ix, c in enumerate(episode_classes)}
        support_data_list = [{
            "sentence": sentence,
            "label": label
        } for label, sentences in support_data_dict.items()
                             for sentence in sentences]
        support_data_list = (support_data_list * batch_size *
                             n_iter)[:(batch_size * n_iter)]

        loss_fn = nn.CrossEntropyLoss()
        episode_matrix = None
        episode_classifier = None
        if self.is_pp:
            init_matrix = np.array([[
                sentence_to_embedding_dict[sentence].ravel()
                for sentence in support_data_dict[ix_to_class[c]]
            ] for c in range(n_episode_classes)]).mean(1)

            episode_matrix = torch.Tensor(init_matrix).to(device)
            episode_matrix.requires_grad = True
            optimizer = torch.optim.Adam([episode_matrix], lr=1e-3)
        else:
            episode_classifier = nn.Linear(
                in_features=self.hidden_dim,
                out_features=n_episode_classes).to(device)
            optimizer = torch.optim.Adam(list(episode_classifier.parameters()),
                                         lr=1e-3)

        # Train on support
        iter_bar = tqdm.tqdm(range(n_iter))
        for iteration in iter_bar:
            optimizer.zero_grad()

            batch = support_data_list[iteration *
                                      batch_size:iteration * batch_size +
                                      batch_size]
            batch_sentences = [d['sentence'] for d in batch]
            batch_embeddings = torch.Tensor([
                sentence_to_embedding_dict[s] for s in batch_sentences
            ]).to(device)
            batch_labels = torch.Tensor(
                [class_to_ix[d['label']] for d in batch]).long().to(device)
            # z = self.encoder(batch_sentences)
            z = batch_embeddings

            if self.is_pp:
                if self.metric == "cosine":
                    z = cosine_similarity(z, episode_matrix) * 5
                elif self.metric == "euclidean":
                    z = -euclidean_dist(z, episode_matrix)
                else:
                    raise NotImplementedError
            else:
                z = self.dropout(z)
                z = episode_classifier(z)

            loss = loss_fn(input=z, target=batch_labels)
            acc = (z.argmax(1) == batch_labels).float().mean()
            loss.backward()
            optimizer.step()
            iter_bar.set_description(f"{loss.item():.3f} | {acc.item():.3f}")

            if summary_writer:
                summary_writer.add_scalar(tag=f'{summary_tag_prefix}_loss',
                                          global_step=iteration,
                                          scalar_value=loss.item())
                summary_writer.add_scalar(tag=f'{summary_tag_prefix}_acc',
                                          global_step=iteration,
                                          scalar_value=acc.item())

        # Predict on query
        self.eval()
        if not self.is_pp:
            episode_classifier.eval()

        query_data_list = [{
            "sentence": sentence,
            "label": label
        } for label, sentences in query_data_dict.items()
                           for sentence in sentences]
        query_labels = torch.Tensor([
            class_to_ix[d['label']] for d in query_data_list
        ]).long().to(device)
        logits = list()
        with torch.no_grad():
            for ix in range(0, len(query_data_list), 16):
                batch = query_data_list[ix:ix + 16]
                batch_sentences = [d['sentence'] for d in batch]
                batch_embeddings = torch.Tensor([
                    sentence_to_embedding_dict[s] for s in batch_sentences
                ]).to(device)
                # z = self.encoder(batch_sentences)
                z = batch_embeddings

                if self.is_pp:
                    if self.metric == "cosine":
                        z = cosine_similarity(z, episode_matrix) * 5
                    elif self.metric == "euclidean":
                        z = -euclidean_dist(z, episode_matrix)
                    else:
                        raise NotImplementedError
                else:
                    z = episode_classifier(z)

                logits.append(z)
        logits = torch.cat(logits, dim=0)
        y_hat = logits.argmax(1)

        y_pred = logits.argmax(1).cpu().detach().numpy()
        probas_pred = logits.cpu().detach().numpy()
        probas_pred = np.exp(probas_pred) / np.exp(probas_pred).sum(1)[:, None]

        y_true = query_labels.cpu().detach().numpy()
        where_ok = np.where(y_pred == y_true)[0]
        import uuid
        tag = str(uuid.uuid4())
        # summary_writer defaults to None, so guard all text logging
        if summary_writer:
            summary_writer.add_text(tag=tag,
                                    text_string=json.dumps(ix_to_class,
                                                           ensure_ascii=False),
                                    global_step=0)
        if summary_writer and len(where_ok):
            # Looking for OK but with less confidence (not too easy)
            ok_idx = sorted(where_ok,
                            key=lambda x: probas_pred[x][y_pred[x]])[0]
            ok_sentence = query_data_list[ok_idx]['sentence']
            ok_prediction = ix_to_class[y_pred[ok_idx]]
            ok_label = query_data_list[ok_idx]['label']
            summary_writer.add_text(tag=tag,
                                    text_string=json.dumps({
                                        "sentence": ok_sentence,
                                        "true_label": ok_label,
                                        "predicted_label": ok_prediction,
                                        "p": probas_pred[ok_idx].tolist(),
                                    }),
                                    global_step=1)

        where_ko = np.where(y_pred != y_true)[0]
        if summary_writer and len(where_ko):
            # Looking for KO but with most confidence
            ko_idx = sorted(where_ko,
                            key=lambda x: probas_pred[x][y_pred[x]],
                            reverse=True)[0]
            ko_sentence = query_data_list[ko_idx]['sentence']
            ko_prediction = ix_to_class[y_pred[ko_idx]]
            ko_label = query_data_list[ko_idx]['label']
            summary_writer.add_text(tag=tag,
                                    text_string=json.dumps({
                                        "sentence": ko_sentence,
                                        "true_label": ko_label,
                                        "predicted_label": ko_prediction,
                                        "p": probas_pred[ko_idx].tolist(),
                                    }),
                                    global_step=2)

        loss = loss_fn(input=logits, target=query_labels)
        acc = (y_hat == query_labels).float().mean()

        return {"loss": loss.item(), "acc": acc.item()}
Example #7
    def train_model(self,
                    data_dict: Dict[str, List[str]],
                    summary_writer: SummaryWriter = None,
                    n_epoch: int = 400,
                    batch_size: int = 16,
                    log_every: int = 10):
        self.train()

        training_classes = sorted(set(data_dict.keys()))
        n_training_classes = len(training_classes)
        class_to_ix = {c: ix for ix, c in enumerate(training_classes)}
        training_data_list = [{
            "sentence": sentence,
            "label": label
        } for label, sentences in data_dict.items() for sentence in sentences]

        training_matrix = None
        training_classifier = None

        if self.is_pp:
            training_matrix = torch.randn(n_training_classes,
                                          self.hidden_dim,
                                          requires_grad=True,
                                          device=device)
            optimizer = torch.optim.Adam(list(self.parameters()) +
                                         [training_matrix],
                                         lr=2e-5)
        else:
            training_classifier = nn.Linear(
                in_features=self.hidden_dim,
                out_features=n_training_classes).to(device)
            optimizer = torch.optim.Adam(
                list(self.parameters()) +
                list(training_classifier.parameters()),
                lr=2e-5)

        n_samples = len(training_data_list)
        loss_fn = nn.CrossEntropyLoss()
        global_step = 0

        # Metrics
        training_losses = list()
        training_accuracies = list()

        for _ in tqdm.tqdm(range(n_epoch)):
            random.shuffle(training_data_list)
            for ix in tqdm.tqdm(range(0, n_samples, batch_size)):
                optimizer.zero_grad()
                torch.cuda.empty_cache()

                batch_items = training_data_list[ix:ix + batch_size]
                batch_sentences = [d['sentence'] for d in batch_items]
                batch_labels = torch.Tensor([
                    class_to_ix[d['label']] for d in batch_items
                ]).long().to(device)
                z = self.encoder(batch_sentences)
                if self.is_pp:
                    if self.metric == "cosine":
                        z = cosine_similarity(z, training_matrix) * 5
                    elif self.metric == "euclidean":
                        z = -euclidean_dist(z, training_matrix)
                    else:
                        raise NotImplementedError
                else:
                    z = self.dropout(z)
                    z = training_classifier(z)
                loss = loss_fn(input=z, target=batch_labels)
                acc = (z.argmax(1) == batch_labels).float().mean()
                loss.backward()
                optimizer.step()

                global_step += 1
                training_losses.append(loss.item())
                training_accuracies.append(acc.item())
                if (global_step % log_every) == 0:
                    if summary_writer:
                        summary_writer.add_scalar(
                            tag="loss",
                            global_step=global_step,
                            scalar_value=np.mean(training_losses))
                        summary_writer.add_scalar(
                            tag="acc",
                            global_step=global_step,
                            scalar_value=np.mean(training_accuracies))
                    # Empty metrics
                    training_losses = list()
                    training_accuracies = list()
Example #8
    def test_model_ARSC(self,
                        data_path: str,
                        n_iter: int = 1000,
                        valid_summary_writer: SummaryWriter = None,
                        test_summary_writer: SummaryWriter = None,
                        eval_every: int = 100):
        self.eval()

        tasks = get_ARSC_test_tasks(prefix=data_path)
        metrics = list()
        logger.info("Embedding sentences...")
        sentences_to_embed = [
            s for task in tasks
            for sentences_lists in task['xs'] + task['x_test'] + task['x_valid']
            for s in sentences_lists
        ]

        # sentence_to_embedding_dict = {s: np.random.randn(768) for s in tqdm.tqdm(sentences_to_embed)}
        sentence_to_embedding_dict = {
            s: self.encoder.forward([s]).cpu().detach().numpy().squeeze()
            for s in tqdm.tqdm(sentences_to_embed)
        }
        for ix_task, task in enumerate(tasks):
            task_metrics = list()

            n_episode_classes = 2
            loss_fn = nn.CrossEntropyLoss()
            episode_matrix = None
            episode_classifier = None
            if self.is_pp:
                with torch.no_grad():
                    init_matrix = np.array([[
                        sentence_to_embedding_dict[sentence]
                        for sentence in task["xs"][c]
                    ] for c in range(n_episode_classes)]).mean(1)

                episode_matrix = torch.Tensor(init_matrix).to(device)
                episode_matrix.requires_grad = True
                optimizer = torch.optim.Adam([episode_matrix], lr=2e-5)
            else:
                episode_classifier = nn.Linear(
                    in_features=self.hidden_dim,
                    out_features=n_episode_classes).to(device)
                optimizer = torch.optim.Adam(list(
                    episode_classifier.parameters()),
                                             lr=2e-5)

            # Train on support
            iter_bar = tqdm.tqdm(range(n_iter))
            losses = list()
            accuracies = list()

            for iteration in iter_bar:
                optimizer.zero_grad()

                sentences = [
                    sentence for sentence_list in task["xs"]
                    for sentence in sentence_list
                ]
                labels = torch.Tensor([
                    ix for ix, sl in enumerate(task["xs"]) for _ in sl
                ]).long().to(device)
                batch_embeddings = torch.Tensor([
                    sentence_to_embedding_dict[s] for s in sentences
                ]).to(device)
                # z = self.encoder(sentences)
                z = batch_embeddings

                if self.is_pp:
                    if self.metric == "cosine":
                        z = cosine_similarity(z, episode_matrix) * 5
                    elif self.metric == "euclidean":
                        z = -euclidean_dist(z, episode_matrix)
                    else:
                        raise NotImplementedError
                else:
                    z = self.dropout(z)
                    z = episode_classifier(z)

                loss = loss_fn(input=z, target=labels)
                acc = (z.argmax(1) == labels).float().mean()
                loss.backward()
                optimizer.step()
                iter_bar.set_description(
                    f"{loss.item():.3f} | {acc.item():.3f}")
                losses.append(loss.item())
                accuracies.append(acc.item())

                if (eval_every and (iteration + 1) % eval_every
                        == 0) or (not eval_every and iteration + 1 == n_iter):
                    self.eval()
                    if not self.is_pp:
                        episode_classifier.eval()

                    # --------------
                    #   VALIDATION
                    # --------------
                    valid_query_data_list = [{
                        "sentence": sentence,
                        "label": label
                    } for label, sentences in enumerate(task["x_valid"])
                                             for sentence in sentences]

                    valid_query_labels = torch.Tensor([
                        d['label'] for d in valid_query_data_list
                    ]).long().to(device)
                    logits = list()
                    with torch.no_grad():
                        for ix in range(0, len(valid_query_data_list), 16):
                            batch = valid_query_data_list[ix:ix + 16]
                            batch_sentences = [d['sentence'] for d in batch]
                            batch_embeddings = torch.Tensor([
                                sentence_to_embedding_dict[s]
                                for s in batch_sentences
                            ]).to(device)
                            # z = self.encoder(batch_sentences)
                            z = batch_embeddings

                            if self.is_pp:
                                if self.metric == "cosine":
                                    z = cosine_similarity(z,
                                                          episode_matrix) * 5
                                elif self.metric == "euclidean":
                                    z = -euclidean_dist(z, episode_matrix)
                                else:
                                    raise NotImplementedError
                            else:
                                z = episode_classifier(z)

                            logits.append(z)
                    logits = torch.cat(logits, dim=0)
                    y_hat = logits.argmax(1)

                    valid_loss = loss_fn(input=logits,
                                         target=valid_query_labels)
                    valid_acc = (y_hat == valid_query_labels).float().mean()

                    # --------------
                    #      TEST
                    # --------------
                    test_query_data_list = [{
                        "sentence": sentence,
                        "label": label
                    } for label, sentences in enumerate(task["x_test"])
                                            for sentence in sentences]

                    test_query_labels = torch.Tensor([
                        d['label'] for d in test_query_data_list
                    ]).long().to(device)
                    logits = list()
                    with torch.no_grad():
                        for ix in range(0, len(test_query_data_list), 16):
                            batch = test_query_data_list[ix:ix + 16]
                            batch_sentences = [d['sentence'] for d in batch]
                            batch_embeddings = torch.Tensor([
                                sentence_to_embedding_dict[s]
                                for s in batch_sentences
                            ]).to(device)
                            # z = self.encoder(batch_sentences)
                            z = batch_embeddings

                            if self.is_pp:
                                if self.metric == "cosine":
                                    z = cosine_similarity(z,
                                                          episode_matrix) * 5
                                elif self.metric == "euclidean":
                                    z = -euclidean_dist(z, episode_matrix)
                                else:
                                    raise NotImplementedError
                            else:
                                z = episode_classifier(z)

                            logits.append(z)
                    logits = torch.cat(logits, dim=0)
                    y_hat = logits.argmax(1)

                    test_loss = loss_fn(input=logits, target=test_query_labels)
                    test_acc = (y_hat == test_query_labels).float().mean()

                    # --RETURN METRICS
                    task_metrics.append({
                        "test": {
                            "loss": test_loss.item(),
                            "acc": test_acc.item()
                        },
                        "valid": {
                            "loss": valid_loss.item(),
                            "acc": valid_acc.item()
                        },
                        "step": iteration + 1
                    })
                    # if valid_summary_writer:
                    #     valid_summary_writer.add_scalar(tag=f'loss', global_step=ix_task, scalar_value=valid_loss.item())
                    #     valid_summary_writer.add_scalar(tag=f'acc', global_step=ix_task, scalar_value=valid_acc.item())
                    # if test_summary_writer:
                    #     test_summary_writer.add_scalar(tag=f'loss', global_step=ix_task, scalar_value=test_loss.item())
                    #     test_summary_writer.add_scalar(tag=f'acc', global_step=ix_task, scalar_value=test_acc.item())
            metrics.append(task_metrics)
        return metrics
Example #9
    def loss(self, sample):
        """
        :param sample: {
            "xs": [
                [support_A_1, support_A_2, ...],
                [support_B_1, support_B_2, ...],
                [support_C_1, support_C_2, ...],
                ...
            ],
            "xq": [
                [query_A_1, query_A_2, ...],
                [query_B_1, query_B_2, ...],
                [query_C_1, query_C_2, ...],
                ...
            ]
        }
        :return:
        """
        xs = sample["xs"]  # support
        xq = sample["xq"]  # query

        n_class = len(xs)
        assert len(xq) == n_class
        n_support = len(xs[0])
        n_query = len(xq[0])

        x = [item for xs_ in xs
             for item in xs_] + [item for xq_ in xq for item in xq_]
        z = self.encoder.forward(x)
        z_support = z[:n_class * n_support]
        z_query = z[n_class * n_support:]

        if self.metric == "euclidean":
            similarities = -euclidean_dist(z_query, z_support)
        elif self.metric == "cosine":
            similarities = cosine_similarity(z_query, z_support) * 5
        else:
            raise NotImplementedError

        # Average similarities over each class's support block
        # -> one score per (query, class); higher means more similar
        distances_from_query_to_classes = torch.cat([
            similarities[:, c * n_support:(c + 1) * n_support].mean(1).view(
                1, -1) for c in range(n_class)
        ]).T
        true_labels = torch.zeros_like(distances_from_query_to_classes)

        for ix_class, class_query_sentences in enumerate(xq):
            for ix_sentence, sentence in enumerate(class_query_sentences):
                true_labels[ix_class * n_query + ix_sentence, ix_class] = 1

        loss_fn = nn.CrossEntropyLoss()
        loss_val = loss_fn(distances_from_query_to_classes,
                           true_labels.argmax(1))
        acc_val = (true_labels.argmax(1) ==
                   distances_from_query_to_classes.argmax(1)).float().mean()

        return loss_val, {
            "loss": loss_val.item(),
            "metrics": {
                "acc": acc_val.item(),
                "loss": loss_val.item(),
            },
            "y_hat": distances_from_query_to_classes.argmax(1).cpu().detach().numpy(),
        }
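Unlike the prototype-based examples, this variant averages the query-to-support similarities over each class's support block instead of averaging the embeddings first. A self-contained sketch of that scoring step on random data (shapes and values are illustrative only):

import torch
import torch.nn.functional as F

n_class, n_support, n_query, z_dim = 3, 4, 2, 6
z_support = torch.randn(n_class * n_support, z_dim)
z_query = torch.randn(n_class * n_query, z_dim)

similarities = -torch.cdist(z_query, z_support)      # (n_class * n_query, n_class * n_support)
scores = torch.stack([
    similarities[:, c * n_support:(c + 1) * n_support].mean(1)
    for c in range(n_class)
], dim=1)                                            # (n_class * n_query, n_class)
targets = torch.arange(n_class).repeat_interleave(n_query)
loss = F.cross_entropy(scores, targets)
acc = (scores.argmax(1) == targets).float().mean()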