예제 #1
0
class BiEncoderTopXRetriever:
    """Encodes mentions with a bi-encoder and looks up their nearest KB entries in a FAISS index."""

    def __init__(self, args, vocab, biencoder_onlyfor_encodingmentions, faiss_stored_kb, reader_for_mentions,
                 duidx2encoded_emb):
        """Store the collaborators and put the mention encoder into eval mode.

        Args:
            args: namespace; ``batch_size_for_eval`` and ``search_method`` are read from it.
            vocab: vocabulary used to index the batch iterator.
            biencoder_onlyfor_encodingmentions: model called on each batch to encode mentions.
            faiss_stored_kb: object exposing ``search(queries, k)``.
            reader_for_mentions: dataset reader whose ``read`` yields the mention instances.
            duidx2encoded_emb: stored as-is; not used by the methods of this class.
        """
        self.args = args

        encoder = biencoder_onlyfor_encodingmentions
        self.mention_encoder = encoder
        encoder.eval()

        self.faiss_searcher = faiss_stored_kb
        self.reader_for_mentions = reader_for_mentions

        batcher = BasicIterator(batch_size=self.args.batch_size_for_eval)
        self.sequence_iterator = batcher
        batcher.index_with(vocab)

        # Batches are always moved to device index 0.
        self.cuda_device = 0
        self.duidx2encoded_emb = duidx2encoded_emb

    def biencoder_tophits_retrievaler(self, train_or_dev_or_test_flag, how_many_top_hits_preserved=500):
        """Yield, batch by batch, ``(candidate idxs, mention unique ids, gold idxs)``.

        Args:
            train_or_dev_or_test_flag: value handed to the mention reader's ``read``.
            how_many_top_hits_preserved: number of neighbours requested from FAISS per mention.
        """
        dataset = self.reader_for_mentions.read(train_or_dev_or_test_flag)
        progress = tqdm(
            self.sequence_iterator(dataset, num_epochs=1, shuffle=False),
            total=self.sequence_iterator.get_num_batches(dataset),
        )

        with torch.no_grad():
            for raw_batch in progress:
                device_batch = nn_util.move_to_device(raw_batch, self.cuda_device)
                extracted = self._extract_mention_idx_encoded_emb_and_its_gold_cuidx(batch=device_batch)
                mention_uniq_ids, encoded_mentions, gold_duidxs = extracted
                candidate_cuidxs = self.faiss_topx_retriever(
                    encoded_mentions=encoded_mentions,
                    how_many_top_hits_preserved=how_many_top_hits_preserved,
                )
                yield candidate_cuidxs, mention_uniq_ids, gold_duidxs

    def faiss_topx_retriever(self, encoded_mentions, how_many_top_hits_preserved):
        """Return the index part of the FAISS search result for the given mention vectors.

        When ``args.search_method`` is ``'cossim'`` the rows are normalised along
        dim 1 before searching; any other value searches with the vectors unchanged.

        Args:
            encoded_mentions: numpy array of mention vectors.
            how_many_top_hits_preserved: number of neighbours requested per row.
        Returns:
            The second element of ``faiss_searcher.search(...)``.
        """
        queries = encoded_mentions
        if self.args.search_method == 'cossim':
            unit_rows = normalize(torch.from_numpy(encoded_mentions), dim=1)
            queries = unit_rows.cpu().detach().numpy()

        _distances, candidate_cuidxs = self.faiss_searcher.search(queries, how_many_top_hits_preserved)
        return candidate_cuidxs

    def calc_L2distance(self, h, t):
        """Return the norm of ``h - t`` taken along dim 2."""
        return torch.norm(h - t, dim=2)

    def tonp(self, tsr):
        """Detach a tensor, move it to CPU and convert it to a numpy array."""
        detached = tsr.detach()
        return detached.cpu().numpy()

    def _extract_mention_idx_encoded_emb_and_its_gold_cuidx(self, batch):
        """Run the mention encoder on ``batch`` and return three of its outputs as numpy arrays."""
        out_dict = self.mention_encoder(**batch)
        wanted = ('mention_uniq_id', 'contextualized_mention', 'gold_duidx')
        return tuple(self.tonp(out_dict[key]) for key in wanted)
예제 #2
0
class DataIteratorWrapper:
    """Re-iterable, single-epoch view over ``instances`` batched by a ``BasicIterator``."""

    def __init__(self, vocab: Vocabulary, instances, batch_size, shuffle):
        """Build an instance-caching iterator indexed with ``vocab`` and remember the data to batch."""
        batcher = BasicIterator(batch_size=batch_size, cache_instances=True)
        batcher.index_with(vocab)
        self.data_iter = batcher
        self.instances = instances
        self.shuffle = shuffle

    def __len__(self):
        """Return the number of batches the iterator reports for the stored instances."""
        num_batches = self.data_iter.get_num_batches(self.instances)
        return num_batches

    def __iter__(self):
        """Start a fresh one-epoch pass over the stored instances."""
        one_epoch = self.data_iter(self.instances, num_epochs=1, shuffle=self.shuffle)
        return one_epoch
예제 #3
0
파일: predictor.py 프로젝트: neoTCR/cu-tsp
def run(args):
    """Predict with an archived model over an input dataset and pickle the results.

    Reads ``device``, ``model_path``, ``input_path``, ``batch_size`` and
    ``output_path`` from ``args``. The pickled object is a dict keyed by protein
    id; each value holds ``dcalpha``, ``psi``/``phi`` or ``coords`` depending on
    ``model.target``.
    """
    print('\nArguments:')
    for arg_name, arg_value in vars(args).items():
        print('{}: {}'.format(arg_name, arg_value))
    print()

    if args.device is not None:
        device = args.device
    elif torch.cuda.is_available():
        device = 'cuda'
    else:
        device = 'cpu'

    print('Loading archive ...')
    archive = load_archive(args.model_path)
    config = archive.config.duplicate()
    dataset_reader = DatasetReader.from_params(config["dataset_reader"])
    model = archive.model.to(device).eval()

    print('Loading data ...')
    # Force eager reading so the number of batches can be computed up front.
    dataset_reader.lazy = False
    dataset = dataset_reader.read(args.input_path)
    iterator = BasicIterator(args.batch_size)
    iterator.index_with(model.vocab)
    num_batches = iterator.get_num_batches(dataset)
    data_generator = iterator(dataset, num_epochs=1, shuffle=False)

    def _entry_for(pred, length):
        # Trim one prediction to its length and package it for the model's target.
        if model.target == 'dcalpha':
            square = pred[:length, :length]
            # Mirror the strict upper triangle onto the lower one; the diagonal becomes zero.
            mirrored = np.triu(square, 1) + np.tril(square.transpose(), -1)
            return {'dcalpha': mirrored}
        if model.target == 'angles':
            return {'psi': pred[:length, 0], 'phi': pred[:length, 1]}
        return {'coords': pred[:length]}

    print('Predicting ...')
    output_dict = {}
    with torch.no_grad():
        for batch in Tqdm.tqdm(data_generator, total=num_batches):
            batch = move_to_device(batch, model._get_prediction_device())
            outputs = model(**batch)
            predictions = outputs['predictions'].cpu().numpy()
            per_protein = zip(outputs['protein_id'], outputs['length'], predictions)
            for pid, length, pred in per_protein:
                output_dict[pid] = _entry_for(pred, length)

    print('Writing to {}'.format(args.output_path))
    with open(args.output_path, 'wb') as fout:
        pickle.dump(output_dict, fout)

    print('All done.')
예제 #4
0
def latent(data_file, model_dir, epoch, device, impath):
    """
    Subcommand to analyze the latent space.

    Encodes the premise and the hypothesis of every instance with the model's
    task encoder, measures their cosine similarity and logs the mean similarity
    per label.

    Args:
        data_file: path handed to ``SNLIMetaReader.read``.
        model_dir: forwarded to ``prediction_utils.load_model``.
        epoch: forwarded to ``prediction_utils.load_model``.
        device: forwarded to ``load_model`` and used as the target of ``move_to_device``.
        impath: not used by this function.
    """
    # Prepare dataset
    reader = SNLIMetaReader()
    instances = reader.read(data_file)
    premises = []
    hypotheses = []
    similarities = []
    labels = []
    iterator = BasicIterator(batch_size=128)
    with torch.no_grad():
        model = prediction_utils.load_model(model_dir, epoch, device)
        model.eval()
        iterator.index_with(model.vocab)
        logger.info(f'Iterating over data: {data_file}')
        generator_tqdm = tqdm(iterator(instances, num_epochs=1, shuffle=False),
                              total=iterator.get_num_batches(instances))

        for batch in generator_tqdm:
            batch = nn_util.move_to_device(batch, device)
            # Only the middle element of what `_encode` returns is used.
            # NOTE(review): the third argument (0.0) is presumably a dropout/noise rate - confirm.
            _, zp, _ = model._encode(batch['premise'], model._task_encoder,
                                     0.0)
            _, zh, _ = model._encode(batch['hypothesis'], model._task_encoder,
                                     0.0)
            metadata = batch['metadata']
            premises.extend(
                ' '.join(meta['premise_tokens']) for meta in metadata)
            hypotheses.extend(
                ' '.join(meta['hypothesis_tokens']) for meta in metadata)
            labels.extend(meta['label'] for meta in metadata)
            # NOTE(review): `cosine` is presumably scipy's cosine distance, which needs
            # CPU-convertible inputs - confirm this works when `device` is a GPU.
            for zpe, zhe in zip(zp, zh):
                similarities.append(1 - cosine(zpe, zhe))

    df = pd.DataFrame({
        'sentence1': premises,
        'sentence2': hypotheses,
        'similarity': similarities,
        'label': labels
    })
    # Average only the numeric column: since pandas 2.0 `mean()` no longer drops
    # the string columns silently and raises a TypeError on them instead.
    logger.info(df.groupby('label')[['similarity']].mean())
예제 #5
0
class InKBAllEntitiesEncoder:
    """Encodes every entity produced by a dataset reader into an ``idx -> embedding`` dict."""

    def __init__(self, args, entity_loader_datasetreaderclass, entity_encoder_wrapping_model, vocab):
        """Store the collaborators, build the batch iterator and put the encoder into eval mode.

        Args:
            args: namespace; ``batch_size_for_kb_encoder``, ``entities_path``,
                ``entities_filename``, ``load_entities`` and ``save_entities`` are read from it.
            entity_loader_datasetreaderclass: reader whose ``read('test')`` yields the entities.
            entity_encoder_wrapping_model: model called on each batch.
            vocab: vocabulary used to index the iterator.
        """
        self.args = args
        self.entity_loader_datasetreader = entity_loader_datasetreaderclass
        self.sequence_iterator_for_encoding_entities = BasicIterator(
            batch_size=args.batch_size_for_kb_encoder)
        self.vocab = vocab

        encoder = entity_encoder_wrapping_model
        self.entity_encoder_wrapping_model = encoder
        encoder.eval()

        # Batches are always moved to device index 0.
        self.cuda_device = 0

    def encoding_all_entities(self):
        """Return the ``int idx -> embedding`` mapping, loading or saving the pickle cache as configured.

        With ``args.load_entities`` set the mapping is read from the pickle file and
        nothing is encoded; otherwise every batch is encoded and the result is
        written to the pickle file when ``args.save_entities`` is set.
        """
        batcher = self.sequence_iterator_for_encoding_entities
        dataset = self.entity_loader_datasetreader.read('test')
        batcher.index_with(self.vocab)
        progress = tqdm(batcher(dataset, num_epochs=1, shuffle=False),
                        total=batcher.get_num_batches(dataset))
        print('======Encoding all entites from title and description=====')

        cache_path = os.path.join(self.args.entities_path, self.args.entities_filename)
        if self.args.load_entities:
            return pickle_load_object(cache_path)

        duidx2emb = {}
        with torch.no_grad():
            for raw_batch in progress:
                device_batch = nn_util.move_to_device(raw_batch, self.cuda_device)
                duidxs, embs = self._extract_cuidx_and_its_encoded_emb(device_batch)
                duidx2emb.update((int(duidx), emb) for duidx, emb in zip(duidxs, embs))

        if self.args.save_entities:
            pickle_save_object(duidx2emb, cache_path)
        return duidx2emb

    def tonp(self, tsr):
        """Detach a tensor, move it to CPU and convert it to a numpy array."""
        detached = tsr.detach()
        return detached.cpu().numpy()

    def _extract_cuidx_and_its_encoded_emb(self, batch) -> np.ndarray:
        """Run the entity encoder on ``batch`` and return ``(idxs, embeddings)`` as numpy arrays."""
        out_dict = self.entity_encoder_wrapping_model(**batch)
        idxs = self.tonp(out_dict['gold_duidx'])
        embeddings = self.tonp(out_dict['emb_of_entities_encoded'])
        return idxs, embeddings
예제 #6
0
def evaluate(model: Model, dataset: Dataset, iterator: BasicIterator,
             cuda_device: int, serialization_directory: str) -> Dict[str, Any]:
    """Run ``model`` over ``dataset`` and write its predictions to ``predictions.txt``.

    Args:
        model: model to evaluate; it is switched to eval mode and its
            ``get_metrics()`` must expose ``gold_spans`` and ``predicted_spans``.
        dataset: dataset whose ``instances`` are evaluated, in order.
        iterator: batch iterator used for the single, unshuffled pass.
        cuda_device: device index forwarded to the iterator.
        serialization_directory: directory in which ``predictions.txt`` is written.

    Returns:
        The dict returned by ``model.get_metrics()`` after the pass.

    Raises:
        AssertionError: if the numbers of instances, gold spans and predicted spans
            differ, or if a tag sequence does not match its sentence length.
    """
    model.eval()

    generator = iterator(dataset,
                         num_epochs=1,
                         cuda_device=cuda_device,
                         shuffle=False,
                         for_training=False)
    logger.info("Iterating over dataset")
    generator_tqdm = tqdm.tqdm(generator,
                               total=iterator.get_num_batches(dataset))

    for batch in generator_tqdm:
        # The forward pass is run for its effect on the model's metrics.
        model(**batch)
        metrics = model.get_metrics()
        description = ', '.join([
            "%s: %.5f" % (name, value)
            for name, value in metrics.items() if "overall" in name
        ]) + " ||"
        generator_tqdm.set_description(description)

    metrics = model.get_metrics()
    golds = metrics["gold_spans"]
    predictions = metrics["predicted_spans"]
    assert len(dataset.instances) == len(golds) == len(predictions)

    prediction_file_path = os.path.join(serialization_directory,
                                        "predictions.txt")
    logger.info("Writing predictions in CoNLL-like format to %s",
                prediction_file_path)

    # The context manager guarantees the file is flushed and closed, even when
    # one of the assertions below fails part-way through.
    with open(prediction_file_path, "w+") as prediction_file:
        for instance, gold, prediction in tqdm.tqdm(
                zip(dataset.instances, golds, predictions)):
            fields = instance.fields
            if "targets" in fields:
                verb_index = fields["targets"].labels.index(1)
            elif "verb_indicator" in fields:
                try:
                    # Most sentences have a verbal predicate, but not all.
                    verb_index = fields["verb_indicator"].labels.index(1)
                except ValueError:
                    verb_index = None
            else:
                verb_index = None

            # Optional fields; each stays None when the instance lacks it.
            frame = None
            if "frame" in fields:
                frame = fields["frame"].tokens[0].text
            gf = None
            if "gf" in fields:
                gf = [g.text for g in fields["gf"].tokens]
            pt = None
            if "pt" in fields:
                pt = [p.text for p in fields["pt"].tokens]

            sentence = [token.text for token in fields["tokens"].tokens]

            gold_tags = convert_spans_to_seq(gold, len(sentence))
            predicted_tags = convert_spans_to_seq(prediction, len(sentence))
            assert len(sentence) == len(gold_tags) == len(predicted_tags)

            write_to_conll_eval_file(
                prediction_file,
                verb_index,
                sentence,
                predicted_tags,
                gold_tags,
                frame,
                gf,
                pt)

    return model.get_metrics()
예제 #7
0
# In[ ]:

# Keep only the parameters whose name is accepted by `use` (defined outside this excerpt).
filtered_params = [p for name, p in model.named_parameters() if use(name)]

# In[ ]:

# Adam over the selected parameters only, with weight decay disabled.
optimizer = torch.optim.Adam(filtered_params, lr=config.lr, weight_decay=0.)

# In[ ]:

# NOTE: CosineWithRestarts is imported here but not used in the visible cells.
from allennlp.training.learning_rate_schedulers import SlantedTriangular, CosineWithRestarts
# Use the slanted triangular LR scheduler to prevent an initial spike in the consistency loss.
# Its step count per epoch is the number of batches the iterator reports for the training set.
lr_sched = SlantedTriangular(
    optimizer,
    num_epochs=config.epochs,
    num_steps_per_epoch=iterator.get_num_batches(train_ds))

# In[ ]:

from allennlp.training import TrainerWithCallbacks

trainer = TrainerWithCallbacks(
    model=model,
    optimizer=optimizer,
    iterator=iterator,
    train_dataset=train_ds,
    validation_dataset=val_ds,
    callbacks=[stat_rec, wdd, monitor],
    learning_rate_scheduler=lr_sched,
    #     serialization_dir=DATA_ROOT / "debias_ckpts",
    cuda_device=0 if torch.cuda.is_available() else -1,
예제 #8
0
class EvaluatorClass:
    """Ranks tail entities for (head, relation) queries and builds the entity embedding matrix."""

    def __init__(self, args, model, vocab, er_vocab, all_entity_num,
                 entity_dim):
        """
        Args:
            args: namespace providing ``evaluate_on_cpu``, ``cuda_device`` and ``batch_size``.
            model: model to evaluate; it is switched to eval mode and its
                ``evaluate_flag`` counter is incremented.
            vocab: vocabulary used to index the batch iterator.
            er_vocab: mapping from ``(head_idx, relation_idx)`` to the entity indices
                whose scores are zeroed before ranking.
            all_entity_num: number of rows of the entity embedding matrix.
            entity_dim: number of columns of the entity embedding matrix.
        """
        self.args = args
        self.evaluate_on_cpu = self.args.evaluate_on_cpu
        self.is_cuda_available = torch.cuda.is_available()
        self.er_vocab = er_vocab
        self.cuda_device = int(args.cuda_device)
        self.model = model
        self.model.eval()
        self.model.evaluate_flag += 1
        self.sequence_iterator = BasicIterator(batch_size=args.batch_size)
        self.sequence_iterator.index_with(vocab)
        self.all_entity_num = all_entity_num
        self.entity_dim = entity_dim

    def evaluation(self, ds):
        """Print Hits@1/3/10, mean rank and mean reciprocal rank of the gold tails in ``ds``.

        For every example the scores of the entities listed in ``er_vocab`` for its
        (head, relation) pair are set to 0.0, except the score of the gold tail,
        before the entities are sorted by descending score.
        """
        pred_generator = self.sequence_iterator(ds,
                                                num_epochs=1,
                                                shuffle=False)
        self.model.eval()
        if self.evaluate_on_cpu:
            self.model.cpu()
        pred_generator_tqdm = tqdm(
            pred_generator, total=self.sequence_iterator.get_num_batches(ds))

        # hits[k] collects 1.0/0.0 flags for "gold tail ranked within the top k+1".
        hits = [[] for _ in range(10)]
        ranks = []

        with torch.no_grad():
            for batch in pred_generator_tqdm:
                # Only the device transfer depends on the CPU flag; scoring and
                # ranking must run in both modes.
                if not self.evaluate_on_cpu:
                    batch = nn_util.move_to_device(batch, self.cuda_device)
                preds = self.model.eval_all_entities(batch['head'],
                                                     batch['relation'])
                # batch * all ent size
                batch_size = batch['head'].size(0)
                tail_ids = [
                    batch['tail'][j][0].int().item() for j in range(batch_size)
                ]
                for j, tail_id in enumerate(tail_ids):
                    filt = self.er_vocab[(batch['head'][j][0].int().item(),
                                          batch['relation'][j][0].int().item())]
                    # Keep the gold tail's score while zeroing the filtered entities.
                    target_value = preds[j, tail_id].item()
                    preds[j, filt] = 0.0
                    preds[j, tail_id] = target_value

                _, sort_idxs = torch.sort(preds, dim=1, descending=True)
                sort_idxs = sort_idxs.cpu().numpy()
                for j, tail_id in enumerate(tail_ids):
                    # 0-based position of the gold tail in the sorted entity list.
                    rank = np.where(sort_idxs[j] == tail_id)[0][0]
                    ranks.append(rank + 1)

                    for hits_level in range(10):
                        hits[hits_level].append(
                            1.0 if rank <= hits_level else 0.0)

        print('\n ###### RESULTS ######')
        print('Hits @10: {0}'.format(np.mean(hits[9])))
        print('Hits @3: {0}'.format(np.mean(hits[2])))
        print('Hits @1: {0}'.format(np.mean(hits[0])))
        print('Mean rank: {0}'.format(np.mean(ranks)))
        print('Mean reciprocal rank: {0}'.format(np.mean(1. /
                                                         np.array(ranks))))
        print('###### ###### ######')

        print('\n### EVALUATION FINISHED ###\n')

    def add_Embclass_2_model(self, E_numpy_weight):
        """Attach ``E_numpy_weight`` to the model as a pretrained ``nn.Embedding`` named ``embed_for_model``."""
        embedding_tensor = torch.Tensor(E_numpy_weight)
        embedding_tensor = embedding_tensor.float()
        # The weights go to the GPU only when CUDA exists and CPU evaluation is off.
        if self.is_cuda_available and not self.evaluate_on_cpu:
            embedding_tensor = embedding_tensor.cuda()
        embed_for_model = nn.Embedding.from_pretrained(embedding_tensor)
        self.model.embed_for_model = embed_for_model  # add class variable to model

    def get_E_numpy_from_alldataset(self, dslist):
        """Encode the head and tail entities of every dataset in ``dslist`` into one matrix.

        The first vector seen for an entity index is kept; later ones are ignored.

        Returns:
            A float32 array of shape ``(all_entity_num, entity_dim)``; rows of
            entities that never occur stay zero.
        """
        entity_symbolidx_2_KGemb_through_linear = {}

        for ds in dslist:
            pred_generator = self.sequence_iterator(ds,
                                                    num_epochs=1,
                                                    shuffle=False)
            self.model.eval()
            if self.evaluate_on_cpu:
                self.model.cpu()
            pred_generator_tqdm = tqdm(
                pred_generator,
                total=self.sequence_iterator.get_num_batches(ds))

            with torch.no_grad():
                for batch in pred_generator_tqdm:
                    # Only the device transfer depends on the CPU flag; the
                    # embeddings must be collected in both modes.
                    if not self.evaluate_on_cpu:
                        batch = nn_util.move_to_device(batch, self.cuda_device)
                    hidx, hvec, tidx, tvec = self._extract_head_or_tail_and_its_vectordata(
                        batch)
                    # batch, batch * dim, batch, batch * dim
                    for head_idx, head_ent_vect in zip(hidx, hvec):
                        entity_symbolidx_2_KGemb_through_linear.setdefault(
                            head_idx, head_ent_vect)
                    for tail_idx, tail_ent_vect in zip(tidx, tvec):
                        entity_symbolidx_2_KGemb_through_linear.setdefault(
                            tail_idx, tail_ent_vect)

        E = self.entity_symbolidx_2_KGemb_through_linear__2__E(
            entity_symbolidx_2_KGemb_through_linear)
        return E

    def entity_symbolidx_2_KGemb_through_linear__2__E(
            self, entity_symbolidx_2_KGemb_through_linear):
        """Scatter an ``entity idx -> vector`` dict into a zero-initialised float32 matrix."""
        KBemb = np.zeros(
            (self.all_entity_num, self.entity_dim)).astype('float32')
        for ent_idx, vec in entity_symbolidx_2_KGemb_through_linear.items():
            KBemb[ent_idx] = vec
        print('converted emb', len(entity_symbolidx_2_KGemb_through_linear),
              '/', self.all_entity_num)
        return KBemb

    ### misc ###

    def idx2int_tensor(self, data):
        """Cast a tensor to int and return it as a numpy array on the CPU."""
        return data.int().cpu().detach().numpy()

    def vector2tensor(self, data):
        """Return a tensor as a numpy array on the CPU (despite the name, the result is numpy)."""
        return data.cpu().detach().numpy()

    def get_W_numpy(self):
        """Return ``model.get_W()``."""
        return self.model.get_W()

    def get_R_numpy(self):
        """Return ``model.get_R()``."""
        return self.model.get_R()

    def tonp(self, tsr):
        """Detach a tensor, move it to CPU and convert it to a numpy array."""
        return tsr.detach().cpu().numpy()

    def _extract_head_or_tail_and_its_vectordata(self, batch) -> tuple:
        '''
        Run the model on ``batch`` and pull out entity indices and their vectors.

        :param batch: keyword arguments for the model's forward pass.
        :return: ``(head_idx, head_vectors, tail_idx, tail_vectors)`` as numpy arrays;
            the two index arrays are always 1-D.
        '''
        out_dict = self.model(**batch)
        # reshape(-1) rather than squeeze(): squeeze() collapses a batch of one
        # into a 0-d array, which cannot be zipped with the vectors.
        # NOTE(review): assumes 'heads'/'tails' hold one index per example - confirm.
        head_idx = self.idx2int_tensor(out_dict['heads']).reshape(-1)
        tail_idx = self.idx2int_tensor(out_dict['tails']).reshape(-1)
        head_sents = self.vector2tensor(out_dict['heads_sent_encoded2KGemb'])
        tail_sents = self.vector2tensor(out_dict['tails_sent_encoded2KGemb'])

        return head_idx, head_sents, tail_idx, tail_sents
예제 #9
0
class BaseDataReader(DatasetReader):
    """Dataset reader that loads the train/dev/test splits eagerly and batches them on request."""

    def __init__(self,
                 data_dir,
                 batch_size: int,
                 shuffle=False,
                 small_data=False,
                 train_name='train.json',
                 dev_name='dev.json',
                 test_name='test.json'):
        """Read the three splits from ``data_dir`` and build the vocabulary and iterator.

        The vocabulary is built from the train and dev instances only; the test
        split is read afterwards.
        """
        super().__init__()
        self.data_dir = data_dir

        train_path = os.path.join(data_dir, train_name)
        print('loading dataset: ' + train_path)
        self.train_dataset = self.read(train_path)

        dev_path = os.path.join(data_dir, dev_name)
        print('loading val dataset: ' + dev_path)
        self.validation_dataset = self.read(dev_path)

        vocab_instances = self.train_dataset + self.validation_dataset
        self.vocab = Vocabulary.from_instances(vocab_instances)

        test_path = os.path.join(data_dir, test_name)
        print('loading test dataset:' + test_path)
        self.test_dataset = self.read(test_path)

        self.batch_size = batch_size
        self.shuffle = shuffle
        self.small_data = small_data

        batcher = BasicIterator(batch_size=batch_size, cache_instances=True)
        self.iterator = batcher
        batcher.index_with(self.vocab)

    def get_iterator_and_num_batches(self, data_type):
        """Return ``(one-epoch batch generator, number of batches)`` for a split.

        ``data_type`` must be ``'train'``, ``'dev'`` or ``'test'``; anything else
        raises ``KeyError``. Only the train split is ever shuffled, and with
        ``small_data`` set only its first tenth is used.
        """
        splits = {
            'train': self.train_dataset,
            'dev': self.validation_dataset,
            'test': self.test_dataset
        }
        dataset = splits[data_type]
        is_train = data_type == 'train'

        if self.small_data and is_train:
            keep = len(dataset) // 10
            dataset = dataset[:keep]

        if is_train:
            shuffle = self.shuffle
        else:
            shuffle = False

        batches = self.iterator(dataset, shuffle=shuffle, num_epochs=1)
        num_batches = self.iterator.get_num_batches(dataset)
        return batches, num_batches

    def _read(self, file_path: str) -> Iterable[Instance]:
        """
        Read the instances stored at ``file_path`` and return them as an
        `Iterable` (a list or a generator).

        Subclasses must implement this; a generator is strongly encouraged so
        that a dataset can be read lazily when the user asks for it.
        """
        raise NotImplementedError

    def text_to_instance(self, *inputs) -> Instance:
        """
        Turn textual input into an ``Instance``, doing whatever tokenization or
        processing that requires.

        The main intended consumer is a
        :class:`~allennlp.service.predictors.predictor.Predictor`, which receives
        text as a JSON object and has to turn it into model input. Keeping this
        logic here lets :func:`_read` and prediction-time code share it, so new
        data is processed exactly as the training data was.

        The input types are deliberately loose: the ``Predictor`` has to know
        enough about the concrete ``DatasetReader`` to pass the right
        information. Subclasses must implement this.
        """
        raise NotImplementedError
예제 #10
0
class BasePredictionClass(object):
    """
    This (Abstract) class is devoted to extracting predictions,
    managing storage of predictions, and the optional
    visualization of predictions
    """
    def __init__(self, vocab, reader, visualize=False):
        """
        Arguments:
            vocab : Vocabulary used to index the batch iterator
            reader : Dataset reader; its ``get_label_indexer()`` supplies the
                label indexer
            visualize (bool) : Whether ``get_predictions`` may call
                ``visualize``
        """
        self._vocab = vocab
        self._iterator = BasicIterator(batch_size=32)
        self._iterator.index_with(self._vocab)
        self._reader = reader
        self._indexer = self._reader.get_label_indexer()
        self._visualize = visualize

    def _get_text_from_instance(self, instance: Instance) -> List[str]:
        """Helper function to extract text from an instance
        """
        return [token.text for token in instance.fields['tokens'].tokens]

    def get_segmentation_from_prediction(self, *args, **kwargs) -> List[str]:
        """Turn one prediction into segmentation labels (child classes implement this)"""
        raise NotImplementedError("The child class implements this")

    def visualize(self, *args, **kwargs):
        """Render the predictions (child classes implement this)"""
        raise NotImplementedError("The child class implements this")

    def _get_filtered_set(self):
        """
        The set of words/symbols to be filtered out
        """
        return set()

    def get_predictions(self,
                        instances: List[Instance],
                        model: Model,
                        cuda_device: int = -1,
                        prediction_file: Optional[str] = None,
                        visualization_file: Optional[str] = None,
                        verbose: bool = False) -> List[Dict]:
        """
        We use this function to get predictions
        We use a basic iterator, since a bucket iterator shuffles
        data, even for shuffle=False

        Arguments:
            instances (List[Instance]) : The list of instances for inference
            model (Model) : The model being used for predictions
            cuda_device (int) : The cuda device being used for processing
            prediction_file (Optional[str]) : If given and non-empty, the
                predictions are dumped to this path as JSON
            visualization_file (Optional[str]) : If given and non-empty, and
                visualization was enabled at construction, it is passed to
                ``visualize`` together with the predictions
            verbose (bool) : Log accuracies and such

        Returns:
            predictions (List[Dict]) : The predictions. Each contains the
                following keys
                * text (List[str]): The tokens
                * pred (List[List]): The predicted labels and
                    probs. Can potentially have multiple labels being
                    predicted
                * gold (List[str]): The gold labels
                    can potentially have multiple gold labels
                * pred_labels (List[str]): Predicted labels for segmentation
                    Note that this is produced by
                    ``get_segmentation_from_prediction`` of the child class
                * attn (Dict[str, List[float]]) : A dictionary mapping tags to
                    attention values
                * gold_labels : The gold labels for segmentation
        """
        iterator = self._iterator(instances,
                                  num_epochs=1,
                                  shuffle=False,
                                  cuda_device=cuda_device,
                                  for_training=False)
        model.eval()
        num_batches = self._iterator.get_num_batches(instances)
        inference_generator_tqdm = Tqdm.tqdm(iterator, total=num_batches)
        predictions = []
        # Position in `instances` of the example being processed; relies on the
        # iterator yielding the examples in their original order.
        index = 0
        # Per-tag confusion counts, updated once per instance.
        matrix = {
            self._indexer.ix2tags[ix]: {
                "tp": 0.,
                "fp": 0,
                "fn": 0.,
                "tn": 0.
            }
            for ix in range(len(self._indexer.ix2tags))
        }

        for batch in inference_generator_tqdm:
            # Currently I don't support multi-gpu data parallel
            output_dict = model.decode(model(**batch))
            for ix in range(len(output_dict["preds"])):
                instance = instances[index]
                text = self._get_text_from_instance(instance)
                pred = output_dict["preds"][ix]
                gold = [
                    self._indexer.get_tag(label)
                    for label in instance.fields['labels'].labels
                ]
                attn = output_dict["attentions"][ix]
                gold_labels = instance.fields['tags'].labels
                assert all(len(attn[x]) == len(text) for x in attn)
                gold_labels = self._indexer.extract_relevant(gold_labels)
                pred_labels = self.get_segmentation_from_prediction(
                    text=text, preds_probs=pred, attns=attn)
                assert len(pred_labels) == len(gold_labels) == len(text)
                gold_set = set(gold)
                # Built directly from the (label, prob) pairs so that an
                # instance with no predicted label yields an empty set instead
                # of failing to unpack.
                pred_set = {label for label, _ in pred}
                for tag in matrix:
                    if tag in gold_set and tag in pred_set:
                        matrix[tag]["tp"] += 1
                    elif tag not in gold_set and tag in pred_set:
                        matrix[tag]["fp"] += 1
                    elif tag in gold_set and tag not in pred_set:
                        matrix[tag]["fn"] += 1.
                    else:
                        matrix[tag]["tn"] += 1.
                # Plain floats keep the record JSON-serializable.
                preds = [[x[0], float(x[1])] for x in pred]
                prediction = {
                    "text": text,
                    "pred": preds,
                    "gold": gold,
                    "attn": attn,
                    "pred_labels": pred_labels,
                    "gold_labels": gold_labels
                }
                predictions.append(prediction)
                index += 1
        if prediction_file is not None and prediction_file != "":
            with open(prediction_file, "w") as f:
                json.dump(predictions, f, ensure_ascii=True, indent=4)
        if visualization_file is not None and self._visualize and \
                visualization_file != "":
            self.visualize(predictions, visualization_file)
        if verbose:
            accs = []
            for tag, counts in matrix.items():
                total = sum(counts.values())
                # With no instances every count is zero; report 0 rather than
                # dividing by zero.
                acc = (counts["tp"] + counts["tn"]) / total * 100. \
                    if total else 0.
                logger.info(f"Tag: {tag}, Acc: {acc:.2f}")
                accs.append(acc)
            avg_acc = sum(accs) / len(accs) if accs else 0.
            logger.info(f"Average ACC: {avg_acc:.2f}")
            # The returned scores are not used here.
            fscore_from_preds(predictions, False)
        return predictions
예제 #11
0
def evaluate(model, evaluation_dataset, batch_size, cuda):
    """
    Evaluate a span-prediction model on an evaluation dataset.

    The model is put into eval mode for the pass and switched back to train
    mode before returning.

    Returns a tuple of (summed batch loss divided by the number of batches,
    span-start accuracy, span-end accuracy, span accuracy, EM, F1).
    """
    # Evaluation mode turns off dropout and such.
    model.eval()

    # Metric accumulators.
    span_start_accuracy = CategoricalAccuracy()
    span_end_accuracy = CategoricalAccuracy()
    span_accuracy = BooleanAccuracy()
    squad_metrics = SquadEmAndF1()

    # One unshuffled pass over the data in fixed-size batches.
    evaluation_iterator = BasicIterator(batch_size=batch_size)
    num_evaluation_batches = evaluation_iterator.get_num_batches(
        evaluation_dataset)
    device_index = 0 if cuda else -1
    batches = evaluation_iterator(evaluation_dataset,
                                  num_epochs=1,
                                  shuffle=False,
                                  cuda_device=device_index,
                                  for_training=False)
    evaluation_generator = tqdm(batches,
                                total=num_evaluation_batches,
                                leave=False)

    batch_losses = 0
    for batch in evaluation_generator:
        # Inputs and gold answers for this batch.
        passage = batch["passage"]["tokens"]
        question = batch["question"]["tokens"]
        gold_start = batch["span_start"]
        gold_end = batch["span_end"]
        metadata = batch.get("metadata", {})

        # Forward pass: raw logits plus the values fed to the NLL loss.
        output_dict = model(passage, question)
        start_logits = output_dict["start_logits"]
        end_logits = output_dict["end_logits"]
        softmax_start_logits = output_dict["softmax_start_logits"]
        softmax_end_logits = output_dict["softmax_end_logits"]

        flat_start = gold_start.view(-1)
        flat_end = gold_end.view(-1)

        # Loss is the sum of the start-index and end-index terms.
        loss = nll_loss(softmax_start_logits, flat_start)
        loss += nll_loss(softmax_end_logits, flat_end)
        batch_losses += loss.data[0]

        # Per-position accuracies.
        span_start_accuracy(start_logits, flat_start)
        span_end_accuracy(end_logits, flat_end)
        # Best span according to the logits, scored against the gold span.
        best_span = get_best_span(start_logits, end_logits)
        gold_span = torch.stack([gold_start, gold_end], -1)
        span_accuracy(best_span, gold_span)
        # EM and F1 scores.
        calculate_em_f1(best_span, metadata, passage.size(0), squad_metrics)

    # Back to training mode.
    model.train()

    # Read the accumulated metric values.
    average_loss = batch_losses / num_evaluation_batches
    average_span_start_accuracy = span_start_accuracy.get_metric()
    average_span_end_accuracy = span_end_accuracy.get_metric()
    average_span_accuracy = span_accuracy.get_metric()
    average_em, average_f1 = squad_metrics.get_metric()
    return (average_loss, average_span_start_accuracy,
            average_span_end_accuracy, average_span_accuracy, average_em,
            average_f1)
예제 #12
0
def evaluate(model, evaluation_dataset, batch_size, vocab, cuda):
    """
    Evaluate a model on an evaluation dataset.

    The model is switched to evaluation mode, run once over
    ``evaluation_dataset`` (unshuffled) with gradient tracking disabled,
    and switched back to train mode before returning.

    Args:
        model: Called as ``model(passage, question)``; must return a dict
            with the keys ``start_logits``, ``end_logits``,
            ``softmax_start_logits`` and ``softmax_end_logits``.
        evaluation_dataset: Instances to evaluate on. Each batch must
            provide ``passage``/``question`` token tensors plus
            ``span_start`` and ``span_end``; ``metadata`` is optional.
        batch_size: Number of instances per evaluation batch.
        vocab: Vocabulary used to index the instances.
        cuda: If truthy, every batch is moved to CUDA device 0; otherwise
            batches are kept on the CPU (device -1).
    Returns:
        A 6-tuple ``(loss, span_start_accuracy, span_end_accuracy,
        span_accuracy, em, f1)`` where ``loss`` is the summed batch loss
        divided by the number of evaluation batches.
    '''
    """
    # Set model to evaluation mode (turns off dropout and such)
    model.eval()

    # Create objects for calculating metrics.
    span_start_accuracy = CategoricalAccuracy()
    span_end_accuracy = CategoricalAccuracy()
    span_accuracy = BooleanAccuracy()
    squad_metrics = SquadEmAndF1()

    # Build a plain (non-bucketing) iterator and index the instances with
    # the vocabulary, which converts string tokens to numerical indices.
    evaluation_iterator = BasicIterator(batch_size=batch_size)
    evaluation_iterator.index_with(vocab)

    # Single, unshuffled pass over the evaluation batches.
    num_evaluation_batches = evaluation_iterator.get_num_batches(
        evaluation_dataset)
    evaluation_generator = tqdm(evaluation_iterator(evaluation_dataset,
                                                    num_epochs=1,
                                                    shuffle=False),
                                total=num_evaluation_batches,
                                leave=False)

    device = 0 if cuda else -1
    batch_losses = 0
    # Nothing is back-propagated during evaluation, so skip building the
    # autograd graph; this only saves memory and time.
    with torch.no_grad():
        for batch in evaluation_generator:
            # Move the data to cuda if requested.
            batch = move_to_device(batch, cuda_device=device)

            # Extract the relevant data from the batch.
            passage = batch["passage"]["tokens"]
            question = batch["question"]["tokens"]
            span_start = batch["span_start"]
            span_end = batch["span_end"]
            metadata = batch.get("metadata", {})
            gold_start = span_start.view(-1)
            gold_end = span_end.view(-1)

            # Run data through model to get start and end logits.
            output_dict = model(passage, question)
            start_logits = output_dict["start_logits"]
            end_logits = output_dict["end_logits"]
            softmax_start_logits = output_dict["softmax_start_logits"]
            softmax_end_logits = output_dict["softmax_end_logits"]

            # Calculate loss for start and end indices.
            loss = nll_loss(softmax_start_logits, gold_start)
            loss += nll_loss(softmax_end_logits, gold_end)
            batch_losses += loss.item()

            # Calculate categorical span start and end accuracy.
            span_start_accuracy(start_logits, gold_start)
            span_end_accuracy(end_logits, gold_end)
            # Compute the best span, and calculate overall span accuracy.
            best_span = get_best_span(start_logits, end_logits)
            span_accuracy(best_span, torch.cat([span_start, span_end], -1))
            # Calculate EM and F1 scores
            calculate_em_f1(best_span, metadata, passage.size(0),
                            squad_metrics)

    # Set the model back to train mode.
    model.train()

    # Extract the values from the metrics objects
    average_span_start_accuracy = span_start_accuracy.get_metric()
    average_span_end_accuracy = span_end_accuracy.get_metric()
    average_span_accuracy = span_accuracy.get_metric()
    average_em, average_f1 = squad_metrics.get_metric()
    return (batch_losses / num_evaluation_batches, average_span_start_accuracy,
            average_span_end_accuracy, average_span_accuracy, average_em,
            average_f1)
예제 #13
0
def _load_pickle(path):
    """Unpickle and return the object stored at ``path``, closing the file."""
    with open(path, 'rb') as handle:
        return pickle.load(handle)


def main():
    """
    Run the evidence classifier and the predictor end to end on the test set.

    Steps:
      1. Load the pickled train/val/test instances and build a vocabulary
         from all three splits.
      2. Build the evidence classifier and the predictor on top of frozen
         BERT embeddings and load their checkpoints.
      3. Score the test instances with the evidence classifier and save the
         scores to ``sentence_ev_probs.p``, or load previously saved scores
         from ``--probs``.
      4. For every test item keep the ``--k`` highest-scoring windows of
         three consecutive sentences as its evidence text.
      5. Evaluate the predictor on that evidence and print the metrics.
    """
    parser = argparse.ArgumentParser(description='Evidence sentence classifier')
    parser.add_argument('--k', type=int, default=1,
                        help='number of evidence paragraphs to pick from the classifier (default: 1)')
    parser.add_argument('--probs', type=str, default=None,
                        help='Pickled sentence probs file (default: None)')
    args = parser.parse_args()

    with torch.no_grad():
        bert_token_indexer = {'bert': PretrainedBertIndexer('scibert/vocab.txt', max_pieces=512)}

        pipeline_train = _load_pickle('data/train_instances.p')
        pipeline_val = _load_pickle('data/val_instances.p')
        pipeline_test = _load_pickle('data/test_instances.p')

        pipeline_reader = PipelineDatasetReader(bert_token_indexer)
        p_train = pipeline_reader.read(pipeline_train)
        p_val = pipeline_reader.read(pipeline_val)
        p_test = pipeline_reader.read(pipeline_test)

        p_vocab = Vocabulary.from_instances(p_train + p_val + p_test)

        bert_token_embedding = PretrainedBertEmbedder(
            'scibert/weights.tar.gz', requires_grad=False
        )

        word_embeddings = BasicTextFieldEmbedder(
            {"bert": bert_token_embedding},
            {"bert": ['bert']},
            allow_unmatched_keys=True
        )

        ev_classifier = Classifier(word_embeddings=word_embeddings,
                                   vocab=p_vocab,
                                   loss='bce',
                                   hinge_margin=0)
        predictor = Oracle(word_embeddings=word_embeddings,
                           vocab=p_vocab)

        # Use GPU 0 when one is available, otherwise stay on the CPU (-1).
        cuda_device = 0 if torch.cuda.is_available() else -1
        if cuda_device >= 0:
            ev_classifier = ev_classifier.cuda()
            predictor = predictor.cuda()

        # Deserialize the checkpoints onto the CPU so that loading also works
        # on a machine without CUDA; load_state_dict then copies the weights
        # into the parameters on whichever device the models live on.
        ev_classifier.load_state_dict(
            torch.load('model_checkpoints/f_evidence_sentence_classifier_para/best.th',
                       map_location='cpu'))
        predictor.load_state_dict(
            torch.load('model_checkpoints/f_oracle_full/best.th',
                       map_location='cpu'))

        logger.info('Classifier and Predictor models loaded successfully')
        ev_classifier.eval()
        predictor.eval()

        iterator = BasicIterator(batch_size=256)
        iterator.index_with(p_vocab)

        if args.probs is None:
            iterator_obj = iterator(p_test, num_epochs=1, shuffle=False)
            generator_tqdm = Tqdm.tqdm(iterator_obj, total=iterator.get_num_batches(p_test))

            # One flat list with one entry per test instance, in dataset order.
            output_probs = []
            for batch in generator_tqdm:
                batch = nn_util.move_to_device(batch, cuda_device)
                probs = ev_classifier.predict_evidence_probs(**batch)
                output_probs.extend(probs.cpu().numpy())

            logger.info('Obtained all sentence evidence probabilities - total %s', len(output_probs))
            with open('sentence_ev_probs.p', 'wb') as handle:
                pickle.dump(output_probs, handle)

        else:
            # NOTE: unpickling can execute arbitrary code; only pass a --probs
            # file that comes from a trusted source.
            output_probs = _load_pickle(args.probs)

        top_k_sentences = []
        # Offset of the current test item's first score inside output_probs.
        prob_counter = 0
        for example in pipeline_test:
            spans = example['sentence_span']
            # Candidate evidence: every window of three consecutive sentences,
            # with the tokens of the three sentences joined by spaces.
            sentences = [' '.join(first[0] + second[0] + third[0])
                         for first, second, third in zip(spans, spans[1:], spans[2:])]
            probs = list(output_probs[prob_counter: prob_counter + len(sentences)])
            prob_counter += len(sentences)
            ranked = sorted(zip(sentences, probs), key=lambda pair: pair[1], reverse=True)
            top_k = [sentence for sentence, _ in ranked[:args.k]]
            top_k_sentences.append({'I': example['I'],
                                    'C': example['C'],
                                    'O': example['O'],
                                    'y_label': example['y'][0][0],
                                    'evidence': ' '.join(top_k)})

        logger.info('Obtained the top sentences from the evidence classifier')

        predictor_reader = EIDatasetReader(bert_token_indexer)
        predictor_test = predictor_reader.read(top_k_sentences)

        test_metrics = evaluate(predictor, predictor_test, iterator,
                                cuda_device=cuda_device,
                                batch_weight_key="")

        print('Test Data statistics:')
        for key, value in test_metrics.items():
            print('{}: {}'.format(key, value))