"""Spacy Operation.""" from __future__ import annotations from typing import List import torch from meerkat import SpacyCell from meerkat.tools.lazy_loader import LazyLoader from robustnessgym.core.operation import Operation from robustnessgym.core.slice import SliceDataPanel as DataPanel spacy = LazyLoader("spacy", warning="Please `pip install spacy`.") spacy_tokens = LazyLoader("spacy.tokens") nc = LazyLoader( "neuralcoref", error="Can't import neuralcoref. Please install neuralcoref using:\n" "git clone https://github.com/huggingface/neuralcoref.git\n" "cd neuralcoref\n" "pip install -r requirements.txt\n" "pip install -e .", ) class SpacyOp(Operation): """Operation that runs the Spacy pipeline.""" def __init__( self, lang: str = "en_core_web_sm", nlp: spacy.language.Language = None, neuralcoref: bool = False,
from typing import List import cytoolz as tz import torch from meerkat.tools.lazy_loader import LazyLoader from robustnessgym.core.identifier import Identifier from robustnessgym.slicebuilders.transformation import SingleColumnTransformation fastBPE = LazyLoader("fastBPE", error="Install fastBPE with `pip install fastBPE`.") class FairseqBacktranslation(SingleColumnTransformation): """Backtranslation using torchhub fairseq models.""" def __init__( self, n_src2tgt: int = 1, n_tgt2src: int = 1, langs: str = "en2de", torchhub_dir: str = None, device: str = "cuda", src2tgt_topk: int = 1000, src2tgt_temp: float = 1.0, tgt2src_topk: int = 1000, tgt2src_temp: float = 1.0, ): super(FairseqBacktranslation, self).__init__(identifiers=Identifier.range( n=n_src2tgt * n_tgt2src,
import cytoolz as tz import torch from meerkat.tools.lazy_loader import LazyLoader from transformers import ( AutoModel, AutoModelForQuestionAnswering, AutoModelForSeq2SeqLM, AutoModelForSequenceClassification, AutoTokenizer, ) from robustnessgym.core.metrics import compute_metric from robustnessgym.core.slice import SliceDataPanel as DataPanel from robustnessgym.tasks.task import Task ludwig_api = LazyLoader("ludwig.api") nltk = LazyLoader("nltk") class Model: def __init__( self, identifier: str, task: Task, model=None, evaluation_fn=None, device: str = None, is_classifier: bool = None, ): # TODO(karan): improve this wrapper around models
from functools import partial
from types import SimpleNamespace

import numpy as np
from meerkat.tools.lazy_loader import LazyLoader

scipy_optimize = LazyLoader("scipy.optimize")
scipy_special = LazyLoader("scipy.special")
skmetrics = LazyLoader("sklearn.metrics.pairwise")


def Phi(D, edge_list: list = None):
    """Given an n x d matrix of (example, slices), calculate the potential matrix.

    Includes correlations modeled by the edges in the `edge_list`.

    Args:
        D: 2-D array with one row per example and one column per slice.
        edge_list: optional collection of column-index pairs with shape
            (num_edges, 2). A plain list of pairs or an ndarray is accepted;
            it is coerced to an ndarray before being indexed.

    Returns:
        `D` itself when `edge_list` is None. Otherwise a new array that
        consists of `D` followed by one extra column per edge, holding the
        elementwise product of the two columns of `D` named by that edge.
    """
    if edge_list is None:
        return D

    # The signature advertises a `list`, but the 2-D slicing below only works
    # on ndarrays; coerce so that both spellings are accepted.
    edges = np.asarray(edge_list)
    rows = np.arange(len(D))

    # Indexing with (rows, column-vector) broadcasts to shape
    # (num_edges, n); transposing gives one column per edge.
    first_endpoint = D[rows, edges[:, 0][:, np.newaxis]].T
    second_endpoint = D[rows, edges[:, 1][:, np.newaxis]].T
    pairwise_terms = first_endpoint * second_endpoint

    return np.concatenate([D, pairwise_terms], axis=1)


def log_partition_ratio(x, Phi_D_src, n_src):
    """Calculate the log-partition ratio in the KLIEP problem.

    Args:
        x: parameter vector that `Phi_D_src` is multiplied with.
        Phi_D_src: potential matrix exposing a `.dot` method
            (presumably the output of `Phi` on the source data -- confirm
            against the caller).
        n_src: value whose logarithm is the leading term (presumably the
            number of source examples -- confirm against the caller).

    Returns:
        `log(n_src) - logsumexp(Phi_D_src.dot(x))`.
    """
    scores = Phi_D_src.dot(x)
    return np.log(n_src) - scipy_special.logsumexp(scores)
import re import statistics from typing import Callable, Sequence, Union import numpy as np import torch from meerkat.tools.lazy_loader import LazyLoader from sklearn.metrics import accuracy_score, f1_score nltk = LazyLoader("nltk") rouge_score = LazyLoader("rouge_score") def get_metric(name: str) -> Callable: """Get metrics from string names.""" if name == "accuracy": return accuracy elif name == "f1": return f1 elif name == "f1_micro": return f1_micro elif name == "f1_macro": return f1_macro else: raise NotImplementedError(f"Metric name {name} not recognized.") def accuracy( predictions: Union[list, np.array, torch.Tensor], labels: Union[list, np.array, torch.Tensor], ):
from typing import List import fuzzywuzzy.fuzz as fuzz import numpy as np from meerkat.tools.lazy_loader import LazyLoader from robustnessgym.core.decorators import prerequisites from robustnessgym.core.identifier import Identifier from robustnessgym.core.operation import lookup from robustnessgym.core.slice import SliceDataPanel as DataPanel from robustnessgym.ops.allen import AllenConstituencyParsingOp from robustnessgym.slicebuilders.subpopulations.score import ScoreSubpopulation nltk = LazyLoader("nltk") @prerequisites(AllenConstituencyParsingOp) class ConstituencyOverlapSubpopulation(ScoreSubpopulation): def score( self, batch: DataPanel, columns: List[str], *args, **kwargs, ) -> np.ndarray: # Require that the number of keys is exactly 2 assert len(columns) == 2, "Must specify exactly 2 keys." # Retrieve the trees trees = { col: lookup(batch, AllenConstituencyParsingOp, [col]) for col in columns
"""Easy data augmentation techniques for text classification. Jason Wei and Kai Zou. Taken from https://github.com/jasonwei20/eda_nlp """ import random import re from random import shuffle from meerkat.tools.lazy_loader import LazyLoader nltk_corpus = LazyLoader("nltk.corpus") random.seed(1) # stop words list stop_words = [ "i", "me", "my", "myself", "we", "our", "ours", "ourselves", "you", "your", "yours", "yourself", "yourselves",
from collections import OrderedDict
from typing import List, Tuple

import cytoolz as tz
import numpy as np
from meerkat.tools.lazy_loader import LazyLoader

from robustnessgym.core.identifier import Identifier
from robustnessgym.core.model import Model
from robustnessgym.core.slice import SliceDataPanel as DataPanel
from robustnessgym.slicebuilders.attack import Attack

attack_recipes = LazyLoader(
    "textattack.attack_recipes",
    error="Install TextAttack with `pip install textattack`.",
)
wrappers = LazyLoader("textattack.models.wrappers")


class TextAttack(Attack):
    """Attack slicebuilder wrapping a TextAttack recipe."""

    def __init__(
        self,
        attack: "attack_recipes.AttackRecipe",
    ):
        # A single identifier, named after the concrete class and tagged
        # with the attack it was constructed from.
        identifier = Identifier(type(self).__name__, attack=attack)
        super().__init__(identifiers=[identifier])
from typing import Dict, List, Tuple import numpy as np from meerkat.tools.lazy_loader import LazyLoader from robustnessgym.core.identifier import Identifier from robustnessgym.core.tools import transpose_batch from robustnessgym.slicebuilders.attack import Attack morpheus = LazyLoader("morpheus") class Morpheus(Attack): def __init__( self, dataset: str, model: str, constrain_pos: bool = True, **kwargs, ): super().__init__(identifiers=[ Identifier(self.__class__.__name__, dataset=dataset, model=model) ], ) self.constrain_pos = constrain_pos self.dataset = dataset.lower() if self.dataset == "mnli": self.attack = morpheus.MorpheusHuggingfaceNLI(model) elif "squad" in self.dataset: is_squad2 = "2" in self.dataset
from __future__ import annotations from typing import Dict, List from meerkat import AbstractCell from meerkat.tools.lazy_loader import LazyLoader from robustnessgym.core.operation import Operation from robustnessgym.core.slice import SliceDataPanel as DataPanel stanza = LazyLoader("stanza", error="Please `pip install stanza`.") class StanzaCell(AbstractCell): """Cell that stores a Stanza Document.""" def __init__(self, doc: stanza.Document, *args, **kwargs): super(StanzaCell, self).__init__(*args, **kwargs) self.doc = doc def get(self, *args, **kwargs): return self.doc @property def data(self) -> stanza.Document: return self.doc @classmethod def from_text(cls, text: str, pipeline: stanza.Pipeline) -> StanzaCell: return cls(pipeline(text)) def get_state(self) -> Dict:
"""Transformations using nlpaug.""" from typing import List from meerkat.tools.lazy_loader import LazyLoader from robustnessgym.core.identifier import Identifier from robustnessgym.slicebuilders.transformation import SingleColumnTransformation nlpaug_flow = LazyLoader("nlpaug.flow", error="Please `pip install nlpaug`.") class NlpAugTransformation(SingleColumnTransformation): """Class for building transformations using nlpaug.""" def __init__(self, pipeline: "nlpaug_flow.Pipeline", num_transformed: int = 1, identifiers: List[Identifier] = None, *args, **kwargs): assert isinstance(pipeline, nlpaug_flow.Pipeline), ( "`pipeline` must be an nlpaug Pipeline object. Please use \n" "from nlpaug.flow import Sequential\n" "rg.NlpAugTransformation(pipeline=Sequential(flow=[...])).") super(NlpAugTransformation, self).__init__(num_transformed=num_transformed, identifiers=Identifier.range( n=num_transformed, _name=self.__class__.__name__, pipeline=[ Identifier(
from typing import List from meerkat.tools.lazy_loader import LazyLoader from robustnessgym.core.operation import Operation from robustnessgym.core.slice import SliceDataPanel as DataPanel from robustnessgym.mixins.device import DeviceMixin predictors = LazyLoader("allennlp.predictors") class AllenPredictionOp(DeviceMixin, Operation): def __init__( self, path: str, device: str, ): super(AllenPredictionOp, self).__init__(device=device) self._predictor = predictors.Predictor.from_path( archive_path=path, cuda_device=self.cuda_device) @property def predictor(self): return self._predictor def process_batch( self, dp: DataPanel, columns: List[str], **kwargs,
import hashlib
import inspect
import json
from collections import defaultdict
from functools import partial
from typing import Callable, Dict, List, Mapping, Optional, Sequence

import cytoolz as tz
import numpy as np
import progressbar
import torch
import yaml
from meerkat import ImagePath
from meerkat.tools.lazy_loader import LazyLoader

PIL = LazyLoader("PIL")


def save_image(image, filename):
    """Save `image` to the file `filename` and return an `ImagePath` for it.

    Args:
        image: a `torch.Tensor` or an object with an `astype` method (e.g. a
            numpy array). Values are cast to `uint8` before the PIL image
            is built.
        filename: destination passed to `PIL.Image.Image.save` and to
            `ImagePath`.

    Returns:
        `ImagePath(filename)`.
    """
    if isinstance(image, torch.Tensor):
        # `Tensor.numpy()` refuses tensors that live on an accelerator or
        # that require grad; detach and move to host memory first.
        image = image.detach().cpu().numpy()

    # NOTE(review): relies on `PIL.Image` being reachable through the lazy
    # `PIL` module -- confirm the submodule is imported by the time this runs.
    pil_image = PIL.Image.fromarray(image.astype(np.uint8))

    # Normalise every mode (grayscale, RGBA, ...) to RGB before saving.
    if pil_image.mode != "RGB":
        pil_image = pil_image.convert("RGB")

    pil_image.save(filename)
    return ImagePath(filename)