Example 1
    def __init__(self,
                 dataset: WMTDataset,
                 source_lang: Language,
                 target_lang: Language,
                 local_root: str = '.',
                 source_dataset_filename: str = None,
                 target_dataset_filename: str = None,
                 model_name: str = None,
                 paper_arxiv_id: str = None,
                 paper_pwc_id: str = None,
                 paper_results: dict = None,
                 model_description: str = None,
                 tokenization: Callable[[str], str] = None):
        super().__init__(model_name, paper_arxiv_id, paper_pwc_id,
                         paper_results, model_description)
        self.root = change_root_if_server(root=local_root,
                                          server_root=".data/nlp/wmt")
        self.dataset = dataset
        self.source_lang = source_lang
        self.target_lang = target_lang

        default_src_fn, default_dst_fn = self._get_source_dataset_filename()
        if source_dataset_filename is None or is_server():
            source_dataset_filename = default_src_fn

        if target_dataset_filename is None or is_server():
            target_dataset_filename = default_dst_fn

        self.source_dataset_path = Path(self.root) / source_dataset_filename
        self.target_dataset_path = Path(self.root) / target_dataset_filename

        self.metrics = TranslationMetrics(self.source_dataset_path,
                                          self.target_dataset_path,
                                          tokenization)
Example 2
def make_data(batch_size):
    print('Preparing data...', flush=True)

    if is_server():
        datadir = './.data/vision/imagenet'
    else:  # local settings
        datadir = '/fastwork/data/ilsvrc2012'

    # Setup the input pipeline
    _, crop = bit_hyperrule.get_resolution_from_dataset('imagenet2012')
    input_tx = tv.transforms.Compose([
        tv.transforms.Resize((crop, crop)),
        tv.transforms.ToTensor(),
        tv.transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
    ])

    # valid_set = tv.datasets.ImageFolder(os.path.join(datadir, 'val'), input_tx)
    valid_set = tv.datasets.ImageNet(datadir, split='val', transform=input_tx)

    valid_loader = torch.utils.data.DataLoader(valid_set,
                                               batch_size=batch_size,
                                               shuffle=False,
                                               num_workers=8,
                                               pin_memory=True,
                                               drop_last=False)
    return valid_set, valid_loader
Example 3
    def cache_exists(self):
        """
        Checks whether the cache exists in the sotabench.com database - if so
        then sets self.results to cached results and returns True.

        You can use this property for control flow to break a for loop over a dataset
        after the first iteration. This prevents re-running the same calculation for the
        same model twice.

        Q: Why should the user use this?
        A: If you want fast "continuous evaluation" and want to avoid rerunning the same model over and over
            each time you commit something new to your repository.

        Examples:
            Breaking a for loop if the model is the same as last time we ran

            .. code-block:: python

                ...

                with torch.no_grad():
                    for i, (input, target) in enumerate(iterator):
                        ...
                        output = model(input)
                        # optional formatting of output here to be a list of detection dicts
                        evaluator.add(output)

                        if evaluator.cache_exists:
                            break

                evaluator.save()

        This logic is for the server; it will not break the loop if you evaluate locally.

        :return: bool or None (if not on server)
        """

        if not is_server():  # we only check the cache on the server
            return None

        if not self.first_batch_processed:
            return False

        if self._cache_exists is not None:
            return self._cache_exists

        client = Client.public()
        cached_res = client.get_results_by_run_hash(self.batch_hash)
        if cached_res:
            self.results = cached_res
            self.cached_results = True
            print("No model change detected (using the first batch run "
                  f"hash {self.batch_hash}). Will use cached results.")

            self._cache_exists = True
        else:
            self._cache_exists = False
        return self._cache_exists
Example 4
    def cache_exists(self):
        """
        Checks whether the cache exists in the sotabench.com database - if so
        then sets self.results to cached results and returns True.

        You can use this property for control flow to break a for loop over a dataset
        after the first iteration. This prevents re-running the same calculation for the
        same model twice.

        Q: Why should the user use this?
        A: If you want fast "continuous evaluation" and want to avoid rerunning the same model over and over
            each time you commit something new to your repository.

        Examples:
            Breaking a for loop for a PyTorch evaluation

            .. code-block:: python

                ...

                with torch.no_grad():
                    for i, (input, target) in enumerate(test_loader):
                        input = input.to(device=device, non_blocking=True)
                        target = target.to(device=device, non_blocking=True)
                        output = model(input)

                        bs = test_loader.batch_size
                        batch_imgs = test_loader.dataset.imgs[i * bs:(i + 1) * bs]
                        image_ids = [img[0].split('/')[-1].replace('.JPEG', '')
                                     for img in batch_imgs]

                        evaluator.add(dict(zip(image_ids, list(output.cpu().numpy()))))

                        if evaluator.cache_exists:
                            break

                evaluator.save()  # uses the cached results

        This logic is for the server; it will not break the loop if you evaluate locally.

        :return: bool or None (if not on server)
        """
        if not self.first_batch_processed:
            raise ValueError(
                'No batches of data have been processed so no batch_hash exists'
            )

        if not is_server():  # we only check the cache on the server
            return None

        client = Client.public()
        cached_res = client.get_results_by_run_hash(self.batch_hash)
        if cached_res:
            self.results = cached_res
            self.cached_results = True

            print("No model change detected (using the first batch run "
                  "hash). Will use cached results.")
            return True

        return False
Example 5
    def cache_exists(self):
        """
        Checks whether the cache exists in the sotabench.com database - if so
        then sets self.results to cached results and returns True.

        You can use this property for control flow to break a for loop over a dataset
        after the first iteration. This prevents rerunning the same calculation for the
        same model twice.

        Examples:
            Breaking a for loop

            .. code-block:: python

                ...

                with torch.no_grad():
                    for i, (input, target) in enumerate(iterator):
                        ...
                        output = model(input)
                        # optional formatting of output here to be a list of detection dicts
                        evaluator.add(output)

                        if evaluator.cache_exists:
                            break

                evaluator.save()

        :return: bool or None (if not on server)
        """

        if not is_server():  # we only check the cache on the server
            return None

        if not self.first_batch_processed:
            return False

        if self._cache_exists is not None:
            return self._cache_exists

        client = Client.public()
        cached_res = client.get_results_by_run_hash(self.batch_hash)
        if cached_res:
            self.results = cached_res
            self.cached_results = True
            print("No model change detected (using the first batch run "
                  "hash). Will use cached results.")
            self._cache_exists = True
        else:
            self._cache_exists = False
        return self._cache_exists
Example 6
    def __init__(self,
                 local_root: str = '.',
                 dataset_filename: str = None,
                 model_name: str = None,
                 paper_arxiv_id: str = None,
                 paper_pwc_id: str = None,
                 paper_results: dict = None,
                 model_description=None,
                 version: SQuADVersion = SQuADVersion.V20):
        """
        Creates an evaluator for SQuAD v1.1 or v2.0 Question Answering benchmarks.

        :param local_root: Path to the directory where the dataset files are located locally.
            Ignored when run on sotabench server.
        :param dataset_filename: Local filename of the JSON file with the SQuAD dataset.
            If None, the standard filename is used, based on :param:`version`.
            Ignored when run on sotabench server.
        :param model_name: The name of the model from the
            paper - if you want to link your build to a model from a
            machine learning paper. See the SQuAD benchmark pages
            (e.g., https://sotabench.com/benchmarks/question-answering-on-squad11-dev),
            under the paper leaderboard or "models yet to try" tabs, for model names.
        :param paper_arxiv_id: Optional linking to arXiv if you
            want to link to papers on the leaderboard; put in the
            corresponding paper's arXiv ID, e.g. '1907.10529'.
        :param paper_pwc_id: Optional linking to Papers With Code;
            put in the corresponding papers with code URL slug, e.g.
            'spanbert-improving-pre-training-by'
        :param paper_results: If the paper model you are reproducing
            does not have model results on sotabench.com, you can specify
            the paper results yourself through this argument, where keys
            are metric names and values are metric values, e.g.:

                    {'EM': 0.858, 'F1': 0.873}.

            Ensure that the metric names match those on the sotabench
            leaderboard - for SQuAD benchmarks it should be `EM` for exact match
            and `F1` for F1 score. Make sure to use results of evaluation on a development set.
        :param model_description: Optional model description.
        :param version: Which dataset to evaluate on, either `SQuADVersion.V11` or `SQuADVersion.V20`.
        """
        super().__init__(model_name, paper_arxiv_id, paper_pwc_id,
                         paper_results, model_description)
        self.root = change_root_if_server(root=local_root,
                                          server_root=".data/nlp/squad")
        self.version = version
        if dataset_filename is None or is_server():
            dataset_filename = "dev-{}.json".format(version.value)
        self.dataset_path = Path(self.root) / dataset_filename

        self.metrics = SQuADMetrics(self.dataset_path, version)
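
A minimal construction sketch following the docstring above. The arXiv ID, PWC slug and EM/F1 numbers repeat the examples given in the docstring; the import path and the model name string are assumptions for illustration, not taken from a real leaderboard entry.

# Hypothetical usage sketch for the evaluator defined above; the import path
# and the model name are assumed, the remaining values come from the docstring.
from sotabencheval.question_answering import SQuADEvaluator, SQuADVersion

evaluator = SQuADEvaluator(
    local_root='data/nlp/squad',        # ignored when run on the sotabench server
    model_name='SpanBERT (large)',      # assumed name - must match the leaderboard
    paper_arxiv_id='1907.10529',
    paper_pwc_id='spanbert-improving-pre-training-by',
    # only needed if the paper has no results on sotabench.com yet;
    # keys must match the leaderboard metric names (EM, F1)
    paper_results={'EM': 0.858, 'F1': 0.873},
    version=SQuADVersion.V20,
)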
Example 7
def run_benchmark(model_url: str, model_name: str, version: SQuADVersion):
    evaluator = SQuADEvaluator(local_root="data/nlp/squad",
                               model_name=model_name,
                               paper_arxiv_id="1907.10529",
                               version=version)

    model = run_squad.BertForQuestionAnswering.from_pretrained(model_url)
    settings = get_default_settings(evaluator.version)
    tokenizer = run_squad.BertTokenizer.from_pretrained("spanbert-large-cased",
                                                        do_lower_case=False)

    device = torch.device("cuda")
    model.to(device)

    eval_examples = run_squad.read_squad_examples(
        input_file=evaluator.dataset_path,
        is_training=False,
        version_2_with_negative=settings.version_2_with_negative)

    # when on sotabench server, run the pipeline on a small dataset first and
    # compare the results with cache to avoid recomputing on whole dataset
    cache_exists = False
    if is_server():
        small_examples = eval_examples[::100]
        answers = evaluate(model, tokenizer, device, small_examples, settings)
        evaluator.add(answers)
        if evaluator.cache_exists:
            cache_exists = True
        else:
            evaluator.reset()

    evaluator.reset_time()
    if not cache_exists or not is_server():
        answers = evaluate(model, tokenizer, device, eval_examples, settings)
        evaluator.add(answers)

    evaluator.save()
    print(evaluator.results)
Example 8
    def cache_exists(self):
        """
        Checks whether the cache exists in the sotabench.com database - if so
        then sets self.results to cached results and returns True.

        You can use this property for control flow to break a for loop over a dataset
        after the first iteration. This prevents rerunning the same calculation for the
        same model twice.

        Examples:
            Breaking a for loop

            .. code-block:: python

                ...

                with torch.no_grad():
                    for i, (input, target) in enumerate(iterator):
                        ...
                        output = model(input)
                        # output and target should then be flattened into 1D np.ndarrays and passed in below
                        evaluator.update(output=output, target=target)

                        if evaluator.cache_exists:
                            break

                evaluator.save()

        :return: bool or None (if not on server)
        """

        if not self.first_batch_processed:
            raise ValueError(
                'No batches of data have been processed so no batch_hash exists'
            )

        if not is_server():
            return None

        client = Client.public()
        cached_res = client.get_results_by_run_hash(self.batch_hash)
        if cached_res:
            self.results = cached_res
            self.cached_results = True
            print("No model change detected (using the first batch run "
                  "hash). Will use cached results.")
            return True

        return False
Example 9
    def __init__(self,
                 local_root: str = '.',
                 dataset_filename: str = None,
                 model_name: str = None,
                 paper_arxiv_id: str = None,
                 paper_pwc_id: str = None,
                 paper_results: dict = None,
                 model_description=None,
                 version: SQuADVersion = SQuADVersion.V20):
        super().__init__(model_name, paper_arxiv_id, paper_pwc_id, paper_results, model_description)
        self.root = change_root_if_server(root=local_root,
                                          server_root=".data/nlp/squad")
        self.version = version
        if dataset_filename is None or is_server():
            dataset_filename = "dev-{}.json".format(version.value)
        self.dataset_path = Path(self.root) / dataset_filename

        self.metrics = SQuADMetrics(self.dataset_path, version)
Example 10
def get_datasets(versions):
    squad_links = {
        SQuADVersion.V11:
        "https://rajpurkar.github.io/SQuAD-explorer/dataset/dev-v1.1.json",
        SQuADVersion.V20:
        "https://rajpurkar.github.io/SQuAD-explorer/dataset/dev-v2.0.json"
    }
    filenames = {
        SQuADVersion.V11: "dev-v1.1.json",
        SQuADVersion.V20: "dev-v2.0.json"
    }
    data_dir = Path(".data") if is_server() else Path("data")
    datasets_path = data_dir / "nlp" / "squad"
    datasets_path.mkdir(parents=True, exist_ok=True)
    for version in versions:
        filename = datasets_path / filenames[version]
        if not filename.exists():
            download_url_to_file(squad_links[version], filename)
Example 11
def evaluate(pretrained_name):
    model = ULMFiT().from_pretrained_(pretrained_name)
    if is_server():
        wikitext_folder = WikiText103Evaluator.dataset.get_path(
            local_root="unused")
    else:
        wikitext_folder = untar_data(URLs.WIKITEXT)
    ds = model.arch.dataset(wikitext_folder,
                            tokenizer=model.pretrain_lm.tokenizer)

    test_df = ds.read_data(ds.tst_path)
    data_lm = ds.databunch_from_df(TextLMDataBunch,
                                   test_df,
                                   test_df,
                                   bs=20,
                                   bptt=70)
    learn = model.finetune_lm.get_learner(data_lm)

    full_data = np.concatenate(data_lm.valid_ds.items)

    evaluator = WikiText103Evaluator(model_name="Multifit (slim)",
                                     model_description=pretrained_name,
                                     paper_arxiv_id="1909.04761",
                                     local_root=str(wikitext_folder))

    learn.loss_func = None

    dev = torch.device("cuda")
    evaluator.reset()
    batches = iterate_over_batches(torch.tensor(full_data), bs=200, bptt=70)
    for x, y in progress_bar(batches, total=len(full_data) // 200 // 70):
        logits = learn.pred_batch(batch=[x.to(dev), y.to(dev)])
        log_probs = torch.log_softmax(logits, -1)
        evaluator.add(log_probs, y)
        if evaluator.cache_exists:
            break
    evaluator.save()
    print(pretrained_name)
    evaluator.print_results()
    return evaluator.results
Example 12
    _entry('regnety_064', 'RegNetY-6.4GF', '2003.13678'),
    _entry('regnety_080', 'RegNetY-8.0GF', '2003.13678'),
    _entry('regnety_120', 'RegNetY-12GF', '2003.13678'),
    _entry('regnety_160', 'RegNetY-16GF', '2003.13678'),
    _entry('regnety_320', 'RegNetY-32GF', '2003.13678', batch_size=BATCH_SIZE // 2),

    _entry('rexnet_100', 'ReXNet-1.0x', '2007.00992'),
    _entry('rexnet_130', 'ReXNet-1.3x', '2007.00992'),
    _entry('rexnet_150', 'ReXNet-1.5x', '2007.00992'),
    _entry('rexnet_200', 'ReXNet-2.0x', '2007.00992'),

    _entry('vit_small_patch16_224', 'ViT-S/16', None),
    _entry('vit_base_patch16_224', 'ViT-B/16', None),
]

if is_server():
    DATA_ROOT = './.data/vision/imagenet'
else:
    # local settings
    DATA_ROOT = './'
DATA_FILENAME = 'ILSVRC2012_img_val.tar'
TAR_PATH = os.path.join(DATA_ROOT, DATA_FILENAME)

for m in model_list:
    model_name = m['model']
    # create model from name
    model = create_model(model_name, pretrained=True)
    param_count = sum([m.numel() for m in model.parameters()])
    print('Model %s, %s created. Param count: %d' % (model_name, m['paper_model_name'], param_count))

    dataset = DatasetTar(TAR_PATH)
Example 13
    def __init__(self,
                 dataset: WMTDataset,
                 source_lang: Language,
                 target_lang: Language,
                 local_root: str = '.',
                 source_dataset_filename: str = None,
                 target_dataset_filename: str = None,
                 model_name: str = None,
                 paper_arxiv_id: str = None,
                 paper_pwc_id: str = None,
                 paper_results: dict = None,
                 model_description: str = None,
                 tokenization: Callable[[str], str] = None):
        """
        Creates an evaluator for one of the WMT benchmarks.

        :param dataset: Which dataset to evaluate on, e.g., WMTDataset.News2014.
        :param source_lang: Source language of the documents to translate.
        :param target_lang: Target language into which the documents are translated.
        :param local_root: Path to the directory where the dataset files are located locally.
            Ignored when run on sotabench server.
        :param source_dataset_filename: Local filename of the SGML file with the source documents.
            If None, the standard WMT filename is used, based on :param:`dataset`,
            :param:`source_lang` and :param:`target_lang`.
            Ignored when run on sotabench server.
        :param target_dataset_filename: Local filename of the SGML file with the reference documents.
            If None, the standard WMT filename is used, based on :param:`dataset`,
            :param:`source_lang` and :param:`target_lang`.
            Ignored when run on sotabench server.
        :param model_name: The name of the model from the
            paper - if you want to link your build to a model from a
            machine learning paper. See the WMT benchmark pages
            (e.g., https://sotabench.com/benchmarks/machine-translation-on-wmt2014-english-german),
            under the paper leaderboard or "models yet to try" tabs, for model names.
        :param paper_arxiv_id: Optional linking to arXiv if you
            want to link to papers on the leaderboard; put in the
            corresponding paper's arXiv ID, e.g. '1907.06616'.
        :param paper_pwc_id: Optional linking to Papers With Code;
            put in the corresponding papers with code URL slug, e.g.
            'facebook-fairs-wmt19-news-translation-task'
        :param paper_results: If the paper model you are reproducing
            does not have model results on sotabench.com, you can specify
            the paper results yourself through this argument, where keys
            are metric names and values are metric values, e.g.:

                    {'SacreBLEU': 42.7, 'BLEU score': 43.1}.

            Ensure that the metric names match those on the sotabench
            leaderboard - for WMT benchmarks it should be `SacreBLEU` for de-tokenized
            case-sensitive BLEU score and `BLEU score` for tokenized BLEU.
        :param model_description: Optional model description.
        :param tokenization: An optional tokenization function to compute tokenized BLEU score.
            It takes a single string - a segment to tokenize - and returns a string with tokens
            separated by spaces, e.g.:

                    tokenization = lambda seg: seg.replace("'s", " 's").replace("-", " - ")

            If None, only de-tokenized SacreBLEU score is reported.
        """

        super().__init__(model_name, paper_arxiv_id, paper_pwc_id,
                         paper_results, model_description)
        self.root = change_root_if_server(root=local_root,
                                          server_root=".data/nlp/wmt")
        self.dataset = dataset
        self.source_lang = source_lang
        self.target_lang = target_lang

        default_src_fn, default_dst_fn = self._get_source_dataset_filename()
        if source_dataset_filename is None or is_server():
            source_dataset_filename = default_src_fn

        if target_dataset_filename is None or is_server():
            target_dataset_filename = default_dst_fn

        self.source_dataset_path = Path(self.root) / source_dataset_filename
        self.target_dataset_path = Path(self.root) / target_dataset_filename

        self.metrics = TranslationMetrics(self.source_dataset_path,
                                          self.target_dataset_path,
                                          tokenization)
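
The same kind of construction sketch for the WMT evaluator defined above. The enclosing class is not shown in this snippet, so the class name WMTEvaluator, the import path and the Language member names are assumptions; the dataset value, arXiv ID, PWC slug and tokenization lambda repeat the docstring's own examples.

# Hypothetical usage sketch; WMTEvaluator, the import path and the Language
# members are assumed names, the remaining values come from the docstring.
from sotabencheval.machine_translation import WMTEvaluator, WMTDataset, Language

evaluator = WMTEvaluator(
    dataset=WMTDataset.News2014,
    source_lang=Language.English,       # assumed enum member names
    target_lang=Language.German,
    local_root='data/nlp/wmt',          # ignored when run on the sotabench server
    paper_arxiv_id='1907.06616',
    paper_pwc_id='facebook-fairs-wmt19-news-translation-task',
    # optional: also report tokenized BLEU by supplying a tokenizer
    tokenization=lambda seg: seg.replace("'s", " 's").replace("-", " - "),
)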
Example 14
import numpy as np
import torch
import yaml
from sotabencheval.object_detection import COCOEvaluator
from sotabencheval.utils import is_server
from tqdm import tqdm

from models.experimental import attempt_load
from utils.datasets import create_dataloader
from utils.general import (coco80_to_coco91_class, check_dataset, check_file,
                           check_img_size, compute_loss, non_max_suppression,
                           scale_coords, xyxy2xywh, clip_coords, set_logging)
from utils.torch_utils import select_device, time_synchronized

DATA_ROOT = './.data/vision/coco' if is_server() else '../coco'  # sotabench data dir


def test(
        data,
        weights=None,
        batch_size=16,
        imgsz=640,
        conf_thres=0.001,
        iou_thres=0.6,  # for NMS
        save_json=False,
        single_cls=False,
        augment=False,
        verbose=False,
        model=None,
        dataloader=None,
Example 15
import os
import numpy as np
import PIL
import torch
from torch.utils.data import DataLoader
import torchvision.transforms as transforms
from torchvision.datasets import ImageNet

from efficientnet_pytorch import EfficientNet

from sotabencheval.image_classification import ImageNetEvaluator
from sotabencheval.utils import is_server

if is_server():
    DATA_ROOT = './.data/vision/imagenet'
else:  # local settings
    DATA_ROOT = os.environ.get('IMAGENET_DIR')
    assert DATA_ROOT, 'please set IMAGENET_DIR environment variable'
    print('Local data root: ', DATA_ROOT)

model_name = 'EfficientNet-B5'
model = EfficientNet.from_pretrained(model_name.lower())
image_size = EfficientNet.get_image_size(model_name.lower())

input_transform = transforms.Compose([
    transforms.Resize(image_size, PIL.Image.BICUBIC),
    transforms.CenterCrop(image_size),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224,
                                                          0.225]),
])
Example 16
    scale_coords,
    xyxy2xywh,
    clip_coords,
    plot_images,
    xywh2xyxy,
    box_iou,
    output_to_target,
    ap_per_class,
    set_logging,
)
from utils.torch_utils import select_device, time_synchronized

from sotabencheval.object_detection import COCOEvaluator
from sotabencheval.utils import is_server

DATA_ROOT = "./.data/vision/coco" if is_server(
) else "../coco"  # sotabench data dir


def test(
    data,
    weights=None,
    batch_size=16,
    imgsz=640,
    conf_thres=0.001,
    iou_thres=0.6,  # for NMS
    save_json=False,
    single_cls=False,
    augment=False,
    verbose=False,
    model=None,
    dataloader=None,