Example #1
    def __init__(self,
                 dataset: WMTDataset,
                 source_lang: Language,
                 target_lang: Language,
                 local_root: str = '.',
                 source_dataset_filename: str = None,
                 target_dataset_filename: str = None,
                 model_name: str = None,
                 paper_arxiv_id: str = None,
                 paper_pwc_id: str = None,
                 paper_results: dict = None,
                 model_description: str = None,
                 tokenization: Callable[[str], str] = None):
        super().__init__(model_name, paper_arxiv_id, paper_pwc_id,
                         paper_results, model_description)
        self.root = change_root_if_server(root=local_root,
                                          server_root=".data/nlp/wmt")
        self.dataset = dataset
        self.source_lang = source_lang
        self.target_lang = target_lang

        default_src_fn, default_dst_fn = self._get_source_dataset_filename()
        if source_dataset_filename is None or is_server():
            source_dataset_filename = default_src_fn

        if target_dataset_filename is None or is_server():
            target_dataset_filename = default_dst_fn

        self.source_dataset_path = Path(self.root) / source_dataset_filename
        self.target_dataset_path = Path(self.root) / target_dataset_filename

        self.metrics = TranslationMetrics(self.source_dataset_path,
                                          self.target_dataset_path,
                                          tokenization)
Example #2
    def __init__(self,
                 root: str = '.',
                 model_name: str = None,
                 paper_arxiv_id: str = None,
                 paper_pwc_id: str = None,
                 paper_results: dict = None,
                 model_description=None,):
        """Benchmarking function.

        Args:
            root (string): Root directory of the ImageNet Dataset - where the
                label data is located (or will be downloaded to).
            model_name (str, optional): The name of the model from the
                paper - if you want to link your build to a model from a
                machine learning paper. See the ImageNet benchmark page,
                https://sotabench.com/benchmarks/image-classification-on-imagenet,
                for model names, e.g. on the paper leaderboard tab.
            paper_arxiv_id (str, optional): Optional linking to arXiv if you
                want to link to papers on the leaderboard; put in the
                corresponding paper's arXiv ID, e.g. '1611.05431'.
            paper_pwc_id (str, optional): Optional linking to Papers With Code;
                put in the corresponding papers with code URL slug, e.g.
                'u-gat-it-unsupervised-generative-attentional'
            paper_results (dict, optional): If the paper model you are reproducing
                does not have model results on sotabench.com, you can specify
                the paper results yourself through this argument, where keys
                are metric names and values are metric values, e.g.::

                    {'Top 1 Accuracy': 0.543, 'Top 5 Accuracy': 0.654}.

                Ensure that the metric names match those on the sotabench
                leaderboard - for ImageNet it should be 'Top 1 Accuracy' and
                'Top 5 Accuracy'.
            model_description (str, optional): Optional model description.
        """

        root = self.root = os.path.expanduser(change_root_if_server(
            root=root,
            server_root="./.data/vision/imagenet"))

        self.model_name = model_name
        self.paper_arxiv_id = paper_arxiv_id
        self.paper_pwc_id = paper_pwc_id
        self.paper_results = paper_results
        self.model_description = model_description

        self.top1 = AverageMeter()
        self.top5 = AverageMeter()

        self.load_targets()

        self.outputs = {}
        self.results = None
        self.first_batch_processed = False
        self.batch_hash = None
        self.cached_results = False

        self.speed_mem_metrics = {}
        self.init_time = time.time()
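For context, here is a minimal usage sketch of the constructor above. The class name ImageNetEvaluator and the import path sotabencheval.image_classification are assumptions inferred from this snippet, the model name is a placeholder, and the arXiv ID and paper_results values repeat the docstring's own examples.

# Minimal usage sketch; class name and import path are assumed, model name is hypothetical.
from sotabencheval.image_classification import ImageNetEvaluator

evaluator = ImageNetEvaluator(
    root='./data',                      # where the ImageNet label data lives locally
    model_name='My Classification Model',
    paper_arxiv_id='1611.05431',
    paper_results={'Top 1 Accuracy': 0.543, 'Top 5 Accuracy': 0.654},
)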
Example #3
    def _get_path(self, local_root, local_unzip=False):
        root = Path(change_root_if_server(root=local_root,
                                          server_root=".data/nlp/" + self.pwc_name.lower()))
        zip_name = self.pwc_name.lower() + "-v1.zip"
        dataset_path = root / "wiki.test.tokens"
        if not dataset_path.exists():  # extract the archive if the tokens file is missing
            extract_archive(str(root / zip_name), to_path=root.parent)
        return dataset_path
Example #4
def get_path(local_root, local_unzip=False):
    root = Path(
        change_root_if_server(root=local_root,
                              server_root=".data/nlp/multinli"))
    zip_name = "MNLI.zip"
    dataset_path = root / "MNLI" / "dev_matched.tsv"
    if not dataset_path.exists():  # unzip
        extract_archive(str(root / zip_name), to_path=root)
    return (dataset_path, dataset_path.parent / "dev_mismatched.tsv")
Example #5
    def __init__(self,
                 local_root: str = '.',
                 dataset_filename: str = None,
                 model_name: str = None,
                 paper_arxiv_id: str = None,
                 paper_pwc_id: str = None,
                 paper_results: dict = None,
                 model_description=None,
                 version: SQuADVersion = SQuADVersion.V20):
        """
        Creates an evaluator for SQuAD v1.1 or v2.0 Question Answering benchmarks.

        :param local_root: Path to the directory where the dataset files are located locally.
            Ignored when run on sotabench server.
        :param dataset_filename: Local filename of the JSON file with the SQuAD dataset.
            If None, the standard filename is used, based on :param:`version`.
            Ignored when run on sotabench server.
        :param model_name: The name of the model from the
            paper - if you want to link your build to a model from a
            machine learning paper. See the SQuAD benchmarks pages for model names,
            (e.g., https://sotabench.com/benchmarks/question-answering-on-squad11-dev)
            on the "paper leaderboard" or "models yet to try" tabs.
        :param paper_arxiv_id: Optional linking to arXiv if you
            want to link to papers on the leaderboard; put in the
            corresponding paper's arXiv ID, e.g. '1907.10529'.
        :param paper_pwc_id: Optional linking to Papers With Code;
            put in the corresponding papers with code URL slug, e.g.
            'spanbert-improving-pre-training-by'
        :param paper_results: If the paper model you are reproducing
            does not have model results on sotabench.com, you can specify
            the paper results yourself through this argument, where keys
            are metric names and values are metric values, e.g.:

                    {'EM': 0.858, 'F1': 0.873}.

            Ensure that the metric names match those on the sotabench
            leaderboard - for SQuAD benchmarks it should be `EM` for exact match
            and `F1` for F1 score. Make sure to use results of evaluation on a development set.
        :param model_description: Optional model description.
        :param version: Which dataset to evaluate on, either `SQuADVersion.V11` or `SQuADVersion.V20`.
        """
        super().__init__(model_name, paper_arxiv_id, paper_pwc_id,
                         paper_results, model_description)
        self.root = change_root_if_server(root=local_root,
                                          server_root=".data/nlp/squad")
        self.version = version
        if dataset_filename is None or is_server():
            dataset_filename = "dev-{}.json".format(version.value)
        self.dataset_path = Path(self.root) / dataset_filename

        self.metrics = SQuADMetrics(self.dataset_path, version)
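As a quick illustration of the parameters documented above, a hedged usage sketch follows. The class name SQuADEvaluator and the import path sotabencheval.question_answering are assumptions based on this snippet; the model name is a placeholder and the metric values repeat the docstring's example.

# Minimal usage sketch; class name and import path are assumed, model name is hypothetical.
from sotabencheval.question_answering import SQuADEvaluator, SQuADVersion

evaluator = SQuADEvaluator(
    local_root='data',                  # folder holding dev-v1.1.json / dev-v2.0.json locally
    model_name='My QA Model',
    paper_arxiv_id='1907.10529',
    paper_results={'EM': 0.858, 'F1': 0.873},
    version=SQuADVersion.V20,
)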
Example #6
    def __init__(self,
                 local_root: str = '.',
                 dataset_filename: str = None,
                 model_name: str = None,
                 paper_arxiv_id: str = None,
                 paper_pwc_id: str = None,
                 paper_results: dict = None,
                 model_description=None,
                 version: SQuADVersion = SQuADVersion.V20):
        super().__init__(model_name, paper_arxiv_id, paper_pwc_id, paper_results, model_description)
        self.root = change_root_if_server(root=local_root,
                                          server_root=".data/nlp/squad")
        self.version = version
        if dataset_filename is None or is_server():
            dataset_filename = "dev-{}.json".format(version.value)
        self.dataset_path = Path(self.root) / dataset_filename

        self.metrics = SQuADMetrics(self.dataset_path, version)
Example #7
    def __init__(self,
                 dataset: WMTDataset,
                 source_lang: Language,
                 target_lang: Language,
                 local_root: str = '.',
                 source_dataset_filename: str = None,
                 target_dataset_filename: str = None,
                 model_name: str = None,
                 paper_arxiv_id: str = None,
                 paper_pwc_id: str = None,
                 paper_results: dict = None,
                 model_description: str = None,
                 tokenization: Callable[[str], str] = None):
        """
        Creates an evaluator for one of the WMT benchmarks.

        :param dataset: Which dataset to evaluate on, e.g., WMTDataset.News2014.
        :param source_lang: Source language of the documents to translate.
        :param target_lang: Target language into which the documents are translated.
        :param local_root: Path to the directory where the dataset files are located locally.
            Ignored when run on sotabench server.
        :param source_dataset_filename: Local filename of the SGML file with the source documents.
            If None, the standard WMT filename is used, based on :param:`dataset`,
            :param:`source_lang` and :param:`target_lang`.
            Ignored when run on sotabench server.
        :param target_dataset_filename: Local filename of the SGML file with the reference documents.
            If None, the standard WMT filename is used, based on :param:`dataset`,
            :param:`source_lang` and :param:`target_lang`.
            Ignored when run on sotabench server.
        :param model_name: The name of the model from the
            paper - if you want to link your build to a model from a
            machine learning paper. See the WMT benchmarks pages for model names,
            (e.g., https://sotabench.com/benchmarks/machine-translation-on-wmt2014-english-german)
            on the "paper leaderboard" or "models yet to try" tabs.
        :param paper_arxiv_id: Optional linking to arXiv if you
            want to link to papers on the leaderboard; put in the
            corresponding paper's arXiv ID, e.g. '1907.06616'.
        :param paper_pwc_id: Optional linking to Papers With Code;
            put in the corresponding papers with code URL slug, e.g.
            'facebook-fairs-wmt19-news-translation-task'
        :param paper_results: If the paper model you are reproducing
            does not have model results on sotabench.com, you can specify
            the paper results yourself through this argument, where keys
            are metric names and values are metric values, e.g.:

                    {'SacreBLEU': 42.7, 'BLEU score': 43.1}.

            Ensure that the metric names match those on the sotabench
            leaderboard - for WMT benchmarks it should be `SacreBLEU` for de-tokenized
            case-sensitive BLEU score and `BLEU score` for tokenized BLEU.
        :param model_description: Optional model description.
        :param tokenization: An optional tokenization function to compute tokenized BLEU score.
            It takes a single string - a segment to tokenize, and returns a string with tokens
            separated by spaces, e.g.:

                    tokenization = lambda seg: seg.replace("'s", " 's").replace("-", " - ")

            If None, only de-tokenized SacreBLEU score is reported.
        """

        super().__init__(model_name, paper_arxiv_id, paper_pwc_id,
                         paper_results, model_description)
        self.root = change_root_if_server(root=local_root,
                                          server_root=".data/nlp/wmt")
        self.dataset = dataset
        self.source_lang = source_lang
        self.target_lang = target_lang

        default_src_fn, default_dst_fn = self._get_source_dataset_filename()
        if source_dataset_filename is None or is_server():
            source_dataset_filename = default_src_fn

        if target_dataset_filename is None or is_server():
            target_dataset_filename = default_dst_fn

        self.source_dataset_path = Path(self.root) / source_dataset_filename
        self.target_dataset_path = Path(self.root) / target_dataset_filename

        self.metrics = TranslationMetrics(self.source_dataset_path,
                                          self.target_dataset_path,
                                          tokenization)
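To show how the documented parameters fit together, here is a hedged usage sketch. The class name WMTEvaluator, the import path sotabencheval.machine_translation, and the Language enum members are assumptions based on this snippet; the dataset, arXiv ID, and tokenization lambda come from the docstring, and the model name is a placeholder.

# Minimal usage sketch; class name, import path and enum member names are assumed.
from sotabencheval.machine_translation import WMTEvaluator, WMTDataset, Language

evaluator = WMTEvaluator(
    dataset=WMTDataset.News2014,
    source_lang=Language.English,       # hypothetical enum member names
    target_lang=Language.German,
    local_root='data',
    model_name='My Translation Model',
    paper_arxiv_id='1907.06616',
    # optional: report tokenized BLEU alongside de-tokenized SacreBLEU
    tokenization=lambda seg: seg.replace("'s", " 's").replace("-", " - "),
)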
Example #8
    def __init__(
        self,
        root: str = '.',
        split: str = "val",
        dataset_year: str = "2017",
        model_name: str = None,
        paper_arxiv_id: str = None,
        paper_pwc_id: str = None,
        paper_results: dict = None,
        model_description=None,
    ):
        """Benchmarking function.

        Args:
            root (string): Root directory of the COCO Dataset - where the
                label data is located (or will be downloaded to).
            split (str): the split for COCO to use, e.g. 'val'
            dataset_year (str): the dataset year for COCO to use
            model_name (str, optional): The name of the model from the
                paper - if you want to link your build to a machine learning
                paper. See the COCO benchmark page,
                https://sotabench.com/benchmarks/object-detection-on-coco-minival,
                for model names, e.g. on the paper leaderboard tab.
            paper_arxiv_id (str, optional): Optional linking to arXiv if you
                want to link to papers on the leaderboard; put in the
                corresponding paper's arXiv ID, e.g. '1611.05431'.
            paper_pwc_id (str, optional): Optional linking to Papers With Code;
                put in the corresponding papers with code URL slug, e.g.
                'u-gat-it-unsupervised-generative-attentional'
            paper_results (dict, optional): If the paper you are reproducing
                does not have model results on sotabench.com, you can specify
                the paper results yourself through this argument, where keys
                are metric names and values are metric values, e.g.::

                    {'box AP': 0.349, 'AP50': 0.592, ...}.

                Ensure that the metric names match those on the sotabench
                leaderboard - for COCO it should be 'box AP', 'AP50',
                'AP75', 'APS', 'APM', 'APL'
            model_description (str, optional): Optional model description.
        """

        root = self.root = change_root_if_server(
            root=root, server_root="./.data/vision/coco")

        self.model_name = model_name
        self.paper_arxiv_id = paper_arxiv_id
        self.paper_pwc_id = paper_pwc_id
        self.paper_results = paper_results
        self.model_description = model_description
        self.split = split

        annFile = os.path.join(
            root,
            "annotations/instances_%s%s.json" % (self.split, dataset_year))

        self._download(annFile)

        self.coco = COCO(annFile)
        self.iou_types = ['bbox']
        self.coco_evaluator = CocoEvaluator(self.coco, self.iou_types)

        self.detections = []
        self.results = None
        self.first_batch_processed = False
        self.batch_hash = None
        self.cached_results = False

        self.speed_mem_metrics = {}

        self.init_time = time.time()
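Finally, a hedged usage sketch of this constructor. The class name COCOEvaluator and the import path sotabencheval.object_detection are assumptions based on this snippet; the model name is a placeholder and the metric names and values repeat the docstring's example.

# Minimal usage sketch; class name and import path are assumed, model name is hypothetical.
from sotabencheval.object_detection import COCOEvaluator

evaluator = COCOEvaluator(
    root='./data',                      # expects annotations/instances_val2017.json under this root
    split='val',
    dataset_year='2017',
    model_name='My Detection Model',
    paper_arxiv_id='1611.05431',
    paper_results={'box AP': 0.349, 'AP50': 0.592},
)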