Example #1
def run_on_all_states(f, index_slice=None):
    """Run `f` on every US state (optionally only a slice of them) and print a summary table."""
    states = list(us.STATES)
    if index_slice is not None:
        states = states[index_slice]
    run_task = catch_errors(f)
    results = [run_task(state) for state in states]

    successes = sum(result is Result.Success for result in results)
    errors = sum(result is Result.Error for result in results)
    printer = Printer()
    printer.info("Final result:")
    printer.info(f"{successes} were created successfully. {errors} errored.")
    printer.table(
        [
            (state, str(result) if result is not None else "Error")
            for state, result in zip(states, results)
        ],
        header=("State", "Created"),
        divider=True,
    )
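A minimal standalone sketch of the summary output above, assuming the Printer comes from the wasabi library (Result, catch_errors, and us.STATES are project-specific names not reproduced here):

from wasabi import Printer

# Hypothetical per-state outcomes; in the function above these come from run_task().
rows = [("Alabama", "Success"), ("Alaska", "Error"), ("Arizona", "Success")]

printer = Printer()
printer.info("Final result:")
printer.info("2 were created successfully. 1 errored.")
printer.table(rows, header=("State", "Created"), divider=True)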
Example #2
def print_summary(nlp, pretty=True, no_print=False):
    """Print a formatted summary for the current nlp object's pipeline. Shows
    a table with the pipeline components and why they assign and require, as
    well as any problems if available.
    nlp (Language): The nlp object.
    pretty (bool): Pretty-print the results (color etc).
    no_print (bool): Don't print anything, just return the data.
    RETURNS (dict): A dict with "overview" and "problems".
    """
    msg = Printer(pretty=pretty, no_print=no_print)
    overview = []
    problems = {}
    for i, (name, pipe) in enumerate(nlp.pipeline):
        requires = getattr(pipe, "requires", [])
        assigns = getattr(pipe, "assigns", [])
        retok = getattr(pipe, "retokenizes", False)
        overview.append((i, name, requires, assigns, retok))
        problems[name] = analyze_pipes(nlp.pipeline, name, pipe, i, warn=False)
    msg.divider("Pipeline Overview")
    header = ("#", "Component", "Requires", "Assigns", "Retokenizes")
    msg.table(overview, header=header, divider=True, multiline=True)
    n_problems = sum(len(p) for p in problems.values())
    if any(p for p in problems.values()):
        msg.divider("Problems ({})".format(n_problems))
        for name, problem in problems.items():
            if problem:
                problem = ", ".join(problem)
                msg.warn("'{}' requirements not met: {}".format(name, problem))
    else:
        msg.good("No problems found.")
    if no_print:
        return {"overview": overview, "problems": problems}
Example #3
def evaluate(
    model,
    data_path,
    gpu_id=-1,
    gold_preproc=False,
    displacy_path=None,
    displacy_limit=25,
    return_scores=False,
):
    """
    Evaluate a model. To render a sample of parses in an HTML file, set an
    output directory as the displacy_path argument.
    """
    msg = Printer()
    util.fix_random_seed()
    if gpu_id >= 0:
        util.use_gpu(gpu_id)
    util.set_env_log(False)
    data_path = util.ensure_path(data_path)
    displacy_path = util.ensure_path(displacy_path)
    if not data_path.exists():
        msg.fail("Evaluation data not found", data_path, exits=1)
    if displacy_path and not displacy_path.exists():
        msg.fail("Visualization output directory not found", displacy_path, exits=1)
    corpus = GoldCorpus(data_path, data_path)
    nlp = util.load_model(model)
    dev_docs = list(corpus.dev_docs(nlp, gold_preproc=gold_preproc))
    begin = timer()
    scorer = nlp.evaluate(dev_docs, verbose=False)
    end = timer()
    nwords = sum(len(doc_gold[0]) for doc_gold in dev_docs)
    results = {
        "Time": "%.2f s" % (end - begin),
        "Words": nwords,
        "Words/s": "%.0f" % (nwords / (end - begin)),
        "TOK": "%.2f" % scorer.token_acc,
        "POS": "%.2f" % scorer.tags_acc,
        "UAS": "%.2f" % scorer.uas,
        "LAS": "%.2f" % scorer.las,
        "NER P": "%.2f" % scorer.ents_p,
        "NER R": "%.2f" % scorer.ents_r,
        "NER F": "%.2f" % scorer.ents_f,
    }
    msg.table(results, title="Results")

    if displacy_path:
        docs, golds = zip(*dev_docs)
        render_deps = "parser" in nlp.meta.get("pipeline", [])
        render_ents = "ner" in nlp.meta.get("pipeline", [])
        render_parses(
            docs,
            displacy_path,
            model_name=model,
            limit=displacy_limit,
            deps=render_deps,
            ents=render_ents,
        )
        msg.good("Generated {} parses as HTML".format(displacy_limit), displacy_path)
    if return_scores:
        return scorer.scores
Example #4
def info(
    model: Optional[str] = None,
    *,
    markdown: bool = False,
    silent: bool = True,
    exclude: Optional[List[str]] = None,
) -> Union[str, dict]:
    msg = Printer(no_print=silent, pretty=not silent)
    if not exclude:
        exclude = []
    if model:
        title = f"Info about pipeline '{model}'"
        data = info_model(model, silent=silent)
    else:
        title = "Info about spaCy"
        data = info_spacy()
    raw_data = {k.lower().replace(" ", "_"): v for k, v in data.items()}
    if "Pipelines" in data and isinstance(data["Pipelines"], dict):
        data["Pipelines"] = ", ".join(f"{n} ({v})"
                                      for n, v in data["Pipelines"].items())
    markdown_data = get_markdown(data, title=title, exclude=exclude)
    if markdown:
        if not silent:
            print(markdown_data)
        return markdown_data
    if not silent:
        table_data = {k: v for k, v in data.items() if k not in exclude}
        msg.table(table_data, title=title)
    return raw_data
def print_textcats_auc_per_cat(msg: Printer,
                               scores: Dict[str, Dict[str, float]]) -> None:
    msg.table(
        [(k, f"{v:.2f}") for k, v in scores.items()],
        header=("", "ROC AUC"),
        aligns=("l", "r"),
        title="Textcat ROC AUC (per label)",
    )
def print_prf_per_type(msg: Printer, scores: Dict[str, Dict[str, float]],
                       name: str, type: str) -> None:
    data = [(k, f"{v['p']*100:.2f}", f"{v['r']*100:.2f}", f"{v['f']*100:.2f}")
            for k, v in scores.items()]
    msg.table(
        data,
        header=("", "P", "R", "F"),
        aligns=("l", "r", "r", "r"),
        title=f"{name} (per {type})",
    )
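For illustration, print_prf_per_type above expects a mapping from label to a dict with p, r and f values in the 0-1 range (the function multiplies by 100 itself); a hedged usage sketch with made-up NER scores:

from wasabi import Printer

# Invented per-label scores; real values would come from a scorer.
ner_scores = {
    "PERSON": {"p": 0.91, "r": 0.88, "f": 0.89},
    "ORG": {"p": 0.84, "r": 0.80, "f": 0.82},
}
msg = Printer()
print_prf_per_type(msg, ner_scores, name="NER", type="label")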
Example #7
def print_prf_per_type(msg: Printer, scores: Dict[str, Dict[str, float]],
                       name: str, type: str) -> None:
    data = []
    for key, value in scores.items():
        row = [key]
        for k in ("p", "r", "f"):
            v = value[k]
            row.append(f"{v * 100:.2f}" if isinstance(v, (int, float)) else v)
        data.append(row)
    msg.table(
        data,
        header=("", "P", "R", "F"),
        aligns=("l", "r", "r", "r"),
        title=f"{name} (per {type})",
    )
Example #8
def info(model=None, markdown=False, silent=False):
    """
    Print info about spaCy installation. If a model shortcut link is
    specified as an argument, print model information. Flag --markdown
    prints details in Markdown for easy copy-pasting to GitHub issues.
    """
    msg = Printer()
    if model:
        if util.is_package(model):
            model_path = util.get_package_path(model)
        else:
            model_path = util.get_data_path() / model
        meta_path = model_path / "meta.json"
        if not meta_path.is_file():
            msg.fail("Can't find model meta.json", meta_path, exits=1)
        meta = srsly.read_json(meta_path)
        if model_path.resolve() != model_path:
            meta["link"] = path2str(model_path)
            meta["source"] = path2str(model_path.resolve())
        else:
            meta["source"] = path2str(model_path)
        if not silent:
            title = "Info about model '{}'".format(model)
            model_meta = {
                k: v
                for k, v in meta.items() if k not in ("accuracy", "speed")
            }
            if markdown:
                print_markdown(model_meta, title=title)
            else:
                msg.table(model_meta, title=title)
        return meta
    data = {
        "spaCy version": about.__version__,
        "Location": path2str(Path(__file__).parent.parent),
        "Platform": platform.platform(),
        "Python version": platform.python_version(),
        "Models": list_models(),
    }
    if not silent:
        title = "Info about spaCy"
        if markdown:
            print_markdown(data, title=title)
        else:
            msg.table(data, title=title)
    return data
Example #9
def info(model=None, markdown=False, silent=False):
    """
    Print info about spaCy installation. If a model shortcut link is
    specified as an argument, print model information. Flag --markdown
    prints details in Markdown for easy copy-pasting to GitHub issues.
    """
    msg = Printer()
    if model:
        if util.is_package(model):
            model_path = util.get_package_path(model)
        else:
            model_path = util.get_data_path() / model
        meta_path = model_path / "meta.json"
        if not meta_path.is_file():
            msg.fail("Can't find model meta.json", meta_path, exits=1)
        meta = srsly.read_json(meta_path)
        if model_path.resolve() != model_path:
            meta["link"] = path2str(model_path)
            meta["source"] = path2str(model_path.resolve())
        else:
            meta["source"] = path2str(model_path)
        if not silent:
            title = "Info about model '{}'".format(model)
            model_meta = {
                k: v for k, v in meta.items() if k not in ("accuracy", "speed")
            }
            if markdown:
                print_markdown(model_meta, title=title)
            else:
                msg.table(model_meta, title=title)
        return meta
    data = {
        "spaCy version": about.__version__,
        "Location": path2str(Path(__file__).parent.parent),
        "Platform": platform.platform(),
        "Python version": platform.python_version(),
        "Models": list_models(),
    }
    if not silent:
        title = "Info about spaCy"
        if markdown:
            print_markdown(data, title=title)
        else:
            msg.table(data, title=title)
    return data
Example #10
    def evaluate(self, data: List[Example]):
        msg = Printer()
        formatted_data, _ = self._format_data(data)
        sc = self.nlp.evaluate(formatted_data, batch_size=64)
        msg.divider("Recognizer Results")
        result = [
            ("Precision", f"{sc.ents_p:.3f}"),
            ("Recall", f"{sc.ents_r:.3f}"),
            ("F-Score", f"{sc.ents_f:.3f}"),
        ]
        msg.table(result)

        table_data = []
        for label, scores in sorted(sc.ents_per_type.items(),
                                    key=lambda tup: tup[0]):
            table_data.append((label, f"{scores['p']:.3f}",
                               f"{scores['r']:.3f}", f"{scores['f']:.3f}"))
        header = ("Label", "Precision", "Recall", "F-Score")
        msg.table(table_data, header=header, divider=True)
        return sc
Example #11
def evaluate(
    model: str,
    data_path: Path,
    output: Optional[Path] = None,
    use_gpu: int = -1,
    gold_preproc: bool = False,
    displacy_path: Optional[Path] = None,
    displacy_limit: int = 25,
    silent: bool = True,
    spans_key: str = "sc",
) -> Dict[str, Any]:
    msg = Printer(no_print=silent, pretty=not silent)
    fix_random_seed()
    setup_gpu(use_gpu, silent=silent)
    data_path = util.ensure_path(data_path)
    output_path = util.ensure_path(output)
    displacy_path = util.ensure_path(displacy_path)
    if not data_path.exists():
        msg.fail("Evaluation data not found", data_path, exits=1)
    if displacy_path and not displacy_path.exists():
        msg.fail("Visualization output directory not found", displacy_path, exits=1)
    corpus = Corpus(data_path, gold_preproc=gold_preproc)
    nlp = util.load_model(model)
    dev_dataset = list(corpus(nlp))
    scores = nlp.evaluate(dev_dataset)
    metrics = {
        "TOK": "token_acc",
        "TAG": "tag_acc",
        "POS": "pos_acc",
        "MORPH": "morph_acc",
        "LEMMA": "lemma_acc",
        "UAS": "dep_uas",
        "LAS": "dep_las",
        "NER P": "ents_p",
        "NER R": "ents_r",
        "NER F": "ents_f",
        "TEXTCAT": "cats_score",
        "SENT P": "sents_p",
        "SENT R": "sents_r",
        "SENT F": "sents_f",
        "SPAN P": f"spans_{spans_key}_p",
        "SPAN R": f"spans_{spans_key}_r",
        "SPAN F": f"spans_{spans_key}_f",
        "SPEED": "speed",
    }
    results = {}
    data = {}
    for metric, key in metrics.items():
        if key in scores:
            if key == "cats_score":
                metric = metric + " (" + scores.get("cats_score_desc", "unk") + ")"
            if isinstance(scores[key], (int, float)):
                if key == "speed":
                    results[metric] = f"{scores[key]:.0f}"
                else:
                    results[metric] = f"{scores[key]*100:.2f}"
            else:
                results[metric] = "-"
            data[re.sub(r"[\s/]", "_", key.lower())] = scores[key]

    msg.table(results, title="Results")
    data = handle_scores_per_type(scores, data, spans_key=spans_key, silent=silent)

    if displacy_path:
        factory_names = [nlp.get_pipe_meta(pipe).factory for pipe in nlp.pipe_names]
        docs = list(nlp.pipe(ex.reference.text for ex in dev_dataset[:displacy_limit]))
        render_deps = "parser" in factory_names
        render_ents = "ner" in factory_names
        render_parses(
            docs,
            displacy_path,
            model_name=model,
            limit=displacy_limit,
            deps=render_deps,
            ents=render_ents,
        )
        msg.good(f"Generated {displacy_limit} parses as HTML", displacy_path)

    if output_path is not None:
        srsly.write_json(output_path, data)
        msg.good(f"Saved results to {output_path}")
    return data
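The metric-formatting loop above can be exercised on its own; a small sketch with an invented scores dict (keys mirror the mapping above, values are fictional):

scores = {"token_acc": 0.998, "dep_uas": 0.912, "speed": 15342.7, "ents_f": None}
metrics = {"TOK": "token_acc", "UAS": "dep_uas", "NER F": "ents_f", "SPEED": "speed"}

results = {}
for metric, key in metrics.items():
    if key in scores:
        if isinstance(scores[key], (int, float)):
            results[metric] = f"{scores[key]:.0f}" if key == "speed" else f"{scores[key] * 100:.2f}"
        else:
            results[metric] = "-"
# results == {"TOK": "99.80", "UAS": "91.20", "NER F": "-", "SPEED": "15343"}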
Example #12
def validate():
    """
    Validate that the currently installed version of spaCy is compatible
    with the installed models. Should be run after `pip install -U spacy`.
    """
    msg = Printer()
    with msg.loading("Loading compatibility table..."):
        r = requests.get(about.__compatibility__)
        if r.status_code != 200:
            msg.fail(
                "Server error ({})".format(r.status_code),
                "Couldn't fetch compatibility table.",
                exits=1,
            )
    msg.good("Loaded compatibility table")
    compat = r.json()["spacy"]
    version = about.__version__
    version = version.rsplit(".dev", 1)[0]
    current_compat = compat.get(version)
    if not current_compat:
        msg.fail(
            "Can't find spaCy v{} in compatibility table".format(version),
            about.__compatibility__,
            exits=1,
        )
    all_models = set()
    for spacy_v, models in dict(compat).items():
        all_models.update(models.keys())
        for model, model_vs in models.items():
            compat[spacy_v][model] = [reformat_version(v) for v in model_vs]
    model_links = get_model_links(current_compat)
    model_pkgs = get_model_pkgs(current_compat, all_models)
    incompat_links = {l for l, d in model_links.items() if not d["compat"]}
    incompat_models = {
        d["name"]
        for _, d in model_pkgs.items() if not d["compat"]
    }
    incompat_models.update(
        [d["name"] for _, d in model_links.items() if not d["compat"]])
    na_models = [m for m in incompat_models if m not in current_compat]
    update_models = [m for m in incompat_models if m in current_compat]
    spacy_dir = Path(__file__).parent.parent

    msg.divider("Installed models (spaCy v{})".format(about.__version__))
    msg.info("spaCy installation: {}".format(path2str(spacy_dir)))

    if model_links or model_pkgs:
        header = ("TYPE", "NAME", "MODEL", "VERSION", "")
        rows = []
        for name, data in model_pkgs.items():
            rows.append(get_model_row(current_compat, name, data, msg))
        for name, data in model_links.items():
            rows.append(get_model_row(current_compat, name, data, msg, "link"))
        msg.table(rows, header=header)
    else:
        msg.text("No models found in your current environment.", exits=0)
    if update_models:
        msg.divider("Install updates")
        msg.text("Use the following commands to update the model packages:")
        cmd = "python -m spacy download {}"
        print("\n".join([cmd.format(pkg) for pkg in update_models]) + "\n")
    if na_models:
        msg.text("The following models are not available for spaCy "
                 "v{}: {}".format(about.__version__, ", ".join(na_models)))
    if incompat_links:
        msg.text(
            "You may also want to overwrite the incompatible links using the "
            "`python -m spacy link` command with `--force`, or remove them "
            "from the data directory. "
            "Data path: {path}".format(path=path2str(get_data_path())))
    if incompat_models or incompat_links:
        sys.exit(1)
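The compatibility check above boils down to dict and set lookups; a toy sketch with a fabricated compatibility table rather than the real one fetched from about.__compatibility__:

# Model versions compatible with the "current" spaCy release (fabricated data).
current_compat = {"en_core_web_sm": ["2.2.0", "2.2.5"]}
# Versions actually installed in the environment (also fabricated).
installed = {"en_core_web_sm": "2.1.0", "de_core_news_sm": "2.2.5"}

incompat = {
    name: version
    for name, version in installed.items()
    if version not in current_compat.get(name, [])
}
na_models = [m for m in incompat if m not in current_compat]      # no compatible version exists
update_models = [m for m in incompat if m in current_compat]      # an update would fix it
# na_models == ["de_core_news_sm"], update_models == ["en_core_web_sm"]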
Example #13
class PrecisionRecallFMeasure(BaseMetric, ClassNursery):
    def __init__(self, idx2labelname_mapping: Optional[Dict[int, str]] = None):
        """

        Parameters
        ----------
        idx2labelname_mapping : Optional[Dict[int, str]]
            Mapping from index to label name. If this is not provided,
            the class indices are used in all the reports
        """
        super(PrecisionRecallFMeasure, self).__init__()
        self.idx2labelname_mapping = idx2labelname_mapping
        self.msg_printer = Printer()
        self.classification_metrics_utils = ClassificationMetricsUtils(
            idx2labelname_mapping=idx2labelname_mapping
        )

        # setup counters to calculate true positives, false positives,
        # false negatives and true negatives
        # The keys are the different class indices in the dataset and the
        # values are the number of true positives, false positives, false negatives
        # and true negatives for the dataset.

        self.tp_counter = {}
        self.fp_counter = {}
        self.fn_counter = {}
        self.tn_counter = {}

    def print_confusion_metrics(
        self,
        predicted_probs: torch.FloatTensor,
        labels: torch.LongTensor,
        labels_mask: Optional[torch.ByteTensor] = None,
    ) -> None:
        """ Prints confusion matrix

        Parameters
        ----------
        predicted_probs : torch.FloatTensor
            Predicted Probabilities ``[batch_size, num_classes]``
        labels : torch.LongTensor
            True labels of the size ``[batch_size, 1]``
        labels_mask : Optional[torch.ByteTensor]
            Labels mask with 1 in those places where the true label is ignored
            and 0 otherwise. It should be of the same size as labels

        """
        assert predicted_probs.ndimension() == 2, self.msg_printer.fail(
            "The predicted probs should "
            "have 2 dimensions. The probs "
            "that you passed have shape "
            "{0}".format(predicted_probs.size())
        )

        assert labels.ndimension() == 2, self.msg_printer.fail(
            "The labels should have 2 dimension."
            "The labels that you passed have shape "
            "{0}".format(labels.size())
        )

        if labels_mask is None:
            labels_mask = torch.zeros_like(labels).type(torch.ByteTensor)

        # TODO: for now k=1, change it to different number of ks
        top_probs, top_indices = predicted_probs.topk(k=1, dim=1)

        # convert to 1d numpy
        top_indices_numpy = top_indices.cpu().numpy().tolist()

        # convert labels to 1 dimension
        true_labels_numpy = labels.cpu().numpy().tolist()

        confusion_mtrx, classes = self.classification_metrics_utils.get_confusion_matrix_and_labels(
            predicted_tag_indices=top_indices_numpy,
            true_tag_indices=true_labels_numpy,
            masked_label_indices=labels_mask,
        )

        if self.idx2labelname_mapping is not None:
            classes_with_names = [
                f"cls_{class_}({self.idx2labelname_mapping[class_]})"
                for class_ in classes
            ]
        else:
            classes_with_names = classes

        assert (
            len(classes) == confusion_mtrx.shape[1]
        ), f"len(classes) = {len(classes)} confusion matrix shape {confusion_mtrx.shape}"

        header = [f"{class_}" for class_ in classes]
        header.insert(0, "pred(cols)/true(rows)")

        confusion_mtrx = pd.DataFrame(confusion_mtrx)
        confusion_mtrx.insert(0, "class_name", classes_with_names)

        self.msg_printer.table(
            data=confusion_mtrx.values.tolist(), header=header, divider=True
        )

    def calc_metric(
        self, iter_dict: Dict[str, Any], model_forward_dict: Dict[str, Any]
    ) -> None:
        """ Updates the values being tracked for calculating the metric

        For Precision Recall FMeasure we update the true positive,
        false positive and false negative of the different classes
        being tracked

        Parameters
        ----------
        iter_dict : Dict[str, Any]
            The ``iter_dict`` from the dataset is expected to have
            ``label``, the labels for the instances, usually of size
            ``[batch_size]``. Optionally there can be a ``label_mask`` of size
            ``[batch_size]``; the ``label_mask`` is 1 where the label should be
            masked and 0 otherwise

        model_forward_dict : Dict[str, Any]
            The dictionary obtained after a forward pass. The
            ``model_forward_dict`` is expected to have ``normalized_probs``,
            usually of size ``[batch_size, num_classes]``
        """

        normalized_probs = model_forward_dict["normalized_probs"]
        labels = iter_dict["label"]
        labels_mask = iter_dict.get("label_mask")
        if labels_mask is None:
            labels_mask = torch.zeros_like(labels).type(torch.ByteTensor)

        normalized_probs = normalized_probs.cpu()
        labels = labels.cpu()

        assert normalized_probs.ndimension() == 2, self.msg_printer.fail(
            "The predicted probs should "
            "have 2 dimensions. The probs "
            "that you passed have shape "
            "{0}".format(normalized_probs.size())
        )

        assert labels.ndimension() == 2, self.msg_printer.fail(
            "The labels should have 2 dimension."
            "The labels that you passed have shape "
            "{0}".format(labels.size())
        )

        # TODO: for now k=1, change it to different number of ks
        top_probs, top_indices = normalized_probs.topk(k=1, dim=1)

        # convert to 1d numpy
        top_indices_numpy = top_indices.cpu().numpy().tolist()

        # convert labels to 1 dimension
        true_labels_numpy = labels.cpu().numpy().tolist()

        labels_mask = labels_mask.tolist()

        confusion_mtrx, classes = self.classification_metrics_utils.get_confusion_matrix_and_labels(
            true_tag_indices=true_labels_numpy,
            predicted_tag_indices=top_indices_numpy,
            masked_label_indices=labels_mask,
        )

        # Standard way of deriving tps, fps and fns from a confusion matrix;
        # see https://stackoverflow.com/a/43331484/2704763

        # calculate tps
        tps = np.around(np.diag(confusion_mtrx), decimals=4)

        # calculate fps
        fps = np.around(np.sum(confusion_mtrx, axis=0) - tps, decimals=4)

        # calculate fns
        fns = np.around(np.sum(confusion_mtrx, axis=1) - tps, decimals=4)

        tps = tps.tolist()
        fps = fps.tolist()
        fns = fns.tolist()

        class_tps_mapping = dict(zip(classes, tps))
        class_fps_mapping = dict(zip(classes, fps))
        class_fns_mapping = dict(zip(classes, fns))

        self.tp_counter = merge_dictionaries_with_sum(
            self.tp_counter, class_tps_mapping
        )
        self.fp_counter = merge_dictionaries_with_sum(
            self.fp_counter, class_fps_mapping
        )
        self.fn_counter = merge_dictionaries_with_sum(
            self.fn_counter, class_fns_mapping
        )

    def get_metric(self) -> Dict[str, Any]:
        """ Returns different values being tracked to calculate Precision Recall FMeasure

        Returns
        -------
        Dict[str, Any]
            Returns a dictionary with the following key value pairs

            precision: Dict[str, float]
                The precision for different classes
            recall: Dict[str, float]
                The recall values for different classes
            fscore: Dict[str, float]
                The fscore values for different classes,
            num_tp: Dict[str, int]
                The number of true positives for different classes,
            num_fp: Dict[str, int]
                The number of false positives for different classes,
            num_fn: Dict[str, int]
                The number of false negatives for different classes
            "macro_precision": float
                The macro precision value considering all different classes,
            macro_recall: float
                The macro recall value considering all different classes
            macro_fscore: float
                The macro fscore value considering all different classes
            micro_precision: float
                The micro precision value considering all different classes,
            micro_recall: float
                The micro recall value considering all different classes.
            micro_fscore: float
                The micro fscore value considering all different classes

        """
        precision_dict, recall_dict, fscore_dict = self.classification_metrics_utils.get_prf_from_counters(
            tp_counter=self.tp_counter,
            fp_counter=self.fp_counter,
            fn_counter=self.fn_counter,
        )

        # macro scores
        # for a detailed discussion on micro and macro scores please follow the discussion @
        # https://datascience.stackexchange.com/questions/15989/micro-average-vs-macro-average-performance-in-a-multiclass-classification-settin

        # micro scores
        micro_precision, micro_recall, micro_fscore = self.classification_metrics_utils.get_micro_prf_from_counters(
            tp_counter=self.tp_counter,
            fp_counter=self.fp_counter,
            fn_counter=self.fn_counter,
        )

        # macro scores
        macro_precision, macro_recall, macro_fscore = self.classification_metrics_utils.get_macro_prf_from_prf_dicts(
            precision_dict=precision_dict,
            recall_dict=recall_dict,
            fscore_dict=fscore_dict,
        )

        return {
            "precision": precision_dict,
            "recall": recall_dict,
            "fscore": fscore_dict,
            "num_tp": self.tp_counter,
            "num_fp": self.fp_counter,
            "num_fn": self.fn_counter,
            "macro_precision": macro_precision,
            "macro_recall": macro_recall,
            "macro_fscore": macro_fscore,
            "micro_precision": micro_precision,
            "micro_recall": micro_recall,
            "micro_fscore": micro_fscore,
        }

    def reset(self) -> None:
        """ Resets all the counters

        Resets the ``tp_counter`` which is the true positive counter
        Resets the ``fp_counter`` which is the false positive counter
        Resets the ``fn_counter`` which is the false negative counter
        Resets the ``tn_counter`` which is the true negative counter

        """
        self.tp_counter = {}
        self.fp_counter = {}
        self.fn_counter = {}
        self.tn_counter = {}

    def report_metrics(self, report_type="wasabi"):
        """ Reports metrics in a printable format

        Parameters
        ----------
        report_type : str
            Select one of ``[wasabi, paper]``
            If ``wasabi``, we return a printable table that represents the
            precision, recall and fmeasures for the different classes
            If ``paper``, only the fscores are returned

        """

        accuracy_metrics = self.get_metric()
        precision = accuracy_metrics["precision"]
        recall = accuracy_metrics["recall"]
        fscore = accuracy_metrics["fscore"]
        macro_precision = accuracy_metrics["macro_precision"]
        macro_recall = accuracy_metrics["macro_recall"]
        macro_fscore = accuracy_metrics["macro_fscore"]
        micro_precision = accuracy_metrics["micro_precision"]
        micro_recall = accuracy_metrics["micro_recall"]
        micro_fscore = accuracy_metrics["micro_fscore"]

        if report_type == "wasabi":
            table = self.classification_metrics_utils.generate_table_report_from_counters(
                tp_counter=self.tp_counter,
                fp_counter=self.fp_counter,
                fn_counter=self.fn_counter,
            )
            return table

        elif report_type == "paper":
            "Refer to the paper Logical Structure Recovery in Scholarly Articles with " "Rich Document Features Table 2. It generates just fscores and returns"
            class_nums = fscore.keys()
            class_nums = sorted(class_nums, reverse=False)
            fscores = [fscore[class_num] for class_num in class_nums]
            fscores.extend([micro_fscore, macro_fscore])
            return fscores
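The tp/fp/fn bookkeeping in calc_metric above follows the usual confusion-matrix identities; a small numpy sketch with made-up counts:

import numpy as np

# 3-class confusion matrix, rows = true class, columns = predicted class (invented counts).
cm = np.array([
    [5, 1, 0],
    [2, 7, 1],
    [0, 0, 4],
])
tps = np.diag(cm)           # correct predictions per class   -> [5, 7, 4]
fps = cm.sum(axis=0) - tps  # predicted as the class, wrongly -> [2, 1, 1]
fns = cm.sum(axis=1) - tps  # instances of the class missed   -> [1, 3, 0]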
Example #14
def validate():
    """
    Validate that the currently installed version of spaCy is compatible
    with the installed models. Should be run after `pip install -U spacy`.
    """
    msg = Printer()
    with msg.loading("Loading compatibility table..."):
        r = requests.get(about.__compatibility__)
        if r.status_code != 200:
            msg.fail(
                "Server error ({})".format(r.status_code),
                "Couldn't fetch compatibility table.",
                exits=1,
            )
    msg.good("Loaded compatibility table")
    compat = r.json()["spacy"]
    version = about.__version__
    version = version.rsplit(".dev", 1)[0]
    current_compat = compat.get(version)
    if not current_compat:
        msg.fail(
            "Can't find spaCy v{} in compatibility table".format(version),
            about.__compatibility__,
            exits=1,
        )
    all_models = set()
    for spacy_v, models in dict(compat).items():
        all_models.update(models.keys())
        for model, model_vs in models.items():
            compat[spacy_v][model] = [reformat_version(v) for v in model_vs]
    model_links = get_model_links(current_compat)
    model_pkgs = get_model_pkgs(current_compat, all_models)
    incompat_links = {l for l, d in model_links.items() if not d["compat"]}
    incompat_models = {d["name"] for _, d in model_pkgs.items() if not d["compat"]}
    incompat_models.update(
        [d["name"] for _, d in model_links.items() if not d["compat"]]
    )
    na_models = [m for m in incompat_models if m not in current_compat]
    update_models = [m for m in incompat_models if m in current_compat]
    spacy_dir = Path(__file__).parent.parent

    msg.divider("Installed models (spaCy v{})".format(about.__version__))
    msg.info("spaCy installation: {}".format(path2str(spacy_dir)))

    if model_links or model_pkgs:
        header = ("TYPE", "NAME", "MODEL", "VERSION", "")
        rows = []
        for name, data in model_pkgs.items():
            rows.append(get_model_row(current_compat, name, data, msg))
        for name, data in model_links.items():
            rows.append(get_model_row(current_compat, name, data, msg, "link"))
        msg.table(rows, header=header)
    else:
        msg.text("No models found in your current environment.", exits=0)
    if update_models:
        msg.divider("Install updates")
        msg.text("Use the following commands to update the model packages:")
        cmd = "python -m spacy download {}"
        print("\n".join([cmd.format(pkg) for pkg in update_models]) + "\n")
    if na_models:
        msg.text(
            "The following models are not available for spaCy "
            "v{}: {}".format(about.__version__, ", ".join(na_models))
        )
    if incompat_links:
        msg.text(
            "You may also want to overwrite the incompatible links using the "
            "`python -m spacy link` command with `--force`, or remove them "
            "from the data directory. "
            "Data path: {path}".format(path=path2str(get_data_path()))
        )
    if incompat_models or incompat_links:
        sys.exit(1)
Example #15
class PrecisionRecallFMeasure(BaseMetric, ClassNursery):
    def __init__(self, datasets_manager: DatasetsManager):
        """

        Parameters
        ----------
        datasets_manager : DatasetsManager
            The dataset manager managing the labels and other information
        """
        super(PrecisionRecallFMeasure,
              self).__init__(datasets_manager=datasets_manager)
        self.datasets_manager = datasets_manager
        self.idx2labelname_mapping = None
        self.msg_printer = Printer()
        self.classification_metrics_utils = ClassificationMetricsUtils()
        self.label_namespace = self.datasets_manager.label_namespaces[0]
        self.normalized_probs_namespace = "normalized_probs"
        self.label_numericalizer = self.datasets_manager.namespace_to_numericalizer[
            self.label_namespace]

        # setup counters to calculate true positives, false positives,
        # false negatives and true negatives
        # The keys are the different class indices in the dataset and the
        # values are the number of true positives, false positives, false negatives
        # and true negatives for the dataset.

        self.tp_counter = {}
        self.fp_counter = {}
        self.fn_counter = {}
        self.tn_counter = {}

    def print_confusion_metrics(
        self,
        predicted_probs: torch.FloatTensor,
        labels: torch.LongTensor,
        labels_mask: Optional[torch.ByteTensor] = None,
    ) -> None:
        """ Prints confusion matrix

        Parameters
        ----------
        predicted_probs : torch.FloatTensor
            Predicted Probabilities ``[batch_size, num_classes]``
        labels : torch.LongTensor
            True labels of the size ``[batch_size, 1]``
        labels_mask : Optional[torch.ByteTensor]
            Labels mask with 1 in those places where the true label is ignored
            and 0 otherwise. It should be of the same size as labels

        """
        assert predicted_probs.ndimension() == 2, self.msg_printer.fail(
            "The predicted probs should "
            "have 2 dimensions. The probs "
            "that you passed have shape "
            "{0}".format(predicted_probs.size()))

        assert labels.ndimension() == 2, self.msg_printer.fail(
            "The labels should have 2 dimension."
            "The labels that you passed have shape "
            "{0}".format(labels.size()))

        if labels_mask is None:
            labels_mask = torch.zeros_like(labels, dtype=torch.bool)

        # TODO: for now k=1, change it to different number of ks
        top_probs, top_indices = predicted_probs.topk(k=1, dim=1)

        # convert to 1d numpy
        top_indices_numpy = top_indices.cpu().numpy().tolist()

        # convert labels to 1 dimension
        true_labels_numpy = labels.cpu().numpy().tolist()

        (
            confusion_mtrx,
            classes,
        ) = self.classification_metrics_utils.get_confusion_matrix_and_labels(
            predicted_tag_indices=top_indices_numpy,
            true_tag_indices=true_labels_numpy,
            true_masked_label_indices=labels_mask,
        )

        if self.idx2labelname_mapping is not None:
            classes_with_names = [
                f"cls_{class_}({self.idx2labelname_mapping[class_]})"
                for class_ in classes
            ]
        else:
            classes_with_names = classes

        assert (
            len(classes) == confusion_mtrx.shape[1]
        ), f"len(classes) = {len(classes)} confusion matrix shape {confusion_mtrx.shape}"

        header = [f"{class_}" for class_ in classes]
        header.insert(0, "pred(cols)/true(rows)")

        confusion_mtrx = pd.DataFrame(confusion_mtrx)
        confusion_mtrx.insert(0, "class_name", classes_with_names)

        self.msg_printer.table(data=confusion_mtrx.values.tolist(),
                               header=header,
                               divider=True)

    def calc_metric(self, lines: List[Line], labels: List[Label],
                    model_forward_dict: Dict[str, Any]) -> None:
        """ Updates the values being tracked for calculating the metric

        For Precision Recall FMeasure we update the true positive,
        false positive and false negative of the different classes
        being tracked

        Parameters
        ----------
        lines : List[Line]
            A list of lines
        labels : List[Label]
            A list of labels. These have to be the labels used for classification.
            Refer to the documentation of Label for more information

        model_forward_dict : Dict[str, Any]
            The dictionary obtained after a forward pass. The
            ``model_forward_dict`` is expected to have ``normalized_probs``,
            usually of size ``[batch_size, num_classes]``
        """

        normalized_probs = model_forward_dict[self.normalized_probs_namespace]

        labels_tensor = []
        for label in labels:
            tokens = label.tokens[self.label_namespace]
            tokens = [tok.text for tok in tokens]
            numericalized_instance = self.label_numericalizer.numericalize_instance(
                instance=tokens)

            labels_tensor.extend(numericalized_instance)

        labels_tensor = torch.LongTensor(labels_tensor)
        labels_tensor = labels_tensor.view(-1, 1)
        labels_mask = torch.zeros_like(labels_tensor).type(torch.ByteTensor)

        normalized_probs = normalized_probs.cpu()

        assert normalized_probs.ndimension() == 2, self.msg_printer.fail(
            "The predicted probs should "
            "have 2 dimensions. The probs "
            "that you passed have shape "
            "{0}".format(normalized_probs.size()))

        assert labels_tensor.ndimension() == 2, self.msg_printer.fail(
            "The labels should have 2 dimension."
            "The labels that you passed have shape "
            "{0}".format(labels_tensor.size()))

        # TODO: for now k=1, change it to different number of ks
        top_probs, top_indices = normalized_probs.topk(k=1, dim=1)

        # convert to 1d numpy
        top_indices_numpy = top_indices.cpu().numpy().tolist()

        # convert labels to 1 dimension
        true_labels_numpy = labels_tensor.cpu().numpy().tolist()

        labels_mask = labels_mask.tolist()

        (
            confusion_mtrx,
            classes,
        ) = self.classification_metrics_utils.get_confusion_matrix_and_labels(
            true_tag_indices=true_labels_numpy,
            predicted_tag_indices=top_indices_numpy,
            true_masked_label_indices=labels_mask,
        )

        # Standard way of deriving tps, fps and fns from a confusion matrix;
        # see https://stackoverflow.com/a/43331484/2704763

        # calculate tps
        tps = np.around(np.diag(confusion_mtrx), decimals=4)

        # calculate fps
        fps = np.around(np.sum(confusion_mtrx, axis=0) - tps, decimals=4)

        # calculate fns
        fns = np.around(np.sum(confusion_mtrx, axis=1) - tps, decimals=4)

        tps = tps.tolist()
        fps = fps.tolist()
        fns = fns.tolist()

        class_tps_mapping = dict(zip(classes, tps))
        class_fps_mapping = dict(zip(classes, fps))
        class_fns_mapping = dict(zip(classes, fns))

        self.tp_counter = merge_dictionaries_with_sum(self.tp_counter,
                                                      class_tps_mapping)
        self.fp_counter = merge_dictionaries_with_sum(self.fp_counter,
                                                      class_fps_mapping)
        self.fn_counter = merge_dictionaries_with_sum(self.fn_counter,
                                                      class_fns_mapping)

    def get_metric(self) -> Dict[str, Any]:
        """ Returns different values being tracked to calculate Precision Recall FMeasure

        Returns
        -------
        Dict[str, Any]
            Returns a dictionary with the following key value pairs for every namespace

            precision: Dict[str, float]
                The precision for different classes
            recall: Dict[str, float]
                The recall values for different classes
            fscore: Dict[str, float]
                The fscore values for different classes,
            num_tp: Dict[str, int]
                The number of true positives for different classes,
            num_fp: Dict[str, int]
                The number of false positives for different classes,
            num_fn: Dict[str, int]
                The number of false negatives for different classes
            "macro_precision": float
                The macro precision value considering all different classes,
            macro_recall: float
                The macro recall value considering all different classes
            macro_fscore: float
                The macro fscore value considering all different classes
            micro_precision: float
                The micro precision value considering all different classes,
            micro_recall: float
                The micro recall value considering all different classes.
            micro_fscore: float
                The micro fscore value considering all different classes

        """
        (
            precision_dict,
            recall_dict,
            fscore_dict,
        ) = self.classification_metrics_utils.get_prf_from_counters(
            tp_counter=self.tp_counter,
            fp_counter=self.fp_counter,
            fn_counter=self.fn_counter,
        )

        # macro scores
        # for a detailed discussion on micro and macro scores please follow the discussion @
        # https://datascience.stackexchange.com/questions/15989/micro-average-vs-macro-average-performance-in-a-multiclass-classification-settin

        # micro scores
        (
            micro_precision,
            micro_recall,
            micro_fscore,
        ) = self.classification_metrics_utils.get_micro_prf_from_counters(
            tp_counter=self.tp_counter,
            fp_counter=self.fp_counter,
            fn_counter=self.fn_counter,
        )

        # macro scores
        (
            macro_precision,
            macro_recall,
            macro_fscore,
        ) = self.classification_metrics_utils.get_macro_prf_from_prf_dicts(
            precision_dict=precision_dict,
            recall_dict=recall_dict,
            fscore_dict=fscore_dict,
        )

        metric = {
            self.label_namespace: {
                "precision": precision_dict,
                "recall": recall_dict,
                "fscore": fscore_dict,
                "num_tp": self.tp_counter,
                "num_fp": self.fp_counter,
                "num_fn": self.fn_counter,
                "macro_precision": macro_precision,
                "macro_recall": macro_recall,
                "macro_fscore": macro_fscore,
                "micro_precision": micro_precision,
                "micro_recall": micro_recall,
                "micro_fscore": micro_fscore,
            }
        }

        return metric

    def reset(self) -> None:
        """ Resets all the counters

        Resets the ``tp_counter`` which is the true positive counter
        Resets the ``fp_counter`` which is the false positive counter
        Resets the ``fn_counter`` which is the false negative counter
        Resets the ``tn_counter`` which is the true negative counter

        """
        self.tp_counter = {}
        self.fp_counter = {}
        self.fn_counter = {}
        self.tn_counter = {}

    def report_metrics(self, report_type="wasabi"):
        """ Reports metrics in a printable format

        Parameters
        ----------
        report_type : str
            Select one of ``[wasabi, paper]``
            If ``wasabi``, we return a printable table that represents the
            precision, recall and fmeasures for the different classes

        """
        if report_type == "wasabi":
            table = self.classification_metrics_utils.generate_table_report_from_counters(
                tp_counter=self.tp_counter,
                fp_counter=self.fp_counter,
                fn_counter=self.fn_counter,
            )
            return {self.label_namespace: table}
Example #16
def top_prediction_errors(
    recognizer: EntityRecognizer,
    data: List[Example],
    labels: Optional[List[str]] = None,
    n: Optional[int] = None,
    k: Optional[int] = None,
    exclude_fp: bool = False,
    exclude_fn: bool = False,
    verbose: bool = False,
) -> List[PredictionError]:
    """Get a sorted list of examples your model is worst at predicting.

    Args:
        recognizer (EntityRecognizer): An instance of EntityRecognizer
        data (List[Example]): List of annotated Examples
        labels (List[str], optional): List of labels to get errors for.
            Defaults to the labels property of `recognizer`.
        n (int, optional): If set, only use the top n examples from data.
        k (int, optional): If set, return the top k prediction errors, otherwise the whole list.
        exclude_fp (bool, optional): Flag to exclude False Positive errors.
        exclude_fn (bool, optional): Flag to exclude False Negative errors.
        verbose (bool, optional): Show verbose output.

    Returns:
        List[PredictionError]: List of Prediction Errors your model is making, sorted by the
            spans your model has the most trouble with.
    """
    labels_ = labels or recognizer.labels
    if n is not None:
        data = data[:n]

    n_examples = len(data)
    texts = (e.text for e in data)
    anns = (e.spans for e in data)

    errors = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))  # type: ignore
    error_examples: DefaultDict[str, List[PredictionErrorExamplePair]] = defaultdict(list)
    n_errors = 0

    for orig_example, pred_example, ann in zip(data, recognizer.predict(texts), anns):
        if k is not None and n_errors > k:
            break

        pred_error_example_pair = PredictionErrorExamplePair(
            original=orig_example, predicted=pred_example
        )

        cand = set([(s.start, s.end, s.label) for s in pred_example.spans])
        gold = set([(s.start, s.end, s.label) for s in ann])

        fp_diff = cand - gold
        fn_diff = gold - cand

        seen = set()

        if fp_diff and not exclude_fp:
            for fp in fp_diff:
                gold_ent = None
                for ge in gold:
                    if fp[0] == ge[0] and fp[1] == ge[1]:
                        gold_ent = ge
                        break
                if gold_ent:
                    start, end, label = gold_ent
                    text = pred_example.text[start:end]
                    false_label = fp[2]
                    errors[label][text][false_label] += 1
                    error_examples[f"{text}||{label}||{false_label}"].append(
                        pred_error_example_pair
                    )
                else:
                    start, end, false_label = fp
                    text = pred_example.text[start:end]
                    errors[NONE][text][false_label] += 1
                    error_examples[f"{text}||{NONE}||{false_label}"].append(pred_error_example_pair)
                n_errors += 1
                seen.add((start, end))

        if fn_diff and not exclude_fn:
            for fn in fn_diff:
                start, end, label = fn
                if (start, end) not in seen:
                    text = pred_example.text[start:end]
                    errors[label][text][NONE] += 1
                    error_examples[f"{text}||{label}||{NONE}"].append(pred_error_example_pair)
                    n_errors += 1

    ranked_errors_map: Dict[str, PredictionError] = {}

    for label, errors_per_label in errors.items():
        for error_text, error_labels in errors_per_label.items():
            for error_label, count in error_labels.items():
                pe_hash = f"{error_text}||{label}||{error_label}"
                ranked_errors_map[pe_hash] = PredictionError(
                    text=error_text,
                    true_label=label,
                    pred_label=error_label,
                    count=count,
                    examples=error_examples[f"{error_text}||{label}||{error_label}"],
                )

    ranked_errors: List[PredictionError] = sorted(
        list(ranked_errors_map.values()), key=lambda error: error.count, reverse=True  # type: ignore
    )
    error_texts = set()
    for pred_error in ranked_errors:
        if pred_error.examples:
            for e in pred_error.examples:
                error_texts.add(e.original.text)

    error_rate = round(len(error_texts) / len(data), 2)
    if verbose:
        error_summary = {
            "N Examples": len(data),
            "N Errors": len(ranked_errors),
            "N Error Examples": len(error_texts),
            "Error Rate": error_rate,
        }
        msg = Printer()
        msg.divider("Error Analysis")
        msg.table(error_summary)

    return ranked_errors
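The false-positive/false-negative split above is plain set arithmetic on (start, end, label) tuples; a toy sketch with invented spans:

# Predicted vs. gold spans as (start, end, label) tuples -- invented for illustration.
cand = {(0, 5, "ORG"), (10, 16, "PERSON")}
gold = {(0, 5, "PRODUCT"), (20, 25, "GPE")}

fp_diff = cand - gold  # predicted spans not in gold (false positives, incl. wrong labels)
fn_diff = gold - cand  # gold spans the model did not predict (false negatives)
# A span with matching offsets but a different label appears in both sets, which is
# why the loop above first looks for a gold entity with the same offsets.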