Beispiel #1
0
 def test_preview(self):
     """Preview built with a 350-character fragment size has length 338."""
     options = {
         "fragment_size": 350,
         "start_tag": self.start_tag,
         "end_tag": self.end_tag,
     }
     snippet = highlighting.preview(self.value, **options)
     self.assertEqual(len(snippet), 338)
Beispiel #2
0
def to_document(raw: Union[Hit, dict], highlight: bool = True) -> Document:
    """
    Transform an ES search result back into a :class:`.Document`.

    Parameters
    ----------
    raw : :class:`Hit` or dict
        A single search hit. Objects exposing ``to_dict()`` are converted
        with it; a plain ``dict`` is copied as-is.
    highlight : bool
        If ``True`` (default), an empty ``highlight`` entry is added and the
        result is passed through :func:`add_highlighting`.

    Returns
    -------
    :class:`.Document`
        The hit's fields plus ``match``, ``truncated`` and ``preview``
        entries, ``score`` when ``raw.meta.score`` is available, and
        ``highlight`` when requested.

    """
    result: Document = {}

    result["match"] = {}  # Hit on field, but no highlighting.
    result["truncated"] = {}  # Preview is truncated.

    # The signature admits a plain dict, which has no ``to_dict()``; copy it
    # directly instead of failing with AttributeError.
    source = raw if isinstance(raw, dict) else raw.to_dict()
    result.update(source)  # type: ignore

    # Date and classification fields are post-processed by helpers defined
    # elsewhere in this module.
    _add_announced_date_first(result, raw)

    _add_date(result, raw, "submitted_date")
    _add_date(result, raw, "submitted_date_first")
    _add_date(result, raw, "submitted_date_latest")

    _add_amc_msc(result)

    # Not every input carries search metadata (e.g. a plain dict has no
    # ``meta``); the score is simply omitted in that case.
    try:
        result["score"] = raw.meta.score  # type: ignore
    except AttributeError:
        pass

    if "preview" not in result:
        result["preview"] = {}

    if "abstract" in result:
        # ``preview`` returns a pair: the preview text and a truncation flag.
        result["preview"]["abstract"], result["truncated"]["abstract"] \
            = preview(result["abstract"])

    if highlight:
        result["highlight"] = {}
        result = add_highlighting(result, raw)

    return result
Beispiel #3
0
 def test_preview(self):
     """Preview is at most 338 characters long and is flagged truncated."""
     options = dict(
         fragment_size=350,
         start_tag=self.start_tag,
         end_tag=self.end_tag,
     )
     snippet, was_truncated = highlighting.preview(self.value, **options)
     self.assertGreaterEqual(338, len(snippet))
     self.assertTrue(was_truncated)
Beispiel #4
0
def to_document(raw: Union[Hit, dict], highlight: bool = True) -> Document:
    """
    Transform an ES search result back into a :class:`.Document`.

    Parameters
    ----------
    raw : :class:`Hit` or dict
        A single search hit. For non-dict objects the fields are read from
        the object's ``_d_`` instance attribute; a plain ``dict`` is copied
        as-is.
    highlight : bool
        If ``True`` (default), an abstract preview is generated and the
        result is passed through :func:`add_highlighting`.

    Returns
    -------
    :class:`.Document`
        The hit's fields with date strings parsed, ``acm_class`` and
        ``msc_class`` joined with ``"; "``, plus ``match`` and ``truncated``
        entries, ``score`` when ``raw.meta.score`` is available, and
        ``highlight``/``preview`` when highlighting is requested.

    Raises
    ------
    ValueError
        If ``announced_date_first`` is present but is not in ``%Y-%m``
        format (only the ``submitted_date*`` fields are parsed leniently).

    """
    result: Document = {}

    result["match"] = {}  # Hit on field, but no highlighting.
    result["truncated"] = {}  # Preview is truncated.

    # The signature admits a plain dict, which has no ``__dict__``; copy it
    # directly. Otherwise read the object's underlying ``_d_`` mapping
    # (presumably elasticsearch-dsl's AttrDict storage -- TODO confirm).
    source = raw if isinstance(raw, dict) else raw.__dict__["_d_"]
    result.update(source)

    # Parse dates to date/datetime objects.
    if "announced_date_first" in result:
        result["announced_date_first"] = datetime.strptime(
            raw["announced_date_first"], "%Y-%m").date()
    for suffix in ("", "_first", "_latest"):
        key = f"submitted_date{suffix}"
        if key not in result:
            continue
        try:
            result[key] = datetime.strptime(raw[key], "%Y-%m-%dT%H:%M:%S%z")
        except (ValueError, TypeError):
            # Best effort: keep the unparsed value and carry on.
            logger.warning("Could not parse %s as datetime", key)

    # Classification lists are flattened into a single display string.
    for key in ("acm_class", "msc_class"):
        if result.get(key):
            result[key] = "; ".join(result[key])

    # Not every input carries search metadata (e.g. a plain dict has no
    # ``meta``); the score is simply omitted in that case.
    try:
        result["score"] = raw.meta.score  # type: ignore
    except AttributeError:
        pass

    if highlight:
        result["highlight"] = {}
        # Use ``.get`` so a hit without ``paper_id`` cannot make a debug
        # message raise KeyError.
        logger.debug("%s: add highlighting to result",
                     result.get("paper_id"))

        if "preview" not in result:
            result["preview"] = {}

        if "abstract" in result:
            result["preview"]["abstract"] = preview(result["abstract"])
            # A trailing ellipsis marks a preview that was cut short.
            if result["preview"]["abstract"].endswith("…"):
                result["truncated"]["abstract"] = True

        result = add_highlighting(result, raw)

    return result
Beispiel #5
0
 def test_preview_with_close_highlights(self):
     """Two highlights in the abstract are close together."""
     value = (
         "We investigate self-averaging properties in the transport of"
         " particles through <span class=\"has-text-success"
         " has-text-weight-bold mathjax\">random</span> media. We show"
         " rigorously that in the subdiffusive anomalous regime transport"
         " coefficients are not self--averaging quantities. These"
         " quantities are exactly calculated in the case of directed"
         " <span class=\"has-text-success has-text-weight-bold mathjax\">"
         "random</span> walks. In the case of general symmetric <span"
         " class=\"has-text-success has-text-weight-bold mathjax\">random"
         "</span> walks a perturbative analysis around the Effective Medium"
         " Approximation (EMA) is performed.")
     start_tag = "<span class=\"has-text-success has-text-weight-bold mathjax\">"
     end_tag = "</span>"
     preview = highlighting.preview(value,
                                    start_tag=start_tag,
                                    end_tag=end_tag)