def test_preview(self):
    """Check the length of the preview built with a 350-character fragment."""
    options = {
        "fragment_size": 350,
        "start_tag": self.start_tag,
        "end_tag": self.end_tag,
    }
    snippet = highlighting.preview(self.value, **options)
    # The fixture text is expected to yield a preview of exactly 338 characters.
    self.assertEqual(len(snippet), 338)
def to_document(raw: Union[Hit, dict], highlight: bool = True) -> Document:
    """
    Transform an ES search result back into a :class:`.Document`.

    Parameters
    ----------
    raw : :class:`Hit` or dict
        A single search result. Objects that are not plain dicts must
        provide ``to_dict()``; a plain dict is used as-is.
    highlight : bool
        If True (default), an empty ``highlight`` entry is added and the
        result is passed through :func:`add_highlighting`.

    Returns
    -------
    :class:`.Document`
        The result's fields plus the ``match``, ``truncated`` and
        ``preview`` entries, and ``score`` when ``raw.meta.score`` exists.

    """
    result: Document = {}
    result["match"] = {}  # Hit on field, but no highlighting.
    result["truncated"] = {}  # Preview is truncated.

    # The signature allows a plain dict, which has no ``to_dict`` method;
    # calling it unconditionally raised AttributeError for such input.
    source = raw if isinstance(raw, dict) else raw.to_dict()
    result.update(source)

    # Field post-processing is delegated to module-level helpers.
    # NOTE(review): presumably these parse date strings and join the
    # classification lists -- confirm against the helper definitions.
    _add_announced_date_first(result, raw)
    _add_date(result, raw, "submitted_date")
    _add_date(result, raw, "submitted_date_first")
    _add_date(result, raw, "submitted_date_latest")
    _add_amc_msc(result)

    try:
        result["score"] = raw.meta.score  # type: ignore
    except AttributeError:
        # No ``meta``/``score`` on this result (e.g. a plain dict); leave
        # the score out rather than fail.
        pass

    if "preview" not in result:
        result["preview"] = {}

    if "abstract" in result:
        # ``preview`` yields two values: the preview text and the flag
        # stored under ``truncated``.
        result["preview"]["abstract"], result["truncated"]["abstract"] = \
            preview(result["abstract"])

    if highlight:
        result["highlight"] = {}
        result = add_highlighting(result, raw)

    return result
def test_preview(self):
    """Preview stays within the expected length and is reported truncated."""
    outcome = highlighting.preview(
        self.value,
        fragment_size=350,
        start_tag=self.start_tag,
        end_tag=self.end_tag,
    )
    # ``preview`` hands back the preview text together with a truncation flag.
    snippet, was_truncated = outcome
    self.assertGreaterEqual(338, len(snippet))
    self.assertTrue(was_truncated)
def to_document(raw: Union[Hit, dict], highlight: bool = True) -> Document:
    """
    Transform an ES search result back into a :class:`.Document`.

    Parameters
    ----------
    raw : :class:`Hit` or dict
        A single search result. A plain dict is used directly; for any
        other object the fields are read from its ``_d_`` instance
        attribute.
    highlight : bool
        If True (default), add ``highlight`` and ``preview`` entries and
        pass the result through :func:`add_highlighting`.

    Returns
    -------
    :class:`.Document`
        The result's fields, with date strings parsed, classification
        lists joined with ``"; "``, and ``score`` set when
        ``raw.meta.score`` exists.

    """
    result: Document = {}
    result["match"] = {}  # Hit on field, but no highlighting.
    result["truncated"] = {}  # Preview is truncated.

    # A plain dict (allowed by the signature) has no ``__dict__``, so the
    # attribute lookup below would raise AttributeError for it.
    # NOTE(review): ``_d_`` is a private attribute, presumably the backing
    # dict of the search library's result object -- confirm against the
    # pinned library version.
    fields = raw if isinstance(raw, dict) else raw.__dict__["_d_"]
    result.update(fields)

    # Parse dates to date/datetime objects.
    if "announced_date_first" in result:
        result["announced_date_first"] = datetime.strptime(
            raw["announced_date_first"], "%Y-%m"
        ).date()

    for suffix in ("", "_first", "_latest"):
        key = f"submitted_date{suffix}"
        if key not in result:
            continue
        try:
            result[key] = datetime.strptime(raw[key], "%Y-%m-%dT%H:%M:%S%z")
        except (ValueError, TypeError):
            # Best effort: keep the unparsed value and carry on.
            logger.warning("Could not parse %s as datetime", key)

    # Classification lists are flattened into one display string.
    for key in ("acm_class", "msc_class"):
        if result.get(key):
            result[key] = "; ".join(result[key])

    try:
        result["score"] = raw.meta.score  # type: ignore
    except AttributeError:
        # No ``meta``/``score`` on this result (e.g. a plain dict).
        pass

    if highlight:
        result["highlight"] = {}
        # ``.get`` so that a missing paper_id cannot make a debug log
        # statement raise KeyError.
        logger.debug(
            "%s: add highlighting to result", result.get("paper_id")
        )

        if "preview" not in result:
            result["preview"] = {}

        if "abstract" in result:
            result["preview"]["abstract"] = preview(result["abstract"])
            # A trailing ellipsis on the preview is taken as the sign that
            # the abstract was cut short.
            if result["preview"]["abstract"].endswith("…"):
                result["truncated"]["abstract"] = True

        result = add_highlighting(result, raw)

    return result
def test_preview_with_close_highlights(self):
    """Two highlights in the abstract are close together."""
    # Abstract fixture in which the word "random" is wrapped in the
    # highlight tags three times; the last two occurrences are only a few
    # words apart.
    value = (
        "We investigate self-averaging properties in the transport of"
        " particles through <span class=\"has-text-success"
        " has-text-weight-bold mathjax\">random</span> media. We show"
        " rigorously that in the subdiffusive anomalous regime transport"
        " coefficients are not self--averaging quantities. These"
        " quantities are exactly calculated in the case of directed"
        " <span class=\"has-text-success has-text-weight-bold mathjax\">"
        "random</span> walks. In the case of general symmetric <span"
        " class=\"has-text-success has-text-weight-bold mathjax\">random"
        "</span> walks a perturbative analysis around the Effective Medium"
        " Approximation (EMA) is performed.")
    # The tags passed to ``preview`` are the same ones embedded in ``value``.
    start_tag = "<span class=\"has-text-success has-text-weight-bold mathjax\">"
    end_tag = "</span>"
    # No fragment_size is passed here, so ``preview`` uses its own default.
    # NOTE(review): no assertion on ``preview`` is visible in this view; the
    # test body may continue beyond this point -- confirm.
    preview = highlighting.preview(value, start_tag=start_tag, end_tag=end_tag)