コード例 #1
0
def test_checked_cast() -> None:
    """Exercise ``checked_cast`` on a matching and a mismatching value.

    A value whose runtime type matches the requested type must come back
    equal to the input; a value of a different type must trigger an
    ``AssertionError``.
    """
    original: object = 5
    narrowed: int = checked_cast(int, original)
    assert original == narrowed

    # 3.5 is a float, so casting it to int is expected to be rejected.
    with raises(AssertionError):
        checked_cast(int, 3.5)
コード例 #2
0
ファイル: _cli.py プロジェクト: ekojsalim/nasty
 def _batch_submit(self, batch: Batch, request: Request) -> None:
     """Submit ``request`` to ``batch``, split into daily requests if enabled.

     The request is first narrowed to ``Search`` via ``checked_cast``. When
     ``self.daily`` is falsy it is forwarded unchanged to the parent
     implementation; otherwise every request produced by
     ``to_daily_requests()`` is forwarded individually.
     """
     search = checked_cast(Search, request)

     if not self.daily:
         super()._batch_submit(batch, search)
         return

     for per_day_request in search.to_daily_requests():
         super()._batch_submit(batch, per_day_request)
コード例 #3
0
    def prepare_doc_dict(cls, doc_dict: MutableMapping[str, object]) -> None:
        """Rewrite ``doc_dict`` in place after the parent's own preparation.

        Steps performed on ``doc_dict``:

        * the ``"index"`` entry is moved to ``"_id"``;
        * ``"url"`` is parsed, and ``"url_netloc"`` (network location with a
          leading ``"www."`` removed) and ``"url_path"`` (the path stripped
          of surrounding slashes and split on ``"/"``) are stored;
        * ``"time"`` is replaced by the result of ``dateparser.parse``,
          called with the languages ``[<doc "lang">, "en"]``;
        * the ``"kw"`` entry is removed.
        """
        super().prepare_doc_dict(doc_dict)
        doc_dict["_id"] = doc_dict.pop("index")

        parsed_url = urlparse(checked_cast(str, doc_dict["url"]))
        www_prefix = "www."
        host = parsed_url.netloc
        doc_dict["url_netloc"] = (
            host[len(www_prefix) :] if host.startswith(www_prefix) else host
        )
        doc_dict["url_path"] = parsed_url.path.strip("/").split("/")

        # Read the time string before the language, then hand both to dateparser.
        raw_time = checked_cast(str, doc_dict["time"])
        parse_languages = [str(doc_dict["lang"]), "en"]
        doc_dict["time"] = dateparser.parse(raw_time, languages=parse_languages)

        doc_dict.pop("kw")
コード例 #4
0
 def _tokenize_doc_dict(
     cls,
     doc_dict: MutableMapping[str, object],
     text_field_map: Mapping[str, object],
     lang: str,
 ) -> None:
     for field_name, text_field_or_childs in text_field_map.items():
         # text_field_or_childs is either True or a mapping
         value = doc_dict.get(field_name)
         if not value:
             continue
         elif text_field_or_childs is True:
             (
                 doc_dict[field_name],
                 doc_dict[field_name + "_orig"],
                 doc_dict[field_name + "_tokens"],
             ) = cls._tokenize(checked_cast(str, value), lang)
         elif isinstance(value, MutableMapping):
             cls._tokenize_doc_dict(
                 value, cast(Mapping[str, object], text_field_or_childs), lang
             )
         elif isinstance(value, Sequence):
             for v in value:
                 cls._tokenize_doc_dict(
                     v, cast(Mapping[str, object], text_field_or_childs), lang
                 )
         else:
             raise ValueError(
                 f"Value for Object-field needs to be either a Mapping or a "
                 f"Sequence. The value was: {value}"
             )
コード例 #5
0
ファイル: reddit.py プロジェクト: lschmelzeisen/nasty-data
    def prepare_doc_dict(cls, doc_dict: MutableMapping[str, object]) -> None:
        """Set ``"_id"`` to the ``"t1_"``-prefixed ``"id"`` and drop cross-post data.

        Runs the parent's preparation first, then mutates ``doc_dict`` in
        place.
        """
        super().prepare_doc_dict(doc_dict)

        raw_id = checked_cast(str, doc_dict["id"])
        doc_dict["_id"] = "t1_" + raw_id

        # "crosspost_parent_list" embeds the full JSON dict of the post being
        # cross-posted. To keep the data model simple it is discarded here (if
        # present); should it be needed later, it costs one ID lookup against
        # the index.
        doc_dict.pop("crosspost_parent_list", None)
コード例 #6
0
ファイル: reddit.py プロジェクト: lschmelzeisen/nasty-data
 def prepare_doc_dict(cls, doc_dict: MutableMapping[str, object]) -> None:
     """Set ``"_id"`` to the ``"t3_"``-prefixed ``"id"`` of ``doc_dict``.

     Runs the parent's preparation first, then mutates ``doc_dict`` in place.
     """
     super().prepare_doc_dict(doc_dict)

     raw_id = checked_cast(str, doc_dict["id"])
     doc_dict["_id"] = "t3_" + raw_id
コード例 #7
0
def _lang_from_field(doc_dict: Mapping[str, object]) -> str:
    """Return the document's ``"lang"`` value, falling back to ``"en"``.

    The ``"lang"`` entry is narrowed to ``str`` via ``checked_cast``. It is
    returned as-is when it is a key of
    ``TokenizedBaseDocument._tokenizers``; otherwise ``"en"`` is returned.
    """
    doc_lang = checked_cast(str, doc_dict["lang"])
    known_langs = TokenizedBaseDocument._tokenizers.keys()
    return doc_lang if doc_lang in known_langs else "en"