コード例 #1
0
    def _end_event(event_type: Type[ScyllaDatabaseContinuousEvent],
                   match: Match):
        """End the last begun continuous event that matches a log line.

        Builds a registry filter restricted to ``node`` (taken from the
        enclosing scope), to ``event_type`` and to the BEGIN period, narrows
        it to the captured shard when there is one, and calls ``end_event()``
        on the last element of the filtered result.

        Args:
            event_type: Continuous event class to look for.
            match: Regex match of the log line; may expose a ``shard`` group.

        Raises:
            ContinuousEventRegistryException: If the filter yields no events.
        """
        # groupdict() maps groups that did not participate in the match to
        # None, so test the captured value rather than the key's presence;
        # int(None) would raise TypeError.
        raw_shard = match.groupdict().get("shard")
        shard = int(raw_shard) if raw_shard is not None else None

        event_filter = event_registry.get_registry_filter()
        event_filter \
            .filter_by_node(node=node) \
            .filter_by_type(event_type=event_type) \
            .filter_by_period(period_type=EventPeriod.BEGIN.value)

        if shard is not None:
            event_filter.filter_by_shard(shard)

        begun_events = event_filter.get_filtered()

        if not begun_events:
            # The two literals are concatenated; the trailing space keeps the
            # event type and "with" from running together.
            raise ContinuousEventRegistryException(
                "Did not find any events of type {event_type} "
                "with period type {period_type}.".format(
                    event_type=event_type,
                    period_type=EventPeriod.BEGIN.value))
        if len(begun_events) > 1:
            LOGGER.warning(
                "Found %s events of type %s with period %s. "
                "Will apply the function to most recent event by default.",
                len(begun_events), event_type, EventPeriod.BEGIN.value)
        begun_events[-1].end_event()
コード例 #2
0
ファイル: html.py プロジェクト: rrosajp/w3lib
    def convert_entity(m: Match) -> str:
        """Return the replacement text for one matched HTML entity.

        The match is expected to expose the optional named groups ``dec``,
        ``hex``, ``named`` and ``semicolon``. ``keep`` and ``remove_illegal``
        come from the enclosing scope.

        Args:
            m: Regex match covering a single entity reference.

        Returns:
            The decoded character when the entity resolves to a code point;
            the original text when the entity name is listed in ``keep`` or
            cannot be resolved; or an empty string when it cannot be resolved,
            ``remove_illegal`` is set and the entity ended with a semicolon.
        """
        groups = m.groupdict()
        number = None
        if groups.get("dec"):
            number = int(groups["dec"], 10)
        elif groups.get("hex"):
            number = int(groups["hex"], 16)
        elif groups.get("named"):
            entity_name = groups["named"]
            if entity_name.lower() in keep:
                return m.group(0)
            # Try the exact spelling first, then the lower-cased one.
            number = name2codepoint.get(entity_name) or name2codepoint.get(
                entity_name.lower())
        if number is not None:
            # Numeric character references in the 80-9F range are typically
            # interpreted by browsers as representing the characters mapped
            # to bytes 80-9F in the Windows-1252 encoding. For more info
            # see: http://en.wikipedia.org/wiki/Character_encodings_in_HTML
            try:
                if 0x80 <= number <= 0x9F:
                    return bytes((number, )).decode("cp1252")
                return chr(number)
            except (ValueError, OverflowError):
                # ValueError: code point outside the Unicode range, or a byte
                # without a cp1252 mapping. OverflowError: chr() is given an
                # integer too large for a C int (e.g. "&#99999999999999;").
                # Either way the reference is treated as unresolvable.
                pass

        return "" if remove_illegal and groups.get("semicolon") else m.group(0)
コード例 #3
0
    def handle_basic(self, source: str,
                     match: Match) -> Generator[List[Content], None, None]:
        """Handles ``basic`` links to imgur media.

        Fetches the media record for the ``id`` group of the match and emits
        one ``Content`` per entry of ``self._content_urls`` that is present as
        a key in that record.

        Args:
            source (str): The source url
            match (Match): The source match regex

        Yields:
            list[Content]: A list of various levels of quality content for
                the same source url
        """

        media = self._get_data(match.groupdict()["id"])
        yield [
            Content(
                uid=f'{self.name}-{media["id"]}-{kind}',
                source=source,
                fragments=[media[kind]],
                extractor=self,
                # extension is whatever follows the last dot of the url
                extension=media[kind].split(".")[-1],
                title=media.get("title"),
                description=media.get("description"),
                quality=self._quality_map.get(kind, 0.0),
                uploaded_by=media.get("account_id"),
                uploaded_date=datetime.datetime.fromtimestamp(
                    int(media.get("datetime"))),
                metadata=media,
            )
            for kind in self._content_urls
            if kind in media
        ]
コード例 #4
0
    def inner(match: Match):
        """Replace one matched reference with an HTML anchor when possible.

        Reads the ``identifier`` and ``title`` groups. If ``url_map`` (from
        the enclosing scope) has the identifier, an ``<a>`` tag pointing at
        the URL relative to ``from_url`` is returned. Otherwise the identifier
        is recorded in ``unintended`` or ``unmapped`` and the reference is
        re-emitted in markdown form.
        """
        captured = match.groupdict()
        identifier, title = captured["identifier"], captured["title"]

        # When only the title was captured, it doubles as the identifier.
        if title and not identifier:
            identifier, title = title, identifier

        try:
            url = relative_url(from_url, url_map[identifier])
        except KeyError:
            # invalid identifier (contains a space or a slash), must not be
            # an intended reference
            looks_unintended = " " in identifier or "/" in identifier
            bucket = unintended if looks_unintended else unmapped
            bucket.append(identifier)

            return f"[{title}][{identifier}]" if title else f"[{identifier}][]"

        # TODO: we could also use a config option to ignore some identifiers
        # and to map others to URLs, something like:
        # references:
        #   ignore:
        #     - "USERNAME:PASSWORD@"
        #   map:
        #     some-id: https://example.com

        return f'<a href="{url}">{title or identifier}</a>'
コード例 #5
0
ファイル: fourchan.py プロジェクト: stephen-bunn/qetch
    def handle_raw(self, source: str,
                   match: Match) -> Generator[List[Content], None, None]:
        """Handles ``raw`` links to 4chan media.

        Emits one ``Content`` per entry of ``self._content_configs``; every
        entry points at ``source`` itself and differs only in uid suffix and
        quality.

        Args:
            source (str): The source url
            match (Match): The source match regex; must expose the ``board``
                and ``id`` groups

        Yields:
            list[Content]: A list of various levels of quality content for
                the same source url
        """

        matchdict = match.groupdict()
        yield [
            Content(
                uid=(f'{self.name}-{matchdict["board"]}-raw-{matchdict["id"]}'
                     f"{post_type}"),
                source=source,
                fragments=[source],
                extractor=self,
                title=None,
                description=None,
                quality=quality,
                uploaded_by=None,
                uploaded_date=None,
                metadata=None,
            )
            # Only the post type and quality are used here. handle_thread
            # unpacks four fields from the same attribute, so tolerate any
            # trailing fields instead of insisting on exactly three.
            for post_type, _url_path, quality, *_ in self._content_configs
        ]
コード例 #6
0
    def _add_event(event_type: Type[ScyllaDatabaseContinuousEvent], match: Match):
        """Create a continuous event from a log-line match and begin it.

        Every named group of the match is forwarded to ``event_type`` as a
        keyword argument, together with ``node`` from the enclosing scope.

        Args:
            event_type: Continuous event class to instantiate.
            match: Regex match of the log line.
        """
        kwargs = match.groupdict()
        # groupdict() reports groups that did not participate in the match as
        # None, and int(None) raises TypeError, so only convert a captured
        # shard.
        if kwargs.get("shard") is not None:
            kwargs["shard"] = int(kwargs["shard"])

        new_event = event_type(node=node, **kwargs)
        new_event.begin_event()
コード例 #7
0
    def _extract_entity_attributes(self, match: Match) -> EntityAttributes:
        """Extract the entity attributes, i.e. type, value, etc., from the
        regex match.

        A non-empty dict group is delegated to
        ``_extract_entity_attributes_from_dict``. Otherwise the type group is
        used, and the value group when present (which also sets the
        deprecated-synonym-format flag on ``self``), else the entity text.
        """
        groups = match.groupdict()
        entity_text = groups[GROUP_ENTITY_TEXT]

        if groups[GROUP_ENTITY_DICT]:
            return self._extract_entity_attributes_from_dict(entity_text, match)

        entity_type = groups[GROUP_ENTITY_TYPE]
        entity_value = groups[GROUP_ENTITY_VALUE]

        if entity_value:
            # An explicit value group was captured: remember that it was used.
            self._deprecated_synonym_format_was_used = True
        else:
            entity_value = entity_text

        return EntityAttributes(entity_type, entity_value, entity_text, None, None)
コード例 #8
0
 def extract(self, match: Match) -> Link:
     """Build a ``Link`` from the ``md_*`` named groups of ``match``.

     Groups that are missing, None or empty become empty strings. The image
     marker is taken from ``md_is_image``, falling back to
     ``md_alt_is_image``.
     """
     get = match.groupdict().get

     image = get('md_is_image') or get('md_alt_is_image') or ''
     text = get('md_text') or ''
     target = get('md_filename') or ''
     title = get('md_title') or ''
     anchor = get('md_anchor') or ''

     return Link(image=image, text=text, target=target, title=title,
                 anchor=anchor)
コード例 #9
0
 def _end_event(event_type: Type[ScyllaDatabaseContinuousEvent],
                match: Match):
     """End the last registered continuous event sharing this line's hash.

     The hash is computed by ``event_type`` from the named groups of the
     match plus ``node`` (from the enclosing scope). Nothing happens when
     the registry lookup comes back empty.
     """
     captured = match.groupdict()
     hash_fields = {'node': node, **captured}
     continuous_hash = event_type.get_continuous_hash_from_dict(hash_fields)

     begun_events = event_registry.find_continuous_events_by_hash(
         continuous_hash)
     if begun_events:
         begun_events[-1].end_event()
コード例 #10
0
 def _getMatchingFirstHists(self, match: Match) -> List[Tuple[str]]:
     """Return the tuples of ``self.hits[0]`` that agree with ``match``.

     A tuple agrees when, for every named group of ``match`` whose name is
     in ``self.outputnames``, the tuple element at that group's position
     equals the captured value. Positions come from ``self.regexgroups[0]``,
     which is 1-indexed like regex groups, hence the ``- 1``.
     """

     def agrees(tup) -> bool:
         # Only groups that are significant for the output are compared.
         for name, value in match.groupdict().items():
             if name not in self.outputnames:
                 continue
             if tup[self.regexgroups[0][name] - 1] != value:
                 return False
         return True

     return [tup for tup in self.hits[0] if agrees(tup)]
コード例 #11
0
 def transformation(match: Match, s=spec.decode(self.codec)):
     """Format ``s`` for one match and return it as a labelled item.

     The named groups become the label variables. Any name that ends up in
     the ``used`` set handed to ``meta.format`` (presumably the variables it
     consumed; confirm against ``meta.format``) is removed from them, the
     match start is added as ``offset``, and entries of ``meta`` fill in
     names that are still absent.
     """
     named: dict = match.groupdict()
     positional: list = [match.group(0)]
     positional.extend(match.groups())
     consumed = set()
     formatted = meta.format(s, self.codec, positional, named, True, True, consumed)
     for key in consumed:
         named.pop(key, None)
     named['offset'] = match.start()
     # Existing names win over meta entries.
     for key, value in meta.items():
         if key not in named:
             named[key] = value
     return self.labelled(formatted, **named)
コード例 #12
0
ファイル: core.py プロジェクト: marcoceppi/aioslack
 def callback(match: Match) -> str:
     """Return the display text for one matched user mention.

     A captured ``userid`` is looked up in ``self.users``; the user's
     ``name`` is used when found, otherwise the raw id. Without a ``userid``
     the ``alias`` group is used. The result is prefixed with ``prefix``
     from the enclosing scope.

     Args:
         match: Regex match exposing the ``userid`` and ``alias`` groups.

     Returns:
         The prefixed user name, or the matched text unchanged when neither
         group captured anything.
     """
     m = match.groupdict()
     if m["userid"]:
         user = self.users.get(m["userid"])
         username = m["userid"] if user is None else user.name
     elif m["alias"]:
         username = m["alias"]
     else:
         # Neither group captured anything. This used to fall through to an
         # UnboundLocalError on `username`; leave the text untouched instead.
         return match.group(0)
     return f"{prefix}{username}"
コード例 #13
0
ファイル: rule.py プロジェクト: subiol/reactbot
 async def execute(self, evt: MessageEvent, match: Match) -> None:
     """Render the template for a matched event and send the result.

     The template receives ``self.variables`` as rule variables and, as
     extra variables, every group of the match keyed by the string form of
     its zero-based position in ``match.groups()``, overlaid with the named
     groups. The content is sent to the event's room with ``self.type``,
     falling back to the template's type.
     """
     extra_vars = {str(pos): text for pos, text in enumerate(match.groups())}
     # Named groups are applied last, so they win on a key clash.
     extra_vars.update(match.groupdict())

     content = self.template.execute(
         evt=evt,
         rule_vars=self.variables,
         extra_vars=extra_vars,
     )
     await evt.client.send_message_event(
         evt.room_id,
         self.type or self.template.type,
         content,
     )
コード例 #14
0
 def _convert_match(self, match: Match) -> 'RelativeDeltaParams':
     """Convert a time match into parameters, applying any am/pm marker.

     The optional ``meridiem`` group is removed from the named groups before
     they are passed to ``self._convert_groups``; the ``hour`` entry of the
     result is then moved onto the 24-hour clock.

     Args:
         match: Regex match; may expose a ``meridiem`` group.

     Returns:
         The parameters produced by ``self._convert_groups`` with ``hour``
         adjusted.
     """
     groups = match.groupdict()
     # The group may be absent from the pattern or present but unmatched
     # (None); both mean "no meridiem".
     meridiem = groups.pop("meridiem", None)
     if meridiem is not None:
         meridiem = meridiem.lower()

     params = self._convert_groups(groups)
     if meridiem == "pm" and params["hour"] < 12:
         # 1pm-11pm map to 13-23; 12pm is already noon and must stay 12
         # (adding 12 unconditionally produced the invalid hour 24).
         params["hour"] += 12
     elif meridiem == "am" and params["hour"] == 12:
         # 12am is midnight.
         params["hour"] = 0
     return params
コード例 #15
0
 def _extract_entity_attributes_from_dict(self, entity_text: Text,
                                          match: Match) -> EntityAttributes:
     """Extract the entity attributes from the dict format.

     The dict group of the match is passed through
     ``self._get_validated_dict``; type, value, group and role are read from
     the result, with the value defaulting to ``entity_text``.
     """
     attributes = self._get_validated_dict(
         match.groupdict()[GROUP_ENTITY_DICT])

     entity_type = attributes.get(ENTITY_ATTRIBUTE_TYPE)
     entity_value = attributes.get(ENTITY_ATTRIBUTE_VALUE, entity_text)
     entity_group = attributes.get(ENTITY_ATTRIBUTE_GROUP)
     entity_role = attributes.get(ENTITY_ATTRIBUTE_ROLE)

     return EntityAttributes(entity_type, entity_value, entity_text,
                             entity_group, entity_role)
コード例 #16
0
ファイル: fourchan.py プロジェクト: stephen-bunn/qetch
    def handle_thread(self, source: str,
                      match: Match) -> Generator[List[Content], None, None]:
        """Handles ``thread`` links to 4chan media.

        Fetches the thread for the ``board`` and ``id`` groups and, for every
        post that has an ``md5`` key, yields one list holding a ``Content``
        per entry of ``self._content_configs``.

        Args:
            source (str): The source url
            match (Match): The source match regex

        Yields:
            list[Content]: A list of various levels of quality content for
                the same source url
        """

        matchdict = match.groupdict()
        thread = self._get_data(matchdict["board"], matchdict["id"])
        for post in thread.get("posts", []):
            # posts without an md5 key are skipped
            if "md5" not in post:
                continue

            variants = []
            for (post_type, url_path, quality,
                 extension_type) in self._content_configs:
                # a non-empty post type becomes a "-<type>" uid suffix
                suffix = f"-{post_type}" if post_type else ""
                uid = (f'{self.name}-{matchdict["board"]}-'
                       f'{matchdict["id"]}-{post["tim"]}{suffix}')
                media_url = furl(self._img_base).add(path=url_path.format(
                    board=matchdict["board"], post=post)).url
                # the configured extension wins over the post's own one
                extension = (extension_type
                             or post["ext"].split(".")[-1])
                description = (HTML(html=post.get("com")).text
                               if "com" in post and len(post["com"]) > 0
                               else None)

                variants.append(
                    Content(
                        uid=uid,
                        source=source,
                        fragments=[media_url],
                        extractor=self,
                        extension=extension,
                        title=post.get("filename"),
                        description=description,
                        quality=quality,
                        uploaded_by=post.get("name"),
                        uploaded_date=datetime.datetime.fromtimestamp(
                            int(post.get("time"))),
                        metadata=post,
                    ))
            yield variants
コード例 #17
0
ファイル: entities_parser.py プロジェクト: ravishankr/rasa
def extract_entity_attributes(match: Match) -> EntityAttributes:
    """Extract the entity attributes, i.e. type, value, etc., from the
    regex match.

    A non-empty dict group is delegated to
    ``extract_entity_attributes_from_dict``; otherwise the type group is used
    together with the value group, falling back to the entity text when no
    value was captured.

    Args:
        match: Regex match to extract the entity attributes from.

    Returns:
        EntityAttributes object.
    """
    groups = match.groupdict()
    entity_text = groups[GROUP_ENTITY_TEXT]

    if groups[GROUP_ENTITY_DICT]:
        return extract_entity_attributes_from_dict(entity_text, match)

    entity_type = groups[GROUP_ENTITY_TYPE]
    entity_value = groups[GROUP_ENTITY_VALUE] or entity_text

    return EntityAttributes(entity_type, entity_value, entity_text, None, None)
コード例 #18
0
def _get_numbered_group(match: typing.Match,
                        name: str,
                        start: int = 0) -> typing.Optional[str]:
    """Return the first captured value among ``name<start>``, ``name<start+1>``, ...

    Consecutive numbered group names are probed until one is not defined by
    the pattern.

    Args:
        match: The regex match.
        name: Common prefix of the numbered group names.
        start: First number to probe.

    Returns:
        The value of the first numbered group that participated in the
        match, or None when the sequence ends without one.
    """
    groups = match.groupdict()
    idx = start
    key = f'{name}{idx}'
    # The loop condition replaces the former `while True`, whose trailing
    # `return None` could never be reached.
    while key in groups:
        value = groups[key]
        if value is not None:
            return value
        idx += 1
        key = f'{name}{idx}'
    return None
コード例 #19
0
 def from_match(cls, match: Match, mail_dtime: datetime, ttype: TransType) -> 'Transaction':
     """Build an instance of ``cls`` from the amounts captured by ``match``.

     The optional groups ``nat``, ``int`` and ``rate`` are parsed as numbers
     written with ``.`` as thousands separator and ``,`` as decimal mark, and
     passed to ``__init__`` as ``amount``, ``int_amount`` and ``rate``.

     Args:
         match: Regex match that may expose ``nat``, ``int`` and ``rate``.
         mail_dtime: Forwarded to ``__init__`` unchanged.
         ttype: Forwarded to ``__init__`` unchanged.

     Returns:
         The initialised instance.
     """
     groups_dict = match.groupdict()
     kwargs = {
         'ttype': ttype,
         'mail_dtime': mail_dtime,
     }
     group_to_field = (('nat', 'amount'), ('int', 'int_amount'), ('rate', 'rate'))
     for group, field in group_to_field:
         raw = groups_dict.get(group)
         # groupdict() yields None for groups that did not participate in
         # the match; calling .replace on it would raise AttributeError.
         if raw is not None:
             kwargs[field] = float(raw.replace('.', '').replace(',', '.'))

     # Allocation and initialisation stay as two explicit steps.
     transaction = cls.__new__(cls)
     transaction.__init__(**kwargs)
     return transaction
コード例 #20
0
    def format_struct(match: Match) -> str:
        """Render ``template`` for one matched structure parameter.

        The ``db_name`` group is turned into a column expression on alias
        ``c`` (wrapped in ``SUBSTRING(..., 0, 10)`` for datetime-typed names
        other than ``date``) and substituted into the template together with
        the remaining named groups.

        Raises:
            InvalidStructureParameter: If a KeyError occurs while looking up
                the name in ``DATA_TYPES`` or substituting the template.
        """
        raw_name = match.group('db_name')
        try:
            needs_substring = (DATA_TYPES[raw_name] is datetime
                               and raw_name != 'date')
            if needs_substring:
                column = f"SUBSTRING(c.{raw_name}, 0, 10)"
            else:
                column = f"c.{raw_name}"

            # Same named groups, with db_name replaced by the expression.
            params = dict(match.groupdict(), db_name=f"({column} ?? null)")

            return template.substitute(**params)

        except KeyError:
            raise InvalidStructureParameter(name=match.group('db_name'),
                                            structure_format=template_format)
コード例 #21
0
def get_named_group_index(match: typing.Match, name: str) -> typing.Optional[int]:
    """Get the index of the named group

    The index is read from the compiled pattern's name-to-index table.
    Comparing spans, as was done before, returns the wrong index whenever
    another group has the same span, e.g. nested groups or several groups
    that did not participate in the match (all report ``(-1, -1)``).

    Args:
        match (Match): The regex match
        name (str): The group name

    Returns:
        int: The index of the group, or None if the pattern defines no
            group with that name
    """
    return match.re.groupindex.get(name)
コード例 #22
0
def get_named_group_at_index(match: typing.Match, idx: int) -> typing.Optional[str]:
    """Get the name of the group

    The name is looked up in the compiled pattern's name-to-index table.
    Comparing spans, as was done before, could return the name of a
    different group that happened to cover the same text, or of any
    unmatched named group when the requested group was itself unmatched.

    Args:
        match (Match): The regex match
        idx (int): The group index

    Returns:
        str: The group name, or None if the group at ``idx`` is unnamed or
            does not exist
    """
    for group_name, group_idx in match.re.groupindex.items():
        if group_idx == idx:
            return group_name
    return None
コード例 #23
0
 def _content(self, content: Content,
              match: Match) -> Tuple[Union[str, None], str, Any]:
     """Work out what to open from the ``something`` group of ``match``.

     Returns a ``(filename, line_number, obj)`` triple:

     * nothing requested: ``(None, "0", content.showing)``
     * a request starting with ``{{`` is rendered with ``templar`` against
       ``content.showing``; a non-string result is returned as ``obj``
     * a string (rendered or literal) of the form ``path[:line]`` whose
       path is an existing file gives ``(path, line or "0", None)``
     * any other string is returned as ``obj``
     """
     default_line = "0"
     self._logger.debug("content is showing")
     something = match.groupdict()["something"]
     if not something:
         self._logger.debug("something not provided")
         return None, default_line, content.showing

     self._logger.debug("asked to open something")
     if something.startswith("{{"):
         self._logger.debug("something appears to be a template: %s",
                            something)
         candidate = templar(something, content.showing)
         if not isinstance(candidate, str):
             self._logger.debug("template not a string, open showing")
             return None, default_line, candidate
         found_message = "template interaction in valid filename %s:%s"
         missing_message = "template not a valid filename, open showing"
     else:
         candidate = something
         found_message = "something not a template, but is a valid filename %s:%s"
         missing_message = "something just a plain string"

     # Split an optional ":<line>" suffix off the right-hand end.
     path, *suffix = candidate.rsplit(":", 1)
     if not os.path.isfile(path):
         self._logger.debug(missing_message)
         return None, default_line, candidate

     line_number = suffix[0] if suffix else default_line
     self._logger.debug(found_message, path, line_number)
     return path, line_number, None
コード例 #24
0
def get_named_group_index_dict(match: typing.Match) -> typing.Dict[int, str]:
    """Get the name/index map of the groups

    The map is the inverse of the compiled pattern's name-to-index table.
    Pairing names with indices by comparing spans, as was done before,
    mis-assigns names whenever two groups have the same span (nested groups,
    or several groups that did not participate in the match).

    Args:
        match (Match): The regex match

    Returns:
        dict: A mapping of indices to names
    """
    return {idx: name for name, idx in match.re.groupindex.items()}
コード例 #25
0
    def _end_event(event_type: Type[ScyllaDatabaseContinuousEvent],
                   match: Match):
        """End the last begun continuous event that matches a log line.

        The registry filter is restricted to ``node`` (from the enclosing
        scope), ``event_type`` and the BEGIN period, then narrowed by the
        ``shard``, ``table`` and ``compaction_process_id`` groups when they
        captured something. If nothing is left, an ERROR-severity
        ``TestFrameworkEvent`` is published instead of raising.
        """
        kwargs = match.groupdict()

        event_filter = event_registry.get_registry_filter()
        (event_filter
         .filter_by_node(node=node)
         .filter_by_type(event_type=event_type)
         .filter_by_period(period_type=EventPeriod.BEGIN.value))

        shard = kwargs.get("shard")
        if shard:
            event_filter.filter_by_shard(int(shard))

        # Both attributes are filtered relative to the CompactionEvent base.
        for attr_name in ("table", "compaction_process_id"):
            attr_value = kwargs.get(attr_name)
            if attr_value:
                event_filter.filter_by_attr(base="CompactionEvent",
                                            **{attr_name: attr_value})

        begun_events = event_filter.get_filtered()

        if not begun_events:
            not_found = (
                "Did not find any events of type {event_type}"
                " with period type {period_type}, event data: {event_data}"
            ).format(
                event_type=event_type,
                period_type=EventPeriod.BEGIN.value,
                event_data=kwargs,
            )
            TestFrameworkEvent(source=event_type.__name__,
                               message=not_found,
                               severity=Severity.ERROR).publish_or_dump()
            return

        if len(begun_events) > 1:
            LOGGER.debug(
                "Found %s events of type %s with period %s. "
                "Will apply the function to most recent event by default.",
                len(begun_events), event_type, EventPeriod.BEGIN.value)
        begun_events[-1].end_event()
コード例 #26
0
    def inner(match: Match):  # noqa: WPS430 (nested function, no other way than side-effecting the warnings)
        """Replace one matched reference with an HTML anchor when possible.

        Reads the ``identifier`` and ``title`` groups. If ``url_map`` (from
        the enclosing scope) has the identifier, an ``<a>`` tag pointing at
        the URL relative to ``from_url`` is returned. Otherwise the reference
        is re-emitted in markdown form, and the identifier is recorded in
        ``unmapped`` unless it contains a space or a slash.
        """
        captured = match.groupdict()
        identifier, title = captured["identifier"], captured["title"]

        # When only the title was captured, it doubles as the identifier.
        if title and not identifier:
            identifier, title = title, identifier

        try:
            url = relative_url(from_url, url_map[identifier])
        except KeyError:
            has_space_or_slash = " " in identifier or "/" in identifier
            if not has_space_or_slash:
                unmapped.append(identifier)

            return f"[{title}][{identifier}]" if title else f"[{identifier}][]"

        return f'<a href="{url}">{title or identifier}</a>'
コード例 #27
0
def get_named_group_index_list(match: typing.Match) -> typing.List[typing.Optional[str]]:
    """Get the names of the groups

    Names are placed using the compiled pattern's name-to-index table.
    Matching groups to names by comparing spans, as was done before, labels
    unnamed groups with the name of any group sharing their span (nested
    groups, or groups that did not participate in the match).

    Args:
        match (Match): The regex match

    Returns:
        list: The names of the groups by index; entry 0 (the whole match)
            and entries of unnamed groups are None
    """
    group_names: typing.List[typing.Optional[str]] = [None] * (len(match.groups()) + 1)

    for name, idx in match.re.groupindex.items():
        group_names[idx] = name

    return group_names
コード例 #28
0
def match_to_datetime(match: Match) -> datetime:
    """Build a UTC datetime from the named groups of ``match``.

    Every group listed in ``DATETIME_MATCH_GROUPS`` that captured a non-empty
    value is converted to an integer and passed to ``datetime`` as a keyword
    argument. A two-character year is expanded with ``strptime``'s ``%y``
    rule. If the ``offset_hours`` group captured something, that many hours
    are subtracted from the result.

    Args:
        match: Regex match exposing the datetime groups.

    Returns:
        The datetime, with ``tzinfo=UTC``.
    """
    groups = match.groupdict()
    dt_kwargs = {
        grp: groups[grp]
        for grp in DATETIME_MATCH_GROUPS if groups.get(grp)
    }

    year = dt_kwargs.get("year")
    # The check used to compare the captured *string* with the integer 2,
    # which is never true; the length of the year is what matters here.
    if isinstance(year, str) and len(year) == 2:
        dt_kwargs["year"] = datetime.strptime(year, "%y").year

    dt_kwargs = {
        key: int(value, base=10) if isinstance(value, str) else value
        for key, value in dt_kwargs.items()
    }
    dt = datetime(**dt_kwargs, tzinfo=UTC)

    ofs = groups.get("offset_hours")
    if ofs:
        # Captured groups are strings; timedelta() rejects them with a
        # TypeError, so convert first.
        hours = int(ofs, base=10) if isinstance(ofs, str) else ofs
        dt -= timedelta(hours=hours)
    return dt
コード例 #29
0
ファイル: entities_parser.py プロジェクト: ravishankr/rasa
def extract_entity_attributes_from_dict(entity_text: Text,
                                        match: Match) -> EntityAttributes:
    """Extract entity attributes from dict format.

    The dict group of the match is passed through ``get_validated_dict``;
    type, value, group and role are read from the result, with the value
    defaulting to ``entity_text``.

    Args:
        entity_text: Original entity text.
        match: Regex match.

    Returns:
        Extracted entity attributes.
    """
    attributes = get_validated_dict(match.groupdict()[GROUP_ENTITY_DICT])

    entity_type = attributes.get(ENTITY_ATTRIBUTE_TYPE)
    entity_value = attributes.get(ENTITY_ATTRIBUTE_VALUE, entity_text)
    entity_group = attributes.get(ENTITY_ATTRIBUTE_GROUP)
    entity_role = attributes.get(ENTITY_ATTRIBUTE_ROLE)

    return EntityAttributes(entity_type, entity_value, entity_text,
                            entity_group, entity_role)
コード例 #30
0
    def extract(self, match: Match) -> Link:
        """Build a ``Link`` from the ``wiki_*`` named groups of ``match``.

        The display text falls back to the link and then to the anchor. The
        link target, and the anchor when present, are passed through
        ``self._slugify``.

        Raises:
            BrokenLink: If link, text and anchor are all empty.
        """
        get = match.groupdict().get

        image = get('wiki_is_image') or ''
        link = get('wiki_link') or ''
        anchor = get('wiki_anchor') or ''
        text = get('wiki_text') or link or anchor

        if not any((link, text, anchor)):
            raise BrokenLink(
                f"Could not extract required field `wiki_link` from {match.group(0)}"
            )

        target = self._slugify(link)
        slugged_anchor = self._slugify(anchor) if anchor else anchor

        return Link(image=image,
                    text=text,
                    target=target,
                    title=text,
                    anchor=slugged_anchor)