Beispiel #1
0
 def test_is_one_of_false_hierarchy(self):
     """``is_one_of("liquid", ...)`` is false when only its descendants are listed."""
     liquid = RxType("liquid")
     water = RxType("water", liquid)
     lake = RxType("lake", water)  # constructed but never registered
     registry = RxTypeSet()
     # Only the root of the hierarchy is registered in the set.
     registry._types = {"liquid": liquid}
     descendant_names = ["water", "lake"]
     self.assertFalse(registry.is_one_of("liquid", descendant_names))
Beispiel #2
0
 def __init__(
     self, printable_subset: Optional[Iterable[str]] = None
 ) -> None:
     """Create the type, character-set and wrapper registries.

     ``printable_subset`` is handed unchanged to ``RxWrapperSet``.
     """
     # Both omission sets start out empty.
     self.omit_types: Set[str] = set()
     self.omit_wrappers: Set[str] = set()

     # Each registry is built on top of the previous one.
     type_set = RxTypeSet()
     char_sets = CharSets(type_set)
     self._rxtypes = type_set
     self._char_sets = char_sets
     self._rxwrappers = RxWrapperSet(char_sets, printable_subset)
Beispiel #3
0
    def test_add(self):
        """``add`` stores the type under its name in a set built with init_types=False."""
        liquid = RxType("liquid")
        registry = RxTypeSet(init_types=False)

        registry.add(liquid)

        self.assertEqual(len(registry._types), 1)
        stored = registry._types["liquid"]
        self.assertEqual(stored, liquid)
Beispiel #4
0
 def test_is_of_false_unrelated(self):
     """``is_of`` is false in both directions for two parentless types."""
     liquid, solid = RxType("liquid"), RxType("solid")
     registry = RxTypeSet()
     registry._types = {"liquid": liquid, "solid": solid}
     for name, other in (("solid", "liquid"), ("liquid", "solid")):
         self.assertFalse(registry.is_of(name, other))
Beispiel #5
0
    def test_is_of_excluded(self):
        """``is_of`` raises ``KeyError`` for a type name missing from the set."""
        liquid = RxType("liquid")
        water = RxType("water", liquid)  # deliberately left out of the set
        registry = RxTypeSet()
        registry._types = {"liquid": liquid}

        self.assertRaises(KeyError, registry.is_of, "water", "liquid")
Beispiel #6
0
 def test_is_one_of_false_unrelated(self):
     """A parentless type is not one of several other parentless types."""
     liquid, solid, gas, plasma = (
         RxType(name) for name in ("liquid", "solid", "gas", "plasma")
     )
     registry = RxTypeSet()
     # Only the queried type is registered.
     registry._types = {"plasma": plasma}
     other_names = ["liquid", "solid", "gas"]
     self.assertFalse(registry.is_one_of("plasma", other_names))
Beispiel #7
0
 def test_is_one_of_true(self):
     """``is_one_of`` is true when the type's parent appears in the list."""
     liquid, solid, gas = (
         RxType(name) for name in ("liquid", "solid", "gas")
     )
     water = RxType("water", liquid)
     registry = RxTypeSet()
     registry._types = {"water": water}
     candidate_names = ["liquid", "solid", "gas"]
     self.assertTrue(registry.is_one_of("water", candidate_names))
Beispiel #8
0
    def test_is_of_true(self):
        """``is_of`` is true for a type paired with its direct parent."""
        liquid = RxType("liquid")
        water = RxType("water", liquid)
        lake = RxType("lake", water)
        registry = RxTypeSet()
        registry._types = {"water": water, "lake": lake}

        for name, parent_name in (("water", "liquid"), ("lake", "water")):
            self.assertTrue(registry.is_of(name, parent_name))
Beispiel #9
0
    def test_init_types(self):
        """``init_types`` registers an ``RxType`` for every RXTYPE_SETTINGS entry."""
        registry = RxTypeSet(init_types=False)
        registry.init_types()

        for expected_name in (entry["name"] for entry in RXTYPE_SETTINGS):
            self.assertIn(expected_name, registry._types)
            created = registry._types[expected_name]
            self.assertIsInstance(created, RxType)
            self.assertEqual(created.name, expected_name)
Beispiel #10
0
    def test_init_char_set_empty_subset(self):
        """With an empty printable subset, no "alphanum" character is activated."""
        registry = RxTypeSet()
        registry.init_types()
        char_sets = CharSets(registry)

        type_name = "alphanum"
        related_names = set(registry.get_full_type_names(type_name))
        chars = CHAR_SETS[type_name]

        char_sets.init_char_set(type_name, set(), chars)

        # Only related type names that have a character set are checked.
        tracked_names = [
            name for name in related_names if name in char_sets._char_sets
        ]
        for char in chars:
            for name in tracked_names:
                self.assertNotIn(char, char_sets._char_sets[name])
Beispiel #11
0
    def test_init_char_set(self):
        """Initialising every char-set wrapper activates all CHAR_SETS characters."""
        registry = RxTypeSet()
        registry.init_types()
        char_sets = CharSets(registry)
        every_type_name = set(CHAR_SETS.keys())

        # Initialise each wrapper setting that is flagged as a character set.
        char_set_settings = (
            entry for entry in RXWRAPPER_SETTINGS if entry.get("is_char_set")
        )
        for entry in char_set_settings:
            type_name = entry.get("rxtype_name", entry["name"])
            chars = entry.get("char_set") or CHAR_SETS[type_name]
            char_sets.init_char_set(type_name, every_type_name, chars)

        for type_name in CHAR_SETS.keys():
            self.assertIn(type_name, char_sets._char_sets)

            activated = char_sets._char_sets[type_name]
            for char in CHAR_SETS[type_name]:
                self.assertIn(char, activated)
Beispiel #12
0
 def setUp(self):
     """Create two parentless types, the registries, and one wrapper per type.

     Each wrapper is given a callable that returns a fixed string.
     """
     water, fire = RxType("water"), RxType("fire")
     self.water_type = water
     self.fire_type = fire

     self.rxtypes = RxTypeSet()
     self.char_sets = CharSets(self.rxtypes)

     self.river_wrapper = RxWrapper("river", lambda node: "river", water)
     self.candle_wrapper = RxWrapper("candle", lambda node: "candle", fire)
Beispiel #13
0
    def test_init_char_set_full_subset(self):
        """With every type name printable, all "alphanum" characters are activated."""
        registry = RxTypeSet()
        registry.init_types()
        char_sets = CharSets(registry)

        type_name = "alphanum"
        related_names = set(registry.get_full_type_names(type_name))
        every_type_name = set(CHAR_SETS.keys())
        chars = CHAR_SETS[type_name]

        char_sets.init_char_set(type_name, every_type_name, chars)

        # Only related type names that have a character set are checked.
        tracked_names = [
            name for name in related_names if name in char_sets._char_sets
        ]

        # Each of those sets must contain every alphanum character.
        for char in chars:
            for name in tracked_names:
                self.assertIn(char, char_sets._char_sets[name])
Beispiel #14
0
    def test_init(self):
        """A new ``CharSets`` has one empty set per CHAR_SETS key and keeps its type set."""
        registry = RxTypeSet()
        char_sets = CharSets(registry)
        internal = char_sets._char_sets

        self.assertEqual(len(internal), len(CHAR_SETS.keys()))

        for key in CHAR_SETS.keys():
            self.assertIn(key, internal)
            entry = internal[key]
            self.assertIsInstance(entry, set)
            self.assertEqual(len(entry), 0)

        self.assertEqual(registry, char_sets._rxtypes)
Beispiel #15
0
    def test_init_char_set_partial_subset(self):
        """Only types in the printable subset have their characters activated."""
        registry = RxTypeSet()
        registry.init_types()
        char_sets = CharSets(registry)
        type_names = ["digit", "alpha_lower", "alpha_upper"]
        printable_subset = {"digit", "alpha_upper"}

        # Initialise the character set of every type under test.
        for type_name in type_names:
            char_sets.init_char_set(type_name, printable_subset,
                                    CHAR_SETS[type_name])

        for type_name in type_names:
            # The type itself must have a character set entry.
            self.assertIn(type_name, char_sets._char_sets)

            # Character-set type names this type inherits from (including itself).
            inherited_names = [
                name for name in registry.get_full_type_names(type_name)
                if name in char_sets._char_sets
            ]

            # Types in the printable subset must be activated in every inherited
            # set; types outside it must be absent from all of them.
            if type_name in printable_subset:
                check = self.assertIn
            else:
                check = self.assertNotIn

            for char in CHAR_SETS[type_name]:
                for name in inherited_names:
                    check(char, char_sets._char_sets[name])
Beispiel #16
0
    def test_empty_sets(self):
        """``empty_sets`` empties every character set but keeps all the keys."""
        registry = RxTypeSet()
        char_sets = CharSets(registry)

        # Put three characters into every set.
        for key in char_sets._char_sets.keys():
            bucket = char_sets._char_sets[key]
            for char in ("a", "b", "c"):
                bucket.add(char)
            self.assertEqual(len(bucket), 3)

        char_sets.empty_sets()

        self.assertEqual(len(char_sets._char_sets), len(CHAR_SETS.keys()))
        for key in char_sets._char_sets.keys():
            remaining = char_sets._char_sets[key]
            self.assertEqual(len(remaining), 0)
Beispiel #17
0
    def test_get_full_type_names(self):
        """``get_full_type_names`` lists the type first, then each ancestor in turn."""
        liquid = RxType("liquid")
        water = RxType("water", liquid)
        lake = RxType("lake", water)
        registry = RxTypeSet()
        registry._types = {"liquid": liquid, "water": water, "lake": lake}

        expected_chains = (
            ("liquid", ["liquid"]),
            ("water", ["water", "liquid"]),
            ("lake", ["lake", "water", "liquid"]),
        )
        for name, chain in expected_chains:
            self.assertEqual(registry.get_full_type_names(name), chain)
Beispiel #18
0
class RxNodeFactory:
    """Build ``RxNode`` objects from explicit specs or by random selection.

    The factory owns an ``RxTypeSet``, a ``CharSets`` and an ``RxWrapperSet``.
    It also keeps two omission sets, ``omit_types`` and ``omit_wrappers``,
    which restrict the wrappers that random generation may pick.
    """

    def __init__(self,
                 printable_subset: Optional[Iterable[str]] = None) -> None:
        """Create the registries.

        ``printable_subset`` is handed unchanged to ``RxWrapperSet``.
        """
        self.omit_types: Set[str] = set()
        self.omit_wrappers: Set[str] = set()
        self._rxtypes = RxTypeSet()
        self._char_sets = CharSets(self._rxtypes)
        self._rxwrappers = RxWrapperSet(self._char_sets, printable_subset)

    @staticmethod
    def _as_name_set(
            names: Optional[Union[str, Iterable[str]]]) -> Set[str]:
        """Normalise ``None``, one name, or an iterable of names to a new set."""
        if not names:
            return set()
        # A bare string is a single name, not an iterable of characters.
        if isinstance(names, str):
            return {names}
        # Always copy so that clear_omit() never mutates the caller's object.
        return set(names)

    def set_omit(
        self,
        types: Optional[Union[str, Iterable[str]]] = None,
        wrappers: Optional[Union[str, Iterable[str]]] = None,
    ) -> None:
        """Replace both omission sets.

        Each argument may be ``None`` (or empty) for "omit nothing", a single
        name, or an iterable of names. Both sets are always overwritten, so
        an argument that is left out resets its set to empty.
        """
        self.omit_types = self._as_name_set(types)
        self.omit_wrappers = self._as_name_set(wrappers)

    def clear_omit(self, types: bool = False, wrappers: bool = False) -> None:
        """Empty the selected omission sets.

        With no flag set, both sets are cleared. With exactly one flag set,
        only that set is cleared.
        """
        if not (types or wrappers):
            types = wrappers = True

        if types:
            self.omit_types.clear()
        if wrappers:
            self.omit_wrappers.clear()

    def parse_rxspec(self, rxspec: RxSpec) -> NodeSpec:
        """Convert a nested rxspec into a node-spec dict.

        A non-list ``rxspec`` is treated as a one-element list. The first
        element becomes ``"rw_name"``. Each remaining element is parsed as
        the ``"modifier"`` when ``first_nested`` of it names a wrapper of type
        ``"mod"``, and otherwise as the list of ``"children"``. A later
        element of the same kind replaces an earlier one.
        """
        if not isinstance(rxspec, list):
            rxspec = [rxspec]

        node_spec = {"rw_name": rxspec[0]}
        for spec in rxspec[1:]:
            if self._rxwrappers.wrapper_is_type(first_nested(spec), "mod"):
                node_spec["modifier"] = self.parse_rxspec(spec)
            else:
                node_spec["children"] = [
                    self.parse_rxspec(child) for child in spec
                ]

        return node_spec

    def make_node(
        self,
        rw_name: Optional[str] = None,
        children: Union[List[NodeSpec], int] = RAND,
        modifier: Optional[Union[NodeSpec, int]] = None,
        rxwrapper: Optional[RxWrapper] = None,
        is_child: bool = False,
        strict_type_match: bool = False,
    ) -> RxNode:
        """Build one node, together with its children and optional modifier.

        children format: [{'rw_name': regex_wrapper_name, 'children': [<children>]})]
        modifier format: {'rw_name': <modifier_name>, 'children': <children>, 'modifier': <modifier>}

        Pass ``RAND`` as ``children`` or ``modifier`` to have that part
        generated randomly. ``rxwrapper`` takes precedence over ``rw_name``.
        ``strict_type_match`` is currently unused and is kept only for
        interface compatibility.

        Raises:
            ValueError: if neither ``rxwrapper`` nor ``rw_name`` is given.
        """
        if not rxwrapper:
            if not rw_name:
                raise ValueError("must provide regex wrapper object or name")

            rxwrapper = self._rxwrappers[rw_name]

        child_nodes: List[RxNode] = []
        if rxwrapper.child_count != 0:
            if children == RAND:
                # Drop child types that fall under an omitted type.
                child_types: List[str] = [
                    type_name for type_name in rxwrapper.child_types
                    if not self._rxtypes.is_one_of(type_name, self.omit_types)
                ]

                # Uniform wrappers use one type for all children. Sample it
                # from the filtered list so that the omissions are honoured.
                if rxwrapper.uniform_child_types and child_types:
                    child_types = sample(child_types, 1)

                child_nodes = [
                    self.make_random_node(choice(child_types), is_child=True)
                    for _ in range(rxwrapper.get_child_count())
                ]
            else:
                child_nodes = [
                    self.make_node(**child, is_child=True)
                    for child in children
                ]

        node: RxNode = RxNode(self._char_sets, rxwrapper, child_nodes,
                              is_child)
        if rxwrapper.is_modifiable:
            if modifier == RAND:
                # A modifier wrapper gets an "mmod"; anything else a "mod".
                mod_type: str = ("mmod" if rxwrapper.rxtype.is_type_name("mod")
                                 else "mod")
                if mod_type not in self.omit_types:
                    node.set_modifier(
                        self.make_random_node(mod_type, strict_typing=True))
            elif modifier:
                node.set_modifier(self.make_node(**modifier))

        return node

    def make_random_node(
        self,
        type_name: str = "re",
        is_child: bool = False,
        prob_modifier: float = P_MODIFIER,
        strict_typing: bool = False,
    ) -> RxNode:
        """Build a node from a randomly chosen wrapper of type ``type_name``.

        Candidates are the wrappers whose rxtype matches ``type_name``
        (``strict_typing`` is forwarded to ``RxType.is_type``). Wrappers
        falling under ``omit_types`` or named in ``omit_wrappers`` are then
        removed, as are wrappers of type ``"printable"`` when the node is not
        a child and ``SUPPRESS_ROOT_CHARS`` is set. A modifiable wrapper is
        given a random modifier with probability ``prob_modifier``.

        Raises:
            KeyError: presumably, if ``type_name`` or an omitted type name is
                not registered (depends on ``RxTypeSet`` lookup behaviour).
            IndexError: if no wrapper is left after filtering.
        """
        rxtype: RxType = self._rxtypes[type_name]

        # Start from the wrappers that match the requested type.
        candidates: List[RxWrapper] = [
            wrapper for wrapper in self._rxwrappers.all()
            if wrapper.rxtype.is_type(rxtype, strict=strict_typing)
        ]

        # Remove wrappers that fall under any omitted type.
        for omit in self.omit_types:
            omit_type: RxType = self._rxtypes[omit]
            candidates = [
                wrapper for wrapper in candidates
                if not wrapper.rxtype.is_type(omit_type)
            ]

        # At the root, optionally suppress printable-character wrappers.
        if not is_child and SUPPRESS_ROOT_CHARS:
            printable: RxType = self._rxtypes["printable"]
            candidates = [
                wrapper for wrapper in candidates
                if not wrapper.rxtype.is_type(printable)
            ]

        # Remove wrappers that were omitted by name.
        if self.omit_wrappers:
            candidates = [
                wrapper for wrapper in candidates
                if wrapper.name not in self.omit_wrappers
            ]

        if not candidates:
            # Same exception type that choice() raises on an empty sequence,
            # but with a message that names the cause.
            raise IndexError(
                f"no regex wrapper of type {type_name!r} is available "
                "after applying omissions")

        rxwrapper: RxWrapper = choice(candidates)
        modifier: Optional[int] = None
        if rxwrapper.is_modifiable and random() < prob_modifier:
            modifier = RAND

        return self.make_node(
            rxwrapper=rxwrapper,
            modifier=modifier,
            is_child=is_child,
        )
Beispiel #19
0
 def setUp(self):
     """Create a fresh type set, character sets and wrapper set for each test."""
     type_set = RxTypeSet()
     char_sets = CharSets(type_set)
     self.rxtypes = type_set
     self.char_sets = char_sets
     self.rxwrappers = RxWrapperSet(char_sets)
Beispiel #20
0
    def test_getitem(self):
        """Indexing the type set by name returns the registered type."""
        liquid = RxType("liquid")
        registry = RxTypeSet()
        registry._types = {"liquid": liquid}

        looked_up = registry["liquid"]
        self.assertEqual(looked_up, liquid)
Beispiel #21
0
 def test_getitem(self):
     """Indexing ``CharSets`` by any CHAR_SETS key yields a ``set``."""
     registry = RxTypeSet()
     char_sets = CharSets(registry)
     for name in CHAR_SETS.keys():
         entry = char_sets[name]
         self.assertIsInstance(entry, set)
Beispiel #22
0
 def test_rxtypes(self):
     """``rxtypes()`` returns the type set given to the constructor."""
     registry = RxTypeSet()
     char_sets = CharSets(registry)
     self.assertEqual(char_sets.rxtypes(), registry)
Beispiel #23
0
 def test_contains_false(self):
     """An unknown key is reported as not contained in ``CharSets``."""
     registry = RxTypeSet()
     char_sets = CharSets(registry)
     missing_key = "INVALID_KEY"
     self.assertFalse(missing_key in char_sets)
     self.assertNotIn(missing_key, char_sets)
Beispiel #24
0
 def test_contains_true(self):
     """Every CHAR_SETS key is reported as contained in ``CharSets``."""
     registry = RxTypeSet()
     char_sets = CharSets(registry)
     for name in CHAR_SETS.keys():
         self.assertTrue(name in char_sets)
         self.assertIn(name, char_sets)