Esempio n. 1
0
    def test_sensitivity_matches(self):
        """Check the highest sensitivity reported for a range of inputs.

        The rule under test requires "bad thing" and then tries a series of
        alternatives carrying decreasing sensitivities, ending in a
        FallbackRule. For every input the rule as a whole must match, and
        the most severe sensitivity among the sub-rules that produced
        matches must equal the expected one.
        """
        trigger = RegexRule("bad thing")
        graded_alternatives = OrRule(
            RegexRule("very bad", sensitivity=Sensitivity.CRITICAL),
            RegexRule("moderately bad", sensitivity=Sensitivity.PROBLEM),
            RegexRule("slightly bad", sensitivity=Sensitivity.WARNING),
            FallbackRule(sensitivity=Sensitivity.INFORMATION))
        compound = AndRule(trigger, graded_alternatives)

        cases = (
            ("very bad thing", Sensitivity.CRITICAL),
            ("moderately bad thing", Sensitivity.PROBLEM),
            ("moderately bad very bad thing", Sensitivity.CRITICAL),
            ("slightly moderately bad thing", Sensitivity.PROBLEM),
            ("moderately slightly bad thing", Sensitivity.WARNING),
            ("bad thing", Sensitivity.INFORMATION),
            ("moderately quite bad thing", Sensitivity.INFORMATION),
        )

        for text, wanted in cases:
            matched, results = run_rule(compound, text)
            self.assertEqual(matched, True)

            # Collect the sensitivity of every sub-rule that actually
            # matched, ignoring rules that carry no sensitivity at all.
            triggered = []
            for sub_rule, sub_matches in results.items():
                if sub_rule.sensitivity is None or not sub_matches:
                    continue
                triggered.append(sub_rule.sensitivity)

            most_severe = max(triggered, key=lambda level: level.value)
            self.assertEqual(wanted, most_severe)
 def test_rule_names(self):
     """Compound rules build their presentation from their parts' names.

     A named compound child (Fragment C) is expected to appear under its
     own name rather than as an expansion of its components.
     """
     frag_a = RegexRule("A", name="Fragment A")
     frag_b = RegexRule("B", name="Fragment B")
     frag_c = OrRule(
         RegexRule("C1", name="Fragment C1"),
         RegexRule("C2", name="Fragment C2"),
         name="Fragment C")

     conjunction = AndRule(frag_a, frag_b)
     self.assertEqual(
         conjunction.presentation, "(Fragment A and Fragment B)")

     disjunction = OrRule(frag_a, frag_b, frag_c)
     self.assertEqual(
         disjunction.presentation,
         "(Fragment A, Fragment B, or Fragment C)")
Esempio n. 3
0
from os2datascanner.engine2.pipeline import (explorer, processor, matcher,
                                             tagger, exporter)

# Directory containing this file, and the "data" directory one level above it.
here_path = os.path.dirname(__file__)
test_data_path = os.path.join(here_path, "..", "data")

# Multi-line sample text that is embedded in the data: URL below.
data = """Hwæt! wē Gār-Dena in gēar-dagum
þēod-cyninga þrym gefrūnon,
hū ðā æþeling as ell en fremedon.
Oft Scyld Scēfing sceaþena þrēatum,
monegum mǣgþum meodo-setla oftēah."""
# The sample text as a text/plain data: URL: UTF-8 encoded, then base64.
# NOTE: base64.encodebytes() wraps its output, so the payload part of this
# URL contains newline characters (including a trailing one).
data_url = "data:text/plain;base64,{0}".format(
    base64.encodebytes(data.encode("utf-8")).decode("ascii"))

# Rule applied to the sample text: an OrRule over three regular expressions.
# The first is named, the second carries CRITICAL sensitivity, and the third
# has neither a name nor a sensitivity.
# NOTE(review): presumably only the second alternative is meant to match the
# sample text ("Scyld Scēfing") -- confirm against expected_matches.
rule = OrRule(
    RegexRule("Æthelred the Unready", name="Check for ill-advised kings"),
    RegexRule("Scyld S(.*)g", sensitivity=Sensitivity.CRITICAL),
    RegexRule("Professor James Moriarty"))

expected_matches = [{
    "rule": {
        "type": "regex",
        "name": "Check for ill-advised kings",
        "sensitivity": None,
        "expression": "Æthelred the Unready"
    },
    "matches": None
}, {
    "rule": {
        "type": "regex",
        "name": None,
Esempio n. 4
0
        try:
            return html.parse(fp).xpath("//body")[0].text_content()
        except AssertionError:
            return None


# Absolute path of ../../data/html_benchmark/data, resolved relative to the
# directory containing this file.
curdir = os.path.dirname(__file__)
benchpath = os.path.join(curdir, "..", "..", "data", "html_benchmark", "data")
benchpath = os.path.abspath(benchpath)

# BS4: create json to inject into the conversion queue
# The message describes the file "html.html" under benchpath, scanned with
# the regular expression "[Aa]rachnid"; it is built from the bs4-specific
# source and handle classes and immediately converted to its JSON-object form.
bs4_obj = messages.ConversionMessage(
    scan_spec=messages.ScanSpecMessage(
        scan_tag="dummy",
        source=bs4FilesystemSource(benchpath),
        rule=RegexRule("[Aa]rachnid"),
        configuration={},
        progress=None,
    ),
    handle=bs4FilesystemHandle(bs4FilesystemSource(benchpath), "html.html"),
    progress=messages.ProgressFragment(rule=RegexRule("[Aa]rachnid"), matches=[]),
).to_json_object()


# LXML: same message, but with every "type" field (scan-spec source, handle
# and the handle's source) rewritten to "lxml". The deep copy keeps bs4_obj
# itself untouched.
# NOTE(review): presumably "lxml" selects the lxml-based source/handle
# classes when the message is deserialised -- confirm.
lxml_obj = deepcopy(bs4_obj)
lxml_obj["scan_spec"]["source"]["type"] = "lxml"
lxml_obj["handle"]["type"] = "lxml"
lxml_obj["handle"]["source"]["type"] = "lxml"


def test_conversion_html(obj):
def get_regex_rule(regex, sensitivity):
    """Build a RegexRule for *regex* that carries the given *sensitivity*."""
    regex_rule = RegexRule(regex, sensitivity=sensitivity)
    return regex_rule
Esempio n. 6
0
# Scan tag fixtures: each pairs a timestamp (time0 / time1, defined elsewhere
# in this file) with the primary key and name of a dummy scanner.
scan_tag0 = {
    "time": time0,
    "scanner": {
        "pk": 14,
        "name": "Dummy test scanner"
    },
}
scan_tag1 = {
    "scanner": {
        "pk": 11,
        "name": "Dummy test scanner2"
    },
    "time": time1
}

# Two rules with the same expression (Danish for "Our secret password is")
# that differ only in sensitivity: PROBLEM versus CRITICAL.
common_rule = RegexRule(expression="Vores hemmelige adgangskode er",
                        sensitivity=Sensitivity.PROBLEM)

common_rule_2 = RegexRule(expression="Vores hemmelige adgangskode er",
                          sensitivity=Sensitivity.CRITICAL)
# Section marker: the next line is a bare string expression that is evaluated
# and discarded; it only labels the fixtures that follow.
"""EGON DATA"""
# Handle for a single mail in an EWS account on the '@olsen.com' domain; no
# server or admin credentials are supplied.
# NOTE(review): user='******' looks like a masked value -- confirm that the
# tests do not depend on a real user name here.
egon_email_handle = EWSMailHandle(source=EWSAccountSource(domain='@olsen.com',
                                                          server=None,
                                                          admin_user=None,
                                                          admin_password=None,
                                                          user='******'),
                                  path='TDJHGFIHDIJHSKJGHKFUGIUHIUEHIIHE',
                                  mail_subject='Jeg har en plan',
                                  folder_name='Hundehoveder',
                                  entry_id=None)

egon_email_handle_1 = EWSMailHandle(
    def test_simplerule_matches(self):
        candidates = [
            (CPRRule(modulus_11=False, ignore_irrelevant=False), """
2205995008: forbryder,
230500 0003: forbryder,
240501-0006: forbryder,
250501-1987: forbryder""",
             ["2205XXXXXX", "2305XXXXXX", "2405XXXXXX", "2505XXXXXX"]),
            (CPRRule(modulus_11=True, ignore_irrelevant=True), """
2205995008: forbryder,
230500 0003: forbryder,
240501-0006: forbryder,
250501-1987: forbryder""", ["2205XXXXXX", "2305XXXXXX", "2405XXXXXX"]),
            (CPRRule(modulus_11=True,
                     ignore_irrelevant=True,
                     examine_context=False), """
Vejstrand Kommune, Børn- og Ungeforvaltningen. P-nr. 2205995008
Vejstrand Kommune, Børn- og Ungeforvaltningen. P-nummer: 2305000003
240501-0006""", ["2205XXXXXX", "2305XXXXXX", "2405XXXXXX"]),
            (CPRRule(modulus_11=True,
                     ignore_irrelevant=True,
                     examine_context=True), """
Vejstrand Kommune, Børn- og Ungeforvaltningen. P-nr. 2205995008
Vejstrand Kommune, Børn- og Ungeforvaltningen. P-nummer: 2305000003
240501-0006""", ["2405XXXXXX"]),
            (RegexRule("((four|six)( [aopt]+)?|(one|seven) [aopt]+)"), """
one
one potato
two potato
three potato
four
five potato
six potato
seven potato
more!""", ["one potato", "four", "six potato", "seven potato"]),
            (LastModifiedRule(
                datetime(2019, 12, 24, 23, 59, 59, tzinfo=timezone.utc)),
             datetime(2019, 12, 31, 23, 59, 59,
                      tzinfo=timezone.utc), ["2019-12-31T23:59:59+0000"]),
            (LastModifiedRule(
                datetime(2019, 12, 24, 23, 59, 59, tzinfo=timezone.utc)),
             datetime(2019, 5, 22, 0, 0, 1, tzinfo=timezone.utc), None),
            (DimensionsRule(width_range=range(0, 16385),
                            height_range=range(0, 16385),
                            min_dim=256), (128, 256), [[128, 256]]),
            (DimensionsRule(width_range=range(0, 16385),
                            height_range=range(0, 16385),
                            min_dim=256), (128, 255), []),
            (DimensionsRule(width_range=range(256, 1024),
                            height_range=range(256, 1024),
                            min_dim=0), (256, 256), [[256, 256]]),
            (DimensionsRule(width_range=range(256, 1024),
                            height_range=range(256, 1024),
                            min_dim=0), (32, 32), []),
        ]

        for rule, in_value, expected in candidates:
            with self.subTest(rule):
                json = rule.to_json_object()
                back_again = rule.from_json_object(json)
                self.assertEqual(rule, back_again)

            with self.subTest(rule):
                matches = rule.match(in_value)
                if expected:
                    self.assertEqual([match["match"] for match in matches],
                                     expected)
                else:
                    self.assertFalse(list(matches))
class RuleTests(unittest.TestCase):
    """Tests for simple and compound rules.

    Covers matching, step-by-step evaluation of compound rules, JSON
    round-trips and the human-readable presentation of rule names.
    """

    def test_simplerule_matches(self):
        """Round-trip every simple rule through JSON and check its matches.

        Each candidate is a ``(rule, input, expected)`` triple. A falsy
        ``expected`` (``None`` or an empty list) means the rule must not
        produce any match for that input.
        """
        candidates = [
            (CPRRule(modulus_11=False, ignore_irrelevant=False), """
2205995008: forbryder,
230500 0003: forbryder,
240501-0006: forbryder,
250501-1987: forbryder""",
             ["2205XXXXXX", "2305XXXXXX", "2405XXXXXX", "2505XXXXXX"]),
            (CPRRule(modulus_11=True, ignore_irrelevant=True), """
2205995008: forbryder,
230500 0003: forbryder,
240501-0006: forbryder,
250501-1987: forbryder""", ["2205XXXXXX", "2305XXXXXX", "2405XXXXXX"]),
            (CPRRule(modulus_11=True,
                     ignore_irrelevant=True,
                     examine_context=False), """
Vejstrand Kommune, Børn- og Ungeforvaltningen. P-nr. 2205995008
Vejstrand Kommune, Børn- og Ungeforvaltningen. P-nummer: 2305000003
240501-0006""", ["2205XXXXXX", "2305XXXXXX", "2405XXXXXX"]),
            (CPRRule(modulus_11=True,
                     ignore_irrelevant=True,
                     examine_context=True), """
Vejstrand Kommune, Børn- og Ungeforvaltningen. P-nr. 2205995008
Vejstrand Kommune, Børn- og Ungeforvaltningen. P-nummer: 2305000003
240501-0006""", ["2405XXXXXX"]),
            (RegexRule("((four|six)( [aopt]+)?|(one|seven) [aopt]+)"), """
one
one potato
two potato
three potato
four
five potato
six potato
seven potato
more!""", ["one potato", "four", "six potato", "seven potato"]),
            (LastModifiedRule(
                datetime(2019, 12, 24, 23, 59, 59, tzinfo=timezone.utc)),
             datetime(2019, 12, 31, 23, 59, 59,
                      tzinfo=timezone.utc), ["2019-12-31T23:59:59+0000"]),
            (LastModifiedRule(
                datetime(2019, 12, 24, 23, 59, 59, tzinfo=timezone.utc)),
             datetime(2019, 5, 22, 0, 0, 1, tzinfo=timezone.utc), None),
            (DimensionsRule(width_range=range(0, 16385),
                            height_range=range(0, 16385),
                            min_dim=256), (128, 256), [[128, 256]]),
            (DimensionsRule(width_range=range(0, 16385),
                            height_range=range(0, 16385),
                            min_dim=256), (128, 255), []),
            (DimensionsRule(width_range=range(256, 1024),
                            height_range=range(256, 1024),
                            min_dim=0), (256, 256), [[256, 256]]),
            (DimensionsRule(width_range=range(256, 1024),
                            height_range=range(256, 1024),
                            min_dim=0), (32, 32), []),
        ]

        for rule, in_value, expected in candidates:
            # Serialising and deserialising must give back an equal rule.
            with self.subTest(rule):
                # Deliberately not named "json", so the local can never
                # shadow the standard-library module of that name.
                serialised = rule.to_json_object()
                back_again = rule.from_json_object(serialised)
                self.assertEqual(rule, back_again)

            # The rule must report exactly the expected match values.
            with self.subTest(rule):
                matches = rule.match(in_value)
                if expected:
                    self.assertEqual([match["match"] for match in matches],
                                     expected)
                else:
                    self.assertFalse(list(matches))

    # Compound rules paired with their test cases. Every case is a triple of
    # (input string, expected final outcome, expected number of simple-rule
    # evaluations needed to reach that outcome).
    compound_candidates = [
        (AndRule(RegexRule("A"), OrRule(RegexRule("B"),
                                        RegexRule("C"))), [("A", False, 3),
                                                           ("AB", True, 2),
                                                           ("ABC", True, 2),
                                                           ("BC", False, 1),
                                                           ("AC", True, 3)]),
        (NotRule(
            AndRule(RegexRule("A"), OrRule(RegexRule("B"), RegexRule("C")))),
         [("A", True, 3), ("AB", False, 2), ("ABC", False, 2), ("BC", True, 1),
          ("AC", False, 3)]),
        (AndRule(NotRule(OrRule(RegexRule("B"), RegexRule("C"))),
                 RegexRule("A")), [("A", True, 3), ("AB", False, 1),
                                   ("ABC", False, 1), ("BC", False, 1),
                                   ("AC", False, 2)])
    ]

    def test_compound_rule_matches(self):
        """Step through each compound rule and check outcome and cost.

        A rule is repeatedly split into a head rule plus the continuations
        to follow when the head does or does not match. The walk ends once
        the continuation is a plain bool; that bool is the final outcome,
        and the number of heads evaluated on the way is the cost.
        """
        for rule, tests in RuleTests.compound_candidates:
            for input_string, outcome, evaluation_count in tests:
                # One subtest per case, so that a single failing input is
                # reported by name and does not hide the remaining cases.
                with self.subTest(rule, input_string=input_string):
                    now = rule
                    evaluations = 0

                    while not isinstance(now, bool):
                        head, pve, nve = now.split()
                        evaluations += 1
                        matched = list(head.match(input_string))
                        now = pve if matched else nve

                    self.assertEqual(
                        outcome, now,
                        "{0}: wrong result".format(input_string))
                    self.assertEqual(
                        evaluation_count, evaluations,
                        "{0}: wrong evaluation count".format(input_string))

    def test_json_round_trip(self):
        """Compound rules must survive a JSON round-trip unchanged."""
        for rule, _ in RuleTests.compound_candidates:
            with self.subTest(rule):
                serialised = rule.to_json_object()
                back_again = rule.from_json_object(serialised)
                self.assertEqual(rule, back_again)

    def test_oxford_comma(self):
        """oxford_comma joins one, two and three items correctly."""
        self.assertEqual(oxford_comma(["Monday"], "and"), "Monday")
        self.assertEqual(oxford_comma(["Monday", "Tuesday"], "and"),
                         "Monday and Tuesday")
        self.assertEqual(
            oxford_comma(["Monday", "Tuesday", "Wednesday"], "and"),
            "Monday, Tuesday, and Wednesday")

    def test_rule_names(self):
        """Compound rules build their presentation from their parts' names.

        A named compound child (Fragment C) is expected to appear under its
        own name rather than as an expansion of its components.
        """
        A = RegexRule("A", name="Fragment A")
        B = RegexRule("B", name="Fragment B")
        C1 = RegexRule("C1", name="Fragment C1")
        C2 = RegexRule("C2", name="Fragment C2")
        C = OrRule(C1, C2, name="Fragment C")
        self.assertEqual(
            AndRule(A, B).presentation, "(Fragment A and Fragment B)")
        self.assertEqual(
            OrRule(A, B, C).presentation,
            "(Fragment A, Fragment B, or Fragment C)")
    "scanner": "Dummy test scanner",
    "time": time0
}
# Further scan tag fixtures: the same scanner name with different timestamps
# (time1 / time2 are defined elsewhere in this file).
scan_tag1 = {
    "scanner": "Dummy test scanner",
    "time": time1
}
scan_tag2 = {
    "scanner": "Dummy test scanner",
    "time": time2
}

# Handle for one text file below a filesystem source; it is reused by the
# message fixtures that follow.
common_handle = FilesystemHandle(
        FilesystemSource("/mnt/fs01.magenta.dk/brugere/af"),
        "OS2datascanner/Dokumenter/Verdensherredømme - plan.txt")
# Regex rule with WARNING sensitivity; the expression is Danish for
# "Our secret password is".
common_rule = RegexRule("Vores hemmelige adgangskode er",
                        sensitivity=Sensitivity.WARNING)
# DimensionsRule built with no arguments, i.e. with whatever defaults the
# class defines.
dimension_rule = DimensionsRule()


# Scan specification template shared by the message fixtures. scan_tag is
# left as None here and filled in per message with _replace(scan_tag=...).
common_scan_spec = messages.ScanSpecMessage(
        scan_tag=None, # placeholder
        source=common_handle.source,
        rule=common_rule,
        configuration={},
        progress=None)

positive_match = messages.MatchesMessage(
        scan_spec=common_scan_spec._replace(scan_tag=scan_tag0),
        handle=common_handle,
        matched=True,
        matches=[