Exemple #1
0
def test_validator_invalid_number():
    """A numeric field must fail validation for a word string and for a callable."""
    for bad_value in ("one hundred", print):
        record = {"number_field": bad_value}
        # a fresh definition and Schema are built for every candidate value
        definition = {"fields": [{"name": "number_field", "type": "numeric"}]}
        validator = Schema(definition)
        assert not validator.validate(record)
Exemple #2
0
def main(context):
    """
    Build the tweet-processing flow and keep a Twitter stream running.

    The flow chains tweet extraction, CVE extraction, saving to a bucket and
    an end operator. The stream tracks "CVE" in English-language tweets and
    is (re)started in an endless loop with a five second pause in between.

    Parameters:
        context: mapping
            Expanded as keyword arguments into build_context; the result
            must provide a 'config' mapping.

    Raises:
        SystemExit: when a KeyboardInterrupt is received.
    """
    # create the run context from the config and context passed to main
    # this would allow dates etc to be passed from something external
    context = build_context(**context)

    extract_tweet = ExtractTweetDetailsOperator()
    extract_cves = ExtractCvesFromTweetOperator()
    save = SaveToBucketOperator(
        project=context['config'].get('target_project'),
        to_path=context['config'].get('target_path'),
        schema=Schema(context),
        compress=context['config'].get('compress'))
    end = EndOperator()

    flow = extract_tweet > extract_cves > save > end

    while True:
        try:
            listener = TwitterListener(api, flow)
            stream = tweepy.Stream(api.auth, listener, tweet_mode="extended")
            stream.filter(track=["CVE"], languages=["en"])
        except KeyboardInterrupt:
            print('Keyboard Interrupt')
            # quit() is injected by the site module for interactive sessions
            # and may be missing (e.g. python -S); raise SystemExit directly.
            raise SystemExit
        except Exception as err:
            # any other failure is reported and the stream is restarted
            print(
                f"Error {type(err).__name__} {err} - restarting in 5 seconds")
            print(gva.errors.RenderErrorStack())

        # pause before reconnecting, also when filter() returns normally
        time.sleep(5)
Exemple #3
0
def test_validator_nonnative_types():
    """String-encoded values must validate against numeric, boolean, date and nullable fields."""
    record = dict(
        integer_field="100",
        boolean_field="True",
        date_field="2000-01-01T00:00:00.000",
        nullable_field="",
    )
    field_types = (
        ("integer_field", "numeric"),
        ("boolean_field", "boolean"),
        ("date_field", "date"),
        ("nullable_field", "nullable"),
    )
    definition = {
        "fields": [{"name": name, "type": kind} for name, kind in field_types]
    }

    validator = Schema(definition)
    assert validator.validate(record)
Exemple #4
0
def test_validator_invalid_boolean():
    """The string "not true" must be rejected for a boolean field."""
    definition = {"fields": [{"name": "boolean_field", "type": "boolean"}]}
    record = {"boolean_field": "not true"}

    validator = Schema(definition)
    is_valid = validator.validate(record)
    assert not is_valid
Exemple #5
0
def test_call_alias():
    """Calling a Schema instance directly with a record must give a truthy result for valid data."""
    definition = {"fields": [{"name": "number_field", "type": "numeric"}]}
    record = {"number_field": 100}

    validator = Schema(definition)
    outcome = validator(record)
    assert outcome
Exemple #6
0
def test_validator_invalid_string():
    """An integer value must be rejected for a string field."""
    definition = {"fields": [{"name": "string_field", "type": "string"}]}
    record = {"string_field": 100}

    validator = Schema(definition)
    is_valid = validator.validate(record)
    assert not is_valid
Exemple #7
0
def test_validator_invalid_schema():
    """Schema construction is expected to raise for the definition {"name": "string"}."""
    result = True
    try:
        Schema({"name": "string"})
    except Exception:
        # Exception rather than a bare except, so KeyboardInterrupt and
        # SystemExit are not mistaken for a schema rejection
        result = False
    assert (not result)
Exemple #8
0
def test_validator_date():
    """A date field must reject "tomorrow" and accept "2020-01-01"."""
    definition = {"fields": [{"name": "key", "type": "date"}]}
    rejected = {"key": "tomorrow"}
    accepted = {"key": "2020-01-01"}

    validator = Schema(definition)
    assert not validator.validate(rejected)
    assert validator.validate(accepted)
Exemple #9
0
def test_validator_list():
    """A list field must reject a plain string and accept a list of strings."""
    definition = {"fields": [{"name": "key", "type": "list"}]}
    rejected = {"key": "not a list"}
    accepted = {"key": ["is", "a", "list"]}

    validator = Schema(definition)
    assert not validator.validate(rejected)
    assert validator.validate(accepted)
Exemple #10
0
def test_unknown_type():
    """Building a Schema with an unrecognised field type must raise ValueError."""
    definition = {"fields": [{"name": "key", "type": "not_a_known_type"}]}

    try:
        Schema(definition)
    except ValueError:
        raised = True
    else:
        raised = False

    assert raised
def test_extract_qid_summary_operator_validates():
    """ Check that the operator's output passes the QID_SUMMARY schema """

    vuln_node = xmltodict.parse(VALID_XML)['VULN']
    operator = ExtractQidSummaryOperator()
    # the operator returns a (data, context) pair; only data is checked
    data, _ = operator(data=vuln_node, context={})

    validator = Schema(find_file('QID_SUMMARY.metadata'))
    assert validator.validate(data), validator.last_error
Exemple #12
0
def test_validator_number_ranges():
    """Numeric fields must honour "min" and "max", together and individually."""

    def ranged(**bounds):
        # Schema for a single numeric field named "number" with the given bounds
        return Schema(
            {"fields": [{"name": "number", "type": "numeric", **bounds}]})

    too_high = {"number": 1000}
    too_low = {"number": 100}
    within = {"number": 500}

    # both bounds set: only the in-range value is accepted
    bounded = ranged(min=250, max=750)
    assert not bounded.validate(too_high)
    assert not bounded.validate(too_low)
    assert bounded.validate(within)

    # lower bound only
    floor_only = ranged(min=250)
    assert floor_only.validate(too_high), floor_only.last_error
    assert not floor_only.validate(too_low), floor_only.last_error

    # upper bound only
    ceiling_only = ranged(max=750)
    assert ceiling_only.validate(too_low), ceiling_only.last_error
    assert not ceiling_only.validate(too_high), ceiling_only.last_error
Exemple #13
0
def test_raise_exception():
    """validate(..., raise_exception=True) must raise ValueError for invalid data."""
    definition = {"fields": [{"name": "number_field", "type": "numeric"}]}
    record = {"number_field": "one hundred"}

    validator = Schema(definition)
    try:
        validator.validate(record, raise_exception=True)
    except ValueError:
        raised = True
    else:
        raised = False

    assert raised
Exemple #14
0
def test_validator_string_format():
    """A string field with a "format" pattern must reject non-matching text and accept a match."""
    cve_field = {
        "name": "cve",
        "type": "string",
        "format": r"(?i)CVE-\d{4}-\d{4,7}",
    }
    rejected = {"cve": "eternalblue"}
    accepted = {"cve": "CVE-2017-0144"}

    validator = Schema({"fields": [cve_field]})
    assert not validator.validate(rejected)
    assert validator.validate(accepted)
Exemple #15
0
def test_validator_enum():
    """An enum field must reject a value outside "symbols" and accept one inside."""
    compass_field = {
        "name": "key",
        "type": "enum",
        "symbols": ['north', 'south'],
    }
    rejected = {"key": "left"}
    accepted = {"key": "north"}

    validator = Schema({"fields": [compass_field]})
    assert not validator.validate(rejected)
    assert validator.validate(accepted)
Exemple #16
0
def test_validator_loaders():
    """
    Ensure dictionary, json and json files load

    The same schema is handed to Schema as a dict, as a JSON string and as
    the name of a file holding that JSON. Each form must build and validate
    a record without raising. The scratch file is removed afterwards.
    """
    import json
    import os

    TEST_SCHEMA_DICT = {"fields": [{"name": "string_field", "type": "string"}]}
    TEST_SCHEMA_STRING = json.dumps(TEST_SCHEMA_DICT)
    TEST_SCHEMA_FILE = 'temp'

    with open(TEST_SCHEMA_FILE, 'w') as file:
        file.write(TEST_SCHEMA_STRING)

    sources = (
        (TEST_SCHEMA_DICT, "load schema from dictionary"),
        (TEST_SCHEMA_STRING, "load schema from string"),
        (TEST_SCHEMA_FILE, "load schema from file"),
    )

    try:
        for source, message in sources:
            failed = False
            try:
                test = Schema(source)
                test.validate({"string_field": "pass"})
            except Exception:
                failed = True
            assert not failed, message
    finally:
        # do not leave the scratch schema file behind in the working directory
        os.remove(TEST_SCHEMA_FILE)
Exemple #17
0
def test_validator_multiple_types():
    """A field listing several types must accept a string, a boolean and None."""
    multi_field = {
        "name": "multi",
        "type": ["string", "boolean", "nullable"],
    }

    validator = Schema({"fields": [multi_field]})
    for candidate in ("True", True, None):
        assert validator.validate({"multi": candidate})
Exemple #18
0
def test_validator_extended_schema():
    """
    Extra keys in the schema definition must not stop a valid record from validating
    """
    record = {"string_field": "the"}
    string_field = {
        "name": "string_field",
        "type": "string",
        # the two keys below are the additional attributes under test
        "description": "character array",
        "last_updated": datetime.datetime.today(),
    }
    definition = {
        "table": "this is a test schema",
        "fields": [string_field],
    }

    validator = Schema(definition)
    assert validator.validate(record)
Exemple #19
0
def build_flow(context: dict):
    """
    Assemble the save-to-bucket flow for the given run context.

    Reads 'target_project', 'target_path', 'compress' and 'writers' from
    context['config'] and 'date' from context, chains a SaveToBucketOperator
    into an EndOperator, attaches the configured writers and returns the flow.
    """
    # settings for the operation that persists the records
    save_settings = dict(
        project=context['config'].get('target_project'),
        to_path=context['config'].get('target_path'),
        schema=Schema(context),
        date=context.get('date'),
        compress=context['config'].get('compress'),
    )

    # chain the operations to create the flow
    pipeline = SaveToBucketOperator(**save_settings) > EndOperator()

    # attach the writers; an empty list is used when none are configured
    configured_writers = context['config'].get('writers', [])
    pipeline.attach_writers(configured_writers)

    return pipeline
Exemple #20
0
def test_validator_all_valid_values():
    """A record with a native value for every field type used here must validate."""
    record = dict(
        string_field="string",
        integer_field=100,
        boolean_field=True,
        date_field=datetime.datetime.today(),
        other_field=["abc"],
        nullable_field=None,
        list_field=['a', 'b', 'c'],
        enum_field="RED",
    )

    # fields described by name and type only
    simple_fields = (
        ("string_field", "string"),
        ("integer_field", "numeric"),
        ("boolean_field", "boolean"),
        ("date_field", "date"),
        ("other_field", "other"),
        ("nullable_field", "nullable"),
        ("list_field", "list"),
    )
    fields = [{"name": name, "type": kind} for name, kind in simple_fields]
    # the enum field additionally carries its permitted symbols
    fields.append({
        "name": "enum_field",
        "type": "enum",
        "symbols": ['RED', 'GREEN', 'BLUE'],
    })

    validator = Schema({"fields": fields})
    assert validator.validate(record)
def execute_test(compress, schema, reader):
    """
    Time how long it takes to push every record from reader through a Writer.

    The Writer is created before the clock starts; the measurement covers
    the append loop and the finalize call. Returns the elapsed time in
    seconds as a float.
    """
    sink = Writer(writer=file_writer,
                  to_path='%datefolders',
                  compress=compress,
                  schema=schema)

    # the caller supplies the records, e.g. read_jsonl('tweets.jsonl')
    began_ns = time.perf_counter_ns()
    for entry in reader:
        sink.append(entry)
    sink.finalize()
    elapsed_ns = time.perf_counter_ns() - began_ns
    return elapsed_ns / 1e9

# Build the schema object (the first run below passes None instead of it) and
# read the whole input file into a list so it can be handed to execute_test.
schema = Schema(schema_definition)
lines = list(read_jsonl('tweets.jsonl'))

# Show the number of records and the second record (index 1).
print(len(lines))
print(lines[1])

# Baseline run: compression off and no schema passed to the writer.
results = []
result = {
    'compression': False,
    'validation': False,
    'time': execute_test(False, None, lines)
}
results.append(result)
# Delete the "year_2021" directory tree before the next run.
# NOTE(review): presumably this is the output written by execute_test - confirm.
shutil.rmtree("year_2021")

result = {
Exemple #22
0
def validate():
    """Build a Schema from schema_definition and validate data, one million times over."""
    for _ in range(1_000_000):
        # construction stays inside the loop, exactly as in the original
        Schema(schema_definition).validate(data)