def test_register_schema_file():
    """
    A schema dumped to a temporary YAML file can be registered by file name
    """
    expected_schema = {
        '$id': 'test/test',
        'version': 1,
        'properties': {
            'something': {'type': 'string'},
        },
    }
    event_log = EventLog()
    serializer = YAML(typ='safe')

    with tempfile.NamedTemporaryFile(mode='w') as schema_file:
        serializer.dump(expected_schema, schema_file)
        # Flush buffered output and rewind before the file's name is
        # passed to register_schema_file.
        schema_file.flush()
        schema_file.seek(0)
        event_log.register_schema_file(schema_file.name)

    registered_schemas = event_log.schemas.values()
    assert expected_schema in registered_schemas
def test_raised_exception_for_nonlist_categories():
    """
    Registering a schema whose `categories` is not a list raises ValueError
    """
    # `categories` on test_property is a bare string here, not a list.
    # NOTE(review): `_` is presumably textwrap.dedent -- confirm against
    # the file's imports.
    bad_schema_yaml = _("""\
    $id: test.schema
    title: Test Event
    version: 1
    type: object
    properties:
      test_property:
        description: testing a property
        categories: user-identifier
        type: string
    """)
    schema = YAML(typ='safe').load(bad_schema_yaml)

    # EventLog that allows the "user-identifier" category for SCHEMA_ID.
    event_log = EventLog(
        allowed_schemas={
            SCHEMA_ID: {
                "allowed_categories": ["user-identifier"]
            }
        },
    )

    # Registration must fail because `categories` is not a list.
    with pytest.raises(ValueError) as excinfo:
        event_log.register_schema(schema)

    # The error message must mention the list requirement.
    message = str(excinfo.value)
    assert 'must be a list.' in message
def test_register_schema_file_object(tmp_path):
    """
    Register schema from an already-open file object
    """
    expected_schema = {
        '$id': 'test/test',
        'version': 1,
        'properties': {
            'something': {
                'type': 'string',
                'categories': ['unrestricted']
            },
        },
    }
    event_log = EventLog()
    serializer = YAML(typ='safe')

    # Write the schema under pytest's tmp_path, then hand the open file
    # handle (not its path) to register_schema_file.
    schema_path = tmp_path.joinpath("schema.yml")
    serializer.dump(expected_schema, schema_path)
    with open(str(schema_path), 'r') as handle:
        event_log.register_schema_file(handle)

    registered_schemas = event_log.schemas.values()
    assert expected_schema in registered_schemas
def test_missing_categories_label():
    """
    Registering a schema with a property that has no `categories` raises KeyError
    """
    # test_property below carries no `categories` key at all.
    # NOTE(review): `_` is presumably textwrap.dedent -- confirm against
    # the file's imports.
    bad_schema_yaml = _("""\
    $id: test.schema
    title: Test Event
    version: 1
    type: object
    properties:
      test_property:
        description: testing a property
        type: string
    """)
    schema = YAML(typ='safe').load(bad_schema_yaml)

    # EventLog that allows "random-category" for SCHEMA_ID.
    event_log = EventLog(
        allowed_schemas={
            SCHEMA_ID: {
                "allowed_categories": ["random-category"]
            }
        }
    )

    # Registration must fail because the property is missing `categories`.
    with pytest.raises(KeyError) as excinfo:
        event_log.register_schema(schema)

    # The error message must name the missing label.
    message = str(excinfo.value)
    assert 'All properties must have a "categories"' in message
def test_register_invalid_schema():
    """
    Registering an invalid JSON Schema must raise jsonschema.SchemaError
    """
    invalid_schema = {
        # Totally invalid
        'properties': True
    }
    event_log = EventLog()
    with pytest.raises(jsonschema.SchemaError):
        event_log.register_schema(invalid_schema)
def test_reserved_properties():
    """
    Property names starting with __ are reserved

    Registering a user schema that declares one must raise ValueError.
    """
    schema_with_reserved_name = {
        '$id': 'test/test',
        'version': 1,
        'properties': {
            '__fail__': {'type': 'string'},
        },
    }
    event_log = EventLog()
    with pytest.raises(ValueError):
        event_log.register_schema(schema_with_reserved_name)
def test_good_config_file(tmp_path):
    """
    An EventLog built from GOOD_CONFIG ends up with at least one logging handler
    """
    config = get_config_from_file(tmp_path, GOOD_CONFIG)

    # Hand the loaded config to EventLog and inspect the resulting handlers.
    event_log = EventLog(config=config)
    configured_handlers = event_log.handlers

    assert len(configured_handlers) > 0
    assert isinstance(configured_handlers[0], logging.Handler)
def test_record_event_badschema():
    """
    Recording an event that violates its schema raises jsonschema.ValidationError
    """
    schema = {
        '$id': 'test/test',
        'version': 1,
        'properties': {
            'something': {
                'type': 'string',
                'categories': ['unrestricted']
            },
            'status': {
                'enum': ['success', 'failure'],
                'categories': ['unrestricted']
            }
        }
    }
    nonconforming_event = {
        'something': 'blah',
        # 'hi' is not one of the values listed in the 'status' enum
        'status': 'hi'
    }

    event_log = EventLog(handlers=[logging.NullHandler()])
    event_log.register_schema(schema)
    event_log.allowed_schemas = ['test/test']

    with pytest.raises(jsonschema.ValidationError):
        event_log.record_event('test/test', 1, nonconforming_event)
def test_record_event():
    """
    A valid event is emitted as JSON carrying the schema id, version and payload
    """
    schema = {
        '$id': 'test/test',
        'version': 1,
        'properties': {
            'something': {'type': 'string'},
        },
    }

    # Capture everything the handler writes in an in-memory buffer.
    sink = io.StringIO()
    stream_handler = logging.StreamHandler(sink)
    event_log = EventLog(handlers=[stream_handler])
    event_log.register_schema(schema)
    event_log.allowed_schemas = ['test/test']

    event_log.record_event('test/test', 1, {'something': 'blah'})
    stream_handler.flush()

    capsule = json.loads(sink.getvalue())
    assert '__timestamp__' in capsule
    # The timestamp varies from run to run, so drop it before comparing.
    capsule.pop('__timestamp__')

    expected_capsule = {
        '__schema__': 'test/test',
        '__version__': 1,
        'something': 'blah'
    }
    assert capsule == expected_capsule
def test_timestamp_override():
    """
    The timestamp passed as timestamp_override is the one that gets emitted
    """
    schema = {
        '$id': 'test/test',
        'version': 1,
        'properties': {
            'something': {
                'type': 'string',
                'categories': ['unrestricted']
            },
        },
    }

    sink = io.StringIO()
    stream_handler = logging.StreamHandler(sink)
    event_log = EventLog(handlers=[stream_handler])
    event_log.register_schema(schema)
    event_log.allowed_schemas = ['test/test']

    # Use a timestamp one day in the past so it cannot be mistaken for "now".
    one_day_ago = datetime.utcnow() - timedelta(days=1)
    event_log.record_event(
        'test/test',
        1,
        {'something': 'blah'},
        timestamp_override=one_day_ago,
    )
    stream_handler.flush()

    capsule = json.loads(sink.getvalue())
    expected_timestamp = one_day_ago.isoformat() + 'Z'
    assert capsule['__timestamp__'] == expected_timestamp
def test_allowed_schemas():
    """
    Nothing is emitted for a schema that is registered but not allowed
    """
    schema = {
        '$id': 'test/test',
        'version': 1,
        'properties': {
            'something': {
                'type': 'string',
                'categories': ['unrestricted']
            },
        },
    }

    sink = io.StringIO()
    stream_handler = logging.StreamHandler(sink)
    event_log = EventLog(handlers=[stream_handler])

    # Register only; allowed_schemas is deliberately left untouched.
    event_log.register_schema(schema)

    event_log.record_event('test/test', 1, {'something': 'blah'})
    stream_handler.flush()

    emitted = sink.getvalue()
    assert emitted == ''
def main():
    """
    Turn activity lines read from stdin into server-action events.

    Registers every ``.yaml`` file found under the ``event-schemas``
    directory next to this file, then records one
    ``hub.jupyter.org/server-action`` event for each stdin line that
    contains ``'seconds to'``.
    """
    eventlog = EventLog(
        allowed_schemas=["hub.jupyter.org/server-action"],
        handlers=[logging.StreamHandler()],
    )

    # Register all YAML schema files below the event-schemas directory.
    schema_root = pathlib.Path(__file__).parent / "event-schemas"
    for dirname, _subdirs, filenames in os.walk(schema_root):
        for filename in filenames:
            if filename.endswith('.yaml'):
                eventlog.register_schema_file(os.path.join(dirname, filename))

    for line in sys.stdin:
        # Only lines containing this marker are passed on to the parser.
        if 'seconds to' in line:
            timestamp, user, action = parse_activity_line(line)
            event = {
                "action": action,
                "username": user,
                "servername": ""
            }
            eventlog.record_event(
                "hub.jupyter.org/server-action",
                1,
                event,
                timestamp_override=timestamp,
            )
def test_register_duplicate_schemas():
    """
    Registering a second schema with an already-registered $id and version raises ValueError
    """
    first_schema = {
        '$id': 'test/test',
        'version': 1,
        'properties': {
            'something': {
                'type': 'string',
                'categories': ['unrestricted']
            },
        },
    }
    # Same $id and version as above; only the property name differs.
    second_schema = {
        '$id': 'test/test',
        'version': 1,
        'properties': {
            'somethingelse': {
                'type': 'string',
                'categories': ['unrestricted']
            },
        },
    }

    event_log = EventLog()
    event_log.register_schema(first_schema)
    with pytest.raises(ValueError):
        event_log.register_schema(second_schema)
def test_allowed_schemas(schema, allowed_schemas, expected_output):
    """
    Only the expected properties of EVENT_DATA are emitted.

    The arguments are presumably supplied by a pytest parametrization
    defined outside this function -- confirm against the decorator.

    schema: schema registered with the EventLog.
    allowed_schemas: value passed as EventLog's ``allowed_schemas``.
    expected_output: the non-``__`` properties expected in the output.
    """
    sink = io.StringIO()

    # Create a handler that captures+records events with allowed tags.
    handler = logging.StreamHandler(sink)

    e = EventLog(handlers=[handler], allowed_schemas=allowed_schemas)
    e.register_schema(schema)

    # Record event and read output. The payload is the module-level
    # EVENT_DATA; a local dict that used to be built here was never passed
    # to record_event and has been removed.
    e.record_event(SCHEMA_ID, VERSION, EVENT_DATA)

    recorded_event = json.loads(sink.getvalue())
    # Drop the double-underscore keys so only event properties are compared.
    event_data = {
        key: value
        for key, value in recorded_event.items()
        if not key.startswith('__')
    }

    # Verify that *exactly* the right properties are recorded.
    assert expected_output == event_data
def main():
    """
    Turn activity lines read from stdin into server-action events on stdout.

    Registers every ``.yaml`` file found under the ``event-schemas``
    directory next to this file, then records one
    ``hub.jupyter.org/server-action`` event for each stdin line that
    contains ``'seconds to'``. The username in each event is replaced by
    its HMAC-SHA256 hex digest, keyed with a random key generated on every
    run.
    """
    eventlog = EventLog(
        allowed_schemas=["hub.jupyter.org/server-action"],
        handlers=[logging.StreamHandler(sys.stdout)],
    )

    # A fresh 32-byte key per invocation, so digests differ between runs.
    hmac_key = secrets.token_bytes(32)

    # Register all YAML schema files below the event-schemas directory.
    schema_root = pathlib.Path(__file__).parent / "event-schemas"
    for dirname, _subdirs, filenames in os.walk(schema_root):
        for filename in filenames:
            if filename.endswith('.yaml'):
                eventlog.register_schema_file(os.path.join(dirname, filename))

    for line in sys.stdin:
        # Only lines containing this marker are passed on to the parser.
        if 'seconds to' in line:
            timestamp, user, action = parse_activity_line(line)
            hashed_user = hmac.new(
                key=hmac_key,
                msg=user.encode(),
                digestmod='sha256',
            ).hexdigest()
            event = {
                "action": action,
                "username": hashed_user,
                "servername": ""
            }
            eventlog.record_event(
                "hub.jupyter.org/server-action",
                1,
                event,
                timestamp_override=timestamp,
            )
def test_missing_required_properties():
    """
    $id and version are required properties in our schemas.

    They aren't required by JSON Schema itself
    """
    schema_without_either = {'properties': {}}
    schema_with_misnamed_version = {
        '$id': 'something',
        '$version': 1,  # This should have been 'version'
    }

    event_log = EventLog()
    # Each incomplete schema gets its own raises block so both are checked.
    with pytest.raises(ValueError):
        event_log.register_schema(schema_without_either)
    with pytest.raises(ValueError):
        event_log.register_schema(schema_with_misnamed_version)
def get_event_data(event, schema, schema_id, version, allowed_schemas):
    """
    Record ``event`` through a fresh EventLog and return what was emitted.

    event: event payload; a deep copy is recorded, so the caller's object
        is not the one handed to record_event.
    schema: schema to register before recording.
    schema_id, version: identify the schema the event is recorded against.
    allowed_schemas: value passed as EventLog's ``allowed_schemas``.

    Returns a dict of the emitted JSON without keys starting with ``__``.
    """
    buffer = io.StringIO()

    # Route everything the EventLog emits into the in-memory buffer.
    capture_handler = logging.StreamHandler(buffer)
    event_log = EventLog(
        handlers=[capture_handler],
        allowed_schemas=allowed_schemas,
    )
    event_log.register_schema(schema)

    # Record event and read output
    event_log.record_event(schema_id, version, deepcopy(event))
    recorded = json.loads(buffer.getvalue())

    # Keep only the event's own properties, not the `__`-prefixed ones.
    event_data = {}
    for key, value in recorded.items():
        if key.startswith('__'):
            continue
        event_data[key] = value
    return event_data
def test_bad_config_file(tmp_path):
    """
    Building an EventLog from BAD_CONFIG raises TraitError
    """
    cfg = get_config_from_file(tmp_path, BAD_CONFIG)

    # Construction itself is expected to raise, so the instance is not
    # bound to a name (the original assigned it to an unused local).
    with pytest.raises(TraitError):
        EventLog(config=cfg)
def test_unique_logger_instances():
    """
    Two EventLog instances each emit only their own event to their own handler
    """
    schema_ids = ['test/test0', 'test/test1']
    schemas = [
        {
            '$id': schema_id,
            'version': 1,
            'properties': {
                'something': {
                    'type': 'string',
                    'categories': ['unrestricted']
                },
            },
        }
        for schema_id in schema_ids
    ]

    # One in-memory sink and one stream handler per EventLog.
    sinks = [io.StringIO() for schema_id in schema_ids]
    stream_handlers = [logging.StreamHandler(sink) for sink in sinks]

    event_logs = []
    for stream_handler, schema, schema_id in zip(
            stream_handlers, schemas, schema_ids):
        event_log = EventLog(handlers=[stream_handler])
        event_log.register_schema(schema)
        event_log.allowed_schemas = [schema_id]
        event_logs.append(event_log)

    # Record one event on each log, then flush both handlers.
    for event_log, schema_id in zip(event_logs, schema_ids):
        event_log.record_event(schema_id, 1, {
            'something': 'blah',
        })
    for stream_handler in stream_handlers:
        stream_handler.flush()

    # json.loads would fail if a sink held more than one JSON document,
    # so each sink must contain exactly its own log's single event.
    for sink, schema_id in zip(sinks, schema_ids):
        capsule = json.loads(sink.getvalue())
        assert '__timestamp__' in capsule
        # Remove timestamp from capsule when checking equality, since it
        # is going to vary
        del capsule['__timestamp__']
        assert capsule == {
            '__schema__': schema_id,
            '__schema_version__': 1,
            '__metadata_version__': 1,
            'something': 'blah'
        }