def test_flood_negative_gap_differing_data():
    events = [
        Event(timestamp=now, duration=5, data={"a": 0}),
        Event(timestamp=now, duration=100, data={"b": 1}),
    ]
    flooded = flood(events)
    assert flooded == events
def test_sort_by_duration():
    now = datetime.now(timezone.utc)
    events = []
    events.append(Event(timestamp=now + timedelta(seconds=2), duration=timedelta(seconds=1)))
    events.append(Event(timestamp=now + timedelta(seconds=1), duration=timedelta(seconds=2)))
    events_sorted = sort_by_duration(events)
    assert events_sorted == events[::-1]
def test_json_serialization(self):
    e = Event(
        timestamp=datetime.now(timezone.utc),
        duration=timedelta(hours=13, minutes=37),
        data={"key": "val"},
    )
    json_str = e.to_json_str()
    logging.error(json_str)
    assert e == Event(**json.loads(json_str))
def test_url_parse_event():
    now = datetime.now(timezone.utc)
    e = Event(data={"url": "http://asd.com/test/?a=1"}, timestamp=now, duration=timedelta(seconds=1))
    result = split_url_events([e])
    print(result)
    assert result[0].data["protocol"] == "http"
    assert result[0].data["domain"] == "asd.com"
    assert result[0].data["path"] == "/test/"
    assert result[0].data["params"] == ""
    assert result[0].data["options"] == "a=1"
    assert result[0].data["identifier"] == ""

    e2 = Event(data={"url": "https://www.asd.asd.com/test/test2/meh;meh2?asd=2&asdf=3#id"}, timestamp=now, duration=timedelta(seconds=1))
    result = split_url_events([e2])
    print(result)
    assert result[0].data["protocol"] == "https"
    assert result[0].data["domain"] == "asd.asd.com"
    assert result[0].data["path"] == "/test/test2/meh"
    assert result[0].data["params"] == "meh2"
    assert result[0].data["options"] == "asd=2&asdf=3"
    assert result[0].data["identifier"] == "id"

    e3 = Event(data={"url": "file:///home/johan/myfile.txt"}, timestamp=now, duration=timedelta(seconds=1))
    result = split_url_events([e3])
    print(result)
    assert result[0].data["protocol"] == "file"
    assert result[0].data["domain"] == ""
    assert result[0].data["path"] == "/home/johan/myfile.txt"
    assert result[0].data["params"] == ""
    assert result[0].data["options"] == ""
    assert result[0].data["identifier"] == ""
def test_chunk_events_by_key():
    now = datetime.now(timezone.utc)
    e1_data = {"label1": "1a", "label2": "2a"}
    e2_data = {"label1": "1a", "label2": "2b"}
    e3_data = {"label1": "1b", "label2": "2b"}
    e1 = Event(data=e1_data, timestamp=now, duration=timedelta(seconds=1))
    e2 = Event(data=e2_data, timestamp=now, duration=timedelta(seconds=1))
    e3 = Event(data=e3_data, timestamp=now, duration=timedelta(seconds=1))
    events = [e1, e2, e3]
    result = chunk_events_by_key(events, "label1")
    print(len(result))
    pprint(result)
    assert len(result) == 2
    # Check root label
    assert result[0].data["label1"] == "1a"
    assert result[1].data["label1"] == "1b"
    # Check timestamp
    assert result[0].timestamp == e1.timestamp
    assert result[1].timestamp == e3.timestamp
    # Check duration
    assert result[0].duration == e1.duration + e2.duration
    assert result[1].duration == e3.duration
    # Check subevents
    assert result[0].data["subevents"][0] == e1
    assert result[0].data["subevents"][1] == e2
    assert result[1].data["subevents"][0] == e3
def merge_events_by_keys(events, keys) -> List[Event]:
    # Merges events that share the same values for all the given keys.
    # Each merged event keeps the timestamp of the first event seen for its
    # key combination; its duration is the sum of the merged events' durations.
    if len(keys) < 1:
        return events
    merged_events: Dict[Tuple, Event] = {}
    for event in events:
        composite_key: Tuple = ()
        for key in keys:
            if key in event.data:
                val = event.data[key]
                # Needed for when the value is a list, such as for categories
                # (lists are unhashable, tuples are not)
                if isinstance(val, list):
                    val = tuple(val)
                composite_key = composite_key + (val,)
        if composite_key not in merged_events:
            merged_events[composite_key] = Event(
                timestamp=event.timestamp,
                duration=event.duration,
                data={},
            )
            for key in keys:
                if key in event.data:
                    merged_events[composite_key].data[key] = event.data[key]
        else:
            merged_events[composite_key].duration += event.duration
    result = []
    for key in merged_events:
        result.append(Event(**merged_events[key]))
    return result
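# A minimal usage sketch of merge_events_by_keys; the "app" key and the
# durations below are hypothetical, chosen only to illustrate the
# composite-key grouping (not taken from any test in this file).
def _example_merge_events_by_keys():
    now = datetime.now(timezone.utc)
    events = [
        Event(timestamp=now, duration=timedelta(seconds=1), data={"app": "a"}),
        Event(timestamp=now, duration=timedelta(seconds=2), data={"app": "a"}),
        Event(timestamp=now, duration=timedelta(seconds=1), data={"app": "b"}),
    ]
    merged = merge_events_by_keys(events, ["app"])
    # Two merged events remain: {"app": "a"} with the summed duration of 3s,
    # and {"app": "b"} with 1s. The first-seen timestamp is kept per group.
    assert len(merged) == 2
    assert sum((e.duration for e in merged), timedelta(0)) == timedelta(seconds=4)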
def test_categorize():
    now = datetime.now(timezone.utc)
    classes = [
        (["Test"], Rule({"regex": "^just"})),
        (["Test", "Subtest"], Rule({"regex": "subtest$"})),
        (["Test", "Ignorecase"], Rule({"regex": "ignorecase", "ignore_case": True})),
    ]
    events = [
        Event(timestamp=now, duration=0, data={"key": "just a test"}),
        Event(timestamp=now, duration=0, data={"key": "just a subtest"}),
        Event(timestamp=now, duration=0, data={"key": "just a IGNORECASE test"}),
        Event(timestamp=now, duration=0, data={}),
    ]
    events = categorize(events, classes)
    assert events[0].data["$category"] == ["Test"]
    assert events[1].data["$category"] == ["Test", "Subtest"]
    assert events[2].data["$category"] == ["Test", "Ignorecase"]
    assert events[3].data["$category"] == ["Uncategorized"]
def test_simplify_string():
    events = [
        Event(data={"label": "(99) Facebook"}),
        Event(data={"label": "(14) YouTube"}),
    ]
    assert simplify_string(events, "label")[0].data["label"] == "Facebook"
    assert simplify_string(events, "label")[1].data["label"] == "YouTube"

    events = [Event(data={"app": "Cemu.exe", "title": "Cemu - FPS: 133.7 - BotW"})]
    assert simplify_string(events, "title")[0].data["title"] == "Cemu - FPS: ... - BotW"

    events = [Event(data={"app": "VSCode.exe", "title": "● report.md - Visual Studio Code"})]
    assert simplify_string(events, "title")[0].data["title"] == "report.md - Visual Studio Code"

    events = [Event(data={"app": "Gedit", "title": "*test.md - gedit"})]
    assert simplify_string(events, "title")[0].data["title"] == "test.md - gedit"
def test_query2_function_in_function(datastore):
    qname = "asd"
    bid = "test_bucket"
    starttime = iso8601.parse_date("1970-01-01")
    endtime = iso8601.parse_date("1970-01-02")
    example_query = """
    RETURN=limit_events(query_bucket("{bid}"), 1);
    """.format(bid=bid)
    try:
        # Setup buckets
        bucket1 = datastore.create_bucket(
            bucket_id=bid, type="test", client="test", hostname="test", name="test"
        )
        # Prepare buckets
        e1 = Event(data={}, timestamp=starttime, duration=timedelta(seconds=1))
        bucket1.insert(e1)
        result = query(qname, example_query, starttime, endtime, datastore)
        assert 1 == len(result)
    finally:
        datastore.delete_bucket(bid)
def test_create() -> None:
    Event(timestamp=now, duration=timedelta(hours=13, minutes=37), data={"key": "val"})
    Event(timestamp=valid_timestamp, duration=timedelta(hours=13, minutes=37), data={"key": "val"})
def test_replace(bucket_cm):
    """
    Tests the replace event in bucket functionality
    """
    with bucket_cm as bucket:
        # Create two events
        e1 = bucket.insert(Event(data={"label": "test1"}, timestamp=now))
        assert e1
        assert e1.id is not None
        e2 = bucket.insert(Event(data={"label": "test2"}, timestamp=now + timedelta(seconds=1)))
        assert e2
        assert e2.id is not None
        # Replace the first event
        e1.data["label"] = "test1-replaced"
        bucket.replace(e1.id, e1)
        bucket.insert(Event(data={"label": "test3"}, timestamp=now + timedelta(seconds=2)))
        # Replace the second event
        e2.data["label"] = "test2-replaced"
        bucket.replace(e2.id, e2)
        # Assert length and content (events are returned newest first)
        assert 3 == len(bucket.get(-1))
        assert bucket.get(-1)[0]["data"]["label"] == "test3"
        assert bucket.get(-1)[1]["data"]["label"] == "test2-replaced"
        assert bucket.get(-1)[2]["data"]["label"] == "test1-replaced"
def test_flood_backward_merge():
    events = [
        Event(timestamp=now, duration=5),
        Event(timestamp=now + 10 * td1s, duration=10),
    ]
    flooded = flood(events)
    assert len(flooded) == 1
    assert flooded[0].duration == timedelta(seconds=20)
def test_heartbeat_reduce_fail():
    """Events should not reduce"""
    now = datetime.now(timezone.utc)
    td_1s = timedelta(seconds=1)
    events = [
        Event(timestamp=now, data={"label": "test"}),
        Event(timestamp=now + 3 * td_1s, data={"label": "test"}),
    ]
    reduced_events = heartbeat_reduce(events, pulsetime=2)
    assert len(reduced_events) == 2
def test_flood_negative_small_gap_differing_data():
    events = [
        Event(timestamp=now, duration=100, data={"b": 1}),
        Event(timestamp=now + 99.99 * td1s, duration=100, data={"a": 0}),
    ]
    flooded = flood(events)
    duration = sum((e.duration for e in flooded), timedelta(0))
    assert duration == timedelta(seconds=100 + 99.99)
def test_flood_forward():
    events = [
        Event(timestamp=now, duration=10, data={"a": 0}),
        Event(timestamp=now + 15 * td1s, duration=5, data={"b": 1}),
    ]
    flooded = flood(events)
    # The gap should be filled, leaving no space between the two events
    assert (flooded[0].timestamp + flooded[0].duration) - flooded[1].timestamp == timedelta(0)
def test_filter_keyval_regex():
    events = [
        Event(data={"label": "aa"}),
        Event(data={"label": "bb"}),
        Event(data={"label": "cc"}),
    ]
    events_re = filter_keyvals_regex(events, "label", "aa|cc")
    assert len(events_re) == 2
def test_heartbeat_merge():
    """Events should merge"""
    now = datetime.now(timezone.utc)
    td_1s = timedelta(seconds=1)
    last_event, heartbeat = Event(timestamp=now), Event(timestamp=now + td_1s)
    merged = heartbeat_merge(last_event, heartbeat, pulsetime=2)
    assert merged is not None
def test_query2_test_merged_keys(datastore):
    name = "A label/name for a test bucket"
    bid = "bucket1"
    qname = "test_query_merged_keys"
    starttime = iso8601.parse_date("2080")
    endtime = starttime + timedelta(hours=1)
    example_query = """
    bid1 = "{bid}";
    events = query_bucket(bid1);
    events = merge_events_by_keys(events, ["label1", "label2"]);
    events = sort_by_duration(events);
    eventcount = query_bucket_eventcount(bid1);
    RETURN = {{"events": events, "eventcount": eventcount}};
    """.format(bid=bid)
    try:
        # Setup buckets
        bucket1 = datastore.create_bucket(
            bucket_id=bid, type="test", client="test", hostname="test", name=name
        )
        # Prepare buckets
        e1 = Event(
            data={"label1": "test1", "label2": "test1"},
            timestamp=starttime,
            duration=timedelta(seconds=1),
        )
        e2 = Event(
            data={"label1": "test1", "label2": "test1"},
            timestamp=starttime + timedelta(seconds=1),
            duration=timedelta(seconds=1),
        )
        e3 = Event(
            data={"label1": "test1", "label2": "test2"},
            timestamp=starttime + timedelta(seconds=2),
            duration=timedelta(seconds=1),
        )
        bucket1.insert(e3)
        bucket1.insert(e1)
        bucket1.insert(e2)
        # Query
        result = query(qname, example_query, starttime, endtime, datastore)
        # Assert
        print(result)
        assert len(result["events"]) == 2
        assert result["eventcount"] == 3
        assert result["events"][0]["data"]["label1"] == "test1"
        assert result["events"][0]["data"]["label2"] == "test1"
        assert result["events"][0]["duration"] == timedelta(seconds=2)
        assert result["events"][1]["data"]["label1"] == "test1"
        assert result["events"][1]["data"]["label2"] == "test2"
        assert result["events"][1]["duration"] == timedelta(seconds=1)
    finally:
        datastore.delete_bucket(bid)
def test_flood_negative_gap_same_data():
    events = [
        Event(timestamp=now, duration=100, data={"a": 0}),
        Event(timestamp=now, duration=5, data={"a": 0}),
    ]
    flooded = flood(events)
    total_duration = sum((e.duration for e in flooded), timedelta(0))
    assert len(flooded) == 1
    assert total_duration == timedelta(seconds=100)
def test_merge_events_by_keys(self):
    now = datetime.now(timezone.utc)
    events = []
    e1 = Event(data={"label": "a"}, timestamp=now, duration=timedelta(seconds=1))
    e2 = Event(data={"label": "b"}, timestamp=now, duration=timedelta(seconds=1))
    events = events + [e1] * 10
    events = events + [e2] * 10
    result = merge_events_by_keys(events, ["label"])
    assert len(result) == 2
    assert result[0].duration == timedelta(seconds=10)
def test_filter_keyval():
    labels = ["aa", "cc"]
    events = [
        Event(data={"label": "aa"}),
        Event(data={"label": "bb"}),
        Event(data={"label": "cc"}),
    ]
    included_events = filter_keyvals(events, "label", labels)
    excluded_events = filter_keyvals(events, "label", labels, exclude=True)
    assert len(included_events) == 2
    assert len(excluded_events) == 1
def test_heartbeat_reduce():
    """Events should reduce"""
    now = datetime.now(timezone.utc)
    td_1s = timedelta(seconds=1)

    # Check that an empty list works
    assert not heartbeat_reduce([], pulsetime=1)

    events = [
        Event(timestamp=now, data={"label": "test"}),
        Event(timestamp=now + td_1s, data={"label": "test"}),
    ]
    reduced_events = heartbeat_reduce(events, pulsetime=2)
    assert len(reduced_events) == 1
def test_query2_basic_query(datastore):
    name = "A label/name for a test bucket"
    bid1 = "bucket1"
    bid2 = "bucket2"
    qname = "test_query_basic"
    starttime = iso8601.parse_date("1970")
    endtime = starttime + timedelta(hours=1)
    example_query = """
    bid1 = "{bid1}";
    bid2 = "{bid2}";
    events = query_bucket(bid1);
    intersect_events = query_bucket(bid2);
    RETURN = filter_period_intersect(events, intersect_events);
    """.format(bid1=bid1, bid2=bid2)
    try:
        # Setup buckets
        bucket1 = datastore.create_bucket(
            bucket_id=bid1, type="test", client="test", hostname="test", name=name
        )
        bucket2 = datastore.create_bucket(
            bucket_id=bid2, type="test", client="test", hostname="test", name=name
        )
        # Prepare buckets
        e1 = Event(data={"label": "test1"}, timestamp=starttime, duration=timedelta(seconds=1))
        e2 = Event(
            data={"label": "test2"},
            timestamp=starttime + timedelta(seconds=2),
            duration=timedelta(seconds=1),
        )
        et = Event(
            data={"label": "intersect-label"},
            timestamp=starttime,
            duration=timedelta(seconds=1),
        )
        bucket1.insert(e1)
        bucket1.insert(e2)
        bucket2.insert(et)
        # Query
        result = query(qname, example_query, starttime, endtime, datastore)
        # Assert
        assert len(result) == 1
        assert result[0]["data"]["label"] == "test1"
    finally:
        datastore.delete_bucket(bid1)
        datastore.delete_bucket(bid2)
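# For reference: filter_period_intersect keeps only the portions of the first
# event list that overlap in time with the second, which is why only "test1"
# (overlapping the intersect event) survives above. A minimal sketch, assuming
# the underlying transform can also be called directly from Python (t0 here is
# a hypothetical timezone-aware datetime):
#
#   events = [Event(timestamp=t0, duration=timedelta(seconds=10), data={})]
#   filters = [Event(timestamp=t0 + timedelta(seconds=5), duration=timedelta(seconds=10), data={})]
#   result = filter_period_intersect(events, filters)
#   # -> one event spanning t0+5s..t0+10s (duration 5s, the overlap)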
def test_query2_query_categorize(datastore):
    bid = "test_bucket"
    qname = "test"
    starttime = iso8601.parse_date("1970")
    endtime = starttime + timedelta(hours=1)
    example_query = r"""
    events = query_bucket("{bid}");
    events = sort_by_timestamp(events);
    events = categorize(events, [[["test"], {{"regex": "test"}}], [["test", "subtest"], {{"regex": "test\w"}}]]);
    events_by_cat = merge_events_by_keys(events, ["$category"]);
    RETURN = {{"events": events, "events_by_cat": events_by_cat}};
    """.format(bid=bid)
    try:
        bucket = datastore.create_bucket(
            bucket_id=bid, type="test", client="test", hostname="test", name="asd"
        )
        events = [
            Event(data={"label": "test"}, timestamp=starttime, duration=timedelta(seconds=1)),
            Event(
                data={"label": "testwithmoredetail"},
                timestamp=starttime + timedelta(seconds=1),
                duration=timedelta(seconds=1),
            ),
            Event(
                data={"label": "testwithmoredetail"},
                timestamp=starttime + timedelta(seconds=2),
                duration=timedelta(seconds=1),
            ),
        ]
        bucket.insert(events)
        result = query(qname, example_query, starttime, endtime, datastore)
        print(result)
        assert len(result["events"]) == 3
        assert result["events"][0].data["label"] == "test"
        assert result["events"][0].data["$category"] == ["test"]
        assert result["events"][1].data["$category"] == ["test", "subtest"]
        assert len(result["events_by_cat"]) == 2
        assert result["events_by_cat"][0].data["$category"] == ["test"]
        assert result["events_by_cat"][1].data["$category"] == ["test", "subtest"]
        assert result["events_by_cat"][1].duration == timedelta(seconds=2)
    finally:
        datastore.delete_bucket(bid)
def post(self, bucket_id):
    data = request.get_json()
    logger.debug("Received post request for event in bucket '{}' and data: {}".format(bucket_id, data))
    if isinstance(data, dict):
        events = [Event(**data)]
    elif isinstance(data, list):
        events = [Event(**e) for e in data]
    else:
        raise BadRequest("Invalid POST data", "")
    event = app.api.create_events(bucket_id, events)
    return event.to_json_dict() if event else None, 200
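# A hedged sketch of the payloads this handler accepts; the URL, port, and
# field values below are illustrative assumptions, not taken from this code:
#
#   import requests
#   # A single event as a JSON object...
#   requests.post(
#       "http://localhost:5600/api/0/buckets/test_bucket/events",
#       json={"timestamp": "2020-01-01T00:00:00Z", "duration": 1.0, "data": {"label": "example"}},
#   )
#   # ...or several events as a JSON list; anything else raises BadRequest.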
def get_all_events(
    self,
    offset: int,
    limit: int,
    starttime: Optional[datetime] = None,
    endtime: Optional[datetime] = None,
    synced: Optional[bool] = None,
):
    if limit == 0:
        return []
    afk = (
        EventModel.select()
        .order_by(EventModel.timestamp.desc())
        .group_by(fn.strftime('%Y-%m-%d %H:%M:%S', EventModel.timestamp))
        # .group_by(datetime.strptime(EventModel.timestamp, '%Y/%m/%d %H:%M:%S'))
        .offset(offset)
        .limit(limit)
    )
    if starttime:
        # Important to normalize datetimes to UTC, otherwise any UTC offset will be ignored
        starttime = starttime.astimezone(timezone.utc)
        afk = afk.where(starttime <= EventModel.timestamp)
    if endtime:
        endtime = endtime.astimezone(timezone.utc)
        afk = afk.where(EventModel.timestamp <= endtime)
    afk = afk.where(EventModel.datastr.contains('"status": "afk"'))

    activity = (
        EventModel.select()
        .order_by(EventModel.timestamp.desc())
        .offset(offset)
        .limit(limit)
    )
    if starttime:
        # Normalize to UTC here as well
        starttime = starttime.astimezone(timezone.utc)
        activity = activity.where(starttime <= EventModel.timestamp)
    if endtime:
        endtime = endtime.astimezone(timezone.utc)
        activity = activity.where(EventModel.timestamp <= endtime)
    activity = activity.where(
        EventModel.datastr.contains('reddit')
        | EventModel.datastr.contains('Facebook')
        | EventModel.datastr.contains('Instagram')
        | EventModel.datastr.contains('devRant')
        | EventModel.datastr.contains('Messenger')
        | EventModel.datastr.contains('Twitter')
    )
    if synced is not None:
        afk = afk.where(EventModel.is_synced == synced)
        activity = activity.where(EventModel.is_synced == synced)
    return [Event(**e1) for e1 in map(EventModel.json, afk.execute())] + [
        Event(**e2) for e2 in map(EventModel.json, activity.execute())
    ]
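# A hypothetical usage sketch (the storage instance name and parameter values
# are illustrative): fetch up to 100 recent matching events since a cutoff,
# regardless of sync state (synced=None skips the is_synced filter).
#
#   events = storage.get_all_events(
#       offset=0,
#       limit=100,
#       starttime=datetime(2020, 1, 1, tzinfo=timezone.utc),
#       synced=None,
#   )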
def test_heartbeat_merge_fail():
    """Merge should not happen"""
    now = datetime.now(timezone.utc)
    td_1s = timedelta(seconds=1)

    # timestamp of heartbeat more than pulsetime away
    last_event, heartbeat = Event(timestamp=now, data={"label": "test"}), Event(timestamp=now + 3 * td_1s, data={"label": "test"})
    merged = heartbeat_merge(last_event, heartbeat, pulsetime=2)
    assert merged is None

    # labels not identical
    last_event, heartbeat = Event(timestamp=now, data={"label": "test"}), Event(timestamp=now + td_1s, data={"label": "test2"})
    merged = heartbeat_merge(last_event, heartbeat, pulsetime=2)
    assert merged is None
def test_tags():
    now = datetime.now(timezone.utc)
    classes = [
        ("Test", Rule({"regex": "value$"})),
        ("Test", Rule({"regex": "^just"})),
    ]
    events = [
        Event(timestamp=now, duration=0, data={"key": "just a test value"}),
        Event(timestamp=now, duration=0, data={}),
    ]
    events = tag(events, classes)
    assert len(events[0].data["$tags"]) == 2
    assert len(events[1].data["$tags"]) == 0
def test_replace_last(bucket_cm):
    """
    Tests the replace last event in bucket functionality (simple)
    """
    with bucket_cm as bucket:
        # Create two events
        bucket.insert(Event(data={"label": "test1"}, timestamp=now))
        bucket.insert(Event(data={"label": "test2"}, timestamp=now + timedelta(seconds=1)))
        # Replace the last event (test2) with a new one
        bucket.replace_last(Event(data={"label": "test2-replaced"}, timestamp=now + timedelta(seconds=1)))
        bucket.insert(Event(data={"label": "test3"}, timestamp=now + timedelta(seconds=2)))
        # Assert length
        assert 3 == len(bucket.get(-1))
        assert bucket.get(-1)[1]["data"]["label"] == "test2-replaced"
def get_events(
    self,
    bucket_id: str,
    limit: int,
    starttime: Optional[datetime] = None,
    endtime: Optional[datetime] = None,
):
    query_filter = {}  # type: Dict[str, dict]
    if starttime or endtime:
        query_filter["timestamp"] = {}
        if starttime:
            query_filter["timestamp"]["$gte"] = starttime
        if endtime:
            query_filter["timestamp"]["$lte"] = endtime
    if limit == 0:
        return []
    elif limit < 0:
        limit = 10**9
    ds_events = list(
        self.db[bucket_id]["events"]
        .find(query_filter)
        .sort([("timestamp", -1)])
        .limit(limit)
    )
    events = []
    for event in ds_events:
        event["id"] = str(event.pop('_id'))
        # Required since MongoDB doesn't handle timezones
        event["timestamp"] = event["timestamp"].replace(tzinfo=timezone.utc)
        event = Event(**event)
        events.append(event)
    return events
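# A hypothetical usage sketch (the storage instance name, bucket id, and time
# window are illustrative): fetch every event from the last hour, newest
# first; a negative limit means "no effective limit" in this implementation.
#
#   now = datetime.now(timezone.utc)
#   events = storage.get_events(
#       "aw-watcher-window_myhost",
#       limit=-1,
#       starttime=now - timedelta(hours=1),
#       endtime=now,
#   )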