class RiverML:
    """Bundle a river pipeline and metric with a Kafka producer and consumer.

    The constructor builds the model, creates a ``Producer`` and a
    ``Consumer`` against the same brokers, and assigns the consumer to the
    requested partitions of the request topic.
    """

    # fraud detection model
    # (class-level value; every instance replaces it in __init__)
    model = compose.Pipeline(preprocessing.StandardScaler(),
                             linear_model.LogisticRegression())
    # ROCAUC metric to score the model as it trains
    # (class-level value; every instance replaces it in __init__)
    metric = metrics.ROCAUC()

    def __init__(self, my_id=1, bootstrap_servers='', list_of_partitions=None,
                 request_topic='', inference_topic='', group_id='my_grp'):
        """Build the model and connect the Kafka producer and consumer.

        :param my_id: Identifier for this instance; stored as ``self.my_id``.
        :param bootstrap_servers: Value passed as ``bootstrap.servers`` to
            both the producer and the consumer.
        :param list_of_partitions: Iterable of partition numbers of
            ``request_topic`` to assign the consumer to. ``None`` (the
            default) or an empty iterable assigns no partitions.
        :param request_topic: Topic the consumer partitions belong to.
        :param inference_topic: Topic name stored as ``self.result_t``.
        :param group_id: Value passed as the consumer's ``group.id``.
        """
        self.model = compose.Pipeline(
            preprocessing.MinMaxScaler(),
            anomaly.HalfSpaceTrees(seed=42),
        )
        self.metric = metrics.ROCAUC()

        # Not used elsewhere in this constructor; stored for callers.
        self.my_id = my_id
        self.t = request_topic
        self.result_t = inference_topic
        self.my_grp_id = group_id
        self.result_t_p = 8
        self.bootstrap_servers = bootstrap_servers

        # One TopicPartition per requested partition, in the order given.
        self.tls = [
            TopicPartition(self.t, partition)
            for partition in (list_of_partitions or ())
        ]
        print(self.tls)

        # Connection settings shared by the producer and the consumer.
        # NOTE(review): credentials and the CA path are hard-coded here;
        # consider loading them from configuration instead.
        common_conf = {
            'bootstrap.servers': bootstrap_servers,
            'sasl.mechanism': 'PLAIN',
            'security.protocol': 'SASL_SSL',
            'ssl.ca.location': '/tmp/cacert.pem',
            'sasl.username': '******',
            'sasl.password': '******',
        }

        self.producer = Producer({
            **common_conf,
            'client.id': 'test-sw-1',
        })

        self.consumer = Consumer({
            **common_conf,
            'group.id': group_id,
            'auto.offset.reset': 'latest',
        })
        # Explicit partition assignment rather than a topic subscription.
        self.consumer.assign(self.tls)
from river import compose
from river import preprocessing
from river import linear_model
from river import metrics
from river import datasets
from river import optim

# Logistic regression on standardized features, trained with plain SGD.
optimizer = optim.SGD(0.1)
model = compose.Pipeline(preprocessing.StandardScaler(),
                         linear_model.LogisticRegression(optimizer))

metric = metrics.ROCAUC()
precision = metrics.Precision()

# Test-then-train: predict on each sample before the model learns from it.
for x, y in datasets.Phishing():
    y_pred = model.predict_proba_one(x)
    model.learn_one(x, y)

    # ROCAUC consumes the class probabilities directly.
    metric.update(y, y_pred)

    # Precision is label-based: give it the most probable class instead of
    # the probability dict, which would never compare equal to a label.
    # The label is derived from the probabilities already computed so the
    # pipeline is not asked for a second prediction on the same sample.
    y_label = max(y_pred, key=y_pred.get) if y_pred else None
    precision.update(y, y_label)

print(metric)
print(precision)