Example #1
import json
import sys
import time
from datetime import datetime
from json import loads

from confluent_kafka import Consumer, KafkaError, KafkaException, Producer, TopicPartition

class ModelInference(object):
    def __init__(self,
                 my_id=1,
                 bootstrap_servers='',
                 list_of_partitions=[],
                 request_topic='',
                 inference_topic='',
                 group_id='my_grp'):
        """ Constructor
        :type interval: int
        :param interval: Check interval, in seconds
        """
        self.model = None
        #Create the model instance here
        self.my_id = my_id
        self.t = request_topic
        self.result_t = inference_topic
        self.my_grp_id = group_id
        self.result_t_p = 8
        self.bootstrap_servers = bootstrap_servers

        # One TopicPartition per requested partition of the request topic
        self.tls = [TopicPartition(self.t, p) for p in list_of_partitions]
        print(self.tls)
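        # Producer and consumer below authenticate with the same SASL credentials;
        # the consumer is manually assigned to the request-topic partitions built above.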
        conf = {
            'bootstrap.servers': bootstrap_servers,
            'sasl.mechanism': 'PLAIN',
            'security.protocol': 'SASL_SSL',
            'ssl.ca.location': '/tmp/cacert.pem',
            'sasl.username': '******',
            'sasl.password': '******',
            # 'key.serializer': StringSerializer('utf_8'),
            # 'value.serializer': StringSerializer('utf_8'),
            'client.id': 'test-sw-1'
        }

        self.producer = Producer(conf)
        conf = {
            'bootstrap.servers': bootstrap_servers,
            'sasl.mechanism': 'PLAIN',
            'security.protocol': 'SASL_SSL',
            'sasl.username': '******',
            'sasl.password': '******',
            'ssl.ca.location': '/tmp/cacert.pem',
            'group.id': group_id,
            'auto.offset.reset': 'smallest'
        }
        self.consumer = Consumer(conf)
        self.consumer.assign(self.tls)

    def __del__(self):
        print('closing')

        self.producer.flush()
        self.consumer.close()

    def run(self):
        try:
            self.consumer.assign(self.tls)
            i = 0
            while (True):
                msg = self.consumer.poll(timeout=1.0)
                if msg is None: continue

                if msg.error():
                    if msg.error().code() == KafkaError._PARTITION_EOF:
                        # End of partition event
                        sys.stderr.write(
                            '%% %s [%d] reached end at offset %d\n' %
                            (msg.topic(), msg.partition(), msg.offset()))
                    elif msg.error():
                        raise KafkaException(msg.error())
                else:

                    message = loads(msg.value().decode("utf-8"))
                    ingest_ts = message['ingestTs']
                    message_id = message['message_id']
                    truth = message['Class']
                    y_hat = truth  # Placeholder: no model yet; replace with a real prediction (e.g. model.predict_one(x))
                    inference_ts = time.time()
                    out = {}
                    out['ingest_ts'] = ingest_ts
                    out['message_id'] = message_id
                    out['truth'] = truth  # ground-truth label (what a real model would learn from)
                    out['y_hat'] = y_hat
                    out['inference_ts'] = inference_ts
                    out['dur_evt_inf'] = inference_ts - ingest_ts
                    i = i + 1
                    k = str(i)
                    v = json.dumps(out).encode('utf-8')
                    self.producer.produce(self.result_t, value=v, key=k)

                    if (i % 2 == 0):
                        print('sending to ' + self.result_t + ' ' +
                              str(self.result_t_p) + ' ' + str(out))
                        self.producer.flush()
                #self.producer.close()

        finally:
            # Close down consumer to commit final offsets.
            self.consumer.close()

    def runOld(self):
        # Legacy implementation kept for reference; superseded by run() above.
        print(self.bootstrap_servers)
        conf = {
            'bootstrap.servers': self.bootstrap_servers,
            'sasl.mechanism': 'PLAIN',
            'security.protocol': 'SASL_SSL',
            'ssl.ca.location': '/tmp/cacert.pem',
            'sasl.username': '******',
            'sasl.password': '******',
            # 'key.serializer': StringSerializer('utf_8'),
            # 'value.serializer': StringSerializer('utf_8'),
            'client.id': 'test-sw-1'
        }

        self.producer = Producer(conf)
        conf = {
            'bootstrap.servers': self.bootstrap_servers,
            'sasl.mechanism': 'PLAIN',
            'security.protocol': 'SASL_SSL',
            'sasl.username': '******',
            'sasl.password': '******',
            'ssl.ca.location': '/tmp/cacert.pem',
            'group.id': 'g-1',
            'auto.offset.reset': 'smallest'
        }
        self.consumer = Consumer(conf)
        self.consumer.assign(self.tls)
        print('starting ' + str(self.my_id))
        now = datetime.now()
        i = 0
        while True:

            msg = self.consumer.poll(timeout=1.0)
            if msg is not None and not msg.error():

                message = loads(msg.value().decode("utf-8"))

                ingest_ts = message['ingestTs']
                message_id = message['message_id']
                truth = message['Class']
                y_hat = truth  # Placeholder: no model yet; replace with a real prediction (e.g. model.predict_one(x))
                inference_ts = time.time()
                out = {}
                out['ingest_ts'] = ingest_ts
                out['message_id'] = message_id
                out['truth'] = truth  # ground-truth label (what a real model would learn from)
                out['y_hat'] = y_hat
                out['inference_ts'] = inference_ts
                out['dur_evt_inf'] = inference_ts - ingest_ts
                i = i + 1
                self.producer.produce(self.result_t,
                                      value=json.dumps(out).encode('utf-8'),
                                      key=str(message_id))
                #self.producer.flush()

                if (i % 1000 == 0):
                    print('sending to ' + self.result_t + ' ' +
                          str(self.result_t_p) + ' ' + str(out))
        self.producer.flush()
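
A minimal usage sketch for Example #1; the broker address, topic names, and partition list below are hypothetical placeholders, not values from the original:

if __name__ == '__main__':
    mi = ModelInference(my_id=1,
                        bootstrap_servers='broker-1:9093',   # placeholder broker
                        list_of_partitions=[0, 1, 2, 3],     # placeholder partitions
                        request_topic='requests',            # placeholder input topic
                        inference_topic='inferences',        # placeholder output topic
                        group_id='my_grp')
    mi.run()  # polls the request topic until interrupted, echoing each message to the inference topic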

Example #2

import json
import os
import pickle
import sys
import time
from json import loads

from confluent_kafka import Consumer, KafkaError, KafkaException, Producer, TopicPartition
from river import anomaly, compose, metrics, preprocessing


class ModelInference(object):
    def __init__(self,
                 my_id=1,
                 bootstrap_servers='',
                 list_of_partitions=[],
                 request_topic='',
                 inference_topic='',
                 group_id='my_grp'):
        """ Constructor
        :type interval: int
        :param interval: Check interval, in seconds
        """
        self.model = compose.Pipeline(
            preprocessing.MinMaxScaler(), anomaly.HalfSpaceTrees(
                seed=42))  # tree.HoeffdingTreeClassifier(max_depth=10)
        self.metric = metrics.ROCAUC()  # metrics.Accuracy() #
        self.my_id = my_id
        self.t = request_topic
        self.result_t = inference_topic
        self.my_grp_id = group_id
        self.result_t_p = 8
        self.bootstrap_servers = bootstrap_servers
        #         self.list_of_partitions = list_of_partitions

        # One TopicPartition per requested partition of the request topic
        self.tls = [TopicPartition(self.t, p) for p in list_of_partitions]
        print(self.tls)

        conf = {
            'bootstrap.servers': bootstrap_servers,
            'sasl.mechanism': 'PLAIN',
            'security.protocol': 'SASL_SSL',
            'ssl.ca.location': '/tmp/cacert.pem',
            'sasl.username': '******',
            'sasl.password': '******',
            # 'key.serializer': StringSerializer('utf_8'),
            # 'value.serializer': StringSerializer('utf_8'),
            'client.id': 'test-sw-1'
        }

        self.producer = Producer(conf)
        conf = {
            'bootstrap.servers': bootstrap_servers,
            'sasl.mechanism': 'PLAIN',
            'security.protocol': 'SASL_SSL',
            'sasl.username': '******',
            'sasl.password': '******',
            'ssl.ca.location': '/tmp/cacert.pem',
            'group.id': group_id,
            'auto.offset.reset': 'latest'
        }
        self.consumer = Consumer(conf)
        self.consumer.assign(self.tls)

    def __del__(self):
        print('closing')
        # Flush any queued messages; confluent_kafka's Producer has no close() method
        self.producer.flush()

    def run(self):
        ### Set up model, metric, and starting timestamp for this model instance ###
        model = self.model
        metric = self.metric
        print('MODEL and METRIC before any messages consumed:', model, metric)
        start_consume_ts = time.time()
        print('Start Consume Time ' + str(start_consume_ts))
        ######
        try:
            self.consumer.assign(self.tls)
            i = 0
            while (True):
                msg = self.consumer.poll(timeout=1.0)
                if msg is None: continue

                if msg.error():
                    if msg.error().code() == KafkaError._PARTITION_EOF:
                        # End of partition event
                        sys.stderr.write(
                            '%% %s [%d] reached end at offset %d\n' %
                            (msg.topic(), msg.partition(), msg.offset()))
                    elif msg.error():
                        raise KafkaException(msg.error())
                else:
                    message = loads(msg.value().decode("utf-8"))
                    ingest_ts = message.pop('ingestTs')  # Ingestion time from Kafka
                    message_id = message.pop('message_id')  # Message ID from Kafka
                    truth = float(message.pop('Class'))  # True label, removed before prediction
                    message.pop('ignore')  # Not a model feature
                    x = message
                    for k, v in x.items():
                        x[k] = float(v)  # Cast remaining feature values to float
                    y_hat = model.score_one(x)  # anomaly score; a classifier would use model.predict_one(x)
                    if (i % 1000 == 0):
                        print('x: ', x)
                        for k, v in x.items():
                            print(type(x[k]))
                        print('model: ', model)
                        print('y_hat: ', y_hat)
                    inference_ts = time.time()
                    metric.update(truth, y_hat)  # update the running metric with (truth, score)
                    model.learn_one(x)  # unsupervised update; a supervised model would take (x, y)
                    learn_ts = time.time()
                    out = {}
                    out['ingest_ts'] = ingest_ts
                    out['learn_ts'] = learn_ts
                    out['message_id'] = message_id
                    out['truth'] = truth
                    out['y_hat'] = y_hat
                    out['inference_ts'] = inference_ts
                    out['dur_evt_inf'] = inference_ts - ingest_ts
                    out['dur_start_inf'] = inference_ts - start_consume_ts
                    out['dur_inf_learn'] = learn_ts - inference_ts
                    out['model_metric'] = metric.get()
                    out['ignore'] = False
                    i = i + 1
                    k = str(i)
                    v = json.dumps(out).encode('utf-8')
                    self.producer.produce(self.result_t, value=v, key=k)
                    # self.producer.flush()

                    if (i % 1000 == 0):
                        print('sending to ' + self.result_t + ' ' +
                              str(self.result_t_p) + ' ' + str(out))
                        self.producer.flush()
                    if (i % 100000 == 0):
                        # Periodically checkpoint the model to disk
                        try:
                            with open("model.pkl", "wb") as f:
                                pickle.dump(model, f)
                        except Exception:
                            os.remove("model.pkl")
                            with open("model.pkl", "wb") as f:
                                pickle.dump(model, f)
                #self.producer.close()

        finally:
            # Close down consumer to commit final offsets.
            self.consumer.close()
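
A short sketch of reloading the "model.pkl" checkpoint written by run() above and scoring one record; the feature names and values below are hypothetical, not taken from the original:

import pickle

with open("model.pkl", "rb") as f:
    model = pickle.load(f)  # the river compose.Pipeline checkpointed by run()

sample = {'V1': 0.12, 'V2': -1.3, 'Amount': 42.0}  # hypothetical feature dict
print(model.score_one(sample))  # anomaly score: higher means more anomalous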