def __init__(self, config: Configuration, selection):
    """Initialise the classifier thread.

    :param config: global Configuration object (connection settings, export flags).
    :param selection: architecture identifier chosen by the user ('snn' or 'cbs'),
        forwarded to init_architecture.
    """
    super().__init__()

    # Bounded, process-safe queue shared with the producer side;
    # capacity 10 applies backpressure when classification falls behind.
    self.examples_to_classify = multiprocessing.Manager().Queue(10)

    self.config = config
    self.stop = False

    # Running statistics accumulated while classifying.
    self.nbr_classified = 0
    self.total_time_classification = 0
    self.total_time_all = 0
    self.total_diff = pd.Timedelta(0)

    # Fix: always define the attribute so later accesses can test for None
    # instead of raising AttributeError when export is disabled.
    self.result_producer = None
    if config.export_results_to_kafka:
        self.result_producer = KafkaProducer(
            bootstrap_servers=config.get_connection(),
            value_serializer=lambda m: json.dumps(m).encode('utf-8'))

    self.architecture = None
    self.init_architecture(selection)
def main():
    """Entry point: read sensor messages from the configured Kafka topics,
    assemble them into single examples, normalise them and hand them to a
    Classifier thread until interrupted with Ctrl+C."""
    config = Configuration()

    # suppress debugging messages of tensorflow
    # os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

    # load the scalers of the training data for the normalisation
    scalers = load_scalers(config)

    consumers = []
    limiting_consumer = None

    # prompt repeatedly until a supported architecture is selected
    selection = ''
    while selection not in ['cbs', 'snn']:
        print(
            'Please select architecture that should be used. Type "snn" or "cbs"'
        )
        selection = input()

    print()
    print('Creating consumers ...\n')

    # if using the fabric simulation start at the start of the topics
    # for live classification start at newest messages possible
    offset = 'earliest' if config.testing_using_fabric_sim else 'latest'

    try:
        # create consumers for all topics
        for topic in config.topic_list:
            c = KafkaConsumer(
                topic,
                bootstrap_servers=config.get_connection(),
                value_deserializer=lambda m: json.loads(m.decode('utf-8')),
                auto_offset_reset=offset)

            # based on the topic select one of the consumers for time interval determination
            if topic == config.limiting_topic:
                limiting_consumer = c
            consumers.append(c)
    except errors.NoBrokersAvailable:
        print(
            'Configured kafka server is not available. Please check the connection or change the configuration.'
        )
        sys.exit(0)

    # create and start a classifier thread that handles the classification of processed examples
    print('\nCreating classifier ...')
    print('\nUsed model file:')
    print(config.directory_model_to_use, '\n')
    print('The classifier will use k=' + str(config.k_of_knn) +
          ' for the k-NN algorithm')
    print(
        'The mean similarity output is calculated on the basis of the k most similar cases'
    )
    print('The time span is the time between the end timestamp of the')
    print('interval and the current time right before the output.')
    print(
        'The total time is the time needed for the completely processing the example,'
    )
    print('including the time in the queue.\n')

    classifier = Classifier(config, selection)
    classifier.start()

    print('Waiting for data to classify ...\n')

    try:
        # classify until interrupted
        while True:
            start_time = time.perf_counter()

            # read data for a single example from kafka, results contains lists of single messages
            results = read_single_example(consumers, limiting_consumer, config)

            # combine into a single dataframe
            df = list_to_dataframe(results, config)

            # transform dataframe into an array that can be used as neural network input
            example = df.to_numpy()

            # normalize the data of the example
            example = normalise_dataframe(example, scalers)

            # create a queue element containing the example, its time interval and its start time
            element = (example, df.index[0], df.index[-1], start_time)

            # add element to the queue of examples to classify
            classifier.examples_to_classify.put(element)

            # reset all consumer offsets by two messages to reduce the time intervals that are left out
            for consumer, topic in zip(consumers, config.topic_list):
                partition = TopicPartition(topic, 0)
                last_offset = consumer.position(partition)
                # never seek before the beginning of the partition
                new_offset = max(last_offset - 2, 0)
                consumer.seek(partition, new_offset)
    except KeyboardInterrupt:
        # interrupt the classifier thread
        print('Exiting ...\n')
        classifier.stop = True