def test_dataacquisition_kafka(self):
    """Publish one training video to Kafka and verify the acquirer reads it back.

    Sends a single training sequence (train_2.avi, 200 frames) through a
    VideoProducer, then checks that TrainingDataAcquisition loads exactly
    that one sequence with the expected frame count.
    """
    dataset_dir = ADOC_DATASET_LOCATION
    # Keep only files named 'train*', sorted, and take a single one to keep
    # the test fast (publishing every video would be too slow).
    selected = sorted(
        name for name in os.listdir(dataset_dir) if name.startswith('train')
    )[1:2]

    for sequence in selected:
        producer = VideoProducer(
            self.broker,
            "training",
            os.path.join(dataset_dir, sequence),
            debug=True,
            resize_to_dimension=(256, 256),
        )
        producer.send_video(extra_fields={"sequence_name": sequence})

    # Give the broker a moment to make the messages consumable.
    time.sleep(5)

    acquirer = TrainingDataAcquisition(topic='training')
    acquirer.load()

    self.assertEqual(len(acquirer.data), 1)
    self.assertEqual(len(acquirer.data['train_2.avi']), 200)
    print(acquirer.train_name)
def setUpClass(self):
    """Prepare acquisition fixtures: publish one training video and load it.

    Configures inference/training topics, sends a single training sequence
    to the 'training' topic, and loads it through TrainingDataAcquisition
    so tests can read `self.training_data_acquirer`.

    NOTE(review): hard-codes broker "localhost:29092" while sibling code
    uses KAFKA_BROKER_LIST — consider unifying (not changed here to avoid
    an unresolved name in this module).
    """
    self.is_initial_training_from_topic = True
    self.initial_training_data = None
    self.inference_data_topic = 'inference'
    self.inference_data_acquisition = InferenceDataAcquisition(
        topic=self.inference_data_topic)

    # Send training data
    self.training_data_topic = 'training'
    adoc_dataset_location = ADOC_DATASET_LOCATION
    video_files = os.listdir(adoc_dataset_location)
    train_video_files = sorted(
        x for x in video_files if x.startswith('train'))
    train_video_files = train_video_files[1:2]  # not all videos for test
    for video in train_video_files:
        video_producer = VideoProducer(
            "localhost:29092",
            self.training_data_topic,
            os.path.join(adoc_dataset_location, video),
            debug=True,
            resize_to_dimension=(256, 256))
        video_producer.send_video(extra_fields={"sequence_name": video})

    self.training_data_acquirer = TrainingDataAcquisition(
        topic=self.training_data_topic, group_id_suffix="training")
    self.training_data_acquirer.load()
    # Removed two bare attribute-access statements
    # (`self.training_data_acquirer.data` / `.train_name`) from the
    # original: they evaluated attributes and discarded the result — no-ops.
def send_training_data(self):
    """Publish a single training video sequence to the training topic.

    Picks one 'train*' file from ADOC_DATASET_LOCATION (keeping the test
    fast) and streams it through a VideoProducer, tagging each message
    with its sequence name.
    """
    dataset_dir = ADOC_DATASET_LOCATION
    candidates = sorted(
        f for f in os.listdir(dataset_dir) if f.startswith('train')
    )
    # not all videos for test
    for sequence in candidates[1:2]:
        producer = VideoProducer(
            KAFKA_BROKER_LIST,
            self.training_data_topic,
            os.path.join(dataset_dir, sequence),
            debug=True,
            resize_to_dimension=(256, 256),
        )
        producer.send_video(extra_fields={"sequence_name": sequence})
def send_training_data(self):
    """Publish a single training video from the user's toy dataset.

    Reads 'train*' videos from ~/toy_dataset/adoc/, selects one, and
    streams it to the training topic via a VideoProducer.
    """
    # Was hard-coded to '/home/viniciusgoncalves' — resolve the current
    # user's home directory instead so the test runs on any machine
    # (identical behavior for the original author's environment).
    home_dir = os.path.expanduser('~')
    dataset_location = os.path.join(home_dir, 'toy_dataset/adoc/')
    video_files = os.listdir(dataset_location)
    train_video_files = sorted(
        x for x in video_files if x.startswith('train'))
    train_video_files = train_video_files[1:2]  # not all videos for test
    for video in train_video_files:
        video_producer = VideoProducer(
            "localhost:29092",
            self.training_data_topic,
            os.path.join(dataset_location, video),
            debug=True,
            resize_to_dimension=(256, 256))
        video_producer.send_video(extra_fields={"sequence_name": video})
def setUpClass(self):
    """Build the full MainHandler fixture with two Gaussian models.

    Optionally publishes one training video (skipped here because
    `initially_load_models` is True), then wires up user constraints,
    model configs, drift detection (PageHinkley) and dimensionality
    reduction (PCA) into a MainHandler.
    """
    self.is_initial_training_from_topic = False
    self.initially_load_models = True
    self.initial_training_data = None
    self.inference_data_topic = 'inference'
    self.prediction_result_topic = 'prediction'
    self.training_data_topic = 'training'

    # Send training data — only needed when models are not loaded from disk.
    if not self.initially_load_models:
        dataset_dir = ADOC_DATASET_LOCATION
        selected = sorted(
            f for f in os.listdir(dataset_dir) if f.startswith('train')
        )[1:2]  # not all videos for test
        for sequence in selected:
            producer = VideoProducer(
                "localhost:29092",
                self.training_data_topic,
                os.path.join(dataset_dir, sequence),
                debug=True,
                resize_to_dimension=(256, 256),
            )
            producer.send_video(extra_fields={"sequence_name": sequence})

    self.user_constraints = {
        "is_real_time": False,
        "minimum_efectiveness": None,
    }

    self.models = [
        {
            "name": "gaussian_1",
            "training_rate": 200,
            "efectiveness": 30,
            "inference_rate": 10,
            "model": Gaussian(model_name='gaussian_1', pca=True,
                              pca_n_components=.95),
        },
        {
            "name": "gaussian_2",
            "training_rate": 250,
            "efectiveness": 25,
            "inference_rate": 10,
            "model": Gaussian(model_name='gaussian_2', pca=True,
                              pca_n_components=.90),
        },
    ]

    self.drift_algorithm = PageHinkley(
        min_instances=10, delta=0.005, threshold=10, alpha=1 - 0.01)
    self.dimensionality_reduction = PCA()
    self.number_training_frames_after_drift = 10

    self.handler = MainHandler(
        models=self.models,
        user_constraints=self.user_constraints,
        number_training_frames_after_drift=(
            self.number_training_frames_after_drift),
        drift_algorithm=self.drift_algorithm,
        dimensionality_reduction=self.dimensionality_reduction,
        training_data_topic=self.training_data_topic,
        is_initial_training_from_topic=self.is_initial_training_from_topic,
        initial_training_data=self.initial_training_data,
        prediction_result_topic=self.prediction_result_topic,
        inference_data_topic=self.inference_data_topic,
        initially_load_models=self.initially_load_models,
    )
def setUpClass(self):
    """Build a MainHandler fixture backed by three MockModels.

    Publishes one training video to the training topic, then assembles
    user constraints, mock model configurations, PageHinkley drift
    detection and PCA reduction into a MainHandler.
    """
    self.inference_data_topic = 'inference'
    self.prediction_result_topic = 'prediction'

    # Send training data
    self.training_data_topic = 'training'
    dataset_dir = ADOC_DATASET_LOCATION
    selected = sorted(
        f for f in os.listdir(dataset_dir) if f.startswith('train')
    )[1:2]  # not all videos for test
    for sequence in selected:
        producer = VideoProducer(
            KAFKA_BROKER_LIST,
            self.training_data_topic,
            os.path.join(dataset_dir, sequence),
            debug=True,
            resize_to_dimension=(256, 256),
        )
        producer.send_video(extra_fields={"sequence_name": sequence})

    self.user_constraints = {
        "is_real_time": False,
        "minimum_efectiveness": None,
    }

    # (name, training_rate, efectiveness, inference_rate, mock score)
    mock_specs = [
        ("model_1", 200, 30, 10, 40),
        ("model_2", 300, 20, 20, 30),
        ("model_3", 400, 20, 20, 10),
    ]
    self.models = [
        {
            "name": name,
            "training_rate": training_rate,
            "efectiveness": efectiveness,
            "inference_rate": inference_rate,
            "model": MockModel(score, model_name=name),
        }
        for name, training_rate, efectiveness, inference_rate, score
        in mock_specs
    ]

    self.drift_algorithm = PageHinkley(
        min_instances=20, delta=0.005, threshold=10, alpha=1 - 0.01)
    self.dimensionality_reduction = PCA()
    self.number_training_frames_after_drift = 10

    self.handler = MainHandler(
        models=self.models,
        user_constraints=self.user_constraints,
        number_training_frames_after_drift=(
            self.number_training_frames_after_drift),
        drift_algorithm=self.drift_algorithm,
        dimensionality_reduction=self.dimensionality_reduction,
        training_data_topic=self.training_data_topic,
        prediction_result_topic=self.prediction_result_topic,
        inference_data_topic=self.inference_data_topic,
    )
# Script fragment: publish toy-dataset training videos to the 'training'
# topic. `home_dir` is defined earlier in the script (outside this view).
temp_dir = os.path.join(home_dir, 'temp/')
dataset_location = os.path.join(home_dir, 'toy_dataset/adoc/')

all_files = os.listdir(dataset_location)
train_video_files = sorted(f for f in all_files if f.startswith('train'))
# train_video_files = [train_video_files[1]]  # Only one video
train_video_files = train_video_files[1:]  # More than one video

for video in train_video_files:
    print(f"Publishing video {video}")
    producer = VideoProducer(
        "localhost:29092",
        "training",
        os.path.join(dataset_location, video),
        debug=True,
        resize_to_dimension=(256, 256),
    )
    producer.send_video(extra_fields={"sequence_name": video})

# Kept for reference: drain an inference topic back into per-sequence frame lists.
# consumer = ImageFiniteConsumer(topic="inference_5",
#                                bootstrap_servers="localhost:29092")
# videos = {}
# for msg in consumer.consumer:
#     sequence_name = msg.value['sequence_name']
#     videos.setdefault(sequence_name, []).append(
#         frame_from_bytes_str(msg.value['data']))