def test_dataacquisition_kafka(self):
        """Publish one ADOC training video to Kafka and read it back.

        The ``train*`` files under ``ADOC_DATASET_LOCATION`` are sorted and
        only the second one is sent to the ``training`` topic. The data is
        then loaded through ``TrainingDataAcquisition`` and the test expects
        exactly one sequence, keyed ``'train_2.avi'``, with 200 entries.
        """
        dataset_dir = ADOC_DATASET_LOCATION

        # Not all videos are needed for the test: keep only the second
        # training file (the slice is empty if fewer than two exist).
        training_names = sorted(
            name for name in os.listdir(dataset_dir)
            if name.startswith('train'))
        selected_names = training_names[1:2]

        for sequence_name in selected_names:
            producer = VideoProducer(
                self.broker,
                "training",
                os.path.join(dataset_dir, sequence_name),
                debug=True,
                resize_to_dimension=(256, 256))
            producer.send_video(extra_fields={"sequence_name": sequence_name})

        # NOTE(review): presumably gives the broker time to make the
        # published messages available before consuming - confirm.
        time.sleep(5)

        acquirer = TrainingDataAcquisition(topic='training')
        acquirer.load()

        self.assertEqual(len(acquirer.data), 1)
        self.assertEqual(len(acquirer.data['train_2.avi']), 200)

        print(acquirer.train_name)
    def setUpClass(self):
        """Publish one training video and load it back from the topic.

        Sets the topic names and initial-training flags, creates the
        inference data acquirer, sends the second sorted ``train*`` video
        from ``ADOC_DATASET_LOCATION`` to the training topic, and finally
        loads the training data through ``TrainingDataAcquisition``.

        NOTE(review): unittest expects ``setUpClass`` to be a classmethod;
        no decorator is visible in this snippet - confirm it exists in the
        full file.
        """
        self.is_initial_training_from_topic = True
        self.initial_training_data = None

        self.inference_data_topic = 'inference'
        self.inference_data_acquisition = InferenceDataAcquisition(
            topic=self.inference_data_topic)

        # Send training data
        self.training_data_topic = 'training'

        dataset_dir = ADOC_DATASET_LOCATION
        training_names = sorted(
            name for name in os.listdir(dataset_dir)
            if name.startswith('train'))

        # Not all videos are needed for the test: only the second one.
        for sequence_name in training_names[1:2]:
            producer = VideoProducer(
                "localhost:29092",
                self.training_data_topic,
                os.path.join(dataset_dir, sequence_name),
                debug=True,
                resize_to_dimension=(256, 256))
            producer.send_video(extra_fields={"sequence_name": sequence_name})

        self.training_data_acquirer = TrainingDataAcquisition(
            topic=self.training_data_topic, group_id_suffix="training")
        self.training_data_acquirer.load()

        # NOTE(review): bare attribute reads whose results are discarded;
        # kept as-is in case ``data`` / ``train_name`` are properties.
        self.training_data_acquirer.data
        self.training_data_acquirer.train_name
Beispiel #3
0
    def send_training_data(self):
        """Publish a single ADOC training video to the training topic.

        The ``train*`` files under ``ADOC_DATASET_LOCATION`` are sorted and
        only the second one is sent, through a ``VideoProducer`` connected
        to ``KAFKA_BROKER_LIST`` and targeting ``self.training_data_topic``.
        Nothing is sent when fewer than two such files exist.
        """
        dataset_dir = ADOC_DATASET_LOCATION
        training_names = sorted(
            name for name in os.listdir(dataset_dir)
            if name.startswith('train'))

        # Not all videos are needed for the test: only the second one.
        for sequence_name in training_names[1:2]:
            producer = VideoProducer(
                KAFKA_BROKER_LIST,
                self.training_data_topic,
                os.path.join(dataset_dir, sequence_name),
                debug=True,
                resize_to_dimension=(256, 256))
            producer.send_video(extra_fields={"sequence_name": sequence_name})
Beispiel #4
0
    def send_training_data(self):
        """Publish a single toy-dataset training video to the training topic.

        The ``train*`` files in the ADOC toy dataset directory are sorted
        and only the second one is sent to ``self.training_data_topic`` on
        the broker at ``localhost:29092``. Nothing is sent when fewer than
        two such files exist.
        """
        # NOTE(review): user-specific absolute path and broker address are
        # hard-coded; this only works on a machine with this layout.
        user_home = '/home/viniciusgoncalves'
        dataset_dir = os.path.join(user_home, 'toy_dataset/adoc/')

        training_names = sorted(
            name for name in os.listdir(dataset_dir)
            if name.startswith('train'))

        # Not all videos are needed for the test: only the second one.
        for sequence_name in training_names[1:2]:
            producer = VideoProducer(
                "localhost:29092",
                self.training_data_topic,
                os.path.join(dataset_dir, sequence_name),
                debug=True,
                resize_to_dimension=(256, 256))
            producer.send_video(extra_fields={"sequence_name": sequence_name})
    def setUpClass(self):
        """Build a ``MainHandler`` with two Gaussian models.

        Sets the topic names and start-up flags, optionally publishes one
        training video, and then creates the model descriptions, the drift
        detector, the dimensionality reduction and the handler itself.

        NOTE(review): unittest expects ``setUpClass`` to be a classmethod;
        no decorator is visible in this snippet - confirm it exists in the
        full file.
        """
        self.is_initial_training_from_topic = False
        self.initially_load_models = True
        self.initial_training_data = None

        self.inference_data_topic = 'inference'
        self.prediction_result_topic = 'prediction'
        self.training_data_topic = 'training'

        # Send training data only when the models are not loaded up front.
        # With the flag set to True above, this branch is currently skipped.
        if not self.initially_load_models:
            dataset_dir = ADOC_DATASET_LOCATION
            training_names = sorted(
                name for name in os.listdir(dataset_dir)
                if name.startswith('train'))

            # Not all videos are needed for the test: only the second one.
            for sequence_name in training_names[1:2]:
                producer = VideoProducer(
                    "localhost:29092",
                    self.training_data_topic,
                    os.path.join(dataset_dir, sequence_name),
                    debug=True,
                    resize_to_dimension=(256, 256))
                producer.send_video(
                    extra_fields={"sequence_name": sequence_name})

        self.user_constraints = {
            "is_real_time": False,
            "minimum_efectiveness": None,
        }

        # The two candidates differ in their declared rates and in the
        # ``pca_n_components`` value passed to ``Gaussian``.
        first_model = {
            "name": "gaussian_1",
            "training_rate": 200,
            "efectiveness": 30,
            "inference_rate": 10,
            "model": Gaussian(
                model_name='gaussian_1', pca=True, pca_n_components=.95),
        }
        second_model = {
            "name": "gaussian_2",
            "training_rate": 250,
            "efectiveness": 25,
            "inference_rate": 10,
            "model": Gaussian(
                model_name='gaussian_2', pca=True, pca_n_components=.90),
        }
        self.models = [first_model, second_model]

        self.drift_algorithm = PageHinkley(
            min_instances=10,
            delta=0.005,
            threshold=10,
            alpha=1 - 0.01)
        self.dimensionality_reduction = PCA()
        self.number_training_frames_after_drift = 10

        frames_after_drift = self.number_training_frames_after_drift
        from_topic = self.is_initial_training_from_topic
        self.handler = MainHandler(
            models=self.models,
            user_constraints=self.user_constraints,
            number_training_frames_after_drift=frames_after_drift,
            drift_algorithm=self.drift_algorithm,
            dimensionality_reduction=self.dimensionality_reduction,
            training_data_topic=self.training_data_topic,
            is_initial_training_from_topic=from_topic,
            initial_training_data=self.initial_training_data,
            prediction_result_topic=self.prediction_result_topic,
            inference_data_topic=self.inference_data_topic,
            initially_load_models=self.initially_load_models)
Beispiel #6
0
    def setUpClass(self):
        """Publish one training video and build a ``MainHandler`` on mocks.

        Sets the topic names, sends the second sorted ``train*`` video from
        ``ADOC_DATASET_LOCATION`` to the training topic via
        ``KAFKA_BROKER_LIST``, and then creates three ``MockModel``
        descriptions, the drift detector, the dimensionality reduction and
        the handler itself.

        NOTE(review): unittest expects ``setUpClass`` to be a classmethod;
        no decorator is visible in this snippet - confirm it exists in the
        full file.
        """
        self.inference_data_topic = 'inference'
        self.prediction_result_topic = 'prediction'

        # Send training data
        self.training_data_topic = 'training'

        dataset_dir = ADOC_DATASET_LOCATION
        training_names = sorted(
            name for name in os.listdir(dataset_dir)
            if name.startswith('train'))

        # Not all videos are needed for the test: only the second one.
        for sequence_name in training_names[1:2]:
            producer = VideoProducer(
                KAFKA_BROKER_LIST,
                self.training_data_topic,
                os.path.join(dataset_dir, sequence_name),
                debug=True,
                resize_to_dimension=(256, 256))
            producer.send_video(extra_fields={"sequence_name": sequence_name})

        self.user_constraints = {
            "is_real_time": False,
            "minimum_efectiveness": None,
        }

        # One row per model: name, training rate, effectiveness, inference
        # rate, and the first positional argument handed to MockModel (its
        # meaning is not visible from this snippet).
        model_specs = (
            ("model_1", 200, 30, 10, 40),
            ("model_2", 300, 20, 20, 30),
            ("model_3", 400, 20, 20, 10),
        )
        self.models = [
            {
                "name": model_name,
                "training_rate": training_rate,
                "efectiveness": efectiveness,
                "inference_rate": inference_rate,
                "model": MockModel(mock_arg, model_name=model_name),
            }
            for (model_name, training_rate, efectiveness, inference_rate,
                 mock_arg) in model_specs
        ]

        self.drift_algorithm = PageHinkley(
            min_instances=20,
            delta=0.005,
            threshold=10,
            alpha=1 - 0.01)
        self.dimensionality_reduction = PCA()
        self.number_training_frames_after_drift = 10

        frames_after_drift = self.number_training_frames_after_drift
        self.handler = MainHandler(
            models=self.models,
            user_constraints=self.user_constraints,
            number_training_frames_after_drift=frames_after_drift,
            drift_algorithm=self.drift_algorithm,
            dimensionality_reduction=self.dimensionality_reduction,
            training_data_topic=self.training_data_topic,
            prediction_result_topic=self.prediction_result_topic,
            inference_data_topic=self.inference_data_topic)
Beispiel #7
0
# Publish the toy-dataset training videos to the "training" topic.
# ``home_dir`` is expected to be defined earlier in the script.
temp_dir = os.path.join(home_dir, 'temp/')
dataset_location = os.path.join(home_dir, 'toy_dataset/adoc/')

video_files = os.listdir(dataset_location)

train_video_files = sorted(
    name for name in video_files if name.startswith('train'))

# Skip the first training video and publish all the remaining ones.
# (Use ``train_video_files[1:2]`` instead to publish only one video.)
train_video_files = train_video_files[1:]

for video in train_video_files:
    print(f"Publishing video {video}")
    video_producer = VideoProducer(
        "localhost:29092",
        "training",
        os.path.join(dataset_location, video),
        debug=True,
        resize_to_dimension=(256, 256))
    video_producer.send_video(extra_fields={"sequence_name": video})

# consumer = ImageFiniteConsumer(topic="inference_5", bootstrap_servers="localhost:29092")

# videos = {}
# for msg in consumer.consumer:
#     val = msg.value['data']
#     sequence_name = msg.value['sequence_name']
#     if videos.get(sequence_name) is None:
#         videos[sequence_name] = []

#     videos[sequence_name].append(frame_from_bytes_str(msg.value['data']))