Example #1
0
    def creating_cardinalities(self, min_cardinality=0, max_cardinality=float("+inf")):
        """Create the Cardinality objects for the learning step.

        The cardinality of an event is the number of digits of its
        occurrence count, i.e. ``len(str(self.counter[event]))``. Only
        cardinalities strictly between ``min_cardinality`` and
        ``max_cardinality`` are selected.

        Args:
            min_cardinality (int, optional): exclusive lower bound on the
                cardinalities to be selected. Defaults to 0.
            max_cardinality (float, optional): exclusive upper bound on the
                cardinalities to be selected. Defaults to float("+inf").
        """
        # FIX: the one_model branch previously overwrote min_cardinality and
        # max_cardinality with hard-coded values (3 and 8) inside the loop,
        # silently ignoring the caller's arguments. Both branches now honour
        # the parameters.
        cardinalities = [len(str(self.counter[event])) for event in self.counter]
        if self.one_model:
            self.set_cardinalities_available = {
                c for c in cardinalities if min_cardinality < c < max_cardinality
            }
            logger.info("%s cardinalities available in this dataset",
                        len(self.set_cardinalities_available))
            # A single model (cardinality=0) covers every selected cardinality.
            self.list_cardinalities.append(Cardinality(cardinality=0, path_w2v=self.path_model_w2v, path_list_classes=self.path_list_classes, size=self.size, one_model=self.one_model, set_cardinalities=self.set_cardinalities_available))
        else:
            self.set_cardinalities_available = set(cardinalities)
            logger.info("%s cardinalities available in this dataset",
                        len(self.set_cardinalities_available))
            # One model per cardinality inside the requested bounds.
            for cardinality in self.set_cardinalities_available:
                if min_cardinality < cardinality < max_cardinality:
                    self.list_cardinalities.append(Cardinality(cardinality=cardinality, path_w2v=self.path_model_w2v, path_list_classes=self.path_list_classes, size=self.size))
Example #2
0
 def setUp(self):
     """Build three Cardinality fixtures, a spawn-context lock and an LSTM."""
     self.list_cardinalities = [Cardinality(n, "", "") for n in (3, 4, 5)]
     self.lock = torch.multiprocessing.get_context('spawn').Lock()
     self.model = LSTMLayer(num_classes=5)
Example #3
0
 def setUp(self):
     """Prepare a Cardinality fixture and precompute its class positions."""
     self.cardinality = Cardinality(3, "", "")
     self.cardinality.list_classes = [0, 0, 0, 1, 1, 2]
     # Keys 0-4 all count 100; keys 6 and 5 count 1000 (insertion order kept).
     counts = dict.fromkeys((0, 1, 2, 4), 100)
     counts.update({6: 1000, 5: 1000})
     self.cardinality.counter = counts
     self.cardinality.compute_position()
Example #4
0
    def test_loadfiles(self, mock_isfile):
        """load_files must fill counter and list_classes, truncating the
        latter to ``size`` when one was given at construction time.

        The two original copy-pasted halves (with and without ``size``) are
        folded into a single local helper.
        """
        mock_isfile.return_value = True

        def run_case(cardinality, expected_classes):
            # Pickled payload served by the mocked open(): a dummy word2vec
            # plus per-pattern counts.
            read_data = pickle.dumps({
                'word2vec': -1,
                'counter_patterns': {
                    "1": 1,
                    "10": 10,
                    "100": 100,
                    "1000": 1000,
                    "10000": 10000,
                    "100000": 100000
                }
            })
            mockOpen = mock_open(read_data=read_data)
            # Real HDF5 file on disk holding the class sequence.
            tf = tempfile.NamedTemporaryFile()
            f = h5py.File(tf, 'w')
            f.create_dataset("list_classes", data=[1, 2, 3, 4, 5])
            f.close()
            cardinality.path_list_classes = tf.name
            with patch('builtins.open', mockOpen):
                cardinality.load_files()
            tf.close()
            self.assertEqual(len(cardinality.counter), 6)
            self.assertEqual(len(cardinality.list_classes), expected_classes)

        # Without size: all 5 classes are kept.
        run_case(Cardinality(3, "", ""), 5)
        # With size=2: list_classes is limited to 2 entries.
        run_case(Cardinality(3, "", "", size=2), 2)
 def test_train_model(self, mock_isfile):
     """train() should run end to end, both from scratch and when resuming.

     ``mock_isfile`` is injected by a ``@patch`` decorator outside this view.
     """
     # Pretend every on-disk file-existence check succeeds.
     mock_isfile.return_value = True
     cardinality = Cardinality(3, "", "")
     # Pickled payload served by the mocked open(): word vectors, pattern
     # counts, and a pre-trained LSTM state_dict so training can resume.
     read_data = pickle.dumps({
         'word2vec': {
             "1": [1] * 20,
             "2": [2] * 20,
             "3": [3] * 20,
             "4": [4] * 20,
             "5": [5] * 20,
             "6": [6] * 20,
             "7": [7] * 20
         },
         'counter_patterns': {
             1: 10,
             2: 100,
             3: 100,
             4: 100,
             6: 1000,
             5: 1000
         },
         "LSTM": {
             3: self.model.state_dict()
         }
     })
     mockOpen = mock_open(read_data=read_data)
     # Real HDF5 file on disk holding the class sequence read by the dataset.
     tf = tempfile.NamedTemporaryFile()
     f = h5py.File(tf, 'w')
     f.create_dataset("list_classes",
                      data=[
                          1, 1, 1, 2, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2,
                          1, 2, 1, 2, 3, 4, 5, 6
                      ])
     f.close()
     cardinality.path_list_classes = tf.name
     cardinality.counter = {1: 10, 2: 100, 3: 100, 4: 100, 6: 1000, 5: 1000}
     with patch('builtins.open', mockOpen):
         worker_single = Worker_single(batch_result=1,
                                       cardinality=cardinality,
                                       lock=self.lock,
                                       batch_size=1)
         dataloader = worker_single.create_dataloader()
         # Fresh training, then resuming from the pickled state_dict above.
         worker_single.train(resuming=False)
         worker_single.train(resuming=True)
 def test_without_file(self, mock_isfile):
     """load_model must raise when no trained model is available."""
     mock_isfile.return_value = True
     cardinality = Cardinality(3, "", "")
     # Word vectors "1".."7" of dimension 20 and the pattern counts.
     vectors = {str(k): [k] * 20 for k in range(1, 8)}
     counts = {1: 10, 2: 100, 3: 100, 4: 100, 6: 1000, 5: 1000}
     read_data = pickle.dumps({
         'word2vec': vectors,
         'counter_patterns': counts,
         "LSTM": {
             3: self.model.state_dict()
         }
     })
     mockOpen = mock_open(read_data=read_data)
     tf = tempfile.NamedTemporaryFile()
     f = h5py.File(tf, 'w')
     # Class sequence: 1,1,1,2,2 then eight 1/2 pairs then 3,4,5,6.
     f.create_dataset("list_classes",
                      data=[1, 1, 1, 2, 2] + [1, 2] * 8 + [3, 4, 5, 6])
     f.close()
     cardinality.path_list_classes = tf.name
     cardinality.counter = dict(counts)
     # No data was ever saved: loading the model is expected to fail.
     with self.assertRaises(Exception):
         worker_single = Worker_single(cardinality=cardinality,
                                       lock=self.lock,
                                       batch_size=1)
         worker_single.load_model()
 def setUp(self):
     """Create a lock, an LSTM model and a Result built on a small fixture."""
     self.lock = torch.multiprocessing.get_context('spawn').Lock()
     self.model = LSTMLayer(num_classes=5)
     card = Cardinality(3, "", "")
     card.list_classes = [1, 1, 1, 2, 2, 3, 4, 5, 6]
     card.counter = {1: 10, 2: 100, 3: 100, 4: 100, 6: 1000, 5: 1000}
     card.compute_position()
     self.result = Result(card)
 def test_creation(self):
     """Worker_single accepts known stopping conditions and rejects others."""
     cardinality = Cardinality(3, "", "", size=2)
     payload = pickle.dumps({
         'word2vec': -1,
         'counter_patterns': {
             "1": 1,
             "10": 10,
             "100": 100,
             "1000": 1000,
             "10000": 10000,
             "100000": 100000
         }
     })
     mockOpen = mock_open(read_data=payload)
     tf = tempfile.NamedTemporaryFile()
     f = h5py.File(tf, 'w')
     f.create_dataset("list_classes", data=[1, 2, 3, 4, 5])
     f.close()
     cardinality.path_list_classes = tf.name
     with patch('builtins.open', mockOpen):
         cardinality.load_files()
     self.cardinality = cardinality
     # Default construction, then each supported stopping condition in turn.
     worker_single = Worker_single(cardinality=self.cardinality,
                                   lock=self.lock)
     for condition in ("timer", "earlystopping", "epoch"):
         worker_single = Worker_single(cardinality=self.cardinality,
                                       lock=self.lock,
                                       stoppingcondition=condition)
     # An unknown stopping condition must be rejected.
     with self.assertRaises(Exception):
         worker_single = Worker_single(cardinality=self.cardinality,
                                       lock=self.lock,
                                       stoppingcondition="unimplemented")
     tf.close()
Example #9
0
    def test_compute_position(self):
        """compute_position keeps the 3-digit-count classes and their positions."""
        card = Cardinality(3, "", "")
        card.list_classes = [1, 1, 1, 2, 2, 3, 4, 5, 6]
        card.counter = {1: 10, 2: 100, 3: 100, 4: 100, 6: 1000, 5: 1000}
        card.compute_position()

        # Classes 2, 3 and 4 have counts with 3 digits.
        self.assertEqual(len(card.set_classes_kept), 3)
        self.assertEqual(list(card.list_position), [3, 4, 5, 6])
        self.assertEqual(card.number_of_classes, 5)
        self.assertEqual(len(card), 4)
Example #10
0
    def test_compute_position_one_model(self):
        """In one-model mode every class is kept and every position is usable."""
        card = Cardinality(3,
                           "",
                           "",
                           one_model=True,
                           set_cardinalities=[1, 2, 3, 4, 5])
        card.list_classes = [1, 1, 1, 2, 2, 3, 4, 5, 6]
        card.counter = {1: 10, 2: 100, 3: 100, 4: 100, 6: 1000, 5: 1000}
        card.compute_position()

        self.assertEqual(len(card.set_classes_kept), 6)
        self.assertEqual(list(card.list_position), list(range(9)))
        self.assertEqual(card.number_of_classes, 7)
        self.assertEqual(len(card), 9)
Example #11
0
class UtilTest(unittest.TestCase):
    """Tests for Result/Results metric computation on a small fixture."""

    def setUp(self):
        # Fixture: 6 labelled events; the 3-digit counts (100) match the
        # cardinality-3 selection performed by compute_position().
        self.cardinality = Cardinality(3, "", "")
        self.cardinality.list_classes = [0, 0, 0, 1, 1, 2]
        self.cardinality.counter = {
            0: 100,
            1: 100,
            2: 100,
            4: 100,
            6: 1000,
            5: 1000
        }
        self.cardinality.compute_position()

    def test_creating(self):
        # A fresh Result picks up the class count from the Cardinality.
        result = Result(self.cardinality)
        self.assertEqual(result.number_of_classes, 3)

    def test_update(self):
        """update() fills the confusion matrix from scores and labels."""
        result = Result(self.cardinality)
        # Argmax predictions are [0, 1, 0, 1] against labels [0, 1, 1, 0]:
        # one hit and one miss per class -> 1 in every matrix cell.
        result.update(torch.Tensor([[0.4, 0.3], [0.3, 0.4], [0.4, 0.3],
                                    [0.3, 0.4]]),
                      labels=[0, 1, 1, 0])

        self.assertEqual(result.conf_matrix[0, 1], 1)
        self.assertEqual(result.conf_matrix[1, 0], 1)
        self.assertEqual(result.conf_matrix[1, 1], 1)
        self.assertEqual(result.conf_matrix[0, 0], 1)

    def test_compute(self):
        """computing_result() derives TP/FP/FN and macro/micro metrics."""
        result = Result(self.cardinality)
        result.update(torch.Tensor([[0.4, 0.3], [0.3, 0.4], [0.4, 0.3],
                                    [0.3, 0.4]]),
                      labels=[0, 1, 1, 0])

        result.computing_result()
        self.assertEqual(result.global_TP, 2)
        self.assertEqual(result.global_FP, 2)
        self.assertEqual(result.global_FN, 2)

        self.assertEqual(result.macro_precision, 0.50)
        self.assertEqual(result.macro_recall, 0.50)
        # Micro metrics are floats: compare with a tolerance.
        self.assertTrue(math.isclose(result.micro_recall, 1 / 3))
        self.assertTrue(math.isclose(result.micro_precision, 1 / 3))

    def test_no_class(self):
        """With number_of_classes forced to 0, micro metrics fall back to 0."""
        result = Result(self.cardinality)
        result.number_of_classes = 0
        result.update(torch.Tensor([[0.4, 0.3], [0.3, 0.4], [0.4, 0.3],
                                    [0.3, 0.4]]),
                      labels=[0, 1, 1, 0])
        result.computing_result()

        self.assertEqual(result.micro_precision, 0)
        self.assertEqual(result.micro_recall, 0)

    def test_print(self):
        """Smoke test: computing results with and without a name must not raise."""
        # NOTE(review): "Test" is presumably a display/condition name for the
        # Result — confirm against Result's signature.
        result = Result(self.cardinality, "Test")
        result.number_of_classes = 0
        result.update(torch.Tensor([[0.4, 0.3], [0.3, 0.4], [0.4, 0.3],
                                    [0.3, 0.4]]),
                      labels=[0, 1, 1, 0])
        result.computing_result()

        result = Result(self.cardinality)
        result.number_of_classes = 0
        result.update(torch.Tensor([[0.4, 0.3], [0.3, 0.4], [0.4, 0.3],
                                    [0.3, 0.4]]),
                      labels=[0, 1, 1, 0])
        result.computing_result()

    def test_results_normal(self):
        """Results aggregates pickled per-cardinality Result objects."""
        result = Result(self.cardinality)
        result.update(torch.Tensor([[0.4, 0.3], [0.3, 0.4], [0.4, 0.3],
                                    [0.3, 0.4]]),
                      labels=[0, 1, 1, 0])

        # Same Result under two cardinalities -> aggregated counts double.
        result_dump = pickle.dumps({
            "Result": {
                "1": {
                    "Test": result,
                    "Train": result
                },
                "2": {
                    "Test": result,
                    "Train": result
                }
            }
        })
        mockOpen = mock_open(read_data=result_dump)
        with patch('builtins.open', mockOpen):
            results = Results(path_model="model/", name_model="Test")
            results.load_files()
            results.compute_results(condition="Test")

            self.assertEqual(results.global_TP, 4)
            self.assertEqual(results.global_FP, 4)
            self.assertEqual(results.global_FN, 4)

            self.assertEqual(results.macro_precision, 0.50)
            self.assertEqual(results.macro_recall, 0.50)
            self.assertTrue(math.isclose(results.micro_recall, 1 / 3))
            self.assertTrue(math.isclose(results.micro_precision, 1 / 3))
        results.print_results()

    def test_results_no_class(self):
        """Aggregation over Results with 0 classes yields all-zero metrics."""
        result = Result(self.cardinality)
        result.update(torch.Tensor([[0.4, 0.3], [0.3, 0.4], [0.4, 0.3],
                                    [0.3, 0.4]]),
                      labels=[0, 1, 1, 0])
        result.number_of_classes = 0
        result_dump = pickle.dumps({
            "Result": {
                "1": {
                    "Test": result,
                    "Train": result
                },
                "2": {
                    "Test": result,
                    "Train": result
                }
            }
        })
        mockOpen = mock_open(read_data=result_dump)
        with patch('builtins.open', mockOpen):
            results = Results(path_model="model/", name_model="Test")
            results.load_files()
            results.compute_results(condition="Test")

            self.assertEqual(results.global_TP, 0)
            self.assertEqual(results.global_FP, 0)
            self.assertEqual(results.global_FN, 0)

            self.assertEqual(results.macro_precision, 0)
            self.assertEqual(results.macro_recall, 0)
            self.assertTrue(math.isclose(results.micro_recall, 0))
            self.assertTrue(math.isclose(results.micro_precision, 0))
    def test_create(self, mock_isfile):
        """create_dataloader splits the data into Test/Train (and subsamples).

        ``mock_isfile`` is injected by a ``@patch`` decorator outside this
        view.
        """
        # NOTE(review): relies on self.lock, which this class's setUp does
        # not define — confirm where the fixture comes from.
        mock_isfile.return_value = True
        cardinality = Cardinality(3, "", "")
        # Pickled payload served by the mocked open(): word vectors plus
        # per-pattern counts.
        read_data = pickle.dumps({
            'word2vec': {
                "1": [1] * 20,
                "2": [2] * 20,
                "3": [3] * 20,
                "4": [4] * 20,
                "5": [5] * 20,
                "6": [6] * 20,
                "7": [7] * 20
            },
            'counter_patterns': {
                1: 10,
                2: 100,
                3: 100,
                4: 100,
                6: 1000,
                5: 1000
            }
        })
        mockOpen = mock_open(read_data=read_data)
        # Real HDF5 file holding the class sequence.
        tf = tempfile.NamedTemporaryFile()
        f = h5py.File(tf, 'w')
        f.create_dataset("list_classes",
                         data=[
                             1, 1, 1, 2, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2,
                             1, 2, 1, 2, 3, 4, 5, 6
                         ])
        f.close()
        cardinality.path_list_classes = tf.name
        cardinality.counter = {1: 10, 2: 100, 3: 100, 4: 100, 6: 1000, 5: 1000}

        # Test
        with patch('builtins.open', mockOpen):
            #cardinality.load_files()
            worker_single = Worker_single(cardinality=cardinality,
                                          lock=self.lock,
                                          batch_size=1)
            dataloader = worker_single.create_dataloader()
            self.assertEqual(len(dataloader), 7)  # 60% of 12
            self.assertTrue(worker_single.dataset.loaded)

        # Train
        # Reset the flag so the next call reloads instead of reusing the
        # cached dataset object.
        worker_single.dataset.loaded = False
        with patch('builtins.open', mockOpen):
            #cardinality.load_files()
            worker_single = Worker_single(cardinality=cardinality,
                                          lock=self.lock,
                                          batch_size=1)
            dataloader = worker_single.create_dataloader(condition="Train")
            self.assertEqual(len(dataloader), 5)  # 40% of 12

        worker_single.dataset.loaded = False
        with patch('builtins.open', mockOpen):
            #cardinality.load_files()
            worker_single = Worker_single(cardinality=cardinality,
                                          lock=self.lock,
                                          batch_size=1)
            dataloader = worker_single.create_dataloader(condition="Train",
                                                         subsample=True,
                                                         subsample_split=0.25)
            self.assertEqual(len(dataloader), 1)  # 25% of 40% of 12

        tf.close()
Example #13
0
    def test_getitem(self):
        """__getitem__ yields a window/label pair, or -1 sentinels when invalid."""
        card = Cardinality(3, "", "")
        card.size_windows = 4
        # Class sequence: 1,1,1,2,2 then eight 1/2 pairs then 3,4,5,6.
        card.list_classes = [1, 1, 1, 2, 2] + [1, 2] * 8 + [3, 4, 5, 6]
        card.counter = {1: 10, 2: 100, 3: 100, 4: 100, 6: 1000, 5: 1000}
        # Word vectors "1".."7" of dimension 20.
        card.w2v = {str(k): [k] * 20 for k in range(1, 8)}
        card.list_position = range(len(card.list_classes))

        # Normal: last index gives a 4x20 window labelled 6.
        sample = card[len(card.list_classes) - 1]
        self.assertEqual(sample['output'], 6)
        self.assertEqual(len(list(sample['input'][0])), 20)
        self.assertEqual(len(list(sample['input'])), 4)

        # Invalid index: sentinel values.
        sample = card[0]
        self.assertListEqual(list(sample['input']), [-1])
        self.assertEqual(sample['output'], -1)

        # Invalid because the same pattern repeats.
        sample = card[1]
        self.assertEqual(sample['output'], -1)
        self.assertListEqual(list(sample['input']), [-1])
        self.assertEqual(sample['output'], -1)

        # Short history gets padded up to the window size.
        sample = card[5]
        self.assertEqual(sample['output'], 1)
        self.assertEqual(len(list(sample['input'][0])), 20)
        self.assertEqual(len(list(sample['input'])), 4)
Example #14
0
 def test_compute_position_void(self):
     """An empty list_classes must yield zero classes."""
     card = Cardinality(3, "", "")
     card.list_classes = []
     card.counter = {1: 10, 2: 100, 3: 100, 4: 100, 6: 1000, 5: 1000}
     card.compute_position()
     self.assertEqual(card.number_of_classes, 0)