def creating_cardinalities(self, min_cardinality=0, max_cardinality=float("+inf")):
    """Create the cardinality objects for the learning step.

    Args:
        min_cardinality (int, optional): minimum value of cardinality to be selected.
            Defaults to 0.
        max_cardinality (float, optional): maximum value of cardinality to be selected.
            Defaults to float("+inf").
    """
    if self.one_model:
        list_cardinalities_available = []
        for event in self.counter:
            cardinality = len(str(self.counter[event]))
            if min_cardinality < cardinality < max_cardinality:
                list_cardinalities_available.append(cardinality)
        self.set_cardinalities_available = set(list_cardinalities_available)
        logger.info(str(len(self.set_cardinalities_available)) +
                    " cardinalities available in this dataset")
        self.list_cardinalities.append(
            Cardinality(cardinality=0,
                        path_w2v=self.path_model_w2v,
                        path_list_classes=self.path_list_classes,
                        size=self.size,
                        one_model=self.one_model,
                        set_cardinalities=self.set_cardinalities_available))
    else:
        list_cardinalities_available = []
        for event in self.counter:
            list_cardinalities_available.append(len(str(self.counter[event])))
        self.set_cardinalities_available = set(list_cardinalities_available)
        logger.info(str(len(self.set_cardinalities_available)) +
                    " cardinalities available in this dataset")
        for cardinality in self.set_cardinalities_available:
            if min_cardinality < cardinality < max_cardinality:
                self.list_cardinalities.append(
                    Cardinality(cardinality=cardinality,
                                path_w2v=self.path_model_w2v,
                                path_list_classes=self.path_list_classes,
                                size=self.size))
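# Illustrative sketch only (the counter values below are made up, and
# _example_cardinality_selection is not part of the library): the "cardinality"
# of an event is the number of digits of its occurrence count, len(str(count)),
# so min_cardinality/max_cardinality bound the order of magnitude of the events
# each per-cardinality model is trained on (strict bounds, as in the method above).
def _example_cardinality_selection(counter, min_cardinality=3, max_cardinality=8):
    cardinalities = {len(str(count)) for count in counter.values()}
    return {c for c in cardinalities if min_cardinality < c < max_cardinality}

# e.g. _example_cardinality_selection({"evtA": 8, "evtB": 450,
#                                      "evtC": 9120, "evtD": 73000}) == {4, 5}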
def setUp(self):
    self.list_cardinalities = [
        Cardinality(3, "", ""),
        Cardinality(4, "", ""),
        Cardinality(5, "", "")
    ]
    self.lock = torch.multiprocessing.get_context('spawn').Lock()
    self.model = LSTMLayer(num_classes=5)
def setUp(self):
    self.cardinality = Cardinality(3, "", "")
    self.cardinality.list_classes = [0, 0, 0, 1, 1, 2]
    self.cardinality.counter = {
        0: 100,
        1: 100,
        2: 100,
        4: 100,
        6: 1000,
        5: 1000
    }
    self.cardinality.compute_position()
def test_loadfiles(self, mock_isfile):
    mock_isfile.return_value = True

    # Without size
    cardinality = Cardinality(3, "", "")
    read_data = pickle.dumps({
        'word2vec': -1,
        'counter_patterns': {
            "1": 1,
            "10": 10,
            "100": 100,
            "1000": 1000,
            "10000": 10000,
            "100000": 100000
        }
    })
    mockOpen = mock_open(read_data=read_data)
    tf = tempfile.NamedTemporaryFile()
    f = h5py.File(tf, 'w')
    f.create_dataset("list_classes", data=[1, 2, 3, 4, 5])
    f.close()
    cardinality.path_list_classes = tf.name
    with patch('builtins.open', mockOpen):
        cardinality.load_files()
    tf.close()
    self.assertEqual(len(cardinality.counter), 6)
    self.assertEqual(len(cardinality.list_classes), 5)

    # With size
    cardinality = Cardinality(3, "", "", size=2)
    read_data = pickle.dumps({
        'word2vec': -1,
        'counter_patterns': {
            "1": 1,
            "10": 10,
            "100": 100,
            "1000": 1000,
            "10000": 10000,
            "100000": 100000
        }
    })
    mockOpen = mock_open(read_data=read_data)
    tf = tempfile.NamedTemporaryFile()
    f = h5py.File(tf, 'w')
    f.create_dataset("list_classes", data=[1, 2, 3, 4, 5])
    f.close()
    cardinality.path_list_classes = tf.name
    with patch('builtins.open', mockOpen):
        cardinality.load_files()
    tf.close()
    self.assertEqual(len(cardinality.counter), 6)
    self.assertEqual(len(cardinality.list_classes), 2)
def test_train_model(self, mock_isfile):
    mock_isfile.return_value = True
    cardinality = Cardinality(3, "", "")
    read_data = pickle.dumps({
        'word2vec': {
            "1": [1] * 20,
            "2": [2] * 20,
            "3": [3] * 20,
            "4": [4] * 20,
            "5": [5] * 20,
            "6": [6] * 20,
            "7": [7] * 20
        },
        'counter_patterns': {
            1: 10,
            2: 100,
            3: 100,
            4: 100,
            6: 1000,
            5: 1000
        },
        "LSTM": {
            3: self.model.state_dict()
        }
    })
    mockOpen = mock_open(read_data=read_data)
    tf = tempfile.NamedTemporaryFile()
    f = h5py.File(tf, 'w')
    f.create_dataset("list_classes", data=[
        1, 1, 1, 2, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 3, 4, 5, 6
    ])
    f.close()
    cardinality.path_list_classes = tf.name
    cardinality.counter = {1: 10, 2: 100, 3: 100, 4: 100, 6: 1000, 5: 1000}
    with patch('builtins.open', mockOpen):
        worker_single = Worker_single(batch_result=1,
                                      cardinality=cardinality,
                                      lock=self.lock,
                                      batch_size=1)
        dataloader = worker_single.create_dataloader()
        worker_single.train(resuming=False)
        worker_single.train(resuming=True)
def test_without_file(self, mock_isfile):
    mock_isfile.return_value = True
    cardinality = Cardinality(3, "", "")
    read_data = pickle.dumps({
        'word2vec': {
            "1": [1] * 20,
            "2": [2] * 20,
            "3": [3] * 20,
            "4": [4] * 20,
            "5": [5] * 20,
            "6": [6] * 20,
            "7": [7] * 20
        },
        'counter_patterns': {
            1: 10,
            2: 100,
            3: 100,
            4: 100,
            6: 1000,
            5: 1000
        },
        "LSTM": {
            3: self.model.state_dict()
        }
    })
    mockOpen = mock_open(read_data=read_data)
    tf = tempfile.NamedTemporaryFile()
    f = h5py.File(tf, 'w')
    f.create_dataset("list_classes", data=[
        1, 1, 1, 2, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 3, 4, 5, 6
    ])
    f.close()
    cardinality.path_list_classes = tf.name
    cardinality.counter = {1: 10, 2: 100, 3: 100, 4: 100, 6: 1000, 5: 1000}

    # No data
    with self.assertRaises(Exception):
        worker_single = Worker_single(cardinality=cardinality,
                                      lock=self.lock,
                                      batch_size=1)
        worker_single.load_model()
def setUp(self):
    self.lock = torch.multiprocessing.get_context('spawn').Lock()
    self.model = LSTMLayer(num_classes=5)
    cardinality = Cardinality(3, "", "")
    cardinality.list_classes = [1, 1, 1, 2, 2, 3, 4, 5, 6]
    cardinality.counter = {1: 10, 2: 100, 3: 100, 4: 100, 6: 1000, 5: 1000}
    cardinality.compute_position()
    self.result = Result(cardinality)
def test_creation(self):
    cardinality = Cardinality(3, "", "", size=2)
    read_data = pickle.dumps({
        'word2vec': -1,
        'counter_patterns': {
            "1": 1,
            "10": 10,
            "100": 100,
            "1000": 1000,
            "10000": 10000,
            "100000": 100000
        }
    })
    mockOpen = mock_open(read_data=read_data)
    tf = tempfile.NamedTemporaryFile()
    f = h5py.File(tf, 'w')
    f.create_dataset("list_classes", data=[1, 2, 3, 4, 5])
    f.close()
    cardinality.path_list_classes = tf.name
    with patch('builtins.open', mockOpen):
        cardinality.load_files()
    self.cardinality = cardinality

    worker_single = Worker_single(cardinality=self.cardinality, lock=self.lock)
    worker_single = Worker_single(cardinality=self.cardinality,
                                  lock=self.lock,
                                  stoppingcondition="timer")
    worker_single = Worker_single(cardinality=self.cardinality,
                                  lock=self.lock,
                                  stoppingcondition="earlystopping")
    worker_single = Worker_single(cardinality=self.cardinality,
                                  lock=self.lock,
                                  stoppingcondition="epoch")
    with self.assertRaises(Exception):
        worker_single = Worker_single(cardinality=self.cardinality,
                                      lock=self.lock,
                                      stoppingcondition="unimplemented")
    tf.close()
def test_compute_position(self):
    cardinality = Cardinality(3, "", "")
    cardinality.list_classes = [1, 1, 1, 2, 2, 3, 4, 5, 6]
    cardinality.counter = {1: 10, 2: 100, 3: 100, 4: 100, 6: 1000, 5: 1000}
    cardinality.compute_position()
    self.assertEqual(len(cardinality.set_classes_kept), 3)
    self.assertEqual(list(cardinality.list_position), [3, 4, 5, 6])
    self.assertEqual(cardinality.number_of_classes, 5)
    self.assertEqual(len(cardinality), 4)
def test_compute_position_one_model(self):
    cardinality = Cardinality(3, "", "",
                              one_model=True,
                              set_cardinalities=[1, 2, 3, 4, 5])
    cardinality.list_classes = [1, 1, 1, 2, 2, 3, 4, 5, 6]
    cardinality.counter = {1: 10, 2: 100, 3: 100, 4: 100, 6: 1000, 5: 1000}
    cardinality.compute_position()
    self.assertEqual(len(cardinality.set_classes_kept), 6)
    self.assertEqual(list(cardinality.list_position), [0, 1, 2, 3, 4, 5, 6, 7, 8])
    self.assertEqual(cardinality.number_of_classes, 7)
    self.assertEqual(len(cardinality), 9)
class UtilTest(unittest.TestCase):
    def setUp(self):
        self.cardinality = Cardinality(3, "", "")
        self.cardinality.list_classes = [0, 0, 0, 1, 1, 2]
        self.cardinality.counter = {
            0: 100,
            1: 100,
            2: 100,
            4: 100,
            6: 1000,
            5: 1000
        }
        self.cardinality.compute_position()

    def test_creating(self):
        result = Result(self.cardinality)
        self.assertEqual(result.number_of_classes, 3)

    def test_update(self):
        result = Result(self.cardinality)
        result.update(torch.Tensor([[0.4, 0.3], [0.3, 0.4], [0.4, 0.3], [0.3, 0.4]]),
                      labels=[0, 1, 1, 0])
        self.assertEqual(result.conf_matrix[0, 1], 1)
        self.assertEqual(result.conf_matrix[1, 0], 1)
        self.assertEqual(result.conf_matrix[1, 1], 1)
        self.assertEqual(result.conf_matrix[0, 0], 1)

    def test_compute(self):
        result = Result(self.cardinality)
        result.update(torch.Tensor([[0.4, 0.3], [0.3, 0.4], [0.4, 0.3], [0.3, 0.4]]),
                      labels=[0, 1, 1, 0])
        result.computing_result()
        self.assertEqual(result.global_TP, 2)
        self.assertEqual(result.global_FP, 2)
        self.assertEqual(result.global_FN, 2)
        self.assertEqual(result.macro_precision, 0.50)
        self.assertEqual(result.macro_recall, 0.50)
        self.assertTrue(math.isclose(result.micro_recall, 1 / 3))
        self.assertTrue(math.isclose(result.micro_precision, 1 / 3))

    def test_no_class(self):
        result = Result(self.cardinality)
        result.number_of_classes = 0
        result.update(torch.Tensor([[0.4, 0.3], [0.3, 0.4], [0.4, 0.3], [0.3, 0.4]]),
                      labels=[0, 1, 1, 0])
        result.computing_result()
        self.assertEqual(result.micro_precision, 0)
        self.assertEqual(result.micro_recall, 0)

    def test_print(self):
        result = Result(self.cardinality, "Test")
        result.number_of_classes = 0
        result.update(torch.Tensor([[0.4, 0.3], [0.3, 0.4], [0.4, 0.3], [0.3, 0.4]]),
                      labels=[0, 1, 1, 0])
        result.computing_result()
        result = Result(self.cardinality)
        result.number_of_classes = 0
        result.update(torch.Tensor([[0.4, 0.3], [0.3, 0.4], [0.4, 0.3], [0.3, 0.4]]),
                      labels=[0, 1, 1, 0])
        result.computing_result()

    def test_results_normal(self):
        result = Result(self.cardinality)
        result.update(torch.Tensor([[0.4, 0.3], [0.3, 0.4], [0.4, 0.3], [0.3, 0.4]]),
                      labels=[0, 1, 1, 0])
        result_dump = pickle.dumps({
            "Result": {
                "1": {
                    "Test": result,
                    "Train": result
                },
                "2": {
                    "Test": result,
                    "Train": result
                }
            }
        })
        mockOpen = mock_open(read_data=result_dump)
        with patch('builtins.open', mockOpen):
            results = Results(path_model="model/", name_model="Test")
            results.load_files()
        results.compute_results(condition="Test")
        self.assertEqual(results.global_TP, 4)
        self.assertEqual(results.global_FP, 4)
        self.assertEqual(results.global_FN, 4)
        self.assertEqual(results.macro_precision, 0.50)
        self.assertEqual(results.macro_recall, 0.50)
        self.assertTrue(math.isclose(results.micro_recall, 1 / 3))
        self.assertTrue(math.isclose(results.micro_precision, 1 / 3))
        results.print_results()

    def test_results_no_class(self):
        result = Result(self.cardinality)
        result.update(torch.Tensor([[0.4, 0.3], [0.3, 0.4], [0.4, 0.3], [0.3, 0.4]]),
                      labels=[0, 1, 1, 0])
        result.number_of_classes = 0
        result_dump = pickle.dumps({
            "Result": {
                "1": {
                    "Test": result,
                    "Train": result
                },
                "2": {
                    "Test": result,
                    "Train": result
                }
            }
        })
        mockOpen = mock_open(read_data=result_dump)
        with patch('builtins.open', mockOpen):
            results = Results(path_model="model/", name_model="Test")
            results.load_files()
        results.compute_results(condition="Test")
        self.assertEqual(results.global_TP, 0)
        self.assertEqual(results.global_FP, 0)
        self.assertEqual(results.global_FN, 0)
        self.assertEqual(results.macro_precision, 0)
        self.assertEqual(results.macro_recall, 0)
        self.assertTrue(math.isclose(results.micro_recall, 0))
        self.assertTrue(math.isclose(results.micro_precision, 0))
def test_create(self, mock_isfile):
    mock_isfile.return_value = True
    cardinality = Cardinality(3, "", "")
    read_data = pickle.dumps({
        'word2vec': {
            "1": [1] * 20,
            "2": [2] * 20,
            "3": [3] * 20,
            "4": [4] * 20,
            "5": [5] * 20,
            "6": [6] * 20,
            "7": [7] * 20
        },
        'counter_patterns': {
            1: 10,
            2: 100,
            3: 100,
            4: 100,
            6: 1000,
            5: 1000
        }
    })
    mockOpen = mock_open(read_data=read_data)
    tf = tempfile.NamedTemporaryFile()
    f = h5py.File(tf, 'w')
    f.create_dataset("list_classes", data=[
        1, 1, 1, 2, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 3, 4, 5, 6
    ])
    f.close()
    cardinality.path_list_classes = tf.name
    cardinality.counter = {1: 10, 2: 100, 3: 100, 4: 100, 6: 1000, 5: 1000}

    # Test
    with patch('builtins.open', mockOpen):
        # cardinality.load_files()
        worker_single = Worker_single(cardinality=cardinality,
                                      lock=self.lock,
                                      batch_size=1)
        dataloader = worker_single.create_dataloader()
        self.assertEqual(len(dataloader), 7)  # 60% of 12
        self.assertTrue(worker_single.dataset.loaded)

    # Train
    # Avoid creating a new object
    worker_single.dataset.loaded = False
    with patch('builtins.open', mockOpen):
        # cardinality.load_files()
        worker_single = Worker_single(cardinality=cardinality,
                                      lock=self.lock,
                                      batch_size=1)
        dataloader = worker_single.create_dataloader(condition="Train")
        self.assertEqual(len(dataloader), 5)  # 40% of 12

    worker_single.dataset.loaded = False
    with patch('builtins.open', mockOpen):
        # cardinality.load_files()
        worker_single = Worker_single(cardinality=cardinality,
                                      lock=self.lock,
                                      batch_size=1)
        dataloader = worker_single.create_dataloader(condition="Train",
                                                     subsample=True,
                                                     subsample_split=0.25)
        self.assertEqual(len(dataloader), 1)  # 25% of 40% of 12

    tf.close()
def test_getitem(self):
    cardinality = Cardinality(3, "", "")
    cardinality.size_windows = 4
    cardinality.list_classes = [
        1, 1, 1, 2, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 3, 4, 5, 6
    ]
    cardinality.counter = {1: 10, 2: 100, 3: 100, 4: 100, 6: 1000, 5: 1000}
    cardinality.w2v = {
        "1": [1] * 20,
        "2": [2] * 20,
        "3": [3] * 20,
        "4": [4] * 20,
        "5": [5] * 20,
        "6": [6] * 20,
        "7": [7] * 20
    }
    cardinality.list_position = range(len(cardinality.list_classes))

    # Normal
    output_getitem = cardinality.__getitem__(len(cardinality.list_classes) - 1)
    self.assertEqual(output_getitem['output'], 6)
    self.assertEqual(len(list(output_getitem['input'][0])), 20)
    self.assertEqual(len(list(output_getitem['input'])), 4)

    # Invalid
    output_getitem = cardinality.__getitem__(0)
    self.assertListEqual(list(output_getitem['input']), [-1])
    self.assertEqual(output_getitem['output'], -1)

    # Invalid due to the same pattern
    output_getitem = cardinality.__getitem__(1)
    self.assertEqual(output_getitem['output'], -1)
    self.assertListEqual(list(output_getitem['input']), [-1])

    # Padding
    output_getitem = cardinality.__getitem__(5)
    self.assertEqual(output_getitem['output'], 1)
    self.assertEqual(len(list(output_getitem['input'][0])), 20)
    self.assertEqual(len(list(output_getitem['input'])), 4)
def test_compute_position_void(self):
    cardinality = Cardinality(3, "", "")
    cardinality.list_classes = []
    cardinality.counter = {1: 10, 2: 100, 3: 100, 4: 100, 6: 1000, 5: 1000}
    cardinality.compute_position()
    self.assertEqual(cardinality.number_of_classes, 0)