def datagen_overfitting(self):
    """Build tiny train/validation/test datasets from the training subjects.

    Intended as an overfitting sanity check: all three dataset splits are
    derived from the same small sample of the training subjects.
    """
    generator = self.get_dataset(self.train_subjets)
    tiny_len = self.data["datagen_config"]["train_dataset_len"]
    # NOTE(review): the test data is also drawn with the *train* length and the
    # validation set reuses the training data — presumably deliberate for an
    # overfitting check; confirm with the experiment design.
    self.data_train = generator.get_tiny_custom_channel_dataset_test(tiny_len)
    self.data_test = generator.get_tiny_custom_channel_dataset_test(tiny_len)
    self.training_set = NetworkDataSet(self.data_train, self.tokenizer)
    self.validation_set = NetworkDataSet(self.data_train, self.tokenizer)
    self.testing_set = NetworkDataSet(self.data_test, self.tokenizer)
    self.print_dataset()
def set_test_datagen(self):
    """Create the test data generator, materialize its dataset and wrap it
    in a `NetworkDataSet`, storing everything on the instance."""
    gen = self.get_dataset_generator(self.test_subjets,
                                     dataset_max_len=self.dataset_test_len)
    self.test_data_generator = gen
    self.data_test = gen.get_dataset()
    # Log which split this is, then the generator's own metadata summary.
    print("Test")
    print(gen.dataset_metadata)
    self.testing_set = NetworkDataSet(self.data_test, self.tokenizer)
def set_train_datagen(self):
    """Create the training data generator, materialize its dataset and wrap
    it in a `NetworkDataSet`, storing everything on the instance."""
    gen = self.get_dataset_generator(self.train_subjets,
                                     dataset_max_len=self.dataset_train_len)
    self.train_data_generator = gen
    self.data_train = gen.get_dataset()
    # Log which split this is, then the generator's own metadata summary.
    print("Entrenamiento")
    print(gen.dataset_metadata)
    self.training_set = NetworkDataSet(self.data_train, self.tokenizer)
def set_validation_datagen(self):
    """Create the validation data generator, materialize its dataset and
    wrap it in a `NetworkDataSet`, storing everything on the instance."""
    gen = self.get_dataset_generator(self.validation_subjets,
                                     dataset_max_len=self.dataset_validation_len)
    self.validation_data_generator = gen
    self.data_validation = gen.get_dataset()
    # Log which split this is, then the generator's own metadata summary.
    print("Validacion")
    print(gen.dataset_metadata)
    self.validation_set = NetworkDataSet(self.data_validation, self.tokenizer)
pdb.set_trace() #print(data_train) """" # Validation Dataset validation_data_generator = DataGen(validation_subjets, tokenizer, combinate_subjects=True, channels_iter=channel_iters, targets_cod=target_cod) data_validation = validation_data_generator.get_same_channel_dataset() print("Validacion") print(validation_data_generator.dataset_metadata) # Test Dataset test_data_generator = DataGen(test_subjets, tokenizer, combinate_subjects=True, channels_iter=channel_iters, targets_cod=target_cod) data_test = test_data_generator.get_same_channel_dataset() print("Test") print(test_data_generator.dataset_metadata) """ # Create the datasets training_set = NetworkDataSet(data_train, tokenizer) #validation_set = NetworkDataSet(data_validation, tokenizer) #testing_set = NetworkDataSet(data_test, tokenizer) TRAIN_BATCH_SIZE = 2 #VALID_BATCH_SIZE = 2 TEST_BATCH_SIZE = 1 LEARNING_RATE = 1e-04 train_params = { 'batch_size': TRAIN_BATCH_SIZE, 'shuffle': True, 'num_workers': 0 } test_params = {
# Validation Dataset #validation_data_generator = DataGen(validation_subjets, tokenizer) #data_validation = validation_data_generator.get_same_channel_dataset() # Test Dataset targets_cod = {"positive": 0, "negative": 1} test_data_generator = DataGen(test_subjets, tokenizer, combinate_subjects=True, channels_iter=100, targets_cod=targets_cod) data_test = test_data_generator.get_same_channel_dataset() print(test_data_generator.dataset_metadata) # Create the datasets #training_set = NetworkDataSet(data_train, tokenizer) #validation_set = NetworkDataSet(data_validation, tokenizer) testing_set = NetworkDataSet(data_test, tokenizer) TRAIN_BATCH_SIZE = 4 VALID_BATCH_SIZE = 1 TEST_BATCH_SIZE = 1 LEARNING_RATE = 1e-04 train_params = { 'batch_size': TRAIN_BATCH_SIZE, 'shuffle': True, 'num_workers': 0 } validation_params = { 'batch_size': VALID_BATCH_SIZE, 'shuffle': True,
# Training Dataset.
# NOTE(review): channel_iters and target_cod are defined outside this view —
# presumably module-level configuration; verify.
train_data_generator = DataGen(train_subjets, tokenizer,
                               combinate_subjects=True,
                               channels_iter=channel_iters,
                               targets_cod=target_cod)
data_train = train_data_generator.get_same_channel_dataset()
print("Entrenamiento")
print(train_data_generator.dataset_metadata)
# Validation Dataset
validation_data_generator = DataGen(validation_subjets, tokenizer,
                                    combinate_subjects=True,
                                    channels_iter=channel_iters,
                                    targets_cod=target_cod)
data_validation = validation_data_generator.get_same_channel_dataset()
print("Validacion")
print(validation_data_generator.dataset_metadata)
# Test Dataset
test_data_generator = DataGen(test_subjets, tokenizer,
                              combinate_subjects=True,
                              channels_iter=channel_iters,
                              targets_cod=target_cod)
data_test = test_data_generator.get_same_channel_dataset()
print("Test")
print(test_data_generator.dataset_metadata)
# Create the datasets — all three splits are active in this variant.
training_set = NetworkDataSet(data_train, tokenizer)
validation_set = NetworkDataSet(data_validation, tokenizer)
testing_set = NetworkDataSet(data_test, tokenizer)
# Loader hyperparameters for this run.
TRAIN_BATCH_SIZE = 2
VALID_BATCH_SIZE = 2
TEST_BATCH_SIZE = 2
LEARNING_RATE = 1e-04
# Keyword arguments for the training DataLoader.
train_params = {'batch_size': TRAIN_BATCH_SIZE,
                'shuffle': True,
                'num_workers': 0
                }
# NOTE(review): this chunk is truncated here — the validation_params literal
# continues outside this view.
validation_params = {'batch_size': VALID_BATCH_SIZE,
                     'shuffle': True,