import logging
import sys

import matplotlib.pyplot as plt
import numpy as np
import torch.utils.data as data
from torchvision import transforms

# Project-local names used below (WashingtonDataset, IfnEnitDataset, IAM_words,
# iam_train_valid_combined_dataset, PadImage, globals) come from this
# repository's own modules.


class WG_IFN_Dataset(data.Dataset):
    # Concatenation of the Washington (WG) and IFN/ENIT datasets.
    # The np.arange(1) defaults act as a length-1 sentinel meaning "no index
    # list supplied"; note this breaks if a caller passes exactly one real index.
    def __init__(self, cf, train=True, transform=None,
                 data_idx_WG=np.arange(1), data_idx_IFN=np.arange(1),
                 complement_idx=False):
        self.train = train  # training set or test set
        if len(data_idx_WG) == 1:
            self.datasetWG = WashingtonDataset(cf, train=self.train,
                                               transform=transform)
        else:
            # complement_idx is hard-coded to True here; the constructor
            # argument of the same name is currently ignored.
            self.datasetWG = WashingtonDataset(cf, train=self.train,
                                               transform=transform,
                                               data_idx=data_idx_WG,
                                               complement_idx=True)
        if len(data_idx_IFN) == 1:
            self.datasetIFN = IfnEnitDataset(cf, train=self.train,
                                             transform=transform)
        else:
            self.datasetIFN = IfnEnitDataset(cf, train=self.train,
                                             transform=transform,
                                             data_idx=data_idx_IFN,
                                             complement_idx=True)
        # Exposed so the indices can be passed from one set to another
        # (the train set draws the split, the test set takes the complement).
        self.data_idx_WG = self.datasetWG.data_idx
        self.data_idx_IFN = self.datasetIFN.data_idx

    def add_weights_of_words(self):
        # weights to balance the loss, if the data is unbalanced
        self.datasetWG.add_weights_of_words()
        self.datasetIFN.add_weights_of_words()

    def num_classes(self):
        return self.datasetIFN.num_classes()  # IFN and WG have the same PHOC size

    def __getitem__(self, index):
        if index < len(self.datasetWG):
            return self.datasetWG[index]
        # The offset is exact: index - len(self.datasetWG) maps the first
        # IFN sample to 0, so no sample is skipped.
        return self.datasetIFN[index - len(self.datasetWG)]

    def __len__(self):
        return len(self.datasetWG) + len(self.datasetIFN)
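# --- Usage sketch (illustrative, not part of the original module). It shows
# the intended train/test hand-off: the train set draws a random index split,
# and the test set receives those indices so it can take the complement.
# `cf` is assumed to be the project's config object; only attributes already
# used in this file (e.g. cf.batch_size) are relied upon.
def _wg_ifn_example(cf, image_transform):
    from torch.utils.data import DataLoader

    train_set = WG_IFN_Dataset(cf, train=True, transform=image_transform)
    test_set = WG_IFN_Dataset(cf, train=False, transform=image_transform,
                              data_idx_WG=train_set.data_idx_WG,
                              data_idx_IFN=train_set.data_idx_IFN)
    return DataLoader(train_set, batch_size=cf.batch_size, shuffle=True), test_set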
class IAM_IFN_Dataset(data.Dataset):
    # Concatenation of the IFN/ENIT and IAM datasets. IAM is split by its
    # official train/validate/test folders, so index-based splitting is
    # deprecated for it (see the else branch below).
    def __init__(self, cf, train=True, mode='train', transform=None,
                 data_idx_IAM=np.arange(1), data_idx_IFN=np.arange(1),
                 complement_idx=False):
        self.train = train  # training set or test set
        self.mode = mode    # one of 'train', 'test', or 'validate'
        if len(data_idx_IFN) == 1:
            self.datasetIFN = IfnEnitDataset(cf, train=self.train,
                                             transform=transform)
        else:
            # complement_idx is hard-coded to True here as well; the
            # constructor argument of the same name is ignored.
            self.datasetIFN = IfnEnitDataset(cf, train=self.train,
                                             transform=transform,
                                             data_idx=data_idx_IFN,
                                             complement_idx=True)
        if len(data_idx_IAM) == 1:
            if mode == 'train':
                # train and validation folders combined
                self.datasetIAM = iam_train_valid_combined_dataset(
                    cf, train=True, transform=transform)
            else:
                assert mode == 'test'
                self.datasetIAM = IAM_words(cf, mode=self.mode,
                                            transform=transform)
        else:
            # self.datasetIAM = IAM_words(cf, mode=self.mode, transform=transform)
            # Raising here instead of only printing, since self.datasetIAM
            # would otherwise be left unset.
            raise NotImplementedError(
                'Deprecated by Rawi, as the split is based on the train, '
                'validate and test folders')
        # this is needed, to be passed from one set to another
        self.data_idx_IFN = self.datasetIFN.data_idx
        # self.data_idx_IAM = self.datasetIAM.data_idx  # unused: IAM split is folder-based

    def add_weights_of_words(self):
        # weights to balance the loss, if the data is unbalanced
        self.datasetIFN.add_weights_of_words()
        self.datasetIAM.add_weights_of_words()

    def num_classes(self):
        return self.datasetIAM.num_classes()  # IAM and IFN have the same PHOC size

    def __getitem__(self, index):
        if index < len(self.datasetIFN):
            return self.datasetIFN[index]
        # The offset is exact, so no sample is skipped.
        return self.datasetIAM[index - len(self.datasetIFN)]

    def __len__(self):
        return len(self.datasetIFN) + len(self.datasetIAM)
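# --- Usage sketch (illustrative): the IAM side is chosen by `mode`, the IFN
# side by `train` plus the optional index list, mirroring the hand-off used
# in get_ifn below. `cf` and `image_transform` are assumptions, as above.
def _iam_ifn_example(cf, image_transform):
    train_set = IAM_IFN_Dataset(cf, train=True, mode='train',
                                transform=image_transform)
    test_set = IAM_IFN_Dataset(cf, train=False, mode='test',
                               transform=image_transform,
                               data_idx_IFN=train_set.data_idx_IFN)
    return train_set, test_set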
def get_ifn(cf, image_transform):
    print('...................Loading IFN dataset...................')
    if not cf.IFN_based_on_folds_experiment:
        # Randomly split training and testing according to the split
        # percentage; the test set takes the complement of the training
        # indices. The folder left out for testing is cf.IFN_test.
        train_set = IfnEnitDataset(
            cf, train=True,
            transform=image_transform['image_transform_hdr'])
        test_set = IfnEnitDataset(
            cf, train=False,
            transform=image_transform['image_transform_hdr'],
            data_idx=train_set.data_idx, complement_idx=True)
    else:
        # Leave one folder out of the 'abcde' folders for testing.
        train_set = IFN_XVAL_Dataset(
            cf, train=True,
            transform=image_transform['image_transform_hdr'])
        test_set = IfnEnitDataset(
            cf, train=False,
            transform=image_transform['image_transform_hdr'])
    return train_set, test_set
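# --- Usage sketch (illustrative): get_ifn expects a dict of transforms keyed
# by 'image_transform_hdr'. The pipeline below is an assumption, built from
# the same torchvision calls used in test_dataload further down.
def _get_ifn_example(cf):
    image_transform = {
        'image_transform_hdr': transforms.Compose([
            transforms.ToPILImage(),
            transforms.Resize((cf.input_size[0], cf.input_size[1])),
            transforms.ToTensor(),
        ])
    }
    return get_ifn(cf, image_transform)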
def test_dataload(cf):
    logger = logging.getLogger('test_dataloader_wg')

    # Image transformations
    if cf.pad_images:
        pad_image = PadImage(
            (globals.MAX_IMAGE_WIDTH, globals.MAX_IMAGE_HEIGHT))
    if cf.resize_images:
        if cf.pad_images:
            image_transform = transforms.Compose([
                pad_image,
                transforms.ToPILImage(),
                # transforms.Scale is deprecated in torchvision;
                # Resize is its replacement.
                transforms.Resize((cf.input_size[0], cf.input_size[1])),
                transforms.ToTensor()
            ])
        else:
            image_transform = transforms.Compose([
                transforms.ToPILImage(),
                transforms.Resize((cf.input_size[0], cf.input_size[1])),
                transforms.ToTensor()
            ])
    else:
        if cf.pad_images:
            image_transform = transforms.Compose(
                [pad_image, transforms.ToTensor()])
        else:
            image_transform = transforms.ToTensor()

    if cf.dataset_name == 'WG':
        input_dataset = WashingtonDataset(cf, transform=image_transform)
    elif cf.dataset_name == 'IFN':
        input_dataset = IfnEnitDataset(cf, transform=image_transform)
    else:
        logger.fatal('The dataset \'%s\' is unknown. Use: [WG, IFN]',
                     cf.dataset_name)
        sys.exit(1)  # non-zero exit code, since this is an error

    # dataloader = DataLoader(input_dataset, batch_size=cf.batch_size,
    #                         shuffle=cf.shuffle, num_workers=cf.num_workers)
    for i in range(len(input_dataset)):
        plt.figure(i)
        plt.xticks([])
        plt.yticks([])
        image, target = input_dataset[i]
        plt.imshow(image.numpy()[0, :, :], 'gray')
        plt.show()
        if i == 102:
            break
class IFN_XVAL_Dataset(data.Dataset):
    # Cross-validation dataset for IFN/ENIT: the test folder (cf.IFN_test)
    # is left out, and the remaining four of the 'abcde' folders are
    # concatenated as the training set.
    def __init__(self, cf, train=True, transform=None):
        # cf.train_split should stay False here, as one whole folder is
        # kept aside for testing.
        self.train = train  # training set or test set
        # remove the test-set letter from the train folders, e.g. 'abcde' -> 'abcd'
        trn_folder = cf.folders_to_use.replace(cf.IFN_test[-1], '')

        # back up the original paths; they are restored below so the test
        # set can be loaded from them afterwards
        dataset_path = cf.dataset_path_IFN
        gt_path = cf.gt_path_IFN

        cf.dataset_path_IFN = dataset_path.replace(cf.IFN_test,
                                                   'set_' + trn_folder[0])
        cf.gt_path_IFN = gt_path.replace(cf.IFN_test, 'set_' + trn_folder[0])
        self.datasetIFN_1 = IfnEnitDataset(cf, train=self.train,
                                           transform=transform)

        cf.dataset_path_IFN = dataset_path.replace(cf.IFN_test,
                                                   'set_' + trn_folder[1])
        cf.gt_path_IFN = gt_path.replace(cf.IFN_test, 'set_' + trn_folder[1])
        self.datasetIFN_2 = IfnEnitDataset(cf, train=self.train,
                                           transform=transform)

        cf.dataset_path_IFN = dataset_path.replace(cf.IFN_test,
                                                   'set_' + trn_folder[2])
        cf.gt_path_IFN = gt_path.replace(cf.IFN_test, 'set_' + trn_folder[2])
        self.datasetIFN_3 = IfnEnitDataset(cf, train=self.train,
                                           transform=transform)

        cf.dataset_path_IFN = dataset_path.replace(cf.IFN_test,
                                                   'set_' + trn_folder[3])
        cf.gt_path_IFN = gt_path.replace(cf.IFN_test, 'set_' + trn_folder[3])
        self.datasetIFN_4 = IfnEnitDataset(cf, train=self.train,
                                           transform=transform)

        self.IFN_1_len = len(self.datasetIFN_1)
        self.IFN_2_len = len(self.datasetIFN_2)
        self.IFN_3_len = len(self.datasetIFN_3)
        self.IFN_4_len = len(self.datasetIFN_4)

        # restore the original paths; they are needed when loading the test set
        cf.dataset_path_IFN = dataset_path
        cf.gt_path_IFN = gt_path

    def __getitem__(self, index):
        # The offsets below are exact, so no sample is skipped at the
        # boundaries between folds.
        if index < self.IFN_1_len:
            return self.datasetIFN_1[index]
        elif index < (self.IFN_1_len + self.IFN_2_len):
            return self.datasetIFN_2[index - self.IFN_1_len]
        elif index < (self.IFN_1_len + self.IFN_2_len + self.IFN_3_len):
            return self.datasetIFN_3[index - (self.IFN_1_len + self.IFN_2_len)]
        else:  # this is IFN_4
            return self.datasetIFN_4[
                index - (self.IFN_1_len + self.IFN_2_len + self.IFN_3_len)]

    def __len__(self):
        return (self.IFN_1_len + self.IFN_2_len
                + self.IFN_3_len + self.IFN_4_len)

    def add_weights_of_words(self):
        # weights to balance the loss, if the data is unbalanced; applied to
        # all four folds so the weights cover the whole training set
        self.datasetIFN_1.add_weights_of_words()
        self.datasetIFN_2.add_weights_of_words()
        self.datasetIFN_3.add_weights_of_words()
        self.datasetIFN_4.add_weights_of_words()

    def num_classes(self):
        # does not matter which fold, as they all have the same PHOC length
        return self.datasetIFN_1.num_classes()
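# --- Design note (a sketch, not part of the original repo): the manual offset
# bookkeeping in IFN_XVAL_Dataset.__getitem__ duplicates what PyTorch's
# torch.utils.data.ConcatDataset already provides. The helper below shows the
# equivalent concatenation; `folds` stands for the four per-folder datasets.
def _concat_folds(folds):
    from torch.utils.data import ConcatDataset

    # ConcatDataset keeps cumulative lengths and maps a global index to
    # (fold, local index), exactly like the arithmetic in __getitem__ above,
    # so indexing and len() come for free.
    return ConcatDataset(folds)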