Example #1
class WG_IFN_Dataset(data.Dataset):
  def __init__(self, cf, train=True, transform=None, data_idx_WG=np.arange(1),
               data_idx_IFN=np.arange(1), complement_idx=False):
    self.train = train  # training set or test set
    if len(data_idx_WG) == 1:
        self.datasetWG = WashingtonDataset(cf, train=self.train, transform=transform)
    else:
        self.datasetWG = WashingtonDataset(cf, train=self.train, transform=transform,
                                           data_idx=data_idx_WG, complement_idx=True)
    if len(data_idx_IFN) == 1:
        self.datasetIFN = IfnEnitDataset(cf, train=self.train, transform=transform)
    else:
        self.datasetIFN = IfnEnitDataset(cf, train=self.train, transform=transform,
                                         data_idx=data_idx_IFN, complement_idx=True)

    self.data_idx_WG = self.datasetWG.data_idx  # kept so the same split indices can be passed to the complementary set
    self.data_idx_IFN = self.datasetIFN.data_idx  # kept so the same split indices can be passed to the complementary set
          
  def add_weights_of_words(self):  # weights to balance the loss when the data is unbalanced
      self.datasetWG.add_weights_of_words()
      self.datasetIFN.add_weights_of_words()

  def num_classes(self):
    return self.datasetIFN.num_classes()  # IFN and WG have the same PHOC size

  def __getitem__(self, index):
    if index < len(self.datasetWG):
        return self.datasetWG[index]
    else:
        return self.datasetIFN[index - len(self.datasetWG)]  # indices past the WG range fall through to IFN; no sample is skipped

  def __len__(self):
    return len(self.datasetWG) + len(self.datasetIFN)
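
Class names such as WashingtonDataset and IfnEnitDataset, and the cf configuration object, come from the surrounding project. A minimal usage sketch, assuming cf is already populated and image_transform is a composed torchvision transform built elsewhere:

# Hypothetical usage sketch; `cf` and `image_transform` are assumed
# to be set up elsewhere in the project.
from torch.utils.data import DataLoader

combined = WG_IFN_Dataset(cf, train=True, transform=image_transform)
combined.add_weights_of_words()  # optional: loss weights for an unbalanced set

loader = DataLoader(combined, batch_size=cf.batch_size, shuffle=True)
for images, targets in loader:
    break  # one batch, drawn across both datasets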
Example #2
  def __init__(self, cf, train=True, transform=None, data_idx_WG=np.arange(1),
               data_idx_IFN=np.arange(1), complement_idx=False):
    self.train = train  # training set or test set
    if len(data_idx_WG) == 1:
        self.datasetWG = WashingtonDataset(cf, train=self.train, transform=transform)
    else:
        self.datasetWG = WashingtonDataset(cf, train=self.train, transform=transform,
                                           data_idx=data_idx_WG, complement_idx=True)
    if len(data_idx_IFN) == 1:
        self.datasetIFN = IfnEnitDataset(cf, train=self.train, transform=transform)
    else:
        self.datasetIFN = IfnEnitDataset(cf, train=self.train, transform=transform,
                                         data_idx=data_idx_IFN, complement_idx=True)

    self.data_idx_WG = self.datasetWG.data_idx  # kept so the same split indices can be passed to the complementary set
    self.data_idx_IFN = self.datasetIFN.data_idx  # kept so the same split indices can be passed to the complementary set
Example #3
    def __init__(self, cf, train=True, transform=None):

        # cf.train_split = False # this should always be false, as we are keeping one folder for testing
        self.train = train  # training set or test set

        trn_folder = cf.folders_to_use.replace(
            cf.IFN_test[-1], '')  # removing the test set from train folders

        # backing up the original paths
        dataset_path = cf.dataset_path_IFN
        gt_path = cf.gt_path_IFN

        cf.dataset_path_IFN = dataset_path.replace(cf.IFN_test,
                                                   'set_' + trn_folder[0])
        cf.gt_path_IFN = gt_path.replace(cf.IFN_test, 'set_' + trn_folder[0])
        self.datasetIFN_1 = IfnEnitDataset(cf,
                                           train=self.train,
                                           transform=transform)

        cf.dataset_path_IFN = dataset_path.replace(cf.IFN_test,
                                                   'set_' + trn_folder[1])
        cf.gt_path_IFN = gt_path.replace(cf.IFN_test, 'set_' + trn_folder[1])
        self.datasetIFN_2 = IfnEnitDataset(cf,
                                           train=self.train,
                                           transform=transform)

        cf.dataset_path_IFN = dataset_path.replace(cf.IFN_test,
                                                   'set_' + trn_folder[2])
        cf.gt_path_IFN = gt_path.replace(cf.IFN_test, 'set_' + trn_folder[2])
        self.datasetIFN_3 = IfnEnitDataset(cf,
                                           train=self.train,
                                           transform=transform)

        cf.dataset_path_IFN = dataset_path.replace(cf.IFN_test,
                                                   'set_' + trn_folder[3])
        cf.gt_path_IFN = gt_path.replace(cf.IFN_test, 'set_' + trn_folder[3])
        self.datasetIFN_4 = IfnEnitDataset(cf,
                                           train=self.train,
                                           transform=transform)

        self.IFN_1_len = len(self.datasetIFN_1)
        self.IFN_2_len = len(self.datasetIFN_2)
        self.IFN_3_len = len(self.datasetIFN_3)
        self.IFN_4_len = len(self.datasetIFN_4)

        cf.dataset_path_IFN = dataset_path
        cf.gt_path_IFN = gt_path  # restore the original paths; they are needed when loading the test set
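
The path juggling above temporarily rewrites cf's IFN paths so that each IfnEnitDataset instance loads a different training folder. A toy illustration of the string substitution, using hypothetical paths:

# Toy illustration of the substitution above; all paths are hypothetical.
dataset_path = '/data/ifnenit/set_e/images'  # pretend original path
IFN_test = 'set_e'                           # the held-out test folder
folders_to_use = 'abcde'
trn_folder = folders_to_use.replace(IFN_test[-1], '')  # -> 'abcd'

print(dataset_path.replace(IFN_test, 'set_' + trn_folder[0]))
# -> /data/ifnenit/set_a/images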
Example #4
class IAM_IFN_Dataset(data.Dataset):
  def __init__(self, cf, train=True, mode='train', transform=None,
               data_idx_IAM=np.arange(1),
               data_idx_IFN=np.arange(1),
               complement_idx=False):
    self.train = train  # training set or test set
    self.mode = mode
    if len(data_idx_IFN) == 1:
        self.datasetIFN = IfnEnitDataset(cf, train=self.train, transform=transform)
    else:
        self.datasetIFN = IfnEnitDataset(cf, train=self.train, transform=transform,
                                         data_idx=data_idx_IFN, complement_idx=True)
    if len(data_idx_IAM) == 1:
        if mode == 'train':
            # mode is one of train, test, or validate
            self.datasetIAM = iam_train_valid_combined_dataset(cf, train=True, transform=transform)
        else:
            assert mode == 'test'
            self.datasetIAM = IAM_words(cf, mode=self.mode, transform=transform)
    else:
        # Deprecated for the IAM dataset: the split is now based on the
        # train, validate, and test folders.
        # self.datasetIAM = IAM_words(cf, mode=self.mode, transform=transform)
        print('Deprecated by Rawi, as the split is based on train, validate and test')

    self.data_idx_IFN = self.datasetIFN.data_idx  # kept so the same split indices can be passed to the complementary set
    # self.data_idx_IAM = self.datasetIAM.data_idx  # unused: the IAM split is folder-based
          
  def add_weights_of_words(self):  # weights to balance the loss when the data is unbalanced
      self.datasetIFN.add_weights_of_words()
      self.datasetIAM.add_weights_of_words()

  def num_classes(self):
    return self.datasetIAM.num_classes()  # IAM and IFN have the same PHOC size

  def __getitem__(self, index):
    if index < len(self.datasetIFN):
        return self.datasetIFN[index]
    else:
        return self.datasetIAM[index - len(self.datasetIFN)]  # indices past the IFN range fall through to IAM; no sample is skipped

  def __len__(self):
    return len(self.datasetIFN) + len(self.datasetIAM)
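
The __getitem__ above implements the same cumulative-offset indexing as torch.utils.data.ConcatDataset; when the extra bookkeeping (data_idx, add_weights_of_words) is not needed, the two sources could be chained directly. A minimal sketch, reusing the datasetIFN and datasetIAM instances constructed above:

# Sketch: the built-in ConcatDataset performs the same index mapping.
from torch.utils.data import ConcatDataset

combined = ConcatDataset([datasetIFN, datasetIAM])
assert len(combined) == len(datasetIFN) + len(datasetIAM)
first_iam = combined[len(datasetIFN)]  # first IAM sample; nothing is skipped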
Example #5
def get_ifn(cf, image_transform):
    print('...................Loading IFN dataset...................')
    if not cf.IFN_based_on_folds_experiment:
        ''' randomly split training and testing according to the split percentage;
        the folder left out for testing is cf.IFN_test '''
        train_set = IfnEnitDataset(
            cf, train=True, transform=image_transform['image_transform_hdr'])
        test_set = IfnEnitDataset(
            cf,
            train=False,
            transform=image_transform['image_transform_hdr'],
            data_idx=train_set.data_idx,
            complement_idx=True)
    else:
        ''' leave one of the 'abcde' folders out for testing '''
        train_set = IFN_XVAL_Dataset(
            cf, train=True, transform=image_transform['image_transform_hdr'])
        test_set = IfnEnitDataset(
            cf, train=False, transform=image_transform['image_transform_hdr'])
    return train_set, test_set
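
A hedged usage sketch: get_ifn() expects a dict holding the composed transform under the 'image_transform_hdr' key, and cf is the project's configuration object, assumed to be populated elsewhere.

# Sketch: build the transform dict that get_ifn() expects and load the split.
from torchvision import transforms

image_transform = {'image_transform_hdr': transforms.ToTensor()}
train_set, test_set = get_ifn(cf, image_transform)
print(len(train_set), len(test_set))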
Example #6
def test_dataload(cf):
    logger = logging.getLogger('test_dataloader_wg')

    # Image transformations
    if cf.pad_images:
        pad_image = PadImage(
            (globals.MAX_IMAGE_WIDTH, globals.MAX_IMAGE_HEIGHT))

    if cf.resize_images:
        if cf.pad_images:
            image_transform = transforms.Compose([
                pad_image,
                transforms.ToPILImage(),
                transforms.Resize((cf.input_size[0], cf.input_size[1])),  # Resize replaces the deprecated transforms.Scale
                transforms.ToTensor()
            ])
        else:
            image_transform = transforms.Compose([
                transforms.ToPILImage(),
                transforms.Resize((cf.input_size[0], cf.input_size[1])),
                transforms.ToTensor()
            ])
    else:
        if cf.pad_images:
            image_transform = transforms.Compose(
                [pad_image, transforms.ToTensor()])
        else:
            image_transform = transforms.ToTensor()

    if cf.dataset_name == 'WG':
        input_dataset = WashingtonDataset(cf, transform=image_transform)

    elif cf.dataset_name == 'IFN':
        input_dataset = IfnEnitDataset(cf, transform=image_transform)
    else:
        logger.fatal('The dataset \'%s\' is unknown. Use: [WG, IFN]',
                     cf.dataset_name)
        sys.exit(1)  # non-zero exit status to signal the configuration error

#    dataloader = DataLoader(input_dataset, batch_size=cf.batch_size,
#                            shuffle=cf.shuffle, num_workers=cf.num_workers)

    for i in range(len(input_dataset)):
        plt.figure(i)
        plt.xticks([])
        plt.yticks([])
        data, target = input_dataset[i]
        plt.imshow(data.numpy()[0, :, :], 'gray')
        plt.show()

        if i == 102:
            break
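
The commented-out DataLoader above hints at how the dataset would be batched; a minimal sketch, assuming cf carries the batch_size, shuffle, and num_workers fields those lines reference:

# Sketch: batch the dataset instead of plotting one sample at a time.
from torch.utils.data import DataLoader

dataloader = DataLoader(input_dataset, batch_size=cf.batch_size,
                        shuffle=cf.shuffle, num_workers=cf.num_workers)
images, targets = next(iter(dataloader))
print(images.shape)  # (batch, channels, height, width)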
Example #7
  def __init__(self, cf, train=True, mode='train', transform=None,
               data_idx_IAM=np.arange(1),
               data_idx_IFN=np.arange(1),
               complement_idx=False):
    self.train = train  # training set or test set
    self.mode = mode
    if len(data_idx_IFN) == 1:
        self.datasetIFN = IfnEnitDataset(cf, train=self.train, transform=transform)
    else:
        self.datasetIFN = IfnEnitDataset(cf, train=self.train, transform=transform,
                                         data_idx=data_idx_IFN, complement_idx=True)
    if len(data_idx_IAM) == 1:
        if mode == 'train':
            # mode is one of train, test, or validate
            self.datasetIAM = iam_train_valid_combined_dataset(cf, train=True, transform=transform)
        else:
            assert mode == 'test'
            self.datasetIAM = IAM_words(cf, mode=self.mode, transform=transform)
    else:
        # Deprecated for the IAM dataset: the split is now based on the
        # train, validate, and test folders.
        # self.datasetIAM = IAM_words(cf, mode=self.mode, transform=transform)
        print('Deprecated by Rawi, as the split is based on train, validate and test')

    self.data_idx_IFN = self.datasetIFN.data_idx  # kept so the same split indices can be passed to the complementary set
Example #8
class IFN_XVAL_Dataset(data.Dataset):
    def __init__(self, cf, train=True, transform=None):

        # cf.train_split = False # this should always be false, as we are keeping one folder for testing
        self.train = train  # training set or test set

        trn_folder = cf.folders_to_use.replace(
            cf.IFN_test[-1], '')  # removing the test set from train folders

        # backing up the original paths
        dataset_path = cf.dataset_path_IFN
        gt_path = cf.gt_path_IFN

        cf.dataset_path_IFN = dataset_path.replace(cf.IFN_test,
                                                   'set_' + trn_folder[0])
        cf.gt_path_IFN = gt_path.replace(cf.IFN_test, 'set_' + trn_folder[0])
        self.datasetIFN_1 = IfnEnitDataset(cf,
                                           train=self.train,
                                           transform=transform)

        cf.dataset_path_IFN = dataset_path.replace(cf.IFN_test,
                                                   'set_' + trn_folder[1])
        cf.gt_path_IFN = gt_path.replace(cf.IFN_test, 'set_' + trn_folder[1])
        self.datasetIFN_2 = IfnEnitDataset(cf,
                                           train=self.train,
                                           transform=transform)

        cf.dataset_path_IFN = dataset_path.replace(cf.IFN_test,
                                                   'set_' + trn_folder[2])
        cf.gt_path_IFN = gt_path.replace(cf.IFN_test, 'set_' + trn_folder[2])
        self.datasetIFN_3 = IfnEnitDataset(cf,
                                           train=self.train,
                                           transform=transform)

        cf.dataset_path_IFN = dataset_path.replace(cf.IFN_test,
                                                   'set_' + trn_folder[3])
        cf.gt_path_IFN = gt_path.replace(cf.IFN_test, 'set_' + trn_folder[3])
        self.datasetIFN_4 = IfnEnitDataset(cf,
                                           train=self.train,
                                           transform=transform)

        self.IFN_1_len = len(self.datasetIFN_1)
        self.IFN_2_len = len(self.datasetIFN_2)
        self.IFN_3_len = len(self.datasetIFN_3)
        self.IFN_4_len = len(self.datasetIFN_4)

        cf.dataset_path_IFN = dataset_path
        cf.gt_path_IFN = gt_path  # restore the original paths; they are needed when loading the test set

    def __getitem__(self, index):
        if index < self.IFN_1_len:
            return self.datasetIFN_1[index]

        elif index < (self.IFN_1_len + self.IFN_2_len):
            index = index - self.IFN_1_len
            return self.datasetIFN_2[index]  # the offsets ensure no sample is skipped

        elif index < (self.IFN_1_len + self.IFN_2_len + self.IFN_3_len):
            index = index - (self.IFN_1_len + self.IFN_2_len)
            return self.datasetIFN_3[index]

        else:  # this is IFN_4
            index = index - (self.IFN_1_len + self.IFN_2_len + self.IFN_3_len)
            return self.datasetIFN_4[index]

    def __len__(self):
        return self.IFN_1_len + self.IFN_2_len + self.IFN_3_len + self.IFN_4_len

    def add_weights_of_words(self):  # weights to balance the loss when the data is unbalanced
        self.datasetIFN_1.add_weights_of_words()
        self.datasetIFN_2.add_weights_of_words()
        self.datasetIFN_3.add_weights_of_words()
        self.datasetIFN_4.add_weights_of_words()

    def num_classes(self):
        return self.datasetIFN_1.num_classes()  # does not matter which one, as they all have the same PHOC length
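
A hedged sketch of a single leave-one-folder-out round, following the pattern in get_ifn() above; cf.dataset_path_IFN and cf.gt_path_IFN are assumed to already point at the held-out folder named by cf.IFN_test (e.g. 'set_e'), and image_transform is assumed to be built as in test_dataload():

# Sketch: one cross-validation round over the IFN/ENIT folders.
train_set = IFN_XVAL_Dataset(cf, train=True, transform=image_transform)  # the four remaining folds
test_set = IfnEnitDataset(cf, train=False, transform=image_transform)    # the held-out fold
print(len(train_set), len(test_set))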