# Example 1
class WG_IFN_Dataset(data.Dataset):
  """Concatenation of the Washington (WG) and IFN/ENIT datasets.

  Samples are indexed with WG first: indices [0, len(WG)) map to WG and
  indices [len(WG), len(self)) map to IFN, so no sample is skipped.

  Args:
    cf: configuration object forwarded to the underlying datasets.
    train: load the training split when True, the test split otherwise.
    transform: optional transform forwarded to both underlying datasets.
    data_idx_WG, data_idx_IFN: optional sample-index arrays restricting the
      underlying datasets. ``None`` (the default) or a length-1 array — the
      legacy sentinel previously spelled ``np.arange(1)`` — loads the full
      split. A numpy-array default argument is evaluated once at function
      definition; the ``None`` sentinel avoids that pitfall while remaining
      backward compatible.
    complement_idx: NOTE(review): accepted for interface compatibility but
      currently unused — when an index array is supplied, the underlying
      datasets are always built with ``complement_idx=True``. Confirm intent.
  """
  def __init__(self, cf, train=True, transform=None, data_idx_WG=None,
               data_idx_IFN=None, complement_idx=False):
    self.train = train  # training set or test set

    # None / length-1 array means "use the whole split"; otherwise build the
    # complementary subset defined by the given indices.
    if data_idx_WG is None or len(data_idx_WG) == 1:
        self.datasetWG = WashingtonDataset(cf, train=self.train,
                                           transform=transform)
    else:
        self.datasetWG = WashingtonDataset(cf, train=self.train,
                                           transform=transform,
                                           data_idx=data_idx_WG,
                                           complement_idx=True)

    if data_idx_IFN is None or len(data_idx_IFN) == 1:
        self.datasetIFN = IfnEnitDataset(cf, train=self.train,
                                         transform=transform)
    else:
        self.datasetIFN = IfnEnitDataset(cf, train=self.train,
                                         transform=transform,
                                         data_idx=data_idx_IFN,
                                         complement_idx=True)

    # Expose the resolved index arrays so they can be passed from one split
    # (e.g. train) to build the complementary split (e.g. validation).
    self.data_idx_WG = self.datasetWG.data_idx
    self.data_idx_IFN = self.datasetIFN.data_idx

  def add_weights_of_words(self):
      """Attach per-word loss weights to both sub-datasets (class balancing
      when the data is unbalanced)."""
      self.datasetWG.add_weights_of_words()
      self.datasetIFN.add_weights_of_words()

  def num_classes(self):
    """Return the PHOC size; IFN and WG have the same PHOC size."""
    return self.datasetIFN.num_classes()

  def __getitem__(self, index):
    """Fetch sample *index*: WG for index < len(WG), else IFN shifted by
    len(WG). The two ranges tile [0, len(self)) exactly — nothing skipped."""
    if index < len(self.datasetWG):
        return self.datasetWG[index]
    return self.datasetIFN[index - len(self.datasetWG)]

  def __len__(self):
    return len(self.datasetWG) + len(self.datasetIFN)
# Example 2
class IFN_XVAL_Dataset(data.Dataset):
    """Cross-validation dataset over four IFN/ENIT folders.

    The held-out test folder (``cf.IFN_test``) is removed from
    ``cf.folders_to_use`` and the four remaining folders are loaded as
    sub-datasets, concatenated in folder order. ``cf.dataset_path_IFN`` and
    ``cf.gt_path_IFN`` are temporarily rewritten while each sub-dataset
    loads and restored afterwards, so the test set can still be loaded
    from the original paths.

    Bug fix vs. the previous revision: ``add_weights_of_words`` now weights
    every sub-dataset, not only the first one (matching WG_IFN_Dataset).
    """

    def __init__(self, cf, train=True, transform=None):
        self.train = train  # training set or test set

        # Remove the test-set folder letter from the train folders.
        trn_folder = cf.folders_to_use.replace(cf.IFN_test[-1], '')

        # Back up the original paths; they are patched per folder below and
        # restored at the end (needed when loading the test set later).
        dataset_path = cf.dataset_path_IFN
        gt_path = cf.gt_path_IFN

        # Load one sub-dataset per remaining folder (exactly four, as before).
        self.datasets = []
        for folder in trn_folder[:4]:
            cf.dataset_path_IFN = dataset_path.replace(cf.IFN_test,
                                                       'set_' + folder)
            cf.gt_path_IFN = gt_path.replace(cf.IFN_test, 'set_' + folder)
            self.datasets.append(
                IfnEnitDataset(cf, train=self.train, transform=transform))

        # Named attributes kept for backward compatibility with callers.
        (self.datasetIFN_1, self.datasetIFN_2,
         self.datasetIFN_3, self.datasetIFN_4) = self.datasets
        self.IFN_1_len = len(self.datasetIFN_1)
        self.IFN_2_len = len(self.datasetIFN_2)
        self.IFN_3_len = len(self.datasetIFN_3)
        self.IFN_4_len = len(self.datasetIFN_4)

        # Restore the original paths for subsequent (test-set) loading.
        cf.dataset_path_IFN = dataset_path
        cf.gt_path_IFN = gt_path

    def __getitem__(self, index):
        """Map a global index onto the matching sub-dataset.

        The sub-dataset ranges tile [0, len(self)) contiguously, so no
        sample is skipped; out-of-range indices raise IndexError.
        """
        for ds in self.datasets:
            if index < len(ds):
                return ds[index]
            index -= len(ds)
        raise IndexError('index out of range')

    def __len__(self):
        return sum(len(ds) for ds in self.datasets)

    def add_weights_of_words(self):
        """Attach per-word loss weights to *every* sub-dataset (class
        balancing when the data is unbalanced). Previously only the first
        folder's dataset was weighted — an inconsistency with
        WG_IFN_Dataset.add_weights_of_words."""
        for ds in self.datasets:
            ds.add_weights_of_words()

    def num_classes(self):
        """Return the PHOC length; identical across all four folders."""
        return self.datasetIFN_1.num_classes()