def data_preprare(self, test=False):
    """Build the dataset object(s) for the files found under ``self.data_dir``.

    Args:
        test: Forwarded to ``get_files`` as its second argument. When truthy,
            a single dataset is built with ``test=True`` and the 'val'
            transform.

    Returns:
        The test dataset when ``test`` is truthy; otherwise a
        ``(train_dataset, val_dataset)`` tuple.
    """
    file_info = get_files(self.data_dir, test)

    # Test mode: the raw result of get_files is handed to the dataset as-is.
    if test:
        return dataset(
            list_data=file_info,
            test=True,
            transform=data_transforms('val', self.normalise_type),
        )

    # Otherwise elements 0 and 1 of the result become the "data" and "label"
    # columns of a frame that is split 80/20 by train_test_split_order.
    frame = pd.DataFrame({"data": file_info[0], "label": file_info[1]})
    split = train_test_split_order(frame, test_size=0.2, num_classes=12)
    train_frame, val_frame = split

    train_dataset = dataset(
        list_data=train_frame,
        transform=data_transforms('train', self.normalise_type),
    )
    val_dataset = dataset(
        list_data=val_frame,
        transform=data_transforms('val', self.normalise_type),
    )

    # Report the size of each split on stdout.
    print(len(train_dataset))
    print(len(val_dataset))
    return train_dataset, val_dataset
def data_preprare(self, test=False):
    """Build the dataset object(s) for the files found under ``self.data_dir``.

    Args:
        test: Forwarded to ``get_files`` as its second argument. When truthy,
            a single dataset is built with ``test=True`` and the 'val'
            transform.

    Returns:
        The test dataset when ``test`` is truthy; otherwise a
        ``(train_dataset, val_dataset)`` tuple produced from a stratified
        80/20 split with a fixed ``random_state`` of 40.
    """
    file_info = get_files(self.data_dir, test)

    # Test mode: the raw result of get_files is handed to the dataset as-is.
    if test:
        return dataset(
            list_data=file_info,
            test=True,
            transform=data_transforms('val', self.normalise_type),
        )

    # Elements 0 and 1 of the result become the "data" and "label" columns.
    frame = pd.DataFrame({"data": file_info[0], "label": file_info[1]})

    # Stratify on the label column so both splits keep the class proportions.
    train_frame, val_frame = train_test_split(
        frame,
        test_size=0.20,
        random_state=40,
        stratify=frame["label"],
    )

    train_dataset = dataset(
        list_data=train_frame,
        transform=data_transforms('train', self.normalise_type),
    )
    val_dataset = dataset(
        list_data=val_frame,
        transform=data_transforms('val', self.normalise_type),
    )
    return train_dataset, val_dataset
def data_split(self, transfer_learning=True):
    """Build the source- and target-domain datasets.

    The source domain (``self.source_N``) is always split into train and
    validation datasets. The target domain (``self.target_N``) is split the
    same way only when ``transfer_learning`` is truthy; otherwise all of its
    samples go into a single dataset built with the 'val' transform.

    Args:
        transfer_learning: Selects whether the target domain is split into
            train/val (truthy) or kept whole as a validation set (falsy).

    Returns:
        ``(source_train, source_val, target_train, target_val)`` when
        ``transfer_learning`` is truthy, otherwise
        ``(source_train, source_val, target_val)``.
    """

    def _load_frame(domain):
        # Load one domain's files into a frame with "data" and "label"
        # columns taken from elements 0 and 1 of the get_files result.
        list_data = get_files(self.data_dir, domain)
        return pd.DataFrame({"data": list_data[0], "label": list_data[1]})

    def _split_datasets(domain):
        # Stratified 80/20 split (fixed random_state) of one domain, wrapped
        # into train and val datasets with their respective transforms.
        data_pd = _load_frame(domain)
        train_pd, val_pd = train_test_split(
            data_pd,
            test_size=0.2,
            random_state=40,
            stratify=data_pd["label"],
        )
        train_set = dataset(list_data=train_pd,
                            transform=self.data_transforms['train'])
        val_set = dataset(list_data=val_pd,
                          transform=self.data_transforms['val'])
        return train_set, val_set

    # The source domain is handled identically in both modes, so do it once.
    source_train, source_val = _split_datasets(self.source_N)

    if transfer_learning:
        target_train, target_val = _split_datasets(self.target_N)
        return source_train, source_val, target_train, target_val

    # Without transfer learning the whole target domain is used for
    # validation: no split, 'val' transform only.
    target_val = dataset(list_data=_load_frame(self.target_N),
                         transform=self.data_transforms['val'])
    return source_train, source_val, target_val