def test_split_ratio_path_move():
    """Split a disposable copy of the image tree with move=True; no files may be lost."""
    base = os.path.dirname(__file__)
    src_dir = os.path.join(base, "imgs")
    work_dir = os.path.join(base, "imgs_move")
    out_dir = os.path.join(base, "output")

    # Work on a throwaway copy so move=True does not destroy the fixture tree.
    work_path = pathlib.Path(work_dir)
    if work_path.exists():
        rm_tree(work_path)
    shutil.copytree(src_dir, work_dir)
    shutil.rmtree(out_dir, ignore_errors=True)

    ratio(
        work_path,
        output=pathlib.Path(out_dir),
        seed=1337,
        ratio=(0.8, 0.2),
        group_prefix=None,
        move=True,
    )

    # Every jpg from the original tree must reappear in the split output.
    n_src = sum(1 for _ in pathlib.Path(src_dir).glob("**/*.jpg"))
    n_out = sum(1 for _ in pathlib.Path(out_dir).glob("**/*.jpg"))
    assert n_src == n_out
def __init__(self, encoder, DATA_PATH, batch_size, val_split, hidden_dims,
             train_transform, val_transform, num_workers, **kwargs):
    """Classifier head over a pretrained encoder; auto-splits the dataset if needed."""
    super().__init__()

    # --- data configuration ---
    self.DATA_PATH = DATA_PATH
    self.val_split = val_split
    self.batch_size = batch_size
    self.hidden_dims = hidden_dims
    self.train_transform = train_transform
    self.val_transform = val_transform
    self.num_workers = num_workers

    # Rebuild the split directory from scratch when the dataset lacks an
    # explicit train/val layout.
    shutil.rmtree('split_data', ignore_errors=True)
    has_layout = path.isdir(f"{self.DATA_PATH}/train") and path.isdir(f"{self.DATA_PATH}/val")
    if not has_layout:
        splitfolders.ratio(self.DATA_PATH,
                           output=f"split_data",
                           ratio=(1 - self.val_split, self.val_split),
                           seed=10)
        self.DATA_PATH = 'split_data'
        print('automatically splitting data into train and validation data')

    # One class per subfolder of the training split.
    self.num_classes = len(os.listdir(f'{self.DATA_PATH}/train'))

    # --- model components ---
    self.eval_acc = Accuracy()
    self.encoder, self.embedding_size = load_encoder(encoder, kwargs)
    self.fc1 = nn.Linear(self.embedding_size, self.hidden_dims)
    self.fc2 = nn.Linear(self.hidden_dims, self.num_classes)
def slice_vid(**kwargs):
    """Extract every `fps`-th frame of a video to disk.

    Expected kwargs:
        video: path to the input video file.
        fps:   save one frame out of every `fps` frames.
        path:  either a single output directory, or a list
               [frames_dir, split_dir]; with a list the saved frames are
               additionally split into train/val/... folders afterwards.
        ratio: split ratios, used only when `path` is a list.
    """
    video = cv2.VideoCapture(kwargs["video"])
    frame_step = kwargs["fps"]
    frame_counter = 1
    saved = 0  # running image id; renamed from `id` to avoid shadowing the builtin

    ok, frame = video.read()
    if isinstance(kwargs["path"], list):  # FIX: isinstance instead of type() ==
        frames_dir, split_dir = kwargs["path"][0], kwargs["path"][1]
        while ok:
            if frame_counter % frame_step == 0:
                saved += 1
                save_img(image=frame, id=saved, path=frames_dir)
            ok, frame = video.read()
            frame_counter += 1
        splitfolders.ratio(frames_dir, output=split_dir, ratio=kwargs["ratio"])
    else:
        while ok:
            ok, frame = video.read()
            # FIX: at end of stream read() returns (False, None); the original
            # could pass that None frame to save_img. Save only on a good read.
            if ok and frame_counter % frame_step == 0:
                saved += 1
                save_img(image=frame, id=saved, path=kwargs["path"])
            frame_counter += 1
    video.release()  # FIX: free the capture handle (was leaked)
def __init__(self, encoder, epochs, DATA_PATH, withhold, batch_size, val_split, hidden_dims, train_transform, val_transform, num_workers, **kwargs):
    """Contrastive (SimCLR-style) training module.

    Auto-splits DATA_PATH into train/val/withheld folders when no explicit
    train/val layout exists, loads the encoder, and attaches an MLP
    projection head for the contrastive loss.
    """
    #data stuff
    self.DATA_PATH = DATA_PATH
    self.val_split = val_split
    self.batch_size = batch_size
    self.hidden_dims = hidden_dims
    self.train_transform = train_transform
    self.val_transform = val_transform
    self.withhold = withhold
    self.epochs = epochs
    #self.num_workers = num_workers
    # Rebuild the split directory from scratch each run.
    shutil.rmtree('split_data', ignore_errors=True)
    if not (path.isdir(f"{self.DATA_PATH}/train") and path.isdir(f"{self.DATA_PATH}/val")):
        # Three-way split: train gets the remainder after val and withheld shares.
        splitfolders.ratio(self.DATA_PATH, output=f"split_data", ratio=(1 - self.val_split - self.withhold, self.val_split, self.withhold), seed=10)
        self.DATA_PATH = 'split_data'
        print(
            f'automatically splitting data into train and validation data {self.val_split} and withhold {self.withhold}'
        )
    # Total number of training files (walked recursively) for the parent class.
    self.num_samples = sum(
        [len(files) for r, d, files in os.walk(f'{self.DATA_PATH}/train')])
    #model stuff
    # NOTE(review): gpus=1 is hard-coded here — confirm multi-GPU handling elsewhere.
    super().__init__(gpus=1, num_samples=self.num_samples, batch_size=batch_size, dataset='None', max_epochs=epochs)
    self.encoder, self.embedding_size = load_encoder(encoder, kwargs)

    class Projection(nn.Module):
        # MLP projection head: linear -> BatchNorm -> ReLU -> linear (no bias),
        # output L2-normalized per sample.
        def __init__(self, input_dim, hidden_dim=2048, output_dim=128):
            super().__init__()
            self.output_dim = output_dim
            self.input_dim = input_dim
            self.hidden_dim = hidden_dim
            self.lin = nn.Linear(self.input_dim, self.hidden_dim)
            self.b = nn.BatchNorm1d(self.hidden_dim)
            self.l = nn.Linear(self.hidden_dim, self.output_dim, bias=False)

        def forward(self, x):
            x = self.lin(x)
            x = F.relu(self.b(x))
            x = self.l(x)
            # Unit-normalize embeddings for the contrastive loss.
            return F.normalize(x, dim=1)

    self.projection = Projection(input_dim=self.embedding_size, hidden_dim=self.hidden_dims)
def split_images_into_right_format(self, ratio=(0.8, 0.1, 0.1)):
    """Split the instance's input folder into train/val/test under its output folder."""
    split_options = dict(
        output=self._output_folder,
        seed=1337,          # fixed seed keeps the split reproducible
        ratio=ratio,
        group_prefix=None,
    )
    splitfolders.ratio(self._input_folder, **split_options)
def test_split_ratio_prefix_error_2():
    """A grouped split over inconsistently grouped files must raise ValueError."""
    here = os.path.dirname(__file__)
    input_dir = os.path.join(here, "imgs_texts_error_2")
    output_dir = os.path.join(here, "output")

    shutil.rmtree(output_dir, ignore_errors=True)

    with pytest.raises(ValueError):
        ratio(input_dir, output_dir, group_prefix=2)
def divide(_input, _output):
    """Split `_input` into train/val/test (80/10/10) folders under `_output`."""
    # FIX: ignore_errors=True — without it rmtree raises FileNotFoundError on
    # the first run, before ./labeling/divided_output exists (matches the
    # sibling splitfolders call sites in this file).
    shutil.rmtree(r"./labeling/divided_output", ignore_errors=True)
    splitfolders.ratio(_input,
                       output=_output + "/",
                       seed=1337,
                       ratio=(.8, .1, .1),
                       group_prefix=None)  # default values
    print("Dividing images into folders.")
    return
def test_split_ratio_prefix():
    """Grouped split (pairs via group_prefix=2) keeps the total jpg count unchanged."""
    here = os.path.dirname(__file__)
    input_dir = os.path.join(here, "imgs_texts")
    output_dir = os.path.join(here, "output")

    shutil.rmtree(output_dir, ignore_errors=True)
    ratio(input_dir, output_dir, group_prefix=2)

    def count_jpgs(folder):
        return sum(1 for _ in pathlib.Path(folder).glob("**/*.jpg"))

    # ensure the number of pics is the same
    assert count_jpgs(input_dir) == count_jpgs(output_dir)
def setup(self, stage=None):
    """Build train/val ImageFolder datasets, auto-splitting the raw folder if needed."""
    shutil.rmtree('split_data', ignore_errors=True)
    already_split = (path.isdir(f"{self.PATH}/train")
                     and path.isdir(f"{self.PATH}/validation"))

    if already_split:
        # Dataset ships with its own train/validation layout.
        self.train = ImageFolder(f'{self.PATH}/train', transform=self.train_transform)
        if self.val_split > 0:
            self.val = ImageFolder(f'{self.PATH}/validation', transform=self.val_transform)
    else:
        # Create a reproducible split on disk, then load from it.
        splitfolders.ratio(self.PATH, output=f"split_data",
                           ratio=(1 - self.val_split, self.val_split), seed=10)
        self.train = ImageFolder('split_data/train', transform=self.train_transform)
        if self.val_split > 0:
            self.val = ImageFolder('split_data/val', transform=self.val_transform)

    self.num_classes = len(self.train.classes)
    self.num_samples = len(self.train)
    print('We have the following classes: ', self.train.classes)
def setup(self):
    """Prepare finetuning datasets; split the raw folder when no train/val layout exists."""
    shutil.rmtree('split_data', ignore_errors=True)
    needs_split = not (path.isdir(f"{self.DATA_PATH}/train")
                       and path.isdir(f"{self.DATA_PATH}/val"))
    if needs_split:
        splitfolders.ratio(self.DATA_PATH, output=f"split_data",
                           ratio=(1 - self.val_split, self.val_split), seed=10)

    # NOTE(review): 'split_data' is read unconditionally — when DATA_PATH
    # already has train/val subfolders the split_data directory does not
    # exist at this point; confirm intended.
    train_ds = ImageFolder('split_data/train', transform=self.train_transform)
    val_ds = ImageFolder('split_data/val', transform=self.val_transform)

    self.finetune_dataset = train_ds
    self.finetune_val_dataset = val_ds
    self.num_samples = len(train_ds)
    self.num_classes = len(train_ds.classes)
def main():
    """Split the AWE ear dataset into train/val/test folders (80/10/10)."""
    src_dir = "dataset/AWEDataset/awe"
    dst_dir = "dataset/AWEDataset/awe-train-test-val"

    test_frac = 0.1
    val_frac = 0.1
    train_frac = 1.0 - test_frac - val_frac  # remainder goes to training

    splitfolders.ratio(
        src_dir,
        output=dst_dir,
        seed=128,  # reproducible split
        ratio=(train_frac, val_frac, test_frac),
        group_prefix=None,
    )
def etl_data(aertist_dir):
    """Split artist images 80/20 into train/val and log both halves to MLflow."""
    with mlflow.start_run() as mlrun:
        # Two scratch directories; the first is unused downstream but kept
        # because creating it is an observable side effect.
        in_local_dir = tempfile.mkdtemp()
        out_local_dir = tempfile.mkdtemp()
        print(out_local_dir)

        # The path argument arrives wrapped in quotes; strip first/last char.
        src = aertist_dir[1:-1]
        print("aertist_dir=%s" % src)

        splitfolders.ratio(src, output=out_local_dir, seed=1337,
                           ratio=(.8, .2), group_prefix=None)

        artist_train = os.path.join(out_local_dir, "train")
        artist_validation = os.path.join(out_local_dir, "val")
        # print("Uploading artist_split_lib: %s" % artist_train)
        mlflow.log_artifact(artist_train, "artist_train")
        mlflow.log_artifact(artist_validation, "artist_val")
def __init__(self, encoder, DATA_PATH, withhold, batch_size, val_split, hidden_dims,
             train_transform, val_transform, num_workers, **kwargs):
    """Linear-evaluation classifier over an SSL encoder; auto-splits data if needed."""
    super().__init__()

    # --- data configuration ---
    self.DATA_PATH = DATA_PATH
    self.val_split = val_split
    self.batch_size = batch_size
    self.hidden_dims = hidden_dims
    self.train_transform = train_transform
    self.val_transform = val_transform
    self.num_workers = num_workers
    self.withhold = withhold

    # Rebuild the split directory when the dataset has no train/val layout.
    shutil.rmtree('split_data', ignore_errors=True)
    has_layout = path.isdir(f"{self.DATA_PATH}/train") and path.isdir(f"{self.DATA_PATH}/val")
    if not has_layout:
        splitfolders.ratio(
            self.DATA_PATH,
            output=f"split_data",
            ratio=(1 - self.val_split - self.withhold, self.val_split, self.withhold),
            seed=10,
        )
        self.DATA_PATH = 'split_data'
        print(f'automatically splitting data into train and validation data {self.val_split} and withhold {self.withhold}')

    # One class per subfolder of the training split.
    self.num_classes = len(os.listdir(f'{self.DATA_PATH}/train'))

    # --- model components ---
    self.train_acc = Accuracy()
    self.val_acc = Accuracy(compute_on_step=False)
    print('KWARGS:', kwargs)
    self.encoder, self.embedding_size = load_encoder(encoder, kwargs)
    self.linear_layer = SSLEvaluator(n_input=self.embedding_size,
                                     n_classes=self.num_classes,
                                     p=0.1,
                                     n_hidden=self.hidden_dims)
def load_data(self):
    """Build labeled train/val loaders and an unlabeled inference loader."""
    # Preprocessing shared by all three datasets.
    data_transform = transforms.Compose([
        transforms.CenterCrop(100),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])
    ])

    # FIX: ignore_errors=True — the bare rmtree raised FileNotFoundError on
    # the first run, before ./labeling/divided_output exists.
    shutil.rmtree(r"./labeling/divided_output", ignore_errors=True)
    splitfolders.ratio(r"./labeling/input",
                       output=r"./labeling/divided_output",
                       seed=1337,
                       ratio=(.7, .3),
                       group_prefix=None)

    self.train_dataset = datasets.ImageFolder(root='./labeling/divided_output/train',
                                              transform=data_transform)
    self.val_dataset = datasets.ImageFolder(root='./labeling/divided_output/val',
                                            transform=data_transform)
    # Unlabeled images carry their paths so predictions can be traced back.
    self.dataset_unlabeled = ImageFolderWithPaths('./tree_segmentation/output',
                                                  transform=data_transform)

    self.train_loader = torch.utils.data.DataLoader(self.train_dataset, batch_size=64,
                                                    shuffle=True, num_workers=4)
    self.val_loader = torch.utils.data.DataLoader(self.val_dataset, batch_size=64,
                                                  shuffle=False, num_workers=4)
    self.image_loader = torch.utils.data.DataLoader(self.dataset_unlabeled, batch_size=1,
                                                    shuffle=False, num_workers=0)

    self.classes = self.train_dataset.classes
    self.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
def setup(self):
    """Prepare finetune train/val datasets from tif images.

    Also writes two sanity-check renders (a.png, b.png) to disk so the
    transforms can be visually inspected.
    """
    shutil.rmtree('split_data', ignore_errors=True)
    if not (path.isdir(f"{self.DATA_PATH}/train") and path.isdir(f"{self.DATA_PATH}/val")):
        splitfolders.ratio(self.DATA_PATH, output=f"split_data", ratio=(1 - self.val_split, self.val_split), seed=10)
    # NOTE(review): the split_data output produced above is never used below —
    # both FolderDataset_helper instances read straight from self.DATA_PATH
    # with their own val_split logic; confirm the splitfolders call is needed.
    temp = ImageFolder(self.DATA_PATH, transform=self.train_transform)
    # Debug dump of sample #8: swap axes 0<->2 (CHW -> WHC) so imshow accepts it.
    plt.imshow(np.swapaxes(np.array(temp[8][0]), 0, 2))
    plt.savefig('a.png')
    plt.clf()
    plt.close()
    plt.cla()
    # Training half: validation=False selects the train portion of the split.
    self.finetune_dataset = FolderDataset_helper(
        self.DATA_PATH,
        validation=False,
        val_split=self.val_split,
        withold_train_percent=0,
        transform=self.train_transform,
        image_type='tif')
    # Second debug dump, now through the helper dataset's transform pipeline.
    plt.imshow(np.swapaxes(np.array(self.finetune_dataset[0][0]), 0, 2))
    plt.savefig('b.png')
    # Validation half of the same split.
    self.finetune_val_dataset = FolderDataset_helper(
        self.DATA_PATH,
        validation=True,
        val_split=self.val_split,
        withold_train_percent=0,
        transform=self.val_transform,
        image_type='tif')
    self.num_samples = len(self.finetune_dataset)
    self.num_classes = len(set(self.finetune_dataset.labels))
import splitfolders

# Split BdSL digit images into train/val/test (80/10/10).
# NOTE(review): the ".." prefix has no path separator ("..Files/...") —
# presumably "../Files/..." was intended; confirm against the directory layout.
splitfolders.ratio("..Files/BdSL/BdSL_digits/main",
                   output="..Files/BdSL/BdSL_digits/split",
                   seed=13,
                   ratio=(.8, .1, .1),
                   group_prefix=None)
def train_test_split(input_folder, output_folder):
    """Split `input_folder` into train/val/test (80/10/10) under `output_folder`."""
    split_ratio = (.8, .1, .1)  # train, val, test
    splitfolders.ratio(input_folder,
                       output=output_folder,
                       seed=1337,
                       ratio=split_ratio)
import splitfolders # or import split_folders splitfolders.ratio("dataset", output="dataset", seed=1337, ratio=(.8, .2), group_prefix=None)
# -*- coding: utf-8 -*- """ """ import splitfolders import os input_dir = os.path.join('C:/Users/Desktop/Bayesian CNN/flowers/') output_dir = os.path.join('C:/Users/Desktop/Bayesian CNN/flowers_splitted/') splitfolders.ratio(input_dir, output=output_dir, seed=1337, ratio=(.8, .2), group_prefix=None)
def modeling():
    """Train a small CNN on the Kvasir v2 dataset and return the fitted model.

    Splits the raw dataset 70/30 on disk, materializes both generators into
    arrays, trains for 16 epochs on the training arrays, and returns the model.
    """
    input_dir = os.path.join(r'C:\Users\Lahiru\Desktop\Dataset\kvasir-dataset-v2')
    output_dir = os.path.join(r'C:\Users\Lahiru\Desktop\Dataset\kvasir-dataset-v2_splitted')

    # Deterministic 70/30 train/val split on disk.
    splitfolders.ratio(input_dir, output=output_dir, seed=1337, ratio=(.7, .3),
                       group_prefix=None)

    train_dir = os.path.join(r'C:\Users\Lahiru\Desktop\Dataset\kvasir-dataset-v2_splitted\train')
    test_dir = os.path.join(r'C:\Users\Lahiru\Desktop\Dataset\kvasir-dataset-v2_splitted\val')

    train_datagen = ImageDataGenerator(rescale=1 / 255)
    test_datagen = ImageDataGenerator(rescale=1 / 255)
    train_generator = train_datagen.flow_from_directory(train_dir,
                                                        target_size=(75, 75),
                                                        batch_size=214,
                                                        class_mode='categorical',
                                                        subset='training')
    test_generator = test_datagen.flow_from_directory(test_dir,
                                                      target_size=(75, 75),
                                                      batch_size=37,
                                                      class_mode='categorical')

    def _unpack(generator):
        """Materialize a directory iterator into (images, sparse label) arrays."""
        images, labels = [], []
        for batch_index in range(len(generator)):
            batch_x, batch_y = generator[batch_index]
            images.extend(batch_x)
            # Collapse one-hot labels to sparse class indices.
            labels.extend(np.argmax(one_hot) for one_hot in batch_y)
        return np.array(images), np.array(labels)

    # FIX: replaced four manual index-append loops with one helper; also
    # dropped the unused `classes` list the original built and never read.
    X_train, y_train = _unpack(train_generator)
    X_test, y_test = _unpack(test_generator)  # held-out arrays, kept for parity

    cnn = models.Sequential([
        layers.Conv2D(filters=32, kernel_size=(3, 3), activation='relu',
                      input_shape=(75, 75, 3)),
        layers.MaxPooling2D((2, 2)),
        layers.Conv2D(filters=64, kernel_size=(3, 3), activation='relu'),
        layers.MaxPooling2D((2, 2)),
        layers.Conv2D(filters=128, kernel_size=(3, 3), activation='relu'),
        layers.MaxPooling2D((2, 2)),
        layers.Flatten(),
        layers.Dense(64, activation='relu'),
        layers.Dense(8, activation='softmax')  # 8 Kvasir classes
    ])
    cnn.compile(optimizer='Adam',
                loss='sparse_categorical_crossentropy',
                metrics=['accuracy'])
    history = cnn.fit(X_train, y_train, epochs=16)
    return cnn
def cli_main():
    """CLI entry point: parse args, auto-split data, train, and save a checkpoint."""
    parser = ArgumentParser()
    parser.add_argument("--DATA_PATH", type=str, help="path to folders with images to train on.")
    parser.add_argument("--VAL_PATH", type=str, default=None, help="path to validation folders with images")
    parser.add_argument("--model", type=str, help="model to initialize. Can accept model checkpoint or just encoder name from models.py")
    parser.add_argument("--batch_size", default=128, type=int, help="batch size for SSL")
    parser.add_argument("--cpus", default=1, type=int, help="number of cpus to use to fetch data")
    parser.add_argument("--hidden_dim", default=128, type=int, help="hidden dimensions in projection head or classification layer for finetuning")
    parser.add_argument("--epochs", default=400, type=int, help="number of epochs to train model")
    parser.add_argument("--learning_rate", default=1e-3, type=float, help="learning rate for encoder")
    parser.add_argument("--patience", default=-1, type=int, help="automatically cuts off training if validation does not drop for (patience) epochs. Leave blank to have no validation based early stopping.")
    parser.add_argument("--val_split", default=0.2, type=float, help="percent in validation data. Ignored if VAL_PATH specified")
    parser.add_argument("--withhold_split", default=0, type=float, help="decimal from 0-1 representing how much of the training data to withold from either training or validation. Used for experimenting with labels neeeded")
    parser.add_argument("--gpus", default=1, type=int, help="number of gpus to use for training")
    parser.add_argument("--log_name", type=str, default=None, help="name of model to log on wandb and locally")
    parser.add_argument("--image_size", default=256, type=int, help="height of square image")
    parser.add_argument("--resize", default=False, type=bool, help="Pre-Resize data to right shape to reduce cuda memory requirements of reading large images")
    parser.add_argument("--technique", default=None, type=str, help="SIMCLR, SIMSIAM or CLASSIFIER")
    parser.add_argument("--seed", default=1729, type=int, help="random seed for run for reproducibility")

    # Parse known args first so the chosen technique can add its own flags.
    args, _ = parser.parse_known_args()
    technique = supported_techniques[args.technique]
    args, _ = technique.add_model_specific_args(parser).parse_known_args()

    # Logging.
    # FIX: the original built log_name unconditionally and then tested
    # `log_name is not None` (always true); with --log_name omitted it crashed
    # on the string concatenation (str + None). Guard on args.log_name and
    # fall back to a technique-only checkpoint name for local paths.
    if args.log_name is not None:
        log_name = args.technique + '_' + args.log_name + '.ckpt'
        wandb_logger = WandbLogger(name=log_name, project='Curator')
    else:
        log_name = args.technique + '.ckpt'
        wandb_logger = None

    # Resize images here.
    if args.resize:
        # implement resize and modify args.DATA_PATH accordingly
        pass

    # Splitting data into train and validation when the dataset has no layout,
    # a validation share was requested, and no explicit VAL_PATH was given.
    if not (os.path.isdir(f"{args.DATA_PATH}/train")
            and os.path.isdir(f"{args.DATA_PATH}/val")
            ) and args.val_split != 0 and args.VAL_PATH is None:
        print(colored(f'Automatically splitting data into train and validation data...', 'blue'))
        split_dir = f'./split_data_{log_name[:-5]}'  # [:-5] strips '.ckpt'
        shutil.rmtree(split_dir, ignore_errors=True)
        splitfolders.ratio(args.DATA_PATH,
                           output=split_dir,
                           ratio=(1 - args.val_split - args.withhold_split,
                                  args.val_split, args.withhold_split),
                           seed=args.seed)
        args.DATA_PATH = f'{split_dir}/train'
        args.VAL_PATH = f'{split_dir}/val'

    model = load_model(args)
    print(colored("Model architecture successfully loaded", 'blue'))

    cbs = []
    backend = 'ddp'
    if args.patience > 0:
        cb = EarlyStopping('val_loss', patience=args.patience)
        cbs.append(cb)

    trainer = pl.Trainer(
        gpus=args.gpus,
        max_epochs=args.epochs,
        progress_bar_refresh_rate=20,
        callbacks=cbs,
        distributed_backend=f'{backend}' if args.gpus > 1 else None,
        sync_batchnorm=True if args.gpus > 1 else False,
        logger=wandb_logger,
        enable_pl_optimizer=True)
    trainer.fit(model)

    Path(f"./models/").mkdir(parents=True, exist_ok=True)
    trainer.save_checkpoint(f"./models/{log_name}")
    print(colored("YOUR MODEL CAN BE ACCESSED AT: ", 'blue'), f"./models/{log_name}")
# -*- coding: utf-8 -*- """ Created on Wed Dec 9 10:17:22 2020 @author: nabeel """ import splitfolders path = "D:/NAVTTCH/Pneumonia Detection using chest xrays images/Dataset_pneuminia_normal" splitfolders.ratio(path, output="train_test_Pneumonia_Dataset", seed=1337,ratio=(.7, .1,.2))
Original file is located at https://colab.research.google.com/drive/1-Ce3MQqKya1zOPz9CrtHETVQUCfnwyCV
"""

# mount googledrive
from google.colab import drive
drive.mount('/content/gdrive')

# NOTE: IPython/Colab shell magic — valid only inside a notebook cell.
!pip install split-folders

import splitfolders

input_path = '/content/gdrive/My Drive/APS360/Lab 3/Gesture_Dataset'
output_path = '/content/gdrive/My Drive/APS360/Lab 3/Split Data'

# splitting the data 60/20/20
splitfolders.ratio(input_path, output=output_path, seed=1, ratio=(.6, .2, .2))

# Loading these images from Drive
import torch
import numpy as np
import torchvision
import torchvision.transforms as transforms
import matplotlib.pyplot as plt
from torch.utils.data.sampler import SubsetRandomSampler

# location on Google Drive
master_path = '/content/gdrive/My Drive/APS360/Lab 3/Split Data/'
import os import glob import keras from keras_video import VideoFrameGenerator from data.CustomVideoGenerator import CustomVideoGenerator import keras_video.utils from matplotlib import pyplot as plt import numpy as np import splitfolders main_dir = 'videos/' output_dir = 'train_valid_dataset/' splitfolders.ratio(main_dir, output=output_dir, seed=42, ratio=(.8, .2)) def generate_video_frame(size=(224, 224), channel=3, n_frame=5, batch_size=8): # use sub directories names as classes classes = [i.split(os.path.sep)[1] for i in glob.glob('videos/*')] classes.sort() # pattern to get videos and classes glob_pattern_train = 'train_valid_dataset/train/{classname}/*.mp4' glob_pattern_val = 'train_valid_dataset/val/{classname}/*.mp4' # Create video frame generator train = CustomVideoGenerator(classes=classes, glob_pattern=glob_pattern_train, nb_frames=n_frame, shuffle=True, batch_size=batch_size, target_shape=size, nb_channel=channel, transformation=None,
torch.manual_seed(0) print('Using PyTorch version', torch.__version__) ### Preparing Train, Val, Test Sets ### directory = './data/COVID-19_Radiography_Dataset' source_dirs = ['Normal', 'Viral Pneumonia', 'COVID', 'Lung_Opacity'] train_dir = './model_data/train' val_dir = './model_data/val' test_dir = './model_data/test' if not os.path.isdir("model_data"): splitfolders.ratio('./data/COVID-19_Radiography_Dataset', output="model_data", seed=1337, ratio=(.8, 0.1, 0.1)) ### Creating Custom Dataset ### ### Reference#https://medium.com/analytics-vidhya/detecting-covid-19-using-chest-x-ray-images-a6fc822b73cc## class ChestXRayDataset(torch.utils.data.Dataset): def __init__(self, image_dirs, transform): def get_images(class_name): images = [ x for x in os.listdir(image_dirs[class_name]) if x.lower().endswith('png') ] print(f'Found {len(images)} {class_name} examples') return images
import splitfolders
import os
import pathlib

# Augmented malignant-lesion images, split 75/25 into train/val.
folder = 'data/malignas/output'
splitfolders.ratio(folder, output='data/malignaaugmentated', seed=1337, ratio=(0.75, 0.25))
# Download the rock-paper-scissors dataset (Colab shell magic — notebook only).
!wget --no-check-certificate \
  https://dicodingacademy.blob.core.windows.net/picodiploma/ml_pemula_academy/rockpaperscissors.zip \
  -O /tmp/rockpaperscissors.zip

# Extract the zip archive.
import zipfile,os
local_zip = '/tmp/rockpaperscissors.zip'
zip_ref = zipfile.ZipFile(local_zip, 'r')
zip_ref.extractall('/tmp')
zip_ref.close()
os.listdir('/tmp')

import splitfolders
# 80/20 train/val split of the raw images into /tmp/rockpaperscissors/{train,val}.
splitfolders.ratio('/tmp/rockpaperscissors/rps-cv-images', output="/tmp/rockpaperscissors/", seed=42, ratio=(.8, .2))

data_dir = '/tmp/rockpaperscissors/'
train_dir = os.path.join(data_dir, 'train')
validation_dir = os.path.join(data_dir, 'val')

# Class subdirectory paths inside the training split.
# NOTE(review): train_messy_dir is assigned twice (paper, then rock), so the
# 'paper' path is lost — presumably three distinct names were intended.
train_clean_dir = os.path.join(train_dir, 'scissors')
train_messy_dir = os.path.join(train_dir, 'paper')
train_messy_dir = os.path.join(train_dir, 'rock')

# Class subdirectory paths inside the validation split (same double-assignment issue).
validation_clean_dir = os.path.join(validation_dir, 'scissors')
validation_messy_dir = os.path.join(validation_dir, 'paper')
validation_messy_dir = os.path.join(validation_dir, 'rock')
IMG_SHAPE = (224, 224, 3) # Data augmentation D_AUG = True ROTATION = 10 WIDTH_SHIFT = 0.2 HEIGHT_SHIFT = 0.2 BRIGHTNESS = (0.2, 1.4) SHEAR = 0.2 ZOOM = 0.3 HORI_FLIP = True # To split the dataset into train, val, and test sets. splitfolders.ratio(DATA_DIR_PATH, OUTPUT_DIR, seed=SEED, ratio=(TRAIN_R, VAL_R, TEST_R)) train_data_dir = f"{OUTPUT_DIR}/train" val_data_dir = f"{OUTPUT_DIR}/val" test_data_dir = f"{OUTPUT_DIR}/test" helper = Helper() train_gen, val_gen, test_gen = helper.get_resnet_gens(train_data_dir, val_data_dir, test_data_dir, target_size=(IMG_SHAPE[0], IMG_SHAPE[1]), batch_size=BATCH_SIZE, data_aug=D_AUG, rotation=ROTATION,
# Bucket each dataframe row by predicted label; [7:] strips the 7-character
# directory prefix stored in the 'image' column.
for index, row in df.iterrows():
    image_name = row['image'][7:]
    if row['probability'] == 0:
        uninfected_list.append(image_name)
    else:
        infected_list.append(image_name)

images_path = '/content/Original_Data'
# add images directories to a list
images_list = os.listdir(images_path)

# FIX: membership tests against a list are O(n) per image — use a set.
infected_set = set(infected_list)

# Copy each original image into its class folder.
# FIX: iterate the filenames directly instead of `for i in range(len(...))`.
for image_name in images_list:
    file_directory = images_path + '/' + image_name
    if image_name in infected_set:
        # copy infected cells to infected_path
        shutil.copy(file_directory, '/content/new_PV_cells_data/infected_cells')
    else:
        # copy uninfected cells to uninfected_path
        shutil.copy(file_directory, '/content/new_PV_cells_data/uninfected_cells')

# split DATA to 75% train and 25% test; the seed makes splits reproducible.
splitfolders.ratio("/content/new_PV_cells_data", output="/content/Splitted_pv_cells_data", seed=1337, ratio=(.75, .25,))
from keras.layers import Dense import matplotlib.pyplot as plt import numpy as np # Global Variable train_ratio = 0.6 tests_ratio = 0.4 input_size = (224, 224) channel = (3, ) input_shape = input_size + channel batch_size = 16 epoch = 15 splitfolders.ratio("Dataset_Confirm", output="Dataset_Final", seed=1337, ratio=(train_ratio, tests_ratio), group_prefix=None) # Create Train and Validation Path dataset_dir = 'Dataset_Final' train_dir = os.path.join(dataset_dir, 'train') tests_dir = os.path.join(dataset_dir, 'val') # Image Augmentation (Pre-process) from tensorflow.keras.preprocessing.image import ImageDataGenerator train_datagen = ImageDataGenerator( rescale=1. / 255, rotation_range=20, horizontal_flip=True,