コード例 #1
0
def test_split_ratio_path_move():
    """Splitting with move=True must transfer every jpg into the output tree.

    Works on a disposable copy of the ``imgs`` fixture so the original
    fixture directory is never consumed by the move.
    """
    base = pathlib.Path(__file__).parent
    source = base / "imgs"
    workdir = base / "imgs_move"
    dest = base / "output"

    # Start each run from a clean scratch copy of the fixture images.
    if workdir.exists():
        rm_tree(workdir)
    shutil.copytree(str(source), str(workdir))
    shutil.rmtree(str(dest), ignore_errors=True)

    ratio(
        workdir,
        output=dest,
        seed=1337,
        ratio=(0.8, 0.2),
        group_prefix=None,
        move=True,
    )

    # Moving must neither drop nor duplicate any image.
    expected = len(list(source.glob("**/*.jpg")))
    produced = len(list(dest.glob("**/*.jpg")))
    assert expected == produced
コード例 #2
0
    def __init__(self, encoder, DATA_PATH, batch_size, val_split, hidden_dims,
                 train_transform, val_transform, num_workers, **kwargs):
        """Build the classifier: resolve the data split, then the model head.

        Args:
            encoder: encoder name or checkpoint understood by ``load_encoder``.
            DATA_PATH: root of an ImageFolder-style dataset. When it does not
                already contain ``train``/``val`` sub-folders, it is split
                automatically into a local ``split_data`` directory.
            batch_size: training batch size.
            val_split: fraction (0-1) of data reserved for validation.
            hidden_dims: width of the hidden linear layer.
            train_transform: transform applied to training images.
            val_transform: transform applied to validation images.
            num_workers: dataloader worker count.
            **kwargs: forwarded to ``load_encoder``.
        """
        super().__init__()

        self.DATA_PATH = DATA_PATH
        self.val_split = val_split
        self.batch_size = batch_size
        self.hidden_dims = hidden_dims
        self.train_transform = train_transform
        self.val_transform = val_transform
        self.num_workers = num_workers

        # --- data: make sure train/ and val/ folders exist ---------------
        # Any stale split output from a previous run is discarded first.
        shutil.rmtree('split_data', ignore_errors=True)
        if not (path.isdir(f"{self.DATA_PATH}/train")
                and path.isdir(f"{self.DATA_PATH}/val")):
            splitfolders.ratio(self.DATA_PATH,
                               output="split_data",  # was f"split_data": no placeholders
                               ratio=(1 - self.val_split, self.val_split),
                               seed=10)
            self.DATA_PATH = 'split_data'
            print(
                'automatically splitting data into train and validation data')

        # One class per sub-directory of train/.
        self.num_classes = len(os.listdir(f'{self.DATA_PATH}/train'))

        # --- model: encoder plus a two-layer classification head ---------
        self.eval_acc = Accuracy()
        self.encoder, self.embedding_size = load_encoder(encoder, kwargs)
        self.fc1 = nn.Linear(self.embedding_size, self.hidden_dims)
        self.fc2 = nn.Linear(self.hidden_dims, self.num_classes)
コード例 #3
0
def slice_vid(**kwargs):
    """Extract every ``fps``-th frame of a video to disk via ``save_img``.

    Expected keyword arguments:
        video: path handed to ``cv2.VideoCapture``.
        fps: keep one frame out of every ``fps`` frames read.
        path: either a single output directory, or a two-element list
            ``[frame_dir, split_dir]`` — frames are written to ``frame_dir``
            and afterwards split into ``split_dir`` using ``kwargs["ratio"]``.
        ratio: split ratio tuple; only used with the list form of ``path``.
    """
    video = cv2.VideoCapture(kwargs["video"])
    step = kwargs["fps"]

    frame_counter = 1
    saved_id = 0  # renamed from `id` to avoid shadowing the builtin
    grabbed, frame_img = video.read()

    if isinstance(kwargs["path"], list):
        out_dir, split_dir = kwargs["path"][0], kwargs["path"][1]
        while grabbed:
            if frame_counter % step == 0:
                saved_id += 1
                save_img(image=frame_img, id=saved_id, path=out_dir)
            grabbed, frame_img = video.read()
            frame_counter += 1
        # Split the dumped frames into train/val(/test) sub-folders.
        splitfolders.ratio(out_dir, output=split_dir, ratio=kwargs["ratio"])
    else:
        # Save-then-read, like the branch above: the original read first,
        # which skipped frame 1 and could save an invalid frame after the
        # final failed read.
        while grabbed:
            if frame_counter % step == 0:
                saved_id += 1
                save_img(image=frame_img, id=saved_id, path=kwargs["path"])
            grabbed, frame_img = video.read()
            frame_counter += 1

    video.release()
コード例 #4
0
    def __init__(self, encoder, epochs, DATA_PATH, withhold, batch_size,
                 val_split, hidden_dims, train_transform, val_transform,
                 num_workers, **kwargs):
        """Contrastive trainer: resolve the data split, then build the model.

        Args:
            encoder: encoder name or checkpoint understood by ``load_encoder``.
            epochs: maximum training epochs (forwarded to the parent trainer).
            DATA_PATH: ImageFolder-style data root; auto-split into a local
                'split_data' directory when it lacks train/ and val/.
            withhold: fraction (0-1) of data withheld from train and val.
            batch_size: training batch size.
            val_split: fraction (0-1) of data used for validation.
            hidden_dims: hidden width of the projection head.
            train_transform: transform applied to training images.
            val_transform: transform applied to validation images.
            num_workers: accepted for interface parity; currently unused.
            **kwargs: forwarded to ``load_encoder``.
        """
        #data stuff
        self.DATA_PATH = DATA_PATH
        self.val_split = val_split
        self.batch_size = batch_size
        self.hidden_dims = hidden_dims
        self.train_transform = train_transform
        self.val_transform = val_transform
        self.withhold = withhold
        self.epochs = epochs

        # Discard any stale split output, then split only when needed.
        shutil.rmtree('split_data', ignore_errors=True)
        if not (path.isdir(f"{self.DATA_PATH}/train")
                and path.isdir(f"{self.DATA_PATH}/val")):
            splitfolders.ratio(self.DATA_PATH,
                               output="split_data",  # was f"split_data": no placeholders
                               ratio=(1 - self.val_split - self.withhold,
                                      self.val_split, self.withhold),
                               seed=10)
            self.DATA_PATH = 'split_data'
            print(
                f'automatically splitting data into train and validation data {self.val_split} and withhold {self.withhold}'
            )

        # num_samples must be known before the parent constructor runs below.
        self.num_samples = sum(
            [len(files) for r, d, files in os.walk(f'{self.DATA_PATH}/train')])

        #model stuff
        super().__init__(gpus=1,
                         num_samples=self.num_samples,
                         batch_size=batch_size,
                         dataset='None',
                         max_epochs=epochs)
        self.encoder, self.embedding_size = load_encoder(encoder, kwargs)

        class Projection(nn.Module):
            """MLP projection head: linear -> BN+ReLU -> linear -> L2-normalize."""

            def __init__(self, input_dim, hidden_dim=2048, output_dim=128):
                super().__init__()
                self.output_dim = output_dim
                self.input_dim = input_dim
                self.hidden_dim = hidden_dim

                self.lin = nn.Linear(self.input_dim, self.hidden_dim)
                self.b = nn.BatchNorm1d(self.hidden_dim)
                self.l = nn.Linear(self.hidden_dim,
                                   self.output_dim,
                                   bias=False)

            def forward(self, x):
                x = self.lin(x)
                x = F.relu(self.b(x))
                x = self.l(x)
                return F.normalize(x, dim=1)

        self.projection = Projection(input_dim=self.embedding_size,
                                     hidden_dim=self.hidden_dims)
コード例 #5
0
 def split_images_into_right_format(self, ratio=(0.8, 0.1, 0.1)):
     """Split the input folder into train/val/test sub-folders.

     Delegates to ``splitfolders.ratio`` with a fixed seed so the split
     is reproducible across runs.
     """
     splitfolders.ratio(self._input_folder,
                        output=self._output_folder,
                        seed=1337,
                        ratio=ratio,
                        group_prefix=None)
コード例 #6
0
def test_split_ratio_prefix_error_2():
    """group_prefix=2 on the second malformed fixture must raise ValueError."""
    here = os.path.dirname(__file__)
    input_dir = os.path.join(here, "imgs_texts_error_2")
    output_dir = os.path.join(here, "output")

    shutil.rmtree(output_dir, ignore_errors=True)

    with pytest.raises(ValueError):
        ratio(input_dir, output_dir, group_prefix=2)
コード例 #7
0
def divide(_input, _output):
    """Split the images under ``_input`` 80/10/10 into ``_output``.

    Uses a fixed seed so the split is reproducible.

    Args:
        _input: source directory containing one sub-folder per class.
        _output: destination directory for the split output.
    """
    # ignore_errors: the output may not exist on a first run (the bare
    # rmtree used to raise FileNotFoundError in that case).
    # NOTE(review): this hard-coded path is independent of ``_output`` —
    # confirm it really is the directory that should be purged here.
    shutil.rmtree(r"./labeling/divided_output", ignore_errors=True)

    splitfolders.ratio(_input,
                       output=_output + "/",
                       seed=1337,
                       ratio=(.8, .1, .1),
                       group_prefix=None)  # default values
    print("Dividing images into folders.")
コード例 #8
0
def test_split_ratio_prefix():
    """Grouped splitting (group_prefix=2) must keep every jpg exactly once."""
    here = os.path.dirname(__file__)
    input_dir = os.path.join(here, "imgs_texts")
    output_dir = os.path.join(here, "output")

    shutil.rmtree(output_dir, ignore_errors=True)

    ratio(input_dir, output_dir, group_prefix=2)

    # No image may be lost or duplicated by the grouped split.
    before = len(list(pathlib.Path(input_dir).glob("**/*.jpg")))
    after = len(list(pathlib.Path(output_dir).glob("**/*.jpg")))
    assert before == after
コード例 #9
0
 def setup(self, stage=None):
     """Build train/val ImageFolder datasets, splitting raw data if needed."""
     # Drop any stale split output from an earlier run.
     shutil.rmtree('split_data', ignore_errors=True)
     already_split = (path.isdir(f"{self.PATH}/train")
                      and path.isdir(f"{self.PATH}/validation"))
     if already_split:
         # The dataset ships pre-split: load it in place.
         self.train = ImageFolder(f'{self.PATH}/train', transform=self.train_transform)
         if self.val_split > 0:
             self.val = ImageFolder(f'{self.PATH}/validation', transform=self.val_transform)
     else:
         splitfolders.ratio(self.PATH, output=f"split_data", ratio=(1 - self.val_split, self.val_split), seed=10)
         self.train = ImageFolder('split_data/train', transform=self.train_transform)
         if self.val_split > 0:
             self.val = ImageFolder('split_data/val', transform=self.val_transform)
     self.num_classes = len(self.train.classes)
     self.num_samples = len(self.train)
     print('We have the following classes: ', self.train.classes)
コード例 #10
0
    def setup(self):
        """Create finetuning datasets, splitting raw data when necessary.

        If ``DATA_PATH`` already contains ``train``/``val`` sub-folders they
        are used directly; otherwise the data is split into 'split_data'.
        The original always read from 'split_data' even when the split was
        skipped (and 'split_data' had just been deleted), which crashed for
        pre-split datasets.
        """
        shutil.rmtree('split_data', ignore_errors=True)
        if not (path.isdir(f"{self.DATA_PATH}/train")
                and path.isdir(f"{self.DATA_PATH}/val")):
            splitfolders.ratio(self.DATA_PATH,
                               output="split_data",
                               ratio=(1 - self.val_split, self.val_split),
                               seed=10)
            data_root = 'split_data'
        else:
            data_root = self.DATA_PATH

        self.finetune_dataset = ImageFolder(f'{data_root}/train',
                                            transform=self.train_transform)
        self.finetune_val_dataset = ImageFolder(f'{data_root}/val',
                                                transform=self.val_transform)

        self.num_samples = len(self.finetune_dataset)
        self.num_classes = len(self.finetune_dataset.classes)
コード例 #11
0
def main():
    """Split the AWE ear dataset 80/10/10 into train/val/test folders."""
    dataset_in = "dataset/AWEDataset/awe"
    dataset_out = "dataset/AWEDataset/awe-train-test-val"

    test_ratio = 0.1
    val_ratio = 0.1
    train_ratio = 1.0 - test_ratio - val_ratio

    # Fixed seed keeps the split reproducible across runs.
    splitfolders.ratio(
        dataset_in,
        output=dataset_out,
        seed=128,
        ratio=(train_ratio, val_ratio, test_ratio),
        group_prefix=None,
    )
コード例 #12
0
def etl_data(aertist_dir):
    """Split an artist image folder 80/20 and log both halves to MLflow.

    Args:
        aertist_dir: quoted path string; the first and last characters
            (the surrounding quotes) are stripped before use.
    """
    with mlflow.start_run() as mlrun:
        # Note: the original also created an unused input temp dir here,
        # leaking an empty directory per call.
        out_local_dir = tempfile.mkdtemp()
        print(out_local_dir)
        print("aertist_dir=%s" % aertist_dir[1:-1])

        splitfolders.ratio(aertist_dir[1:-1],
                           output=out_local_dir,
                           seed=1337,
                           ratio=(.8, .2),
                           group_prefix=None)

        artist_train = os.path.join(out_local_dir, "train")
        artist_validation = os.path.join(out_local_dir, "val")
        mlflow.log_artifact(artist_train, "artist_train")
        mlflow.log_artifact(artist_validation, "artist_val")
コード例 #13
0
    def __init__(self, encoder, DATA_PATH, withhold, batch_size, val_split,
                 hidden_dims, train_transform, val_transform, num_workers,
                 **kwargs):
        """Build the finetuner: resolve data split, encoder and linear head.

        Args:
            encoder: encoder name or checkpoint understood by ``load_encoder``.
            DATA_PATH: ImageFolder-style data root; auto-split into a local
                'split_data' directory when it lacks train/ and val/.
            withhold: fraction (0-1) of data withheld from train and val.
            batch_size: training batch size.
            val_split: fraction (0-1) of data used for validation.
            hidden_dims: hidden width of the SSL evaluator head.
            train_transform: transform applied to training images.
            val_transform: transform applied to validation images.
            num_workers: dataloader worker count.
            **kwargs: forwarded to ``load_encoder``.
        """
        super().__init__()

        self.DATA_PATH = DATA_PATH
        self.val_split = val_split
        self.batch_size = batch_size
        self.hidden_dims = hidden_dims
        self.train_transform = train_transform
        self.val_transform = val_transform
        self.num_workers = num_workers
        self.withhold = withhold

        # --- data: ensure train/ and val/ folders exist ------------------
        shutil.rmtree('split_data', ignore_errors=True)
        if not (path.isdir(f"{self.DATA_PATH}/train")
                and path.isdir(f"{self.DATA_PATH}/val")):
            splitfolders.ratio(self.DATA_PATH,
                               output="split_data",  # was f"split_data": no placeholders
                               ratio=(1 - self.val_split - self.withhold,
                                      self.val_split, self.withhold),
                               seed=10)
            self.DATA_PATH = 'split_data'
            print(
                f'automatically splitting data into train and validation data {self.val_split} and withhold {self.withhold}'
            )

        # One class per sub-directory of train/.
        self.num_classes = len(os.listdir(f'{self.DATA_PATH}/train'))

        # --- model -------------------------------------------------------
        self.train_acc = Accuracy()
        self.val_acc = Accuracy(compute_on_step=False)
        print('KWARGS:', kwargs)
        self.encoder, self.embedding_size = load_encoder(encoder, kwargs)

        self.linear_layer = SSLEvaluator(n_input=self.embedding_size,
                                         n_classes=self.num_classes,
                                         p=0.1,
                                         n_hidden=self.hidden_dims)
コード例 #14
0
ファイル: cnn.py プロジェクト: anlu1601/forest_classifier
    def load_data(self):
        """Build datasets and dataloaders for labeled and unlabeled images.

        Re-splits ./labeling/input 70/30 into ./labeling/divided_output,
        then wires up train/val/unlabeled datasets and their dataloaders,
        and records the class list and compute device.
        """
        # Preprocessing shared by all three datasets.
        data_transform = transforms.Compose([
            transforms.CenterCrop(100),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.5, 0.5, 0.5],
                                 std=[0.5, 0.5, 0.5])
        ])

        # ignore_errors: the output directory may not exist on a first run
        # (the bare rmtree used to raise FileNotFoundError then).
        shutil.rmtree(r"./labeling/divided_output", ignore_errors=True)

        splitfolders.ratio(r"./labeling/input",
                           output=r"./labeling/divided_output",
                           seed=1337,
                           ratio=(.7, .3),
                           group_prefix=None)

        self.train_dataset = datasets.ImageFolder(
            root='./labeling/divided_output/train', transform=data_transform)
        self.val_dataset = datasets.ImageFolder(
            root='./labeling/divided_output/val', transform=data_transform)

        # Unlabeled pool, kept with file paths for later labeling/inference.
        self.dataset_unlabeled = ImageFolderWithPaths(
            './tree_segmentation/output', transform=data_transform)

        self.train_loader = torch.utils.data.DataLoader(self.train_dataset,
                                                        batch_size=64,
                                                        shuffle=True,
                                                        num_workers=4)
        self.val_loader = torch.utils.data.DataLoader(self.val_dataset,
                                                      batch_size=64,
                                                      shuffle=False,
                                                      num_workers=4)
        self.image_loader = torch.utils.data.DataLoader(self.dataset_unlabeled,
                                                        batch_size=1,
                                                        shuffle=False,
                                                        num_workers=0)

        self.classes = self.train_dataset.classes

        self.device = torch.device(
            "cuda:0" if torch.cuda.is_available() else "cpu")
コード例 #15
0
    def setup(self):
        """Prepare finetuning datasets (and debug-plot two samples).

        NOTE(review): the ``splitfolders.ratio`` output ('split_data') is
        never read below — both datasets are built from ``self.DATA_PATH``
        with ``FolderDataset_helper`` doing its own val split. The on-disk
        split may be vestigial; confirm before removing.
        """
        # Clear any stale split output from a previous run.
        shutil.rmtree('split_data', ignore_errors=True)
        if not (path.isdir(f"{self.DATA_PATH}/train")
                and path.isdir(f"{self.DATA_PATH}/val")):
            splitfolders.ratio(self.DATA_PATH,
                               output=f"split_data",
                               ratio=(1 - self.val_split, self.val_split),
                               seed=10)

        # Debug aid: dump one raw training sample to a.png.
        temp = ImageFolder(self.DATA_PATH, transform=self.train_transform)
        plt.imshow(np.swapaxes(np.array(temp[8][0]), 0, 2))
        plt.savefig('a.png')
        plt.clf()
        plt.close()
        plt.cla()

        self.finetune_dataset = FolderDataset_helper(
            self.DATA_PATH,
            validation=False,
            val_split=self.val_split,
            withold_train_percent=0,
            transform=self.train_transform,
            image_type='tif')
        # Debug aid: dump one finetune sample to b.png.
        plt.imshow(np.swapaxes(np.array(self.finetune_dataset[0][0]), 0, 2))
        plt.savefig('b.png')

        self.finetune_val_dataset = FolderDataset_helper(
            self.DATA_PATH,
            validation=True,
            val_split=self.val_split,
            withold_train_percent=0,
            transform=self.val_transform,
            image_type='tif')

        self.num_samples = len(self.finetune_dataset)
        self.num_classes = len(set(self.finetune_dataset.labels))
コード例 #16
0
import splitfolders

# Split the BdSL digit images 80/10/10 into train/val/test folders.
# NOTE(review): the ".." prefix (no separator) makes these paths relative to
# a directory literally named "..Files" — possibly a typo for "../Files";
# confirm against the actual directory layout.
splitfolders.ratio("..Files/BdSL/BdSL_digits/main",
                   output="..Files/BdSL/BdSL_digits/split",
                   seed=13,
                   ratio=(.8, .1, .1),
                   group_prefix=None)
コード例 #17
0
def train_test_split(input_folder, output_folder):
    """Reproducibly split ``input_folder`` 80/10/10 into ``output_folder``."""
    split_options = {
        "output": output_folder,
        "seed": 1337,
        "ratio": (.8, .1, .1),
    }
    splitfolders.ratio(input_folder, **split_options)
コード例 #18
0
import splitfolders  # or import split_folders

# Split "dataset" 80/20 into train/ and val/ sub-folders.
# NOTE(review): output == input here, so the split folders are created inside
# the source tree itself; re-running would re-split its own output — confirm
# this in-place layout is intended.
splitfolders.ratio("dataset",
                   output="dataset",
                   seed=1337,
                   ratio=(.8, .2),
                   group_prefix=None)
コード例 #19
0
# -*- coding: utf-8 -*-
"""

"""
import splitfolders
import os

# Split the flowers dataset 80/20 into train/ and val/ folders.
# NOTE(review): hard-coded absolute user paths — parameterize before reuse.
input_dir = os.path.join('C:/Users/Desktop/Bayesian CNN/flowers/')
output_dir = os.path.join('C:/Users/Desktop/Bayesian CNN/flowers_splitted/')

splitfolders.ratio(input_dir,
                   output=output_dir,
                   seed=1337,
                   ratio=(.8, .2),
                   group_prefix=None)
def modeling():
    """Train a small CNN on the Kvasir-v2 endoscopy dataset.

    Splits the raw dataset 70/30 into train/val folders, streams both
    halves through ImageDataGenerator, materializes them as numpy arrays
    and trains a three-block convnet for 16 epochs.

    Returns:
        The trained keras model.
    """
    input_dir = os.path.join(
        r'C:\Users\Lahiru\Desktop\Dataset\kvasir-dataset-v2')
    output_dir = os.path.join(
        r'C:\Users\Lahiru\Desktop\Dataset\kvasir-dataset-v2_splitted')

    # Reproducible 70/30 split on disk.
    splitfolders.ratio(input_dir,
                       output=output_dir,
                       seed=1337,
                       ratio=(.7, .3),
                       group_prefix=None)

    train_dir = os.path.join(
        r'C:\Users\Lahiru\Desktop\Dataset\kvasir-dataset-v2_splitted\train')
    test_dir = os.path.join(
        r'C:\Users\Lahiru\Desktop\Dataset\kvasir-dataset-v2_splitted\val')

    train_datagen = ImageDataGenerator(rescale=1 / 255)
    test_datagen = ImageDataGenerator(rescale=1 / 255)

    train_generator = train_datagen.flow_from_directory(
        train_dir,
        target_size=(75, 75),
        batch_size=214,
        class_mode='categorical',
        subset='training')

    test_generator = test_datagen.flow_from_directory(test_dir,
                                                      target_size=(75, 75),
                                                      batch_size=37,
                                                      class_mode='categorical')

    def _flatten(generator):
        """Collect every batch into (images, integer labels) arrays.

        Reads each batch exactly once so images and labels always stay
        aligned (the original indexed the generator in two separate passes).
        Labels go from one-hot to class indices, matching the sparse
        categorical crossentropy loss below.
        """
        images, labels = [], []
        for idx in range(len(generator)):
            batch_x, batch_y = generator[idx]
            images.extend(batch_x)
            labels.extend(np.argmax(one_hot) for one_hot in batch_y)
        return np.array(images), np.array(labels)

    X_train, y_train = _flatten(train_generator)
    # NOTE(review): the test arrays were built but never evaluated in the
    # original either; kept for a future cnn.evaluate(X_test, y_test).
    X_test, y_test = _flatten(test_generator)

    cnn = models.Sequential([
        layers.Conv2D(filters=32,
                      kernel_size=(3, 3),
                      activation='relu',
                      input_shape=(75, 75, 3)),
        layers.MaxPooling2D((2, 2)),
        layers.Conv2D(filters=64, kernel_size=(3, 3), activation='relu'),
        layers.MaxPooling2D((2, 2)),
        layers.Conv2D(filters=128, kernel_size=(3, 3), activation='relu'),
        layers.MaxPooling2D((2, 2)),
        layers.Flatten(),
        layers.Dense(64, activation='relu'),
        # 8 Kvasir classes.
        layers.Dense(8, activation='softmax')
    ])

    cnn.compile(optimizer='Adam',
                loss='sparse_categorical_crossentropy',
                metrics=['accuracy'])

    cnn.fit(X_train, y_train, epochs=16)

    return cnn
コード例 #21
0
def cli_main():
    """Command-line entry point: parse args, build the model, train, save.

    Splits the data on disk when DATA_PATH is not pre-split, optionally
    logs to wandb, trains with PyTorch Lightning, and writes the final
    checkpoint under ./models/.
    """
    parser = ArgumentParser()
    parser.add_argument("--DATA_PATH",
                        type=str,
                        help="path to folders with images to train on.")
    parser.add_argument("--VAL_PATH",
                        type=str,
                        default=None,
                        help="path to validation folders with images")
    parser.add_argument(
        "--model",
        type=str,
        help=
        "model to initialize. Can accept model checkpoint or just encoder name from models.py"
    )
    parser.add_argument("--batch_size",
                        default=128,
                        type=int,
                        help="batch size for SSL")
    parser.add_argument("--cpus",
                        default=1,
                        type=int,
                        help="number of cpus to use to fetch data")
    parser.add_argument(
        "--hidden_dim",
        default=128,
        type=int,
        help=
        "hidden dimensions in projection head or classification layer for finetuning"
    )
    parser.add_argument("--epochs",
                        default=400,
                        type=int,
                        help="number of epochs to train model")
    parser.add_argument("--learning_rate",
                        default=1e-3,
                        type=float,
                        help="learning rate for encoder")
    parser.add_argument(
        "--patience",
        default=-1,
        type=int,
        help=
        "automatically cuts off training if validation does not drop for (patience) epochs. Leave blank to have no validation based early stopping."
    )
    parser.add_argument(
        "--val_split",
        default=0.2,
        type=float,
        help="percent in validation data. Ignored if VAL_PATH specified")
    parser.add_argument(
        "--withhold_split",
        default=0,
        type=float,
        help=
        "decimal from 0-1 representing how much of the training data to withold from either training or validation. Used for experimenting with labels neeeded"
    )
    parser.add_argument("--gpus",
                        default=1,
                        type=int,
                        help="number of gpus to use for training")
    parser.add_argument("--log_name",
                        type=str,
                        default=None,
                        help="name of model to log on wandb and locally")
    parser.add_argument("--image_size",
                        default=256,
                        type=int,
                        help="height of square image")
    parser.add_argument(
        "--resize",
        default=False,
        type=bool,
        help=
        "Pre-Resize data to right shape to reduce cuda memory requirements of reading large images"
    )
    parser.add_argument("--technique",
                        default=None,
                        type=str,
                        help="SIMCLR, SIMSIAM or CLASSIFIER")
    parser.add_argument("--seed",
                        default=1729,
                        type=int,
                        help="random seed for run for reproducibility")

    # Parse twice so technique-specific args can extend the parser.
    args, _ = parser.parse_known_args()
    technique = supported_techniques[args.technique]
    args, _ = technique.add_model_specific_args(parser).parse_known_args()

    # Logging. The original built log_name unconditionally, which raised
    # TypeError when --log_name was omitted (None), so its later
    # `if log_name is not None` check could never be false. Guard on the
    # argument itself and fall back to a default run name.
    wandb_logger = None
    if args.log_name is not None:
        log_name = args.technique + '_' + args.log_name + '.ckpt'
        wandb_logger = WandbLogger(name=log_name, project='Curator')
    else:
        log_name = args.technique + '_run.ckpt'

    #resize images here
    if args.resize:
        #implement resize and modify args.DATA_PATH accordingly
        pass

    # Split data into train/val(/withheld) folders unless it is already
    # split, validation is disabled, or an explicit VAL_PATH was given.
    if not (os.path.isdir(f"{args.DATA_PATH}/train")
            and os.path.isdir(f"{args.DATA_PATH}/val")
            ) and args.val_split != 0 and args.VAL_PATH is None:
        print(
            colored(
                f'Automatically splitting data into train and validation data...',
                'blue'))
        shutil.rmtree(f'./split_data_{log_name[:-5]}', ignore_errors=True)
        splitfolders.ratio(args.DATA_PATH,
                           output=f'./split_data_{log_name[:-5]}',
                           ratio=(1 - args.val_split - args.withhold_split,
                                  args.val_split, args.withhold_split),
                           seed=args.seed)
        args.DATA_PATH = f'./split_data_{log_name[:-5]}/train'
        args.VAL_PATH = f'./split_data_{log_name[:-5]}/val'

    model = load_model(args)
    print(colored("Model architecture successfully loaded", 'blue'))

    cbs = []
    backend = 'ddp'

    # Optional early stopping on validation loss.
    if args.patience > 0:
        cb = EarlyStopping('val_loss', patience=args.patience)
        cbs.append(cb)

    trainer = pl.Trainer(
        gpus=args.gpus,
        max_epochs=args.epochs,
        progress_bar_refresh_rate=20,
        callbacks=cbs,
        distributed_backend=f'{backend}' if args.gpus > 1 else None,
        sync_batchnorm=True if args.gpus > 1 else False,
        logger=wandb_logger,
        enable_pl_optimizer=True)
    trainer.fit(model)

    Path(f"./models/").mkdir(parents=True, exist_ok=True)
    trainer.save_checkpoint(f"./models/{log_name}")
    print(colored("YOUR MODEL CAN BE ACCESSED AT: ", 'blue'),
          f"./models/{log_name}")
コード例 #22
0
# -*- coding: utf-8 -*-
"""
Created on Wed Dec  9 10:17:22 2020

@author: nabeel
"""



import splitfolders
# Source dataset: pneumonia vs normal chest X-ray images.
path = "D:/NAVTTCH/Pneumonia Detection using chest xrays images/Dataset_pneuminia_normal"


# 70/10/20 train/val/test split with a fixed seed for reproducibility.
splitfolders.ratio(path, output="train_test_Pneumonia_Dataset", seed=1337,ratio=(.7, .1,.2))
コード例 #23
0
Original file is located at
    https://colab.research.google.com/drive/1-Ce3MQqKya1zOPz9CrtHETVQUCfnwyCV
"""

# Mount Google Drive so the dataset is reachable under /content/gdrive.
from google.colab import drive
drive.mount('/content/gdrive')

# NOTE: "!pip" is IPython/Colab shell magic — this only runs inside a notebook.
!pip install split-folders

import splitfolders
input_path = '/content/gdrive/My Drive/APS360/Lab 3/Gesture_Dataset'
output_path = '/content/gdrive/My Drive/APS360/Lab 3/Split Data'
# Split the gesture data 60/20/20 into train/val/test folders on Drive.
splitfolders.ratio(input_path, output=output_path, seed=1, ratio=(.6, .2, .2))

# Loading these images from Drive

import torch
import numpy as np

import torchvision
import torchvision.transforms as transforms
import matplotlib.pyplot as plt

from torch.utils.data.sampler import SubsetRandomSampler

# Root of the split data on Google Drive, used by the loaders below.
master_path = '/content/gdrive/My Drive/APS360/Lab 3/Split Data/' 
コード例 #24
0
import os
import glob
import keras
from keras_video import VideoFrameGenerator
from data.CustomVideoGenerator import CustomVideoGenerator
import keras_video.utils
from matplotlib import pyplot as plt
import numpy as np
import splitfolders

main_dir = 'videos/'
output_dir = 'train_valid_dataset/'
# Split the raw videos 80/20 into train/ and val/ sub-folders; the fixed
# seed keeps the split reproducible. The frame generators read this tree.
splitfolders.ratio(main_dir, output=output_dir, seed=42, ratio=(.8, .2))


def generate_video_frame(size=(224, 224), channel=3, n_frame=5, batch_size=8):
    # use sub directories names as classes
    classes = [i.split(os.path.sep)[1] for i in glob.glob('videos/*')]
    classes.sort()
    # pattern to get videos and classes
    glob_pattern_train = 'train_valid_dataset/train/{classname}/*.mp4'
    glob_pattern_val = 'train_valid_dataset/val/{classname}/*.mp4'
    # Create video frame generator
    train = CustomVideoGenerator(classes=classes,
                                 glob_pattern=glob_pattern_train,
                                 nb_frames=n_frame,
                                 shuffle=True,
                                 batch_size=batch_size,
                                 target_shape=size,
                                 nb_channel=channel,
                                 transformation=None,
コード例 #25
0
# Fixed torch seed for reproducible training downstream.
torch.manual_seed(0)

print('Using PyTorch version', torch.__version__)

### Preparing Train, Val, Test Sets ###
directory = './data/COVID-19_Radiography_Dataset'

source_dirs = ['Normal', 'Viral Pneumonia', 'COVID', 'Lung_Opacity']
train_dir = './model_data/train'
val_dir = './model_data/val'
test_dir = './model_data/test'

# Split the radiography data 80/10/10 into model_data/{train,val,test},
# but only once — skipped when the output directory already exists.
# NOTE(review): the literal path duplicates `directory` above; consider
# passing the variable instead.
if not os.path.isdir("model_data"):
    splitfolders.ratio('./data/COVID-19_Radiography_Dataset',
                       output="model_data",
                       seed=1337,
                       ratio=(.8, 0.1, 0.1))


### Creating Custom Dataset ###
### Reference#https://medium.com/analytics-vidhya/detecting-covid-19-using-chest-x-ray-images-a6fc822b73cc##
class ChestXRayDataset(torch.utils.data.Dataset):
    def __init__(self, image_dirs, transform):
        def get_images(class_name):
            images = [
                x for x in os.listdir(image_dirs[class_name])
                if x.lower().endswith('png')
            ]
            print(f'Found {len(images)} {class_name} examples')
            return images
コード例 #26
0
import splitfolders  
import os
import pathlib


# Split an existing (augmented) malignant-image folder 75/25.
folder = 'data/malignas/output'
splitfolders.ratio(folder, output='data/malignaaugmentated', seed=1337, ratio=(0.75,0.25))
# NOTE: "!wget" is IPython/Colab shell magic — notebook-only, not plain Python.
!wget --no-check-certificate \
  https://dicodingacademy.blob.core.windows.net/picodiploma/ml_pemula_academy/rockpaperscissors.zip \
  -O /tmp/rockpaperscissors.zip

# extract the downloaded zip archive
import zipfile,os
local_zip = '/tmp/rockpaperscissors.zip'
zip_ref = zipfile.ZipFile(local_zip, 'r')
zip_ref.extractall('/tmp')
zip_ref.close()

os.listdir('/tmp')

import splitfolders

# Split the rock/paper/scissors images 80/20 into train/ and val/ folders.
splitfolders.ratio('/tmp/rockpaperscissors/rps-cv-images', output="/tmp/rockpaperscissors/", seed=42, ratio=(.8, .2))

data_dir = '/tmp/rockpaperscissors/'
train_dir = os.path.join(data_dir, 'train')
validation_dir = os.path.join(data_dir, 'val')

# Per-class sub-directories of the training split.
# (The original bound both the 'paper' and 'rock' paths to the same
# variable, so the 'paper' path was silently lost.)
train_scissors_dir = os.path.join(train_dir, 'scissors')
train_paper_dir = os.path.join(train_dir, 'paper')
train_rock_dir = os.path.join(train_dir, 'rock')

# Per-class sub-directories of the validation split.
validation_scissors_dir = os.path.join(validation_dir, 'scissors')
validation_paper_dir = os.path.join(validation_dir, 'paper')
validation_rock_dir = os.path.join(validation_dir, 'rock')

# Backward-compatible aliases matching the original (clobbered) names.
train_clean_dir = train_scissors_dir
train_messy_dir = train_rock_dir
validation_clean_dir = validation_scissors_dir
validation_messy_dir = validation_rock_dir
コード例 #28
0
# Input geometry expected by the ResNet generators below.
IMG_SHAPE = (224, 224, 3)


# Data augmentation settings (consumed by helper.get_resnet_gens).
D_AUG = True
ROTATION = 10
WIDTH_SHIFT = 0.2
HEIGHT_SHIFT = 0.2
BRIGHTNESS = (0.2, 1.4)
SHEAR = 0.2
ZOOM = 0.3
HORI_FLIP = True


# To split the dataset into train, val, and test sets.
# NOTE(review): DATA_DIR_PATH, OUTPUT_DIR, SEED and the TRAIN_R/VAL_R/TEST_R
# ratios are defined elsewhere in this file — confirm they are in scope here.
splitfolders.ratio(DATA_DIR_PATH, OUTPUT_DIR, seed=SEED, ratio=(TRAIN_R, VAL_R, TEST_R))

train_data_dir = f"{OUTPUT_DIR}/train"
val_data_dir = f"{OUTPUT_DIR}/val"
test_data_dir = f"{OUTPUT_DIR}/test"


helper = Helper()

train_gen, val_gen, test_gen = helper.get_resnet_gens(train_data_dir,
                                                      val_data_dir,
                                                      test_data_dir,
                                                      target_size=(IMG_SHAPE[0], IMG_SHAPE[1]),
                                                      batch_size=BATCH_SIZE,
                                                      data_aug=D_AUG,
                                                      rotation=ROTATION,
コード例 #29
0
# Partition image filenames by infection label from the dataframe.
# NOTE(review): row['image'][7:] strips a fixed-length path prefix —
# presumably 'images/'; confirm against the dataframe contents.
for index, row in df.iterrows():
  if row['probability'] == 0:
    uninfected_list.append(row['image'][7:])
  else:
    infected_list.append(row['image'][7:])

images_path = '/content/Original_Data'

# List the image file names in the source directory.
images_list = os.listdir(images_path)

# Build the set once so each membership test is O(1) instead of scanning
# the infected list for every file.
infected_set = set(infected_list)

# Copy each cell image into the folder matching its label.
for image_name in images_list:
  file_directory = images_path + '/' + image_name
  if image_name in infected_set:
    shutil.copy(file_directory, '/content/new_PV_cells_data/infected_cells')
  else:
    shutil.copy(file_directory, '/content/new_PV_cells_data/uninfected_cells')

# Split data into 75% train / 25% val; the seed makes splits reproducible.
splitfolders.ratio("/content/new_PV_cells_data",
                   output="/content/Splitted_pv_cells_data",
                   seed=1337,
                   ratio=(.75, .25))






コード例 #30
0
ファイル: CovidTrain.py プロジェクト: oracl4/CovidTest
from keras.layers import Dense
import matplotlib.pyplot as plt
import numpy as np

# Global configuration: split ratios, input geometry and training settings.
train_ratio = 0.6
tests_ratio = 0.4
input_size = (224, 224)
channel = (3, )
input_shape = input_size + channel
batch_size = 16
epoch = 15

# Split the confirmed dataset 60/40 into Dataset_Final/{train,val} with a
# fixed seed for reproducibility.
splitfolders.ratio("Dataset_Confirm",
                   output="Dataset_Final",
                   seed=1337,
                   ratio=(train_ratio, tests_ratio),
                   group_prefix=None)

# Create Train and Validation Path
dataset_dir = 'Dataset_Final'
train_dir = os.path.join(dataset_dir, 'train')
tests_dir = os.path.join(dataset_dir, 'val')

# Image Augmentation (Pre-process)
from tensorflow.keras.preprocessing.image import ImageDataGenerator

train_datagen = ImageDataGenerator(
    rescale=1. / 255,
    rotation_range=20,
    horizontal_flip=True,