Example 1
    def set_data_bunch(self):
        # Build the fastai TextDataBunch from the train/validation DataFrames,
        # using the custom tokenizer/vocab and padding appended on the right.
        self.data_bunch = TextDataBunch.from_df(
            ".",
            self.train,
            self.val,
            tokenizer=self.coruscant_tokenizer,
            vocab=self.coruscant_vocab,
            include_bos=False,
            include_eos=False,
            text_cols=self.text_cols,
            label_cols=self.label_cols,
            bs=self.batch_size,
            collate_fn=partial(pad_collate, pad_first=False, pad_idx=0),
        )
Example 2
def run_training_sample(sample_size=300, image_size=224, n_cycles=10,
                        with_focal_loss=False, with_oversampling=True,
                        with_weighted_loss=True,
                        confusion_matrix_filename='train_confusion_matrix'):
    """

    :param sample_size: number of images per class
            if the input file has less than the given number, only the existing ones are used
    :param image_size:
            Size of the image in image augmantation pre-processing
    :param n_cycles:
            epochs
    :param with_focal_loss: bool
        Use it if data is balanced
    :param with_oversampling: bool
        Use oversampling for the mintority class of COVID xrays to match the `sample_size`
    :param with_weighted_loss: bool
        Use weighted loss for unbalaned sample size in classes
    :return:
    """

    data = load_dataset(sample_size=sample_size, image_size=image_size)

    callbacks = None
    if with_oversampling:
        callbacks = [partial(OverSamplingCallback)]

    learn = cnn_learner(data, models.resnet50, metrics=accuracy,
                        callback_fns=callbacks)
    learn.model = torch.nn.DataParallel(learn.model)

    # handle unbalanced data with weights
    # ['COVID-19', 'normal', 'pneumonia']
    if with_weighted_loss:
        classes = {c:1 for c in learn.data.classes}
        classes['COVID-19'] = 5
        learn.loss_func = CrossEntropyLoss(weight=tensor(list(classes.values()),
                                           dtype=torch.float, device=fastai.torch_core.defaults.device),
                                           reduction='mean')
    elif with_focal_loss:
        learn.loss_func = FocalLoss()

    learn.fit_one_cycle(n_cycles)

    save_learner(learn, with_focal_loss=with_focal_loss, with_oversampling=with_oversampling,
                 sample_size=sample_size, with_weighted_loss=with_weighted_loss)

    _save_classification_interpert(learn, confusion_matrix_filename=confusion_matrix_filename)
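
A minimal usage sketch for the routine above (the flag combinations and the filename in the second call are illustrative; everything else comes from the snippet itself):

# With run_training_sample defined as above.
# Default strategy: oversample the COVID-19 class and weight the loss.
run_training_sample(sample_size=300, n_cycles=10)

# Alternative: rely on focal loss instead of class weights (hypothetical run).
run_training_sample(sample_size=300, n_cycles=10,
                    with_focal_loss=True, with_weighted_loss=False,
                    confusion_matrix_filename='train_confusion_matrix_focal')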
Example 3
    def train(self, graph, max_epoch=100, min_delta=0, patience=0):
        model_num = self._model_num
        self._model_num = self._model_num + 1
        learn = Learner(self.data, graph.generate_model(), loss_func=self.loss_func, metrics=self.metrics,
                        callback_fns=[partial(ValueTrackingCallback,
                                              value_holder=self.accuracy,
                                              monitor=self.monitor,
                                              min_delta=min_delta,
                                              patience=patience)])
        # progress_disabled_ctx is a context manager, so it must wrap the fit call.
        with progress_disabled_ctx(learn) as learn:
            learn.fit(max_epoch)
        print(f'Saving model {model_num}...', end='')
        graph.save(os.path.join(self.path, str(model_num)))
        print(' Done!')
        print(f'Model number: {model_num}\nBest accuracy: {self.accuracy.value}')
        return model_num, self.accuracy.value.item()
Example 4
    def create_model(self):
        # BERT model
        bert_model_class = BertForSequenceClassification.from_pretrained(
            'bert-base-uncased', num_labels=6)
        # Loss function: binary cross-entropy with logits (suits multi-label targets)
        loss_func = nn.BCEWithLogitsLoss()
        # Since this is a multi-label classification problem, plain accuracy is not a suitable metric;
        # we use accuracy_thresh with a threshold of 0.25 instead.
        acc_02 = partial(accuracy_thresh, thresh=0.25)
        self.model = bert_model_class

        # learner function
        self.learner = Learner(self.data_bunch,
                               self.model,
                               loss_func=loss_func,
                               model_dir=self.model_dir,
                               metrics=acc_02)
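
To see what the thresholded metric does for multi-label outputs, here is a small self-contained sketch that mirrors the behaviour of fastai v1's accuracy_thresh (sigmoid the logits, binarise at the threshold, then average element-wise agreement); the tensors are made-up toy values:

import torch

def accuracy_thresh_demo(logits, targets, thresh=0.25):
    # Same idea as fastai's accuracy_thresh: sigmoid, threshold, then compare
    # element-wise against the 0/1 target matrix and average the matches.
    probs = torch.sigmoid(logits)
    return ((probs > thresh) == targets.bool()).float().mean()

logits = torch.tensor([[2.0, -1.5, 0.3], [-0.2, 1.0, -3.0]])  # 2 samples, 3 labels
targets = torch.tensor([[1., 0., 1.], [0., 1., 0.]])
print(accuracy_thresh_demo(logits, targets))  # tensor(0.8333): 5 of 6 labels agree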
Example 5
def main(epochs):
    Task.init(project_name="examples",
              task_name="fastai with tensorboard callback")

    path = untar_data(URLs.MNIST_SAMPLE)

    data = ImageDataBunch.from_folder(path,
                                      ds_tfms=(rand_pad(2, 28), []),
                                      bs=64,
                                      num_workers=0)
    data.normalize(imagenet_stats)

    learn = cnn_learner(data, models.resnet18, metrics=accuracy)
    tboard_path = Path("data/tensorboard/project1")
    learn.callback_fns.append(
        partial(LearnerTensorboardWriter, base_dir=tboard_path, name="run0"))

    accuracy(*learn.get_preds())
    learn.fit_one_cycle(epochs, 0.01)
Example 6
def train_unet(epochs=5, batch_size=1, lr=0.1, val_percent=0.1):
    print("Start script")
    if args.isgrid is False:
        filename = "/media/adrian/E2B45A26B459FD8B/psfmaskmoving_zernike2d_128_n_1_s_0_p_0_b_0__noise_1_2dzernike_test/"
        batch_size = int(batch_size // 1.5)
    else:
        filename = "/idiap/temp/ashajkofci/psfmaskmoving_zernike2d_128_n_1_s_0_p_0_b_0__noise_1_2dzernike_train/"

        batch_size = batch_size
    os.environ['TORCH_HOME'] = os.getcwd() + '/data'

    #transform = transforms.Compose([
    #    transforms.ToPILImage(),
    #    transforms.RandomCrop([450, 450]),
    #    transforms.RandomVerticalFlip(),
    #    transforms.RandomHorizontalFlip(),
    #    transforms.ToTensor(),
    #                               ])

    all_files_list = glob.glob(filename + "*/*.png")
    print('{} files found in {}'.format(len(all_files_list), filename))

    all_files_list = [x for x in all_files_list if "_mask" not in x]
    print('{} files found'.format(len(all_files_list)))

    all_files_list = sorted(all_files_list, key=lambda name: int(name[-13:-4]))
    print('{} files found'.format(len(all_files_list)))

    #all_files_list = all_files_list[:100000]
    #all_labels_list = lambda x: str(x).replace('.png', '_mask.png')

    num_files = len(all_files_list)
    print('{} files found'.format(len(all_files_list)))
    print("Convert to Dataframe")
    #df = pd.DataFrame({'data':all_files_list, 'label':all_labels_list})
    df = pd.DataFrame(all_files_list)

    print("Create transforms")
    print("Create data")

    #class MyImageList(ImageList):
    #    def open(self, fn):
    #        image = Image(grayloader(fn, onedim=True))

    #       return image

    src = (MyImageImageList.from_df(df,
                                    path='/').split_by_rand_pct(val_percent))

    print("Creating dataloaders")

    data_gen = get_data(bs=batch_size, size=224, src=src)

    #dataset = DatasetFromFolder(filename, loader = grayloader,  transform=transform, target_transform=transform)

    #n_val = int(len(dataset) * val_percent)
    #n_train = len(dataset) - n_val
    #train, val = rs(dataset, [n_train, n_val])
    #data = ImageDataBunch.create(train, val, bs=batch_size, num_workers=4)
    #data.c = 2
    #data.normalize(imagenet_stats)
    #data_gen.show_batch(2)
    #plt.show()
    print("Creating learner")
    #optar = partial(DiffGrad, version=1, betas=(.95, .999), eps=1e-6)
    optar = partial(Ranger, betas=(0.95, 0.99), eps=1e-6)

    selfattention = False
    modelname = 'resnet34unetanneal'
    learn = unet_learner(data_gen,
                         model_resnet34,
                         pretrained=True,
                         self_attention=selfattention,
                         norm_type=NormType.Weight,
                         loss_func=loss_with_flag,
                         y_range=(0., 1.0))

    learn.model_dir = os.getcwd() + '/data'
    learn.opt_func = optar
    print("Summary...")
    dt_string = datetime.now().strftime("%d-%m-%Y-%H:%M:%S")
    #writer = SummaryWriter(comment=f'PROJ_{args.nbgrid}_LR_{lr}_BS_{batch_size}_FP_{args.fakepenalty}_N_{args.network}')
    name = f'{dt_string}_PROJ_{args.nbgrid}_LR_{lr}_BS_{batch_size}_N_{args.network}_ATT_{selfattention}_MODEL_{modelname}'
    mycallback = partial(TensorboardLogger, path='runs', run_name=name)
    learn.callback_fns.append(mycallback)
    learn.model.layers = learn.model.layers[:-1]
    print(learn.summary())
    #learn.lr_find(stop_div = False, num_it=200)
    #learn.recorder.plot(suggestion=True)
    #plt.show()

    flattenAnneal(learn, lr, epochs, 0.7)

    #learn.fit_one_cycle(epochs, max_lr = lr)
    torch.save(learn.model,
               os.getcwd() + '/data/' + name + '_TORCH_INTERMEDIATE.pth')
    learn.export(os.getcwd() + '/data/' + name + '_INTERMEDIATE_EXPORT.pth')

    #learn.fit_one_cycle(epochs, max_lr=lr/5.0)
    learn.unfreeze()
    flattenAnneal(learn, lr / 5, epochs, 0.7)
    mycallback = partial(TensorboardLogger,
                         path='runs',
                         run_name=name + '_UNFREEZE')
    learn.callback_fns[-1] = mycallback

    torch.save(learn.model, os.getcwd() + '/data/' + name + '_TORCH.pth')
    learn.export(os.getcwd() + '/data/' + name + '_EXPORT.pth')
Example 7
def train_unet(epochs=5, batch_size=1, lr=0.1, val_percent=0.1):
    print("Start script")
    if args.isgrid is False:
        filename = "/media/adrian/OMENDATA/data/movementgenerator_data_multiple2/"
        batch_size = int(batch_size//1.5)
    else:
        filename = "/idiap/temp/ashajkofci/movementgenerator_data_multiple2/"

        batch_size = batch_size
    os.environ['TORCH_HOME'] = os.getcwd()+'/data'


    all_files_list = glob.glob(filename + "*/*.png")
    print('{} files found in {}'.format(len(all_files_list), filename))

    all_files_list = sorted(all_files_list, key=lambda name: int(name[-13:-4]))
    print('{} files found'.format(len(all_files_list)))

    print("Convert to Dataframe")
    df = pd.DataFrame(all_files_list)

    print("Create transforms")
    print("Create data")

    class MyImageList(ImageList):
        def open(self, fn):
            image = np.load(fn)['arr_0']
            image = np.transpose(image, (2, 0, 1))
            image[1] /= 128.0
            image[0] /= 128.0
            image[3] /= 5.0
            image = torch.Tensor(image)
            #print('{} {} {}'.format(image.min(), image.max(), image.mean()))

            image = Image(image)
            return image

    class MyImageImageList(ImageImageList):
        _label_cls = MyImageList

        def open(self, fn):
            return Image(grayloader(fn))

    def get_data(bs, size):
        data = (src.label_from_func(lambda x: str(x).replace('.png', '_mask.npy.npz'))
                .transform(get_transforms(do_flip = False, max_zoom=1.0, max_warp=0.0, max_rotate=0, max_lighting=0.3), tfm_y=False)
                .transform([rand_crop(), rand_crop()], tfm_y=True, size= size)
                .databunch(bs=bs).normalize(imagenet_stats, do_y=False))

        data.c = 4
        return data

    src = (MyImageImageList.from_df(df, path='/')
            .split_by_rand_pct(val_percent))

    print("Creating dataloaders")

    data_gen = get_data(bs=batch_size, size=448)

    #dataset = DatasetFromFolder(filename, loader = grayloader,  transform=transform, target_transform=transform)

    #n_val = int(len(dataset) * val_percent)
    #n_train = len(dataset) - n_val
    #train, val = rs(dataset, [n_train, n_val])
    #data = ImageDataBunch.create(train, val, bs=batch_size, num_workers=4)
    #data.c = 2
    #data.normalize(imagenet_stats)
    #data_gen.show_batch(2)
    #plt.show()
    print("Creating learner")
    optar = partial(DiffGrad, version=1, betas=(.95, .999), eps=1e-6)




    selfattention=False
    modelname='resnet34'

    learn = unet_learner(data_gen, model_resnet34, pretrained=True, loss_func = MSELossFlat(), self_attention=False)

    learn.model_dir = os.getcwd()+'/data'
    learn.opt_func = optar
    print("Summary...")
    dt_string = datetime.now().strftime("%d-%m-%Y-%H:%M:%S")
    name =f'{dt_string}_PROJ_{args.nbgrid}_LR_{lr}_BS_{batch_size}_N_{args.network}_ATT_{selfattention}_MODEL_{modelname}'
    mycallback = partial(TensorboardLogger, path='runs', run_name=name, run_type='unet')
    learn.callback_fns.append(mycallback)
    learn.callback_fns.append(partial(SaveModelCallback,every='improvement', name='{}/{}.pth'.format(dir_checkpoint, name)))
    #learn.model.layers = learn.model.layers[:-1]
    print(learn.summary())
    #learn.lr_find(stop_div = False, num_it=200)
    #learn.recorder.plot(suggestion=True)
    #plt.show()

    learn.fit_one_cycle(epochs, max_lr = lr)
    torch.save(learn.model, 'data/'+name+'_TORCH_INTERMEDIATE.pth')
    learn.unfreeze()
    learn.fit_one_cycle(epochs, max_lr = slice(lr/50,lr/5))
    learn.save(name+'_FINAL')
    torch.save(learn.model, 'data/'+name+'_TORCH.pth')
Example 8
def train_cnn2(epochs=5, batch_size=1, lr=0.1, val_percent=0.1):
    print("Start script")
    if args.isgrid is False:
        filename = "/media/adrian/E2B45A26B459FD8B/movementgenerator_data_multiple/"
        batch_size = int(batch_size//1.5)
    else:
        filename = "/idiap/temp/ashajkofci/movementgenerator_data_multiple/"

        batch_size = batch_size
    os.environ['TORCH_HOME'] = os.getcwd()+'/data'

    #transform = transforms.Compose([
    #    transforms.ToPILImage(),
    #    transforms.RandomCrop([450, 450]),
    #    transforms.RandomVerticalFlip(),
    #    transforms.RandomHorizontalFlip(),
    #    transforms.ToTensor(),
    #                               ])

    all_files_list = glob.glob(filename + "*/*.png")
    print('{} files found in {}'.format(len(all_files_list), filename))

    all_files_list = sorted(all_files_list, key=lambda name: int(name[-13:-4]))
    print('{} files found'.format(len(all_files_list)))

    _file_csv = read_csv(os.path.expanduser(filename + "parameters.txt"), header=None)
    _labels = _file_csv.values.astype(np.float64)
    print('{} labels found'.format(len(_labels)))

    print('{} files found'.format(len(all_files_list)))

    print("Convert to Dataframe")
    df = pd.DataFrame(all_files_list)

    print("Create transforms")
    print("Create data")


    class MyImageImageList(ImageList):
        _label_cls = FloatList

        def open(self, fn):
            return Image(grayloader(fn))

    def ff(input):
        out =  _labels[int(input[-13:-4])]
        out[0] /= 255.0
        out[1] /= 255.0
        #out[3] /= 5.0
        out = out.tolist()
        return out

    def get_data(bs, size):
        data = (src.label_from_func(ff)
                .transform(get_transforms(do_flip = False, max_zoom=1.0, max_warp=0.0, max_rotate=0, max_lighting=0.3), tfm_y=False)
                .transform([rand_crop(), rand_crop()], tfm_y=False, size= size)
                .databunch(bs=bs).normalize(imagenet_stats, do_y=False))

        data.c = 120
        return data

    src = (MyImageImageList.from_df(df, path='/')
            .split_by_rand_pct(val_percent))

    print("Creating dataloaders")

    data_gen = get_data(bs=batch_size, size=448)

    #dataset = DatasetFromFolder(filename, loader = grayloader,  transform=transform, target_transform=transform)

    #n_val = int(len(dataset) * val_percent)
    #n_train = len(dataset) - n_val
    #train, val = rs(dataset, [n_train, n_val])
    #data = ImageDataBunch.create(train, val, bs=batch_size, num_workers=4)
    #data.c = 2
    #data.normalize(imagenet_stats)
    #data_gen.show_batch(2)
    #plt.show()
    optar = partial(DiffGrad, version=1, betas=(.95, .999), eps=1e-6)


    def minmse(output, target):
        all_losses = torch.zeros(20)
        for i in range(20):
            loss =  torch.pow((output[:,i*5:i*5+4] - target[:, i*5:i*5+4]), 2).sum()
            all_losses[i] = loss
        return torch.min(all_losses)

    class SmoothL1NoOrderLoss(_Loss):

        __constants__ = ['reduction']

        def __init__(self, size_average=None, reduce=None, reduction='mean'):
            super(SmoothL1NoOrderLoss, self).__init__(size_average, reduce, reduction)

        def forward(self, input, target):
            nb_feat = 8
            nb_points = 15
            input = input.double()
            target = target.double()
            with torch.no_grad():
                target2 = target.view(-1, nb_points, nb_feat)
                #out_total = torch.zeros((target2.shape[0] * 15 * nb_feat))
                final_target_total = torch.zeros((target2.shape[0] * nb_points * nb_feat)).double().cuda()

                for batch_image in range(target2.shape[0]):
                    filter_ori = target2[batch_image].unsqueeze(1)
                    data = input.view(-1, 1, nb_points * nb_feat)[batch_image].unsqueeze(0)
                    filter = filter_ori.clone()
                    m = filter.mean(axis=2).unsqueeze(2)
                    m2 = filter.sum(axis=2).view(-1, 1, 1)
                    filter.sub_(m)
                    filter.div_(m2)
                    filter[filter != filter] = 0.0
                    filter_for_mean = torch.ones((1, 1, filter.shape[2])).double().cuda() / filter.shape[2]
                    data_group_mean2 = F.conv1d(data, filter_for_mean, padding=0, stride=filter.shape[2])
                    data_group_mean2 = data_group_mean2.repeat_interleave(filter.shape[2])
                    d2 = (data - data_group_mean2)
                    filtered_data = (F.conv1d(d2.expand((-1, 1, -1)), filter, stride=nb_feat))
                    index_min = torch.argsort(filtered_data, dim=2, descending=True)
                    res = torch.argsort(index_min)
                    final_indexes = (res[0, :] == 0).nonzero()[:, 1].view(-1)
                    final_target = filter_ori[final_indexes, :, :]
                    #out_total[batch_image*data.view(-1).shape[0]:batch_image*data.view(-1).shape[0]+data.view(-1).shape[0]] = data.view(-1)
                    final_target_total[batch_image*data.view(-1).shape[0]:batch_image*data.view(-1).shape[0]+data.view(-1).shape[0]] = final_target.view(-1)
                    # print(out)
                    # print(final_target)
                    #loss += F.smooth_l1_loss(out, final_target, reduction=self.reduction)
                    #loss += (out - final_target).pow(2).mean()

            return F.mse_loss(input.view(-1), final_target_total, reduction=self.reduction)

    class LossMatern(nn.Module):

        def __init__(self):
            super(LossMatern, self).__init__()

        def forward(self,output, target):
            covar_module = gpytorch.kernels.MaternKernel(batch_shape= torch.Size([output.shape[0]]), nu=2.5).cuda()

            class GaussianKernel():
                def __init__(self):
                    pass
                def forward(self, output, target):
                    return torch.exp(-(output-target)**2)
            #covar_module = GaussianKernel()

            nb_feat = 8
            nb_points = 15
            target = target.view(-1, nb_points, nb_feat)
            output = output.view(-1, nb_points, nb_feat)

            similarity_xx = covar_module(output, output).evaluate().mean(dim=2).mean(dim=1)
            similarity_yy = covar_module(target, target).evaluate().mean(dim=2).mean(dim=1)
            similarity_xy = covar_module(output, target).evaluate().mean(dim=2).mean(dim=1)

            loss = similarity_xx + similarity_yy - 2*similarity_xy
            return loss.mean()

    def loss_matern(output, target):
        covar_module = gpytorch.kernels.MaternKernel(batch_shape= torch.Size([output.shape[0]]), nu=2.5).cuda()

        class GaussianKernel():
            def __init__(self):
                pass
            def forward(self, output, target):
                return torch.exp(-(output-target)**2)
        #covar_module = GaussianKernel()

        nb_feat = 8
        nb_points = 15
        target = target.view(-1, nb_points, nb_feat)
        output = output.view(-1, nb_points, nb_feat)

        similarity_xx = covar_module(output, output).evaluate().mean(dim=2).mean(dim=1)
        similarity_yy = covar_module(target, target).evaluate().mean(dim=2).mean(dim=1)
        similarity_xy = covar_module(output, target).evaluate().mean(dim=2).mean(dim=1)

        loss = similarity_xx + similarity_yy - 2*similarity_xy
        return loss.mean()

        #target_full = target.view(-1, nb_feat*15)
        #loss = 0.0
        #for i in range(target.shape[0]):
        #    for ii in range(target.shape[1]):
        #        patch = target[i, ii, :][None, ...]
        #       ncc = NCC(patch)
        #        input = output[i][None,...][None, ...]
        #        ncc_results = ncc(input)
        #        position = np.unravel_index(ncc_results.argmax(), output.shape)[1]
        #        position = position - position % nb_feat
        #        loss +=((target_full[i,position:position+nb_feat] - output[i,position:position+nb_feat])**2).mean()
        #loss /= target.shape[0]
        #return loss

    selfattention=False
    modelname='resnet34'

    a, b = data_gen.one_batch()
    u = b.clone().double().cuda()
    b = b.double().cuda()
    #a = torch.Tensor([[[1,1,1], [0,0,0], [2,2,2], [0,0,0]]]).double().cuda()
    #b = torch.Tensor([[[2,2,2], [0,0,85], [1,1,1], [0,0,0]]]).double().cuda()

    b.requires_grad=True
    #a.requires_grad=True
    criterion = loss_matern
    loss = criterion(b,u)
    loss.backward()
    #res = torch.autograd.gradcheck(LossMatern(), (b, u), eps=1e-3, atol=1e-3, raise_exception=True)
    #print('Gradient check:{}'.format(res))  # res should be True if the gradients are correct.

    print("Creating learner")

    learn = cnn_learner(data_gen, model_resnet34, pretrained=True, loss_func = loss_matern)

    learn.model_dir = os.getcwd()+'/data'
    learn.opt_func = optar
    print("Summary...")
    dt_string = datetime.now().strftime("%d-%m-%Y-%H:%M:%S")
    #writer = SummaryWriter(comment=f'PROJ_{args.nbgrid}_LR_{lr}_BS_{batch_size}_FP_{args.fakepenalty}_N_{args.network}')
    name =f'{dt_string}_PROJ_{args.nbgrid}_LR_{lr}_BS_{batch_size}_N_{args.network}_ATT_{selfattention}_MODEL_{modelname}'
    mycallback = partial(TensorboardLogger, path='runs', run_name=name)
    learn.callback_fns.append(mycallback)
    #learn.callback_fns.append(partial(SaveModelCallback,every='epoch', name='{}/{}.pth'.format(dir_checkpoint, name)))
    #learn.model.layers = learn.model.layers[:-1]
    print(learn.summary())
    #learn.lr_find(stop_div = False, num_it=200)
    #learn.recorder.plot(suggestion=True)
    #plt.show()

    learn.fit_one_cycle(epochs, max_lr = lr)
    torch.save(learn.model, 'data/'+name+'_TORCH_INTERMEDIATE.pth')
    learn.unfreeze()
    learn.fit_one_cycle(epochs, max_lr = slice(lr/100,lr/10))
    torch.save(learn.model, 'data/'+name+'_TORCH.pth')
    learn.save(name+'_FINAL')
Example 9
def train_cnn(epochs=5, batch_size=1, lr=0.1, val_percent=0.1):
    print("Start script")
    if args.isgrid is False:
        filename = "/media/adrian/E2B45A26B459FD8B/movementgenerator_data_realworld_big/"
        batch_size = int(batch_size//1.5)
    else:
        filename = "/idiap/temp/ashajkofci/movementgenerator_data_realworld_big/"

        batch_size = batch_size
    os.environ['TORCH_HOME'] = os.getcwd()+'/data'

    #transform = transforms.Compose([
    #    transforms.ToPILImage(),
    #    transforms.RandomCrop([450, 450]),
    #    transforms.RandomVerticalFlip(),
    #    transforms.RandomHorizontalFlip(),
    #    transforms.ToTensor(),
    #                               ])

    all_files_list = glob.glob(filename + "*/*.png")
    print('{} files found in {}'.format(len(all_files_list), filename))

    all_files_list = sorted(all_files_list, key=lambda name: int(name[-13:-4]))
    print('{} files found'.format(len(all_files_list)))

    _file_csv = read_csv(os.path.expanduser(filename + "parameters.txt"), header=None)
    _labels = _file_csv.values.astype(np.float64)
    print('{} labels found'.format(len(_labels)))

    print('{} files found'.format(len(all_files_list)))

    print("Convert to Dataframe")
    df = pd.DataFrame(all_files_list)

    print("Create transforms")
    print("Create data")


    class MyImageImageList(ImageList):
        _label_cls = FloatList

        def open(self, fn):
            return Image(grayloader(fn))

    def ff(input):
        out =  _labels[int(input[-13:-4])]
        out = out.tolist()
        return out

    def get_data(bs, size):
        data = (src.label_from_func(ff)
                .transform(get_transforms(do_flip = False, max_zoom=1.0, max_warp=0.0, max_rotate=0, max_lighting=0.3), tfm_y=False)
                .transform([rand_crop(), rand_crop()], tfm_y=False, size= size)
                .databunch(bs=bs).normalize(imagenet_stats, do_y=False))

        data.c = 6
        return data

    src = (MyImageImageList.from_df(df, path='/')
            .split_by_idx(list(range(int(val_percent*len(all_files_list))))))

    print("Creating dataloaders")
    patch_size = 112
    data_gen = get_data(bs=batch_size, size=patch_size)

    #dataset = DatasetFromFolder(filename, loader = grayloader,  transform=transform, target_transform=transform)

    #n_val = int(len(dataset) * val_percent)
    #n_train = len(dataset) - n_val
    #train, val = rs(dataset, [n_train, n_val])
    #data = ImageDataBunch.create(train, val, bs=batch_size, num_workers=4)
    #data.c = 2
    #data.normalize(imagenet_stats)
    #data_gen.show_batch(2)
    #plt.show()
    print("Creating learner")
    optar = partial(DiffGrad, version=1, betas=(.95, .999), eps=1e-6)


    selfattention=False
    modelname='resnet34'

    learn = cnn_learner(data_gen, model_resnet34, pretrained=True, loss_func = loss_with_flag)

    learn.model_dir = os.getcwd()+'/data'
    learn.opt_func = optar
    print("Summary...")
    dt_string = datetime.now().strftime("%d-%m-%Y-%H:%M:%S")
    #writer = SummaryWriter(comment=f'PROJ_{args.nbgrid}_LR_{lr}_BS_{batch_size}_FP_{args.fakepenalty}_N_{args.network}')
    name =f'{dt_string}_PROJ_{args.nbgrid}_LR_{lr}_BS_{batch_size}_N_{args.network}_ATT_{selfattention}_MODEL_{modelname}_PATCH_{patch_size}'
    mycallback = partial(TensorboardLogger, path='runs', run_name=name)
    learn.callback_fns.append(mycallback)
    learn.callback_fns.append(partial(SaveModelCallback,every='improvement', name='{}/{}.pth'.format(dir_checkpoint, name)))
    #learn.model.layers = learn.model.layers[:-1]
    print(learn.summary())
    #learn.lr_find(stop_div = False, num_it=200)
    #learn.recorder.plot(suggestion=True)
    #plt.show()

    learn.fit_one_cycle(epochs, max_lr = lr)
    torch.save(learn.model, 'data/'+name+'_TORCH_INTERMEDIATE.pth')
    learn.unfreeze()
    learn.fit_one_cycle(epochs, max_lr = slice(lr/20,lr/5))
    learn.save(name+'_FINAL')
    torch.save(learn.model, 'data/'+name+'_TORCH.pth')
Example 10
print(f"Fast.ai version = {fastai.__version__}")
which_processor()

EPOCHS = 10
LEARNING_RATE = 1e-4
IM_SIZE = 300

BATCH_SIZE = 16
ARCHITECTURE = models.resnet18
path = Path('/app/classifier_data/')

data = (ImageList.from_folder(path).split_by_rand_pct(
    valid_pct=0.2,
    seed=10).label_from_folder().transform(size=IM_SIZE).databunch(
        bs=BATCH_SIZE, num_workers=db_num_workers()).normalize(imagenet_stats))

print(f'number of classes: {data.c}')
print(data.classes)

learn = cnn_learner(
    data,
    ARCHITECTURE,
    metrics=[accuracy],
    callback_fns=[partial(TrainMetricsRecorder, show_graph=True)])
learn.unfreeze()
learn.fit(EPOCHS, LEARNING_RATE)
learn.export(file=Path("/app/classifier_model.pkl"))
_, validation_accuracy = learn.validate(learn.data.valid_dl,
                                        metrics=[accuracy])
print(f'Accuracy on validation set: {100*float(validation_accuracy):3.2f}')
Example 11
def get_metrics(thresh=0.2):
    """Return thresholded accuracy and F-beta metrics for multi-label classification."""
    acc_thresh = partial(accuracy_thresh, thresh=thresh)
    f_score = partial(fbeta, thresh=thresh)
    metrics = [acc_thresh, f_score]
    return metrics
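
A hypothetical usage sketch for get_metrics, built on the small multi-label planet sample that ships with fastai v1 (the dataset choice, image size, and learner settings below are illustrative assumptions):

from fastai.vision import *

# Assumed dataset: fastai's PLANET_SAMPLE, a small CSV-labelled multi-label sample.
path = untar_data(URLs.PLANET_SAMPLE)
data = (ImageList.from_csv(path, 'labels.csv', folder='train', suffix='.jpg')
        .split_by_rand_pct(0.2)
        .label_from_df(label_delim=' ')
        .transform(get_transforms(), size=128)
        .databunch(bs=16)
        .normalize(imagenet_stats))

# get_metrics as defined above supplies accuracy_thresh and fbeta at thresh=0.2.
learn = cnn_learner(data, models.resnet18, metrics=get_metrics(thresh=0.2))
learn.fit_one_cycle(1)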
Example 12
origin_data = vision.ImageList.from_csv(
    PATH, 'train_v2.csv', folder='train-jpg',
    suffix='.jpg').split_by_rand_pct(0.2).label_from_df(label_delim=' ')
data = origin_data.transform(transforms, size=256).databunch().normalize(
    vision.imagenet_stats)
#---------------------OR to set batch size to avoid CUDA out of memory--------------------------------------------
# data = origin_data.transform(transforms, size=256).databunch()
# data.batch_size = 32
# data.normalize(vision.imagenet_stats)
#-------------------------------------------------------------------------------------------------------------------
# Show some sample images, labels are separated by semicolon
data.show_batch(rows=3, figsize=(12, 12))

# In the training pass we use resnet50, with accuracy_thresh rather than plain accuracy
arch = vision.models.resnet50
# Define accuracy threshold and f2 score metrics
acc_thresh = vision.partial(metrics.accuracy_thresh, thresh=THRESHOLD)
f2_score = vision.partial(metrics.fbeta, thresh=THRESHOLD)
learner = vision.cnn_learner(data, arch, metrics=[acc_thresh, f2_score])

# Use LR finder to find good lr
learner.lr_find()
learner.recorder.plot()
LR = 0.01
learner.fit_one_cycle(EPOCHS, max_lr=LR)
learner.save('stage-1-34')

# Finetuning model
learner.unfreeze()
# Use LR finder to find good lr
learner.lr_find()
learner.recorder.plot()
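
The snippet stops right after the unfrozen learning-rate search; a plausible continuation, assuming a maximum learning rate read off the recorder plot (the slice bounds and checkpoint name below are illustrative assumptions, not from the source):

# Hypothetical fine-tuning step after inspecting learner.recorder.plot();
# the discriminative learning-rate slice and the checkpoint name are assumed.
learner.fit_one_cycle(EPOCHS, max_lr=slice(1e-5, LR / 5))
learner.save('stage-2')
learner.export()  # optional: export the full pipeline for inference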