Exemple #1
0
 def set_data(self, data):
     """Store the incoming data and (re)build the learner for it.

     When ``data`` is ``None`` only ``self.data`` is cleared; any existing
     ``self.learn`` and the label text are left as they are. Otherwise a
     new ``Learner`` wrapping ``self.model`` is created and its summary
     is shown in ``self.label``.
     """
     if data is None:
         self.data = None
         return

     self.data = data
     self.learn = Learner(
         self.data,
         self.model,
         loss_func=nn.CrossEntropyLoss(),
         metrics=accuracy,
         add_time=False,
         bn_wd=False,
         silent=True,
     )
     summary_text = self.learn.summary()
     self.label.setText(summary_text)
def get_learn(data, model, name):
    """Create a `Learner` for `model` on `data` with mixup and fp16 enabled.

    The metrics come from `get_metrics()`. The learner is created with
    `path="models"` and `model_dir=name`, then `mixup(stack_y=False)` and
    `to_fp16()` are applied, in that order, before it is returned.
    """
    tracked_metrics = get_metrics()
    base_learner = Learner(
        data,
        model,
        metrics=tracked_metrics,
        path="models",
        model_dir=name,
    )
    mixed = base_learner.mixup(stack_y=False)
    return mixed.to_fp16()
def fit_with_annealing(learn:Learner, num_epoch:int, lr:float=1e-3, annealing_start:float=0.7,
                    callbacks:list=None)->None:
    """Fit `learn` with a flat learning rate followed by cosine annealing.

    The first `annealing_start` fraction of all training iterations uses
    `lr` unchanged; the remaining iterations schedule `lr` with
    `annealing_cos`. The scheduler is appended to `learn.callbacks`
    (and stays there after the call) before `learn.fit` is run.
    """
    iters_per_epoch = len(learn.data.train_dl)
    total_iters = iters_per_epoch * num_epoch
    flat_iters = int(total_iters * annealing_start)

    flat_phase = TrainingPhase(flat_iters).schedule_hp('lr', lr)
    anneal_phase = TrainingPhase(total_iters - flat_iters).schedule_hp(
        'lr', lr, anneal=annealing_cos)

    scheduler = GeneralScheduler(learn, [flat_phase, anneal_phase])
    learn.callbacks.append(scheduler)
    learn.fit(num_epoch, callbacks=callbacks)
Exemple #4
0
def train_classifier(model, config, x_train, y_train, x_val, y_val, train_tfms=None):
    """Train `model` as a classifier with early stopping and return the learner.

    `config` is read for the keys 'batch_size', 'patience', 'epochs',
    'start_lr' and 'weight_decay'. `train_tfms` is applied to the training
    dataset only; `None` means no transforms.
    """
    criterion = torch.nn.CrossEntropyLoss()
    tfms = [] if train_tfms is None else train_tfms

    train_set = ImageArrayDS(x_train, y_train, tfms)
    val_set = ImageArrayDS(x_val, y_val)
    bunch = ImageDataBunch.create(train_set, val_set, bs=config['batch_size'])

    early_stopping = partial(EarlyStoppingCallback, min_delta=1e-3, patience=config['patience'])
    learner = Learner(
        bunch,
        model,
        metrics=accuracy,
        loss_func=criterion,
        callback_fns=[early_stopping],
    )
    learner.fit(config['epochs'], config['start_lr'], wd=config['weight_decay'])
    return learner
Exemple #5
0
def predict(learn: Learner, name: str):
    """Write prediction and diagnostic CSV files for `learn`, then export it.

    Files written, each suffixed with `name`:
      * ``submission_<name>.csv``   - test-set predictions
      * ``loss_<name>.csv``         - the recorder's losses
      * ``train_pred_<name>.csv``   - per-sample training predictions
      * ``train_index_<name>.csv``  - ROC points and derived counts
      * ``model_<name>.pth``        - exported learner

    The learner is exported with ``destroy=True``, so it should not be
    used after this call.
    """
    # submission.csv
    preds, _ = learn.get_preds(ds_type=DatasetType.Test)
    # NOTE(review): `test` is not defined in this function; presumably a
    # module-level DataFrame of test ids - confirm. It is mutated in place.
    test['has_cactus'] = preds.numpy()[:, 0]
    test.to_csv('submission_{}.csv'.format(name), index=False)
    print('Finish creating submission_{}.csv'.format(name))
    # loss.csv
    id_ = range(len(learn.recorder.losses))
    loss_df = pd.DataFrame({
        'id': id_,
        'loss': np.array(learn.recorder.losses)
    })
    loss_df.to_csv('loss_{}.csv'.format(name), index=False)
    print('Finish creating loss_{}.csv'.format(name))
    # Calculate some metrics on the training set
    preds, targets = learn.get_preds(ds_type=DatasetType.Train)
    # Predicted class index is the arg-max over axis 1 of the predictions.
    preds_label = np.argmax(preds.numpy(), axis=1)
    id_ = range(len(preds))
    train_pred_df = pd.DataFrame({
        'id': id_,
        'preds': preds.numpy()[:, 0],
        'preds_label': preds_label,
        'targets': targets.numpy()
    })
    # Unlike the two CSVs above, this one is written with the DataFrame index.
    train_pred_df.to_csv('./train_pred_{}.csv'.format(name))
    print('Finish creating train_pred_{}.csv'.format(name))
    correct_count = np.equal(preds_label, targets.numpy()).sum()
    len_preds = len(preds)
    incorrect_count = len_preds - correct_count
    # NOTE(review): `metrics` is resolved from module scope; the results are
    # converted with `.numpy()`, so they are expected to be tensors.
    fpr, tpr = metrics.roc_curve(preds[:, 0], targets)
    fpr, tpr = fpr.numpy(), tpr.numpy()
    # NOTE(review): the counts below scale the rates by the total number of
    # samples. FPR/TPR are conventionally ratios over the negative/positive
    # subsets, so these look like approximations at best - confirm intent.
    FP = np.floor(fpr * len_preds)
    FN = incorrect_count - FP
    TP = np.floor(tpr * len_preds)
    TN = correct_count - TP
    id_ = range(len(fpr))
    train_index_df = pd.DataFrame({
        'id': id_,
        'fpr': fpr,
        'tpr': tpr,
        'TP': TP,
        'TN': TN,
        'FP': FP,
        'FN': FN
    })
    train_index_df.to_csv('./train_index_{}.csv'.format(name))
    print('Finish creating train_index_{}.csv'.format(name))
    # Destroy learn and save the model
    learn.export('./model_{}.pth'.format(name), destroy=True)
def save_learner(learn: Learner,
                 with_focal_loss=False,
                 with_oversampling=False,
                 sample_size=None,
                 with_weighted_loss=False):
    """Export `learn` and separately dump its layer groups.

    The export file name is built from `config.PIPELINE_SAVE_FILE`,
    `_version` and the postfix returned by `_get_postfix` for the given
    training options, and is placed in `config.TRAINED_MODEL_DIR`.
    """
    variant_suffix = _get_postfix(with_focal_loss, with_oversampling,
                                  sample_size, with_weighted_loss)
    target = config.TRAINED_MODEL_DIR / f'{config.PIPELINE_SAVE_FILE}{_version}{variant_suffix}.pkl'

    learn.export(target)

    # Workaround for a fastai bug: layer_groups are missing from the export,
    # so they are persisted next to it.
    layer_groups_file = f'{target}_layer_groups'
    joblib.dump(learn.layer_groups, layer_groups_file)
Exemple #7
0
 def train(self, graph, max_epoch=100, min_delta=0, patience=0):
     """Train the model generated by `graph` and save the graph to disk.

     Args:
         graph: object providing `generate_model()` and `save(path)`.
         max_epoch: maximum number of epochs passed to `Learner.fit`.
         min_delta: forwarded to `ValueTrackingCallback`.
         patience: forwarded to `ValueTrackingCallback`.

     Returns:
         Tuple `(model_num, best_value)`: `model_num` is the sequence
         number assigned to this run and `best_value` is
         `self.accuracy.value.item()` after training.
     """
     # Reserve this run's number before training so that the counter has
     # already advanced even if training fails.
     model_num = self._model_num
     self._model_num = self._model_num + 1
     learn = Learner(self.data, graph.generate_model(), loss_func=self.loss_func, metrics=self.metrics,
                     callback_fns=[partial(ValueTrackingCallback,
                                           value_holder=self.accuracy,
                                           monitor=self.monitor,
                                           min_delta=min_delta,
                                           patience=patience)])
     # progress_disabled_ctx is a context manager: merely constructing it
     # (as the previous code did) has no effect. It must be entered for
     # the progress output to be suppressed during fit.
     with progress_disabled_ctx(learn):
         learn.fit(max_epoch)
     print(f'Saving model {model_num}...', end='')
     graph.save(os.path.join(self.path, str(model_num)))
     print(' Done!')
     print(f'Model number: {model_num}\nBest accuracy: {self.accuracy.value}')
     return model_num, self.accuracy.value.item()
Exemple #8
0
    def create_model(self):
        """Instantiate the BERT classifier and wrap it in a `Learner`.

        Sets `self.model` to a pretrained 'bert-base-uncased' sequence
        classifier with 6 labels and `self.learner` to a `Learner` over
        `self.data_bunch` that stores models in `self.model_dir`.
        """
        pretrained_bert = BertForSequenceClassification.from_pretrained(
            'bert-base-uncased', num_labels=6)

        # Multi-label problem: binary cross entropy on logits as the loss,
        # and thresholded accuracy (threshold 0.25) instead of plain
        # accuracy as the metric.
        criterion = nn.BCEWithLogitsLoss()
        thresholded_accuracy = partial(accuracy_thresh, thresh=0.25)

        self.model = pretrained_bert
        self.learner = Learner(
            self.data_bunch,
            self.model,
            loss_func=criterion,
            model_dir=self.model_dir,
            metrics=thresholded_accuracy,
        )
Exemple #9
0
def _save_classification_interpert(learn: Learner, confusion_matrix_filename='confusion_matrix'):
    """Persist confusion matrices and diagnostic plots for `learn`.

    Dumps the train and validation confusion matrices with joblib
    (`train_<name>.pkl`, `valid_<name>.pkl`), then saves the training
    confusion-matrix figure and the top-9 training losses figure
    ('top_losses.png'). No validation figure is written.
    """
    dataset_types = fastai.vision.DatasetType
    on_train = learn.interpret(ds_type=dataset_types.Train)
    on_valid = learn.interpret(ds_type=dataset_types.Valid)

    joblib.dump(on_train.confusion_matrix(), f'train_{confusion_matrix_filename}.pkl')
    joblib.dump(on_valid.confusion_matrix(), f'valid_{confusion_matrix_filename}.pkl')

    matrix_fig = on_train.plot_confusion_matrix(return_fig=True)
    matrix_fig.savefig(f'train_{confusion_matrix_filename}', dpi=200)

    losses_fig = on_train.plot_top_losses(9, return_fig=True, figsize=(14,14))
    losses_fig.savefig('top_losses.png', dpi=200)
Exemple #10
0
def custom_tta(learn:Learner, ds_type:DatasetType=DatasetType.Valid):
    """Yield one set of predictions per crop configuration (20 in total).

    For each crop index `setupNewCrop` is called and the predictions of
    `learn.model` over the `ds_type` data loader are yielded.
    `fastai.vision.data.open_image` is saved up front and restored when
    the generator finishes or is closed (presumably because
    `setupNewCrop` replaces it - confirm against that helper).
    """
    loader = learn.dl(ds_type)
    _dataset = loader.dataset  # unused; attribute access kept as in the original

    saved_open_image = fastai.vision.data.open_image
    crop_count = 20
    try:
        for crop_index in range(crop_count):
            setupNewCrop(crop_index)
            outputs = get_preds(learn.model, loader, activ=_loss_func2activ(learn.loss_func))
            yield outputs[0]
    finally:
        fastai.vision.data.open_image = saved_open_image
def test_freeze_unfreeze_effnet():
    """Check that `cnn_learner` freezes EfficientNetB1 and `unfreeze` undoes it.

    A frozen `cnn_learner` model must have fewer trainable parameters than
    a plain `Learner` around the reference pretrained EfficientNet; after
    `unfreeze()` both counts must match.
    """
    this_tests(cnn_learner)

    def trainable_param_count(model: nn.Module):
        total = 0
        for param in model.parameters():
            if param.requires_grad:
                total += param.numel()
        return total

    mnist_path = untar_data(URLs.MNIST_TINY)
    data = ImageDataBunch.from_folder(mnist_path, size=64)
    # Pretend there are 1000 classes so the head matches the pretrained model.
    data.c = 1000

    cnn_learn = cnn_learner(data, EfficientNetB1, pretrained=True)
    ref_learn = Learner(data, EfficientNet.from_pretrained("efficientnet-b1"))

    # cnn_learner returns a frozen network by default.
    assert trainable_param_count(cnn_learn.model) < trainable_param_count(ref_learn.model)

    cnn_learn.unfreeze()
    assert trainable_param_count(cnn_learn.model) == trainable_param_count(ref_learn.model)
Exemple #12
0
def model_to_learner(model: nn.Module,
                     im_size: int = IMAGENET_IM_SIZE) -> Learner:
    """Wrap a PyTorch ImageNet model in a fastai `Learner` for prediction.

    Args:
        model (nn.Module): Base ImageNet model, e.g. models.resnet18().
        im_size (int): Image size the model expects.

    Returns:
        Learner: a learner over an empty ImageNet-labelled DataBunch.
    """
    # fast.ai needs a DataBunch to build a Learner. For prediction without
    # retraining an empty one is enough; `single_from_classes` is deprecated
    # but remains the simplest way to get it.
    # Related thread: https://forums.fast.ai/t/solved-get-prediction-from-the-imagenet-model-without-creating-a-databunch/36197/5
    placeholder_bunch = ImageDataBunch.single_from_classes(
        "", classes=imagenet_labels(), size=im_size)
    normalized_bunch = placeholder_bunch.normalize(imagenet_stats)
    return Learner(normalized_bunch, model)
def unet_learner(
    data: DataBunch,
    arch: Callable,
    pretrained: bool = True,
    blur_final: bool = True,
    norm_type: Optional[NormType] = NormType,
    split_on: Optional[SplitFuncOrIdxList] = None,
    blur: bool = False,
    self_attention: bool = False,
    y_range: Optional[Tuple[float, float]] = None,
    last_cross: bool = True,
    bottle: bool = False,
    cut: Union[int, Callable] = None,
    hypercolumns=True,
    **learn_kwargs: Any,
) -> Learner:
    """Build a Unet learner from `data` and `arch`.

    The body is created from `arch` (optionally cut at `cut`) and wrapped
    in `DynamicUnet_Hcolumns` when `hypercolumns` is true, otherwise in
    `DynamicUnet`. The learner is split with `split_on` (falling back to
    the architecture's default split), frozen when `pretrained`, and
    `model[2]` is initialised with Kaiming-normal weights.

    NOTE(review): the default for `norm_type` is the `NormType` class
    itself rather than one of its members - confirm this is intended.
    """
    arch_meta = cnn_config(arch)
    encoder = create_body(arch, pretrained, cut)

    if hypercolumns:
        unet_cls = DynamicUnet_Hcolumns
    else:
        unet_cls = DynamicUnet

    unet = unet_cls(
        encoder,
        n_classes=data.c,
        blur=blur,
        blur_final=blur_final,
        self_attention=self_attention,
        y_range=y_range,
        norm_type=norm_type,
        last_cross=last_cross,
        bottle=bottle,
    )
    model = to_device(unet, data.device)

    learn = Learner(data, model, **learn_kwargs)
    learn.split(ifnone(split_on, arch_meta["split"]))
    if pretrained:
        learn.freeze()
    apply_init(model[2], nn.init.kaiming_normal_)
    return learn
Exemple #14
0
def unet_learner_wide(data: DataBunch,
                      arch: Callable,
                      pretrained: bool = True,
                      blur_final: bool = True,
                      norm_type: Optional[NormType] = NormType,
                      split_on: Optional[SplitFuncOrIdxList] = None,
                      blur: bool = False,
                      self_attention: bool = False,
                      y_range: Optional[Tuple[float, float]] = None,
                      last_cross: bool = True,
                      bottle: bool = False,
                      nf_factor: int = 1,
                      **kwargs: Any) -> Learner:
    """Build a wide Unet learner from `data` and `arch`.

    The body created from `arch` is wrapped in `DynamicUnetWide` (with
    `nf_factor` forwarded to it) and moved to `data.device`. The learner
    is split with `split_on` (falling back to the architecture's default
    split), frozen when `pretrained`, and `model[2]` is initialised with
    Kaiming-normal weights.

    NOTE(review): the default for `norm_type` is the `NormType` class
    itself rather than one of its members - confirm this is intended.
    """
    arch_meta = cnn_config(arch)
    encoder = create_body(arch, pretrained)

    wide_unet = DynamicUnetWide(
        encoder,
        n_classes=data.c,
        blur=blur,
        blur_final=blur_final,
        self_attention=self_attention,
        y_range=y_range,
        norm_type=norm_type,
        last_cross=last_cross,
        bottle=bottle,
        nf_factor=nf_factor,
    )
    # The model is placed on whatever device the data bunch uses.
    model = to_device(wide_unet, data.device)

    learn = Learner(data, model, **kwargs)
    learn.split(ifnone(split_on, arch_meta['split']))
    if pretrained:
        learn.freeze()
    apply_init(model[2], nn.init.kaiming_normal_)
    return learn
def benchmark_uncertainty(config):
    """Benchmark uncertainty estimators by how well they flag wrong predictions.

    For each of `config['repeats']` runs a model is trained with early
    stopping, and every estimator in `config['estimators']` is scored by
    the ROC-AUC of its uncertainty against the model's mistakes on the
    validation set. ROC curves of the last run and a box plot of all
    scores are saved under ``ROOT_DIR/experiments/data/ood``, and the
    scores are also written to a CSV next to the box plot.

    `config` is read for: 'repeats', 'prepare_dataset', 'train_size',
    'batch_size', 'model_type', 'patience', 'epochs', 'start_lr',
    'weight_decay', 'estimators', 'nn_runs' and 'name'.

    NOTE(review): `label` used in the output file names is not defined in
    this function; it is expected to exist at module scope.
    """
    results = []
    plt.figure(figsize=(10, 8))
    last_repeat = config['repeats'] - 1
    for i in range(config['repeats']):
        x_set, y_set, x_val, y_val, train_tfms = config['prepare_dataset'](
            config)

        # Sub-sample the training pool (stratified) when it is larger than
        # the requested training size.
        if len(x_set) > config['train_size']:
            _, x_train, _, y_train = train_test_split(
                x_set, y_set, test_size=config['train_size'], stratify=y_set)
        else:
            x_train, y_train = x_set, y_set

        train_ds = ImageArrayDS(x_train, y_train, train_tfms)
        val_ds = ImageArrayDS(x_val, y_val)
        data = ImageDataBunch.create(train_ds, val_ds, bs=config['batch_size'])

        loss_func = torch.nn.CrossEntropyLoss()
        np.set_printoptions(threshold=sys.maxsize, suppress=True)

        model = build_model(config['model_type'])
        callbacks = [
            partial(EarlyStoppingCallback,
                    min_delta=1e-3,
                    patience=config['patience'])
        ]
        learner = Learner(data,
                          model,
                          metrics=accuracy,
                          loss_func=loss_func,
                          callback_fns=callbacks)
        learner.fit(config['epochs'],
                    config['start_lr'],
                    wd=config['weight_decay'])

        images = torch.FloatTensor(x_val).cuda()

        probabilities = F.softmax(model(images), dim=1).detach().cpu().numpy()
        predictions = np.argmax(probabilities, axis=-1)
        # 1 where the model is wrong, 0 where it is right. This is the same
        # for every estimator, so it is computed once per run. `np.int` was
        # removed from NumPy; the builtin `int` is the equivalent dtype.
        mistake = 1 - (predictions == y_val).astype(int)

        for name in config['estimators']:
            ue = calc_ue(model, images, probabilities, name, config['nn_runs'])

            roc_auc = roc_auc_score(mistake, ue)
            print(name, roc_auc)
            results.append((name, roc_auc))

            # Only the final run contributes curves to the ROC figure.
            if i == last_repeat:
                fpr, tpr, thresholds = roc_curve(mistake, ue, pos_label=1)
                plt.plot(fpr, tpr, label=name, alpha=0.8)
                plt.xlabel('FPR')
                plt.ylabel('TPR')

    out_dir = Path(ROOT_DIR) / 'experiments' / 'data' / 'ood'
    plt.title(f"{config['name']} uncertainty ROC")
    plt.legend()
    roc_name = f"var_{label}_roc_{config['name']}_{config['train_size']}_{config['nn_runs']}"
    plt.savefig(out_dir / roc_name)
    # plt.show()

    df = pd.DataFrame(results, columns=['Estimator type', 'ROC-AUC score'])
    df = df.replace('mc_dropout', 'MC dropout')
    df = df.replace('decorrelating_sc', 'decorrelation')
    df = df[df['Estimator type'] != 'k_dpp_noisereg']
    print(df)
    fig, ax = plt.subplots(figsize=(8, 6))
    plt.subplots_adjust(left=0.2)

    with sns.axes_style('whitegrid'):
        sns.boxplot(data=df,
                    x='ROC-AUC score',
                    y='Estimator type',
                    orient='h',
                    ax=ax)

    ax.yaxis.grid(True)
    ax.xaxis.grid(True)

    plt.title(f'{config["name"]} wrong prediction ROC-AUC')

    boxplot_name = f"var_{label}_boxplot_{config['name']}_{config['train_size']}_{config['nn_runs']}"
    plt.savefig(out_dir / boxplot_name)
    # The suffix must be joined to the file name before the path division:
    # `out_dir / name + '.csv'` would evaluate as `(out_dir / name) + '.csv'`
    # and raise TypeError because a Path cannot be added to a str.
    df.to_csv(out_dir / f"{boxplot_name}.csv")
            nn.Dropout(p=0.5),
            nn.Linear(512, num_classes),
        )

    def forward(self, x):
        """Run `x` through stages l1..l4 and the classifier; return log-softmax.

        The stage output is flattened to (batch, -1) before the classifier,
        and the log-softmax is taken over dimension 1.
        """
        features = x
        for stage in (self.l1, self.l2, self.l3, self.l4):
            features = stage(features)

        batch_size = features.size(0)
        flattened = features.view(batch_size, -1)
        logits = self.classifier(flattened)
        return F.log_softmax(logits, dim=1)


if __name__ == "__main__":
    # The dataset folder lives next to this script; resolve it to an
    # absolute path.
    datasetdir = os.path.abspath(
        os.path.join(os.path.dirname(__file__), './kuzu_mnist'))

    # Load dataset
    databunch = get_databunch(datasetdir)
    print('Dataset loaded')

    # Wrap a fresh VGG model in a learner that tracks accuracy.
    learn = Learner(databunch, VGG(), metrics=accuracy)

    # Train for a single epoch, then store the weights.
    learn.fit(1)
    learn.save('vgg_model_with_norm')
Exemple #17
0
def main():
    """Compare dropout-mask uncertainty estimators across several models.

    Each model is trained with early stopping; then, for every repeat and
    every mask, the NDCG between the negated log-likelihood and the
    estimated uncertainty on a random validation subset is recorded.
    Finally box plots of the NDCG scores and of the accuracies are shown.

    NOTE(review): `config`, `lr` and `weight_decay` are not defined in
    this function; they are expected to exist at module scope.
    """
    data, x_train, y_train, x_val, y_val = load_data()
    loss_func = torch.nn.CrossEntropyLoss()

    # NOTE(review): the models are instantiated once here, so every pass of
    # the `model_runs` loop below continues training the same instances
    # rather than starting from fresh weights - confirm this is intended.
    models = {
        'cnn': AnotherConv(),
        'resnet': resnet_masked(pretrained=True),
        'resnet_multiple': resnet_linear(pretrained=True)
    }

    # Size of the random validation subset drawn for every repeat.
    estimation_samples = 5_000
    ndcgs, estimator_type, model_types = [], [], []
    accuracies = []

    for i in range(config['model_runs']):
        print('==models run==', i + 1)
        for name, model in models.items():
            callbacks = [
                partial(EarlyStoppingCallback,
                        patience=3,
                        min_delta=1e-2,
                        monitor='valid_loss')
            ]
            learner = Learner(data,
                              model,
                              loss_func=loss_func,
                              metrics=[accuracy],
                              callback_fns=callbacks)
            learner.fit(100, lr, wd=weight_decay)
            inferencer = Inferencer(model)
            masks = build_masks(DEFAULT_MASKS)

            for j in range(config['repeat_runs']):
                # Sample without replacement, so len(x_val) must be at
                # least `estimation_samples`.
                idxs = np.random.choice(len(x_val),
                                        estimation_samples,
                                        replace=False)
                x_current = x_val[idxs]
                y_current = y_val[idxs]

                # masks
                current_ll = ll(inferencer, x_current, y_current)
                for mask_name, mask in masks.items():
                    print(mask_name)
                    estimator = build_estimator(
                        'bald_masked',
                        inferencer,
                        nn_runs=config['nn_runs'],
                        dropout_mask=mask,
                        dropout_rate=config['dropout_uq'],
                        num_classes=config['num_classes'])
                    uq = estimator.estimate(x_current)
                    estimator.reset()
                    ndcgs.append(uq_ndcg(-current_ll, uq))
                    estimator_type.append(mask_name)
                    # NOTE(review): second reset() on the same estimator;
                    # looks redundant - confirm before removing.
                    estimator.reset()
                    model_types.append(name)
                    # One accuracy entry per (repeat, mask) so the list
                    # stays aligned with `model_types` for the plot below.
                    accuracies.append(learner.recorder.metrics[-1][0].item())
    #

    try:
        plt.figure(figsize=(12, 8))
        plt.title(f"NDCG on different train samples")

        df = pd.DataFrame({
            'ndcg': ndcgs,
            'estimator_type': estimator_type,
            'model': model_types
        })
        sns.boxplot(data=df, x='estimator_type', y='ndcg', hue='model')
        plt.show()

        plt.figure(figsize=(12, 8))
        plt.title('Accuracies')
        df = pd.DataFrame({'accuracy': accuracies, 'model': model_types})
        sns.boxplot(data=df, y='accuracy', x='model')
        plt.show()
    except Exception as e:
        # Any plotting failure drops into an interactive ipdb session
        # instead of propagating.
        print(e)
        import ipdb
        ipdb.set_trace()
Exemple #18
0
class CNNM(OWWidget):
    """Widget that builds a small CNN learner for an incoming `ImageDataBunch`.

    When data arrives on the 'Data' input a `Learner` is created around
    the fixed convolutional model defined in `__init__`, and its summary
    is shown in the main area. On new signals the learner is fitted in a
    background executor; progress is reported through the widget's
    progress bar and the validation result is printed when the task is
    done.
    """
    name = "M CNN"
    description = ""
    # icon = "icons/robot.svg"

    want_main_area = True

    class Inputs:
        data = Input('Data', ImageDataBunch, default=True)

    def __init__(self):
        super().__init__()
        # NOTE(review): `self.data` is not initialised here; it only exists
        # after `set_data` has been called, and `_update` reads it.
        self.learn = None

        # Disabled train button (its caption means "Start training"):
        # train_button = gui.button(self.controlArea, self, "开始训练", callback=self.train)
        # The initial label text means "Model structure"; it is replaced by
        # the learner summary in `set_data`.
        self.label = gui.label(self.mainArea, self, "模型结构")

        #: The current evaluating task (if any)
        self._task = None  # type: Optional[Task]
        #: An executor we use to submit learner evaluations into a thread pool
        self._executor = ThreadExecutor()

        # Five stride-2 convolutions. The trailing numbers are presumably
        # the spatial size after each layer for 28x28 inputs - TODO confirm.
        self.model = nn.Sequential(
            self.conv(1, 8),  # 14
            nn.BatchNorm2d(8),
            nn.ReLU(),
            self.conv(8, 16),  # 7
            nn.BatchNorm2d(16),
            nn.ReLU(),
            self.conv(16, 32),  # 4
            nn.BatchNorm2d(32),
            nn.ReLU(),
            self.conv(32, 16),  # 2
            nn.BatchNorm2d(16),
            nn.ReLU(),
            self.conv(16, 10),  # 1
            nn.BatchNorm2d(10),
            Flatten()  # remove (1,1) grid
        )

    def handleNewSignals(self):
        """Start (or restart) training once the input signals have been set."""
        self._update()

    def _update(self):
        """Cancel any running task and submit a new fit to the executor."""
        if self._task is not None:
            # First make sure any pending tasks are cancelled.
            self.cancel()
        assert self._task is None

        if self.data is None:
            return
        # Nothing to train until a learner has been built by `set_data`.
        if not self.learn:
            return

        # setup the task state
        self._task = task = Task()
        # The fit function takes a callback to report the progress. We
        # instrument this callback to both invoke the appropriate slots on
        # this widget for reporting the progress (in a thread safe manner)
        # and to implement cooperative cancellation.
        set_progress = methodinvoke(self, "setProgressValue", (float,))

        def callback(finished):
            # check if the task has been cancelled and raise an exception
            # from within. This 'strategy' can only be used with code that
            # properly cleans up after itself in the case of an exception
            # (does not leave any global locks, opened file descriptors, ...)
            if task.cancelled:
                raise KeyboardInterrupt()
            set_progress(finished * 100)

        self.progressBarInit()
        # Submit the evaluation function to the executor and fill in the
        # task with the resultant Future.
        # task.future = self._executor.submit(self.learn.fit_one_cycle(1))

        # NOTE(review): the context manager is exited as soon as the task
        # has been submitted, i.e. possibly before the fit has run -
        # confirm that the progress suppression still applies in the worker.
        with progress_disabled_ctx(self.learn) as learn:
            fit_model = partial(my_fit, learn, 1, callback=callback)
            task.future = self._executor.submit(fit_model)
            # Setup the FutureWatcher to notify us of completion
            task.watcher = FutureWatcher(task.future)
            # by using FutureWatcher we ensure `_task_finished` slot will be
            # called from the main GUI thread by the Qt's event loop
            task.watcher.done.connect(self._task_finished)

    @pyqtSlot(float)
    def setProgressValue(self, value):
        """Set the progress bar to `value`; must run on the widget's thread."""
        assert self.thread() is QThread.currentThread()
        self.progressBarSet(value)

    @pyqtSlot(concurrent.futures.Future)
    def _task_finished(self, f):
        """
        Finish the current task and print the learner's validation result.

        Parameters
        ----------
        f : Future
            The future instance holding the result of learner evaluation.
        """
        assert self.thread() is QThread.currentThread()
        assert self._task is not None
        assert self._task.future is f
        assert f.done()

        self._task = None
        self.progressBarFinished()

        # NOTE(review): the future's result is never inspected, so an
        # exception raised inside the fit is not reported here. The
        # disabled handling below shows the intended shape.
        # try:
        #     result = f.result()  # type: List[Results]
        # except Exception as ex:
        #     # Log the exception with a traceback
        #     log = logging.getLogger()
        #     log.exception(__name__, exc_info=True)
        #     self.error("Exception occurred during evaluation: {!r}".format(ex))
        #     # clear all results
        #     self.result= None
        # else:
        print(self.learn.validate())
        # ... and update self.results

    def cancel(self):
        """
        Cancel the current task (if any).
        """
        if self._task is not None:
            self._task.cancel()
            assert self._task.future.done()
            # disconnect the `_task_finished` slot
            self._task.watcher.done.disconnect(self._task_finished)
            self._task = None

    def onDeleteWidget(self):
        """Cancel any running task before the widget is deleted."""
        self.cancel()
        super().onDeleteWidget()

    def conv(self, ni, nf):
        """Return a 3x3, stride-2, padding-1 convolution from `ni` to `nf` channels."""
        return nn.Conv2d(ni, nf, kernel_size=3, stride=2, padding=1)

    def train(self):
        """Fit the learner for three one-cycle epochs (no-op without a learner)."""
        if self.learn is None:
            return
        self.learn.fit_one_cycle(3)

    @Inputs.data
    def set_data(self, data):
        """Store the input data and build a learner for it.

        A `None` input clears `self.data` only; `self.learn` and the label
        text keep their previous values.
        """
        if data is not None:
            self.data = data
            self.learn = Learner(self.data, self.model, loss_func=nn.CrossEntropyLoss(), metrics=accuracy,
                                 add_time=False, bn_wd=False, silent=True)
            self.label.setText(self.learn.summary())
        else:
            self.data = None
Exemple #19
0
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)

        x = self.avgpool(x)
        x = x.view(x.size(0), -1)
        x = self.classifier(x)

        return F.log_softmax(x, dim=1)


if __name__ == "__main__":
    # The dataset folder lives next to this script; resolve it to an
    # absolute path.
    datasetdir = os.path.abspath(
        os.path.join(os.path.dirname(__file__), './kuzu_mnist'))

    # Load dataset
    databunch = get_databunch(datasetdir)
    print('Dataset loaded')

    # Create the ResNet model (BasicBlock, two blocks per stage) and wrap
    # it in a learner that tracks accuracy.
    learn = Learner(databunch, MyResNet(BasicBlock, [2, 2, 2, 2]), metrics=accuracy)

    # Train for a single epoch, then store the weights.
    learn.fit(1)
    learn.save('resnet_model_with_norm')
import numpy as np

import os

from utils import get_databunch
from make_vgg_resnet import VGG_ResNet

if __name__ == "__main__":
    # Resolve the dataset folder that sits next to this script.
    datasetdir = os.path.join(os.path.dirname(__file__), './kuzu_mnist')
    datasetdir = os.path.abspath(datasetdir)

    # Load dataset
    databunch = get_databunch(datasetdir)

    # Create VGG + ResNet model
    # NOTE(review): `Learner`, `accuracy` and `torch` are used below but are
    # not among the imports visible above this block - confirm they are in scope.
    learn = Learner(databunch, VGG_ResNet(), metrics=accuracy)

    # Load
    learn.load('vgg_resnet_model_with_norm')

    # Validate
    loss, acc = learn.validate()
    print('val_loss: {}, val_acc: {}'.format(loss, acc))

    # 10x10 matrix, presumably a confusion matrix to be filled per sample -
    # nothing in the visible code writes to it yet.
    mat = np.zeros((10, 10))
    for data in databunch.valid_ds:
        images, labels = data
        # Reshape the single sample into a batch of one 1x28x28 image.
        images = images.reshape((1, 1, 28, 28))
        outputs = learn.model(images)
        # Index of the largest output along dimension 1 is the predicted class.
        _, predicted = torch.max(outputs, 1)
        predicted = int(predicted)
Exemple #21
0
    
def vgg_resnet_load_model(learner, vgg_name, resnet_name):
    """Load separately trained VGG and ResNet weights into a combined model.

    Each checkpoint is read from `learner.path / learner.model_dir /
    '<name>.pth'`, mapped to the learner's data device, and its 'model'
    entry is loaded strictly into `learner.model.vgg` and
    `learner.model.resnet` respectively (VGG first).
    """
    target_device = learner.data.device
    for submodule_name, checkpoint_name in (("vgg", vgg_name), ("resnet", resnet_name)):
        checkpoint_file = learner.path/learner.model_dir/f'{checkpoint_name}.pth'
        checkpoint = torch.load(checkpoint_file, map_location=target_device)
        submodule = getattr(learner.model, submodule_name)
        submodule.load_state_dict(checkpoint['model'], strict=True)


if __name__ == "__main__":
    # The dataset folder lives next to this script; resolve it to an
    # absolute path.
    datasetdir = os.path.abspath(
        os.path.join(os.path.dirname(__file__), './kuzu_mnist'))

    # Load dataset
    databunch = get_databunch(datasetdir)
    print('Dataset loaded')

    # Create the combined VGG + ResNet model.
    learn = Learner(databunch, VGG_ResNet(), metrics=accuracy)

    # Initialise both branches from the individually trained checkpoints.
    vgg_name, resnet_name = 'vgg_model_with_norm', 'resnet_model_with_norm'
    vgg_resnet_load_model(learn, vgg_name, resnet_name)

    # Train for a single epoch, then store the combined weights.
    learn.fit(1)
    learn.save('vgg_resnet_model_with_norm')
def save_preds(input_csv, output_csv):
    """Run the model ensemble over the images in `input_csv` and save the result.

    Args:
        input_csv: CSV file with a 'Study' or 'Path' column ('Study' wins
            when both are present).
        output_csv: destination CSV with one 'Path' column and one column
            per entry of `EVAL_LBLS[0:5]`.

    Raises:
        ValueError: if the input has neither a 'Study' nor a 'Path' column.

    The eight checkpoints in `model_names` are evaluated in index order and
    their outputs are combined with `ensemble_method(..., mode='avg')`.
    """
    df = pd.read_csv(input_csv)
    # Keep only the column that locates the images.
    if 'Study' in df.columns:
        df = df[['Study']]
    elif 'Path' in df.columns:
        df = df[['Path']]
    else:
        raise ValueError('csv has no attribute for path/study.')

    # Dummy all-zero labels: the data bunch needs label columns even though
    # only predictions are wanted.
    for lbl in ALL_LBLS:
        df[lbl] = np.zeros(len(df))

    # valid_pct=1 puts every row into the validation set, which is the set
    # the predictions are taken from below.
    test = ImageDataBunch.from_df(
        path=folder_path,
        df=df,
        folder=chexpert_folder,
        seed=0,
        label_col=ALL_LBLS,
        suffix='',
        valid_pct=1,
        ds_tfms=data_tfms,
        bs=BS,
        size=IMG_SZ)  #.normalize([IMG_MEAN, IMG_STD])

    IDs, outputs = test.valid_ds.x.items, []

    def _collect(learn, checkpoint_indices):
        # Load each checkpoint into `learn` and record its validation output.
        for idx in checkpoint_indices:
            learn.load(model_names[idx])
            output, _, _ = learn.get_preds(ds_type=DatasetType.Valid, with_loss=True)
            outputs.append(output)

    def _cnn(arch):
        # Untrained fastai CNN learner whose weights come from a checkpoint.
        return cnn_learner(test, arch, model_dir=model_path, pretrained=False)

    _collect(_cnn(models.densenet121), (0, 1))
    _collect(_cnn(models.resnet152), (2, 3))

    # ResNeXt uses a custom head and a plain Learner.
    model = resnext101_64x4d(pretrained=None)
    model.last_linear = nn.Sequential(nn.Linear(32768, 2048), nn.ReLU(True),
                                      nn.Dropout(), nn.Linear(2048, 14))
    _collect(Learner(test, model, model_dir=model_path), (4,))

    _collect(_cnn(models.vgg19_bn), (5, 6))
    _collect(_cnn(models.densenet121), (7,))

    output = ensemble_method(outputs, mode='avg')
    if torch.cuda.is_available():
        output = output.cpu()
    output = output.numpy()

    # Output columns 1..5 are reported under EVAL_LBLS[0..4]; presumably the
    # evaluation labels occupy those positions in ALL_LBLS - TODO confirm.
    columns = {'Path': IDs}
    for position in range(5):
        columns[EVAL_LBLS[position]] = output[:, position + 1]
    df = pd.DataFrame(columns)

    df.to_csv(output_csv, index=False)
    print('submission saved.')
Exemple #23
0
        .normalize(IMAGE_STATS_GLOBAL2)
       )


from fastai.vision.models import resnet50
from models.efficientnet import EfficientNet
#making model
arch = 'efficientnet-b0'
model_name = f'{arch}-v1'
# Parameters for the entire model (stem, all blocks, and head)

# Pretrained EfficientNet with a single output and 0.5 dropout.
md_ef = EfficientNet.from_pretrained(arch, num_classes=1, dropout_rate=0.5)
# md_ef = resnet50(pretrained=False, num_classes=1)

# Mixed-precision learner; `data` and `optar` are defined outside this
# snippet.
learn = Learner(data, md_ef, opt_func=optar,
                metrics = [accuracy_thresh],
                model_dir='fastai-class1').to_fp16()
learn.path = Path(DATA_BASE_PATH)


# Stage 1: ten one-cycle epochs with max_lr=1e-2.
learn.unfreeze()
learn.fit_one_cycle(10, max_lr=1e-2)
learn.save(f'{model_name}')


# Stage 2: ten more one-cycle epochs with a lower max_lr (1e-3), saved
# under a separate name.
learn.unfreeze()
learn.fit_one_cycle(10, max_lr=1e-3)
learn.save(f'{model_name}-stage2')
Exemple #24
0
from run_check import should_stop

batch_size = 64
# DataBunch built from two loaders; only the first one shuffles.
# NOTE(review): the meaning of the `Dataset(...)` arguments is not visible
# here; presumably they select index ranges (the first 16 items are used as
# test data below) - confirm against the Dataset class.
databunch = DataBunch(
    DataLoader(Dataset(16 + 64 * 4),
               batch_size=batch_size,
               shuffle=True,
               num_workers=6,
               pin_memory=False),
    DataLoader(Dataset(16, 16 + 64 * 4),
               batch_size=batch_size,
               num_workers=2,
               pin_memory=True))

model = Unet(1, 3, n=4)
learner = Learner(databunch, model, loss_func=torch.nn.MSELoss())
# Fixed set of samples used to inspect predictions after every epoch.
test_data = list(Dataset(0, 16))
# Inputs are stacked into one batch and moved to the GPU (requires CUDA).
test_x = torch.stack([a[0] for a in test_data]).cuda()
test_y = [np.array(a[1]) for a in test_data]

epoch = -1
while not should_stop():
    epoch += 1
    print('Epoch:', epoch)

    learner.fit(1)

    prediction = model(test_x)

    image_out = None
    for i in range(len(prediction)):
Exemple #25
0
class CoruscantModel:
    """Fine-tune a pretrained BERT sequence classifier with fastai.

    The intended entry point is :meth:`make_model`, which loads the CSV
    files, wraps the BERT vocabulary/tokenizer for fastai, builds the
    ``TextDataBunch``, creates the ``Learner`` and runs the three-stage
    training schedule in :meth:`train_save`.
    """

    # Defaults used when the constructor receives ``None``; tuples so that
    # no mutable object is shared between instances.
    _DEFAULT_LIST_FILES = ("train.csv", "test.csv")
    _DEFAULT_LABEL_COLS = ("toxic", "severe_toxic", "obscene", "threat",
                           "insult", "identity_hate")

    type_pretrained = None
    data_root = None
    list_files = None
    model_dir = None

    tokenizer_pretrained_coruscant = None
    coruscant_vocab = None
    coruscant_tokenizer = None

    # data bunch
    data_bunch = None
    batch_size = None

    # data to feed the model
    train = None
    test = None
    val = None

    # model
    bert_model_class = None
    loss_func = None
    acc_02 = None
    model = None
    learner = None

    # constants
    label_cols = None
    text_cols = None

    # init constructor
    def __init__(self,
                 type_pretrained='BERT',
                 text_cols="comment_text",
                 list_files=None,
                 label_cols=None,
                 data_root=Path("..") / "api/app/dataset/jigsaw",
                 model_dir='model',
                 batch_size=12):
        """Store the configuration and load the pretrained tokenizer.

        Args:
            type_pretrained: Kind of pretrained model; only ``'BERT'``
                triggers loading of a tokenizer here. It is also used as the
                prefix of the checkpoint names written by ``train_save``.
            text_cols: Name of the DataFrame column holding the text.
            list_files: Two CSV file names (train, test) inside
                ``data_root``. ``None`` selects ``train.csv``/``test.csv``.
            label_cols: Label column names. ``None`` selects the six default
                columns (``toxic`` ... ``identity_hate``).
            data_root: Directory containing the CSV files.
            model_dir: Directory handed to the fastai ``Learner`` for
                checkpoints.
            batch_size: Batch size of the data bunch.
        """
        self.data_root = data_root
        self.model_dir = model_dir
        self.batch_size = batch_size
        # A fresh list per instance: mutating one model's columns must not
        # leak into later instances through a shared default object.
        self.label_cols = (list(self._DEFAULT_LABEL_COLS)
                           if label_cols is None else label_cols)
        self.text_cols = text_cols
        self.list_files = (list(self._DEFAULT_LIST_FILES)
                           if list_files is None else list_files)
        self.type_pretrained = type_pretrained
        gc.collect()

        # f-string instead of "+" so a non-str value cannot raise TypeError.
        log.debug(f'type_pretrained: {type_pretrained}')
        if self.type_pretrained == 'BERT':
            self.tokenizer_pretrained_coruscant = BertTokenizer.from_pretrained(
                "bert-base-uncased")

    def _num_labels(self):
        """Return the number of outputs the classification head needs."""
        cols = self.label_cols
        # A single column may be given as a bare name or index.
        if isinstance(cols, (str, int)):
            return 1
        return len(cols)

    def make_model(self):
        """Run the full pipeline: data, tokenizer, data bunch, model, training."""
        log.debug('----- set_train_val_data ------')
        self.set_train_val_data()
        log.debug('----- set_vocab_tokenizer ------')
        self.set_vocab_tokenizer()
        log.debug('----- set_data_bunch ------')
        self.set_data_bunch()
        log.debug('----- create_model ------')
        self.create_model()
        log.debug('----- train_and_save ------')
        self.train_save()

    def set_data_bunch(self):
        """Build the fastai ``TextDataBunch`` from the train/validation frames."""
        self.data_bunch = TextDataBunch.from_df(
            ".",
            self.train,
            self.val,
            tokenizer=self.coruscant_tokenizer,
            vocab=self.coruscant_vocab,
            include_bos=False,
            include_eos=False,
            text_cols=self.text_cols,
            label_cols=self.label_cols,
            bs=self.batch_size,
            collate_fn=partial(pad_collate, pad_first=False, pad_idx=0),
        )

    def set_train_val_data(self):
        """Read the two CSV files and hold out 20% of train for validation."""
        self.train, self.test = [
            pd.read_csv(self.data_root / fname) for fname in self.list_files
        ]
        # Fixed random_state keeps the split reproducible between runs.
        self.train, self.val = train_test_split(self.train,
                                                shuffle=True,
                                                test_size=0.2,
                                                random_state=42)
        # log.info(self.train.head())

    def set_vocab_tokenizer(self):
        """Wrap the BERT vocabulary and tokenizer in their fastai counterparts."""
        self.coruscant_vocab = Vocab(
            list(self.tokenizer_pretrained_coruscant.vocab.keys()))
        # No pre/post rules: the wrapped BERT tokenizer is used as-is.
        self.coruscant_tokenizer = Tokenizer(tok_func=FastAiBertTokenizer(
            self.tokenizer_pretrained_coruscant, max_seq_len=256),
                                             pre_rules=[],
                                             post_rules=[])

    def create_model(self):
        """Instantiate the BERT classifier and the fastai ``Learner``."""
        # BERT model; the head size follows the configured label columns
        # (6 with the default columns).
        self.bert_model_class = BertForSequenceClassification.from_pretrained(
            'bert-base-uncased', num_labels=self._num_labels())
        # Binary cross entropy on logits, as this is a multi-label problem.
        self.loss_func = nn.BCEWithLogitsLoss()
        # Plain accuracy does not fit multi-label targets, so use
        # accuracy_thresh with a threshold of 25% as the metric.
        self.acc_02 = partial(accuracy_thresh, thresh=0.25)
        self.model = self.bert_model_class

        # learner function
        self.learner = Learner(self.data_bunch,
                               self.model,
                               loss_func=self.loss_func,
                               model_dir=self.model_dir,
                               metrics=self.acc_02)

    def train_save(self):
        """Train in three stages, saving a checkpoint after the first two."""
        parts = bert_clas_split(self.model)
        # NOTE(review): parts[4] is not used as a split point - confirm this
        # is intended; the five weight-decay values below presumably map to
        # the resulting layer groups.
        self.learner.split([parts[i] for i in (0, 1, 2, 3, 5)])

        # Hyper-parameters shared by all three one-cycle runs.
        cycle_kwargs = dict(moms=(0.8, 0.7),
                            pct_start=0.2,
                            wd=(1e-7, 1e-5, 1e-4, 1e-3, 1e-2))

        self.learner.lr_find()
        self.learner.fit_one_cycle(2, max_lr=slice(1e-5, 5e-4), **cycle_kwargs)

        self.learner.save(self.type_pretrained + '_first')
        self.learner.load(self.type_pretrained + '_first')

        # Now, keep only the last two layer groups trainable and train again
        self.learner.freeze_to(-2)
        self.learner.fit_one_cycle(2, max_lr=slice(1e-5, 5e-4), **cycle_kwargs)

        self.learner.save(self.type_pretrained + '_final')
        self.learner.load(self.type_pretrained + '_final')

        # We will now unfreeze the entire model and train it
        self.learner.unfreeze()
        self.learner.lr_find()
        self.learner.fit_one_cycle(2, max_lr=slice(5e-6, 5e-5), **cycle_kwargs)

    def test_prediction(self):
        """Log the learner's prediction for a fixed set of sample sentences."""
        samples = (
            'you are so sweet',
            'you are pathetic piece of shit',
            "what’s so great about return of the jedi?  the special effects are abysmal,  and the acting is "
            "horrible. it’s like they phoned it in.  it’s a mess.",
            "i hate myself for being too human.  how do i liberate my soul ?",
            "why was guru arjun singh killed by jahangir?",
            "funny how the person that bullies you in elementary is ugly as f**k in high school, and your high "
            "school bull1, a loser in college...",
            "stop making fun of amy winehouse and michael jackso2, #rickcastellano is a bully.",
        )
        for text in samples:
            log.info(text)
            log.info(self.learner.predict(text))
Exemple #26
0
class PNNM(OWWidget):
    """Orange widget that trains a small convolutional network in the background.

    ``__init__`` builds MNIST data loaders, a five-layer strided-convolution
    network, a loss and an optimizer. When an ``ImageDataBunch`` arrives on
    the ``Data`` input, the network is wrapped in a fastai ``Learner`` whose
    summary is shown in the main area, and ``train_model`` is submitted to a
    thread executor. Progress is reported through the widget's progress bar;
    once the task finishes the model is evaluated on the MNIST test loader.
    """
    name = "Pytorch CNN"
    description = ""
    # icon = "icons/robot.svg"

    want_main_area = True

    class Inputs:
        data = Input('Data', ImageDataBunch, default=True)

    def __init__(self):
        super().__init__()
        # Initialised here so `_update` can inspect both attributes even if
        # it runs before `set_data` has ever been called.
        self.data = None
        self.learn = None

        # Disabled "Start training" button (its label is the Chinese text below):
        # train_button = gui.button(self.controlArea, self, "开始训练", callback=self.train)
        self.label = gui.label(self.mainArea, self, "模型结构")

        #: The current evaluating task (if any)
        self._task = None  # type: Optional[Task]
        #: An executor we use to submit learner evaluations into a thread pool
        self._executor = ThreadExecutor()

        # Device configuration
        self.device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

        # Hyper parameters
        self.num_epochs = 5  # number of epochs handed to `train_model` in `_update`
        num_classes = 10
        batch_size = 100
        learning_rate = 0.001

        # The datasets live in a "datasets" directory three levels above this file.
        dir_path = Path(__file__).resolve()
        data_path = f'{dir_path.parent.parent.parent}/datasets/'

        # MNIST dataset (download=False: the training files must already exist)
        self.train_dataset = torchvision.datasets.MNIST(root=data_path,
                                                   train=True,
                                                   transform=transforms.ToTensor(),
                                                   download=False)

        self.test_dataset = torchvision.datasets.MNIST(root=data_path,
                                                  train=False,
                                                  transform=transforms.ToTensor())

        # Data loader
        # NOTE(review): the training loader does not shuffle - confirm intended.
        self.train_loader = torch.utils.data.DataLoader(dataset=self.train_dataset,
                                                   batch_size=batch_size,
                                                   shuffle=False)

        self.test_loader = torch.utils.data.DataLoader(dataset=self.test_dataset,
                                                  batch_size=batch_size,
                                                  shuffle=False)

        # Each stride-2 convolution halves the feature map; the trailing
        # numbers are the author's expected spatial sizes.
        self.model = nn.Sequential(
            self.conv(1, 8),  # 14
            nn.BatchNorm2d(8),
            nn.ReLU(),
            self.conv(8, 16),  # 7
            nn.BatchNorm2d(16),
            nn.ReLU(),
            self.conv(16, 32),  # 4
            nn.BatchNorm2d(32),
            nn.ReLU(),
            self.conv(32, 16),  # 2
            nn.BatchNorm2d(16),
            nn.ReLU(),
            self.conv(16, num_classes),  # 1
            nn.BatchNorm2d(num_classes),
            Flatten()  # remove (1,1) grid
        ).to(self.device)

        # Loss and optimizer
        self.criterion = nn.CrossEntropyLoss()
        self.optimizer = torch.optim.Adam(self.model.parameters(), lr=learning_rate)

    def handleNewSignals(self):
        """Restart training once the new input signals have been delivered."""
        self._update()

    def _update(self):
        """Cancel any running task and submit a new training run.

        Does nothing when no data or no learner is available.
        """
        if self._task is not None:
            # First make sure any pending tasks are cancelled.
            self.cancel()
        assert self._task is None

        if self.data is None:
            return
        # Without a learner there is nothing to train or validate.
        if not self.learn:
            return

        # setup the task state
        self._task = task = Task()
        # The training function takes a callback to report the progress.
        # We instrument this callback to both invoke the appropriate slots
        # on this widget for reporting the progress (in a thread safe
        # manner) and to implement cooperative cancellation.
        set_progress = methodinvoke(self, "setProgressValue", (float,))

        def callback(finished):
            # check if the task has been cancelled and raise an exception
            # from within. This 'strategy' can only be used with code that
            # properly cleans up after itself in the case of an exception
            # (does not leave any global locks, opened file descriptors, ...)
            if task.cancelled:
                raise KeyboardInterrupt()
            set_progress(finished * 100)

        self.progressBarInit()
        # Submit the training function to the executor and fill in the
        # task with the resultant Future.
        fit_model = partial(train_model, self.model, self.num_epochs, self.train_loader,
                            self.test_loader, self.device,
                            self.criterion, self.optimizer, callback=callback)

        task.future = self._executor.submit(fit_model)
        # Setup the FutureWatcher to notify us of completion
        task.watcher = FutureWatcher(task.future)
        # by using FutureWatcher we ensure `_task_finished` slot will be
        # called from the main GUI thread by the Qt's event loop
        task.watcher.done.connect(self._task_finished)

    @pyqtSlot(float)
    def setProgressValue(self, value):
        """Set the progress bar to ``value``; must run on the widget's thread."""
        assert self.thread() is QThread.currentThread()
        self.progressBarSet(value)

    @pyqtSlot(concurrent.futures.Future)
    def _task_finished(self, f):
        """
        Evaluate the trained model once the background task has completed.

        Parameters
        ----------
        f : Future
            The future instance holding the result of learner evaluation.
        """
        assert self.thread() is QThread.currentThread()
        assert self._task is not None
        assert self._task.future is f
        assert f.done()

        self._task = None
        self.progressBarFinished()

        # NOTE(review): `f.result()` is never inspected, so an exception
        # raised inside the training task goes unnoticed here.
        self.model.eval()  # eval mode (batchnorm uses moving mean/variance instead of mini-batch mean/variance)
        with torch.no_grad():
            correct = 0
            total = 0
            for images, labels in self.test_loader:
                images = images.to(self.device)
                labels = labels.to(self.device)
                outputs = self.model(images)
                # Index of the largest output along dim 1 is the predicted class.
                _, predicted = torch.max(outputs.data, 1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()

            print('Test Accuracy of the model on the 10000 test images: {} %'.format(100 * correct / total))

        print(self.learn.validate())

    def cancel(self):
        """
        Cancel the current task (if any).
        """
        if self._task is not None:
            self._task.cancel()
            assert self._task.future.done()
            # disconnect the `_task_finished` slot
            self._task.watcher.done.disconnect(self._task_finished)
            self._task = None

    def onDeleteWidget(self):
        """Cancel any running task before the widget is deleted."""
        self.cancel()
        super().onDeleteWidget()

    def conv(self, ni, nf):
        """Return a 3x3, stride-2, padding-1 convolution from ``ni`` to ``nf`` channels."""
        return nn.Conv2d(ni, nf, kernel_size=3, stride=2, padding=1)

    def train(self):
        """Run three one-cycle epochs on the fastai learner, if one exists."""
        if self.learn is None:
            return
        self.learn.fit_one_cycle(3)

    @Inputs.data
    def set_data(self, data):
        """Handle the ``Data`` input: build a learner for it and show its summary."""
        if data is not None:
            self.data = data
            self.learn = Learner(self.data, self.model, loss_func=nn.CrossEntropyLoss(), metrics=accuracy,
                                 add_time=False, bn_wd=False, silent=True)
            self.label.setText(self.learn.summary())
        else:
            self.data = None
Exemple #27
0
    def train(self, df_path, data_root, output_dir, weights=False,
                  col_image='image_path', col_label='label', col_group=None):
        '''
        Train a 3D ResNet-18 classifier and export it as a traced TorchScript file.

        The table at ``df_path`` is split into train/valid/test by its
        ``dataset`` column, the model is trained in two one-cycle runs (the
        second at a tenth of the learning rate) and finally traced and saved
        to ``output_dir/scripted_model.zip``.

        Parameters
        ----------
        df_path : path of a CSV file with at least ``label`` and ``dataset`` columns.
        data_root : directory handed to ``VolumeDataset``.
        output_dir : directory for checkpoints and the exported model; created if missing.
        weights : class weights for the cross-entropy loss.
            ``True`` - inverse class frequency of the training labels,
            normalised so that the smallest weight is 1;
            ``False`` - uniform weights, one per class;
            ``str`` - comma-separated numbers; list/tuple - used as given.
        col_image, col_label, col_group : currently unused.
            NOTE(review): the ``label`` column name is hard-coded below -
            confirm whether ``col_label`` was meant to replace it.
        '''

        import matplotlib
        matplotlib.use('Agg')
        from fastai.vision import Learner
        from fastai.vision import get_transforms, models
        from fastai.vision import accuracy, AUROC
        from fastai.vision import DataBunch, DatasetType
        from fastai.callbacks import SaveModelCallback
        
        data_root = Path(data_root)
        output_dir = Path(output_dir)
        output_dir.mkdir(parents=True,exist_ok=True)
        model_name = 'scripted_model.zip'

        df = pd.read_csv(df_path)
        num_classes = df['label'].nunique()
        df_train = df[df.dataset.isin(['train'])]
        df_valid = df[df.dataset.isin(['valid'])]
        df_test = df[df.dataset.isin(['test'])]

        sample_size = self._sample_size
        sample_duration = self._sample_duration
        input_channels = self._input_channels
        num_workers = self._num_workers
        batch_size = self._batch_size
        n_epoch = self._n_epoch
        devices = self._devices

        # Device hosting the loss weights and the tracing sample. An empty
        # device list used to raise IndexError on `devices[0]`; fall back to
        # CUDA when available, otherwise CPU.
        if len(devices) > 0:
            main_device = devices[0]
        else:
            main_device = 'cuda' if torch.cuda.is_available() else 'cpu'

        if main_device.lower() != 'cpu':
            pin_memory = True
            device_data = main_device
        else:
            pin_memory = False
            # NOTE(review): None lets fastai pick its default device, which
            # may not be the CPU - confirm this is intended.
            device_data = None
        
        from vol_dataset import VolumeDataset
        ds_train = VolumeDataset(df_train,data_root,input_channels)
        ds_valid = VolumeDataset(df_valid,data_root,input_channels)
        ds_test = VolumeDataset(df_test,data_root,input_channels)
        data = DataBunch.create(ds_train, ds_valid, test_ds=ds_test, bs=batch_size, 
                                num_workers=num_workers, device=device_data, pin_memory=pin_memory)
        print(df_train.shape, df_valid.shape, df_test.shape)

        from resnet3d import r3d_18 as resnet18
        model = resnet18(input_channels=input_channels,num_classes=num_classes)
        # Keep a handle on the bare module: it is the one traced at the end.
        model_single = model
        if len(devices) >= 2:
            model = nn.DataParallel(model_single,device_ids=devices)
        
        if isinstance(weights,bool):
            if weights:
                # Sort by label so that weight i belongs to class i;
                # value_counts(sort=False) gives no ordering guarantee.
                # Assumes labels are class indices - TODO confirm.
                weights = 1/ds_train.label.value_counts(sort=False).sort_index()
                weights = weights.values/weights.min()
            else:
                # One uniform weight per class instead of a fixed three.
                weights = [1.0]*num_classes
        elif isinstance(weights,str):
            weights = [float(w) for w in weights.split(',')]
        # Lists and tuples are used as given. The loss weight must be a float
        # tensor to match the model's float logits.
        weights = torch.tensor(weights, dtype=torch.float)
        loss_func = nn.CrossEntropyLoss(weight=weights)
        loss_func = loss_func.to(main_device)
        
        metrics = [accuracy]
        metrics += [AUCk(num_classes-1)]
        learn = Learner(data, model, metrics=metrics, wd=1e-2, path=output_dir, loss_func=loss_func)

        # Two runs; each keeps the checkpoint with the best validation loss as "best".
        lr = self._lr
        learn.fit_one_cycle(n_epoch, slice(lr), callbacks=[SaveModelCallback(learn, every='improvement',monitor='valid_loss', name='best')])
        lr = self._lr/10
        learn.fit_one_cycle(n_epoch, slice(lr), callbacks=[SaveModelCallback(learn, every='improvement',monitor='valid_loss', name='best')])

        # Trace with a random sample of the expected input shape.
        x_sample = torch.rand((2,input_channels,sample_duration,sample_size,sample_size))
        x_sample = x_sample.to(main_device)
        # Eval mode so BatchNorm/Dropout are traced with inference behaviour.
        model_single.eval()
        model_scripted = torch.jit.trace(model_single,x_sample)
        model_scripted.to('cpu')
        model_scripted.save(str(output_dir/model_name))