Example #1
# Imports assumed by this snippet:
import os
import requests
from fastprogress.fastprogress import progress_bar

def download_url(url,
                 dest,
                 overwrite=False,
                 pbar=None,
                 show_progress=True,
                 chunk_size=1024 * 1024,
                 timeout=4,
                 retries=5):
    "Download `url` to `dest` unless it exists and not `overwrite`"
    if os.path.exists(dest) and not overwrite: return

    s = requests.Session()
    s.mount('http://', requests.adapters.HTTPAdapter(max_retries=retries))
    u = s.get(url, stream=True, timeout=timeout)
    try:
        file_size = int(u.headers["Content-Length"])
    except (KeyError, ValueError):  # no usable Content-Length header
        show_progress = False

    with open(dest, 'wb') as f:
        nbytes = 0
        if show_progress:
            pbar = progress_bar(range(file_size),
                                auto_update=False,
                                leave=False,
                                parent=pbar)
        try:
            for chunk in u.iter_content(chunk_size=chunk_size):
                nbytes += len(chunk)
                if show_progress: pbar.update(nbytes)
                f.write(chunk)
        except requests.exceptions.ConnectionError as e:
            fname = url.split('/')[-1]
            data_dir = dest.parent
            print(f'\n Download of {url} has failed after {retries} retries\n'
                  f' Fix the download manually:\n'
                  f'$ mkdir -p {data_dir}\n'
                  f'$ cd {data_dir}\n'
                  f'$ wget -c {url}\n'
                  f'$ tar -zxvf {fname}\n'
                  f' And re-run your code once the download is successful\n')
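A minimal usage sketch for the function above; the URL and destination are hypothetical, and `download_url` is assumed to be importable from the surrounding module:

from pathlib import Path

# Hypothetical example archive; any reachable URL works the same way.
url = 'https://example.com/data/sample.tgz'
dest = Path('data/sample.tgz')
dest.parent.mkdir(parents=True, exist_ok=True)
download_url(url, dest, overwrite=False, show_progress=True)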
Example #2
    def predict_folds(self,
                      fy: FoldYielder,
                      pred_name: str = 'pred',
                      callbacks: Optional[List[AbsCallback]] = None,
                      verbose: bool = True,
                      bs: Optional[int] = None) -> None:
        r'''
        Apply model to all data accessed by a :class:`~lumin.nn.data.fold_yielder.FoldYielder` and save predictions as a new group in the fold file

        Arguments:
            fy: :class:`~lumin.nn.data.fold_yielder.FoldYielder` interfacing to data
            pred_name: name of group to which to save predictions
            callbacks: list of any callbacks to use during evaluation
            verbose: whether to print average prediction timings
            bs: if not `None`, will run prediction in batches of specified size to save memory
        '''

        times = []
        mb = master_bar(range(len(fy)))
        for fold_idx in mb:
            fold_tmr = timeit.default_timer()
            if not fy.test_time_aug:
                fold = fy.get_fold(fold_idx)['inputs']
                pred = self.predict_array(fold, callbacks=callbacks, bs=bs)
            else:
                tmpPred = []
                pb = progress_bar(range(fy.aug_mult), parent=mb)
                for aug in pb:
                    fold = fy.get_test_fold(fold_idx, aug)['inputs']
                    tmpPred.append(
                        self.predict_array(fold, callbacks=callbacks, bs=bs))
                pred = np.mean(tmpPred, axis=0)

            times.append((timeit.default_timer() - fold_tmr) / len(fold))
            if self.n_out > 1:
                fy.save_fold_pred(pred, fold_idx, pred_name=pred_name)
            else:
                fy.save_fold_pred(pred[:, 0], fold_idx, pred_name=pred_name)
        times = uncert_round(np.mean(times),
                             np.std(times, ddof=1) / np.sqrt(len(times)))
        if verbose: print(f'Mean time per event = {times[0]}±{times[1]}')
Example #3
 def train(self, dataloader):
     self.net.train()
     train_loss, train_metrics = [], None
     if self.metrics:
         train_metrics = [[] for m in self.metrics]
     for batch in progress_bar(dataloader, parent=self.mb):
         X, y = batch.text, batch.label
         X, y = X.to(device), y.to(device)
         self.optimizer.zero_grad()
         output = self.net(X)
         loss = self.loss(output, y)
         loss.backward()
         self.optimizer.step()
         train_loss.append(loss.item())
         comment = f'train_loss {np.mean(train_loss):.5f}'
         if self.metrics:
             for i, metric in enumerate(self.metrics):
                 train_metrics[i].append(metric.call(output, y))
                 comment += f' train_{metric.name} {np.mean(train_metrics[i]):.5f}'
         self.mb.child.comment = comment
     return train_loss, train_metrics
Example #4
def train(model, dataloader, optimizer):
    model.train()
    avg_loss = 0

    for xc, yc, xt, yt in progress_bar(dataloader, parent=args.mb):
        xc, yc, xt, yt = [t.to(args.device) for t in (xc, yc, xt, yt)]

        optimizer.zero_grad()

        pred_dist = model(xc, yc, xt)

        loss = -pred_dist.log_prob(yt).sum(-1).mean()

        loss.backward()
        optimizer.step()

        avg_loss -= loss.item() * xc.size(0)

    return avg_loss / len(dataloader.dataset)
Example #5
def save_tiles(tile_size, num_tiles=5, scale=4, threshold=180):
    print(f'\n\nsave {tile_size} tiles')
    hr_ROI = paired_001 / f'roi_hr_{tile_size}'
    lr_ROI = paired_001 / f'roi_lr_{tile_size}'
    lr_ROI_small = paired_001 / f'roi_lr_up_{tile_size}'

    for (img_id, depth), hr_fn in progress_bar(list(hr_file_map.items())):
        lr_fn = lr_file_map[(img_id, depth)]
        sub_dir = 'train' if img_id in train_ids else 'valid'
        base_name = f'{tile_size}_{img_id}_{depth}.tif'
        helpers.tif_to_tiles(lr_fn,
                             hr_fn,
                             base_name,
                             hr_ROI / sub_dir,
                             lr_ROI / sub_dir,
                             lr_ROI_small / sub_dir,
                             size=tile_size,
                             num_tiles=num_tiles,
                             scale=scale,
                             threshold=threshold)
Example #6
 def get_features(self,
                  layer_idx: int,
                  ds_name: str,
                  ds_type: str = 'test') -> torch.Tensor:
     print(f'getting features for {ds_name} {ds_type}..')
     batched_outputs = []
     layer_dir = f'/mnt/disks/disk/{ds_name}_{ds_type}_layer_output/{layer_idx}'
     batched_output_files = os.listdir(layer_dir)
     for batch_output_file in progress_bar(batched_output_files):
         batch_filename = f'{layer_dir}/{batch_output_file}'
         with self.load_feature_file(batch_filename) as batched_output:
             if isinstance(self.layers[layer_idx],
                           (Conv2d, BatchNorm2d, ReLU, MaxPool2d)):
                 batched_output = self.get_mean_channels(batched_output)
             else:
                 batched_output = torch.tensor(batched_output).to('cpu')
             batched_outputs.append(batched_output)
     # Concatenate and let torch.cat infer the output size; preallocating via
     # `out=` assumed every batch had the same length as the last one.
     return torch.cat(batched_outputs)
Example #7
    def predict_images(self, image_list, ds_kwargs=None, verbose=1, **kwargs):
        "Predict images in 'image_list' with kwargs and save to zarr"
        ds_kwargs = ds_kwargs or {}  # avoid the mutable default argument pitfall

        for f in progress_bar(image_list, leave=False):
            if verbose > 0: print(f'Predicting {f.name}')
            ds = TileDataset([f],
                             stats=self.stats,
                             return_index=True,
                             **ds_kwargs)
            softmax, stdeviation, energy = self.predict(ds, **kwargs)

            # Save to zarr
            self.g_smx[f.name] = softmax
            if stdeviation is not None:
                self.g_std = self.root.require_group('std')
                self.g_std[f.name] = stdeviation
            if energy is not None:
                self.g_eng = self.root.require_group('energy')
                self.g_eng[f.name] = energy

        return self.g_smx, self.g_std, self.g_eng
Example #8
def train_epoch(model, train_loader, criterion, optimizer, mb, cfg):
    model.train()
    avg_loss = 0.

    for images, labels in progress_bar(train_loader, parent=mb):
        images = images.to(device)
        labels = labels.to(device)

        preds = model(images.float())

        if cfg.model.n_classes > 1:
            loss = criterion(preds, labels)
        else:
            loss = criterion(preds.view(labels.shape), labels.float())

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        avg_loss += loss.item() / len(train_loader)
    del images, labels; gc.collect()
    return model, avg_loss
Example #9
def train(model, dataloader, optimizer):
    model.train()
    avg_loss = 0

    for index, (I, _) in enumerate(progress_bar(dataloader, parent=args.mb)):
        I = I.to(args.device)

        optimizer.zero_grad()

        pred_dist = model(I)

        loss = -pred_dist.log_prob(channel_last(I)).sum(-1).mean()

        loss.backward()
        optimizer.step()

        avg_loss -= loss.item() * I.size(0)
        if index % 10 == 0:
            args.mb.child.comment = 'loss={:.3f}'.format(loss.item())

    return avg_loss / len(dataloader.dataset)
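Examples #4 and #9 assume `args.mb` holds a fastprogress `master_bar` wrapping the epoch loop. A rough sketch of that parent/child nesting (the work inside the loop is a stand-in):

from fastprogress.fastprogress import master_bar, progress_bar
import time

epochs, batches = 3, 10
mb = master_bar(range(epochs))
for epoch in mb:
    for batch in progress_bar(range(batches), parent=mb):
        time.sleep(0.01)                     # stand-in for one training step
        mb.child.comment = f'batch {batch}'  # text shown next to the inner bar
    mb.write(f'finished epoch {epoch}')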
Example #10
    def __init__(
        self,
        draws,
        tune,
        chains,
        cores,
        seeds,
        start_points,
        step_method,
        start_chain_num=0,
        progressbar=True,
    ):

        if any(len(arg) != chains for arg in [seeds, start_points]):
            raise ValueError("Number of seeds and start_points must be %s." % chains)

        self._samplers = [
            ProcessAdapter(
                draws, tune, step_method, chain + start_chain_num, seed, start
            )
            for chain, seed, start in zip(range(chains), seeds, start_points)
        ]

        self._inactive = self._samplers.copy()
        self._finished = []
        self._active = []
        self._max_active = cores

        self._in_context = False
        self._start_chain_num = start_chain_num

        self._divergences = 0
        self._total_draws = 0
        self._desc = "Sampling {0._chains:d} chains, {0._divergences:,d} divergences"
        self._chains = chains
        self._progress = progress_bar(
            range(chains * (draws + tune)), display=progressbar, auto_update=False
        )
        self._progress.comment = self._desc.format(self)
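The sampler above drives the bar manually instead of iterating it. A rough sketch of that pattern, assuming a fastprogress version that accepts `auto_update` (as the snippet does):

from fastprogress.fastprogress import progress_bar

total = 1000
pbar = progress_bar(range(total), auto_update=False)
pbar.comment = 'draws received'
done = 0
while done < total:
    done += 50         # e.g. a batch of draws arriving from a worker
    pbar.update(done)  # set the absolute progress value
pbar.on_iter_end()     # finalize the bar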
Example #11
def train_epoch(model, train_loader, criterion, optimizer, mb, cfg):
    model.train()
    avg_loss = 0.

    for images, labels in progress_bar(train_loader, parent=mb):
        images = images.to(device)
        labels = labels.to(device)

        r = np.random.rand()
        if cfg.data.train.mixup and r < 0.5:
            images, labels = mixup(images, labels, 1.0)

        preds = model(images.float())

        loss = criterion(preds.view(labels.shape), labels.float())

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        avg_loss += loss.item() / len(train_loader)
    del images, labels; gc.collect()
    return model, avg_loss
Example #12
def train_one_epoch(model, optimizer, scheduler, data_loader, device,
                    master_bar):
    model.train()

    for x, target in progress_bar(data_loader, parent=master_bar):

        x = [_x.to(device) for _x in x]
        target = [{k: v.to(device) for k, v in t.items()} for t in target]
        loss_dict = model(x, [t['boxes'] for t in target],
                          [t['labels'] for t in target])
        batch_loss = sum(loss_dict.values())

        optimizer.zero_grad()
        batch_loss.backward()
        # Safeguard for Gradient explosion
        torch.nn.utils.clip_grad_norm_(model.parameters(), .25)
        optimizer.step()
        if isinstance(scheduler, torch.optim.lr_scheduler.OneCycleLR):
            scheduler.step()

        master_bar.child.comment = ' | '.join(f"{k}: {v.item():.4}"
                                              for k, v in loss_dict.items())
Example #13
def get_idxs_and_dists(query_features, index_features, BS=32):
    import faiss
    res = faiss.StandardGpuResources()
    co = faiss.GpuClonerOptions()
    # Build an exact L2 index on the CPU, then clone it onto GPU 0.
    FEAT_DIM = index_features.shape[1]
    cpu_index = faiss.IndexFlatL2(FEAT_DIM)
    cpu_index.add(index_features)
    index = faiss.index_cpu_to_gpu(res, 0, cpu_index, co)
    out_dists = np.zeros((len(query_features), 100), dtype=np.float32)
    out_idxs = np.zeros((len(query_features), 100), dtype=np.int32)
    NUM_QUERY = len(query_features)
    for ind in progress_bar(range(0, len(query_features), BS)):
        fin = ind + BS
        if fin > NUM_QUERY:
            fin = NUM_QUERY
        q_descs = query_features[ind:fin]
        D, I = index.search(q_descs, 100)  # distances and indices of the 100 nearest neighbours
        out_dists[ind:fin] = D
        out_idxs[ind:fin] = I
    return out_idxs, out_dists
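A small usage sketch with random data; it assumes a CUDA device and a GPU build of faiss, and the feature matrices must be float32:

import numpy as np

index_features = np.random.rand(1000, 128).astype(np.float32)
query_features = np.random.rand(64, 128).astype(np.float32)
idxs, dists = get_idxs_and_dists(query_features, index_features, BS=32)
print(idxs.shape, dists.shape)  # (64, 100) (64, 100)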
Example #14
    def train(self, num_epochs=30, lr=1e-3):
        criterion = RMELoss().to(self.device)
        optimizer = torch.optim.Adam(self.model.parameters(), lr=lr, weight_decay=1e-5)
        scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, patience=500, factor=0.5, min_lr=1e-7, eps=1e-08)

        print("SingleLSTM Train")

        # Train the model
        for epoch in progress_bar(range(num_epochs)):
            self.model.train()
            optimizer.zero_grad()
            outputs = self.model(self.trainX.to(self.device))

            # Obtain the training loss and take an optimization step
            loss = criterion(outputs, self.trainY.to(self.device))
            loss.backward()
            optimizer.step()

            # Validate without tracking gradients
            self.model.eval()
            with torch.no_grad():
                valid = self.model(self.testX.to(self.device))
                vali_loss = criterion(valid, self.testY.to(self.device))
            scheduler.step(vali_loss)

        return loss.cpu().item(), vali_loss.cpu().item()
Example #15
def zmuv(learn: Learner,
         tol: float = 1e-5,
         exp_mean: float = 0.,
         exp_std: float = 1.,
         orthonorm: bool = True,
         cond: callable = has_weight_or_bias,
         verbose: int = 0) -> Learner:
    print('ZMUV initialization...')
    xb, yb = next(iter(learn.data.train_dl))
    if orthonorm: learn.model.apply(orthogonal_weights_init)
    mods = get_layers(learn.model, cond=cond)
    mean_act, std_act = [], []
    from fastprogress import progress_bar
    from time import sleep
    pb = progress_bar(mods)
    for m in pb:
        sleep(0.01)
        if has_weight(m) or has_bias(m):
            pre_mean, pre_std, mean, std = zmuv_layer(learn.model,
                                                      m,
                                                      xb,
                                                      tol=tol,
                                                      exp_mean=exp_mean,
                                                      exp_std=exp_std)
            if mean == 0 and std == 0: continue
            mean_act.append(mean)
            std_act.append(std)
            if verbose >= 2:
                print(m)
                print(f'     pre-zmuv activations    :  mean = {pre_mean:9.5f}   std = {pre_std:9.5f}')
                print(f'     post-zmuv activations   :  mean = {mean:9.5f}   std = {std:9.5f}')
    print(f'\noverall post-zmuv activations:  mean = {np.mean(mean_act):9.5f}   std = {np.mean(std_act):9.5f}')
    print('...ZMUV initialization complete\n')
    return learn
Example #16
    def predict_data(self, model, loader, device, batch_size, target_cols):
        """
        Run prediction.

        Args:
          model (obj): model
          loader (arr): test data
          device (str): choice of gpu or cpu for running model
          batch_size (int): batch size
          target_cols (arr): target features

        Returns:
          arr: list of batches of dict of target features and their corresponding predictions
        """
        data_list = []
        for data in progress_bar(loader):
            data_list += self.predict_batch(model, data, device, target_cols)
        # Assumes every batch is full (e.g. the loader uses drop_last=True)
        expected_length = model.pred_len * len(loader) * batch_size
        assert len(data_list) == expected_length, \
            f"len = {len(data_list)} expected = {expected_length}"
        return data_list
Example #17
    def load(self, name: str) -> None:
        r'''
        Load an instantiated :class:`~lumin.nn.ensemble.ensemble.Ensemble` with weights and :class:`~lumin.nn.models.model.Model` from save.

        Arguments:
            name: base name for saved objects

        Examples::
            >>> ensemble.load('weights/ensemble') 
        '''

        with open(f'{name}_builder.pkl', 'rb') as fin:
            self.model_builder = pickle.load(fin)
        names = glob.glob(f'{name}_*.h5')
        self.models = []
        for n in progress_bar(sorted(names)):
            m = Model(self.model_builder)
            m.load(n)
            self.models.append(m)
        self.size = len(self.models)
        self.n_out = self.models[0].get_out_size()
        with open(f'{name}_weights.pkl', 'rb') as fin:
            self.weights = pickle.load(fin)
        try:
            with open(f'{name}_input_pipe.pkl', 'rb') as fin:
                self.input_pipe = pickle.load(fin)
        except FileNotFoundError:
            pass
        try:
            with open(f'{name}_output_pipe.pkl', 'rb') as fin:
                self.output_pipe = pickle.load(fin)
        except FileNotFoundError:
            pass
        try:
            with open(f'{name}_feats.pkl', 'rb') as fin:
                self.feats = pickle.load(fin)
        except FileNotFoundError:
            pass
Example #18
def train_epoch(model, train_loader, optimizer, criterion, master_bar,
                epoch=0, scheduler=None, device='cpu'):
    """Train a model for one epoch
    Args:
        model (torch.nn.Module): model to train
        train_loader (torch.utils.data.DataLoader): training dataloader
        optimizer (torch.optim.Optimizer): parameter optimizer
        criterion (torch.nn.Module): criterion object
        master_bar (fastprogress.MasterBar): master bar of training progress
        epoch (int): current epoch index
        scheduler (torch.optim.lr_scheduler._LRScheduler, optional): learning rate scheduler
        device (str): device hosting tensor data
    Returns:
        train_loss (float): average training loss over the epoch
    """

    # Training
    model.train()
    loader_iter = iter(train_loader)
    train_loss = 0
    for _ in progress_bar(range(len(train_loader)), parent=master_bar):

        x, target = next(loader_iter)
        target = target.type(torch.LongTensor).squeeze()
        # Work with tensors on GPU
        if device.startswith('cuda'):
            x, target = x.cuda(), target.cuda()

        batch_loss = train_batch(model, x, target, optimizer, criterion)
        train_loss += batch_loss
        if scheduler:
            scheduler.step()

        master_bar.child.comment = f"Batch loss: {batch_loss:.4}"

    train_loss /= len(train_loader)

    return train_loss
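A hedged sketch of how such an epoch function is typically driven; `model`, `train_loader`, `optimizer`, `criterion` and `scheduler` are assumed to be constructed elsewhere:

from fastprogress.fastprogress import master_bar

num_epochs = 10
mb = master_bar(range(num_epochs))
for epoch in mb:
    loss = train_epoch(model, train_loader, optimizer, criterion, mb,
                       epoch=epoch, scheduler=scheduler, device='cuda')
    mb.write(f'Epoch {epoch}: train loss {loss:.4f}')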
Example #19
    def _fit_epoch(self, freeze_until, mb):
        """
        Fit a single epoch

        Args:
            freeze_until (str): last layer to freeze
            mb (fastprogress.master_bar): primary progress bar
        """
        # self.model = freeze_bn(self.model.train())
        self.train_loss = 0
        self.model.train()
        pb = progress_bar(self.train_loader, parent=mb)
        for x, target in pb:
            x, target = self.to_cuda(x, target)
            self.example_ct += x.shape[0]
            # Forward
            batch_loss = self._get_loss(x, target)
            self.train_loss += batch_loss.item()

            # Backprop
            self._backprop_step(batch_loss)
            # Update LR
            self.scheduler.step()
            pb.comment = f"Training loss: {batch_loss.item():.4}"

            self.step += 1

            # Report metrics every 5th step
            if self.step % 5 == 0:
                if self.wb:
                    wandb.log({"epoch": self.epoch, "train_loss": batch_loss.item()}, step=self.example_ct)

        self.epoch += 1
        # print(self.train_loss,len(self.train_loader),self.train_loss/len(self.train_loader))
        self.train_loss /= len(self.train_loader)
        self.train_loss_recorder.append(self.train_loss)
Example #20
def _check_val_set_fy(train_fy:FoldYielder, val_fy:FoldYielder, test_fy:Optional[FoldYielder]=None, n_folds:Optional[int]=None) -> None:
    '''Check validation-set suitability by testing whether a random forest can distinguish events in one dataset from those in the other.
    Training is run once per fold and the results are averaged.'''
    n = min(train_fy.n_folds, val_fy.n_folds)
    if test_fy is not None: n = min(n, test_fy.n_folds)
    if n_folds is not None: n = min(n, n_folds)
    train_feats = None
        
    samples = {'train': train_fy} if test_fy is None else {'train': train_fy, 'test': test_fy}
    for sample in samples:
        aucs = []
        fi = pd.DataFrame()
        for fold_idx in progress_bar(range(n)):
            df_0 = samples[sample].get_df(pred_name='None', inc_inputs=True, deprocess=True, fold_idx=fold_idx, verbose=False, suppress_warn=True)
            df_1 = val_fy.get_df(pred_name='None', inc_inputs=True, deprocess=True, fold_idx=fold_idx, verbose=False, suppress_warn=True)
            df_0['gen_target'] = 0
            df_1['gen_target'] = 1
            df_0['gen_weight'] = 1/len(df_0)
            df_1['gen_weight'] = 1/len(df_1)

            df = pd.concat([df_0, df_1], ignore_index=True).sample(frac=1)  # DataFrame.append was removed in pandas 2.0
            df_trn, df_val = df[:len(df)//2], df[len(df)//2:]
            if train_feats is None: train_feats = [f for f in df_trn.columns if 'gen_' not in f]

            m = RandomForestClassifier(n_estimators=40, min_samples_leaf=25, n_jobs=-1)
            m.fit(X=df_trn[train_feats], y=df_trn['gen_target'], sample_weight=df_trn['gen_weight'])
            aucs.append(roc_auc_score(y_true=df_val['gen_target'], y_score=m.predict(df_val[train_feats]), sample_weight=df_val['gen_weight']))
            fi = pd.concat([fi, get_rf_feat_importance(m, df_val[train_feats], df_val['gen_target'], df_val['gen_weight'])], ignore_index=True)

        mean = uncert_round(np.mean(aucs), np.std(aucs, ddof=1)/np.sqrt(len(aucs)))
        print(f"\nAUC for {sample}-validation discrimination = {mean[0]}±{mean[1]}")
        print("Top 10 most important features are:")
        mean_fi = pd.DataFrame()
        mean_fi['Importance'] = fi['Importance'].groupby(fi['Feature']).mean()
        mean_fi['Uncertainty'] = fi['Importance'].groupby(fi['Feature']).std()/np.sqrt(n)
        mean_fi.sort_values(['Importance'], inplace=True, ascending=False)
        mean_fi.reset_index(inplace=True)
        print(mean_fi[:min(10, len(mean_fi))])
Example #21
 def gt_estimation(self,
                   method='STAPLE',
                   save_dir=None,
                   filetype='.png',
                   **kwargs):
     assert method in ['STAPLE', 'majority_voting']
     res = []
     refs = {}
     print(f'Starting ground truth estimation - {method}')
     for m, exps in progress_bar(self.masks.items()):
         masks = [_read_msk(self.mask_fn(exp, m)) for exp in exps]
         if method == 'STAPLE':
             ref = staple(masks, self.staple_fval, self.staple_thres)
         elif method == 'majority_voting':
             ref = m_voting(masks, self.mv_undec)
         refs[m] = ref
         #assert ref.mean() > 0, 'Please try again!'
         df_tmp = pd.DataFrame({
             'method': method,
             'file': m,
             'exp': exps,
             'iou': [iou(ref, msk) for msk in masks]
         })
         res.append(df_tmp)
         if save_dir:
             path = self.path / save_dir
             path.mkdir(exist_ok=True, parents=True)
             save_mask(ref, path / Path(m).stem, filetype)
     self.gt[method] = refs
     self.df_res = pd.concat(res)
     self.df_agg = self.df_res.groupby('exp').agg(
         average_iou=('iou', 'mean'), std_iou=('iou', 'std'))
     if save_dir:
         self.df_res.to_csv(path.parent / f'{method}_vs_experts.csv',
                            index=False)
         self.df_agg.to_csv(path.parent / f'{method}_vs_experts_agg.csv',
                            index=False)
Example #22
    def train_epoch(self, stats, epoch, mb, lrs):
        it = epoch * len(self.train_batches)
        self.model.train(True)
        for lr, batch in zip(lrs, progress_bar(self.train_batches, parent=mb)):
            input, target = self.get_batch(batch)
            output = self.model(input)
            metric = self.metric(output, target)
            loss = self.criterion(output, target)

            it += 1
            stats["train_it"].append(it)
            stats["train_loss"].append(
                loss.item())  # loss.item() * inputs.size(0)
            stats["train_metric"].append(metric)

            graphs = [[stats["train_it"], stats["train_loss"]],
                      [stats["train_it"], stats["train_metric"]],
                      [stats["valid_it"], stats["valid_loss"]],
                      [stats["valid_it"], stats["valid_metric"]]]
            mb.update_graph(graphs)

            self.backward(lr, loss)
        return stats
Example #23
def generate_tifs(src, dest, learn, size, tag=None, max_imgs=None):
    for fn in progress_bar(src):
        category = fn.parts[-3]
        try:
            if fn.suffix == '.czi':
                czi_predict_images(learn,
                                   fn,
                                   dest,
                                   category,
                                   size=size,
                                   tag=tag,
                                   max_imgs=max_imgs)
            elif fn.suffix == '.tif':
                tif_predict_images(learn,
                                   fn,
                                   dest,
                                   category,
                                   size=size,
                                   tag=tag,
                                   max_imgs=max_imgs)
        except Exception as e:
            print(f'exception with {fn.stem}')
            print(e)
Example #24
def parallel(f,
             items,
             *args,
             n_workers=defaults.cpus,
             total=None,
             progress=None,
             pause=0,
             timeout=None,
             chunksize=1,
             **kwargs):
    "Applies `func` in parallel to `items`, using `n_workers`"
    if progress is None: progress = progress_bar is not None
    with ProcessPoolExecutor(n_workers, pause=pause) as ex:
        r = ex.map(f,
                   items,
                   *args,
                   timeout=timeout,
                   chunksize=chunksize,
                   **kwargs)
        if progress:
            if total is None: total = len(items)
            r = progress_bar(r, total=total, leave=False)
        return L(r)
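A small usage sketch, assuming the fastcore names the snippet relies on (`defaults`, `ProcessPoolExecutor`, `L`) are in scope; with process pools the mapped function must be picklable, i.e. defined at module top level:

import math

def slow_sqrt(x):
    return math.sqrt(x)

# Maps slow_sqrt over the items with 2 worker processes and a progress bar.
results = parallel(slow_sqrt, range(100), n_workers=2)
print(results[:5])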
Example #25
    def _fit_epoch(self, mb: ConsoleMasterBar) -> None:
        """Fit a single epoch

        Args:
            mb (fastprogress.master_bar): primary progress bar
        """
        self.model = freeze_bn(self.model.train())

        pb = progress_bar(self.train_loader, parent=mb)
        for x, target in pb:
            x, target = self.to_cuda(x, target)

            # Forward
            batch_loss = self._get_loss(x, target)

            # Backprop
            self._backprop_step(batch_loss)
            # Update LR
            self.scheduler.step()
            pb.comment = f"Training loss: {batch_loss.item():.4}"

            self.step += 1
        self.epoch += 1
Example #26
def validate(dataloader, model, loss_fn, device, master_bar):
    """Compute loss, accuracy and confusion matrix on validation set.

    Args:
        dataloader (DataLoader): Torch DataLoader object to load data
        model (nn.Module): Torch model to train
        loss_fn: Torch loss function
        device (torch.device): Torch device to use for training
        master_bar (fastprogress.master_bar): Will be iterated over to draw 
            batches and show validation progress

    Returns:
        float, float, float: Mean total loss, mean MSE loss and mean KLD loss
            on the validation set
    """
    epoch_loss = []
    epoch_mse_loss, epoch_kld_loss = [], []

    model.eval()
    with torch.no_grad():
        for x, y in fastprogress.progress_bar(dataloader, parent=master_bar):
            # make a prediction on validation set

            y_hat, mu, logvar = model(x.to(device))

            # Compute loss
            total_loss, mse_loss, kld_loss = loss_fn(y_hat, x.to(device), mu,
                                                     logvar)

            # For plotting the test loss, save it for each sample
            epoch_loss.append(total_loss.item())
            epoch_mse_loss.append(mse_loss.item())
            epoch_kld_loss.append(kld_loss.item())

    # Return the mean total, MSE and KLD losses
    return np.mean(epoch_loss), np.mean(epoch_mse_loss), np.mean(epoch_kld_loss)
Example #27
def create_ground_truth(train: pd.DataFrame):
    labels = np.zeros((len(train), 264), dtype=int)
    for i, row in progress_bar(train.iterrows(), total=len(train)):
        ebird_code = BIRD_CODE[row.ebird_code]
        labels[i, ebird_code] = 1

        secondary_labels = ast.literal_eval(row.secondary_labels)  # safer than eval; needs `import ast`
        for sl in secondary_labels:
            if NAME2CODE.get(sl) is not None:
                second_code = NAME2CODE[sl]
                labels[i, BIRD_CODE[second_code]] = 1

        background = row["background"]
        if isinstance(background, str):
            academic_names = re.findall(r"\((.*>)\)", background)
            academic_names = [SCINAME2CODE[n] for n in academic_names
                              if n in SCINAME2CODE]
            for bl in academic_names:
                labels[i, BIRD_CODE[bl]] = 1
    columns = list(BIRD_CODE.keys())
    index = train["filename"].map(lambda x: x.replace(".mp3", ".wav")).values
    labels_df = pd.DataFrame(labels, index=index, columns=columns)
    return labels_df
Example #28
def parallel(f,
             items,
             *args,
             n_workers=defaults.cpus,
             total=None,
             progress=None,
             pause=0,
             threadpool=False,
             timeout=None,
             chunksize=1,
             **kwargs):
    "Applies `func` in parallel to `items`, using `n_workers`"
    pool = ThreadPoolExecutor if threadpool else ProcessPoolExecutor
    with pool(n_workers, pause=pause) as ex:
        r = ex.map(f,
                   items,
                   *args,
                   timeout=timeout,
                   chunksize=chunksize,
                   **kwargs)
        if progress and progress_bar:
            if total is None: total = len(items)
            r = progress_bar(r, total=total, leave=False)
        return L(r)
Example #29
    def epoch_train(self, model, train_iterator, optimizer, loss_criteria,
                    device, mb):  # Epoch training
        '''
        Training routine for each epoch.
        Returns:
            train_loss
        '''
        model.train()
        training_loss = 0
        for batch, (images, labels,
                    _) in enumerate(progress_bar(train_iterator, parent=mb)):
            # Move X, Y to device
            images = images.to(device)
            labels = labels.to(device)

            # Clear previous gradient
            optimizer.zero_grad()

            # Feed forward the model
            pred = model(images)
            loss = loss_criteria(pred, labels)

            # Backpropagation
            loss.backward()

            # Update parameters
            optimizer.step()

            # Update training loss after each batch
            training_loss += loss.item()

            mb.child.comment = f'Training loss: {round(training_loss/(batch + 1), 5)}'

        del images, labels, loss

        return training_loss / len(train_iterator)
Example #30
def save_tiles(tile_size, untiled_files, num_tiles=5, scale=4, threshold=100):
    for sub_dir in ['train', 'valid']:
        print(f'\n\nbuild tiles {sub_dir}/{tile_size}\n\n')
        hr_ROI = dpath / f'roi_hr_{tile_size}' / sub_dir
        lr_ROI = dpath / f'roi_lr_{tile_size}' / sub_dir
        lr_up_ROI = dpath / f'roi_lr_up_{tile_size}' / sub_dir
        #print('\n', hr_ROI, '\n', lr_ROI, '\n', lr_up_ROI)
        print('Creating ROIs with tile size ' + str(tile_size))
        hrdir = hr_path / sub_dir
        lrdir = lr_path / sub_dir
        for hr_fn in progress_bar(list(hrdir.iterdir())):
            #print('Processing ' + hr_fn.name + ', tile_size is ' + str(tile_size) + '.')
            lr_fn = lrdir / hr_fn.name
            helpers.tif_to_tiles(lr_fn,
                                 hr_fn,
                                 hr_fn.stem,
                                 hr_ROI,
                                 lr_up_ROI,
                                 lr_ROI,
                                 size=tile_size,
                                 num_tiles=num_tiles,
                                 scale=scale,
                                 threshold=threshold,
                                 untiled_ls=untiled_files)