def get_probability_and_backpointer(transition_probability, emission_probability,
                                    old_prev_states, current_word, prev_prob):
    """Single Viterbi step: score each candidate state for `current_word`.

    For every candidate state, pick the predecessor in `old_prev_states`
    maximizing transition * emission * previous-path probability.

    Returns a dict mapping state -> {'prob': best path probability,
    'parent': best predecessor}. States with zero best probability
    (no viable path) are omitted, as in the original implementation.
    """
    decoded = {}
    # Unseen words fall back to the full tag set from the transition table.
    candidate_states = utils.get_or_default(
        emission_probability, [current_word], list(transition_probability.keys()))
    for curr_state in candidate_states:
        max_tag_prob = 0
        back_pointer = ""
        for prev_state in old_prev_states:
            # Emission defaults to 1 for unseen (word, tag) pairs so the
            # transition/path terms still discriminate.
            curr_state_prob = (
                utils.get_or_default(transition_probability, [prev_state, curr_state], 0)
                * utils.get_or_default(emission_probability, [current_word, curr_state], 1)
                * utils.get_or_default(prev_prob, [prev_state, 'prob'], 0))
            if curr_state_prob > max_tag_prob:
                max_tag_prob = curr_state_prob
                back_pointer = prev_state
        # Removed leftover debug prints: `print('come here')` on zero
        # probability, and an unreachable `max_tag_prob < 0` branch
        # (max_tag_prob starts at 0 and only ever increases).
        if max_tag_prob != 0:
            decoded[curr_state] = {'prob': max_tag_prob, 'parent': back_pointer}
    return decoded
def __init__(self, **kwargs):
    """Configuration for a linear layer (dtype, scope, concat/multibias flags,
    plus bias and weight parameter options)."""
    defaults = (
        ("dtype", theano.config.floatX),
        ("scope", "linear"),
        ("concat", False),
        ("multibias", False),
    )
    for key, fallback in defaults:
        setattr(self, key, get_or_default(kwargs, key, fallback))
    self.bias = option(use=True, initializer=zeros_initializer)
    self.weight = option(output_major=False, initializer=uniform_initializer)
def __init__(self, **kwargs):
    """Top-level configuration for the rnnsearch encoder-decoder model."""
    self.dtype = get_or_default(kwargs, "dtype", theano.config.floatX)
    self.scope = get_or_default(kwargs, "scope", "rnnsearch")
    # One embedding config per translation side, scoped by its name.
    for side in ("source", "target"):
        name = side + "_embedding"
        setattr(self, name, embedding_config(dtype=self.dtype, scope=name))
    self.encoder = encoder_config(dtype=self.dtype)
    self.decoder = decoder_config(dtype=self.dtype)
def __init__(self, **kwargs):
    """Configuration for a feedforward layer (activation defaults to sigmoid)."""
    defaults = (
        ("dtype", theano.config.floatX),
        ("scope", "feedforward"),
        ("activation", theano.tensor.nnet.sigmoid),
        ("concat", False),
        ("multibias", False),
    )
    for key, fallback in defaults:
        setattr(self, key, get_or_default(kwargs, key, fallback))
    self.bias = option(use=True, initializer=zeros_initializer)
    self.weight = option(output_major=False, initializer=uniform_initializer)
def __init__(self, cfg):
    """Lightning module for HuBMAP kidney segmentation (smp.Unet on tiles).

    cfg: config mapping with 'train_params' and 'model' sections.
    """
    super().__init__()
    self.cfg = cfg
    trn_params = cfg['train_params']
    self.fold = get_or_default(trn_params, 'fold', 0)
    self.batch_size = get_or_default(trn_params, 'batch_size', 16)
    self.num_workers = get_or_default(trn_params, 'num_workers', 2)
    self.train_path = get_or_default(trn_params, 'train_path', 'input/crops256/train/')
    self.mask_path = get_or_default(trn_params, 'masks_path', 'input/crops256/masks/')
    data_path = Path(get_or_default(trn_params, 'data_path', '../input/hubmap-kidney-segmentation'))
    data_path_zarr = Path(get_or_default(trn_params, 'data_path_zarr', '../input/hubmap-zarr/train_scale2'))
    mask_preproc_dir = get_or_default(trn_params, 'mask_preproc_dir', '/kaggle/input/hubmap-labels-pdf-0-5-0-25-0-01/masks_scale2')
    backbone = cfg['model']['backbone']
    encoder_weights = get_or_default(cfg['model'], 'weights', 'imagenet')
    if cfg['model']['type'] == 'Unet':
        # Single-class (binary mask) segmentation head.
        self.model = smp.Unet(encoder_name=backbone, classes=1, encoder_weights=encoder_weights)
    else:
        # NOTE(review): the branch tests cfg['model']['type'] but the error
        # reports cfg['model']['name'] — confirm which key is intended.
        raise Exception(cfg['model']['name'] + ' not supported')
    self.crit = symmetric_lovasz
    # NOTE(review): df_train and df_info are loaded but never used below.
    df_train = pd.read_csv(data_path / 'train.csv')
    df_info = pd.read_csv(data_path / 'HuBMAP-20-dataset_information.csv')
    # Zarr stores are directories; skip hidden entries.
    files = [x for x in data_path_zarr.iterdir() if x.is_dir() if not x.name.startswith('.')]
    label_fn = lambda o: o  # identity: label path equals image path
    # NOTE(review): rebinding `cfg` shadows the constructor argument — every
    # cfg.* access below reads CONFIG() defaults, not the YAML config. Verify
    # this is intentional.
    cfg = CONFIG()
    aug = alb.Compose([
        alb.OneOf([
            alb.HueSaturationValue(10, 15, 10),
            alb.CLAHE(clip_limit=2),
            alb.RandomBrightnessContrast(),
        ], p=0.3),
        # presumably dataset-specific channel statistics — TODO confirm
        alb.Normalize(p=1, std=[0.15167958, 0.23584107, 0.13146145],
                      mean=[0.65459856, 0.48386562, 0.69428385])])
    # Shared keyword arguments for both tile datasets.
    ds_kwargs = {
        'tile_shape': cfg.tile_shape,
        'padding': cfg.padding,
        'scale': cfg.scale,
        'n_jobs': cfg.n_jobs,
        'preproc_dir': mask_preproc_dir,
        'val_length': cfg.val_length,
        'sample_mult': cfg.sample_mult,
        'loss_weights': False,
        'zoom_sigma': cfg.zoom_sigma,
        'flip': cfg.flip,
        'max_rotation': cfg.max_rotation,
        'deformation_grid_size': cfg.deformation_grid_size,
        'deformation_magnitude': cfg.deformation_magnitude,
        'albumentations_tfms': aug
    }
    self.train_ds = RandomTileDataset(files, label_fn=label_fn, **ds_kwargs)
    self.valid_ds = TileDataset(files, label_fn=label_fn, **ds_kwargs, is_zarr=True)
def __init__(self, **kwargs):
    """Configuration for a GRU cell: gate/candidate sub-configurations."""
    self.dtype = get_or_default(kwargs, "dtype", theano.config.floatX)
    self.scope = get_or_default(kwargs, "scope", "gru")
    self.concat = get_or_default(kwargs, "concat", False)
    self.activation = get_or_default(kwargs, "activation", theano.tensor.tanh)
    # Gate sub-layers share the cell dtype and are scoped by their own names.
    for gate_name in ("gates", "reset_gate", "update_gate"):
        setattr(self, gate_name, feedforward_config(dtype=self.dtype, scope=gate_name))
    self.candidate = linear_config(dtype=self.dtype, scope="candidate")
def get_crit(trn_params):
    """Build the training criterion named by trn_params['crit'].

    Supported: 'HardDarkRank' (default) and 'TopnMSELoss'.
    Raises Exception for any other name.
    """
    crit_name = get_or_default(trn_params, 'crit', 'HardDarkRank')
    if crit_name == 'HardDarkRank':
        dark_alpha = float(get_or_default(trn_params, 'dark_alpha', 2))
        # BUG FIX: beta previously read the 'dark_alpha' key (copy-paste),
        # so any configured 'dark_beta' was silently ignored.
        dark_beta = float(get_or_default(trn_params, 'dark_beta', 3))
        return HardDarkRank(alpha=dark_alpha, beta=dark_beta)
    elif crit_name == 'TopnMSELoss':
        topk = float(get_or_default(trn_params, 'topk', 5))
        return TopnMSELoss(k=topk)
    else:
        raise Exception(' crit {} is not supported'.format(crit_name))
def __init__(self, cfg, fold):
    """Lightning module for EfficientDet training on one CV fold.

    cfg: config mapping with a 'train_params' section.
    fold: which cross-validation fold to use (printed below).
    """
    super().__init__()
    # NOTE(review): variable is named LOCAL_RANK but the env var read is
    # "GLOBAL_RANK" — confirm which rank is meant to vary the seed.
    LOCAL_RANK = int(os.environ.get("GLOBAL_RANK", 0))
    # Distinct deterministic seed per rank.
    seed_everything(42 + LOCAL_RANK)
    self.cfg = cfg
    self.model = get_train_efficientdet()
    self.fold = fold
    # Fold assignments restricted to positive samples (per the file name).
    self.df = pd.read_csv('train_folds_only_pos.csv')
    trn_params = cfg['train_params']
    self.img_size = get_or_default(trn_params, 'img_size', 512)
    self.batch_size = get_or_default(trn_params, 'batch_size', 16)
    self.num_workers = get_or_default(trn_params, 'num_workers', 4)
    print('using fold', self.fold)
def __init__(self, **kwargs):
    """Configuration for the attention decoder: initial-state transform,
    attention transforms, GRU cell, and output (maxout/deepout/classify)
    layers."""
    self.dtype = get_or_default(kwargs, "dtype", theano.config.floatX)
    self.scope = get_or_default(kwargs, "scope", "decoder")
    self.init_transform = feedforward_config(dtype=self.dtype, scope="init_transform",
                                             activation=theano.tensor.tanh)
    self.annotation_transform = linear_config(dtype=self.dtype, scope="annotation_transform")
    self.state_transform = linear_config(dtype=self.dtype, scope="state_transform")
    self.context_transform = linear_config(dtype=self.dtype, scope="context_transform")
    self.rnn = gru_config(dtype=self.dtype, scope="rnn")
    self.maxout = maxout_config(dtype=self.dtype, scope="maxout")
    # BUG FIX: keyword was misspelled `dtypde=self.dtype`, so the deepout
    # layer silently fell back to its default dtype instead of self.dtype.
    self.deepout = linear_config(dtype=self.dtype, scope="deepout")
    self.classify = linear_config(dtype=self.dtype, scope="classify")
def age(self):
    """Lazily cached age, derived from the clinical days-to-birth field.

    Converted to positive years when known; the literal 'UNK' is cached
    unchanged when the value is missing.
    """
    if self._age is not None:
        return self._age
    raw = utils.get_or_default(self["clin_shared:days_to_birth"], '#text')
    if raw != 'UNK':
        # days_to_birth is negative; negate and convert days -> years.
        raw = -int(raw) / 365.25
    self._age = raw
    return self._age
def __init__(self, cfg, fold=0):
    """Lightning module for the Shopee product-group classifier.

    cfg: config mapping with 'train_params' plus top-level model/data keys.
    fold: cross-validation fold index (default 0).
    """
    super().__init__()
    self.fold = fold
    self.cfg = cfg
    trn_params = cfg['train_params']
    self.batch_size = get_or_default(trn_params, 'batch_size', 16)
    self.num_workers = get_or_default(trn_params, 'num_workers', 2)
    self.aug_type = get_or_default(cfg, 'aug', '0')
    self.margin_start = get_or_default(cfg, 'margin_start', 10)
    self.csv_path = get_or_default(cfg, 'csv_path', 'input/train_folds.csv')
    self.trn_path = get_or_default(cfg, 'image_path', 'input/train')
    # Fit the label encoder on the full csv so class ids are stable across folds.
    self.le = LabelEncoder()
    train = pd.read_csv(self.csv_path)
    self.le.fit(train.label_group)
    model = get_or_default(cfg, 'model', 'ShopeeModelTimm')
    num_classes = len(self.le.classes_)
    # FIX: removed dead `self.model = model` — the string was immediately
    # overwritten by the model instance in every non-raising branch.
    if model == 'ShopeeModelTimm':
        self.model = ShopeeModelTimm(num_classes, backbone=cfg['backbone'])
    elif model == 'ShopeeModelResnext':
        self.model = ShopeeModelResnext(num_classes=num_classes)
    else:
        raise Exception('unsupported model {}'.format(model))
    self.crit = nn.CrossEntropyLoss()
    self.acc = Accuracy()
    print('using fold', self.fold)
def extract_tags(probs_with_backpointers):
    """Follow Viterbi back-pointers from the last word to the first.

    probs_with_backpointers: list, one entry per word in sentence order; each
    entry is a dict keyed by the word, whose value maps
    tag -> {'prob': ..., 'parent': ...}.

    Returns a list of (word, tag) pairs in reverse sentence order.
    """
    res = []
    top_tag = get_top_tag(probs_with_backpointers[-1])
    # FIX: renamed loop variable from `dict` — it shadowed the builtin.
    # Also removed commented-out dead code.
    for entry in reversed(probs_with_backpointers):
        prev_top_tag = top_tag
        word = list(entry.keys())[0]
        res.append((word, top_tag))
        # When no back-pointer was recorded, keep the current tag as a default.
        top_tag = utils.get_or_default(entry, [word, top_tag, 'parent'], prev_top_tag)
    return res
def train_dataloader(self):
    """Build the shuffled training DataLoader over rows outside self.fold."""
    trn_aug = get_aug(cfg=self.cfg)
    train = pd.read_csv(self.csv_path)
    df = train[train.fold != self.fold].reset_index().drop(
        'index', axis=1).drop('fold', axis=1)
    # BUG FIX: previously read `get_or_default(cfg, ...)` — a bare `cfg`
    # name (module global or NameError), not this instance's config.
    trn_ds = RanzcrDs(df=df, aug=trn_aug, path=self.trn_path,
                      logits_path=get_or_default(self.cfg, 'logits', 'logits.npy'))
    trn_dl = torch.utils.data.DataLoader(trn_ds, shuffle=True,
                                         batch_size=self.batch_size,
                                         num_workers=self.num_workers)
    return trn_dl
def __init__(self, cfg):
    """Lightning module wrapping RanzcrModel with BCE-with-logits training."""
    super().__init__()
    self.cfg = cfg
    self.model = RanzcrModel(cfg=cfg)
    trn_params = cfg['train_params']
    # Training-loop knobs come from train_params...
    for attr, fallback in (('fold', 0), ('batch_size', 16), ('num_workers', 2)):
        setattr(self, attr, get_or_default(trn_params, attr, fallback))
    # ...while augmentation/path settings live at the top level of cfg.
    self.aug_type = get_or_default(cfg, 'aug', '0')
    self.csv_path = get_or_default(cfg, 'csv_path', 'input/train_folds.csv')
    self.trn_path = get_or_default(cfg, 'image_path', 'input/train')
    self.crit = nn.BCEWithLogitsLoss()
    print('using fold', self.fold)
def __init__(self, cfg, fold=0):
    """Classifier module with optional segmentation supervision, mixup, and an
    adversarial domain head behind gradient reversal.

    cfg: config mapping with 'train_params' and 'model' sections.
    fold: cross-validation fold, forwarded to the base class.
    """
    super().__init__(cfg=cfg, fold=fold)
    trn_params = cfg['train_params']
    backbone = get_or_default(cfg['model'], 'backbone', 'tf_efficientnet_b4_ns')
    # Single-channel input images, 4-way classification head.
    self.model = timm.create_model(backbone, pretrained=True, num_classes=4, in_chans=1)
    self.seg_supervision = bool(
        get_or_default(trn_params, 'seg_supervision', False))
    self.seg_supervision_weight = float(
        get_or_default(trn_params, 'seg_supervision_weight', 0.1))
    self.grad_starvation_fix = bool(
        get_or_default(trn_params, 'grad_starvation_fix', False))
    self.grad_rev = bool(get_or_default(trn_params, 'grad_rev', True))
    # Mixup is enabled by the mere presence of the 'mixup' key.
    if 'mixup' in trn_params:
        self.mixup = True
        self.mixup_alpha = float(
            get_or_default(trn_params, 'mixup_alpha', 1.0))
    else:
        self.mixup = False
        self.mixup_alpha = 0.0
    # Auxiliary binary segmentation head on the backbone feature map.
    self.seg_head = get_seg_head(in_channels=feat_map_size[backbone],
                                 out_channels=1)
    self.seg_crit = torch.nn.BCEWithLogitsLoss()
    # Two-way domain classifier; GradientReversal makes its training adversarial.
    self.dom_head = nn.Sequential(
        GradientReversal(),
        nn.Linear(feat_map_size[backbone], 512),
        nn.BatchNorm1d(512),
        nn.ReLU(),
        nn.Linear(512, 2),
        nn.ReLU(),
    )
    self.acc_metric = Accuracy()
    # NOTE(review): a missing 'csv' key yields None and read_csv would fail —
    # presumably the key is always present; confirm against the configs.
    self.df = pd.read_csv(get_or_default(cfg['train_params'], 'csv', None))
    self.crit = torch.nn.CrossEntropyLoss()
if __name__ == '__main__':
    # Parse the config path (and an optional, currently unused resume path).
    cli = argparse.ArgumentParser()
    cli.add_argument('--config', type=str, required=True)
    cli.add_argument('--resume', type=str, required=False)
    opts = cli.parse_args()

    config = benedict.from_yaml(opts.config)
    module = RanzcrModule(cfg=config)

    # Early stopping, LR logging, and checkpointing all track the mean ROC AUC.
    trainer_callbacks = [
        EarlyStopping(monitor='val/_avg_roc_auc', verbose=True, patience=20, mode='max'),
        LearningRateMonitor(),
        ModelCheckpoint(monitor='val/_avg_roc_auc', save_top_k=3, mode='max'),
    ]
    trainer = pl.Trainer(gpus=1, max_epochs=100,
                         callbacks=trainer_callbacks,
                         logger=TensorBoardLogger("lightning_logs", name=opts.config),
                         precision=get_or_default(config, 'precision', 32),
                         gradient_clip_val=float(get_or_default(config, 'grad_clip', 0)))
    trainer.fit(module)
def tumor_tissue_site(self):
    """Lazily cached tumor tissue site read from the clinical record."""
    if self._tumor_tissue_site is not None:
        return self._tumor_tissue_site
    self._tumor_tissue_site = utils.get_or_default(
        self['clin_shared:tumor_tissue_site'], '#text')
    return self._tumor_tissue_site
def pathologic_stage(self):
    """Lazily cached pathologic stage from the nested stage_event record."""
    if self._pathologic_stage is not None:
        return self._pathologic_stage
    stage_event = self['shared_stage:stage_event']
    self._pathologic_stage = utils.get_or_default(
        stage_event['shared_stage:pathologic_stage'], '#text')
    return self._pathologic_stage
def gender(self):
    """Lazily cached gender read from the shared clinical record."""
    if self._gender is not None:
        return self._gender
    self._gender = utils.get_or_default(self['shared:gender'], '#text')
    return self._gender
def histological_type(self):
    """Lazily cached histological type from the shared clinical record."""
    if self._histological_type is not None:
        return self._histological_type
    self._histological_type = utils.get_or_default(
        self['shared:histological_type'], '#text')
    return self._histological_type
def clinical_stage(self):
    """Lazily cached clinical stage from the nested stage_event record."""
    if self._clinical_stage is not None:
        return self._clinical_stage
    stage_event = self['shared_stage:stage_event']
    self._clinical_stage = utils.get_or_default(
        stage_event['shared_stage:clinical_stage'], '#text')
    return self._clinical_stage
def __init__(self, **kwargs):
    """Configuration for the bidirectional GRU encoder."""
    self.dtype = get_or_default(kwargs, "dtype", theano.config.floatX)
    self.scope = get_or_default(kwargs, "scope", "encoder")
    # One GRU config per direction, scoped by its own name.
    for direction in ("forward", "backward"):
        name = direction + "_rnn"
        setattr(self, name, gru_config(dtype=self.dtype, scope=name))
return optimizer def train_dataloader(self): dl = DataLoader(self.train_ds, batch_size=self.batch_size, shuffle=False, num_workers=self.num_workers) return dl def val_dataloader(self): dl = DataLoader(self.valid_ds, batch_size=self.batch_size, shuffle=False, num_workers=self.num_workers) return dl if __name__ == '__main__': parser = argparse.ArgumentParser() parser.add_argument('--config', type=str, required=True) parser.add_argument('--resume', type=str, required=False) args = parser.parse_args() cfg = benedict.from_yaml(args.config) module = HubmapModule(cfg) early_stop = EarlyStopping(monitor='val/avg_dice', verbose=True, patience=50, mode='max') logger = TensorBoardLogger("lightning_logs", name=args.config) lrm = LearningRateMonitor() mdl_ckpt = ModelCheckpoint(monitor='val/avg_dice', save_top_k=5, ) precision = get_or_default(cfg, 'precision', 32) clip_grad = get_or_default(cfg, 'cril_grad', 0.0) trainer = pl.Trainer(gpus=1, max_epochs=200, callbacks=[early_stop, lrm, mdl_ckpt], logger=logger, precision=precision, gradient_clip_val=clip_grad) trainer.fit(module)
def __init__(self, **kwargs):
    """Configuration for an embedding layer (dtype, scope, weight initializer,
    plus a bias option)."""
    defaults = (
        ("dtype", theano.config.floatX),
        ("scope", "embedding"),
        ("initializer", uniform_initializer),
    )
    for key, fallback in defaults:
        setattr(self, key, get_or_default(kwargs, key, fallback))
    self.bias = option(use=True, initializer=zeros_initializer)