Example #1
def start_train(
    config,
    config_path,
    yolo_model: yolo.YOLO_Model,
    train_generator,
    valid_generator,
    dry_mode: bool
):
    print('Full training')

    ###############################
    #   Optimizers
    ###############################

    optimizers = {
        'sgd': opt.SGD(lr=config['train']['learning_rate']),
        'adam': opt.Adam(lr=config['train']['learning_rate']),
        'adamax': opt.Adamax(lr=config['train']['learning_rate']),
        'nadam': opt.Nadam(lr=config['train']['learning_rate']),
        'rmsprop': opt.RMSprop(lr=config['train']['learning_rate']),
        # 'Radam': RAdam(lr=config['train']['learning_rate'], warmup_proportion=0.1, min_lr=1e-5)
    }

    optimizer = optimizers[config['train']['optimizer'].lower()]

    if config['train']['clipnorm'] > 0:
        optimizer.clipnorm = config['train']['clipnorm']

    if config['train'].get('lr_decay', 0) > 0:
        optimizer.decay = config['train']['lr_decay']

    if config['train']['optimizer'].lower() == 'nadam':
        # Just to set field
        optimizer.decay = 0.0

    ###############################
    #   Callbacks
    ###############################

    checkpoint_name = utils.get_checkpoint_name(config)
    utils.makedirs_4_file(checkpoint_name)

    checkpoint_vloss = cbs.CustomModelCheckpoint(
        model_to_save=yolo_model.infer_model,
        filepath=checkpoint_name,
        monitor='val_loss',
        verbose=1,
        save_best_only=True,
        mode='min',
        period=1
    )

    # tensorboard_logdir = utils.get_tensorboard_name(config)
    # utils.makedirs(tensorboard_logdir)
    # print('Tensorboard dir: {}'.format(tensorboard_logdir))

    # tensorboard_cb = TensorBoard(
    #     log_dir=tensorboard_logdir,
    #     histogram_freq=0,
    #     write_graph=False
    # )

    mAP_checkpoint_name = utils.get_mAP_checkpoint_name(config)
    mAP_checkpoint_static_name = utils.get_mAP_checkpoint_static_name(config)
    utils.makedirs_4_file(mAP_checkpoint_name)
    map_evaluator_cb = cbs.MAP_evaluation(
        model=yolo_model,
        generator=valid_generator,
        save_best=True,
        save_name=mAP_checkpoint_name,
        save_static_name=mAP_checkpoint_static_name,
        # tensorboard=tensorboard_cb,
        neptune=neptune if not dry_mode else None
    )

    reduce_on_plateau = ReduceLROnPlateau(
        monitor='val_loss',
        factor=0.4,
        patience=20,
        verbose=1,
        mode='min',
        min_delta=0,
        cooldown=10,
        min_lr=1e-8
    )

    early_stop = EarlyStopping(
        monitor='val_loss',
        min_delta=0,
        patience=80,
        mode='min',
        verbose=1
    )

    neptune_mon = cbs.NeptuneMonitor(
        monitoring=['loss', 'val_loss'],
        neptune=neptune
    )

    # logger_cb = cbs.CustomLogger(
    #     config=config,
    #     tensorboard=tensorboard_cb
    # )

    # fps_logger = cbs.FPSLogger(
    #     infer_model=yolo_model.infer_model,
    #     generator=valid_generator,
    #     infer_sz=config['model']['infer_shape'],
    #     tensorboard=tensorboard_cb
    # )

    callbacks = [
        # tensorboard_cb,
        map_evaluator_cb,
        # early_stop,
        reduce_on_plateau,
    ]

    ###############################
    #   Prepare fit
    ###############################

    if not dry_mode:
        callbacks.append(neptune_mon)

        with open('config.json', 'w') as f:
            json.dump(config, f, indent=4)

        sources_to_upload = [
            'yolo.py',
            '_common/backend.py',
            'config.json'
        ]

        params = {
            'base_params': str(config['model']['base_params']),
            'infer_size': "H{}xW{}".format(*config['model']['infer_shape']),
            'anchors_per_output': config['model']['anchors_per_output'],
            'anchors': str(config['model']['anchors'])
        }
        
        tags = [
            config['model']['base']
        ]

        logger.info('Tags: {}'.format(tags))
        
        neptune.create_experiment(
            name=utils.get_neptune_name(config),
            upload_stdout=False,
            upload_source_files=sources_to_upload,
            params=params,
            tags=tags
        )
    else:
        config['train']['nb_epochs'] = 10

    yolo_model.train_model.compile(loss=yolo.dummy_loss, optimizer=optimizer)
    yolo_model.train_model.fit_generator(
        generator=train_generator,
        steps_per_epoch=len(train_generator) * config['train']['train_times'],

        validation_data=valid_generator,
        validation_steps=len(valid_generator) * config['valid']['valid_times'],

        epochs=config['train']['nb_epochs'],
        verbose=1,
        callbacks=callbacks,
        workers=mp.cpu_count(),
        max_queue_size=100,
        use_multiprocessing=False
    )

    if not dry_mode:
        neptune.send_artifact(mAP_checkpoint_static_name)
        neptune.send_artifact('config.json')
Example #2
          'overall_patience': overall_patience, 
          'loss_delta': loss_delta,          
         }
print(f'parameters: {PARAMS}')

# Create experiment with defined parameters
neptune.init(project_qualified_name='blonde/wheat',
            api_token='eyJhcGlfYWRkcmVzcyI6Imh0dHBzOi8vdWkubmVwdHVuZS5haSIsImFwaV91cmwiOiJodHRwczovL3VpLm5lcHR1bmUuYWkiLCJhcGlfa2V5IjoiMTExN2QzMzQtMDJlYi00ODkzLTk5YTktYWNhNzg4MWFjZGQ3In0=',
            )

#neptune.init(project_qualified_name='shared/onboarding',
#             api_token='ANONYMOUS',
#             )

neptune.create_experiment(name=experiment_name,
                          params=PARAMS,
                          tags=[experiment_name, experiment_tag],
                          upload_source_files=['train_tanya.py'])


def main() -> None:
    device = f"cuda:{gpu_number}" if torch.cuda.is_available() else torch.device('cpu')
    print(device)

    train_boxes_df = pd.read_csv(META_TRAIN)
    train_boxes_df = preprocess_boxes(train_boxes_df)
    train_images_df = pd.read_csv('folds/orig_alex_folds.csv')    
    print(f'\nTotal images: {len(train_images_df.image_id.unique())}')
    
    # Leave only images with bboxes
    image_id_column = 'image_id'
    print('Leave only train images with boxes')
Example #3
                                        cooldown=0)

checkpoint = keras.callbacks.ModelCheckpoint(filepath,
                                             monitor='val_mse',
                                             verbose=0,
                                             save_best_only=True,
                                             save_weights_only=False,
                                             mode='min')

earlyStop = keras.callbacks.EarlyStopping(monitor='val_mse',
                                          mode='min',
                                          patience=10,
                                          restore_best_weights=True,
                                          verbose=1)

with neptune.create_experiment(name=modelName, params=conf) as npexp:
    neptune_monitor = NeptuneMonitor()

    callbacks_list = [checkpoint, neptune_monitor, RLR, earlyStop]

    model.summary()
    history = model.fit(train_generator,
                        validation_data=val_generator,
                        verbose=1,
                        epochs=numEpochs,
                        steps_per_epoch=train_generator.n //
                        train_generator.batch_size,
                        callbacks=callbacks_list)

    import glob
Example #4
                                        num_workers=num_workers)

        model_prefix = f"{cfg.model_file}_fold{fold}.{datetime.now().strftime('%b%d_%H-%M-%S')}"
        leaf_model = LeafModel(cfg,
                               model_prefix=model_prefix,
                               output_dir=output_dir)

        neptune.init(project_qualified_name='vmorelli/cassava')

        neptune_tags = []
        neptune_tags.extend((["gcp"] if on_gcp else []) +
                            (["dbg"] if debug else []))
        neptune.create_experiment(name=model_prefix,
                                  params=get_params_dict(cfg),
                                  upload_source_files=[
                                      '*.py', 'leaf/*.py', 'environment.yml',
                                      "*.ipynb"
                                  ],
                                  description=cfg.description,
                                  tags=neptune_tags)

        trainer = Trainer(leaf_model,
                          train_dataloader,
                          val_dataloader,
                          log_steps,
                          neptune=neptune,
                          fp16=use_fp16,
                          grad_norm=grad_norm)

        # Warmup
        leaf_model.optimizer = Adam(leaf_model.model.parameters(),
                                    lr=start_lr,
Example #5
"""
This is the driver script for an initial experiment.
"""

if __name__ == "__main__":
    import initialize
    initialize.initialize()

    import neptune

    from src import utils
    from src import constants

    neptune.init(constants.neptune_project_qualified_name)
    # Docs for create_experiment: https://neptune-client.readthedocs.io/en/latest/technical_reference/project.html#neptune.projects.Project.create_experiment
    with neptune.create_experiment(
            name="Insert experiment name here",
            description="Insert description here",
            upload_source_files=utils.get_source_files()) as npt_exp:
        pass
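        # A minimal sketch (an assumption, not part of the original driver) of what
        # could replace the `pass` above: log through the experiment handle so the
        # values land in this run. The metric and property names are placeholders.
        npt_exp.log_metric('placeholder_metric', 0.0)
        npt_exp.set_property('stage', 'initial-experiment')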
Example #6
    model.fit(X_train, y_train, batch_size=64, epochs=1, callbacks=[es, mc])
    y_pred = model.predict(X_val)

    #error = sklearn.metrics.mean_squared_error(Y_test, y_pred)

    if not np.isnan(y_pred).any():

        error = sklearn.metrics.mean_absolute_error(y_pred, y_val)

        # output: evaluation score
        return error
    else:
        print('nan values')


callback = None
n_trials = 100

if log_report:
    neptune.init(project_qualified_name='4ND4/sandbox')
    neptune.create_experiment(name='optuna sweep')
    monitor = opt_utils.NeptuneMonitor()
    callback = [monitor]
    n_trials = 1

study = optuna.create_study(direction='minimize')
study.optimize(objective, n_trials=n_trials, callbacks=callback)

print('Minimum mean absolute error: ' + str(study.best_value))
print('Best parameter: ' + str(study.best_params))
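
# A minimal follow-up sketch (an assumption, not part of the original snippet): the
# values printed above could also be recorded in the Neptune experiment created when
# log_report is enabled.
if log_report:
    neptune.log_metric('best_mae', study.best_value)
    neptune.log_text('best_params', str(study.best_params))
    neptune.stop()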
Example #7
    test_it = test_datagen.flow_from_directory(
        test_path, class_mode='categorical', batch_size=datagen_batch_size, target_size=(image_size, image_size))

    return train_it, val_it, test_it


objective = Objective(
    results_directory,
    maximum_epochs, early_stop_epochs,
    learning_rate_epochs
)

if log_results:

    neptune.init(project_qualified_name='4ND4/sandbox')
    result = neptune.create_experiment(name='optuna Resnet50 DeepUAge2.0')
    monitor = opt_utils.NeptuneMonitor()
    callback = [monitor]
    optuna.logging.set_verbosity(optuna.logging.WARNING)
else:
    callback = None

study = optuna.create_study(direction=optimizer_direction,
                            # sampler=TPESampler(n_startup_trials=number_of_random_points) read paper
                            )

study.optimize(
    objective,
    callbacks=callback,
    n_trials=100
)
Example #8
"""#Mount"""

# from google.colab import drive

# drive.mount('/content/gdrive')
# folder_loc = '/content/gdrive/Shareddrives/gggg/'

# #!pip install neptune-client

neptune.init(
    project_qualified_name='kbh/gggg',
    api_token=
    'eyJhcGlfYWRkcmVzcyI6Imh0dHBzOi8vdWkubmVwdHVuZS5haSIsImFwaV91cmwiOiJodHRwczovL3VpLm5lcHR1bmUuYWkiLCJhcGlfa2V5IjoiNTE4Yjg0MzEtMjYyYS00NzVlLTg4MjAtZGNiZGJhYThkY2Q4In0='
)

neptune.create_experiment('Baseline')
"""#Data Loader"""


class DTloader(Dataset):
    def __init__(self,
                 root: str,
                 train: bool = True,
                 transform=None,
                 target_transform=None):
        self.train = train
        self.transform = transform
        self.root = root
        self.file_list = glob.glob(root + '/' + '*')
        self.data_len = len(self.file_list)
Example #9
epochs = 800
batch_size = 256
n_chunks = 8
weighting = 0.999 # classification loss weighting
weighting_decay = 0.95 
supress_cl = 6 
freeze_e = 5


neptune.init(api_token=NEPTUNE_TOKEN,
             project_qualified_name=f'artursil/{PROJECT_NAME}')
neptune.create_experiment(EXPERIMENT_NAME,
                          params={'weighting': weighting,
                                  'weighting_decay': weighting_decay,
                                  'batch_size': batch_size,
                                  'lr': lr,
                                  'lr_decay': lr_decay,
                                  'network_layers': '[128,64,32]',
                                  'optimiser': 'rmsprop'})
from bokeh.io.export import get_screenshot_as_png
from bokeh.palettes import magma
from bokeh.transform import jitter
from bokeh.layouts import widgetbox
from bokeh.models.widgets import Panel, Tabs, Slider
from bokeh.plotting import figure, save, output_file
from sklearn.metrics import confusion_matrix
from scipy.special import softmax
from PIL import Image

def box_plot(x,y,cl_ar,bok_file,epoch):
Example #10
def train(name, run, folds_csv):
    
    wandb.init(project='dfdc', 
               config=config_defaults,
               name=f'{name},val_fold:{VAL_FOLD},run{run}')
    config = wandb.config
    
    os.makedirs(OUTPUT_DIR, exist_ok=True)
    
    model = timm.create_model('xception', pretrained=True, num_classes=1)
    model.to(device)
    # model = DataParallel(model).to(device)
    wandb.watch(model)
    
    if config.optimizer == 'radam':
        optimizer = torch_optimizer.RAdam(model.parameters(),
                                          lr=config.learning_rate,
                                          weight_decay=config.weight_decay)
    elif config.optimizer == 'sgd':
        optimizer = optim.SGD(model.parameters(), 
                              lr=config.learning_rate,
                              weight_decay=config.weight_decay)
        
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(
        optimizer,
        patience=config.schedule_patience,
        threshold=0.001,
        mode="min",
        factor = config.schedule_factor
    )
    criterion = nn.BCEWithLogitsLoss()
    es = EarlyStopping(patience = 10, mode='min')
    
    data_train = CelebDF_Dataset(data_root=DATA_ROOT,
                                mode='train',
                                folds_csv=folds_csv,
                                val_fold=VAL_FOLD,
                                test_fold=TEST_FOLD,
                                cutout_fill=config.cutout_fill,
                                hardcore=False,
                                random_erase=True,
                                oversample_real=True,
                                transforms=create_train_transforms(size=224))
    data_train.reset(config.rand_seed)
    train_data_loader = DataLoader( data_train, 
                                    batch_size=config.train_batch_size, 
                                    num_workers=8, 
                                    shuffle=True, 
                                    drop_last=True)

    data_val = CelebDF_Dataset(data_root=DATA_ROOT,
                                mode='val',
                                folds_csv=folds_csv,
                                val_fold=VAL_FOLD,
                                test_fold=TEST_FOLD,
                                hardcore=False,
                                oversample_real=False,
                                transforms=create_val_transforms(size=224))
    data_val.reset(config.rand_seed)

    val_data_loader = DataLoader(data_val, 
                                 batch_size=config.valid_batch_size, 
                                 num_workers=8, 
                                 shuffle=False, 
                                 drop_last=True)
    
    data_test = CelebDF_Dataset(data_root=DATA_ROOT,
                            mode='test',
                            folds_csv=folds_csv,
                            val_fold=VAL_FOLD,
                            test_fold=TEST_FOLD,
                            hardcore=False,
                            oversample_real=False,
                            transforms=create_val_transforms(size=224))
    data_test.reset(config.rand_seed)

    test_data_loader = DataLoader(data_test, 
                                 batch_size=config.valid_batch_size, 
                                 num_workers=8, 
                                 shuffle=False, 
                                 drop_last=True)

    train_history = []
    val_history = []
    test_history = []
    
    for epoch in range(config.epochs):
        print(f"Epoch = {epoch}/{config.epochs-1}")
        print("------------------")
        
        train_metrics = train_epoch(model, train_data_loader, optimizer, criterion, epoch)
        valid_metrics = valid_epoch(model, val_data_loader, criterion, epoch)
        scheduler.step(valid_metrics['valid_loss'])

        print(f"TRAIN_AUC = {train_metrics['train_auc']}, TRAIN_LOSS = {train_metrics['train_loss']}")
        print(f"VALID_AUC = {valid_metrics['valid_auc']}, VALID_LOSS = {valid_metrics['valid_loss']}")
        
        train_history.append(train_metrics)
        val_history.append(valid_metrics)

        es(valid_metrics['valid_loss'], model, model_path=os.path.join(OUTPUT_DIR,f"{name}_fold_{VAL_FOLD}_run_{run}.h5"))
        if es.early_stop:
            print("Early stopping")
            break
    
    model.load_state_dict(torch.load(f'weights/{name}_fold_{VAL_FOLD}_run_{run}.h5'))

    neptune.init('sowmen/dfdc')
    neptune.create_experiment(name=f'{name},val_fold:{VAL_FOLD},run{run}')

    test_history = test(model, test_data_loader, criterion)

    try:
        pkl.dump( train_history, open( f"train_history{name}{run}.pkl", "wb" ) )
        pkl.dump( val_history, open( f"val_history{name}{run}.pkl", "wb" ) )
        pkl.dump( test_history, open( f"test_history{name}{run}.pkl", "wb" ) )
    except Exception as e:
        print(f"Error pickling: {e}")

    wandb.save(f'weights/{name}_fold_{VAL_FOLD}_run_{run}.h5')
Example #11
test_f1 = f1_score(y_test, y_test_pred.argmax(axis=1), average='macro')
print(f'Train f1:{train_f1} | Test f1:{test_f1}')

# Step 2: Initialize Neptune

import neptune

neptune.init(
    project_qualified_name=
    'shared/onboarding',  # change this to your `workspace_name/project_name`
    api_token='ANONYMOUS',  # change this to your api token
)

# Step 3: Create an experiment and save parameters

neptune.create_experiment(name='great-idea', params=params)

# Step 4. Add tags to organize things

neptune.append_tag(['experiment-organization', 'me'])

# Step 5. Add logging of train and evaluation metrics

neptune.log_metric('train_f1', train_f1)
neptune.log_metric('test_f1', test_f1)

# Step 6. Run a few experiments with different parameters
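
# A minimal sketch for Step 6 (an assumption, not code from the original snippet):
# wrap one run in a helper and call it with different parameter sets. The helper is
# hypothetical and is not invoked here, so the experiment created in Step 3 stays
# active for the checks below.
def run_once(run_params):
    neptune.create_experiment(name='great-idea', params=run_params)
    neptune.log_metric('train_f1', train_f1)
    neptune.log_metric('test_f1', test_f1)
    neptune.stop()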

# tests
current_exp = neptune.get_experiment()
Example #12
def train(opt):
    params = Params(f'projects/{opt.project}.yml')

    # Neptune stuff
    all_params = opt.__dict__
    all_params.update(params.params)

    data_path = os.path.join(opt.data_path, params.project_name)

    tags = [
        'EfficientDet', f'D{opt.compound_coef}', f'bs{opt.batch_size}',
        opt.optim
    ]
    if opt.head_only:
        tags.append('head_only')

    if len(params.obj_list) == 1:
        tags.append('one_class')

    if opt.no_aug:
        tags.append('no_aug')

    neptune.create_experiment(name='EfficientDet',
                              tags=tags,
                              params=all_params,
                              upload_source_files=['train.py', 'coco_eval.py'])
    log_data_version(data_path)

    if params.num_gpus == 0:
        os.environ['CUDA_VISIBLE_DEVICES'] = '-1'

    if torch.cuda.is_available():
        torch.cuda.manual_seed(42)
    else:
        torch.manual_seed(42)

    opt.saved_path = os.path.join(opt.saved_path, params.project_name)
    opt.log_path = os.path.join(opt.log_path, params.project_name,
                                'tensorboard/')
    os.makedirs(opt.log_path, exist_ok=True)
    os.makedirs(opt.saved_path, exist_ok=True)

    training_params = {
        'batch_size': opt.batch_size,
        'shuffle': True,
        'drop_last': True,
        'collate_fn': collater,
        'num_workers': opt.num_workers
    }

    val_params = {
        'batch_size': opt.batch_size,
        'shuffle': False,
        'drop_last': True,
        'collate_fn': collater,
        'num_workers': opt.num_workers
    }

    input_sizes = [512, 640, 768, 896, 1024, 1280, 1280, 1536, 1536]
    if opt.no_aug:
        transform_list = [
            Normalizer(mean=params.mean, std=params.std),
            Resizer(input_sizes[opt.compound_coef])
        ]
    else:
        transform_list = [
            Normalizer(mean=params.mean, std=params.std),
            Augmenter(),
            Resizer(input_sizes[opt.compound_coef])
        ]

    training_set = CocoDataset(root_dir=os.path.join(opt.data_path,
                                                     params.project_name),
                               set=params.train_set,
                               transform=transforms.Compose(transform_list))
    training_generator = DataLoader(training_set, **training_params)

    val_set = CocoDataset(root_dir=os.path.join(opt.data_path,
                                                params.project_name),
                          set=params.val_set,
                          transform=transforms.Compose([
                              Normalizer(mean=params.mean, std=params.std),
                              Resizer(input_sizes[opt.compound_coef])
                          ]))
    val_generator = DataLoader(val_set, **val_params)

    model = EfficientDetBackbone(num_classes=len(params.obj_list),
                                 compound_coef=opt.compound_coef,
                                 ratios=eval(params.anchors_ratios),
                                 scales=eval(params.anchors_scales))

    # load last weights
    if opt.load_weights is not None:
        if opt.load_weights.endswith('.pth'):
            weights_path = opt.load_weights
        else:
            weights_path = get_last_weights(opt.saved_path)
        try:
            last_step = int(
                os.path.basename(weights_path).split('_')[-1].split('.')[0])
        except Exception:
            last_step = 0

        try:
            ret = model.load_state_dict(torch.load(weights_path), strict=False)
        except RuntimeError as e:
            print(f'[Warning] Ignoring {e}')
            print(
                '[Warning] Don\'t panic if you see this, this might be because you load a pretrained weights with different number of classes. The rest of the weights should be loaded already.'
            )

        print(
            f'[Info] loaded weights: {os.path.basename(weights_path)}, resuming checkpoint from step: {last_step}'
        )
    else:
        last_step = 0
        print('[Info] initializing weights...')
        init_weights(model)

    # freeze backbone if train head_only
    if opt.head_only:

        def freeze_backbone(m):
            classname = m.__class__.__name__
            for ntl in ['EfficientNet', 'BiFPN']:
                if ntl in classname:
                    for param in m.parameters():
                        param.requires_grad = False

        model.apply(freeze_backbone)
        print('[Info] froze backbone')

    # https://github.com/vacancy/Synchronized-BatchNorm-PyTorch
    # Apply sync_bn when using multiple gpus and the batch_size per gpu is lower than 4;
    # useful when gpu memory is limited. With such small per-gpu batches, bn is effectively
    # disabled and training becomes very unstable or slow to converge. sync_bn solves this
    # by packing the mini-batches from all gpus into one batch for normalization and then
    # sending the result back to each gpu, at the cost of slightly slower training.
    if params.num_gpus > 1 and opt.batch_size // params.num_gpus < 4:
        model.apply(replace_w_sync_bn)
        use_sync_bn = True
    else:
        use_sync_bn = False

    writer = SummaryWriter(
        opt.log_path +
        f'/{datetime.datetime.now().strftime("%Y%m%d-%H%M%S")}/')

    # wrap the model with the loss function to reduce memory usage on gpu0 and speed up training
    model = ModelWithLoss(model, debug=opt.debug)

    if params.num_gpus > 0:
        model = model.cuda()
        if params.num_gpus > 1:
            model = CustomDataParallel(model, params.num_gpus)
            if use_sync_bn:
                patch_replication_callback(model)

    if opt.optim == 'adamw':
        optimizer = torch.optim.AdamW(model.parameters(), opt.lr)
    else:
        optimizer = torch.optim.SGD(model.parameters(),
                                    opt.lr,
                                    momentum=opt.momentum,
                                    nesterov=True)

    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                           patience=3,
                                                           verbose=True)

    epoch = 0
    best_loss = 1e5
    best_epoch = 0
    best_step = 0
    best_checkpoint = None
    step = max(0, last_step)
    model.train()

    num_iter_per_epoch = len(training_generator)

    try:
        for epoch in range(opt.num_epochs):
            last_epoch = step // num_iter_per_epoch
            if epoch < last_epoch:
                continue

            epoch_loss = []
            epoch_cls_loss = []
            epoch_reg_loss = []

            if epoch % opt.val_interval == 0:
                model.eval()
                loss_regression_ls = []
                loss_classification_ls = []
                for iter, data in enumerate(val_generator):
                    with torch.no_grad():
                        imgs = data['img']
                        annot = data['annot']

                        if params.num_gpus == 1:
                            imgs = imgs.cuda()
                            annot = annot.cuda()

                        cls_loss, reg_loss = model(imgs,
                                                   annot,
                                                   obj_list=params.obj_list)
                        cls_loss = cls_loss.mean()
                        reg_loss = reg_loss.mean()

                        loss = cls_loss + reg_loss
                        if loss == 0 or not torch.isfinite(loss):
                            continue

                        loss_classification_ls.append(cls_loss.item())
                        loss_regression_ls.append(reg_loss.item())

                cls_loss = np.mean(loss_classification_ls)
                reg_loss = np.mean(loss_regression_ls)
                loss = cls_loss + reg_loss

                print(
                    'Val. Epoch: {}/{}. Classification loss: {:1.5f}. Regression loss: {:1.5f}. Total loss: {:1.5f}'
                    .format(epoch, opt.num_epochs, cls_loss, reg_loss, loss))
                writer.add_scalars('Loss', {'val': loss}, step)
                writer.add_scalars('Regression Loss', {'val': reg_loss}, step)
                writer.add_scalars('Classification Loss', {'val': cls_loss},
                                   step)

                neptune.log_metric('Val Loss', step, loss)
                neptune.log_metric('Val Regression Loss', step, reg_loss)
                neptune.log_metric('Val Classification Loss', step, cls_loss)

                with torch.no_grad():
                    stats = evaluate(model.model,
                                     params.params,
                                     threshold=opt.val_threshold,
                                     step=step)

                neptune.log_metric('AP at IoU=.50:.05:.95', step, stats[0])
                neptune.log_metric('AP at IoU=.50', step, stats[1])
                neptune.log_metric('AP at IoU=.75', step, stats[2])
                neptune.log_metric('AR given 1 detection per image', step,
                                   stats[6])
                neptune.log_metric('AR given 10 detections per image', step,
                                   stats[7])
                neptune.log_metric('AR given 100 detections per image', step,
                                   stats[8])

                if loss + opt.es_min_delta < best_loss:
                    best_loss = loss
                    best_epoch = epoch
                    best_step = step
                    checkpoint_name = f'efficientdet-d{opt.compound_coef}_{epoch}_{step}.pth'
                    checkpoint_path = save_checkpoint(model, opt.saved_path,
                                                      checkpoint_name)
                    best_checkpoint = checkpoint_path

                model.train()

            progress_bar = tqdm(training_generator)
            for iter, data in enumerate(progress_bar):
                if iter < step - last_epoch * num_iter_per_epoch:
                    progress_bar.update()
                    continue
                try:
                    imgs = data['img']
                    annot = data['annot']

                    if params.num_gpus == 1:
                        # if only one gpu, just send it to cuda:0
                        # elif multiple gpus, send it to multiple gpus in CustomDataParallel, not here
                        imgs = imgs.cuda()
                        annot = annot.cuda()

                    optimizer.zero_grad()
                    cls_loss, reg_loss = model(imgs,
                                               annot,
                                               obj_list=params.obj_list,
                                               step=step)
                    cls_loss = cls_loss.mean()
                    reg_loss = reg_loss.mean()

                    loss = cls_loss + reg_loss
                    if loss == 0 or not torch.isfinite(loss):
                        continue

                    loss.backward()
                    # torch.nn.utils.clip_grad_norm_(model.parameters(), 0.1)
                    optimizer.step()

                    epoch_loss.append(float(loss))
                    epoch_cls_loss.append(float(cls_loss))
                    epoch_reg_loss.append(float(reg_loss))

                    progress_bar.set_description(
                        'Step: {}. Epoch: {}/{}. Iteration: {}/{}. Cls loss: {:.5f}. Reg loss: {:.5f}. Total loss: {:.5f}'
                        .format(step, epoch, opt.num_epochs, iter + 1,
                                num_iter_per_epoch, cls_loss.item(),
                                reg_loss.item(), loss.item()))
                    writer.add_scalars('Loss', {'train': loss}, step)
                    writer.add_scalars('Regression_loss', {'train': reg_loss},
                                       step)
                    writer.add_scalars('Classification_loss',
                                       {'train': cls_loss}, step)

                    neptune.log_metric('Train Loss', step, loss)
                    neptune.log_metric('Train Regression Loss', step, reg_loss)
                    neptune.log_metric('Train Classification Loss', step,
                                       cls_loss)

                    # log learning_rate
                    current_lr = optimizer.param_groups[0]['lr']
                    writer.add_scalar('learning_rate', current_lr, step)
                    neptune.log_metric('Learning Rate', step, current_lr)

                    step += 1

                    if step % opt.save_interval == 0 and step > 0:
                        save_checkpoint(
                            model, opt.saved_path,
                            f'efficientdet-d{opt.compound_coef}_{epoch}_{step}.pth'
                        )
                        print('checkpoint...')

                except Exception as e:
                    print('[Error]', traceback.format_exc())
                    print(e)
                    continue

            scheduler.step(np.mean(epoch_loss))
            neptune.log_metric('Epoch Loss', step, np.mean(epoch_loss))
            neptune.log_metric('Epoch Classification Loss', step,
                               np.mean(epoch_cls_loss))
            neptune.log_metric('Epoch Regression Loss', step,
                               np.mean(epoch_reg_loss))

            # Early stopping
            if epoch - best_epoch > opt.es_patience > 0:
                print(
                    '[Info] Stop training at epoch {}. The lowest loss achieved is {}'
                    .format(epoch, best_loss))
                break

    except KeyboardInterrupt:
        save_checkpoint(
            model, opt.saved_path,
            f'efficientdet-d{opt.compound_coef}_{epoch}_{step}.pth')
        send_best_checkpoint(best_checkpoint, best_step)
        writer.close()
    writer.close()
    send_best_checkpoint(best_checkpoint, best_step)
    neptune.stop()
Example #13
get_ipython().system(' pip install neptune-client==0.4.123')

# Initialize Neptune

import neptune

neptune.init(
    api_token="ANONYMOUS",
    project_qualified_name="shared/colab-test-run"
)

# Basic Example

neptune.create_experiment(
    name='basic-colab-example',
    params={'learning_rate':0.1}
)

neptune.log_metric('accuracy', 0.93)

neptune.append_tags(['basic', 'finished_successfully'])

# tests
current_exp = neptune.get_experiment()

if set(current_exp.get_logs().keys()) != set(['accuracy']):
    raise ValueError()

neptune.stop()

# Keras classification example [Advanced]
Example #14
import numpy
from platform import python_version
print("python_version() ---> ", python_version())
print("torch.__version__ --> ", torch.__version__)

# make sure to fix the randomness at the very beginning
torch.manual_seed(0)
numpy.random.seed(0)

params = load_json_as_dict("./ML_parameters.json")

neptune.set_project(params["neptune_project"])
exp: neptune.experiments.Experiment = \
    neptune.create_experiment(params=flatten_dict(params),
                              upload_source_files=["./main_merfish.py", "./ML_parameters.json", "./MODULES/vae_parts.py",
                                                   "./MODULES/vae_model.py", "./MODULES/encoders_decoders.py"],
                              upload_stdout=True,
                              upload_stderr=True)

# Get the training and test data
preprocessed = load_obj("./data_train.pt")
img_torch = preprocessed.img.float()
roi_mask_torch = preprocessed.roi_mask.bool()
assert len(img_torch.shape) == len(roi_mask_torch.shape) == 4
# print("GPU GB after opening data ->",torch.cuda.memory_allocated()/1E9)

BATCH_SIZE = params["simulation"]["batch_size"]
SIZE_CROPS = params["input_image"]["size_raw_image"]
N_TEST = params["simulation"]["N_test"]
N_TRAIN = params["simulation"]["N_train"]
conditional_crop_test = ConditionalRandomCrop(desired_w=SIZE_CROPS,
Example #15
    def __init__(self, token, experiment, proj):
        self.token = token
        self.experiment = experiment
        self.proj = proj
        neptune.init(api_token=token, project_qualified_name=proj)
        neptune.create_experiment(name=experiment)
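
# A minimal usage sketch (an assumption: the enclosing class header is not shown in
# this snippet, so `NeptuneLogger` below is a hypothetical name for it):
# logger = NeptuneLogger(token=NEPTUNE_TOKEN, experiment='baseline', proj='workspace/project')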
Example #16
batch_size = 512
n_chunks = 8
weighting = 0.999  # classification loss weighting
weighting_decay = 0.95
supress_cl = 6
class_weight = [1., 1., 3., 3., 3., 3., 3., 3.]
rmse = True

neptune.init(api_token=NEPTUNE_TOKEN,
             project_qualified_name=f'artursil/{PROJECT_NAME}')
neptune.create_experiment(EXPERIMENT_NAME,
                          params={
                              'weighting': weighting,
                              'weighting_decay': weighting_decay,
                              'batch_size': batch_size,
                              'lr': lr,
                              'lr_decay': lr_decay,
                              'loss_margin': loss_margin,
                              'class_weight': f'{class_weight}',
                              'emb_layer': '[128,128]',
                              'optimiser': 'sgd'
                          })


def train_model(model, epoch_start, epochs, lr, lr_decay, weighting,
                weighting_decay):
    loss_tuple = []
    for epoch in range(epoch_start, epochs + 1):
        if torch.cuda.is_available():
            model.cuda()
        print(f"Starting epoch {epoch}")
        st = time.time()
Example #17
SEND_NEPTUNE = True
OUT_SIZE = 10
CIFAR_FACTOR = 1
PATIENCE = 0
NUM_EPOCHS = 150
WEIGHT_DECAY = 0.00004
MOMENTUM = 0.9
LEARNING_RATE = 0.2
MILESTONES = [30, 70, 110]

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(device)

if SEND_NEPTUNE:
    neptune.init('andrzejzdobywca/pretrainingpp')
    neptune.create_experiment(name=TRAIN_NAME)


def setup_half_loaders():
    transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ])

    if DATASET == "Cifar":
        image_datasets = {
            'train':
            torchvision.datasets.CIFAR10(root='./data_dir_cifar',
                                         train=True,
                                         download=True,
                                         transform=transform),
Example #18
def sampling(args):
    z_mean, z_log_var = args
    batch = K.shape(z_mean)[0]
    dim = K.int_shape(z_mean)[1]
    epsilon = K.random_normal(shape=(batch, dim))
    return z_mean + K.exp(0.5 * z_log_var) * epsilon


# select project
neptune.init(project_qualified_name='amitshakarchy/muscle-formation',
             api_token=api_token)
# create experiment
with neptune.create_experiment(
        name='my_vae - resnet archi',
        description="learning rate scheduler",
        tags=['classification', 'tf_2'],
        upload_source_files=['classification-example.py', 'requirements.txt'],
        params=PARAMS) as npt_exp:
    input_img = Input(shape=(image_size, image_size, 1), )

    # <editor-fold desc="block 1">
    '''block_1'''
    b1_cnv2d_1 = Conv2D(filters=n_filters * 16,
                        kernel_size=(3, 3),
                        strides=(2, 2),
                        padding='same',
                        use_bias=False,
                        name='b1_cnv2d_1',
                        kernel_initializer='normal')(input_img)
    b1_relu_1 = ReLU(name='b1_relu_1')(b1_cnv2d_1)
    b1_bn_1 = BatchNormalization(epsilon=1e-3, momentum=0.999,
Example #19
    'num_dense_nodes': 1024,
    'dense_nodes_divisor': 4,
    'batch_size': 64,
    'drop_out': 0.1,
    'learning_rate': 0.001,
    'input_shape': (VECTOR_SIZE, 1, 1)
}

# start experiment

name = 'keras-integration-cnn'

if LOG_NEPTUNE:
    neptune.init(project_qualified_name='4ND4/sandbox')
    neptune_tb.integrate_with_keras()
    result = neptune.create_experiment(name=name, params=PARAMS)

    name = result.id

# start of cnn coding
input_tensor = Input(shape=PARAMS.get('input_shape'))

# 1st cnn block
x = BatchNormalization()(input_tensor)
x = Activation('relu')(x)
x = Conv2D(filters=PARAMS['num_filters'],
           kernel_size=PARAMS['kernel_size'],
           strides=1,
           padding='same')(x)
# x = MaxPooling2D()(x)
x = Dropout(PARAMS['drop_out'])(x)
Example #20
model.compile(optimizer=optimizer,
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])

## Initialize Neptune

import neptune

neptune.init(api_token='ANONYMOUS',
             project_qualified_name='shared/tensorflow-keras-integration')

# Quickstart

## Step 1: Create an Experiment

neptune.create_experiment('tensorflow-keras-quickstart')

## Step 2: Add NeptuneMonitor Callback to model.fit()

from neptunecontrib.monitoring.keras import NeptuneMonitor

model.fit(x_train,
          y_train,
          epochs=5,
          batch_size=64,
          callbacks=[NeptuneMonitor()])

## Step 3: Explore results in the Neptune UI

## Step 4: Stop logging
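
# A minimal sketch for Step 4 (an assumption based on the legacy neptune-client API
# used above): close the experiment explicitly once logging is finished.
neptune.stop()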
Example #21
def main(cfg):
    """Runs main training procedure."""

    print('Starting training...')
    print('Current working directory is:', os.getcwd())

    # fix random seeds for reproducibility
    seed_everything(seed=cfg['seed'])

    # neptune logging
    neptune.init(project_qualified_name=cfg['neptune_project_name'],
                 api_token=cfg['neptune_api_token'])

    neptune.create_experiment(name=cfg['neptune_experiment'], params=cfg)

    num_classes = 1 if len(cfg['classes']) == 1 else (len(cfg['classes']) + 1)
    activation = 'sigmoid' if num_classes == 1 else 'softmax2d'
    background = not cfg['ignore_channels']

    aux_params = dict(
        pooling=cfg['pooling'],  # one of 'avg', 'max'
        dropout=cfg['dropout'],  # dropout ratio, default is None
        activation='sigmoid',  # activation function, default is None
        classes=num_classes)  # define number of output labels

    # configure model
    models = {
        'unet':
        Unet(encoder_name=cfg['encoder_name'],
             encoder_weights=cfg['encoder_weights'],
             decoder_use_batchnorm=cfg['use_batchnorm'],
             classes=num_classes,
             activation=activation,
             aux_params=aux_params),
        'unetplusplus':
        UnetPlusPlus(encoder_name=cfg['encoder_name'],
                     encoder_weights=cfg['encoder_weights'],
                     decoder_use_batchnorm=cfg['use_batchnorm'],
                     classes=num_classes,
                     activation=activation,
                     aux_params=aux_params),
        'deeplabv3plus':
        DeepLabV3Plus(encoder_name=cfg['encoder_name'],
                      encoder_weights=cfg['encoder_weights'],
                      classes=num_classes,
                      activation=activation,
                      aux_params=aux_params)
    }

    assert cfg['architecture'] in models.keys()
    model = models[cfg['architecture']]

    # configure loss
    losses = {
        'dice_loss':
        DiceLoss(include_background=background,
                 softmax=False,
                 batch=cfg['combine']),
        'generalized_dice':
        GeneralizedDiceLoss(include_background=background,
                            softmax=False,
                            batch=cfg['combine'])
    }

    assert cfg['loss'] in losses.keys()
    loss = losses[cfg['loss']]

    # configure optimizer
    optimizers = {
        'adam': Adam([dict(params=model.parameters(), lr=cfg['lr'])]),
        'adamw': AdamW([dict(params=model.parameters(), lr=cfg['lr'])]),
        'rmsprop': RMSprop([dict(params=model.parameters(), lr=cfg['lr'])])
    }

    assert cfg['optimizer'] in optimizers.keys()
    optimizer = optimizers[cfg['optimizer']]

    # configure metrics
    metrics = {
        'dice_score':
        DiceMetric(include_background=background, reduction='mean'),
        'dice_smp':
        Fscore(threshold=cfg['rounding'],
               ignore_channels=cfg['ignore_channels']),
        'iou_smp':
        IoU(threshold=cfg['rounding'], ignore_channels=cfg['ignore_channels']),
        'generalized_dice':
        GeneralizedDiceLoss(include_background=background,
                            softmax=False,
                            batch=cfg['combine']),
        'dice_loss':
        DiceLoss(include_background=background,
                 softmax=False,
                 batch=cfg['combine']),
        'cross_entropy':
        BCELoss(reduction='mean'),
        'accuracy':
        Accuracy(ignore_channels=cfg['ignore_channels'])
    }

    assert all(m['name'] in metrics.keys() for m in cfg['metrics'])
    metrics = [(metrics[m['name']], m['name'], m['type'])
               for m in cfg['metrics']]  # tuple of (metric, name, type)

    # configure scheduler
    schedulers = {
        'steplr':
        StepLR(optimizer, step_size=cfg['step_size'], gamma=0.5),
        'cosine':
        CosineAnnealingLR(optimizer,
                          cfg['epochs'],
                          eta_min=cfg['eta_min'],
                          last_epoch=-1)
    }

    assert cfg['scheduler'] in schedulers.keys()
    scheduler = schedulers[cfg['scheduler']]

    # configure augmentations
    train_transform = load_train_transform(transform_type=cfg['transform'],
                                           patch_size=cfg['patch_size'])
    valid_transform = load_valid_transform(patch_size=cfg['patch_size'])

    train_dataset = SegmentationDataset(df_path=cfg['train_data'],
                                        transform=train_transform,
                                        normalize=cfg['normalize'],
                                        tissuemix=cfg['tissuemix'],
                                        probability=cfg['probability'],
                                        blending=cfg['blending'],
                                        warping=cfg['warping'],
                                        color=cfg['color'])

    valid_dataset = SegmentationDataset(df_path=cfg['valid_data'],
                                        transform=valid_transform,
                                        normalize=cfg['normalize'])

    # save intermediate augmentations
    if cfg['eval_dir']:
        default_dataset = SegmentationDataset(df_path=cfg['train_data'],
                                              transform=None,
                                              normalize=None)

        transform_dataset = SegmentationDataset(df_path=cfg['train_data'],
                                                transform=None,
                                                normalize=None,
                                                tissuemix=cfg['tissuemix'],
                                                probability=cfg['probability'],
                                                blending=cfg['blending'],
                                                warping=cfg['warping'],
                                                color=cfg['color'])

        for idx in range(0, min(500, len(default_dataset)), 10):
            image_input, image_mask = default_dataset[idx]
            image_input = image_input.transpose((1, 2, 0))
            image_input = image_input.astype(np.uint8)

            image_mask = image_mask.transpose(
                1, 2, 0)  # Why do we need transpose here?
            image_mask = image_mask.astype(np.uint8)
            image_mask = image_mask.squeeze()
            image_mask = image_mask * 255

            image_transform, _ = transform_dataset[idx]
            image_transform = image_transform.transpose(
                (1, 2, 0)).astype(np.uint8)

            idx_str = str(idx).zfill(3)
            skimage.io.imsave(os.path.join(cfg['eval_dir'],
                                           f'{idx_str}a_image_input.png'),
                              image_input,
                              check_contrast=False)
            plt.imsave(os.path.join(cfg['eval_dir'],
                                    f'{idx_str}b_image_mask.png'),
                       image_mask,
                       vmin=0,
                       vmax=1)
            skimage.io.imsave(os.path.join(cfg['eval_dir'],
                                           f'{idx_str}c_image_transform.png'),
                              image_transform,
                              check_contrast=False)

        del transform_dataset

    train_loader = DataLoader(train_dataset,
                              batch_size=cfg['batch_size'],
                              num_workers=cfg['workers'],
                              shuffle=True)

    valid_loader = DataLoader(valid_dataset,
                              batch_size=cfg['batch_size'],
                              num_workers=cfg['workers'],
                              shuffle=False)

    trainer = Trainer(model=model,
                      device=cfg['device'],
                      save_checkpoints=cfg['save_checkpoints'],
                      checkpoint_dir=cfg['checkpoint_dir'],
                      checkpoint_name=cfg['checkpoint_name'])

    trainer.compile(optimizer=optimizer,
                    loss=loss,
                    metrics=metrics,
                    num_classes=num_classes)

    trainer.fit(train_loader,
                valid_loader,
                epochs=cfg['epochs'],
                scheduler=scheduler,
                verbose=cfg['verbose'],
                loss_weight=cfg['loss_weight'])

    # validation inference
    best_model = model
    best_model.load_state_dict(
        torch.load(os.path.join(cfg['checkpoint_dir'],
                                cfg['checkpoint_name'])))
    best_model.to(cfg['device'])
    best_model.eval()

    # setup directory to save plots
    if os.path.isdir(cfg['plot_dir']):
        # remove existing dir and content
        shutil.rmtree(cfg['plot_dir'])
    # create absolute destination
    os.makedirs(cfg['plot_dir'])

    # valid dataset without transformations and normalization for image visualization
    valid_dataset_vis = SegmentationDataset(df_path=cfg['valid_data'],
                                            transform=valid_transform,
                                            normalize=None)

    if cfg['save_checkpoints']:
        for n in range(len(valid_dataset)):
            image_vis = valid_dataset_vis[n][0].astype('uint8')
            image_vis = image_vis.transpose((1, 2, 0))

            image, gt_mask = valid_dataset[n]
            gt_mask = gt_mask.transpose((1, 2, 0))
            gt_mask = gt_mask.squeeze()

            x_tensor = torch.from_numpy(image).to(cfg['device']).unsqueeze(0)
            pr_mask, _ = best_model.predict(x_tensor)
            pr_mask = pr_mask.cpu().numpy().round()
            pr_mask = pr_mask.squeeze()

            save_predictions(out_path=cfg['plot_dir'],
                             index=n,
                             image=image_vis,
                             ground_truth_mask=gt_mask,
                             predicted_mask=pr_mask,
                             average='macro')
Example #22
import neptune

# The init() function called this way assumes that
# NEPTUNE_API_TOKEN environment variable is defined.

neptune.init('macabdul9/sandbox')
neptune.create_experiment(name='minimal_example')

# log some metrics

for i in range(100):
    neptune.log_metric('loss', 0.95**i)

neptune.log_metric('AUC', 0.96)
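
# A minimal sketch (an assumption, not part of the original snippet) of providing the
# token that the comment above refers to: export NEPTUNE_API_TOKEN in the shell before
# running, or set it from Python before neptune.init() is called. The value below is a
# placeholder.
import os
os.environ.setdefault('NEPTUNE_API_TOKEN', '<your Neptune API token>')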
Example #23
def main(rank, option, resume, save_folder):
    # Basic Options
    resume_path = os.path.join(save_folder, 'last_dict.pt')

    num_gpu = len(option.result['train']['gpu'].split(','))

    total_epoch = option.result['train']['total_epoch']
    multi_gpu = len(option.result['train']['gpu'].split(',')) > 1
    if multi_gpu:
        ddp = option.result['train']['ddp']
    else:
        ddp = False

    scheduler = option.result['train']['scheduler']
    batch_size, pin_memory = option.result['train'][
        'batch_size'], option.result['train']['pin_memory']

    # Logger
    if (rank == 0) or (rank == 'cuda'):
        neptune.init(
            'sunghoshin/imp',
            api_token=
            'eyJhcGlfYWRkcmVzcyI6Imh0dHBzOi8vYXBwLm5lcHR1bmUuYWkiLCJhcGlfdXJsIjoiaHR0cHM6Ly9hcHAubmVwdHVuZS5haSIsImFwaV9rZXkiOiI5MTQ3MjY2Yy03YmM4LTRkOGYtOWYxYy0zOTk3MWI0ZDY3M2MifQ=='
        )
        exp_name, exp_num = save_folder.split('/')[-2], save_folder.split(
            '/')[-1]
        neptune.create_experiment(params={
            'exp_name': exp_name,
            'exp_num': exp_num
        },
                                  tags=['inference:False'])

    # Load Model
    model = load_model(option)
    patch_criterion, detection_criterion = load_loss(option, rank)
    save_module = train_module(total_epoch, model, patch_criterion,
                               detection_criterion, multi_gpu)

    if resume:
        save_module.import_module(resume_path)
        model.load_state_dict(save_module.save_dict['model'][0])

    # Multi-Processing GPUs
    if ddp:
        setup(rank, num_gpu)
        torch.manual_seed(0)
        torch.cuda.set_device(rank)

        model.to(rank)
        model = DDP(model, device_ids=[rank])

        model = apply_gradient_allreduce(model)
        patch_criterion.to(rank)
        detection_criterion.to(rank)

    else:
        if multi_gpu:
            model = nn.DataParallel(model).to(rank)
        else:
            model = model.to(rank)

    # Optimizer and Scheduler
    if resume:
        # Load Optimizer
        optimizer = load_optimizer(option, model.parameters())
        optimizer.load_state_dict(save_module.save_dict['optimizer'][0])

        # Load Scheduler
        if scheduler is not None:
            scheduler = load_scheduler(option, optimizer)
            scheduler.load_state_dict(save_module.save_dict['scheduler'][0])

    else:
        optimizer = load_optimizer(option, model.parameters())
        if scheduler is not None:
            scheduler = load_scheduler(option, optimizer)

    # Early Stopping
    early_stop = option.result['train']['early']

    if early_stop:
        early = EarlyStopping(patience=option.result['train']['patience'])
    else:
        early = None

    # Dataset and DataLoader
    tr_robust_dataset, tr_coco_dataset, tr_ex_dataset = load_data(option,
                                                                  type='train')
    val_robust_dataset, val_coco_dataset, val_ex_dataset = load_data(
        option, type='val')

    if ddp:
        # Robust Dataset Loader
        if tr_robust_dataset is not None:
            tr_robust_sampler = torch.utils.data.distributed.DistributedSampler(
                dataset=tr_robust_dataset, num_replicas=num_gpu, rank=rank)
            val_robust_sampler = torch.utils.data.distributed.DistributedSampler(
                dataset=val_robust_dataset, num_replicas=num_gpu, rank=rank)

            tr_robust_loader = torch.utils.data.DataLoader(
                dataset=tr_robust_dataset,
                batch_size=batch_size,
                shuffle=False,
                num_workers=4 * num_gpu,
                pin_memory=pin_memory,
                sampler=tr_robust_sampler)
            val_robust_loader = torch.utils.data.DataLoader(
                dataset=val_robust_dataset,
                batch_size=batch_size,
                shuffle=False,
                num_workers=4 * num_gpu,
                pin_memory=pin_memory,
                sampler=val_robust_sampler)
        else:
            tr_robust_loader = None
            val_robust_loader = None

        # Detection-COCO-Dark-Dataset Loader
        if tr_coco_dataset is not None:
            tr_coco_sampler = torch.utils.data.distributed.DistributedSampler(
                dataset=tr_coco_dataset, num_replicas=num_gpu, rank=rank)

            val_coco_sampler = torch.utils.data.distributed.DistributedSampler(
                dataset=val_coco_dataset, num_replicas=num_gpu, rank=rank)

            tr_coco_loader = torch.utils.data.DataLoader(
                dataset=tr_coco_dataset,
                batch_size=batch_size,
                shuffle=False,
                num_workers=4 * num_gpu,
                pin_memory=pin_memory,
                sampler=tr_coco_sampler,
                collate_fn=detection_collate)
            val_coco_loader = torch.utils.data.DataLoader(
                dataset=val_coco_dataset,
                batch_size=batch_size,
                shuffle=False,
                num_workers=4 * num_gpu,
                pin_memory=pin_memory,
                sampler=val_coco_sampler,
                collate_fn=detection_collate)
        else:
            tr_coco_loader = None
            val_coco_loader = None

        # Detection-EX-Dark-Dataset Loader
        if tr_ex_dataset is not None:
            tr_ex_sampler = torch.utils.data.distributed.DistributedSampler(
                dataset=tr_ex_dataset, num_replicas=num_gpu, rank=rank)

            val_ex_sampler = torch.utils.data.distributed.DistributedSampler(
                dataset=val_ex_dataset, num_replicas=num_gpu, rank=rank)

            tr_ex_loader = torch.utils.data.DataLoader(
                dataset=tr_ex_dataset,
                batch_size=batch_size,
                shuffle=False,
                num_workers=4 * num_gpu,
                pin_memory=pin_memory,
                sampler=tr_ex_sampler,
                collate_fn=detection_collate)
            val_ex_loader = torch.utils.data.DataLoader(
                dataset=val_ex_dataset,
                batch_size=batch_size,
                shuffle=False,
                num_workers=4 * num_gpu,
                pin_memory=pin_memory,
                sampler=val_ex_sampler,
                collate_fn=detection_collate)
        else:
            tr_ex_loader = None
            val_ex_loader = None

    else:
        # Robust Dataset Loader
        if tr_robust_dataset is not None:
            tr_robust_loader = DataLoader(tr_robust_dataset,
                                          batch_size=batch_size,
                                          shuffle=True,
                                          pin_memory=pin_memory,
                                          num_workers=4 * num_gpu)
            val_robust_loader = DataLoader(val_robust_dataset,
                                           batch_size=batch_size,
                                           shuffle=False,
                                           pin_memory=pin_memory,
                                           num_workers=4 * num_gpu)
        else:
            tr_robust_loader = None
            val_robust_loader = None

        # Detection-COCO-Dark-Dataset Loader
        if tr_coco_dataset is not None:
            tr_coco_loader = DataLoader(tr_coco_dataset,
                                        batch_size=batch_size,
                                        shuffle=True,
                                        pin_memory=pin_memory,
                                        num_workers=4 * num_gpu,
                                        collate_fn=detection_collate)
            val_coco_loader = DataLoader(val_coco_dataset,
                                         batch_size=batch_size,
                                         shuffle=False,
                                         pin_memory=pin_memory,
                                         num_workers=4 * num_gpu,
                                         collate_fn=detection_collate)
        else:
            tr_coco_loader = None
            val_coco_loader = None

        # Detection-EX-Dark-Dataset Loader
        if tr_ex_dataset is not None:
            tr_ex_loader = DataLoader(tr_ex_dataset,
                                      batch_size=batch_size,
                                      shuffle=True,
                                      pin_memory=pin_memory,
                                      num_workers=4 * num_gpu,
                                      collate_fn=detection_collate)
            val_ex_loader = DataLoader(val_ex_dataset,
                                       batch_size=batch_size,
                                       shuffle=False,
                                       pin_memory=pin_memory,
                                       num_workers=4 * num_gpu,
                                       collate_fn=detection_collate)
        else:
            tr_ex_loader = None
            val_ex_loader = None
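
    # Note: in the DDP branch above, the loaders pass shuffle=False because shuffling is
    # delegated to DistributedSampler; getting a different ordering each epoch then
    # requires calling sampler.set_epoch(epoch) at the start of every epoch (presumably
    # handled inside robust_trainer.run below).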

    # Mixed Precision
    mixed_precision = option.result['train']['mixed_precision']
    if mixed_precision:
        scaler = torch.cuda.amp.GradScaler()
    else:
        scaler = None
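
    # With a GradScaler, the optimizer step inside the trainer typically follows the
    # standard torch.cuda.amp pattern, roughly:
    #     with torch.cuda.amp.autocast():
    #         loss = criterion(model(x), y)
    #     scaler.scale(loss).backward()
    #     scaler.step(optimizer)
    #     scaler.update()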

    # Training
    from module.trainer import robust_trainer
    early, save_module, option = robust_trainer.run(
        option, model, tr_robust_loader, tr_coco_loader, tr_ex_loader,
        val_robust_loader, val_coco_loader, val_ex_loader, optimizer,
        patch_criterion, detection_criterion, scaler, scheduler, early,
        save_folder, save_module, multi_gpu, rank, neptune)

    if ddp:
        cleanup()
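
The DDP setup and the cleanup() call above come from elsewhere in this project; a minimal
sketch of the setup/teardown pair they presumably wrap, using only the standard
torch.distributed API (the environment variables, port and backend choice here are
assumptions, not taken from the original code):

import os
import torch.distributed as dist

def setup(rank, world_size):
    # Rendezvous settings; real launch scripts usually provide these via the launcher.
    os.environ.setdefault('MASTER_ADDR', 'localhost')
    os.environ.setdefault('MASTER_PORT', '12355')
    dist.init_process_group(backend='nccl', rank=rank, world_size=world_size)

def cleanup():
    # Tear down the default process group once this rank has finished training.
    dist.destroy_process_group()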
Example #24
0
# %%


def record_eval_metric(neptune, metrics):
    for k, v in metrics.items():
        neptune.log_metric(k, v)


# %%
model_path = '/workspace/ml-workspace/thesis_git/thesis/models/'
best_eval_f1 = 0
# Measure the total training time for the whole run.
total_t0 = time.time()
with neptune.create_experiment(name="HierarchicalSemanticGraphNetwork",
                               params=PARAMS,
                               upload_source_files=['HSGN_GAT.py']):
    neptune.append_tag(
        ["homogeneous_graph", "GATConv", "bidirectional_token_node_edge"])
    neptune.set_property('server', 'IRGPU2')
    neptune.set_property('training_set_path', training_path)
    neptune.set_property('dev_set_path', dev_path)
    # For each epoch...
    for epoch_i in range(0, epochs):

        # ========================================
        #               Training
        # ========================================

        # Perform one full pass over the training set.
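        # (The body of this loop is cut off in the snippet. A minimal sketch of how the
        # record_eval_metric helper above is typically used once per epoch follows;
        # train_one_epoch and evaluate are hypothetical helpers, not from the original,
        # and torch is assumed to be imported earlier in the file.)
        train_one_epoch(model, train_dataloader, optimizer)
        metrics = evaluate(model, dev_dataloader)  # e.g. {'eval_f1': 0.71, 'eval_em': 0.58}
        record_eval_metric(neptune, metrics)

        # Keep the checkpoint with the best dev F1, reusing model_path and best_eval_f1
        # defined above.
        if metrics.get('eval_f1', 0.0) > best_eval_f1:
            best_eval_f1 = metrics['eval_f1']
            torch.save(model.state_dict(), model_path + 'best_model.pt')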
Example #25
0
model.compile(optimizer=optimizer,
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])

# Step 2: Initialize Neptune

import neptune

neptune.init(project_qualified_name='shared/onboarding', # change this to your `workspace_name/project_name`
             api_token='ANONYMOUS', # change this to your api token
            )

# Step 3: Create an experiment

neptune.create_experiment(name='great-idea')

# Step 4: Add logging for metrics and losses

class NeptuneMonitor(keras.callbacks.Callback):
    def on_epoch_end(self, epoch, logs=None):
        for metric_name, metric_value in logs.items():
            neptune.log_metric(metric_name, metric_value)

model.fit(x_train, y_train,
          epochs=PARAMS['epoch_nr'],
          batch_size=PARAMS['batch_size'],
          callbacks=[NeptuneMonitor()])

# tests
current_exp = neptune.get_experiment()
Example #26
0
          'decay': 0.0,
          'momentum': 0.9,
          'custLossThresh': -99.0,
          'label_level': 'composite',
          'label_feature_threshold': 0.001,
          'refl_scaling_min': -35.0,
          'refl_scaling_per99.99': 45.6660232543945,
          'W_scaling_min': -14.29787,
          'W_scaling_per99.99': 0.288602113723755,
          'model_run_name': 'unet_v6p2',
          'feature_description': '10minAfterHour_refl',
          }

neptune.create_experiment(name=neptune_experiment_name, 
                          params=PARAMS,
                          upload_source_files=neptune_upload_source_files,
                          tags=neptune_tags
)

#--------------------------
#
if PARAMS['optimizer'] == 'Adam':
  optimizer = Adam(lr=PARAMS['learning_rate'],
                   beta_1=PARAMS['beta_1'],
                   beta_2=PARAMS['beta_2'],
                   epsilon=PARAMS['epsilon'],
                   decay=PARAMS['decay']
  )
elif PARAMS['optimizer'] == 'SGD':
  optimizer = SGD(lr=PARAMS['learning_rate'],
                  decay=PARAMS['decay'],
Example #27
0
        if parts[1] in set(
            ["glove", "back", "front"]
        ):  # Order is not always consistent
            return parts[2]
        return parts[1]

    # Split by user rather than by sample: assign whole users to the training set
    # until the cumulative sample count reaches the requested train_ratio.
    nb_train = int(len(data) * train_ratio)
    o_samples = list(map(get_roshambo_user, data.samples))
    _, indices, counts = np.unique(o_samples, return_inverse=True, return_counts=True)
    training_users = np.nonzero(np.cumsum(counts) <= nb_train)[0]
    training_mask = np.isin(indices, training_users)
    train_data = torch_data.Subset(data, indices=np.arange(len(data))[training_mask])
    val_data = torch_data.Subset(data, indices=np.arange(len(data))[~training_mask])

    return train_data, val_data


if __name__ == "__main__":
    logging.basicConfig(
        format="%(asctime)s %(levelname)-8s %(message)s",
        level=logging.INFO,
        datefmt="%Y-%m-%d %H:%M:%S",
        filename="roshambo.log",
    )
    neptune.init("abc/abc", backend=neptune.OfflineBackend())
    # neptune.init("tihbe/pcritical")
    with neptune.create_experiment(
        "roshambo", upload_stdout=False, upload_stderr=False
    ):
        run_roshambo()
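
The OfflineBackend above swallows all tracking calls, which is convenient for local smoke
tests. A small sketch of toggling between offline and live tracking with an environment
variable (the variable name NEPTUNE_ONLINE is an assumption; the project names are taken
from the snippet above):

import os
import neptune

if os.getenv("NEPTUNE_ONLINE"):
    neptune.init("tihbe/pcritical")                            # live tracking
else:
    neptune.init("abc/abc", backend=neptune.OfflineBackend())  # no network calls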
Example #28
0
 nml_exp = neptune.create_experiment(
     name='EXP-' + str(args.experiment_id),
     logger=rsna_logger.logger,
     upload_stdout=False,
     tags=['dev', 'any'],
     params={
         'data_dataset': args.data_dataset,
         'data_fold': args.data_fold,
         'data_train_transform': args.data_train_transform
             if args.data_train_transform is not None else '-',
         'data_valid_transform': args.data_valid_transform
             if args.data_valid_transform is not None else '-',
         'data_sampler': args.data_sampler,
         'net_model': args.net_model,
         'net_loss': 'bce',
         'net_pretrained': args.net_pretrained,
         'net_weight_file': args.net_weight_file
             if args.net_weight_file is not None else '-',
         'net_num_classes': args.net_num_classes,
         'optim': args.optim,
         'optim_lr': args.optim_lr,
         'optim_momentum': args.optim_momentum,
         'optim_nesterov': args.optim_nesterov,
         'optim_weight_decay': args.optim_weight_decay,
         'optim_lookahead_enabled': args.optim_lookahead_enabled,
         'optim_scheduler': args.optim_scheduler,
         'optim_scheduler_warmup': args.optim_scheduler_warmup,
         'optim_scheduler_max_lr': args.optim_scheduler_max_lr,
         'optim_scheduler_min_lr': args.optim_scheduler_min_lr,
         'tr_iteration_num': args.tr_iteration_num,
         'tr_batch_size': args.tr_batch_size,
         'tr_accumulation_step': args.tr_accumulation_step
     },
     properties={
         'command': cmd,
     },
     upload_source_files=[
         'config.py',
         'rsna_dataset.py',
         'rsna_model.py',
         'rsna_network.py',
         'train_any.py',
         'train_any.sh',
         'common/**/*.py',
         'transforms/*.json',
     ])
Example #29
0
params = {
    'max_depth': 5,
    'eta': 0.5,
    'gamma': 0.1,
    'subsample': 1,
    'lambda': 1,
    'alpha': 0.35,
    'objective': 'reg:squarederror',
    'eval_metric': ['mae', 'rmse']
}
watchlist = [(dtest, 'eval'), (dtrain, 'train')]
num_round = 20

# Train model using `xgb.train()`

neptune.create_experiment(name='xgb', tags=['train'], params=params)
xgb.train(params, dtrain, num_round, watchlist, callbacks=[neptune_callback()])

neptune.stop()

# Train model using `xgb.cv()`

neptune.create_experiment(name='xgb', tags=['cv'], params=params)
xgb.cv(params,
       dtrain,
       num_boost_round=num_round,
       nfold=7,
       callbacks=[neptune_callback()])

neptune.stop()
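
The snippet assumes dtrain and dtest already exist; a minimal sketch of building them with
xgb.DMatrix from a train/test split (the synthetic regression data is only there to make
the sketch self-contained):

import numpy as np
import xgboost as xgb
from sklearn.model_selection import train_test_split

X = np.random.rand(500, 10)
y = X @ np.random.rand(10) + 0.1 * np.random.randn(500)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
dtrain = xgb.DMatrix(X_train, label=y_train)
dtest = xgb.DMatrix(X_test, label=y_test)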
Example #30
0
# See converted experiments
# Click on the link(s) above to browse the TensorBoard run in Neptune or go to [shared/tensorflow-integration project](https://ui.neptune.ai/o/shared/org/tensorboard-integration/experiments?viewId=def2c858-3510-4bf9-9e52-8720fadecb11).

# Log runs live to Neptune via TensorBoard

# Step 1: Initialize Neptune

import neptune

neptune.init(api_token='ANONYMOUS',
             project_qualified_name='shared/tensorboard-integration')

# Step 2: Create an experiment

neptune.create_experiment('tensorboard-logging')

# Step 3: Run ``neptune_tensorboard.integrate_with_tensorflow()``

import neptune_tensorboard

neptune_tensorboard.integrate_with_tensorflow()

# Step 4: Add your training code

import tensorflow as tf
import datetime

mnist = tf.keras.datasets.mnist

(x_train, y_train), (x_test, y_test) = mnist.load_data()
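
The snippet is cut off at the data-loading step; a minimal sketch of how such a run usually
continues (standard tf.keras MNIST quickstart code, not necessarily the original file), so
that integrate_with_tensorflow() has TensorBoard logs to mirror into Neptune:

x_train, x_test = x_train / 255.0, x_test / 255.0

model = tf.keras.models.Sequential([
    tf.keras.layers.Flatten(input_shape=(28, 28)),
    tf.keras.layers.Dense(128, activation='relu'),
    tf.keras.layers.Dropout(0.2),
    tf.keras.layers.Dense(10, activation='softmax')
])
model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])

# Standard Keras TensorBoard callback; its logs are what the integration picks up.
log_dir = 'logs/fit/' + datetime.datetime.now().strftime('%Y%m%d-%H%M%S')
tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=log_dir, histogram_freq=1)

model.fit(x_train, y_train,
          epochs=5,
          validation_data=(x_test, y_test),
          callbacks=[tensorboard_callback])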