def start_train(
        config,
        config_path,
        yolo_model: yolo.YOLO_Model,
        train_generator,
        valid_generator,
        dry_mode: bool
):
    """Run the full training loop for a YOLO model described by ``config``.

    Builds the optimizer and Keras callbacks, optionally registers a Neptune
    experiment (skipped in ``dry_mode``), then compiles and fits
    ``yolo_model.train_model`` on the supplied generators.

    Args:
        config: Parsed configuration dict with ``train``/``valid``/``model`` sections.
        config_path: Path to the config file.  NOTE(review): not referenced in
            this body — the config dict is re-serialized to ``config.json``
            instead; confirm whether the original file should be uploaded.
        yolo_model: Wrapper exposing ``train_model`` (fitted here) and
            ``infer_model`` (saved by the checkpoint callback).
        train_generator: Batch generator for the training split.
        valid_generator: Batch generator for the validation split.
        dry_mode: When True, skip all Neptune setup/artifacts and shorten the
            run to 10 epochs.
    """
    print('Full training')

    ###############################
    #   Optimizers
    ###############################

    # Every optimizer is constructed eagerly with the configured learning
    # rate; the active one is then selected by (lower-cased) name.
    optimizers = {
        'sgd': opt.SGD(lr=config['train']['learning_rate']),
        'adam': opt.Adam(lr=config['train']['learning_rate']),
        'adamax': opt.Adamax(lr=config['train']['learning_rate']),
        'nadam': opt.Nadam(lr=config['train']['learning_rate']),
        'rmsprop': opt.RMSprop(lr=config['train']['learning_rate']),
        # 'Radam': RAdam(lr=config['train']['learning_rate'], warmup_proportion=0.1, min_lr=1e-5)
    }

    optimizer = optimizers[config['train']['optimizer'].lower()]

    # Optional gradient clipping / LR decay, only applied when configured.
    if config['train']['clipnorm'] > 0:
        optimizer.clipnorm = config['train']['clipnorm']

    if config['train'].get('lr_decay', 0) > 0:
        optimizer.decay = config['train']['lr_decay']

    # NOTE(review): the lookup above lower-cases the name, but this equality
    # check is case-sensitive — a config value of 'nadam' selects the Nadam
    # optimizer yet skips this branch; confirm the intended spelling.
    if config['train']['optimizer'] == 'Nadam':
        # Just to set field
        optimizer.decay = 0.0

    ###############################
    #   Callbacks
    ###############################

    checkpoint_name = utils.get_checkpoint_name(config)
    utils.makedirs_4_file(checkpoint_name)

    # Saves the *inference* model whenever validation loss improves.
    # NOTE(review): checkpoint_vloss is constructed but never added to the
    # `callbacks` list below — confirm whether it should be registered.
    checkpoint_vloss = cbs.CustomModelCheckpoint(
        model_to_save=yolo_model.infer_model,
        filepath=checkpoint_name,
        monitor='val_loss',
        verbose=1,
        save_best_only=True,
        mode='min',
        period=1
    )

    # tensorboard_logdir = utils.get_tensorboard_name(config)
    # utils.makedirs(tensorboard_logdir)
    # print('Tensorboard dir: {}'.format(tensorboard_logdir))
    # tensorboard_cb = TensorBoard(
    #     log_dir=tensorboard_logdir,
    #     histogram_freq=0,
    #     write_graph=False
    # )

    mAP_checkpoint_name = utils.get_mAP_checkpoint_name(config)
    mAP_checkpoint_static_name = utils.get_mAP_checkpoint_static_name(config)
    utils.makedirs_4_file(mAP_checkpoint_name)

    # Evaluates mAP on the validation generator and keeps the best weights;
    # Neptune logging inside the callback is disabled in dry mode.
    map_evaluator_cb = cbs.MAP_evaluation(
        model=yolo_model,
        generator=valid_generator,
        save_best=True,
        save_name=mAP_checkpoint_name,
        save_static_name=mAP_checkpoint_static_name,
        # tensorboard=tensorboard_cb,
        neptune=neptune if not dry_mode else None
    )

    reduce_on_plateau = ReduceLROnPlateau(
        monitor='val_loss',
        factor=0.4,
        patience=20,
        verbose=1,
        mode='min',
        min_delta=0,
        cooldown=10,
        min_lr=1e-8
    )

    # NOTE(review): early_stop is created but commented out of `callbacks`
    # below, so early stopping is currently inactive.
    early_stop = EarlyStopping(
        monitor='val_loss',
        min_delta=0,
        patience=80,
        mode='min',
        verbose=1
    )

    neptune_mon = cbs.NeptuneMonitor(
        monitoring=['loss', 'val_loss'],
        neptune=neptune
    )

    # logger_cb = cbs.CustomLogger(
    #     config=config,
    #     tensorboard=tensorboard_cb
    # )

    # fps_logger = cbs.FPSLogger(
    #     infer_model=yolo_model.infer_model,
    #     generator=valid_generator,
    #     infer_sz=config['model']['infer_shape'],
    #     tensorboard=tensorboard_cb
    # )

    callbacks = [
        # tensorboard_cb,
        map_evaluator_cb,
        # early_stop,
        reduce_on_plateau,
    ]

    ###############################
    #   Prepare fit
    ###############################

    if not dry_mode:
        # Real run: stream loss metrics to Neptune and register an experiment
        # with the config snapshot, source files and model tags.
        callbacks.append(neptune_mon)

        with open('config.json', 'w') as f:
            json.dump(config, f, indent=4)

        sources_to_upload = [
            'yolo.py',
            '_common/backend.py',
            'config.json'
        ]

        params = {
            'base_params': str(config['model']['base_params']),
            'infer_size': "H{}xW{}".format(*config['model']['infer_shape']),
            'anchors_per_output': config['model']['anchors_per_output'],
            'anchors': str(config['model']['anchors'])
        }

        tags = [
            config['model']['base']
        ]

        logger.info('Tags: {}'.format(tags))

        neptune.create_experiment(
            name=utils.get_neptune_name(config),
            upload_stdout=False,
            upload_source_files=sources_to_upload,
            params=params,
            tags=tags
        )
    else:
        # Dry run: keep it short, no external logging.
        config['train']['nb_epochs'] = 10

    yolo_model.train_model.compile(loss=yolo.dummy_loss, optimizer=optimizer)
    yolo_model.train_model.fit_generator(
        generator=train_generator,
        steps_per_epoch=len(train_generator) * config['train']['train_times'],
        validation_data=valid_generator,
        validation_steps=len(valid_generator) * config['valid']['valid_times'],
        epochs=config['train']['nb_epochs'],
        verbose=1,
        callbacks=callbacks,
        workers=mp.cpu_count(),
        max_queue_size=100,
        use_multiprocessing=False
    )

    if not dry_mode:
        # Publish the best static mAP checkpoint and the config snapshot.
        neptune.send_artifact(mAP_checkpoint_static_name)
        neptune.send_artifact('config.json')
'overall_patience': overall_patience, 'loss_delta': loss_delta, } print(f'parameters: {PARAMS}') # Create experiment with defined parameters neptune.init(project_qualified_name='blonde/wheat', api_token='eyJhcGlfYWRkcmVzcyI6Imh0dHBzOi8vdWkubmVwdHVuZS5haSIsImFwaV91cmwiOiJodHRwczovL3VpLm5lcHR1bmUuYWkiLCJhcGlfa2V5IjoiMTExN2QzMzQtMDJlYi00ODkzLTk5YTktYWNhNzg4MWFjZGQ3In0=', ) #neptune.init(project_qualified_name='shared/onboarding', # api_token='ANONYMOUS', # ) neptune.create_experiment (name=experiment_name, params=PARAMS, tags=[experiment_name, experiment_tag], upload_source_files=['train_tanya.py']) def main() -> None: device = f"cuda:{gpu_number}" if torch.cuda.is_available() else torch.device('cpu') print(device) train_boxes_df = pd.read_csv(META_TRAIN) train_boxes_df = preprocess_boxes(train_boxes_df) train_images_df = pd.read_csv('folds/orig_alex_folds.csv') print(f'\nTotal images: {len(train_images_df.image_id.unique())}') # Leave only images with bboxes image_id_column = 'image_id' print('Leave only train images with boxes')
cooldown=0) checkpoint = keras.callbacks.ModelCheckpoint(filepath, monitor='val_mse', verbose=0, save_best_only=True, save_weights_only=False, mode='min') earlyStop = keras.callbacks.EarlyStopping(monitor='val_mse', mode='min', patience=10, restore_best_weights=True, verbose=1) with neptune.create_experiment(name=modelName, params=conf) as npexp: neptune_monitor = NeptuneMonitor() callbacks_list = [checkpoint, neptune_monitor, RLR, earlyStop] model.summary() history = model.fit(train_generator, validation_data=val_generator, verbose=1, epochs=numEpochs, steps_per_epoch=train_generator.n / train_generator.batch_size, callbacks=callbacks_list) import glob
num_workers=num_workers) model_prefix = f"{cfg.model_file}_fold{fold}.{datetime.now().strftime('%b%d_%H-%M-%S')}" leaf_model = LeafModel(cfg, model_prefix=model_prefix, output_dir=output_dir) neptune.init(project_qualified_name='vmorelli/cassava') neptune_tags = [] neptune_tags.extend((["gcp"] if on_gcp else []) + (["dbg"] if debug else [])) neptune.create_experiment(name=model_prefix, params=get_params_dict(cfg), upload_source_files=[ '*.py', 'leaf/*.py', 'environment.yml', "*.ipynb" ], description=cfg.description, tags=neptune_tags) trainer = Trainer(leaf_model, train_dataloader, val_dataloader, log_steps, neptune=neptune, fp16=use_fp16, grad_norm=grad_norm) # Warmup leaf_model.optimizer = Adam(leaf_model.model.parameters(), lr=start_lr,
"""
This is the driver script for an initial experiment.
"""

if __name__ == "__main__":
    # `initialize` is imported and run before the other project imports —
    # presumably it sets up paths/environment they depend on; confirm.
    import initialize
    initialize.initialize()

    import neptune

    from src import utils
    from src import constants

    # Connect the Neptune client to the configured project.
    neptune.init(constants.neptune_project_qualified_name)

    # Docs for create_experiment: https://neptune-client.readthedocs.io/en/latest/technical_reference/project.html#neptune.projects.Project.create_experiment
    # The context manager stops the experiment automatically on exit.
    with neptune.create_experiment(
            name="Insert experiment name here",
            description="Insert description here",
            upload_source_files=utils.get_source_files()) as npt_exp:
        # Placeholder body — the actual experiment code goes here.
        pass
model.fit(X_train, y_train, batch_size=64, epochs=1, callbacks=[es, mc]) y_pred = model.predict(X_val) #error = sklearn.metrics.mean_squared_error(Y_test, y_pred) if not np.isnan(y_pred.any()): error = sklearn.metrics.mean_absolute_error(y_pred, y_val) # output: evaluation score return error else: print('nan values') callback = None n_trials = 100 if log_report: neptune.init(project_qualified_name='4ND4/sandbox') neptune.create_experiment(name='optuna sweep') monitor = opt_utils.NeptuneMonitor() callback = [monitor] n_trials = 1 study = optuna.create_study(direction='minimize') study.optimize(objective, n_trials=n_trials, callbacks=callback) print('Minimum mean absolute error: ' + str(study.best_value)) print('Best parameter: ' + str(study.best_params))
test_it = test_datagen.flow_from_directory( test_path, class_mode='categorical', batch_size=datagen_batch_size, target_size=(image_size, image_size)) return train_it, val_it, test_it objective = Objective( results_directory, maximum_epochs, early_stop_epochs, learning_rate_epochs ) if log_results: neptune.init(project_qualified_name='4ND4/sandbox') result = neptune.create_experiment(name='optuna Resnet50 DeepUAge2.0') monitor = opt_utils.NeptuneMonitor() callback = [monitor] optuna.logging.set_verbosity(optuna.logging.WARNING) else: callback = None study = optuna.create_study(direction=optimizer_direction, # sampler=TPESampler(n_startup_trials=number_of_random_points) read paper ) study.optimize( objective, callbacks=callback, n_trials=100 )
"""#Mount""" # from google.colab import drive # drive.mount('/content/gdrive') # folder_loc = '/content/gdrive/Shareddrives/gggg/' # #!pip install neptune-client neptune.init( project_qualified_name='kbh/gggg', api_token= 'eyJhcGlfYWRkcmVzcyI6Imh0dHBzOi8vdWkubmVwdHVuZS5haSIsImFwaV91cmwiOiJodHRwczovL3VpLm5lcHR1bmUuYWkiLCJhcGlfa2V5IjoiNTE4Yjg0MzEtMjYyYS00NzVlLTg4MjAtZGNiZGJhYThkY2Q4In0=' ) neptune.create_experiment('Baseline') """#Data Loader""" class DTloader(Dataset): def __init__(self, root: str, train: bool = True, transform=None, target_transform=None): self.train = train self.transform = transform self.root = root self.file_list = glob.glob(root + '/' + '*') self.data_len = len(self.file_list)
epochs = 800 batch_size = 256 n_chunks = 8 weighting = 0.999 # classification loss weighting weighting_decay = 0.95 supress_cl = 6 freeze_e = 5 neptune.init(api_token=NEPTUNE_TOKEN, project_qualified_name=f'artursil/{PROJECT_NAME}') neptune.create_experiment(EXPERIMENT_NAME, params={'weighting': weighting, 'weighting_decay': weighting_decay, 'batch_size':batch_size, 'lr':lr, 'lr_decay':lr_decay, 'network_layers': '[128,64,32]', 'optimiser': 'rmsprop' }) from bokeh.io.export import get_screenshot_as_png from bokeh.palettes import magma from bokeh.transform import jitter from bokeh.layouts import widgetbox from bokeh.models.widgets import Panel, Tabs, Slider from bokeh.plotting import figure, save, output_file from sklearn.metrics import confusion_matrix from scipy.special import softmax from PIL import Image def box_plot(x,y,cl_ar,bok_file,epoch):
def train(name, run, folds_csv):
    """Train an Xception deepfake classifier on one CelebDF fold split.

    Sets up wandb tracking, builds the model/optimizer/scheduler from the
    wandb config, trains with ReduceLROnPlateau scheduling and early stopping
    on validation loss, reloads the best checkpoint, logs a Neptune
    experiment, evaluates on the test fold, and persists metric histories.

    Args:
        name: Experiment name, used in run labels and output file names.
        run: Run index, distinguishing repeated runs of the same fold.
        folds_csv: Path to the CSV describing the train/val/test folds.
    """
    wandb.init(project='dfdc', config=config_defaults, name=f'{name},val_fold:{VAL_FOLD},run{run}')
    config = wandb.config

    os.makedirs(OUTPUT_DIR, exist_ok=True)

    model = timm.create_model('xception', pretrained=True, num_classes=1)
    model.to(device)
    # model = DataParallel(model).to(device)
    wandb.watch(model)

    # Optimizer is chosen by the sweep config; both variants share lr/decay.
    if config.optimizer == 'radam':
        optimizer = torch_optimizer.RAdam(model.parameters(),
                                          lr=config.learning_rate,
                                          weight_decay=config.weight_decay)
    elif config.optimizer == 'sgd':
        optimizer = optim.SGD(model.parameters(),
                              lr=config.learning_rate,
                              weight_decay=config.weight_decay)

    scheduler = optim.lr_scheduler.ReduceLROnPlateau(
        optimizer,
        patience=config.schedule_patience,
        threshold=0.001,
        mode="min",
        factor=config.schedule_factor
    )
    criterion = nn.BCEWithLogitsLoss()
    es = EarlyStopping(patience=10, mode='min')

    # Training split: real-frame oversampling, random erasing and train-time
    # augmentations enabled; val/test use plain validation transforms.
    data_train = CelebDF_Dataset(data_root=DATA_ROOT,
                                 mode='train',
                                 folds_csv=folds_csv,
                                 val_fold=VAL_FOLD,
                                 test_fold=TEST_FOLD,
                                 cutout_fill=config.cutout_fill,
                                 hardcore=False,
                                 random_erase=True,
                                 oversample_real=True,
                                 transforms=create_train_transforms(size=224))
    data_train.reset(config.rand_seed)
    train_data_loader = DataLoader(data_train,
                                   batch_size=config.train_batch_size,
                                   num_workers=8,
                                   shuffle=True,
                                   drop_last=True)

    data_val = CelebDF_Dataset(data_root=DATA_ROOT,
                               mode='val',
                               folds_csv=folds_csv,
                               val_fold=VAL_FOLD,
                               test_fold=TEST_FOLD,
                               hardcore=False,
                               oversample_real=False,
                               transforms=create_val_transforms(size=224))
    data_val.reset(config.rand_seed)
    val_data_loader = DataLoader(data_val,
                                 batch_size=config.valid_batch_size,
                                 num_workers=8,
                                 shuffle=False,
                                 drop_last=True)

    data_test = CelebDF_Dataset(data_root=DATA_ROOT,
                                mode='test',
                                folds_csv=folds_csv,
                                val_fold=VAL_FOLD,
                                test_fold=TEST_FOLD,
                                hardcore=False,
                                oversample_real=False,
                                transforms=create_val_transforms(size=224))
    data_test.reset(config.rand_seed)
    test_data_loader = DataLoader(data_test,
                                  batch_size=config.valid_batch_size,
                                  num_workers=8,
                                  shuffle=False,
                                  drop_last=True)

    train_history = []
    val_history = []
    test_history = []

    for epoch in range(config.epochs):
        print(f"Epoch = {epoch}/{config.epochs-1}")
        print("------------------")

        train_metrics = train_epoch(model, train_data_loader, optimizer, criterion, epoch)
        valid_metrics = valid_epoch(model, val_data_loader, criterion, epoch)
        # Scheduler steps on validation loss (mode='min' above).
        scheduler.step(valid_metrics['valid_loss'])

        print(f"TRAIN_AUC = {train_metrics['train_auc']}, TRAIN_LOSS = {train_metrics['train_loss']}")
        print(f"VALID_AUC = {valid_metrics['valid_auc']}, VALID_LOSS = {valid_metrics['valid_loss']}")

        train_history.append(train_metrics)
        val_history.append(valid_metrics)

        # EarlyStopping also persists the best-so-far weights.
        es(valid_metrics['valid_loss'], model,
           model_path=os.path.join(OUTPUT_DIR, f"{name}_fold_{VAL_FOLD}_run_{run}.h5"))
        if es.early_stop:
            print("Early stopping")
            break

    # Reload the best checkpoint before testing.
    # NOTE(review): checkpoints are written under OUTPUT_DIR but reloaded from
    # 'weights/...' — these only agree when OUTPUT_DIR == 'weights'; confirm.
    model.load_state_dict(torch.load(f'weights/{name}_fold_{VAL_FOLD}_run_{run}.h5'))

    neptune.init('sowmen/dfdc')
    neptune.create_experiment(name=f'{name},val_fold:{VAL_FOLD},run{run}')

    test_history = test(model, test_data_loader, criterion)

    # Best-effort persistence of the metric histories; use context managers so
    # the file handles are always closed, and catch only Exception so that
    # KeyboardInterrupt/SystemExit are not swallowed.
    try:
        with open(f"train_history{name}{run}.pkl", "wb") as f:
            pkl.dump(train_history, f)
        with open(f"val_history{name}{run}.pkl", "wb") as f:
            pkl.dump(val_history, f)
        with open(f"test_history{name}{run}.pkl", "wb") as f:
            pkl.dump(test_history, f)
    except Exception:
        print("Error pickling")

    wandb.save(f'weights/{name}_fold_{VAL_FOLD}_run_{run}.h5')
# Macro-averaged F1 on the test split; `y_test_pred` holds per-class scores,
# so argmax picks the predicted class (train_f1 computed earlier).
test_f1 = f1_score(y_test, y_test_pred.argmax(axis=1), average='macro')
print(f'Train f1:{train_f1} | Test f1:{test_f1}')

# Step 2: Initialize Neptune
import neptune

neptune.init(
    project_qualified_name='shared/onboarding',  # change this to your `workspace_name/project_name`
    api_token='ANONYMOUS',  # change this to your api token
)

# Step 3: Create an experiment and save parameters
neptune.create_experiment(name='great-idea', params=params)

# Step 4. Add tags to organize things
neptune.append_tag(['experiment-organization', 'me'])

# Step 5. Add logging of train and evaluation metrics
neptune.log_metric('train_f1', train_f1)
neptune.log_metric('test_f1', test_f1)

# Step 6. Run a few experiments with different parameters

# tests
# Grab a handle on the live experiment (used below for assertions).
current_exp = neptune.get_experiment()
def train(opt):
    """Train an EfficientDet model on a COCO-format dataset.

    Builds the datasets/loaders from the project YAML, optionally resumes
    from the last checkpoint, trains with periodic validation, COCO-style
    evaluation and checkpointing, and mirrors all metrics to both
    TensorBoard and Neptune.

    Args:
        opt: Parsed command-line options (project, compound_coef, batch_size,
            optim, lr, paths, intervals, early-stopping settings, ...).
    """
    params = Params(f'projects/{opt.project}.yml')

    # Neptune staff
    # Merge CLI options and project YAML into one flat params dict for logging.
    all_params = opt.__dict__
    all_params.update(params.params)
    data_path = os.path.join(opt.data_path, params.project_name)
    tags = [
        'EfficientDet', f'D{opt.compound_coef}', f'bs{opt.batch_size}',
        opt.optim
    ]
    if opt.head_only:
        tags.append('head_only')
    if len(params.obj_list) == 1:
        tags.append('one_class')
    if opt.no_aug:
        tags.append('no_aug')
    neptune.create_experiment(name='EfficientDet',
                              tags=tags,
                              params=all_params,
                              upload_source_files=['train.py', 'coco_eval.py'])
    log_data_version(data_path)

    # num_gpus == 0 forces CPU by hiding all CUDA devices.
    if params.num_gpus == 0:
        os.environ['CUDA_VISIBLE_DEVICES'] = '-1'

    # Fixed seed for reproducibility on whichever device is active.
    if torch.cuda.is_available():
        torch.cuda.manual_seed(42)
    else:
        torch.manual_seed(42)

    opt.saved_path = os.path.join(opt.saved_path, params.project_name)
    opt.log_path = os.path.join(opt.log_path, params.project_name, 'tensorboard/')
    os.makedirs(opt.log_path, exist_ok=True)
    os.makedirs(opt.saved_path, exist_ok=True)

    training_params = {
        'batch_size': opt.batch_size,
        'shuffle': True,
        'drop_last': True,
        'collate_fn': collater,
        'num_workers': opt.num_workers
    }

    val_params = {
        'batch_size': opt.batch_size,
        'shuffle': False,
        'drop_last': True,
        'collate_fn': collater,
        'num_workers': opt.num_workers
    }

    # Input resolution per compound coefficient D0..D8.
    input_sizes = [512, 640, 768, 896, 1024, 1280, 1280, 1536, 1536]
    if opt.no_aug:
        transform_list = [
            Normalizer(mean=params.mean, std=params.std),
            Resizer(input_sizes[opt.compound_coef])
        ]
    else:
        transform_list = [
            Normalizer(mean=params.mean, std=params.std),
            Augmenter(),
            Resizer(input_sizes[opt.compound_coef])
        ]

    training_set = CocoDataset(root_dir=os.path.join(opt.data_path, params.project_name),
                               set=params.train_set,
                               transform=transforms.Compose(transform_list))
    training_generator = DataLoader(training_set, **training_params)

    val_set = CocoDataset(root_dir=os.path.join(opt.data_path, params.project_name),
                          set=params.val_set,
                          transform=transforms.Compose([
                              Normalizer(mean=params.mean, std=params.std),
                              Resizer(input_sizes[opt.compound_coef])
                          ]))
    val_generator = DataLoader(val_set, **val_params)

    # NOTE(review): anchors_ratios/anchors_scales are eval()'d strings from
    # the project YAML — only safe if that file is trusted; consider
    # ast.literal_eval instead.
    model = EfficientDetBackbone(num_classes=len(params.obj_list),
                                 compound_coef=opt.compound_coef,
                                 ratios=eval(params.anchors_ratios),
                                 scales=eval(params.anchors_scales))

    # load last weights
    if opt.load_weights is not None:
        if opt.load_weights.endswith('.pth'):
            weights_path = opt.load_weights
        else:
            weights_path = get_last_weights(opt.saved_path)
        # Checkpoint names end in "..._{step}.pth"; fall back to step 0 when
        # the name does not parse.  NOTE(review): bare except also hides
        # KeyboardInterrupt here.
        try:
            last_step = int(
                os.path.basename(weights_path).split('_')[-1].split('.')[0])
        except:
            last_step = 0

        # NOTE(review): `ret` (missing/unexpected keys) is never inspected.
        try:
            ret = model.load_state_dict(torch.load(weights_path), strict=False)
        except RuntimeError as e:
            print(f'[Warning] Ignoring {e}')
            print(
                '[Warning] Don\'t panic if you see this, this might be because you load a pretrained weights with different number of classes. The rest of the weights should be loaded already.'
            )

        print(
            f'[Info] loaded weights: {os.path.basename(weights_path)}, resuming checkpoint from step: {last_step}'
        )
    else:
        last_step = 0
        print('[Info] initializing weights...')
        init_weights(model)

    # freeze backbone if train head_only
    if opt.head_only:
        def freeze_backbone(m):
            classname = m.__class__.__name__
            for ntl in ['EfficientNet', 'BiFPN']:
                if ntl in classname:
                    for param in m.parameters():
                        param.requires_grad = False

        model.apply(freeze_backbone)
        print('[Info] freezed backbone')

    # https://github.com/vacancy/Synchronized-BatchNorm-PyTorch
    # apply sync_bn when using multiple gpu and batch_size per gpu is lower than 4
    #  useful when gpu memory is limited.
    # because when bn is disable, the training will be very unstable or slow to converge,
    # apply sync_bn can solve it,
    # by packing all mini-batch across all gpus as one batch and normalize, then send it back to all gpus.
    # but it would also slow down the training by a little bit.
    if params.num_gpus > 1 and opt.batch_size // params.num_gpus < 4:
        model.apply(replace_w_sync_bn)
        use_sync_bn = True
    else:
        use_sync_bn = False

    writer = SummaryWriter(
        opt.log_path + f'/{datetime.datetime.now().strftime("%Y%m%d-%H%M%S")}/')

    # warp the model with loss function, to reduce the memory usage on gpu0 and speedup
    model = ModelWithLoss(model, debug=opt.debug)

    if params.num_gpus > 0:
        model = model.cuda()
        if params.num_gpus > 1:
            model = CustomDataParallel(model, params.num_gpus)
            if use_sync_bn:
                patch_replication_callback(model)

    if opt.optim == 'adamw':
        optimizer = torch.optim.AdamW(model.parameters(), opt.lr)
    else:
        optimizer = torch.optim.SGD(model.parameters(), opt.lr,
                                    momentum=opt.momentum, nesterov=True)

    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                           patience=3,
                                                           verbose=True)

    # Best-checkpoint bookkeeping for early stopping / final upload.
    epoch = 0
    best_loss = 1e5
    best_epoch = 0
    best_step = 0
    best_checkpoint = None
    step = max(0, last_step)
    model.train()

    num_iter_per_epoch = len(training_generator)

    try:
        for epoch in range(opt.num_epochs):
            # When resuming, skip whole epochs that the saved step already covers.
            last_epoch = step // num_iter_per_epoch
            if epoch < last_epoch:
                continue

            epoch_loss = []
            epoch_cls_loss = []
            epoch_reg_loss = []

            # Periodic validation + COCO evaluation before the training pass.
            if epoch % opt.val_interval == 0:
                model.eval()
                loss_regression_ls = []
                loss_classification_ls = []
                for iter, data in enumerate(val_generator):
                    with torch.no_grad():
                        imgs = data['img']
                        annot = data['annot']

                        if params.num_gpus == 1:
                            imgs = imgs.cuda()
                            annot = annot.cuda()

                        cls_loss, reg_loss = model(imgs, annot,
                                                   obj_list=params.obj_list)
                        cls_loss = cls_loss.mean()
                        reg_loss = reg_loss.mean()

                        loss = cls_loss + reg_loss
                        # Skip degenerate batches (zero or non-finite loss).
                        if loss == 0 or not torch.isfinite(loss):
                            continue

                        loss_classification_ls.append(cls_loss.item())
                        loss_regression_ls.append(reg_loss.item())

                cls_loss = np.mean(loss_classification_ls)
                reg_loss = np.mean(loss_regression_ls)
                loss = cls_loss + reg_loss

                print(
                    'Val. Epoch: {}/{}. Classification loss: {:1.5f}. Regression loss: {:1.5f}. Total loss: {:1.5f}'
                    .format(epoch, opt.num_epochs, cls_loss, reg_loss, loss))
                writer.add_scalars('Loss', {'val': loss}, step)
                writer.add_scalars('Regression Loss', {'val': reg_loss}, step)
                writer.add_scalars('Classfication Loss', {'val': cls_loss}, step)
                neptune.log_metric('Val Loss', step, loss)
                neptune.log_metric('Val Regression Loss', step, reg_loss)
                neptune.log_metric('Val Classification Loss', step, cls_loss)

                # COCO-style AP/AR metrics from the project's evaluate().
                with torch.no_grad():
                    stats = evaluate(model.model, params.params,
                                     threshold=opt.val_threshold, step=step)
                    neptune.log_metric('AP at IoU=.50:.05:.95', step, stats[0])
                    neptune.log_metric('AP at IoU=.50', step, stats[1])
                    neptune.log_metric('AP at IoU=.75', step, stats[2])
                    neptune.log_metric('AR given 1 detection per image', step, stats[6])
                    neptune.log_metric('AR given 10 detection per image', step, stats[7])
                    neptune.log_metric('AR given 100 detection per image', step, stats[8])

                # Keep the checkpoint whenever val loss improves by es_min_delta.
                if loss + opt.es_min_delta < best_loss:
                    best_loss = loss
                    best_epoch = epoch
                    best_step = step
                    checkpoint_name = f'efficientdet-d{opt.compound_coef}_{epoch}_{step}.pth'
                    checkpoint_path = save_checkpoint(model, opt.saved_path,
                                                      checkpoint_name)
                    best_checkpoint = checkpoint_path

                model.train()

            progress_bar = tqdm(training_generator)
            for iter, data in enumerate(progress_bar):
                # When resuming mid-epoch, fast-forward past already-seen batches.
                if iter < step - last_epoch * num_iter_per_epoch:
                    progress_bar.update()
                    continue
                try:
                    imgs = data['img']
                    annot = data['annot']

                    if params.num_gpus == 1:
                        # if only one gpu, just send it to cuda:0
                        # elif multiple gpus, send it to multiple gpus in CustomDataParallel, not here
                        imgs = imgs.cuda()
                        annot = annot.cuda()

                    optimizer.zero_grad()
                    cls_loss, reg_loss = model(imgs, annot,
                                               obj_list=params.obj_list,
                                               step=step)
                    cls_loss = cls_loss.mean()
                    reg_loss = reg_loss.mean()

                    loss = cls_loss + reg_loss
                    if loss == 0 or not torch.isfinite(loss):
                        continue

                    loss.backward()
                    # torch.nn.utils.clip_grad_norm_(model.parameters(), 0.1)
                    optimizer.step()

                    epoch_loss.append(float(loss))
                    epoch_cls_loss.append(float(cls_loss))
                    epoch_reg_loss.append(float(reg_loss))

                    progress_bar.set_description(
                        'Step: {}. Epoch: {}/{}. Iteration: {}/{}. Cls loss: {:.5f}. Reg loss: {:.5f}. Total loss: {:.5f}'
                        .format(step, epoch, opt.num_epochs, iter + 1,
                                num_iter_per_epoch, cls_loss.item(),
                                reg_loss.item(), loss.item()))
                    writer.add_scalars('Loss', {'train': loss}, step)
                    writer.add_scalars('Regression_loss', {'train': reg_loss}, step)
                    writer.add_scalars('Classfication_loss', {'train': cls_loss}, step)
                    neptune.log_metric('Train Loss', step, loss)
                    neptune.log_metric('Train Regression Loss', step, reg_loss)
                    neptune.log_metric('Train Classification Loss', step, cls_loss)

                    # log learning_rate
                    current_lr = optimizer.param_groups[0]['lr']
                    writer.add_scalar('learning_rate', current_lr, step)
                    neptune.log_metric('Learning Rate', step, current_lr)

                    step += 1

                    if step % opt.save_interval == 0 and step > 0:
                        save_checkpoint(
                            model, opt.saved_path,
                            f'efficientdet-d{opt.compound_coef}_{epoch}_{step}.pth'
                        )
                        print('checkpoint...')

                except Exception as e:
                    # Deliberate best-effort: log the failing batch and keep training.
                    print('[Error]', traceback.format_exc())
                    print(e)
                    continue

            # LR schedule and epoch-level metrics on the mean training losses.
            scheduler.step(np.mean(epoch_loss))
            neptune.log_metric('Epoch Loss', step, np.mean(epoch_loss))
            neptune.log_metric('Epoch Classification Loss', step, np.mean(epoch_cls_loss))
            neptune.log_metric('Epoch Regression Loss', step, np.mean(epoch_reg_loss))

            # Early stopping
            if epoch - best_epoch > opt.es_patience > 0:
                print(
                    '[Info] Stop training at epoch {}. The lowest loss achieved is {}'
                    .format(epoch, best_loss))
                break
    except KeyboardInterrupt:
        # Save current state before bailing out on Ctrl-C.
        save_checkpoint(
            model, opt.saved_path,
            f'efficientdet-d{opt.compound_coef}_{epoch}_{step}.pth')
        send_best_checkpoint(best_checkpoint, best_step)
        writer.close()
    writer.close()

    send_best_checkpoint(best_checkpoint, best_step)
    neptune.stop()
get_ipython().system(' pip install neptune-client==0.4.123') # Initialize Neptune import neptune neptune.init( api_token="ANONYMOUS", project_qualified_name="shared/colab-test-run" ) # Basic Example neptune.create_experiment( name='basic-colab-example', params={'learning_rate':0.1} ) neptune.log_metric('accuracy', 0.93) neptune.append_tags(['basic', 'finished_successfully']) # tests current_exp = neptune.get_experiment() if set(current_exp.get_logs().keys()) != set(['accuracy']): raise ValueError() neptune.stop() # Keras classification example [Advanced]
import numpy from platform import python_version print("python_version() ---> ", python_version()) print("torch.__version__ --> ", torch.__version__) # make sure to fix the randomness at the very beginning torch.manual_seed(0) numpy.random.seed(0) params = load_json_as_dict("./ML_parameters.json") neptune.set_project(params["neptune_project"]) exp: neptune.experiments.Experiment = \ neptune.create_experiment(params=flatten_dict(params), upload_source_files=["./main_merfish.py", "./ML_parameters.json", "./MODULES/vae_parts.py", "./MODULES/vae_model.py", "./MODULES/encoders_decoders.py"], upload_stdout=True, upload_stderr=True) # Get the training and test data preprocessed = load_obj("./data_train.pt") img_torch = preprocessed.img.float() roi_mask_torch = preprocessed.roi_mask.bool() assert len(img_torch.shape) == len(roi_mask_torch.shape) == 4 # print("GPU GB after opening data ->",torch.cuda.memory_allocated()/1E9) BATCH_SIZE = params["simulation"]["batch_size"] SIZE_CROPS = params["input_image"]["size_raw_image"] N_TEST = params["simulation"]["N_test"] N_TRAIN = params["simulation"]["N_train"] conditional_crop_test = ConditionalRandomCrop(desired_w=SIZE_CROPS,
def __init__(self, token, experiment, proj):
    """Remember the Neptune connection details and open an experiment.

    Args:
        token: Neptune API token used to authenticate.
        experiment: Name given to the experiment that is created.
        proj: Qualified project name the experiment belongs to.
    """
    # Keep the raw connection details around for later inspection.
    self.proj = proj
    self.experiment = experiment
    self.token = token
    # Connecting is a side effect of construction: initialise the client
    # against the project, then immediately start a named experiment.
    neptune.init(api_token=self.token, project_qualified_name=self.proj)
    neptune.create_experiment(name=self.experiment)
batch_size = 512 n_chunks = 8 weighting = 0.999 # classification loss weighting weighting_decay = 0.95 supress_cl = 6 class_weight = [1., 1., 3., 3., 3., 3., 3., 3.] rmse = True neptune.init(api_token=NEPTUNE_TOKEN, project_qualified_name=f'artursil/{PROJECT_NAME}') neptune.create_experiment(EXPERIMENT_NAME, params={ 'weighting': weighting, 'weighting_decay': weighting_decay, 'batch_size': batch_size, 'lr': lr, 'lr_decay': lr_decay, 'loss_margin': loss_margin, 'class_weight': f'{class_weight}', 'emb_layer': '[128,128]', 'optimiser': 'sgd' }) def train_model(model, epoch_start, epochs, lr, lr_decay, weighting, weighting_decay): loss_tuple = [] for epoch in range(epoch_start, epochs + 1): if torch.cuda.is_available(): model.cuda() print(f"Starting epoch {epoch}") st = time.time()
SEND_NEPTUNE = True OUT_SIZE = 10 CIFAR_FACTOR = 1 PATIENCE = 0 NUM_EPOCHS = 150 WEIGHT_DECAY = 0.00004 MOMENTUM = 0.9 LEARNING_RATE = 0.2 MILESTONES = [30, 70, 110] device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") print(device) if SEND_NEPTUNE: neptune.init('andrzejzdobywca/pretrainingpp') neptune.create_experiment(name=TRAIN_NAME) def setup_half_loaders(): transform = transforms.Compose([ transforms.ToTensor(), transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]) ]) if DATASET == "Cifar": image_datasets = { 'train': torchvision.datasets.CIFAR10(root='./data_dir_cifar', train=True, download=True, transform=transform),
def sampling(args): z_mean, z_log_var = args batch = K.shape(z_mean)[0] dim = K.int_shape(z_mean)[1] epsilon = K.random_normal(shape=(batch, dim)) return z_mean + K.exp(0.5 * z_log_var) * epsilon # select project neptune.init(project_qualified_name='amitshakarchy/muscle-formation', api_token=api_token) # create experiment with neptune.create_experiment( name='my_vae - resnet archi', description="learning rate scheduler", tags=['classification', 'tf_2'], upload_source_files=['classification-example.py', 'requirements.txt'], params=PARAMS) as npt_exp: input_img = Input(shape=(image_size, image_size, 1), ) # <editor-fold desc="block 1"> '''block_1''' b1_cnv2d_1 = Conv2D(filters=n_filters * 16, kernel_size=(3, 3), strides=(2, 2), padding='same', use_bias=False, name='b1_cnv2d_1', kernel_initializer='normal')(input_img) b1_relu_1 = ReLU(name='b1_relu_1')(b1_cnv2d_1) b1_bn_1 = BatchNormalization(epsilon=1e-3, momentum=0.999,
'num_dense_nodes': 1024, 'dense_nodes_divisor': 4, 'batch_size': 64, 'drop_out': 0.1, 'learning_rate': 0.001, 'input_shape': (VECTOR_SIZE, 1, 1) } # start experiment name = 'keras-integration-cnn' if LOG_NEPTUNE: neptune.init(project_qualified_name='4ND4/sandbox') neptune_tb.integrate_with_keras() result = neptune.create_experiment(name=name, params=PARAMS) name = result.id # start of cnn coding input_tensor = Input(shape=PARAMS.get('input_shape')) # 1st cnn block x = BatchNormalization()(input_tensor) x = Activation('relu')(x) x = Conv2D(filters=PARAMS['num_filters'], kernel_size=PARAMS['kernel_size'], strides=1, padding='same')(x) # x = MaxPooling2D()(x) x = Dropout(PARAMS['drop_out'])(x)
model.compile(optimizer=optimizer, loss='sparse_categorical_crossentropy', metrics=['accuracy']) ## Initialize Neptune import neptune neptune.init(api_token='ANONYMOUS', project_qualified_name='shared/tensorflow-keras-integration') # Quickstart ## Step 1: Create an Experiment neptune.create_experiment('tensorflow-keras-quickstart') ## Step 2: Add NeptuneMonitor Callback to model.fit() from neptunecontrib.monitoring.keras import NeptuneMonitor model.fit(x_train, y_train, epochs=5, batch_size=64, callbacks=[NeptuneMonitor()]) ## Step 3: Explore results in the Neptune UI ## Step 4: Stop logging
def main(cfg):
    """Runs main training procedure.

    Builds model / loss / optimizer / scheduler from the `cfg` dict,
    trains via `Trainer`, then reloads the best checkpoint and saves
    per-sample validation predictions as images.
    """
    print('Starting training...')
    print('Current working directory is:', os.getcwd())

    # fix random seeds for reproducibility
    seed_everything(seed=cfg['seed'])

    # neptune logging
    neptune.init(project_qualified_name=cfg['neptune_project_name'],
                 api_token=cfg['neptune_api_token'])
    neptune.create_experiment(name=cfg['neptune_experiment'], params=cfg)

    # Binary problems use one sigmoid channel; multi-class adds an extra
    # background channel and uses a pixel-wise softmax.
    num_classes = 1 if len(cfg['classes']) == 1 else (len(cfg['classes']) + 1)
    activation = 'sigmoid' if num_classes == 1 else 'softmax2d'
    # Background channel is excluded from metrics when channels are ignored.
    background = False if cfg['ignore_channels'] else True

    aux_params = dict(
        pooling=cfg['pooling'],  # one of 'avg', 'max'
        dropout=cfg['dropout'],  # dropout ratio, default is None
        activation='sigmoid',  # activation function, default is None
        classes=num_classes)  # define number of output labels

    # configure model
    models = {
        'unet': Unet(encoder_name=cfg['encoder_name'],
                     encoder_weights=cfg['encoder_weights'],
                     decoder_use_batchnorm=cfg['use_batchnorm'],
                     classes=num_classes,
                     activation=activation,
                     aux_params=aux_params),
        'unetplusplus': UnetPlusPlus(encoder_name=cfg['encoder_name'],
                                     encoder_weights=cfg['encoder_weights'],
                                     decoder_use_batchnorm=cfg['use_batchnorm'],
                                     classes=num_classes,
                                     activation=activation,
                                     aux_params=aux_params),
        'deeplabv3plus': DeepLabV3Plus(encoder_name=cfg['encoder_name'],
                                       encoder_weights=cfg['encoder_weights'],
                                       classes=num_classes,
                                       activation=activation,
                                       aux_params=aux_params)
    }
    assert cfg['architecture'] in models.keys()
    model = models[cfg['architecture']]

    # configure loss
    losses = {
        'dice_loss': DiceLoss(include_background=background,
                              softmax=False,
                              batch=cfg['combine']),
        'generalized_dice': GeneralizedDiceLoss(include_background=background,
                                                softmax=False,
                                                batch=cfg['combine'])
    }
    assert cfg['loss'] in losses.keys()
    loss = losses[cfg['loss']]

    # configure optimizer
    optimizers = {
        'adam': Adam([dict(params=model.parameters(), lr=cfg['lr'])]),
        'adamw': AdamW([dict(params=model.parameters(), lr=cfg['lr'])]),
        'rmsprop': RMSprop([dict(params=model.parameters(), lr=cfg['lr'])])
    }
    assert cfg['optimizer'] in optimizers.keys()
    optimizer = optimizers[cfg['optimizer']]

    # configure metrics
    metrics = {
        'dice_score': DiceMetric(include_background=background,
                                 reduction='mean'),
        'dice_smp': Fscore(threshold=cfg['rounding'],
                           ignore_channels=cfg['ignore_channels']),
        'iou_smp': IoU(threshold=cfg['rounding'],
                       ignore_channels=cfg['ignore_channels']),
        'generalized_dice': GeneralizedDiceLoss(include_background=background,
                                                softmax=False,
                                                batch=cfg['combine']),
        'dice_loss': DiceLoss(include_background=background,
                              softmax=False,
                              batch=cfg['combine']),
        'cross_entropy': BCELoss(reduction='mean'),
        'accuracy': Accuracy(ignore_channels=cfg['ignore_channels'])
    }
    assert all(m['name'] in metrics.keys() for m in cfg['metrics'])
    metrics = [(metrics[m['name']], m['name'], m['type'])
               for m in cfg['metrics']]  # tuple of (metric, name, type)

    # configure scheduler
    schedulers = {
        'steplr': StepLR(optimizer, step_size=cfg['step_size'], gamma=0.5),
        'cosine': CosineAnnealingLR(optimizer,
                                    cfg['epochs'],
                                    eta_min=cfg['eta_min'],
                                    last_epoch=-1)
    }
    assert cfg['scheduler'] in schedulers.keys()
    scheduler = schedulers[cfg['scheduler']]

    # configure augmentations
    train_transform = load_train_transform(transform_type=cfg['transform'],
                                           patch_size=cfg['patch_size'])
    valid_transform = load_valid_transform(patch_size=cfg['patch_size'])

    train_dataset = SegmentationDataset(df_path=cfg['train_data'],
                                        transform=train_transform,
                                        normalize=cfg['normalize'],
                                        tissuemix=cfg['tissuemix'],
                                        probability=cfg['probability'],
                                        blending=cfg['blending'],
                                        warping=cfg['warping'],
                                        color=cfg['color'])
    valid_dataset = SegmentationDataset(df_path=cfg['valid_data'],
                                        transform=valid_transform,
                                        normalize=cfg['normalize'])

    # save intermediate augmentations
    if cfg['eval_dir']:
        default_dataset = SegmentationDataset(df_path=cfg['train_data'],
                                              transform=None,
                                              normalize=None)
        transform_dataset = SegmentationDataset(df_path=cfg['train_data'],
                                                transform=None,
                                                normalize=None,
                                                tissuemix=cfg['tissuemix'],
                                                probability=cfg['probability'],
                                                blending=cfg['blending'],
                                                warping=cfg['warping'],
                                                color=cfg['color'])
        # Dump every 10th of the first 500 samples (raw input, mask, and the
        # augmented counterpart) so augmentations can be inspected visually.
        for idx in range(0, min(500, len(default_dataset)), 10):
            image_input, image_mask = default_dataset[idx]
            # CHW -> HWC for image writers.
            image_input = image_input.transpose((1, 2, 0))
            image_input = image_input.astype(np.uint8)

            image_mask = image_mask.transpose(
                1, 2, 0)  # Why do we need transpose here?
            image_mask = image_mask.astype(np.uint8)
            image_mask = image_mask.squeeze()
            # Scale {0,1} mask to {0,255} for visibility.
            image_mask = image_mask * 255

            image_transform, _ = transform_dataset[idx]
            image_transform = image_transform.transpose(
                (1, 2, 0)).astype(np.uint8)

            # Zero-pad index so files sort lexicographically.
            idx_str = str(idx).zfill(3)
            skimage.io.imsave(os.path.join(cfg['eval_dir'],
                                           f'{idx_str}a_image_input.png'),
                              image_input,
                              check_contrast=False)
            plt.imsave(os.path.join(cfg['eval_dir'],
                                    f'{idx_str}b_image_mask.png'),
                       image_mask,
                       vmin=0,
                       vmax=1)
            skimage.io.imsave(os.path.join(cfg['eval_dir'],
                                           f'{idx_str}c_image_transform.png'),
                              image_transform,
                              check_contrast=False)
        del transform_dataset

    train_loader = DataLoader(train_dataset,
                              batch_size=cfg['batch_size'],
                              num_workers=cfg['workers'],
                              shuffle=True)
    valid_loader = DataLoader(valid_dataset,
                              batch_size=cfg['batch_size'],
                              num_workers=cfg['workers'],
                              shuffle=False)

    trainer = Trainer(model=model,
                      device=cfg['device'],
                      save_checkpoints=cfg['save_checkpoints'],
                      checkpoint_dir=cfg['checkpoint_dir'],
                      checkpoint_name=cfg['checkpoint_name'])
    trainer.compile(optimizer=optimizer,
                    loss=loss,
                    metrics=metrics,
                    num_classes=num_classes)
    trainer.fit(train_loader,
                valid_loader,
                epochs=cfg['epochs'],
                scheduler=scheduler,
                verbose=cfg['verbose'],
                loss_weight=cfg['loss_weight'])

    # validation inference: reload the checkpoint written by Trainer.
    best_model = model
    best_model.load_state_dict(
        torch.load(os.path.join(cfg['checkpoint_dir'],
                                cfg['checkpoint_name'])))
    best_model.to(cfg['device'])
    best_model.eval()

    # setup directory to save plots
    if os.path.isdir(cfg['plot_dir']):
        # remove existing dir and content
        shutil.rmtree(cfg['plot_dir'])
    # create absolute destination
    os.makedirs(cfg['plot_dir'])

    # valid dataset without transformations and normalization for image
    # visualization
    valid_dataset_vis = SegmentationDataset(df_path=cfg['valid_data'],
                                            transform=valid_transform,
                                            normalize=None)

    if cfg['save_checkpoints']:
        for n in range(len(valid_dataset)):
            image_vis = valid_dataset_vis[n][0].astype('uint8')
            image_vis = image_vis.transpose((1, 2, 0))
            image, gt_mask = valid_dataset[n]
            gt_mask = gt_mask.transpose((1, 2, 0))
            gt_mask = gt_mask.squeeze()

            x_tensor = torch.from_numpy(image).to(cfg['device']).unsqueeze(0)
            pr_mask, _ = best_model.predict(x_tensor)
            # Round probabilities to a hard {0,1} mask.
            pr_mask = pr_mask.cpu().numpy().round()
            pr_mask = pr_mask.squeeze()

            save_predictions(out_path=cfg['plot_dir'],
                             index=n,
                             image=image_vis,
                             ground_truth_mask=gt_mask,
                             predicted_mask=pr_mask,
                             average='macro')
import neptune

# Authentication relies on the NEPTUNE_API_TOKEN environment variable
# being set before this script runs.
neptune.init('macabdul9/sandbox')
neptune.create_experiment(name='minimal_example')

# Log a geometrically decaying loss plus a constant AUC at each step.
for step in range(100):
    neptune.log_metric('loss', 0.95 ** step)
    neptune.log_metric('AUC', 0.96)
def main(rank, option, resume, save_folder):
    """Training entry point for one worker.

    rank: GPU index for a DDP worker, or the string 'cuda' for the
        single-process case (both trigger logging below).
    option: parsed configuration object; settings live under option.result.
    resume: when truthy, restores model/optimizer/scheduler state from
        save_folder/last_dict.pt.
    save_folder: run directory; its last two path components name the
        experiment in Neptune.
    """
    # Basic Options
    resume_path = os.path.join(save_folder, 'last_dict.pt')
    num_gpu = len(option.result['train']['gpu'].split(','))
    total_epoch = option.result['train']['total_epoch']
    multi_gpu = len(option.result['train']['gpu'].split(',')) > 1
    # DDP only applies when more than one GPU is configured.
    if multi_gpu:
        ddp = option.result['train']['ddp']
    else:
        ddp = False
    scheduler = option.result['train']['scheduler']
    batch_size, pin_memory = option.result['train'][
        'batch_size'], option.result['train']['pin_memory']

    # Logger: only the main process (rank 0, or 'cuda' single-GPU) logs.
    if (rank == 0) or (rank == 'cuda'):
        # NOTE(review): hard-coded Neptune API token committed to source —
        # should be moved to an environment variable or secret store.
        neptune.init(
            'sunghoshin/imp',
            api_token=
            'eyJhcGlfYWRkcmVzcyI6Imh0dHBzOi8vYXBwLm5lcHR1bmUuYWkiLCJhcGlfdXJsIjoiaHR0cHM6Ly9hcHAubmVwdHVuZS5haSIsImFwaV9rZXkiOiI5MTQ3MjY2Yy03YmM4LTRkOGYtOWYxYy0zOTk3MWI0ZDY3M2MifQ=='
        )
        # Experiment is named from the last two components of save_folder.
        exp_name, exp_num = save_folder.split('/')[-2], save_folder.split(
            '/')[-1]
        neptune.create_experiment(params={
            'exp_name': exp_name,
            'exp_num': exp_num
        }, tags=['inference:False'])

    # Load Model
    model = load_model(option)
    patch_criterion, detection_criterion = load_loss(option, rank)
    save_module = train_module(total_epoch, model, patch_criterion,
                               detection_criterion, multi_gpu)

    if resume:
        save_module.import_module(resume_path)
        model.load_state_dict(save_module.save_dict['model'][0])

    # Multi-Processing GPUs
    if ddp:
        setup(rank, num_gpu)
        torch.manual_seed(0)
        torch.cuda.set_device(rank)
        # Move to this rank's device before wrapping in DDP.
        model.to(rank)
        model = DDP(model, device_ids=[rank])
        model = apply_gradient_allreduce(model)
        patch_criterion.to(rank)
        detection_criterion.to(rank)
    else:
        if multi_gpu:
            model = nn.DataParallel(model).to(rank)
        else:
            model = model.to(rank)

    # Optimizer and Scheduler
    if resume:
        # Load Optimizer
        optimizer = load_optimizer(option, model.parameters())
        optimizer.load_state_dict(save_module.save_dict['optimizer'][0])

        # Load Scheduler
        if scheduler is not None:
            scheduler = load_scheduler(option, optimizer)
            scheduler.load_state_dict(save_module.save_dict['scheduler'][0])
    else:
        optimizer = load_optimizer(option, model.parameters())
        if scheduler is not None:
            scheduler = load_scheduler(option, optimizer)

    # Early Stopping
    early_stop = option.result['train']['early']
    if early_stop:
        early = EarlyStopping(patience=option.result['train']['patience'])
    else:
        early = None

    # Dataset and DataLoader: three optional dataset families (robust,
    # COCO-dark detection, EX-dark detection); missing ones yield None loaders.
    tr_robust_dataset, tr_coco_dataset, tr_ex_dataset = load_data(option,
                                                                  type='train')
    val_robust_dataset, val_coco_dataset, val_ex_dataset = load_data(
        option, type='val')

    if ddp:
        # Under DDP, shuffling is delegated to the DistributedSampler
        # (shuffle=False on the DataLoader itself).
        # Robust Dataset Loader
        if tr_robust_dataset is not None:
            tr_robust_sampler = torch.utils.data.distributed.DistributedSampler(
                dataset=tr_robust_dataset, num_replicas=num_gpu, rank=rank)
            val_robust_sampler = torch.utils.data.distributed.DistributedSampler(
                dataset=val_robust_dataset, num_replicas=num_gpu, rank=rank)

            tr_robust_loader = torch.utils.data.DataLoader(
                dataset=tr_robust_dataset,
                batch_size=batch_size,
                shuffle=False,
                num_workers=4 * num_gpu,
                pin_memory=pin_memory,
                sampler=tr_robust_sampler)
            val_robust_loader = torch.utils.data.DataLoader(
                dataset=val_robust_dataset,
                batch_size=batch_size,
                shuffle=False,
                num_workers=4 * num_gpu,
                pin_memory=pin_memory,
                sampler=val_robust_sampler)
        else:
            tr_robust_loader = None
            val_robust_loader = None

        # Detection-COCO-Dark-Dataset Loader
        if tr_coco_dataset is not None:
            tr_coco_sampler = torch.utils.data.distributed.DistributedSampler(
                dataset=tr_coco_dataset, num_replicas=num_gpu, rank=rank)
            val_coco_sampler = torch.utils.data.distributed.DistributedSampler(
                dataset=val_coco_dataset, num_replicas=num_gpu, rank=rank)

            tr_coco_loader = torch.utils.data.DataLoader(
                dataset=tr_coco_dataset,
                batch_size=batch_size,
                shuffle=False,
                num_workers=4 * num_gpu,
                pin_memory=pin_memory,
                sampler=tr_coco_sampler,
                collate_fn=detection_collate)
            val_coco_loader = torch.utils.data.DataLoader(
                dataset=val_coco_dataset,
                batch_size=batch_size,
                shuffle=False,
                num_workers=4 * num_gpu,
                pin_memory=pin_memory,
                sampler=val_coco_sampler,
                collate_fn=detection_collate)
        else:
            tr_coco_loader = None
            val_coco_loader = None

        # Detection-EX-Dark-Dataset Loader
        if tr_ex_dataset is not None:
            tr_ex_sampler = torch.utils.data.distributed.DistributedSampler(
                dataset=tr_ex_dataset, num_replicas=num_gpu, rank=rank)
            val_ex_sampler = torch.utils.data.distributed.DistributedSampler(
                dataset=val_ex_dataset, num_replicas=num_gpu, rank=rank)

            tr_ex_loader = torch.utils.data.DataLoader(
                dataset=tr_ex_dataset,
                batch_size=batch_size,
                shuffle=False,
                num_workers=4 * num_gpu,
                pin_memory=pin_memory,
                sampler=tr_ex_sampler,
                collate_fn=detection_collate)
            val_ex_loader = torch.utils.data.DataLoader(
                dataset=val_ex_dataset,
                batch_size=batch_size,
                shuffle=False,
                num_workers=4 * num_gpu,
                pin_memory=pin_memory,
                sampler=val_ex_sampler,
                collate_fn=detection_collate)
        else:
            tr_ex_loader = None
            val_ex_loader = None
    else:
        # Single-process path: plain DataLoaders, shuffle only for training.
        # Robust Dataset Loader
        if tr_robust_dataset is not None:
            tr_robust_loader = DataLoader(tr_robust_dataset,
                                          batch_size=batch_size,
                                          shuffle=True,
                                          pin_memory=pin_memory,
                                          num_workers=4 * num_gpu)
            val_robust_loader = DataLoader(val_robust_dataset,
                                           batch_size=batch_size,
                                           shuffle=False,
                                           pin_memory=pin_memory,
                                           num_workers=4 * num_gpu)
        else:
            tr_robust_loader = None
            val_robust_loader = None

        # Detection-COCO-Dark-Dataset Loader
        if tr_coco_dataset is not None:
            tr_coco_loader = DataLoader(tr_coco_dataset,
                                        batch_size=batch_size,
                                        shuffle=True,
                                        pin_memory=pin_memory,
                                        num_workers=4 * num_gpu,
                                        collate_fn=detection_collate)
            val_coco_loader = DataLoader(val_coco_dataset,
                                         batch_size=batch_size,
                                         shuffle=False,
                                         pin_memory=pin_memory,
                                         num_workers=4 * num_gpu,
                                         collate_fn=detection_collate)
        else:
            tr_coco_loader = None
            val_coco_loader = None

        # Detection-EX-Dark-Dataset Loader
        if tr_ex_dataset is not None:
            tr_ex_loader = DataLoader(tr_ex_dataset,
                                      batch_size=batch_size,
                                      shuffle=True,
                                      pin_memory=pin_memory,
                                      num_workers=4 * num_gpu,
                                      collate_fn=detection_collate)
            val_ex_loader = DataLoader(val_ex_dataset,
                                       batch_size=batch_size,
                                       shuffle=False,
                                       pin_memory=pin_memory,
                                       num_workers=4 * num_gpu,
                                       collate_fn=detection_collate)
        else:
            tr_ex_loader = None
            val_ex_loader = None

    # Mixed Precision
    mixed_precision = option.result['train']['mixed_precision']
    if mixed_precision:
        scaler = torch.cuda.amp.GradScaler()
    else:
        scaler = None

    # Training: delegated entirely to the robust_trainer module.
    from module.trainer import robust_trainer
    early, save_module, option = robust_trainer.run(option, model, tr_robust_loader, tr_coco_loader, tr_ex_loader, \
                                                    val_robust_loader, val_coco_loader, val_ex_loader, optimizer, \
                                                    patch_criterion, detection_criterion, scaler, scheduler, early, \
                                                    save_folder, save_module, multi_gpu, rank, neptune)

    # Tear down the DDP process group.
    if ddp:
        cleanup()
# %% def record_eval_metric(neptune, metrics): for k, v in metrics.items(): neptune.log_metric(k, v) # %% model_path = '/workspace/ml-workspace/thesis_git/thesis/models/' best_eval_f1 = 0 # Measure the total training time for the whole run. total_t0 = time.time() with neptune.create_experiment(name="HierarchicalSemanticGraphNetwork", params=PARAMS, upload_source_files=['HSGN_GAT.py']): neptune.append_tag( ["homogeneous_graph", "GATConv", "bidirectional_token_node_edge"]) neptune.set_property('server', 'IRGPU2') neptune.set_property('training_set_path', training_path) neptune.set_property('dev_set_path', dev_path) # For each epoch... for epoch_i in range(0, epochs): # ======================================== # Training # ======================================== # Perform one full pass over the training set.
model.compile(optimizer=optimizer, loss='sparse_categorical_crossentropy', metrics=['accuracy']) # Step 2: Initialize Neptune import neptune neptune.init(project_qualified_name='shared/onboarding', # change this to your `workspace_name/project_name` api_token='ANONYMOUS', # change this to your api token ) # Step 3: Create an experiment neptune.create_experiment(name='great-idea') # Step 4: Add logging for metrics and losses class NeptuneMonitor(keras.callbacks.Callback): def on_epoch_end(self, epoch, logs=None): for metric_name, metric_value in logs.items(): neptune.log_metric(metric_name, metric_value) model.fit(x_train, y_train, epochs=PARAMS['epoch_nr'], batch_size=PARAMS['batch_size'], callbacks=[NeptuneMonitor()]) # tests current_exp = neptune.get_experiment()
# NOTE(review): fragment — this begins mid-way through a PARAMS dict literal
# whose opening brace is outside this chunk.
    'decay': 0.0,
    'momentum': 0.9,
    'custLossThresh': -99.0,
    'label_level': 'composite',
    'label_feature_threshold': 0.001,
    'refl_scaling_min': -35.0,
    'refl_scaling_per99.99': 45.6660232543945,
    'W_scaling_min': -14.29787,
    'W_scaling_per99.99': 0.288602113723755,
    'model_run_name': 'unet_v6p2',
    'feature_description': '10minAfterHour_refl',
}

neptune.create_experiment(name=neptune_experiment_name,
                          params=PARAMS,
                          upload_source_files=neptune_upload_source_files,
                          tags=neptune_tags
                          )
#--------------------------
#
# Select the optimizer named in PARAMS; only 'Adam' and 'SGD' are handled.
if PARAMS['optimizer'] == 'Adam':
    optimizer = Adam(lr=PARAMS['learning_rate'],
                     beta_1=PARAMS['beta_1'],
                     beta_2=PARAMS['beta_2'],
                     epsilon=PARAMS['epsilon'],
                     decay=PARAMS['decay']
                     )
elif PARAMS['optimizer'] == 'SGD':
    # NOTE(review): chunk is truncated here mid-call; the rest of the SGD
    # arguments are not visible in this view.
    optimizer = SGD(lr=PARAMS['learning_rate'],
                    decay=PARAMS['decay'],
# NOTE(review): fragment — this begins inside a nested helper (presumably
# `get_roshambo_user`) of a train/val split function whose header is outside
# this chunk.
        if parts[1] in set(
            ["glove", "back", "front"]
        ):  # Order is not always consistent
            return parts[2]
        return parts[1]

    # Split by user: take whole users (in unique order) until the cumulative
    # sample count reaches the requested training fraction.
    nb_train = int(len(data) * train_ratio)
    o_samples = list(map(get_roshambo_user, data.samples))
    _, indices, counts = np.unique(o_samples,
                                   return_inverse=True,
                                   return_counts=True)
    training_users = np.nonzero(np.cumsum(counts) <= nb_train)[0]
    training_mask = np.isin(indices, training_users)
    train_data = torch_data.Subset(data,
                                   indices=np.arange(len(data))[training_mask])
    val_data = torch_data.Subset(data,
                                 indices=np.arange(len(data))[~training_mask])
    return train_data, val_data


if __name__ == "__main__":
    logging.basicConfig(
        format="%(asctime)s %(levelname)-8s %(message)s",
        level=logging.INFO,
        datefmt="%Y-%m-%d %H:%M:%S",
        filename="roshambo.log",
    )
    # Offline backend: nothing is sent to the Neptune service.
    neptune.init("abc/abc", backend=neptune.OfflineBackend())
    # neptune.init("tihbe/pcritical")
    with neptune.create_experiment(
        "roshambo", upload_stdout=False, upload_stderr=False
    ):
        run_roshambo()
# Create the Neptune experiment that tracks this training run, recording all
# CLI hyper-parameters; optional args fall back to '-' so every param is set.
nml_exp = neptune.create_experiment(
    name='EXP-' + str(args.experiment_id),
    logger=rsna_logger.logger,
    upload_stdout=False,
    tags=['dev', 'any'],
    params={
        'data_dataset': args.data_dataset,
        'data_fold': args.data_fold,
        'data_train_transform': args.data_train_transform
        if args.data_train_transform is not None else '-',
        'data_valid_transform': args.data_valid_transform
        if args.data_valid_transform is not None else '-',
        'data_sampler': args.data_sampler,
        'net_model': args.net_model,
        'net_loss': 'bce',
        'net_pretrained': args.net_pretrained,
        'net_weight_file': args.net_weight_file
        if args.net_weight_file is not None else '-',
        'net_num_classes': args.net_num_classes,
        'optim': args.optim,
        'optim_lr': args.optim_lr,
        'optim_momentum': args.optim_momentum,
        'optim_nesterov': args.optim_nesterov,
        'optim_weight_decay': args.optim_weight_decay,
        'optim_lookahead_enabled': args.optim_lookahead_enabled,
        'optim_scheduler': args.optim_scheduler,
        'optim_scheduler_warmup': args.optim_scheduler_warmup,
        'optim_scheduler_max_lr': args.optim_scheduler_max_lr,
        'optim_scheduler_min_lr': args.optim_scheduler_min_lr,
        'tr_iteration_num': args.tr_iteration_num,
        'tr_batch_size': args.tr_batch_size,
        'tr_accumulation_step': args.tr_accumulation_step
    },
    properties={
        'command': cmd,
    },
    upload_source_files=[
        'config.py',
        'rsna_dataset.py',
        'rsna_model.py',
        'rsna_network.py',
        # BUG FIX: the original list was missing commas after the next two
        # entries, so implicit string-literal concatenation fused
        # 'train_any.py' 'train_any.sh' 'common/**/*.py' into one bogus path
        # and none of those sources were ever uploaded.
        'train_any.py',
        'train_any.sh',
        'common/**/*.py',
        'transforms/*.json',
    ])
# XGBoost hyper-parameters for squared-error regression, reported to Neptune
# and passed unchanged to both training calls below.
params = {
    'max_depth': 5,
    'eta': 0.5,
    'gamma': 0.1,
    'subsample': 1,
    'lambda': 1,
    'alpha': 0.35,
    'objective': 'reg:squarederror',
    'eval_metric': ['mae', 'rmse']
}
# Evaluation sets reported during boosting: held-out first, then train.
watchlist = [(dtest, 'eval'), (dtrain, 'train')]
num_round = 20

# Train model using `xgb.train()`
neptune.create_experiment(name='xgb', tags=['train'], params=params)
xgb.train(params, dtrain, num_round, watchlist,
          callbacks=[neptune_callback()])
neptune.stop()

# Train model using `xgb.cv()` (7-fold cross-validation, same params).
neptune.create_experiment(name='xgb', tags=['cv'], params=params)
xgb.cv(params, dtrain, num_boost_round=num_round, nfold=7,
       callbacks=[neptune_callback()])
neptune.stop()
# See converted experiments # Click on the link(s) above to browse the TensorBoard run in Neptune or go to [shared/tensorflow-integration project](https://ui.neptune.ai/o/shared/org/tensorboard-integration/experiments?viewId=def2c858-3510-4bf9-9e52-8720fadecb11). # Log runs live to Neptune via TensorBoard # Step 1: Initialize Neptune import neptune neptune.init(api_token='ANONYMOUS', project_qualified_name='shared/tensorboard-integration') # Step 2: Create an experiment neptune.create_experiment('tensorboard-logging') # Step 3: Run ``neptune_tensorboard.integrate_with_tensorflow()`` import neptune_tensorboard neptune_tensorboard.integrate_with_tensorflow() # Step 4: Add your training code import tensorflow as tf import datetime mnist = tf.keras.datasets.mnist (x_train, y_train), (x_test, y_test) = mnist.load_data()