def main():
    args = parser.parse_args()
    assert args.n_views == 2, "Only two view training is supported. Please use --n-views 2."
    # Check if GPU training is available.
    set_seed(args.seed)
    if not args.disable_cuda and torch.cuda.is_available():
        args.device = torch.device('cuda')
        cudnn.deterministic = True
        cudnn.benchmark = True
    else:
        args.device = torch.device('cpu')
        args.gpu_index = -1

    dataset = ContrastiveLearningDataset(args.data)
    train_dataset = dataset.get_dataset(args.dataset_name, args.n_views)

    train_loader = torch.utils.data.DataLoader(
        train_dataset, batch_size=args.batch_size, shuffle=True,
        num_workers=args.workers, pin_memory=True, drop_last=True)

    model = ResNetSimCLR(base_model=args.arch, out_dim=args.out_dim)

    optimizer = torch.optim.Adam(model.parameters(), args.lr, weight_decay=args.weight_decay)

    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
        optimizer, T_max=len(train_loader), eta_min=0, last_epoch=-1)

    # It's a no-op if the 'gpu_index' argument is a negative integer or None.
    with torch.cuda.device(args.gpu_index):
        simclr = SimCLR(model=model, optimizer=optimizer, scheduler=scheduler, args=args)
        simclr.train(train_loader)
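# Note: every script in this section calls a project-level `set_seed` helper whose
# definition is not shown here, and the exact signature differs per project
# (e.g. `set_seed(args)`, `set_seed(seed_value=...)`, `set_seed(seed, use_cudnn_benchmark=...)`).
# The sketch below is only an assumption of what such a helper typically looks like in
# PyTorch code: it seeds the `random`, NumPy and torch CPU/GPU generators.
import random

import numpy as np
import torch


def set_seed(seed: int = 0) -> None:
    """Minimal sketch: seed Python, NumPy and PyTorch RNGs for reproducibility."""
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)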
import shutil
from datetime import datetime

from config import arg_config, proj_root
from utils.misc import construct_exp_name, construct_path, construct_print, pre_mkdir, set_seed
from utils.solver import Solver

if __name__ == '__main__':
    construct_print(f"{datetime.now()}: Initializing...")
    construct_print(f"Project Root: {proj_root}")
    init_start = datetime.now()

    exp_name = construct_exp_name(arg_config)
    path_config = construct_path(
        proj_root=proj_root,
        exp_name=exp_name,
        xlsx_name=arg_config["xlsx_name"],
    )
    pre_mkdir(path_config)
    set_seed(seed=0, use_cudnn_benchmark=arg_config["size_list"] is not None)

    solver = Solver(exp_name, arg_config, path_config)
    construct_print(f"Total initialization time: {datetime.now() - init_start}")

    shutil.copy(f"{proj_root}/config.py", path_config["cfg_log"])
    shutil.copy(f"{proj_root}/utils/solver.py", path_config["trainer_log"])

    construct_print(f"{datetime.now()}: Start...")
    if arg_config["resume_mode"] in ("test", "measure"):
        solver.test()
    else:
        solver.train()
    construct_print(f"{datetime.now()}: End...")
def train(args, dataset, model, tokenizer, labels, pad_token_label_id):
    """ Trains the given model on the given dataset. """
    train_dataset = dataset['train']
    train_sampler = RandomSampler(train_dataset)
    train_dataloader = DataLoader(train_dataset,
                                  sampler=train_sampler,
                                  batch_size=args.train_batch_size)

    n_train_steps__single_epoch = len(train_dataloader) // args.gradient_accumulation_steps
    n_train_steps = n_train_steps__single_epoch * args.num_train_epochs
    args.logging_steps = n_train_steps__single_epoch

    # Prepare optimizer and schedule (linear warmup and decay)
    no_decay = ["bias", "LayerNorm.weight"]
    optimizer_grouped_parameters = [
        {
            "params": [p for n, p in model.named_parameters()
                       if not any(nd in n for nd in no_decay)],
            "weight_decay": args.weight_decay,
        },
        {
            "params": [p for n, p in model.named_parameters()
                       if any(nd in n for nd in no_decay)],
            "weight_decay": 0.0
        },
    ]
    optimizer = AdamW(optimizer_grouped_parameters, lr=args.learning_rate, eps=args.adam_epsilon)
    scheduler = get_linear_schedule_with_warmup(
        optimizer,
        num_warmup_steps=int(args.warmup_ratio * n_train_steps),
        num_training_steps=n_train_steps
    )

    # Train!
    logging.info("***** Running training *****")
    logging.info(" Num examples = %d", len(train_dataset))
    logging.info(" Num Epochs = %d", args.num_train_epochs)
    logging.info(" Total train batch size (w. accumulation) = %d",
                 args.train_batch_size * args.gradient_accumulation_steps)
    logging.info(" Gradient Accumulation steps = %d", args.gradient_accumulation_steps)
    logging.info(" Total optimization steps = %d", n_train_steps)
    logging.info(" Using linear warmup (ratio=%s)", args.warmup_ratio)
    logging.info(" Using weight decay (value=%s)", args.weight_decay)

    global_step = 0
    epochs_trained = 0
    steps_trained_in_current_epoch = 0
    tr_loss, logging_loss = 0.0, 0.0
    best_metric, best_epoch = -1.0, -1  # Init best to -1 so that 0 > best
    model.zero_grad()
    train_iterator = tqdm.trange(epochs_trained, int(args.num_train_epochs), desc="Epoch")
    set_seed(seed_value=args.seed)  # Added here for reproducibility
    for num_epoch in train_iterator:
        epoch_iterator = tqdm.tqdm(train_dataloader, desc="Iteration")
        for step, batch in enumerate(epoch_iterator):
            # Skip past any already trained steps if resuming training
            if steps_trained_in_current_epoch > 0:
                steps_trained_in_current_epoch -= 1
                continue

            model.train()
            batch = tuple(t.to(args.device) for t in batch)
            inputs = {
                "input_ids": batch[0],
                "attention_mask": batch[1],
                "token_type_ids": batch[2],
                "labels": batch[3],
            }
            outputs = model(**inputs)
            loss = outputs[0]  # model outputs are always tuples in pytorch-transformers (see doc)

            if args.gradient_accumulation_steps > 1:
                loss = loss / args.gradient_accumulation_steps

            loss.backward()
            tr_loss += loss.item()
            if (step + 1) % args.gradient_accumulation_steps == 0:
                torch.nn.utils.clip_grad_norm_(model.parameters(), args.max_grad_norm)
                optimizer.step()
                scheduler.step()  # Update learning rate schedule
                model.zero_grad()
                global_step += 1

                if global_step % args.logging_steps == 0:
                    # Log metrics
                    # -- Only evaluate on a single GPU, otherwise metrics may not average well
                    results, _ = evaluate(
                        args=args,
                        eval_dataset=dataset["validation"],
                        model=model,
                        labels=labels,
                        pad_token_label_id=pad_token_label_id
                    )
                    logging_loss = tr_loss

                    metric = results['f1']
                    if metric > best_metric:
                        best_metric = metric
                        best_epoch = num_epoch
                        # Save model checkpoint
                        if not os.path.exists(args.output_dir):
                            os.makedirs(args.output_dir)
                        model.save_pretrained(args.output_dir)
                        if 'character' not in args.embedding:
                            tokenizer.save_pretrained(args.output_dir)
                        torch.save(args, os.path.join(args.output_dir, "training_args.bin"))
                        logging.info("Saving model checkpoint to %s", args.output_dir)
                        # torch.save(optimizer.state_dict(), os.path.join(args.output_dir, "optimizer.pt"))
                        # torch.save(scheduler.state_dict(), os.path.join(args.output_dir, "scheduler.pt"))
                        # logging.info("Saving optimizer and scheduler states to %s", args.output_dir)

    return global_step, tr_loss / global_step, best_metric, best_epoch
        return support_x, support_y, query_x, query_y


def build_dataloader(split):
    valid_splits = ["train", "valid", "test"]
    assert split in valid_splits, f"{split} should be one of {valid_splits}."
    dataset = ChexpertDataset(Path(cfg.DATA.PATH) / f"{split}.csv", split)
    bs = 1
    if split == 'train':
        bs = cfg.DATA.BATCH_SIZE
    dl_labeled = DataLoader(dataset,
                            batch_size=bs,
                            num_workers=min(os.cpu_count(), 12),
                            shuffle=(split == 'train'))
    return dl_labeled


if __name__ == '__main__':
    set_seed(0)
    cfg.merge_from_file("config/maml_base.yaml")
    cfg.freeze()
    ds = ChexpertDataset(Path(cfg.DATA.PATH) / 'train.csv', 'train')
    print(len(ds))
    for t, (sx, sy, qx, qy) in enumerate(ds):
        print(t)
        print(torch.norm(sx).item())
        exit()
def main():
    args = parse_args()
    os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
    if not torch.cuda.is_available():
        raise NotImplementedError
    torch.cuda.set_device(args.gpu)
    print("Using", torch.cuda.get_device_name())
    set_seed(args.seed)

    cfg.merge_from_file(args.cfg)
    cfg.freeze()

    output_dir = Path(cfg.OUTPUT_ROOT_DIR) / args.output
    output_dir.mkdir(parents=True, exist_ok=True)
    shutil.copy(args.cfg, output_dir / 'config.yaml')

    device = torch.device('cuda')
    maml = Meta().to(device)

    tmp = filter(lambda x: x.requires_grad, maml.parameters())
    num = sum(map(lambda x: np.prod(x.shape), tmp))
    # print(maml)
    print('Total trainable tensors:', num)

    # batchsz here means total episode number
    '''
    mini = MiniImagenet('/home/i/tmp/MAML-Pytorch/miniimagenet/', mode='train', n_way=args.n_way,
                        k_shot=args.k_spt, k_query=args.k_qry, batchsz=10000, resize=args.imgsz)
    mini_test = MiniImagenet('/home/i/tmp/MAML-Pytorch/miniimagenet/', mode='test', n_way=args.n_way,
                             k_shot=args.k_spt, k_query=args.k_qry, batchsz=100, resize=args.imgsz)
    '''
    train_loader = build_dataloader("train")
    val_loader = build_dataloader("valid")
    test_loader = build_dataloader("test")
    print("Train Batches:", len(train_loader),
          "| Val Batches:", len(val_loader),
          "| Test Batches:", len(test_loader))

    postfix_map = {}
    t = tqdm(range(cfg.MAML.EPOCHS), leave=True, dynamic_ncols=True)
    for epoch in t:
        # fetch meta_batchsz num of episode each time
        # db = DataLoader(mini, args.task_num, shuffle=True, num_workers=1, pin_memory=True)
        # for step, (x_spt, y_spt, x_qry, y_qry) in enumerate(db):
        for step, (x_spt, y_spt, x_qry, y_qry) in enumerate(
                tqdm(train_loader, position=1, dynamic_ncols=True, leave=False)):
            x_spt, y_spt, x_qry, y_qry = (x_spt.to(device), y_spt.to(device),
                                          x_qry.to(device), y_qry.to(device))
            accs = maml(x_spt, y_spt, x_qry, y_qry)
            if step % 30 == 0:
                postfix_map["trAcc"] = accs

            if step % 500 == 0:  # evaluation
                # db_test = DataLoader(mini_test, 1, shuffle=True, num_workers=1, pin_memory=True)
                accs_all_test = []
                for x_spt, y_spt, x_qry, y_qry in tqdm(val_loader, desc="Validation",
                                                       position=2, leave=False):
                    x_spt, y_spt, x_qry, y_qry = (x_spt.squeeze(0).to(device),
                                                  y_spt.squeeze(0).to(device),
                                                  x_qry.squeeze(0).to(device),
                                                  y_qry.squeeze(0).to(device))

                    accs = maml.finetunning(x_spt, y_spt, x_qry, y_qry)
                    accs_all_test.append(accs)

                # [b, update_step+1]
                accs = np.array(accs_all_test).mean(axis=0).astype(np.float16)
                postfix_map["valAcc"] = accs

            t.set_postfix(postfix_map)
def run(try_num, config):
    args = get_args()

    print('config:', config.to_dict(), flush=True)
    print('args:', args, flush=True)

    os.environ['KMP_DUPLICATE_LIB_OK'] = 'True'

    model_dir = f'blending-02-tabnet-{try_num}'
    if not os.path.exists(model_dir):
        os.mkdir(model_dir)

    train_features = pd.read_csv('../input/lish-moa/train_features.csv')
    train_targets = pd.read_csv('../input/lish-moa/train_targets_scored.csv')
    dae_features = pd.read_csv(config.dae_path)
    test_features = pd.read_csv('../input/lish-moa/test_features.csv')

    if args.debug:
        train_features = train_features[:500]
        train_targets = train_targets[:500]
        dae_features = pd.concat([dae_features.iloc[:500],
                                  dae_features.iloc[-3982:]]).reset_index(drop=True)
        config.update(dict(
            n_folds=3,
            seeds=[222],
            n_epochs=3,
            batch_size=128,
        ))

    target_columns = [col for col in train_targets.columns if col != 'sig_id']
    n_targets = len(target_columns)

    train_features, train_targets, test_features = preprocess(
        config, model_dir, train_features, train_targets, test_features, dae_features)

    features_columns = [col for col in train_features.columns
                        if col not in ['sig_id', 'cp_type', 'cp_time', 'cp_dose',
                                       'cp_type_ctl_vehicle', 'cp_type_trt_cp']]
    train_features = train_features[features_columns]
    test_features = test_features[features_columns]

    smooth_loss_function = SmoothBCEwLogits(smoothing=config.smoothing)

    kfold = MultilabelStratifiedKFold(n_splits=config.n_folds, random_state=42, shuffle=True)

    oof_preds = np.zeros((len(train_features), len(config.seeds), n_targets))
    test_preds = []

    for seed_index, seed in enumerate(config.seeds):
        print(f'Train seed {seed}', flush=True)
        set_seed(seed)

        for fold_index, (train_indices, val_indices) in enumerate(kfold.split(
                train_targets[target_columns].values,
                train_targets[target_columns].values
        )):
            print(f'Train fold {fold_index + 1}', flush=True)
            x_train = train_features.loc[train_indices, features_columns].values
            y_train = train_targets.loc[train_indices, target_columns].values
            x_val = train_features.loc[val_indices, features_columns].values
            y_val = train_targets.loc[val_indices, target_columns].values

            weights_path = f'{model_dir}/weights-{seed}-{fold_index}.pt'

            tabnet_conf = dict(
                seed=seed,
                optimizer_fn=optim.Adam,
                scheduler_fn=optim.lr_scheduler.ReduceLROnPlateau,
                n_d=32,
                n_a=32,
                n_steps=1,
                gamma=1.3,
                lambda_sparse=0,
                momentum=0.02,
                optimizer_params=dict(lr=2e-2, weight_decay=1e-5),
                scheduler_params=dict(mode="min", patience=5, min_lr=1e-5, factor=0.9),
                mask_type="entmax",
                verbose=10,
                n_independent=1,
                n_shared=1,
            )

            if args.only_pred:
                print('Skip training', flush=True)
            else:
                model = TabNetRegressor(**tabnet_conf)
                model.fit(
                    X_train=x_train,
                    y_train=y_train,
                    eval_set=[(x_val, y_val)],
                    eval_name=['val'],
                    eval_metric=['logits_ll'],
                    max_epochs=config.n_epochs,
                    patience=20,
                    batch_size=1024,
                    virtual_batch_size=32,
                    num_workers=1,
                    drop_last=True,
                    loss_fn=smooth_loss_function
                )
                model.save_model(weights_path)
                print('Save weights to: ', weights_path, flush=True)

            model = TabNetRegressor(**tabnet_conf)
            model.load_model(f'{weights_path}.zip')

            val_preds = sigmoid(model.predict(x_val))
            score = mean_log_loss(y_val, val_preds, n_targets)
            print(f'fold_index {fold_index} - val_loss: {score:5.5f}', flush=True)
            oof_preds[val_indices, seed_index, :] = val_preds

            preds = sigmoid(model.predict(test_features.values))
            test_preds.append(preds)

        score = mean_log_loss(train_targets[target_columns].values,
                              oof_preds[:, seed_index, :], n_targets)
        print(f'Seed {seed} - val_loss: {score:5.5f}', flush=True)

    oof_preds = np.mean(oof_preds, axis=1)
    score = mean_log_loss(train_targets[target_columns].values, oof_preds, n_targets)
    print(f'Overall score is {score:5.5f}', flush=True)

    oof_pred_df = train_targets.copy()
    oof_pred_df.loc[:, target_columns] = oof_preds
    oof_pred_df.to_csv(f'{model_dir}/oof_pred.csv', index=False)

    test_features = pd.read_csv('../input/lish-moa/test_features.csv')
    submission = create_submission(test_features, ['sig_id'] + target_columns)
    submission[target_columns] = np.mean(test_preds, axis=0)
    submission.loc[test_features['cp_type'] == 'ctl_vehicle', target_columns] = 0
    submission.to_csv(f'{model_dir}/submission.csv', index=False)
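# The TabNet blending script above relies on `sigmoid` and `mean_log_loss` helpers that
# are defined elsewhere in the notebook. The versions below are only an illustrative
# sketch of the usual column-wise binary log-loss used in the MoA competition; the
# clipping constants are an assumption, not the authors' exact code.
import numpy as np


def sigmoid(x):
    # Convert raw logits to probabilities.
    return 1.0 / (1.0 + np.exp(-x))


def mean_log_loss(y_true, y_pred, n_targets):
    # Average binary cross-entropy over the n_targets label columns.
    y_pred = np.clip(y_pred, 1e-15, 1 - 1e-15)
    col_losses = [
        -(y_true[:, i] * np.log(y_pred[:, i])
          + (1 - y_true[:, i]) * np.log(1 - y_pred[:, i])).mean()
        for i in range(n_targets)
    ]
    return float(np.mean(col_losses))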
folders = ["event", "model", "log", "param"] if args.setup in ['single']: folders.append('decoding') for name in folders: folder = "{}/{}/".format(name, args.experiment) if hasattr(args, "experiment") else name + '/' args.__dict__["{}_path".format(name)] = os.path.join(args.exp_dir, folder) Path(args.__dict__["{}_path".format(name)]).mkdir(parents=True, exist_ok=True) if not hasattr(args, 'hp_str'): args.hp_str = get_hp_str(args) args.prefix = strftime("%m.%d_%H.%M.", localtime()) args.id_str = args.prefix + "_" + args.hp_str logger = get_logger(args) set_seed(args) # Save config args.save((str(args.param_path + args.id_str))) # Data train_it, dev_it = get_data(args) args.__dict__.update({'logger': logger}) args.logger.info(args) args.logger.info('Starting with HPARAMS: {}'.format(args.hp_str)) # Model model = get_model(args) extra_input = {} if args.gpu > -1 and torch.cuda.device_count() > 0:
def run(try_num, config):
    args = get_args()

    print('args', args, flush=True)
    print('config:', config.to_dict(), flush=True)

    set_seed(config.rand_seed)

    pretrained_model = "tf_efficientnet_b3_ns"
    model_dir = f'deepinsight-{try_num}'
    if not os.path.exists(model_dir):
        os.mkdir(model_dir)

    train_features = pd.read_csv("../input/lish-moa/train_features.csv")
    train_targets = pd.read_csv("../input/lish-moa/train_targets_scored.csv")
    test_features = pd.read_csv("../input/lish-moa/test_features.csv")

    if config.dae_path:
        dae_features = pd.read_csv(config.dae_path)

    if args.debug:
        train_features = train_features.iloc[:500]
        train_targets = train_targets.iloc[:500]
        if config.dae_path:
            dae_features = pd.concat([dae_features.iloc[:500],
                                      dae_features.iloc[-3982:]]).reset_index(drop=True)
        config.update(dict(
            kfolds=3,
            n_epoch=3
        ))

    train_features = train_features.sort_values(by=["sig_id"], axis=0, inplace=False).reset_index(drop=True)
    train_targets = train_targets.sort_values(by=["sig_id"], axis=0, inplace=False).reset_index(drop=True)

    cat_features_columns = ["cp_dose", 'cp_time']
    num_feature_columns = [c for c in train_features.columns
                           if c != "sig_id" and c not in cat_features_columns + ['cp_type']]
    all_features_columns = cat_features_columns + num_feature_columns
    target_columns = [c for c in train_targets.columns if c != "sig_id"]
    g_feature_columns = [c for c in num_feature_columns if c.startswith("g-")]
    c_feature_columns = [c for c in num_feature_columns if c.startswith("c-")]

    if config.dae_path:
        if config.dae_strategy == 'replace':
            train_features, test_features = assign_dae_features(
                train_features, test_features, dae_features, len(num_feature_columns))
        else:
            train_features, test_features, dae_feature_columns = merge_dae_features(
                train_features, test_features, dae_features,
                len(g_feature_columns), len(c_feature_columns))
            all_features_columns += dae_feature_columns

    train_targets = train_targets.loc[train_features['cp_type'] == 'trt_cp'].reset_index(drop=True)
    train_features = train_features.loc[train_features['cp_type'] == 'trt_cp'].reset_index(drop=True)

    if config.normalizer == 'rank':
        train_features, test_features = normalize(train_features, test_features, num_feature_columns)

    for df in [train_features, test_features]:
        df['cp_type'] = df['cp_type'].map({'ctl_vehicle': 0, 'trt_cp': 1})
        df['cp_dose'] = df['cp_dose'].map({'D1': 0, 'D2': 1})
        df['cp_time'] = df['cp_time'].map({24: 0, 48: 0.5, 72: 1})

    if config.variance_target_type == 1:
        pickle_path = f'{model_dir}/variance_reduction.pkl'
        variance_target_features = num_feature_columns
        if config.dae_path and config.dae_strategy != 'replace':
            variance_target_features += dae_feature_columns

        if not os.path.exists(pickle_path):
            vt = variance_reduction_fit(train_features, variance_target_features, config.variance_threshold)
            save_pickle(vt, pickle_path)
        vt = load_pickle(pickle_path)
        train_features = variance_reduction_transform(vt, train_features, variance_target_features)
        test_features = variance_reduction_transform(vt, test_features, variance_target_features)
        print('(variance_reduction) Number of features after applying:',
              len(train_features.columns), flush=True)
        all_features_columns = list(train_features.columns[1:])

    skf = MultilabelStratifiedKFold(n_splits=config.kfolds, shuffle=True, random_state=config.rand_seed)
    y_labels = np.sum(train_targets.drop("sig_id", axis=1), axis=0).index.tolist()

    logger = Logger()

    for fold_index, (train_index, val_index) in enumerate(skf.split(train_features, train_targets[y_labels])):
        if args.only_pred:
            print('Skip training', flush=True)
            break

        print(f'Fold: {fold_index}', train_index.shape, val_index.shape, flush=True)

        X_train = train_features.loc[train_index, all_features_columns].copy().values
        y_train = train_targets.iloc[train_index, 1:].copy().values
        X_valid = train_features.loc[val_index, all_features_columns].copy().values
        y_valid = train_targets.iloc[val_index, 1:].copy().values

        if config.normalizer == 'log':
            scaler = LogScaler()
            if config.norm_apply_all:
                scaler.fit(X_train)
                X_train = scaler.transform(X_train)
                X_valid = scaler.transform(X_valid)
            else:
                target_features = [i for i, c in enumerate(all_features_columns) if c in num_feature_columns]
                non_target_features = [i for i, c in enumerate(all_features_columns) if c not in num_feature_columns]
                scaler.fit(X_train[:, target_features])
                X_train_tr = scaler.transform(X_train[:, target_features])
                X_valid_tr = scaler.transform(X_valid[:, target_features])
                X_train = np.concatenate([X_train[:, non_target_features], X_train_tr], axis=1)
                X_valid = np.concatenate([X_valid[:, non_target_features], X_valid_tr], axis=1)
            save_pickle(scaler, f'{model_dir}/scaler-{fold_index}.pkl')

        transformer = DeepInsightTransformer(
            feature_extractor=config.extractor,
            pixels=config.resolution,
            perplexity=config.perplexity,
            random_state=config.rand_seed,
            n_jobs=-1
        ).fit(X_train)
        save_pickle(transformer, f'{model_dir}/transformer-{fold_index}.pkl')

        model = MoAEfficientNet(
            pretrained_model_name=pretrained_model,
            fc_size=config.fc_size,
            drop_rate=config.drop_rate,
            drop_connect_rate=config.drop_connect_rate,
            weight_init='goog',
        ).to(DEVICE)

        if config.smoothing is not None:
            if config.weighted_loss_weights is not None:
                indices = get_minority_target_index(train_targets, threshold=config.weighted_loss_threshold)
                indices = [int(i not in indices) for i, c in enumerate(target_columns)]
                train_loss_function = SmoothBCEwLogits(
                    smoothing=config.smoothing,
                    weight=config.weighted_loss_weights,
                    weight_targets=indices,
                    n_labels=len(target_columns))
            else:
                train_loss_function = SmoothBCEwLogits(smoothing=config.smoothing)
        else:
            train_loss_function = bce_loss

        eval_loss_function = bce_loss

        optimizer = optim.Adam(model.parameters(), weight_decay=config.weight_decay, lr=config.learning_rate)

        if config.scheduler_type == 'ca':
            scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=config.t_max, eta_min=0, last_epoch=-1)
        elif config.scheduler_type == 'ms':
            scheduler = optim.lr_scheduler.MultiStepLR(optimizer, milestones=config.ms_scheduler_milestones, gamma=0.1)
        else:
            scheduler = optim.lr_scheduler.ReduceLROnPlateau(
                optimizer, mode='min', factor=0.1, patience=config.rp_patience, eps=1e-4, verbose=True)

        early_stopping = EarlyStopping(patience=7)
        best_score = np.inf
        start_time = time.time()

        for epoch in range(config.n_epoch):
            if config.swap_enable:
                dataset = MoAImageSwapDataset(
                    X_train, y_train, transformer,
                    image_size=config.image_size,
                    swap_prob=config.swap_prob,
                    swap_portion=config.swap_portion)
            else:
                dataset = MoAImageDataset(X_train, y_train, transformer, image_size=config.image_size)

            dataloader = DataLoader(
                dataset,
                batch_size=config.batch_size,
                shuffle=True,
                num_workers=8,
                pin_memory=True,
                drop_last=False)

            loss = loop_train(model, train_loss_function, dataloader, optimizer)

            if config.scheduler_type == 'rp':
                scheduler.step(loss)
            else:
                scheduler.step()

            for param_group in optimizer.param_groups:
                print('current learning rate:', param_group['lr'])

            del dataset, dataloader

            dataset = MoAImageDataset(X_valid, y_valid, transformer, image_size=config.image_size)
            dataloader = DataLoader(
                dataset,
                batch_size=config.infer_batch_size,
                shuffle=False,
                num_workers=8,
                pin_memory=True,
                drop_last=False)

            valid_loss, valid_preds = loop_valid(model, eval_loss_function, dataloader)

            del dataset, dataloader

            logger.update({'fold': fold_index, 'epoch': epoch + 1, 'train_loss': loss, 'val_loss': valid_loss})

            print(f'epoch {epoch + 1}/{config.n_epoch} - train_loss: {loss:.5f} - ' +
                  f'valid_loss: {valid_loss:.5f} - elapsed: {time_format(time.time() - start_time)}',
                  flush=True)

            if valid_loss < best_score:
                best_score = valid_loss
                torch.save(model.state_dict(), f'./{model_dir}/deepinsight-{fold_index}.pt')

            if early_stopping.should_stop(valid_loss):
                print('Early stopping', flush=True)
                break

        print(f'Done -> Fold {fold_index}/{config.kfolds} - best_valid_loss: {best_score:.5f} - ' +
              f'elapsed: {time_format(time.time() - start_time)}', flush=True)

        torch.cuda.empty_cache()
        gc.collect()

        if args.return_first_fold:
            logger.save(f'{model_dir}/log.csv')
            return

    test_preds = np.zeros((test_features.shape[0], len(target_columns)))

    start_time = time.time()
    print('Start inference', flush=True)

    oof_preds = np.zeros((len(train_features), len(target_columns)))
    eval_loss_function = bce_loss

    for fold_index, (train_index, val_index) in enumerate(skf.split(train_features, train_targets[y_labels])):
        print(f'Inference Fold: {fold_index}', train_index.shape, val_index.shape, flush=True)

        X_valid = train_features.loc[val_index, all_features_columns].copy().values
        y_valid = train_targets.iloc[val_index, 1:].copy().values
        X_test = test_features[all_features_columns].values

        if config.normalizer == 'log':
            scaler = load_pickle(f'{model_dir}/scaler-{fold_index}.pkl')
            X_valid = scaler.transform(X_valid)
            X_test = scaler.transform(X_test)

        transformer = load_pickle(f'{model_dir}/transformer-{fold_index}.pkl')

        model = MoAEfficientNet(
            pretrained_model_name=pretrained_model,
            fc_size=config.fc_size,
            drop_rate=config.drop_rate,
            drop_connect_rate=config.drop_connect_rate,
            weight_init='goog',
        ).to(DEVICE)
        model.load_state_dict(torch.load(f'./{model_dir}/deepinsight-{fold_index}.pt'))

        dataset = MoAImageDataset(X_valid, y_valid, transformer, image_size=config.image_size)
        dataloader = DataLoader(
            dataset,
            batch_size=config.infer_batch_size,
            shuffle=False,
            num_workers=8,
            pin_memory=True,
            drop_last=False)

        valid_loss, valid_preds = loop_valid(model, eval_loss_function, dataloader)
        print(f'Fold {fold_index}/{config.kfolds} - fold_valid_loss: {valid_loss:.5f}', flush=True)

        logger.update({'fold': fold_index, 'val_loss': valid_loss})
        oof_preds[val_index, :] = valid_preds

        dataset = TestDataset(X_test, None, transformer, image_size=config.image_size)
        dataloader = DataLoader(
            dataset,
            batch_size=config.infer_batch_size,
            shuffle=False,
            num_workers=8,
            pin_memory=True,
            drop_last=False)

        preds = loop_preds(model, dataloader)
        test_preds += preds / config.kfolds

    oof_preds_df = train_targets.copy()
    oof_preds_df.loc[:, target_columns] = oof_preds.clip(0, 1)
    oof_preds_df.to_csv(f'{model_dir}/oof_preds.csv', index=False)

    oof_loss = mean_log_loss(train_targets.loc[:, target_columns].values, oof_preds)
    print(f'OOF Validation Loss: {oof_loss:.6f}', flush=True)
    print(f'Done inference. Elapsed {time_format(time.time() - start_time)}', flush=True)

    logger.update({'fold': 'oof', 'val_loss': oof_loss})
    logger.save(f'{model_dir}/log.csv')

    submission = pd.DataFrame(data=test_features['sig_id'].values, columns=['sig_id'])
    submission = submission.reindex(columns=['sig_id'] + target_columns)
    submission.loc[:, target_columns] = test_preds.clip(0, 1)
    submission.loc[test_features['cp_type'] == 0, submission.columns[1:]] = 0
    submission.to_csv(f'{model_dir}/submission.csv', index=False)
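# The MoA scripts above train with a `SmoothBCEwLogits` loss that is defined elsewhere.
# Below is only a minimal sketch of plain label-smoothed BCE-with-logits, assuming the
# usual formulation (targets pushed toward 0.5 by `smoothing`); the weighted variant used
# above (the `weight`, `weight_targets` and `n_labels` arguments) is not reproduced here.
import torch.nn.functional as F
from torch import nn


class SmoothBCEwLogits(nn.Module):
    def __init__(self, smoothing=0.0):
        super().__init__()
        self.smoothing = smoothing

    def forward(self, logits, targets):
        # Soften hard 0/1 targets, then apply the standard BCE-with-logits loss.
        targets = targets * (1.0 - self.smoothing) + 0.5 * self.smoothing
        return F.binary_cross_entropy_with_logits(logits, targets)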
def train(args, train_dataset, eval_dataset, model):
    """ Trains the given model on the given dataset. """
    train_sampler = RandomSampler(train_dataset)
    train_dataloader = DataLoader(train_dataset, sampler=train_sampler, batch_size=args.train_batch_size)

    n_train_steps__single_epoch = len(train_dataloader)
    n_train_steps = n_train_steps__single_epoch * args.num_train_epochs
    args.logging_steps = n_train_steps__single_epoch

    # (old) Prepare SGD optimizer
    # no_decay = ["bias", "LayerNorm.weight"]
    # optimizer_grouped_parameters = [
    #     {
    #         "params": [p for n, p in model.named_parameters()
    #                    if not any(nd in n for nd in no_decay)],
    #         "weight_decay": args.weight_decay,
    #     },
    #     {
    #         "params": [p for n, p in model.named_parameters()
    #                    if any(nd in n for nd in no_decay)],
    #         "weight_decay": 0.0
    #     },
    # ]
    # optimizer = SGD(optimizer_grouped_parameters, lr=args.learning_rate)
    # scheduler = get_constant_schedule(optimizer)

    # Prepare Adam optimizer and schedule (linear warmup and decay)
    no_decay = ["bias", "LayerNorm.weight"]
    optimizer_grouped_parameters = [
        {
            "params": [p for n, p in model.named_parameters()
                       if not any(nd in n for nd in no_decay)],
            "weight_decay": args.weight_decay,
        },
        {
            "params": [p for n, p in model.named_parameters()
                       if any(nd in n for nd in no_decay)],
            "weight_decay": 0.0
        },
    ]
    optimizer = AdamW(optimizer_grouped_parameters, lr=args.learning_rate, eps=args.adam_epsilon)
    scheduler = get_linear_schedule_with_warmup(
        optimizer,
        num_warmup_steps=int(args.warmup_ratio * n_train_steps),
        num_training_steps=n_train_steps)

    # Train!
    logging.info("***** Running training *****")
    logging.info(" Num examples = %d", len(train_dataset))
    logging.info(" Num Epochs = %d", args.num_train_epochs)
    logging.info(" Total optimization steps = %d", n_train_steps)
    logging.info(" Using linear warmup (ratio=%s)", args.warmup_ratio)
    logging.info(" Using weight decay (value=%s)", args.weight_decay)

    global_step = 0
    epochs_trained = 0
    tr_loss, logging_loss = 0.0, 0.0
    best_metric, best_epoch = -1.0, -1  # Init best to -1 so that 0 > best
    model.zero_grad()
    train_iterator = tqdm.trange(epochs_trained, int(args.num_train_epochs), desc="Epoch")
    set_seed(seed_value=args.seed)  # Added here for reproducibility
    for num_epoch in train_iterator:
        epoch_loss = 0.0
        epoch_iterator = tqdm.tqdm(train_dataloader, desc="Iteration")
        for step, batch in enumerate(epoch_iterator):
            model.train()
            batch = {k: v.to(device=args.device) for k, v in batch.items()}
            batch['return_dict'] = False
            outputs = model(**batch)
            loss = outputs[0]  # model outputs are always tuples in pytorch-transformers (see doc)

            loss.backward()
            tr_loss += loss.item()
            epoch_loss += loss.item()
            torch.nn.utils.clip_grad_norm_(model.parameters(), args.max_grad_norm)
            optimizer.step()
            scheduler.step()  # Update learning rate schedule
            model.zero_grad()
            global_step += 1

            if global_step % args.logging_steps == 0:
                # Log metrics
                # -- Only evaluate on a single GPU, otherwise metrics may not average well
                results, _ = evaluate(args=args, eval_dataset=eval_dataset, model=model)
                logging_loss = tr_loss

                metric = results['overall']
                if metric > best_metric:
                    best_metric = metric
                    best_epoch = num_epoch
                    # Save model checkpoint
                    if not os.path.exists(args.output_dir):
                        os.makedirs(args.output_dir)
                    model.save_pretrained(args.output_dir)
                    torch.save(args, os.path.join(args.output_dir, "training_args.bin"))
                    logging.info("Saving model checkpoint to %s", args.output_dir)
                    # torch.save(optimizer.state_dict(), os.path.join(args.output_dir, "optimizer.pt"))
                    # torch.save(scheduler.state_dict(), os.path.join(args.output_dir, "scheduler.pt"))
                    # logging.info("Saving optimizer and scheduler states to %s", args.output_dir)

        logging.info(" epoch loss %d = %f", num_epoch, epoch_loss / (step + 1))

    return global_step, tr_loss / global_step, best_metric, best_epoch
"--self_training", action='store_true', help="Enable self training.") ap.add_argument('-ta', "--train_all", action='store_true', help="Whether or not to train whole network.") return ap.parse_args() if __name__ == '__main__': device = 'cuda' if torch.cuda.is_available() else 'cpu' args = parse_args() with open(os.path.join(args.dir / 'config.yml')) as file: config = yaml.safe_load(file) set_seed(args.seed) output_dir = args.dir / args.name output_dir.mkdir(exist_ok=True) # check if gpu training is available os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID" if not config['disable_cuda'] and torch.cuda.is_available(): torch.cuda.set_device(args.gpu) args.device = torch.device('cuda') cudnn.deterministic = True cudnn.benchmark = True print("Using device:", torch.cuda.get_device_name()) if config['arch'] == 'resnet18': model = torchvision.models.resnet18(pretrained=False,
def run(try_num, config):
    logger = Logger()
    args = get_args()

    print('config:', config.to_dict(), flush=True)
    print('args:', args, flush=True)

    os.environ['KMP_DUPLICATE_LIB_OK'] = 'True'

    model_dir = f'blending-01-nn-{try_num}'
    if not os.path.exists(model_dir):
        os.mkdir(model_dir)

    train_features = pd.read_csv('../input/lish-moa/train_features.csv')
    train_targets = pd.read_csv('../input/lish-moa/train_targets_scored.csv')
    dae_features = pd.read_csv(config.dae_path)
    test_features = pd.read_csv('../input/lish-moa/test_features.csv')

    if args.debug:
        train_features = train_features[:500]
        train_targets = train_targets[:500]
        dae_features = pd.concat([dae_features.iloc[:500],
                                  dae_features.iloc[-3982:]]).reset_index(drop=True)
        config.update(dict(
            n_folds=3,
            seeds=[222],
            n_epochs=3,
            batch_size=128,
        ))

    target_columns = [col for col in train_targets.columns if col != 'sig_id']
    n_targets = len(target_columns)

    train_features, train_targets, test_features = preprocess(
        config, model_dir, train_features, train_targets, test_features, dae_features)

    features_columns = [col for col in train_features.columns
                        if col not in ['sig_id', 'cp_type', 'cp_time', 'cp_dose',
                                       'cp_type_ctl_vehicle', 'cp_type_trt_cp']]

    metric_loss_function = nn.BCELoss()
    if config.weighted_loss_strategy == 1:
        indices = get_minority_target_index(train_targets, threshold=config.weighted_loss_threshold)
        indices = [int(i not in indices) for i, c in enumerate(target_columns)]
        smooth_loss_function = SmoothBCELoss(
            smoothing=config.smoothing,
            weight=config.weighted_loss_weights,
            weight_targets=indices,
            n_labels=n_targets)
    else:
        smooth_loss_function = SmoothBCELoss(smoothing=config.smoothing)

    kfold = MultilabelStratifiedKFold(n_splits=config.n_folds, random_state=42, shuffle=True)

    for seed_index, seed in enumerate(config.seeds):
        if args.only_pred:
            print('Skip training', flush=True)
            break

        print(f'Train seed {seed}', flush=True)
        set_seed(seed)

        for fold_index, (train_indices, val_indices) in enumerate(
                kfold.split(train_targets[target_columns].values,
                            train_targets[target_columns].values)):
            print(f'Train fold {fold_index + 1}', flush=True)
            x_train = train_features.loc[train_indices, features_columns]
            y_train = train_targets.loc[train_indices, target_columns]
            x_val = train_features.loc[val_indices, features_columns]
            y_val = train_targets.loc[val_indices, target_columns]

            model = new_model(config.model_kind, len(features_columns)).to(DEVICE)
            checkpoint_path = f'{model_dir}/repeat-{seed}_Fold-{fold_index + 1}.pt'
            optimizer = optim.Adam(model.parameters(),
                                   weight_decay=config.weight_decay,
                                   lr=config.learning_rate)
            scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                             mode='min',
                                                             factor=0.1,
                                                             patience=3,
                                                             eps=1e-4,
                                                             verbose=True)
            best_loss = np.inf

            for epoch in range(config.n_epochs):
                dataset = MoaDataset(x_train.values, y_train.values)
                dataloader = DataLoader(dataset,
                                        batch_size=config.batch_size,
                                        shuffle=True,
                                        drop_last=True)
                train_loss = loop_train(model, dataloader, optimizer,
                                        loss_functions=(smooth_loss_function, metric_loss_function))

                dataset = MoaDataset(x_val.values, y_val.values)
                dataloader = DataLoader(dataset,
                                        batch_size=config.val_batch_size,
                                        shuffle=False)
                valid_loss, _ = loop_valid(model, dataloader, metric_loss_function)

                print('Epoch {}/{} - loss: {:5.5f} - val_loss: {:5.5f}'.format(
                    epoch + 1, config.n_epochs, train_loss, valid_loss), flush=True)

                logger.update({
                    'epoch': epoch + 1,
                    'loss': train_loss,
                    'val_loss': valid_loss
                })

                scheduler.step(valid_loss)

                if valid_loss < best_loss:
                    best_loss = valid_loss
                    torch.save(model.state_dict(), checkpoint_path)

    oof_preds = np.zeros((len(train_features), len(config.seeds), n_targets))
    test_preds = np.zeros((len(test_features), n_targets))

    for seed_index in range(len(config.seeds)):
        seed = config.seeds[seed_index]
        print(f'Inference for seed {seed}', flush=True)

        _test_preds_in_seed = np.zeros((len(test_features), n_targets))

        for fold_index, (_, valid_indices) in enumerate(
                kfold.split(train_targets[target_columns].values,
                            train_targets[target_columns].values)):
            x_val = train_features.loc[valid_indices, features_columns]
            y_val = train_targets.loc[valid_indices, target_columns]

            checkpoint_path = f'{model_dir}/repeat-{seed}_Fold-{fold_index + 1}.pt'
            model = new_model(config.model_kind, len(features_columns)).to(DEVICE)
            model.load_state_dict(torch.load(checkpoint_path))

            dataset = MoaDataset(x_val.values, y_val.values)
            dataloader = DataLoader(dataset, batch_size=config.val_batch_size, shuffle=False)
            preds = loop_pred(model, dataloader)
            oof_preds[valid_indices, seed_index, :] = preds

            dataset = MoaDataset(test_features[features_columns].values, None)
            dataloader = DataLoader(dataset, batch_size=config.val_batch_size, shuffle=False)
            preds = loop_pred(model, dataloader)
            _test_preds_in_seed += preds / config.n_folds

        score = mean_log_loss(train_targets.loc[:, target_columns].values,
                              oof_preds[:, seed_index, :], n_targets=n_targets)
        test_preds += _test_preds_in_seed / len(config.seeds)
        print(f'Score for this seed {score:5.5f}', flush=True)
        logger.update({'val_loss': score})

    # Evaluate validation score
    oof_preds = np.mean(oof_preds, axis=1)
    score = mean_log_loss(train_targets.loc[:, target_columns].values,
                          oof_preds, n_targets=n_targets)
    print(f'Overall score is {score:5.5f}', flush=True)

    # Save validation prediction
    oof_pred_df = train_targets.copy()
    oof_pred_df.iloc[:, 1:] = oof_preds
    oof_pred_df.to_csv(f'{model_dir}/oof_pred.csv', index=False)

    # Save log
    logger.update({'val_loss': score})
    logger.save(f'{model_dir}/log.csv')

    # Save test prediction
    test_features = pd.read_csv('../input/lish-moa/test_features.csv')
    submission = create_submission(test_features, ['sig_id'] + target_columns)
    submission[target_columns] = test_preds
    submission.loc[test_features['cp_type'] == 'ctl_vehicle', target_columns] = 0
    submission.to_csv(f'{model_dir}/submission.csv', index=False)
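# `create_submission` is another helper shared by the blending scripts but not shown in
# this section. The version below is only a plausible minimal sketch (an assumption, not
# the authors' code): build a zero-filled frame keyed by `sig_id`, to be overwritten with
# model predictions afterwards.
import pandas as pd


def create_submission(test_features: pd.DataFrame, columns) -> pd.DataFrame:
    # Start from all-zero predictions so control samples stay at 0 unless overwritten.
    submission = pd.DataFrame(0.0, index=range(len(test_features)), columns=columns)
    submission['sig_id'] = test_features['sig_id'].values
    return submission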
def main():
    args = parse_args()
    os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
    if not torch.cuda.is_available():
        raise NotImplementedError
    torch.cuda.set_device(args.gpu)
    print("Using", torch.cuda.get_device_name())
    set_seed(args.seed)

    cfg.merge_from_file(args.cfg)
    cfg.freeze()

    output_dir = Path(cfg.OUTPUT_ROOT_DIR) / args.output
    output_dir.mkdir(parents=True, exist_ok=True)
    shutil.copy(args.cfg, output_dir / 'config.yaml')

    train_loader, train_loader_u = build_dataloader("train")
    val_loader, _ = build_dataloader("valid")
    test_loader, _ = build_dataloader("test")
    print("Train Batches:", len(train_loader),
          "| Val Batches:", len(val_loader),
          "| Test Batches:", len(test_loader))

    device = torch.device("cuda")

    # First iteration is teacher training, second is student.
    teacher_model = None
    for i in range(int(cfg.SOLVER.SELF_TRAINING) + 1):
        model = Net().to(device)
        optimizer = torch.optim.Adam(model.parameters(),
                                     lr=cfg.SOLVER.BASE_LR,
                                     weight_decay=cfg.SOLVER.WEIGHT_DECAY)
        scheduler = StepLR(optimizer, step_size=cfg.SOLVER.SCHEDULER_STEP_SIZE, gamma=0.1)

        kwargs = {
            'model': model,
            'optimizer': optimizer,
            'scheduler': scheduler,
            'train_loader': train_loader,
            'train_loader_unlabeled': train_loader_u,
            'val_loader': val_loader,
            'test_loader': test_loader,
            'output_dir': output_dir,
            'teacher_model': teacher_model
        }
        trainer = Trainer(**kwargs)

        try:
            if i == 0 and args.teacher_init:
                model.load_state_dict(torch.load(args.teacher_init))
                print("Loaded teacher model from", args.teacher_init)
                trainer.validate('test')
            else:
                trainer.train()
            teacher_model = model
        except BaseException:
            if len(glob(f"{output_dir}/*.pth")) < 1:
                shutil.rmtree(output_dir, ignore_errors=True)
            raise
def parse_args(): """ Parse command line arguments and initialize experiment. """ parser = argparse.ArgumentParser() parser.add_argument("--task", type=str, required=True, choices=['classification', 'sequence_labelling'], help="The evaluation task.") parser.add_argument("--embedding", type=str, required=True, choices=AVAILABLE_MODELS, help="The model to use.") parser.add_argument( "--do_lower_case", action="store_true", help="Whether to apply lowercasing during tokenization.") parser.add_argument("--train_batch_size", type=int, default=1, help="Batch size to use for training.") parser.add_argument("--eval_batch_size", type=int, default=1, help="Batch size to use for evaluation.") parser.add_argument("--gradient_accumulation_steps", type=int, default=1, help="Number of gradient accumulation steps.") parser.add_argument("--num_train_epochs", type=int, default=3, help="Number of training epochs.") parser.add_argument( "--validation_ratio", default=0.5, type=float, help="Proportion of training set to use as a validation set.") parser.add_argument("--learning_rate", default=5e-5, type=float, help="The initial learning rate for Adam.") parser.add_argument("--weight_decay", default=0.1, type=float, help="Weight decay if we apply some.") parser.add_argument("--warmup_ratio", default=0.1, type=int, help="Linear warmup over warmup_ratio*total_steps.") parser.add_argument("--adam_epsilon", default=1e-8, type=float, help="Epsilon for Adam optimizer.") parser.add_argument("--max_grad_norm", default=1.0, type=float, help="Max gradient norm.") parser.add_argument("--do_train", action="store_true", help="Do training & validation.") parser.add_argument("--do_predict", action="store_true", help="Do prediction on the test set.") parser.add_argument("--seed", type=int, default=42, help="Random seed.") args = parser.parse_args() args.start_time = datetime.datetime.now().strftime('%d-%m-%Y_%Hh%Mm%Ss') args.output_dir = os.path.join('results', args.task, args.embedding, f'{args.start_time}__seed-{args.seed}') # --------------------------------- INIT --------------------------------- # Set up logging logging.basicConfig( format="%(asctime)s - %(levelname)s - %(filename)s - %(message)s", datefmt="%d/%m/%Y %H:%M:%S", level=logging.INFO) # Check for GPUs if torch.cuda.is_available(): assert torch.cuda.device_count( ) == 1 # This script doesn't support multi-gpu args.device = torch.device("cuda") logging.info("Using GPU (`%s`)", torch.cuda.get_device_name(0)) else: args.device = torch.device("cpu") logging.info("Using CPU") # Set random seed for reproducibility set_seed(seed_value=args.seed) return args