Example #1
def main():
    args = parser.parse_args()
    assert args.n_views == 2, "Only two view training is supported. Please use --n-views 2."
    # check if gpu training is available
    set_seed(args.seed)
    if not args.disable_cuda and torch.cuda.is_available():
        args.device = torch.device('cuda')
        cudnn.deterministic = True
        cudnn.benchmark = True
    else:
        args.device = torch.device('cpu')
        args.gpu_index = -1

    dataset = ContrastiveLearningDataset(args.data)

    train_dataset = dataset.get_dataset(args.dataset_name, args.n_views)

    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=args.batch_size,
                                               shuffle=True,
                                               num_workers=args.workers,
                                               pin_memory=True,
                                               drop_last=True)

    model = ResNetSimCLR(base_model=args.arch, out_dim=args.out_dim)

    optimizer = torch.optim.Adam(model.parameters(),
                                 args.lr,
                                 weight_decay=args.weight_decay)

    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
        optimizer, T_max=len(train_loader), eta_min=0, last_epoch=-1)

    #  It’s a no-op if the 'gpu_index' argument is a negative integer or None.
    with torch.cuda.device(args.gpu_index):
        simclr = SimCLR(model=model,
                        optimizer=optimizer,
                        scheduler=scheduler,
                        args=args)
        simclr.train(train_loader)
Example #2
import shutil
from datetime import datetime

from config import arg_config, proj_root
from utils.misc import construct_exp_name, construct_path, construct_print, pre_mkdir, set_seed
from utils.solver import Solver

if __name__ == '__main__':
    construct_print(f"{datetime.now()}: Initializing...")
    construct_print(f"Project Root: {proj_root}")
    init_start = datetime.now()

    exp_name = construct_exp_name(arg_config)
    path_config = construct_path(
        proj_root=proj_root, exp_name=exp_name, xlsx_name=arg_config["xlsx_name"],
    )
    pre_mkdir(path_config)
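    # Fix the random seed; cudnn benchmark is enabled only when a size_list is configured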
    set_seed(seed=0, use_cudnn_benchmark=arg_config["size_list"] is not None)

    solver = Solver(exp_name, arg_config, path_config)
    construct_print(f"Total initialization time:{datetime.now() - init_start}")

    shutil.copy(f"{proj_root}/config.py", path_config["cfg_log"])
    shutil.copy(f"{proj_root}/utils/solver.py", path_config["trainer_log"])

    construct_print(f"{datetime.now()}: Start...")
    if arg_config["resume_mode"] == "test" or arg_config["resume_mode"] == "measure":
        solver.test()
    else:
        solver.train()
    construct_print(f"{datetime.now()}: End...")
Example #3
def train(args, dataset, model, tokenizer, labels, pad_token_label_id):
    """ Trains the given model on the given dataset. """

    train_dataset = dataset['train']
    train_sampler = RandomSampler(train_dataset)
    train_dataloader = DataLoader(
        train_dataset,
        sampler=train_sampler,
        batch_size=args.train_batch_size)

    n_train_steps__single_epoch = len(train_dataloader) // args.gradient_accumulation_steps
    n_train_steps = n_train_steps__single_epoch * args.num_train_epochs
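    # Evaluate once per epoch: logging_steps is set to the number of optimizer updates in a single epoch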
    args.logging_steps = n_train_steps__single_epoch

    # Prepare optimizer and schedule (linear warmup and decay)
    no_decay = ["bias", "LayerNorm.weight"]
    optimizer_grouped_parameters = [
        {
            "params": [p for n, p in model.named_parameters()
                       if not any(nd in n for nd in no_decay)],
            "weight_decay": args.weight_decay,
        },
        {
            "params": [p for n, p in model.named_parameters()
                       if any(nd in n for nd in no_decay)],
            "weight_decay": 0.0
        },
    ]
    optimizer = AdamW(optimizer_grouped_parameters, lr=args.learning_rate, eps=args.adam_epsilon)
    scheduler = get_linear_schedule_with_warmup(
        optimizer,
        num_warmup_steps=int(args.warmup_ratio*n_train_steps),
        num_training_steps=n_train_steps
    )

    # Train!
    logging.info("***** Running training *****")
    logging.info("  Num examples = %d", len(train_dataset))
    logging.info("  Num Epochs = %d", args.num_train_epochs)
    logging.info(
        "  Total train batch size (w. accumulation) = %d",
        args.train_batch_size * args.gradient_accumulation_steps
    )
    logging.info("  Gradient Accumulation steps = %d", args.gradient_accumulation_steps)
    logging.info("  Total optimization steps = %d", n_train_steps)
    logging.info("  Using linear warmup (ratio=%s)", args.warmup_ratio)
    logging.info("  Using weight decay (value=%s)", args.weight_decay)
    global_step = 0
    epochs_trained = 0
    steps_trained_in_current_epoch = 0

    tr_loss, logging_loss = 0.0, 0.0
    best_metric, best_epoch = -1.0, -1  # Init best -1 so that 0 > best

    model.zero_grad()
    train_iterator = tqdm.trange(epochs_trained, int(args.num_train_epochs), desc="Epoch")

    set_seed(seed_value=args.seed)  # Added here for reproducibility
    for num_epoch in train_iterator:
        epoch_iterator = tqdm.tqdm(train_dataloader, desc="Iteration")
        for step, batch in enumerate(epoch_iterator):

            # Skip past any already trained steps if resuming training
            if steps_trained_in_current_epoch > 0:
                steps_trained_in_current_epoch -= 1
                continue

            model.train()
            batch = tuple(t.to(args.device) for t in batch)
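            # Batch tensors are ordered as (input_ids, attention_mask, token_type_ids, labels)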
            inputs = {
                "input_ids": batch[0],
                "attention_mask": batch[1],
                "token_type_ids": batch[2],
                "labels": batch[3]}

            outputs = model(**inputs)
            loss = outputs[0]  # model outputs are always tuple in pytorch-transformers (see doc)

            if args.gradient_accumulation_steps > 1:
                loss = loss / args.gradient_accumulation_steps

            loss.backward()

            tr_loss += loss.item()
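            # Step the optimizer only every gradient_accumulation_steps mini-batches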
            if (step + 1) % args.gradient_accumulation_steps == 0:
                torch.nn.utils.clip_grad_norm_(model.parameters(), args.max_grad_norm)

                optimizer.step()
                scheduler.step()  # Update learning rate schedule
                model.zero_grad()
                global_step += 1

                if global_step % args.logging_steps == 0:
                    # Log metrics
                    # -- Only evaluate when single GPU otherwise metrics may not average well
                    results, _ = evaluate(
                        args=args,
                        eval_dataset=dataset["validation"],
                        model=model, labels=labels,
                        pad_token_label_id=pad_token_label_id
                    )

                    logging_loss = tr_loss
                    metric = results['f1']

                    if metric > best_metric:
                        best_metric = metric
                        best_epoch = num_epoch

                        # Save model checkpoint
                        if not os.path.exists(args.output_dir):
                            os.makedirs(args.output_dir)
                        model.save_pretrained(args.output_dir)
                        if 'character' not in args.embedding:
                            tokenizer.save_pretrained(args.output_dir)
                        torch.save(args, os.path.join(args.output_dir, "training_args.bin"))
                        logging.info("Saving model checkpoint to %s", args.output_dir)

                        #torch.save(optimizer.state_dict(), os.path.join(args.output_dir, "optimizer.pt"))
                        #torch.save(scheduler.state_dict(), os.path.join(args.output_dir, "scheduler.pt"))
                        #logging.info("Saving optimizer and scheduler states to %s", args.output_dir)

    return global_step, tr_loss / global_step, best_metric, best_epoch
Example #4
        return support_x, support_y, query_x, query_y


def build_dataloader(split):
    valid_splits = ["train", "valid", "test"]
    assert split in valid_splits, f"{split} should be one of {valid_splits}."

    dataset = ChexpertDataset(Path(cfg.DATA.PATH) / f"{split}.csv", split)
    bs = 1
    if split == 'train':
        bs = cfg.DATA.BATCH_SIZE
    dl_labeled = DataLoader(dataset,
                            batch_size=bs,
                            num_workers=min(os.cpu_count(), 12),
                            shuffle=split == 'train')
    return dl_labeled


if __name__ == '__main__':
    set_seed(0)

    cfg.merge_from_file("config/maml_base.yaml")
    cfg.freeze()

    ds = ChexpertDataset(Path(cfg.DATA.PATH) / 'train.csv', 'train')
    print(len(ds))
    for t, (sx, sy, qx, qy) in enumerate(ds):
        print(t)
        print(torch.norm(sx).item())
        exit()
Example #5
def main():
    args = parse_args()

    os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
    if not torch.cuda.is_available():
        raise NotImplementedError("This script requires a CUDA-capable GPU.")
    torch.cuda.set_device(args.gpu)
    print("Using", torch.cuda.get_device_name())
    set_seed(args.seed)

    cfg.merge_from_file(args.cfg)
    cfg.freeze()

    output_dir = Path(cfg.OUTPUT_ROOT_DIR) / args.output
    output_dir.mkdir(parents=True, exist_ok=True)
    shutil.copy(args.cfg, output_dir / 'config.yaml')

    device = torch.device('cuda')
    maml = Meta().to(device)

    # Count the number of trainable parameters
    tmp = filter(lambda x: x.requires_grad, maml.parameters())
    num = sum(map(lambda x: np.prod(x.shape), tmp))
    # print(maml)
    print('Total trainable tensors:', num)

    # batchsz here means total episode number
    '''
    mini = MiniImagenet('/home/i/tmp/MAML-Pytorch/miniimagenet/', mode='train', n_way=args.n_way, k_shot=args.k_spt,
                        k_query=args.k_qry,
                        batchsz=10000, resize=args.imgsz)
    mini_test = MiniImagenet('/home/i/tmp/MAML-Pytorch/miniimagenet/', mode='test', n_way=args.n_way, k_shot=args.k_spt,
                             k_query=args.k_qry,
                             batchsz=100, resize=args.imgsz)
    '''
    train_loader = build_dataloader("train")
    val_loader = build_dataloader("valid")
    test_loader = build_dataloader("test")

    print("Train Batches:", len(train_loader), "| Val Batches:",
          len(val_loader), "| Test Batches:", len(test_loader))

    postfix_map = {}
    t = tqdm(range(cfg.MAML.EPOCHS), leave=True, dynamic_ncols=True)
    for epoch in t:
        # fetch meta_batchsz num of episode each time
        # db = DataLoader(mini, args.task_num, shuffle=True, num_workers=1, pin_memory=True)

        # for step, (x_spt, y_spt, x_qry, y_qry) in enumerate(db):
        for step, (x_spt, y_spt, x_qry, y_qry) in enumerate(
                tqdm(train_loader, position=1, dynamic_ncols=True,
                     leave=False)):

            x_spt, y_spt, x_qry, y_qry = x_spt.to(device), y_spt.to(
                device), x_qry.to(device), y_qry.to(device)
            accs = maml(x_spt, y_spt, x_qry, y_qry)

            if step % 30 == 0:
                postfix_map["trAcc"] = accs

            if step % 500 == 0:  # evaluation
                # db_test = DataLoader(mini_test, 1, shuffle=True, num_workers=1, pin_memory=True)
                accs_all_test = []

                for x_spt, y_spt, x_qry, y_qry in tqdm(val_loader,
                                                       desc="Validation",
                                                       position=2,
                                                       leave=False):
                    x_spt, y_spt, x_qry, y_qry = x_spt.squeeze(0).to(device), y_spt.squeeze(0).to(device), \
                                                 x_qry.squeeze(0).to(device), y_qry.squeeze(0).to(device)

                    accs = maml.finetunning(x_spt, y_spt, x_qry, y_qry)
                    accs_all_test.append(accs)

                # [b, update_step+1]
                accs = np.array(accs_all_test).mean(axis=0).astype(np.float16)
                postfix_map["valAcc"] = accs
            t.set_postfix(postfix_map)
Example #6
def run(try_num, config):
    args = get_args()

    print('config:', config.to_dict(), flush=True)
    print('args:', args, flush=True)
    os.environ['KMP_DUPLICATE_LIB_OK'] = 'True'

    model_dir = f'blending-02-tabnet-{try_num}'

    if not os.path.exists(model_dir):
        os.mkdir(model_dir)

    train_features = pd.read_csv('../input/lish-moa/train_features.csv')
    train_targets = pd.read_csv('../input/lish-moa/train_targets_scored.csv')
    dae_features = pd.read_csv(config.dae_path)
    test_features = pd.read_csv('../input/lish-moa/test_features.csv')

    if args.debug:
        train_features = train_features[:500]
        train_targets = train_targets[:500]
        dae_features = pd.concat([dae_features.iloc[:500], dae_features.iloc[-3982:]]).reset_index(drop=True)

        config.update(dict(
            n_folds=3,
            seeds=[222],
            n_epochs=3,
            batch_size=128,
        ))

    target_columns = [col for col in train_targets.columns if col != 'sig_id']
    n_targets = len(target_columns)

    train_features, train_targets, test_features = preprocess(config, model_dir, train_features,
                                                              train_targets, test_features,
                                                              dae_features)
    features_columns = [col for col in train_features.columns
                        if col not in ['sig_id', 'cp_type', 'cp_time', 'cp_dose',
                                       'cp_type_ctl_vehicle', 'cp_type_trt_cp']]

    train_features = train_features[features_columns]
    test_features = test_features[features_columns]

    smooth_loss_function = SmoothBCEwLogits(smoothing=config.smoothing)
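    # Multilabel-stratified folds keep the distribution of each target roughly constant across folds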
    kfold = MultilabelStratifiedKFold(n_splits=config.n_folds, random_state=42, shuffle=True)

    oof_preds = np.zeros((len(train_features), len(config.seeds), n_targets))
    test_preds = []

    for seed_index, seed in enumerate(config.seeds):
        print(f'Train seed {seed}', flush=True)
        set_seed(seed)

        for fold_index, (train_indices, val_indices) in enumerate(kfold.split(
            train_targets[target_columns].values,
            train_targets[target_columns].values
        )):
            print(f'Train fold {fold_index + 1}', flush=True)
            x_train = train_features.loc[train_indices, features_columns].values
            y_train = train_targets.loc[train_indices, target_columns].values
            x_val = train_features.loc[val_indices, features_columns].values
            y_val = train_targets.loc[val_indices, target_columns].values

            weights_path = f'{model_dir}/weights-{seed}-{fold_index}.pt'

            tabnet_conf = dict(
                seed=seed,
                optimizer_fn=optim.Adam,
                scheduler_fn=optim.lr_scheduler.ReduceLROnPlateau,
                n_d=32,
                n_a=32,
                n_steps=1,
                gamma=1.3,
                lambda_sparse=0,
                momentum=0.02,
                optimizer_params=dict(lr=2e-2, weight_decay=1e-5),
                scheduler_params=dict(mode="min", patience=5, min_lr=1e-5, factor=0.9),
                mask_type="entmax",
                verbose=10,
                n_independent=1,
                n_shared=1,
            )

            if args.only_pred:
                print('Skip training', flush=True)
            else:
                model = TabNetRegressor(**tabnet_conf)

                model.fit(
                    X_train=x_train,
                    y_train=y_train,
                    eval_set=[(x_val, y_val)],
                    eval_name=['val'],
                    eval_metric=['logits_ll'],
                    max_epochs=config.n_epochs,
                    patience=20,
                    batch_size=1024,
                    virtual_batch_size=32,
                    num_workers=1,
                    drop_last=True,
                    loss_fn=smooth_loss_function
                )

                model.save_model(weights_path)
                print('Saved weights to:', weights_path, flush=True)

            model = TabNetRegressor(**tabnet_conf)
            model.load_model(f'{weights_path}.zip')

            val_preds = sigmoid(model.predict(x_val))
            score = mean_log_loss(y_val, val_preds, n_targets)
            print(f'fold_index {fold_index}   -   val_loss: {score:5.5f}', flush=True)

            oof_preds[val_indices, seed_index, :] = val_preds

            preds = sigmoid(model.predict(test_features.values))
            test_preds.append(preds)

        score = mean_log_loss(train_targets[target_columns].values, oof_preds[:, seed_index, :], n_targets)
        print(f'Seed {seed}   -   val_loss: {score:5.5f}', flush=True)

    oof_preds = np.mean(oof_preds, axis=1)
    score = mean_log_loss(train_targets[target_columns].values, oof_preds, n_targets)
    print(f'Overall score is {score:5.5f}', flush=True)

    oof_pred_df = train_targets.copy()
    oof_pred_df.loc[:, target_columns] = oof_preds
    oof_pred_df.to_csv(f'{model_dir}/oof_pred.csv', index=False)

    test_features = pd.read_csv('../input/lish-moa/test_features.csv')
    submission = create_submission(test_features, ['sig_id'] + target_columns)
    submission[target_columns] = np.mean(test_preds, axis=0)
    submission.loc[test_features['cp_type'] == 'ctl_vehicle', target_columns] = 0
    submission.to_csv(f'{model_dir}/submission.csv', index=False)
Example #7
folders = ["event", "model", "log", "param"]
if args.setup in ['single']:
    folders.append('decoding')

for name in folders:
    folder = "{}/{}/".format(name, args.experiment) if hasattr(args, "experiment") else name + '/'
    args.__dict__["{}_path".format(name)] = os.path.join(args.exp_dir, folder)
    Path(args.__dict__["{}_path".format(name)]).mkdir(parents=True, exist_ok=True)

if not hasattr(args, 'hp_str'):
    args.hp_str = get_hp_str(args)
    args.prefix = strftime("%m.%d_%H.%M.", localtime())
    args.id_str = args.prefix + "_" + args.hp_str
logger = get_logger(args)
set_seed(args)

# Save config
args.save(str(args.param_path + args.id_str))

# Data
train_it, dev_it = get_data(args)

args.__dict__.update({'logger': logger})
args.logger.info(args)
args.logger.info('Starting with HPARAMS: {}'.format(args.hp_str))

# Model
model = get_model(args)
extra_input = {}
if args.gpu > -1 and torch.cuda.device_count() > 0:
Example #8
def run(try_num, config):
    args = get_args()

    print('args', args, flush=True)
    print('config:', config.to_dict(), flush=True)

    set_seed(config.rand_seed)

    pretrained_model = f"tf_efficientnet_b3_ns"
    model_dir = f'deepinsight-{try_num}'

    if not os.path.exists(model_dir):
        os.mkdir(model_dir)

    train_features = pd.read_csv(f"../input/lish-moa/train_features.csv")
    train_targets = pd.read_csv(f"../input/lish-moa/train_targets_scored.csv")
    test_features = pd.read_csv(f"../input/lish-moa/test_features.csv")

    if config.dae_path:
        dae_features = pd.read_csv(config.dae_path)

    if args.debug:
        train_features = train_features.iloc[:500]
        train_targets = train_targets.iloc[:500]
        if config.dae_path:
            dae_features = pd.concat([dae_features.iloc[:500], dae_features.iloc[-3982:]]).reset_index(drop=True)

        config.update(dict(
            kfolds=3,
            n_epoch=3
        ))

    train_features = train_features.sort_values(by=["sig_id"], axis=0, inplace=False).reset_index(drop=True)
    train_targets = train_targets.sort_values(by=["sig_id"], axis=0, inplace=False).reset_index(drop=True)

    cat_features_columns = ["cp_dose", 'cp_time']
    num_feature_columns = [c for c in train_features.columns
                           if c != "sig_id" and c not in cat_features_columns + ['cp_type']]
    all_features_columns = cat_features_columns + num_feature_columns
    target_columns = [c for c in train_targets.columns if c != "sig_id"]
    g_feature_columns = [c for c in num_feature_columns if c.startswith("g-")]
    c_feature_columns = [c for c in num_feature_columns if c.startswith("c-")]

    if config.dae_path:
        if config.dae_strategy == 'replace':
            train_features, test_features = assign_dae_features(
                train_features, test_features, dae_features, len(num_feature_columns))
        else:
            train_features, test_features, dae_feature_columns = merge_dae_features(
                train_features, test_features, dae_features, len(g_feature_columns), len(c_feature_columns))
            all_features_columns += dae_feature_columns

    train_targets = train_targets.loc[train_features['cp_type'] == 'trt_cp'].reset_index(drop=True)
    train_features = train_features.loc[train_features['cp_type'] == 'trt_cp'].reset_index(drop=True)

    if config.normalizer == 'rank':
        train_features, test_features = normalize(train_features, test_features, num_feature_columns)

    for df in [train_features, test_features]:
        df['cp_type'] = df['cp_type'].map({'ctl_vehicle': 0, 'trt_cp': 1})
        df['cp_dose'] = df['cp_dose'].map({'D1': 0, 'D2': 1})
        df['cp_time'] = df['cp_time'].map({24: 0, 48: 0.5, 72: 1})

    if config.variance_target_type == 1:
        pickle_path = f'{model_dir}/variance_reduction.pkl'

        # Work on a copy so that extending the list does not mutate num_feature_columns in place
        variance_target_features = list(num_feature_columns)
        if config.dae_path and config.dae_strategy != 'replace':
            variance_target_features += dae_feature_columns

        if not os.path.exists(pickle_path):
            vt = variance_reduction_fit(train_features, variance_target_features, config.variance_threshold)
            save_pickle(vt, pickle_path)

        vt = load_pickle(pickle_path)
        train_features = variance_reduction_transform(vt, train_features, variance_target_features)
        test_features = variance_reduction_transform(vt, test_features, variance_target_features)
        print('(variance_reduction) Number of features after applying:', len(train_features.columns), flush=True)
        all_features_columns = list(train_features.columns[1:])

    skf = MultilabelStratifiedKFold(n_splits=config.kfolds, shuffle=True, random_state=config.rand_seed)
    y_labels = np.sum(train_targets.drop("sig_id", axis=1), axis=0).index.tolist()
    logger = Logger()

    for fold_index, (train_index, val_index) in enumerate(skf.split(train_features, train_targets[y_labels])):
        if args.only_pred:
            print('Skip training', flush=True)
            break

        print(f'Fold: {fold_index}', train_index.shape, val_index.shape, flush=True)

        X_train = train_features.loc[train_index, all_features_columns].copy().values
        y_train = train_targets.iloc[train_index, 1:].copy().values
        X_valid = train_features.loc[val_index, all_features_columns].copy().values
        y_valid = train_targets.iloc[val_index, 1:].copy().values

        if config.normalizer == 'log':
            scaler = LogScaler()
            if config.norm_apply_all:
                scaler.fit(X_train)
                X_train = scaler.transform(X_train)
                X_valid = scaler.transform(X_valid)
            else:
                target_features = [i for i, c in enumerate(all_features_columns) if c in num_feature_columns]
                non_target_features = [i for i, c in enumerate(all_features_columns) if c not in num_feature_columns]

                scaler.fit(X_train[:, target_features])
                X_train_tr = scaler.transform(X_train[:, target_features])
                X_valid_tr = scaler.transform(X_valid[:, target_features])
                X_train = np.concatenate([X_train[:, non_target_features], X_train_tr], axis=1)
                X_valid = np.concatenate([X_valid[:, non_target_features], X_valid_tr], axis=1)
            save_pickle(scaler, f'{model_dir}/scaler-{fold_index}.pkl')

        transformer = DeepInsightTransformer(
            feature_extractor=config.extractor,
            pixels=config.resolution,
            perplexity=config.perplexity,
            random_state=config.rand_seed,
            n_jobs=-1
        ).fit(X_train)

        save_pickle(transformer, f'{model_dir}/transformer-{fold_index}.pkl')

        model = MoAEfficientNet(
            pretrained_model_name=pretrained_model,
            fc_size=config.fc_size,
            drop_rate=config.drop_rate,
            drop_connect_rate=config.drop_connect_rate,
            weight_init='goog',
        ).to(DEVICE)

        if config.smoothing is not None:
            if config.weighted_loss_weights is not None:
                indices = get_minority_target_index(train_targets, threshold=config.weighted_loss_threshold)
                indices = [int(i not in indices) for i, c in enumerate(target_columns)]
                train_loss_function = SmoothBCEwLogits(
                    smoothing=config.smoothing,
                    weight=config.weighted_loss_weights,
                    weight_targets=indices,
                    n_labels=len(target_columns))
            else:
                train_loss_function = SmoothBCEwLogits(smoothing=config.smoothing)
        else:
            train_loss_function = bce_loss

        eval_loss_function = bce_loss

        optimizer = optim.Adam(model.parameters(), weight_decay=config.weight_decay, lr=config.learning_rate)

        if config.scheduler_type == 'ca':
            scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=config.t_max, eta_min=0, last_epoch=-1)
        elif config.scheduler_type == 'ms':
            scheduler = optim.lr_scheduler.MultiStepLR(optimizer, milestones=config.ms_scheduler_milestones, gamma=0.1)
        else:
            scheduler = optim.lr_scheduler.ReduceLROnPlateau(
                optimizer, mode='min', factor=0.1, patience=config.rp_patience, eps=1e-4, verbose=True)

        early_stopping = EarlyStopping(patience=7)
        best_score = np.inf
        start_time = time.time()

        for epoch in range(config.n_epoch):

            if config.swap_enable:
                dataset = MoAImageSwapDataset(
                    X_train,
                    y_train,
                    transformer,
                    image_size=config.image_size,
                    swap_prob=config.swap_prob,
                    swap_portion=config.swap_portion)
            else:
                dataset = MoAImageDataset(X_train, y_train, transformer, image_size=config.image_size)

            dataloader = DataLoader(
                dataset,
                batch_size=config.batch_size,
                shuffle=True,
                num_workers=8,
                pin_memory=True,
                drop_last=False)
            loss = loop_train(model, train_loss_function, dataloader, optimizer)

            if config.scheduler_type == 'rp':
                scheduler.step(loss)
            else:
                scheduler.step()
                for param_group in optimizer.param_groups:
                    print('current learning rate:', param_group['lr'])

            del dataset, dataloader

            dataset = MoAImageDataset(X_valid, y_valid, transformer, image_size=config.image_size)
            dataloader = DataLoader(
                dataset,
                batch_size=config.infer_batch_size,
                shuffle=False,
                num_workers=8,
                pin_memory=True,
                drop_last=False)
            valid_loss, valid_preds = loop_valid(model, eval_loss_function, dataloader)

            del dataset, dataloader

            logger.update({'fold': fold_index, 'epoch': epoch + 1, 'train_loss': loss, 'val_loss': valid_loss})
            print(f'epoch {epoch + 1}/{config.n_epoch}  -  train_loss: {loss:.5f}  -  ' +
                  f'valid_loss: {valid_loss:.5f}  -  elapsed: {time_format(time.time() - start_time)}', flush=True)

            if valid_loss < best_score:
                best_score = valid_loss
                torch.save(model.state_dict(), f'./{model_dir}/deepinsight-{fold_index}.pt')

            if early_stopping.should_stop(valid_loss):
                print('Early stopping', flush=True)
                break

        print(f'Done -> Fold {fold_index}/{config.kfolds}  -  best_valid_loss: {best_score:.5f}  -  ' +
              f'elapsed: {time_format(time.time() - start_time)}', flush=True)

        torch.cuda.empty_cache()
        gc.collect()

        if args.return_first_fold:
            logger.save(f'{model_dir}/log.csv')
            return

    test_preds = np.zeros((test_features.shape[0], len(target_columns)))
    start_time = time.time()
    print('Start inference', flush=True)

    oof_preds = np.zeros((len(train_features), len(target_columns)))
    eval_loss_function = bce_loss

    for fold_index, (train_index, val_index) in enumerate(skf.split(train_features, train_targets[y_labels])):
        print(f'Inference Fold: {fold_index}', train_index.shape, val_index.shape, flush=True)
        X_valid = train_features.loc[val_index, all_features_columns].copy().values
        y_valid = train_targets.iloc[val_index, 1:].copy().values
        X_test = test_features[all_features_columns].values

        if config.normalizer == 'log':
            scaler = load_pickle(f'{model_dir}/scaler-{fold_index}.pkl')
            X_valid = scaler.transform(X_valid)
            X_test = scaler.transform(X_test)

        transformer = load_pickle(f'{model_dir}/transformer-{fold_index}.pkl')
        model = MoAEfficientNet(
            pretrained_model_name=pretrained_model,
            fc_size=config.fc_size,
            drop_rate=config.drop_rate,
            drop_connect_rate=config.drop_connect_rate,
            weight_init='goog',
        ).to(DEVICE)
        model.load_state_dict(torch.load(f'./{model_dir}/deepinsight-{fold_index}.pt'))

        dataset = MoAImageDataset(X_valid, y_valid, transformer, image_size=config.image_size)
        dataloader = DataLoader(
            dataset,
            batch_size=config.infer_batch_size,
            shuffle=False,
            num_workers=8,
            pin_memory=True,
            drop_last=False)
        valid_loss, valid_preds = loop_valid(model, eval_loss_function, dataloader)
        print(f'Fold {fold_index}/{config.kfolds}  -  fold_valid_loss: {valid_loss:.5f}', flush=True)
        logger.update({'fold': fold_index, 'val_loss': valid_loss})

        oof_preds[val_index, :] = valid_preds

        dataset = TestDataset(X_test, None, transformer, image_size=config.image_size)
        dataloader = DataLoader(
            dataset,
            batch_size=config.infer_batch_size,
            shuffle=False,
            num_workers=8,
            pin_memory=True,
            drop_last=False)

        preds = loop_preds(model, dataloader)
        test_preds += preds / config.kfolds

    oof_preds_df = train_targets.copy()
    oof_preds_df.loc[:, target_columns] = oof_preds.clip(0, 1)
    oof_preds_df.to_csv(f'{model_dir}/oof_preds.csv', index=False)
    oof_loss = mean_log_loss(train_targets.loc[:, target_columns].values, oof_preds)

    print(f'OOF Validation Loss: {oof_loss:.6f}', flush=True)
    print(f'Done inference  Elapsed {time_format(time.time() - start_time)}', flush=True)
    logger.update({'fold': 'oof', 'val_loss': oof_loss})
    logger.save(f'{model_dir}/log.csv')

    submission = pd.DataFrame(data=test_features['sig_id'].values, columns=['sig_id'])
    submission = submission.reindex(columns=['sig_id'] + target_columns)
    submission.loc[:, target_columns] = test_preds.clip(0, 1)
    submission.loc[test_features['cp_type'] == 0, submission.columns[1:]] = 0
    submission.to_csv(f'{model_dir}/submission.csv', index=False)
Example #9
def train(args, train_dataset, eval_dataset, model):
    """ Trains the given model on the given dataset. """

    train_sampler = RandomSampler(train_dataset)
    train_dataloader = DataLoader(train_dataset,
                                  sampler=train_sampler,
                                  batch_size=args.train_batch_size)

    n_train_steps__single_epoch = len(train_dataloader)
    n_train_steps = n_train_steps__single_epoch * args.num_train_epochs
    args.logging_steps = n_train_steps__single_epoch

    # (old) Prepare SGD optimizer
    # no_decay = ["bias", "LayerNorm.weight"]
    # optimizer_grouped_parameters = [
    #     {
    #         "params": [p for n, p in model.named_parameters()
    #                    if not any(nd in n for nd in no_decay)],
    #         "weight_decay": args.weight_decay,
    #     },
    #     {
    #         "params": [p for n, p in model.named_parameters()
    #                    if any(nd in n for nd in no_decay)],
    #         "weight_decay": 0.0
    #     },
    # ]
    # optimizer = SGD(optimizer_grouped_parameters, lr=args.learning_rate)
    # scheduler = get_constant_schedule(optimizer)

    # Prepare Adam optimizer and schedule (linear warmup and decay)
    no_decay = ["bias", "LayerNorm.weight"]
    optimizer_grouped_parameters = [
        {
            "params": [
                p for n, p in model.named_parameters()
                if not any(nd in n for nd in no_decay)
            ],
            "weight_decay":
            args.weight_decay,
        },
        {
            "params": [
                p for n, p in model.named_parameters()
                if any(nd in n for nd in no_decay)
            ],
            "weight_decay":
            0.0
        },
    ]
    optimizer = AdamW(optimizer_grouped_parameters,
                      lr=args.learning_rate,
                      eps=args.adam_epsilon)
    scheduler = get_linear_schedule_with_warmup(
        optimizer,
        num_warmup_steps=int(args.warmup_ratio * n_train_steps),
        num_training_steps=n_train_steps)

    # Train!
    logging.info("***** Running training *****")
    logging.info("  Num examples = %d", len(train_dataset))
    logging.info("  Num Epochs = %d", args.num_train_epochs)
    logging.info("  Total optimization steps = %d", n_train_steps)
    logging.info("  Using linear warmup (ratio=%s)", args.warmup_ratio)
    logging.info("  Using weight decay (value=%s)", args.weight_decay)
    global_step = 0
    epochs_trained = 0

    tr_loss, logging_loss = 0.0, 0.0
    best_metric, best_epoch = -1.0, -1  # Init best -1 so that 0 > best

    model.zero_grad()
    train_iterator = tqdm.trange(epochs_trained,
                                 int(args.num_train_epochs),
                                 desc="Epoch")

    set_seed(seed_value=args.seed)  # Added here for reproducibility
    for num_epoch in train_iterator:
        epoch_loss = 0.0
        epoch_iterator = tqdm.tqdm(train_dataloader, desc="Iteration")
        for step, batch in enumerate(epoch_iterator):
            model.train()
            batch = {k: v.to(device=args.device) for k, v in batch.items()}
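            # Force tuple outputs so the loss can be taken positionally below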
            batch['return_dict'] = False

            outputs = model(**batch)
            loss = outputs[0]  # model outputs are always tuple in pytorch-transformers (see doc)
            loss.backward()
            tr_loss += loss.item()
            epoch_loss += loss.item()

            torch.nn.utils.clip_grad_norm_(model.parameters(),
                                           args.max_grad_norm)
            optimizer.step()
            scheduler.step()  # Update learning rate schedule
            model.zero_grad()
            global_step += 1

            if global_step % args.logging_steps == 0:
                # Log metrics
                # -- Only evaluate when single GPU otherwise metrics may not average well
                results, _ = evaluate(args=args,
                                      eval_dataset=eval_dataset,
                                      model=model)

                logging_loss = tr_loss
                metric = results['overall']

                if metric > best_metric:
                    best_metric = metric
                    best_epoch = num_epoch

                    # Save model checkpoint
                    if not os.path.exists(args.output_dir):
                        os.makedirs(args.output_dir)
                    model.save_pretrained(args.output_dir)
                    torch.save(
                        args, os.path.join(args.output_dir,
                                           "training_args.bin"))
                    logging.info("Saving model checkpoint to %s",
                                 args.output_dir)

                    #torch.save(optimizer.state_dict(), os.path.join(args.output_dir, "optimizer.pt"))
                    #torch.save(scheduler.state_dict(), os.path.join(args.output_dir, "scheduler.pt"))
                    #logging.info("Saving optimizer and scheduler states to %s", args.output_dir)
        logging.info(" epoch loss %d = %f", num_epoch, epoch_loss / step + 1)

    return global_step, tr_loss / global_step, best_metric, best_epoch
Example #10
                    "--self_training",
                    action='store_true',
                    help="Enable self training.")
    ap.add_argument('-ta',
                    "--train_all",
                    action='store_true',
                    help="Whether or not to train whole network.")
    return ap.parse_args()


if __name__ == '__main__':
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    args = parse_args()
    with open(args.dir / 'config.yml') as file:
        config = yaml.safe_load(file)
    set_seed(args.seed)
    output_dir = args.dir / args.name
    output_dir.mkdir(exist_ok=True)

    # check if gpu training is available
    os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
    if not config['disable_cuda'] and torch.cuda.is_available():
        torch.cuda.set_device(args.gpu)
        args.device = torch.device('cuda')
        cudnn.deterministic = True
        cudnn.benchmark = True

    print("Using device:", torch.cuda.get_device_name())

    if config['arch'] == 'resnet18':
        model = torchvision.models.resnet18(pretrained=False,
Example #11
def run(try_num, config):
    logger = Logger()
    args = get_args()

    print('config:', config.to_dict(), flush=True)
    print('args:', args, flush=True)
    os.environ['KMP_DUPLICATE_LIB_OK'] = 'True'

    model_dir = f'blending-01-nn-{try_num}'

    if not os.path.exists(model_dir):
        os.mkdir(model_dir)

    train_features = pd.read_csv('../input/lish-moa/train_features.csv')
    train_targets = pd.read_csv('../input/lish-moa/train_targets_scored.csv')
    dae_features = pd.read_csv(config.dae_path)
    test_features = pd.read_csv('../input/lish-moa/test_features.csv')

    if args.debug:
        train_features = train_features[:500]
        train_targets = train_targets[:500]
        dae_features = pd.concat(
            [dae_features.iloc[:500],
             dae_features.iloc[-3982:]]).reset_index(drop=True)

        config.update(
            dict(
                n_folds=3,
                seeds=[222],
                n_epochs=3,
                batch_size=128,
            ))

    target_columns = [col for col in train_targets.columns if col != 'sig_id']
    n_targets = len(target_columns)

    train_features, train_targets, test_features = preprocess(
        config, model_dir, train_features, train_targets, test_features,
        dae_features)
    features_columns = [
        col for col in train_features.columns if col not in [
            'sig_id', 'cp_type', 'cp_time', 'cp_dose', 'cp_type_ctl_vehicle',
            'cp_type_trt_cp'
        ]
    ]

    metric_loss_function = nn.BCELoss()

    if config.weighted_loss_strategy == 1:
        indices = get_minority_target_index(
            train_targets, threshold=config.weighted_loss_threshold)
        indices = [int(i not in indices) for i, c in enumerate(target_columns)]
        smooth_loss_function = SmoothBCELoss(
            smoothing=config.smoothing,
            weight=config.weighted_loss_weights,
            weight_targets=indices,
            n_labels=n_targets)
    else:
        smooth_loss_function = SmoothBCELoss(smoothing=config.smoothing)

    kfold = MultilabelStratifiedKFold(n_splits=config.n_folds,
                                      random_state=42,
                                      shuffle=True)

    for seed_index, seed in enumerate(config.seeds):
        if args.only_pred:
            print('Skip training', flush=True)
            break

        print(f'Train seed {seed}', flush=True)
        set_seed(seed)

        for fold_index, (train_indices, val_indices) in enumerate(
                kfold.split(train_targets[target_columns].values,
                            train_targets[target_columns].values)):
            print(f'Train fold {fold_index + 1}', flush=True)

            x_train = train_features.loc[train_indices, features_columns]
            y_train = train_targets.loc[train_indices, target_columns]
            x_val = train_features.loc[val_indices, features_columns]
            y_val = train_targets.loc[val_indices, target_columns]

            model = new_model(config.model_kind,
                              len(features_columns)).to(DEVICE)
            checkpoint_path = f'{model_dir}/repeat-{seed}_Fold-{fold_index + 1}.pt'
            optimizer = optim.Adam(model.parameters(),
                                   weight_decay=config.weight_decay,
                                   lr=config.learning_rate)
            scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                             mode='min',
                                                             factor=0.1,
                                                             patience=3,
                                                             eps=1e-4,
                                                             verbose=True)

            best_loss = np.inf

            for epoch in range(config.n_epochs):
                dataset = MoaDataset(x_train.values, y_train.values)
                dataloader = DataLoader(dataset,
                                        batch_size=config.batch_size,
                                        shuffle=True,
                                        drop_last=True)

                train_loss = loop_train(model,
                                        dataloader,
                                        optimizer,
                                        loss_functions=(
                                            smooth_loss_function,
                                            metric_loss_function,
                                        ))

                dataset = MoaDataset(x_val.values, y_val.values)
                dataloader = DataLoader(dataset,
                                        batch_size=config.val_batch_size,
                                        shuffle=False)
                valid_loss, _ = loop_valid(model, dataloader,
                                           metric_loss_function)

                print(
                    'Epoch {}/{}   -   loss: {:5.5f}   -   val_loss: {:5.5f}'.
                    format(epoch + 1, config.n_epochs, train_loss, valid_loss),
                    flush=True)

                logger.update({
                    'epoch': epoch + 1,
                    'loss': train_loss,
                    'val_loss': valid_loss
                })

                scheduler.step(valid_loss)

                if valid_loss < best_loss:
                    best_loss = valid_loss
                    torch.save(model.state_dict(), checkpoint_path)

    oof_preds = np.zeros((len(train_features), len(config.seeds), n_targets))
    test_preds = np.zeros((len(test_features), n_targets))

    for seed_index in range(len(config.seeds)):
        seed = config.seeds[seed_index]

        print(f'Inference for seed {seed}', flush=True)

        _test_preds_in_seed = np.zeros((len(test_features), n_targets))

        for fold_index, (_, valid_indices) in enumerate(
                kfold.split(train_targets[target_columns].values,
                            train_targets[target_columns].values)):
            x_val = train_features.loc[valid_indices, features_columns]
            y_val = train_targets.loc[valid_indices, target_columns]

            checkpoint_path = f'{model_dir}/repeat-{seed}_Fold-{fold_index + 1}.pt'
            model = new_model(config.model_kind,
                              len(features_columns)).to(DEVICE)
            model.load_state_dict(torch.load(checkpoint_path))

            dataset = MoaDataset(x_val.values, y_val.values)
            dataloader = DataLoader(dataset,
                                    batch_size=config.val_batch_size,
                                    shuffle=False)
            preds = loop_pred(model, dataloader)

            oof_preds[valid_indices, seed_index, :] = preds

            dataset = MoaDataset(test_features[features_columns].values, None)
            dataloader = DataLoader(dataset,
                                    batch_size=config.val_batch_size,
                                    shuffle=False)
            preds = loop_pred(model, dataloader)

            _test_preds_in_seed += preds / config.n_folds

        score = mean_log_loss(train_targets.loc[:, target_columns].values,
                              oof_preds[:, seed_index, :],
                              n_targets=n_targets)
        test_preds += _test_preds_in_seed / len(config.seeds)

        print(f'Score for this seed {score:5.5f}', flush=True)
        logger.update({'val_loss': score})

    # Evaluate validation score
    oof_preds = np.mean(oof_preds, axis=1)
    score = mean_log_loss(train_targets.loc[:, target_columns].values,
                          oof_preds,
                          n_targets=n_targets)
    print(f'Overall score is {score:5.5f}', flush=True)

    # Save validation prediction
    oof_pred_df = train_targets.copy()
    oof_pred_df.iloc[:, 1:] = oof_preds
    oof_pred_df.to_csv(f'{model_dir}/oof_pred.csv', index=False)

    # Save log
    logger.update({'val_loss': score})
    logger.save(f'{model_dir}/log.csv')

    # Save Test Prediction
    test_features = pd.read_csv('../input/lish-moa/test_features.csv')
    submission = create_submission(test_features, ['sig_id'] + target_columns)
    submission[target_columns] = test_preds
    submission.loc[test_features['cp_type'] == 'ctl_vehicle',
                   target_columns] = 0
    submission.to_csv(f'{model_dir}/submission.csv', index=False)
Example #12
def main():
    args = parse_args()

    os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
    if not torch.cuda.is_available():
        raise NotImplementedError("This script requires a CUDA-capable GPU.")
    torch.cuda.set_device(args.gpu)
    print("Using", torch.cuda.get_device_name())
    set_seed(args.seed)

    cfg.merge_from_file(args.cfg)
    cfg.freeze()

    output_dir = Path(cfg.OUTPUT_ROOT_DIR) / args.output
    output_dir.mkdir(parents=True, exist_ok=True)
    shutil.copy(args.cfg, output_dir / 'config.yaml')

    train_loader, train_loader_u = build_dataloader("train")
    val_loader, _ = build_dataloader("valid")
    test_loader, _ = build_dataloader("test")

    print("Train Batches:", len(train_loader), "| Val Batches:",
          len(val_loader), "| Test Batches:", len(test_loader))

    device = torch.device("cuda")

    # first iteration is teacher training, second is student
    teacher_model = None
    for i in range(int(cfg.SOLVER.SELF_TRAINING) + 1):
        model = Net().to(device)
        optimizer = torch.optim.Adam(model.parameters(),
                                     lr=cfg.SOLVER.BASE_LR,
                                     weight_decay=cfg.SOLVER.WEIGHT_DECAY)
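        # StepLR multiplies the learning rate by 0.1 every SCHEDULER_STEP_SIZE scheduler steps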
        scheduler = StepLR(optimizer,
                           step_size=cfg.SOLVER.SCHEDULER_STEP_SIZE,
                           gamma=0.1)

        kwargs = {
            'model': model,
            'optimizer': optimizer,
            'scheduler': scheduler,
            'train_loader': train_loader,
            'train_loader_unlabeled': train_loader_u,
            'val_loader': val_loader,
            'test_loader': test_loader,
            'output_dir': output_dir,
            'teacher_model': teacher_model
        }
        trainer = Trainer(**kwargs)

        try:
            if i == 0 and args.teacher_init:
                model.load_state_dict(torch.load(args.teacher_init))
                print("Loaded teacher model from", args.teacher_init)
                trainer.validate('test')
            else:
                trainer.train()
            teacher_model = model
        except BaseException:
            if len(glob(f"{output_dir}/*.pth")) < 1:
                shutil.rmtree(output_dir, ignore_errors=True)
            raise
Example #13
def parse_args():
    """ Parse command line arguments and initialize experiment. """
    parser = argparse.ArgumentParser()
    parser.add_argument("--task",
                        type=str,
                        required=True,
                        choices=['classification', 'sequence_labelling'],
                        help="The evaluation task.")
    parser.add_argument("--embedding",
                        type=str,
                        required=True,
                        choices=AVAILABLE_MODELS,
                        help="The model to use.")
    parser.add_argument(
        "--do_lower_case",
        action="store_true",
        help="Whether to apply lowercasing during tokenization.")
    parser.add_argument("--train_batch_size",
                        type=int,
                        default=1,
                        help="Batch size to use for training.")
    parser.add_argument("--eval_batch_size",
                        type=int,
                        default=1,
                        help="Batch size to use for evaluation.")
    parser.add_argument("--gradient_accumulation_steps",
                        type=int,
                        default=1,
                        help="Number of gradient accumulation steps.")
    parser.add_argument("--num_train_epochs",
                        type=int,
                        default=3,
                        help="Number of training epochs.")
    parser.add_argument(
        "--validation_ratio",
        default=0.5,
        type=float,
        help="Proportion of training set to use as a validation set.")
    parser.add_argument("--learning_rate",
                        default=5e-5,
                        type=float,
                        help="The initial learning rate for Adam.")
    parser.add_argument("--weight_decay",
                        default=0.1,
                        type=float,
                        help="Weight decay if we apply some.")
    parser.add_argument("--warmup_ratio",
                        default=0.1,
                        type=int,
                        help="Linear warmup over warmup_ratio*total_steps.")
    parser.add_argument("--adam_epsilon",
                        default=1e-8,
                        type=float,
                        help="Epsilon for Adam optimizer.")
    parser.add_argument("--max_grad_norm",
                        default=1.0,
                        type=float,
                        help="Max gradient norm.")
    parser.add_argument("--do_train",
                        action="store_true",
                        help="Do training & validation.")
    parser.add_argument("--do_predict",
                        action="store_true",
                        help="Do prediction on the test set.")
    parser.add_argument("--seed", type=int, default=42, help="Random seed.")

    args = parser.parse_args()
    args.start_time = datetime.datetime.now().strftime('%d-%m-%Y_%Hh%Mm%Ss')
    args.output_dir = os.path.join('results', args.task, args.embedding,
                                   f'{args.start_time}__seed-{args.seed}')

    # --------------------------------- INIT ---------------------------------

    # Set up logging
    logging.basicConfig(
        format="%(asctime)s - %(levelname)s - %(filename)s -   %(message)s",
        datefmt="%d/%m/%Y %H:%M:%S",
        level=logging.INFO)

    # Check for GPUs
    if torch.cuda.is_available():
        assert torch.cuda.device_count() == 1  # This script doesn't support multi-gpu
        args.device = torch.device("cuda")
        logging.info("Using GPU (`%s`)", torch.cuda.get_device_name(0))
    else:
        args.device = torch.device("cpu")
        logging.info("Using CPU")

    # Set random seed for reproducibility
    set_seed(seed_value=args.seed)

    return args