def main():
    train_df = pd.read_csv(TRAIN_PATH)
    train_df['male'] = np.load(
        "../input/identity-column-data/male_labeled.npy")
    train_df['female'] = np.load(
        "../input/identity-column-data/female_labeled.npy")
    train_df['homosexual_gay_or_lesbian'] = np.load(
        "../input/identity-column-data/homosexual_gay_or_lesbian_labeled.npy")
    train_df['christian'] = np.load(
        "../input/identity-column-data/christian_labeled.npy")
    train_df['jewish'] = np.load(
        "../input/identity-column-data/jewish_labeled.npy")
    train_df['muslim'] = np.load(
        "../input/identity-column-data/muslim_labeled.npy")
    train_df['black'] = np.load(
        "../input/identity-column-data/black_labeled.npy")
    train_df['white'] = np.load(
        "../input/identity-column-data/white_labeled.npy")
    train_df['psychiatric_or_mental_illness'] = np.load(
        "../input/identity-column-data/psychiatric_or_mental_illness_labeled.npy")
    fold_df = pd.read_csv(FOLD_PATH)

    # y = np.where(train_df['target'] >= 0.5, 1, 0)
    y = train_df['target'].values
    y_aux = train_df[AUX_COLUMNS].values

    identity_columns_new = []
    for column in identity_columns + ['target']:
        train_df[column + "_bin"] = np.where(train_df[column] >= 0.5, True, False)
        if column != "target":
            identity_columns_new.append(column + "_bin")

    # Overall
    weights = np.ones((len(train_df),)) / 4
    # Subgroup
    weights += (train_df[identity_columns].fillna(0).values >= 0.5).sum(
        axis=1).astype(bool).astype(int) / 4
    # Background Positive, Subgroup Negative
    weights += (
        ((train_df["target"].values >= 0.5).astype(bool).astype(int) +
         (1 - (train_df[identity_columns].fillna(0).values >= 0.5).sum(
             axis=1).astype(bool).astype(int))) > 1).astype(bool).astype(int) / 4
    # Background Negative, Subgroup Positive
    weights += (
        ((train_df["target"].values < 0.5).astype(bool).astype(int) +
         (train_df[identity_columns].fillna(0).values >= 0.5).sum(
             axis=1).astype(bool).astype(int)) > 1).astype(bool).astype(int) / 4
    loss_weight = 0.5

    with timer('preprocessing text'):
        # df["comment_text"] = [analyzer_embed(text) for text in df["comment_text"]]
        train_df['comment_text'] = train_df['comment_text'].astype(str)
        train_df = train_df.fillna(0)

    with timer('load embedding'):
        tokenizer = BertTokenizer.from_pretrained(BERT_MODEL_PATH,
                                                  cache_dir=None,
                                                  do_lower_case=True)
        X_text = convert_lines_head_tail(
            train_df["comment_text"].fillna("DUMMY_VALUE"), max_len, head_len,
            tokenizer)
        X_text = np.array(X_text).astype("int32")
        del tokenizer
        gc.collect()

    with timer('train'):
        train_index = fold_df.fold_id != fold_id
        valid_index = fold_df.fold_id == fold_id
        X_train, y_train, y_aux_train, w_train = (
            X_text[train_index], y[train_index].astype("float32"),
            y_aux[train_index].astype("float32"),
            weights[train_index].astype("float32"))
        X_val, y_val, y_aux_val, w_val = (
            X_text[valid_index], y[valid_index].astype("float32"),
            y_aux[valid_index].astype("float32"),
            weights[valid_index].astype("float32"))
        test_df = train_df[valid_index]
        train_size = len(X_train)
        del X_text, y, y_aux, weights, train_index, valid_index, train_df, fold_df
        gc.collect()

        model = BertForSequenceClassification.from_pretrained(
            WORK_DIR, cache_dir=None, num_labels=n_labels)
        model.zero_grad()
        model = model.to(device)

        # Pack main target, per-sample weight, and auxiliary targets into one array.
        y_train = np.concatenate(
            (y_train.reshape(-1, 1), w_train.reshape(-1, 1), y_aux_train),
            axis=1).astype("float32")
        y_val = np.concatenate(
            (y_val.reshape(-1, 1), w_val.reshape(-1, 1), y_aux_val),
            axis=1).astype("float32")
        del w_train, w_val, y_aux_train, y_aux_val
        gc.collect()

        train_dataset = torch.utils.data.TensorDataset(
            torch.tensor(X_train, dtype=torch.long),
            torch.tensor(y_train, dtype=torch.float32))
        valid = torch.utils.data.TensorDataset(
            torch.tensor(X_val, dtype=torch.long),
            torch.tensor(y_val, dtype=torch.float32))

        ran_sampler = torch.utils.data.RandomSampler(train_dataset)
        len_sampler = LenMatchBatchSampler(ran_sampler,
                                           batch_size=batch_size,
                                           drop_last=False)
        train_loader = torch.utils.data.DataLoader(train_dataset,
                                                   batch_sampler=len_sampler)
        valid_loader = torch.utils.data.DataLoader(valid,
                                                   batch_size=batch_size * 2,
                                                   shuffle=False)
        del X_train, y_train, X_val, y_val
        gc.collect()
        LOGGER.info("done data loader setup")

        param_optimizer = list(model.named_parameters())
        no_decay = ['bias', 'LayerNorm.bias', 'LayerNorm.weight']
        optimizer_grouped_parameters = [{
            'params': [p for n, p in param_optimizer
                       if not any(nd in n for nd in no_decay)],
            'weight_decay': 0.01
        }, {
            'params': [p for n, p in param_optimizer
                       if any(nd in n for nd in no_decay)],
            'weight_decay': 0.0
        }]
        num_train_optimization_steps = int(epochs * train_size / batch_size /
                                           accumulation_steps)
        total_step = int(epochs * train_size / batch_size)

        optimizer = BertAdam(optimizer_grouped_parameters,
                             lr=base_lr,
                             warmup=0.005,
                             t_total=num_train_optimization_steps)
        LOGGER.info("done optimizer setup")

        model, optimizer = amp.initialize(model, optimizer,
                                          opt_level="O1", verbosity=0)
        # criterion = torch.nn.BCEWithLogitsLoss().to(device)
        criterion = CustomLoss(loss_weight).to(device)
        LOGGER.info("done amp setup")

        for epoch in range(epochs):
            LOGGER.info(f"Starting {epoch} epoch...")
            LOGGER.info(f"length {train_size} train...")
            if epoch == 1:
                for param_group in optimizer.param_groups:
                    param_group['lr'] = base_lr * gammas[1]
            tr_loss, train_losses = train_one_epoch(model,
                                                    train_loader,
                                                    criterion,
                                                    optimizer,
                                                    device,
                                                    accumulation_steps,
                                                    total_step,
                                                    n_labels,
                                                    base_lr,
                                                    gamma=gammas[2 * epoch])
            LOGGER.info(f'Mean train loss: {round(tr_loss, 5)}')

            torch.save(model.state_dict(),
                       '{}_epoch{}_fold{}.pth'.format(exp, epoch, fold_id))

            valid_loss, oof_pred = validate(model, valid_loader, criterion,
                                            device, n_labels)
            LOGGER.info(f'Mean valid loss: {round(valid_loss, 5)}')

        del model
        gc.collect()
        torch.cuda.empty_cache()

        test_df["pred"] = oof_pred[:, 0]
        test_df = convert_dataframe_to_bool(test_df)
        bias_metrics_df = compute_bias_metrics_for_model(test_df, identity_columns)
        LOGGER.info(bias_metrics_df)

        score = get_final_metric(bias_metrics_df, calculate_overall_auc(test_df))
        LOGGER.info(f'final score is {score}')

        test_df.to_csv("oof.csv", index=False)

        xs = list(range(1, len(train_losses) + 1))
        plt.plot(xs, train_losses, label='Train loss')
        plt.legend()
        plt.xticks(xs)
        plt.xlabel('Iter')
        plt.savefig("loss.png")

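# NOTE: CustomLoss is referenced above but not defined in this snippet. The
# class below is a hypothetical stand-in showing one way a weighted loss could
# consume the (target, weight, aux_targets) column layout concatenated into
# y_train / y_val above; the class name, argument order, and the use of
# loss_weight to scale the auxiliary term are assumptions, not the original
# implementation.
import torch
from torch import nn


class WeightedAuxLoss(nn.Module):
    """Hypothetical sketch: per-sample weighted BCE on the main toxicity
    target plus an auxiliary BCE term scaled by loss_weight."""

    def __init__(self, loss_weight):
        super().__init__()
        self.loss_weight = loss_weight

    def forward(self, logits, targets):
        # targets[:, 0] = main target, targets[:, 1] = per-sample weight,
        # targets[:, 2:] = auxiliary toxicity-subtype targets.
        main_loss = nn.functional.binary_cross_entropy_with_logits(
            logits[:, 0], targets[:, 0], weight=targets[:, 1])
        aux_loss = nn.functional.binary_cross_entropy_with_logits(
            logits[:, 1:], targets[:, 2:])
        return main_loss + self.loss_weight * aux_loss
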
def train(seed, depth, maxlen, batch_size, accumulation_steps, fold):
    config.fold = fold
    print(f"\nFOLD: {fold}")
    config.seed = seed
    config.max_sequence_length = maxlen
    config.batch_size = batch_size
    config.accumulation_steps = accumulation_steps
    config.bert_weight = f"../bert_weight/uncased_L-{depth}_H-768_A-12/"
    config.features = f"../bert_features_{maxlen}/"
    config.experiment = f"{depth}layers"
    config.checkpoint = f"{config.logdir}/{config.today}/kfold/fold_{fold}/{config.experiment}_" \
                        f"{config.batch_size}bs_{config.accumulation_steps}accum_{config.seed}seed_{config.max_sequence_length}/"

    print_config(config)

    np.random.seed(config.seed)
    torch.manual_seed(config.seed)
    torch.cuda.manual_seed(config.seed)
    torch.backends.cudnn.deterministic = True

    # Data loaders
    train_loader, valid_loader, valid_df, loss_weight = get_kfold_data_loaders(
        config)
    loaders = {"train": train_loader, "valid": valid_loader}

    # Criterion
    criterion = CustomLoss(loss_weight)

    # Model and optimizer
    model = BertForTokenClassificationMultiOutput.from_pretrained(
        config.bert_weight,
        cache_dir=None,
        num_aux_labels=config.n_aux_targets)

    param_optimizer = list(model.named_parameters())
    no_decay = ['bias', 'LayerNorm.bias', 'LayerNorm.weight']
    optimizer_grouped_parameters = [{
        'params': [p for n, p in param_optimizer
                   if not any(nd in n for nd in no_decay)],
        'weight_decay': 0.01
    }, {
        'params': [p for n, p in param_optimizer
                   if any(nd in n for nd in no_decay)],
        'weight_decay': 0.0
    }]

    num_train_optimization_steps = np.ceil(
        len(train_loader.dataset) / config.batch_size /
        config.accumulation_steps) * config.epochs

    optimizer = BertAdam(optimizer_grouped_parameters,
                         lr=config.lr,
                         warmup=0.01,
                         t_total=num_train_optimization_steps)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer)

    identity_valid = valid_df[config.identity_columns].copy()
    target_valid = valid_df.target.values
    auc_callback = AucCallback(identity=identity_valid, target=target_valid)

    # Model runner
    runner = ModelRunner()

    # Model training
    runner.train(model=model,
                 criterion=criterion,
                 optimizer=optimizer,
                 scheduler=scheduler,
                 loaders=loaders,
                 main_metric='auc',
                 minimize_metric=False,
                 logdir=config.checkpoint,
                 num_epochs=config.epochs,
                 verbose=True,
                 fp16={"opt_level": "O1"},
                 callbacks=[auc_callback])

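# NOTE: an illustrative per-fold invocation only; the seed, depth, sequence
# length, batch size, and fold count below are placeholders, not values taken
# from the original configuration.
if __name__ == "__main__":
    for fold in range(5):
        train(seed=1234,
              depth=12,
              maxlen=220,
              batch_size=32,
              accumulation_steps=2,
              fold=fold)
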
def main():
    train_df = pd.read_csv(TRAIN_PATH).sample(train_size + valid_size,
                                              random_state=seed)

    y = np.where(train_df['target'] >= 0.5, 1, 0)
    y_aux = train_df[AUX_COLUMNS].values

    identity_columns_new = []
    for column in identity_columns + ['target']:
        train_df[column + "_bin"] = np.where(train_df[column] >= 0.5, True, False)
        if column != "target":
            identity_columns_new.append(column + "_bin")

    # Overall
    weights = np.ones((len(train_df),)) / 4
    # Subgroup
    weights += (train_df[identity_columns].fillna(0).values >= 0.5).sum(
        axis=1).astype(bool).astype(int) / 4
    # Background Positive, Subgroup Negative
    weights += (((train_df["target"].values >= 0.5).astype(bool).astype(int) +
                 (train_df[identity_columns].fillna(0).values < 0.5).sum(
                     axis=1).astype(bool).astype(int)) > 1).astype(bool).astype(int) / 4
    # Background Negative, Subgroup Positive
    weights += (((train_df["target"].values < 0.5).astype(bool).astype(int) +
                 (train_df[identity_columns].fillna(0).values >= 0.5).sum(
                     axis=1).astype(bool).astype(int)) > 1).astype(bool).astype(int) / 4
    loss_weight = 1.0 / weights.mean()

    with timer('preprocessing text'):
        # df["comment_text"] = [analyzer_embed(text) for text in df["comment_text"]]
        train_df['comment_text'] = train_df['comment_text'].astype(str)
        train_df = train_df.fillna(0)

    with timer('load embedding'):
        tokenizer = BertTokenizer.from_pretrained(BERT_MODEL_PATH,
                                                  cache_dir=None,
                                                  do_lower_case=True)
        X_text, train_lengths = convert_lines(
            train_df["comment_text"].fillna("DUMMY_VALUE"), max_len, tokenizer)

    test_df = train_df[train_size:]

    with timer('train'):
        X_train, y_train, y_aux_train, w_train = (X_text[:train_size],
                                                  y[:train_size],
                                                  y_aux[:train_size],
                                                  weights[:train_size])
        X_val, y_val, y_aux_val, w_val = (X_text[train_size:],
                                          y[train_size:],
                                          y_aux[train_size:],
                                          weights[train_size:])
        trn_lengths, val_lengths = (train_lengths[:train_size],
                                    train_lengths[train_size:])

        model = BertForSequenceClassification.from_pretrained(WORK_DIR,
                                                              cache_dir=None,
                                                              num_labels=n_labels)
        model.zero_grad()
        model = model.to(device)

        # Pack main target, per-sample weight, and auxiliary targets into one array.
        y_train = np.concatenate(
            (y_train.reshape(-1, 1), w_train.reshape(-1, 1), y_aux_train), axis=1)
        y_val = np.concatenate(
            (y_val.reshape(-1, 1), w_val.reshape(-1, 1), y_aux_val), axis=1)

        train_dataset = torch.utils.data.TensorDataset(
            torch.tensor(X_train, dtype=torch.long),
            torch.tensor(y_train, dtype=torch.float))
        valid = torch.utils.data.TensorDataset(
            torch.tensor(X_val, dtype=torch.long),
            torch.tensor(y_val, dtype=torch.float))

        ran_sampler = torch.utils.data.RandomSampler(train_dataset)
        len_sampler = LenMatchBatchSampler(ran_sampler,
                                           batch_size=batch_size,
                                           drop_last=False)
        train_loader = torch.utils.data.DataLoader(train_dataset,
                                                   batch_sampler=len_sampler)
        valid_loader = torch.utils.data.DataLoader(valid,
                                                   batch_size=batch_size * 2,
                                                   shuffle=False)

        param_optimizer = list(model.named_parameters())
        no_decay = ['bias', 'LayerNorm.bias', 'LayerNorm.weight']
        optimizer_grouped_parameters = [
            {'params': [p for n, p in param_optimizer
                        if not any(nd in n for nd in no_decay)],
             'weight_decay': 0.01},
            {'params': [p for n, p in param_optimizer
                        if any(nd in n for nd in no_decay)],
             'weight_decay': 0.0}
        ]
        num_train_optimization_steps = int(epochs * train_size / batch_size /
                                           accumulation_steps)
        total_step = int(epochs * train_size / batch_size)

        optimizer = BertAdam(optimizer_grouped_parameters,
                             lr=2e-5,
                             warmup=0.05,
                             t_total=num_train_optimization_steps)

        model, optimizer = amp.initialize(model, optimizer,
                                          opt_level="O1", verbosity=0)
        # criterion = torch.nn.BCEWithLogitsLoss().to(device)
        criterion = CustomLoss(loss_weight).to(device)

        LOGGER.info("Starting 1 epoch...")
        tr_loss, train_losses = train_one_epoch(model, train_loader, criterion,
                                                optimizer, device,
                                                accumulation_steps, total_step,
                                                n_labels)
        LOGGER.info(f'Mean train loss: {round(tr_loss, 5)}')

        torch.save(model.state_dict(), '{}_dic'.format(exp))

        valid_loss, oof_pred = validate(model, valid_loader, criterion, device,
                                        n_labels)
        del model
        gc.collect()
        torch.cuda.empty_cache()

        test_df["pred"] = oof_pred[:, 0]
        test_df = convert_dataframe_to_bool(test_df)
        bias_metrics_df = compute_bias_metrics_for_model(test_df, identity_columns)
        LOGGER.info(bias_metrics_df)

        score = get_final_metric(bias_metrics_df, calculate_overall_auc(test_df))
        LOGGER.info(f'final score is {score}')

        test_df.to_csv("oof.csv", index=False)

        xs = list(range(1, len(train_losses) + 1))
        plt.plot(xs, train_losses, label='Train loss')
        plt.legend()
        plt.xticks(xs)
        plt.xlabel('Iter')
        plt.savefig("loss.png")

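# NOTE: LenMatchBatchSampler is used above but not defined in this snippet.
# The sampler below is a hypothetical sketch of the idea it likely implements:
# bucket randomly sampled indices by approximate token length so each batch
# pads to roughly the same width. It assumes the wrapped RandomSampler sits on
# a TensorDataset whose first tensor holds padded token ids with pad id 0; the
# bucket width of 16 tokens is an arbitrary illustrative choice.
from torch.utils.data import Sampler


class BucketByLengthBatchSampler(Sampler):
    def __init__(self, sampler, batch_size, drop_last=False, bucket_width=16):
        self.sampler = sampler
        self.batch_size = batch_size
        self.drop_last = drop_last
        self.bucket_width = bucket_width
        # Approximate each example's length by its number of non-pad tokens.
        token_ids = sampler.data_source.tensors[0]
        self.lengths = (token_ids != 0).sum(dim=1).tolist()

    def __iter__(self):
        buckets = {}
        for idx in self.sampler:
            key = self.lengths[idx] // self.bucket_width
            buckets.setdefault(key, []).append(idx)
            if len(buckets[key]) == self.batch_size:
                yield buckets.pop(key)
        # Flush partially filled buckets, mixing lengths only at the tail.
        leftovers = [idx for bucket in buckets.values() for idx in bucket]
        for i in range(0, len(leftovers), self.batch_size):
            batch = leftovers[i:i + self.batch_size]
            if len(batch) == self.batch_size or not self.drop_last:
                yield batch

    def __len__(self):
        if self.drop_last:
            return len(self.lengths) // self.batch_size
        return (len(self.lengths) + self.batch_size - 1) // self.batch_size
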
    # import pdb; pdb.set_trace()
    refine_net.load_state_dict(state_dict)

    vgg = VGG16(requires_grad=False)
    vgg.to(device)

    if torch.cuda.device_count() > 1 and MULTI_GPU:
        print("Using {} GPUs...".format(torch.cuda.device_count()))
        refine_net = nn.DataParallel(refine_net)
    else:
        print("GPU ID: {}".format(device))
    refine_net = refine_net.to(device)

    d_loss_fn = nn.BCELoss()
    d_loss_fn = d_loss_fn.to(device)
    refine_loss_fn = CustomLoss()
    refine_loss_fn = refine_loss_fn.to(device)

    from dataset_cloth import define_dataset
    tfrecord_path = "/content/uplara_tops_v10_refined_grapy.record"
    batch_size = 1
    trainset, trainset_length = define_dataset(tfrecord_path, batch_size, train=True)
    valset, valset_length = define_dataset(tfrecord_path, batch_size, train=False)

    tps_weights_path = "gs://experiments_logs/gmm/TOPS/short_sleeves_high_slope_loss/weights/model_44"
    tps_model = tf.keras.models.load_model(tps_weights_path,
                                           custom_objects={"tf": tf},
                                           compile=False)

def train(seed, depth, maxlen, batch_size, accumulation_steps, model_name):
    config.seed = seed
    config.max_sequence_length = maxlen
    config.batch_size = batch_size
    config.accumulation_steps = accumulation_steps

    if depth != 24:
        config.bert_weight = f"../bert_weight/uncased_L-{depth}_H-768_A-12/"
    else:
        config.bert_weight = f"../bert_weight/uncased_L-{depth}_H-1024_A-16/"

    if model_name == 'bert':
        config.features = f"../bert_features_{maxlen}/"
    elif model_name == 'gpt2':
        config.features = f"../features_{maxlen}_gpt/"
    else:
        config.features = f"../features_{maxlen}_xlnet/"

    config.experiment = f"{depth}layers"
    config.checkpoint = f"{config.logdir}/{config.today}/{model_name}_{config.experiment}_" \
                        f"{config.batch_size}bs_{config.accumulation_steps}accum_{config.seed}seed_{config.max_sequence_length}/"

    print_config(config)

    np.random.seed(config.seed)
    torch.manual_seed(config.seed)
    torch.cuda.manual_seed(config.seed)
    torch.backends.cudnn.deterministic = True

    # Data loaders
    train_loader, valid_loader, valid_df, loss_weight = get_data_loaders(config)
    loaders = {"train": train_loader, "valid": valid_loader}

    # Criterion
    criterion = CustomLoss(loss_weight)

    # Model and optimizer
    if model_name == 'bert':
        print("BERT MODEL")
        model = BertForTokenClassificationMultiOutput2.from_pretrained(
            config.bert_weight,
            cache_dir=None,
            num_aux_labels=config.n_aux_targets)

        param_optimizer = list(model.named_parameters())
        no_decay = ['bias', 'LayerNorm.bias', 'LayerNorm.weight']
        optimizer_grouped_parameters = [{
            'params': [p for n, p in param_optimizer
                       if not any(nd in n for nd in no_decay)],
            'weight_decay': 0.01
        }, {
            'params': [p for n, p in param_optimizer
                       if any(nd in n for nd in no_decay)],
            'weight_decay': 0.0
        }]

        num_train_optimization_steps = np.ceil(
            len(train_loader.dataset) / config.batch_size /
            config.accumulation_steps) * config.epochs

        optimizer = BertAdam(optimizer_grouped_parameters,
                             lr=config.lr,
                             warmup=0.01,
                             t_total=num_train_optimization_steps)
    elif model_name == 'gpt2':
        print("GPT2 MODEL")
        model = GPT2ClassificationMultioutput.from_pretrained(
            config.gpt2_weight,
            cache_dir=None,
            num_aux_labels=config.n_aux_targets)

        param_optimizer = list(model.named_parameters())
        no_decay = ['bias', 'LayerNorm.bias', 'LayerNorm.weight']
        optimizer_grouped_parameters = [{
            'params': [p for n, p in param_optimizer
                       if not any(nd in n for nd in no_decay)],
            'weight_decay': 0.01
        }, {
            'params': [p for n, p in param_optimizer
                       if any(nd in n for nd in no_decay)],
            'weight_decay': 0.0
        }]

        num_train_optimization_steps = np.ceil(
            len(train_loader.dataset) / config.batch_size /
            config.accumulation_steps) * config.epochs

        optimizer = OpenAIAdam(optimizer_grouped_parameters,
                               lr=config.lr,
                               warmup=0.01,
                               t_total=num_train_optimization_steps)
    elif model_name == 'xlnet':
        model = XLNetWithMultiOutput.from_pretrained(
            config.xlnet_weight,
            clf_dropout=0.4,
            n_class=6
            # num_aux_labels=config.n_aux_targets
        )

        param_optimizer = list(model.named_parameters())
        no_decay = ['bias', 'LayerNorm.bias', 'LayerNorm.weight']
        optimizer_grouped_parameters = [{
            'params': [p for n, p in param_optimizer
                       if not any(nd in n for nd in no_decay)],
            'weight_decay': 0.01
        }, {
            'params': [p for n, p in param_optimizer
                       if any(nd in n for nd in no_decay)],
            'weight_decay': 0.0
        }]

        num_train_optimization_steps = np.ceil(
            len(train_loader.dataset) / config.batch_size /
            config.accumulation_steps) * config.epochs

        optimizer = OpenAIAdam(optimizer_grouped_parameters,
                               lr=config.lr,
                               warmup=0.01,
                               t_total=num_train_optimization_steps)
    else:
        raise NotImplementedError("Model is not implemented")

    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer)

    model = model.cuda()

    from apex import amp
    model, optimizer = amp.initialize(model, optimizer, opt_level="O1")
    # if distributed_rank > -1:
    #     from apex.parallel import DistributedDataParallel
    #     model = DistributedDataParallel(model)
    model = torch.nn.DataParallel(model)

    if config.resume:
        checkpoint = torch.load(config.checkpoint + "/checkpoints/best.pth")
        # import pdb; pdb.set_trace()
        new_state_dict = {}
        old_state_dict = checkpoint['model_state_dict']
        # The checkpoint was saved without DataParallel, so re-add the
        # "module." prefix before loading into the wrapped model.
        for k, v in old_state_dict.items():
            new_state_dict["module." + k] = v
        model.load_state_dict(new_state_dict)
        optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
        scheduler.load_state_dict(checkpoint['scheduler_state_dict'])
        criterion.load_state_dict(checkpoint['criterion_state_dict'])
        print("!!! Loaded checkpoint ", config.checkpoint + "/checkpoints/best.pth")

    identity_valid = valid_df[config.identity_columns].copy()
    target_valid = valid_df.target.values
    auc_callback = AucCallback(identity=identity_valid, target=target_valid)
    checkpoint_callback = IterationCheckpointCallback(
        save_n_last=2000,
        num_iters=10000,
    )

    # Model runner
    runner = ModelRunner()

    # Model training
    runner.train(model=model,
                 criterion=criterion,
                 optimizer=optimizer,
                 scheduler=scheduler,
                 loaders=loaders,
                 main_metric='auc',
                 minimize_metric=False,
                 logdir=config.checkpoint,
                 num_epochs=config.epochs,
                 verbose=True,
                 fp16={"opt_level": "O1"},
                 callbacks=[auc_callback, checkpoint_callback])

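# NOTE: the resume branch above expects a checkpoint dict with the keys
# model_state_dict, optimizer_state_dict, scheduler_state_dict, and
# criterion_state_dict, with model weights stored WITHOUT the "module." prefix
# that DataParallel adds (the loader re-inserts it). A minimal sketch of the
# matching save step is shown below; the function name and where it would be
# called from are assumptions, not part of the original code.
def save_resume_checkpoint(model, optimizer, scheduler, criterion, path):
    # model is wrapped in torch.nn.DataParallel, so serialize model.module to
    # keep the state-dict keys free of the "module." prefix.
    torch.save({
        'model_state_dict': model.module.state_dict(),
        'optimizer_state_dict': optimizer.state_dict(),
        'scheduler_state_dict': scheduler.state_dict(),
        'criterion_state_dict': criterion.state_dict(),
    }, path)
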
                          num_workers=train_nworkers)
test_dataset = torchvision.datasets.MNIST(root=data_root,
                                          train=False,
                                          download=True,
                                          transform=data_transform)
test_loader = DataLoader(test_dataset,
                         shuffle=True,
                         batch_size=test_bs,
                         pin_memory=True,
                         num_workers=test_nworkers)

device = utils.select_device(force_cpu=False)

# instantiate network
net = SimpleConvNet(1, 10).to(device)

# criterion
loss_fn = CustomLoss()

# optimizer
optimizer = torch.optim.SGD(net.parameters(), lr=learning_rate, momentum=0.5)

# load checkpoint if needed
start_epoch = 0
start_niter = 0
if resume:
    ckpt = utils.load_checkpoint(ckpt_path)  # custom method for loading last checkpoint
    net.load_state_dict(ckpt['model_state'])
    start_epoch = ckpt['epoch']
    start_niter = ckpt['niter']
    optimizer.load_state_dict(ckpt['optim_state'])
    print("last checkpoint restored")

# if we want to run the experiment on multiple GPUs we move the models there