def run_one_fold(fold_id):
    with timer('load csv data'):
        debug = config.DEBUG
        df_train = pd.read_csv(
            config.TRAIN_PATH).dropna().reset_index(drop=True)

        if debug:
            df_train = df_train.sample(
                1000, random_state=SEED).dropna().reset_index(drop=True)

        # Some rows have a selected_text that does not match the text.
        # https://www.kaggle.com/c/tweet-sentiment-extraction/discussion/142011
        df_train.loc[df_train['sentiment'] == 'neutral',
                     'selected_text'] = df_train[
                         df_train['sentiment'] == 'neutral']['text']

        num_folds = config.NUM_FOLDS
        # shuffle=True is required for random_state to take effect
        kf = StratifiedKFold(n_splits=num_folds, shuffle=True,
                             random_state=SEED)
        splits = list(kf.split(X=df_train, y=df_train[['sentiment']]))
        train_idx = splits[fold_id][0]
        val_idx = splits[fold_id][1]
        print(len(train_idx), len(val_idx))

        gc.collect()

    with timer('prepare validation data'):
        train_dataset = TweetDataset(
            tweet=df_train.iloc[train_idx].text.values,
            sentiment=df_train.iloc[train_idx].sentiment.values,
            selected_text=df_train.iloc[train_idx].selected_text.values)

        train_loader = torch.utils.data.DataLoader(
            train_dataset,
            shuffle=True,
            batch_size=config.TRAIN_BATCH_SIZE,
            num_workers=0,
            pin_memory=True)

        val_dataset = TweetDataset(
            tweet=df_train.iloc[val_idx].text.values,
            sentiment=df_train.iloc[val_idx].sentiment.values,
            selected_text=df_train.iloc[val_idx].selected_text.values)

        val_loader = torch.utils.data.DataLoader(
            val_dataset,
            shuffle=False,
            batch_size=config.VALID_BATCH_SIZE,
            num_workers=0,
            pin_memory=True)

        del train_dataset, val_dataset
        gc.collect()

    with timer('create model'):
        device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
        model = TweetRoBERTaModel(config.ROBERTA_PATH)
        model = model.to(device)

        # no weight decay for bias and LayerNorm parameters
        param_optimizer = list(model.named_parameters())
        no_decay = ["bias", "LayerNorm.bias", "LayerNorm.weight"]
        optimizer_parameters = [
            {
                'params': [
                    p for n, p in param_optimizer
                    if not any(nd in n for nd in no_decay)
                ],
                'weight_decay': 0.001
            },
            {
                'params': [
                    p for n, p in param_optimizer
                    if any(nd in n for nd in no_decay)
                ],
                'weight_decay': 0.0
            },
        ]

        num_train_steps = int(
            len(df_train) / config.TRAIN_BATCH_SIZE * config.EPOCHS)
        optimizer = transformers.AdamW(optimizer_parameters, lr=3e-5)
        scheduler = transformers.get_linear_schedule_with_warmup(
            optimizer, num_warmup_steps=0, num_training_steps=num_train_steps)

        model = nn.DataParallel(model)

        # pretrain_path = 'models/exp11_fold0.pth'
        # model.load_state_dict(torch.load(pretrain_path))
        # LOGGER.info('pretrained model (exp11) loaded')

    with timer('training loop'):
        best_score = -999
        best_epoch = 0
        patience = 3
        p = 0
        for epoch in range(1, config.EPOCHS + 1):
            LOGGER.info("Starting {} epoch...".format(epoch))
            engine.train_fn(train_loader, model, optimizer, device, scheduler)
            score = engine.eval_fn(val_loader, model, device)
            LOGGER.info(f"Jaccard Score = {score}")
            if score > best_score:
                best_score = score
                best_epoch = epoch
                torch.save(
                    model.state_dict(),
                    os.path.join(config.OUT_DIR,
                                 '{}_fold{}.pth'.format(EXP_ID, fold_id)))
                LOGGER.info("save model at score={} on epoch={}".format(
                    best_score, best_epoch))
                p = 0
            if p > 0:
                LOGGER.info(
                    f'best score is not updated while {p} epochs of training')
            p += 1
            if p > patience:
                LOGGER.info('Early Stopping')
                break

    LOGGER.info("best score={} on epoch={}".format(best_score, best_epoch))
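
# --- illustrative sketch, not part of the original scripts ---
# engine.eval_fn above reports a "Jaccard Score"; the official Tweet
# Sentiment Extraction metric is word-level Jaccard, assumed to be
# computed roughly like this:
def jaccard(str1, str2):
    a = set(str1.lower().split())
    b = set(str2.lower().split())
    c = a.intersection(b)
    return float(len(c)) / (len(a) + len(b) - len(c))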
def run_one_fold(fold_id):
    fnc_df = pd.read_csv(config.FNC_PATH)
    loading_df = pd.read_csv(config.LOADING_PATH)
    labels_df = pd.read_csv(config.TRAIN_SCORES_PATH)

    fnc_features, loading_features = list(fnc_df.columns[1:]), list(
        loading_df.columns[1:])
    df = fnc_df.merge(loading_df, on="Id")
    labels_df["is_train"] = True
    df = df.merge(labels_df, on="Id", how="left")
    # bin age into decades for stratification
    df['bin_age'] = pd.cut(df['age'], list(range(0, 100, 10)), labels=False)

    df_test = df[df["is_train"] != True].copy()
    df_train = df[df["is_train"] == True].copy()

    num_folds = config.NUM_FOLDS
    kf = StratifiedKFold(n_splits=num_folds, shuffle=True, random_state=SEED)
    splits = list(kf.split(X=df_train, y=df_train[['bin_age']]))
    train_idx = splits[fold_id][0]
    val_idx = splits[fold_id][1]

    target_cols = [
        'age', 'domain1_var1', 'domain1_var2', 'domain2_var1', 'domain2_var2'
    ]

    print(len(train_idx), len(val_idx))

    train_dataset = TReNDSDataset(df=df_train,
                                  target_cols=target_cols,
                                  indices=train_idx,
                                  map_path=config.TRAIN_MAP_PATH)
    train_loader = torch.utils.data.DataLoader(
        train_dataset,
        shuffle=True,
        batch_size=config.TRAIN_BATCH_SIZE,
        num_workers=0,
        pin_memory=True)

    val_dataset = TReNDSDataset(df=df_train,
                                target_cols=target_cols,
                                indices=val_idx,
                                map_path=config.TRAIN_MAP_PATH)
    val_loader = torch.utils.data.DataLoader(
        val_dataset,
        shuffle=False,
        batch_size=config.VALID_BATCH_SIZE,
        num_workers=0,
        pin_memory=True)

    del train_dataset, val_dataset
    gc.collect()

    device = config.DEVICE
    model = resnet34()

    # Load only the pretrained weights whose keys exist in the current model.
    # https://github.com/Tencent/MedicalNet/blob/35ecd5be96ae4edfc1be29816f9847c11d067db0/model.py#L89
    net_dict = model.state_dict()
    # pretrain = torch.load("inputs/pretrain/resnet_10.pth")
    LOGGER.info('pytorch 3d model pretrained weight loading ...')
    pretrain = torch.load("inputs/r3d34_K_200ep.pth")
    pretrain_dict = {
        k: v
        for k, v in pretrain['state_dict'].items() if k in net_dict.keys()
    }
    net_dict.update(pretrain_dict)
    model.load_state_dict(net_dict)
    print("pretrained model loaded !")

    model = model.to(device)

    optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)
    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer,
                                                           T_max=30,
                                                           eta_min=1e-6)
    # scheduler = lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.1, patience=2, min_lr=1e-5)

    patience = 3
    p = 0
    min_loss = 999
    best_score = -999
    best_epoch = 0
    for epoch in range(1, config.EPOCHS + 1):
        LOGGER.info("Starting {} epoch...".format(epoch))
        engine.train_fn(train_loader, model, optimizer, device, scheduler)
        score, val_loss = engine.eval_fn(val_loader, model, device)
        scheduler.step()
        # scheduler.step(val_loss)

        if val_loss < min_loss:
            min_loss = val_loss
            best_score = score
            best_epoch = epoch
            torch.save(
                model.state_dict(),
                os.path.join(config.OUT_DIR,
                             '{}_fold{}.pth'.format(EXP_ID, fold_id)))
            LOGGER.info("val loss is {}".format(val_loss))
            LOGGER.info("save model at score={} on epoch={}".format(
                best_score, best_epoch))
            p = 0
        if p > 0:
            LOGGER.info(
                f'val loss is not updated while {p} epochs of training')
        p += 1
        if p > patience:
            LOGGER.info('Early Stopping')
            break

    LOGGER.info("best score={} on epoch={}".format(best_score, best_epoch))
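
# --- illustrative sketch, not part of the original scripts ---
# The name-filtered loading above can still pair a matching key with a
# tensor of the wrong shape (e.g. a head with a different output size);
# a slightly safer variant of the same pattern also checks shapes before
# updating. `filter_pretrained` is a hypothetical helper, not the
# author's code.
def filter_pretrained(pretrain_state_dict, net_dict):
    return {
        k: v
        for k, v in pretrain_state_dict.items()
        if k in net_dict and v.shape == net_dict[k].shape
    }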
def run_one_fold(fold_id):
    with timer('load csv data'):
        debug = config.DEBUG
        df_train = pd.read_csv(
            config.TRAIN_PATH).dropna().reset_index(drop=True)

        if debug:
            df_train = df_train.sample(
                1000, random_state=SEED).dropna().reset_index(drop=True)

        num_folds = 5
        kf = StratifiedKFold(n_splits=num_folds, shuffle=True,
                             random_state=SEED)
        splits = list(kf.split(X=df_train, y=df_train[['sentiment']]))
        train_idx = splits[fold_id][0]
        val_idx = splits[fold_id][1]
        print(len(train_idx), len(val_idx))

        gc.collect()

    with timer('prepare validation data'):
        train_dataset = TweetDataset(
            tweet=df_train.iloc[train_idx].text.values,
            sentiment=df_train.iloc[train_idx].sentiment.values,
            selected_text=df_train.iloc[train_idx].selected_text.values)

        train_loader = torch.utils.data.DataLoader(
            train_dataset,
            shuffle=True,
            batch_size=config.TRAIN_BATCH_SIZE,
            num_workers=0,
            pin_memory=True)

        val_dataset = TweetDataset(
            tweet=df_train.iloc[val_idx].text.values,
            sentiment=df_train.iloc[val_idx].sentiment.values,
            selected_text=df_train.iloc[val_idx].selected_text.values)

        val_loader = torch.utils.data.DataLoader(
            val_dataset,
            shuffle=False,
            batch_size=config.VALID_BATCH_SIZE,
            num_workers=0,
            pin_memory=True)

        del train_dataset, val_dataset
        gc.collect()

    with timer('create model'):
        device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
        model = BERTBaseUncased()
        model = model.to(device)

        # t_max=10
        # scheduler_cosine = CosineAnnealingLR(optimizer, T_max=t_max)
        # scheduler = GradualWarmupScheduler(optimizer, multiplier=1.1, total_epoch=5,
        #                                    after_scheduler=scheduler_cosine)

        param_optimizer = list(model.named_parameters())
        no_decay = ["bias", "LayerNorm.bias", "LayerNorm.weight"]
        optimizer_parameters = [
            {
                'params': [
                    p for n, p in param_optimizer
                    if not any(nd in n for nd in no_decay)
                ],
                'weight_decay': 0.001
            },
            {
                'params': [
                    p for n, p in param_optimizer
                    if any(nd in n for nd in no_decay)
                ],
                'weight_decay': 0.0
            },
        ]

        num_train_steps = int(
            len(df_train) / config.TRAIN_BATCH_SIZE * config.EPOCHS)
        optimizer = transformers.AdamW(optimizer_parameters, lr=5e-5)
        scheduler = transformers.get_linear_schedule_with_warmup(
            optimizer, num_warmup_steps=0, num_training_steps=num_train_steps)

        model = nn.DataParallel(model)

    with timer('training loop'):
        best_score = -999
        best_epoch = 0
        for epoch in range(1, config.EPOCHS + 1):
            LOGGER.info("Starting {} epoch...".format(epoch))
            engine.train_fn(train_loader, model, optimizer, device, scheduler)
            score, val_outputs = engine.eval_fn(val_loader, model, device)
            LOGGER.info(f"Jaccard Score = {score}")
            if score > best_score:
                best_score = score
                best_epoch = epoch
                torch.save(
                    model.state_dict(),
                    os.path.join(config.OUT_DIR,
                                 '{}_fold{}.pth'.format(EXP_ID, fold_id)))
                # keep out-of-fold predictions for the best epoch
                to_pickle(
                    os.path.join(config.OUT_DIR,
                                 "{}_fold{}_oof.pkl".format(EXP_ID, fold_id)),
                    [val_idx, val_outputs])
                LOGGER.info("save model at score={} on epoch={}".format(
                    best_score, best_epoch))

    LOGGER.info("best score={} on epoch={}".format(best_score, best_epoch))
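
# --- illustrative sketch, not part of the original scripts ---
# `to_pickle` is assumed to be a small serialization helper defined
# elsewhere in the repo, along these lines:
import pickle

def to_pickle(path, obj):
    with open(path, 'wb') as f:
        pickle.dump(obj, f)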
def run_one_fold(fold_id):
    fnc_df = pd.read_csv(config.FNC_PATH)
    loading_df = pd.read_csv(config.LOADING_PATH)
    labels_df = pd.read_csv(config.TRAIN_SCORES_PATH)

    fnc_features, loading_features = list(fnc_df.columns[1:]), list(
        loading_df.columns[1:])
    df = fnc_df.merge(loading_df, on="Id")
    labels_df["is_train"] = True
    df = df.merge(labels_df, on="Id", how="left")
    df['bin_age'] = pd.cut(df['age'], list(range(0, 100, 10)), labels=False)

    df_test = df[df["is_train"] != True].copy()
    df_train = df[df["is_train"] == True].copy()

    num_folds = config.NUM_FOLDS
    kf = StratifiedKFold(n_splits=num_folds, shuffle=True, random_state=SEED)
    splits = list(kf.split(X=df_train, y=df_train[['bin_age']]))
    train_idx = splits[fold_id][0]
    val_idx = splits[fold_id][1]

    target_cols = [
        'age', 'domain1_var1', 'domain1_var2', 'domain2_var1', 'domain2_var2'
    ]

    print(len(train_idx), len(val_idx))

    train_dataset = TReNDSDataset(df=df_train,
                                  target_cols=target_cols,
                                  indices=train_idx,
                                  map_path=config.TRAIN_MAP_PATH)
    train_loader = torch.utils.data.DataLoader(
        train_dataset,
        shuffle=True,
        batch_size=config.TRAIN_BATCH_SIZE,
        num_workers=0,
        pin_memory=True)

    val_dataset = TReNDSDataset(df=df_train,
                                target_cols=target_cols,
                                indices=val_idx,
                                map_path=config.TRAIN_MAP_PATH)
    val_loader = torch.utils.data.DataLoader(
        val_dataset,
        shuffle=False,
        batch_size=config.VALID_BATCH_SIZE,
        num_workers=0,
        pin_memory=True)

    del train_dataset, val_dataset
    gc.collect()

    device = config.DEVICE
    model = resnet10()
    model = model.to(device)

    optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)
    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer,
                                                           T_max=30,
                                                           eta_min=1e-6)
    # scheduler = lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.1, patience=2, min_lr=1e-5)

    patience = 5
    p = 0
    min_loss = 999
    best_score = -999
    best_epoch = 0
    for epoch in range(1, config.EPOCHS + 1):
        print("Starting {} epoch...".format(epoch))
        engine.train_fn(train_loader, model, optimizer, device, scheduler)
        score, val_loss = engine.eval_fn(val_loader, model, device)
        scheduler.step()
        # scheduler.step(val_loss)

        if val_loss < min_loss:
            min_loss = val_loss
            best_score = score
            best_epoch = epoch
            torch.save(
                model.state_dict(),
                os.path.join(config.OUT_DIR,
                             '{}_fold{}.pth'.format(EXP_ID, fold_id)))
            print("save model at score={} on epoch={}".format(
                best_score, best_epoch))
            p = 0
        if p > 0:
            print(f'val loss is not updated while {p} epochs of training')
        p += 1
        if p > patience:
            print('Early Stopping')
            break

    print("best score={} on epoch={}".format(best_score, best_epoch))
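
# --- illustrative sketch, not part of the original scripts ---
# The entry point is not shown in these snippets; a fold runner like this
# is typically driven by a loop over fold ids (config.NUM_FOLDS as above):
if __name__ == '__main__':
    for fold_id in range(config.NUM_FOLDS):
        run_one_fold(fold_id)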
def run_one_fold(fold_id):
    df_train = pd.read_csv(config.TRAIN_PATH)
    print(df_train.shape)

    DEBUG = 0
    if DEBUG:
        df_train = df_train.head(100)

    TARGETS = 'isup_grade'

    kf = StratifiedKFold(n_splits=config.NUM_FOLDS, shuffle=True,
                         random_state=SEED)
    splits = list(kf.split(X=df_train, y=df_train[TARGETS].values))
    train_idx = splits[fold_id][0]
    val_idx = splits[fold_id][1]

    train_dataset = PANDADataset(df=df_train,
                                 indices=train_idx,
                                 transform=data_transforms)
    train_loader = torch.utils.data.DataLoader(
        train_dataset,
        shuffle=True,
        batch_size=config.TRAIN_BATCH_SIZE,
        num_workers=0,
        pin_memory=True)

    val_dataset = PANDADataset(df=df_train,
                               indices=val_idx,
                               transform=data_transforms_test)
    val_loader = torch.utils.data.DataLoader(
        val_dataset,
        shuffle=False,
        batch_size=config.VALID_BATCH_SIZE,
        num_workers=0,
        pin_memory=True)

    del train_dataset, val_dataset
    gc.collect()

    device = config.DEVICE
    model = CustomSEResNeXt(model_name='se_resnext50_32x4d')
    model = model.to(device)

    optimizer = torch.optim.Adam(model.parameters(), lr=config.LR)
    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer,
                                                           T_max=30,
                                                           eta_min=1e-6)
    # scheduler = lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.1, patience=2, min_lr=1e-5)

    patience = config.PATIENCE
    p = 0
    min_loss = 999
    best_score = -999
    best_epoch = 0
    for epoch in range(1, config.EPOCHS + 1):
        LOGGER.info("Starting {} epoch...".format(epoch))
        engine.train_fn(train_loader, model, optimizer, device, scheduler)
        score, val_loss, val_ids, val_preds = engine.eval_fn(
            val_loader, model, device)
        scheduler.step()
        # scheduler.step(val_loss)

        if val_loss < min_loss:
            min_loss = val_loss
            best_score = score
            best_epoch = epoch
            torch.save(
                model.state_dict(),
                os.path.join(config.OUT_DIR,
                             '{}_fold{}.pth'.format(EXP_ID, fold_id)))
            LOGGER.info("save model at score={} on epoch={}".format(
                best_score, best_epoch))
            p = 0
        if p > 0:
            LOGGER.info(
                f'val loss is not updated while {p} epochs of training')
        p += 1
        if p > patience:
            LOGGER.info('Early Stopping')
            break

    # NOTE: these are the predictions from the last evaluated epoch,
    # not necessarily from the best epoch
    to_pickle(
        os.path.join(config.OUT_DIR,
                     '{}_fold{}.pkl'.format(EXP_ID, fold_id)),
        [val_ids, val_preds])

    LOGGER.info("best score={} on epoch={}".format(best_score, best_epoch))
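
# --- illustrative sketch, not part of the original scripts ---
# The PANDA competition metric is quadratic weighted kappa; the `score`
# returned by engine.eval_fn is presumably computed along these lines
# (sklearn's cohen_kappa_score supports quadratic weights directly):
from sklearn.metrics import cohen_kappa_score

def quadratic_weighted_kappa(y_true, y_pred):
    return cohen_kappa_score(y_true, y_pred, weights='quadratic')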
def run_one_fold(fold_id):
    with timer('load csv data'):
        debug = config.DEBUG
        df_train = pd.read_csv(
            config.TRAIN_PATH).dropna().reset_index(drop=True)

        if debug:
            df_train = df_train.sample(
                1000, random_state=SEED).dropna().reset_index(drop=True)

        num_folds = 5
        kf = StratifiedKFold(n_splits=num_folds, shuffle=True,
                             random_state=SEED)
        splits = list(kf.split(X=df_train, y=df_train[['sentiment']]))
        train_idx = splits[fold_id][0]
        val_idx = splits[fold_id][1]
        print(len(train_idx), len(val_idx))

        gc.collect()

    with timer('prepare validation data'):
        train_dataset = TweetDataset(
            tweet=df_train.iloc[train_idx].text.values,
            sentiment=df_train.iloc[train_idx].sentiment.values,
            selected_text=df_train.iloc[train_idx].selected_text.values)

        train_loader = torch.utils.data.DataLoader(
            train_dataset,
            shuffle=True,
            batch_size=config.TRAIN_BATCH_SIZE,
            num_workers=0,
            pin_memory=True)

        val_dataset = TweetDataset(
            tweet=df_train.iloc[val_idx].text.values,
            sentiment=df_train.iloc[val_idx].sentiment.values,
            selected_text=df_train.iloc[val_idx].selected_text.values)

        val_loader = torch.utils.data.DataLoader(
            val_dataset,
            shuffle=False,
            batch_size=config.VALID_BATCH_SIZE,
            num_workers=0,
            pin_memory=True)

        del train_dataset, val_dataset
        gc.collect()

    with timer('create model'):
        device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
        # model_config = transformers.BertConfig.from_pretrained(config.BERT_PATH)
        model_config = transformers.BertConfig.from_pretrained(
            "bert-large-uncased-whole-word-masking", output_hidden_states=True)
        # model_config.output_hidden_states = True
        model = TweetModelLargeWWM("bert-large-uncased-whole-word-masking",
                                   model_config)
        model = model.to(device)

        # t_max=10
        # scheduler_cosine = CosineAnnealingLR(optimizer, T_max=t_max)
        # scheduler = GradualWarmupScheduler(optimizer, multiplier=1.1, total_epoch=5,
        #                                    after_scheduler=scheduler_cosine)

        param_optimizer = list(model.named_parameters())
        no_decay = ["bias", "LayerNorm.bias", "LayerNorm.weight"]
        optimizer_parameters = [
            {
                'params': [
                    p for n, p in param_optimizer
                    if not any(nd in n for nd in no_decay)
                ],
                'weight_decay': 0.001
            },
            {
                'params': [
                    p for n, p in param_optimizer
                    if any(nd in n for nd in no_decay)
                ],
                'weight_decay': 0.0
            },
        ]

        num_train_steps = int(
            len(df_train) / config.TRAIN_BATCH_SIZE * config.EPOCHS)
        optimizer = transformers.AdamW(optimizer_parameters, lr=3e-5)
        scheduler = transformers.get_linear_schedule_with_warmup(
            optimizer, num_warmup_steps=0, num_training_steps=num_train_steps)

        model = nn.DataParallel(model)

        # https://www.kaggle.com/irustandi/bertlargeuncasedwwmfinetunedsquad
        # pretrain_path = 'inputs/bert-large-uncased-wwm-finetuned-squad/pytorch_model.bin'
        # model.load_state_dict(torch.load(pretrain_path))
        # LOGGER.info('pretrained model (WWM uncased squad) loaded')

    with timer('training loop'):
        best_score = -999
        best_epoch = 0
        patience = 2
        p = 0
        for epoch in range(1, config.EPOCHS + 1):
            LOGGER.info("Starting {} epoch...".format(epoch))
            engine.train_fn(train_loader, model, optimizer, device, scheduler)
            score = engine.eval_fn(val_loader, model, device)
            LOGGER.info(f"Jaccard Score = {score}")
            if score > best_score:
                best_score = score
                best_epoch = epoch
                torch.save(
                    model.state_dict(),
                    os.path.join(config.OUT_DIR,
                                 '{}_fold{}.pth'.format(EXP_ID, fold_id)))
                LOGGER.info("save model at score={} on epoch={}".format(
                    best_score, best_epoch))
                p = 0
            if p > 0:
                LOGGER.info(
                    f'best score is not updated while {p} epochs of training')
            p += 1
            if p > patience:
                LOGGER.info('Early Stopping')
                break

    LOGGER.info("best score={} on epoch={}".format(best_score, best_epoch))