def run_one_fold(fold_id):

    with timer('load csv data'):
        debug = config.DEBUG
        df_train = pd.read_csv(
            config.TRAIN_PATH).dropna().reset_index(drop=True)
        if debug:
            df_train = df_train.sample(
                1000, random_state=SEED).dropna().reset_index(drop=True)

        # Some neutral selected_text values do not match the text.
        # https://www.kaggle.com/c/tweet-sentiment-extraction/discussion/142011
        df_train.loc[df_train['sentiment'] == 'neutral',
                     'selected_text'] = df_train[df_train['sentiment'] ==
                                                 'neutral']['text']

        num_folds = config.NUM_FOLDS
        kf = StratifiedKFold(n_splits=num_folds, random_state=SEED)
        splits = list(kf.split(X=df_train, y=df_train[['sentiment']]))
        train_idx = splits[fold_id][0]
        val_idx = splits[fold_id][1]
        print(len(train_idx), len(val_idx))

        gc.collect()

    with timer('prepare validation data'):
        train_dataset = TweetDataset(
            tweet=df_train.iloc[train_idx].text.values,
            sentiment=df_train.iloc[train_idx].sentiment.values,
            selected_text=df_train.iloc[train_idx].selected_text.values)

        train_loader = torch.utils.data.DataLoader(
            train_dataset,
            shuffle=True,
            batch_size=config.TRAIN_BATCH_SIZE,
            num_workers=0,
            pin_memory=True)

        val_dataset = TweetDataset(
            tweet=df_train.iloc[val_idx].text.values,
            sentiment=df_train.iloc[val_idx].sentiment.values,
            selected_text=df_train.iloc[val_idx].selected_text.values)

        val_loader = torch.utils.data.DataLoader(
            val_dataset,
            shuffle=False,
            batch_size=config.VALID_BATCH_SIZE,
            num_workers=0,
            pin_memory=True)

        del train_dataset, val_dataset
        gc.collect()

    with timer('create model'):
        device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
        model = TweetRoBERTaModel(config.ROBERTA_PATH)
        model = model.to(device)

        param_optimizer = list(model.named_parameters())
        no_decay = ["bias", "LayerNorm.bias", "LayerNorm.weight"]
        optimizer_parameters = [
            {
                'params': [
                    p for n, p in param_optimizer
                    if not any(nd in n for nd in no_decay)
                ],
                'weight_decay': 0.001
            },
            {
                'params': [
                    p for n, p in param_optimizer
                    if any(nd in n for nd in no_decay)
                ],
                'weight_decay': 0.0
            },
        ]

        num_train_steps = int(
            len(df_train) / config.TRAIN_BATCH_SIZE * config.EPOCHS)
        optimizer = transformers.AdamW(optimizer_parameters, lr=3e-5)
        scheduler = transformers.get_linear_schedule_with_warmup(
            optimizer, num_warmup_steps=0, num_training_steps=num_train_steps)

        model = nn.DataParallel(model)

        # pretrain_path = 'models/exp11_fold0.pth'
        # model.load_state_dict(torch.load(pretrain_path))
        # LOGGER.info(f'pretrained model (exp11) loaded')

    with timer('training loop'):
        best_score = -999
        best_epoch = 0
        patience = 3
        p = 0
        for epoch in range(1, config.EPOCHS + 1):

            LOGGER.info("Starting {} epoch...".format(epoch))
            engine.train_fn(train_loader, model, optimizer, device, scheduler)
            score = engine.eval_fn(val_loader, model, device)

            LOGGER.info(f"Jaccard Score = {score}")
            if score > best_score:
                best_score = score
                best_epoch = epoch
                torch.save(
                    model.state_dict(),
                    os.path.join(config.OUT_DIR,
                                 '{}_fold{}.pth'.format(EXP_ID, fold_id)))
                LOGGER.info("save model at score={} on epoch={}".format(
                    best_score, best_epoch))
                p = 0
            if p > 0:
                LOGGER.info(
                    f'best score is not updated while {p} epochs of training')
            p += 1
            if p > patience:
                LOGGER.info(f'Early Stopping')
                break

        LOGGER.info("best score={} on epoch={}".format(best_score, best_epoch))
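# engine.eval_fn is defined elsewhere in the repo; the "Jaccard Score" it logs
# is presumably the competition's word-level Jaccard metric. A minimal sketch
# of that metric, assuming the standard definition from the Kaggle evaluation
# page (not the project's own implementation):
def word_level_jaccard(str1, str2):
    a = set(str1.lower().split())
    b = set(str2.lower().split())
    if len(a) == 0 and len(b) == 0:
        return 0.5
    c = a.intersection(b)
    return float(len(c)) / (len(a) + len(b) - len(c))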
    return mod


# replace relu to prelu
def convert_model_ReLU2PReLU(module):
    mod = module
    if isinstance(module, torch.nn.ReLU):
        mod = nn.PReLU()
    for name, child in module.named_children():
        mod.add_module(name, convert_model_ReLU2PReLU(child))
    return mod


batch_size_list = [36, 42, 64]

with timer('load csv data'):
    fold_id = 0
    epochs = 45
    batch_size = batch_size_list[0]

    train = pd.read_csv('input/train.csv')
    y = train[["grapheme_root", "vowel_diacritic", "consonant_diacritic"]]

    num_folds = 5
    kf = MultilabelStratifiedKFold(n_splits=num_folds, random_state=SEED)
    splits = list(kf.split(X=train, y=y))
    train_idx = splits[fold_id][0]
    val_idx = splits[fold_id][1]
    gc.collect()
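# Hypothetical usage of convert_model_ReLU2PReLU: recursively swap every ReLU
# in a torchvision ResNet-34 for a learnable PReLU. The torchvision model here
# is only an illustration; the repo applies the conversion to its own networks.
import torchvision

backbone = torchvision.models.resnet34()
backbone = convert_model_ReLU2PReLU(backbone)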
def keroppinet34(pretrained=False, **kwargs):
    """Constructs a ResNet-34 model.

    Args:
        pretrained (bool): If True, returns a model pre-trained on ImageNet
    """
    model = ResMagicNet(BasicBlock, [3, 4, 6, 3], **kwargs)
    if pretrained:
        raise NotImplementedError()
    return model


batch_size_list = [36, 42, 64]

with timer('load csv data'):
    fold_id = 0
    epochs = 45
    batch_size = batch_size_list[1]

    train = pd.read_csv('input/train.csv')
    y = train[["grapheme_root", "vowel_diacritic", "consonant_diacritic"]]

    num_folds = 5
    kf = MultilabelStratifiedKFold(n_splits=num_folds, random_state=SEED)
    splits = list(kf.split(X=train, y=y))
    train_idx = splits[fold_id][0]
    val_idx = splits[fold_id][1]
    # train_idx, val_idx = train_test_split(train.index.tolist(), test_size=0.15,
    #                                       random_state=SEED, stratify=train["vowel_diacritic"])
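# The timer context manager used throughout these scripts is defined elsewhere
# in the repo. A minimal sketch under the assumption that it simply reports the
# wall-clock time of each named block (the real helper may log through LOGGER):
import time
from contextlib import contextmanager


@contextmanager
def timer(name):
    t0 = time.time()
    yield
    print('[{}] done in {:.1f} s'.format(name, time.time() - t0))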
def run_one_fold(fold_id):

    with timer('load csv data'):
        debug = config.DEBUG
        df_train = pd.read_csv(
            config.TRAIN_PATH).dropna().reset_index(drop=True)
        if debug:
            df_train = df_train.sample(
                1000, random_state=SEED).dropna().reset_index(drop=True)

        # df_train['text'] = df_train['text'].apply(lambda x: remove_initial_white_space(x))
        df_train['text'] = df_train['text'].apply(
            lambda x: ' '.join(x.split()))

        # neutral_texts = df_train[df_train['sentiment']=='neutral']['text']
        # neutral_selected_texts = df_train[df_train['sentiment']=='neutral']['selected_text']
        # df_train['is_text_start_with_white_space'] = df_train['text'].apply(lambda x: check_initial_white_space(x))
        # df_train['text_equal_selected_text'] = neutral_texts == neutral_selected_texts
        # df_pos = df_train[df_train['sentiment']=='positive']
        # df_neg = df_train[df_train['sentiment']=='negative']
        # Neutral samples whose selected_text does not cover the full text.
        # df_neutral_use = df_train[(df_train['text_equal_selected_text']==0)&(df_train['is_text_start_with_white_space']==0)]
        # df_train = pd.concat([df_pos, df_neg, df_neutral_use]).reset_index(drop=True)

        df_train['text_token_len'] = df_train['text'].apply(
            lambda x: len(config.TOKENIZER.encode(x).ids))
        df_train['selected_text_token_len'] = df_train['selected_text'].apply(
            lambda x: len(config.TOKENIZER.encode(x).ids))
        df_train['total_token_len'] = df_train['text_token_len'] + df_train[
            'selected_text_token_len']
        df_train = df_train[df_train['total_token_len'] < 97].reset_index(
            drop=True)
        # df_train = df_train[df_train['sentiment']!='neutral'].reset_index(drop=True)

        num_folds = config.NUM_FOLDS
        kf = StratifiedKFold(n_splits=num_folds, random_state=SEED)
        splits = list(kf.split(X=df_train, y=df_train[['sentiment']]))
        train_idx = splits[fold_id][0]
        val_idx = splits[fold_id][1]
        print(len(train_idx), len(val_idx))

        gc.collect()

    with timer('prepare validation data'):
        train_dataset = TweetDataset(
            tweet=df_train.iloc[train_idx].text.values,
            sentiment=df_train.iloc[train_idx].sentiment.values,
            selected_text=df_train.iloc[train_idx].selected_text.values)

        train_loader = torch.utils.data.DataLoader(
            train_dataset,
            shuffle=True,
            batch_size=config.TRAIN_BATCH_SIZE,
            num_workers=0,
            pin_memory=True)

        val_dataset = TweetDataset(
            tweet=df_train.iloc[val_idx].text.values,
            sentiment=df_train.iloc[val_idx].sentiment.values,
            selected_text=df_train.iloc[val_idx].selected_text.values)

        val_loader = torch.utils.data.DataLoader(
            val_dataset,
            shuffle=False,
            batch_size=config.VALID_BATCH_SIZE,
            num_workers=0,
            pin_memory=True)

        del train_dataset, val_dataset
        gc.collect()

    with timer('create model'):
        device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
        # model_config = transformers.RobertaConfig.from_pretrained(roberta_path)
        model = transformers.RobertaForQuestionAnswering.from_pretrained(
            'roberta-base')
        # model = TweetRoBERTaModelSimple(config.ROBERTA_PATH)
        model = model.to(device)

        param_optimizer = list(model.named_parameters())
        no_decay = ["bias", "LayerNorm.bias", "LayerNorm.weight"]
        optimizer_parameters = [
            {
                'params': [
                    p for n, p in param_optimizer
                    if not any(nd in n for nd in no_decay)
                ],
                'weight_decay': 0.001
            },
            {
                'params': [
                    p for n, p in param_optimizer
                    if any(nd in n for nd in no_decay)
                ],
                'weight_decay': 0.0
            },
        ]

        num_train_steps = int(
            len(df_train) / config.TRAIN_BATCH_SIZE * config.EPOCHS)
        optimizer = transformers.AdamW(optimizer_parameters,
                                       lr=3e-5,
                                       correct_bias=False)
        scheduler = transformers.get_linear_schedule_with_warmup(
            optimizer, num_warmup_steps=0, num_training_steps=num_train_steps)

        model = nn.DataParallel(model)

        # pretrain_path = 'models/exp11_fold0.pth'
        # model.load_state_dict(torch.load(pretrain_path))
        # LOGGER.info(f'pretrained model (exp11) loaded')

    with timer('training loop'):
        min_loss = 999
        best_score = -999
        best_epoch = 0
        patience = 3
        p = 0
        for epoch in range(1, config.EPOCHS + 1):

            LOGGER.info("Starting {} epoch...".format(epoch))
            engine.train_fn(train_loader, model, optimizer, device, scheduler)
            score, val_loss = engine.eval_fn(val_loader, model, device)

            if val_loss < min_loss:
                min_loss = val_loss
                best_score = score
                best_epoch = epoch
                torch.save(
                    model.state_dict(),
                    os.path.join(config.OUT_DIR,
                                 '{}_fold{}.pth'.format(EXP_ID, fold_id)))
                LOGGER.info("save model at score={} on epoch={}".format(
                    best_score, best_epoch))
                p = 0
            if p > 0:
                LOGGER.info(
                    f'min loss is not updated while {p} epochs of training')
            p += 1
            if p > patience:
                LOGGER.info(f'Early Stopping')
                break

        LOGGER.info("best score={} on epoch={}".format(best_score, best_epoch))
def run_one_fold(fold_id):

    with timer('load csv data'):
        debug = config.DEBUG
        df_train = pd.read_csv(
            config.TRAIN_PATH).dropna().reset_index(drop=True)
        if debug:
            df_train = df_train.sample(
                1000, random_state=SEED).dropna().reset_index(drop=True)

        num_folds = 5
        kf = StratifiedKFold(n_splits=num_folds, random_state=SEED)
        splits = list(kf.split(X=df_train, y=df_train[['sentiment']]))
        train_idx = splits[fold_id][0]
        val_idx = splits[fold_id][1]
        print(len(train_idx), len(val_idx))

        gc.collect()

    with timer('prepare validation data'):
        train_dataset = TweetDataset(
            tweet=df_train.iloc[train_idx].text.values,
            sentiment=df_train.iloc[train_idx].sentiment.values,
            selected_text=df_train.iloc[train_idx].selected_text.values)

        train_loader = torch.utils.data.DataLoader(
            train_dataset,
            shuffle=True,
            batch_size=config.TRAIN_BATCH_SIZE,
            num_workers=0,
            pin_memory=True)

        val_dataset = TweetDataset(
            tweet=df_train.iloc[val_idx].text.values,
            sentiment=df_train.iloc[val_idx].sentiment.values,
            selected_text=df_train.iloc[val_idx].selected_text.values)

        val_loader = torch.utils.data.DataLoader(
            val_dataset,
            shuffle=False,
            batch_size=config.VALID_BATCH_SIZE,
            num_workers=0,
            pin_memory=True)

        del train_dataset, val_dataset
        gc.collect()

    with timer('create model'):
        device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
        model = BERTBaseUncased()
        model = model.to(device)

        # t_max=10
        # scheduler_cosine = CosineAnnealingLR(optimizer, T_max=t_max)
        # scheduler = GradualWarmupScheduler(optimizer, multiplier=1.1, total_epoch=5,
        #                                    after_scheduler=scheduler_cosine)

        param_optimizer = list(model.named_parameters())
        no_decay = ["bias", "LayerNorm.bias", "LayerNorm.weight"]
        optimizer_parameters = [
            {
                'params': [
                    p for n, p in param_optimizer
                    if not any(nd in n for nd in no_decay)
                ],
                'weight_decay': 0.001
            },
            {
                'params': [
                    p for n, p in param_optimizer
                    if any(nd in n for nd in no_decay)
                ],
                'weight_decay': 0.0
            },
        ]

        num_train_steps = int(
            len(df_train) / config.TRAIN_BATCH_SIZE * config.EPOCHS)
        optimizer = transformers.AdamW(optimizer_parameters, lr=5e-5)
        scheduler = transformers.get_linear_schedule_with_warmup(
            optimizer, num_warmup_steps=0, num_training_steps=num_train_steps)

        model = nn.DataParallel(model)

    with timer('training loop'):
        best_score = -999
        best_epoch = 0
        for epoch in range(1, config.EPOCHS + 1):

            LOGGER.info("Starting {} epoch...".format(epoch))
            engine.train_fn(train_loader, model, optimizer, device, scheduler)
            score, val_outputs = engine.eval_fn(val_loader, model, device)

            LOGGER.info(f"Jaccard Score = {score}")
            if score > best_score:
                best_score = score
                best_epoch = epoch
                torch.save(
                    model.state_dict(),
                    os.path.join(config.OUT_DIR,
                                 '{}_fold{}.pth'.format(EXP_ID, fold_id)))
                to_pickle(
                    os.path.join(config.OUT_DIR,
                                 "{}_fold{}_oof.pkl".format(EXP_ID, fold_id)),
                    [val_idx, val_outputs])
                LOGGER.info("save model at score={} on epoch={}".format(
                    best_score, best_epoch))

        LOGGER.info("best score={} on epoch={}".format(best_score, best_epoch))
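# Hypothetical sketch of the to_pickle helper used above to dump the
# out-of-fold indices and predictions; the repo's own implementation may differ.
import pickle


def to_pickle(path, obj):
    with open(path, 'wb') as f:
        pickle.dump(obj, f)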
def run_one_fold(fold_id):

    with timer('load csv data'):
        debug = config.DEBUG
        df_train = pd.read_csv(
            config.TRAIN_PATH).dropna().reset_index(drop=True)
        if debug:
            df_train = df_train.sample(
                1000, random_state=SEED).dropna().reset_index(drop=True)

        not_neutral_df = df_train[df_train['sentiment'] != 'neutral']

        df_train['is_text_start_with_white_space'] = df_train['text'].apply(
            lambda x: check_initial_white_space(x))
        neutral_texts = df_train[df_train['sentiment'] == 'neutral']['text']
        neutral_selected_texts = df_train[df_train['sentiment'] ==
                                          'neutral']['selected_text']
        df_train[
            'text_equal_selected_text'] = neutral_texts == neutral_selected_texts

        special_df = df_train[(df_train['text_equal_selected_text'] == 0) & (
            df_train['is_text_start_with_white_space'] == 0)]
        neutral_df_a = df_train[(df_train['text_equal_selected_text'] == 0) & (
            df_train['is_text_start_with_white_space'] == 1)]
        neutral_df_b = df_train[(df_train['text_equal_selected_text'] == 1) & (
            df_train['is_text_start_with_white_space'] == 0)]

        df_train = pd.concat([not_neutral_df, neutral_df_a,
                              neutral_df_b]).reset_index(drop=True)
        len_df_train = len(df_train)
        df_train = pd.concat([df_train, special_df]).reset_index(drop=True)

        num_folds = config.NUM_FOLDS
        kf = StratifiedKFold(n_splits=num_folds, random_state=SEED)
        splits = list(
            kf.split(X=df_train[:len_df_train],
                     y=df_train[:len_df_train][['sentiment']]))
        train_idx = splits[fold_id][0]
        val_idx = splits[fold_id][1]
        train_idx = np.concatenate([
            train_idx,
            np.array([
                i for i in range(len_df_train, len_df_train + len(special_df))
            ])
        ])
        print(len(train_idx), len(val_idx))

        gc.collect()

    with timer('prepare validation data'):
        train_dataset = TweetDataset(
            tweet=df_train.iloc[train_idx].text.values,
            sentiment=df_train.iloc[train_idx].sentiment.values,
            selected_text=df_train.iloc[train_idx].selected_text.values)

        train_loader = torch.utils.data.DataLoader(
            train_dataset,
            shuffle=True,
            batch_size=config.TRAIN_BATCH_SIZE,
            num_workers=0,
            pin_memory=True)

        val_dataset = TweetDataset(
            tweet=df_train.iloc[val_idx].text.values,
            sentiment=df_train.iloc[val_idx].sentiment.values,
            selected_text=df_train.iloc[val_idx].selected_text.values)

        val_loader = torch.utils.data.DataLoader(
            val_dataset,
            shuffle=False,
            batch_size=config.VALID_BATCH_SIZE,
            num_workers=0,
            pin_memory=True)

        del train_dataset, val_dataset
        gc.collect()

    with timer('create model'):
        device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
        model = TweetRoBERTaModel(config.ROBERTA_PATH)
        # model = TweetRoBERTaModelSimple(config.ROBERTA_PATH)
        # model = TweetRoBERTaModelConv1dHeadV2(config.ROBERTA_PATH)
        model = model.to(device)

        param_optimizer = list(model.named_parameters())
        no_decay = ["bias", "LayerNorm.bias", "LayerNorm.weight"]
        optimizer_parameters = [
            {
                'params': [
                    p for n, p in param_optimizer
                    if not any(nd in n for nd in no_decay)
                ],
                'weight_decay': 0.001
            },
            {
                'params': [
                    p for n, p in param_optimizer
                    if any(nd in n for nd in no_decay)
                ],
                'weight_decay': 0.0
            },
        ]

        num_train_steps = int(
            len(df_train) / config.TRAIN_BATCH_SIZE * config.EPOCHS)
        optimizer = transformers.AdamW(optimizer_parameters,
                                       lr=3e-5,
                                       correct_bias=False)
        scheduler = transformers.get_linear_schedule_with_warmup(
            optimizer, num_warmup_steps=0, num_training_steps=num_train_steps)

        # model = nn.DataParallel(model)

        # pretrain_path = 'models/exp11_fold0.pth'
        # model.load_state_dict(torch.load(pretrain_path))
        # LOGGER.info(f'pretrained model (exp11) loaded')

    with timer('training loop'):
        best_score = -999
        best_epoch = 0
        patience = 3
        p = 0
        for epoch in range(1, config.EPOCHS + 1):

            LOGGER.info("Starting {} epoch...".format(epoch))
            engine.train_fn(train_loader, model, optimizer, device, scheduler)
            score, val_loss = engine.eval_fn(val_loader, model, device)

            if score > best_score:
                best_score = score
                best_epoch = epoch
                torch.save(
                    model.state_dict(),
                    os.path.join(config.OUT_DIR,
                                 '{}_fold{}.pth'.format(EXP_ID, fold_id)))
                LOGGER.info("save model at score={} on epoch={}".format(
                    best_score, best_epoch))
                p = 0
            if p > 0:
                LOGGER.info(
                    f'best score is not updated while {p} epochs of training')
            p += 1
            if p > patience:
                LOGGER.info(f'Early Stopping')
                break

        LOGGER.info("best score={} on epoch={}".format(best_score, best_epoch))
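# check_initial_white_space is defined elsewhere in the repo. A minimal sketch
# under the assumption that it simply flags tweets that begin with whitespace,
# returning 1/0 so it can be compared against integers as above:
def check_initial_white_space(text):
    return int(len(text) > 0 and text[0].isspace())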
def run_one_fold(fold_id):

    with timer('load csv data'):
        debug = config.DEBUG
        df_train = pd.read_csv(
            config.TRAIN_PATH).dropna().reset_index(drop=True)
        if debug:
            df_train = df_train.sample(
                1000, random_state=SEED).dropna().reset_index(drop=True)

        num_folds = 5
        kf = StratifiedKFold(n_splits=num_folds, random_state=SEED)
        splits = list(kf.split(X=df_train, y=df_train[['sentiment']]))
        train_idx = splits[fold_id][0]
        val_idx = splits[fold_id][1]
        print(len(train_idx), len(val_idx))

        gc.collect()

    with timer('prepare validation data'):
        train_dataset = TweetDataset(
            tweet=df_train.iloc[train_idx].text.values,
            sentiment=df_train.iloc[train_idx].sentiment.values,
            selected_text=df_train.iloc[train_idx].selected_text.values)

        train_loader = torch.utils.data.DataLoader(
            train_dataset,
            shuffle=True,
            batch_size=config.TRAIN_BATCH_SIZE,
            num_workers=0,
            pin_memory=True)

        val_dataset = TweetDataset(
            tweet=df_train.iloc[val_idx].text.values,
            sentiment=df_train.iloc[val_idx].sentiment.values,
            selected_text=df_train.iloc[val_idx].selected_text.values)

        val_loader = torch.utils.data.DataLoader(
            val_dataset,
            shuffle=False,
            batch_size=config.VALID_BATCH_SIZE,
            num_workers=0,
            pin_memory=True)

        del train_dataset, val_dataset
        gc.collect()

    with timer('create model'):
        device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
        # model_config = transformers.BertConfig.from_pretrained(config.BERT_PATH)
        model_config = transformers.BertConfig.from_pretrained(
            "bert-large-uncased-whole-word-masking", output_hidden_states=True)
        # model_config.output_hidden_states = True
        model = TweetModelLargeWWM("bert-large-uncased-whole-word-masking",
                                   model_config)
        model = model.to(device)

        # t_max=10
        # scheduler_cosine = CosineAnnealingLR(optimizer, T_max=t_max)
        # scheduler = GradualWarmupScheduler(optimizer, multiplier=1.1, total_epoch=5,
        #                                    after_scheduler=scheduler_cosine)

        param_optimizer = list(model.named_parameters())
        no_decay = ["bias", "LayerNorm.bias", "LayerNorm.weight"]
        optimizer_parameters = [
            {
                'params': [
                    p for n, p in param_optimizer
                    if not any(nd in n for nd in no_decay)
                ],
                'weight_decay': 0.001
            },
            {
                'params': [
                    p for n, p in param_optimizer
                    if any(nd in n for nd in no_decay)
                ],
                'weight_decay': 0.0
            },
        ]

        num_train_steps = int(
            len(df_train) / config.TRAIN_BATCH_SIZE * config.EPOCHS)
        optimizer = transformers.AdamW(optimizer_parameters, lr=3e-5)
        scheduler = transformers.get_linear_schedule_with_warmup(
            optimizer, num_warmup_steps=0, num_training_steps=num_train_steps)

        model = nn.DataParallel(model)

        # https://www.kaggle.com/irustandi/bertlargeuncasedwwmfinetunedsquad
        # pretrain_path = 'inputs/bert-large-uncased-wwm-finetuned-squad/pytorch_model.bin'
        # model.load_state_dict(torch.load(pretrain_path))
        # LOGGER.info(f'pretrained model (WWM uncased squad) loaded')

    with timer('training loop'):
        best_score = -999
        best_epoch = 0
        patience = 2
        p = 0
        for epoch in range(1, config.EPOCHS + 1):

            LOGGER.info("Starting {} epoch...".format(epoch))
            engine.train_fn(train_loader, model, optimizer, device, scheduler)
            score = engine.eval_fn(val_loader, model, device)

            LOGGER.info(f"Jaccard Score = {score}")
            if score > best_score:
                best_score = score
                best_epoch = epoch
                torch.save(
                    model.state_dict(),
                    os.path.join(config.OUT_DIR,
                                 '{}_fold{}.pth'.format(EXP_ID, fold_id)))
                LOGGER.info("save model at score={} on epoch={}".format(
                    best_score, best_epoch))
                p = 0
            if p > 0:
                LOGGER.info(
                    f'best score is not updated while {p} epochs of training')
            p += 1
            if p > patience:
                LOGGER.info(f'Early Stopping')
                break

        LOGGER.info("best score={} on epoch={}".format(best_score, best_epoch))