def run():
    """Fine-tune BERT for binary sentiment classification.

    Reads config.TRAINING_FILE, binarizes the sentiment column, trains for
    config.EPOCHS epochs and checkpoints the best model (by validation
    accuracy) to config.MODEL_PATH.
    """
    dfx = pd.read_csv(config.TRAINING_FILE).fillna("none")
    # 1 for "positive", 0 for anything else.
    dfx.sentiment = dfx.sentiment.apply(lambda x: 1 if x == "positive" else 0)

    # Stratified split keeps the class ratio identical in both folds.
    df_train, df_valid = model_selection.train_test_split(
        dfx, test_size=0.1, random_state=42, stratify=dfx.sentiment.values)
    df_train = df_train.reset_index(drop=True)
    df_valid = df_valid.reset_index(drop=True)

    train_dataset = dataset.BERTDataset(review=df_train.review.values,
                                        target=df_train.sentiment.values)
    # BUG FIX: the validation dataset was built from df_train, so the model
    # was being "validated" on its own training data. Use df_valid.
    valid_dataset = dataset.BERTDataset(review=df_valid.review.values,
                                        target=df_valid.sentiment.values)

    train_data_loader = torch.utils.data.DataLoader(
        train_dataset, batch_size=config.TRAIN_BATCH_SIZE, num_workers=4)
    valid_data_loader = torch.utils.data.DataLoader(
        valid_dataset, batch_size=config.VALID_BATCH_SIZE, num_workers=1)

    device = torch.device("cuda")
    model = BERTBaseUncased()
    model.to(device)

    # Exclude biases and LayerNorm parameters from weight decay.
    param_optimizer = list(model.named_parameters())
    no_decay = ["bias", "LayerNorm.bias", "LayerNorm.weight"]
    optimizer_parameters = [
        {
            'params': [p for n, p in param_optimizer
                       if not any(nd in n for nd in no_decay)],
            'weight_decay': 0.001,
        },
        {
            'params': [p for n, p in param_optimizer
                       if any(nd in n for nd in no_decay)],
            # BUG FIX: this group exists precisely to disable decay for the
            # no_decay parameters; it was 0.001 like the other group.
            'weight_decay': 0.0,
        },
    ]

    num_train_steps = int(
        len(df_train) / config.TRAIN_BATCH_SIZE * config.EPOCHS)
    optimizer = AdamW(optimizer_parameters, lr=3e-5)
    scheduler = get_linear_schedule_with_warmup(
        optimizer, num_warmup_steps=0, num_training_steps=num_train_steps)

    best_accuracy = 0
    for epoch in range(config.EPOCHS):
        engine.train_fn(train_data_loader, model, optimizer, scheduler, device)
        outputs, targets = engine.eval_fn(valid_data_loader, model, device)
        # Threshold sigmoid outputs at 0.5 to get hard predictions.
        outputs = np.array(outputs) >= 0.5
        accuracy = metrics.accuracy_score(targets, outputs)
        print(f"Accuracy Score = {accuracy}")
        if accuracy > best_accuracy:
            # Persist only improving checkpoints.
            torch.save(model.state_dict(), config.MODEL_PATH)
            best_accuracy = accuracy
def run():
    """Fine-tune BERT on the two combined Jigsaw toxicity training sets.

    Training data is the concatenation of the original and unintended-bias
    Jigsaw CSVs; AUC on the held-out validation file decides which
    checkpoint is written to config.MODEL_PATH.
    """
    frames = [
        pd.read_csv("../input/jigsaw-toxic-comment-train.csv",
                    usecols=["comment_text", "toxic"]),
        pd.read_csv("../input/jigsaw-unintended-bias-train.csv",
                    usecols=["comment_text", "toxic"]),
    ]
    df_train = pd.concat(frames, axis=0).reset_index(drop=True)
    df_valid = pd.read_csv("../input/validation.csv")

    train_dataset = dataset.BERTDataset(
        comment_text=df_train.comment_text.values,
        target=df_train.toxic.values)
    train_data_loader = torch.utils.data.DataLoader(
        train_dataset, batch_size=config.TRAIN_BATCH_SIZE, num_workers=4)

    valid_dataset = dataset.BERTDataset(
        comment_text=df_valid.comment_text.values,
        target=df_valid.toxic.values)
    valid_data_loader = torch.utils.data.DataLoader(
        valid_dataset, batch_size=config.VALID_BATCH_SIZE, num_workers=1)

    device = torch.device(config.DEVICE)
    model = BERTBaseUncased()
    model.to(device)

    # Biases and LayerNorm parameters are exempt from weight decay.
    named = list(model.named_parameters())
    no_decay = ["bias", "LayerNorm.bias", "LayerNorm.weight"]
    decayed = [p for n, p in named if not any(nd in n for nd in no_decay)]
    undecayed = [p for n, p in named if any(nd in n for nd in no_decay)]
    optimizer_parameters = [
        {"params": decayed, "weight_decay": 0.001},
        {"params": undecayed, "weight_decay": 0.0},
    ]

    num_train_steps = int(
        len(df_train) / config.TRAIN_BATCH_SIZE * config.EPOCHS)
    optimizer = AdamW(optimizer_parameters, lr=3e-5)
    scheduler = get_linear_schedule_with_warmup(
        optimizer, num_warmup_steps=0, num_training_steps=num_train_steps)

    best_accuracy = 0
    for epoch in range(config.EPOCHS):
        engine.train_fn(train_data_loader, model, optimizer, device, scheduler)
        outputs, targets = engine.eval_fn(valid_data_loader, model, device)
        # Binarize the (possibly fractional) toxicity targets for AUC.
        targets = np.array(targets) >= 0.5
        accuracy = metrics.roc_auc_score(targets, outputs)
        print(f"AUC Score = {accuracy}")
        if accuracy > best_accuracy:
            torch.save(model.state_dict(), config.MODEL_PATH)
            best_accuracy = accuracy
def run():
    """Train BERT on a toxicity CSV; checkpoint the best validation accuracy.

    Trains on config.TRAINING_FILE and validates against
    ../input/validation.csv every epoch.
    """
    df1 = pd.read_csv(config.TRAINING_FILE, usecols=["comment_text", "toxic"])
    train_dataset = dataset.BERTDataset(
        review=df1.comment_text.values, target=df1.toxic.values)
    train_data_loader = torch.utils.data.DataLoader(
        train_dataset, batch_size=config.TRAIN_BATCH_SIZE, num_workers=4)

    df2 = pd.read_csv("../input/validation.csv",
                      usecols=["comment_text", "toxic"])
    valid_dataset = dataset.BERTDataset(
        review=df2.comment_text.values, target=df2.toxic.values)
    valid_data_loader = torch.utils.data.DataLoader(
        valid_dataset, batch_size=config.VALID_BATCH_SIZE, num_workers=1)

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = BERTBaseUncased()
    model.to(device)

    # No weight decay for biases and LayerNorm parameters.
    param_optimizer = list(model.named_parameters())
    no_decay = ["bias", "LayerNorm.bias", "LayerNorm.weight"]
    optimizer_parameters = [
        {'params': [p for n, p in param_optimizer
                    if not any(nd in n for nd in no_decay)],
         'weight_decay': 0.001},
        {'params': [p for n, p in param_optimizer
                    if any(nd in n for nd in no_decay)],
         'weight_decay': 0.0},
    ]

    num_train_steps = int(len(df1) / config.TRAIN_BATCH_SIZE * config.EPOCHS)
    optimizer = AdamW(optimizer_parameters, lr=3e-5)
    scheduler = get_linear_schedule_with_warmup(
        optimizer, num_warmup_steps=0, num_training_steps=num_train_steps)

    # Multi-GPU wrapper (no-op cost on a single device).
    model = nn.DataParallel(model)

    best_accuracy = 0
    for epoch in range(config.EPOCHS):
        engine.train_fn(train_data_loader, model, optimizer, device, scheduler)
        outputs, targets = engine.eval_fn(valid_data_loader, model, device)
        outputs = np.array(outputs) >= 0.5
        accuracy = metrics.accuracy_score(targets, outputs)
        print(f"Accuracy Score = {accuracy}")
        if accuracy > best_accuracy:
            # BUG FIX: saving the DataParallel wrapper's state_dict prefixes
            # every key with "module.", which breaks loading the checkpoint
            # into a bare BERTBaseUncased. Save the wrapped module instead.
            to_save = model.module if hasattr(model, "module") else model
            torch.save(to_save.state_dict(), config.MODEL_PATH)
            best_accuracy = accuracy
def run():
    """Train the tweet span-selection model on a 30-row sample (CPU run).

    Intended as a quick smoke test: tiny data slice, single worker,
    CPU device. Checkpoints on improving mean Jaccard score.
    """
    dfx = pd.read_csv(config.TRAINING_FILE,
                      nrows=30).dropna().reset_index(drop=True)

    train_part, valid_part = model_selection.train_test_split(
        dfx, test_size=0.1, random_state=42, stratify=dfx.sentiment.values)
    train_part = train_part.reset_index(drop=True)
    valid_part = valid_part.reset_index(drop=True)

    train_dataset = dataset.TweetDataset(
        tweet=train_part.text.values,
        sentiment=train_part.sentiment.values,
        selected_text=train_part.selected_text.values)
    train_data_loader = torch.utils.data.DataLoader(
        train_dataset, batch_size=config.TRAIN_BATCH_SIZE, num_workers=1)

    valid_dataset = dataset.TweetDataset(
        tweet=valid_part.text.values,
        sentiment=valid_part.sentiment.values,
        selected_text=valid_part.selected_text.values)
    valid_data_loader = torch.utils.data.DataLoader(
        valid_dataset, batch_size=config.VALID_BATCH_SIZE, num_workers=1)

    device = torch.device('cpu')
    model = BERTBaseUncased()
    model.to(device)

    # Two parameter groups: decayed weights vs. undecayed bias/LayerNorm.
    named_params = list(model.named_parameters())
    no_decay = ["bias", "LayerNorm.bias", "LayerNorm.weight"]
    optimizer_parameters = [
        {"params": [p for n, p in named_params
                    if not any(nd in n for nd in no_decay)],
         "weight_decay": 0.001},
        {"params": [p for n, p in named_params
                    if any(nd in n for nd in no_decay)],
         "weight_decay": 0.0},
    ]

    num_train_steps = int(
        len(train_part) / config.TRAIN_BATCH_SIZE * config.EPOCHS)
    optimizer = AdamW(optimizer_parameters, lr=3e-5)
    scheduler = get_linear_schedule_with_warmup(
        optimizer, num_warmup_steps=0, num_training_steps=num_train_steps)

    best_jaccard = 0
    for epoch in range(config.EPOCHS):
        print("here")
        engine.train_fn(train_data_loader, model, optimizer, device, scheduler)
        mean_jac = engine.eval_fn(valid_data_loader, model, device)
        print("jaccard_score = {mean_jac}".format(mean_jac=mean_jac))
        if mean_jac > best_jaccard:
            torch.save(model.state_dict(), config.MODEL_PATH)
            best_jaccard = mean_jac
def run(opt_level="O2", keep_batchnorm_fp32=True, batch_size=5, nb_epochs=10,
        data_path="../inputs/IMDB_Dataset.csv", model_path="./"):
    """Mixed-precision fine-tuning of BERT on a 100-row IMDB sample.

    Args:
        opt_level: apex AMP optimization level.
        keep_batchnorm_fp32: keep batch-norm layers in fp32 under AMP.
        batch_size: batch size used by both data loaders.
        nb_epochs: number of training epochs.
        data_path: CSV with `review` and `sentiment` columns.
        model_path: directory the trainer writes checkpoints into.
    """
    frame = pd.read_csv(data_path).fillna("none")[0:100]
    frame.sentiment = frame.sentiment.apply(
        lambda x: 1 if x == "positive" else 0)

    train_frame, valid_frame = model_selection.train_test_split(
        frame, test_size=0.1, random_state=42, stratify=frame.sentiment.values)
    train_frame = train_frame.reset_index(drop=True)
    valid_frame = valid_frame.reset_index(drop=True)

    # Datasets wrap the raw review texts and binary targets.
    train_dataset = dataset.BERTDataset(review=train_frame.review.values,
                                        target=train_frame.sentiment.values)
    valid_dataset = dataset.BERTDataset(review=valid_frame.review.values,
                                        target=valid_frame.sentiment.values)

    # Loaders; drop_last keeps every batch full for AMP stability.
    train_dataloader = torch.utils.data.DataLoader(
        train_dataset, batch_size, num_workers=10, drop_last=True)
    valid_dataloader = torch.utils.data.DataLoader(
        valid_dataset, batch_size, num_workers=10, drop_last=True)

    device = torch.device("cuda")
    model = BERTBaseUncased()
    model.to(device)

    # Split parameters into decayed weights vs. undecayed bias/LayerNorm.
    named = list(model.named_parameters())
    no_decay = ["bias", "LayerNorm.bias", "LayerNorm.weight"]
    decayed = [p for n, p in named if not any(nd in n for nd in no_decay)]
    undecayed = [p for n, p in named if any(nd in n for nd in no_decay)]
    optimizer_parameters = [
        {"params": decayed, "weight_decay": 0.001},
        {"params": undecayed, "weight_decay": 0.0},
    ]

    num_train_steps = int(len(train_frame) * nb_epochs / batch_size)
    optimizer = AdamW(optimizer_parameters, lr=3e-5)

    # apex AMP rewrites the model/optimizer pair for mixed precision.
    model, optimizer = amp.initialize(model, optimizer, opt_level=opt_level,
                                      keep_batchnorm_fp32=keep_batchnorm_fp32,
                                      loss_scale="dynamic")
    scheduler = get_linear_schedule_with_warmup(
        optimizer, num_warmup_steps=0, num_training_steps=num_train_steps)

    # engine.global_trainer owns the epoch loop and checkpointing.
    engine.global_trainer(train_dataloader, valid_dataloader, model,
                          optimizer, scheduler, device, nb_epochs, model_path)
def run():
    """Fine-tune BERT for sentiment; save the checkpoint only on improvement.

    Reads config.TRAINING_FILE, label-encodes sentiment, trains for
    config.EPOCHS epochs and saves to config.MODEL_PATH.
    """
    dfx = pd.read_csv(config.TRAINING_FILE).fillna("none")
    # Binary label: 1 for "positive", 0 otherwise (could also use a map).
    dfx.sentiment = dfx.sentiment.apply(lambda x: 1 if x == "positive" else 0)

    # Stratified split: train and validation keep the same class ratio.
    df_train, df_valid = model_selection.train_test_split(
        dfx, test_size=0.1, random_state=42, stratify=dfx.sentiment.values)
    df_train = df_train.reset_index(drop=True)
    df_valid = df_valid.reset_index(drop=True)

    train_dataset = dataset.BERTDataset(review=df_train.review.values,
                                        target=df_train.sentiment.values)
    train_data_loader = torch.utils.data.DataLoader(
        train_dataset, batch_size=config.TRAIN_BATCH_SIZE, num_workers=4)

    valid_dataset = dataset.BERTDataset(review=df_valid.review.values,
                                        target=df_valid.sentiment.values)
    valid_data_loader = torch.utils.data.DataLoader(
        valid_dataset, batch_size=config.VALID_BATCH_SIZE, num_workers=1)

    device = torch.device("cuda")
    model = BERTBaseUncased()
    # BUG FIX: the model was never moved to the CUDA device, so training
    # would fail (or silently run on CPU) once batches are sent to the GPU.
    model.to(device)

    # Decay/no-decay parameter groups; bias and LayerNorm are not decayed.
    # These hyperparameters (decay, learning rate, ...) are all adjustable.
    param_optimizer = list(model.named_parameters())
    no_decay = ["bias", "LayerNorm.bias", "LayerNorm.weight"]
    optimizer_parameters = [
        {'params': [p for n, p in param_optimizer
                    if not any(nd in n for nd in no_decay)],
         'weight_decay': 0.001},
        {'params': [p for n, p in param_optimizer
                    if any(nd in n for nd in no_decay)],
         'weight_decay': 0.0},
    ]

    num_train_steps = int(
        len(df_train) / config.TRAIN_BATCH_SIZE * config.EPOCHS)
    optimizer = AdamW(optimizer_parameters, lr=3e-5)
    scheduler = get_linear_schedule_with_warmup(
        optimizer, num_warmup_steps=0, num_training_steps=num_train_steps)

    # model = nn.DataParallel(model)  # enable for multi-GPU training

    best_accuracy = 0
    for epoch in range(config.EPOCHS):
        engine.train_fn(train_data_loader, model, optimizer, device, scheduler)
        outputs, target = engine.eval_fn(valid_data_loader, model, device)
        outputs = np.array(outputs) >= 0.5
        accuracy = metrics.accuracy_score(target, outputs)
        print(f"Accuracy score = {accuracy}")
        if accuracy > best_accuracy:
            # Save the model only if it improves on the best so far.
            torch.save(model.state_dict(), config.MODEL_PATH)
            best_accuracy = accuracy
def run(dataset_index):
    """Train and evaluate BERT on one of the preprocessed gold datasets.

    Args:
        dataset_index: index (0-5) into the ``datasets`` filename templates;
            picks which preprocessing variant to train on.

    Logs losses/accuracy/MCC for train, dev and test to both the logger and
    TensorBoard, and checkpoints the model when test accuracy improves.
    """
    datasets = [
        "gold.prep-auto.full.prep.{0}.csv",
        "gold.prep-auto.no-emoticons.prep.{0}.csv",
        "gold.prep-auto.prep.{0}.csv",
        "gold.prep-english.prep.{0}.csv",
        "gold.prep-peisenieks.prep.{0}.csv",
        "gold.prep.{0}.csv",
    ]

    train_file = config.DATASET_LOCATION + datasets[dataset_index].format(
        "train")
    df_train = pd.read_csv(train_file).fillna("none")
    df_train.label = df_train.label.apply(label_encoder)

    valid_file = config.DATASET_LOCATION + datasets[dataset_index].format(
        "dev")
    df_valid = pd.read_csv(valid_file).fillna("none")
    df_valid.label = df_valid.label.apply(label_encoder)

    # The test split is shared across all preprocessing variants.
    test_file = config.DATASET_LOCATION + "eval.prep.test.csv"
    df_test = pd.read_csv(test_file).fillna("none")
    df_test.label = df_test.label.apply(label_encoder)

    logger.info(f"Bert Model: {config.BERT_PATH}")
    logger.info(
        f"Current date and time :{datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')} "
    )
    logger.info(f"Train file: {train_file}")
    logger.info(f"Valid file: {valid_file}")
    logger.info(f"Test file: {test_file}")
    logger.info(f"Train size : {len(df_train):.4f}")
    logger.info(f"Valid size : {len(df_valid):.4f}")
    logger.info(f"Test size : {len(df_test):.4f}")

    train_dataset = dataset.BERTDataset(review=df_train.text.values,
                                        target=df_train.label.values)
    train_data_loader = torch.utils.data.DataLoader(
        train_dataset, batch_size=config.TRAIN_BATCH_SIZE, num_workers=4,
        shuffle=True)

    valid_dataset = dataset.BERTDataset(review=df_valid.text.values,
                                        target=df_valid.label.values)
    valid_data_loader = torch.utils.data.DataLoader(
        valid_dataset, batch_size=config.VALID_BATCH_SIZE, num_workers=1)

    test_dataset = dataset.BERTDataset(review=df_test.text.values,
                                       target=df_test.label.values)
    test_data_loader = torch.utils.data.DataLoader(
        test_dataset, batch_size=config.VALID_BATCH_SIZE, num_workers=1)

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model = BERTBaseUncased()
    model.to(device)

    # No weight decay for biases and LayerNorm parameters.
    param_optimizer = list(model.named_parameters())
    no_decay = ["bias", "LayerNorm.bias", "LayerNorm.weight"]
    optimizer_parameters = [
        {
            'params': [p for n, p in param_optimizer
                       if not any(nd in n for nd in no_decay)],
            'weight_decay': 0.001,
        },
        {
            'params': [p for n, p in param_optimizer
                       if any(nd in n for nd in no_decay)],
            'weight_decay': 0.0,
        },
    ]

    num_train_steps = int(
        len(df_train) / config.TRAIN_BATCH_SIZE * config.EPOCHS)
    optimizer = AdamW(optimizer_parameters, lr=3e-5)
    scheduler = get_linear_schedule_with_warmup(
        optimizer, num_warmup_steps=0, num_training_steps=num_train_steps)
    # model = nn.DataParallel(model)

    best_accuracy = 0
    for epoch in range(config.EPOCHS):
        logger.info(f"epoch={epoch}")
        train_loss, train_acc = engine.train_fn(train_data_loader, model,
                                                optimizer, device, scheduler)

        # Gradient histograms for TensorBoard.
        for tag, parm in model.named_parameters():
            if parm.grad is not None:
                writer.add_histogram(tag, parm.grad.data.cpu().numpy(), epoch)

        outputs, targets, val_loss, val_acc = engine.eval_fn(
            valid_data_loader, model, device)
        val_mcc = metrics.matthews_corrcoef(outputs, targets)
        logger.info(f"val_MCC_Score = {val_mcc:.3f}")

        outputs, targets, test_loss, test_acc = engine.eval_fn(
            test_data_loader, model, device)
        test_mcc = metrics.matthews_corrcoef(outputs, targets)
        logger.info(f"test_MCC_Score = {test_mcc:.3f}")

        logger.info(
            f"train_loss={train_loss:.4f}, val_loss={val_loss:.4f}, test_loss={test_loss:.4f}"
        )
        writer.add_scalar('loss/train', train_loss, epoch)
        writer.add_scalar('loss/val', val_loss, epoch)
        writer.add_scalar('loss/test', test_loss, epoch)

        logger.info(
            f"train_acc={train_acc:.3f}, val_acc={val_acc:.3f}, test_acc={test_acc:.3f}"
        )
        writer.add_scalar('acc/train', train_acc, epoch)
        writer.add_scalar('acc/val', val_acc, epoch)
        writer.add_scalar('acc/test', test_acc, epoch)

        # BUG FIX: this line previously logged val_acc/test_acc under the
        # mcc labels; log the MCC values that are written to TensorBoard.
        logger.info(f"val_mcc={val_mcc:.3f}, test_mcc={test_mcc:.3f}")
        writer.add_scalar('mcc/val', val_mcc, epoch)
        writer.add_scalar('mcc/test', test_mcc, epoch)

        # NOTE(review): outputs/targets here come from the *test* loader
        # (the last eval_fn call), so model selection uses test accuracy.
        accuracy = metrics.accuracy_score(targets, outputs)
        logger.info(f"Accuracy Score = {accuracy:.3f}")
        if accuracy > best_accuracy:
            print(f"Saving model with Accuracy Score = {accuracy:.3f}")
            torch.save(model.state_dict(), config.MODEL_PATH)
            best_accuracy = accuracy
def run():
    """Train BERT for sentiment on CPU with gradient accumulation.

    Reads configr.TRAINING_FILE, maps sentiment to {0,1}, and checkpoints
    to configr.MODEL_PATH whenever validation accuracy improves.
    """
    dfx = pd.read_csv(configr.TRAINING_FILE).fillna('none')
    dfx.sentiment = dfx.sentiment.map({"positive": 1, "negative": 0})

    # Stratified split keeps the class balance in both folds.
    df_train, df_valid = model_selection.train_test_split(
        dfx, test_size=0.1, random_state=42, stratify=dfx.sentiment.values)
    df_train = df_train.reset_index(drop=True)
    df_valid = df_valid.reset_index(drop=True)

    train_dataset = dataset.BERTDataset(review=df_train.review.values,
                                        target=df_train.sentiment.values)
    train_data_loader = torch.utils.data.DataLoader(
        train_dataset, batch_size=configr.TRAIN_BATCH_SIZE, num_workers=1)

    valid_dataset = dataset.BERTDataset(review=df_valid.review.values,
                                        target=df_valid.sentiment.values)
    valid_data_loader = torch.utils.data.DataLoader(
        valid_dataset, batch_size=configr.VALID_BATCH_SIZE, num_workers=1)

    device = torch.device('cpu')
    model = BERTBaseUncased()
    model.to(device)

    # Decay/no-decay parameter groups; bias and LayerNorm are not decayed.
    param_optimizer = list(model.named_parameters())
    no_decay = ["bias", "LayerNorm.bias", "LayerNorm.weight"]
    optimizer_parameters = [
        {
            "params": [p for n, p in param_optimizer
                       if not any(nd in n for nd in no_decay)],
            "weight_decay": 0.001,
        },
        {
            "params": [p for n, p in param_optimizer
                       if any(nd in n for nd in no_decay)],
            "weight_decay": 0.0,
        },
    ]

    num_train_steps = int(
        len(df_train) / configr.TRAIN_BATCH_SIZE * configr.EPOCHS)
    optimizer = AdamW(optimizer_parameters, lr=3e-5)
    scheduler = get_linear_schedule_with_warmup(
        optimizer, num_warmup_steps=0, num_training_steps=num_train_steps)

    best_accuracy = 0
    for epoch in range(configr.EPOCHS):
        print("here")
        engine.train_fn(train_data_loader, model, optimizer,
                        configr.ACCUMULATION, device)
        outputs, targets = engine.eval_fn(valid_data_loader, model, device)
        outputs = np.array(outputs) >= 0.5
        accuracy = metrics.accuracy_score(targets, outputs)
        print("accuracy_score = {accuracy}".format(accuracy=accuracy))
        if accuracy > best_accuracy:
            torch.save(model.state_dict(), configr.MODEL_PATH)
            # BUG FIX: best_accuracy was never updated, so every epoch with
            # accuracy > 0 overwrote the checkpoint regardless of quality.
            best_accuracy = accuracy
def train():
    """Train a BERT sentiment model (multi-GPU via DataParallel).

    Reads config_2.TRAINING_FILE (NaNs filled with "none"), trains for
    config_2.EPOCHS epochs, and saves the best model by validation
    accuracy to config_2.MODEL_PATH.
    """
    # Read the training file; NaN reviews become the literal string "none".
    dfx = pd.read_csv(config_2.TRAINING_FILE).fillna("none")
    # sentiment = 1 if positive, else 0.
    dfx.sentiment = dfx.sentiment.apply(lambda x: 1 if x == "positive" else 0)

    # Single stratified train/validation split.
    df_train, df_valid = model_selection.train_test_split(
        dfx, test_size=0.1, random_state=42, stratify=dfx.sentiment.values)
    df_train = df_train.reset_index(drop=True)
    df_valid = df_valid.reset_index(drop=True)

    # Training dataset/loader.
    train_dataset = dataset_2.BERTDataset(review=df_train.review.values,
                                          target=df_train.sentiment.values)
    train_data_loader = torch.utils.data.DataLoader(
        train_dataset, batch_size=config_2.TRAIN_BATCH_SIZE, num_workers=4)

    # Validation dataset/loader.
    valid_dataset = dataset_2.BERTDataset(review=df_valid.review.values,
                                          target=df_valid.sentiment.values)
    valid_data_loader = torch.utils.data.DataLoader(
        valid_dataset, batch_size=config_2.VALID_BATCH_SIZE, num_workers=1)

    # Use GPU when available, CPU otherwise.
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model = BERTBaseUncased()
    model.to(device)

    # Parameter groups: no weight decay for bias/LayerNorm parameters.
    param_optimizer = list(model.named_parameters())
    no_decay = ["bias", "LayerNorm.bias", "LayerNorm.weight"]
    optimizer_parameters = [
        {
            "params": [p for n, p in param_optimizer
                       if not any(nd in n for nd in no_decay)],
            "weight_decay": 0.001,
        },
        {
            "params": [p for n, p in param_optimizer
                       if any(nd in n for nd in no_decay)],
            "weight_decay": 0.0,
        },
    ]

    # Number of scheduler steps.
    # BUG FIX: this previously used config_1.EPOCHS while the training loop
    # below uses config_2.EPOCHS; the mismatch makes the linear LR schedule
    # end too early or too late. Use config_2 consistently.
    num_train_steps = int(
        len(df_train) / config_2.TRAIN_BATCH_SIZE * config_2.EPOCHS)

    # AdamW is the standard optimizer for transformer networks.
    optimizer = AdamW(optimizer_parameters, lr=3e-5)
    scheduler = get_linear_schedule_with_warmup(
        optimizer, num_warmup_steps=0, num_training_steps=num_train_steps)

    # Wrap for multi-GPU training.
    model = nn.DataParallel(model)

    best_accuracy = 0
    for epoch in range(config_2.EPOCHS):
        engine.train_fn(train_data_loader, model, optimizer, device, scheduler)
        outputs, targets = engine.eval_fn(valid_data_loader, model, device)
        outputs = np.array(outputs) >= 0.5
        accuracy = metrics.accuracy_score(targets, outputs)
        print(f"Accuracy Score = {accuracy}")
        if accuracy > best_accuracy:
            # BUG FIX: save the underlying module's state_dict, not the
            # DataParallel wrapper's ("module."-prefixed keys would break
            # reloading into a bare BERTBaseUncased).
            to_save = model.module if hasattr(model, "module") else model
            torch.save(to_save.state_dict(), config_2.MODEL_PATH)
            best_accuracy = accuracy
def run():
    """Train the tweet span-selection model on a 100-row sample.

    Checkpoints to config.MODEL_PATH whenever the validation Jaccard
    score improves.
    """
    dfx = pd.read_csv(config.TRAINING_FILE,
                      nrows=100).dropna().reset_index(drop=True)
    dfx.sentiment = dfx.sentiment.apply(lambda x: 1 if x == "positive" else 0)

    # Stratified split keeps the sentiment ratio in both folds.
    df_train, df_valid = model_selection.train_test_split(
        dfx, test_size=0.1, random_state=42, stratify=dfx.sentiment.values)
    df_train = df_train.reset_index(drop=True)
    df_valid = df_valid.reset_index(drop=True)

    train_dataset = dataset.TweetDataset(
        tweet=df_train.text.values,
        sentiment=df_train.sentiment.values,
        selected_text=df_train.selected_text.values)
    train_data_loader = torch.utils.data.DataLoader(
        train_dataset, batch_size=config.TRAIN_BATCH_SIZE, num_workers=4)

    valid_dataset = dataset.TweetDataset(
        tweet=df_valid.text.values,
        sentiment=df_valid.sentiment.values,
        selected_text=df_valid.selected_text.values)
    valid_data_loader = torch.utils.data.DataLoader(
        valid_dataset, batch_size=config.VALID_BATCH_SIZE, num_workers=1)

    device = torch.device("cuda")
    model = BERTBaseUncased()
    model.to(device)

    # No weight decay for bias/LayerNorm parameters.
    param_optimizer = list(model.named_parameters())
    no_decay = ["bias", "LayerNorm.bias", "LayerNorm.weight"]
    optimizer_parameters = [
        {
            'params': [p for n, p in param_optimizer
                       if not any(nd in n for nd in no_decay)],
            'weight_decay': 0.001,
        },
        {
            'params': [p for n, p in param_optimizer
                       if any(nd in n for nd in no_decay)],
            'weight_decay': 0.0,
        },
    ]

    num_train_steps = int(
        len(df_train) / config.TRAIN_BATCH_SIZE * config.EPOCHS)
    optimizer = AdamW(optimizer_parameters, lr=3e-5)
    scheduler = get_linear_schedule_with_warmup(
        optimizer, num_warmup_steps=0, num_training_steps=num_train_steps)

    model = nn.DataParallel(model)

    best_jaccard = 0
    for epoch in range(config.EPOCHS):
        engine.train_fn(train_data_loader, model, optimizer, device, scheduler)
        jaccard = engine.eval_fn(valid_data_loader, model, device)
        print(f"Jaccard Score = {jaccard}")
        if jaccard > best_jaccard:
            # BUG FIX: save the wrapped module's state_dict (DataParallel
            # prefixes keys with "module.", breaking later reloads).
            to_save = model.module if hasattr(model, "module") else model
            torch.save(to_save.state_dict(), config.MODEL_PATH)
            # BUG FIX: this assigned `best_accuracy` (a stray name), so
            # best_jaccard stayed 0 and every epoch overwrote the checkpoint.
            best_jaccard = jaccard
def train():
    """Fine-tune BERT on the sentiment CSV and keep the best checkpoint."""
    data = pd.read_csv(config.TRAINING_FILE).fillna("none")
    data['sentiment'] = data['sentiment'].map({"positive": 1, "negative": 0})

    # Stratified 90/10 split with fixed seed.
    df_train, df_valid = train_test_split(data,
                                          test_size=0.1,
                                          random_state=42,
                                          stratify=data.sentiment.values)
    df_train = df_train.reset_index(drop=True)
    df_valid = df_valid.reset_index(drop=True)

    train_dataset = dataset.BERTDataset(review=df_train.review.values,
                                        target=df_train.sentiment.values)
    train_dataloader = torch.utils.data.DataLoader(
        train_dataset,
        batch_size=config.TRAIN_BATCH_SIZE,
        shuffle=False,
        num_workers=4,
    )

    valid_dataset = dataset.BERTDataset(review=df_valid.review.values,
                                        target=df_valid.sentiment.values)
    valid_dataloader = torch.utils.data.DataLoader(
        valid_dataset,
        batch_size=config.VALID_BATCH_SIZE,
        shuffle=False,
        num_workers=4,
    )

    device = torch.device("cuda")
    model = BERTBaseUncased()
    model.to(device)

    # Exempt biases and LayerNorm weights from weight decay.
    no_decay = ['bias', 'LayerNorm.weight']
    with_decay = [p for n, p in model.named_parameters()
                  if not any(nd in n for nd in no_decay)]
    without_decay = [p for n, p in model.named_parameters()
                     if any(nd in n for nd in no_decay)]
    optimizer_grouped_parameters = [
        {'params': with_decay, 'weight_decay': 0.01},
        {'params': without_decay, 'weight_decay': 0.0},
    ]

    optimizer = AdamW(optimizer_grouped_parameters, lr=3e-5)
    total_steps = int(len(df_train) / config.TRAIN_BATCH_SIZE) * config.EPOCHS
    scheduler = get_linear_schedule_with_warmup(optimizer=optimizer,
                                                num_warmup_steps=0,
                                                num_training_steps=total_steps)

    best_accuracy = 0
    for epoch in range(config.EPOCHS):
        engine.train_fn(train_dataloader, model, optimizer, device, scheduler)
        outputs, targets = engine.eval_fn(valid_dataloader, model, device)
        # Hard predictions from sigmoid outputs.
        outputs = np.array(outputs) >= 0.5
        accuracy = metrics.accuracy_score(outputs, targets)
        print(f"Accuracy: {accuracy:.3f}")
        if accuracy > best_accuracy:
            best_accuracy = accuracy
            torch.save(model.state_dict(), config.MODEL_PATH)
def run():
    """Train BERT for sentiment with AMP + DataParallel.

    Uses a stratified split so train and validation keep the same
    positive/negative ratio; saves the best checkpoint by accuracy.
    """
    dfx = pd.read_csv(config.TRAINING_FILE).fillna("none")
    # Map "positive" -> 1, everything else -> 0.
    dfx.sentiment = dfx.sentiment.apply(lambda x: 1 if x == "positive" else 0)

    df_train, df_valid = model_selection.train_test_split(
        dfx, test_size=0.1, random_state=42, stratify=dfx.sentiment.values)
    df_train = df_train.reset_index(drop=True)
    df_valid = df_valid.reset_index(drop=True)

    train_dataset = dataset.BERTDataset(review=df_train.review.values,
                                        target=df_train.sentiment.values)
    train_data_loader = torch.utils.data.DataLoader(
        train_dataset, batch_size=config.TRAIN_BATCH_SIZE, num_workers=4)

    valid_dataset = dataset.BERTDataset(review=df_valid.review.values,
                                        target=df_valid.sentiment.values)
    valid_data_loader = torch.utils.data.DataLoader(
        valid_dataset, batch_size=config.VALID_BATCH_SIZE, num_workers=1)

    device = torch.device("cuda")
    model = BERTBaseUncased()
    model.to(device)

    # Trainable parameters, partitioned so that bias/LayerNorm get no decay.
    named = list(model.named_parameters())
    no_decay = ["bias", "LayerNorm.bias", "LayerNorm.weight"]
    optimizer_parameters = [
        {
            "params": [p for n, p in named
                       if not any(skip in n for skip in no_decay)],
            "weight_decay": 0.001,
        },
        {
            "params": [p for n, p in named
                       if any(skip in n for skip in no_decay)],
            "weight_decay": 0.0,
        },
    ]

    num_train_steps = int(
        len(df_train) / config.TRAIN_BATCH_SIZE * config.EPOCHS)
    # lr is worth experimenting with.
    optimizer = AdamW(optimizer_parameters, lr=3e-5)
    scheduler = get_linear_schedule_with_warmup(
        optimizer, num_warmup_steps=0, num_training_steps=num_train_steps)

    # Multi-GPU wrapper; model.py should use @amp.autocast() when enabled.
    model = nn.DataParallel(model)

    # GradScaler (from torch.cuda.amp) is required for automatic mixed
    # precision; it is handed to train_fn along with the scheduler.
    scaler = amp.GradScaler()

    best_accuracy = 0
    for epoch in range(config.EPOCHS):
        engine.train_fn(train_data_loader, model, optimizer, device,
                        scheduler, scaler)
        outputs, targets = engine.eval_fn(valid_data_loader, model, device)
        outputs = np.array(outputs) >= 0.5
        accuracy = metrics.accuracy_score(targets, outputs)
        print(f"Accuracy Score = {accuracy}")
        if accuracy > best_accuracy:
            torch.save(model.state_dict(), config.MODEL_PATH)
            best_accuracy = accuracy
def run():
    """Train a BERT title classifier on CPU; checkpoint best test accuracy.

    Splits config.TRAINING_FILE 90/10 (the held-out 10% serves as the
    test set) and saves to config.MODEL_PATH on improvement.
    """
    dfx = pd.read_csv(
        config.TRAINING_FILE).fillna("none").reset_index(drop=True)
    # Stratified split keeps the label distribution in both folds.
    df_train, df_test = model_selection.train_test_split(
        dfx, test_size=0.1, random_state=42, stratify=dfx.label.values)
    df_train = df_train.reset_index(drop=True)
    df_test = df_test.reset_index(drop=True)

    train_dataset = dataset.BERTDataset(text=df_train.title.values,
                                        label=df_train.label.values)
    train_data_loader = torch.utils.data.DataLoader(
        train_dataset, batch_size=config.TRAIN_BATCH_SIZE, num_workers=4)

    test_dataset = dataset.BERTDataset(text=df_test.title.values,
                                       label=df_test.label.values)
    test_data_loader = torch.utils.data.DataLoader(
        test_dataset, batch_size=config.TEST_BATCH_SIZE, num_workers=1)

    device = torch.device("cpu")
    model = BERTBaseUncased()
    model.to(device)

    # No weight decay for bias/LayerNorm parameters.
    param_optimizer = list(model.named_parameters())
    no_decay = ["bias", "LayerNorm.bias", "LayerNorm.weight"]
    optimizer_parameters = [
        {
            "params": [p for n, p in param_optimizer
                       if not any(nd in n for nd in no_decay)],
            "weight_decay": 0.001,
        },
        {
            "params": [p for n, p in param_optimizer
                       if any(nd in n for nd in no_decay)],
            "weight_decay": 0.0,
        },
    ]

    num_train_steps = int(
        len(df_train) / config.TRAIN_BATCH_SIZE * config.EPOCHS)
    optimizer = AdamW(optimizer_parameters, lr=3e-5)
    scheduler = get_linear_schedule_with_warmup(
        optimizer, num_warmup_steps=0, num_training_steps=num_train_steps)

    # BUG FIX: the model was wrapped in nn.DataParallel even though the
    # device is hardcoded to CPU. On a machine with visible GPUs,
    # DataParallel would try to scatter batches to GPUs while the weights
    # live on CPU; it also prefixes saved state_dict keys with "module.".
    # A CPU run needs no wrapper, so it is removed.

    best_accuracy = 0
    for epoch in range(config.EPOCHS):
        engine.train_fn(train_data_loader, model, optimizer, device, scheduler)
        outputs, labels = engine.eval_fn(test_data_loader, model, device)
        outputs = np.array(outputs) >= 0.5
        accuracy = metrics.accuracy_score(labels, outputs)
        print(f"Accuracy Score = {accuracy}")
        if accuracy > best_accuracy:
            torch.save(model.state_dict(), config.MODEL_PATH)
            best_accuracy = accuracy
def run():
    """Train BERT to flag offensive text; checkpoint best validation accuracy.

    Reads the preprocessed CSV, does a 70/30 stratified split on the
    `offensive` label and trains for config.EPOCHS epochs.
    """
    # NOTE(review): path is hardcoded; consider config.PROCESSED_FILE.
    df = pd.read_csv('data/processed_train_data.csv')

    df_train, df_valid = model_selection.train_test_split(
        df, test_size=0.3, random_state=32, stratify=df.offensive.values)
    df_train = df_train.reset_index(drop=True)
    df_valid = df_valid.reset_index(drop=True)

    train_dataset = dataset.BERTDataset(review=df_train.text.values,
                                        target=df_train.offensive.values)
    train_data_loader = torch.utils.data.DataLoader(
        train_dataset, batch_size=config.TRAIN_BATCH_SIZE, num_workers=4)

    valid_dataset = dataset.BERTDataset(review=df_valid.text.values,
                                        target=df_valid.offensive.values)
    valid_data_loader = torch.utils.data.DataLoader(
        valid_dataset, batch_size=config.VALID_BATCH_SIZE, num_workers=1)

    model = BERTBaseUncased()
    model.to(config.DEVICE)

    # No weight decay for bias/LayerNorm parameters.
    param_optimizer = list(model.named_parameters())
    no_decay = ["bias", "LayerNorm.bias", "LayerNorm.weight"]
    optimizer_parameters = [
        {
            'params': [p for n, p in param_optimizer
                       if not any(nd in n for nd in no_decay)],
            'weight_decay': 0.001,
        },
        {
            'params': [p for n, p in param_optimizer
                       if any(nd in n for nd in no_decay)],
            'weight_decay': 0.0,
        },
    ]

    num_train_steps = int(
        len(df_train) / config.TRAIN_BATCH_SIZE * config.EPOCHS)
    optimizer = AdamW(optimizer_parameters, lr=3e-5)
    scheduler = get_linear_schedule_with_warmup(
        optimizer, num_warmup_steps=0, num_training_steps=num_train_steps)

    model = nn.DataParallel(model)

    best_accuracy = 0
    # BUG FIX: the loop hardcoded range(5) while the scheduler above was
    # sized with config.EPOCHS, desynchronizing the LR schedule from the
    # actual number of epochs. Use config.EPOCHS consistently.
    for epoch in range(config.EPOCHS):
        engine.train_fn(train_data_loader, model, optimizer, config.DEVICE,
                        scheduler)
        outputs, targets = engine.eval_fn(valid_data_loader, model,
                                          config.DEVICE)
        outputs = np.array(outputs) >= 0.5
        accuracy = metrics.accuracy_score(targets, outputs)
        print(f"Accuracy Score = {accuracy}")
        if accuracy > best_accuracy:
            # BUG FIX: save the wrapped module's state_dict, not the
            # DataParallel wrapper's ("module."-prefixed keys).
            to_save = model.module if hasattr(model, "module") else model
            torch.save(to_save.state_dict(), config.MODEL_PATH)
            best_accuracy = accuracy
def run():
    """Train BERT for sentiment using the lowercase-config project layout.

    Saves to config.model_path whenever validation accuracy improves.
    """
    df = pd.read_csv(config.training_file).fillna("none")
    df.sentiment = df.sentiment.apply(lambda x: 1 if x == "positive" else 0)

    # Stratified split keeps the class ratio identical in both folds.
    df_train, df_valid = model_selection.train_test_split(
        df, test_size=0.1, random_state=42, stratify=df.sentiment.values)
    df_train = df_train.reset_index(drop=True)
    df_valid = df_valid.reset_index(drop=True)

    train_dataset = dataset.bert_dataset(review=df_train.review.values,
                                         target=df_train.sentiment.values)
    train_data_loader = torch.utils.data.DataLoader(
        train_dataset, batch_size=config.train_batch, num_workers=4)

    valid_dataset = dataset.bert_dataset(review=df_valid.review.values,
                                         target=df_valid.sentiment.values)
    valid_data_loader = torch.utils.data.DataLoader(
        valid_dataset, batch_size=config.valid_batch, num_workers=1)

    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    model = BERTBaseUncased()
    # BUG FIX: the model was never moved to `device`; with CUDA available,
    # batches would land on GPU while the weights stayed on CPU.
    model.to(device)

    # No weight decay for bias/LayerNorm parameters.
    param_optimizer = list(model.named_parameters())
    no_decay = ["bias", "LayerNorm.bias", "LayerNorm.weight"]
    optimizer_params = [
        {
            "params": [p for n, p in param_optimizer
                       if not any(nd in n for nd in no_decay)],
            'weight_decay': 0.001,
        },
        {
            "params": [p for n, p in param_optimizer
                       if any(nd in n for nd in no_decay)],
            'weight_decay': 0.0,
        },
    ]

    num_train_steps = int(len(df_train) / config.train_batch) * config.epochs
    optimizer = AdamW(optimizer_params, lr=3e-5)
    scheduler = WarmupLinearSchedule(optimizer=optimizer,
                                     warmup_steps=0,
                                     t_total=num_train_steps)

    best_accuracy = 0
    for epoch in range(config.epochs):
        engine.train_fn(data_loader=train_data_loader,
                        model=model,
                        optimizer=optimizer,
                        device=device,
                        scheduler=scheduler)
        outputs, targets = engine.eval_fn(valid_data_loader,
                                          model=model,
                                          device=device)
        outputs = np.array(outputs) >= 0.5
        accuracy = metrics.accuracy_score(outputs, targets)
        print(f"Accuracy Score= {accuracy}")
        if accuracy > best_accuracy:
            torch.save(model.state_dict(), config.model_path)
            best_accuracy = accuracy
def run():
    """Train a classifier for `label` on CLI-given train/dev/test splits.

    Usage: script <train_file> <label>. Sibling dev/test files are derived
    by substring replacement on the train filename. Training early-stops
    after config.PATIENCE epochs without dev-accuracy improvement; the
    best checkpoint is then reloaded and per-example predictions for all
    three subsets are written to results2/.
    """
    train_filename, label = sys.argv[1:3]
    model_path = "models2/" + label + "_best.pt"
    assert 'train' in train_filename
    filenames = {'train': train_filename,
                 'dev': train_filename.replace('train', 'dev'),
                 'test': train_filename.replace('train', 'test')}

    dataframes = {}
    num_classes = 0
    for subset, filename in filenames.items():
        dataframes[subset] = preprocess(filename, label)
        # Labels are encoded 0..k-1; keep the largest k over all subsets.
        num_classes = max(num_classes, max(dataframes[subset].ENCODE_CAT) + 1)

    dataloaders = {}
    for subset, filename in filenames.items():
        if subset == 'train':
            batch_size = config.TRAIN_BATCH_SIZE
            num_workers = 4
        else:
            batch_size = config.VALID_BATCH_SIZE
            # BUG FIX: was `num_worker = 1` (typo), so dev/test loaders
            # silently reused num_workers=4 from the train iteration.
            num_workers = 1
        dataloaders[subset] = process_dataset(
            dataframes[subset], batch_size, num_workers)

    device = torch.device(config.DEVICE)
    model = BERTBaseUncased(num_classes)
    model.to(device)

    # No weight decay for biases and LayerNorm parameters.
    param_optimizer = list(model.named_parameters())
    no_decay = ["bias", "LayerNorm.bias", "LayerNorm.weight"]
    optimizer_parameters = [
        {
            "params": [p for n, p in param_optimizer
                       if not any(nd in n for nd in no_decay)],
            "weight_decay": 0.001,
        },
        {
            "params": [p for n, p in param_optimizer
                       if any(nd in n for nd in no_decay)],
            "weight_decay": 0.0,
        },
    ]
    optimizer = AdamW(optimizer_parameters, lr=3e-5)
    scheduler = get_linear_schedule_with_warmup(
        optimizer,
        num_warmup_steps=0,
        num_training_steps=get_num_train_steps(filenames["train"], label))

    best_val_accuracy = float('-inf')
    best_val_epoch = None
    for epoch in range(config.EPOCHS):
        engine.train_fn(
            dataloaders["train"], model, optimizer, device, scheduler, epoch)
        outputs, targets = engine.eval_fn(
            dataloaders['dev'], model, device, epoch)
        accuracy = metrics.accuracy_score(targets, outputs)
        print(f"Validation Accuracy = {accuracy}")
        if accuracy > best_val_accuracy:
            torch.save(model.state_dict(), model_path)
            best_val_accuracy = accuracy
            best_val_epoch = epoch
        print("Best val accuracy till now {}".format(best_val_accuracy))
        # Early stopping: no improvement for PATIENCE consecutive epochs.
        if best_val_epoch < (epoch - config.PATIENCE):
            break

    # Reload the best checkpoint and dump predictions for every subset.
    model.load_state_dict(torch.load(model_path))
    for subset in ['train', 'dev', 'test']:
        outputs, targets = engine.eval_fn(
            dataloaders[subset], model, device, epoch)
        result_df = pd.DataFrame.from_dict(
            [{"output": o, "target": t} for o, t in zip(outputs, targets)])
        final_df = pd.concat([dataframes[subset], result_df], axis=1)
        # Sanity check: eval_fn must preserve example order, otherwise
        # predictions would be joined to the wrong rows.
        for row in final_df.itertuples():
            assert row.ENCODE_CAT == row.target
        result_file = "results2/" + subset + "_" + label + ".csv"
        final_df.to_csv(result_file)
def main(_):
    """Train a BERT classifier, logging loss/accuracy/MCC to TensorBoard.

    FLAGS.lr / FLAGS.dropout override the config defaults. The model is
    checkpointed whenever accuracy improves; training stops early after
    5 epochs without improvement, or immediately if accuracy collapses
    below 0.4.
    """
    LEARNING_RATE = config.LEARNING_RATE
    DROPOUT = config.DROPOUT
    if FLAGS.lr:
        LEARNING_RATE = FLAGS.lr
    if FLAGS.dropout:
        DROPOUT = FLAGS.dropout

    train_file = config.TRAIN_PROC
    df_train = pd.read_csv(train_file).fillna("none")
    valid_file = config.DEVEL_PROC
    df_valid = pd.read_csv(valid_file).fillna("none")
    test_file = config.EVAL_PROC
    df_test = pd.read_csv(test_file).fillna("none")

    logger.info(f"Bert Model: {config.BERT_PATH}")
    logger.info(f"Current date and time :{datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')} ")
    logger.info(f"Train file: {train_file}")
    logger.info(f"Valid file: {valid_file}")
    logger.info(f"Test file: {test_file}")
    logger.info(f"Train size : {len(df_train):.4f}")
    logger.info(f"Valid size : {len(df_valid):.4f}")
    logger.info(f"Test size : {len(df_test):.4f}")

    train_dataset = dataset.BERTDataset(
        review=df_train.text.values, target=df_train.label.values)
    train_data_loader = torch.utils.data.DataLoader(
        train_dataset, batch_size=config.TRAIN_BATCH_SIZE, num_workers=4,
        shuffle=True)

    valid_dataset = dataset.BERTDataset(
        review=df_valid.text.values, target=df_valid.label.values)
    valid_data_loader = torch.utils.data.DataLoader(
        valid_dataset, batch_size=config.VALID_BATCH_SIZE, num_workers=1)

    test_dataset = dataset.BERTDataset(
        review=df_test.text.values, target=df_test.label.values)
    test_data_loader = torch.utils.data.DataLoader(
        test_dataset, batch_size=config.VALID_BATCH_SIZE, num_workers=1)

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model = BERTBaseUncased(DROPOUT)
    model.to(device)

    # No weight decay for biases and LayerNorm parameters.
    param_optimizer = list(model.named_parameters())
    no_decay = ["bias", "LayerNorm.bias", "LayerNorm.weight"]
    optimizer_parameters = [
        {'params': [p for n, p in param_optimizer
                    if not any(nd in n for nd in no_decay)],
         'weight_decay': 0.001},
        {'params': [p for n, p in param_optimizer
                    if any(nd in n for nd in no_decay)],
         'weight_decay': 0.0},
    ]

    num_train_steps = int(
        len(df_train) / config.TRAIN_BATCH_SIZE * config.EPOCHS)
    optimizer = AdamW(optimizer_parameters, lr=LEARNING_RATE)
    scheduler = get_linear_schedule_with_warmup(
        optimizer, num_warmup_steps=0, num_training_steps=num_train_steps)

    best_accuracy = 0
    # BUG FIX: `es` was never initialized, so `es += 1` raised NameError
    # the first time an epoch failed to improve on best_accuracy.
    es = 0
    for epoch in range(config.EPOCHS):
        logger.info(f"Epoch = {epoch}")
        train_loss, train_acc = engine.train_fn(
            train_data_loader, model, optimizer, device, scheduler)
        # Log gradient histograms for every parameter that received one.
        for tag, parm in model.named_parameters():
            if parm.grad is not None:
                writer.add_histogram(tag, parm.grad.data.cpu().numpy(), epoch)

        outputs, targets, val_loss, val_acc = engine.eval_fn(
            valid_data_loader, model, device)
        val_mcc = metrics.matthews_corrcoef(outputs, targets)
        logger.info(f"val_MCC_Score = {val_mcc:.4f}")

        outputs, targets, test_loss, test_acc = engine.eval_fn(
            test_data_loader, model, device)
        test_mcc = metrics.matthews_corrcoef(outputs, targets)
        logger.info(f"test_MCC_Score = {test_mcc:.4f}")

        logger.info(
            f"train_loss={train_loss:.4f}, val_loss={val_loss:.4f}, test_loss={test_loss:.4f}")
        writer.add_scalar('loss/train', train_loss, epoch)  # data grouping by `slash`
        writer.add_scalar('loss/val', val_loss, epoch)
        writer.add_scalar('loss/test', test_loss, epoch)

        logger.info(
            f"train_acc={train_acc:.4f}, val_acc={val_acc:.4f}, test_acc={test_acc:.4f}")
        writer.add_scalar('acc/train', train_acc, epoch)
        writer.add_scalar('acc/val', val_acc, epoch)
        writer.add_scalar('acc/test', test_acc, epoch)

        # BUG FIX: this log line previously interpolated val_acc/test_acc
        # under the mcc labels.
        logger.info(f"val_mcc={val_mcc:.4f}, test_mcc={test_mcc:.4f}")
        writer.add_scalar('mcc/val', val_mcc, epoch)
        writer.add_scalar('mcc/test', test_mcc, epoch)

        # NOTE(review): outputs/targets here come from the *test* loader
        # (reassigned above), so checkpoint selection is effectively on
        # test accuracy — confirm whether validation was intended.
        accuracy = metrics.accuracy_score(targets, outputs)
        logger.info(f"Accuracy Score = {accuracy:.4f}")
        if accuracy < 0.4:
            logger.info(f"Something is very wrong! Accuracy is only {accuracy:.4f} Stopping...")
            break
        if accuracy > best_accuracy:
            logger.info(f"Saving model with Accuracy Score = {accuracy:.4f}")
            # Embed the accuracy in the checkpoint filename.
            torch.save(model.state_dict(),
                       config.MODEL_PATH[:-4] + "." + str(round(accuracy * 100, 2)) + ".bin")
            best_accuracy = accuracy
            es = 0
        else:
            es += 1
            logger.info(f"Not improved for {es} times of 5. Best so far - {best_accuracy:.4f}")
            if es > 4:
                logger.info(f"Early stopping with best accuracy: {best_accuracy:.4f} and accuracy for this epoch: {accuracy:.4f} ...")
                break
def run():
    """Train a toxic-comment classifier on TPU cores via torch_xla.

    Concatenates the two jigsaw training CSVs, shards them across TPU
    cores with DistributedSampler, and checkpoints (xm.save) whenever
    validation accuracy improves.
    """
    df1 = pd.read_csv("../input/jigsaw-multilingual-toxic-comment-train.csv",
                      usecols=['comment_text', 'toxic'])
    # BUG FIX: this frame was previously assigned to df1 again, leaving
    # df2 undefined at the concat below.
    df2 = pd.read_csv("../input/jigsaw-unintended-bias-train.csv",
                      usecols=['comment_text', 'toxic'])
    # Combine df1 and df2 into one big training frame.
    df_train = pd.concat([df1, df2], axis=0).reset_index(drop=True)
    # Validation dataframe has been given by kaggle.
    # BUG FIX: was `df_valid - pd.read_csv(...)` (minus, not assignment).
    df_valid = pd.read_csv("../input/validation.csv")

    train_dataset = dataset.BERTDataset(
        comment_text=df_train.comment_text.values,
        target=df_train.toxic.values)
    # Distributed sampler so each TPU core sees a distinct shard.
    # BUG FIX: correct module path is torch.utils.data.distributed.
    train_sampler = torch.utils.data.distributed.DistributedSampler(
        train_dataset,
        num_replicas=xm.xrt_world_size(),
        rank=xm.get_ordinal(),
        shuffle=True)
    train_data_loader = torch.utils.data.DataLoader(
        train_dataset,
        batch_size=config.TRAIN_BATCH_SIZE,
        num_workers=4,
        sampler=train_sampler,
        # torch_xla crashes on a ragged final batch, so drop it.
        drop_last=True)

    valid_dataset = dataset.BERTDataset(
        comment_text=df_valid.comment_text.values,
        target=df_valid.toxic.values)
    valid_sampler = torch.utils.data.distributed.DistributedSampler(
        valid_dataset,
        num_replicas=xm.xrt_world_size(),
        rank=xm.get_ordinal(),
        shuffle=True)
    valid_data_loader = torch.utils.data.DataLoader(
        valid_dataset,
        batch_size=config.VALID_BATCH_SIZE,
        num_workers=1,
        sampler=valid_sampler)  # no need of drop_last here

    device = xm.xla_device()  # xla_device means TPU
    model = BERTBaseUncased()
    # No explicit model.to(device) needed here; torch_xla handles it.

    # No weight decay for biases and LayerNorm parameters.
    param_optimizer = list(model.named_parameters())
    no_decay = ["bias", "LayerNorm.bias", "LayerNorm.weight"]
    optimizer_parameters = [
        {
            "params": [p for n, p in param_optimizer
                       if not any(nd in n for nd in no_decay)],
            "weight_decay": 0.001,
        },
        {
            "params": [p for n, p in param_optimizer
                       if any(nd in n for nd in no_decay)],
            "weight_decay": 0.0,
        },
    ]

    # Steps are per-core, hence the division by the world size.
    num_train_steps = int(
        len(df_train) / config.TRAIN_BATCH_SIZE / xm.xrt_world_size()
        * config.EPOCHS)
    lr = 3e-5 * xm.xrt_world_size()  # scale lr with number of cores
    optimizer = AdamW(optimizer_parameters, lr=lr)
    scheduler = get_linear_schedule_with_warmup(
        optimizer, num_warmup_steps=0, num_training_steps=num_train_steps)

    best_accuracy = 0
    for epoch in range(config.EPOCHS):
        # Parallel loaders feed each TPU core its own shard.
        train_loader = pl.ParallelLoader(train_data_loader, [device])
        engine.train_fn(train_loader.per_device_loader(device), model,
                        optimizer, device, scheduler)
        # BUG FIX: evaluation previously iterated the *train* parallel
        # loader (para_loader); build and use the validation one.
        valid_loader = pl.ParallelLoader(valid_data_loader, [device])
        outputs, targets = engine.eval_fn(
            valid_loader.per_device_loader(device), model, device)
        # Threshold the target (a probability) instead of the output.
        targets = np.array(targets) >= 0.5
        accuracy = metrics.accuracy_score(targets, outputs)
        print(f"Accuracy Score = {accuracy}")
        if accuracy > best_accuracy:
            # Use xm.save instead of torch.save on TPU.
            xm.save(model.state_dict(), config.MODEL_PATH)
            best_accuracy = accuracy
def run():
    '''
    Entire training loop
        - Create DataLoaders
        - Define Training Configuration
        - Launch Training Loop
    '''
    # Device selection: TPU cores via torch_xla, otherwise CUDA/CPU.
    if config.TPUs:
        n_TPUs = xm.xrt_world_size()
        DEVICE = xm.xla_device()
    else:
        DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'
    device = torch.device(DEVICE)

    # Read data (subsampled files for quick iteration).
    df_train = pd.read_csv('data/jigsaw-toxic-comment-train-small.csv',
                           usecols=['comment_text', 'toxic'])
    df_valid = pd.read_csv('data/validation-small.csv',
                           usecols=['comment_text', 'toxic'])

    # Preprocess
    train_dataset = dataset.BERTDataset(
        comment=df_train.comment_text.values,
        target=df_train.toxic.values)
    valid_dataset = dataset.BERTDataset(
        comment=df_valid.comment_text.values,
        target=df_valid.toxic.values)

    drop_last = False
    train_sampler, valid_sampler = None, None
    if config.TPUs:
        # torch_xla requires equal batch sizes on every core.
        drop_last = True
        train_sampler = DistributedSampler(
            train_dataset, num_replicas=n_TPUs, rank=xm.get_ordinal(),
            shuffle=True)
        valid_sampler = DistributedSampler(
            valid_dataset, num_replicas=n_TPUs, rank=xm.get_ordinal(),
            shuffle=True)

    # Create Data Loaders
    train_data_loader = torch.utils.data.DataLoader(
        dataset=train_dataset, batch_size=config.TRAIN_BATCH_SIZE,
        num_workers=4, drop_last=drop_last, sampler=train_sampler)
    valid_data_loader = torch.utils.data.DataLoader(
        valid_dataset, batch_size=config.VALID_BATCH_SIZE,
        num_workers=1, drop_last=drop_last, sampler=valid_sampler)

    # Machine Configuration
    if config.MODEL == 'bert':
        model = BERTBaseUncased()
    elif config.MODEL == 'distil-bert':
        model = DistilBERTBaseUncased()
    else:
        print('Model chosen in config not valid')
        exit()
    model.to(device)

    # Optimizer Configuration: no decay for biases/LayerNorm parameters.
    param_optimizer = list(model.named_parameters())
    no_decay = ["bias", "LayerNorm.bias", "LayerNorm.weight"]
    optimizer_parameters = [
        {'params': [p for n, p in param_optimizer
                    if not any(nd in n for nd in no_decay)],
         'weight_decay': 0.001},
        {'params': [p for n, p in param_optimizer
                    if any(nd in n for nd in no_decay)],
         'weight_decay': 0.0},
    ]

    lr = config.LR
    num_train_steps = int(
        len(df_train) / config.TRAIN_BATCH_SIZE * config.EPOCHS)
    if config.TPUs:
        # Steps are counted per core; lr is scaled with the number of
        # replicas (linear-scaling rule for distributed training).
        # BUG FIX: `/=` produced a float, but the scheduler expects an
        # integer step count — use floor division.
        num_train_steps //= n_TPUs
        lr *= n_TPUs

    optimizer = AdamW(optimizer_parameters, lr=lr)
    scheduler = get_linear_schedule_with_warmup(
        optimizer, num_warmup_steps=0, num_training_steps=num_train_steps)

    if not config.TPUs:
        if N_GPU > 1:
            model = nn.DataParallel(model)

    # Training loop
    best_score = 0
    for epoch in range(config.EPOCHS):
        if config.TPUs:
            train_loader = pl.ParallelLoader(train_data_loader, [device])
            valid_loader = pl.ParallelLoader(valid_data_loader, [device])
            train_fn(train_loader.per_device_loader(device), model,
                     optimizer, device, scheduler)
            outputs, targets = eval_fn(
                valid_loader.per_device_loader(device), model, device)
        else:
            train_fn(train_data_loader, model, optimizer, device, scheduler)
            outputs, targets = eval_fn(valid_data_loader, model, device)

        # The unintended-bias toxic labels are fractional probabilities;
        # binarize at 0.5 before scoring.
        targets = np.array(targets) >= 0.5
        auc_score = metrics.roc_auc_score(targets, outputs)

        # Save if best
        print(f"AUC Score = {auc_score}")
        if auc_score > best_score:
            if not config.TPUs:
                torch.save(model.state_dict(), config.MODEL_PATH)
            else:
                xm.save(model.state_dict(), config.MODEL_PATH)
            best_score = auc_score
def run():
    """End-to-end training for the tweet selected-text extraction model.

    Loads the training CSV, makes a stratified 90/10 train/validation
    split on sentiment, fine-tunes BERTBaseUncased with a linear-warmup
    schedule, and keeps the checkpoint with the best validation Jaccard.
    """
    frame = pd.read_csv(config.TRAINING_FILE).dropna().reset_index(drop=True)

    # Stratified split keeps the sentiment distribution identical in the
    # train and validation partitions.
    train_frame, valid_frame = model_selection.train_test_split(
        frame,
        test_size=0.1,
        random_state=42,
        stratify=frame.sentiment.values)
    train_frame = train_frame.reset_index(drop=True)
    valid_frame = valid_frame.reset_index(drop=True)

    # Build both loaders from one table of (split, frame, batch, workers).
    loaders = {}
    for split, part, batch, workers in (
            ("train", train_frame, config.TRAIN_BATCH_SIZE, 4),
            ("valid", valid_frame, config.VALID_BATCH_SIZE, 1)):
        tweet_ds = dataset.TweetDataset(
            tweet=part.text.values,
            target=part.sentiment.values,
            selected_text=part.selected_text.values)
        loaders[split] = torch.utils.data.DataLoader(
            tweet_ds, batch_size=batch, num_workers=workers)

    device = torch.device("cuda")
    model = BERTBaseUncased()
    model.to(device)

    # Split parameters into two optimizer groups: weight decay is
    # disabled for biases and LayerNorm weights.
    named_params = list(model.named_parameters())
    skip_decay = ["bias", "LayerNorm.bias", "LayerNorm.weight"]
    grouped_params = [
        {
            "params": [p for name, p in named_params
                       if not any(token in name for token in skip_decay)],
            "weight_decay": 0.001,
        },
        {
            "params": [p for name, p in named_params
                       if any(token in name for token in skip_decay)],
            "weight_decay": 0.0,
        },
    ]

    steps_per_epoch = len(train_frame) / config.TRAIN_BATCH_SIZE
    total_steps = int(steps_per_epoch * config.EPOCHS)
    optimizer = AdamW(grouped_params, lr=3e-5)
    scheduler = get_linear_schedule_with_warmup(
        optimizer, num_warmup_steps=0, num_training_steps=total_steps)

    # Wrap for multi-GPU training (harmless with a single GPU).
    model = nn.DataParallel(model)

    # Evaluation metric is the Jaccard score.
    best_jaccard = 0
    for _ in range(config.EPOCHS):
        engine.train_fn(loaders["train"], model, optimizer, device, scheduler)
        score = engine.eval_fn(loaders["valid"], model, device)
        print(f"Jaccard Score = {score}")
        if score > best_jaccard:
            torch.save(model.state_dict(), config.MODEL_PATH)
            best_jaccard = score
def run():
    """Fine-tune BERTBaseUncased on the cleaned review-sentence data.

    Preprocesses the train/dev CSVs, trains for config.EPOCHS epochs,
    validating after each epoch and checkpointing whenever validation
    accuracy improves.
    """
    train_df = preprocess('./review-sentence_train_clean.csv').reset_index(drop=True)
    valid_df = preprocess('./review-sentence_dev_clean.csv').reset_index(drop=True)

    train_loader = torch.utils.data.DataLoader(
        dataset.BERTDataset(review=train_df.sentence.values,
                            target=train_df.ENCODE_CAT.values),
        batch_size=config.TRAIN_BATCH_SIZE,
        num_workers=4)
    valid_loader = torch.utils.data.DataLoader(
        dataset.BERTDataset(review=valid_df.sentence.values,
                            target=valid_df.ENCODE_CAT.values),
        batch_size=config.VALID_BATCH_SIZE,
        num_workers=1)

    device = torch.device(config.DEVICE)
    model = BERTBaseUncased()
    model.to(device)

    # Two optimizer groups: decay everything except biases and
    # LayerNorm parameters.
    decay_blocklist = ("bias", "LayerNorm.bias", "LayerNorm.weight")

    def _wants_decay(param_name):
        # True when none of the blocklisted fragments appear in the name.
        return not any(fragment in param_name for fragment in decay_blocklist)

    named = list(model.named_parameters())
    groups = [
        {"params": [p for n, p in named if _wants_decay(n)],
         "weight_decay": 0.001},
        {"params": [p for n, p in named if not _wants_decay(n)],
         "weight_decay": 0.0},
    ]

    total_steps = int(len(train_df) / config.TRAIN_BATCH_SIZE * config.EPOCHS)
    optimizer = AdamW(groups, lr=3e-5)
    scheduler = get_linear_schedule_with_warmup(
        optimizer, num_warmup_steps=0, num_training_steps=total_steps)

    best_accuracy = 0
    for epoch in range(config.EPOCHS):
        engine.train_fn(train_loader, model, optimizer, device, scheduler,
                        epoch)
        outputs, targets = engine.eval_fn(valid_loader, model, device, epoch)
        accuracy = metrics.accuracy_score(outputs, targets)
        print(f"Validation Accuracy = {accuracy}")
        if accuracy > best_accuracy:
            torch.save(model.state_dict(), config.MODEL_PATH)
            best_accuracy = accuracy
        print("Best val accuracy till now {}".format(best_accuracy))
valid_dataset = dataset.BERTDataset( comment_text=df_valid.comment_text.values, target=df_valid.toxic.values ) valid_data_loader = torch.utils.data.DataLoader( valid_dataset, batch_size=config.VALID_BATCH_SIZE, num_workers=1 ) device = torch.device("cuda") model = BERTBaseUncased() model.to(device) param_optimizer = list(model.named_parameters()) no_decay = ["bias", "LayerNorm.bias", "LayerNorm.weight"] optimizer_parameters = [ {'params': [p for n, p in param_optimizer if not any(nd in n for nd in no_decay)], 'weight_decay': 0.001}, {'params': [p for n, p in param_optimizer if any(nd in n for nd in no_decay)], 'weight_decay': 0.0}, ] num_train_steps = int(len(df_train) / config.TRAIN_BATCH_SIZE * config.EPOCHS) optimizer = AdamW(optimizer_parameters, lr=3e-5) scheduler = get_linear_schedule_with_warmup( optimizer, num_warmup_steps=0, num_training_steps=num_train_steps ) model = nn.DataParallel(model)