def run():
    # Load data and do a little exploration
    data = utils.load_data(config.DATA_PATH)
    utils.data_exploration(data)  # data exploration just prints
    # print(data.head())
    # print(data.polarity.values)

    # Create data loaders (keep only a small subset of the data, then split it 50/50)
    data, _ = model_selection.train_test_split(data,
                                               test_size=0.995,
                                               random_state=42,
                                               stratify=data.polarity.values)
    train, valid = model_selection.train_test_split(data,
                                                    test_size=0.5,
                                                    random_state=42,
                                                    stratify=data.polarity.values)
    train_data_loader = utils.create_data_loader(train)
    valid_data_loader = utils.create_data_loader(valid, is_train=False)

    # Build the model and send it to the device
    model = BuildModel()
    model = model.to(config.DEVICE)

    # Set weight decay to 0 for the no_decay parameters
    # and to config.WEIGHT_DECAY for all the others
    param_optimizer = list(model.named_parameters())
    no_decay = ["bias", "LayerNorm.bias", "LayerNorm.weight"]
    optimizer_parameters = [
        {
            'params': [tensor for name, tensor in param_optimizer
                       if any(nd in name for nd in no_decay)],
            'weight_decay': 0.0
        },
        {
            'params': [tensor for name, tensor in param_optimizer
                       if not any(nd in name for nd in no_decay)],
            'weight_decay': config.WEIGHT_DECAY
        },
    ]

    # This is the total number of optimization steps that will be performed
    num_training_steps = int(
        (train.shape[0] / config.TRAIN_BATCH_SIZE) * config.EPOCHS)

    optimizer = AdamW(optimizer_parameters, lr=3e-5)  # arbitrarily set

    # The scheduler adapts the LR over the training steps.
    # The warm-up scheduler increases the LR during the first num_warmup_steps
    # so that training converges faster at the beginning.
    scheduler = get_linear_schedule_with_warmup(
        optimizer=optimizer,
        num_training_steps=num_training_steps,
        num_warmup_steps=4)

    # model = nn.DataParallel(model)  # if multiple GPUs

    best_accuracy = 0
    best_model_state = None
    for epoch in range(config.EPOCHS):
        engine.train_fn(train_data_loader, model, optimizer, scheduler)
        outputs, targets = engine.eval_fn(valid_data_loader, model)
        outputs = np.where(np.array(outputs) > 0.5, 1, 0)
        accuracy = metrics.accuracy_score(np.array(targets), outputs)
        print(f"Accuracy, Epoch {epoch} : {accuracy}")
        if accuracy > best_accuracy:
            best_accuracy = accuracy
            best_model_state = model.state_dict()

    print(f"Best accuracy : {best_accuracy}")
    print("Saving Model...")
    torch.save(best_model_state, config.MODEL_PATH)
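# Illustrative sanity check (not part of the original script): build a tiny model,
# apply the same substring matching against no_decay, and confirm that each AdamW
# parameter group carries the intended weight decay. The toy module and the 0.01
# decay value are assumptions for demonstration only.
import torch
import torch.nn as nn

class ToyModel(nn.Module):
    def __init__(self):
        super().__init__()
        self.linear = nn.Linear(4, 8)
        self.LayerNorm = nn.LayerNorm(8)

    def forward(self, x):
        return self.LayerNorm(self.linear(x))

toy = ToyModel()
no_decay = ["bias", "LayerNorm.bias", "LayerNorm.weight"]
named = list(toy.named_parameters())
groups = [
    {"params": [p for n, p in named if any(nd in n for nd in no_decay)],
     "weight_decay": 0.0},
    {"params": [p for n, p in named if not any(nd in n for nd in no_decay)],
     "weight_decay": 0.01},
]
opt = torch.optim.AdamW(groups, lr=3e-5)
for i, group in enumerate(opt.param_groups):
    # expect 3 tensors with decay 0.0 (biases + LayerNorm weight) and 1 tensor with 0.01
    print(f"group {i}: {len(group['params'])} tensors, weight_decay={group['weight_decay']}")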
optimizer_parameters = [
    {
        "params": [
            p for n, p in param_optimizer
            if not any(nd in n for nd in no_decay)
        ],
        "weight_decay": 0.001,
    },
    {
        "params": [p for n, p in param_optimizer if any(nd in n for nd in no_decay)],
        "weight_decay": 0.0,
    },
]

num_train_steps = int(
    len(train_sentences) / config.TRAIN_BATCH_SIZE * config.EPOCHS)
optimizer = AdamW(optimizer_parameters, lr=3e-5)
scheduler = get_linear_schedule_with_warmup(optimizer,
                                            num_warmup_steps=0,
                                            num_training_steps=num_train_steps)

best_loss = np.inf
for epoch in range(config.EPOCHS):
    train_loss = engine.train_fn(train_data_loader, model, optimizer, device,
                                 scheduler)
    test_loss = engine.eval_fn(valid_data_loader, model, device)
    print(f"Train Loss = {train_loss} Valid Loss = {test_loss}")
    if test_loss < best_loss:
        torch.save(model.state_dict(), config.MODEL_PATH)
        best_loss = test_loss
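# A small stand-alone sketch (illustrative, not from the original script) of how
# the linear warm-up schedule built above behaves: step the scheduler through a
# fixed number of training steps and print the learning rate at a few points.
# The dummy parameter and the step counts are assumptions for demonstration only.
import torch
from transformers import get_linear_schedule_with_warmup

dummy_param = torch.nn.Parameter(torch.zeros(1))
demo_opt = torch.optim.AdamW([dummy_param], lr=3e-5)
demo_sched = get_linear_schedule_with_warmup(demo_opt,
                                             num_warmup_steps=100,
                                             num_training_steps=1000)
for step in range(1000):
    demo_opt.step()
    demo_sched.step()
    if step in (0, 99, 499, 999):
        # LR ramps up linearly during warm-up, then decays linearly to zero
        print(f"step {step}: lr = {demo_sched.get_last_lr()[0]:.2e}")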
def run(): path = CONFIG.INPUT_PATH x_ray_image_names = os.listdir(path + '/CXR_png/') image_names = [] for name in x_ray_image_names: image_names.append(name.split('.')[0]) dataset_image_names = [] mask_image_names = os.listdir(path + '/masks/') for name in mask_image_names: name = name.split('.png')[0].split('_mask')[0] if name in image_names: dataset_image_names.append(name) image_transforms = alb.Compose([ alb.Normalize(CONFIG.mean, CONFIG.std, always_apply=True), alb.Resize(512, 512, always_apply=True), alb.pytorch.ToTensor() ]) mask_transforms = alb.Compose([ alb.Normalize(0, 1, always_apply=True), alb.Resize(512, 512, always_apply=True), alb.pytorch.ToTensor() ]) train_images_name, val_images_name = train_test_split(dataset_image_names) train_data = DataLoader.DataLoader( train_images_name, image_transforms, mask_transforms ) val_data = DataLoader.DataLoader( val_images_name, image_transforms, mask_transforms ) train_loader = torch.utils.data.DataLoader( train_data, num_workers=4, batch_size=CONFIG.Batch_size, pin_memory=True ) val_loader = torch.utils.data.DataLoader( val_data, num_workers=4, batch_size=CONFIG.Batch_size, pin_memory=True ) if torch.cuda.is_available(): accelarator = 'cuda' torch.backends.cudnn.benchmark = True else: accelarator = 'cpu' device = torch.device(accelarator) model = UNet.UNet(input_channels=3) model = model.to(device) optimizer = torch.optim.Adam(model.parameters(), lr=CONFIG.LR) scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau( optimizer, patience=CONFIG.patience, threshold=CONFIG.scheduler_thresh, mode="min", factor=CONFIG.decay_factor ) best_loss = 1e4 print('------ [INFO] STARTING TRAINING ------') for epoch in range(CONFIG.Epochs): train_loss = engine.train_fn(model, train_loader, optimizer, device) val_loss = engine.eval_fn(model, val_loader, device) print(f'EPOCH -> {epoch+1}/{CONFIG.Epochs} | TRAIN LOSS = {train_loss} | VAL LOSS = {val_loss} | LR = {optimizer.param_groups[0]["lr"]}\n') scheduler.step(val_loss) if best_loss > val_loss: best_loss = val_loss best_model = model.state_dict() torch.save(best_model, CONFIG.MODEL_PATH) predict.predict('input/CXR_png/CHNCXR_0001_0.png')
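# The DataLoader.DataLoader dataset used above is project specific and not shown
# here; the following is a minimal sketch, assuming images live in CXR_png/ and
# masks in masks/ (optionally with a _mask suffix), of what such a paired
# image/mask dataset could look like. The file layout and naming are assumptions.
import os
import numpy as np
import torch
from PIL import Image

class LungSegmentationDataset(torch.utils.data.Dataset):
    def __init__(self, root, image_names, image_transforms, mask_transforms):
        self.root = root
        self.image_names = image_names
        self.image_transforms = image_transforms
        self.mask_transforms = mask_transforms

    def __len__(self):
        return len(self.image_names)

    def __getitem__(self, idx):
        name = self.image_names[idx]
        image = np.array(Image.open(
            os.path.join(self.root, "CXR_png", f"{name}.png")).convert("RGB"))
        mask_path = os.path.join(self.root, "masks", f"{name}_mask.png")
        if not os.path.exists(mask_path):
            mask_path = os.path.join(self.root, "masks", f"{name}.png")
        mask = np.array(Image.open(mask_path).convert("L"))
        # albumentations Compose objects return a dict with the transformed image
        image = self.image_transforms(image=image)["image"]
        mask = self.mask_transforms(image=mask)["image"]
        return image, mask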
def main(ckp_path=None): """ckp_path (str): checkpoint_path Train the model from scratch if ckp_path is None else Re-Train the model from previous checkpoint """ cli_args = get_train_args(__author__, __version__) # Variables data_dir = cli_args.data_dir save_dir = cli_args.save_dir file_name = cli_args.file_name use_gpu = cli_args.use_gpu # LOAD DATA data_loaders = load_data(data_dir, config.IMG_SIZE, config.BATCH_SIZE) # BUILD MODEL if ckp_path == None: model = initialize_model(model_name=config.MODEL_NAME, num_classes=config.NO_OF_CLASSES, feature_extract=True, use_pretrained=True) else: model = load_ckp(ckp_path) # Device is available or not device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') # If the user wants the gpu mode, check if cuda is available if (use_gpu == True) and (torch.cuda.is_available() == False): print("GPU mode is not available, using CPU...") use_gpu = False # MOVE MODEL TO AVAILBALE DEVICE model.to(device) # DEFINE OPTIMIZER optimizer = optimizer_fn(model_name=config.MODEL_NAME, model=model, lr_rate=config.LR_RATE) # DEFINE SCHEDULER scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode="min", patience=5, factor=0.3, verbose=True) # DEFINE LOSS FUNCTION criterion = loss_fn() # LOAD BEST MODEL'S WEIGHTS best_model_wts = copy.deepcopy(model.state_dict()) # BEST VALIDATION SCORE if ckp_path == None: best_score = -1 # IF MODEL IS TRAIN FROM SCRATCH else: best_score = model.best_score # IF MODEL IS RE-TRAIN # NO OF ITERATION no_epochs = config.EPOCHS # KEEP TRACK OF LOSS AND ACCURACY IN EACH EPOCH stats = { 'train_losses': [], 'valid_losses': [], 'train_accuracies': [], 'valid_accuracies': [] } print("Models's Training Start......") for epoch in range(1, no_epochs + 1): train_loss, train_score = train_fn(data_loaders, model, optimizer, criterion, device, phase='train') val_loss, val_score = eval_fn(data_loaders, model, criterion, device=config.DEVICE, phase='valid') scheduler.step(val_loss) # SAVE MODEL'S WEIGHTS IF MODEL' VALIDATION ACCURACY IS INCREASED if val_score > best_score: print( 'Validation score increased ({:.6f} --> {:.6f}). Saving model ...' .format(best_score, val_score)) best_score = val_score best_model_wts = copy.deepcopy( model.state_dict()) #Saving the best model' weights # MAKE A RECORD OF AVERAGE LOSSES AND ACCURACY IN EACH EPOCH FOR PLOTING stats['train_losses'].append(train_loss) stats['valid_losses'].append(val_loss) stats['train_accuracies'].append(train_score) stats['valid_accuracies'].append(val_score) # PRINT TRAINING AND VALIDATION LOOS/ACCURACIES AFTER EACH EPOCH epoch_len = len(str(no_epochs)) print_msg = (f'[{epoch:>{epoch_len}}/{no_epochs:>{epoch_len}}] ' + '\t' + f'train_loss: {train_loss:.5f} ' + '\t' + f'train_score: {train_score:.5f} ' + '\t' + f'valid_loss: {val_loss:.5f} ' + '\t' + f'valid_score: {val_score:.5f}') print(print_msg) # load best model weights model.load_state_dict(best_model_wts) # create checkpoint variable and add important data model.class_to_idx = data_loaders['train'].dataset.class_to_idx model.best_score = best_score model.model_name = config.MODEL_NAME checkpoint = { 'epoch': no_epochs, 'lr_rate': config.LR_RATE, 'model_name': config.MODEL_NAME, 'batch_size': config.BATCH_SIZE, 'valid_score': best_score, 'optimizer': optimizer.state_dict(), 'state_dict': model.state_dict(), 'class_to_idx': model.class_to_idx } # SAVE CHECKPOINT save_ckp(checkpoint, save_dir, file_name) print("Models's Training is Successfull......") return model
def run(fold): dfx = pd.read_csv(config.TRAINING_FILE) df_train = dfx[dfx.kfold != fold].reset_index(drop=True) df_valid = dfx[dfx.kfold == fold].reset_index(drop=True) train_dataset = TweetDataset(tweet=df_train.text.values, sentiment=df_train.sentiment.values, selected_text=df_train.selected_text.values) train_data_loader = torch.utils.data.DataLoader( train_dataset, batch_size=config.TRAIN_BATCH_SIZE, num_workers=4) valid_dataset = TweetDataset(tweet=df_valid.text.values, sentiment=df_valid.sentiment.values, selected_text=df_valid.selected_text.values) valid_data_loader = torch.utils.data.DataLoader( valid_dataset, batch_size=config.VALID_BATCH_SIZE, num_workers=2) device = torch.device("cuda") model_config = transformers.BertConfig.from_pretrained(config.BERT_PATH) model_config.output_hidden_states = True model = TweetModel(conf=model_config) model.to(device) num_train_steps = int( len(df_train) / config.TRAIN_BATCH_SIZE * config.EPOCHS) param_optimizer = list(model.named_parameters()) no_decay = ["bias", "LayerNorm.bias", "LayerNorm.weight"] optimizer_parameters = [ { 'params': [ p for n, p in param_optimizer if not any(nd in n for nd in no_decay) ], 'weight_decay': 0.001 }, { 'params': [p for n, p in param_optimizer if any(nd in n for nd in no_decay)], 'weight_decay': 0.0 }, ] optimizer = AdamW(optimizer_parameters, lr=3e-5) scheduler = get_linear_schedule_with_warmup( optimizer, num_warmup_steps=0, num_training_steps=num_train_steps) es = utils.EarlyStopping(patience=2, mode="max") print(f"Training is Starting for fold={fold}") for epoch in range(config.EPOCHS): engine.train_fn(train_data_loader, model, optimizer, device, scheduler=scheduler) jaccard = engine.eval_fn(valid_data_loader, model, device) #print(f"Jaccard Score = {jaccard}") es(jaccard, model, model_path=f"model_{fold}.bin") if es.early_stop: print("Early stopping") break
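# utils.EarlyStopping is not shown in this file; below is a minimal sketch, under
# the assumption that it tracks the best validation score, saves the model when
# the score improves, and sets early_stop after `patience` epochs without
# improvement (mirroring how it is called above). Details are illustrative.
import torch

class EarlyStopping:
    def __init__(self, patience=2, mode="max", delta=0.0):
        self.patience = patience
        self.mode = mode
        self.delta = delta
        self.counter = 0
        self.best_score = None
        self.early_stop = False

    def __call__(self, score, model, model_path):
        # for "min" mode (e.g. a loss) flip the sign so that larger is always better
        score = score if self.mode == "max" else -score
        if self.best_score is None or score > self.best_score + self.delta:
            self.best_score = score
            self.counter = 0
            torch.save(model.state_dict(), model_path)
        else:
            self.counter += 1
            if self.counter >= self.patience:
                self.early_stop = True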
def train():
    # this function trains the model

    # read the training file and fill NaN values with "none"
    df = pd.read_csv(config.TRAINING_FILE).fillna("none")

    # map positive to 1 and negative to 0
    df.sentiment = df.sentiment.apply(lambda x: 1 if x == "positive" else 0)

    # split data into a single training and validation fold
    df_train, df_valid = model_selection.train_test_split(
        df, test_size=0.1, random_state=42, stratify=df.sentiment.values)

    # reset index
    df_train = df_train.reset_index(drop=True)
    df_valid = df_valid.reset_index(drop=True)

    # initialize BERTDataset from dataset.py for the training data
    train_dataset = dataset.BERTDataset(review=df_train.review.values,
                                        target=df_train.sentiment.values)

    # create training dataloader
    train_data_loader = torch.utils.data.DataLoader(
        train_dataset, batch_size=config.TRAIN_BATCH_SIZE, num_workers=4)

    # initialize BERTDataset from dataset.py for the validation data
    valid_dataset = dataset.BERTDataset(review=df_valid.review.values,
                                        target=df_valid.sentiment.values)

    # create validation dataloader
    valid_data_loader = torch.utils.data.DataLoader(
        valid_dataset, batch_size=config.VALID_BATCH_SIZE, num_workers=1)

    # initialize the cuda device (use cpu if you don't have a GPU)
    device = torch.device("cuda")
    # load the model and send it to the device
    model = BERTBasedUncased()
    model.to(device)

    # create the parameter groups we want to optimize;
    # we generally don't use any weight decay for bias and LayerNorm weights
    param_optimizer = list(model.named_parameters())
    no_decay = ["bias", "LayerNorm.bias", "LayerNorm.weight"]
    optimizer_parameters = [
        {
            "params": [
                p for n, p in param_optimizer
                if not any(nd in n for nd in no_decay)
            ],
            "weight_decay": 0.001,
        },
        {
            "params": [
                p for n, p in param_optimizer
                if any(nd in n for nd in no_decay)
            ],
            "weight_decay": 0.0,
        },
    ]

    # calculate the number of training steps; this is used by the scheduler
    num_train_steps = int(
        len(df_train) / config.TRAIN_BATCH_SIZE * config.EPOCHS)

    # AdamW is the most widely used optimizer for transformer based networks
    optimizer = AdamW(optimizer_parameters, lr=3e-5)

    # fetch a scheduler (you can also try reduce-lr-on-plateau)
    scheduler = get_linear_schedule_with_warmup(
        optimizer, num_warmup_steps=0, num_training_steps=num_train_steps)

    # wrap the model in DataParallel if you have multiple GPUs
    model = nn.DataParallel(model)

    # start training the epochs
    best_accuracy = 0
    for epoch in range(config.EPOCHS):
        # train the model
        engine.train_fn(train_data_loader, model, optimizer, device, scheduler)
        # evaluate the model
        outputs, targets = engine.eval_fn(valid_data_loader, model, device)
        # threshold the outputs
        outputs = np.array(outputs) >= 0.5
        # calculate and print the accuracy
        accuracy = metrics.accuracy_score(targets, outputs)
        print(f"Accuracy Score = {accuracy}")
        # save the model only if the accuracy improves on the best so far
        if accuracy > best_accuracy:
            torch.save(model.state_dict(), config.MODEL_PATH)
            best_accuracy = accuracy
def run_training(): image_files = glob.glob(os.path.join(config.DATA_DIR, "*.png")) targets_orig = [x.split("/")[-1][:-4].split('_')[0] for x in image_files] targets = [[c for c in x] for x in targets_orig] targets_flat = [c for clist in targets for c in clist] lbl_enc = preprocessing.LabelEncoder() lbl_enc.fit(targets_flat) np.save(config.LABEL_ENCODER_SAVE_PATH, lbl_enc.classes_) targets_enc = [lbl_enc.transform(x) for x in targets] # print(targets_enc) # new_targets_enc= [] # for i,target in enumerate(targets_enc): # tmp = np.array([-1,-1,-1,-1,-1]) # for idx, item in enumerate(target): # # print(idx) # # print('i',i) # tmp[idx] = item # # print(image_files[i]) # new_targets_enc.append(tmp) # print(new_targets_enc) targets_enc = np.array(targets_enc) targets_enc = targets_enc + 1 ( train_imgs, test_imgs, train_targets, test_targets, _, test_targets_orig, ) = model_selection.train_test_split( image_files, targets_enc, targets_orig, test_size=0.1, random_state=42 ) train_dataset = dataset.ClassificationDataset( image_paths=train_imgs, targets=train_targets, resize=(config.IMAGE_HEIGHT, config.IMAGE_WIDTH), ) train_loader = torch.utils.data.DataLoader( train_dataset, batch_size=config.BATCH_SIZE, num_workers=config.NUM_WORKERS, shuffle=True, ) test_dataset = dataset.ClassificationDataset( image_paths=test_imgs, targets=test_targets, resize=(config.IMAGE_HEIGHT, config.IMAGE_WIDTH), ) test_loader = torch.utils.data.DataLoader( test_dataset, batch_size=config.BATCH_SIZE, num_workers=config.NUM_WORKERS, shuffle=False, ) model = CaptchaModel(num_chars=len(lbl_enc.classes_)) model.to(config.DEVICE) optimizer = torch.optim.Adam(model.parameters(), lr=3e-4) scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau( optimizer, factor=0.8, patience=5, verbose=True ) for epoch in range(config.EPOCHS): train_loss = engine.train_fn(model, train_loader, optimizer) valid_preds, test_loss = engine.eval_fn(model, test_loader) valid_captcha_preds = [] for vp in valid_preds: current_preds = decode_predictions(vp, lbl_enc) valid_captcha_preds.extend(current_preds) combined = list(zip(test_targets_orig, valid_captcha_preds)) print(combined[:10]) test_dup_rem = test_targets_orig accuracy = metrics.accuracy_score(test_dup_rem, valid_captcha_preds) print( f"Epoch={epoch}, Train Loss={train_loss}, Test Loss={test_loss} Accuracy={accuracy}" ) scheduler.step(test_loss) torch.save(model.state_dict(), config.MODEL_SAVE_PATH)
def run(): dfx = pd.read_csv(config.TRAINING_FILE).dropna().reset_index(drop=True) #stratify split so that class can be balanced for both train and validation ==>> it means number of positive class will be equal to negative class for train ===>>same for validation dataset also df_train, df_valid = model_selection.train_test_split( dfx, test_size=0.1, random_state=42, stratify=dfx.sentiment.values) df_train = df_train.reset_index(drop=True) df_valid = df_valid.reset_index(drop=True) train_dataset = dataset.TweetDataset( tweet=df_train.text.values, target=df_train.sentiment.values, selected_text=df_train.selected_text.values) train_data_loader = torch.utils.data.DataLoader( train_dataset, batch_size=config.TRAIN_BATCH_SIZE, num_workers=4) valid_dataset = dataset.TweetDataset( tweet=df_valid.text.values, target=df_valid.sentiment.values, selected_text=df_valid.selected_text.values) valid_data_loader = torch.utils.data.DataLoader( valid_dataset, batch_size=config.VALID_BATCH_SIZE, num_workers=1) device = torch.device("cuda") model = BERTBaseUncased() model.to(device) #specify what parameters you want to train param_optimizer = list(model.named_parameters()) #we don't want any deacy for these layer names such as bias and othr following things no_decay = ["bias", "LayerNorm.bias", "LayerNorm.weight"] optimizer_parameters = [ { #don't decay weight for above no_decay list else decay "params": [ p for n, p in param_optimizer if not any(nd in n for nd in no_decay) ], "weight_decay": 0.001, }, { "params": [p for n, p in param_optimizer if any(nd in n for nd in no_decay)], "weight_decay": 0.0, }, ] num_train_steps = int( len(df_train) / config.TRAIN_BATCH_SIZE * config.EPOCHS) #experiment with lr optimizer = AdamW(optimizer_parameters, lr=3e-5) #scheduler can be of your choice scheduler = get_linear_schedule_with_warmup( optimizer, num_warmup_steps=0, num_training_steps=num_train_steps) #convert model to multi-gpu model --->> no need to do this if you have not multiple gpus model = nn.DataParallel(model) #evaluation matrix is jacccard best_jaccard = 0 for epoch in range(config.EPOCHS): engine.train_fn(train_data_loader, model, optimizer, device, scheduler) jaccard = engine.eval_fn(valid_data_loader, model, device) print(f"Jaccard Score = {jaccard}") if jaccard > best_jaccard: torch.save(model.state_dict(), config.MODEL_PATH) best_jaccard = jaccard
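# engine.eval_fn above returns a Jaccard score; for reference, here is a minimal
# sketch of the word-level Jaccard similarity commonly used for this selected-text
# task. This helper is an assumption for illustration and is not defined in the
# snippet above.
def jaccard(str1, str2):
    # intersection over union of the word sets of the two strings
    a = set(str(str1).lower().split())
    b = set(str(str2).lower().split())
    if len(a) == 0 and len(b) == 0:
        return 0.5
    c = a.intersection(b)
    return float(len(c)) / (len(a) + len(b) - len(c))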
def train(): data = open(config.Metadata).read().strip().split('\n')[:10] text_data, audio_file_name = preprocess(data) del data gc.collect() transforms = [ torchaudio.transforms.FrequencyMasking(freq_mask_param=15), torchaudio.transforms.TimeMasking(time_mask_param=35) ] train_text_data, val_text_data, train_audio_file_name, val_audio_file_name = train_test_split( text_data, audio_file_name, test_size=0.2 ) train_data = dataloader.TransformerLoader( files_name=train_audio_file_name, text_data=train_text_data, mel_transforms=transforms, normalize=True ) val_data = dataloader.TransformerLoader( files_name=val_audio_file_name, text_data=val_text_data, normalize=True ) pad_idx = 0 train_loader = torch.utils.data.DataLoader( train_data, batch_size=config.Batch_Size, num_workers=1, pin_memory=True, collate_fn=dataloader.MyCollate( pad_idx=pad_idx, spect_pad=-config.scaling_factor ) ) val_loader = torch.utils.data.DataLoader( val_data, batch_size=config.Batch_Size, num_workers=1, pin_memory=True, collate_fn=dataloader.MyCollate( pad_idx=pad_idx, spect_pad=-config.scaling_factor ) ) vocab_size = len(train_data.char_to_idx) + 1 model = TransformerTTS( vocab_size=vocab_size, embed_dims=config.embed_dims, hidden_dims=config.hidden_dims, heads=config.heads, forward_expansion=config.forward_expansion, num_layers=config.num_layers, dropout=config.dropout, mel_dims=config.n_mels, max_len=config.max_len, pad_idx=config.pad_idx ) # device = torch.device('cuda' if torch.cuda.is_available else 'cpu') # torch.backends.cudnn.benchmark = True device = torch.device('cpu') model = model.to(device) optimizer = transformers.AdamW(model.parameters(), lr=config.LR) num_training_steps = config.Epochs*len(train_data)//config.Batch_Size scheduler = transformers.get_cosine_schedule_with_warmup( optimizer, num_warmup_steps=config.warmup_steps*num_training_steps, num_training_steps=num_training_steps ) epoch_start = 0 if os.path.exists(config.checkpoint): checkpoint = torch.load(config.checkpoint) model.load_state_dict(checkpoint['model_state_dict']) optimizer.load_state_dict(checkpoint['optimizer_state_dict']) scheduler.load_state_dict(checkpoint['scheduler_state_dict']) epoch_start = checkpoint['epoch'] print(f'---------[INFO] Restarting Training from Epoch {epoch_start} -----------\n') best_loss = 1e10 best_model = model.state_dict() print('--------- [INFO] STARTING TRAINING ---------\n') for epoch in range(epoch_start, config.Epochs): train_loss = engine.train_fn(model, train_loader, optimizer, scheduler, device) val_loss = engine.eval_fn(model, val_loader, device) print(f'EPOCH -> {epoch+1}/{config.Epochs} | TRAIN LOSS = {train_loss} | VAL LOSS = {val_loss} | LR = {scheduler.get_lr()[0]} \n') torch.save({ 'epoch' : epoch, 'model_state_dict' : model.state_dict(), 'optimizer_state_dict' : optimizer.state_dict(), 'scheduler_state_dict' : scheduler.state_dict(), 'loss': val_loss, }, config.checkpoint) if best_loss > val_loss: best_loss = val_loss best_model = model.state_dict() torch.save(best_model, config.Model_Path)
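# dataloader.MyCollate is referenced above but not shown; a minimal sketch of a
# collate that pads the token ids with pad_idx and the mel spectrograms with
# spect_pad up to the longest item in the batch. The per-item layout
# (tokens, mel) is an assumption for illustration.
import torch
from torch.nn.utils.rnn import pad_sequence

class MyCollate:
    def __init__(self, pad_idx, spect_pad):
        self.pad_idx = pad_idx
        self.spect_pad = spect_pad

    def __call__(self, batch):
        tokens = [item[0] for item in batch]   # 1-D LongTensors of character ids
        mels = [item[1] for item in batch]     # (n_mels, time) spectrogram tensors
        tokens = pad_sequence(tokens, batch_first=True, padding_value=self.pad_idx)
        max_t = max(m.shape[-1] for m in mels)
        mels = torch.stack([
            torch.nn.functional.pad(m, (0, max_t - m.shape[-1]), value=self.spect_pad)
            for m in mels
        ])
        return tokens, mels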
def run(): df1 = pd.read_csv("../data/jigsaw-toxic-comment-train.csv", usecols=["comment_text", "toxic"]) df2 = pd.read_csv("../data/jigsaw-unintended-bias-train.csv", usecols=["comment_text", "toxic"]) df_train = pd.concat([df1, df2], axis=0).reset_index(drop=True) df_valid = pd.read_csv("../data/validation.csv") train_dataset = dataset.BERTDataset( comment_text=df_train.comment_text.values, target=df_train.toxic.values) train_data_loader = torch.utils.data.DataLoader( train_dataset, batch_size=config.TRAIN_BATCH_SIZE, num_workers=4) valid_dataset = dataset.BERTDataset( comment_text=df_valid.comment_text.values, target=df_valid.toxic.values) valid_data_loader = torch.utils.data.DataLoader( valid_dataset, batch_size=config.VALID_BATCH_SIZE, num_workers=1) device = torch.device("cuda") model = BERTBaseUncased() model.to(device) param_optimizer = list(model.named_parameters()) no_decay = ["bias", "LayerNorm.bias", "LayerNorm.weight"] optimizer_parameters = [ { 'params': [ p for n, p in param_optimizer if not any(nd in n for nd in no_decay) ], 'weight_decay': 0.001 }, { 'params': [p for n, p in param_optimizer if any(nd in n for nd in no_decay)], 'weight_decay': 0.0 }, ] num_train_steps = int( len(df_train) / config.TRAIN_BATCH_SIZE * config.EPOCHS) optimizer = AdamW(optimizer_parameters, lr=3e-5) scheduler = get_linear_schedule_with_warmup( optimizer, num_warmup_steps=0, num_training_steps=num_train_steps) model = nn.DataParallel(model) best_accuracy = 0 for epoch in range(config.EPOCHS): engine.train_fn(train_data_loader, model, optimizer, device, scheduler) outputs, targets = engine.eval_fn(valid_data_loader, model, device) targets = np.array(targets) >= 0.5 accuracy = metrics.roc_auc_score(targets, outputs) print(f"AUC Score = {accuracy}") if accuracy > best_accuracy: torch.save(model.state_dict(), config.MODEL_PATH) best_accuracy = accuracy
def run():
    dfx = pd.read_csv(config.TRAINING_FILE,
                      nrows=100).dropna().reset_index(drop=True)

    df_train, df_valid = model_selection.train_test_split(
        dfx, test_size=0.1, random_state=42, stratify=dfx["sentiment"].values)

    df_train = df_train.reset_index(drop=True)
    df_valid = df_valid.reset_index(drop=True)

    train_dataset = dataset.TweetDataset(
        tweet=df_train["text"].values,
        sentiment=df_train["sentiment"].values,
        selected_text=df_train["selected_text"].values,
    )

    train_dataloader = torch.utils.data.DataLoader(
        train_dataset,
        batch_size=config.TRAIN_BATCH_SIZE,
        # num_workers=4,
    )

    valid_dataset = dataset.TweetDataset(
        tweet=df_valid["text"].values,
        sentiment=df_valid["sentiment"].values,
        selected_text=df_valid["selected_text"].values,
    )

    valid_dataloader = torch.utils.data.DataLoader(
        valid_dataset,
        batch_size=config.VALIDATION_BATCH_SIZE,
        # num_workers=1,
    )

    device = torch.device("cpu")
    model = BERTBasedUncased()
    model.to(device)

    param_optimizer = list(model.named_parameters())
    no_decay = ["bias", "LayerNorm.bias", "LayerNorm.weight"]
    optimizer_parameters = [
        {
            "params": [
                p for n, p in param_optimizer
                if not any(nd in n for nd in no_decay)
            ],
            "weight_decay": 0.001,
        },
        {
            "params": [p for n, p in param_optimizer if any(nd in n for nd in no_decay)],
            "weight_decay": 0.0,
        },
    ]

    # total number of optimization steps: batches per epoch times epochs
    num_train_steps = int(
        len(df_train) / config.TRAIN_BATCH_SIZE * config.EPOCHS)
    optimizer = AdamW(optimizer_parameters, lr=3e-5)
    scheduler = get_linear_schedule_with_warmup(
        optimizer, num_warmup_steps=0, num_training_steps=num_train_steps)

    best_jaccard = 0
    for epoch in range(config.EPOCHS):
        engine.train_fn(train_dataloader, model, optimizer, device, scheduler)
        jaccard = engine.eval_fn(valid_dataloader, model, device)
        print(f"Jaccard score : {jaccard}")
        if jaccard > best_jaccard:
            torch.save(model.state_dict(), config.MODEL_PATH)
            best_jaccard = jaccard
def run():
    df1 = pd.read_csv("../input/jigsaw-multilingual-toxic-comment-train.csv",
                      usecols=['comment_text', 'toxic'])
    df2 = pd.read_csv("../input/jigsaw-unintended-bias-train.csv",
                      usecols=['comment_text', 'toxic'])

    # combine df1 and df2 into one big dataframe
    df_train = pd.concat([df1, df2], axis=0).reset_index(drop=True)

    # the validation dataframe is provided by kaggle
    df_valid = pd.read_csv("../input/validation.csv")

    train_dataset = dataset.BERTDataset(
        comment_text=df_train.comment_text.values,
        target=df_train.toxic.values)

    # --------------------------------------
    # a distributed sampler is needed when training on TPU cores
    train_sampler = torch.utils.data.distributed.DistributedSampler(
        train_dataset,
        num_replicas=xm.xrt_world_size(),
        rank=xm.get_ordinal(),
        shuffle=True)
    # --------------------------------------

    train_data_loader = torch.utils.data.DataLoader(
        train_dataset,
        batch_size=config.TRAIN_BATCH_SIZE,
        num_workers=4,
        sampler=train_sampler,
        # torch_xla crashes if the last batch has a different size, so drop it
        drop_last=True)

    valid_dataset = dataset.BERTDataset(
        comment_text=df_valid.comment_text.values,
        target=df_valid.toxic.values)

    # --------------------------------------
    # same for validation: use a distributed sampler on TPU
    valid_sampler = torch.utils.data.distributed.DistributedSampler(
        valid_dataset,
        num_replicas=xm.xrt_world_size(),
        rank=xm.get_ordinal(),
        shuffle=True)
    # --------------------------------------

    valid_data_loader = torch.utils.data.DataLoader(
        valid_dataset,
        batch_size=config.VALID_BATCH_SIZE,
        num_workers=1,
        sampler=valid_sampler,
        # no need for drop_last here
    )

    device = xm.xla_device()  # xla_device means TPU
    model = BERTBaseUncased()
    # the ParallelLoader moves the batches to the device,
    # but the model itself still has to be moved
    model.to(device)

    # specify which parameters you want to optimize;
    # we don't want any decay for layer names such as bias and LayerNorm
    param_optimizer = list(model.named_parameters())
    no_decay = ["bias", "LayerNorm.bias", "LayerNorm.weight"]
    optimizer_parameters = [
        {
            # decay the weights that are not in the no_decay list
            "params": [
                p for n, p in param_optimizer
                if not any(nd in n for nd in no_decay)
            ],
            "weight_decay": 0.001,
        },
        {
            "params": [p for n, p in param_optimizer if any(nd in n for nd in no_decay)],
            "weight_decay": 0.0,
        },
    ]

    num_train_steps = int(
        len(df_train) / config.TRAIN_BATCH_SIZE / xm.xrt_world_size() *
        config.EPOCHS)
    lr = 3e-5 * xm.xrt_world_size()  # scale the lr with the number of cores

    optimizer = AdamW(optimizer_parameters, lr=lr)
    scheduler = get_linear_schedule_with_warmup(
        optimizer, num_warmup_steps=0, num_training_steps=num_train_steps)

    best_accuracy = 0
    for epoch in range(config.EPOCHS):
        # parallel loaders feed each TPU core
        para_loader = pl.ParallelLoader(train_data_loader, [device])
        engine.train_fn(para_loader.per_device_loader(device), model,
                        optimizer, device, scheduler)

        para_loader = pl.ParallelLoader(valid_data_loader, [device])
        outputs, targets = engine.eval_fn(
            para_loader.per_device_loader(device), model, device)

        # threshold the targets (they are soft labels), not the outputs
        targets = np.array(targets) >= 0.5
        accuracy = metrics.accuracy_score(targets, outputs)
        print(f"Accuracy Score = {accuracy}")
        if accuracy > best_accuracy:
            # on TPU, use xm.save instead of torch.save
            xm.save(model.state_dict(), config.MODEL_PATH)
            best_accuracy = accuracy
def train(): df = pd.read_csv(config.TRAINING_FILE).fillna("none") df['sentiment'] = df['sentiment'].map({"positive": 1, "negative": 0}) df_train, df_valid = train_test_split(df, test_size=0.1, random_state=42, stratify=df.sentiment.values) # reset index of both splits df_train = df_train.reset_index(drop=True) df_valid = df_valid.reset_index(drop=True) train_dataset = dataset.BERTDataset(review=df_train.review.values, target=df_train.sentiment.values) train_dataloader = torch.utils.data.DataLoader( train_dataset, batch_size=config.TRAIN_BATCH_SIZE, shuffle=False, num_workers=4, ) valid_dataset = dataset.BERTDataset(review=df_valid.review.values, target=df_valid.sentiment.values) valid_dataloader = torch.utils.data.DataLoader( valid_dataset, batch_size=config.VALID_BATCH_SIZE, shuffle=False, num_workers=4, ) device = torch.device("cuda") model = BERTBaseUncased() model.to(device) no_decay = ['bias', 'LayerNorm.weight'] optimizer_grouped_parameters = [{ 'params': [ p for n, p in model.named_parameters() if not any(nd in n for nd in no_decay) ], 'weight_decay': 0.01 }, { 'params': [ p for n, p in model.named_parameters() if any(nd in n for nd in no_decay) ], 'weight_decay': 0.0 }] optimizer = AdamW(optimizer_grouped_parameters, lr=3e-5) scheduler = get_linear_schedule_with_warmup( optimizer=optimizer, num_warmup_steps=0, num_training_steps=int(len(df_train) / config.TRAIN_BATCH_SIZE) * config.EPOCHS) best_accuracy = 0 for epoch in range(config.EPOCHS): engine.train_fn(train_dataloader, model, optimizer, device, scheduler) outputs, targets = engine.eval_fn(valid_dataloader, model, device) outputs = np.array(outputs) >= 0.5 accuracy = metrics.accuracy_score(outputs, targets) print(f"Accuracy: {accuracy:.3f}") if accuracy > best_accuracy: best_accuracy = accuracy torch.save(model.state_dict(), config.MODEL_PATH)
    {'params': [p for n, p in param_optimizer if any(nd in n for nd in no_decay)],
     'weight_decay': 0.0}
]

optimizer = AdamW(optimizer_grouped_parameters, lr=3e-5)

total_steps = len(train_data_loader) * config.epochs
scheduler = get_linear_schedule_with_warmup(
    optimizer,
    num_warmup_steps=0,
    num_training_steps=total_steps
)

best_accuracy = 0
for epoch in range(config.epochs):
    engine.train_fn(train_data_loader, model, optimizer, device, scheduler, epoch)
    outputs, targets = engine.eval_fn(val_data_loader, model, device)
    outputs = np.array(outputs) >= 0.5
    # accuracy = metrics.roc_auc_score(targets, outputs)
    accuracy2 = metrics.accuracy_score(targets, outputs)
    # f1_score_micro = metrics.f1_score(targets, outputs, average='micro')
    # f1_score_macro = metrics.f1_score(targets, outputs, average='macro')
    # print(f"Epoch = {epoch}, roc_auc Score = {accuracy}")
    print(f"Epoch = {epoch}, Accuracy Score = {accuracy2}")
    # print(f"Epoch = {epoch}, f1_micro Score = {f1_score_micro}")
    # print(f"Epoch = {epoch}, f1_macro Score = {f1_score_macro}")
    if accuracy2 > best_accuracy:
        torch.save(model.state_dict(), config.model_path)
        best_accuracy = accuracy2
def run(): train_dataset = torchvision.datasets.CIFAR10(root='input/data', train=True, download=True) val_dataset = torchvision.datasets.CIFAR10(root='input/data', train=False, download=True) train_transform = alb.Compose([ alb.Resize(config.image_height, config.image_width, always_apply=True), alb.Normalize(config.mean, config.std, always_apply=True), alb.HorizontalFlip(p=0.1), alb.RandomBrightness(p=0.2), alb.RandomContrast(p=0.1), alb.RGBShift(p=0.1), alb.GaussNoise(p=0.1), ]) val_transforms = alb.Compose([ alb.Resize(config.image_height, config.image_width, always_apply=True), alb.Normalize(config.mean, config.std, always_apply=True) ]) train_data = dataloader.dataloader(train_dataset, train_transform) val_data = dataloader.dataloader(val_dataset, val_transforms) train_loader = torch.utils.data.DataLoader(train_data, num_workers=4, pin_memory=True, batch_size=config.Batch_Size) val_loader = torch.utils.data.DataLoader(val_data, num_workers=4, pin_memory=True, batch_size=config.Batch_Size) model = ImageTransformer.ViT( patch_height=16, patch_width=16, embedding_dims=768, dropout=0.1, heads=4, num_layers=4, forward_expansion=4, max_len=int((32 * 32) / (16 * 16)), layer_norm_eps=1e-5, num_classes=10, ) if torch.cuda.is_available(): accelarator = 'cuda' else: accelarator = 'cpu' device = torch.device(accelarator) torch.backends.cudnn.benchmark = True model = model.to(device) optimizer = transformers.AdamW(model.parameters(), lr=config.LR, weight_decay=config.weight_decay) num_training_steps = int( (config.Epochs * len(train_dataset)) / config.Batch_Size) scheduler = transformers.get_linear_schedule_with_warmup( optimizer, num_warmup_steps=int(0.1 * num_training_steps), num_training_steps=num_training_steps) best_acc = 0 best_model = 0 for epoch in range(config.Epochs): train_acc, train_loss = engine.train_fn(model, train_loader, optimizer, scheduler, device) val_acc, val_loss = engine.eval_fn(model, val_loader, device) print( f'\nEPOCH = {epoch+1} / {config.Epochs} | LR = {scheduler.get_last_lr()[0]}' ) print(f'TRAIN ACC = {train_acc*100}% | TRAIN LOSS = {train_loss}') print(f'VAL ACC = {val_acc*100}% | VAL LOSS = {val_loss}') if val_acc > best_acc: best_acc = val_acc best_model = model.state_dict() torch.save(best_model, config.Model_Path)
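# Quick illustrative check (values assumed) of the max_len used above: a 32x32
# CIFAR image cut into 16x16 patches yields (32 * 32) / (16 * 16) = 4 patches,
# i.e. a transformer sequence length of 4 per image.
import torch

img = torch.randn(1, 3, 32, 32)                        # one CIFAR-sized image
patches = img.unfold(2, 16, 16).unfold(3, 16, 16)      # -> (1, 3, 2, 2, 16, 16)
patches = patches.contiguous().view(1, 3, -1, 16, 16)  # -> (1, 3, 4, 16, 16)
print(patches.shape[2])                                 # 4 patches per image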
def run():
    logger.info("using device: {}".format(config.DEVICE))

    train_data = process_raw_data()
    train_list, test_list = train_test_split(train_data,
                                             test_size=0.2,
                                             random_state=34)

    # load the GPT-2 model
    model, n_ctx = create_model(False)
    model.to(config.DEVICE)

    # whether to run on several GPUs in parallel: you can choose which cards to train on
    multi_gpu = False
    if torch.cuda.is_available() and torch.cuda.device_count() > 1:
        logger.info("Using more than one GPU to train...")
        os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
        os.environ["CUDA_VISIBLE_DEVICES"] = config.DEVICE_NUM
        model = DataParallel(
            model, device_ids=[int(i) for i in config.DEVICE_NUM.split(",")])
        multi_gpu = True

    # log the number of model parameters
    num_parameters = sum(
        [parameter.numel() for parameter in model.parameters()])
    logger.info("number of model parameters: {}".format(num_parameters))

    # load the data
    logger.info("loading training data")
    train_dataset = DialogueDataset(train_list, n_ctx)
    batch_num = len(train_dataset) // config.BATCH_SIZE
    test_dataset = DialogueDataset(test_list, n_ctx)
    test_batch_num = len(test_dataset) // config.BATCH_SIZE

    train_data_loader = DataLoader(train_dataset,
                                   batch_size=config.BATCH_SIZE,
                                   shuffle=True,
                                   num_workers=4,
                                   collate_fn=collate_fn)
    test_data_loader = DataLoader(test_dataset,
                                  batch_size=config.BATCH_SIZE,
                                  shuffle=True,
                                  num_workers=1,
                                  collate_fn=collate_fn)

    # total_steps: the total number of optimizer updates over all epochs
    # (len(train_data_loader) is already the number of batches per epoch)
    total_steps = int(
        len(train_data_loader) * config.EPOCHS / config.GRADIENT_ACCUMULATION)
    logger.info('total training steps = {}'.format(total_steps))

    # set up the optimizer and use a warm-up strategy at the start of training
    optimizer = AdamW(model.parameters(),
                      lr=config.LEARNING_RATE,
                      correct_bias=True)
    scheduler = get_linear_schedule_with_warmup(
        optimizer,
        num_warmup_steps=config.WARM_STEPS,
        num_training_steps=total_steps)

    logger.info("start training...")
    best_loss = 100
    best_accuracy = 0
    for epoch in range(config.EPOCHS):
        train_fn(model, train_data_loader, optimizer, scheduler, epoch,
                 batch_num, multi_gpu)
        loss, accuracy = eval_fn(model, test_data_loader, test_batch_num,
                                 multi_gpu)
        if loss < best_loss or accuracy > best_accuracy:
            logger.info('saving model for epoch {}, best loss: {}'.format(
                epoch + 1, loss))
            model_to_save = model.module if hasattr(model, 'module') else model
            model_to_save.save_pretrained(config.MODEL_PATH)
            best_loss = loss
            best_accuracy = accuracy
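# collate_fn is referenced above but not defined in this file; a minimal sketch,
# assuming each dataset item is a sequence of token ids that must be padded to
# the longest sequence in the batch (the pad id 0 here is an assumption).
import torch
from torch.nn.utils.rnn import pad_sequence

def collate_fn(batch, pad_token_id=0):
    # batch: list of variable-length sequences of token ids
    input_ids = [torch.as_tensor(item, dtype=torch.long) for item in batch]
    return pad_sequence(input_ids, batch_first=True, padding_value=pad_token_id)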
def run_training(): image_files = glob.glob(os.path.join(config.DATA_DIR, "*.png")) targets_orig = [x.split("/")[-1][:-4] for x in image_files] # abcd = [a,b,c,d] targets = [[c for c in x] for x in targets_orig] targets_flat = [c for clist in targets for c in clist] lbl_enc = preprocessing.LabelEncoder() lbl_enc.fit(targets_flat) targets_enc = [lbl_enc.transform(x) for x in targets] targets_enc = np.array(targets_enc) + 1 # '0' is for unknown # print(targets_enc) # print(len(lbl_enc.classes_)) train_imgs, test_imgs, train_targets, test_targets, _, test_targets_orig = model_selection.train_test_split( image_files, targets_enc, targets_orig, test_size=0.1, random_state=42) train_dataset = dataset.ClassificationDataset( image_paths=train_imgs, targets=train_targets, resize=(config.IMAGE_HEIGHT,config.IMAGE_WIDTH) ) train_loader = torch.utils.data.DataLoader( train_dataset, batch_size=config.BATCH_SIZE, num_workers=config.NUM_WORKERS, shuffle=True ) test_dataset = dataset.ClassificationDataset( image_paths=test_imgs, targets=test_targets, resize=(config.IMAGE_HEIGHT,config.IMAGE_WIDTH) ) test_loader = torch.utils.data.DataLoader( test_dataset, batch_size=config.BATCH_SIZE, num_workers=config.NUM_WORKERS, shuffle=False ) model = CaptchaModel(num_chars=len(lbl_enc.classes_)) # print(torch.cuda.memory_summary(device=None, abbreviated=False)) model.to(config.DEVICE) optimizer = torch.optim.Adam(model.parameters(), lr=3e-4) scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau( optimizer, factor=0.8, patience=5, verbose=True ) for epoch in range(config.EPOCHS): train_loss = engine.train_fn(model, train_loader, optimizer) valid_preds, test_loss = engine.eval_fn(model, test_loader) valid_cap_preds = [] for vp in valid_preds: current_preds = decode_predictions(vp,lbl_enc) valid_cap_preds.extend(current_preds) combined = list(zip(test_targets_orig, valid_cap_preds)) print(combined[:10]) test_dup_rem = [remove_duplicates(c) for c in test_targets_orig] accuracy = metrics.accuracy_score(test_dup_rem, valid_cap_preds) print(f"Epoch:{epoch}, train_loss={train_loss}, test_loss={test_loss}, accuracy={accuracy}") scheduler.step(test_loss)
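# decode_predictions and remove_duplicates are imported from elsewhere in this
# project; the sketch below shows one common way to implement them for this kind
# of CTC-style output (greedy argmax per timestep, map indices back through the
# label encoder, drop the reserved index 0, then collapse repeated characters).
# Treat it as an assumption about those helpers, not their exact definition.
import torch

def remove_duplicates(x):
    # collapse consecutive repeated characters: "aabbcc" -> "abc"
    if len(x) < 2:
        return x
    result = x[0]
    for ch in x[1:]:
        if ch != result[-1]:
            result += ch
    return result

def decode_predictions(preds, encoder):
    # preds: (timesteps, batch, num_classes) raw scores from the model
    preds = preds.permute(1, 0, 2)              # -> (batch, timesteps, num_classes)
    preds = torch.softmax(preds, dim=2)
    preds = torch.argmax(preds, dim=2).detach().cpu().numpy()
    captcha_preds = []
    for row in preds:
        chars = []
        for idx in row:
            idx = int(idx) - 1                   # undo the +1 shift; -1 means "unknown"
            chars.append("§" if idx == -1 else encoder.inverse_transform([idx])[0])
        decoded = "".join(chars).replace("§", "")
        captcha_preds.append(remove_duplicates(decoded))
    return captcha_preds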
def run(): dfx = pd.read_csv(config.TRAINING_FILE) print("Shape of datframe:",dfx.shape) df_train, df_valid = model_selection.train_test_split( dfx, test_size=0.1, random_state=42, stratify=dfx.label.values ) df_train = df_train.reset_index(drop=True) df_valid = df_valid.reset_index(drop=True) print("Shape of train datframe:",df_train.shape) print("Shape of validation dataframe:",df_valid.shape) train_dataset = dataset.BERTDataset( sent=df_train.sentences.values, target=df_train.label.values ) train_data_loader = torch.utils.data.DataLoader( train_dataset, batch_size=config.TRAIN_BATCH_SIZE, num_workers=8 ) valid_dataset = dataset.BERTDataset( sent=df_valid.sentences.values, target=df_valid.label.values ) valid_data_loader = torch.utils.data.DataLoader( valid_dataset, batch_size=config.VALID_BATCH_SIZE, num_workers=2 ) device = torch.device(config.DEVICE) model = BERT_CLASSIFIER() if config.RETRAIN: DEVICE = 'cuda' model.load_state_dict(torch.load(config.RETRAIN_MODEL_LOC)) model.to(device) param_optimizer = list(model.named_parameters()) no_decay = ["bias", "LayerNorm.bias", "LayerNorm.weight"] optimizer_parameters = [ { "params": [ p for n, p in param_optimizer if not any(nd in n for nd in no_decay) ], "weight_decay": 0.1, }, { "params": [ p for n, p in param_optimizer if any(nd in n for nd in no_decay) ], "weight_decay": 0.0, }, ] num_train_steps = int(len(df_train) / config.TRAIN_BATCH_SIZE * config.EPOCHS) optimizer = AdamW(optimizer_parameters, lr=config.LEARNING_RATE) scheduler = get_linear_schedule_with_warmup( optimizer, num_warmup_steps=0, num_training_steps=num_train_steps ) best_accuracy = 0 best_eval_loss = np.inf for epoch in range(config.EPOCHS): epoch_train_loss = engine.train_fn(train_data_loader, model, optimizer, device, scheduler) outputs, targets, epoch_eval_loss = engine.eval_fn(valid_data_loader, model, device) outputs = np.array(outputs) >= config.ACC_CUTOFF accuracy = metrics.accuracy_score(targets, outputs) print("Train loss = ", epoch_train_loss) print("Validation Loss = ", epoch_eval_loss) print("Accuracy Score =", accuracy) if config.TRAINING_MODE == 'ba': best_eval_loss = np.inf if accuracy > best_accuracy and epoch_eval_loss < best_eval_loss: print("Saving Model state") torch.save(model.state_dict(), config.MODEL_PATH) best_accuracy = accuracy best_eval_loss = epoch_eval_loss else: print("Saving model in dump folder") torch.save(model.state_dict(), config.MODEL_PATH_2 + f"{epoch}.bin")
def run():
    print("---------- Starting Data Reading -------")
    df1 = pd.read_csv("../input/jigsaw-toxic-comment-train.csv",
                      usecols=["comment_text", "toxic"])
    df2 = pd.read_csv("../input/jigsaw-unintended-bias-train.csv",
                      usecols=["comment_text", "toxic"])
    df_train = pd.concat([df1, df2], axis=0).reset_index(drop=True)
    df_valid = pd.read_csv("../input/validation.csv")
    print("---- Data Read Successfully --- ")

    # Previous single-file variant, kept for reference:
    # dfx = pd.read_csv(config.TRAINING_FILE).fillna("none")
    # dfx["sentiment"] = dfx["sentiment"].apply(
    #     lambda x: 1 if x == "positive" else 0
    # )
    # df_train, df_valid = model_selection.train_test_split(
    #     dfx,
    #     test_size=0.1,
    #     random_state=42,
    #     stratify=dfx["sentiment"].values
    # )
    # df_train = df_train.reset_index(drop=True)
    # df_valid = df_valid.reset_index(drop=True)

    train_dataset = dataset.BERTDataset(
        comment_text=df_train["comment_text"].values,
        target=df_train["toxic"].values)

    train_dataloader = torch.utils.data.DataLoader(
        train_dataset,
        batch_size=config.TRAIN_BATCH_SIZE,
        num_workers=4,
    )

    valid_dataset = dataset.BERTDataset(
        comment_text=df_valid["comment_text"].values,
        target=df_valid["toxic"].values)

    valid_dataloader = torch.utils.data.DataLoader(
        valid_dataset,
        batch_size=config.VALIDATION_BATCH_SIZE,
        num_workers=1,
    )
    print("---- DataLoaders Created Successfully --- ")

    device = torch.device("cuda")
    model = BERTBasedUncased()
    model.to(device)

    param_optimizer = list(model.named_parameters())
    no_decay = ["bias", "LayerNorm.bias", "LayerNorm.weight"]
    optimizer_parameters = [
        {
            "params": [
                p for n, p in param_optimizer
                if not any(nd in n for nd in no_decay)
            ],
            "weight_decay": 0.001,
        },
        {
            "params": [p for n, p in param_optimizer if any(nd in n for nd in no_decay)],
            "weight_decay": 0.0,
        },
    ]

    # batches per epoch times number of epochs
    num_train_steps = int(
        len(df_train) / config.TRAIN_BATCH_SIZE * config.EPOCHS)
    optimizer = AdamW(optimizer_parameters, lr=3e-5)
    scheduler = get_linear_schedule_with_warmup(
        optimizer, num_warmup_steps=0, num_training_steps=num_train_steps)

    best_accuracy = 0
    for epoch in range(config.EPOCHS):
        engine.train_fn(train_dataloader, model, optimizer, scheduler, device)
        outputs, targets = engine.eval_fn(valid_dataloader, model, device)
        # the unintended-bias targets are soft labels, so threshold them
        targets = np.array(targets) >= 0.5
        accuracy = metrics.roc_auc_score(targets, outputs)
        print(f"AUC Score {accuracy}")
        if accuracy > best_accuracy:
            torch.save(model.state_dict(), config.MODEL_PATH)
            best_accuracy = accuracy
def run(): # Reading the data file dfx = pd.read_csv(config.TRAINING_FILE, usecols=["comment_text", "toxic"]).fillna("none") # Spliting data into training 90% and validation 10% df_train, df_valid = model_selection.train_test_split( dfx, test_size=0.1, random_state=42, stratify=dfx.toxic.values) df_train = df_train.reset_index(drop=True) df_valid = df_valid.reset_index(drop=True) # pass the sentence and target from training dataset into class train_dataset = dataset.DISTILBERTDataset( comment_text=df_train.comment_text.values, target=df_train.toxic.values) # Combine the training inputs into a TensorDataset. train_data_loader = torch.utils.data.DataLoader( train_dataset, batch_size=config.TRAIN_BATCH_SIZE, num_workers=4) # pass the sentence and target from validation dataset into class valid_dataset = dataset.DISTILBERTDataset( comment_text=df_valid.comment_text.values, target=df_valid.toxic.values) # Combine the validation inputs into a TensorDataset. valid_data_loader = torch.utils.data.DataLoader( valid_dataset, batch_size=config.VALID_BATCH_SIZE, num_workers=1) device = torch.device("cuda") # define the device model = DISTILBERTBaseUncased() # define the model model.to(device) # copy the model to the gpu # Prepare optimizer and schedule (linear warmup and decay) param_optimizer = list(model.named_parameters()) no_decay = ["bias", "LayerNorm.bias", "LayerNorm.weight"] optimizer_parameters = [ { "params": [ p for n, p in param_optimizer if not any(nd in n for nd in no_decay) ], "weight_decay": 0.01, }, { "params": [p for n, p in param_optimizer if any(nd in n for nd in no_decay)], "weight_decay": 0.0, }, ] # Create the numer of training steps, optimizer and scheduler num_train_steps = int( len(df_train) / config.TRAIN_BATCH_SIZE * config.EPOCHS) optimizer = AdamW(optimizer_parameters, lr=2e-5) scheduler = get_linear_schedule_with_warmup( optimizer, num_warmup_steps=0, num_training_steps=num_train_steps) # running the loop for every epochs best_f1_score = 0 for epoch in range(config.EPOCHS): # passing training and validation funtion engine.train_fn(train_data_loader, model, optimizer, device, scheduler) outputs, targets = engine.eval_fn(valid_data_loader, model, device) outputs = np.array(outputs) >= 0.5 # evalution metrics f1_score = metrics.f1_score(targets, outputs) print(f"F1 Score = {f1_score}") # saving the model if f1_score > best_f1_score: torch.save(model.state_dict(), config.MODEL_PATH) best_f1_score = f1_score
def run(fold): dfx = pd.read_csv(config.TRAINING_FILE) df_train = dfx[dfx.kfold != fold].reset_index(drop=True) df_valid = dfx[dfx.kfold == fold].reset_index(drop=True) train_dataset = dataset.TweetDataset( tweets=df_train.text.values, sentiments=df_train.sentiment.values, selected_texts=df_train.selected_text.values) train_data_loader = torch.utils.data.DataLoader( train_dataset, batch_size=config.TRAIN_BATCH_SIZE, num_workers=4, shuffle=True) valid_dataset = dataset.TweetDataset( tweets=df_valid.text.values, sentiments=df_valid.sentiment.values, selected_texts=df_valid.selected_text.values) valid_data_loader = torch.utils.data.DataLoader( valid_dataset, batch_size=config.VALID_BATCH_SIZE, num_workers=4, shuffle=False) device = torch.device('cuda') model_config = transformers.XLNetConfig.from_pretrained( config.MODEL_CONFIG) model_config.output_hidden_states = True model = models.TweetModel(conf=model_config) model = model.to(device) num_train_steps = int( len(df_train) / config.TRAIN_BATCH_SIZE * config.EPOCHS) param_optimizer = list(model.named_parameters()) no_decay = ['bias', 'LayerNorm.bias', 'LayerNorm.weight'] optimizer_parameters = [{ 'params': [p for n, p in param_optimizer if not any(nd in n for nd in no_decay)], 'weight_decay': config.WEIGHT_DECAY }, { 'params': [p for n, p in param_optimizer if any(nd in n for nd in no_decay)], 'weight_decay': 0.0 }] base_opt = transformers.AdamW(optimizer_parameters, lr=config.LEARNING_RATE) optimizer = torchcontrib.optim.SWA(base_opt, swa_start=int(num_train_steps * config.SWA_RATIO), swa_freq=config.SWA_FREQ, swa_lr=None) scheduler = transformers.get_linear_schedule_with_warmup( optimizer=optimizer, num_warmup_steps=int(num_train_steps * config.WARMUP_RATIO), num_training_steps=num_train_steps) print(f'Training is starting for fold={fold}') for epoch in range(config.EPOCHS): engine.train_fn(train_data_loader, model, optimizer, device, scheduler=scheduler) jaccard = engine.eval_fn(valid_data_loader, model, device) if config.USE_SWA: optimizer.swap_swa_sgd() torch.save(model.state_dict(), f'{config.MODEL_SAVE_PATH}/model_{fold}.bin') return jaccard
def main(fold): COMPUTE_CV = True DEVICE = "cuda" if torch.cuda.is_available() else "cpu" data = pd.read_csv('../train_fold.csv') data['filepath'] = data['image'].apply( lambda x: os.path.join('../', 'train_images', x)) target_encoder = LabelEncoder() data['label_group'] = target_encoder.fit_transform(data['label_group']) train = data[data['fold'] != fold].reset_index(drop=True) valid = data[data['fold'] == fold].reset_index(drop=True) # Defining DataSet train_dataset = ShopeeDataset( csv=train, transforms=get_transforms(img_size=DIM[0], trans_type='train'), mode='train', ) valid_dataset = ShopeeDataset( csv=valid, transforms=get_transforms(img_size=DIM[0], trans_type='valid'), mode='train', ) train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=TRAIN_BATCH_SIZE, pin_memory=True, drop_last=True, num_workers=NUM_WORKERS) valid_loader = torch.utils.data.DataLoader( valid_dataset, batch_size=VALID_BATCH_SIZE, num_workers=NUM_WORKERS, shuffle=False, pin_memory=True, drop_last=False, ) # get adaptive margin tmp = np.sqrt( 1 / np.sqrt(data['label_group'].value_counts().sort_index().values)) margins = (tmp - tmp.min()) / (tmp.max() - tmp.min()) * 0.45 + 0.05 # Defining Model for specific fold if model_version == "V1": model = ShopeeNet(**model_params) elif model_version == "V2": model = ShopeeNetV2(**model_params) else: model = ShopeeNetV3(**model_params) model.to(DEVICE) def fetch_loss(loss_type=None): if loss_type is None: loss = nn.CrossEntropyLoss() elif loss_type == 'arcface': loss = ArcFaceLossAdaptiveMargin(margins=margins, out_dim=model_params['n_classes'], s=80) return loss criterion = fetch_loss() criterion.to(DEVICE) optimizer = torch.optim.Adam(model.parameters(), lr=scheduler_params['lr_start']) scheduler_cosine = torch.optim.lr_scheduler.CosineAnnealingWarmRestarts( optimizer, EPOCHS) scheduler_warmup = GradualWarmupSchedulerV2( optimizer, multiplier=10, total_epoch=1, after_scheduler=scheduler_cosine) #Defining LR SChe scheduler = None # THE ENGINE LOOP best_loss = 2 << 13 for epoch in range(EPOCHS): scheduler_warmup.step(epoch - 1) train_loss = train_fn(train_loader, model, criterion, optimizer, DEVICE, epoch_th=epoch, scheduler=scheduler) valid_loss = eval_fn(valid_loader, model, criterion, DEVICE) print( 'Fold {} | Epoch {}/{} | Training | Loss: {:.4f} | Valid | Loss: {:.4f}' .format(fold, epoch + 1, EPOCHS, train_loss['loss'].avg, valid_loss['loss'].avg)) with open(log_name, 'a') as csvfile: writer = csv.writer(csvfile) writer.writerow([ fold, epoch + 1, train_loss['loss'].avg, valid_loss['loss'].avg ]) if valid_loss['loss'].avg < best_loss: best_loss = valid_loss['loss'].avg torch.save( model.state_dict(), os.path.join( "./models", model_name, f'{model_version}_fold_{fold}_model_{model_params["model_name"]}_IMG_SIZE_{DIM[0]}_{model_params["loss_module"]}.bin' )) print('best model found for epoch {}'.format(epoch))
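# A tiny numeric illustration (counts assumed) of the adaptive-margin formula used
# above: rarer label groups get a larger ArcFace margin, bounded to [0.05, 0.50].
import numpy as np

counts = np.array([5, 20, 80, 320])     # hypothetical images per label_group
tmp = np.sqrt(1.0 / np.sqrt(counts))
margins = (tmp - tmp.min()) / (tmp.max() - tmp.min()) * 0.45 + 0.05
print(margins)  # largest margin (0.50) for the rarest class, smallest (0.05) for the most common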
def run():
    dfx = pd.read_csv(config.TRAINING_FILE).fillna("none")
    dfx.sentiment = dfx.sentiment.apply(  # could also use label encoding or map
        lambda x: 1 if x == "positive" else 0)

    # stratified split: train and validation keep the same positive/negative ratio
    df_train, df_valid = model_selection.train_test_split(
        dfx,
        test_size=0.1,
        random_state=42,
        stratify=dfx.sentiment.values)

    df_train = df_train.reset_index(drop=True)  # 0 to length of df_train
    df_valid = df_valid.reset_index(drop=True)  # 0 to length of df_valid

    train_dataset = dataset.BERTDataset(review=df_train.review.values,
                                        target=df_train.sentiment.values)
    train_data_loader = torch.utils.data.DataLoader(
        train_dataset, batch_size=config.TRAIN_BATCH_SIZE, num_workers=4)

    valid_dataset = dataset.BERTDataset(review=df_valid.review.values,
                                        target=df_valid.sentiment.values)
    valid_data_loader = torch.utils.data.DataLoader(
        valid_dataset, batch_size=config.VALID_BATCH_SIZE, num_workers=1)

    device = torch.device("cuda")  # using cuda
    model = BERTBaseUncased()  # defined in model.py
    model.to(device)  # the model has to live on the same device as the batches

    param_optimizer = list(model.named_parameters())  # parameters to train
    no_decay = ["bias", "LayerNorm.bias", "LayerNorm.weight"]
    optimizer_parameters = [
        {
            'params': [p for n, p in param_optimizer
                       if not any(nd in n for nd in no_decay)],
            'weight_decay': 0.001
        },
        {
            'params': [p for n, p in param_optimizer
                       if any(nd in n for nd in no_decay)],
            'weight_decay': 0.0
        },
    ]
    # These parameters are adjustable; decide which layers get weight decay,
    # how much decay, what learning rate, etc.

    num_train_steps = int(
        len(df_train) / config.TRAIN_BATCH_SIZE * config.EPOCHS)
    optimizer = AdamW(optimizer_parameters, lr=3e-5)
    scheduler = get_linear_schedule_with_warmup(
        optimizer, num_warmup_steps=0, num_training_steps=num_train_steps)

    # model = nn.DataParallel(model)  # convert to a multi-GPU model if needed

    best_accuracy = 0
    for epoch in range(config.EPOCHS):
        engine.train_fn(train_data_loader, model, optimizer, device, scheduler)
        outputs, target = engine.eval_fn(valid_data_loader, model, device)
        outputs = np.array(outputs) >= 0.5
        accuracy = metrics.accuracy_score(target, outputs)
        print(f"Accuracy score = {accuracy}")
        if accuracy > best_accuracy:
            # save the model only if it improves
            torch.save(model.state_dict(), config.MODEL_PATH)
            best_accuracy = accuracy
def run(): dfx = pd.read_csv(config.TRAINING_FILE, nrows=config.NROWS).dropna().reset_index(drop=True) # dfx.sentiment = dfx.sentiment.apply( # lambda x: 1 if x =='positive' else 0 # ) print('Data Loaded') df_train, df_valid = model_selection.train_test_split( dfx, test_size=0.5, random_state=42, stratify=dfx.sentiment.values) print('Data split into train data and validation data') df_train = df_train.reset_index(drop=True) df_valid = df_valid.reset_index(drop=True) train_dataset = dataset.TweetDataset( tweet=df_train.text.values, sentiment=df_train.sentiment.values, selected_text=df_train.selected_text.values) print('Train data preprocessed and made into Tweet Dataset Object') train_data_loader = torch.utils.data.DataLoader( train_dataset, batch_size=config.TRAIN_BATCH_SIZE, shuffle=True, num_workers=4) print('Train dataloader created') valid_dataset = dataset.TweetDataset( tweet=df_valid.text.values, sentiment=df_valid.sentiment.values, selected_text=df_valid.selected_text.values) print('Valid data preprocessed and made into Tweet Dataset Object') valid_data_loader = torch.utils.data.DataLoader( valid_dataset, batch_size=config.VALID_BATCH_SIZE, num_workers=1) print('Valid dataloader created') device = config.DEVICE conf = transformers.RobertaConfig.from_pretrained( f'{config.PATH}roberta-base-config.json') conf.output_hidden_states = False model = Roberta(conf) model.to(device) print('Model Object created') param_optimizer = list(model.named_parameters()) no_decay = ['bias', 'LayerNorm.bias', 'LayerNorm.weight'] optimizer_parameters = [{ 'params': [p for n, p in param_optimizer if not any(nd in n for nd in no_decay)], 'weight_decay': 0.001 }, { 'params': [p for n, p in param_optimizer if any(nd in n for nd in no_decay)], 'weight_decay': 0.0 }] num_train_steps = int( len(df_train) / config.TRAIN_BATCH_SIZE * config.EPOCHS) optimizer = AdamW(optimizer_parameters, lr=3e-5) scheduler = utils.get_linear_schedule_with_warmup( optimizer, num_warmup_steps=0, num_training_steps=num_train_steps) best_jaccard = 0 print('Starting Training....') for epoch in range(config.EPOCHS): engine.train_fn(train_data_loader, model, optimizer, device, scheduler) jaccard = engine.eval_fn(valid_data_loader, model, device) print(f'Jaccard Score : {jaccard}') if jaccard > best_jaccard: torch.save(model.state_dict(), config.MODEL_PATH) best_jaccard = jaccard
def main(_): LEARNING_RATE = config.LEARNING_RATE DROPOUT = config.DROPOUT SAVE = config.SAVE TUNE = False ESTOP = 5 if FLAGS.lr: LEARNING_RATE = FLAGS.lr if FLAGS.dropout: DROPOUT = FLAGS.dropout if FLAGS.save: SAVE = FLAGS.save if FLAGS.tune: TUNE = FLAGS.tune if FLAGS.estop: ESTOP = FLAGS.estop train_file = config.TRAIN_PROC df_train = pd.read_csv(train_file).fillna("none") valid_file = config.DEVEL_PROC df_valid = pd.read_csv(valid_file).fillna("none") test_file = config.EVAL_PROC df_test = pd.read_csv(test_file).fillna("none") logger.info(f"Bert Model: {config.BERT_PATH}") logger.info( f"Current date and time :{datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')} " ) logger.info(f"Train file: {train_file}") logger.info(f"Valid file: {valid_file}") logger.info(f"Test file: {test_file}") logger.info(f"Train size : {len(df_train):.4f}") logger.info(f"Valid size : {len(df_valid):.4f}") logger.info(f"Test size : {len(df_test):.4f}") valid_dataset = dataset.BERTDataset(text=df_valid.text.values, target=df_valid.label.values) valid_data_loader = torch.utils.data.DataLoader( valid_dataset, batch_size=config.VALID_BATCH_SIZE, num_workers=1) test_dataset = dataset.BERTDataset(text=df_test.text.values, target=df_test.label.values) test_data_loader = torch.utils.data.DataLoader( test_dataset, batch_size=config.VALID_BATCH_SIZE, num_workers=1) device = torch.device( 'cuda' if torch.cuda.is_available() else 'cpu') #torch.device("cuda") model = BERTBaseUncased(DROPOUT) if TUNE: model.load_state_dict( torch.load(configtune.MODEL_PATH, map_location=torch.device(device))) model.to(device) param_optimizer = list(model.named_parameters()) no_decay = ["bias", "LayerNorm.bias", "LayerNorm.weight"] optimizer_parameters = [ { 'params': [ p for n, p in param_optimizer if not any(nd in n for nd in no_decay) ], 'weight_decay': 0.001 }, { 'params': [p for n, p in param_optimizer if any(nd in n for nd in no_decay)], 'weight_decay': 0.0 }, ] num_train_steps = int( len(df_train) / config.TRAIN_BATCH_SIZE * config.EPOCHS) optimizer = AdamW(optimizer_parameters, lr=LEARNING_RATE) scheduler = get_linear_schedule_with_warmup( optimizer, num_warmup_steps=0, num_training_steps=num_train_steps) # model = nn.DataParallel(model) best_accuracy = 0 best_path = "" es = 1 for epoch in range(config.EPOCHS): if es > ESTOP: break df_train = shuffle(df_train) chunks = np.array_split(df_train, round(len(df_train) / SAVE)) for chunk in chunks: train_dataset = dataset.BERTDataset(text=chunk.text.values, target=chunk.label.values) train_data_loader = torch.utils.data.DataLoader( train_dataset, batch_size=config.TRAIN_BATCH_SIZE, num_workers=4, shuffle=True) logger.info(f"Epoch = {epoch}") train_loss, train_acc = engine.train_fn(train_data_loader, model, optimizer, device, scheduler) for tag, parm in model.named_parameters(): if parm.grad is not None: writer.add_histogram(tag, parm.grad.data.cpu().numpy(), epoch) outputs, targets, val_loss, val_acc = engine.eval_fn( valid_data_loader, model, device) val_mcc = metrics.matthews_corrcoef(outputs, targets) logger.info(f"val_MCC_Score = {val_mcc:.4f}") outputs, targets, test_loss, test_acc = engine.eval_fn( test_data_loader, model, device) test_mcc = metrics.matthews_corrcoef(outputs, targets) logger.info(f"test_MCC_Score = {test_mcc:.4f}") logger.info( f"train_loss={train_loss:.4f}, val_loss={val_loss:.4f}, test_loss={test_loss:.4f}" ) writer.add_scalar('loss/train', train_loss, epoch) writer.add_scalar('loss/val', val_loss, epoch) writer.add_scalar('loss/test', test_loss, epoch) 
            logger.info(
                f"train_acc={train_acc:.4f}, val_acc={val_acc:.4f}, test_acc={test_acc:.4f}"
            )
            writer.add_scalar('acc/train', train_acc, epoch)
            writer.add_scalar('acc/val', val_acc, epoch)
            writer.add_scalar('acc/test', test_acc, epoch)

            logger.info(f"val_mcc={val_mcc:.4f}, test_mcc={test_mcc:.4f}")
            writer.add_scalar('mcc/val', val_mcc, epoch)
            writer.add_scalar('mcc/test', test_mcc, epoch)

            accuracy = metrics.accuracy_score(targets, outputs)
            logger.info(f"Accuracy Score = {accuracy:.4f}")
            if accuracy < 0.4:
                logger.info(
                    f"Something is very wrong! Accuracy is only {accuracy:.4f}. Stopping..."
                )
                break
            if accuracy > best_accuracy:
                logger.info(
                    f"Saving model with Accuracy Score = {accuracy:.4f}")
                # Delete the previous best checkpoint before saving the new one
                if len(best_path) > 0 and os.path.exists(best_path):
                    os.remove(best_path)
                best_path = (config.MODEL_PATH[:-4] + "." +
                             str(round(accuracy * 100, 2)) + ".bin")
                torch.save(model.state_dict(), best_path)
                best_accuracy = accuracy
                es = 0
            else:
                es += 1
                logger.info(
                    f"Not improved for {es} times of {ESTOP}. Best so far - {best_accuracy:.4f}"
                )
                if es > ESTOP:
                    logger.info(
                        f"Early stopping with best accuracy: {best_accuracy:.4f} and accuracy for this epoch: {accuracy:.4f} ..."
                    )
                    break
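# [Sketch] `writer` above is assumed to be a TensorBoard SummaryWriter created
# at module level; a minimal setup (the log directory name is an assumption):
from torch.utils.tensorboard import SummaryWriter

writer = SummaryWriter(log_dir="runs/bert-baseline")
# Scalars logged as 'loss/train', 'acc/val', 'mcc/test', etc. are grouped by
# the slash prefix in the TensorBoard UI (run: tensorboard --logdir runs).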
def run():
    df = pd.read_csv(CONFIG.INPUT_PATH + 'news_summary_more.csv').sample(
        frac=CONFIG.frac).reset_index(drop=True)

    print('--------- [INFO] TOKENIZING --------')
    loader = DataLoader.DataLoader(df)
    print(f'len of loader = {len(loader)}')

    # Split the dataset indices into train / validation samplers
    split = int(CONFIG.split * len(loader))
    indices = list(range(len(loader)))
    train_indices, val_indices = indices[split:], indices[:split]
    train_sampler = torch.utils.data.sampler.RandomSampler(train_indices)
    val_sampler = torch.utils.data.sampler.RandomSampler(val_indices)

    # Persist the vocabulary for use at inference time
    pickle.dump(loader.vocab.word_to_idx,
                open(CONFIG.INPUT_PATH + 'word_to_idx.pickle', 'wb'))
    pickle.dump(loader.vocab.idx_to_word,
                open(CONFIG.INPUT_PATH + 'idx_to_word.pickle', 'wb'))

    pad_idx = loader.vocab.word_to_idx["<PAD>"]
    train_loader = torch.utils.data.DataLoader(
        loader,
        batch_size=CONFIG.Batch_Size,
        num_workers=4,
        pin_memory=True,
        collate_fn=DataLoader.MyCollate(pad_idx),
        sampler=train_sampler)
    val_loader = torch.utils.data.DataLoader(
        loader,
        batch_size=CONFIG.Batch_Size,
        num_workers=4,
        pin_memory=True,
        collate_fn=DataLoader.MyCollate(pad_idx),
        sampler=val_sampler)

    if torch.cuda.is_available():
        accelerator = 'cuda'
        torch.backends.cudnn.benchmark = True
    else:
        accelerator = 'cpu'
    vocab_size = len(loader.vocab.word_to_idx)
    device = torch.device(accelerator)

    model = Transformer.Transformer(input_vocab_size=vocab_size,
                                    out_vocab_size=vocab_size,
                                    max_len=CONFIG.max_len,
                                    embed_dims=CONFIG.embed_dims,
                                    pad_idx=pad_idx,
                                    heads=CONFIG.heads,
                                    forward_expansion=CONFIG.forward_expansion,
                                    num_layers=CONFIG.num_layers,
                                    dropout=CONFIG.dropout,
                                    device=device)
    model = model.to(device)

    # No weight decay for bias and LayerNorm parameters
    no_decay_params = ['bias', 'LayerNorm.bias', 'LayerNorm.weight']
    optimized_params = [
        {
            'params': [p for n, p in model.named_parameters()
                       if not any(nd in n for nd in no_decay_params)],
            'weight_decay': 0.001
        },
        {
            'params': [p for n, p in model.named_parameters()
                       if any(nd in n for nd in no_decay_params)],
            'weight_decay': 0.0
        },
    ]

    optimizer = transformers.AdamW(optimized_params, lr=CONFIG.LR)
    num_training_steps = CONFIG.Epochs * len(loader) // CONFIG.Batch_Size
    # CONFIG.Warmup_steps is a fraction of the total number of training steps
    scheduler = transformers.get_linear_schedule_with_warmup(
        optimizer,
        num_warmup_steps=CONFIG.Warmup_steps * num_training_steps,
        num_training_steps=num_training_steps)

    best_loss = 1e4
    best_model = model.state_dict()
    print('--------- [INFO] STARTING TRAINING ---------')
    for epoch in range(CONFIG.Epochs):
        train_loss = engine.train_fn(model, train_loader, optimizer, scheduler,
                                     device, pad_idx)
        val_loss = engine.eval_fn(model, val_loader, device, pad_idx)
        print(
            f'EPOCH -> {epoch+1}/{CONFIG.Epochs} | TRAIN LOSS = {train_loss} | VAL LOSS = {val_loss}'
        )
        if best_loss > val_loss:
            best_loss = val_loss
            best_model = model.state_dict()
            torch.save(best_model, CONFIG.MODEL_PATH)

    predict.predict(
        '''Saurav Kant, an alumnus of upGrad and IIIT-B's PG Program in Machine learning and Artificial Intelligence, was a Sr Systems Engineer at Infosys with almost 5 years of work experience. The program and upGrad's 360-degree career support helped him transition to a Data Scientist at Tech Mahindra with 90% salary hike. upGrad's Online Power Learning has powered 3 lakh+ careers.'''
    )
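# [Sketch] The same weight-decay parameter grouping appears in every training
# script in this file; a small reusable helper (name and defaults are
# assumptions, not part of the original code):
def weight_decay_groups(model, weight_decay=0.001,
                        no_decay=('bias', 'LayerNorm.bias', 'LayerNorm.weight')):
    """Return optimizer parameter groups with weight decay disabled for
    bias and LayerNorm parameters."""
    named = list(model.named_parameters())
    return [
        {'params': [p for n, p in named
                    if not any(nd in n for nd in no_decay)],
         'weight_decay': weight_decay},
        {'params': [p for n, p in named
                    if any(nd in n for nd in no_decay)],
         'weight_decay': 0.0},
    ]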
def run():
    dfx = pd.read_csv(config.TRAINING_FILE).fillna("none")
    dfx.sentiment = dfx.sentiment.apply(lambda x: 1 if x == "positive" else 0)

    df_train, df_valid = model_selection.train_test_split(
        dfx, test_size=0.1, random_state=42, stratify=dfx.sentiment.values)
    df_train = df_train.reset_index(drop=True)
    df_valid = df_valid.reset_index(drop=True)

    train_dataset = dataset.BERTDataset(review=df_train.review.values,
                                        target=df_train.sentiment.values)
    train_data_loader = torch.utils.data.DataLoader(
        train_dataset, batch_size=config.TRAIN_BATCH_SIZE, num_workers=4)

    valid_dataset = dataset.BERTDataset(review=df_valid.review.values,
                                        target=df_valid.sentiment.values)
    valid_data_loader = torch.utils.data.DataLoader(
        valid_dataset, batch_size=config.VALID_BATCH_SIZE, num_workers=1)

    # Changed from cuda to cpu
    device = torch.device("cpu")
    model = BERTBaseUncased()
    model.to(device)

    # No weight decay for bias and LayerNorm parameters
    param_optimizer = list(model.named_parameters())
    no_decay = ["bias", "LayerNorm.bias", "LayerNorm.weight"]
    optimizer_parameters = [
        {
            'params': [p for n, p in param_optimizer
                       if not any(nd in n for nd in no_decay)],
            'weight_decay': 0.001
        },
        {
            'params': [p for n, p in param_optimizer
                       if any(nd in n for nd in no_decay)],
            'weight_decay': 0.0
        },
    ]

    num_train_steps = int(
        len(df_train) / config.TRAIN_BATCH_SIZE * config.EPOCHS)
    optimizer = AdamW(optimizer_parameters, lr=3e-5)
    scheduler = get_linear_schedule_with_warmup(
        optimizer, num_warmup_steps=0, num_training_steps=num_train_steps)

    # model = nn.DataParallel(model)

    best_accuracy = 0
    for epoch in range(config.EPOCHS):
        engine.train_fn(train_data_loader, model, optimizer, device, scheduler)
        outputs, targets = engine.eval_fn(valid_data_loader, model, device)
        # Threshold the sigmoid outputs at 0.5 to get binary predictions
        outputs = np.array(outputs) >= 0.5
        accuracy = metrics.accuracy_score(targets, outputs)
        print(f"Accuracy Score = {accuracy}")
        if accuracy > best_accuracy:
            torch.save(model.state_dict(), config.MODEL_PATH)
            best_accuracy = accuracy
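# [Sketch] dataset.BERTDataset is assumed to tokenize one review per item with
# a Hugging Face tokenizer; a minimal version of the usual pattern (the class
# and field names below are assumptions, not the original implementation):
import torch


class BERTDatasetSketch(torch.utils.data.Dataset):
    def __init__(self, review, target, tokenizer, max_len):
        self.review = review
        self.target = target
        self.tokenizer = tokenizer
        self.max_len = max_len

    def __len__(self):
        return len(self.review)

    def __getitem__(self, idx):
        # Tokenize, truncate, and pad a single review to max_len
        enc = self.tokenizer(str(self.review[idx]),
                             truncation=True,
                             padding="max_length",
                             max_length=self.max_len,
                             return_tensors="pt")
        return {
            "ids": enc["input_ids"].squeeze(0),
            "mask": enc["attention_mask"].squeeze(0),
            "targets": torch.tensor(self.target[idx], dtype=torch.float),
        }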
def run(dataset_index):
    datasets = [
        "gold.prep-auto.full.prep.{0}.csv",
        "gold.prep-auto.no-emoticons.prep.{0}.csv",
        "gold.prep-auto.prep.{0}.csv",
        "gold.prep-english.prep.{0}.csv",
        "gold.prep-peisenieks.prep.{0}.csv",
        "gold.prep.{0}.csv",
    ]
    # dataset_index = 5  # 0-5

    train_file = config.DATASET_LOCATION + datasets[dataset_index].format("train")
    df_train = pd.read_csv(train_file).fillna("none")
    df_train.label = df_train.label.apply(label_encoder)

    valid_file = config.DATASET_LOCATION + datasets[dataset_index].format("dev")
    df_valid = pd.read_csv(valid_file).fillna("none")
    df_valid.label = df_valid.label.apply(label_encoder)

    test_file = config.DATASET_LOCATION + "eval.prep.test.csv"
    df_test = pd.read_csv(test_file).fillna("none")
    df_test.label = df_test.label.apply(label_encoder)

    logger.info(f"Bert Model: {config.BERT_PATH}")
    logger.info(
        f"Current date and time: {datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')}"
    )
    logger.info(f"Train file: {train_file}")
    logger.info(f"Valid file: {valid_file}")
    logger.info(f"Test file: {test_file}")
    logger.info(f"Train size: {len(df_train)}")
    logger.info(f"Valid size: {len(df_valid)}")
    logger.info(f"Test size: {len(df_test)}")

    train_dataset = dataset.BERTDataset(review=df_train.text.values,
                                        target=df_train.label.values)
    train_data_loader = torch.utils.data.DataLoader(
        train_dataset,
        batch_size=config.TRAIN_BATCH_SIZE,
        num_workers=4,
        shuffle=True)

    valid_dataset = dataset.BERTDataset(review=df_valid.text.values,
                                        target=df_valid.label.values)
    valid_data_loader = torch.utils.data.DataLoader(
        valid_dataset, batch_size=config.VALID_BATCH_SIZE, num_workers=1)

    test_dataset = dataset.BERTDataset(review=df_test.text.values,
                                       target=df_test.label.values)
    test_data_loader = torch.utils.data.DataLoader(
        test_dataset, batch_size=config.VALID_BATCH_SIZE, num_workers=1)

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model = BERTBaseUncased()
    model.to(device)

    # No weight decay for bias and LayerNorm parameters
    param_optimizer = list(model.named_parameters())
    no_decay = ["bias", "LayerNorm.bias", "LayerNorm.weight"]
    optimizer_parameters = [
        {
            'params': [p for n, p in param_optimizer
                       if not any(nd in n for nd in no_decay)],
            'weight_decay': 0.001
        },
        {
            'params': [p for n, p in param_optimizer
                       if any(nd in n for nd in no_decay)],
            'weight_decay': 0.0
        },
    ]

    num_train_steps = int(
        len(df_train) / config.TRAIN_BATCH_SIZE * config.EPOCHS)
    optimizer = AdamW(optimizer_parameters, lr=3e-5)
    scheduler = get_linear_schedule_with_warmup(
        optimizer, num_warmup_steps=0, num_training_steps=num_train_steps)

    # model = nn.DataParallel(model)

    best_accuracy = 0
    for epoch in range(config.EPOCHS):
        logger.info(f"epoch={epoch}")
        train_loss, train_acc = engine.train_fn(train_data_loader, model,
                                                optimizer, device, scheduler)

        for tag, parm in model.named_parameters():
            if parm.grad is not None:
                writer.add_histogram(tag, parm.grad.data.cpu().numpy(), epoch)

        outputs, targets, val_loss, val_acc = engine.eval_fn(
            valid_data_loader, model, device)
        val_mcc = metrics.matthews_corrcoef(outputs, targets)
        logger.info(f"val_MCC_Score = {val_mcc:.3f}")

        outputs, targets, test_loss, test_acc = engine.eval_fn(
            test_data_loader, model, device)
        test_mcc = metrics.matthews_corrcoef(outputs, targets)
        logger.info(f"test_MCC_Score = {test_mcc:.3f}")

        logger.info(
            f"train_loss={train_loss:.4f}, val_loss={val_loss:.4f}, test_loss={test_loss:.4f}"
        )
        # Scalars grouped by `slash` prefix in TensorBoard
        writer.add_scalar('loss/train', train_loss, epoch)
        writer.add_scalar('loss/val', val_loss, epoch)
        writer.add_scalar('loss/test', test_loss, epoch)

        logger.info(
            f"train_acc={train_acc:.3f}, val_acc={val_acc:.3f}, test_acc={test_acc:.3f}"
        )
        writer.add_scalar('acc/train', train_acc, epoch)
        writer.add_scalar('acc/val', val_acc, epoch)
        writer.add_scalar('acc/test', test_acc, epoch)

        logger.info(f"val_mcc={val_mcc:.3f}, test_mcc={test_mcc:.3f}")
        writer.add_scalar('mcc/val', val_mcc, epoch)
        writer.add_scalar('mcc/test', test_mcc, epoch)

        accuracy = metrics.accuracy_score(targets, outputs)
        logger.info(f"Accuracy Score = {accuracy:.3f}")
        if accuracy > best_accuracy:
            print(f"Saving model with Accuracy Score = {accuracy:.3f}")
            torch.save(model.state_dict(), config.MODEL_PATH)
            best_accuracy = accuracy
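# [Sketch] label_encoder above is assumed to map the string labels in the
# gold/eval CSVs to integer class ids; the actual label set is not shown in
# the original code, so this mapping is purely illustrative:
def label_encoder(label):
    mapping = {"negative": 0, "neutral": 1, "positive": 2}  # assumed labels
    return mapping[label]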
def run():
    dfx = pd.read_csv(config.TRAINING_FILE,
                      nrows=30).dropna().reset_index(drop=True)

    df_train, df_valid = model_selection.train_test_split(
        dfx, test_size=0.1, random_state=42, stratify=dfx.sentiment.values)
    df_train = df_train.reset_index(drop=True)
    df_valid = df_valid.reset_index(drop=True)

    train_dataset = dataset.TweetDataset(
        tweet=df_train.text.values,
        sentiment=df_train.sentiment.values,
        selected_text=df_train.selected_text.values)
    train_data_loader = torch.utils.data.DataLoader(
        train_dataset, batch_size=config.TRAIN_BATCH_SIZE, num_workers=1)

    valid_dataset = dataset.TweetDataset(
        tweet=df_valid.text.values,
        sentiment=df_valid.sentiment.values,
        selected_text=df_valid.selected_text.values)
    valid_data_loader = torch.utils.data.DataLoader(
        valid_dataset, batch_size=config.VALID_BATCH_SIZE, num_workers=1)

    device = torch.device('cpu')
    model = BERTBaseUncased()
    model.to(device)

    # No weight decay for bias and LayerNorm parameters
    param_optimizer = list(model.named_parameters())
    no_decay = ["bias", "LayerNorm.bias", "LayerNorm.weight"]
    optimizer_parameters = [
        {
            "params": [p for n, p in param_optimizer
                       if not any(nd in n for nd in no_decay)],
            "weight_decay": 0.001,
        },
        {
            "params": [p for n, p in param_optimizer
                       if any(nd in n for nd in no_decay)],
            "weight_decay": 0.0,
        },
    ]

    num_train_steps = int(
        len(df_train) / config.TRAIN_BATCH_SIZE * config.EPOCHS)
    optimizer = AdamW(optimizer_parameters, lr=3e-5)
    scheduler = get_linear_schedule_with_warmup(
        optimizer, num_warmup_steps=0, num_training_steps=num_train_steps)

    best_jaccard = 0
    for epoch in range(config.EPOCHS):
        engine.train_fn(train_data_loader, model, optimizer, device, scheduler)
        mean_jac = engine.eval_fn(valid_data_loader, model, device)
        print(f"jaccard_score = {mean_jac}")
        if mean_jac > best_jaccard:
            torch.save(model.state_dict(), config.MODEL_PATH)
            best_jaccard = mean_jac
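# [Sketch] With num_warmup_steps=0, the linear schedule starts at the full
# learning rate (3e-5 here) and decays it to 0 over num_training_steps. A
# quick standalone check of that behaviour (dummy parameter, illustrative
# only; not part of the original training code):
import torch
from transformers import get_linear_schedule_with_warmup

params = [torch.nn.Parameter(torch.zeros(1))]
opt = torch.optim.AdamW(params, lr=3e-5)
sched = get_linear_schedule_with_warmup(opt, num_warmup_steps=0,
                                        num_training_steps=10)
for step in range(10):
    opt.step()
    sched.step()
    print(step, sched.get_last_lr())  # learning rate decreases linearly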
def run(fold):
    dfx = pd.read_csv(config.TRAINING_FILE)

    # Train / validation split using the pre-computed fold column
    df_train = dfx[dfx.kfold != fold].reset_index(drop=True)
    df_valid = dfx[dfx.kfold == fold].reset_index(drop=True)

    train_dataset = TweetDataset(
        tweet=df_train.text.values,
        sentiment=df_train.sentiment.values,
        selected_text=df_train.selected_text.values)
    train_data_loader = torch.utils.data.DataLoader(
        train_dataset, batch_size=config.TRAIN_BATCH_SIZE, num_workers=4)

    valid_dataset = TweetDataset(
        tweet=df_valid.text.values,
        sentiment=df_valid.sentiment.values,
        selected_text=df_valid.selected_text.values)
    valid_data_loader = torch.utils.data.DataLoader(
        valid_dataset, batch_size=config.VALID_BATCH_SIZE, num_workers=2)

    device = torch.device("cuda")
    model_config = transformers.RobertaConfig.from_pretrained(
        config.ROBERTA_PATH)
    model_config.output_hidden_states = True
    model = TweetModel(conf=model_config)
    model.to(device)

    # Define two sets of parameters: those with weight decay, and those without
    param_optimizer = list(model.named_parameters())
    no_decay = ["bias", "LayerNorm.bias", "LayerNorm.weight"]
    optimizer_parameters = [
        {
            "params": [p for n, p in param_optimizer
                       if not any(nd in n for nd in no_decay)],
            "weight_decay": 0.001,
        },
        {
            "params": [p for n, p in param_optimizer
                       if any(nd in n for nd in no_decay)],
            "weight_decay": 0.0,
        },
    ]

    num_train_steps = int(
        len(df_train) / config.TRAIN_BATCH_SIZE * config.EPOCHS)
    optimizer = AdamW(optimizer_parameters, lr=3e-5)

    # Create a scheduler that sets the learning rate at each training step:
    # "a schedule with a learning rate that decreases linearly after linearly
    # increasing during a warmup period" (https://pytorch.org/docs/stable/optim.html).
    # Since num_warmup_steps=0, the learning rate starts at 3e-5 and then
    # decreases linearly at each training step.
    scheduler = get_linear_schedule_with_warmup(
        optimizer, num_warmup_steps=0, num_training_steps=num_train_steps)

    es = utils.EarlyStopping(patience=2, mode="max")
    print(f"Training is Starting for fold={fold}")
    logger.info("Training is Starting for fold - {}".format(fold))
    # model = nn.DataParallel(model)

    for epoch in range(3):
        engine.train_fn(train_data_loader, model, optimizer, device, scheduler)
        jaccard = engine.eval_fn(valid_data_loader, model, device)
        print(f"Jaccard Score = {jaccard}")
        logger.info("EPOCH {} - Jaccard Score - {}".format(epoch, jaccard))
        es(jaccard, model, model_path=f"../models/nmodel_{fold}.bin")
        if es.early_stop:
            print("Early stopping")
            break
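# [Sketch] utils.EarlyStopping is assumed to track the best validation score,
# save a checkpoint when it improves, and set .early_stop after `patience`
# epochs without improvement; a minimal version of that contract (not the
# original implementation):
import torch


class EarlyStopping:
    def __init__(self, patience=2, mode="max"):
        self.patience = patience
        self.mode = mode
        self.best = None
        self.counter = 0
        self.early_stop = False

    def __call__(self, score, model, model_path):
        # For mode="min", compare on the negated score so larger is better
        value = score if self.mode == "max" else -score
        if self.best is None or value > self.best:
            self.best = value
            self.counter = 0
            torch.save(model.state_dict(), model_path)
        else:
            self.counter += 1
            if self.counter >= self.patience:
                self.early_stop = True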