Example #1
def run():

    # Load the data and do a little exploration
    data = utils.load_data(config.DATA_PATH)
    utils.data_exploration(data)  # data exploration only prints summaries

    # print(data.head())
    # print(data.polarity.values)
    # Keep only a small subsample of the data, then create the DataLoaders

    data, _ = model_selection.train_test_split(data,
                                               test_size=0.995,
                                               random_state=42,
                                               stratify=data.polarity.values)

    train, valid = model_selection.train_test_split(
        data, test_size=0.5, random_state=42, stratify=data.polarity.values)

    train_data_loader = utils.create_data_loader(train)
    valid_data_loader = utils.create_data_loader(valid, is_train=False)


    # Build Model and send it to device
    model = BuildModel()
    model = model.to(config.DEVICE)

    # Set weight decay to 0 for the no_decay params
    # Set weight decay to config.WEIGHT_DECAY for the others
    param_optimizer = list(model.named_parameters())
    no_decay = ["bias", "LayerNorm.bias", "LayerNorm.weight"]

    optimizer_parameters = [{
        'params': [
            tensor for name, tensor in param_optimizer
            if any(nd in name for nd in no_decay)
        ],
        'weight_decay': 0
    }, {
        'params': [
            tensor for name, tensor in param_optimizer
            if not any(nd in name for nd in no_decay)
        ],
        'weight_decay': config.WEIGHT_DECAY
    }]

    # This is the total number of optimization steps that will be performed
    num_training_steps = int(
        (train.shape[0] / config.TRAIN_BATCH_SIZE) * config.EPOCHS)

    optimizer = AdamW(optimizer_parameters, lr=3e-5)  # arbitrarily set

    # Scheduler to adapt the LR over the training steps
    # The scheduler with warm-up increases the LR during the first
    # warm-up steps so that training converges faster at the start
    scheduler = get_linear_schedule_with_warmup(
        optimizer=optimizer,
        num_training_steps=num_training_steps,
        num_warmup_steps=4)
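    # Note (not in the original code): num_warmup_steps=4 above is arbitrary; some of
    # the later examples instead warm up for a fixed fraction of the total steps, e.g.
    # num_warmup_steps = int(0.1 * num_training_steps)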

    # model = nn.DataParallel(model)  # if multiple GPUs

    best_accuracy = 0
    best_model_state = None

    for epoch in range(config.EPOCHS):
        engine.train_fn(train_data_loader, model, optimizer, scheduler)
        outputs, targets = engine.eval_fn(valid_data_loader, model)
        outputs = np.where(np.array(outputs) > 0.5, 1, 0)
        accuracy = metrics.accuracy_score(np.array(targets), outputs)
        print(f"Accuracy, Epoch {epoch} : {accuracy}")
        if accuracy > best_accuracy:
            best_accuracy = accuracy
            best_model_state = model.state_dict()

    print("Best accuracy : {best_accuracy}")

    print("Saving Model...")
    torch.save(best_model_state, config.MODEL_PATH)
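
# Not part of the original snippet: a minimal sketch of how the weights saved above
# could be loaded back for inference, assuming the same BuildModel and config used in run().
def load_best_model():
    model = BuildModel()
    model.load_state_dict(torch.load(config.MODEL_PATH, map_location=config.DEVICE))
    model = model.to(config.DEVICE)
    model.eval()  # disable dropout / use running batch-norm stats for inference
    return model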
Example #2
                p for n, p in param_optimizer
                if not any(nd in n for nd in no_decay)
            ],
            "weight_decay":
            0.001,
        },
        {
            "params":
            [p for n, p in param_optimizer if any(nd in n for nd in no_decay)],
            "weight_decay":
            0.0,
        },
    ]

    num_train_steps = int(
        len(train_sentences) / config.TRAIN_BATCH_SIZE * config.EPOCHS)
    optimizer = AdamW(optimizer_parameters, lr=3e-5)
    scheduler = get_linear_schedule_with_warmup(optimizer,
                                                num_warmup_steps=0,
                                                num_training_steps=num_train_steps)

    best_loss = np.inf
    for epoch in range(config.EPOCHS):
        train_loss = engine.train_fn(train_data_loader, model, optimizer,
                                     device, scheduler)
        test_loss = engine.eval_fn(valid_data_loader, model, device)
        print(f"Train Loss = {train_loss} Valid Loss = {test_loss}")
        if test_loss < best_loss:
            torch.save(model.state_dict(), config.MODEL_PATH)
            best_loss = test_loss
Example #3
def run():
    path = CONFIG.INPUT_PATH 
    x_ray_image_names = os.listdir(path + '/CXR_png/')
    image_names = []
    for name in x_ray_image_names:
        image_names.append(name.split('.')[0])
    
    dataset_image_names = []
    mask_image_names = os.listdir(path + '/masks/')
    for name in mask_image_names:
        name = name.split('.png')[0].split('_mask')[0]
        if name in image_names:
            dataset_image_names.append(name)


    image_transforms = alb.Compose([
        alb.Normalize(CONFIG.mean, CONFIG.std, always_apply=True),
        alb.Resize(512, 512, always_apply=True),
        alb.pytorch.ToTensor()
    ])

    mask_transforms = alb.Compose([
        alb.Normalize(0, 1, always_apply=True),
        alb.Resize(512, 512, always_apply=True),
        alb.pytorch.ToTensor()
    ])

    train_images_name, val_images_name = train_test_split(dataset_image_names)

    train_data = DataLoader.DataLoader(
        train_images_name,
        image_transforms,
        mask_transforms
    )

    val_data = DataLoader.DataLoader(
        val_images_name,
        image_transforms,
        mask_transforms
    )

    train_loader = torch.utils.data.DataLoader(
        train_data,
        num_workers=4,
        batch_size=CONFIG.Batch_size,
        pin_memory=True
    )

    val_loader = torch.utils.data.DataLoader(
        val_data,
        num_workers=4,
        batch_size=CONFIG.Batch_size,
        pin_memory=True
    )

    if torch.cuda.is_available():
        accelerator = 'cuda'
        torch.backends.cudnn.benchmark = True
    else:
        accelerator = 'cpu'

    device = torch.device(accelerator)

    model = UNet.UNet(input_channels=3)

    model = model.to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=CONFIG.LR)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
        optimizer,
        patience=CONFIG.patience,
        threshold=CONFIG.scheduler_thresh,
        mode="min",
        factor=CONFIG.decay_factor
    )

    best_loss = 1e4
    
    print('------ [INFO] STARTING TRAINING ------')
    for epoch in range(CONFIG.Epochs):
        train_loss = engine.train_fn(model, train_loader, optimizer, device)
        val_loss = engine.eval_fn(model, val_loader, device)
        print(f'EPOCH -> {epoch+1}/{CONFIG.Epochs} | TRAIN LOSS = {train_loss} | VAL LOSS = {val_loss} | LR = {optimizer.param_groups[0]["lr"]}\n')
        scheduler.step(val_loss)
        if best_loss > val_loss:
            best_loss = val_loss
            best_model = model.state_dict()
            torch.save(best_model, CONFIG.MODEL_PATH)
            predict.predict('input/CXR_png/CHNCXR_0001_0.png')
Example #4
def main(ckp_path=None):
    """ckp_path (str): checkpoint_path
    Train the model from scratch if ckp_path is None else
    Re-Train the model from previous checkpoint
    """
    cli_args = get_train_args(__author__, __version__)

    # Variables
    data_dir = cli_args.data_dir
    save_dir = cli_args.save_dir
    file_name = cli_args.file_name
    use_gpu = cli_args.use_gpu

    # LOAD DATA
    data_loaders = load_data(data_dir, config.IMG_SIZE, config.BATCH_SIZE)

    # BUILD MODEL
    if ckp_path is None:
        model = initialize_model(model_name=config.MODEL_NAME,
                                 num_classes=config.NO_OF_CLASSES,
                                 feature_extract=True,
                                 use_pretrained=True)
    else:
        model = load_ckp(ckp_path)

    # Check whether a GPU is available
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    # If the user asked for GPU mode, check whether CUDA is actually available
    if use_gpu and not torch.cuda.is_available():
        print("GPU mode is not available, using CPU...")
        use_gpu = False

    # MOVE MODEL TO AVAILABLE DEVICE
    model.to(device)

    # DEFINE OPTIMIZER
    optimizer = optimizer_fn(model_name=config.MODEL_NAME,
                             model=model,
                             lr_rate=config.LR_RATE)

    # DEFINE SCHEDULER
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                           mode="min",
                                                           patience=5,
                                                           factor=0.3,
                                                           verbose=True)

    # DEFINE LOSS FUNCTION
    criterion = loss_fn()

    # LOAD BEST MODEL'S WEIGHTS
    best_model_wts = copy.deepcopy(model.state_dict())

    # BEST VALIDATION SCORE
    if ckp_path is None:
        best_score = -1  # IF THE MODEL IS TRAINED FROM SCRATCH
    else:
        best_score = model.best_score  # IF THE MODEL IS RETRAINED

    # NUMBER OF EPOCHS
    no_epochs = config.EPOCHS
    # KEEP TRACK OF LOSS AND ACCURACY IN EACH EPOCH
    stats = {
        'train_losses': [],
        'valid_losses': [],
        'train_accuracies': [],
        'valid_accuracies': []
    }

    print("Models's Training Start......")

    for epoch in range(1, no_epochs + 1):
        train_loss, train_score = train_fn(data_loaders,
                                           model,
                                           optimizer,
                                           criterion,
                                           device,
                                           phase='train')
        val_loss, val_score = eval_fn(data_loaders,
                                      model,
                                      criterion,
                                      device=device,
                                      phase='valid')
        scheduler.step(val_loss)

        # SAVE THE MODEL'S WEIGHTS IF ITS VALIDATION ACCURACY HAS INCREASED
        if val_score > best_score:
            print(
                'Validation score increased ({:.6f} --> {:.6f}).  Saving model ...'
                .format(best_score, val_score))
            best_score = val_score
            best_model_wts = copy.deepcopy(
                model.state_dict())  # save the best model's weights

        # MAKE A RECORD OF AVERAGE LOSSES AND ACCURACIES IN EACH EPOCH FOR PLOTTING
        stats['train_losses'].append(train_loss)
        stats['valid_losses'].append(val_loss)
        stats['train_accuracies'].append(train_score)
        stats['valid_accuracies'].append(val_score)

        # PRINT TRAINING AND VALIDATION LOSSES/ACCURACIES AFTER EACH EPOCH
        epoch_len = len(str(no_epochs))
        print_msg = (f'[{epoch:>{epoch_len}}/{no_epochs:>{epoch_len}}] ' +
                     '\t' + f'train_loss: {train_loss:.5f} ' + '\t' +
                     f'train_score: {train_score:.5f} ' + '\t' +
                     f'valid_loss: {val_loss:.5f} ' + '\t' +
                     f'valid_score: {val_score:.5f}')
        print(print_msg)

    # load best model weights
    model.load_state_dict(best_model_wts)

    # create checkpoint variable and add important data
    model.class_to_idx = data_loaders['train'].dataset.class_to_idx
    model.best_score = best_score
    model.model_name = config.MODEL_NAME
    checkpoint = {
        'epoch': no_epochs,
        'lr_rate': config.LR_RATE,
        'model_name': config.MODEL_NAME,
        'batch_size': config.BATCH_SIZE,
        'valid_score': best_score,
        'optimizer': optimizer.state_dict(),
        'state_dict': model.state_dict(),
        'class_to_idx': model.class_to_idx
    }

    # SAVE CHECKPOINT
    save_ckp(checkpoint, save_dir, file_name)

    print("Models's Training is Successfull......")

    return model
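
# load_ckp is called above but not shown; a possible counterpart, reconstructed from the
# checkpoint dictionary built in main() (the keys and the initialize_model signature are
# assumptions, not the original implementation).
def load_ckp(ckp_path):
    checkpoint = torch.load(ckp_path, map_location='cpu')
    model = initialize_model(model_name=checkpoint['model_name'],
                             num_classes=config.NO_OF_CLASSES,
                             feature_extract=True,
                             use_pretrained=True)
    model.load_state_dict(checkpoint['state_dict'])
    model.class_to_idx = checkpoint['class_to_idx']
    model.best_score = checkpoint['valid_score']  # used as the starting best_score in main()
    return model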
Example #5
def run(fold):
    dfx = pd.read_csv(config.TRAINING_FILE)

    df_train = dfx[dfx.kfold != fold].reset_index(drop=True)
    df_valid = dfx[dfx.kfold == fold].reset_index(drop=True)

    train_dataset = TweetDataset(tweet=df_train.text.values,
                                 sentiment=df_train.sentiment.values,
                                 selected_text=df_train.selected_text.values)

    train_data_loader = torch.utils.data.DataLoader(
        train_dataset, batch_size=config.TRAIN_BATCH_SIZE, num_workers=4)

    valid_dataset = TweetDataset(tweet=df_valid.text.values,
                                 sentiment=df_valid.sentiment.values,
                                 selected_text=df_valid.selected_text.values)

    valid_data_loader = torch.utils.data.DataLoader(
        valid_dataset, batch_size=config.VALID_BATCH_SIZE, num_workers=2)

    device = torch.device("cuda")
    model_config = transformers.BertConfig.from_pretrained(config.BERT_PATH)
    model_config.output_hidden_states = True
    model = TweetModel(conf=model_config)
    model.to(device)

    num_train_steps = int(
        len(df_train) / config.TRAIN_BATCH_SIZE * config.EPOCHS)
    param_optimizer = list(model.named_parameters())
    no_decay = ["bias", "LayerNorm.bias", "LayerNorm.weight"]
    optimizer_parameters = [
        {
            'params': [
                p for n, p in param_optimizer
                if not any(nd in n for nd in no_decay)
            ],
            'weight_decay':
            0.001
        },
        {
            'params':
            [p for n, p in param_optimizer if any(nd in n for nd in no_decay)],
            'weight_decay':
            0.0
        },
    ]
    optimizer = AdamW(optimizer_parameters, lr=3e-5)
    scheduler = get_linear_schedule_with_warmup(
        optimizer, num_warmup_steps=0, num_training_steps=num_train_steps)

    es = utils.EarlyStopping(patience=2, mode="max")
    print(f"Training is Starting for fold={fold}")

    for epoch in range(config.EPOCHS):
        engine.train_fn(train_data_loader,
                        model,
                        optimizer,
                        device,
                        scheduler=scheduler)
        jaccard = engine.eval_fn(valid_data_loader, model, device)
        #print(f"Jaccard Score = {jaccard}")
        es(jaccard, model, model_path=f"model_{fold}.bin")
        if es.early_stop:
            print("Early stopping")
            break
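
# utils.EarlyStopping is not shown in this example; below is a minimal sketch that matches
# how it is used above (called with a score, the model and a model_path, and exposing an
# early_stop flag), not the actual utils implementation.
class EarlyStopping:
    def __init__(self, patience=2, mode="max"):
        self.patience = patience
        self.mode = mode
        self.counter = 0
        self.best_score = None
        self.early_stop = False

    def __call__(self, score, model, model_path):
        # for mode="min" (e.g. a loss), compare on the negated value
        if self.mode == "min":
            score = -score
        if self.best_score is None or score > self.best_score:
            self.best_score = score
            self.counter = 0
            torch.save(model.state_dict(), model_path)  # checkpoint on improvement
        else:
            self.counter += 1
            if self.counter >= self.patience:
                self.early_stop = True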
Example #6
def train():
    # this function trains the model

    # read the training file and fill NaN values with "none"
    df = pd.read_csv(config.TRAINING_FILE).fillna("none")

    # map positive to 1 and negative to 0
    df.sentiment = df.sentiment.apply(lambda x: 1 if x == "positive" else 0)

    # split data into single training and validation fold
    df_train, df_valid = model_selection.train_test_split(
        df, test_size=0.1, random_state=42, stratify=df.sentiment.values)
    # reset index
    df_train = df_train.reset_index(drop=True)
    df_valid = df_valid.reset_index(drop=True)

    # initialize BERTDataset from dataset.py
    # for training dataset
    train_dataset = dataset.BERTDataset(review=df_train.review.values,
                                        target=df_train.sentiment.values)

    # create training dataloader
    train_data_loader = torch.utils.data.DataLoader(
        train_dataset, batch_size=config.TRAIN_BATCH_SIZE, num_workers=4)
    # initialize BERTDataset from dataset.py
    # for validation dataset
    valid_dataset = dataset.BERTDataset(review=df_valid.review.values,
                                        target=df_valid.sentiment.values)

    # create validation dataloader
    valid_data_loader = torch.utils.data.DataLoader(
        valid_dataset, batch_size=config.VALID_BATCH_SIZE, num_workers=1)

    # initialize the cuda device
    # use cpu if you don't have a GPU
    device = torch.device("cuda")

    # load model and send it to the device
    model = BERTBasedUncased()
    model.to(device)

    # create parameters we want to optimize
    # we generally dont use any decay for bias
    # and weight layers
    param_optimizer = list(model.named_parameters())
    no_decay = ["bias", "LayerNorm.bias", "LayerNorm.weight"]
    optimizer_parameters = [
        {
            "params": [
                p for n, p in param_optimizer
                if not any(nd in n for nd in no_decay)
            ],
            "weight_decay":
            0.001,
        },
        {
            "params": [
                p for n, p in param_optimizer
                if any(nd in n for nd in no_decay)
            ],
            "weight_decay":
            0.0,
        },
    ]

    # calculate the number of training steps
    # this is used by scheduler
    num_train_steps = int(
        len(df_train) / config.TRAIN_BATCH_SIZE * config.EPOCHS)

    # AdamW optimizer
    # AdamW is the most widely used optimizer
    # for transformer based networks
    optimizer = AdamW(optimizer_parameters, lr=3e-5)

    # fetch a scheduler
    # you can also try using reduce lr on plateau
    scheduler = get_linear_schedule_with_warmup(
        optimizer, num_warmup_steps=0, num_training_steps=num_train_steps)

    # if you have multiple GPUs
    # model to DataParallel
    # to use multiple GPUs
    model = nn.DataParallel(model)

    # start training the epochs
    best_accuracy = 0
    for epoch in range(config.EPOCHS):

        # train model
        engine.train_fn(train_data_loader, model, optimizer, device, scheduler)

        # test the model
        outputs, targets = engine.eval_fn(valid_data_loader, model, device)

        # convert outputs to numpy array
        outputs = np.array(outputs) >= 0.5

        # calculate the accuracy
        accuracy = metrics.accuracy_score(targets, outputs)

        # print the accuracy
        print(f"Accuracy Score = {accuracy}")

        # save the model only if the accuracy is better than best_accuracy (initialized to 0)
        if accuracy > best_accuracy:
            torch.save(model.state_dict(), config.MODEL_PATH)
            best_accuracy = accuracy
Example #7
def run_training():
    image_files = glob.glob(os.path.join(config.DATA_DIR, "*.png"))
    targets_orig = [x.split("/")[-1][:-4].split('_')[0] for x in image_files]
    targets = [[c for c in x] for x in targets_orig]
    targets_flat = [c for clist in targets for c in clist]

    lbl_enc = preprocessing.LabelEncoder()
    lbl_enc.fit(targets_flat)
    np.save(config.LABEL_ENCODER_SAVE_PATH, lbl_enc.classes_)
    targets_enc = [lbl_enc.transform(x) for x in targets]
    # print(targets_enc)
    # new_targets_enc= []
    # for i,target in enumerate(targets_enc):
    #   tmp = np.array([-1,-1,-1,-1,-1])
    #   for idx, item in enumerate(target):
    #     # print(idx)
    #     # print('i',i)
    #     tmp[idx] = item
    #     # print(image_files[i])
    #   new_targets_enc.append(tmp)
    # print(new_targets_enc)
    targets_enc = np.array(targets_enc)
    targets_enc = targets_enc + 1

    (
        train_imgs,
        test_imgs,
        train_targets,
        test_targets,
        _,
        test_targets_orig,
    ) = model_selection.train_test_split(
        image_files, targets_enc, targets_orig, test_size=0.1, random_state=42
    )

    train_dataset = dataset.ClassificationDataset(
        image_paths=train_imgs,
        targets=train_targets,
        resize=(config.IMAGE_HEIGHT, config.IMAGE_WIDTH),
    )
    train_loader = torch.utils.data.DataLoader(
        train_dataset,
        batch_size=config.BATCH_SIZE,
        num_workers=config.NUM_WORKERS,
        shuffle=True,
    )
    test_dataset = dataset.ClassificationDataset(
        image_paths=test_imgs,
        targets=test_targets,
        resize=(config.IMAGE_HEIGHT, config.IMAGE_WIDTH),
    )
    test_loader = torch.utils.data.DataLoader(
        test_dataset,
        batch_size=config.BATCH_SIZE,
        num_workers=config.NUM_WORKERS,
        shuffle=False,
    )

    model = CaptchaModel(num_chars=len(lbl_enc.classes_))
    model.to(config.DEVICE)

    optimizer = torch.optim.Adam(model.parameters(), lr=3e-4)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
        optimizer, factor=0.8, patience=5, verbose=True
    )
    for epoch in range(config.EPOCHS):
        train_loss = engine.train_fn(model, train_loader, optimizer)
        valid_preds, test_loss = engine.eval_fn(model, test_loader)
        valid_captcha_preds = []
        for vp in valid_preds:
            current_preds = decode_predictions(vp, lbl_enc)
            valid_captcha_preds.extend(current_preds)
        combined = list(zip(test_targets_orig, valid_captcha_preds))
        print(combined[:10])
        test_dup_rem = test_targets_orig
        accuracy = metrics.accuracy_score(test_dup_rem, valid_captcha_preds)
        print(
            f"Epoch={epoch}, Train Loss={train_loss}, Test Loss={test_loss} Accuracy={accuracy}"
        )
        scheduler.step(test_loss)
        torch.save(model.state_dict(), config.MODEL_SAVE_PATH)
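
# decode_predictions is used in run_training() but not defined here; a rough sketch of a
# greedy decoder, assuming model outputs of shape (timesteps, batch, num_classes + 1)
# where index 0 is the "unknown"/blank class introduced by the +1 shift on targets_enc.
def decode_predictions(preds, encoder):
    preds = preds.permute(1, 0, 2)   # -> (batch, timesteps, classes)
    preds = torch.softmax(preds, 2)
    preds = torch.argmax(preds, 2)   # -> (batch, timesteps)
    preds = preds.detach().cpu().numpy()
    decoded = []
    for sample in preds:
        chars = []
        for k in sample:
            k = k - 1  # undo the +1 shift applied to the targets
            chars.append("" if k == -1 else encoder.inverse_transform([k])[0])
        decoded.append("".join(chars))
    return decoded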
def run():
    dfx = pd.read_csv(config.TRAINING_FILE).dropna().reset_index(drop=True)

    # stratified split so that the classes stay balanced in both splits: the ratio of
    # positive to negative samples in the full data is preserved in the train split
    # and in the validation split
    df_train, df_valid = model_selection.train_test_split(
        dfx, test_size=0.1, random_state=42, stratify=dfx.sentiment.values)

    df_train = df_train.reset_index(drop=True)
    df_valid = df_valid.reset_index(drop=True)

    train_dataset = dataset.TweetDataset(
        tweet=df_train.text.values,
        target=df_train.sentiment.values,
        selected_text=df_train.selected_text.values)

    train_data_loader = torch.utils.data.DataLoader(
        train_dataset, batch_size=config.TRAIN_BATCH_SIZE, num_workers=4)

    valid_dataset = dataset.TweetDataset(
        tweet=df_valid.text.values,
        target=df_valid.sentiment.values,
        selected_text=df_valid.selected_text.values)

    valid_data_loader = torch.utils.data.DataLoader(
        valid_dataset, batch_size=config.VALID_BATCH_SIZE, num_workers=1)

    device = torch.device("cuda")
    model = BERTBaseUncased()
    model.to(device)

    #specify what parameters you want to train
    param_optimizer = list(model.named_parameters())

    # we don't want any weight decay for these layer names, such as bias and LayerNorm parameters
    no_decay = ["bias", "LayerNorm.bias", "LayerNorm.weight"]

    optimizer_parameters = [
        {
            # don't decay weights for the no_decay list above; decay all the others
            "params": [
                p for n, p in param_optimizer
                if not any(nd in n for nd in no_decay)
            ],
            "weight_decay":
            0.001,
        },
        {
            "params":
            [p for n, p in param_optimizer if any(nd in n for nd in no_decay)],
            "weight_decay":
            0.0,
        },
    ]

    num_train_steps = int(
        len(df_train) / config.TRAIN_BATCH_SIZE * config.EPOCHS)

    #experiment with lr
    optimizer = AdamW(optimizer_parameters, lr=3e-5)

    #scheduler can be of your choice
    scheduler = get_linear_schedule_with_warmup(
        optimizer, num_warmup_steps=0, num_training_steps=num_train_steps)

    # wrap the model for multi-GPU training ---> no need to do this if you don't have multiple GPUs
    model = nn.DataParallel(model)

    # evaluation metric is jaccard
    best_jaccard = 0

    for epoch in range(config.EPOCHS):
        engine.train_fn(train_data_loader, model, optimizer, device, scheduler)
        jaccard = engine.eval_fn(valid_data_loader, model, device)

        print(f"Jaccard Score = {jaccard}")
        if jaccard > best_jaccard:
            torch.save(model.state_dict(), config.MODEL_PATH)
            best_jaccard = jaccard
Example #9
def train():
    data = open(config.Metadata).read().strip().split('\n')[:10]
    text_data, audio_file_name = preprocess(data)
    del data
    gc.collect()
    transforms = [
        torchaudio.transforms.FrequencyMasking(freq_mask_param=15),
        torchaudio.transforms.TimeMasking(time_mask_param=35)
    ]
    
    train_text_data, val_text_data, train_audio_file_name, val_audio_file_name = train_test_split(
        text_data, 
        audio_file_name, 
        test_size=0.2
        )

    train_data = dataloader.TransformerLoader(
        files_name=train_audio_file_name,
        text_data=train_text_data,
        mel_transforms=transforms,
        normalize=True
    )

    val_data = dataloader.TransformerLoader(
        files_name=val_audio_file_name,
        text_data=val_text_data,
        normalize=True
    )

    pad_idx = 0


    train_loader = torch.utils.data.DataLoader(
        train_data,
        batch_size=config.Batch_Size,
        num_workers=1,
        pin_memory=True,
        collate_fn=dataloader.MyCollate(
            pad_idx=pad_idx, 
            spect_pad=-config.scaling_factor
        )
    )

    val_loader = torch.utils.data.DataLoader(
        val_data,
        batch_size=config.Batch_Size,
        num_workers=1,
        pin_memory=True,
        collate_fn=dataloader.MyCollate(
            pad_idx=pad_idx, 
            spect_pad=-config.scaling_factor
        )
    )

    vocab_size = len(train_data.char_to_idx) + 1

    model = TransformerTTS(
        vocab_size=vocab_size,
        embed_dims=config.embed_dims,
        hidden_dims=config.hidden_dims, 
        heads=config.heads,
        forward_expansion=config.forward_expansion,
        num_layers=config.num_layers,
        dropout=config.dropout,
        mel_dims=config.n_mels,
        max_len=config.max_len,
        pad_idx=config.pad_idx
    )
    # device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    # torch.backends.cudnn.benchmark = True
    device = torch.device('cpu')
    model = model.to(device)

    optimizer = transformers.AdamW(model.parameters(), lr=config.LR)

    num_training_steps = config.Epochs*len(train_data)//config.Batch_Size

    scheduler = transformers.get_cosine_schedule_with_warmup(
        optimizer,
        num_warmup_steps=config.warmup_steps*num_training_steps,
        num_training_steps=num_training_steps
    )

    epoch_start = 0

    if os.path.exists(config.checkpoint):
        checkpoint = torch.load(config.checkpoint)
        model.load_state_dict(checkpoint['model_state_dict'])
        optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
        scheduler.load_state_dict(checkpoint['scheduler_state_dict'])
        epoch_start = checkpoint['epoch']
        print(f'---------[INFO] Restarting Training from Epoch {epoch_start} -----------\n')

         

    best_loss = 1e10 
    best_model = model.state_dict()
    print('--------- [INFO] STARTING TRAINING ---------\n')
    for epoch in range(epoch_start, config.Epochs):
        train_loss = engine.train_fn(model, train_loader, optimizer, scheduler, device)
        val_loss = engine.eval_fn(model, val_loader, device)
        print(f'EPOCH -> {epoch+1}/{config.Epochs} | TRAIN LOSS = {train_loss} | VAL LOSS = {val_loss} | LR = {scheduler.get_lr()[0]} \n')
        
        torch.save({
            'epoch'                 : epoch,
            'model_state_dict'      : model.state_dict(),
            'optimizer_state_dict'  : optimizer.state_dict(),
            'scheduler_state_dict'  : scheduler.state_dict(),
            'loss': val_loss,
            }, config.checkpoint)

        if best_loss > val_loss:
            best_loss = val_loss
            best_model = model.state_dict()
            torch.save(best_model, config.Model_Path)
def run():
    df1 = pd.read_csv("../data/jigsaw-toxic-comment-train.csv",
                      usecols=["comment_text", "toxic"])
    df2 = pd.read_csv("../data/jigsaw-unintended-bias-train.csv",
                      usecols=["comment_text", "toxic"])

    df_train = pd.concat([df1, df2], axis=0).reset_index(drop=True)

    df_valid = pd.read_csv("../data/validation.csv")

    train_dataset = dataset.BERTDataset(
        comment_text=df_train.comment_text.values,
        target=df_train.toxic.values)

    train_data_loader = torch.utils.data.DataLoader(
        train_dataset, batch_size=config.TRAIN_BATCH_SIZE, num_workers=4)

    valid_dataset = dataset.BERTDataset(
        comment_text=df_valid.comment_text.values,
        target=df_valid.toxic.values)

    valid_data_loader = torch.utils.data.DataLoader(
        valid_dataset, batch_size=config.VALID_BATCH_SIZE, num_workers=1)

    device = torch.device("cuda")
    model = BERTBaseUncased()
    model.to(device)

    param_optimizer = list(model.named_parameters())
    no_decay = ["bias", "LayerNorm.bias", "LayerNorm.weight"]
    optimizer_parameters = [
        {
            'params': [
                p for n, p in param_optimizer
                if not any(nd in n for nd in no_decay)
            ],
            'weight_decay':
            0.001
        },
        {
            'params':
            [p for n, p in param_optimizer if any(nd in n for nd in no_decay)],
            'weight_decay':
            0.0
        },
    ]

    num_train_steps = int(
        len(df_train) / config.TRAIN_BATCH_SIZE * config.EPOCHS)
    optimizer = AdamW(optimizer_parameters, lr=3e-5)
    scheduler = get_linear_schedule_with_warmup(
        optimizer, num_warmup_steps=0, num_training_steps=num_train_steps)

    model = nn.DataParallel(model)

    best_accuracy = 0
    for epoch in range(config.EPOCHS):
        engine.train_fn(train_data_loader, model, optimizer, device, scheduler)
        outputs, targets = engine.eval_fn(valid_data_loader, model, device)
        targets = np.array(targets) >= 0.5
        accuracy = metrics.roc_auc_score(targets, outputs)
        print(f"AUC Score = {accuracy}")
        if accuracy > best_accuracy:
            torch.save(model.state_dict(), config.MODEL_PATH)
            best_accuracy = accuracy
Example #11
def run():
    dfx = pd.read_csv(config.TRAINING_FILE,
                      nrows=100).dropna().reset_index(drop=True)

    df_train, df_valid = model_selection.train_test_split(
        dfx, test_size=0.1, random_state=42, stratify=dfx["sentiment"].values)

    df_train = df_train.reset_index(drop=True)
    df_valid = df_valid.reset_index(drop=True)

    train_dataset = dataset.TweetDataset(
        tweet=df_train["text"].values,
        sentiment=df_train["sentiment"].values,
        selected_text=df_train["selected_text"].values,
    )

    train_dataloader = torch.utils.data.DataLoader(
        train_dataset,
        batch_size=config.TRAIN_BATCH_SIZE,
        # num_workers=4,
    )

    valid_dataset = dataset.TweetDataset(
        tweet=df_valid["text"].values,
        sentiment=df_valid["sentiment"].values,
        selected_text=df_valid["selected_text"].values,
    )

    valid_dataloader = torch.utils.data.DataLoader(
        valid_dataset,
        batch_size=config.VALIDATION_BATCH_SIZE,
        # num_workers=1,
    )
    device = torch.device("cpu")
    model = BERTBasedUncased()
    model.to(device)

    param_optimizer = list(model.named_parameters())
    no_decay = ["bias", "LayerNorm.bias", "LayerNorm.weight"]

    optimizer_parameters = [
        {
            "params": [
                p for n, p in param_optimizer
                if not any(nd in n for nd in no_decay)
            ],
            "weight_decay":
            0.001,
        },
        {
            "params":
            [p for n, p in param_optimizer if any(nd in n for nd in no_decay)],
            "weight_decay":
            0.0,
        },
    ]

    num_train_steps = int(len(df_train) / config.TRAIN_BATCH_SIZE * config.EPOCHS)
    optimizer = AdamW(optimizer_parameters, lr=3e-5)
    scheduler = get_linear_schedule_with_warmup(
        optimizer, num_warmup_steps=0, num_training_steps=num_train_steps)

    best_jaccard = 0
    for epoch in range(config.EPOCHS):
        engine.train_fn(train_dataloader, model, optimizer, device, scheduler)
        jaccard = engine.eval_fn(valid_dataloader, model, device)

        print(f"Jaccard score :  {jaccard}")

        if jaccard > best_jaccard:
            torch.save(model.state_dict(), config.MODEL_PATH)
            best_jaccard = jaccard
Example #12
def run():
    df1 = pd.read_csv("../input/jigsaw-multilingual-toxic-comment-train.csv",
                      usecols=['comment_text', 'toxic'])
    df1 = pd.read_csv("../input/jigsaw-unintended-bias-train.csv",
                      usecols=['comment_text', 'toxic'])

    #combined df1 and df2 and made big dataframe
    df_train = pd.concat([df1, df2], axis=0).reset_index(drop=True)

    #validation dataframe has been given by kaggle
    df_valid - pd.read_csv("../input/validation.csv")

    train_dataset = dataset.BERTDataset(
        comment_text=df_train.comment_text.values,
        target=df_train.toxic.values)

    #--------------------------------------
    # use a distributed sampler when training on TPU (skip it otherwise)
    train_sampler = torch.utils.data.distributed.DistributedSampler(
        train_dataset,
        num_replicas=xm.xrt_world_size(),
        rank=xm.get_ordinal(),
        shuffle=True)
    #----------------------------------------

    train_data_loader = torch.utils.data.DataLoader(
        train_dataset,
        batch_size=config.TRAIN_BATCH_SIZE,
        num_workers=4,
        sampler=train_sampler,
        # with torch_xla on TPU, a final batch of a different size will crash training, so use drop_last
        drop_last=True)

    valid_dataset = dataset.BERTDataset(
        comment_text=df_valid.comment_text.values,
        target=df_valid.toxic.values)

    #--------------------------------------
    # use a distributed sampler when training on TPU (skip it otherwise)
    valid_sampler = torch.utils.data.distributed.DistributedSampler(
        valid_dataset,
        num_replicas=xm.xrt_world_size(),
        rank=xm.get_ordinal(),
        shuffle=True)
    #----------------------------------------------

    valid_data_loader = torch.utils.data.DataLoader(
        valid_dataset,
        batch_size=config.VALID_BATCH_SIZE,
        num_workers=1,
        sampler=valid_sampler,
        #no need of drop_last here
    )

    device = xm.xla_device()  #xla_device means tpu
    model = BERTBaseUncased()
    model.to(device)  # the model must be moved to the XLA device (the ParallelLoader below moves the batches)

    #specify what parameters you want to train
    param_optimizer = list(model.named_parameters())

    # we don't want any weight decay for these layer names, such as bias and LayerNorm parameters
    no_decay = ["bias", "LayerNorm.bias", "LayerNorm.weight"]

    optimizer_parameters = [
        {
            # don't decay weights for the no_decay list above; decay all the others
            "params": [
                p for n, p in param_optimizer
                if not any(nd in n for nd in no_decay)
            ],
            "weight_decay":
            0.001,
        },
        {
            "params":
            [p for n, p in param_optimizer if any(nd in n for nd in no_decay)],
            "weight_decay":
            0.0,
        },
    ]

    num_train_steps = int(
        len(df_train) / config.TRAIN_BATCH_SIZE / xm.xrt_world_size() *
        config.EPOCHS)

    lr = 3e-5 * xm.xrt_world_size()
    #experiment with lr
    optimizer = AdamW(optimizer_parameters, lr=lr)
    scheduler = get_linear_schedule_with_warmup(
        optimizer, num_warmup_steps=0, num_training_steps=num_train_steps)

    best_accuracy = 0
    for epoch in range(config.EPOCHS):

        #parallel loader for tpus
        para_loader = pl.ParallelLoader(train_data_loader, [device])
        engine.train_fn(para_loader.per_device_loader(device), model,
                        optimizer, device, scheduler)

        para_loader = pl.ParallelLoader(valid_data_loader, [device])
        outputs, targets = engine.eval_fn(
            para_loader.per_device_loader(device), model, device)

        # threshold the soft targets and the predicted probabilities
        targets = np.array(targets) >= 0.5
        outputs = np.array(outputs) >= 0.5
        accuracy = metrics.accuracy_score(targets, outputs)
        print(f"Accuracy Score = {accuracy}")
        if accuracy > best_accuracy:

            #instead of torch.save use xm.save
            xm.save(model.state_dict(), config.MODEL_PATH)
            best_accuracy = accuracy
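
# Not part of the original snippet: on TPU, run() above is usually launched once per core
# via torch_xla's multiprocessing wrapper, roughly like this (a sketch; the import and the
# 8-core assumption are not from the original code).
import torch_xla.distributed.xla_multiprocessing as xmp

def _mp_fn(rank, flags):
    torch.set_default_tensor_type('torch.FloatTensor')
    run()

if __name__ == "__main__":
    xmp.spawn(_mp_fn, args=({},), nprocs=8, start_method='fork')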
Example #13
def train():
    df = pd.read_csv(config.TRAINING_FILE).fillna("none")
    df['sentiment'] = df['sentiment'].map({"positive": 1, "negative": 0})

    df_train, df_valid = train_test_split(df,
                                          test_size=0.1,
                                          random_state=42,
                                          stratify=df.sentiment.values)

    # reset index of both splits
    df_train = df_train.reset_index(drop=True)
    df_valid = df_valid.reset_index(drop=True)

    train_dataset = dataset.BERTDataset(review=df_train.review.values,
                                        target=df_train.sentiment.values)

    train_dataloader = torch.utils.data.DataLoader(
        train_dataset,
        batch_size=config.TRAIN_BATCH_SIZE,
        shuffle=False,
        num_workers=4,
    )

    valid_dataset = dataset.BERTDataset(review=df_valid.review.values,
                                        target=df_valid.sentiment.values)

    valid_dataloader = torch.utils.data.DataLoader(
        valid_dataset,
        batch_size=config.VALID_BATCH_SIZE,
        shuffle=False,
        num_workers=4,
    )

    device = torch.device("cuda")
    model = BERTBaseUncased()
    model.to(device)

    no_decay = ['bias', 'LayerNorm.weight']
    optimizer_grouped_parameters = [{
        'params': [
            p for n, p in model.named_parameters()
            if not any(nd in n for nd in no_decay)
        ],
        'weight_decay':
        0.01
    }, {
        'params': [
            p for n, p in model.named_parameters()
            if any(nd in n for nd in no_decay)
        ],
        'weight_decay':
        0.0
    }]

    optimizer = AdamW(optimizer_grouped_parameters, lr=3e-5)
    scheduler = get_linear_schedule_with_warmup(
        optimizer=optimizer,
        num_warmup_steps=0,
        num_training_steps=int(len(df_train) / config.TRAIN_BATCH_SIZE) *
        config.EPOCHS)

    best_accuracy = 0
    for epoch in range(config.EPOCHS):
        engine.train_fn(train_dataloader, model, optimizer, device, scheduler)
        outputs, targets = engine.eval_fn(valid_dataloader, model, device)

        outputs = np.array(outputs) >= 0.5
        accuracy = metrics.accuracy_score(outputs, targets)
        print(f"Accuracy: {accuracy:.3f}")
        if accuracy > best_accuracy:
            best_accuracy = accuracy
            torch.save(model.state_dict(), config.MODEL_PATH)
                                {'params': [p for n, p in param_optimizer if any(nd in n for nd in no_decay)], 'weight_decay':0.0}
                               ] 


optimizer = AdamW(optimizer_grouped_parameters, lr=3e-5)
total_steps = len(train_data_loader) * epochs

scheduler = get_linear_schedule_with_warmup(
        optimizer, num_warmup_steps=0, num_training_steps=total_steps
    )


best_accuracy = 0
for epoch in range(config.epochs):
    engine.train_fn(train_data_loader, model, optimizer, device, scheduler, epoch)
    outputs, targets = engine.eval_fn(val_data_loader, model, device)

    outputs = np.array(outputs) >= 0.5

    #accuracy = metrics.roc_auc_score(targets, outputs)
    accuracy2 = metrics.accuracy_score(targets, outputs)
    #f1_score_micro = metrics.f1_score(targets, outputs, average='micro')
    #f1_score_macro = metrics.f1_score(targets, outputs, average='macro')

    #print(f"Epoch = {epoch}, roc_auc Score = {accuracy}")
    print(f"Epoch = {epoch}, Accuracy Score = {accuracy2}")
    #print(f"Epoch = {epoch}, f1_micro Score = {f1_score_micro}")
    #print(f"Epoch = {epoch}, f1_macro Score = {f1_score_macro}")

    if accuracy2 > best_accuracy:
        torch.save(model.state_dict(), config.model_path)
        best_accuracy = accuracy2
Example #15
def run():
    train_dataset = torchvision.datasets.CIFAR10(root='input/data',
                                                 train=True,
                                                 download=True)
    val_dataset = torchvision.datasets.CIFAR10(root='input/data',
                                               train=False,
                                               download=True)

    train_transform = alb.Compose([
        alb.Resize(config.image_height, config.image_width, always_apply=True),
        alb.Normalize(config.mean, config.std, always_apply=True),
        alb.HorizontalFlip(p=0.1),
        alb.RandomBrightness(p=0.2),
        alb.RandomContrast(p=0.1),
        alb.RGBShift(p=0.1),
        alb.GaussNoise(p=0.1),
    ])

    val_transforms = alb.Compose([
        alb.Resize(config.image_height, config.image_width, always_apply=True),
        alb.Normalize(config.mean, config.std, always_apply=True)
    ])

    train_data = dataloader.dataloader(train_dataset, train_transform)
    val_data = dataloader.dataloader(val_dataset, val_transforms)

    train_loader = torch.utils.data.DataLoader(train_data,
                                               num_workers=4,
                                               pin_memory=True,
                                               batch_size=config.Batch_Size)

    val_loader = torch.utils.data.DataLoader(val_data,
                                             num_workers=4,
                                             pin_memory=True,
                                             batch_size=config.Batch_Size)

    model = ImageTransformer.ViT(
        patch_height=16,
        patch_width=16,
        embedding_dims=768,
        dropout=0.1,
        heads=4,
        num_layers=4,
        forward_expansion=4,
        max_len=int((32 * 32) / (16 * 16)),
        layer_norm_eps=1e-5,
        num_classes=10,
    )

    if torch.cuda.is_available():
        accelerator = 'cuda'
    else:
        accelerator = 'cpu'

    device = torch.device(accelerator)
    torch.backends.cudnn.benchmark = True

    model = model.to(device)

    optimizer = transformers.AdamW(model.parameters(),
                                   lr=config.LR,
                                   weight_decay=config.weight_decay)

    num_training_steps = int(
        (config.Epochs * len(train_dataset)) / config.Batch_Size)

    scheduler = transformers.get_linear_schedule_with_warmup(
        optimizer,
        num_warmup_steps=int(0.1 * num_training_steps),
        num_training_steps=num_training_steps)

    best_acc = 0
    best_model = 0
    for epoch in range(config.Epochs):
        train_acc, train_loss = engine.train_fn(model, train_loader, optimizer,
                                                scheduler, device)
        val_acc, val_loss = engine.eval_fn(model, val_loader, device)
        print(
            f'\nEPOCH     =  {epoch+1} / {config.Epochs} | LR =  {scheduler.get_last_lr()[0]}'
        )
        print(f'TRAIN ACC = {train_acc*100}% | TRAIN LOSS = {train_loss}')
        print(f'VAL ACC   = {val_acc*100}% | VAL LOSS = {val_loss}')
        if val_acc > best_acc:
            best_acc = val_acc
            best_model = model.state_dict()

    torch.save(best_model, config.Model_Path)
Example #16
def run():
    logger.info("using device: {}".format(config.DEVICE))
    train_data = process_raw_data()
    train_list, test_list = train_test_split(train_data,
                                             test_size=0.2,
                                             random_state=34)

    # Load the GPT-2 model
    model, n_ctx = create_model(False)
    model.to(config.DEVICE)
    # Whether to run on multiple GPUs in parallel: you can choose which GPUs to train on
    multi_gpu = False
    if torch.cuda.is_available() and torch.cuda.device_count() > 1:
        logger.info("Using more than one GPUs to train...")
        os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
        os.environ["CUDA_VISIBLE_DEVICES"] = config.DEVICE_NUM
        model = DataParallel(
            model, device_ids=[int(i) for i in config.DEVICE_NUM.split(",")])
        multi_gpu = True

    # Log the number of model parameters
    num_parameters = sum(
        [parameter.numel() for parameter in model.parameters()])
    logger.info("number of model parameters: {}".format(num_parameters))

    # Load the data
    logger.info("loading training data")
    train_dataset = DialogueDataset(train_list, n_ctx)
    batch_num = len(train_dataset) // config.BATCH_SIZE
    test_dataset = DialogueDataset(test_list, n_ctx)
    test_batch_num = len(test_dataset) // config.BATCH_SIZE

    train_data_loader = DataLoader(train_dataset,
                                   batch_size=config.BATCH_SIZE,
                                   shuffle=True,
                                   num_workers=4,
                                   collate_fn=collate_fn)

    test_data_loader = DataLoader(test_dataset,
                                  batch_size=config.BATCH_SIZE,
                                  shuffle=True,
                                  num_workers=1,
                                  collate_fn=collate_fn)

    # Compute total_steps, the total number of optimization steps across all epochs
    total_steps = int(
        len(train_data_loader) * config.EPOCHS / config.GRADIENT_ACCUMULATION)
    logger.info('total training steps = {}'.format(total_steps))

    # Set up the optimizer and use a warm-up strategy at the start of training
    optimizer = AdamW(model.parameters(),
                      lr=config.LEARNING_RATE,
                      correct_bias=True)
    scheduler = get_linear_schedule_with_warmup(
        optimizer,
        num_warmup_steps=config.WARM_STEPS,
        num_training_steps=total_steps)

    logger.info("start training...")
    best_loss = 100
    best_accuracy = 0
    for epoch in range(config.EPOCHS):
        train_fn(model, train_data_loader, optimizer, scheduler, epoch,
                 batch_num, multi_gpu)
        loss, accuracy = eval_fn(model, test_data_loader, test_batch_num,
                                 multi_gpu)
        if loss < best_loss or accuracy > best_accuracy:
            logger.info('saving model for epoch {}, best loss: {}'.format(
                epoch + 1, loss))
            model_to_save = model.module if hasattr(model, 'module') else model
            model_to_save.save_pretrained(config.MODEL_PATH)
            best_loss = loss
            best_accuracy = accuracy
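
# train_fn is not shown in this example; since total_steps above is divided by
# config.GRADIENT_ACCUMULATION, its inner loop presumably accumulates gradients. A rough
# sketch of that pattern (the batch format and names here are assumptions, not the real code):
def train_fn_sketch(model, data_loader, optimizer, scheduler):
    model.train()
    optimizer.zero_grad()
    for step, (input_ids, labels) in enumerate(data_loader):
        outputs = model(input_ids=input_ids.to(config.DEVICE),
                        labels=labels.to(config.DEVICE))
        loss = outputs[0] / config.GRADIENT_ACCUMULATION  # scale the loss for accumulation
        loss.backward()
        if (step + 1) % config.GRADIENT_ACCUMULATION == 0:
            optimizer.step()
            scheduler.step()
            optimizer.zero_grad()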
Example #17
def run_training():

    image_files = glob.glob(os.path.join(config.DATA_DIR, "*.png"))
    targets_orig = [x.split("/")[-1][:-4] for x in image_files]
    # e.g. "abcd" -> ["a", "b", "c", "d"]
    targets = [[c for c in x] for x in targets_orig]
    targets_flat = [c for clist in targets for c in clist]

    lbl_enc = preprocessing.LabelEncoder()
    lbl_enc.fit(targets_flat)

    targets_enc = [lbl_enc.transform(x) for x in targets]
    targets_enc = np.array(targets_enc) + 1  # '0' is for unknown

    # print(targets_enc)
    # print(len(lbl_enc.classes_))

    train_imgs, test_imgs, train_targets, test_targets, _, test_targets_orig = model_selection.train_test_split(
        image_files, targets_enc, targets_orig, test_size=0.1, random_state=42)

    train_dataset = dataset.ClassificationDataset(
        image_paths=train_imgs,
        targets=train_targets,
        resize=(config.IMAGE_HEIGHT,config.IMAGE_WIDTH)
    )

    train_loader = torch.utils.data.DataLoader(
        train_dataset,
        batch_size=config.BATCH_SIZE,
        num_workers=config.NUM_WORKERS,
        shuffle=True
    )

    test_dataset = dataset.ClassificationDataset(
        image_paths=test_imgs,
        targets=test_targets,
        resize=(config.IMAGE_HEIGHT,config.IMAGE_WIDTH)
    )
    test_loader = torch.utils.data.DataLoader(
        test_dataset,
        batch_size=config.BATCH_SIZE,
        num_workers=config.NUM_WORKERS,
        shuffle=False
    )

    model = CaptchaModel(num_chars=len(lbl_enc.classes_))
    # print(torch.cuda.memory_summary(device=None, abbreviated=False))

    model.to(config.DEVICE)

    optimizer = torch.optim.Adam(model.parameters(), lr=3e-4)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
        optimizer,
        factor=0.8,
        patience=5,
        verbose=True
    )
    for epoch in range(config.EPOCHS):
        train_loss = engine.train_fn(model, train_loader, optimizer)
        valid_preds, test_loss = engine.eval_fn(model, test_loader)
        valid_cap_preds = []
        for vp in valid_preds:
            current_preds = decode_predictions(vp,lbl_enc)
            valid_cap_preds.extend(current_preds)
        combined = list(zip(test_targets_orig, valid_cap_preds))
        print(combined[:10])
        test_dup_rem = [remove_duplicates(c) for c in test_targets_orig]
        accuracy = metrics.accuracy_score(test_dup_rem, valid_cap_preds)

        print(f"Epoch:{epoch}, train_loss={train_loss}, test_loss={test_loss}, accuracy={accuracy}")
        scheduler.step(test_loss)
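
# remove_duplicates is used above but not defined in this snippet; a minimal sketch that
# collapses consecutive repeated characters, mirroring what a greedy CTC-style decode yields.
def remove_duplicates(x):
    if len(x) < 2:
        return x
    out = x[0]
    for ch in x[1:]:
        if ch != out[-1]:
            out += ch
    return out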
Example #18
def run():
    dfx = pd.read_csv(config.TRAINING_FILE)
    print("Shape of datframe:",dfx.shape)
    df_train, df_valid = model_selection.train_test_split(
        dfx, test_size=0.1, random_state=42, stratify=dfx.label.values
    )

    df_train = df_train.reset_index(drop=True)
    df_valid = df_valid.reset_index(drop=True)
    print("Shape of train datframe:",df_train.shape)
    print("Shape of validation dataframe:",df_valid.shape)

    train_dataset = dataset.BERTDataset(
        sent=df_train.sentences.values, target=df_train.label.values
    )

    train_data_loader = torch.utils.data.DataLoader(
        train_dataset, batch_size=config.TRAIN_BATCH_SIZE, num_workers=8
    )

    valid_dataset = dataset.BERTDataset(
        sent=df_valid.sentences.values, target=df_valid.label.values
    )

    valid_data_loader = torch.utils.data.DataLoader(
        valid_dataset, batch_size=config.VALID_BATCH_SIZE, num_workers=2
    )

    device = torch.device(config.DEVICE)
    model = BERT_CLASSIFIER()
    if config.RETRAIN:
        DEVICE = 'cuda'
        model.load_state_dict(torch.load(config.RETRAIN_MODEL_LOC))
    model.to(device)

    param_optimizer = list(model.named_parameters())
    no_decay = ["bias", "LayerNorm.bias", "LayerNorm.weight"]
    optimizer_parameters = [
        {
            "params": [
                p for n, p in param_optimizer if not any(nd in n for nd in no_decay)
            ],
            "weight_decay": 0.1,
        },
        {
            "params": [
                p for n, p in param_optimizer if any(nd in n for nd in no_decay)
            ],
            "weight_decay": 0.0,
        },
    ]

    num_train_steps = int(len(df_train) / config.TRAIN_BATCH_SIZE * config.EPOCHS)
    optimizer = AdamW(optimizer_parameters, lr=config.LEARNING_RATE)
    scheduler = get_linear_schedule_with_warmup(
        optimizer, num_warmup_steps=0, num_training_steps=num_train_steps
    )

    best_accuracy = 0
    best_eval_loss = np.inf

    for epoch in range(config.EPOCHS):
        epoch_train_loss = engine.train_fn(train_data_loader, model, optimizer, device, scheduler)
        outputs, targets, epoch_eval_loss = engine.eval_fn(valid_data_loader, model, device)
        outputs = np.array(outputs) >= config.ACC_CUTOFF
        accuracy = metrics.accuracy_score(targets, outputs)
        print("Train loss = ", epoch_train_loss)
        print("Validation Loss = ", epoch_eval_loss)
        print("Accuracy Score =", accuracy)
        if config.TRAINING_MODE == 'ba':
            best_eval_loss = np.inf
        if accuracy > best_accuracy and epoch_eval_loss < best_eval_loss:
            print("Saving Model state")
            torch.save(model.state_dict(), config.MODEL_PATH)
            best_accuracy = accuracy
            best_eval_loss = epoch_eval_loss
        else:
            print("Saving model in dump folder")
            torch.save(model.state_dict(), config.MODEL_PATH_2 + f"{epoch}.bin")
Example #19
def run():
    print("---------- Starting Data Reading -------")
    df1 = pd.read_csv("../input/jigsaw-toxic-comment-train.csv",
                      usecols=["comment_text", "toxic"])
    df2 = pd.read_csv("../input/jigsaw-unintended-bias-train.csv",
                      usecols=["comment_text", "toxic"])

    df_train = pd.concat([df1, df2], axis=0).reset_index(drop=True)
    df_valid = pd.read_csv("../input/validation.csv")

    print("---- Data Read Sucessfully --- ")

    # # dfx = pd.read_csv(config.TRAINING_FILE).fillna("none")
    # # dfx["sentiment"] = dfx["sentiment"].apply(
    # #     lambda x : 1 if x == "positive" else 0
    # # )

    # # df_train, df_valid = model_selection.train_test_split(
    # #     dfx,
    # #     test_size=0.1,
    # #     random_state=42,
    # #     stratify=dfx["sentiment"].values
    # # )

    # df_train = df_train.reset_index(drop=True)
    # df_valid = df_valid.reset_index(drop=True)

    train_dataset = dataset.BERTDataset(
        comment_text=df_train["comment_text"].values,
        target=df_train["toxic"].values)

    train_dataloader = torch.utils.data.DataLoader(
        train_dataset,
        batch_size=config.TRAIN_BATCH_SIZE,
        num_workers=4,
    )

    valid_dataset = dataset.BERTDataset(
        comment_text=df_valid["comment_text"].values,
        target=df_valid["toxic"].values)

    valid_dataloader = torch.utils.data.DataLoader(
        valid_dataset,
        batch_size=config.VALIDATION_BATCH_SIZE,
        num_workers=1,
    )
    print("---- DataLoaders Created Sucessfully --- ")

    device = torch.device("cuda")

    model = BERTBasedUncased()
    model.to(device)

    param_optimizer = list(model.named_parameters())
    no_decay = ["bias", "LayerNorm.bias", "LayerNorm.weight"]

    optimizer_parameters = [
        {
            "params": [
                p for n, p in param_optimizer
                if not any(nd in n for nd in no_decay)
            ],
            "weight_decay":
            0.001,
        },
        {
            "params":
            [p for n, p in param_optimizer if any(nd in n for nd in no_decay)],
            "weight_decay":
            0.0,
        },
    ]

    num_train_steps = int(
        len(df_train) / config.TRAIN_BATCH_SIZE * config.EPOCHS)
    optimizer = AdamW(optimizer_parameters, lr=3e-5)
    scheduler = get_linear_schedule_with_warmup(
        optimizer, num_warmup_steps=0, num_training_steps=num_train_steps)

    best_accuracy = 0
    for epoch in range(config.EPOCHS):
        engine.train_fn(train_dataloader, model, optimizer, scheduler, device)
        outputs, targets = engine.eval_fn(valid_dataloader, model, device)
        targets = np.array(targets) >= 0.5
        accuracy = metrics.roc_auc_score(targets, outputs)
        print(f"AUC Score {accuracy}")

        if accuracy > best_accuracy:
            torch.save(model.state_dict(), config.MODEL_PATH)
            best_accuracy = accuracy
Example #20
def run():
    # Reading the data file
    dfx = pd.read_csv(config.TRAINING_FILE, usecols=["comment_text",
                                                     "toxic"]).fillna("none")

    # Split the data into 90% training and 10% validation
    df_train, df_valid = model_selection.train_test_split(
        dfx, test_size=0.1, random_state=42, stratify=dfx.toxic.values)
    df_train = df_train.reset_index(drop=True)
    df_valid = df_valid.reset_index(drop=True)

    # pass the sentence and target from training dataset into class
    train_dataset = dataset.DISTILBERTDataset(
        comment_text=df_train.comment_text.values,
        target=df_train.toxic.values)

    # Wrap the training dataset in a DataLoader.
    train_data_loader = torch.utils.data.DataLoader(
        train_dataset, batch_size=config.TRAIN_BATCH_SIZE, num_workers=4)

    # pass the sentence and target from validation dataset into class
    valid_dataset = dataset.DISTILBERTDataset(
        comment_text=df_valid.comment_text.values,
        target=df_valid.toxic.values)

    # Wrap the validation dataset in a DataLoader.
    valid_data_loader = torch.utils.data.DataLoader(
        valid_dataset, batch_size=config.VALID_BATCH_SIZE, num_workers=1)

    device = torch.device("cuda")  # define the device
    model = DISTILBERTBaseUncased()  # define the model
    model.to(device)  # copy the model to the gpu

    # Prepare optimizer and schedule (linear warmup and decay)
    param_optimizer = list(model.named_parameters())
    no_decay = ["bias", "LayerNorm.bias", "LayerNorm.weight"]
    optimizer_parameters = [
        {
            "params": [
                p for n, p in param_optimizer
                if not any(nd in n for nd in no_decay)
            ],
            "weight_decay":
            0.01,
        },
        {
            "params":
            [p for n, p in param_optimizer if any(nd in n for nd in no_decay)],
            "weight_decay":
            0.0,
        },
    ]

    # Compute the number of training steps, then create the optimizer and scheduler
    num_train_steps = int(
        len(df_train) / config.TRAIN_BATCH_SIZE * config.EPOCHS)
    optimizer = AdamW(optimizer_parameters, lr=2e-5)
    scheduler = get_linear_schedule_with_warmup(
        optimizer, num_warmup_steps=0, num_training_steps=num_train_steps)

    # run the loop for every epoch
    best_f1_score = 0
    for epoch in range(config.EPOCHS):
        # run the training and validation functions
        engine.train_fn(train_data_loader, model, optimizer, device, scheduler)
        outputs, targets = engine.eval_fn(valid_data_loader, model, device)
        outputs = np.array(outputs) >= 0.5
        # Evaluation metric
        f1_score = metrics.f1_score(targets, outputs)
        print(f"F1 Score = {f1_score}")
        # saving the model
        if f1_score > best_f1_score:
            torch.save(model.state_dict(), config.MODEL_PATH)
            best_f1_score = f1_score
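The parameter grouping in the example above (and in most of the examples that follow) is the usual BERT fine-tuning recipe: bias and LayerNorm weights are excluded from weight decay, everything else receives a small decay. The sketch below shows that grouping in isolation; the helper name and the stand-in linear model are illustrative and not taken from any example here.

import torch
from torch.optim import AdamW


def grouped_parameters(model, weight_decay=0.01,
                       no_decay=("bias", "LayerNorm.bias", "LayerNorm.weight")):
    # Parameters whose name contains a no-decay substring get weight_decay=0.0;
    # all remaining parameters get the configured decay.
    decay, skip = [], []
    for name, param in model.named_parameters():
        (skip if any(nd in name for nd in no_decay) else decay).append(param)
    return [
        {"params": decay, "weight_decay": weight_decay},
        {"params": skip, "weight_decay": 0.0},
    ]


model = torch.nn.Linear(4, 2)  # stand-in for the transformer model
optimizer = AdamW(grouped_parameters(model), lr=2e-5)

Since AdamW applies decay per parameter group, the second group effectively behaves like plain Adam for those tensors.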
Example #21
0
def run(fold):
    dfx = pd.read_csv(config.TRAINING_FILE)

    df_train = dfx[dfx.kfold != fold].reset_index(drop=True)
    df_valid = dfx[dfx.kfold == fold].reset_index(drop=True)

    train_dataset = dataset.TweetDataset(
        tweets=df_train.text.values,
        sentiments=df_train.sentiment.values,
        selected_texts=df_train.selected_text.values)

    train_data_loader = torch.utils.data.DataLoader(
        train_dataset,
        batch_size=config.TRAIN_BATCH_SIZE,
        num_workers=4,
        shuffle=True)

    valid_dataset = dataset.TweetDataset(
        tweets=df_valid.text.values,
        sentiments=df_valid.sentiment.values,
        selected_texts=df_valid.selected_text.values)

    valid_data_loader = torch.utils.data.DataLoader(
        valid_dataset,
        batch_size=config.VALID_BATCH_SIZE,
        num_workers=4,
        shuffle=False)

    device = torch.device('cuda')
    model_config = transformers.XLNetConfig.from_pretrained(
        config.MODEL_CONFIG)
    model_config.output_hidden_states = True
    model = models.TweetModel(conf=model_config)
    model = model.to(device)

    num_train_steps = int(
        len(df_train) / config.TRAIN_BATCH_SIZE * config.EPOCHS)
    param_optimizer = list(model.named_parameters())
    no_decay = ['bias', 'LayerNorm.bias', 'LayerNorm.weight']
    optimizer_parameters = [{
        'params':
        [p for n, p in param_optimizer if not any(nd in n for nd in no_decay)],
        'weight_decay':
        config.WEIGHT_DECAY
    }, {
        'params':
        [p for n, p in param_optimizer if any(nd in n for nd in no_decay)],
        'weight_decay':
        0.0
    }]
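    # Wrap AdamW in stochastic weight averaging (torchcontrib): after roughly
    # SWA_RATIO of the total training steps, SWA keeps a running average of the
    # weights every SWA_FREQ optimizer steps; swap_swa_sgd() further down copies
    # that average back into the model before it is saved.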
    base_opt = transformers.AdamW(optimizer_parameters,
                                  lr=config.LEARNING_RATE)
    optimizer = torchcontrib.optim.SWA(base_opt,
                                       swa_start=int(num_train_steps *
                                                     config.SWA_RATIO),
                                       swa_freq=config.SWA_FREQ,
                                       swa_lr=None)
    scheduler = transformers.get_linear_schedule_with_warmup(
        optimizer=optimizer,
        num_warmup_steps=int(num_train_steps * config.WARMUP_RATIO),
        num_training_steps=num_train_steps)

    print(f'Training is starting for fold={fold}')

    for epoch in range(config.EPOCHS):
        engine.train_fn(train_data_loader,
                        model,
                        optimizer,
                        device,
                        scheduler=scheduler)
        jaccard = engine.eval_fn(valid_data_loader, model, device)

    if config.USE_SWA:
        optimizer.swap_swa_sgd()

    torch.save(model.state_dict(),
               f'{config.MODEL_SAVE_PATH}/model_{fold}.bin')

    return jaccard
Example #22
0
def main(fold):
    COMPUTE_CV = True
    DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

    data = pd.read_csv('../train_fold.csv')
    data['filepath'] = data['image'].apply(
        lambda x: os.path.join('../', 'train_images', x))

    target_encoder = LabelEncoder()

    data['label_group'] = target_encoder.fit_transform(data['label_group'])

    train = data[data['fold'] != fold].reset_index(drop=True)
    valid = data[data['fold'] == fold].reset_index(drop=True)
    # Defining DataSet
    train_dataset = ShopeeDataset(
        csv=train,
        transforms=get_transforms(img_size=DIM[0], trans_type='train'),
        mode='train',
    )

    valid_dataset = ShopeeDataset(
        csv=valid,
        transforms=get_transforms(img_size=DIM[0], trans_type='valid'),
        mode='train',
    )

    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=TRAIN_BATCH_SIZE,
                                               pin_memory=True,
                                               drop_last=True,
                                               num_workers=NUM_WORKERS)

    valid_loader = torch.utils.data.DataLoader(
        valid_dataset,
        batch_size=VALID_BATCH_SIZE,
        num_workers=NUM_WORKERS,
        shuffle=False,
        pin_memory=True,
        drop_last=False,
    )
    # get adaptive margin
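    # Rarer label groups receive a larger ArcFace margin: tmp grows as the class
    # count shrinks, and the min-max scaling below maps the margins into [0.05, 0.5].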
    tmp = np.sqrt(
        1 / np.sqrt(data['label_group'].value_counts().sort_index().values))
    margins = (tmp - tmp.min()) / (tmp.max() - tmp.min()) * 0.45 + 0.05

    # Defining Model for specific fold
    if model_version == "V1":
        model = ShopeeNet(**model_params)
    elif model_version == "V2":
        model = ShopeeNetV2(**model_params)
    else:
        model = ShopeeNetV3(**model_params)
    model.to(DEVICE)

    def fetch_loss(loss_type=None):
        if loss_type is None:
            loss = nn.CrossEntropyLoss()
        elif loss_type == 'arcface':
            loss = ArcFaceLossAdaptiveMargin(margins=margins,
                                             out_dim=model_params['n_classes'],
                                             s=80)
        else:
            raise ValueError(f'Unsupported loss_type: {loss_type}')
        return loss

    criterion = fetch_loss()
    criterion.to(DEVICE)

    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=scheduler_params['lr_start'])
    scheduler_cosine = torch.optim.lr_scheduler.CosineAnnealingWarmRestarts(
        optimizer, EPOCHS)
    scheduler_warmup = GradualWarmupSchedulerV2(
        optimizer,
        multiplier=10,
        total_epoch=1,
        after_scheduler=scheduler_cosine)

    # No per-step LR scheduler here; the warmup/cosine schedule above is stepped once per epoch
    scheduler = None

    # THE ENGINE LOOP
    best_loss = float('inf')  # sentinel for the best validation loss so far

    for epoch in range(EPOCHS):
        scheduler_warmup.step(epoch - 1)
        train_loss = train_fn(train_loader,
                              model,
                              criterion,
                              optimizer,
                              DEVICE,
                              epoch_th=epoch,
                              scheduler=scheduler)
        valid_loss = eval_fn(valid_loader, model, criterion, DEVICE)

        print(
            'Fold {} | Epoch {}/{} | Training | Loss: {:.4f} | Valid | Loss: {:.4f}'
            .format(fold, epoch + 1, EPOCHS, train_loss['loss'].avg,
                    valid_loss['loss'].avg))
        with open(log_name, 'a') as csvfile:
            writer = csv.writer(csvfile)
            writer.writerow([
                fold, epoch + 1, train_loss['loss'].avg, valid_loss['loss'].avg
            ])

        if valid_loss['loss'].avg < best_loss:
            best_loss = valid_loss['loss'].avg
            torch.save(
                model.state_dict(),
                os.path.join(
                    "./models", model_name,
                    f'{model_version}_fold_{fold}_model_{model_params["model_name"]}_IMG_SIZE_{DIM[0]}_{model_params["loss_module"]}.bin'
                ))
            print('best model found for epoch {}'.format(epoch))
def run():
    dfx = pd.read_csv(config.TRAINING_FILE).fillna("none")
    dfx.sentiment = dfx.sentiment.apply(  # can use label encoding
        lambda x: 1 if x == "positive" else 0  # can use map fn
    )

    df_train, df_valid = model_selection.train_test_split(
        dfx,
        test_size=0.1,
        random_state=42,
        # stratify so train and validation keep the same positive-to-negative ratio
        stratify=dfx.sentiment.values
    )

    df_train = df_train.reset_index(drop=True)  # 0 to length of df_train
    df_valid = df_valid.reset_index(drop=True)  # 0 to length of df_valid

    train_dataset = dataset.BERTDataset(review=df_train.review.values,
                                        target=df_train.sentiment.values)

    train_data_loader = torch.utils.data.DataLoader(
        train_dataset, batch_size=config.TRAIN_BATCH_SIZE, num_workers=4)
    valid_dataset = dataset.BERTDataset(review=df_valid.review.values,
                                        target=df_valid.sentiment.values)

    valid_data_loader = torch.utils.data.DataLoader(
        valid_dataset, batch_size=config.VALID_BATCH_SIZE, num_workers=1)

    device = torch.device("cuda")  # using cuda
    model = BERTBaseUncased()  # model class defined in model.py
    model.to(device)

    param_optimizer = list(
        model.named_parameters())  # specify parameters to train
    no_decay = ["bias", "LayerNorm.bias", "LayerNorm.weight"]
    optimizer_parameters = [{
        'params':
        [p for n, p in param_optimizer if not any(nd in n for nd in no_decay)],
        'weight_decay':
        0.001
    }, {
        'params':
        [p for n, p in param_optimizer if any(nd in n for nd in no_decay)],
        'weight_decay':
        0.0
    }]
    """ These parameters are adjustable, we should take a look at different layers and
    the decay we want, how much learning rate etc."""

    num_train_steps = int(
        len(df_train) / config.TRAIN_BATCH_SIZE * config.EPOCHS)
    optimizer = AdamW(optimizer_parameters, lr=3e-5)
    scheduler = get_linear_schedule_with_warmup(
        optimizer, num_warmup_steps=0, num_training_steps=num_train_steps)

    # model = nn.DataParallel(model)              # converting to multi gpu model

    best_accuracy = 0
    for epoch in range(config.EPOCHS):
        engine.train_fn(train_data_loader, model, optimizer, device, scheduler)
        outputs, target = engine.eval_fn(valid_data_loader, model, device)
        outputs = np.array(outputs) >= 0.5
        accuracy = metrics.accuracy_score(target, outputs)
        print(f"Accuracy score = {accuracy}")
        if accuracy > best_accuracy:
            torch.save(
                model.state_dict(),
                config.MODEL_PATH)  # saving the model only if it improves
            best_accuracy = accuracy
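As the docstring above suggests, each parameter group can also carry its own learning rate, not only its own weight decay. A short sketch of that idea with a hypothetical two-part model (the encoder/head split below is illustrative, not part of the example): the pretrained encoder gets a small learning rate while the freshly initialised head gets a larger one.

import torch
from torch.optim import AdamW

encoder = torch.nn.Linear(8, 8)  # stand-in for a pretrained encoder
head = torch.nn.Linear(8, 2)     # stand-in for a new classification head

optimizer = AdamW([
    {"params": encoder.parameters(), "lr": 1e-5, "weight_decay": 0.01},
    {"params": head.parameters(), "lr": 1e-3, "weight_decay": 0.0},
])
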
def run():
    dfx = pd.read_csv(config.TRAINING_FILE,
                      nrows=config.NROWS).dropna().reset_index(drop=True)
    # dfx.sentiment = dfx.sentiment.apply(
    #     lambda x: 1 if x =='positive' else 0
    # )
    print('Data Loaded')
    df_train, df_valid = model_selection.train_test_split(
        dfx, test_size=0.5, random_state=42, stratify=dfx.sentiment.values)
    print('Data split into train data and validation data')
    df_train = df_train.reset_index(drop=True)
    df_valid = df_valid.reset_index(drop=True)

    train_dataset = dataset.TweetDataset(
        tweet=df_train.text.values,
        sentiment=df_train.sentiment.values,
        selected_text=df_train.selected_text.values)

    print('Train data preprocessed and made into Tweet Dataset Object')

    train_data_loader = torch.utils.data.DataLoader(
        train_dataset,
        batch_size=config.TRAIN_BATCH_SIZE,
        shuffle=True,
        num_workers=4)

    print('Train dataloader created')
    valid_dataset = dataset.TweetDataset(
        tweet=df_valid.text.values,
        sentiment=df_valid.sentiment.values,
        selected_text=df_valid.selected_text.values)
    print('Valid data preprocessed and made into Tweet Dataset Object')
    valid_data_loader = torch.utils.data.DataLoader(
        valid_dataset, batch_size=config.VALID_BATCH_SIZE, num_workers=1)
    print('Valid dataloader created')
    device = config.DEVICE
    conf = transformers.RobertaConfig.from_pretrained(
        f'{config.PATH}roberta-base-config.json')
    conf.output_hidden_states = False

    model = Roberta(conf)
    model.to(device)
    print('Model Object created')

    param_optimizer = list(model.named_parameters())
    no_decay = ['bias', 'LayerNorm.bias', 'LayerNorm.weight']
    optimizer_parameters = [{
        'params':
        [p for n, p in param_optimizer if not any(nd in n for nd in no_decay)],
        'weight_decay':
        0.001
    }, {
        'params':
        [p for n, p in param_optimizer if any(nd in n for nd in no_decay)],
        'weight_decay':
        0.0
    }]

    num_train_steps = int(
        len(df_train) / config.TRAIN_BATCH_SIZE * config.EPOCHS)
    optimizer = AdamW(optimizer_parameters, lr=3e-5)
    scheduler = utils.get_linear_schedule_with_warmup(
        optimizer, num_warmup_steps=0, num_training_steps=num_train_steps)

    best_jaccard = 0
    print('Starting Training....')
    for epoch in range(config.EPOCHS):
        engine.train_fn(train_data_loader, model, optimizer, device, scheduler)
        jaccard = engine.eval_fn(valid_data_loader, model, device)

        print(f'Jaccard Score : {jaccard}')
        if jaccard > best_jaccard:
            torch.save(model.state_dict(), config.MODEL_PATH)
            best_jaccard = jaccard
def main(_):
    LEARNING_RATE = config.LEARNING_RATE
    DROPOUT = config.DROPOUT
    SAVE = config.SAVE
    TUNE = False
    ESTOP = 5

    if FLAGS.lr:
        LEARNING_RATE = FLAGS.lr
    if FLAGS.dropout:
        DROPOUT = FLAGS.dropout
    if FLAGS.save:
        SAVE = FLAGS.save
    if FLAGS.tune:
        TUNE = FLAGS.tune
    if FLAGS.estop:
        ESTOP = FLAGS.estop

    train_file = config.TRAIN_PROC
    df_train = pd.read_csv(train_file).fillna("none")

    valid_file = config.DEVEL_PROC
    df_valid = pd.read_csv(valid_file).fillna("none")

    test_file = config.EVAL_PROC
    df_test = pd.read_csv(test_file).fillna("none")

    logger.info(f"Bert Model: {config.BERT_PATH}")
    logger.info(
        f"Current date and time :{datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')} "
    )

    logger.info(f"Train file: {train_file}")
    logger.info(f"Valid file: {valid_file}")
    logger.info(f"Test file: {test_file}")

    logger.info(f"Train size : {len(df_train):.4f}")
    logger.info(f"Valid size : {len(df_valid):.4f}")
    logger.info(f"Test size : {len(df_test):.4f}")

    valid_dataset = dataset.BERTDataset(text=df_valid.text.values,
                                        target=df_valid.label.values)

    valid_data_loader = torch.utils.data.DataLoader(
        valid_dataset, batch_size=config.VALID_BATCH_SIZE, num_workers=1)

    test_dataset = dataset.BERTDataset(text=df_test.text.values,
                                       target=df_test.label.values)

    test_data_loader = torch.utils.data.DataLoader(
        test_dataset, batch_size=config.VALID_BATCH_SIZE, num_workers=1)

    device = torch.device(
        'cuda' if torch.cuda.is_available() else 'cpu')  #torch.device("cuda")
    model = BERTBaseUncased(DROPOUT)
    if TUNE:
        model.load_state_dict(
            torch.load(configtune.MODEL_PATH,
                       map_location=torch.device(device)))
    model.to(device)

    param_optimizer = list(model.named_parameters())
    no_decay = ["bias", "LayerNorm.bias", "LayerNorm.weight"]
    optimizer_parameters = [
        {
            'params': [
                p for n, p in param_optimizer
                if not any(nd in n for nd in no_decay)
            ],
            'weight_decay':
            0.001
        },
        {
            'params':
            [p for n, p in param_optimizer if any(nd in n for nd in no_decay)],
            'weight_decay':
            0.0
        },
    ]

    num_train_steps = int(
        len(df_train) / config.TRAIN_BATCH_SIZE * config.EPOCHS)
    optimizer = AdamW(optimizer_parameters, lr=LEARNING_RATE)
    scheduler = get_linear_schedule_with_warmup(
        optimizer, num_warmup_steps=0, num_training_steps=num_train_steps)

    # model = nn.DataParallel(model)

    best_accuracy = 0
    best_path = ""
    es = 1
    for epoch in range(config.EPOCHS):
        if es > ESTOP:
            break

        df_train = shuffle(df_train)
        chunks = np.array_split(df_train, round(len(df_train) / SAVE))
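        # The shuffled training data is split into chunks of roughly SAVE rows, so
        # the model is evaluated (and possibly checkpointed or early-stopped) several
        # times within each epoch instead of only once at the end of it.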

        for chunk in chunks:
            train_dataset = dataset.BERTDataset(text=chunk.text.values,
                                                target=chunk.label.values)

            train_data_loader = torch.utils.data.DataLoader(
                train_dataset,
                batch_size=config.TRAIN_BATCH_SIZE,
                num_workers=4,
                shuffle=True)
            logger.info(f"Epoch = {epoch}")

            train_loss, train_acc = engine.train_fn(train_data_loader, model,
                                                    optimizer, device,
                                                    scheduler)

            for tag, parm in model.named_parameters():
                if parm.grad is not None:
                    writer.add_histogram(tag,
                                         parm.grad.data.cpu().numpy(), epoch)

            outputs, targets, val_loss, val_acc = engine.eval_fn(
                valid_data_loader, model, device)
            val_mcc = metrics.matthews_corrcoef(outputs, targets)
            logger.info(f"val_MCC_Score = {val_mcc:.4f}")

            outputs, targets, test_loss, test_acc = engine.eval_fn(
                test_data_loader, model, device)
            test_mcc = metrics.matthews_corrcoef(outputs, targets)
            logger.info(f"test_MCC_Score = {test_mcc:.4f}")

            logger.info(
                f"train_loss={train_loss:.4f}, val_loss={val_loss:.4f}, test_loss={test_loss:.4f}"
            )
            writer.add_scalar('loss/train', train_loss, epoch)
            writer.add_scalar('loss/val', val_loss, epoch)
            writer.add_scalar('loss/test', test_loss, epoch)

            logger.info(
                f"train_acc={train_acc:.4f}, val_acc={val_acc:.4f}, test_acc={test_acc:.4f}"
            )
            writer.add_scalar('acc/train', train_acc, epoch)
            writer.add_scalar('acc/val', val_acc, epoch)
            writer.add_scalar('acc/test', test_acc, epoch)

            logger.info(f"val_mcc={val_acc:.4f}, test_mcc={test_acc:.4f}")
            writer.add_scalar('mcc/val', val_mcc, epoch)
            writer.add_scalar('mcc/test', test_mcc, epoch)

            accuracy = metrics.accuracy_score(targets, outputs)
            logger.info(f"Accuracy Score = {accuracy:.4f}")

            if accuracy < 0.4:
                logger.info(
                    f"Something is very wrong! Accuracy is only {accuracy:.4f} Stopping..."
                )
                break

            if accuracy > best_accuracy:
                logger.info(
                    f"Saving model with Accuracy Score = {accuracy:.4f}")
                if len(best_path) > 0 and os.path.exists(best_path):
                    # Delete the previous best checkpoint
                    os.remove(best_path)
                best_path = config.MODEL_PATH[:-4] + "." + str(
                    round(accuracy * 100, 2)) + ".bin"
                torch.save(model.state_dict(), best_path)
                best_accuracy = accuracy
                es = 0
            else:
                es += 1
                logger.info(
                    f"Not improved for {es} times of {ESTOP}. Best so far - {best_accuracy:.4f}"
                )

                if es > ESTOP:
                    logger.info(
                        f"Early stopping with best accuracy: {best_accuracy:.4f} and accuracy for this epoch: {accuracy:.4f} ..."
                    )
                    break
Example #26
0
def run():
    df = pd.read_csv(CONFIG.INPUT_PATH + 'news_summary_more.csv').sample(
        frac=CONFIG.frac).reset_index(drop=True)
    print('--------- [INFO] TOKENIZING --------')
    loader = DataLoader.DataLoader(df)
    print(f'len of loader = {len(loader)}')

    split = int(CONFIG.split * len(loader))
    indices = list(range(len(loader)))
    train_indices, val_indices = indices[split:], indices[:split]
    train_sampler = torch.utils.data.sampler.SubsetRandomSampler(train_indices)
    val_sampler = torch.utils.data.sampler.SubsetRandomSampler(val_indices)

    pickle.dump(loader.vocab.word_to_idx,
                open(CONFIG.INPUT_PATH + 'word_to_idx.pickle', 'wb'))
    pickle.dump(loader.vocab.idx_to_word,
                open(CONFIG.INPUT_PATH + 'idx_to_word.pickle', 'wb'))

    pad_idx = loader.vocab.word_to_idx["<PAD>"]

    train_loader = torch.utils.data.DataLoader(
        loader,
        batch_size=CONFIG.Batch_Size,
        num_workers=4,
        pin_memory=True,
        collate_fn=DataLoader.MyCollate(pad_idx),
        sampler=train_sampler)

    val_loader = torch.utils.data.DataLoader(
        loader,
        batch_size=CONFIG.Batch_Size,
        num_workers=4,
        pin_memory=True,
        collate_fn=DataLoader.MyCollate(pad_idx),
        sampler=val_sampler)

    if torch.cuda.is_available():
        accelerator = 'cuda'
        torch.backends.cudnn.benchmark = True
    else:
        accelerator = 'cpu'

    vocab_size = len(loader.vocab.word_to_idx)

    device = torch.device(accelerator)

    model = Transformer.Transformer(input_vocab_size=vocab_size,
                                    out_vocab_size=vocab_size,
                                    max_len=CONFIG.max_len,
                                    embed_dims=CONFIG.embed_dims,
                                    pad_idx=pad_idx,
                                    heads=CONFIG.heads,
                                    forward_expansion=CONFIG.forward_expansion,
                                    num_layers=CONFIG.num_layers,
                                    dropout=CONFIG.dropout,
                                    device=device)

    model = model.to(device)
    # Parameters excluded from weight decay
    no_decay_params = ['bias', 'LayerNorm.bias', 'LayerNorm.weight']

    optimized_params = [{
        'params': [
            p for n, p in model.named_parameters()
            if not any(nd in n for nd in no_decay_params)
        ],
        'weight_decay':
        0.001
    }, {
        'params': [
            p for n, p in model.named_parameters()
            if any(nd in n for nd in no_decay_params)
        ],
        'weight_decay':
        0.0
    }]

    optimizer = transformers.AdamW(optimized_params, lr=CONFIG.LR)
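    # Total optimizer steps = epochs * (dataset size / batch size); CONFIG.Warmup_steps
    # is presumably a fraction of that total (e.g. 0.1), so the warmup phase scales
    # with the length of training.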
    num_training_steps = CONFIG.Epochs * len(loader) // CONFIG.Batch_Size
    scheduler = transformers.get_linear_schedule_with_warmup(
        optimizer,
        num_warmup_steps=int(CONFIG.Warmup_steps * num_training_steps),
        num_training_steps=num_training_steps)

    best_loss = 1e4
    best_model = model.state_dict()
    print('--------- [INFO] STARTING TRAINING ---------')
    for epoch in range(CONFIG.Epochs):
        train_loss = engine.train_fn(model, train_loader, optimizer, scheduler,
                                     device, pad_idx)
        val_loss = engine.eval_fn(model, val_loader, device, pad_idx)
        print(
            f'EPOCH -> {epoch+1}/{CONFIG.Epochs} | TRAIN LOSS = {train_loss} | VAL LOSS = {val_loss}'
        )
        if best_loss > val_loss:
            best_loss = val_loss
            best_model = model.state_dict()
            torch.save(best_model, CONFIG.MODEL_PATH)
            predict.predict(
                '''Saurav Kant, an alumnus of upGrad and IIIT-B's PG Program in Machine learning and Artificial Intelligence, was a Sr Systems Engineer at Infosys with almost 5 years of work experience. The program and upGrad's 360-degree career support helped him transition to a Data Scientist at Tech Mahindra with 90% salary hike. upGrad's Online Power Learning has powered 3 lakh+ careers.'''
            )
Example #27
0
def run():
    dfx = pd.read_csv(config.TRAINING_FILE).fillna("none")
    dfx.sentiment = dfx.sentiment.apply(lambda x: 1 if x == "positive" else 0)

    df_train, df_valid = model_selection.train_test_split(
        dfx, test_size=0.1, random_state=42, stratify=dfx.sentiment.values)

    df_train = df_train.reset_index(drop=True)
    df_valid = df_valid.reset_index(drop=True)

    train_dataset = dataset.BERTDataset(review=df_train.review.values,
                                        target=df_train.sentiment.values)

    train_data_loader = torch.utils.data.DataLoader(
        train_dataset, batch_size=config.TRAIN_BATCH_SIZE, num_workers=4)

    valid_dataset = dataset.BERTDataset(review=df_valid.review.values,
                                        target=df_valid.sentiment.values)

    valid_data_loader = torch.utils.data.DataLoader(
        valid_dataset, batch_size=config.VALID_BATCH_SIZE, num_workers=1)
    # Changed from cuda to cpu
    device = torch.device("cpu")
    model = BERTBaseUncased()
    model.to(device)

    param_optimizer = list(model.named_parameters())
    no_decay = ["bias", "LayerNorm.bias", "LayerNorm.weight"]
    optimizer_parameters = [
        {
            'params': [
                p for n, p in param_optimizer
                if not any(nd in n for nd in no_decay)
            ],
            'weight_decay':
            0.001
        },
        {
            'params':
            [p for n, p in param_optimizer if any(nd in n for nd in no_decay)],
            'weight_decay':
            0.0
        },
    ]

    num_train_steps = int(
        len(df_train) / config.TRAIN_BATCH_SIZE * config.EPOCHS)
    optimizer = AdamW(optimizer_parameters, lr=3e-5)
    scheduler = get_linear_schedule_with_warmup(
        optimizer, num_warmup_steps=0, num_training_steps=num_train_steps)

    #model = nn.DataParallel(model)

    best_accuracy = 0
    for epoch in range(config.EPOCHS):
        engine.train_fn(train_data_loader, model, optimizer, device, scheduler)
        outputs, targets = engine.eval_fn(valid_data_loader, model, device)
        outputs = np.array(outputs) >= 0.5
        accuracy = metrics.accuracy_score(targets, outputs)
        print(f"Accuracy Score = {accuracy}")
        if accuracy > best_accuracy:
            torch.save(model.state_dict(), config.MODEL_PATH)
            best_accuracy = accuracy
Example #28
0
def run(dataset_index):

    datasets = [
        "gold.prep-auto.full.prep.{0}.csv",
        "gold.prep-auto.no-emoticons.prep.{0}.csv",
        "gold.prep-auto.prep.{0}.csv", "gold.prep-english.prep.{0}.csv",
        "gold.prep-peisenieks.prep.{0}.csv", "gold.prep.{0}.csv"
    ]
    # dataset_index = 5 #0-5

    train_file = config.DATASET_LOCATION + datasets[dataset_index].format(
        "train")
    df_train = pd.read_csv(train_file).fillna("none")
    df_train.label = df_train.label.apply(label_encoder)

    valid_file = config.DATASET_LOCATION + datasets[dataset_index].format(
        "dev"
    )  #"gold.prep-auto.full.prep.dev.csv" #gold.prep-auto.no-emoticons.prep.dev.csv" #gold.prep-auto.prep.dev.csv" #"gold.prep-english.prep.dev.csv" #"gold.prep-peisenieks.prep.dev.csv" #"gold.prep.dev.csv"
    df_valid = pd.read_csv(valid_file).fillna("none")
    df_valid.label = df_valid.label.apply(label_encoder)

    test_file = config.DATASET_LOCATION + "eval.prep.test.csv"
    df_test = pd.read_csv(test_file).fillna("none")
    df_test.label = df_test.label.apply(label_encoder)

    logger.info(f"Bert Model: {config.BERT_PATH}")
    logger.info(
        f"Current date and time :{datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')} "
    )

    logger.info(f"Train file: {train_file}")
    logger.info(f"Valid file: {valid_file}")
    logger.info(f"Test file: {test_file}")

    logger.info(f"Train size : {len(df_train):.4f}")
    logger.info(f"Valid size : {len(df_valid):.4f}")
    logger.info(f"Test size : {len(df_test):.4f}")

    train_dataset = dataset.BERTDataset(review=df_train.text.values,
                                        target=df_train.label.values)

    train_data_loader = torch.utils.data.DataLoader(
        train_dataset,
        batch_size=config.TRAIN_BATCH_SIZE,
        num_workers=4,
        shuffle=True)

    valid_dataset = dataset.BERTDataset(review=df_valid.text.values,
                                        target=df_valid.label.values)

    valid_data_loader = torch.utils.data.DataLoader(
        valid_dataset, batch_size=config.VALID_BATCH_SIZE, num_workers=1)

    test_dataset = dataset.BERTDataset(review=df_test.text.values,
                                       target=df_test.label.values)

    test_data_loader = torch.utils.data.DataLoader(
        test_dataset, batch_size=config.VALID_BATCH_SIZE, num_workers=1)

    device = torch.device(
        'cuda' if torch.cuda.is_available() else 'cpu')  #torch.device("cuda")
    model = BERTBaseUncased()
    model.to(device)

    param_optimizer = list(model.named_parameters())
    no_decay = ["bias", "LayerNorm.bias", "LayerNorm.weight"]
    optimizer_parameters = [
        {
            'params': [
                p for n, p in param_optimizer
                if not any(nd in n for nd in no_decay)
            ],
            'weight_decay':
            0.001
        },
        {
            'params':
            [p for n, p in param_optimizer if any(nd in n for nd in no_decay)],
            'weight_decay':
            0.0
        },
    ]

    num_train_steps = int(
        len(df_train) / config.TRAIN_BATCH_SIZE * config.EPOCHS)
    optimizer = AdamW(optimizer_parameters, lr=3e-5)
    scheduler = get_linear_schedule_with_warmup(
        optimizer, num_warmup_steps=0, num_training_steps=num_train_steps)

    # model = nn.DataParallel(model)

    best_accuracy = 0
    for epoch in range(config.EPOCHS):
        logger.info(f"epoch={epoch}")

        train_loss, train_acc = engine.train_fn(train_data_loader, model,
                                                optimizer, device, scheduler)

        for tag, parm in model.named_parameters():
            if parm.grad is not None:
                writer.add_histogram(tag, parm.grad.data.cpu().numpy(), epoch)

        outputs, targets, val_loss, val_acc = engine.eval_fn(
            valid_data_loader, model, device)
        val_mcc = metrics.matthews_corrcoef(outputs, targets)
        logger.info(f"val_MCC_Score = {val_mcc:.3f}")

        outputs, targets, test_loss, test_acc = engine.eval_fn(
            test_data_loader, model, device)
        test_mcc = metrics.matthews_corrcoef(outputs, targets)
        logger.info(f"test_MCC_Score = {test_mcc:.3f}")

        logger.info(
            f"train_loss={train_loss:.4f}, val_loss={val_loss:.4f}, test_loss={test_loss:.4f}"
        )
        writer.add_scalar('loss/train', train_loss,
                          epoch)  # data grouping by `slash`
        writer.add_scalar('loss/val', val_loss,
                          epoch)  # data grouping by `slash`
        writer.add_scalar('loss/test', test_loss,
                          epoch)  # data grouping by `slash`

        logger.info(
            f"train_acc={train_acc:.3f}, val_acc={val_acc:.3f}, test_acc={test_acc:.3f}"
        )
        writer.add_scalar('acc/train', train_acc,
                          epoch)  # data grouping by `slash`
        writer.add_scalar('acc/val', val_acc,
                          epoch)  # data grouping by `slash`
        writer.add_scalar('acc/test', test_acc,
                          epoch)  # data grouping by `slash`

        logger.info(f"val_mcc={val_acc:.3f}, test_mcc={test_acc:.3f}")
        writer.add_scalar('mcc/val', val_mcc,
                          epoch)  # data grouping by `slash`
        writer.add_scalar('mcc/test', test_mcc,
                          epoch)  # data grouping by `slash`

        accuracy = metrics.accuracy_score(targets, outputs)
        logger.info(f"Accuracy Score = {accuracy:.3f}")

        if accuracy > best_accuracy:
            print(f"Saving model with Accuracy Score = {accuracy:.3f}")
            torch.save(model.state_dict(), config.MODEL_PATH)
            best_accuracy = accuracy
def run():
    dfx = pd.read_csv(config.TRAINING_FILE, nrows=30).dropna().reset_index(drop=True)


    df_train, df_valid = model_selection.train_test_split(
        dfx,
        test_size=0.1,
        random_state=42,
        stratify=dfx.sentiment.values
    )

    df_train = df_train.reset_index(drop=True)
    df_valid = df_valid.reset_index(drop=True)

    train_dataset = dataset.TweetDataset(
        tweet = df_train.text.values,
        sentiment = df_train.sentiment.values,
        selected_text=df_train.selected_text.values
    )

    train_data_loader = torch.utils.data.DataLoader(
        train_dataset,
        batch_size=config.TRAIN_BATCH_SIZE,
        num_workers=1
    )

    valid_dataset = dataset.TweetDataset(
        tweet = df_valid.text.values,
        sentiment = df_valid.sentiment.values,
        selected_text=df_valid.selected_text.values
    )

    valid_data_loader = torch.utils.data.DataLoader(
        valid_dataset,
        batch_size=config.VALID_BATCH_SIZE,
        num_workers=1 
    )

    device = torch.device('cpu')
    model = BERTBaseUncased()
    model.to(device)

    param_optimizer = list(model.named_parameters())
    no_decay = ["bias", "LayerNorm.bias", "LayerNorm.weight"]
    optimizer_parameters = [
        {
            "params": [
                p for n, p in param_optimizer if not any(nd in n for nd in no_decay)
            ],
            "weight_decay": 0.001,
        },
        {
            "params": [
                p for n, p in param_optimizer if any(nd in n for nd in no_decay)
            ],
            "weight_decay": 0.0,
        },
    ]

    num_train_steps = int(len(df_train) / config.TRAIN_BATCH_SIZE * config.EPOCHS)
    optimizer = AdamW(optimizer_parameters, lr=3e-5)
    scheduler = get_linear_schedule_with_warmup(
        optimizer, num_warmup_steps=0, num_training_steps=num_train_steps
    )

    best_jaccard = 0
    for epoch in range(config.EPOCHS):
        print("here")
        engine.train_fn(train_data_loader, model, optimizer, device, scheduler)
        mean_jac = engine.eval_fn(valid_data_loader, model, device)
        print("jaccard_score = {mean_jac}".format(mean_jac=mean_jac))
        if(mean_jac>best_jaccard):
            torch.save(model.state_dict(), config.MODEL_PATH)
            best_jaccard = mean_jac
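engine.eval_fn in the snippet above returns a mean Jaccard score, but its implementation is not shown. A common definition for this span-selection task is the word-level Jaccard similarity between predicted and target text; the function below is a sketch of that metric under a simple whitespace-tokenization assumption, not the repository's exact code.

def word_jaccard(str1, str2):
    # Word-level Jaccard similarity between two strings.
    a = set(str1.lower().split())
    b = set(str2.lower().split())
    c = a.intersection(b)
    return float(len(c)) / (len(a) + len(b) - len(c))


print(word_jaccard("my dog is happy", "dog is happy"))  # 0.75
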
Example #30
0
def run(fold):
    dfx = pd.read_csv(config.TRAINING_FILE)

    # Set train validation set split
    df_train = dfx[dfx.kfold != fold].reset_index(drop=True)
    df_valid = dfx[dfx.kfold == fold].reset_index(drop=True)

    train_dataset = TweetDataset(
        tweet=df_train.text.values,
        sentiment=df_train.sentiment.values,
        selected_text=df_train.selected_text.values
    )

    train_data_loader = torch.utils.data.DataLoader(
        train_dataset,
        batch_size=config.TRAIN_BATCH_SIZE,
        num_workers=4
    )

    valid_dataset = TweetDataset(
        tweet=df_valid.text.values,
        sentiment=df_valid.sentiment.values,
        selected_text=df_valid.selected_text.values
    )

    valid_data_loader = torch.utils.data.DataLoader(
        valid_dataset,
        batch_size=config.VALID_BATCH_SIZE,
        num_workers=2
    )

    device = torch.device("cuda")
    model_config = transformers.BertConfig.from_pretrained(config.ROBERTA_PATH)
    model_config.output_hidden_states = True
    model = TweetModel(conf=model_config)
    model.to(device)

    param_optimizer = list(model.named_parameters())
    no_decay = ["bias", "LayerNorm.bias", "LayerNorm.weight"]
    # Define two sets of parameters: those with weight decay, and those without
    optimizer_parameters = [
        {
            "params": [
                p for n, p in param_optimizer if not any(nd in n for nd in no_decay)
            ],
            "weight_decay": 0.001,
        },
        {
            "params": [
                p for n, p in param_optimizer if any(nd in n for nd in no_decay)
            ],
            "weight_decay": 0.0,
        },
    ]

    num_train_steps = int(
        len(df_train) / config.TRAIN_BATCH_SIZE * config.EPOCHS)
    optimizer = AdamW(optimizer_parameters, lr=3e-5)
    '''
    Create a scheduler to set the learning rate at each training step.
    "Create a schedule with a learning rate that decreases linearly after linearly increasing during a warmup period." (https://pytorch.org/docs/stable/optim.html)
    Since num_warmup_steps = 0, the learning rate starts at 3e-5 and then decreases linearly at each training step
    (a standalone sketch after this example shows the resulting schedule numerically).
    '''
    scheduler = get_linear_schedule_with_warmup(
        optimizer,
        num_warmup_steps=0,
        num_training_steps=num_train_steps
    )
    es = utils.EarlyStopping(patience=2, mode="max")
    print(f"Training is Starting for fold={fold}")
    logger.info("{} - {}".format("Training is Starting for fold", fold))
    #model=nn.DataParallel(model)

    for epoch in range(3):
        engine.train_fn(train_data_loader, model, optimizer, device, scheduler)
        jaccard = engine.eval_fn(valid_data_loader, model, device)
        print(f"Jaccard Score = {jaccard}")
        logger.info("EPOCHS {} - Jaccard Score - {}".format(epoch, jaccard))
        es(jaccard, model, model_path=f"../models/nmodel_{fold}.bin")
        if es.early_stop:
            print("Early stopping")
            break
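The docstring in the example above describes the schedule in words; the standalone sketch below shows the same behaviour numerically with a dummy one-parameter model and 100 training steps (illustrative values only). With num_warmup_steps=0 the learning rate starts at the base value and decays linearly towards zero.

import torch
from transformers import get_linear_schedule_with_warmup

params = [torch.nn.Parameter(torch.zeros(1))]
optimizer = torch.optim.AdamW(params, lr=3e-5)
scheduler = get_linear_schedule_with_warmup(
    optimizer, num_warmup_steps=0, num_training_steps=100)

for step in range(100):
    optimizer.step()
    scheduler.step()
    if step % 25 == 0:
        # The learning rate shrinks roughly linearly from 3e-5 towards 0
        print(step, scheduler.get_last_lr())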