# Esempio n. 1 (Example no. 1)
# 0
    def main(self):
        """Run the training loop for ``self.num_epochs`` epochs.

        Trains ``self.model`` on ``self.train_data_loader``, steps the
        optional ``self.lr_scheduler`` once per epoch, and (when
        ``self.val_dataset`` is set) validates at the end of each epoch.

        Returns:
            tuple: (per-iteration training losses, per-epoch validation
            precisions, per-epoch validation losses).  The last two lists
            stay empty when ``self.val_dataset`` is falsy.

        Raises:
            ValueError: if the training loss becomes NaN (a gradient-flow
            plot is emitted first to aid debugging).
        """
        print('Starting Training...')
        loss_hist = Averager()
        loss = []
        validation_losses = []
        precisions = []
        itr = 1

        for epoch in range(self.num_epochs):
            self.model.train()
            loss_hist.reset()

            for images, targets in self.train_data_loader:

                images = list(image.to(self.device) for image in images)
                # Detection targets must be integer tensors on the same
                # device as the inputs.
                targets = [{k: v.long().to(self.device)
                            for k, v in t.items()} for t in targets]

                # In train mode the model returns a dict of loss components.
                loss_dict = self.model(images, targets)

                losses = sum(loss for loss in loss_dict.values())
                loss_value = losses.item()

                loss_hist.send(loss_value)
                loss.append(loss_value)

                # Check for NaN BEFORE backprop: the original code stepped the
                # optimizer first, applying a corrupted update before raising.
                # (Also removed a stray self.validate() call that ran a full
                # validation pass every iteration and discarded the result.)
                if math.isnan(loss_value):
                    plot_grad_flow(self.model.named_parameters())
                    raise ValueError('Loss is nan')

                self.optimizer.zero_grad()
                losses.backward()
                self.optimizer.step()

                if itr % 50 == 0:
                    print(f"Iteration #{itr} loss: {loss_value}")

                itr += 1

            # update the learning rate
            if self.lr_scheduler is not None:
                self.lr_scheduler.step()

            if self.val_dataset:
                precision, validation_loss = self.validate()
                precisions.append(precision)
                validation_losses.append(validation_loss)
                print(f'Mean Precision for Validation Data: {precision}')
                print(f'Validation Loss: {validation_loss}')
            print(f"Epoch #{epoch} loss: {loss_hist.value}")
        print('Finished!')
        return loss, precisions, validation_losses
def validate(val_loader, model, device):
    """Run one pass over ``val_loader`` and return the mean summed loss.

    Args:
        val_loader: iterable yielding (images, targets, image_ids) batches.
        model: detection model returning a dict of loss tensors when called
            with (images, targets).
        device: device the batch tensors are moved to.

    Returns:
        float: average over all batches of the sum of the loss components.
    """
    model.eval()
    # NOTE(review): torchvision detection models return a loss dict only in
    # train mode; after model.eval() they return predictions instead.
    # Confirm the model used here really yields losses in eval mode.
    itr = 1
    loss_hist = Averager()
    loss_hist.reset()
    with torch.no_grad():  # validation never needs gradients
        for images, targets, image_ids in val_loader:
            images = list(image.to(device) for image in images)
            targets = [{k: v.to(device) for k, v in t.items()} for t in targets]
            loss_dict = model(images, targets)
            # BUG FIX: must iterate .values() -- iterating the dict itself
            # yields its keys (strings), so sum(...) produced a str and the
            # subsequent .item() call crashed.
            losses = sum(loss for loss in loss_dict.values())
            loss_value = losses.item()
            loss_hist.send(loss_value)
            if itr % 20 == 0:
                print(f"Iteration: {itr} loss: {loss_hist.value}")
            itr += 1
    return loss_hist.value
def train_fn(start_epochs,
             epochs,
             train_loader,
             val_loader,
             model,
             device,
             optimizer,
             best_loss,
             checkpoint_path,
             best_model_path,
             lr_scheduler=None):
    """Train ``model`` from epoch ``start_epochs`` through ``epochs``.

    After every epoch a checkpoint (next epoch index, model state and
    optimizer state) is written via ``save_ckp``.  Validation is currently
    disabled -- the related code is commented out below -- so ``val_loader``
    and ``best_loss`` are accepted but not used.

    Returns:
        tuple: (trained model, list of per-epoch mean training losses).
    """
    print("Starting Training")
    model.train()

    running_loss = Averager()
    step = 1
    train_loss = []
    validation_loss = []  # kept for when validation is re-enabled
    for epoch in range(start_epochs, epochs + 1):
        running_loss.reset()

        for images, targets, image_ids in train_loader:
            images = [img.to(device) for img in images]
            targets = [{key: val.to(device) for key, val in t.items()}
                       for t in targets]

            # Forward pass yields a dict of loss components; optimize their sum.
            loss_dict = model(images, targets)
            total = sum(loss_dict.values())
            loss_value = total.item()
            running_loss.send(loss_value)

            optimizer.zero_grad()
            total.backward()
            optimizer.step()

            if step % 10 == 0:
                print(f"Iteration #{step} loss: {loss_value}")
            step += 1

        # update the learning rate once per epoch
        if lr_scheduler is not None:
            lr_scheduler.step()

        # val_loss = validate(val_loader, model, device)
        print(
            f"Epoch #{epoch} Train loss: {running_loss.value}, Validation Loss : Commented"
        )
        train_loss.append(running_loss.value)
        # validation_loss.append(val_loss)
        checkpoint = {
            'epoch': epoch + 1,
            # 'best_loss': val_loss,
            'state_dict': model.state_dict(),
            'optimizer': optimizer.state_dict(),
        }

        save_ckp(checkpoint, False, checkpoint_path, best_model_path)
        # if best_loss <= val_loss:
        #     print('Validation loss decreased ({:.6f} --> {:.6f}).  Saving model ...'.format(best_loss, val_loss))
        #     save_ckp(checkpoint, True, checkpoint_path, best_model_path)
        #     best_loss = val_loss

    return model, train_loss  #, validation_loss
# Esempio n. 4 (Example no. 4)
# 0
def run(train_path):
    """Train a detection model on the wheat-dataset CSV at ``train_path``.

    The CSV's ``bbox`` column (a stringified ``[x, y, w, h]`` list) is
    expanded into numeric x/y/w/h columns, images are split into train and
    validation sets by image id (last 665 ids go to validation), and the
    model is trained with SGD for ``config.EPOCHS`` epochs on CUDA.
    """
    df = pd.read_csv(train_path)
    print(df.shape)

    # Parse each bbox string exactly once.  The original code ran the same
    # regex over every row four separate times (once per coordinate).
    bbox_pattern = re.compile(r"([0-9]+[.]?[0-9]*)")

    def _parse_bbox(s):
        # First four numeric tokens are x, y, w, h.
        return [float(v) for v in bbox_pattern.findall(s)[:4]]

    df[['x', 'y', 'w', 'h']] = pd.DataFrame(
        df['bbox'].apply(_parse_bbox).tolist(), index=df.index)
    df.drop(['bbox'], inplace=True, axis=1)

    # split the data: last 665 image ids form the validation set
    image_ids = df['image_id'].unique()
    valid_ids = image_ids[-665:]
    train_ids = image_ids[:-665]
    train_df = df[df['image_id'].isin(train_ids)]
    valid_df = df[df['image_id'].isin(valid_ids)]

    train_dataset = WheatDatasetTrain(train_df, config.DIR_TRAIN, get_train_transform())
    valid_dataset = WheatDatasetTrain(valid_df, config.DIR_TRAIN, get_valid_transform())

    # NOTE(review): shuffle=False on the *training* loader is unusual and
    # usually hurts convergence -- confirm this is intentional.
    train_data_loader = torch.utils.data.DataLoader(
        train_dataset,
        batch_size=config.TRAIN_BS,
        shuffle=False,
        num_workers=config.NUM_WORKERS,
        collate_fn=collate_fn
    )
    valid_data_loader = torch.utils.data.DataLoader(
        valid_dataset,
        batch_size=config.VALID_BS,
        shuffle=False,
        num_workers=config.NUM_WORKERS,
        collate_fn=collate_fn
    )
    # Device used is cuda
    device = torch.device('cuda')
    model = obtain_model()
    model.to(device)
    params = [p for p in model.parameters() if p.requires_grad]
    optimizer = torch.optim.SGD(params, lr=0.005, momentum=0.9, weight_decay=0.0005)
    # lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=3, gamma=0.1)
    lr_scheduler = None

    loss_hist = Averager()
    itr = 1

    for epoch in range(config.EPOCHS):
        loss_hist.reset()
        for images, targets, image_ids in train_data_loader:
            images = list(image.to(device) for image in images)
            targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

            # Train-mode forward pass returns a dict of loss components.
            loss_dict = model(images, targets)

            losses = sum(loss for loss in loss_dict.values())
            loss_value = losses.item()

            loss_hist.send(loss_value)

            optimizer.zero_grad()
            losses.backward()
            optimizer.step()

            if itr % 50 == 0:
                print(f"Iteration #{itr} loss: {loss_value}")

            itr += 1

        # update the learning rate
        if lr_scheduler is not None:
            lr_scheduler.step()

        print(f"Epoch #{epoch} loss: {loss_hist.value}")