Example #1
def Dataloader(split, fold):
    if split == 'train':
        fold_idx = [i for i in range(1, config['#fold'] + 1) if i != fold]
        dataset = Dataset(split=split, fold_idx=fold_idx)

        if config['sampler'] == 'ROS':
            labels = dataset.labels
            class_weight = {
                label: 1 / float(labels.count(label))
                for label in range(config['#class'])
            }
            weight = [class_weight[label] for label in labels]
            sampler = data.WeightedRandomSampler(weight,
                                                 len(weight) *
                                                 config['#sample_ROS'],
                                                 replacement=True)
            shuffle = False
        elif config['sampler'] == 'None':
            sampler = None
            shuffle = True

    else:
        fold_idx = [fold]
        dataset = Dataset(split=split, fold_idx=fold_idx)
        sampler = None
        shuffle = False

    data_loader = data.DataLoader(dataset,
                                  shuffle=shuffle,
                                  sampler=sampler,
                                  worker_init_fn=_init_fn,
                                  batch_size=config['#batch'],
                                  num_workers=config['#worker'])

    return data_loader
Example #2
def get_dataloader(dataset, batch_size=128, clip=False, weights=None):
    if weights is not None:
        eps = 1e-1
        if clip:
            mean = weights.mean()
            var = weights.var()
            k = 2
            upper_bound = mean + k * var
            lower_bound = max(mean - k * var, eps)
            weight_list = np.array([
                lower_bound if i < lower_bound else
                (upper_bound if i > upper_bound else i) for i in weights
            ])
        else:
            weight_list = np.array([eps if i < eps else i for i in weights])
        sampler = data.WeightedRandomSampler(weight_list,
                                             len(weight_list),
                                             replacement=True)
        print(
            f'weight_list max: {weight_list.max()} min: {weight_list.min()} mean: {weight_list.mean()} var: {weight_list.var()}'
        )
    else:
        sampler = None
    dataloader = data.DataLoader(dataset=dataset,
                                 batch_size=batch_size,
                                 shuffle=False if sampler else True,
                                 sampler=sampler,
                                 num_workers=8,
                                 pin_memory=True)
    return dataloader
Example #3
 def make_batch_loader(self, batch_size=None, shuffle=None):
     shuffle = self.shuffle if shuffle is None else shuffle
     if self.sampling is not None and shuffle:
         print(f"SAMPLING with {self.sampling}")
         num_answers = defaultdict(int)
         for q in self.questions:
             num_answers[q["answer"]] += 1
         if self.sampling == "uniform_answer":
             weights = [1 / num_answers[q["answer"]] for q in self.questions]
         sampler = data.WeightedRandomSampler(weights=weights, num_samples=len(self))
         batch_loader = data.DataLoader(
             dataset=self,
             batch_size=Options()["dataset.batch_size"],
             sampler=sampler,
             shuffle=False,
             pin_memory=Options()["misc.cuda"],
             num_workers=Options()["dataset.nb_threads"],
             collate_fn=self.collate_fn,
         )
     else:
         batch_loader = data.DataLoader(
             dataset=self,
             batch_size=Options()["dataset.batch_size"],
             shuffle=self.shuffle if shuffle is None else shuffle,
             pin_memory=Options()["misc.cuda"],
             num_workers=Options()["dataset.nb_threads"],
             collate_fn=self.collate_fn,
             sampler=None,
         )
     return batch_loader
Example #4
 def generateCaption(self, feature, stochastic=False):
     initial_input = torch.ones((feature.shape[0], 1)).long().to('cuda')
     #torch.tensor(1).to('cuda') # this is the '<start>'
     captions_compact = self.wordEmbedded(initial_input)
     feature = torch.unsqueeze(feature, 0)
     h_init = feature
     res = []
     for i in range(self.max_length):
         #print(lstm_input.shape)
         rnn_out, h_out = self.rnn(captions_compact, h_init)
         final = self.linear_Embed2Word(rnn_out)
         #print(lstm_final_word.shape)
         final = final.squeeze()
         if stochastic:
             final = self.caption_softmax(final / self.temperature)
             predicted = data.WeightedRandomSampler(weights=final,
                                                    num_samples=1,
                                                    replacement=False)
             predicted = torch.tensor(list(predicted)).long().to('cuda')
             predicted = torch.squeeze(predicted)
         else:
             _, predicted = final.max(1)
         #print(predicted.shape, predicted)
         res.append(predicted)
         captions_compact = self.wordEmbedded(predicted)
         captions_compact = torch.unsqueeze(captions_compact, 1)
     res = torch.stack(res, 1)
     #print(res.shape)
     return res
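Note: in this caption generator (and the LSTM variant in a later example), WeightedRandomSampler is used per decoding step as a multinomial draw over the softmax output, not as a DataLoader sampler. A minimal sketch, not part of the original model, with a hypothetical 1-D probability tensor `probs`, showing the equivalent draw via torch.multinomial:

# Minimal sketch; probs stands in for one step of softmax output.
import torch
from torch.utils import data

probs = torch.tensor([0.1, 0.7, 0.2])
idx_multinomial = torch.multinomial(probs, num_samples=1)              # tensor([j])
idx_sampler = list(data.WeightedRandomSampler(probs, num_samples=1,
                                              replacement=False))      # [j]
# both draw an index with probability proportional to its weight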
Example #5
def make_loader_mt(dataset, batch_size):
    """Construct sampler that randomly chooses N items from N-sample dataset,
    weighted so that it's even across all tasks (so no task implicitly has
    higher priority than the others). Assumes the given dataset is a
    TensorDataset produced by trajectories_to_dataset_mt."""
    task_ids = dataset.tensor_dict['obs'].task_id
    assert len(task_ids) > 0 and batch_size > 0, \
        f"either {len(task_ids)}=0 task IDs or {batch_size}=0 batch size"
    unique_ids, frequencies = torch.unique(task_ids,
                                           return_counts=True,
                                           sorted=True)
    # all tasks must be present for this to work
    assert torch.all(unique_ids == torch.arange(len(unique_ids))), (unique_ids)
    freqs_total = torch.sum(frequencies).to(torch.float)
    unique_weights = freqs_total / frequencies.to(torch.float)
    unique_weights = unique_weights / unique_weights.sum()
    weights = unique_weights[task_ids]

    # round the number of samples up to a multiple of batch size (e.g. 10 samples with batch_size=4 gives n_samples=12)
    n_samples = len(weights) + (-len(weights)) % batch_size
    assert n_samples >= len(weights) and 0 == n_samples % batch_size, \
        (batch_size, n_samples)
    weighted_sampler = data.WeightedRandomSampler(weights,
                                                  n_samples,
                                                  replacement=True)
    batch_sampler = data.BatchSampler(weighted_sampler,
                                      batch_size=batch_size,
                                      drop_last=True)

    loader = data.DataLoader(dataset,
                             pin_memory=False,
                             batch_sampler=batch_sampler,
                             collate_fn=fixed_default_collate)

    return loader
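A self-contained sketch (toy task IDs, not from the original code base) of the inverse-frequency weighting computed in make_loader_mt:

import torch
from torch.utils import data

task_ids = torch.tensor([0, 0, 0, 1, 1, 2])                # task 0 is over-represented
_, frequencies = torch.unique(task_ids, return_counts=True, sorted=True)
unique_weights = frequencies.sum().float() / frequencies.float()
unique_weights = unique_weights / unique_weights.sum()     # ~[0.18, 0.27, 0.55]
weights = unique_weights[task_ids]                         # per-sample weights
sampler = data.WeightedRandomSampler(weights, num_samples=len(weights), replacement=True)
# in expectation each task now contributes about a third of the drawn indices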
Example #6
 def generateCaption(self, feature, stochastic=False):
     initial_input = torch.ones((feature.shape[0], 1)).long().to('cuda')
     #torch.tensor(1).to('cuda') # this is the '<start>'
     lstm_input = self.wordEmbedded(initial_input)
     feature = torch.unsqueeze(feature, 0)
     hc_states = (feature, feature)
     res = []
     for i in range(self.max_length):
         #print(lstm_input.shape)
         lstm_output, hc_states = self.lstm(lstm_input, hc_states)
         lstm_final_word = self.linear_Embed2Word(lstm_output)
         #print(lstm_final_word.shape)
         lstm_final_word = lstm_final_word.squeeze()
         #print(lstm_final_word, "This is lstm output for generation 1")
         if stochastic:
             #print("Stochastic", self.temperature)
             lstm_final_word = self.caption_softmax(lstm_final_word /
                                                    self.temperature)
             predicted = data.WeightedRandomSampler(weights=lstm_final_word,
                                                    num_samples=1,
                                                    replacement=False)
             predicted = torch.tensor(list(predicted)).long().to('cuda')
             predicted = torch.squeeze(predicted)
             #print(predicted, predicted.shape)
         else:
             _, predicted = lstm_final_word.max(1)
             #predicted = torch.unsqueeze(predicted, 1)
             #print(predicted, "This is lstm output for generation 1")
         #print(predicted.shape, predicted)
         res.append(predicted)
         lstm_input = self.wordEmbedded(predicted)
         lstm_input = torch.unsqueeze(lstm_input, 1)
     res = torch.stack(res, 1)
     #print(res.shape)
     return res
Example #7
def data_sampler(dataset, shuffle, distributed, weights=None):
    if distributed:
        return data.distributed.DistributedSampler(dataset, shuffle=shuffle)

    if weights is not None:
        return data.WeightedRandomSampler(weights,
                                          len(weights),
                                          replacement=True)

    if shuffle:
        return data.RandomSampler(dataset)
    else:
        return data.SequentialSampler(dataset)
Example #8
 def sampler(self):
     if self.cfg.AUGMENTATION.OVERSAMPLING == 'pixel':
         sampling_weights = np.array([
             float(self._get_label_data(city).size) for city in self.cities
         ])
     if self.cfg.AUGMENTATION.OVERSAMPLING == 'change':
         sampling_weights = np.array([
             float(np.sum(self._get_label_data(city)))
             for city in self.cities
         ])
     sampler = torch_data.WeightedRandomSampler(weights=sampling_weights,
                                                num_samples=self.length,
                                                replacement=True)
     return sampler
Example #9
def get_dataloader(dataset, batch_size=128, clip=False, weights=None):
    if weights is not None:
        sampler = data.WeightedRandomSampler(weights, len(weights), replacement=True)
        print(f'weight_list max: {weights.max()} min: {weights.min()} mean: {weights.mean()} var: {weights.var()}')
    else:
        sampler = None
    dataloader = data.DataLoader(
        dataset=dataset,
        batch_size=batch_size,
        shuffle=False if sampler else True,
        sampler=sampler,
        num_workers=8,
        pin_memory=True)
    return dataloader
Example #10
def get_dataloader(dataset, batch_size=128, weights=None, eps=1e-6):
    if weights is not None:
        weight_list = [eps if i < eps else i for i in weights]
        sampler = data.WeightedRandomSampler(weight_list,
                                             len(weight_list),
                                             replacement=True)
    else:
        sampler = None
    dataloader = data.DataLoader(dataset=dataset,
                                 batch_size=batch_size,
                                 shuffle=False if sampler else True,
                                 sampler=sampler,
                                 num_workers=8,
                                 pin_memory=True)
    return dataloader
Example #11
    def _split_sampler(self, split):
        if split == 0.0:
            return None, None

        idx_full = np.arange(self.n_samples)

        np.random.seed(self.seed)
        np.random.shuffle(idx_full)

        if isinstance(split, int):
            assert split > 0
            assert split < self.n_samples, "validation set size is configured to be larger than entire dataset."
            len_valid = split
        else:
            len_valid = int(self.n_samples * split)

        ##################
        # If a validation file (valtest) is specified, use it as the validation set; otherwise split the validation set off the training set
        if self.val_file:
            valid_idx = self.valid_idx
            train_idx = np.array([idx for idx in idx_full if idx not in valid_idx])
        else:
            valid_idx = idx_full[0:len_valid]
            train_idx = np.delete(idx_full, np.arange(0, len_valid))

        #######################
        weights_per_class = 1. / torch.tensor(self.emotion_nums, dtype=torch.float)
        weights = [0] * self.n_samples
        for idx in range(self.n_samples):
            if idx in valid_idx:
                weights[idx] = 0.
            else:
                label = self.dataset[idx][0]
                weights[idx] = weights_per_class[label]
        weights = torch.tensor(weights)
        train_sampler = data.WeightedRandomSampler(weights=weights, num_samples=len(weights), replacement=True)
    
        valid_sampler = data.SubsetRandomSampler(valid_idx)

        # turn off shuffle option which is mutually exclusive with sampler
        self.shuffle = False
        self.n_samples = len(train_idx)

        return train_sampler, valid_sampler
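A self-contained sketch (toy tensors, hypothetical sizes) of the pattern used by _split_sampler: zero out the weights of validation indices, give each split its own sampler, and leave shuffle off because sampler and shuffle are mutually exclusive in DataLoader:

import torch
from torch.utils import data

dataset = data.TensorDataset(torch.arange(10).float())
weights = torch.ones(10)
weights[7:] = 0.                                   # validation indices are never drawn for training
train_sampler = data.WeightedRandomSampler(weights, num_samples=len(weights), replacement=True)
valid_sampler = data.SubsetRandomSampler([7, 8, 9])
train_loader = data.DataLoader(dataset, batch_size=4, sampler=train_sampler)   # shuffle stays False
valid_loader = data.DataLoader(dataset, batch_size=4, sampler=valid_sampler)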
Example #12
    def train_dataloader(self):
        # REQUIRED
        cfg = self.cfg
        use_edge_loss = cfg.MODEL.LOSS_TYPE == 'FrankensteinEdgeLoss'
        trfm = []
        trfm.append(BGR2RGB())
        if cfg.DATASETS.USE_CLAHE_VARI: trfm.append(VARI())
        if cfg.AUGMENTATION.RESIZE:
            trfm.append(Resize(scale=cfg.AUGMENTATION.RESIZE_RATIO))
        if cfg.AUGMENTATION.CROP_TYPE == 'uniform':
            trfm.append(UniformCrop(crop_size=cfg.AUGMENTATION.CROP_SIZE))
        elif cfg.AUGMENTATION.CROP_TYPE == 'importance':
            trfm.append(
                ImportanceRandomCrop(crop_size=cfg.AUGMENTATION.CROP_SIZE))
        if cfg.AUGMENTATION.RANDOM_FLIP_ROTATE: trfm.append(RandomFlipRotate())

        trfm.append(Npy2Torch())
        trfm = transforms.Compose(trfm)

        dataset = Xview2Detectron2Dataset(
            cfg.DATASETS.TRAIN[0],
            pre_or_post=cfg.DATASETS.PRE_OR_POST,
            include_image_weight=True,
            transform=trfm,
            include_edge_mask=use_edge_loss,
            use_clahe=cfg.DATASETS.USE_CLAHE_VARI,
        )

        dataloader_kwargs = {
            'batch_size': cfg.TRAINER.BATCH_SIZE,
            'num_workers': cfg.DATALOADER.NUM_WORKER,
            'shuffle': cfg.DATALOADER.SHUFFLE,
            'drop_last': True,
            'pin_memory': True,
        }
        # sampler
        if cfg.AUGMENTATION.IMAGE_OVERSAMPLING_TYPE == 'simple':
            image_p = self.image_sampling_weight(dataset.dataset_metadata)
            sampler = torch_data.WeightedRandomSampler(
                weights=image_p, num_samples=len(image_p))
            dataloader_kwargs['sampler'] = sampler
            dataloader_kwargs['shuffle'] = False
        dataloader = torch_data.DataLoader(dataset, **dataloader_kwargs)
        return dataloader
Example #13
def main():
    # hyper parameter
    batch_size = 128
    char_len = 150
    MAX_epoch = 50
    encode_dim = 250
    feat_num = 256
    device = torch.device('cuda:0')
    seed = 2434
    data_path = Path('../data/train.csv')
    patience = 7
    n_fold = 4
    momentum = 0.9
    lr = 0.01
    ##############################
    set_random_seed(seed)

    X_data, y_data = load_data(data_path)

    train = [(x, y) for x, y in zip(X_data, y_data)]

    kf = KFold(n_splits=n_fold)
    for i, (train_idx, valid_idx) in enumerate(kf.split(train)):
        print(f'Fold : {i+1}')
        train_fold = [train[i] for i in train_idx]
        valid_fold = [train[i] for i in valid_idx]

        char2idx, ignore_idx = make_char2idx([text for text, _ in train])

        target_arr = np.array([y for _, y in train_fold])
        weight_dict = {i: np.sum(target_arr == i) for i in range(2)}
        weights = 1 / torch.Tensor([weight_dict[i] for i in target_arr])
        sampler = data.WeightedRandomSampler(weights, len(weights))

        train_data = PrepreprocessData(
            train_fold, char_len, char2idx, ignore_idx)
        valid_data = PrepreprocessData(
            valid_fold, char_len, char2idx, ignore_idx)
        train_loader = data.DataLoader(dataset=train_data,
                                       batch_size=batch_size, sampler=sampler, num_workers=4, pin_memory=True)
        valid_loader = data.DataLoader(dataset=valid_data,
                                       batch_size=batch_size, shuffle=False, num_workers=4, pin_memory=True)

        model = CLCNN(encode_dim, char_len, len(char2idx), ignore_idx, feat_num).to(device)

        loss_func = nn.BCEWithLogitsLoss(reduction="sum")
        optimizer = torch.optim.SGD(
            model.parameters(), lr=lr, momentum=momentum)
        early_stopping = EarlyStopping(char2idx, patience=patience, verbose=True)

        for epoch in range(MAX_epoch):
            start_time = time.time()
            train_loss = trainer.train(
                model, train_loader, loss_func, device, optimizer)

            valid_loss = trainer.valid(model, valid_loader, loss_func, device)

            elapsed_time = time.time() - start_time
            print(
                f'Epoch {epoch+1}/{MAX_epoch} \t loss={train_loss:.4f} \t val_loss={valid_loss:.4f} \t time={elapsed_time:.2f}')

            early_stopping(valid_loss, model)
            if early_stopping.early_stop:
                print('stop')
                break
        print(f'{i}-fold best result valid loss : {early_stopping.best_score:.2f}')
Example #14
            {"indices": [1]},
            [],
            {},
            data.SubsetRandomSampler(indices=[1]),
            id="SubsetRandomSamplerConf",
        ),
        pytest.param(
            "utils.data.sampler",
            "WeightedRandomSampler",
            {
                "weights": [1],
                "num_samples": 1
            },
            [],
            {},
            data.WeightedRandomSampler(weights=[1], num_samples=1),
            id="WeightedRandomSamplerConf",
        ),
        # TODO: investigate testing distributed instantiation
        # pytest.param(
        #    "utils.data.distributed",
        #    "DistributedSampler",
        #    {},
        #    [],
        #    {"dataset": dummy_dataset},
        #    data.DistributedSampler(group=dummy_group,dataset=dummy_dataset),
        #    id="DistributedSamplerConf",
        # ),
    ],
)
def test_instantiate_classes(
Example #15
    
    os.environ['PYTHONHASHSEED']=str(args.seed)
    random.seed(args.seed)
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
    torch.backends.cudnn.enabled = False

    vocabulary = vocab.VocabDictionary()
    vocabulary.create_from_count_file(args.vocabulary)

    train_dataset = text_dataset.SegmentationTextDataset(args.train_corpus, vocabulary, args.min_split_samples_batch_ratio, args.unk_noise_prob)

    if args.min_split_samples_batch_ratio > 0.0:
        sampler = data.WeightedRandomSampler(train_dataset.weights, len(train_dataset.weights))

        train_dataloader = data.DataLoader(train_dataset, num_workers=0, batch_size=args.batch_size,
                                           drop_last=True,
                                           collate_fn=text_dataset.collater, sampler=sampler)
    else:
        train_dataloader = data.DataLoader(train_dataset, num_workers=0, batch_size=args.batch_size, shuffle=True, drop_last=True,
                                           collate_fn=text_dataset.collater)

    dev_dataset = text_dataset.SegmentationTextDataset(args.dev_corpus, vocabulary)
    dev_dataloader = data.DataLoader(dev_dataset, num_workers=0, batch_size=args.batch_size, shuffle=False, drop_last=False,
                                     collate_fn=text_dataset.collater)

    if args.model_architecture == "ff_text":
        model = SimpleRNNFFTextModel(args, vocabulary).to(device)
    else:
Example #16
def train_model(training_data, output_dir, params):
    """
    Create the output file paths
    """
    checkpoint_file = os.path.join(output_dir, "chkpt.pth")
    trained_model = os.path.join(output_dir, "model.pth")
    metrics_file = os.path.join(output_dir, 'metrics.csv')
    best_model_file = os.path.join(output_dir, 'model_early_stop.pth')
    """
    Build the Data supply pipeline for the training and validation
    """
    input_data = {}
    input_data['train'], input_data['eval'] = AudioSpectDataset.get_datasets(
        inputfile=training_data,
        train_ratio=0.9,
        label_dict=params["label_dict"],
    )

    dataset_sizes = {x: len(input_data[x]) for x in ['train', 'eval']}
    """
    Handle class imbalance by oversampling. Define the sampler
    """
    all_labels = input_data['train'].get_all_labels()
    class_freq = np.bincount(all_labels)
    class_weights = 100.0 / class_freq
    each_samples_weight = class_weights[all_labels]
    sampler = data.WeightedRandomSampler(each_samples_weight,
                                         len(input_data['train']))

    dataloaders = {}
    for x in ['train', 'eval']:
        if x == 'train':
            dataloaders[x] = data.DataLoader(input_data[x],
                                             batch_size=params["batch_size"],
                                             sampler=sampler,
                                             num_workers=1)
        else:
            dataloaders[x] = data.DataLoader(input_data[x],
                                             batch_size=params["batch_size"],
                                             num_workers=1)
    """
    Getting ready for training
    1. Set up the model instance. 
    2. If checkpoint exists from an interrupted previous run, load from the checkpoint
    3. Set up the Loss function
    4. Set up the Optimizer
    """
    classifier = ChimpCallClassifier(
        num_labels=params["num_labels"],
        spectrogram_shape=params["spectrogram_shape"],
        dropout=params["dropout"]).float()

    device = "cpu"
    if torch.cuda.is_available():
        device = "cuda:0"
        if torch.cuda.device_count() > 1:
            classifier = nn.DataParallel(classifier)
    classifier.to(device)

    loss_func = torch.nn.CrossEntropyLoss()

    optimizer_func = torch.optim.Adam(classifier.parameters(),
                                      lr=params["learning_rate"],
                                      eps=params["epsilon"],
                                      weight_decay=params["weight_decay"])

    exp_lr_scheduler = lr_scheduler.StepLR(
        optimizer_func, step_size=params["scheduler_step_size"], gamma=0.1)

    # if checkpoint from an interrupted run exists, load the model, optimizer, early stopper and metrics
    if os.path.exists(checkpoint_file):
        try:
            checkpoint_data = torch.load(checkpoint_file)
            classifier.load_state_dict(checkpoint_data['model_state_dict'])
            optimizer_func.load_state_dict(checkpoint_data['optim_state_dict'])
            start_epoch = checkpoint_data['epoch'] + 1
            metrics_list = checkpoint_data['metrics']
            lowest_loss = checkpoint_data['lowest_loss']
            best_epoch = checkpoint_data['best_epoch']
            print("Checkpoint found. Loaded!")
            print("Re-starting training with epoch# {}".format(
                start_epoch + 1))  # epochs shown on screen start from 1
        except Exception as e:
            print("Error in reinstating interrupted run. Error: {}".format(e))
            exit()
    else:
        start_epoch = 0
        metrics_list = []
        lowest_loss = 1e14
        best_epoch = 0
    """
    Training and validation
    """
    print("*" * 70)
    print("Training Starting")
    print(
        "To restart, when launching use the option: -o {}".format(output_dir))
    print("*" * 70)
    for epoch in range(start_epoch, params["num_epochs"]):
        epoch_loss = {'train': 0.0, 'eval': 0.0}
        epoch_accuracy = {'train': 0.0, 'eval': 0.0}
        labels_input = []
        labels_preds = []
        precision = recall = f1 = 0

        for phase in ['train', 'eval']:
            running_loss = 0.0
            running_corrects = 0

            if phase == 'train':
                classifier.train()
            else:
                classifier.eval()

            for samples in dataloaders[phase]:
                input_spects = samples['spectrogram'].to(device)
                input_labels = samples['label'].to(device)
                optimizer_func.zero_grad()  # Clear off the gradients from any past operation
                with torch.set_grad_enabled(phase == 'train'):
                    outputs = classifier(input_spects)  # Do the forward pass
                    loss = loss_func(outputs,
                                     input_labels)  # Calculate the loss
                    if phase == 'train':
                        loss.backward()  # Calculate the gradients with help of back propagation
                        optimizer_func.step()  # Ask the optimizer to adjust the parameters based on the gradients
                # Record the predictions
                _, predicted = torch.max(outputs, 1)  # the indexes are the predicted classes. Need only that from torch.max
                # set up metrics
                running_loss += loss.item() * input_labels.size(0)  # accumulate the loss
                running_corrects += (predicted == input_labels).sum()
                if phase == 'eval':
                    labels_preds.append(predicted)
                    labels_input.append(input_labels)

            if phase == 'train':
                exp_lr_scheduler.step()
            """
            Calculate performance metrics for the train and eval runs of this epoch
            """
            epoch_loss[phase] = running_loss / dataset_sizes[phase]
            epoch_accuracy[phase] = running_corrects.item() / dataset_sizes[phase]
            if phase == 'eval':
                all_input_labels = torch.cat(labels_input).cpu()
                all_preds_labels = torch.cat(labels_preds).cpu()
                precision = precision_score(all_input_labels,
                                            all_preds_labels,
                                            average='weighted')
                recall = recall_score(all_input_labels,
                                      all_preds_labels,
                                      average='weighted')
                f1 = f1_score(all_input_labels,
                              all_preds_labels,
                              average='weighted')
                cfm = confusion_matrix(all_input_labels,
                                       all_preds_labels,
                                       labels=range(params["num_labels"]))

        # Finish up the Epoch: Save model & optimizer state, metrics and earlystop. Print performance. Check early stopping
        metrics_list.append({
            "epoch": epoch + 1,
            "train_loss": epoch_loss['train'],
            "train_acc": epoch_accuracy['train'],
            "eval_loss": epoch_loss['eval'],
            "accuracy": epoch_accuracy['eval'],
            "precision": precision,
            "recall": recall,
            "f1": f1,
            "cfm": cfm
        })
        print(
            'Epoch %2d/%d, Training (Loss: %.4f, Acc: %.2f ), '
            'Validation (Loss: %.4f, Acc: %.2f , precision: %.2f, recall: %.2f, f1: %.2f) '
            % (epoch + 1, params["num_epochs"], epoch_loss['train'],
               epoch_accuracy['train'] * 100, epoch_loss['eval'],
               epoch_accuracy['eval'] * 100, precision * 100, recall * 100,
               f1 * 100))

        if epoch_loss['eval'] < lowest_loss:
            best_epoch = epoch
            lowest_loss = epoch_loss['eval']
            torch.save(classifier.state_dict(), best_model_file)

        torch.save(
            {
                'epoch': epoch,
                'model_state_dict': classifier.state_dict(),
                'optim_state_dict': optimizer_func.state_dict(),
                'best_epoch': best_epoch,
                'lowest_loss': lowest_loss,
                'metrics': metrics_list
            }, checkpoint_file)
    """
    Save to disk: final model and all epoch metrics as csv file
    Final Model: the best performing model is always saved at the location 'best_model_file' because of the Early Stop code
    """
    classifier.load_state_dict(torch.load(best_model_file))
    torch.save(
        {
            'model': classifier.state_dict(),
            'labels':
            {idx: label
             for label, idx in params["label_dict"].items()},
            'spectrogram_shape': params["spectrogram_shape"],
            'dropout': params["dropout"]
        }, trained_model)
    pd.DataFrame(metrics_list,
                 columns=[
                     "epoch", "train_loss", "train_acc", "eval_loss",
                     "accuracy", "precision", "recall", "f1", "cfm"
                 ]).to_csv(metrics_file, index=False, header=True)

    #  clean up, remove the temporary files used to store runtime state
    if os.path.exists(best_model_file):
        os.remove(best_model_file)
    if os.path.exists(checkpoint_file):
        os.remove(checkpoint_file)

    print("Training Complete! Epoch #{:2d} saved".format(best_epoch + 1))
    print("Model at: {}".format(trained_model))
    print("Metrics at: {}".format(metrics_file))
def create_weighted_sampler(labels):
    labels_unique, counts = np.unique(labels, return_counts=True)
    class_weights = [sum(counts) / c for c in counts]
    example_weights = [class_weights[int(e)] for e in labels]
    sampler = data_utils.WeightedRandomSampler(example_weights, len(labels))
    return sampler
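A hedged usage sketch for create_weighted_sampler; the labels are a toy imbalanced array and the commented DataLoader line assumes a hypothetical dataset object:

import numpy as np

labels = np.array([0, 0, 0, 0, 1])                 # class 0 dominates 4:1
sampler = create_weighted_sampler(labels)          # class-1 examples get 4x the per-example weight
# loader = data_utils.DataLoader(dataset, batch_size=2, sampler=sampler)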
Example #18
def create_train_val_data_loaders(data_dir,
                                  *,
                                  min_pts=75,
                                  batch_size=32,
                                  validation_frac=0.2,
                                  num_of_workers=0):
    """
    Return pair of pytorch dataloaders for train and validation sets.
    """
    # sample => scale => (if train) random jitter and random rotation along z axis => transform to Pytorch tensor

    mps_transform = MinPointSampler(min_pts, replace_flag=True)
    pt_scaler = PointScaler()
    points_aug = RndPointsAugmentations(jitter_b=0.3)

    train_transforms = transforms.Compose(
        [mps_transform, points_aug, pt_scaler,
         transforms.ToTensor()])

    val_transforms = transforms.Compose(
        [mps_transform, pt_scaler,
         transforms.ToTensor()])

    train_data = datasets.DatasetFolder(
        data_dir,
        loader=lambda x: np.load(x).astype(np.float32),
        extensions=("npy"),
        transform=train_transforms)

    val_data = datasets.DatasetFolder(
        data_dir,
        loader=lambda x: np.load(x).astype(np.float32),
        extensions=("npy"),
        transform=val_transforms)

    dataset_len = len(train_data)

    indices = np.arange(dataset_len)

    val_abs_size = int(np.floor(validation_frac * dataset_len))

    np.random.shuffle(indices)

    train_id, val_id = indices[val_abs_size:], indices[:val_abs_size]

    all_dataset = train_data.samples.copy()
    all_targets = train_data.targets.copy()

    train_data.samples = [all_dataset[i] for i in train_id]
    train_data.targets = [all_targets[i] for i in train_id]

    train_weight = 1 / np.array([(np.array(train_data.targets) == tgt).sum()
                                 for tgt in np.unique(train_data.targets)])

    train_samples_weight = torch.tensor(
        [train_weight[tgt] for tgt in train_data.targets])

    train_sampler = tdata.WeightedRandomSampler(train_samples_weight,
                                                len(train_samples_weight))

    train_loader = tdata.DataLoader(train_data,
                                    sampler=train_sampler,
                                    batch_size=batch_size,
                                    num_workers=num_of_workers,
                                    drop_last=True)

    if validation_frac > 0:
        val_data.samples = [all_dataset[i] for i in val_id]
        val_data.targets = [all_targets[i] for i in val_id]

        # readjust probabilities for unbalanced classes

        val_weight = 1 / np.array([(np.array(val_data.targets) == tgt).sum()
                                   for tgt in np.unique(val_data.targets)])

        val_samples_weight = torch.tensor(
            [val_weight[tgt] for tgt in val_data.targets])

        val_sampler = tdata.WeightedRandomSampler(val_samples_weight,
                                                  len(val_samples_weight))

        val_loader = tdata.DataLoader(val_data,
                                      sampler=val_sampler,
                                      batch_size=batch_size,
                                      num_workers=num_of_workers,
                                      drop_last=True)
    else:
        val_loader = None

    return train_loader, val_loader
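A hedged usage sketch; the directory name is hypothetical and is expected to contain one subfolder of .npy point clouds per class:

train_loader, val_loader = create_train_val_data_loaders(
    "./pointcloud_classes", min_pts=75, batch_size=32, validation_frac=0.2)
for points, targets in train_loader:
    # batches arrive with classes roughly balanced thanks to the weighted samplers
    break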
Example #19
def main():
    ia.seed(1)

    train_datapath = "./food11re/skewed_training"
    valid_datapath = "./food11re/validation"
    test_datapath = "./food11re/evaluation"

    transform = transforms.Compose([
        transforms.RandomRotation(30),
        transforms.RandomResizedCrop(224),
        transforms.RandomHorizontalFlip(),
        ImgAugTransform(), lambda x: PIL.Image.fromarray(x),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ])

    train_dataset = Food11Dataset(train_datapath, is_train=True)
    train_dataset_folder = torchvision.datasets.ImageFolder(
        root='./food11re/skewed_training', transform=transform)
    valid_dataset = Food11Dataset(valid_datapath, is_train=False)
    test_dataset = Food11Dataset(test_datapath, is_train=False)

    #wts = [100, 781, 67, 169, 196, 75, 757, 1190, 194, 67, 2857]
    #train_dataset.augmentation(wts)

    weight = []
    for i in range(11):
        class_count = train_dataset_folder.targets.count(i)
        weight.append(1. / (class_count / len(train_dataset_folder.targets)))

    samples_weight = np.array([weight[t] for _, t in train_dataset_folder])
    weighted_sampler = data.WeightedRandomSampler(samples_weight,
                                                  num_samples=15000,
                                                  replacement=True)

    random_sampler = data.RandomSampler(train_dataset,
                                        replacement=True,
                                        num_samples=9000,
                                        generator=None)

    print(
        "----------------------------------------------------------------------------------"
    )
    print("Dataset bf. loading - ", train_datapath)
    print(train_dataset.show_details())

    print(
        "----------------------------------------------------------------------------------"
    )
    print("Dataset bf. loading - ", valid_datapath)
    print(valid_dataset.show_details())

    print(
        "----------------------------------------------------------------------------------"
    )
    print("Dataset bf. loading - ", test_datapath)
    print(test_dataset.show_details())

    train_folder_loader = DataLoader(dataset=train_dataset_folder,
                                     num_workers=0,
                                     batch_size=100,
                                     sampler=weighted_sampler)
    train_loader = DataLoader(dataset=train_dataset,
                              num_workers=0,
                              batch_size=100,
                              sampler=random_sampler)
    valid_loader = DataLoader(dataset=valid_dataset,
                              num_workers=0,
                              batch_size=100,
                              shuffle=False)
    test_loader = DataLoader(dataset=test_dataset,
                             num_workers=0,
                             batch_size=100,
                             shuffle=False)

    data_loading(train_folder_loader, train_dataset)
    data_loading(train_loader, train_dataset)
    data_loading(valid_loader, valid_dataset)
    data_loading(test_loader, test_dataset)
Example #20
def train(net, data, classes, test_features, test_classes):
  criterion = nn.CrossEntropyLoss()
  # optimizer = optim.SGD(net.parameters(), lr=0.01, momentum=0.9)
  optimizer = optim.Adam(net.parameters(), lr=0.05)

  t_dataset = Data.TensorDataset(data, classes)

  samples_weight = [1/5, 1/5, 1/5]

  sampler = Data.WeightedRandomSampler(samples_weight, 30)

  # Batch size is one
  loader = Data.DataLoader(dataset=t_dataset, batch_size=1, num_workers=0, shuffle=True)

  d = data[0]
  c = classes[0]
  co_far = 0

  print()
  print()
  print()

  for epoch in range(99):  # loop over the dataset multiple times
    ct = 0
    running_loss = 0.0
    for i, data in enumerate(loader, 0):
        # get the inputs; data is a list of [inputs, labels]
        inputs, labels = data

        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = net(inputs)
        outputs = torch.reshape(outputs, (1,3))
        # print(outputs.tolist())

        # print(outputs)
        # print(labels)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # print statistics

        running_loss += loss.item()
        ct += 1
    if ((epoch+1) % 10 == 0):

      record_acc(net, test_features, test_classes)

      # cor = torch.argmax(net(torch.Tensor([d]))).item() == c.item()
      # if cor:
      #   co_far += 1
      # print(cor)
      # print()
      print('[%d] loss: %.8f' % (epoch + 1,  running_loss/ct))
      ct = 0
      running_loss = 0.0

  return net
Example #21
def train(cfg, writer, logger, start_iter=0, model_only=False, gpu=-1, save_dir=None):

    # Setup seeds and config
    torch.manual_seed(cfg.get("seed", 1337))
    torch.cuda.manual_seed(cfg.get("seed", 1337))
    np.random.seed(cfg.get("seed", 1337))
    random.seed(cfg.get("seed", 1337))
    
    # Setup device
    if gpu == -1:
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    else:
        device = torch.device("cuda:%d" %gpu if torch.cuda.is_available() else "cpu")

    # Setup Augmentations
    augmentations = cfg["training"].get("augmentations", None)
    if cfg["data"]["dataset"] == "softmax_cityscapes_convention":
        data_aug = get_composed_augmentations_softmax(augmentations)
    else:
        data_aug = get_composed_augmentations(augmentations)

    # Setup Dataloader
    data_loader = get_loader(cfg["data"]["dataset"])
    data_path = cfg["data"]["path"]

    t_loader = data_loader(
        data_path,
        config = cfg["data"],
        is_transform=True,
        split=cfg["data"]["train_split"],
        img_size=(cfg["data"]["img_rows"], cfg["data"]["img_cols"]),
        augmentations=data_aug,
    )
    v_loader = data_loader(
        data_path,
        config = cfg["data"],
        is_transform=True,
        split=cfg["data"]["val_split"],
        img_size=(cfg["data"]["img_rows"], cfg["data"]["img_cols"]),
    )

    sampler = None
    if "sampling" in cfg["data"]:
        sampler = data.WeightedRandomSampler(
            weights = get_sampling_weights(t_loader, cfg["data"]["sampling"]),
            num_samples = len(t_loader),
            replacement = True
        )
    n_classes = t_loader.n_classes
    trainloader = data.DataLoader(
        t_loader,
        batch_size=cfg["training"]["batch_size"],
        num_workers=cfg["training"]["n_workers"],
        sampler=sampler,
        shuffle=sampler is None,
    )
    valloader = data.DataLoader(
        v_loader, batch_size=cfg["training"]["batch_size"], num_workers=cfg["training"]["n_workers"]
    )

    # Setup Metrics
    running_metrics_val = {"seg": runningScoreSeg(n_classes)}
    if "classifiers" in cfg["data"]:
        for name, classes in cfg["data"]["classifiers"].items():
            running_metrics_val[name] = runningScoreClassifier( len(classes) )
    if "bin_classifiers" in cfg["data"]:
        for name, classes in cfg["data"]["bin_classifiers"].items():
            running_metrics_val[name] = runningScoreClassifier(2)

    # Setup Model
    model = get_model(cfg["model"], n_classes).to(device)
    
    total_params = sum(p.numel() for p in model.parameters())
    print( 'Parameters:',total_params )

    if gpu == -1:
        model = torch.nn.DataParallel(model, device_ids=range(torch.cuda.device_count()))
    else:
        model = torch.nn.DataParallel(model, device_ids=[gpu])
    
    model.apply(weights_init)
    pretrained_path='weights/hardnet_petite_base.pth'
    weights = torch.load(pretrained_path)
    model.module.base.load_state_dict(weights)

    # Setup optimizer, lr_scheduler and loss function
    optimizer_cls = get_optimizer(cfg)
    optimizer_params = {k: v for k, v in cfg["training"]["optimizer"].items() if k != "name"}

    optimizer = optimizer_cls(model.parameters(), **optimizer_params)
    print("Using optimizer {}".format(optimizer))

    scheduler = get_scheduler(optimizer, cfg["training"]["lr_schedule"])
    loss_dict = get_loss_function(cfg, device)

    if cfg["training"]["resume"] is not None:
        if os.path.isfile(cfg["training"]["resume"]):
            logger.info(
                "Loading model and optimizer from checkpoint '{}'".format(cfg["training"]["resume"])
            )
            checkpoint = torch.load(cfg["training"]["resume"], map_location=device)
            model.load_state_dict(checkpoint["model_state"], strict=False)
            if not model_only:
                optimizer.load_state_dict(checkpoint["optimizer_state"])
                scheduler.load_state_dict(checkpoint["scheduler_state"])
                start_iter = checkpoint["epoch"]
            logger.info(
                "Loaded checkpoint '{}' (iter {})".format(
                    cfg["training"]["resume"], checkpoint["epoch"]
                )
            )
        else:
            logger.info("No checkpoint found at '{}'".format(cfg["training"]["resume"]))

    if cfg["training"]["finetune"] is not None:
        if os.path.isfile(cfg["training"]["finetune"]):
            logger.info(
                "Loading model and optimizer from checkpoint '{}'".format(cfg["training"]["finetune"])
            )
            checkpoint = torch.load(cfg["training"]["finetune"])
            model.load_state_dict(checkpoint["model_state"])

    val_loss_meter = averageMeter()
    time_meter = averageMeter()

    best_iou = -100.0
    i = start_iter
    flag = True
    loss_all = 0
    loss_n = 0

    while i <= cfg["training"]["train_iters"] and flag:
        for (images, label_dict, _) in trainloader:
            i += 1
            start_ts = time.time()
            scheduler.step()
            model.train()

            images = images.to(device)
            optimizer.zero_grad()
            output_dict = model(images)

            loss = compute_loss(    # considers key names in loss_dict and output_dict
                loss_dict, images, label_dict, output_dict, device, t_loader
            )
            
            loss.backward()         # backprops sum of loss tensors, frozen components will have no grad_fn
            optimizer.step()
            c_lr = scheduler.get_lr()

            if i%1000 == 0:             # log images, seg ground truths, predictions
                pred_array = output_dict["seg"].data.max(1)[1].cpu().numpy()
                gt_array = label_dict["seg"].data.cpu().numpy()
                softmax_gt_array = None
                if "softmax" in label_dict:
                    softmax_gt_array = label_dict["softmax"].data.max(1)[1].cpu().numpy()
                write_images_to_board(t_loader, images, gt_array, pred_array, i, name = 'train', softmax_gt = softmax_gt_array)

                if save_dir is not None:
                    image_array = images.data.cpu().numpy().transpose(0, 2, 3, 1)
                    write_images_to_dir(t_loader, image_array, gt_array, pred_array, i, save_dir, name = 'train', softmax_gt = softmax_gt_array)

            time_meter.update(time.time() - start_ts)
            loss_all += loss.item()
            loss_n += 1
            
            if (i + 1) % cfg["training"]["print_interval"] == 0:
                fmt_str = "Iter [{:d}/{:d}]  Loss: {:.4f}  Time/Image: {:.4f}  lr={:.6f}"
                print_str = fmt_str.format(
                    i + 1,
                    cfg["training"]["train_iters"],
                    loss_all / loss_n,
                    time_meter.avg / cfg["training"]["batch_size"],
                    c_lr[0],
                )

                print(print_str)
                logger.info(print_str)
                writer.add_scalar("loss/train_loss", loss.item(), i + 1)
                time_meter.reset()

            if (i + 1) % cfg["training"]["val_interval"] == 0 or (i + 1) == cfg["training"][
                "train_iters"
            ]:
                torch.cuda.empty_cache()
                model.eval() # set batchnorm and dropouts to work in eval mode
                loss_all = 0
                loss_n = 0
                with torch.no_grad(): # Deactivate torch autograd engine, less memory usage
                    for i_val, (images_val, label_dict_val, _) in tqdm(enumerate(valloader)):
                        
                        images_val = images_val.to(device)
                        output_dict = model(images_val)
                        
                        val_loss = compute_loss(
                            loss_dict, images_val, label_dict_val, output_dict, device, v_loader
                        )
                        val_loss_meter.update(val_loss.item())

                        for name, metrics in running_metrics_val.items():
                            gt_array = label_dict_val[name].data.cpu().numpy()
                            if name+'_loss' in cfg['training'] and cfg['training'][name+'_loss']['name'] == 'l1':  # for binary classification
                                pred_array = output_dict[name].data.cpu().numpy()
                                pred_array = np.sign(pred_array)
                                pred_array[pred_array == -1] = 0
                                gt_array[gt_array == -1] = 0
                            else:
                                pred_array = output_dict[name].data.max(1)[1].cpu().numpy()

                            metrics.update(gt_array, pred_array)

                softmax_gt_array = None # log validation images
                pred_array = output_dict["seg"].data.max(1)[1].cpu().numpy()
                gt_array = label_dict_val["seg"].data.cpu().numpy()
                if "softmax" in label_dict_val:
                    softmax_gt_array = label_dict_val["softmax"].data.max(1)[1].cpu().numpy()
                write_images_to_board(v_loader, images_val, gt_array, pred_array, i, 'validation', softmax_gt = softmax_gt_array)
                if save_dir is not None:
                    images_val = images_val.cpu().numpy().transpose(0, 2, 3, 1)
                    write_images_to_dir(v_loader, images_val, gt_array, pred_array, i, save_dir, name='validation', softmax_gt = softmax_gt_array)

                logger.info("Iter %d Val Loss: %.4f" % (i + 1, val_loss_meter.avg))
                writer.add_scalar("loss/val_loss", val_loss_meter.avg, i + 1)

                for name, metrics in running_metrics_val.items():
                    
                    overall, classwise = metrics.get_scores()
                    
                    for k, v in overall.items():
                        logger.info("{}_{}: {}".format(name, k, v))
                        writer.add_scalar("val_metrics/{}_{}".format(name, k), v, i + 1)

                        if k == cfg["training"]["save_metric"]:
                            curr_performance = v

                    for metric_name, metric in classwise.items():
                        for k, v in metric.items():
                            logger.info("{}_{}_{}: {}".format(name, metric_name, k, v))
                            writer.add_scalar("val_metrics/{}_{}_{}".format(name, metric_name, k), v, i + 1)

                    metrics.reset()
                
                state = {
                      "epoch": i + 1,
                      "model_state": model.state_dict(),
                      "optimizer_state": optimizer.state_dict(),
                      "scheduler_state": scheduler.state_dict(),
                }
                save_path = os.path.join(
                    writer.file_writer.get_logdir(),
                    "{}_{}_checkpoint.pkl".format(cfg["model"]["arch"], cfg["data"]["dataset"]),
                )
                torch.save(state, save_path)

                if curr_performance >= best_iou:
                    best_iou = curr_performance
                    state = {
                        "epoch": i + 1,
                        "model_state": model.state_dict(),
                        "best_iou": best_iou,
                    }
                    save_path = os.path.join(
                        writer.file_writer.get_logdir(),
                        "{}_{}_best_model.pkl".format(cfg["model"]["arch"], cfg["data"]["dataset"]),
                    )
                    torch.save(state, save_path)
                torch.cuda.empty_cache()

            if (i + 1) == cfg["training"]["train_iters"]:
                flag = False
                break
Example #22
def train_net(net, cfg):

    log_path = cfg.OUTPUT_DIR
    summarize_config(cfg)

    optimizer = optim.Adam(net.parameters(),
                           lr=cfg.TRAINER.LR,
                           weight_decay=0.0005)
    weighted_criterion = False
    if cfg.MODEL.LOSS_TYPE == 'CrossEntropyLoss':
        criterion = cross_entropy_loss
    elif cfg.MODEL.LOSS_TYPE == 'SoftDiceMulticlassLoss':
        criterion = soft_dice_loss_multi_class
    elif cfg.MODEL.LOSS_TYPE == 'SoftDiceMulticlassLossDebug':
        criterion = soft_dice_loss_multi_class_debug
    elif cfg.MODEL.LOSS_TYPE == 'GeneralizedDiceLoss':
        criterion = generalized_soft_dice_loss_multi_class
    elif cfg.MODEL.LOSS_TYPE == 'JaccardLikeLoss':
        criterion = jaccard_like_loss_multi_class
    elif cfg.MODEL.LOSS_TYPE == 'ComboLoss':
        criterion = combo_loss
        weighted_criterion = cfg.TRAINER.CE_CLASS_BALANCE.ENABLED
        weights = 1 / torch.tensor(cfg.TRAINER.CE_CLASS_BALANCE.WEIGHTS)
        weights = weights.cuda()

    if cfg.MODEL.PRETRAINED.ENABLED:
        net = load_pretrained(net, cfg)

    if torch.cuda.device_count() > 1:
        print(torch.cuda.device_count(), " GPUs!")
        net = nn.DataParallel(net)
    net.to(device)
    bg_class = cfg.MODEL.BACKGROUND.TYPE
    trfm = build_transforms(
        cfg,
        for_training=True,
        use_gts_mask=cfg.DATASETS.LOCALIZATION_MASK.TRAIN_USE_GTS_MASK)
    dataset = Xview2Detectron2DamageLevelDataset(cfg.DATASETS.TRAIN[0],
                                                 pre_or_post='post',
                                                 include_image_weight=True,
                                                 background_class=bg_class,
                                                 transform=trfm)

    dataloader_kwargs = {
        'batch_size': cfg.TRAINER.BATCH_SIZE,
        'num_workers': cfg.DATALOADER.NUM_WORKER,
        'shuffle': cfg.DATALOADER.SHUFFLE,
        'drop_last': True,
    }

    # sampler
    if cfg.AUGMENTATION.IMAGE_OVERSAMPLING_TYPE == 'simple':
        image_p = image_sampling_weight(dataset.dataset_metadata)
        sampler = torch_data.WeightedRandomSampler(weights=image_p,
                                                   num_samples=len(image_p))
        dataloader_kwargs['sampler'] = sampler
        dataloader_kwargs['shuffle'] = False

    dataloader = torch_data.DataLoader(dataset, **dataloader_kwargs)

    max_epochs = cfg.TRAINER.EPOCHS
    global_step = 0
    for epoch in range(max_epochs):
        start = timeit.default_timer()
        print('Starting epoch {}/{}.'.format(epoch + 1, max_epochs))
        epoch_loss = 0

        net.train()
        loss_set, f1_set = [], []
        loss_component_set = []
        positive_pixels_set = []  # Used to evaluate image oversampling techniques
        for i, batch in enumerate(dataloader):
            x = batch['x'].to(device)
            y_gts = batch['y'].to(device)
            image_weight = batch['image_weight']

            # # TODO DEBUG
            # xtest  = x.cpu().permute(0,2,3,1).contiguous().numpy()
            # ytest = y_gts.cpu().permute(0,2,3,1).contiguous().numpy()
            # testy = ytest[0, ..., 1:4] # ignore bg
            # plt.imshow(testy)
            # plt.savefig('test_y.png')
            #
            # postx = xtest[0, ..., :3]
            # plt.imshow(postx)
            # plt.savefig('test_post.png')
            #
            # prex = xtest[0, ..., 4:7]
            # plt.imshow(prex)
            # plt.savefig('test_pre.png')
            # # TODO END DEBUG

            optimizer.zero_grad()

            y_pred = net(x)
            ce_loss = 0
            dice_loss = 0
            if weighted_criterion:
                loss, (ce_loss, dice_loss) = criterion(y_pred, y_gts, weights)
            else:
                loss = criterion(y_pred, y_gts)
            epoch_loss += loss.item()

            loss.backward()
            optimizer.step()

            loss_set.append(loss.item())
            # loss_component_set.append(loss_component.cpu().detach().numpy())
            positive_pixels_set.extend(image_weight.cpu().numpy())

            if global_step % 10000 == 0 and global_step > 0:
                check_point_name = f'cp_{global_step}.pkl'
                save_path = os.path.join(log_path, check_point_name)
                torch.save(net.state_dict(), save_path)

            if global_step % 100 == 0 and global_step > 0:
                # time per 100 steps
                stop = timeit.default_timer()
                time_per_n_batches = stop - start

                max_mem, max_cache = gpu_stats()

                print(
                    f'step {global_step},  avg loss: {np.mean(loss_set):.4f}, cuda mem: {max_mem} MB, cuda cache: {max_cache} MB, time: {time_per_n_batches:.2f}s',
                    flush=True)

                log_data = {
                    'loss': np.mean(loss_set),
                    'ce_component_loss': ce_loss,
                    'dice_component_loss': dice_loss,
                    'gpu_memory': max_mem,
                    'time': time_per_n_batches,
                    'total_positive_pixels': np.mean(positive_pixels_set),
                    'step': global_step,
                }

                wandb.log(log_data)

                loss_set = []
                positive_pixels_set = []
                start = stop

            # torch.cuda.empty_cache()
            global_step += 1

        # Evaluation for multiclass F1 score
        dmg_model_eval(net,
                       cfg,
                       device,
                       max_samples=100,
                       step=global_step,
                       epoch=epoch)
        dmg_model_eval(net,
                       cfg,
                       device,
                       max_samples=100,
                       run_type='TRAIN',
                       step=global_step,
                       epoch=epoch)
Example #23
def train_net(net, cfg):

    log_path = cfg.OUTPUT_DIR
    writer = SummaryWriter(log_path)

    run_config = {}
    run_config['CONFIG_NAME'] = cfg.NAME
    run_config['device'] = device
    run_config['log_path'] = cfg.OUTPUT_DIR
    run_config['training_set'] = cfg.DATASETS.TRAIN
    run_config['test set'] = cfg.DATASETS.TEST
    run_config['epochs'] = cfg.TRAINER.EPOCHS
    run_config['learning rate'] = cfg.TRAINER.LR
    run_config['batch size'] = cfg.TRAINER.BATCH_SIZE
    table = {
        'run config name': run_config.keys(),
        ' ': run_config.values(),
    }
    print(tabulate(
        table,
        headers='keys',
        tablefmt="fancy_grid",
    ))

    optimizer = optim.Adam(net.parameters(),
                           lr=cfg.TRAINER.LR,
                           weight_decay=0.0005)
    if cfg.MODEL.LOSS_TYPE == 'BCEWithLogitsLoss':
        criterion = nn.BCEWithLogitsLoss()
    elif cfg.MODEL.LOSS_TYPE == 'CrossEntropyLoss':
        balance_weight = [cfg.MODEL.NEGATIVE_WEIGHT, cfg.MODEL.POSITIVE_WEIGHT]
        balance_weight = torch.tensor(balance_weight).float().to(device)
        criterion = nn.CrossEntropyLoss(weight=balance_weight)
    elif cfg.MODEL.LOSS_TYPE == 'SoftDiceLoss':
        criterion = soft_dice_loss
    elif cfg.MODEL.LOSS_TYPE == 'SoftDiceBalancedLoss':
        criterion = soft_dice_loss_balanced
    elif cfg.MODEL.LOSS_TYPE == 'JaccardLikeLoss':
        criterion = jaccard_like_loss
    elif cfg.MODEL.LOSS_TYPE == 'ComboLoss':
        criterion = lambda pred, gts: F.binary_cross_entropy_with_logits(
            pred, gts) + soft_dice_loss(pred, gts)
    elif cfg.MODEL.LOSS_TYPE == 'WeightedComboLoss':
        criterion = lambda pred, gts: 2 * F.binary_cross_entropy_with_logits(
            pred, gts) + soft_dice_loss(pred, gts)
    elif cfg.MODEL.LOSS_TYPE == 'FrankensteinLoss':
        criterion = lambda pred, gts: F.binary_cross_entropy_with_logits(
            pred, gts) + jaccard_like_balanced_loss(pred, gts)
    elif cfg.MODEL.LOSS_TYPE == 'FrankensteinEdgeLoss':
        criterion = frankenstein_edge_loss

    if torch.cuda.device_count() > 1:
        print(torch.cuda.device_count(), " GPUs!")
        net = nn.DataParallel(net)
    net.to(device)
    global_step = 0
    epochs = cfg.TRAINER.EPOCHS

    use_edge_loss = cfg.MODEL.LOSS_TYPE == 'FrankensteinEdgeLoss'

    for name, _ in net.named_parameters():
        print(name)

    trfm = []
    trfm.append(BGR2RGB())
    if cfg.DATASETS.USE_CLAHE_VARI: trfm.append(VARI())
    if cfg.AUGMENTATION.RESIZE:
        trfm.append(Resize(scale=cfg.AUGMENTATION.RESIZE_RATIO))
    if cfg.AUGMENTATION.CROP_TYPE == 'uniform':
        trfm.append(UniformCrop(crop_size=cfg.AUGMENTATION.CROP_SIZE))
    elif cfg.AUGMENTATION.CROP_TYPE == 'importance':
        trfm.append(ImportanceRandomCrop(crop_size=cfg.AUGMENTATION.CROP_SIZE))
    if cfg.AUGMENTATION.RANDOM_FLIP_ROTATE: trfm.append(RandomFlipRotate())

    trfm.append(Npy2Torch())
    trfm = transforms.Compose(trfm)

    # reset the generators
    dataset = Xview2Detectron2Dataset(
        cfg.DATASETS.TRAIN[0],
        pre_or_post=cfg.DATASETS.PRE_OR_POST,
        include_image_weight=True,
        transform=trfm,
        include_edge_mask=use_edge_loss,
        edge_mask_type=cfg.MODEL.EDGE_WEIGHTED_LOSS.TYPE,
        use_clahe=cfg.DATASETS.USE_CLAHE_VARI,
    )

    dataloader_kwargs = {
        'batch_size': cfg.TRAINER.BATCH_SIZE,
        'num_workers': cfg.DATALOADER.NUM_WORKER,
        'shuffle': cfg.DATALOADER.SHUFFLE,
        'drop_last': True,
        'pin_memory': True,
    }

    # sampler
    if cfg.AUGMENTATION.IMAGE_OVERSAMPLING_TYPE == 'simple':
        image_p = image_sampling_weight(dataset.dataset_metadata)
        sampler = torch_data.WeightedRandomSampler(weights=image_p,
                                                   num_samples=len(image_p))
        dataloader_kwargs['sampler'] = sampler
        dataloader_kwargs['shuffle'] = False

    dataloader = torch_data.DataLoader(dataset, **dataloader_kwargs)

    for epoch in range(epochs):
        start = timeit.default_timer()
        print('Starting epoch {}/{}.'.format(epoch + 1, epochs))
        epoch_loss = 0

        net.train()
        # mean AP, mean AUC, max F1
        mAP_set_train, mAUC_set_train, maxF1_train = [], [], []
        loss_set, f1_set = [], []
        positive_pixels_set = []  # Used to evaluate image oversampling techniques
        for i, batch in enumerate(dataloader):
            optimizer.zero_grad()

            x = batch['x'].to(device)
            y_gts = batch['y'].to(device)
            image_weight = batch['image_weight']

            y_pred = net(x)

            if cfg.MODEL.LOSS_TYPE == 'CrossEntropyLoss':
                # y_pred = y_pred # Cross entropy loss doesn't like single channel dimension
                y_gts = y_gts.long()  # Cross entropy loss requires a long as target

            if use_edge_loss:
                edge_mask = y_gts[:, [0]]
                y_gts = y_gts[:, 1:]
                edge_loss_scale = edge_loss_warmup_schedule(cfg, global_step)
                loss, ce_loss, jaccard_loss, edge_loss = criterion(
                    y_pred, y_gts, edge_mask, edge_loss_scale)
                wandb.log({
                    'ce_loss': ce_loss,
                    'jaccard_loss': jaccard_loss,
                    'edge_loss': edge_loss,
                    'step': global_step,
                    'edge_loss_scale': edge_loss_scale,
                })
            else:
                loss = criterion(y_pred, y_gts)

            epoch_loss += loss.item()

            loss.backward()
            optimizer.step()

            loss_set.append(loss.item())
            positive_pixels_set.extend(image_weight.cpu().numpy())

            if global_step % 100 == 0 or global_step == 0:
                # time per 100 steps
                stop = timeit.default_timer()
                time_per_n_batches = stop - start

                if global_step % 10000 == 0 and global_step > 0:
                    check_point_name = f'cp_{global_step}.pkl'
                    save_path = os.path.join(log_path, check_point_name)
                    torch.save(net.state_dict(), save_path)

                # Averaged loss and f1 writer

                # writer.add_scalar('f1/train', np.mean(f1_set), global_step)

                max_mem, max_cache = gpu_stats()
                print(
                    f'step {global_step},  avg loss: {np.mean(loss_set):.4f}, cuda mem: {max_mem} MB, cuda cache: {max_cache} MB, time: {time_per_n_batches:.2f}s',
                    flush=True)

                wandb.log({
                    'loss': np.mean(loss_set),
                    'gpu_memory': max_mem,
                    'time': time_per_n_batches,
                    'total_positive_pixels': np.mean(positive_pixels_set),
                    'step': global_step,
                })

                loss_set = []
                positive_pixels_set = []

                start = stop

            # torch.cuda.empty_cache()
            global_step += 1

        if epoch % 2 == 0:
            # Evaluation after every other epoch
            model_eval(net,
                       cfg,
                       device,
                       max_samples=100,
                       step=global_step,
                       epoch=epoch)
            model_eval(net,
                       cfg,
                       device,
                       max_samples=100,
                       run_type='TRAIN',
                       step=global_step,
                       epoch=epoch)
Example #24
 def get_sampler(self):
     self.sampler = data.WeightedRandomSampler(
         torch.tensor(self.priority_weights.cpu()), self.num_samples)
     return self.sampler
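Two properties of WeightedRandomSampler relied on throughout these examples: the weights do not need to sum to one, and with the default replacement=True the number of drawn samples may exceed the number of weights. A minimal sketch:

from torch.utils import data

sampler = data.WeightedRandomSampler(weights=[10.0, 1.0, 1.0], num_samples=100)
# index 0 is drawn roughly ten times as often as index 1 or 2,
# and 100 indices are drawn from only 3 weights because replacement defaults to True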