Example #1
def main(args):
    config_path = os.path.join(args.model_dir, 'config.json')
    with open(config_path) as f:
        config = json.load(f)

    logging.info('loading embedding...')
    with open(config['model_parameters']['embedding'], 'rb') as f:
        embedding = pickle.load(f)
        config['model_parameters']['embedding'] = embedding.vectors

    logging.info('loading valid data...')
    with open(config['model_parameters']['valid'], 'rb') as f:
        valid = pickle.load(f)
    logging.info('loading train data...')
    with open(config['train'], 'rb') as f:
        train = pickle.load(f)

    PredictorClass = BestPredictor

    predictor = PredictorClass(metrics=[Recall(at=1),
                                        Recall(at=5)],
                               **config['model_parameters'])

    model_checkpoint = ModelCheckpoint(
        os.path.join(args.model_dir, 'model.pkl'), 'loss', 1, 'all')
    metrics_logger = MetricsLogger(os.path.join(args.model_dir, 'log.json'))
    logging.info('start training!')
    predictor.fit_dataset(train, valid, train.collate_fn,
                          [model_checkpoint, metrics_logger])
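Note: these snippets come from different projects, so the Recall interface is not uniform (compare Recall(at=1) here with Recall(1) or Recall(TP, FN) below). As a point of reference, here is a minimal sketch of the ranking-style Recall@k object the fit_dataset examples appear to assume; the method names update, print_score, and name are inferred from usage in later examples, and the convention that the positive option sits at index 0 is an assumption, not a documented API.

import torch

# Hypothetical Recall@k metric; interface inferred from these examples,
# not taken from any specific library.
class Recall:
    def __init__(self, at=10):
        self.at = at
        self.n = 0          # examples seen so far
        self.n_correct = 0  # examples whose positive option ranked in the top k

    @property
    def name(self):
        return 'Recall@{}'.format(self.at)

    def update(self, predicts, targets):
        # predicts: (batch, n_options) scores; assumes the ground-truth
        # option is at index 0 of each row.
        k = min(self.at, predicts.size(-1))
        top_k = predicts.detach().topk(k, dim=-1).indices
        self.n += predicts.size(0)
        self.n_correct += (top_k == 0).any(dim=-1).sum().item()

    def print_score(self):
        return '{:.4f}'.format(self.n_correct / max(self.n, 1))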
Example #2
def main(args):
    print(args)
    config_path = os.path.join(args.model_dir, 'config.json')
    with open(config_path) as f:
        config = json.load(f)

    logging.info('loading embedding...')
    with open(config['model_parameters']['embedding'], 'rb') as f:
        embedding = pickle.load(f)
        config['model_parameters']['embedding'] = embedding.vectors

    logging.info('loading valid data...')
    with open(config['model_parameters']['valid'], 'rb') as f:
        config['model_parameters']['valid'] = pickle.load(f)

    logging.info('loading train data...')
    with open(config['train'], 'rb') as f:
        train = pickle.load(f)

    if config['arch'] == 'ExampleNet':
        from example_predictor import ExamplePredictor
        PredictorClass = ExamplePredictor
    predictor = PredictorClass(metrics=[Recall(1), Recall(10)],
                               **config['model_parameters'])

    if args.load is not None:
        predictor.load(args.load)

    model_checkpoint = ModelCheckpoint(
        os.path.join(args.model_dir, 'model.pkl'), 'loss', 1, 'all')
    metrics_logger = MetricsLogger(os.path.join(args.model_dir, 'log.json'))

    logging.info('start training!')
    predictor.fit_dataset(train, train.collate_fn,
                          [model_checkpoint, metrics_logger])
Example #3
def training(args, train_loader, valid_loader, model, device, split):
    """Training Procedure"""
    optimizer = torch.optim.Adam(model.parameters(), lr=args.lr)
    criterion = torch.nn.BCELoss()
    metric = Recall()
    total_iter = 0
    best_valid_f1 = 0

    for epoch in range(args.epochs):
        train_trange = tqdm(enumerate(train_loader),
                            total=len(train_loader),
                            desc='training')
        train_loss = 0
        for i, batch in train_trange:
            answer = batch['label'].to(device)
            prob = run_iter(batch, model, device, training=True)
            loss = criterion(prob, answer)
            train_loss += loss.item()
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            total_iter += 1
            metric.update(prob, answer)
            train_trange.set_postfix(loss=train_loss / (i + 1),
                                     **{metric.name: metric.print_score()})

            if total_iter % args.eval_steps == 0:
                valid_f1 = validation(valid_loader, model, device)
                if valid_f1 > best_valid_f1:
                    best_valid_f1 = valid_f1
                    torch.save(
                        model,
                        os.path.join(
                            args.model_dir,
                            'fine_tuned_roberta_{}.pkl'.format(split)))
Example #4
def main(args):

    config_path = os.path.join(args.model_dir, 'config.json')
    with open(config_path) as f:
        config = json.load(f)

    logging.info(f"Using cuda device: {config['cuda_ids']}")
    os.environ['CUDA_VISIBLE_DEVICES'] = config['cuda_ids']

    logging.info('loading embedding...')
    with open(config['model_parameters']['embedding'], 'rb') as f:
        embedding = pickle.load(f)
        config['model_parameters']['embedding'] = embedding.vectors

    logging.info('loading valid data...')
    with open(config['model_parameters']['valid'], 'rb') as f:
        config['model_parameters']['valid'] = pickle.load(f)

    logging.info('loading train data...')
    with open(config['train'], 'rb') as f:
        train = pickle.load(f)

    if config['arch'] == 'ExampleNet':
        from example_predictor import ExamplePredictor
        PredictorClass = ExamplePredictor
    elif config['arch'] == 'RnnBaselineNet':
        from rnnbaseline_predictor import RnnBaselinePredictor
        PredictorClass = RnnBaselinePredictor
    elif config['arch'] == 'RnnAttentionNet':
        from rnnattention_predictor import RnnAttentionPredictor
        PredictorClass = RnnAttentionPredictor
    elif config['arch'] == 'RnnTransformerNet':
        from rnntransformer_predictor import RnnTransformerPredictor
        PredictorClass = RnnTransformerPredictor

    predictor = PredictorClass(
        metrics=[Recall(), Recall(1), Recall(5)],
        **config['model_parameters']
    )

    if args.load is not None:
        predictor.load(args.load)

    model_checkpoint = ModelCheckpoint(
        os.path.join(args.model_dir, 'model.pkl'),
        'loss', 1, 'all'
    )
    metrics_logger = MetricsLogger(
        os.path.join(args.model_dir, 'log.json')
    )

    logging.info('start training!')
    predictor.fit_dataset(train,
                          train.collate_fn,
                          [model_checkpoint, metrics_logger])
Example #5
def main(args):
    if not os.path.exists(args.output_dir):
        os.makedirs(args.output_dir)
    output_dir = args.output_dir[:-1] if args.output_dir.endswith('/') else args.output_dir
    model_dir, exp_dir = os.path.split(output_dir)
    config_path = os.path.join(model_dir, 'config.json')
    with open(config_path) as f:
        config = json.load(f)
    logging.info(f'Save config file to {args.output_dir}.')
    with open(os.path.join(args.output_dir, 'config.json'), 'w') as f:
        json.dump(config, f, indent=4)

    logging.info('Loading training data...')
    with open(config['train_path'], 'rb') as f:
        train = pickle.load(f)
    train.context_padded_len = config['train_context_padded_len']
    train.option_padded_len = config['train_option_padded_len']
    train.n_negative = config['train_n_negative']

    logging.info('Loading validation data...')
    with open(config['valid_path'], 'rb') as f:
        valid = pickle.load(f)
        config['model_parameters']['valid'] = valid
    valid.context_padded_len = config['valid_context_padded_len']
    valid.option_padded_len = config['valid_option_padded_len']

    logging.info('Loading preprocessed word embedding...')
    with open(config['embedding_path'], 'rb') as f:
        embedding = pickle.load(f)
        config['model_parameters']['embedding'] = embedding

    metric = Recall(at=10)
    predictor = Predictor(training=True,
                          metrics=[metric],
                          device=args.device,
                          **config['model_parameters'])
    model_checkpoint = ModelCheckpoint(
        filepath=os.path.join(args.output_dir, 'model'),
        monitor=metric.name,
        mode='max',
        all_saved=False)
    metrics_logger = MetricsLogger(
        log_dest=os.path.join(args.output_dir, 'log.json'))

    if args.load_dir is not None:
        predictor.load(args.load_dir)

    logging.info('Start training.')
    start = time.time()
    predictor.fit_dataset(data=train,
                          collate_fn=train.collate_fn,
                          callbacks=[model_checkpoint, metrics_logger],
                          output_dir=args.output_dir)
    end = time.time()
    total = end - start
    hrs = int(total // 3600)
    mins = int((total % 3600) // 60)
    secs = int(total % 60)
    logging.info('End training.')
    logging.info(f'Total time: {hrs}hrs {mins}mins {secs}secs.')
Example #6
def main(args):
    config_path = os.path.join(args.model_dir, 'config.json')
    with open(config_path) as f:
        config = json.load(f)

    logging.info('loading embedding...')
    with open(config['model']['embedding'], 'rb') as f:
        embedding = pickle.load(f)
        config['model']['embedding'] = embedding.vectors

    logging.info('loading valid data...')
    with open(config['model']['valid'], 'rb') as f:
        config['model']['valid'] = pickle.load(f)

    logging.info('loading train data...')
    with open(config['train'], 'rb') as f:
        train = pickle.load(f)

    predictor = Predictor(arch=config['arch'],
                          device=args.device,
                          metrics=[Recall()],
                          **config['model'])

    if args.load is not None:
        predictor.load(args.load)

    model_checkpoint = ModelCheckpoint(
        os.path.join(args.model_dir, 'model.pkl'), **config['callbacks'])
    metrics_logger = MetricsLogger(os.path.join(args.model_dir, 'log.json'))

    logging.info('start training!')
    predictor.fit_dataset(train, train.collate_fn, model_checkpoint,
                          metrics_logger)
Example #7
File: kfolds.py Project: mluzu/iia
def lg_k_folds(X_train, y_train, lr, b, epochs, lamda, bias, k=5, verbose=False):
    results = {
        'accuracy': [],
        'recall': [],
        'precision': []
    }
    metric_means = {}
    accuracy = Accuracy()
    recall = Recall()
    precision = Precision()
    chunk_size = int(len(X_train) / k)

    logistic_regression = LogisticRegression(bias)

    for i in range(0, len(X_train), chunk_size):
        end = i + chunk_size if i + chunk_size <= len(X_train) else len(X_train)
        new_X_valid = X_train[i: end]
        new_y_valid = y_train[i: end]
        new_X_train = np.concatenate([X_train[: i], X_train[end:]])
        new_y_train = np.concatenate([y_train[: i], y_train[end:]])
        logistic_regression.fit(new_X_train, new_y_train, lr, b, epochs, lamda, verbose=verbose)
        predictions = logistic_regression.predict(new_X_valid)

        results['accuracy'].append(accuracy(new_y_valid, predictions))
        results['recall'].append(recall(new_y_valid, predictions))
        results['precision'].append(precision(new_y_valid, predictions))

    metric_means['accuracy'] = np.mean(results['accuracy'])
    metric_means['recall'] = np.mean(results['recall'])
    metric_means['precision'] = np.mean(results['precision'])

    return metric_means
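In this k-fold example the metrics are plain callables applied per fold. A minimal sketch consistent with the recall(y_true, y_pred) usage above, assuming binary {0, 1} numpy arrays (an illustration, not the project's metrics module):

import numpy as np

# Hypothetical callable-style Recall matching recall(y_true, y_pred) above.
class Recall:
    def __call__(self, y_true, y_pred):
        y_true = np.asarray(y_true).ravel()
        y_pred = np.asarray(y_pred).ravel()
        tp = np.sum((y_pred == 1) & (y_true == 1))  # true positives
        fn = np.sum((y_pred == 0) & (y_true == 1))  # false negatives
        return tp / (tp + fn) if (tp + fn) > 0 else 0.0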
Example #8
def main(args):
    config_path = os.path.join(args.model_dir, 'config.json')
    with open(config_path) as f:
        config = json.load(f)

    logging.info('loading embedding...')
    with open(config['model_parameters']['embedding'], 'rb') as f:
        embedding = pickle.load(f)
        config['model_parameters']['embedding'] = embedding.vectors

    logging.info('loading valid data...')
    with open(config['model_parameters']['valid'], 'rb') as f:
        config['model_parameters']['valid'] = pickle.load(f)

    logging.info('loading train data...')
    with open(config['train'], 'rb') as f:
        train = pickle.load(f)
    #print(train) #dataset.DialogDataset object
    #word2index = embedding.word_dict
    #index2word =  {v: k for k, v in word2index.items()}

    if config['arch'] == 'ExampleNet':
        #from example_predictor import ExamplePredictor
        #from rnn_predictor import RNNPredictor
        #from best_predictor import BestRNNAttPredictor
        from rnnatt_predictor import RNNAttPredictor
        #PredictorClass = ExamplePredictor
        #PredictorClass = RNNPredictor
        PredictorClass = RNNAttPredictor
        #PredictorClass = BestRNNAttPredictor

    #print("config['model_parameters']: ", config['model_parameters']) #it's a dict; {'valid': dataset.DialogDataset object, 'embedding':a big tensor}
    #print("**config['model_parameters']: ", **config['model_parameters'])
    predictor = PredictorClass(metrics=[Recall()],
                               **config['model_parameters'])
    # **dict : https://stackoverflow.com/questions/21809112/what-does-tuple-and-dict-means-in-python
    #input()
    if args.load is not None:
        predictor.load(args.load)

    model_checkpoint = ModelCheckpoint(
        os.path.join(args.model_dir,
                     'model_rnnatt_6_negative_samples_0324.pkl'), 'loss', 1,
        'all')
    metrics_logger = MetricsLogger(
        os.path.join(args.model_dir,
                     'log_rnnatt_6_negative_samples_0324.json'))

    logging.info('start training!')
    predictor.fit_dataset(train, train.collate_fn,
                          [model_checkpoint, metrics_logger])
Example #9
def metrics_dict(self, prefix="train"):
    if self.task == "classification":
        return {
            f"{prefix}_top1": Top(n=1),
            f"{prefix}_top5": Top(n=5),
            f"{prefix}_MRR": MRR()
        }
    elif self.task == "generation":
        ignore_idxs = (self.dm.target_eos_idx, self.dm.target_pad_idx)
        return {
            f"{prefix}_accuracy": Accuracy(),
            f"{prefix}_precision": Precision(ignore_idxs),
            f"{prefix}_recall": Recall(ignore_idxs),
            f"{prefix}_F1": F1(ignore_idxs)
        }
    else:
        raise ValueError(f"{self.task} task is not supported")
Example #10
def main(args):
    # load config
    config_path = os.path.join(args.model_dir, 'config.json')
    with open(config_path) as f:
        config = json.load(f)

    # load embedding
    logging.info('loading embedding...')
    with open('./src/embedding.pkl', 'rb') as f:
        embedding = pickle.load(f)
        config['model']['embedding'] = embedding.vectors

    predictor = Predictor(
        arch=config['arch'],
        device=args.device,
        metrics=[Recall()],
        **config['model']
    )
    
    if args.epoch is None:
        # use best model
        model_path = os.path.join(args.model_dir, 'model.pkl')
    else:
        # use specific epoch model
        model_path = os.path.join(args.model_dir, 'model.pkl.{}'.format(args.epoch))
        
    logging.info('loading model from {}'.format(model_path))
    predictor.load(model_path)
    
    preprocessor = Preprocessor(None)
    preprocessor.embedding = embedding
    
    logging.info('Processing test from {}'.format(args.test_path))
    test = preprocessor.get_dataset(args.test_path, 4)
    logging.info('predicting...')
    predicts = predictor.predict_dataset(test, test.collate_fn)

    output_path = args.pred_path
    write_predict_csv(predicts, test, output_path)
Example #11
def validation(dataloader, model, device):
    metric = Recall()
    criterion = torch.nn.BCELoss()
    valid_trange = tqdm(enumerate(dataloader),
                        total=len(dataloader),
                        desc='validation')
    model.eval()
    valid_loss = 0
    for i, batch in valid_trange:
        prob = run_iter(batch, model, device, training=False)
        answer = batch['label'].to(device)
        loss = criterion(prob, answer)
        valid_loss += loss.item()
        metric.update(prob, answer)
        valid_trange.set_postfix(loss=valid_loss / (i + 1),
                                 **{metric.name: metric.print_score()})
    return metric.get_f1()
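This validation loop relies on a Recall object that accumulates confusion counts batch by batch and can also report F1. A sketch of such a metric, assuming probabilities thresholded at 0.5 (the threshold and attribute names are assumptions for illustration):

import torch

# Hypothetical stateful Recall with the update/print_score/get_f1
# interface used above.
class Recall:
    name = 'Recall'

    def __init__(self, threshold=0.5):
        self.threshold = threshold
        self.tp = self.fp = self.fn = 0

    def update(self, prob, answer):
        pred = (prob.detach() > self.threshold).long()
        answer = answer.long()
        self.tp += ((pred == 1) & (answer == 1)).sum().item()
        self.fp += ((pred == 1) & (answer == 0)).sum().item()
        self.fn += ((pred == 0) & (answer == 1)).sum().item()

    def print_score(self):
        return '{:.4f}'.format(self.tp / max(self.tp + self.fn, 1))

    def get_f1(self):
        precision = self.tp / max(self.tp + self.fp, 1)
        recall = self.tp / max(self.tp + self.fn, 1)
        return 2 * precision * recall / max(precision + recall, 1e-8)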
Example #12
    batch_list = np.linspace(1, 30, 30)
    kfolds_b = np.zeros(batch_list.shape)
    for i, b in enumerate(batch_list):
        kfolds_b[i] = k_folds(X_train, y_train.reshape(-1, 1), best_lr, b, 100)
    best_b = batch_list[np.argmax(kfolds_b)]

    # Fit model and predict with optimized parameters
    logistic_regression = LogisticRegression()
    logistic_regression.fit(X_train, y_train.reshape(-1, 1), best_lr, best_b, 50000)
    print(logistic_regression.model)
    predictions = logistic_regression.predict(X_test)
    slope = -(logistic_regression.model[1] / logistic_regression.model[2])
    intercept = -(logistic_regression.model[0] / logistic_regression.model[2])

    # Metrics
    metrics = [Precision(), Accuracy(), Recall()]
    for metric in metrics:
        print('{metric}: {value}'.format(metric=metric.__class__.__name__, value=metric(y_test, predictions[:, 0])))

    # Graphics
    plt.figure(1)
    plt.scatter(dataset.dataset['x_1'], dataset.dataset['x_2'], c=dataset.dataset['y'])
    plt.xlabel('X1')
    plt.ylabel('X2')
    ax = plt.gca()
    y_vals = intercept + (slope * dataset.dataset['x_1'])
    ax.autoscale(False)
    plt.plot(dataset.dataset['x_1'], y_vals, c="k")
    plt.show()

    f, (ax, bx) = plt.subplots(2, 1, sharey='col')
Example #13
def main(args):
    config_path = os.path.join(args.model_dir, 'config.json')
    with open(config_path) as f:
        config = json.load(f)

    logging.info('loading embedding...')
    with open(config['model_parameters']['embedding'], 'rb') as f:
        embedding = pickle.load(f)
        config['model_parameters']['embedding'] = embedding.vectors

    logging.info('loading valid data...')
    with open(config['model_parameters']['valid'], 'rb') as f:
        config['model_parameters']['valid'] = pickle.load(f)

    logging.info('loading train data...')
    with open(config['train'], 'rb') as f:
        train = pickle.load(f)

    if config['arch'] == 'ExampleNet':
        #from modules import ExampleNet
        from predictors import ExamplePredictor
        PredictorClass = ExamplePredictor
        predictor = PredictorClass(
            metrics=[Recall(1), Recall(10)],
            batch_size=128,
            max_epochs=1000000,
            dropout_rate=0.2,
            learning_rate=1e-3,
            grad_accumulate_steps=1,
            loss='BCELoss',  #BCELoss, FocalLoss
            margin=0,
            threshold=None,
            similarity='MLP',  #inner_product, Cosine, MLP
            device=args.device,
            **config['model_parameters'])
    elif config['arch'] == 'RnnNet':
        from predictors import RnnPredictor
        PredictorClass = RnnPredictor
        predictor = PredictorClass(
            metrics=[Recall(1), Recall(10)],
            batch_size=512,
            max_epochs=1000000,
            dropout_rate=0.2,
            learning_rate=1e-3,
            grad_accumulate_steps=1,
            loss='FocalLoss',  #BCELoss, FocalLoss
            margin=0,
            threshold=None,
            similarity='Cosine',  #inner_product, Cosine, MLP
            device=args.device,
            **config['model_parameters'])

    elif config['arch'] == 'RnnAttentionNet':
        from predictors import RnnAttentionPredictor
        PredictorClass = RnnAttentionPredictor
        predictor = PredictorClass(
            metrics=[Recall(1), Recall(10)],
            batch_size=32,
            max_epochs=1000000,
            dropout_rate=0.2,
            learning_rate=1e-3,
            grad_accumulate_steps=1,
            loss='BCELoss',  #BCELoss, FocalLoss
            margin=0,
            threshold=None,
            similarity='MLP',  #inner_product, Cosine, MLP
            device=args.device,
            **config['model_parameters'])
    else:
        raise ValueError('Unknown config["arch"]: {}'.format(config['arch']))

    if args.load is not None:
        predictor.load(args.load)

    #def ModelCheckpoint(filepath, monitor='loss', verbose=0, mode='min')
    model_checkpoint = ModelCheckpoint(os.path.join(args.model_dir,
                                                    'model.pkl'),
                                       monitor='Recall@{}'.format(10),
                                       verbose=1,
                                       mode='all')
    metrics_logger = MetricsLogger(os.path.join(args.model_dir, 'log.json'))

    early_stopping = EarlyStopping(os.path.join(args.model_dir, 'model.pkl'),
                                   monitor='Recall@{}'.format(10),
                                   verbose=1,
                                   mode='max',
                                   patience=30)

    logging.info('start training!')
    predictor.fit_dataset(train, train.collate_fn,
                          [model_checkpoint, metrics_logger, early_stopping])
Example #14
            cv2.cvtColor(np.vstack((vis_pred, vis_true)), cv2.COLOR_BGR2RGB))

    y2_true = y2_true.astype('int64')
    y2_pred = np.squeeze(y2_pred > 0.5).astype('int64')

    TP, FP, FN, TN = tpfpfn(y1_pred, y1_true)
    TP_1 += TP
    FP_1 += FP
    FN_1 += FN
    TN_1 += TN

    TP, FP, FN, TN = tpfpfn(y2_pred, y2_true)

    mAccuracy_2 += Accuracy(TP, FP, FN, TN) / dlina
    mPrecision_2 += Precision(TP, FP) / dlina
    mRecall_2 += Recall(TP, FN) / dlina
    mIU_2 += IU(TP, FP, FN) / dlina
    mF1_2 += F1(TP, FP, FN) / dlina

mAccuracy_1 = Accuracy(TP_1, FP_1, FN_1, TN_1)
mPrecision_1 = Precision(TP_1, FP_1)
mRecall_1 = Recall(TP_1, FN_1)
mIU_1 = IU(TP_1, FP_1, FN_1)
mF1_1 = F1(TP_1, FP_1, FN_1)

print("CLASS accuracy: {}".format(mAccuracy_1))
print("CLASS precision: {}".format(mPrecision_1))
print("CLASS recall: {}".format(mRecall_1))
print("CLASS iu: {}".format(mIU_1))
print("CLASS f1: {}".format(mF1_1))
Example #15
with open(cfg_path) as f:
    cfg = edict(json.load(f))

if isinstance(cfg.batch_size, list) and isinstance(cfg.long_side, list):
    list_batch = cfg.batch_size
    list_res = cfg.long_side
elif isinstance(cfg.batch_size, int) and isinstance(cfg.long_side, int):
    list_batch = [cfg.batch_size]
    list_res = [cfg.long_side]
else:
    raise Exception("'batch_size' and 'long_side' in config file should be same instance!!!")

loss_func = BCEWithLogitsLoss()

# data_dir = '/home/tungthanhlee/bdi_xray/data/images'
data_dir = '/home/dual1/thanhtt/assigned_jpeg'
metrics_dict = {
    'acc': ACC(),
    'auc': AUC(),
    'precision': Precision(),
    'recall': Recall(),
    'specificity': Specificity(),
    'f1': F1()
}

model_names=[
    'dense',
    'dense',
    'dense',
    # 'resnet',
    # 'dense',
    # 'efficient',
    #'resnest'
    ]
ids = [
    '121',
    '121',
    '121',
    # '101',
Example #16
train_loader = create_loader(cfg.train_csv,
                             data_dir,
                             cfg,
                             mode='train',
                             dicom=False,
                             type=cfg.type)
val_loader = create_loader(cfg.dev_csv,
                           data_dir,
                           cfg,
                           mode='val',
                           dicom=False,
                           type=cfg.type)

# loss_func = BCELoss()
# loss_func = BCEWithLogitsLoss()
loss_func = MSELoss()

metrics_dict = {
    'auc': AUC(),
    'sensitivity': Recall(),
    'specificity': Specificity(),
    'f1': F1()
}
loader_dict = {'train': train_loader, 'val': val_loader}

#------------------------------- additional config for ensemble ---------------------------------------
model_names = [
    'dense',
    'resnet',
    'dense',
    # 'efficient',
    #'resnest'
]
ids = [
    '121',
Example #17
def main():
    '''
    Execute training
    '''
    import wandb
    if use_wandb:
        wandb.init(project="compvis_dtd_{}".format(model_name))
        wandb.config.update({
            "Model": model_name,
            "Learning Rate": lr,
            "Batch Size": batch_size,
            "Tiled": tiled,
            "Loss function": loss
        })
    else:
        from torch.utils.tensorboard import SummaryWriter
        writer = SummaryWriter()

    model, mask_size = get_model()
    train_loader, valid_loader = get_data_loaders(mask_size)

    loss_ = get_loss(mask_size)

    optimizer = torch.optim.Adam(params=model.parameters(), lr=lr)
    metrics = [
        Accuracy(),
        AccuracyT1(),
        Fscore(),
        Recall(),
        Precision(),
    ]

    train_epoch = TrainEpoch(model,
                             loss=loss_,
                             metrics=metrics,
                             optimizer=optimizer,
                             device=device,
                             verbose=True)
    valid_epoch = ValidEpoch(model,
                             loss=loss_,
                             metrics=metrics,
                             device=device,
                             verbose=True)

    best_loss = 9999
    count_not_improved = 0
    for i in range(max_num_epoch):

        train_logs = train_epoch.run(train_loader)
        valid_logs = valid_epoch.run(valid_loader)

        if best_loss > valid_logs[loss_.__name__] + 0.00005:
            best_loss = valid_logs[loss_.__name__]
            save_model(i, loss_, model, valid_logs)
            count_not_improved = 0
        else:
            count_not_improved += 1

        if i % 50 == 0:
            save_model(i, loss_, model, valid_logs)

        if use_wandb:
            logs = {
                "epoch": i,
                "train loss": train_logs[loss_.__name__],
                "valid loss": valid_logs[loss_.__name__]
            }
            for m in metrics:
                m_name = m.__name__
                logs["{} train".format(m_name)] = train_logs[m_name]
                logs["{} valid".format(m_name)] = valid_logs[m_name]
            wandb.log(logs)
        else:
            writer.add_scalar('{}/Loss/train'.format(model_name),
                              train_logs[loss_.__name__], i)
            writer.add_scalar('{}/Loss/valid'.format(model_name),
                              valid_logs[loss_.__name__], i)

            for m in metrics:
                m_name = m.__name__
                writer.add_scalar('{}/{}/train'.format(model_name, m_name),
                                  train_logs[m_name], i)
                writer.add_scalar('{}/{}/valid'.format(model_name, m_name),
                                  valid_logs[m_name], i)

        # Early stopping
        if with_early_stopping and count_not_improved > 3:
            print("Early stopping!")
            break
Example #18
        # results = dummy_vid_pred.dummy_video_preds(video_pred_path)

        vid_preds = parse_vid_preds(video_pred_path)
        vid_filter = VidFilter(vid_preds)
        dummy_seg = DummySegmentPrediction(
            "validate_strat_split/test",
            base_folder=Youtube8M.dream_segment_folder,
            whitelist=whitelist,
            vid_filter=vid_filter)
        results = dummy_seg.dummy_seg_preds_light(segment_pred_path)
        with open(cache_path, 'w') as f:
            json.dump(results, f)
    else:
        with open(cache_path, 'r') as f:
            results = json.load(f)
            results = {int(x): y for x, y in results.items()}
    from metrics import MAPK, Recall, Precision
    dataset = Youtube8M("validate_strat_split/test",
                        Youtube8M.dream_segment_folder)
    labels = dataset.load_label_map()
    mapk = MAPK(results, labels, pred_has_score=True)
    print(mapk.score_per_class())
    print(mapk.mean_class_score())

    recall = Recall(results, labels, pred_has_score=True)
    print(recall.score_per_class())
    print(recall.mean_class_score())

    precision = Precision(results, labels, pred_has_score=True)
    print(precision.score_per_class())
    print("precision: ", precision.mean_class_score())
Example #19
def main(**kwargs):
    kwargs.setdefault('data_size', 500)
    kwargs.setdefault('epochs', 600)
    kwargs.setdefault('learning_rate', 0.001)
    kwargs.setdefault('patience', None)
    kwargs.setdefault('ewc', 0)
    kwargs.setdefault('batch_size', 128)
    kwargs.setdefault('cuda', None)
    kwargs.setdefault('dry_run', False)
    kwargs.setdefault('name', None)
    kwargs.setdefault('seed', 1337)
    kwargs.setdefault('verbose', 'WARN')
    kwargs.setdefault('tasks', ['+mnist', '-mnist'])
    args = SimpleNamespace(**kwargs)

    logging.basicConfig(
        level=args.verbose,
        style='{',
        format='[{levelname:.4}][{asctime}][{name}:{lineno}] {msg}',
    )

    logger.debug('parameters of this experiment')
    for key, val in args.__dict__.items():
        logger.debug(f' {key:.15}: {val}')

    seed(args.seed)

    datasets = {
        'mnist': MNIST(),
        'fashion': FashionMNIST(),
    }

    if args.name is None:
        now = np.datetime64('now')
        args.name = f'exp-{now}'
        logger.info(f'experiment name not given, defaulting to {args.name}')

    # In some cases, we must move the network to its cuda device before
    # constructing the optimizer. This is annoying, and this logic is
    # duplicated in the estimator class. Ideally, I'd like the estimator to
    # handle cuda allocation _after_ the optimizer has been constructed...
    net = AlexNet(10, shape=(1, 27, 27))
    if args.cuda is None:
        args.cuda = 0 if torch.cuda.is_available() else False
    if args.cuda is not False:
        net = net.cuda(args.cuda)

    opt = O.Adagrad(net.parameters(),
                    lr=args.learning_rate,
                    weight_decay=0.004)
    loss = N.CrossEntropyLoss()
    model = EwcClassifier(net,
                          opt,
                          loss,
                          name=args.name,
                          cuda=args.cuda,
                          dry_run=args.dry_run)

    for task in args.tasks:
        data = datasets[task[1:]]
        train, test = data.load()

        if task[0] == '+':
            print(f'-------- Fitting {task[1:]} --------')
            model.fit(train,
                      epochs=args.epochs,
                      patience=args.patience,
                      batch_size=args.batch_size)
            model.consolidate(train,
                              alpha=args.ewc,
                              batch_size=args.batch_size)
            print()

        if task[0] == '-':
            print(f'-------- Scoring {task[1:]} --------')
            scores = {
                'accuracy': Accuracy(),
                'true positives': TruePositives(),
                'false positives': FalsePositives(),
                'true negatives': TrueNegatives(),
                'false negatives': FalseNegatives(),
                'precision': Precision(),
                'recall': Recall(),
                'f-score': FScore(),
            }
            for metric, criteria in scores.items():
                score = model.test(test, criteria, batch_size=args.batch_size)
                print(f'{metric:15}: {score}')
            print()
Example #20
    def __init__(self, args):
        logging.basicConfig(format='%(asctime)s | %(levelname)s | %(message)s',
                            level=logging.INFO,
                            datefmt='%Y-%m-%d %H:%M:%S')

        logging.info('Initiating task: %s' % args.taskname)

        self.config = config(args)
        if not all([os.path.isfile(i) for i in self.config.pickle_files]):
            logging.info('Preprocessing data...')
            if args.pick == 'neg4':
                build_processed_data(self.config.datadir,
                                     self.config.pickledir,
                                     neg_num=4)
            elif args.pick == 'last':
                build_processed_data(self.config.datadir,
                                     self.config.pickledir,
                                     last=True)
            elif args.pick == 'difemb':
                build_processed_data(self.config.datadir,
                                     self.config.pickledir,
                                     difemb=True)

            else:
                build_processed_data(self.config.datadir,
                                     self.config.pickledir)

        else:
            logging.info('Preprocessing already done.')

        with open(os.path.join(self.config.pickledir, 'embedding.pkl'),
                  'rb') as f:
            embedding = pickle.load(f)
            embedding = embedding.vectors
        self.embedding_dim = embedding.size(1)
        self.embedding = torch.nn.Embedding(embedding.size(0),
                                            embedding.size(1))
        self.embedding.weight = torch.nn.Parameter(embedding)

        self.Modelfunc = {
            'lin': LinearNet,
            'rnn': RnnNet,
            'att': RnnAttentionNet,
            'best': BestNet,
            'gru': GruNet,
            'last': LastNet,
        }

        if os.path.exists(self.config.outputdir):
            if not args.resume:
                logging.info(
                    'Warning: task already exists; pass --resume True to resume. Exiting.')
                sys.exit(0)
            else:
                logging.info('Resuming....')
                with open(self.config.modeltype_path, 'r') as f:
                    resume_type = f.read()
                self.model = self.Modelfunc[resume_type]
                logging.info('model type is %s, model to be constructed' %
                             resume_type)
        else:
            os.mkdir(self.config.outputdir)
            with open(self.config.modeltype_path, 'w') as f:
                f.write(args.modeltype)
            self.model = self.Modelfunc[args.modeltype](self.embedding_dim)
            logging.info('model type is %s, model created' % args.modeltype)

        model_checkpoint = ModelCheckpoint(self.config.modelpath, 'loss', 1,
                                           'all')
        metrics_logger = MetricsLogger(self.config.logpath)

        if args.resume:
            self.config.start_epoch = metrics_logger.load()
            if args.resume_epoch != -1:
                self.config.resumepath = self.config.modelpath + '.%d' % args.resume_epoch
            else:
                self.config.resumepath = self.config.modelpath + '.%d' % (
                    self.config.start_epoch - 1)

            self.model = self.model(self.embedding_dim)
            self.model.load_state_dict(torch.load(self.config.resumepath))
            logging.info('config loaded, model constructed and loaded')

        print(self.model)

        logging.info('loading dataloaders')
        self.trainloader, self.testloader, self.validloader = make_dataloader(
            self.config.pickledir)

        self.metrics = [Recall()]
        self.callbacks = [model_checkpoint, metrics_logger]

        self.device = 'cuda' if torch.cuda.is_available() else 'cpu'
        if self.device == 'cuda':
            self.model.to(self.device)
            self.embedding.to(self.device)

        self.criterion = torch.nn.BCEWithLogitsLoss()
        self.optimizer = torch.optim.Adam(self.model.parameters(),
                                          lr=self.config.lr)
Example #21
            FP_direct += FP
            FN_direct += FN
            TN_direct += TN
        elif cl == 2:
            TP_alternative += TP
            FP_alternative += FP
            FN_alternative += FN
            TN_alternative += TN
        TP_mean_segm += TP
        FP_mean_segm += FP
        FN_mean_segm += FN
        TN_mean_segm += TN
   
mAccuracy_0 = Accuracy(TP_0, FP_0, FN_0, TN_0)
mPrecision_0 = Precision(TP_0, FP_0)
mRecall_0 = Recall(TP_0, FN_0)
mIU_0 = IU(TP_0, FP_0, FN_0)
mF1_0 = F1(TP_0, FP_0, FN_0)

mAccuracy_1 = Accuracy(TP_1, FP_1, FN_1, TN_1)
mPrecision_1 = Precision(TP_1, FP_1)
mRecall_1 = Recall(TP_1, FN_1)
mIU_1 = IU(TP_1, FP_1, FN_1)
mF1_1 = F1(TP_1, FP_1, FN_1)

mAccuracy_2 = Accuracy(TP_2, FP_2, FN_2, TN_2)
mPrecision_2 = Precision(TP_2, FP_2)
mRecall_2 = Recall(TP_2, FN_2)
mIU_2 = IU(TP_2, FP_2, FN_2)
mF1_2 = F1(TP_2, FP_2, FN_2)
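Examples #14 and #21 treat the metrics as plain functions of confusion-matrix counts rather than stateful objects. Reconstructed from their call signatures, the standard formulas would look like this (a sketch under that assumption, not the projects' exact code):

def Accuracy(TP, FP, FN, TN):
    total = TP + FP + FN + TN
    return (TP + TN) / total if total else 0.0

def Precision(TP, FP):
    return TP / (TP + FP) if (TP + FP) else 0.0

def Recall(TP, FN):
    return TP / (TP + FN) if (TP + FN) else 0.0

def IU(TP, FP, FN):
    # Intersection over union for the positive class.
    return TP / (TP + FP + FN) if (TP + FP + FN) else 0.0

def F1(TP, FP, FN):
    p, r = Precision(TP, FP), Recall(TP, FN)
    return 2 * p * r / (p + r) if (p + r) else 0.0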
Example #22
from unet_rcnn import UnetRCNN
from metrics import Accuracy, Precision, Recall, AvgMeterWrapper, ConfusionMatrix, MetricWrapper, Auc, MultiLabelIoU, MultiLabelAccuracy
from loss import RgLoss
from dataset import RgLoader
__all__ = ['model_dict', 'loader_dict', 'loss_dict', 'metric_dict']

model_dict = {
    'UnetRCNN': UnetRCNN,
}

loader_dict = {'RgLoader': RgLoader}

loss_dict = {'RgLoss': RgLoss}

metric_dict = {
    'cls': {
        'Accuracy': AvgMeterWrapper(Accuracy()),
        'Specificity & Recall': AvgMeterWrapper(Recall()),
        'NPV & Precision': AvgMeterWrapper(Precision()),
        'AUC': MetricWrapper(metric=Auc(), idx=1),
        'Confusion_Matrix': ConfusionMatrix(2)
    },
    'seg': {
        'IoU': AvgMeterWrapper(MultiLabelIoU())
    },
    'kf': {
        'Localization_Accuracy': AvgMeterWrapper(MultiLabelAccuracy())
    },
}
Example #23
with open(cfg_path) as f:
    cfg = edict(json.load(f))

data_dir = '/home/tungthanhlee/thanhtt/assigned_jpeg'

train_loader = create_loader(cfg.train_csv, data_dir, cfg, mode='train')
val_loader = create_loader(cfg.dev_csv, data_dir, cfg, mode='val')

loss_func = BCEWithLogitsLoss()

metrics_dict = {
    'acc': ACC(),
    'auc': AUC(),
    'f1': F1(),
    'precision': Precision(),
    'recall': Recall()
}
loader_dict = {'train': train_loader, 'val': val_loader}

chexpert_model = CheXpert_model(cfg, loss_func, metrics_dict)
# print(chexpert_model.model)

# chexpert_model.load_ckp(cfg.ckp_path)
# print(chexpert_model.model.state_dict().keys())
# chexpert_model.save_backbone('experiment/Resnet101_chexmic/checkpoint/backbone.ckpt')
# chexpert_model.freeze_backbone()

writer = SummaryWriter(os.path.join('experiment', cfg.log_dir))
ckp_dir = os.path.join('experiment', cfg.log_dir, 'checkpoint')

chexpert_model.train(train_loader,