Example #1
from argparse import ArgumentParser

import yaml

# Project-specific helpers (get_run_name, Logger, load_model, load_dataset,
# load_metrics_computers, run_experiment) are assumed importable from the
# surrounding package.


def main():
    """Gets config file, loads specified components, and runs experiment."""
    # Get overall config dict.
    parser = ArgumentParser()
    parser.add_argument("config")
    config_file = parser.parse_args().config
    with open(config_file) as f:
        config = yaml.safe_load(f)

    # Initialize logger.
    run_name = get_run_name(config)
    print(f"Running {run_name}")
    logger = Logger(config["logger"], run_name, config_file)
    logger.p(f"Using config file {config_file}.")

    logger.p("Loading model...")
    model = load_model(config["model"])

    logger.p("Loading dataset...")
    dataset = load_dataset(config["dataset"])

    logger.p("Loading metrics...")
    metric_computers = load_metrics_computers(config["metrics"])

    logger.p("Running experiment...")
    run_experiment(model, dataset, logger, metric_computers,
                   config["experiment"])

    logger.p("Done! Closing logger...")
    logger.close()

    print("Have a nice day!")
Example #2
import os
import random as rn

import numpy as np
import torch as tr
import torch.utils.data as dt
from sklearn.metrics import auc, precision_recall_curve

# Project-specific pieces (ParameterParser, FoldDataset,
# ImbalancedDatasetSampler, mirDNN, Logger) are assumed importable.


def main(argv):
    pp = ParameterParser(argv)
    if pp.random_seed is not None:
        rn.seed(pp.random_seed)
        np.random.seed(pp.random_seed)
        tr.manual_seed(pp.random_seed)
    if pp.device.type == 'cuda':
        if pp.random_seed is not None:
            # Trade cuDNN autotuning for reproducible kernels when seeded.
            tr.backends.cudnn.deterministic = True
            tr.backends.cudnn.benchmark = False
        else:
            tr.backends.cudnn.deterministic = False
            tr.backends.cudnn.benchmark = True

    dataset = FoldDataset(pp.input_files, pp.seq_len)
    valid_size = int(pp.valid_prop * len(dataset))
    train, valid = dt.random_split(dataset,
                                   (len(dataset) - valid_size, valid_size))

    # DataLoader's shuffle and sampler options are mutually exclusive, so the
    # upsampling branch lets the sampler alone randomize the batches.
    if pp.upsample:
        train_sampler = ImbalancedDatasetSampler(train,
                                                 max_imbalance=1.0,
                                                 num_samples=8 * pp.batch_size)
        train_loader = dt.DataLoader(train,
                                     batch_size=pp.batch_size,
                                     sampler=train_sampler,
                                     pin_memory=True)
    else:
        train_loader = dt.DataLoader(train,
                                     batch_size=pp.batch_size,
                                     shuffle=True,
                                     pin_memory=True)

    valid_loader = dt.DataLoader(valid,
                                 batch_size=pp.batch_size,
                                 pin_memory=True)

    model = mirDNN(pp)
    model.train()
    log = Logger(pp.logfile)

    if pp.model_file is not None and os.path.isfile(pp.model_file):
        model.load(pp.model_file)

    log.write('epoch\ttrainLoss\tvalidAUC\tlast_imp\n')
    epoch = 0
    train_loss = 100
    valid_auc = 0
    best_valid_auc = 0
    last_improvement = 0
    while last_improvement < pp.early_stop:
        nbatch = 0
        for x, v, y in train_loader:
            new_loss = model.train_step(x, v, y)
            # Exponentially smoothed training loss.
            train_loss = 0.99 * train_loss + 0.01 * new_loss
            nbatch += 1
            if nbatch >= 1000:  # cap each epoch at 1000 batches
                break

        preds, labels = tr.Tensor([]), tr.Tensor([])
        for x, v, y in valid_loader:
            z = model(x, v).cpu().detach()
            preds = tr.cat([preds, z.squeeze()])
            labels = tr.cat([labels, y.squeeze()])
        pr, rc, _ = precision_recall_curve(labels, preds)
        # Exponentially smoothed validation AUC (weights sum to one).
        valid_auc = 0.1 * auc(rc, pr) + 0.9 * valid_auc

        last_improvement += 1
        if valid_auc > best_valid_auc:
            best_valid_auc = valid_auc
            last_improvement = 0
            model.save(pp.model_file)

        log.write('%d\t%.4f\t%.4f\t%d\n' %
                  (epoch, train_loss, valid_auc, last_improvement))
        epoch += 1
    log.close()
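Both train_loss and valid_auc above are tracked as exponential moving averages, which is why each update's weights sum to one (0.99/0.01 for the loss, 0.9/0.1 for the AUC). A minimal standalone sketch of the same update rule, with invented sample values:

def ema(values, alpha, init=0.0):
    """Exponential moving average with smoothing factor alpha."""
    smoothed = init
    for v in values:
        smoothed = (1 - alpha) * smoothed + alpha * v
    return smoothed

# Invented AUC readings; alpha=0.1 mirrors the valid_auc update above.
print(ema([0.90, 0.80, 0.70], alpha=0.1))  # 0.2149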