def run_a_trial(args):
    """Run one TPE meta optimisation step and save its results."""
    results_pickle_file = os.path.join(RESULTS_DIR,
                                       "{}.pkl".format(args.dataset))

    max_evals = nb_evals = 1
    print("Attempt to resume a past training if it exists:")
    try:
        # https://github.com/hyperopt/hyperopt/issues/267
        with open(results_pickle_file, "rb") as f:
            trials = pickle.load(f)
        print("Found saved Trials! Loading...")
        max_evals = len(trials.trials) + nb_evals
        print("Rerunning from {} trials to add another one.".format(
            len(trials.trials)))
    except (FileNotFoundError, EOFError, pickle.UnpicklingError):
        trials = Trials()
        print("Starting from scratch: new trials.")

    best = fmin(get_optimizer(args.dataset, args.device),
                Model.HYPERPARAMETERS_SPACE,
                algo=tpe.suggest,
                trials=trials,
                max_evals=max_evals)
    with open(results_pickle_file, "wb") as f:
        pickle.dump(trials, f)

    print("\nOPTIMIZATION STEP COMPLETE.\n")
Example 2
def get_best_threshold(ensemble_model):
    """Run through a validation dataset, get best iou threshold."""
    train_gen, val_gen = get_generators()

    # Add metrics
    ensemble_model.compile(loss='binary_crossentropy', optimizer=get_optimizer(), metrics=get_metric(scan=True))
    preds_test = ensemble_model.evaluate_generator(generator=val_gen, verbose=True)
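    # NOTE: Keras' evaluate_generator returns [loss, metric_1, ..., metric_n];
    # the scan below assumes one entry per candidate IoU threshold.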

    # Now, simply fetch the right threshold
    ts = np.linspace(0, 1, len(preds_test))
    best = ts[np.argmax(preds_test)]

    print("Found {} to be the best threshold!" % best)

    return best
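
A hypothetical follow-up, only to show how the returned threshold would typically be used: binarise the ensemble's sigmoid outputs (x_test is a placeholder, not a name from the original code).

best_threshold = get_best_threshold(ensemble_model)
masks = (ensemble_model.predict(x_test) > best_threshold).astype(np.uint8)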
Example 3
def run_possibilities(dataset_path, logs_path, possibilities):
    x_train_labeled, x_train_unlabeled, y_train_labeled, x_val, y_val = get_data(
        dataset_path=dataset_path,
        normalization=NORMALIZATION,
        unlabeled_percentage=UNLABELED_PERCENTAGE,
        seed=SEED)
    _, evaluation_mapping, _ = timit.get_phone_mapping()
    n_classes = get_number_of_classes()

    for consistency_loss, schedule, sigma, consistency_scale, stabilization_scale, xi in possibilities:
        hparams = {
            'consistency_loss': consistency_loss,
            'schedule': schedule,
            'sigma': sigma,
            'consistency_scale': consistency_scale,
            'stabilization_scale': stabilization_scale,
            'xi': xi
        }

        for k, v in hparams.items():
            print(f'{k}={v}, ', end='')
        print()

        config = Config(version='mono_directional',
                        n_hidden_layers=N_HIDDEN_LAYERS,
                        n_units=N_UNITS,
                        n_epochs=N_EPOCHS,
                        batch_size=BATCH_SIZE,
                        unlabeled_percentage=UNLABELED_PERCENTAGE,
                        optimizer=OPTIMIZER,
                        consistency_loss=consistency_loss,
                        consistency_scale=consistency_scale,
                        stabilization_scale=stabilization_scale,
                        xi=xi,
                        sigma=sigma,
                        schedule=schedule,
                        schedule_length=SCHEDULE_LENGTH,
                        normalization=NORMALIZATION,
                        seed=SEED)

        logs_path_ = logs_path / str(config)
        if logs_path_.is_dir():  # skip runs that are already done (e.g. in case of crashes)
            print('already done, skipping...')
            continue
        logs_path_.mkdir(parents=True)
        logs_path_ = str(logs_path_)

        model = DualStudent(n_classes=n_classes,
                            n_hidden_layers=config.n_hidden_layers,
                            n_units=config.n_units,
                            consistency_loss=config.consistency_loss,
                            consistency_scale=config.consistency_scale,
                            stabilization_scale=config.stabilization_scale,
                            xi=config.xi,
                            padding_value=PADDING_VALUE,
                            sigma=config.sigma,
                            schedule=config.schedule,
                            schedule_length=config.schedule_length,
                            version=config.version)

        model.compile(optimizer=get_optimizer(config.optimizer))

        model.train(x_labeled=x_train_labeled,
                    x_unlabeled=x_train_unlabeled,
                    y_labeled=y_train_labeled,
                    n_epochs=config.n_epochs,
                    batch_size=config.batch_size,
                    seed=config.seed)

        results = model.test(x=x_val,
                             y=y_val,
                             batch_size=config.batch_size,
                             evaluation_mapping=evaluation_mapping)

        with tf.summary.create_file_writer(logs_path_).as_default():
            hp.hparams(hparams)
            for k, v in results.items():
                tf.summary.scalar(k, v, step=N_EPOCHS)
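
The final block above writes each configuration and its test metrics to TensorBoard's HParams plugin so runs can be compared side by side. A minimal, self-contained sketch of the same logging pattern with hypothetical values:

import tensorflow as tf
from tensorboard.plugins.hparams import api as hp

with tf.summary.create_file_writer('logs/example_run').as_default():
    hp.hparams({'sigma': 0.1, 'xi': 1e-6})                 # hyperparameters of this run
    tf.summary.scalar('phone_error_rate', 0.35, step=100)  # a final metric for the run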
Example 4
def train_ray(opt, checkpoint_dir=None, data_dir="../data"):
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    data = get_dataset(opt)
    g = data[0]
    if opt['gpu'] < 0:
        cuda = False
    else:
        cuda = True
        g = g.int().to(opt['gpu'])

    features = g.ndata['feat']
    labels = g.ndata['label']
    train_mask = g.ndata['train_mask']
    val_mask = g.ndata['val_mask']
    test_mask = g.ndata['test_mask']
    num_feats = features.shape[1]
    n_classes = data.num_classes
    n_edges = data.graph.number_of_edges()
    print("""----Data statistics------'
    #Edges %d
    #Classes %d
    #Train samples %d
    #Val samples %d
    #Test samples %d""" %
          (n_edges, n_classes, train_mask.int().sum().item(),
           val_mask.int().sum().item(), test_mask.int().sum().item()))

    # add self loop
    g = dgl.remove_self_loop(g)
    g = dgl.add_self_loop(g)
    n_edges = g.number_of_edges()
    # create model
    heads = ([opt['num_heads']] * opt['num_layers']) + [opt['num_out_heads']]

    models = []
    optimizers = []
    datas = [g for i in range(opt['num_init'])]

    for split in range(opt['num_init']):
        if opt['model'] == 'GAT':
            model = GAT(g, opt['num_layers'], num_feats, opt['num_hidden'],
                        n_classes, heads, F.elu, opt['in_drop'],
                        opt['attn_drop'], opt['negative_slope'],
                        opt['residual'], opt)
        elif opt['model'] == 'AGNN':
            model = AGNN(g, opt['num_layers'], num_feats, opt['num_hidden'],
                         n_classes, opt['in_drop'], opt)

        train_this = train
        model = model.to(device)
        models.append(model)

        if torch.cuda.device_count() > 1:
            model = nn.DataParallel(model)

        # model = model.to(device)
        parameters = [p for p in model.parameters() if p.requires_grad]

        optimizer = get_optimizer(opt['optimizer'],
                                  parameters,
                                  lr=opt['lr'],
                                  weight_decay=opt['weight_decay'])
        optimizers.append(optimizer)

        # The `checkpoint_dir` parameter gets passed by Ray Tune when a checkpoint
        # should be restored.
        if checkpoint_dir:
            checkpoint = os.path.join(checkpoint_dir, "checkpoint")
            model_state, optimizer_state = torch.load(checkpoint)
            model.load_state_dict(model_state)
            optimizer.load_state_dict(optimizer_state)

    for epoch in range(1, opt['epochs']):
        loss = np.mean([
            train_this(model, optimizer, features, train_mask,
                       labels)[0].item()
            for model, optimizer in zip(models, optimizers)
        ])
        train_accs, val_accs, tmp_test_accs = average_test(models, datas)
        with tune.checkpoint_dir(step=epoch) as checkpoint_dir:
            best = np.argmax(val_accs)
            path = os.path.join(checkpoint_dir, "checkpoint")
            torch.save(
                (models[best].state_dict(), optimizers[best].state_dict()),
                path)
        tune.report(loss=loss,
                    accuracy=np.mean(val_accs),
                    test_acc=np.mean(tmp_test_accs),
                    train_acc=np.mean(train_accs))
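
train_ray follows Ray Tune's function-trainable signature (the config dict as first argument plus an optional checkpoint_dir), so it would typically be launched through tune.run. A sketch with a hypothetical search space; the keys only mirror a few of the entries read from opt above:

from functools import partial
from ray import tune

analysis = tune.run(
    partial(train_ray, data_dir="../data"),
    config={
        'lr': tune.loguniform(1e-4, 1e-1),
        'weight_decay': tune.loguniform(1e-6, 1e-3),
        # ...plus the remaining keys train_ray expects in opt...
    },
    num_samples=10)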
Example 5
def train_ray_int(opt, checkpoint_dir=None, data_dir="../data"):

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    data = get_dataset(opt)
    g = data[0]
    if opt['gpu'] < 0:
        cuda = False
    else:
        cuda = True
        g = g.int().to(opt['gpu'])

    # if opt["num_splits"] > 0:
    #   dataset.data = set_train_val_test_split(
    #     23 * np.random.randint(0, opt["num_splits"]),  # random prime 23 to make the splits 'more' random. Could remove
    #     dataset.data,
    #     num_development=5000 if opt["dataset"] == "CoauthorCS" else 1500)

    features = g.ndata['feat']
    labels = g.ndata['label']
    train_mask = g.ndata['train_mask']
    val_mask = g.ndata['val_mask']
    test_mask = g.ndata['test_mask']
    num_feats = features.shape[1]
    n_classes = data.num_classes
    n_edges = data.graph.number_of_edges()
    print("""----Data statistics------'
  #Edges %d
  #Classes %d
  #Train samples %d
  #Val samples %d
  #Test samples %d""" %
          (n_edges, n_classes, train_mask.int().sum().item(),
           val_mask.int().sum().item(), test_mask.int().sum().item()))

    # add self loop
    g = dgl.remove_self_loop(g)
    g = dgl.add_self_loop(g)
    n_edges = g.number_of_edges()
    # create model
    heads = ([opt['num_heads']] * opt['num_layers']) + [opt['num_out_heads']]
    if opt['model'] == 'GAT':
        model = GAT(g, opt['num_layers'], num_feats, opt['num_hidden'],
                    n_classes, heads, F.elu, opt['in_drop'], opt['attn_drop'],
                    opt['negative_slope'], opt['residual'], opt)
    elif opt['model'] == 'AGNN':
        model = AGNN(g, opt['num_layers'], num_feats, opt['num_hidden'],
                     n_classes, opt['in_drop'], opt)

    model = model.to(device)
    if torch.cuda.device_count() > 1:
        model = nn.DataParallel(model)
    parameters = [p for p in model.parameters() if p.requires_grad]
    optimizer = get_optimizer(opt["optimizer"],
                              parameters,
                              lr=opt["lr"],
                              weight_decay=opt["weight_decay"])

    if checkpoint_dir:
        checkpoint = os.path.join(checkpoint_dir, "checkpoint")
        model_state, optimizer_state = torch.load(checkpoint)
        model.load_state_dict(model_state)
        optimizer.load_state_dict(optimizer_state)
    train_this = train
    this_test = test_OGB if opt['dataset'] == 'ogbn-arxiv' else test
    best_time = best_epoch = train_acc = val_acc = test_acc = 0
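    # Keep the train/test accuracy observed at the epoch with the best validation accuracy so far.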
    for epoch in range(1, opt["epoch"]):
        # loss = train(model, optimizer, data)
        loss = train_this(model, optimizer, features, train_mask,
                          labels)[0].item()
        if opt["no_early"]:
            tmp_train_acc, tmp_val_acc, tmp_test_acc = this_test(model, g)
            best_time = opt['time']
        else:
            tmp_train_acc, tmp_val_acc, tmp_test_acc = this_test(model, g)
        if tmp_val_acc > val_acc:
            best_epoch = epoch
            train_acc = tmp_train_acc
            val_acc = tmp_val_acc
            test_acc = tmp_test_acc
        with tune.checkpoint_dir(step=epoch) as checkpoint_dir:
            path = os.path.join(checkpoint_dir, "checkpoint")
            torch.save((model.state_dict(), optimizer.state_dict()), path)
        tune.report(loss=loss,
                    accuracy=val_acc,
                    test_acc=test_acc,
                    train_acc=train_acc,
                    best_time=best_time,
                    best_epoch=best_epoch)
Example 6
def predict_dl_model(data_type,
                     variation,
                     input_level,
                     word_embed_type,
                     word_embed_trainable,
                     batch_size,
                     learning_rate,
                     optimizer_type,
                     model_name,
                     checkpoint_dir=None,
                     return_proba=True,
                     **kwargs):
    config = ModelConfig()
    config.variation = variation
    config.input_level = input_level
    if '_aug' in variation:
        config.max_len = {
            'word': config.aug_word_max_len,
            'char': config.aug_char_max_len
        }
    config.word_embed_type = word_embed_type
    config.word_embed_trainable = word_embed_trainable
    config.word_embeddings = np.load(
        format_filename(PROCESSED_DATA_DIR,
                        EMBEDDING_MATRIX_TEMPLATE,
                        variation=variation,
                        type=word_embed_type))
    config.batch_size = batch_size
    config.learning_rate = learning_rate
    config.optimizer = get_optimizer(optimizer_type, learning_rate)
    if checkpoint_dir is not None:
        config.checkpoint_dir = checkpoint_dir
    config.exp_name = '{}_{}_{}_{}_{}'.format(
        variation, model_name, input_level, word_embed_type,
        'tune' if word_embed_trainable else 'fix')

    print('Logging Info - Experiment: ', config.exp_name)
    if model_name == 'bilstm':
        model = BiLSTM(config, **kwargs)
    elif model_name == 'cnnrnn':
        model = CNNRNN(config, **kwargs)
    elif model_name == 'dcnn':
        model = DCNN(config, **kwargs)
    elif model_name == 'dpcnn':
        model = DPCNN(config, **kwargs)
    elif model_name == 'han':
        model = HAN(config, **kwargs)
    elif model_name == 'multicnn':
        model = MultiTextCNN(config, **kwargs)
    elif model_name == 'rcnn':
        model = RCNN(config, **kwargs)
    elif model_name == 'rnncnn':
        model = RNNCNN(config, **kwargs)
    elif model_name == 'cnn':
        model = TextCNN(config, **kwargs)
    elif model_name == 'vdcnn':
        model = VDCNN(config, **kwargs)
    else:
        raise ValueError('Model Name Not Understood: {}'.format(model_name))

    model_save_path = path.join(config.checkpoint_dir,
                                '{}.hdf5'.format(config.exp_name))
    if not path.exists(model_save_path):
        raise FileNotFoundError('Model Not Found: {}'.format(model_save_path))
    # load the best model
    model.load_best_model()

    data = load_processed_data(variation, input_level, data_type)

    if data is None:
        return None, config.exp_name

    if return_proba:
        return model.predict_proba(data), config.exp_name
    else:
        return model.predict(data), config.exp_name
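
A hypothetical call, purely to illustrate the argument order; the concrete values below are placeholders rather than settings from the original project:

probas, exp_name = predict_dl_model(data_type='dev', variation='simplified',
                                    input_level='word', word_embed_type='w2v_data',
                                    word_embed_trainable=False, batch_size=32,
                                    learning_rate=0.001, optimizer_type='adam',
                                    model_name='bilstm')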
Example 7
logging.basicConfig(format='%(asctime)s %(module)s - %(funcName)s: %(message)s',
                    datefmt='%Y-%m-%d %H:%M:%S')
logging.getLogger().setLevel(logging.INFO)

if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='Restoring training run from a given checkpoint')
    parser.add_argument('--checkpoint-path', help="The path of the checkpoint file", required=True)
    args = parser.parse_args()
    checkpoint = torch.load(args.checkpoint_path)
    opts = checkpoint['opts']

    logging.info("Loading the data")
    model_opts = create_model_opts(opts)
    train_data, test_data = get_data(opts, model_opts)

    logging.info("Restore the {0} model to epoch {1} on {2} dataset(Loss:{3}, train accuracy:{4})".format(opts.model, checkpoint["epoch"], opts.data, checkpoint["loss"], checkpoint["train_accuracy"]))
    model = models.get_model(opts, datasets_info[opts.data], pretrained=False)
    model.load_state_dict(checkpoint['model_state_dict'])
    model.train()

    optimizer, lr_scheduler = get_optimizer(opts, model)
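    # Restore the optimiser state (e.g. momentum buffers) saved alongside the model weights.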
    optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
    training_model = convert_to_ipu_model(model, opts, optimizer)


    train(training_model, train_data, opts, lr_scheduler, range(checkpoint["epoch"]+1, opts.epoch+1), optimizer)

    checkpoint_folder = os.path.dirname(os.path.realpath(args.checkpoint_path))
    checkpoint_files = [os.path.join(checkpoint_folder, file_name) for file_name in os.listdir(checkpoint_folder)]
    validate_checkpoints(checkpoint_files, test_data=test_data)
Example 8
    opts = checkpoint['opts']
    utils.Logger.setup_logging_folder(opts)

    logging.info("Loading the data")
    model_opts = create_model_opts(opts)
    train_data = data.get_data(opts, model_opts, train=True, async_dataloader=True)
    if not opts.no_validation:
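        # Separate poptorch options for validation: enough device iterations to cover the
        # pipeline stages, replicated over the requested number of IPUs.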
        inference_model_opts = poptorch.Options().deviceIterations(max(opts.device_iterations, 1+len(opts.pipeline_splits)))
        inference_model_opts.replicationFactor(opts.replicas)
        test_data = data.get_data(opts, inference_model_opts, train=False, async_dataloader=True)

    logging.info(f"Restore the {opts.model} model to epoch {checkpoint['epoch']} on {opts.data} dataset(Loss:{checkpoint['loss']}, train accuracy:{checkpoint['train_accuracy']})")
    model = models.get_model(opts, data.datasets_info[opts.data], pretrained=False)
    model.load_state_dict(checkpoint['model_state_dict'])
    model.train()

    optimizer = get_optimizer(opts, model)
    lr_scheduler = get_lr_scheduler(opts, optimizer)

    optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
    # set the LR scheduler to the correct position
    lr_scheduler.last_epoch += checkpoint["epoch"]
    training_model = convert_to_ipu_model(model, opts, optimizer)


    train(training_model, train_data, opts, lr_scheduler, range(checkpoint["epoch"]+1, opts.epoch+1), optimizer)
    if not opts.no_validation:
        checkpoint_folder = os.path.dirname(os.path.realpath(args.checkpoint_path))
        checkpoint_files = [os.path.join(checkpoint_folder, file_name) for file_name in os.listdir(checkpoint_folder) if file_name.endswith(".pt")]
        validate_checkpoints(checkpoint_files, test_data)
Example 9
                                   opts,
                                   train=True,
                                   async_dataloader=True)

    logging.info(
        f"Restore the {args.model} model to epoch {checkpoint['epoch']} on {args.data} dataset(Loss:{checkpoint['loss']}, train accuracy:{checkpoint['train_accuracy']})"
    )
    model = models.get_model(args,
                             datasets.datasets_info[args.data],
                             pretrained=False,
                             use_mixup=args.mixup_enabled,
                             use_cutmix=args.cutmix_enabled)
    models.load_model_state_dict(model, checkpoint['model_state_dict'])
    model.train()

    optimizer = get_optimizer(args, model)
    optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
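    # Rebuild the LR scheduler and fast-forward it to the checkpointed epoch via start_epoch.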
    lr_scheduler = get_lr_scheduler(args,
                                    optimizer,
                                    len(train_data),
                                    start_epoch=checkpoint["epoch"])
    training_model = convert_to_ipu_model(model, args, optimizer)

    if args.validation_mode == "during":
        training_validation_func = get_validation_function(args, model).func
    else:
        training_validation_func = None

    train(training_model, train_data, args, lr_scheduler,
          range(checkpoint["epoch"] + 1, args.epoch + 1), optimizer,
          training_validation_func)