Example #1
def main():
    client_config = ClientConfig(
        idx=args.idx,
        master_ip_addr=args.master_ip,
        action=""
    )
    print("start")
    
    # Fetch the initial configuration from the master node before training starts.
    loop = asyncio.new_event_loop()
    asyncio.set_event_loop(loop)
    tasks = []
    task = asyncio.ensure_future(get_init_config(client_config))
    tasks.append(task)
    loop.run_until_complete(asyncio.wait(tasks))
    loop.close()

    train_dataset, test_dataset = datasets.load_datasets(client_config.custom["dataset_type"])
    train_loader = utils.create_dataloaders(train_dataset, batch_size=args.batch_size, selected_idxs=client_config.custom["train_data_idxes"])
    test_loader = utils.create_dataloaders(test_dataset, batch_size=128, selected_idxs=client_config.custom["test_data_idxes"], shuffle=False)

    # Training rounds: each iteration runs one local training pass on a fresh event loop.
    while True:
        loop = asyncio.new_event_loop()
        asyncio.set_event_loop(loop)
        tasks = []
        tasks.append(
            asyncio.ensure_future(
                local_training(client_config, train_loader, test_loader)
            )
        )
        loop.run_until_complete(asyncio.wait(tasks))

        loop.close()
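
A minimal sketch of a create_dataloaders helper compatible with the two calls above (dataset, batch_size, selected_idxs, shuffle) is shown below; this is an assumption based only on this example's usage, not the project's actual implementation.

from torch.utils.data import DataLoader, Subset

def create_dataloaders(dataset, batch_size, selected_idxs=None, shuffle=True):
    # Restrict the dataset to the indices assigned to this client, if any.
    if selected_idxs is not None:
        dataset = Subset(dataset, selected_idxs)
    return DataLoader(dataset, batch_size=batch_size, shuffle=shuffle)
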
Example #2
def train_autoencoder():
    data = utils.load_data(root_dir='./data/', mode='train')
    data, target, features, date = utils.preprocess_data(data, nn=True)
    dataset = utils.FinData(data=data, target=target, date=date)
    # Autoencoder hyperparameters (layer sizes, dropout, optimizer settings).
    p = {'batch_size': 4597,
         'dim_1': 231,
         'dim_2': 851,
         'dim_3': 777,
         'dim_4': 192,
         'hidden': 50,
         'dropout': 0.017122456592972537,
         'lr': 0.0013131268366473552,
         'activation': nn.GELU,
         'label_smoothing': 0.09401544509474698,
         'weight_decay': 0.005078413740277699,
         'amsgrad': True}
    # Fit on the full dataset; the val split is built but not passed to trainer.fit below.
    train_idx = [i for i in range(len(data))]
    val_idx = [i for i in range(10000)]
    dataloaders = utils.create_dataloaders(dataset=dataset,
                                           indexes={
                                               'train': train_idx, 'val': val_idx},
                                           batch_size=p['batch_size'])

    checkpoint_callback = ModelCheckpoint(
        dirpath='logs', monitor='t_loss', mode='min', save_top_k=1, period=10)
    input_size = data.shape[-1]
    output_size = 1
    model = AutoEncoder(input_size=input_size,
                        output_size=output_size, params=p)
    es = EarlyStopping(monitor='t_loss', patience=10,
                       min_delta=0.0005, mode='min')
    trainer = pl.Trainer(max_epochs=500, gpus=1, callbacks=[checkpoint_callback, es],
                         precision=16)
    trainer.fit(model, train_dataloader=dataloaders['train'])
Example #3
def train_cross_val(p):
    data_ = load_data(root_dir='./data/', mode='train')
    data_, target_, features, date = preprocess_data(data_, nn=True)

    gts = PurgedGroupTimeSeriesSplit(n_splits=5, group_gap=5)

    input_size = data_.shape[-1]
    output_size = 1
    tb_logger = pl_loggers.TensorBoardLogger('logs/')
    models = []
    # One model per fold; PurgedGroupTimeSeriesSplit keeps a gap between train and val groups.
    for i, (train_idx, val_idx) in enumerate(gts.split(data_, groups=date)):
        idx = np.concatenate([train_idx, val_idx])
        data = copy.deepcopy(data_[idx])
        target = copy.deepcopy(target_[idx])
        checkpoint_callback = pl.callbacks.ModelCheckpoint(os.path.join(
            'models/', "fold_{}".format(i)),
                                                           monitor="val_auc",
                                                           mode='max',
                                                           save_top_k=1,
                                                           period=10)
        model = Classifier(input_size=input_size,
                           output_size=output_size,
                           params=p)
        if p['activation'] == nn.ReLU:
            model.apply(lambda m: init_weights(m, 'relu'))
        elif p['activation'] == nn.LeakyReLU:
            model.apply(lambda m: init_weights(m, 'leaky_relu'))
        # Re-index relative to the concatenated fold slice (train rows first, then val rows).
        train_idx = [i for i in range(0, max(train_idx) + 1)]
        val_idx = [i for i in range(len(train_idx), len(idx))]
        data[train_idx] = calc_data_mean(data[train_idx],
                                         './cache',
                                         train=True,
                                         mode='mean')
        data[val_idx] = calc_data_mean(data[val_idx],
                                       './cache',
                                       train=False,
                                       mode='mean')
        dataset = FinData(data=data, target=target, date=date)
        dataloaders = create_dataloaders(dataset,
                                         indexes={
                                             'train': train_idx,
                                             'val': val_idx
                                         },
                                         batch_size=p['batch_size'])
        es = EarlyStopping(monitor='val_auc',
                           patience=10,
                           min_delta=0.0005,
                           mode='max')
        trainer = pl.Trainer(logger=tb_logger,
                             max_epochs=500,
                             gpus=1,
                             callbacks=[checkpoint_callback, es],
                             precision=16)
        trainer.fit(model,
                    train_dataloader=dataloaders['train'],
                    val_dataloaders=dataloaders['val'])
        torch.save(model.state_dict(), f'models/fold_{i}_state_dict.pth')
        models.append(model)
    return models, features
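
For reference, train_cross_val expects a parameter dict p; only the keys read in this snippet are visible. A hedged example call, with placeholder values rather than tuned settings, might look like:

p = {
    'batch_size': 10000,      # placeholder value
    'activation': nn.ReLU,    # selects the weight-init scheme applied above
    # ... plus whatever keys Classifier and create_dataloaders read internally
}
models, features = train_cross_val(p)
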
Example #4
def final_train(p, load=False):
    data_ = load_data(root_dir='./data/', mode='train')
    data, target, features, date = preprocess_data(data_, nn=True)
    input_size = data.shape[-1]
    output_size = 1
    # Time-based split: days up to 450 go to training, later days to validation.
    train_idx, val_idx = date[date <= 450].index.values.tolist(), date[
        date > 450].index.values.tolist()
    data[train_idx] = calc_data_mean(data[train_idx], './cache', train=True)
    data[val_idx] = calc_data_mean(data[val_idx], './cache', train=False)
    checkpoint_callback = pl.callbacks.ModelCheckpoint(
        filepath='models/full_train',
        monitor="val_auc",
        mode='max',
        save_top_k=1,
        period=10)
    model = Classifier(input_size=input_size,
                       output_size=output_size,
                       params=p)
    if p['activation'] == nn.ReLU:
        model.apply(lambda m: init_weights(m, 'relu'))
    elif p['activation'] == nn.LeakyReLU:
        model.apply(lambda m: init_weights(m, 'leaky_relu'))
    dataset = FinData(data, target, date)
    dataloaders = create_dataloaders(dataset,
                                     indexes={
                                         'train': train_idx,
                                         'val': val_idx
                                     },
                                     batch_size=p['batch_size'])
    es = EarlyStopping(monitor='val_auc',
                       patience=10,
                       min_delta=0.0005,
                       mode='max')
    trainer = pl.Trainer(max_epochs=500,
                         gpus=1,
                         callbacks=[checkpoint_callback, es],
                         precision=16)
    trainer.fit(model,
                train_dataloader=dataloaders['train'],
                val_dataloaders=dataloaders['val'])
    torch.save(model.state_dict(), 'models/final_train.pth')
    return model, features
Example #5
def train(args, states=None):

    config_obj = Config(args.config_file)
    config = config_obj.elements

    # make training runs deterministic
    set_seed(seed_value=config['random_seed'])

    logging.info("Loading datasets...")
    dataset, labels = load_embeddings(data_path=config['data'],
                                      label_path=config['labels'])

    train_loader, val_loader, test_loader = create_dataloaders(
        dataset,
        labels,
        batch_size=config['batch_size'],
        random_seed=config['random_seed'],
        balance=config['correct_imbalance'],
    )

    model = TextCNN(
        num_classes=config['num_classes'],
        embedding_size=config['embedding_size'],
        num_filters=config['num_filters'],
        dropout_rate=config['dropout'],
    )
    if torch.cuda.is_available():
        model.cuda()

    loss_function = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=config['lr'])

    best_metric = 0

    # loop over the dataset multiple times
    for epoch in range(1, config['num_epochs'] + 1):
        logging.info(
            f"==================== Epoch: {epoch} ====================")
        running_losses = []
        for i, data in enumerate(train_loader, 0):
            # get the inputs; data is a list of [inputs, labels]
            inputs, labels = data

            if torch.cuda.is_available():
                inputs, labels = inputs.cuda(), labels.cuda()

            # zero the parameter gradients before each pass
            optimizer.zero_grad()

            # forward
            probs, classes = model(inputs)
            # backprop
            loss = loss_function(probs, labels)
            loss.backward()
            # update/optimize
            optimizer.step()

            # Log summary
            running_losses.append(loss.item())
            if i % args.log_interval == 0:
                interval_loss = sum(running_losses) / len(running_losses)
                logging.info(f"step = {i}, loss = {interval_loss}")
                running_losses = []

            if i % args.test_interval == 0:
                dev_metric = eval(
                    val_loader,
                    model,
                    loss_function,
                    args.eval_metric,
                )
                if dev_metric > best_metric:
                    best_metric = dev_metric
                    states = {
                        "epoch": epoch,
                        "step": i,
                        "model": model.state_dict(),
                        "optimizer": optimizer.state_dict()
                    }
                    save_model_state(save_dir=args.model_dir,
                                     step=i,
                                     states=states)

    print(f"Finished Training, best {args.eval_metric}: {best_metric}")
Example #6
def optimize(trial: optuna.Trial, data_dict):
    gts = PurgedGroupTimeSeriesSplit(n_splits=5, group_gap=10)
    input_size = data_dict['data'].shape[-1]
    output_size = 5
    checkpoint_callback = pl.callbacks.ModelCheckpoint(os.path.join(
        'models/', "trial_resnet_{}".format(trial.number)),
                                                       monitor="val_auc",
                                                       mode='max')
    logger = MetricsCallback()
    metrics = []
    sizes = []
    # trial_file = 'HPO/nn_hpo_2021-01-05.pkl'
    trial_file = None
    p = create_param_dict(trial, trial_file)
    p['batch_size'] = trial.suggest_int('batch_size', 8000, 15000)
    # Evaluate the trial's hyperparameters across the purged time-series folds.
    for i, (train_idx, val_idx) in enumerate(
            gts.split(data_dict['data'], groups=data_dict['date'])):
        idx = np.concatenate([train_idx, val_idx])
        data = copy.deepcopy(data_dict['data'][idx])
        target = copy.deepcopy(data_dict['target'][idx])
        date = copy.deepcopy(data_dict['date'][idx])
        train_idx = [i for i in range(0, max(train_idx) + 1)]
        val_idx = [i for i in range(len(train_idx), len(idx))]
        data[train_idx] = calc_data_mean(data[train_idx],
                                         './cache',
                                         train=True,
                                         mode='mean')
        data[val_idx] = calc_data_mean(data[val_idx],
                                       './cache',
                                       train=False,
                                       mode='mean')
        model = Classifier(input_size, output_size, params=p)
        # model.apply(init_weights)
        dataset = FinData(data=data, target=target, date=date, multi=True)
        dataloaders = create_dataloaders(dataset,
                                         indexes={
                                             'train': train_idx,
                                             'val': val_idx
                                         },
                                         batch_size=p['batch_size'])
        es = EarlyStopping(monitor='val_loss',
                           patience=10,
                           min_delta=0.0005,
                           mode='min')
        trainer = pl.Trainer(logger=False,
                             max_epochs=500,
                             gpus=1,
                             callbacks=[
                                 checkpoint_callback, logger,
                                 PyTorchLightningPruningCallback(
                                     trial, monitor='val_loss'), es
                             ],
                             precision=16)
        trainer.fit(model,
                    train_dataloader=dataloaders['train'],
                    val_dataloaders=dataloaders['val'])
        val_loss = logger.metrics[-1]['val_loss'].item()
        metrics.append(val_loss)
        sizes.append(len(train_idx))
    # Weight each fold's validation loss by its training-set size.
    metrics_mean = weighted_mean(metrics, sizes)
    return metrics_mean
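
The optimize function above is written as an Optuna objective. A sketch of wiring it into a study, assuming data_dict has been prepared elsewhere with 'data', 'target' and 'date' entries, is:

import optuna
from functools import partial

# Minimize the weighted mean validation loss returned by the objective;
# the pruner pairs with the PyTorchLightningPruningCallback used above.
study = optuna.create_study(direction='minimize',
                            pruner=optuna.pruners.MedianPruner())
study.optimize(partial(optimize, data_dict=data_dict), n_trials=50)
print(study.best_params)
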
Example #7
def train(args, states=None):

    config_obj = Config(args.config_file)
    config = config_obj.elements

    # make training runs deterministic
    set_seed(seed_value=config['random_seed'])

    logging.info("Loading datasets...")
    dataset, labels = load_tokens(
        input_id_path=config['input_id'],
        token_type_id_path=config['token_type_id'],
        attention_mask_path=config['attention_mask'],
        label_path=config['labels'],
    )

    train_loader, val_loader, test_loader = create_dataloaders(
        dataset,
        labels,
        batch_size=config['batch_size'],
        random_seed=config['random_seed'],
        balance=config['correct_imbalance'],
    )

    model = BertForSequenceClassification.from_pretrained(
        "microsoft/BiomedNLP-PubMedBERT-base-uncased-abstract-fulltext",
        num_labels=2,
        output_attentions=False,
        output_hidden_states=False,
    )

    if torch.cuda.is_available():
        model.cuda()

    loss_function = nn.CrossEntropyLoss()
    # optimizer = AdamW(model.parameters(), lr=config['lr'])
    optimizer = torch.optim.SGD(model.parameters(), lr=config['lr'])

    total_train_steps = config['num_epochs'] * len(train_loader)
    scheduler = get_linear_schedule_with_warmup(
        optimizer,
        num_warmup_steps=0,
        num_training_steps=total_train_steps,
    )

    best_metric = 0

    # loop over the dataset multiple times
    for epoch in range(1, config['num_epochs'] + 1):
        logging.info(
            f"==================== Epoch: {epoch} ====================")
        running_losses = []
        for i, data in enumerate(train_loader, 0):
            # get the inputs; data is a list of [inputs, labels]
            input_ids, token_type_ids, attention_mask, labels = data

            if torch.cuda.is_available():
                input_ids = input_ids.cuda()
                token_type_ids = token_type_ids.cuda()
                attention_mask = attention_mask.cuda()
                labels = labels.cuda()

            # zero the parameter gradients
            optimizer.zero_grad()

            # forward
            _, logits = model(
                input_ids=input_ids,
                token_type_ids=token_type_ids,
                attention_mask=attention_mask,
                labels=labels,
            )
            # probs = F.softmax(logits, dim=1)

            # backprop
            loss = loss_function(logits, labels)
            loss.backward()

            # clip gradients
            torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
            # update/optimize
            optimizer.step()
            # update learning rate
            scheduler.step()

            # Log summary
            running_losses.append(loss.item())
            if i % args.log_interval == 0:
                interval_loss = sum(running_losses) / len(running_losses)
                logging.info(f"step = {i}, loss = {interval_loss}")
                running_losses = []

            if i % args.test_interval == 0:
                dev_metric = eval(
                    val_loader,
                    model,
                    loss_function,
                    args.eval_metric,
                )
                if dev_metric > best_metric:
                    best_metric = dev_metric
                    states = {
                        "epoch": epoch,
                        "step": i,
                        "model": model.state_dict(),
                        "optimizer": optimizer.state_dict()
                    }
                    save_model_state(save_dir=args.model_dir,
                                     step=i,
                                     states=states)

    print(f"Finished Training, best {args.eval_metric}: {best_metric}")
Example #8
args = parser.parse_args()

if __name__ == "__main__":
    # get the values from the params
    data_dir = args.data_dir
    checkpoint_path = args.checkpoint_path
    arch = args.arch
    lr = args.learning_rate
    dropout = args.dropout
    hidden_units = args.hidden_units
    epochs = args.epochs
    gpu = args.gpu

    print("These are the arguments supplied! :\n {}".format(args))

    image_datasets, dataloaders = utils.create_dataloaders(data_dir)

    # nn model stuff
    supported_archs = utils.get_supported_archs()

    if arch not in supported_archs:
        print(
            "'{}' not supported. Please choose either vgg16 or alexnet".format(
                arch))
    else:
        pretrained_model = supported_archs[arch][0]
        model_input = supported_archs[arch][1]

        food_classifier = utils.setup_nn(pretrained_model, model_input,
                                         hidden_units, dropout)