Example No. 1
def get_dga_sdirs(args, data, labels):
    device = get_device(args)
    sdirs = []
    for x, y in zip(data, labels):
        # dga_bs: dist grad accum. batch size
        dataloader = get_dataloader(x, y, args.dga_bs, shuffle=False)
        count = 0
        for xiter, yiter in dataloader:
            model, loss_type = get_model(args, False)
            loss_fn = get_loss_fn(loss_type)
            opt = get_optim(args, model)

            loss, _ = forward(model, xiter, yiter, opt, loss_fn, device)
            loss.backward()
            sdirs.append(get_model_grads(model, flatten=True))
            count += 1
            if count >= args.num_dga:
                break

    # Group gradients by layer: stacked[l][i] is the flattened gradient of
    # layer l collected at accumulation step i.
    stacked = [[] for _ in range(len(sdirs[0]))]

    for l in range(len(sdirs[0])):
        for i in range(len(sdirs)):
            stacked[l].append(sdirs[i][l].flatten())

    # For each layer, reduce the collected gradient directions to
    # `ncomponent` principal directions via PCA.
    sdirs = [[] for _ in range(args.ncomponent)]
    for l, layer in enumerate(stacked):
        layer = torch.stack(layer, dim=0).T.cpu().numpy()
        layer, _ = pca_transform(layer, args.ncomponent)
        for i in range(args.ncomponent):
            sdirs[i].append(layer[:, i].flatten())

    assert len(sdirs) == args.ncomponent

    return sdirs
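The pca_transform helper used above is defined elsewhere in the project; a minimal sketch of what it plausibly does, assuming it wraps scikit-learn's PCA and returns the projected matrix together with the fitted estimator (the caller keeps only the projection):

from sklearn.decomposition import PCA

def pca_transform(matrix, ncomponent):
    # Hypothetical helper: rows of `matrix` are samples, columns are features;
    # returns the projection onto the first `ncomponent` principal components.
    pca = PCA(n_components=ncomponent)
    projected = pca.fit_transform(matrix)
    return projected, pca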
Example No. 2
def predict(data_path, model_weights_path, network, test_df_path, save_path,
            size, channels, neighbours, classification_head):
    model = get_model(network, classification_head)
    model.encoder.conv1 = nn.Conv2d(count_channels(channels) * neighbours,
                                    64,
                                    kernel_size=(7, 7),
                                    stride=(2, 2),
                                    padding=(3, 3),
                                    bias=False)

    model, device = UtilsFactory.prepare_model(model)

    if classification_head:
        model.load_state_dict(torch.load(model_weights_path))
    else:
        checkpoint = torch.load(model_weights_path, map_location='cpu')
        model.load_state_dict(checkpoint['model_state_dict'])

    test_df = pd.read_csv(test_df_path)

    predictions_path = os.path.join(save_path, "predictions")

    if not os.path.exists(predictions_path):
        os.makedirs(predictions_path, exist_ok=True)
        print("Prediction directory created.")

    for _, image_info in tqdm(test_df.iterrows()):
        filename = '_'.join([image_info['name'], image_info['position']])
        image_path = get_filepath(data_path,
                                  image_info['dataset_folder'],
                                  'images',
                                  filename,
                                  file_type='tiff')

        image_tensor = filter_by_channels(read_tensor(image_path), channels,
                                          neighbours)
        if image_tensor.ndim == 2:
            image_tensor = np.expand_dims(image_tensor, -1)

        image = transforms.ToTensor()(image_tensor)
        if classification_head:
            prediction, label = model.predict(
                image.view(1,
                           count_channels(channels) * neighbours, size,
                           size).to(device, dtype=torch.float))
        else:
            prediction = model.predict(
                image.view(1,
                           count_channels(channels) * neighbours, size,
                           size).to(device, dtype=torch.float))

        result = prediction.view(size, size).detach().cpu().numpy()

        cv.imwrite(get_filepath(predictions_path, filename, file_type='png'),
                   result * 255)
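get_filepath is another project helper; a minimal sketch under the assumption that it simply joins the path parts and appends the file extension (the signature is hypothetical):

import os

def get_filepath(*path_parts, file_type):
    # Hypothetical helper: join path components and append the extension.
    return os.path.join(*path_parts) + '.' + file_type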
Example No. 3
def generate_frechet_metric_callback(callback_args, device, outdir, dataset):
    mean = callback_args['transform']['mean']
    std = callback_args['transform']['std']
    total_samples = callback_args['total_samples']
    batch_size = callback_args['sample_size']
    classifier_model = get_model(callback_args['classifier_model_args'])
    classifier_model_layer = callback_args['classifier_model_layer']
    if classifier_model_layer:
        classifier_model = torch.nn.Sequential(
            *list(classifier_model.children())[:classifier_model_layer])
    classifier_model = classifier_model.to(device)
    transform = batch_normalize_transform(mean, std)
    return FrechetInceptionScoreCallback(outdir=outdir,
                                         classifier=classifier_model,
                                         batch_size=batch_size,
                                         total_samples=total_samples,
                                         transform=transform,
                                         device=device,
                                         dataset=dataset)
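batch_normalize_transform is not shown in this listing; a minimal sketch, assuming it returns a callable that applies per-channel (x - mean) / std to an NCHW batch before it reaches the classifier:

import torch

def batch_normalize_transform(mean, std):
    # Hypothetical helper: normalize an NCHW batch channel-wise.
    mean_t = torch.tensor(mean, dtype=torch.float32).view(1, -1, 1, 1)
    std_t = torch.tensor(std, dtype=torch.float32).view(1, -1, 1, 1)

    def _normalize(batch):
        return (batch - mean_t.to(batch.device)) / std_t.to(batch.device)

    return _normalize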
Example No. 4
def generate_inception_metric_callback(callback_args, device, outdir):
    mode = callback_args['mode']
    if mode == 'gan':
        mean = callback_args['transform']['mean']
        std = callback_args['transform']['std']
        total_samples = callback_args['total_samples']
        batch_size = callback_args['sample_size']
        classifier_model = get_model(
            callback_args['classifier_model_args']).to(device)
        transform = batch_normalize_transform(mean, std)
        return InceptionScoreCallback(classifier_model,
                                      outdir,
                                      batch_size=batch_size,
                                      total_samples=total_samples,
                                      transform=transform,
                                      mode=mode,
                                      device=device)
    else:
        raise NotImplementedError(
            'generate_inception_metric_callback for classification is not implemented'
        )
Example No. 5
def main():

    parser = argparse.ArgumentParser()
    parser.add_argument('--epochs', type=int, default=50)
    parser.add_argument('--n_hidden', type=int, default=64)
    parser.add_argument('--n_layers', type=int, default=2)
    args = parser.parse_args()

    w2v_model = get_model()
    # vocab, embed dims
    VOCAB_SIZE, EMBED_DIM = w2v_model.wv.vectors.shape
    # w2ind from w2v
    w2ind = {
        token: token_index
        for token_index, token in enumerate(w2v_model.wv.index2word)
    }
    # padding token for now
    TRG_PAD_IDX = w2ind["."]  # this is 0
    # sentence marker token inds
    sos_ind = w2ind['<sos>']
    eos_ind = w2ind['<eos>']
    # adjusted sequence length
    SEQ_LEN = 5 + 2  # sos, eos tokens
    # padded vectorized states of token indexes
    d = torch.load('../dat/processed/padded_vectorized_states_v3.pt')
    # train test valid split
    """
    train_d = {}
    test_d = {}
    valid_d = {}
    for index, vects in d.items():
        if torch.rand(1) < 0.1:
            test_d[index] = vects
        elif torch.rand(1) < 0.2:
            valid_d[index] = vects
        else:
            train_d[index] = vects
    print(f'train % = {len(train_d)/len(d)}')
    print(f'test % = {len(test_d)/len(d)}')
    print(f'valid % = {len(valid_d)/len(d)}\n')
    """
    # all data
    train_d = d
    valid_d = d
    print(len(d))

    clip = 1
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    enc = EncRnn(hidden_size=args.n_hidden,
                 num_layers=args.n_layers,
                 embed_size=EMBED_DIM)
    dec = DecRnn(hidden_size=args.n_hidden,
                 num_layers=args.n_layers,
                 embed_size=EMBED_DIM,
                 output_size=VOCAB_SIZE)
    model = Seq2SeqAttn(enc, dec, TRG_PAD_IDX, VOCAB_SIZE, device).to(device)

    save_model_instance()

    optimizer = torch.optim.Adam(model.parameters())
    criterion = nn.CrossEntropyLoss(ignore_index=TRG_PAD_IDX).to(device)

    assert w2v_model.vocabulary.sorted_vocab == True
    word_counts = {
        word: vocab_obj.count
        for word, vocab_obj in w2v_model.wv.vocab.items()
    }
    word_counts = sorted(word_counts.items(), key=lambda x: -x[1])
    words = [t[0] for t in word_counts]

    model.apply(init_weights)
    train(train_d, valid_d, w2v_model, words, model, optimizer, criterion,
          sos_ind, eos_ind, TRG_PAD_IDX, SEQ_LEN, clip, device, args.epochs)

    # evaluate(test_d, w2v_model, words, model, criterion, sos_ind, eos_ind, TRG_PAD_IDX, SEQ_LEN, device, type='Test')

    observe(w2v_model, words, model, d, sos_ind, eos_ind, TRG_PAD_IDX, SEQ_LEN,
            device)
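init_weights applied to the Seq2SeqAttn model is imported from models.seq2seqattn; a common choice for this kind of seq2seq model, sketched here as an assumption rather than the actual implementation:

import torch.nn as nn

def init_weights(m):
    # Hypothetical initializer: small uniform init for every parameter.
    for _, param in m.named_parameters():
        nn.init.uniform_(param.data, -0.08, 0.08)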
Example No. 6
parser.add_argument('--select_nodes', default=0, type=int)
args = parser.parse_args()

device = 'cuda' if torch.cuda.is_available() else 'cpu'
''' Meta-name to be used as prefix on all savings'''
oname = args.net + '_' + args.dataset + '/'
SAVE_DIR = args.save_path + 'adjacency/' + oname
START_LAYER = 3 if args.net in ['vgg', 'resnet'] else 0
THRESHOLDS = args.thresholds
''' If save directory doesn't exist create '''
if not os.path.exists(SAVE_DIR):
    os.makedirs(SAVE_DIR)

# Build models
print('==> Building model..')
net = get_model(args.net, args.dataset)
net = net.to(device)

if device == 'cuda':
    net = torch.nn.DataParallel(net)
    cudnn.benchmark = True
''' Prepare criterion '''
if args.dataset in [
        'cifar10', 'cifar10_gray', 'vgg_cifar10_adversarial', 'imagenet'
]:
    criterion = nn.CrossEntropyLoss()
elif args.dataset in ['mnist', 'mnist_adverarial']:
    criterion = F.nll_loss
''' Define label manipulator '''
manipulator = load_manipulator(args.permute_labels, args.binarize_labels)
''' Instead of building graph on the entire set of nodes, pick a subset '''
Example No. 7
def train(args):
    set_random_seed(42)
    model = get_model(args.network)
    print('Loading model')
    model.encoder.conv1 = nn.Conv2d(
        count_channels(args.channels), 64, kernel_size=(7, 7),
        stride=(2, 2), padding=(3, 3), bias=False)
    model, device = UtilsFactory.prepare_model(model)

    train_df = pd.read_csv(args.train_df).to_dict('records')
    val_df = pd.read_csv(args.val_df).to_dict('records')

    ds = Dataset(args.channels, args.dataset_path, args.image_size, args.batch_size, args.num_workers)
    loaders = ds.create_loaders(train_df, val_df)

    if args.optimizer == 'Adam':
        optimizer = torch.optim.Adam(model.parameters(), lr=args.lr)
    elif args.optimizer == 'SGD':
        optimizer = torch.optim.SGD(model.parameters(), lr=args.lr)
    else:
        print('Unknown optimizer argument. Falling back to the default optimizer (Adam)')
        optimizer = torch.optim.Adam(model.parameters(), lr=args.lr)

    criterion = BCE_Dice_Loss(bce_weight=0.2)
    scheduler = torch.optim.lr_scheduler.MultiStepLR(
        optimizer, milestones=[10, 20, 40], gamma=0.3
    )

    save_path = os.path.join(
        args.logdir,
        args.name
    )

    # model runner
    runner = SupervisedRunner()

    # model training
    runner.train(
        model=model,
        criterion=criterion,
        optimizer=optimizer,
        scheduler=scheduler,
        loaders=loaders,
        callbacks=[
            DiceCallback()
        ],
        logdir=save_path,
        num_epochs=args.epochs,
        verbose=True
    )

    infer_loader = collections.OrderedDict([('infer', loaders['valid'])])
    runner.infer(
        model=model,
        loaders=infer_loader,
        callbacks=[
            CheckpointCallback(resume=f'{save_path}/checkpoints/best.pth'),
            InferCallback()
        ],
    )
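BCE_Dice_Loss comes from the project's own loss module; a common formulation, sketched here under the assumption that it blends binary cross-entropy on logits with a soft Dice term weighted by bce_weight:

import torch
import torch.nn as nn

class BCE_Dice_Loss(nn.Module):
    # Hypothetical loss: bce_weight * BCE + (1 - bce_weight) * (1 - soft Dice).
    def __init__(self, bce_weight=0.2, eps=1e-7):
        super().__init__()
        self.bce_weight = bce_weight
        self.eps = eps
        self.bce = nn.BCEWithLogitsLoss()

    def forward(self, logits, targets):
        targets = targets.float()
        bce = self.bce(logits, targets)
        probs = torch.sigmoid(logits)
        intersection = (probs * targets).sum()
        dice = (2 * intersection + self.eps) / (probs.sum() + targets.sum() + self.eps)
        return self.bce_weight * bce + (1 - self.bce_weight) * (1 - dice)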
Example No. 8
def main():
    # general
    debug = False
    epochs = 10
    batch_size = 12
    num_workers = 0
    lr = 0.01

    # dataset
    base_dir = '/dataset/kaggle/38-cloud'
    datatype_train = 'train'  # 'test'
    datatype_test = 'test'
    include_nir = True
    train_ratio = 0.8
    test_mask = False

    # transforms
    name_trans_train = 'albu_train_0'
    name_trans_val = 'albu_val_0'
    kwargs_trans = {
        'resize': None  # (384, 384)
    }
    name_preprocessing = 'xxxx'

    # model
    model_name = 'unet_0'
    out_channels = 2
    kwargs_model = {'in_channels': 4}
    resume = os.path.join("./logs/38_cloud_test/checkpoints",
                          "cls_epoch_9.pth")

    # log
    log_base_dir = os.path.join("./logs/38_cloud_test", model_name)

    non_null_rate = 1.0
    cloud_rate = None
    processes = 1

    torch.backends.cudnn.benchmark = True
    if debug:
        device = 'cpu'
    else:
        device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    # transform
    transforms_test = get_transform(name=name_trans_val, **kwargs_trans)

    # preprocessing
    preprocessing = None
    # ENCODER = 'resnet50'
    # ENCODER_WEIGHTS = 'imagenet'
    # preprocessing_fn = smp.encoders.get_preprocessing_fn(ENCODER, ENCODER_WEIGHTS)
    # preprocessing = get_preprocessing(preprocessing_fn=preprocessing_fn)

    # dataset
    dataset_test = L8CLoudDataset(base_dir=base_dir,
                                  datatype=datatype_test,
                                  transforms=transforms_test,
                                  preprocessing=preprocessing,
                                  include_nir=include_nir,
                                  non_null_rate=non_null_rate,
                                  cloud_rate=cloud_rate,
                                  processes=processes,
                                  test_mask=test_mask)

    # DataLoader
    test_dl = DataLoader(dataset_test,
                         batch_size=batch_size,
                         shuffle=False,
                         num_workers=num_workers)
    print(len(test_dl))

    # model
    model = get_model(name=model_name,
                      out_channels=out_channels,
                      **kwargs_model)
    model.to(device)
    if resume is not None:
        model.load_state_dict(torch.load(resume, map_location=device))

    # loss
    criterion = nn.CrossEntropyLoss().to(device)

    # check model
    xb, yb = next(iter(test_dl))
    print(xb.shape, yb.shape)
    print(model)
    print(summary(model, input_size=tuple(xb.shape[1:])))

    test_loss = test_org(model,
                         test_dl,
                         criterion=criterion,
                         device=device,
                         acc_fn=acc_metric)

    print(test_loss)
    # print figures
    dir_dest = "./temp/38_cloud_test"
    results_show(ds=dataset_test,
                 list_index=[1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
                 model=model,
                 save=True,
                 dir_dest=dir_dest,
                 fname='test.png',
                 fname_time=True,
                 show=False,
                 fig_img_size=4,
                 cmp_input='gray',
                 cmp_out='jet',
                 class_num=2)
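acc_metric passed to test_org is defined elsewhere; a plausible pixel-accuracy metric for this two-class segmentation setup (logits of shape N x C x H x W, integer masks of shape N x H x W), offered as an assumption:

import torch

def acc_metric(logits, targets):
    # Hypothetical metric: fraction of pixels whose argmax class matches the mask.
    preds = torch.argmax(logits, dim=1)
    return (preds == targets).float().mean().item()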
Example No. 9
def train_gan(arguments):
    """ Setup result directory and enable logging to file in it """
    outdir = make_results_dir(arguments)
    logger.init(outdir, logging.INFO)
    logger.info('Arguments:\n{}'.format(pformat(arguments)))
    """ Initialize Tensorboard """
    tensorboard_writer = initialize_tensorboard(outdir)
    """ Set random seed throughout python, pytorch and numpy """
    logger.info('Using Random Seed value as: %d' % arguments['random_seed'])
    torch.manual_seed(
        arguments['random_seed'])  # Set for pytorch, used for cuda as well.
    random.seed(arguments['random_seed'])  # Set for python
    np.random.seed(arguments['random_seed'])  # Set for numpy
    """ Set device - cpu or gpu """
    # `opt` is assumed to be a module-level argparse namespace holding the GPU index.
    device = torch.device(
        f"cuda:{opt.gpu}" if torch.cuda.is_available() else "cpu")
    logger.info(f'Using device - {device}')
    """ Load Model with weights(if available) """
    G: torch.nn.Module = get_model(
        arguments.get('generator_model_args')).to(device)
    D: torch.nn.Module = get_model(
        arguments.get('discriminator_model_args')).to(device)

    if arguments['mode'] == 'dcgan':
        G.apply(weights_init)
        D.apply(weights_init)
    """ Create optimizer """
    G_optimizer = create_optimizer(G.parameters(),
                                   arguments['generator_optimizer_args'])
    D_optimizer = create_optimizer(D.parameters(),
                                   arguments['discriminator_optimizer_args'])
    """ Create Loss """
    loss = torch.nn.BCELoss().to(device=device)  # GAN
    """ Load parameters for the Dataset """
    dataset: BaseDataset = create_dataset(arguments['dataset_args'],
                                          arguments['train_data_args'],
                                          arguments['val_data_args'])
    """ Generate all callbacks """
    callbacks: List[Callbacks] = generate_callbacks(arguments, dataset, device,
                                                    outdir)

    # """ Create loss function """
    # criterion = create_loss(arguments['loss_args'])
    """ Debug the inputs to model and save graph to tensorboard """
    dataset.debug()

    # Only One model is allowed
    # G_dummy_input = torch.rand(size=(1, arguments['generator_model_args']['model_constructor_args']['latent_dim']))
    # D_dummy_input = (torch.rand(1,
    #                           arguments['dataset_args']['name'].value['channels'],
    #                           32, 32  # *arguments['dataset_args']['name'].value['image_size']  # ToDo Fix this
    #                           ))
    # tensorboard_writer.save_graph('Generator', G, G_dummy_input.to(device))
    # tensorboard_writer.save_graph('Discriminator', D, D_dummy_input.to(device))
    logger.info(G)
    logger.info(D)

    def reset_grad():
        G.zero_grad()
        D.zero_grad()

    batch_size = arguments['train_data_args']['batch_size']
    z_dim = arguments['generator_model_args']['model_constructor_args']['nz']

    generator = infinite_train_gen(dataset.train_dataloader)
    interval_length = 10 if is_debug_mode() else 400
    num_intervals = 1 if is_debug_mode() else int(arguments['num_iterations'] /
                                                  interval_length)

    global_step = 0

    # To allocate memory required for the GPU during training and validation
    run_callbacks(
        callbacks,
        model=(G, D),
        optimizer=(G_optimizer,
                   D_optimizer),  # To Save optimizer dict for retraining.
        mode=CallbackMode.ON_NTH_ITERATION,
        iteration=global_step)
    reset_grad()

    for it in range(num_intervals):

        logger.info(f'Interval {it + 1}/{num_intervals}')

        # Set model in train mode
        G.train()
        D.train()

        t = trange(interval_length)
        for _ in t:
            if arguments['mode'] == 'dcgan':
                D_loss, G_loss = train_gan_iter(D, D_optimizer, G, G_optimizer,
                                                loss, device, generator,
                                                batch_size, reset_grad, z_dim,
                                                tensorboard_writer,
                                                global_step)
            elif arguments['mode'] == 'wgan-wp':
                D_loss, G_loss = train_wgan_iter(D, D_optimizer, G,
                                                 G_optimizer, device,
                                                 generator, batch_size,
                                                 reset_grad, z_dim,
                                                 tensorboard_writer,
                                                 global_step)
            elif arguments['mode'] == 'wgan-noise-adversarial':
                D_loss, G_loss = train_noisy_wgan_iter(
                    D,
                    D_optimizer,
                    G,
                    G_optimizer,
                    device,
                    generator,
                    batch_size,
                    reset_grad,
                    z_dim,
                    tensorboard_writer,
                    global_step,
                    contamination_loss_weight=arguments[
                        'contamination_loss_weight'])

            # Log D_Loss and G_Loss in progress_bar
            t.set_postfix(D_Loss=D_loss.data.cpu().item(),
                          G_Loss=G_loss.data.cpu().item())

            # Save Loss In Tensorboard
            tensorboard_writer.save_scalars(
                f'{arguments["mode"].upper()}_Loss', {
                    'Discriminator' if arguments['mode'] == 'dcgan' else 'Critic':
                    D_loss.data.cpu().item(),
                    'Generator':
                    G_loss.data.cpu().item()
                }, global_step)
            global_step += 1

        print(
            f'Discriminator Loss: {D_loss.data.cpu().item()}, Generator Loss: {G_loss.data.cpu().item()}'
        )

        run_callbacks(
            callbacks,
            model=(G, D),
            optimizer=(G_optimizer,
                       D_optimizer),  # To Save optimizer dict for retraining.
            mode=CallbackMode.ON_NTH_ITERATION,
            iteration=global_step)
        reset_grad()
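infinite_train_gen presumably turns the finite train dataloader into an endless stream of batches so the interval loop never exhausts it; a minimal sketch, assuming batches of (images, labels):

def infinite_train_gen(dataloader):
    # Hypothetical helper: cycle through the dataloader indefinitely.
    while True:
        for images, labels in dataloader:
            yield images, labels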
Example No. 10
_, workers = get_fl_graph(hook, args.num_workers)
print('Loading data: {}'.format(paths.data_path))
X_trains, _, y_trains, _, meta = pkl.load(open(paths.data_path, 'rb'))

test_loader = get_loader(args.dataset,
                         args.test_batch_size,
                         train=False,
                         noise=args.noise)

print('+' * 80)

# ------------------------------------------------------------------------------
# Fire the engines
# ------------------------------------------------------------------------------

model, loss_type = get_model(args, ckpt_path=args.load_model)
if args.batch_size == 0:
    args.batch_size = int(meta['batch_size'])
    print("Resetting batch size: {}...".format(args.batch_size))

print('+' * 80)
h_epoch = []
h_acc_test = []
h_acc_train = []
h_acc_train_std = []
h_loss_test = []
h_loss_train = []
h_loss_train_std = []
h_uplink = []
h_grad_agg = []
h_error = []
Example No. 11
test_loader = get_loader(args.dataset,
                         args.test_batch_size,
                         train=False,
                         shuffle=False,
                         subset=args.repeat,
                         force_resize=cfg.model_im_size[args.clf])
print('Train size: ', len(train_loader.dataset))
print('Test size: ', len(test_loader.dataset))

print('+' * 80)

# ------------------------------------------------------------------------------
# Fire the engines
# ------------------------------------------------------------------------------

model, loss_type = get_model(args)
agg_type = 'averaging'

if 'sgd' in args.paradigm:
    optimizer = optim.SGD(params=model.parameters(),
                          lr=args.lr,
                          momentum=args.momentum,
                          weight_decay=5e-4)
elif 'adam' in args.paradigm:
    optimizer = optim.Adam(params=model.parameters(), lr=args.lr)
if args.scheduler:
    print('Initializing scheduler...')
    scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer,
                                                     T_max=args.epochs)

if loss_type == 'hinge':
Example No. 12
def train(args):
    set_random_seed(42)
    model = get_model(args.network, args.classification_head)
    print('Loading model')

    model.encoder.conv1 = nn.Conv2d(count_channels(args.channels) *
                                    args.neighbours,
                                    64,
                                    kernel_size=(7, 7),
                                    stride=(2, 2),
                                    padding=(3, 3),
                                    bias=False)

    model, device = UtilsFactory.prepare_model(model)

    train_df = pd.read_csv(args.train_df).to_dict('records')
    val_df = pd.read_csv(args.val_df).to_dict('records')

    ds = Dataset(args.channels, args.dataset_path, args.image_size,
                 args.batch_size, args.num_workers, args.neighbours,
                 args.classification_head)
    loaders = ds.create_loaders(train_df, val_df)

    save_path = os.path.join(args.logdir, args.name)

    optimizer = get_optimizer(args.optimizer, args.lr, model)

    if not args.classification_head:
        scheduler = torch.optim.lr_scheduler.MultiStepLR(
            optimizer, milestones=[10, 40, 80, 150, 300], gamma=0.1)

        criterion = get_loss(args.loss)

        runner = SupervisedRunner()
        if args.model_weights_path:
            checkpoint = torch.load(args.model_weights_path,
                                    map_location='cpu')
            model.load_state_dict(checkpoint['model_state_dict'])

        runner.train(model=model,
                     criterion=criterion,
                     optimizer=optimizer,
                     scheduler=scheduler,
                     loaders=loaders,
                     callbacks=[DiceCallback()],
                     logdir=save_path,
                     num_epochs=args.epochs,
                     verbose=True)

        infer_loader = collections.OrderedDict([('infer', loaders['valid'])])
        runner.infer(
            model=model,
            loaders=infer_loader,
            callbacks=[
                CheckpointCallback(resume=f'{save_path}/checkpoints/best.pth'),
                InferCallback()
            ],
        )
    else:
        criterion = get_loss('multi')
        net = Model(model,
                    optimizer,
                    criterion,
                    batch_metrics=[
                        classification_head_accuracy, segmentation_head_dice
                    ])
        net = net.to(device)
        net.fit_generator(loaders['train'],
                          loaders['valid'],
                          epochs=args.epochs,
                          callbacks=[
                              ModelCheckpoint(
                                  f'{save_path}/checkpoints/best.pth', ),
                              MultiStepLR(milestones=[10, 40, 80, 150, 300],
                                          gamma=0.1)
                          ])
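get_optimizer is a small project helper; it likely mirrors the if/elif selection shown in Example No. 7, sketched here as an assumption:

import torch

def get_optimizer(name, lr, model):
    # Hypothetical helper: map an optimizer name to a torch.optim instance.
    if name == 'Adam':
        return torch.optim.Adam(model.parameters(), lr=lr)
    if name == 'SGD':
        return torch.optim.SGD(model.parameters(), lr=lr)
    raise ValueError(f'Unknown optimizer: {name}')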
Example No. 13
def main(args):
    dict_args = vars(args)

    model_name = dict_args['model_name']

    model = get_model(model_name, dict_args)

    checkpoint_path = dict_args['checkpoints_path']
    if not os.path.exists(checkpoint_path):
        os.mkdir(checkpoint_path)

    if dict_args['log_system'] == 'wandb':

        logger = WandbLogger(project='source_separation', tags=model_name, offline=False, id=dict_args['run_id'])
        logger.log_hyperparams(model.hparams)
        logger.watch(model, log='all')

    elif dict_args['log_system'] == 'tensorboard':
        raise NotImplementedError
    else:
        logger = True  # default

    model_dir = os.path.join(checkpoint_path, dict_args['model_name'])
    if not os.path.exists(model_dir):
        os.mkdir(model_dir)
    ckpt_dir = os.path.join(model_dir, dict_args['run_id'])
    if not os.path.exists(ckpt_dir):
        os.mkdir(ckpt_dir)

    checkpoint_callback = ModelCheckpoint(
        filepath=ckpt_dir,
        save_top_k=dict_args['save_top_k'],
        verbose=False,
        monitor='val_loss',
        prefix=dict_args['model_name'] + '_',
        save_last=True,
        save_weights_only=True
    )

    early_stop_callback = EarlyStopping(
        monitor='val_loss',
        min_delta=0.0,
        patience=dict_args['patience'],
        verbose=False
    )
    if dict_args['float16']:
        trainer = Trainer(
            gpus=dict_args['gpus'],
            precision=16,
            logger=logger,
            checkpoint_callback=checkpoint_callback,
            early_stop_callback=early_stop_callback,
            distributed_backend=dict_args['distributed_backend']
        )
    else:
        trainer = Trainer(
            gpus=dict_args['gpus'],
            logger=logger,
            checkpoint_callback=checkpoint_callback,
            early_stop_callback=early_stop_callback,
            distributed_backend=dict_args['distributed_backend']
        )

    data_provider = DataProvider(**dict_args)

    n_fft, hop_length, num_frame = [dict_args[key] for key in ['n_fft', 'hop_length', 'num_frame']]

    train_dataloader = data_provider.get_train_dataloader(n_fft, hop_length, num_frame)
    valid_dataloader = data_provider.get_valid_dataloader(n_fft, hop_length, num_frame)

    trainer.fit(model, train_dataloader, valid_dataloader)
Example No. 14
import torch
import sys
import torch.nn as nn
import torch.nn.functional as F
from torch.distributions import Categorical
from models.utils import get_model
from models.config import TOKENS_RAW_CUTOFF
from models.seq2seqattn import init_weights, EncRnn, DecRnn, Seq2SeqAttn
from collections import deque
import random
import torch.optim as optim
import math
# Load in models and helper functions

w2v_model = get_model()
# w2ind from w2v
w2ind = {
    token: token_index
    for token_index, token in enumerate(w2v_model.wv.index2word)
}
# sorted vocab words
assert w2v_model.vocabulary.sorted_vocab == True
word_counts = {
    word: vocab_obj.count
    for word, vocab_obj in w2v_model.wv.vocab.items()
}
word_counts = sorted(word_counts.items(), key=lambda x: -x[1])
words = [t[0] for t in word_counts]
# sentence marker token inds
sos_ind = w2ind['<sos>']
Example No. 15
def objective(arguments):
    """
    Main Pipeline for training and cross-validation. ToDo - Testing will be done separately in test.py.
    """
    """ Setup result directory and enable logging to file in it """
    outdir = make_results_dir(arguments)
    logger.init(outdir, logging.INFO)
    logger.info('Arguments:\n{}'.format(pformat(arguments)))
    """ Initialize Tensorboard """
    tensorboard_writer = initialize_tensorboard(outdir)
    """ Set random seed throughout python, pytorch and numpy """
    logger.info('Using Random Seed value as: %d' % arguments['random_seed'])
    torch.manual_seed(
        arguments['random_seed'])  # Set for pytorch, used for cuda as well.
    random.seed(arguments['random_seed'])  # Set for python
    np.random.seed(arguments['random_seed'])  # Set for numpy
    """ Set device - cpu or gpu """
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    logger.info(f'Using device - {device}')
    """ Load Model with weights(if available) """
    model: torch.nn.Module = get_model(arguments.get('model_args')).to(device)
    """ Create loss function """
    criterion = create_loss(arguments['loss_args'])
    """ Create optimizer """
    optimizer = create_optimizer(model.parameters(),
                                 arguments['optimizer_args'])
    """ Load parameters for the Dataset """
    dataset: BaseDataset = create_dataset(arguments['dataset_args'],
                                          arguments['train_data_args'],
                                          arguments['val_data_args'])
    """ Generate all callbacks """
    callbacks: List[Callbacks] = generate_callbacks(arguments, dataset, device,
                                                    outdir)
    """ Debug the inputs to model and save graph to tensorboard """
    dataset.debug()
    dummy_input = (torch.rand(
        1,
        arguments['dataset_args']['name'].value['channels'],
        *arguments['dataset_args']['name'].value['image_size'],
    )).to(device)
    tensorboard_writer.save_graph(model, dummy_input)
    """ Pipeline - loop over the dataset multiple times """
    max_validation_accuracy = 0
    itr = 0

    best_model_path = None
    delete_old_models = True

    run_callbacks(callbacks,
                  model=model,
                  optimizer=optimizer,
                  mode=CallbackMode.ON_TRAIN_BEGIN)
    for epoch in range(arguments['nb_epochs']):
        """ Train the model """
        train_data_args = arguments['train_data_args']
        if train_data_args['to_train']:
            train_dataloader = dataset.train_dataloader
            progress_bar = ProgressBar(
                target=len(train_dataloader),
                clear=True,
                description=f"Training {epoch + 1}/{arguments['nb_epochs']}: ")
            loss_running_average = RunningAverage()

            run_callbacks(callbacks,
                          model=model,
                          optimizer=optimizer,
                          mode=CallbackMode.ON_EPOCH_BEGIN,
                          epoch=epoch)
            model.train()
            for i, data in enumerate(train_dataloader, 0):
                # get the inputs
                inputs, labels = data
                inputs = inputs.to(device)
                labels = labels.to(device)

                # zero the parameter gradients
                optimizer.zero_grad()

                # Forward Pass
                outputs = model(inputs)

                classification_loss = criterion(outputs, labels)
                tensorboard_writer.save_scalar('Classification_Loss',
                                               classification_loss.item(), itr)
                classification_loss.backward()
                optimizer.step()

                # Compute running loss. Not exact but efficient.
                running_loss = loss_running_average.add_new_sample(
                    classification_loss.item())
                progress_bar.update(i + 1, [
                    ('current loss', classification_loss.item()),
                    ('running loss', running_loss),
                ])
                tensorboard_writer.save_scalar('Training_Loss',
                                               classification_loss, itr)
                itr += 1

            # Callbacks ON_EPOCH_END should be run only when training is enabled. Thus call here.
            run_callbacks(callbacks,
                          model=model,
                          optimizer=optimizer,
                          mode=CallbackMode.ON_EPOCH_END,
                          epoch=epoch)
        """ Validate the model """
        val_data_args = arguments['val_data_args']
        if val_data_args['validate_step_size'] > 0 and \
                epoch % val_data_args['validate_step_size'] == 0:
            correct, total = 0, 0
            validation_dataloader = dataset.validation_dataloader
            progress_bar = ProgressBar(
                target=len(validation_dataloader),
                clear=True,
                description=f"Validating {epoch + 1}/{arguments['nb_epochs']}: "
            )
            model.eval()
            with torch.no_grad():
                for i, data in enumerate(validation_dataloader, 0):
                    inputs, labels = data
                    inputs = inputs.to(device)
                    labels = labels.to(device)

                    outputs = model(inputs)
                    _, predicted = torch.max(outputs.data, 1)
                    total += labels.size(0)
                    correct += (predicted == labels).sum().item()

                    progress_bar.update(i + 1, [
                        ('Batch Accuracy', 100 * correct / total),
                    ])

            val_accuracy = 100 * correct / total
            tensorboard_writer.save_scalar('Validation_Accuracy', val_accuracy,
                                           itr)
            logger.info(
                f'Accuracy of the network on the {dataset.get_val_dataset_size} validation images: {val_accuracy} %'
            )
            """ Save Model """
            if val_accuracy > max_validation_accuracy:
                if delete_old_models and best_model_path:
                    delete_old_file(best_model_path)
                best_model_path = os.path.join(
                    outdir,
                    f'epoch_{epoch:04}-model-val_accuracy_{val_accuracy}.pth')
                torch.save(model.state_dict(), best_model_path)
                max_validation_accuracy = val_accuracy

        tensorboard_writer.flush()

        # Exit loop if training not needed
        if not train_data_args['to_train']:
            break

    run_callbacks(callbacks,
                  model=model,
                  optimizer=optimizer,
                  mode=CallbackMode.ON_TRAIN_END)

    logger.info('Finished Training')
    close_tensorboard()
    logger.info(f'Max Validation accuracy is {max_validation_accuracy}')
    return max_validation_accuracy  # Return in case you later want to add hyperopt.
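RunningAverage is a small utility from the same codebase; a minimal sketch, assuming add_new_sample returns the mean of all samples seen so far:

class RunningAverage:
    # Hypothetical helper: incrementally tracks the mean of added samples.
    def __init__(self):
        self.total = 0.0
        self.count = 0

    def add_new_sample(self, value):
        self.total += value
        self.count += 1
        return self.total / self.count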