        layers_per_stack=args.layers,
        total_samples=total_samples,
        l2_lambda=args.L2,
        bias=args.bias,
        dropout=args.dropout,
        use_bayesian=args.bayesian,
        backwards=args.backwards,
        multi_gpu=True
    )
    print(model.summary())

    if args.multi_gpu:
        model = nn.DataParallel(model)
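        # Note (not in the original snippet): wrapping the model in
        # nn.DataParallel moves its parameters under model.module, so a
        # state_dict saved from the wrapped model has "module."-prefixed
        # keys; code that loads checkpoints has to account for that.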

    model.to(device)
    optimizer = optim.Adam(model.parameters(), lr=args.learning_rate)

    if args.anneal_learning_rates:
        T_0 = 1
        T_mult = 2
        scheduler = CosineAnnealingWarmRestarts(optimizer, T_0, T_mult)

    else:
        scheduler = None
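    # Note (assumption, not shown in this snippet): CosineAnnealingWarmRestarts
    # only changes the learning rate when scheduler.step() is called, typically
    # once per epoch or with a fractional epoch inside the batch loop, e.g.
    #   scheduler.step(epoch + batch_idx / len(train_loader))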

    model_save_name = args.results_dir / Path("model.torch")
    model_save_name_latest = args.results_dir / Path("model_latest.torch")

    if model_save_name.exists():
        print(f"Loading saved model from {model_save_name}...")
        if args.multi_gpu:
Example #2
    print("Data loaded!")

    model = WaveNet(input_channels=NUM_TOKENS,
                    residual_channels=args.residual_channels,
                    out_channels=NUM_TOKENS,
                    stacks=args.stacks,
                    layers_per_stack=args.layers,
                    total_samples=train_length,
                    l2_lambda=args.L2,
                    bias=args.bias,
                    dropout=args.dropout,
                    use_bayesian=args.bayesian,
                    backwards=args.backwards).to(device)

    print(model.summary())
    optimizer = optim.Adam(model.parameters(),
                           lr=args.learning_rate)  #, weight_decay = args.L2)

    if args.anneal_learning_rates:
        T_0 = 1  # Emil: I just picked a small number, no clue if any good
        T_mult = 2
        scheduler = CosineAnnealingWarmRestarts(optimizer, T_0, T_mult)
    else:
        scheduler = None

    model_save_name = args.results_dir / Path("model.torch")
    if model_save_name.exists():
        print(f"Loading saved model from {model_save_name}...")
        model.load_state_dict(
            torch.load(model_save_name, map_location=device)["state_dict"])
        print(f"Model loaded.")
Example #3
def train(args):

    # Arguments & parameters
    dataset = args.dataset
    dataset_dir = args.dataset_dir
    workspace = args.workspace
    filename = args.filename
    batch_size = args.batch_size  # Use one audio clip per mini-batch. Must
    # be 1 if audio clips have different lengths.
    condition = args.condition
    cuda = args.cuda

    quantize_bins = config.quantize_bins
    dilations = config.dilations

    # Paths
    models_dir = os.path.join(workspace, 'models',
                              'dataset={}'.format(dataset), filename,
                              'condition={}'.format(condition))
    create_folder(models_dir)

    # Data Generator
    Dataset = get_dataset(dataset)
    train_dataset = Dataset(dataset_dir, data_type='train')
    validate_dataset = Dataset(dataset_dir, data_type='validate')

    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=batch_size,
                                               shuffle=True,
                                               num_workers=1,
                                               pin_memory=True)

    validate_loader = torch.utils.data.DataLoader(validate_dataset,
                                                  batch_size=batch_size,
                                                  shuffle=True,
                                                  num_workers=1,
                                                  pin_memory=True)

    # Model
    model = WaveNet(
        dilations,
        residual_channels=config.residual_channels,
        dilation_channels=config.dilation_channels,
        skip_channels=config.skip_channels,
        quantize_bins=config.quantize_bins,
        global_condition_channels=config.global_condition_channels,
        global_condition_cardinality=Dataset.global_condition_cardinality,
        use_cuda=cuda)

    if cuda:
        model.cuda()

    # Optimizer
    optimizer = optim.Adam(model.parameters(),
                           lr=1e-3,
                           betas=(0.9, 0.999),
                           eps=1e-08,
                           weight_decay=0.)

    train_bgn_time = time.time()
    iteration = 0

    while True:
        for (batch_x, global_condition) in train_loader:
            '''batch_x: (batch_size, seq_len)
            global_condition: (batch_size,)
            '''

            print('iteration: {}, input size: {}'.format(
                iteration, batch_x.shape))

            # Evaluate
            if iteration % 1000 == 0:
                train_fin_time = time.time()
                evaluate_bgn_time = time.time()
                loss = evaluate(model, validate_loader, condition, cuda)

                print('-----------------')
                logging.info(
                    'iteration: {}, loss: {:.3f}, train_time: {:.3f}, '
                    'validate time: {:.3f} s'.format(
                        iteration, loss, train_fin_time - train_bgn_time,
                        time.time() - evaluate_bgn_time))

                train_bgn_time = time.time()

            # Save model
            if iteration % 10000 == 0:
                save_out_dict = {
                    'iteration': iteration,
                    'state_dict': model.state_dict(),
                    'optimizer': optimizer.state_dict()
                }

                save_out_path = os.path.join(
                    models_dir, 'md_{}_iters.tar'.format(iteration))

                torch.save(save_out_dict, save_out_path)
                logging.info('Save model to {}'.format(save_out_path))
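                # Resume sketch (assumption, not part of this snippet): a run
                # can be restarted from such a checkpoint with
                #   checkpoint = torch.load(save_out_path, map_location='cpu')
                #   model.load_state_dict(checkpoint['state_dict'])
                #   optimizer.load_state_dict(checkpoint['optimizer'])
                #   iteration = checkpoint['iteration']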

            # Move data to GPU
            if condition:
                global_condition = move_data_to_gpu(global_condition, cuda)
            else:
                global_condition = None

            batch_x = move_data_to_gpu(batch_x, cuda)

            # Prepare input and target data
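            # The model is autoregressive: each output sample is predicted from
            # the preceding receptive_field input samples. Feeding samples
            # 0..L-2 therefore yields (L-1) - receptive_field + 1 predictions,
            # which are aligned with the last output_width samples of the clip.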
            batch_input = batch_x[:, 0:-1]
            output_width = batch_input.shape[-1] - model.receptive_field + 1
            batch_target = batch_x[:, -output_width:]

            # Forward
            model.train()
            batch_output = model(batch_input, global_condition)
            loss = _loss_func(batch_output, batch_target)

            # Backward
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            print('loss: {:.3f}'.format(loss.item()))

            iteration += 1
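
The training loop above calls _loss_func, which is not shown in this example. A minimal sketch of what such a loss could look like, assuming (this is an assumption, not the original implementation) that batch_output holds logits of shape (batch_size, output_width, quantize_bins) and batch_target holds integer bin indices:

import torch.nn.functional as F

def _loss_func(batch_output, batch_target):
    # Categorical cross entropy over the quantized amplitude bins,
    # averaged over all predicted samples in the batch.
    # Assumes batch_target contains integer (long) class indices.
    num_bins = batch_output.shape[-1]
    return F.cross_entropy(batch_output.reshape(-1, num_bins),
                           batch_target.reshape(-1))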