Example #1
import os

import torch
import torch.optim as optim
import wandb
from sklearn.model_selection import train_test_split
from torch.utils.data import DataLoader

# FeatureExtractor, GenerativeDataset, SoftDTW, filesystem, normalize,
# train_step and eval_step are assumed to come from the surrounding project.


def train_autoencoder(device, args):
    # model definition
    model = FeatureExtractor()
    model.to(device)
    # data definition
    all_chunks = []
    # concatenate all chunk files
    # note that chunks are gathered independently of their
    # class label, since we are building a generative dataset
    for label in filesystem.listdir_complete(filesystem.train_audio_chunks_dir):
        chunks = filesystem.listdir_complete(label)
        all_chunks.extend(chunks)
    train_chunks, eval_chunks = train_test_split(all_chunks, test_size=args.eval_size)
    # transforms and dataset
    trf = normalize

    train_dataset = GenerativeDataset(train_chunks, transforms=trf)
    eval_dataset = GenerativeDataset(eval_chunks, transforms=trf)
    train_dataloader = DataLoader(train_dataset, batch_size=args.batch_size, shuffle=True,
                                  num_workers=4, collate_fn=None, pin_memory=True)
    eval_dataloader = DataLoader(eval_dataset, batch_size=1, shuffle=True,
                                 num_workers=4, collate_fn=None, pin_memory=True)

    # main loop
    optimizer = optim.Adam(model.parameters(), lr=args.lr)
    loss_criterion = SoftDTW(use_cuda=True, gamma=0.1)
    train_count = 0
    eval_count = 0
    for epoch in range(args.n_epochs):
        print('Epoch:', epoch, '/', args.n_epochs)
        train_count = train_step(model, train_dataloader, optimizer, loss_criterion, args.verbose_epochs, device, train_count)
        eval_count = eval_step(model, eval_dataloader, loss_criterion, args.verbose_epochs, device, eval_count)
        torch.save(model.state_dict(), os.path.join(wandb.run.dir, 'model_checkpoint.pt'))
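
The function reads eval_size, batch_size, lr, n_epochs and verbose_epochs from args. A minimal sketch of an argparse parser that could supply them (flag names mirror the attributes used above; the defaults are assumptions, not from the source):

import argparse

def build_parser():
    # hypothetical parser; every flag corresponds to an args.* attribute
    # read by train_autoencoder above
    parser = argparse.ArgumentParser(description='Train the audio autoencoder')
    parser.add_argument('--eval_size', type=float, default=0.2,
                        help='fraction of chunks held out for evaluation')
    parser.add_argument('--batch_size', type=int, default=16)
    parser.add_argument('--lr', type=float, default=1e-3)
    parser.add_argument('--n_epochs', type=int, default=100)
    parser.add_argument('--verbose_epochs', type=int, default=10,
                        help='how often the train/eval steps log progress')
    return parser

With a parser like this, the entry point would be something along the lines of train_autoencoder(torch.device('cuda'), build_parser().parse_args()).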
Example #2
import torch
import torchaudio
from torch.nn import functional

# FeatureExtractor, get_parser and normalize are assumed to come from the
# surrounding project.


def main():
    parser = get_parser()
    args = parser.parse_args()
    model_path = args.model
    input_path = args.input
    sound_path = args.output
    model = FeatureExtractor()
    model.load_state_dict(torch.load(model_path, map_location='cpu'))
    device = torch.device('cuda')
    cpu_device = torch.device('cpu')
    model.to(device)
    #data = normalize(torchaudio.load(input_path)[0][0].reshape(1, -1))
    data = torch.from_numpy(normalize(torch.randn(1,
                                                  132480))).float().to(device)
    data = data.reshape(1, 1, -1)
    model.eval()
    # run inference without tracking gradients
    with torch.no_grad():
        sound = model(data)
    print(functional.mse_loss(sound, data).item())
    sound = sound.to(cpu_device)
    # torchaudio.save expects a 2D (channels, time) tensor
    torchaudio.save(sound_path, sound.reshape(1, -1), 44100)
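
main() relies on a get_parser() helper that is not shown. A plausible reconstruction, covering only the three path arguments the body reads:

import argparse

def get_parser():
    # sketch of the parser this example assumes
    parser = argparse.ArgumentParser(
        description='Reconstruct audio with the trained autoencoder')
    parser.add_argument('--model', required=True,
                        help='path to the saved state_dict')
    parser.add_argument('--input', required=True,
                        help='input audio file (unused while the random-noise line is active)')
    parser.add_argument('--output', required=True,
                        help='destination for the reconstructed audio')
    return parser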
Example #3
import numpy as np
import torch
from matplotlib import pyplot as plt
from PIL import Image
from torchvision import transforms

# CaptionGenerator, CocoDataset, FeatureExtractor, get_data_loader and
# load_image are assumed to come from the surrounding project.

# pick the GPU when available; `device` is referenced throughout
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')


def main(args):
    # Image preprocessing
    transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
    ])

    image_dir = "data/"
    json_path = image_dir + "annotations/captions_train2014.json"
    root_dir = image_dir + "train2014"

    dataset = CocoDataset(json_path=json_path,
                          root_dir=root_dir,
                          transform=transform)

    data_loader = get_data_loader(dataset, batch_size=32)

    # Build models
    # eval mode (batchnorm uses moving mean/variance)
    encoder = FeatureExtractor(args.embed_size).eval()
    decoder = CaptionGenerator(args.embed_size, args.hidden_size,
                               len(dataset.vocabulary), args.num_layers)
    encoder = encoder.to(device)
    decoder = decoder.to(device)

    # Load the trained model parameters
    encoder.load_state_dict(torch.load(args.encoder_path, map_location=device))
    decoder.load_state_dict(torch.load(args.decoder_path, map_location=device))

    # Prepare an image
    image = load_image(args.image, transform)
    image_tensor = image.to(device)

    # Generate a caption from the image
    feature = encoder(image_tensor)
    sampled_ids = decoder.sample(feature)
    # (1, max_seq_length) -> (max_seq_length,)
    sampled_ids = sampled_ids[0].cpu().numpy()

    # Convert word_ids to words
    sampled_caption = []
    for word_id in sampled_ids:
        word = data_loader.dataset.id_to_word[word_id]
        sampled_caption.append(word)
        if word == '<end>':
            break
    sentence = ' '.join(sampled_caption)

    # Print out the image and the generated caption
    print(sentence)
    image = Image.open(args.image)
    plt.imshow(np.asarray(image))
    plt.show()
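
The load_image helper is not shown above. A minimal sketch consistent with how it is called (a path plus the transform, returning a batched tensor; the 224x224 resize is an assumption):

def load_image(image_path, transform=None):
    # hypothetical helper: open, resize, transform and add a batch dimension
    image = Image.open(image_path).convert('RGB')
    image = image.resize((224, 224), Image.LANCZOS)
    if transform is not None:
        image = transform(image).unsqueeze(0)  # (1, C, H, W)
    return image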
Example #4
import numpy as np
import torch
import torch.backends.cudnn as cudnn
import torch.nn as nn
import yaml
from matplotlib import pyplot as plt

# Discriminator, GeneratorResNet, FeatureExtractor, ImageDataset, GANTrainer,
# GANEvaluator and mapper are assumed to come from the surrounding project.

# pick the GPU when available; `device` is referenced throughout
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')


def main(args):
    np.random.seed(0)
    torch.manual_seed(0)

    with open('config.yaml', 'r') as file:
        config_dict = yaml.safe_load(file)
        config = mapper(**config_dict)

    disc_model = Discriminator(input_shape=(config.data.channels,
                                            config.data.hr_height,
                                            config.data.hr_width))
    gen_model = GeneratorResNet()
    feature_extractor_model = FeatureExtractor()
    plt.ion()

    if config.distributed:
        disc_model.to(device)
        disc_model = nn.parallel.DistributedDataParallel(disc_model)
        gen_model.to(device)
        gen_model = nn.parallel.DistributedDataParallel(gen_model)
        feature_extractor_model.to(device)
        feature_extractor_model = nn.parallel.DistributedDataParallel(
            feature_extractor_model)
    elif config.gpu:
        # disc_model = nn.DataParallel(disc_model).to(device)
        # gen_model = nn.DataParallel(gen_model).to(device)
        # feature_extractor_model = nn.DataParallel(feature_extractor_model).to(device)
        disc_model = disc_model.to(device)
        gen_model = gen_model.to(device)
        feature_extractor_model = feature_extractor_model.to(device)
    else:
        # neither distributed nor single-GPU execution is configured
        return

    train_dataset = ImageDataset(config.data.path,
                                 hr_shape=(config.data.hr_height,
                                           config.data.hr_width),
                                 lr_shape=(config.data.lr_height,
                                           config.data.lr_width))
    test_dataset = ImageDataset(config.data.path,
                                hr_shape=(config.data.hr_height,
                                          config.data.hr_width),
                                lr_shape=(config.data.lr_height,
                                          config.data.lr_width))

    if config.distributed:
        train_sampler = torch.utils.data.distributed.DistributedSampler(
            train_dataset)
    else:
        train_sampler = None

    train_loader = torch.utils.data.DataLoader(
        train_dataset,
        batch_size=config.data.batch_size,
        shuffle=config.data.shuffle,
        num_workers=config.data.workers,
        pin_memory=config.data.pin_memory,
        sampler=train_sampler)

    val_loader = torch.utils.data.DataLoader(test_dataset,
                                             batch_size=config.data.batch_size,
                                             shuffle=config.data.shuffle,
                                             num_workers=config.data.workers,
                                             pin_memory=config.data.pin_memory)

    if args.train:
        # trainer settings
        trainer = GANTrainer(config.train, train_loader,
                             (disc_model, gen_model, feature_extractor_model))
        criterion = nn.MSELoss().to(device)
        disc_optimizer = torch.optim.Adam(disc_model.parameters(),
                                          config.train.hyperparameters.lr)
        gen_optimizer = torch.optim.Adam(gen_model.parameters(),
                                         config.train.hyperparameters.lr)
        fe_optimizer = torch.optim.Adam(feature_extractor_model.parameters(),
                                        config.train.hyperparameters.lr)

        trainer.setCriterion(criterion)
        trainer.setDiscOptimizer(disc_optimizer)
        trainer.setGenOptimizer(gen_optimizer)
        trainer.setFEOptimizer(fe_optimizer)

        # evaluator settings
        evaluator = GANEvaluator(
            config.evaluate, val_loader,
            (disc_model, gen_model, feature_extractor_model))
        # optimizer = torch.optim.Adam(disc_model.parameters(), lr=config.evaluate.hyperparameters.lr,
        # 	weight_decay=config.evaluate.hyperparameters.weight_decay)
        evaluator.setCriterion(criterion)

    if args.test:
        pass

    # cudnn.benchmark searches for the fastest convolution algorithms;
    # only worthwhile when input sizes do not vary between batches
    cudnn.benchmark = True
    start_epoch = 0
    best_precision = 0

    # optionally resume from a checkpoint (trainer only exists when training)
    if args.train and config.train.resume:
        [start_epoch,
         best_precision] = trainer.load_saved_checkpoint(checkpoint=None)

    # change value to test.hyperparameters on testing
    for epoch in range(start_epoch, config.train.hyperparameters.total_epochs):
        if config.distributed:
            train_sampler.set_epoch(epoch)

        if args.train:
            trainer.adjust_learning_rate(epoch)
            trainer.train(epoch)
            prec1 = evaluator.evaluate(epoch)

        if args.test:
            pass

        # remember best prec@1 and save checkpoint
        if args.train:
            is_best = prec1 > best_precision
            best_precision = max(prec1, best_precision)
            trainer.save_checkpoint(
                {
                    'epoch': epoch + 1,
                    'state_dict': disc_model.state_dict(),
                    'best_precision': best_precision,
                    'optimizer': disc_optimizer.state_dict(),
                },
                is_best,
                checkpoint=None)
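
The mapper used to wrap the YAML config is project-specific. A minimal sketch that supports the attribute-style access seen above (config.data.batch_size, config.train.hyperparameters.lr), not the project's actual class:

class mapper:
    """Recursively expose dict keys as attributes (sketch only)."""

    def __init__(self, **entries):
        for key, value in entries.items():
            if isinstance(value, dict):
                value = mapper(**value)  # nest so dotted access works
            setattr(self, key, value)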