def train_autoencoder(device, args):
    """Train the FeatureExtractor autoencoder on audio chunks.

    Args:
        device: torch.device the model and batches are moved to.
        args: parsed CLI namespace providing eval_size, batch_size, lr,
            n_epochs and verbose_epochs.
    """
    # Model setup.
    model = FeatureExtractor()
    model.to(device)

    # Gather every chunk file across all label directories.  The class of
    # each chunk is deliberately ignored: the dataset is generative
    # (reconstruction), so labels are irrelevant.
    all_chunks = []
    for label_dir in filesystem.listdir_complete(filesystem.train_audio_chunks_dir):
        all_chunks += filesystem.listdir_complete(label_dir)
    train_chunks, eval_chunks = train_test_split(all_chunks,
                                                 test_size=args.eval_size)

    # Transforms and datasets.
    trf = normalize
    train_dataset = GenerativeDataset(train_chunks, transforms=trf)
    eval_dataset = GenerativeDataset(eval_chunks, transforms=trf)
    train_dataloader = DataLoader(train_dataset,
                                  batch_size=args.batch_size,
                                  shuffle=True,
                                  num_workers=4,
                                  collate_fn=None,
                                  pin_memory=True)
    eval_dataloader = DataLoader(eval_dataset,
                                 batch_size=1,
                                 shuffle=True,
                                 num_workers=4,
                                 collate_fn=None,
                                 pin_memory=True)

    # Optimizer and Soft-DTW reconstruction loss.
    optimizer = optim.Adam(model.parameters(), lr=args.lr)
    loss_criterion = SoftDTW(use_cuda=True, gamma=0.1)

    # Running step counters carried across epochs (used by the step
    # functions for logging cadence).
    train_count = 0
    eval_count = 0
    for epoch in range(args.n_epochs):
        print('Epoch:', epoch, '/', args.n_epochs)
        train_count = train_step(model, train_dataloader, optimizer,
                                 loss_criterion, args.verbose_epochs,
                                 device, train_count)
        eval_count = eval_step(model, eval_dataloader, loss_criterion,
                               args.verbose_epochs, device, eval_count)
        # Checkpoint into the wandb run directory.
        # NOTE(review): original formatting was lost; this save appears to
        # belong inside the epoch loop (per-epoch checkpoint) — confirm.
        torch.save(model.state_dict(),
                   os.path.join(wandb.run.dir, 'model_checkpoint.pt'))
def main():
    """Run a trained FeatureExtractor autoencoder and save its output audio.

    Reads model/input/output paths from the CLI, runs one forward pass on
    CUDA, prints the reconstruction MSE, and writes the result as a 44.1 kHz
    audio file.
    """
    parser = get_parser()
    args = parser.parse_args()
    model_path = args.model
    input_path = args.input  # currently unused — see commented-out load below
    sound_path = args.output

    model = FeatureExtractor()
    model.load_state_dict(torch.load(model_path))
    device = torch.device('cuda')
    cpu_device = torch.device('cpu')
    model.to(device)

    # data = normalize(torchaudio.load(input_path)[0][0].reshape(1, -1))
    # NOTE(review): random input is used instead of the real file above —
    # presumably a debugging leftover; confirm before shipping.
    data = torch.from_numpy(normalize(torch.randn(1, 132480))).float().to(device)
    data = data.reshape(1, 1, -1)  # (batch, channel, samples)

    model.eval()
    # FIX: run inference without building an autograd graph.  Previously the
    # output tensor required grad, so torchaudio.save() would fail when
    # converting it to numpy ("Can't call numpy() on Tensor that requires
    # grad"); no_grad() also avoids wasting memory on activations.
    with torch.no_grad():
        sound = model(data)
        print(functional.mse_loss(sound, data).item())

    sound = sound.to(cpu_device)
    torchaudio.save(sound_path, sound.reshape(-1), 44100)
def main(args):
    """Generate a caption for one image with a trained encoder/decoder pair.

    Loads the COCO vocabulary, restores both model checkpoints, samples a
    word-id sequence for ``args.image``, decodes it to text, prints the
    sentence and displays the image.
    """
    # Image preprocessing: ImageNet mean/std normalization.
    transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.485, 0.456, 0.406),
                             (0.229, 0.224, 0.225)),
    ])

    image_dir = "data/"
    json_path = image_dir + "annotations/captions_train2014.json"
    root_dir = image_dir + "train2014"
    dataset = CocoDataset(json_path=json_path,
                          root_dir=root_dir,
                          transform=transform)
    data_loader = get_data_loader(dataset, batch_size=32)

    # Build models in eval mode (batchnorm uses moving mean/variance,
    # dropout is disabled).
    encoder = FeatureExtractor(args.embed_size).eval()
    decoder = CaptionGenerator(args.embed_size, args.hidden_size,
                               len(dataset.vocabulary), args.num_layers)
    # FIX: the decoder was never switched to eval mode, so any dropout /
    # batchnorm layers in it stayed in training mode during sampling,
    # making generated captions nondeterministic.
    decoder.eval()
    encoder = encoder.to(device)
    decoder = decoder.to(device)

    # Load the trained model parameters.
    encoder.load_state_dict(torch.load(args.encoder_path))
    decoder.load_state_dict(torch.load(args.decoder_path))

    # Prepare the input image.
    image = load_image(args.image, transform)
    image_tensor = image.to(device)

    # Generate a caption: (1, max_seq_length) -> (max_seq_length,).
    feature = encoder(image_tensor)
    sampled_ids = decoder.sample(feature)
    sampled_ids = sampled_ids[0].cpu().numpy()

    # Convert word ids to words, stopping at the end-of-sentence token.
    sampled_caption = []
    for word_id in sampled_ids:
        word = data_loader.dataset.id_to_word[word_id]
        sampled_caption.append(word)
        if word == '<end>':
            break
    sentence = ' '.join(sampled_caption)

    # Print the generated caption and show the image.
    print(sentence)
    image = Image.open(args.image)
    plt.imshow(np.asarray(image))
def main(args):
    """Train and/or evaluate the SRGAN models according to ``config.yaml``.

    Builds discriminator, generator and feature-extractor models, wires up
    the data loaders, then runs the epoch loop with optional checkpoint
    resume and best-precision checkpointing.

    Args:
        args: namespace with boolean ``train`` and ``test`` flags.
    """
    # Fixed seeds for reproducibility.
    np.random.seed(0)
    torch.manual_seed(0)

    with open('config.yaml', 'r') as file:
        stream = file.read()
    config_dict = yaml.safe_load(stream)
    config = mapper(**config_dict)

    disc_model = Discriminator(input_shape=(config.data.channels,
                                            config.data.hr_height,
                                            config.data.hr_width))
    gen_model = GeneratorResNet()
    feature_extractor_model = FeatureExtractor()
    plt.ion()

    # Placement: distributed > single/multi GPU > bail out (CPU unsupported).
    if config.distributed:
        disc_model.to(device)
        disc_model = nn.parallel.DistributedDataParallel(disc_model)
        gen_model.to(device)
        gen_model = nn.parallel.DistributedDataParallel(gen_model)
        feature_extractor_model.to(device)
        feature_extractor_model = nn.parallel.DistributedDataParallel(
            feature_extractor_model)
    elif config.gpu:
        # disc_model = nn.DataParallel(disc_model).to(device)
        # gen_model = nn.DataParallel(gen_model).to(device)
        # feature_extractor_model = nn.DataParallel(feature_extractor_model).to(device)
        disc_model = disc_model.to(device)
        gen_model = gen_model.to(device)
        feature_extractor_model = feature_extractor_model.to(device)
    else:
        return

    # NOTE(review): train and test datasets point at the same path/shapes —
    # presumably a placeholder; confirm the intended test split.
    train_dataset = ImageDataset(config.data.path,
                                 hr_shape=(config.data.hr_height,
                                           config.data.hr_width),
                                 lr_shape=(config.data.lr_height,
                                           config.data.lr_width))
    test_dataset = ImageDataset(config.data.path,
                                hr_shape=(config.data.hr_height,
                                          config.data.hr_width),
                                lr_shape=(config.data.lr_height,
                                          config.data.lr_width))

    if config.distributed:
        train_sampler = torch.utils.data.distributed.DistributedSampler(
            train_dataset)
    else:
        train_sampler = None

    train_loader = torch.utils.data.DataLoader(
        train_dataset,
        batch_size=config.data.batch_size,
        shuffle=config.data.shuffle,
        num_workers=config.data.workers,
        pin_memory=config.data.pin_memory,
        sampler=train_sampler)
    val_loader = torch.utils.data.DataLoader(
        test_dataset,
        batch_size=config.data.batch_size,
        shuffle=config.data.shuffle,
        num_workers=config.data.workers,
        pin_memory=config.data.pin_memory)

    if args.train:
        # Trainer settings.
        trainer = GANTrainer(config.train, train_loader,
                             (disc_model, gen_model, feature_extractor_model))
        criterion = nn.MSELoss().to(device)
        disc_optimizer = torch.optim.Adam(disc_model.parameters(),
                                          config.train.hyperparameters.lr)
        gen_optimizer = torch.optim.Adam(gen_model.parameters(),
                                         config.train.hyperparameters.lr)
        fe_optimizer = torch.optim.Adam(feature_extractor_model.parameters(),
                                        config.train.hyperparameters.lr)
        trainer.setCriterion(criterion)
        trainer.setDiscOptimizer(disc_optimizer)
        trainer.setGenOptimizer(gen_optimizer)
        trainer.setFEOptimizer(fe_optimizer)

        # Evaluator settings.
        evaluator = GANEvaluator(
            config.evaluate, val_loader,
            (disc_model, gen_model, feature_extractor_model))
        # optimizer = torch.optim.Adam(disc_model.parameters(), lr=config.evaluate.hyperparameters.lr,
        #                              weight_decay=config.evaluate.hyperparameters.weight_decay)
        evaluator.setCriterion(criterion)

    if args.test:
        pass

    # Turn on benchmark if the input sizes don't vary: lets cuDNN pick the
    # fastest algorithms for this machine.
    cudnn.benchmark = True

    start_epoch = 0
    best_precision = 0

    # Optionally resume from a checkpoint.
    if config.train.resume:
        [start_epoch, best_precision] = trainer.load_saved_checkpoint(
            checkpoint=None)

    # Change value to test.hyperparameters on testing.
    for epoch in range(start_epoch, config.train.hyperparameters.total_epochs):
        if config.distributed:
            train_sampler.set_epoch(epoch)

        if args.train:
            trainer.adjust_learning_rate(epoch)
            trainer.train(epoch)
            prec1 = evaluator.evaluate(epoch)

        if args.test:
            pass

        # Remember best prec@1 and save checkpoint.
        if args.train:
            is_best = prec1 > best_precision
            best_precision = max(prec1, best_precision)
            trainer.save_checkpoint(
                {
                    'epoch': epoch + 1,
                    'state_dict': disc_model.state_dict(),
                    'best_precision': best_precision,
                    # FIX: was ``optimizer.state_dict()`` — no ``optimizer``
                    # exists (its definition is commented out), so saving
                    # raised NameError.  Save the discriminator optimizer,
                    # matching the discriminator state_dict stored above.
                    'optimizer': disc_optimizer.state_dict(),
                },
                is_best,
                checkpoint=None)