# Network hyper-parameters for generation, taken from the command line
# (argv[1] is presumably consumed elsewhere, e.g. a checkpoint name -- TODO confirm).
num_blocks = int(sys.argv[2])
num_layers = int(sys.argv[3])
num_hidden = int(sys.argv[4])
duration = int(sys.argv[5])

# Number of amplitude quantization levels.
Quantification = 256

# Keep the sample count even; assumes `sample_rate` is defined earlier in the
# file -- TODO confirm.
num_time_samples = (int(sample_rate * duration)
                    if (duration * sample_rate / 2) % 2 == 0
                    else int(sample_rate * duration) - 1)

# Initialize the model before restoring it
model = Model(num_time_samples=num_time_samples,
              num_channels=1,
              gpu_fraction=1.0,
              num_classes=Quantification,
              num_blocks=num_blocks,
              num_layers=num_layers,
              num_hidden=num_hidden)

# Restoring the model
model.restore()

# Creating the Generator to make the prediction
generator = Generator(model)

# A single random scalar in [-1, 1) seeds the autoregressive generation.
seed = np.random.uniform(low=-1.0, high=1.0)
random_input = [[seed]]
random_inputs = []
# for i in range(num_hidden-1):
# NOTE(review): whitespace-mangled fragment. It opens mid-way through an
# argparse `add_argument(...)` call inside an unseen `set_args()` definition,
# then runs a `__main__` section (build Model, pick device, build AudioData
# dataset) that appears to continue past this view. Left byte-identical
# because the surrounding definitions are not visible here.
type=bool, default=False, help='whether to resume training existing models') args = parser.parse_args() return args if __name__ == '__main__': args = set_args() # construct model wave_model = Model(args.x_len, num_channels=1, num_classes=args.num_classes, num_blocks=args.num_blocks, num_layers=args.num_layers, num_hidden=args.num_hidden, kernel_size=args.kernel_size) if not (args.device == 'default'): wave_model.set_device(torch.device(args.device)) # create dataset and dataloader filelist = list_files(args.data) dataset = AudioData(filelist, args.x_len, y_len=wave_model.output_width - 1, num_classes=args.num_classes, store_tracks=True)
parser.add_argument('--stopping_loss', type=float, default=0.1,
                    help='loss at which training stops')
FLAGS, unparsed = parser.parse_known_args()

SAMPLE_RATE = 24000

# One training batch built from the vocal stem.
inputs, targets = make_batch('assets/SMvocals.wav')
num_time_samples = inputs.shape[1]
num_channels = 1
gpu_fraction = 1

model = Model(num_time_samples=num_time_samples,
              num_channels=num_channels,
              gpu_fraction=gpu_fraction,
              num_layers=FLAGS.num_layers or 5,  # fall back to 5 when unset/zero
              learning_rate=FLAGS.learning_rate,
              stopping_loss=FLAGS.stopping_loss)

# Time the training run.
tic = time()
model.train(inputs, targets)
toc = time()
print('Training took {} seconds.'.format(toc - tic))

generator = Generator(model)

# Get first sample of input
input_ = inputs[:, 0:1, 0]
tic = time()
from IPython.display import Audio

# get_ipython().magic(u'matplotlib inline')

# In[ ]:

# Build a single (inputs, targets) batch from the demo recording.
inputs, targets = make_batch('assets/voice.wav')
num_time_samples = inputs.shape[1]
num_channels = 1
gpu_fraction = 1.0

model = Model(num_time_samples=num_time_samples,
              num_channels=num_channels,
              gpu_fraction=gpu_fraction)

# Notebook playback of the input waveform (assumes 44.1 kHz audio -- TODO confirm).
Audio(inputs.reshape(inputs.shape[1]), rate=44100)

# In[ ]:

# Time the training run.
tic = time()
model.train(inputs, targets)
toc = time()
print('Training took {} seconds.'.format(toc-tic))
from time import time

from wavenet.utils import make_batch
from wavenet.models import Model, Generator

num_channels = 1
gpu_fraction = 1.0
num_classes = 2048  # quantization levels for the softmax output

# Quantize the waveform into `num_classes` levels.
inputs, targets = make_batch('assets/voice.wav', num_classes)
num_time_samples = inputs.shape[1]

# Debug: confirm batch shapes. Converted from the Python-2-only
# `print a, b, c` statement to a call, matching the print() used below.
print(inputs.shape, targets.shape, num_time_samples)

model = Model(
    # num_time_samples=num_time_samples,
    num_channels=num_channels,
    gpu_fraction=gpu_fraction,
    num_classes=num_classes,
    prob_model_type='softmax')

# Time the training run.
tic = time()
model.train(inputs, targets)
toc = time()
print('Training took {} seconds.'.format(toc - tic))
duration = 2  # seconds of audio per training example

# Keep the sample count even; assumes `sample_rate`, `Quantification` and
# `WavList` are defined earlier in the file -- TODO confirm.
if (duration * sample_rate / 2) % 2 == 0:
    num_time_samples = int(sample_rate * duration)
else:
    num_time_samples = int(sample_rate * duration) - 1

num_channels = 1
gpu_fraction = 1.0
num_classes = Quantification
num_blocks = 2
num_layers = 12
num_hidden = 256

model = Model(num_time_samples=num_time_samples,
              num_channels=num_channels,
              gpu_fraction=gpu_fraction,
              num_classes=num_classes,
              num_blocks=num_blocks,
              num_layers=num_layers,
              num_hidden=num_hidden)

# One (inputs, targets) batch per wav file listed in WavList.
inputlist = []
targetlist = []
for w in WavList:
    path = 'assets/' + w + '.wav'
    inputs, targets = make_batch(path, sample_rate, duration=duration)
    inputlist.append(inputs)
    targetlist.append(targets)
inputlist = np.stack(inputlist)
def main():
    """Build the chunked dataset, train a WaveNet model, then generate audio.

    Reads all configuration from `get_arguments()`. Uses a lock file in the
    data directory so that concurrent processes do not build the .npy chunk
    database twice.
    """
    # Hoisted to the top of the function: the original did `import time`
    # half-way down, which makes `time` function-local and made the earlier
    # `time.sleep(1)` raise UnboundLocalError.
    import time

    ##############################
    # Get args
    args = get_arguments()
    ##############################

    ##############################
    # Build data chunk
    config_str = "_".join([
        str(args.sample_rate),
        str(args.sample_size),
        str(args.sliding_ratio),
        str(args.silence_threshold)
    ])
    files_dir = args.data_dir
    npy_dir = files_dir + '/' + config_str
    lock_file_db = files_dir + '/lock'
    # Wait for the end of construction by another process
    while os.path.isfile(lock_file_db):
        time.sleep(1)
    if not os.path.isdir(npy_dir):
        # Take the lock, build the database, and always release the lock.
        with open(lock_file_db, 'w'):
            pass
        try:
            build_db.main(files_dir, npy_dir,
                          args.sample_rate, args.sample_size,
                          args.sliding_ratio, args.silence_threshold)
        except Exception:
            # Remove a half-built database, then surface the error instead of
            # silently continuing without data (original used a bare `except`
            # and could also crash here if npy_dir was never created).
            if os.path.isdir(npy_dir):
                shutil.rmtree(npy_dir)
            raise
        finally:
            os.remove(lock_file_db)
    # data_statistics.bar_activations(save_dir, save_dir, sample_size_padded)
    ##############################

    ##############################
    # Init dirs
    utils.init_directory(args.logdir_root)
    if args.summary:
        logdir_summary = os.path.join(args.logdir_root, 'summary')
        utils.init_directory(logdir_summary)
    # Wave
    logdir_wav = os.path.join(args.logdir_root, 'wav')
    utils.init_directory(logdir_wav)
    # NOTE(review): the original also computed logdir_save =
    # os.path.join(args.logdir_root, 'save') but never used it; dropped here.
    ##############################

    ##############################
    # Get Data and Split them
    chunk_list = build_db.find_files(npy_dir, pattern="*.npy")
    # To always have the same train/validate split, init the random seed
    random.seed(210691)
    random.shuffle(chunk_list)
    # Adapt batch_size if we have very few files
    num_chunk = len(chunk_list)
    batch_size = min(args.batch_size, num_chunk)
    # Split 90 / 10
    training_chunks = chunk_list[:int(0.9 * num_chunk)]
    valid_chunks = chunk_list[int(0.9 * num_chunk):]
    ##############################

    ##############################
    # Create network
    ttt = time.time()
    model = Model(num_time_samples=args.sample_size,
                  num_channels=1,
                  num_classes=args.q_levels,
                  num_blocks=args.num_blocks,
                  num_layers=args.num_layers,
                  num_hidden=args.num_hidden,
                  filter_width=args.filter_width,
                  gpu_fraction=0.9)
    print("TTT: Instanciate network : {}".format(time.time() - ttt))
    ##############################

    ##############################
    # Train
    tic = time.time()
    # Pass the clamped batch_size: the original computed the min() above but
    # then passed args.batch_size, defeating the small-dataset adaptation.
    model.train(training_chunks, valid_chunks, batch_size,
                args.valid_freq, args.generate_freq)
    toc = time.time()
    print('Training took {} seconds.'.format(toc - tic))
    ##############################

    ##############################
    # Generate
    generator = Generator(model)
    # Get first sample of input
    # NOTE(review): `inputs` is undefined in this function (copied from a
    # script that loaded batches in memory); a seed sample must be provided
    # here before generation can work.
    input_ = inputs[:, 0:1, 0]
    tic = time.time()  # fixed: original called the `time` module itself
    predictions = generator.run(input_, 32000)
    toc = time.time()
    print('Generating took {} seconds.'.format(toc - tic))
    # NOTE(review): `predictions` is never saved or returned -- presumably it
    # should be written to logdir_wav; confirm intended behavior.
    ##############################
    return
model_path = 'networks/snare_20/'
wav_template = 'rendered/snare_20/final/{}.wav'
samples_path = 'data/drum_samples'

len_sample = 0.2  # [s] # 0.1s hi-hat_20, 0.2s snare
len_sample = int(16000 * len_sample)  # seconds -> samples at 16 kHz

# Load samples. Use a context manager: the original did
# pickle.load(open(samples_path, 'rb')) and leaked the file handle.
with open(samples_path, 'rb') as f:
    sorted_f = pickle.load(f)
samples = sorted_f['Snare']

# j = 450 # kick_20
# j = 840 # hihat_20
j = 948  # snare_20
init_values = [load_sample(s, len_sample)[0] for s in samples[j:j + 20]]
init_values = np.array(init_values)[:, 0, 0, 0]
# print() call instead of the Python-2-only print statement (same output).
print(init_values.shape)

# Create the model
model = Model(num_time_samples=len_sample, num_channels=1, gpu_fraction=1.0)
last_epoch = model.load_model(model_path)

# range instead of the Python-2-only xrange (identical iteration).
for i in range(1000):
    # Pick a random start
    init_value = np.random.choice(init_values)
    generator = Generator(model)
    generate_sample(generator, len_sample, wav_template.format(i),
                    init_value=init_value)
# NOTE(review): whitespace-mangled fragment. It opens a `for i in range(1):`
# sample-loading loop and a `for i in xrange(start, 10000000):` training loop
# whose body is cut off past this view (uses Python-2 `print`/`xrange`).
# Left byte-identical because the truncated loop bodies cannot be safely
# reconstructed from here.
# Sample random samples for i in range(1): # j = 450 # kick_20 # j = 840 # hihat_20 j = 948 # snare_20 batches = [load_sample(s, len_sample) for s in samples[j:j + 20]] inputs = np.array([b[0] for b in batches]) targets = np.array([b[1] for b in batches]) print 'Loaded {} samples'.format(len(batches)) all_inputs = inputs[:, 0, :, 0].flatten() save_sample(all_inputs, 'rendered/snare_20/inputs_{}.wav'.format(j)) # Create the model model = Model(num_time_samples=len_sample, num_channels=1, gpu_fraction=1.0) last_epoch = model.load_model(model_path) if last_epoch: start = last_epoch + 1 else: start = 0 inds = np.arange(len(batches)) for i in xrange(start, 10000000): # Shuffle random.shuffle(inds) inputs = inputs[inds] targets = targets[inds]
# NOTE(review): whitespace-mangled fragment. It opens mid-way through a call
# inside an unseen function (ending `... class_label=idx) return temp`), then
# runs a `__main__` section building a torch WaveNet model, and ends on a
# `for idx, label in enumerate(args.class_labels):` header whose body is past
# this view. Left byte-identical because both edges are truncated.
num_classes=args.num_classes, store_tracks=False, class_label=idx) return temp if __name__ == '__main__': args = load_parameters() # construct model wave_model = Model( layers=args.num_layers, num_classes=args.num_classes, blocks=args.num_blocks, kernel_size=args.kernel_size, dilation_channels=args.dilation_channels, residual_channels=args.residual_channels, skip_channels=args.skip_channels, end_channels=args.end_channels, bias=args.bias, output_length=args.output_width, ) if torch.cuda.is_available(): args.device = 'cuda' if not (args.device == 'default'): wave_model.set_device(torch.device(args.device)) # create dataset and dataloader datasets = [] meta = [] for idx, label in enumerate(args.class_labels):