# NOTE(review): this chunk has lost its line breaks and indentation (many
# statements fused onto one physical line), so it is not runnable as-is and
# the original if-nesting cannot be recovered with certainty from this view.
# Read left to right, the statements:
#   * reset the validation iterator and zero the three recurrent hidden
#     states (minibatch_size x n_hid), the attention kappa
#     (minibatch_size x att_size) and the window vector w
#     (minibatch_size x n_chars); read the sampling bias from args.bias
#   * under args.sample, pull compiled predict/attention/sample functions
#     out of checkpoint_dict; under args.write, one-hot encode the sample
#     string through `vocabulary` and build the conditioning tensor c_mb
#     (time, minibatch, vocab) plus an all-ones c_mb_mask
#   * with no --sample_length, raise ValueError("Broken..."); the trailing
#     statements size the automatic stopping heuristic (30 timesteps per
#     character, capped at 25000 steps) described in Graves' paper
# BUG(review): `np.zeros(...).astype(c_mb.dtype)` reads c_mb before this
# chunk ever assigns it — presumably an earlier (unseen) `next(valid_itr)`
# provides it, as in the sibling chunk of this file; confirm before
# reformatting or reordering anything here.
valid_itr.reset() prev_h1, prev_h2, prev_h3 = [np_zeros((minibatch_size, n_hid)) for i in range(3)] prev_kappa = np_zeros((minibatch_size, att_size)) prev_w = np_zeros((minibatch_size, n_chars)) bias = args.bias if args.sample is not None: predict_function = checkpoint_dict["predict_function"] attention_function = checkpoint_dict["attention_function"] sample_function = checkpoint_dict["sample_function"] if args.write is not None: sample_string = args.write print("Sampling using sample string %s" % sample_string) oh = dense_to_one_hot( np.array([vocabulary[c] for c in sample_string]), vocabulary_size) c_mb = np.zeros( (len(oh), minibatch_size, oh.shape[-1])).astype(c_mb.dtype) c_mb[:len(oh), :, :] = oh[:, None, :] c_mb = c_mb[:len(oh)] c_mb_mask = np.ones_like(c_mb[:, :, 0]) if args.sample_length is None: raise ValueError("Broken...") # Automatic sampling stop as described in Graves' paper # Assume an average of 30 timesteps per char n_steps = 30 * c_mb.shape[0] step_inc = n_steps max_steps = 25000 max_steps_buf = max_steps + n_steps
# NOTE(review): line breaks and indentation were lost in this chunk (many
# statements fused onto one physical line) AND it is truncated mid-call —
# `rvals = sample_function(init_x, c_mb, c_mb_mask, prev_h1,` has no
# closing parenthesis here — so it cannot be reformatted safely from this
# view; the remaining arguments and loop body live past the visible end.
# Read left to right, the statements:
#   * draw one batch (X_mb, X_mb_mask, c_mb, c_mb_mask) from valid_itr,
#     then reset the iterator
#   * zero prev_h1/prev_h2/prev_h3 (minibatch_size x n_hid each),
#     prev_kappa (minibatch_size x att_size) and prev_w
#     (minibatch_size x n_chars)
#   * under args.sample, load compiled predict/attention/sample functions
#     from checkpoint_dict; under args.write, one-hot encode the sample
#     string via `vocabulary`, overwrite c_mb with the conditioning tensor
#     (time, minibatch, vocab) and c_mb_mask with all ones
#   * require --sample_length (else ValueError "NYI - use -sl or
#     --sample_length "); set fixed_steps from it, start `completed = []`,
#     seed init_x as zeros shaped like one input frame, and loop
#     fixed_steps times calling sample_function with the recurrent state
fixed_steps = args.sample_length
X_mb, X_mb_mask, c_mb, c_mb_mask = next(valid_itr) valid_itr.reset() prev_h1, prev_h2, prev_h3 = [ np_zeros((minibatch_size, n_hid)) for i in range(3) ] prev_kappa = np_zeros((minibatch_size, att_size)) prev_w = np_zeros((minibatch_size, n_chars)) if args.sample is not None: predict_function = checkpoint_dict["predict_function"] attention_function = checkpoint_dict["attention_function"] sample_function = checkpoint_dict["sample_function"] if args.write is not None: sample_string = args.write print("Sampling using sample string %s" % sample_string) oh = dense_to_one_hot( np.array([vocabulary[c] for c in sample_string]), vocabulary_size) c_mb = np.zeros( (len(oh), minibatch_size, oh.shape[-1])).astype(c_mb.dtype) c_mb[:len(oh), :, :] = oh[:, None, :] c_mb = c_mb[:len(oh)] c_mb_mask = np.ones_like(c_mb[:, :, 0]) if args.sample_length is None: raise ValueError("NYI - use -sl or --sample_length ") else: fixed_steps = args.sample_length completed = [] init_x = np.zeros_like(X_mb[0]) for i in range(fixed_steps): rvals = sample_function(init_x, c_mb, c_mb_mask, prev_h1,
from theano.tensor.shared_randomstreams import RandomStreams
from kdllib import gradient_clipping, make_weights, make_biases
from kdllib import dense_to_one_hot, adam
from kdllib import fetch_fruitspeech_spectrogram
from kdllib import midiwrap, fetch_nottingham
import cPickle as pickle

midiread, midiwrite = midiwrap()

# Seed value must fit in 32 bits: a Python long would break on 32-bit
# machines. (np/theano are assumed to be imported earlier in the file.)
np.random.seed(0xbeef)
rng = RandomStreams(seed=np.random.randint(1 << 30))
theano.config.warn.subtensor_merge_bug = False

# Nottingham MIDI dataset; each sequence is one-hot encoded with 2 classes.
key_range, dt, dataset = fetch_nottingham()
dataset = [dense_to_one_hot(seq, n_classes=2) for seq in dataset]

DEBUG = True


def fast_dropout(rng, x, debug=DEBUG):
    """Fast dropout: scale activations by Gaussian noise drawn from N(1, 1).

    When ``debug`` is true (the module default), ``x`` is passed through
    unchanged so runs stay deterministic; otherwise each activation is
    multiplied by a sample from a normal distribution with mean 1.
    """
    if debug:
        return x
    noise = rng.normal(size=x.shape, avg=1., dtype=theano.config.floatX)
    return x * noise