    return npr.poisson(npr.gamma(r, p/(1-p), size=size))

def fit_maxlike(x, r_guess):
    # follows Wikipedia's section on negative binomial max likelihood
    assert np.var(x) > np.mean(x), "Likelihood-maximizing parameters don't exist!"
    loglike = lambda r, p: np.sum(negbin_loglike(r, p, x))
    p = lambda r: np.sum(x) / np.sum(r+x)
    rprime = lambda r: grad(loglike)(r, p(r))
    r = newton(rprime, r_guess)
    return r, p(r)

if __name__ == "__main__":
    # generate data
    npr.seed(0)
    data = negbin_sample(r=5, p=0.5, size=1000)

    # fit likelihood-extremizing parameters
    r, p = fit_maxlike(data, r_guess=1)

    # report fit
    print('Fit parameters:')
    print('r={r}, p={p}'.format(r=r, p=p))

    print('Check that we are at a local stationary point:')
    loglike = lambda r, p: np.sum(negbin_loglike(r, p, data))
    grad_both = multigrad(loglike, argnums=[0, 1])
    print(grad_both(r, p))

    import matplotlib.pyplot as plt
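
# The `newton` helper called in fit_maxlike is defined outside this excerpt.
# A minimal sketch consistent with the call site, assuming it wraps
# scipy.optimize.newton and lets autograd supply the derivatives (this
# definition is a reconstruction, not taken from the original listing):
import scipy.optimize

def newton(f, x0):
    # fprime enables Newton's method; fprime2 upgrades it to Halley's method.
    return scipy.optimize.newton(f, x0, fprime=grad(f), fprime2=grad(grad(f)))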
return "".join([chr(np.argmax(c)) for c in one_hot_matrix]) def build_dataset(filename, sequence_length, alphabet_size, max_lines=-1): """Loads a text file, and turns each line into an encoded sequence.""" with open(filename) as f: content = f.readlines() content = content[:max_lines] content = [line for line in content if len(line) > 2] # Remove blank lines seqs = np.zeros((sequence_length, len(content), alphabet_size)) for ix, line in enumerate(content): padded_line = (line + " " * sequence_length)[:sequence_length] seqs[:, ix, :] = string_to_one_hot(padded_line, alphabet_size) return seqs if __name__ == '__main__': npr.seed(1) input_size = output_size = 128 # The first 128 ASCII characters are the common ones. state_size = 40 seq_length = 30 param_scale = 0.01 train_iters = 100 # Learn to predict our own source code. train_inputs = build_dataset(lstm_filename, seq_length, input_size, max_lines=60) pred_fun, loglike_fun, num_weights = build_rnn(input_size, state_size, output_size) def print_training_prediction(weights): print("Training text Predicted text") logprobs = np.asarray(pred_fun(weights, train_inputs)) for t in range(logprobs.shape[1]):