def cached(func):
    """Decorator that memoizes ``func``'s return value on disk.

    Results are stored as gzipped pickles under the module-level ``cachedir``
    (created via the project helper ``mkdir``), one file per distinct call.
    The cache key is derived from the resolved call arguments plus the
    function's closure cells; numpy arrays and scipy CSR matrices are
    replaced by SHA1 digests of their raw buffers so large inputs key
    cheaply and deterministically.

    NOTE: the key is now a sha1 over a canonical repr rather than the
    builtin ``hash()`` of a frozenset. ``hash()`` is salted per-process
    (PYTHONHASHSEED randomization), which silently defeated the on-disk
    cache across runs; the sha1 key is stable. Existing cache files keyed
    the old way will simply be recomputed once.
    """
    mkdir(cachedir)
    cachebase = os.path.join(cachedir, func.__module__ + func.__name__)

    def replace_arrays(v):
        # Summarize array-likes by a content digest so they can take part
        # in a repr-based cache key without hashing huge buffers in Python.
        if isinstance(v, np.ndarray):
            return hashlib.sha1(v).hexdigest()
        if isinstance(v, scipy.sparse.csr_matrix):
            # Public class path; ``scipy.sparse.csr.csr_matrix`` is a
            # private module layout that newer scipy versions hide.
            out = hashlib.sha1(v.data)
            out.update(v.indices)
            out.update(v.indptr)
            return out.hexdigest()
        return v

    @wraps(func)
    def wrapped(*args, **kwargs):
        # Resolve *args/**kwargs to a single name->value mapping so the key
        # does not depend on how the caller spelled the call.
        argdict = {k: replace_arrays(v)
                   for k, v in inspect.getcallargs(func, *args, **kwargs).items()}
        closurevals = [replace_arrays(cell.cell_contents)
                       for cell in func.__closure__ or []]

        # Deterministic key: sort the items for a canonical ordering, then
        # sha1 the repr (works on both Python 2 and 3).
        keysrc = repr(sorted(argdict.items()) + closurevals)
        key = hashlib.sha1(keysrc.encode('utf-8')).hexdigest()
        cachefile = cachebase + '.' + key

        if os.path.isfile(cachefile):
            # Cache hit: unpickle the stored result.
            with gzip.open(cachefile, 'r') as infile:
                return pickle.load(infile)

        # Cache miss: compute, then persist with the highest pickle protocol.
        value = func(*args, **kwargs)
        with gzip.open(cachefile, 'w') as outfile:
            pickle.dump(value, outfile, protocol=-1)
        return value

    return wrapped
# NOTE(review): the prints below appear to be the tail of a function whose
# definition starts before this chunk; indentation reconstructed — confirm
# against the full file.
print("Singular vector ", d, " Singular value, ", S[d])
print("Right: ")
print(top_k(5, pi_vd))
print("Left: ")
print(top_k(5, pi_ud))


if __name__ == "__main__":
    # Each experiment "run" gets its own numbered results directory.
    run = 3
    results_dir = os.path.join("results", "ap_indiv", "run%03d" % run)

    # Make sure the results directory exists
    from pgmult.internals.utils import mkdir
    if not os.path.exists(results_dir):
        print("Making results directory: ", results_dir)
        mkdir(results_dir)

    # Load the AP news documents
    Xs, words = load()

    # N_docs = 1
    docs = slice(0,20)   # use the first 20 documents
    T_split = 10         # time steps held out from the end of each document

    # Keep documents with more than 5 * T_split time steps. (Sibling scripts
    # use 2 * T_split with a matching comment; the code here tests 5x.)
    # NOTE(review): slicing the filter() result below is Python-2-only —
    # under Python 3 filter() returns an iterator and Xfilt[docs] fails.
    Xfilt = filter(lambda X: X.shape[0] > 5*T_split, Xs)
    Xtrain = [X[:-T_split] for X in Xfilt[docs]]
    Xtest = [X[-T_split:] for X in Xfilt[docs]]

    # Perform inference for a range of latent state dimensions and models
    # (experiment body presumably continues past this chunk).
    N_samples = 200
# NOTE(review): this chunk begins mid-call — ``legendargs`` is a trailing
# keyword argument of a plotting/legend call whose opening precedes this view.
            legendargs={ "columnspacing": 0.75, "handletextpad": 0.1 })
fig.savefig(os.path.join(results_dir, "legend.pdf"))


if __name__ == "__main__":
    # Each experiment "run" gets its own numbered results directory.
    run = 5
    results_dir = os.path.join("results", "dna", "run%03d" % run)

    # Make sure the results directory exists
    from pgmult.internals.utils import mkdir
    if not os.path.exists(results_dir):
        print("Making results directory: ", results_dir)
        mkdir(results_dir)

    # Load data
    Xs, key = load_data()

    # Split data into two: train on all but the last T_split steps of the
    # first sequence, test on those last T_split steps.
    T_end = Xs[0].shape[0]
    T_split = 10
    Xtrain = [Xs[0][:T_end - T_split, :]]
    Xtest = Xs[0][T_end - T_split:T_end, :]
    K = len(key)   # presumably the observation alphabet size — TODO confirm

    # Perform inference for a range of latent state dimensions and models
    # (experiment body presumably continues past this chunk).
    N_samples = 1000
    all_results = []
    # Ds = np.array([2, 3, 4, 5, 6])
def fit_joint_corpus():
    """Fit each candidate model to a joint AP-news corpus and plot results.

    Splits the first ``N_docs`` sufficiently-long documents into a training
    prefix and a ``T_split``-step held-out suffix, fits every model for each
    latent dimension in ``Ds`` (reloading cached fits from gzipped pickles
    when available), then produces log-likelihood plots.
    """
    run = 13
    results_dir = os.path.join("results", "ap", "run%03d" % run)

    # Make sure the results directory exists
    from pgmult.internals.utils import mkdir
    if not os.path.exists(results_dir):
        print("Making results directory: ", results_dir)
        mkdir(results_dir)

    # Load the AP news documents
    Xs, words = load()
    N_docs = 10
    T_split = 10

    # Filter out documents shorter than 2 * T_split.
    # FIX: the original sliced the result of filter() directly, which only
    # works on Python 2 (py3 filter() returns a lazy iterator); a list
    # comprehension behaves identically on both.
    Xfilt = [X for X in Xs if X.shape[0] > 2*T_split]
    Xtrain = [X[:-T_split] for X in Xfilt[:N_docs]]
    Xtest = [X[-T_split:] for X in Xfilt[:N_docs]]

    # Perform inference for a range of latent state dimensions and models
    N_samples = 500
    all_results = []
    Ds = np.array([10])
    models = ["SBM-LDS", "HMM", "Raw LDS" , "LNM-LDS", "SBM-LDS (pMCMC)"]
    methods = [fit_lds_model, fit_hmm, fit_gaussian_lds_model,
               fit_ln_lds_model, fit_lds_model_with_pmcmc]

    for D in Ds:
        D_results = []
        for model, method in zip(models, methods):
            results_file = os.path.join(results_dir,
                                        "results_%s_D%d.pkl.gz" % (model, D))
            if os.path.exists(results_file):
                # Reuse a cached fit rather than rerunning inference.
                print("Loading from: ", results_file)
                with gzip.open(results_file, "r") as f:
                    D_model_results = cPickle.load(f)
            else:
                print("Fitting ", model, " for D=",D)
                D_model_results = method(Xtrain, Xtest, D, N_samples)
                with gzip.open(results_file, "w") as f:
                    print("Saving to: ", results_file)
                    cPickle.dump(D_model_results, f, protocol=-1)
            D_results.append(D_model_results)
        all_results.append(D_results)

    # Plot log likelihoods for the results using one D
    res_index = 0
    plot_log_likelihood(all_results[res_index], models, run,
                        outname="train_ll_vs_time_D%d.pdf" % Ds[res_index])
    plot_pred_log_likelihood(all_results[res_index], models, run,
                             outname="pred_ll_vs_time_D%d.pdf" % Ds[res_index])

    # Make a bar chart of all the results
    plot_pred_ll_vs_D(all_results, Ds, Xtrain, Xtest, run)
    plt.show()
def fit_joint_corpus():
    """Fit every candidate model to a joint AP-news corpus and plot results.

    The first ``N_docs`` documents longer than ``2 * T_split`` are split
    into a training prefix and a ``T_split``-step held-out suffix. For each
    latent dimension in ``Ds``, each model is either loaded from a cached
    gzipped pickle or fit from scratch and cached, after which training and
    predictive log-likelihood plots are produced.
    """
    run = 13
    results_dir = os.path.join("results", "ap", "run%03d" % run)

    # Create the run's results directory on first use.
    from pgmult.internals.utils import mkdir
    if not os.path.exists(results_dir):
        print("Making results directory: ", results_dir)
        mkdir(results_dir)

    # Load the AP news documents.
    Xs, words = load()
    N_docs = 10
    T_split = 10

    # Keep only documents long enough to donate a held-out suffix.
    long_docs = [X for X in Xs if X.shape[0] > 2 * T_split]
    Xtrain = [X[:-T_split] for X in long_docs[:N_docs]]
    Xtest = [X[-T_split:] for X in long_docs[:N_docs]]

    # Inference configuration: sample count, latent dimensions, and the
    # (label, fitter) pair for each model under comparison.
    N_samples = 500
    all_results = []
    Ds = np.array([10])
    model_fitters = [
        ("SBM-LDS", fit_lds_model),
        ("HMM", fit_hmm),
        ("Raw LDS", fit_gaussian_lds_model),
        ("LNM-LDS", fit_ln_lds_model),
        ("SBM-LDS (pMCMC)", fit_lds_model_with_pmcmc),
    ]
    models = [label for label, _ in model_fitters]

    for D in Ds:
        fits_for_D = []
        for model, fitter in model_fitters:
            results_file = os.path.join(
                results_dir, "results_%s_D%d.pkl.gz" % (model, D))
            if os.path.exists(results_file):
                # A cached fit exists; reload it instead of re-running.
                print("Loading from: ", results_file)
                with gzip.open(results_file, "r") as f:
                    fit = pickle.load(f)
            else:
                print("Fitting ", model, " for D=", D)
                fit = fitter(Xtrain, Xtest, D, N_samples)
                with gzip.open(results_file, "w") as f:
                    print("Saving to: ", results_file)
                    pickle.dump(fit, f, protocol=-1)
            fits_for_D.append(fit)
        all_results.append(fits_for_D)

    # Plot log likelihoods for a single choice of D.
    res_index = 0
    plot_log_likelihood(all_results[res_index], models, run,
                        outname="train_ll_vs_time_D%d.pdf" % Ds[res_index])
    plot_pred_log_likelihood(all_results[res_index], models, run,
                             outname="pred_ll_vs_time_D%d.pdf" % Ds[res_index])

    # Bar chart summarizing predictive likelihood across all D.
    plot_pred_ll_vs_D(all_results, Ds, Xtrain, Xtest, run)
    plt.show()