def Von_Luxburg_approximations(t):
    """Return the Von Luxburg degree-based approximations of MFPT and
    commute time for the transition matrix t: MFPT(u, v) ~ vol(G) / d_v and
    CT(u, v) ~ vol(G) * (1/d_u + 1/d_v)."""
    ones = np.ones_like(t)
    n = t.shape[0]
    # d_v is the in-degree, which is the column sum. this
    # multiplication broadcasts it to an n x n array, so d_v[u, v] is
    # the in-degree of v
    d_v = np.sum(t, axis=0) * ones
    # transpose to get d_u
    d_u = d_v.T
    # vol(G) is the sum of degrees, which we interpret in the directed
    # case as the sum of out-degrees (which is anyway equal to the sum
    # of in-degrees); because the weights are transition probabilities,
    # each out-degree = 1, so vol(G) = n.
    vol_G = float(n)
    mfpt_vla = vol_G * 1.0 / d_v
    set_self_transition_zero(mfpt_vla)
    ct_vla = vol_G * (1.0 / d_u + 1.0 / d_v)
    set_self_transition_zero(ct_vla)
    return mfpt_vla, ct_vla
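# Illustrative usage of Von_Luxburg_approximations (a sketch, not part of the
# original module). It assumes numpy is imported as np at module level and
# that set_self_transition_zero, defined elsewhere in this module, zeroes the
# diagonal of its argument in place. The toy transition matrix is an invented
# example: a simple random walk on a 3-node path graph, so each row sums to 1
# as the function expects.
def _demo_Von_Luxburg_approximations():
    A = np.array([[0.0, 1.0, 0.0],
                  [1.0, 0.0, 1.0],
                  [0.0, 1.0, 0.0]])       # adjacency of a 3-node path graph
    t = A / A.sum(axis=1, keepdims=True)  # row-stochastic transition matrix
    mfpt_vla, ct_vla = Von_Luxburg_approximations(t)
    print("approximate MFPT:\n", mfpt_vla)
    print("approximate commute times:\n", ct_vla)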
def compare_MFPT_estimate_RW_v_exact(dirname):
    """Compare exact MFPT values against random-walk estimates obtained with
    several sample lengths, writing Pearson, Spearman and (where feasible)
    Kendall correlations to a .tex file in dirname."""

    def get_indices_of_common_entries(a, b):
        """Return the indices in a of the entries of b; every entry of b is
        expected to be present in a."""
        result = []
        for i in b:
            try:
                j = a.index(i)
                result.append(j)
            except ValueError:
                pass
        assert len(result) == len(b)
        return result

    filename = dirname + "/MFPT.dat"
    mfpt = np.genfromtxt(filename)
    filename = dirname + "/compare_MFPT_estimate_RW_v_exact.tex"
    f = open(filename, "w")
    if "depth_2" in dirname:
        lengths = [1298, 12980, 129800]
    elif "depth_1" in dirname:
        # NB with 18, too few values to run correlation
        lengths = [180, 1800, 18000]
    else:
        raise ValueError("Expected 'depth_1' or 'depth_2' in dirname")
    for length in lengths:
        # mfpte: read, mask nan, mask len < 5 (100x100)
        filename = (dirname + "/estimate_MFPT_using_RW_" + str(length)
                    + "/MFPT.dat")
        mfpte = np.genfromtxt(filename, usemask=True,
                              missing_values="NaN,nan")
        filename = (dirname + "/estimate_MFPT_using_RW_" + str(length)
                    + "/MFPT_len.dat")
        mfpte_len = np.genfromtxt(filename)
        min_vals = 5  # an attempt at reliability
        print("%d of %d values of length < %d" %
              (np.sum(mfpte_len < min_vals), len(mfpte_len)**2, min_vals))
        mfpte[mfpte_len < min_vals] = np.ma.masked

        # mfpt: copy, then select sampled entries only to make it 100x100
        mfpt_tmp = mfpt.copy()
        # need to restrict mfpt_tmp to the 100x100 entries which are
        # indicated by the trees_sampled.dat file
        filename = (dirname + "/estimate_MFPT_using_RW_" + str(length)
                    + "/trees_sampled.dat")
        trees_sampled = open(filename).read().strip().split("\n")
        filename = dirname + "/all_trees.dat"
        all_trees = open(filename).read().strip().split("\n")
        indices = get_indices_of_common_entries(all_trees, trees_sampled)
        # the selected indices are into both the rows and columns
        mfpt_tmp = mfpt_tmp[indices][:, indices]

        # mfpte will contain the self-hitting time on the diagonal: we
        # want zero there for a true comparison.
        set_self_transition_zero(mfpte)

        # reshape both to 1-D so they can be correlated
        mfpte = mfpte.reshape(len(mfpte)**2)
        mfpt_tmp = mfpt_tmp.reshape(len(mfpt_tmp)**2)

        # correlate, respecting the mask
        f.write("Number of samples: " + str(length) + "\n")
        corr, p = get_pearson_r(mfpt_tmp, mfpte)
        f.write("Pearson R correlation " + str(corr) + "; ")
        f.write("p-value " + str(p) + ". ")
        corr, p = get_spearman_rho(mfpt_tmp, mfpte)
        f.write("Spearman rho correlation " + str(corr) + "; ")
        f.write("p-value " + str(p) + ". ")
        if len(mfpte) < 1000:
            corr, p = get_kendall_tau(mfpt_tmp, mfpte)
            f.write("Kendall tau correlation " + str(corr) + "; ")
            f.write("p-value " + str(p) + ". ")
        else:
            f.write("Omitting Kendall tau because it is infeasible "
                    "for large matrices. ")
        f.write("\n")
    f.close()
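# The core step of the comparison above is: mask unreliable estimates,
# restrict the exact matrix to the sampled rows/columns, flatten both, and
# correlate while honouring the mask. A minimal stand-alone sketch of that
# step follows; it is not part of the original module, the data are invented,
# and scipy.stats.mstats.pearsonr is used directly as a stand-in for the
# module's get_pearson_r helper (an assumption about what that helper does).
def _demo_masked_correlation():
    from scipy.stats import mstats
    exact = np.arange(16, dtype=float).reshape(4, 4)
    estimate = np.ma.masked_invalid(exact + np.random.normal(0.0, 0.5, (4, 4)))
    estimate[0, 1] = np.ma.masked     # pretend this entry was unreliable
    indices = [0, 1, 3]               # rows/columns that were actually sampled
    exact_sub = exact[indices][:, indices]
    estimate_sub = estimate[indices][:, indices]
    r, p = mstats.pearsonr(exact_sub.flatten(), estimate_sub.flatten())
    print("Pearson r", r, "p-value", p)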