def plot_grouped_adjacency():
    """Plot a decile-grouped adjacency matrix of the faculty-hiring graph.

    Rows/columns are prestige deciles; cell (j, k) counts edges from PhD
    institutions in decile j to hiring institutions in decile k.  Writes
    results/grouped_adjacency.eps.
    """
    fig, ax = plt.subplots(1, 1, figsize=(6, 4))
    for l, (g, title) in enumerate([(g_cs, "Computer Science")]):
        # Vertices are ordered by prestige in the dataset
        adj = nx.to_numpy_matrix(g, dtype=int)
        # Scale adjacency matrix by a vertex's outdegree.
        # Edges i -> j are from row_i -> col_j
        groups = np.linspace(0, 100, 11)  # decile boundaries 0, 10, ..., 100
        grouped_by_row = []
        for i, row in enumerate(adj):
            # Expand each matrix row into one entry per edge, tagged with the
            # target's rank, then histogram those targets into deciles.
            # (row is a 1xN np.matrix, hence the row[0].tolist()[0] unwrap.)
            in_edges = []
            for rank, edges in enumerate(row[0].tolist()[0]):
                for j in range(int(edges)):
                    in_edges.append(rank)
            grouped_row, _ = np.histogram(in_edges, groups)
            grouped_by_row.append(grouped_row)
        grouped = [np.zeros(len(groups) - 1) for i in range(len(groups) - 1)]
        for i, row in enumerate(grouped_by_row):
            for j in range(len(groups) - 1):
                # NOTE(review): this compares a row INDEX against percentile
                # boundaries, which only maps rows to deciles correctly if the
                # graph has ~100 vertices — TODO confirm against the dataset.
                if i <= groups[j + 1]:
                    for k, elem in enumerate(row):
                        grouped[j][k] += elem
                    break
        colors = iter(cm.rainbow(np.linspace(0, 1, 3)))
        # NOTE(review): `g` here shadows the graph loop variable above; the
        # graph is no longer needed at this point, so this is harmless.
        r, g, b = next(colors)[:3]  # Unpack RGB vals (0. to 1., not 0 to 255).
        # Linear ramp from white to the chosen rainbow color.
        cdict = {
            'red': ((0.0, 1.0, 1.0), (1.0, r, r)),
            'green': ((0.0, 1.0, 1.0), (1.0, g, g)),
            'blue': ((0.0, 1.0, 1.0), (1.0, b, b))
        }
        custom_cmap = LinearSegmentedColormap('custom_cmap', cdict)
        cax = ax.matshow(grouped, cmap=custom_cmap)
        ax.xaxis.set_major_locator(ticker.MultipleLocator(1))
        ax.yaxis.set_major_locator(ticker.MultipleLocator(1))
        # Tick labels are the decile boundaries; this relies on the
        # MultipleLocator(1) above placing one tick per matrix cell.
        labels = ['%.0f' % group for group in groups]
        ax.set_xticklabels(labels, fontsize=12)
        ax.set_yticklabels(labels, fontsize=12)
        if l == 0:
            ax.set_ylabel(r"Prestige of PhD Institution, $\pi$", fontsize=16)
            ax.set_xlabel(r"Prestige of Hiring Institution, $\pi$", fontsize=16)
        plot_utils.finalize(ax)
    plt.tight_layout()
    plt.savefig("results/grouped_adjacency.eps", format='eps', dpi=1000)
    plt.clf()
def plot_centrality():
    """Scatter average shortest-path length against university prestige.

    Works on the largest strongly-connected component of the CS
    faculty-hiring graph, annotates three example institutions, overlays a
    least-squares trend line, and writes results/centrality.eps.
    """
    def _annotate_pair(label, xy, offset, z_front):
        # Draw the gray label plus a thicker white "halo" copy underneath so
        # the text stays legible on top of the scatter points.
        plt.annotate(label, xy=xy, color='0.4', xytext=offset,
                     textcoords='offset points', ha='center', va='bottom',
                     arrowprops={'arrowstyle': '-', 'color': '0.4'},
                     fontsize=plot_utils.LEGEND_SIZE, zorder=z_front)
        plt.annotate(label, xy=xy, color='white', xytext=offset,
                     textcoords='offset points', ha='center', va='bottom',
                     arrowprops={'arrowstyle': '-', 'color': 'white', 'lw': '2.1'},
                     fontsize=plot_utils.LEGEND_SIZE, zorder=z_front - 1)

    colors = iter(cm.rainbow(np.linspace(0, 1, 3)))
    fig = plt.figure(figsize=(6.0, 4.))
    ax = plt.gca()
    for i, (faculty_graph, school_metadata, dept) in enumerate(
            [(g_cs, meta_cs, "Computer Science")]):
        x = []
        y = []
        max_pi = 0
        max_c = 0
        # Path lengths are only well-defined inside the largest strongly
        # connected component.
        ccs = sorted(nx.strongly_connected_components(faculty_graph),
                     key=len, reverse=True)
        for vertex in ccs[0]:
            c = 0
            # BUG FIX: on Python 3, dict.values() is a view and
            # np.nanmean(view) builds a 0-d object array instead of
            # averaging — materialize the lengths as a list first.
            path_lengths = list(nx.single_source_shortest_path_length(
                faculty_graph, source=vertex).values())
            if len(path_lengths) > 0:
                c = np.nanmean(path_lengths)
            label = school_metadata[vertex]['institution']
            pi = school_metadata[vertex]['pi']
            x.append(pi)
            y.append(c)
            max_pi = max(max_pi, pi)
            max_c = max(max_c, c)
            # Call out three example schools spanning the prestige range.
            if label == 'MIT':
                _annotate_pair('MIT', (pi, c), (50, 50), 2)
            elif label == 'University of Colorado, Boulder':
                _annotate_pair('University of Colorado,\nBoulder',
                               (pi, c), (25, -45), 4)
            elif label == 'New Mexico State University':
                _annotate_pair('New Mexico\nState University',
                               (pi, c), (25, -100), 6)
        ax.scatter(x, y, edgecolor='w', clip_on=False, zorder=1,
                   color=next(colors), s=28)
        slope, intercept, r_value, p_value, std_err = linregress(x, y)
        # Trend line across the full prestige range (note: the original
        # shadowed the loop variable `i` here; use a fresh name instead).
        endpoints = [0, max(x)]
        plt.plot(endpoints, [slope * e + intercept for e in endpoints],
                 color=plot_utils.ALMOST_BLACK,
                 label='Slope: %.4f\n$R^{2}$: %.4f' % (slope, r_value**2),
                 zorder=7)
    plt.xlabel(r'Universities Sorted by Prestige, $\pi$',
               fontsize=plot_utils.LABEL_SIZE)
    plt.ylabel(r'Average Path Length, $\langle \ell \rangle$',
               fontsize=plot_utils.LABEL_SIZE)
    plot_utils.finalize(ax)
    plt.xlim(0, max_pi)
    plt.ylim(1, max_c)
    plt.legend(loc='upper left', fontsize=plot_utils.LEGEND_SIZE, frameon=False)
    plt.savefig("results/centrality.eps", bbox_inches='tight', format='eps',
                dpi=1000)
    plt.clf()
def plot_random_hop_size(cache_dirs, ylim=(0, 1)):
    """Plot average epidemic size vs seed prestige for SI with random hops.

    Args:
        cache_dirs: a (title, cache_dir) tuple; the pickle at cache_dir maps
            "size" -> {jump probability q -> {seed node -> [sizes]}}.
        ylim: y-axis limits of the finished figure.

    Writes results/size-results-of-ALL-SI-random-hops.eps.
    """
    fig, ax = plt.subplots(1, 1, figsize=(6.0, 4.0), sharey=True)
    (title, cache_dir) = cache_dirs
    # Close the cache file deterministically (the original leaked the handle).
    with open(cache_dir, 'rb') as f:
        cache = pickle.load(f)
    meta = meta_of_dir(cache_dir)
    graph = graph_of_dir(cache_dir)  # NOTE(review): unused here; kept for parity with siblings
    # BUG FIX: the original tested `node is bad_node_of_dir(...)` — an
    # identity comparison against a freshly returned object, which is not a
    # reliable equality test. Hoist the call and compare with ==.
    bad_node = bad_node_of_dir(cache_dir)
    results_size = defaultdict(list)
    for p in cache["size"].keys():
        for node, sizes in cache["size"][p].items():
            if node == bad_node:
                continue
            avg = np.average(sizes)
            if not np.isnan(avg) and not np.isinf(avg):
                results_size[p].append((meta[node]["pi"], avg))
        results_size[p] = sorted(results_size[p], key=lambda t: t[0])
    # Plot every other jump probability to keep the legend readable.
    filtered = sorted(cache["size"].keys())[1::2]
    colors = iter(cm.rainbow(np.linspace(0, 1, len(filtered))))
    markers = Line2D.filled_markers
    count = -1
    max_pi = 0  # robust when no series survives the filter
    for p, data in sorted(results_size.items(), key=lambda t: t[0]):
        if p not in filtered:
            continue
        c = next(colors)
        count += 1
        ax.scatter(*zip(*data), color=c, label='{0:.2f}'.format(p), s=28,
                   marker=markers[count], edgecolor='w', clip_on=False,
                   zorder=1)
        x = [pi for (pi, length) in data
             if not np.isnan(length) and not np.isinf(length)]
        max_pi = max(x)
        if p > 0:
            # Fit a logistic curve to this series.
            y = [length for (pi, length) in data
                 if not np.isnan(length) and not np.isinf(length)]
            popt, pcov = curve_fit(curve, np.array(x), np.array(y),
                                   bounds=(0., [1., 2., 200.]))
            ax.plot(x, curve(x, *popt), color=c)
    ax.set_xlim(0, max_pi)
    ax.set_xlabel(r'University Prestige, $\pi$', fontsize=plot_utils.LABEL_SIZE)
    ax.set_ylabel(r'Epidemic Size, $\frac{Y}{N}$', fontsize=plot_utils.LABEL_SIZE)
    plot_utils.finalize(ax)
    plt.legend(loc='center left', bbox_to_anchor=(1, 0.5),
               fontsize=plot_utils.LEGEND_SIZE,
               title="Jump\nProbability, $q$", scatterpoints=1, frameon=False)
    plt.ylim(ylim)
    plt.savefig('results/size-results-of-ALL-SI-random-hops.eps',
                bbox_inches='tight', format='eps', dpi=1000)
def plot_size_infection_probability(cache_dirs, threshold=0.00, bins=range(0, 100, 10)):
    """Plot epidemic size vs transmission probability, raw and rescaled.

    Left panel: size vs p for each prestige decile, with per-decile logistic
    fits.  Right panel: size vs the effective transmission probability
    p* = p / (-log(1 - d)) with a single generic logistic curve overlaid.

    Args:
        cache_dirs: a (title, cache_dir) tuple.
        threshold: prestige series whose max size never exceeds this are dropped.
        bins: percentile bin edges grouping prestige values (None = no binning).

    Writes results/infectious-size-results-of-ALL-SI.eps.
    """
    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12.0, 4.0), sharey=True)
    (title, cache_dir) = cache_dirs
    # Close the cache file deterministically (the original leaked the handle).
    with open(cache_dir, 'rb') as f:
        cache = pickle.load(f)
    meta = meta_of_dir(cache_dir)
    graph = graph_of_dir(cache_dir)  # NOTE(review): unused here; kept for parity with siblings
    # BUG FIX: was `node is bad_node_of_dir(...)` — identity test against a
    # fresh return value. Hoist the call and compare with ==.
    bad_node = bad_node_of_dir(cache_dir)
    results_size = defaultdict(list)
    for p in cache["size"].keys():
        for node, sizes in cache["size"][p].items():
            if node == bad_node:
                continue
            avg = np.average(sizes)
            if not np.isnan(avg) and not np.isinf(avg):
                results_size[meta[node]["pi"]].append((p, avg))
    # Remove prestige series whose epidemic never exceeds the threshold.
    for pi, data in results_size.copy().items():
        if max(size for _, size in data) <= threshold:
            del results_size[pi]
        else:
            results_size[pi] = sorted(data, key=lambda t: t[0])
    # Bin the remaining prestige values into percentile bins keyed 1..len-1.
    if bins is not None:  # idiom fix: was `bins != None`
        left_endpoint = bins[0]
        # BUG FIX: np.percentile cannot digest a dict_keys view on Python 3;
        # materialize the keys first.
        percentiles = np.percentile(list(results_size.keys()), bins[1:])
        bin_means = defaultdict(list)
        for i, bin_edge in enumerate(percentiles):
            bin_values = []
            for pi in results_size.keys():
                if left_endpoint < pi <= bin_edge:
                    bin_values.extend(results_size[pi])
            bin_means[(i + 1)] = average_across_infection_probability(bin_values)
            left_endpoint = bin_edge
        results_size = bin_means
    colors = iter(cm.rainbow(np.linspace(0, 1, len(results_size.keys()))))
    # Parameters of the generic rescaled logistic (constant across deciles).
    r = -2.7
    k = 0.91

    def scale_x(x, d):
        # Effective transmission probability p* = p / (-log(1 - d)).
        return (1.0 * x) / (-1.0 * math.log(1.0 - d))

    for pi, data in sorted(results_size.items(), key=lambda t: t[0]):
        data = sorted(data, key=lambda t: t[0])
        c = next(colors)
        ax1.scatter(*zip(*data), color=c, label='{0}'.format(int(pi * 10)),
                    edgecolor='w', clip_on=False, zorder=1, s=28)
        # Logistic fit for this decile on the raw-p axis.
        x = [p for (p, size) in data
             if not np.isnan(size) and not np.isinf(size)]
        y = [size for (p, size) in data
             if not np.isnan(size) and not np.isinf(size)]
        popt, pcov = curve_fit(curve, np.array(x), np.array(y),
                               bounds=([0., -150., -5.], [1., 0., 5.]))
        x_fine = np.arange(0.0, 1.01, 0.01)
        ax1.plot(x_fine, curve(x_fine, *popt), color=c)
        # Rescale p for the right panel, keeping only points with p* > 0.
        x = []
        y = []
        for x_i, y_i in data:
            if scale_x(x_i, pi * (1.0 / 10.0)) > 0:
                x.append(scale_x(x_i, pi * (1.0 / 10.0)))
                y.append(y_i)
        # Label deciles 1-9 with their ordinal suffix (same set the original
        # enumerated explicitly); other keys are plotted on ax1 only.
        if pi in (1, 2, 3, 4, 5, 6, 7, 8, 9):
            suffix = {1: 'st', 2: 'nd', 3: 'rd'}.get(pi, 'th')
            ax2.scatter(x, y, color=c,
                        label='{0}{1} Decile'.format(int(pi), suffix),
                        edgecolor='w', clip_on=False, zorder=1, s=28)

    # Fit a curve to the whole thing!
    def scale_y(scaled_x):
        # Generic logistic in log-p* space shared by all deciles.
        return (1.0 / (1.0 + math.exp(r * (k + math.log(scaled_x)))))

    x_total = np.arange(0.01, 10.01, 0.01)
    ax2.plot(x_total, [scale_y(i) for i in x_total], color='black',
             label="Generic")
    ax1.tick_params(labelsize=12)
    ax2.tick_params(labelsize=12)
    ax1.set_ylim(0, 1.)
    ax2.set_ylim(0, 1.)
    ax2.set_xscale("log")
    ax2.set_xlim(0.04, 10)
    ax1.set_xlim(0, 1)
    ax1.set_xlabel(r'Transmission Probability, $p$',
                   fontsize=plot_utils.LABEL_SIZE)
    ax2.set_xlabel(r'Effective Transmission Probability, $p^{*}$',
                   fontsize=plot_utils.LABEL_SIZE)
    ax1.set_ylabel(r'Epidemic Size, $\frac{Y}{N}$',
                   fontsize=plot_utils.LABEL_SIZE)
    plot_utils.finalize(ax1)
    plot_utils.finalize(ax2)
    plt.legend(loc='center left', bbox_to_anchor=(1, 0.5),
               fontsize=plot_utils.LEGEND_SIZE, scatterpoints=1, frameon=False)
    plt.savefig('results/infectious-size-results-of-ALL-SI.eps',
                bbox_inches='tight', format='eps', dpi=1000)
def plot_si_prestige_length(cache_dirs, ylim=(0, 5)):
    """Plot normalized SI epidemic length against seed-university prestige.

    Lengths are normalized per node via normalize(graph, node, avg), averaged
    per prestige value, and smoothed with a LOWESS curve per transmission
    probability.

    Args:
        cache_dirs: a (title, cache_dir) tuple.
        ylim: y-axis limits of the finished figure.

    Writes results/length-results-of-ALL-SI.eps.
    """
    fig, ax = plt.subplots(1, 1, figsize=(6.0, 4.0), sharey=True)
    (title, cache_dir) = cache_dirs
    # Close the cache file deterministically (the original leaked the handle).
    with open(cache_dir, 'rb') as f:
        cache = pickle.load(f)
    meta = meta_of_dir(cache_dir)
    graph = graph_of_dir(cache_dir)
    # BUG FIX: was `node is bad_node_of_dir(...)` — identity test against a
    # fresh return value. Hoist the call and compare with ==.
    bad_node = bad_node_of_dir(cache_dir)
    results_length = defaultdict(list)
    for p in cache["length"].keys():
        for node, lengths in cache["length"][p].items():
            if node == bad_node:
                continue
            avg = np.average(lengths)
            y = normalize(graph, node, avg)
            if not np.isnan(avg) and not np.isinf(avg) and not np.isnan(y):
                results_length[p].append((meta[node]["pi"], y))
        results_length[p] = sorted(results_length[p], key=lambda t: t[0])
    # Average repeated prestige values within each transmission probability.
    for ratio, data in results_length.copy().items():
        avg_by_prestige = defaultdict(list)
        for pi, length in data:
            avg_by_prestige[pi].append(length)
        results_length[ratio] = sorted(
            ((pi, np.average(lengths))
             for pi, lengths in avg_by_prestige.items()),
            key=lambda t: t[0])
    # Plot every other transmission probability for a readable legend.
    filtered = sorted(cache["length"].keys())[1::2]
    colors = iter(cm.rainbow(np.linspace(0, 1, len(filtered))))
    markers = Line2D.filled_markers
    count = -1
    max_pi = 0  # robust when no series survives the filter
    for p, data in sorted(results_length.items(), key=lambda t: t[0]):
        if p not in filtered:
            continue
        c = next(colors)
        count += 1
        ax.scatter(*zip(*data), color=c, label='{0:.2f}'.format(p), s=28,
                   marker=markers[count], edgecolor='w', clip_on=False,
                   zorder=1)
        x = np.array([pi for (pi, length) in data
                      if not np.isnan(length) and not np.isinf(length)])
        max_pi = max(x)
        y = np.array([length for (pi, length) in data
                      if not np.isnan(length) and not np.isinf(length)])
        # Fit a LOWESS curve to this series (non-parametric local smoothing).
        lowess = sm.nonparametric.lowess
        z = lowess(y, x, return_sorted=False)
        ax.plot(x, z, color=c)
    ax.set_xlim(0, max_pi)
    ax.tick_params(labelsize=12)
    ax.set_xlabel(r'University Prestige, $\pi$', fontsize=plot_utils.LABEL_SIZE)
    ax.set_ylabel(r'Normalized Epidemic Length, $\frac{L}{\ell}$',
                  fontsize=plot_utils.LABEL_SIZE)
    plot_utils.finalize(ax)
    plt.ylim(ylim)
    plt.legend(loc='center left', bbox_to_anchor=(1, 0.5),
               fontsize=plot_utils.LEGEND_SIZE,
               title='Transmission\nProbability, $p$', frameon=False,
               scatterpoints=1)
    plt.savefig('results/length-results-of-ALL-SI.eps', bbox_inches='tight',
                format='eps', dpi=1000)
def plot_sis_or_sir_prestige_length(cache_dirs, epidemic_type, ylim=(0, 10)):
    """Plot normalized epidemic length vs prestige for SIS/SIR caches.

    One panel per (title, cache_dir) pair; series are keyed by the ratio
    p/r of transmission to recovery probability, with linear fits.

    Args:
        cache_dirs: list of (title, cache_dir) tuples, one per panel.
        epidemic_type: label used in the output filename.
        ylim: y-axis limits of the finished figure.

    Writes results/test/length-results-of-ALL-{epidemic_type}.eps.
    """
    fig, axarray = plt.subplots(1, len(cache_dirs), figsize=(6.9 * 2, 5.0),
                                sharey=True)
    for i, ax in enumerate(axarray):
        (title, cache_dir) = cache_dirs[i]
        print("title: {0}".format(title))
        # Close the cache file deterministically (the original leaked it).
        with open(cache_dir, 'rb') as f:
            cache = pickle.load(f)
        meta = meta_of_dir(cache_dir)
        graph = graph_of_dir(cache_dir)
        # BUG FIX: was `node is bad_node_of_dir(...)` — identity test against
        # a fresh return value. Hoist the call and compare with ==.
        bad_node = bad_node_of_dir(cache_dir)
        results_length = defaultdict(list)
        for (p, r) in cache["length"].keys():
            if r == 0.0:
                # can't divide by zero below. is this the right thing to do?
                continue
            for node, lengths in cache["length"][p, r].items():
                if node == bad_node:
                    continue
                result = (meta[node]["pi"],
                          normalize(graph, node, np.average(lengths)))
                results_length[p / r].append(result)
        # Different (p, r) pairs can share the same p/r ratio; average the
        # lengths recorded at each prestige value.
        for ratio, data in results_length.copy().items():
            avg_by_prestige = defaultdict(list)
            for pi, length in data:
                avg_by_prestige[pi].append(length)
            results_length[ratio] = sorted(
                ((pi, np.average(lengths))
                 for pi, lengths in avg_by_prestige.items()),
                key=lambda t: t[0])
        # Only show integer ratios 1..5.
        filtered = ["1.0", "2.0", "3.0", "4.0", "5.0"]
        colors = iter(cm.rainbow(np.linspace(0, 1, len(filtered))))
        markers = Line2D.filled_markers
        count = -1
        max_pi = 0  # robust when no ratio survives the filter
        for ratio, data in sorted(results_length.items(), key=lambda t: t[0]):
            if "%.1f" % ratio not in filtered:
                continue
            c = next(colors)
            count += 1
            ax.scatter(*zip(*data), color=c, label='{0:.2f}'.format(ratio),
                       marker=markers[count], edgecolor='w', clip_on=False,
                       zorder=1, s=28)
            # Fit a linear trend to the finite points.
            x = np.array([pi for (pi, length) in data
                          if not np.isnan(length) and not np.isinf(length)])
            max_pi = max(x)
            y = np.array([length for (pi, length) in data
                          if not np.isnan(length) and not np.isinf(length)])
            regr = LinearRegression()
            regr.fit(x.reshape(-1, 1), y.reshape(-1, 1))
            interval = np.array([min(x), max(x)])
            print("infection probability: {0}\tcurve_fit: {1}".format(
                ratio, [regr.coef_[0], regr.intercept_]))
            ax.plot(interval, interval * regr.coef_[0] + regr.intercept_,
                    color=c)
        ax.set_xlim(0, max_pi)
        ax.tick_params(labelsize=12)
        if i == 0:
            ax.set_xlabel(r'University Prestige, $\pi$',
                          fontsize=plot_utils.LABEL_SIZE)
            ax.set_ylabel(r'Normalized Epidemic Length, $L$',
                          fontsize=plot_utils.LABEL_SIZE)
        plot_utils.finalize(ax)
    plt.legend(loc='center left', bbox_to_anchor=(1, 0.5),
               fontsize=plot_utils.LEGEND_SIZE, title=r'$p/r$',
               scatterpoints=1, frameon=False)
    plt.ylim(ylim)
    plt.savefig(
        'results/test/length-results-of-ALL-{0}.eps'.format(epidemic_type),
        bbox_inches='tight', format='eps', dpi=1000)
    plt.clf()
def plot_sis_or_sir_prestige_size(cache_dirs, epidemic_type, ylim=(0, 1)):
    """Plot epidemic size vs prestige for SIS/SIR caches, one panel per cache.

    Series are keyed by the ratio p/r of transmission to recovery
    probability, with per-ratio logistic fits.

    Args:
        cache_dirs: list of (title, cache_dir) tuples, one per panel.
        epidemic_type: label used in the output filename.
        ylim: y-axis limits of the finished figure.

    Writes results/test/size-results-of-ALL-{epidemic_type}.eps.
    """
    fig, axarray = plt.subplots(1, len(cache_dirs), figsize=(6.9 * 2, 5.0),
                                sharey=True)
    for i, ax in enumerate(axarray):
        (title, cache_dir) = cache_dirs[i]
        print("title: {0}".format(title))
        # Close the cache file deterministically (the original leaked it).
        with open(cache_dir, 'rb') as f:
            cache = pickle.load(f)
        meta = meta_of_dir(cache_dir)
        graph = graph_of_dir(cache_dir)  # NOTE(review): unused here; kept for parity with siblings
        # BUG FIX: was `node is bad_node_of_dir(...)` — identity test against
        # a fresh return value. Hoist the call and compare with ==.
        bad_node = bad_node_of_dir(cache_dir)
        results_size = defaultdict(list)
        for (p, r) in cache["size"].keys():
            if r == 0.0:
                # can't divide by zero below. is this the right thing to do?
                continue
            for node, sizes in cache["size"][p, r].items():
                if node == bad_node:
                    continue
                results_size[p / r].append(
                    (meta[node]["pi"], np.average(sizes)))
        # Different (p, r) pairs can share the same p/r ratio; average the
        # sizes recorded at each prestige value.
        for ratio, data in results_size.copy().items():
            avg_by_prestige = defaultdict(list)
            for pi, size in data:
                avg_by_prestige[pi].append(size)
            results_size[ratio] = sorted(
                ((pi, np.average(sizes))
                 for pi, sizes in avg_by_prestige.items()),
                key=lambda t: t[0])
        # Only show integer ratios 1..5.
        filtered = ["1.0", "2.0", "3.0", "4.0", "5.0"]
        colors = iter(cm.rainbow(np.linspace(0, 1, len(filtered))))
        markers = Line2D.filled_markers
        count = -1
        max_pi = 0  # robust when no ratio survives the filter
        for ratio, data in sorted(results_size.items(), key=lambda t: t[0]):
            if "%.1f" % ratio not in filtered:
                continue
            c = next(colors)
            count += 1
            ax.scatter(*zip(*data), color=c, label='{0:.2f}'.format(ratio),
                       marker=markers[count], edgecolor='w', clip_on=False,
                       zorder=1, s=28)
            x = [pi for (pi, length) in data
                 if not np.isnan(length) and not np.isinf(length)]
            max_pi = max(x)
            if ratio > 0:
                # Fit a logistic curve to this series.
                y = [length for (pi, length) in data
                     if not np.isnan(length) and not np.isinf(length)]
                popt, pcov = curve_fit(curve, np.array(x), np.array(y),
                                       bounds=(0., [1., 2., 200.]))
                print("infection probability: {0}\tcurve_fit: {1}".format(
                    ratio, popt))
                ax.plot(x, curve(x, *popt), color=c)
        ax.set_xlim(0, max_pi)
        ax.tick_params(labelsize=12)
        if i == 0:
            ax.set_xlabel(r'University Prestige, $\pi$',
                          fontsize=plot_utils.LABEL_SIZE)
            ax.set_ylabel(r'Epidemic Size, $S$',
                          fontsize=plot_utils.LABEL_SIZE)
        plot_utils.finalize(ax)
    plt.legend(loc='center left', bbox_to_anchor=(1, 0.5),
               fontsize=plot_utils.LEGEND_SIZE, title=r'$p/r$',
               scatterpoints=1, frameon=False)
    plt.ylim(ylim)
    plt.savefig(
        'results/test/size-results-of-ALL-{}.eps'.format(epidemic_type),
        bbox_inches='tight', format='eps', dpi=1000)
    plt.clf()
def plot_si_prestige_size(cache_dirs):
    """Plot average SI epidemic size against seed-university prestige.

    NOTE(review): a second function with this exact name is defined later in
    the module and shadows this one at import time — confirm which version is
    intended and delete the other.

    Args:
        cache_dirs: a (title, cache_dir) tuple.

    Writes results/test/size-results-of-ALL-SI.eps.
    (A dead debug block computing per-decile size diffs at p == 0.1, whose
    output statements were all commented out, has been removed.)
    """
    fig, ax = plt.subplots(1, 1, figsize=(6.0, 4.0), sharey=True)
    (title, cache_dir) = cache_dirs
    print("title: {0}".format(title))
    # Close the cache file deterministically (the original leaked the handle).
    with open(cache_dir, 'rb') as f:
        cache = pickle.load(f)
    meta = meta_of_dir(cache_dir)
    graph = graph_of_dir(cache_dir)  # NOTE(review): unused here; kept for parity with siblings
    # BUG FIX: was `node is bad_node_of_dir(...)` — identity test against a
    # fresh return value. Hoist the call and compare with ==.
    bad_node = bad_node_of_dir(cache_dir)
    results_size = defaultdict(list)
    for p in cache["size"].keys():
        for node, sizes in cache["size"][p].items():
            if node == bad_node:
                continue
            avg = np.average(sizes)
            if not np.isnan(avg) and not np.isinf(avg):
                results_size[p].append((meta[node]["pi"], avg))
        results_size[p] = sorted(results_size[p], key=lambda t: t[0])
    # Plot every other transmission probability for a readable legend.
    filtered = sorted(cache["size"].keys())[1::2]
    colors = iter(cm.rainbow(np.linspace(0, 1, len(filtered))))
    markers = Line2D.filled_markers
    count = -1
    max_pi = 0  # robust when no series survives the filter
    for p, data in sorted(results_size.items(), key=lambda t: t[0]):
        if p not in filtered:
            continue
        c = next(colors)
        count += 1
        ax.scatter(*zip(*data), color=c, label='{0:.2f}'.format(p), s=28,
                   marker=markers[count], edgecolor='w', clip_on=False,
                   zorder=1)
        x = [pi for (pi, length) in data
             if not np.isnan(length) and not np.isinf(length)]
        max_pi = max(x)
        if p > 0:
            # Fit a logistic curve to this series.
            y = [length for (pi, length) in data
                 if not np.isnan(length) and not np.isinf(length)]
            popt, pcov = curve_fit(curve, np.array(x), np.array(y),
                                   bounds=(0., [1., 2., 200.]), maxfev=100)
            ax.plot(x, curve(x, *popt), color=c)
    ax.set_xlim(0, max_pi)
    ax.tick_params(labelsize=12)
    ax.set_xlabel(r'University Prestige, $\pi$', fontsize=plot_utils.LABEL_SIZE)
    ax.set_ylabel(r'Epidemic Size, $\frac{S}{N}$',
                  fontsize=plot_utils.LABEL_SIZE)
    plot_utils.finalize(ax)
    plt.ylim(0, 1)
    plt.legend(loc='center left', bbox_to_anchor=(1, 0.5),
               fontsize=plot_utils.LEGEND_SIZE,
               title='Transmission\nProbability, $p$', frameon=False,
               scatterpoints=1)
    plt.savefig('results/test/size-results-of-ALL-SI.eps', bbox_inches='tight',
                format='eps', dpi=1000)
def plot_si_prestige_size(cache_dirs):
    """Plot SI epidemic size vs prestige and export the plotted data as TSV.

    BUG FIX: the original wrote the TSV twice — a first pass wrote
    nan/inf-filtered rows, then a second `open(..., 'w')` truncated the file
    and rewrote every row unfiltered while plotting.  The redundant first
    pass is removed; the surviving writer produces the same final file
    contents as before.

    Args:
        cache_dirs: a (title, cache_dir) tuple; the TSV is written next to
            the cache with the ".p" suffix replaced by "_size.tsv".

    Writes results/size-results-of-ALL-SI.eps and the TSV.
    """
    fig, ax = plt.subplots(1, 1, figsize=(6.0, 4.0), sharey=True)
    (title, cache_dir) = cache_dirs
    # Close the cache file deterministically (the original leaked the handle).
    with open(cache_dir, 'rb') as f:
        cache = pickle.load(f)
    meta = meta_of_dir(cache_dir)
    graph = graph_of_dir(cache_dir)  # NOTE(review): unused here; kept for parity with siblings
    # BUG FIX: was `node is bad_node_of_dir(...)` — identity test against a
    # fresh return value. Hoist the call and compare with ==.
    bad_node = bad_node_of_dir(cache_dir)
    results_size = defaultdict(list)
    for p in cache["size"].keys():
        for node, sizes in cache["size"][p].items():
            if node == bad_node:
                continue
            avg = np.average(sizes)
            if not np.isnan(avg) and not np.isinf(avg):
                results_size[p].append((meta[node]["pi"], avg))
        results_size[p] = sorted(results_size[p], key=lambda t: t[0])
    # Plot every other transmission probability for a readable legend.
    filtered = sorted(cache["size"].keys())[1::2]
    colors = iter(cm.rainbow(np.linspace(0, 1, len(filtered))))
    markers = Line2D.filled_markers
    count = -1
    max_pi = 0  # robust when no series survives the filter
    with open(cache_dir.replace(".p", "_size.tsv"), 'w') as file:
        writer = csv.writer(file, delimiter='\t')
        writer.writerow(["infection_prob", "prestige", "size"])
        for p, data in sorted(results_size.items(), key=lambda t: t[0]):
            if p not in filtered:
                continue
            c = next(colors)
            count += 1
            ax.scatter(*zip(*data), color=c, label='{0:.2f}'.format(p), s=28,
                       marker=markers[count], edgecolor='w', clip_on=False,
                       zorder=1)
            # Export exactly what is plotted.
            for (pi, size) in data:
                writer.writerow([p, pi, size])
            x = [pi for (pi, length) in data
                 if not np.isnan(length) and not np.isinf(length)]
            max_pi = max(x)
            if p > 0:
                # Fit a logistic curve to this series.
                y = [length for (pi, length) in data
                     if not np.isnan(length) and not np.isinf(length)]
                popt, pcov = curve_fit(curve, np.array(x), np.array(y),
                                       bounds=(0., [1., 2., 200.]), maxfev=100)
                ax.plot(x, curve(x, *popt), color=c)
    ax.set_xlim(0, max_pi)
    ax.tick_params(labelsize=12)
    ax.set_xlabel(r'University Prestige, $\pi$', fontsize=plot_utils.LABEL_SIZE)
    ax.set_ylabel(r'Epidemic Size, $\frac{Y}{N}$',
                  fontsize=plot_utils.LABEL_SIZE)
    plot_utils.finalize(ax)
    plt.ylim(0, 1)
    plt.legend(loc='center left', bbox_to_anchor=(1, 0.5),
               fontsize=plot_utils.LEGEND_SIZE,
               title='Transmission\nProbability, $p$', frameon=False,
               scatterpoints=1)
    plt.savefig('results/size-results-of-ALL-SI.eps', bbox_inches='tight',
                format='eps', dpi=1000)