/
experiment.py
198 lines (187 loc) · 10.4 KB
/
experiment.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
from typing import List, Tuple, Callable, Optional
import networkx as nx
import numpy as np
import random
from scipy.stats import ttest_ind as ttest
import matplotlib.pyplot as plt
class Statistics:
    """Scalar summary statistics computed from a networkx graph."""

    @staticmethod
    def get_tau(graph):
        """
        :param graph: A networkx graph
        :return: 1 divided by the largest eigenvalue of the graph's adjacency matrix
        """
        # nx.to_numpy_matrix was removed in networkx 3.0; to_numpy_array returns
        # the same (weighted) adjacency values as a plain ndarray.
        adjacency = nx.to_numpy_array(graph)
        eigenvalues = np.linalg.eigvals(adjacency)
        # eigvals may hand back a complex dtype; compare on the real parts so
        # the maximum is well defined and the result is a plain float.
        return 1.0 / float(np.max(np.real(eigenvalues)))

    @staticmethod
    def get_freeman(graph):
        """
        :param graph: A networkx graph
        :return: The mean difference between the largest eigenvector centrality
                 and the eigenvector centrality of every node
        """
        centralities = np.fromiter(nx.eigenvector_centrality(graph).values(), dtype=float)
        return np.mean(centralities.max() - centralities)

    @staticmethod
    def get_gini(graph):
        """
        :param graph: A networkx graph
        :return: The Gini coefficient of the nodes' eigenvector centralities
        """
        return Statistics._gini_coefficient(list(nx.eigenvector_centrality(graph).values()))

    @staticmethod
    def _gini_coefficient(values):
        """
        :param values: An iterable of non-negative numbers
        :return: The Gini coefficient sum((2i - n - 1) * x_i) / (n * sum(x)) over the
                 ascending-sorted values, or 0.0 when the values are empty or sum to zero
        """
        ordered = np.sort(np.asarray(values, dtype=float))
        n = ordered.shape[0]
        total = ordered.sum()
        if n == 0 or total == 0:
            return 0.0
        ranks = np.arange(1, n + 1)
        # The rank weights must multiply the values *before* summing: on their
        # own they always add up to zero, which would zero out the whole result.
        return float(np.sum((2 * ranks - n - 1) * ordered) / (n * total))
class Experiment:
    """
    Generates pairs of random man-woman contact graphs (one from the "without sex ed"
    distributions, one from the "with sex ed" distributions) and t-tests graph
    statistics of their largest connected components against each other.
    """

    def __init__(self, num_graphs: int, num_nodes: int, male_ratio: float,
                 dd_without_sex_ed: List[List[float]], dd_with_sex_ed: List[List[float]],
                 is_invalid_graph: Callable = lambda x: False, match_mean_degrees: bool = False,
                 cd_without_sex_ed: Optional[List[List[float]]] = None,
                 cd_with_sex_ed: Optional[List[List[float]]] = None,
                 condom_weight: Optional[float] = None):
        """
        :param num_graphs: The number of random graphs we want to generate
        :param num_nodes: The number of nodes we want in each random graph
        :param male_ratio: The ratio of male
        :param dd_without_sex_ed: A list of two lists,
                                  the first one corresponding to the degree distribution of men without sex ed,
                                  and the second to that of women without sex ed.
        :param dd_with_sex_ed: Same with dd_without_sex_ed, but data of men and women with sex ed
        :param is_invalid_graph: A function that takes a networkx graph as input,
                                 and outputs true if the graph violates some rule, false otherwise
        :param match_mean_degrees: Whether or not we want to match the mean degrees between two groups
        :param cd_without_sex_ed: Same with dd_without_sex_ed, but is the distribution of ratio of condom usage
        :param cd_with_sex_ed: Same with dd_with_sex_ed, but is the distribution of ratio of condom usage
        :param condom_weight: The weight we want to set on edges encoding sexual interaction with condom usage
        :raises ValueError: If a condom distribution is given without a condom_weight
        """
        # Explicit raise instead of assert: asserts are stripped under `python -O`.
        if condom_weight is None and (cd_with_sex_ed is not None or cd_without_sex_ed is not None):
            raise ValueError("condom distributions require condom_weight to be set")
        self.num_graphs = num_graphs
        self.num_nodes = num_nodes
        self.male_ratio = male_ratio
        self.dd_without_sex_ed = dd_without_sex_ed
        self.dd_with_sex_ed = dd_with_sex_ed
        self.is_invalid_graph = is_invalid_graph
        self.match_mean_degrees = match_mean_degrees
        self.cd_without_sex_ed = cd_without_sex_ed
        self.cd_with_sex_ed = cd_with_sex_ed
        self.condom_weight = condom_weight

    @staticmethod
    def __connect_spokes(graph: "nx.Graph", men_spokes: List[int], women_spokes: List[int],
                         is_invalid_graph: Callable, condom_weight: Optional[float] = None):
        """
        Pair up men's and women's spokes at random, adding one edge per successful pairing.

        :param graph: The graph to add edges to (modified in place)
        :param men_spokes: Node ids of men, one entry per edge the man still needs
        :param women_spokes: Node ids of women, one entry per edge the woman still needs
        :param is_invalid_graph: A function that tests whether a graph violates a certain rule;
                                 an edge that makes it return true is removed again
        :param condom_weight: If given, every edge added here gets this value as its 'weight'
        """
        shuffled_men = list(men_spokes)
        open_women = list(women_spokes)
        random.shuffle(shuffled_men)
        for man_id in shuffled_men:
            if not open_women:
                break
            candidates = list(open_women)
            random.shuffle(candidates)
            for woman_id in candidates:
                if graph.has_edge(man_id, woman_id):
                    continue
                if condom_weight is None:
                    graph.add_edge(man_id, woman_id)
                else:
                    graph.add_edge(man_id, woman_id, weight=condom_weight)
                if is_invalid_graph(graph):
                    # Tentative edge broke the rule: undo it and try the next woman.
                    graph.remove_edge(man_id, woman_id)
                    continue
                # One of this woman's spokes is now used up.
                open_women.remove(woman_id)
                break

    @staticmethod
    def __draw_spoke_counts(degree_dist: List[float], condom_dist: List[float]) -> Tuple[int, int]:
        """
        Draw how many non-condom and condom spokes a single person gets.

        :param degree_dist: Probabilities of having degree 1, 2, ..., len(degree_dist)
        :param condom_dist: For each of those degrees, the probability that a given spoke uses a condom
        :return: A tuple (number of non-condom spokes, number of condom spokes)
        """
        degree_values = np.arange(1, len(degree_dist) + 1)
        total_degree = int(np.random.choice(degree_values, p=degree_dist))
        use_condom_prob = condom_dist[total_degree - 1]
        # One independent condom/no-condom draw per spoke is a binomial draw.
        degree_condom = int(np.random.binomial(total_degree, use_condom_prob))
        return total_degree - degree_condom, degree_condom

    @staticmethod
    def __generate_random_graph(n_men: int, n_women: int,
                                men_dd: List[float], women_dd: List[float],
                                is_invalid_graph: Callable,
                                men_cd: Optional[List[float]] = None, women_cd: Optional[List[float]] = None,
                                condom_weight: Optional[float] = None) -> "nx.Graph":
        """
        :param n_men: Number of men in the graph
        :param n_women: Number of women in the graph
        :param men_dd: Degree distribution of men
        :param women_dd: Degree distribution of women
        :param is_invalid_graph: A function that tests whether a graph violates a certain rule
        :param men_cd: Distribution of condom usage of men, indexed by degree (no condom usage if None)
        :param women_cd: Distribution of condom usage of women, indexed by degree (no condom usage if None)
        :param condom_weight: Weight set on the edges built from condom spokes
        :return: A networkx Graph object
        """
        graph = nx.Graph()
        men_cd = men_cd if men_cd is not None else [0] * len(men_dd)
        women_cd = women_cd if women_cd is not None else [0] * len(women_dd)
        men_spokes, women_spokes, men_cspokes, women_cspokes = [], [], [], []
        groups = (
            (n_men, men_dd, men_cd, men_spokes, men_cspokes),
            (n_women, women_dd, women_cd, women_spokes, women_cspokes),
        )
        node_id = 0
        for count, degree_dist, condom_dist, spokes, cspokes in groups:
            for _ in range(count):
                graph.add_node(node_id)
                degree_non_condom, degree_condom = Experiment.__draw_spoke_counts(degree_dist, condom_dist)
                # Record the spokes before advancing node_id so they point at
                # the node that was just added, not at the next one.
                spokes.extend([node_id] * degree_non_condom)
                cspokes.extend([node_id] * degree_condom)
                node_id += 1
        # Edges without condom usage stay unweighted; only the condom edges
        # carry condom_weight.
        Experiment.__connect_spokes(graph, men_spokes, women_spokes, is_invalid_graph)
        Experiment.__connect_spokes(graph, men_cspokes, women_cspokes, is_invalid_graph, condom_weight)
        return graph

    @staticmethod
    def __largest_component(graph: "nx.Graph") -> "nx.Graph":
        """Return a copy of the subgraph induced by the largest connected component."""
        # nx.connected_component_subgraphs was removed in networkx 2.4.
        biggest = max(nx.connected_components(graph), key=len)
        return graph.subgraph(biggest).copy()

    @staticmethod
    def __summarize(graph: "nx.Graph"):
        """Return the 'tau', 'gini', 'freeman' and 'mg' statistics of a connected graph."""
        return {'tau': Statistics.get_tau(graph),
                'gini': Statistics.get_gini(graph),
                'freeman': Statistics.get_freeman(graph),
                'mg': nx.average_shortest_path_length(graph)}

    def __run_one_iteration(self):
        """
        Generate one graph per group and compute the statistics of their largest components.

        :return: A dictionary with keys 'without' and 'with', each mapping to a dictionary
                 with the keys 'tau', 'gini', 'freeman' and 'mg'
        """
        men_without_sex_ed_dd, women_without_sex_ed_dd = self.dd_without_sex_ed
        men_with_sex_ed_dd, women_with_sex_ed_dd = self.dd_with_sex_ed
        # The condom distributions are optional; None cannot be unpacked.
        men_without_sex_ed_cd, women_without_sex_ed_cd = (
            self.cd_without_sex_ed if self.cd_without_sex_ed is not None else (None, None))
        men_with_sex_ed_cd, women_with_sex_ed_cd = (
            self.cd_with_sex_ed if self.cd_with_sex_ed is not None else (None, None))
        num_men = round(self.num_nodes * self.male_ratio)
        num_women = self.num_nodes - num_men
        graph_without_sex_ed = self.__generate_random_graph(num_men, num_women,
                                                            men_without_sex_ed_dd, women_without_sex_ed_dd,
                                                            self.is_invalid_graph,
                                                            men_without_sex_ed_cd, women_without_sex_ed_cd,
                                                            self.condom_weight)
        graph_with_sex_ed = self.__generate_random_graph(num_men, num_women,
                                                         men_with_sex_ed_dd, women_with_sex_ed_dd,
                                                         self.is_invalid_graph,
                                                         men_with_sex_ed_cd, women_with_sex_ed_cd,
                                                         self.condom_weight)
        if self.match_mean_degrees:
            # Both graphs have the same node count, so equal edge counts mean
            # equal mean degrees: drop random edges from the denser graph.
            diff = graph_without_sex_ed.number_of_edges() - graph_with_sex_ed.number_of_edges()
            denser = graph_without_sex_ed if diff > 0 else graph_with_sex_ed
            if diff != 0:
                # random.sample needs a sequence; an EdgeView is not one.
                denser.remove_edges_from(random.sample(list(denser.edges), abs(diff)))
        gcc_without_sex_ed = self.__largest_component(graph_without_sex_ed)
        gcc_with_sex_ed = self.__largest_component(graph_with_sex_ed)
        return {'without': self.__summarize(gcc_without_sex_ed),
                'with': self.__summarize(gcc_with_sex_ed)}

    def run(self, num_iters: int):
        """
        :param num_iters: Number of iterations we want to run the experiment
        :return: A dictionary with t-test results on various metrics between the random graphs
                 with and without sex-ed, under the keys 'tau', 'gini', 'freeman' and 'mgs'
        """
        metrics = ('tau', 'gini', 'freeman', 'mg')
        samples = {group: {metric: [] for metric in metrics} for group in ('without', 'with')}
        for _ in range(num_iters):
            result = self.__run_one_iteration()
            for group, per_metric in samples.items():
                for metric in metrics:
                    per_metric[metric].append(result[group][metric])
        without, with_ = samples['without'], samples['with']
        return {'tau': ttest(without['tau'], with_['tau']),
                'gini': ttest(without['gini'], with_['gini']),
                'freeman': ttest(without['freeman'], with_['freeman']),
                'mgs': ttest(without['mg'], with_['mg'])}