def Direct_Sample(CyberNet, data, num_samples, T, s0):
    """
    Returns log P(data | attacker) estimated by Monte Carlo sampling.

    Each sample simulates a set of infection times forward from ``s0``
    and scores the data against those times with
    ``prob_model_given_data_times`` (which returns a log probability).
    The result is the log of the mean of the per-sample likelihoods.

    CyberNet : CyberNet Instance
        The network to simulate.  It is deep-copied, so the caller's
        instance is not modified by the simulation.
    data : list
        Output of gen_data
    num_samples : int
        How many Monte Carlo samples
    T : int
        Time window
    s0 : dict
        Initial states of nodes ('normal' / 'infected'), keyed by node name

    Returns
    -------
    float
        log(mean(exp(log_prob_i))).  NaN when ``num_samples`` < 1.
    """
    # The states of nodes will be changing so we want to make sure
    # we do not change the input network
    net = copy.deepcopy(CyberNet)
    logn_fact = gen_logn_fact(data)  # Precompute log(n!) for various n.
    initially_infected = [nd for nd in net.node_names
                          if s0[nd] == 'infected']

    log_probs = []
    # Draw exactly num_samples samples (the previous counter started at 1
    # and therefore drew one sample too few).
    for _ in range(num_samples):
        # Reset the working copy to the initial state.
        for nd in net.node_names:
            net.node_dict[nd].state = s0[nd]
        times = {nd: 0 for nd in initially_infected}
        t = 0
        while t < T:
            infected = [nd.name for nd in net.nodes
                        if nd.state == 'infected']
            reachable = set(chain(*[net.node_dict[nd].sends_to
                                    for nd in infected]))
            at_risk = list(reachable - set(infected))
            if not at_risk:
                break
            at_risk_ix = [net.node_names.index(nd) for nd in at_risk]
            # Summed malicious rate into each at-risk node.
            mt_rates = np.sum(net.get_mal_trans()[:, at_risk_ix], axis=0)
            r_rate = np.sum(mt_rates)
            if r_rate <= 0:
                # No infection can occur; avoids dividing by zero below.
                break
            # Sample time of next infection
            t += np.random.exponential(scale=1. / r_rate)
            if t < T:
                # Sample node to be infected
                next_infected = np.random.choice(
                    at_risk, p=mt_rates / float(r_rate))
                times[next_infected] = t
                net.node_dict[next_infected].state = 'infected'
        log_probs.append(
            prob_model_given_data_times(net, data, times, T, logn_fact, s0))

    if not log_probs:
        return float('nan')

    # Log-mean-exp, shifted by the maximum so that very negative
    # log-probabilities do not all underflow to zero when exponentiated.
    log_probs = np.asarray(log_probs, dtype=float)
    peak = np.max(log_probs)
    if not np.isfinite(peak):
        # All -inf (or NaN / +inf present): shifting is undefined.
        return np.log(np.mean(np.exp(log_probs)))
    return peak + np.log(np.mean(np.exp(log_probs - peak)))
def get_likelihoods(seed, num_pos, num_neg, CyberNet, s0, T, truenet=None,
                    directsamps=1000):
    """
    Generates data sets with and without an attacker and scores each one
    under both hypotheses.

    seed : int
        Random seed for Monte Carlo and data generation.  Sample k
        (counting positives first, then negatives, from 1) is generated
        after re-seeding both numpy and the stdlib ``random`` module with
        ``seed + k``.
    num_pos : int
        Number of data sets generated from initial state ``s0``
    num_neg : int
        Number of data sets generated with every node 'normal'
    CyberNet : CyberNet instance
        The model used to compute the likelihoods
    s0 : dict
        Initial state of the net when there is an attacker
    T : int
        Observation window
    truenet : CyberNet instance, optional
        The net used to generate data.  Defaults to ``CyberNet``.
    directsamps : int
        Number of samples passed to Direct_Sample

    See plot_roc_parallel for more info

    Returns
    -------
    (infected_lhoods, clean_lhoods) : tuple of arrays
        One row per data set: (Direct_Sample result,
        prob_model_no_attacker result).
    """
    if truenet is None:
        truenet = CyberNet
    no_a_s0 = dict(zip(CyberNet.node_names,
                       ['normal'] * len(CyberNet.nodes)))
    np.random.seed(seed)
    random.seed(seed)

    def _score_sample(sample_seed, start_state):
        # Generate one data set from start_state and score it.
        np.random.seed(sample_seed)
        random.seed(sample_seed)  # Code uses both
        data = gen_data(T, truenet, start_state)
        logn_fact = gen_logn_fact(data)  # Only generate these as needed
        # The attacker hypothesis is always evaluated with s0, also for
        # data generated from the clean initial state.
        p_data_attacker = Direct_Sample(CyberNet, data, directsamps, T, s0)
        p_no_attacker = prob_model_no_attacker(CyberNet, data, T, logn_fact)
        return p_data_attacker, p_no_attacker

    infected_lhoods = []
    for _ in range(num_pos):
        seed += 1  # Augment the seed by 1 to generate new sample
        infected_lhoods.append(_score_sample(seed, s0))

    clean_lhoods = []
    for _ in range(num_neg):
        seed += 1
        clean_lhoods.append(_score_sample(seed, no_a_s0))

    return np.asarray(infected_lhoods), np.asarray(clean_lhoods)
def Direct_Sample(SFTNet, data, num_samples, T, s0):
    """
    Estimates log P(data | attacker) by Monte Carlo sampling.

    Each sample simulates a set of infection times forward from ``s0``
    and scores the data with ``prob_model_given_data``; element [1] of
    its result is used as the per-sample log probability.

    SFTNet : SFTNet instance
        The network to simulate.  It is deep-copied, so the caller's
        instance is not modified by the simulation.
    data : list
        The data as outputted by gen_data
    num_samples : int
        How many Monte Carlo samples
    T : int
        Time window
    s0 : dict
        Initial states of nodes ('normal' / 'infected'), keyed by node name

    Returns
    -------
    (log_mean, e_probs) : tuple
        ``e_probs`` is the array of exponentiated per-sample values and
        ``log_mean`` is log(mean(e_probs)), computed in a numerically
        stable way.  ``log_mean`` is NaN when ``num_samples`` < 1.
    """
    net = copy.deepcopy(SFTNet)
    logn_fact = gen_logn_fact(data)
    initially_infected = [nd for nd in net.node_names
                          if s0[nd] == 'infected']

    log_probs = []
    # Draw exactly num_samples samples (the previous counter started at 1
    # and therefore drew one sample too few).
    for _ in range(num_samples):
        # Reset the working copy to the initial state.
        for nd in net.node_names:
            net.node_dict[nd].state = s0[nd]
        times = {nd: 0 for nd in initially_infected}
        t = 0
        while t < T:
            infected = [nd.name for nd in net.nodes
                        if nd.state == 'infected']
            reachable = set(chain(*[net.node_dict[nd].sends_to
                                    for nd in infected]))
            at_risk = list(reachable - set(infected))
            if not at_risk:
                break
            at_risk_ix = [net.node_names.index(nd) for nd in at_risk]
            # Summed malicious rate into each at-risk node.
            mt_rates = np.sum(net.get_mal_trans()[:, at_risk_ix], axis=0)
            r_rate = np.sum(mt_rates)
            if r_rate <= 0:
                # No infection can occur; avoids dividing by zero below.
                break
            # Float literals keep this a true division even if the rates
            # are integers under Python 2.
            t += np.random.exponential(scale=1. / r_rate)
            if t < T:
                next_infected = np.random.choice(
                    at_risk, p=mt_rates / float(r_rate))
                times[next_infected] = t
                net.node_dict[next_infected].state = 'infected'
        log_probs.append(
            prob_model_given_data(net, data, times, T, logn_fact, s0)[1])

    e_probs = np.exp(log_probs)
    if e_probs.size == 0:
        return float('nan'), e_probs

    # Log-mean-exp, shifted by the maximum so that very negative
    # log-probabilities do not all underflow to zero when exponentiated.
    log_probs = np.asarray(log_probs, dtype=float)
    peak = np.max(log_probs)
    if not np.isfinite(peak):
        # All -inf (or NaN / +inf present): shifting is undefined.
        return np.log(np.mean(e_probs)), e_probs
    return peak + np.log(np.mean(np.exp(log_probs - peak))), e_probs
def Direct_Sample(SFTNet, data, num_samples, T, s0):
    """
    Estimates log P(data | attacker) by Monte Carlo sampling.

    Each sample simulates a set of infection times forward from ``s0``
    and scores the data with ``prob_model_given_data``; element [1] of
    its result is used as the per-sample log probability.

    SFTNet : SFTNet instance
        The network to simulate.  It is deep-copied, so the caller's
        instance is not modified by the simulation.
    data : list
        The data as outputted by gen_data
    num_samples : int
        How many Monte Carlo samples
    T : int
        Time window
    s0 : dict
        Initial states of nodes ('normal' / 'infected'), keyed by node name

    Returns
    -------
    (log_mean, e_probs) : tuple
        ``e_probs`` is the array of exponentiated per-sample values and
        ``log_mean`` is log(mean(e_probs)), computed in a numerically
        stable way.  ``log_mean`` is NaN when ``num_samples`` < 1.
    """
    net = copy.deepcopy(SFTNet)
    logn_fact = gen_logn_fact(data)
    initially_infected = [nd for nd in net.node_names
                          if s0[nd] == 'infected']

    log_probs = []
    # Draw exactly num_samples samples (the previous counter started at 1
    # and therefore drew one sample too few).
    for _ in range(num_samples):
        # Reset the working copy to the initial state.
        for nd in net.node_names:
            net.node_dict[nd].state = s0[nd]
        times = {nd: 0 for nd in initially_infected}
        t = 0
        while t < T:
            infected = [nd.name for nd in net.nodes
                        if nd.state == 'infected']
            reachable = set(chain(*[net.node_dict[nd].sends_to
                                    for nd in infected]))
            at_risk = list(reachable - set(infected))
            if not at_risk:
                break
            at_risk_ix = [net.node_names.index(nd) for nd in at_risk]
            # Summed malicious rate into each at-risk node.
            mt_rates = np.sum(net.get_mal_trans()[:, at_risk_ix], axis=0)
            r_rate = np.sum(mt_rates)
            if r_rate <= 0:
                # No infection can occur; avoids dividing by zero below.
                break
            # Float literals keep this a true division even if the rates
            # are integers under Python 2.
            t += np.random.exponential(scale=1. / r_rate)
            if t < T:
                next_infected = np.random.choice(
                    at_risk, p=mt_rates / float(r_rate))
                times[next_infected] = t
                net.node_dict[next_infected].state = 'infected'
        log_probs.append(
            prob_model_given_data(net, data, times, T, logn_fact, s0)[1])

    e_probs = np.exp(log_probs)
    if e_probs.size == 0:
        return float('nan'), e_probs

    # Log-mean-exp, shifted by the maximum so that very negative
    # log-probabilities do not all underflow to zero when exponentiated.
    log_probs = np.asarray(log_probs, dtype=float)
    peak = np.max(log_probs)
    if not np.isfinite(peak):
        # All -inf (or NaN / +inf present): shifting is undefined.
        return np.log(np.mean(e_probs)), e_probs
    return peak + np.log(np.mean(np.exp(log_probs - peak))), e_probs
def MCMC_MH(SFTNet, data, s0, N, T, proposal_var=100, print_jumps=False):
    # TODO Need to profile this
    # TODO: Need to make this more general.  Not trivial
    # TODO : Add sample from possible node orderings
    """
    Performs MCMC integration using Metropolis Hastings.  Returns
    the sampled times, their associated probabilities and the
    associated likelihood value.  This method corresponds to David's
    "half-way" approach in the 2nd version of the ASCII where we
    sample (accept / reject) according to P(z | attacker) and then
    take the average of P(data | z, attacker) of the accepted values.

    SFTNet : SFTNet instance
        The net to do MCMC over
    data : list
        The data as outputted by gen_data
    s0 : dict
        State of the net at t=0
    N : int
        The number of MCMC proposals
    T : int
        How long the process ran for.
    proposal_var : float
        Scale applied to a standard normal draw when perturbing each
        infection time (i.e. it is used as a standard deviation).
    print_jumps : bool
        If True, print a line every time a proposal is accepted.

    Returns
    -------
    Results
        Built from a DataFrame with one row per proposal: one column per
        node (current infection time) followed by 'P(z | attacker)' and
        'P(data | z, attacker)'.
    """
    logn_fact = gen_logn_fact(data)
    nodes_to_change = [nd for nd in SFTNet.node_names if s0[nd] == "normal"]
    nodes_no_change = [nd for nd in SFTNet.node_names if s0[nd] == "infected"]
    prob_no_attacker = prob_model_no_attacker(SFTNet, data, T)
    prob_true_value = prob_model_given_data(SFTNet, data, data[-1], T,
                                            logn_fact)

    def prob_mod(times):
        # prob_model_given_data for the specified infection times
        return prob_model_given_data(SFTNet, data, times, T, logn_fact)

    # Initial guess: sorted uniform times for the nodes that start normal,
    # zero for the nodes that start infected.
    guess_times = np.sort(np.random.random(size=len(nodes_to_change)) * T)
    z0 = dict(zip(nodes_to_change, guess_times))
    for nd in nodes_no_change:
        z0[nd] = 0
    p0 = prob_mod(z0)  # Initial probability

    time_samples = {node.name: [] for node in SFTNet.nodes}  # samples
    probs = []  # container for probabilities
    z1 = copy.deepcopy(z0)
    # Make exactly N proposals (the previous counter started at 1 and
    # therefore made one proposal too few).
    for n in range(1, N + 1):
        for nd in nodes_to_change:
            z1[nd] = z0[nd] + np.random.normal() * proposal_var
        p1 = prob_mod(z1)
        # Metropolis acceptance test on element 0 of the model output.
        if p1[0] - p0[0] > np.log(np.random.random()):
            if print_jumps:
                # Single pre-formatted string: same output under the
                # Python 2 print statement and the Python 3 function.
                print("A Jump at,  %s to  %s with prob %s \n" % (n, z1, p1))
            p0 = p1
            z0 = copy.deepcopy(z1)
        # Record the current state of the chain for every proposal.
        for key, val in z0.items():
            time_samples[key].append(val)
        probs.append(p0[:2])

    probs = np.asarray(probs)
    # Fix the column order once so the values and the labels always agree.
    columns = list(time_samples)
    out_ar = np.hstack(
        (np.asarray([time_samples[col] for col in columns]).T, probs))
    columns.append("P(z | attacker)")
    columns.append("P(data | z, attacker)")
    out = pandas.DataFrame(out_ar, columns=columns)
    mcmc_results = Results(out, data[-1], prob_no_attacker, prob_true_value,
                           data, metropolis=True)
    return mcmc_results
def MCMC_MH(SFTNet, data, s0, N, T, proposal_var=100, print_jumps=False):
    # TODO Need to profile this
    # TODO: Need to make this more general.  Not trivial
    # TODO : Add sample from possible node orderings
    """
    Performs MCMC integration using Metropolis Hastings.  Returns
    the sampled times, their associated probabilities and the
    associated likelihood value.  This method corresponds to David's
    "half-way" approach in the 2nd version of the ASCII where we
    sample (accept / reject) according to P(z | attacker) and then
    take the average of P(data | z, attacker) of the accepted values.

    SFTNet : SFTNet instance
        The net to do MCMC over
    data : list
        The data as outputted by gen_data
    s0 : dict
        State of the net at t=0
    N : int
        The number of MCMC proposals
    T : int
        How long the process ran for.
    proposal_var : float
        Scale applied to a standard normal draw when perturbing each
        infection time (i.e. it is used as a standard deviation).
    print_jumps : bool
        If True, print a line every time a proposal is accepted.

    Returns
    -------
    Results
        Built from a DataFrame with one row per proposal: one column per
        node (current infection time) followed by 'P(z | attacker)' and
        'P(data | z, attacker)'.
    """
    logn_fact = gen_logn_fact(data)
    nodes_to_change = [nd for nd in SFTNet.node_names if s0[nd] == 'normal']
    nodes_no_change = [nd for nd in SFTNet.node_names if s0[nd] == 'infected']
    prob_no_attacker = prob_model_no_attacker(SFTNet, data, T)
    prob_true_value = prob_model_given_data(SFTNet, data, data[-1], T,
                                            logn_fact)

    def prob_mod(times):
        # prob_model_given_data for the specified infection times
        return prob_model_given_data(SFTNet, data, times, T, logn_fact)

    # Initial guess: sorted uniform times for the nodes that start normal,
    # zero for the nodes that start infected.
    guess_times = np.sort(np.random.random(size=len(nodes_to_change)) * T)
    z0 = dict(zip(nodes_to_change, guess_times))
    for nd in nodes_no_change:
        z0[nd] = 0
    p0 = prob_mod(z0)  # Initial probability

    time_samples = {node.name: [] for node in SFTNet.nodes}  # samples
    probs = []  # container for probabilities
    z1 = copy.deepcopy(z0)
    # Make exactly N proposals (the previous counter started at 1 and
    # therefore made one proposal too few).
    for n in range(1, N + 1):
        for nd in nodes_to_change:
            z1[nd] = z0[nd] + np.random.normal() * proposal_var
        p1 = prob_mod(z1)
        # Metropolis acceptance test on element 0 of the model output.
        if p1[0] - p0[0] > np.log(np.random.random()):
            if print_jumps:
                # Single pre-formatted string: same output under the
                # Python 2 print statement and the Python 3 function.
                print('A Jump at,  %s to  %s with prob %s \n' % (n, z1, p1))
            p0 = p1
            z0 = copy.deepcopy(z1)
        # Record the current state of the chain for every proposal.
        for key, val in z0.items():
            time_samples[key].append(val)
        probs.append(p0[:2])

    probs = np.asarray(probs)
    # Fix the column order once so the values and the labels always agree.
    columns = list(time_samples)
    out_ar = np.hstack(
        (np.asarray([time_samples[col] for col in columns]).T, probs))
    columns.append('P(z | attacker)')
    columns.append('P(data | z, attacker)')
    out = pandas.DataFrame(out_ar, columns=columns)
    mcmc_results = Results(out, data[-1], prob_no_attacker, prob_true_value,
                           data, metropolis=True)
    return mcmc_results
def get_roc_coords(seed, num_pos, num_neg, i_net, s0, truenet=None,
                   method='Direct_Sample', T=10000, uni_samp_size=2000,
                   mcmc_steps=5000, directsamps=1000, burnin_rate=.25,
                   printsteps=False):
    """
    Generates infected and clean data sets and scores each one with and
    without an attacker.

    seed : int
        Seed for numpy's and the stdlib's random number generators
    num_pos : int
        Number of infected nets in the sample
    num_neg : int
        Number of clean nets to generate
    i_net : SFTNet
        The net instance with an attacker
    s0 : dict
        Initial state of the net when there is an attacker
    truenet : SFTNet, optional
        The net used to generate data.  Defaults to ``i_net``.
    method : str
        'uniform' uses uniform_samp, 'mcmc' uses MCMC_sequence, any
        other value uses Direct_Sample.
    T : int
        Observation Window
    uni_samp_size : int
        The sample size for each infection ordering in uniform sampling
    mcmc_steps : int
        Number of proposals passed to MCMC_sequence
    directsamps : int
        Number of samples passed to Direct_Sample
    burnin_rate : float
        Fraction of ``mcmc_steps`` discarded as burn-in
    printsteps : bool
        If True, print the index of each data set as it is processed

    Returns
    -------
    (infected_lhoods, clean_lhoods) : tuple of lists
        Each entry is (likelihood with attacker, likelihood without).
    """
    # MCMC_sequence draws from the stdlib generator as well as numpy's,
    # so both have to be seeded for a run to be reproducible.
    import random

    if truenet is None:
        truenet = i_net
    np.random.seed(seed)
    random.seed(seed)

    def _score(data):
        # Likelihood of data with an attacker (by `method`) and without.
        if method == 'uniform':
            res = uniform_samp(i_net, s0, uni_samp_size, T, data)[0]
        elif method == 'mcmc':
            mh_res = MCMC_sequence(i_net, data, s0, mcmc_steps, T,
                                   print_jumps=False)
            res = mh_res.calc_log_likelihood(
                burnin=int(burnin_rate * mcmc_steps))
        else:
            res = Direct_Sample(i_net, data, directsamps, T, s0)[0]
        return res, prob_model_no_attacker(i_net, data, T)

    infected_lhoods = []  # lhood w attacker and lhood without
    for i in range(num_pos):
        if printsteps:
            # Single pre-formatted string: same output under the
            # Python 2 print statement and the Python 3 function.
            print('i=  %s' % (i,))
        infected_lhoods.append(_score(gen_data(T, truenet, s0)))

    clean_lhoods = []
    for j in range(num_neg):
        if printsteps:
            print('j = %s' % (j,))
        clean_s0 = dict(zip(i_net.node_names,
                            ['normal'] * len(i_net.nodes)))
        clean_lhoods.append(_score(gen_data(T, truenet, clean_s0)))

    return infected_lhoods, clean_lhoods
def get_roc_coords(
    seed,
    num_pos,
    num_neg,
    i_net,
    s0,
    truenet=None,
    method="Direct_Sample",
    T=10000,
    uni_samp_size=2000,
    mcmc_steps=5000,
    directsamps=1000,
    burnin_rate=0.25,
    printsteps=False,
):
    """
    Generates infected and clean data sets and scores each one with and
    without an attacker.

    seed : int
        Seed for numpy's and the stdlib's random number generators
    num_pos : int
        Number of infected nets in the sample
    num_neg : int
        Number of clean nets to generate
    i_net : SFTNet
        The net instance with an attacker
    s0 : dict
        Initial state of the net when there is an attacker
    truenet : SFTNet, optional
        The net used to generate data.  Defaults to ``i_net``.
    method : str
        "uniform" uses uniform_samp, "mcmc" uses MCMC_sequence, any
        other value uses Direct_Sample.
    T : int
        Observation Window
    uni_samp_size : int
        The sample size for each infection ordering in uniform sampling
    mcmc_steps : int
        Number of proposals passed to MCMC_sequence
    directsamps : int
        Number of samples passed to Direct_Sample
    burnin_rate : float
        Fraction of ``mcmc_steps`` discarded as burn-in
    printsteps : bool
        If True, print the index of each data set as it is processed

    Returns
    -------
    (infected_lhoods, clean_lhoods) : tuple of lists
        Each entry is (likelihood with attacker, likelihood without).
    """
    # MCMC_sequence draws from the stdlib generator as well as numpy's,
    # so both have to be seeded for a run to be reproducible.
    import random

    if truenet is None:
        truenet = i_net
    np.random.seed(seed)
    random.seed(seed)

    def _score(data):
        # Likelihood of data with an attacker (by `method`) and without.
        if method == "uniform":
            res = uniform_samp(i_net, s0, uni_samp_size, T, data)[0]
        elif method == "mcmc":
            mh_res = MCMC_sequence(i_net, data, s0, mcmc_steps, T,
                                   print_jumps=False)
            res = mh_res.calc_log_likelihood(
                burnin=int(burnin_rate * mcmc_steps))
        else:
            res = Direct_Sample(i_net, data, directsamps, T, s0)[0]
        return res, prob_model_no_attacker(i_net, data, T)

    infected_lhoods = []  # lhood w attacker and lhood without
    for i in range(num_pos):
        if printsteps:
            # Single pre-formatted string: same output under the
            # Python 2 print statement and the Python 3 function.
            print("i=  %s" % (i,))
        infected_lhoods.append(_score(gen_data(T, truenet, s0)))

    clean_lhoods = []
    for j in range(num_neg):
        if printsteps:
            print("j = %s" % (j,))
        clean_s0 = dict(zip(i_net.node_names,
                            ["normal"] * len(i_net.nodes)))
        clean_lhoods.append(_score(gen_data(T, truenet, clean_s0)))

    return infected_lhoods, clean_lhoods
def MCMC_sequence(SFTNet, data, s0, N, T, proposal_var=100,
                  print_jumps=False, alpha=1):
    # TODO Need to profile this
    # TODO: Need to make this more general.  Not trivial
    # TODO : Add sample from possible node orderings
    """
    Performs MCMC integration using Metropolis Hastings.  Returns
    the sampled times, their associated probabilities and the
    associated likelihood value.  This method corresponds to David's
    "half-way" approach in the 2nd version of the ASCII where we
    sample (accept / reject) according to P(z | attacker) and then
    take the average of P(data | z, attacker) of the accepted values.

    Each proposal picks an infection ordering and then draws the
    infection times sequentially along it with ``trunc_expon``.

    SFTNet : SFTNet instance
        The net to do MCMC over
    data : list
        The data as outputted by gen_data
    s0 : dict
        State of the net at t=0
    N : int
        The number of MCMC proposals
    T : int
        How long the process ran for.
    proposal_var : float
        Unused; kept so existing callers keep working.
    print_jumps : bool
        If True, print a line every time a proposal is accepted.
    alpha : float
        Probability that a proposal draws a new ordering from
        gen_orderings instead of reusing the current one.

    Returns
    -------
    Results
        Built from a DataFrame with one row per proposal: one column per
        node (current infection time) followed by 'P(z | attacker)' and
        'P(data | z, attacker)'.
    """
    logn_fact = gen_logn_fact(data)
    nodes_to_change = [nd for nd in SFTNet.node_names if s0[nd] == 'normal']
    nodes_no_change = [nd for nd in SFTNet.node_names if s0[nd] == 'infected']
    prob_no_attacker = prob_model_no_attacker(SFTNet, data, T)
    prob_true_value = prob_model_given_data(SFTNet, data, data[-1], T,
                                            logn_fact)
    numattackers = len(nodes_no_change)

    def prob_mod(times):
        # prob_model_given_data for the specified infection times
        return prob_model_given_data(SFTNet, data, times, T, logn_fact)

    # Initial guess: sorted uniform times for the nodes that start normal,
    # zero for the nodes that start infected.
    guess_times = np.sort(np.random.random(size=len(nodes_to_change)) * T)
    z0 = dict(zip(nodes_to_change, guess_times))
    for nd in nodes_no_change:
        z0[nd] = 0
    order = sorted(z0, key=z0.get)
    p0 = prob_mod(z0)  # Initial probability

    time_samples = {node.name: [] for node in SFTNet.nodes}  # samples
    probs = []  # container for probabilities
    orders = gen_orderings(SFTNet, s0)
    # `state` is indexed by position in node_names below, so the initial
    # state has to be laid out in node_names order as well (the previous
    # layout was only right when the infected nodes were listed first).
    state0 = [s0[nd] for nd in SFTNet.node_names]

    # Make exactly N proposals (the previous counter started at 1 and
    # therefore made one proposal too few).
    for n in range(1, N + 1):
        z1 = dict(zip(nodes_no_change, [0] * numattackers))
        last_infect = 0
        state = list(state0)
        if np.random.random() < alpha:
            new_order = random.choice(orders)
        else:
            new_order = order
        # Orderings are assumed to list the initially infected nodes
        # first; only the remaining nodes get a sampled time.
        for nd in new_order[numattackers:]:
            cross_s_ix = SFTNet.cross_S.index(state)
            nd_ix = SFTNet.node_names.index(nd)
            incoming_rate = np.sum(
                SFTNet.mal_trans_mats[cross_s_ix][:, nd_ix])
            last_infect = last_infect + trunc_expon(incoming_rate,
                                                    T - last_infect)
            z1[nd] = last_infect
            state[nd_ix] = 'infected'
        p1 = prob_mod(z1)
        # Acceptance test on element 2 of the model output.
        # NOTE(review): the docstring says P(z | attacker), which is
        # element 0 in the output columns -- confirm which is intended.
        if p1[2] - p0[2] > np.log(np.random.random()):
            if print_jumps:
                # Single pre-formatted string: same output under the
                # Python 2 print statement and the Python 3 function.
                print('A Jump at,  %s to  %s with prob %s \n' % (n, z1, p1))
            # Update the chain on every accepted proposal.  This used to
            # sit under `if switch_order:` (whose only other content was
            # a commented-out print), which differs only when alpha < 1.
            p0 = p1
            z0 = z1  # z1 is rebuilt from scratch on every proposal
            order = new_order
        # Record the current state of the chain for every proposal.
        for key, val in z0.items():
            time_samples[key].append(val)
        for nd in nodes_to_change:
            if nd not in z0:
                # Node absent from the ordering: recorded with time T.
                time_samples[nd].append(T)
        probs.append(p0[:2])

    probs = np.asarray(probs)
    # Fix the column order once so the values and the labels always agree.
    columns = list(time_samples)
    out_ar = np.hstack(
        (np.asarray([time_samples[col] for col in columns]).T, probs))
    columns.append('P(z | attacker)')
    columns.append('P(data | z, attacker)')
    out = pandas.DataFrame(out_ar, columns=columns)
    mcmc_results = Results(out, data[-1], prob_no_attacker, prob_true_value,
                           data, metropolis=True)
    return mcmc_results
def MCMC_sequence(SFTNet, data, s0, N, T, proposal_var=100,
                  print_jumps=False, alpha=1):
    # TODO Need to profile this
    # TODO: Need to make this more general.  Not trivial
    # TODO : Add sample from possible node orderings
    """
    Performs MCMC integration using Metropolis Hastings.  Returns
    the sampled times, their associated probabilities and the
    associated likelihood value.  This method corresponds to David's
    "half-way" approach in the 2nd version of the ASCII where we
    sample (accept / reject) according to P(z | attacker) and then
    take the average of P(data | z, attacker) of the accepted values.

    Each proposal picks an infection ordering and then draws the
    infection times sequentially along it with ``trunc_expon``.

    SFTNet : SFTNet instance
        The net to do MCMC over
    data : list
        The data as outputted by gen_data
    s0 : dict
        State of the net at t=0
    N : int
        The number of MCMC proposals
    T : int
        How long the process ran for.
    proposal_var : float
        Unused; kept so existing callers keep working.
    print_jumps : bool
        If True, print a line every time a proposal is accepted.
    alpha : float
        Probability that a proposal draws a new ordering from
        gen_orderings instead of reusing the current one.

    Returns
    -------
    Results
        Built from a DataFrame with one row per proposal: one column per
        node (current infection time) followed by 'P(z | attacker)' and
        'P(data | z, attacker)'.
    """
    logn_fact = gen_logn_fact(data)
    nodes_to_change = [nd for nd in SFTNet.node_names if s0[nd] == 'normal']
    nodes_no_change = [nd for nd in SFTNet.node_names if s0[nd] == 'infected']
    prob_no_attacker = prob_model_no_attacker(SFTNet, data, T)
    prob_true_value = prob_model_given_data(SFTNet, data, data[-1], T,
                                            logn_fact)
    numattackers = len(nodes_no_change)

    def prob_mod(times):
        # prob_model_given_data for the specified infection times
        return prob_model_given_data(SFTNet, data, times, T, logn_fact)

    # Initial guess: sorted uniform times for the nodes that start normal,
    # zero for the nodes that start infected.
    guess_times = np.sort(np.random.random(size=len(nodes_to_change)) * T)
    z0 = dict(zip(nodes_to_change, guess_times))
    for nd in nodes_no_change:
        z0[nd] = 0
    order = sorted(z0, key=z0.get)
    p0 = prob_mod(z0)  # Initial probability

    time_samples = {node.name: [] for node in SFTNet.nodes}  # samples
    probs = []  # container for probabilities
    orders = gen_orderings(SFTNet, s0)
    # `state` is indexed by position in node_names below, so the initial
    # state has to be laid out in node_names order as well (the previous
    # layout was only right when the infected nodes were listed first).
    state0 = [s0[nd] for nd in SFTNet.node_names]

    # Make exactly N proposals (the previous counter started at 1 and
    # therefore made one proposal too few).
    for n in range(1, N + 1):
        z1 = dict(zip(nodes_no_change, [0] * numattackers))
        last_infect = 0
        state = list(state0)
        if np.random.random() < alpha:
            new_order = random.choice(orders)
        else:
            new_order = order
        # Orderings are assumed to list the initially infected nodes
        # first; only the remaining nodes get a sampled time.
        for nd in new_order[numattackers:]:
            cross_s_ix = SFTNet.cross_S.index(state)
            nd_ix = SFTNet.node_names.index(nd)
            incoming_rate = np.sum(
                SFTNet.mal_trans_mats[cross_s_ix][:, nd_ix])
            last_infect = last_infect + trunc_expon(incoming_rate,
                                                    T - last_infect)
            z1[nd] = last_infect
            state[nd_ix] = 'infected'
        p1 = prob_mod(z1)
        # Acceptance test on element 2 of the model output.
        # NOTE(review): the docstring says P(z | attacker), which is
        # element 0 in the output columns -- confirm which is intended.
        if p1[2] - p0[2] > np.log(np.random.random()):
            if print_jumps:
                # Single pre-formatted string: same output under the
                # Python 2 print statement and the Python 3 function.
                print('A Jump at,  %s to  %s with prob %s \n' % (n, z1, p1))
            # Update the chain on every accepted proposal.  This used to
            # sit under `if switch_order:` (whose only other content was
            # a commented-out print), which differs only when alpha < 1.
            p0 = p1
            z0 = z1  # z1 is rebuilt from scratch on every proposal
            order = new_order
        # Record the current state of the chain for every proposal.
        for key, val in z0.items():
            time_samples[key].append(val)
        for nd in nodes_to_change:
            if nd not in z0:
                # Node absent from the ordering: recorded with time T.
                time_samples[nd].append(T)
        probs.append(p0[:2])

    probs = np.asarray(probs)
    # Fix the column order once so the values and the labels always agree.
    columns = list(time_samples)
    out_ar = np.hstack(
        (np.asarray([time_samples[col] for col in columns]).T, probs))
    columns.append('P(z | attacker)')
    columns.append('P(data | z, attacker)')
    out = pandas.DataFrame(out_ar, columns=columns)
    mcmc_results = Results(out, data[-1], prob_no_attacker, prob_true_value,
                           data, metropolis=True)
    return mcmc_results
def Direct_Sample(CyberNet, data, num_samples, T, s0):
    """
    Returns log P(data | attacker) estimated by Monte Carlo sampling.

    Each sample simulates a set of infection times forward from ``s0``
    and scores the data against those times with
    ``prob_model_given_data_times`` (which returns a log probability).
    The result is the log of the mean of the per-sample likelihoods.

    CyberNet : CyberNet Instance
        The network to simulate.  It is deep-copied, so the caller's
        instance is not modified by the simulation.
    data : list
        Output of gen_data
    num_samples : int
        How many Monte Carlo samples
    T : int
        Time window
    s0 : dict
        Initial states of nodes ('normal' / 'infected'), keyed by node name

    Returns
    -------
    float
        log(mean(exp(log_prob_i))).  NaN when ``num_samples`` < 1.
    """
    # The states of nodes will be changing so we want to make sure
    # we do not change the input network
    net = copy.deepcopy(CyberNet)
    logn_fact = gen_logn_fact(data)  # Precompute log(n!) for various n.
    initially_infected = [nd for nd in net.node_names
                          if s0[nd] == 'infected']

    log_probs = []
    # Draw exactly num_samples samples (the previous counter started at 1
    # and therefore drew one sample too few).
    for _ in range(num_samples):
        # Reset the working copy to the initial state.
        for nd in net.node_names:
            net.node_dict[nd].state = s0[nd]
        times = {nd: 0 for nd in initially_infected}
        t = 0
        while t < T:
            infected = [nd.name for nd in net.nodes
                        if nd.state == 'infected']
            reachable = set(chain(*[net.node_dict[nd].sends_to
                                    for nd in infected]))
            at_risk = list(reachable - set(infected))
            if not at_risk:
                break
            at_risk_ix = [net.node_names.index(nd) for nd in at_risk]
            # Summed malicious rate into each at-risk node.
            mt_rates = np.sum(net.get_mal_trans()[:, at_risk_ix], axis=0)
            r_rate = np.sum(mt_rates)
            if r_rate <= 0:
                # No infection can occur; avoids dividing by zero below.
                break
            # Sample time of next infection
            t += np.random.exponential(scale=1. / r_rate)
            if t < T:
                # Sample node to be infected
                next_infected = np.random.choice(
                    at_risk, p=mt_rates / float(r_rate))
                times[next_infected] = t
                net.node_dict[next_infected].state = 'infected'
        log_probs.append(
            prob_model_given_data_times(net, data, times, T, logn_fact, s0))

    if not log_probs:
        return float('nan')

    # Log-mean-exp, shifted by the maximum so that very negative
    # log-probabilities do not all underflow to zero when exponentiated.
    log_probs = np.asarray(log_probs, dtype=float)
    peak = np.max(log_probs)
    if not np.isfinite(peak):
        # All -inf (or NaN / +inf present): shifting is undefined.
        return np.log(np.mean(np.exp(log_probs)))
    return peak + np.log(np.mean(np.exp(log_probs - peak)))