def det_ext_and_local_ids(self, prob_rspnd_local: int=0):
    """
    Split the known IDs into a local and an external group.

    Initiator IDs keep the locality they already have: the local group starts
    as a copy of self.local_init_ids and the external group receives every ID
    from self.external_init_ids. Each responder ID in self.respnd_ids that is
    in neither group yet is then placed by a weighted random draw.

    :param prob_rspnd_local: the probability that a responder is local
    :return: tuple (local IDs, external IDs); both are also stored on
             self.local_ids and self.external_ids
    """
    assigned_local = self.local_init_ids.copy()
    assigned_external = set()

    # weighted chooser deciding the locality of a single responder
    locality_chooser = Lea.fromValFreqsDict({
        "local": prob_rspnd_local*100,
        "external": (1-prob_rspnd_local)*100,
    })

    # external initiators are external by definition
    assigned_external.update(self.external_init_ids)

    # maps the outcome of a draw to the group that receives the ID
    group_for = {"local": assigned_local, "external": assigned_external}

    # determine responder localities
    for responder in self.respnd_ids:
        already_placed = responder in assigned_local or responder in assigned_external
        if not already_placed:
            target_group = group_for.get(locality_chooser.random())
            if target_group is not None:
                target_group.add(responder)

    self.local_ids = assigned_local
    self.external_ids = assigned_external
    return self.local_ids, self.external_ids
def assign_realistic_ttls(bot_configs: dict):
    """
    Assign a realistic TTL to each bot in bot_configs (modified in place).

    Local bots get the fixed TTL 128. For every other bot the TTL is drawn
    from the TTL distribution that the statistics report for the bot's IP;
    if that distribution is empty, the most used TTL of the dataset is taken
    instead (a random one if several are reported as a list).

    NOTE(review): `self` is not a parameter of this function; presumably it
    is resolved from an enclosing method's scope - confirm against the caller.

    :param bot_configs: dict mapping a bot ID to its configuration dict; each
                        configuration must provide "Type" and, for non-local
                        bots, "IP". The key "TTL" is written.
    """
    for bot in sorted(bot_configs.keys()):
        bot_type = bot_configs[bot]["Type"]

        if bot_type == "local":
            # Set fix TTL for local Bots
            bot_configs[bot]["TTL"] = 128
            continue

        # Set varying TTL for external Bots, based on the TTL distribution
        # of the bot's IP address
        bot_ttl_dist = self.statistics.get_ttl_distribution(bot_configs[bot]["IP"])
        if len(bot_ttl_dist) > 0:
            source_ttl_prob_dict = Lea.fromValFreqsDict(bot_ttl_dist)
            bot_configs[bot]["TTL"] = source_ttl_prob_dict.random()
        else:
            # No per-IP data: fall back to the most used TTL. The query result
            # is reused instead of running the identical query a second time.
            most_used_ttl = self.statistics.process_db_query("most_used(ttlValue)")
            if isinstance(most_used_ttl, list):
                bot_configs[bot]["TTL"] = choice(most_used_ttl)
            else:
                bot_configs[bot]["TTL"] = most_used_ttl
def markov(corpus, n_seq=1, start=None, length=42):
    """
    Print random word sequences generated by a first-order Markov chain.

    The chain is learnt from the sentences in corpus: BOS_TOKEN leads to the
    first word of each sentence, every word leads to its successor, and the
    last word of each sentence leads to EOS_TOKEN, which is absorbing.
    Sentences without any word (empty or whitespace-only) are skipped.

    :param corpus: iterable of sentences (strings, split on whitespace)
    :param n_seq: number of sequences to print
    :param start: state to start each sequence from; BOS_TOKEN if None
    :param length: maximum number of states per sequence, start included
    :raises KeyError: if a sequence reaches a state that was never seen as a
                      predecessor (e.g. an unknown start word, or an empty
                      corpus while starting from BOS_TOKEN)
    """
    # Counting occurrences
    next_one = defaultdict(Counter)
    next_one[EOS_TOKEN][EOS_TOKEN] = 1  # Last state is absorbing
    for sentence in corpus:
        words = sentence.split()
        if not words:
            # A blank sentence has no first word to link from BOS_TOKEN;
            # indexing words[0] here would raise IndexError.
            continue
        next_one[BOS_TOKEN][words[0]] += 1
        for current, following in zip(words, words[1:]):
            next_one[current][following] += 1
        next_one[words[-1]][EOS_TOKEN] += 1

    # Initializing states: one weighted distribution per observed state
    states = {}
    for state in next_one:
        states[state] = Lea.fromValFreqsDict(next_one[state])

    # Outputting visited states
    markers = {BOS_TOKEN, EOS_TOKEN}
    for _ in range(n_seq):
        state = start if start is not None else BOS_TOKEN
        seq = [state]
        while len(seq) < length and state != EOS_TOKEN:
            state = states[state].random()
            seq.append(state)
        # the begin/end markers are bookkeeping only and are not printed
        print(' '.join(word for word in seq if word not in markers))
def markov(corpus, start, length):
    """
    Generate a random word sequence with a first-order Markov chain.

    Successor frequencies are counted over all sentences of corpus; the walk
    begins at start and repeatedly draws the next word according to these
    frequencies.

    :param corpus: iterable of sentences (strings, split on whitespace)
    :param start: first word of the generated sequence
    :param length: maximum number of words to generate, start included
    :return: list of words beginning with start; it is shorter than length
             when the walk reaches a word that has no recorded successor
             (for instance a word that only occurs at the end of a sentence)
    """
    # Counting occurrences
    next_one = defaultdict(Counter)
    for sentence in corpus:
        words = sentence.split()
        for current, following in zip(words, words[1:]):
            next_one[current][following] += 1

    # Initializing states
    states = {}
    for word in next_one:
        states[word] = Lea.fromValFreqsDict(next_one[word])

    # Outputting visited states
    word = start
    words = [word]
    for _ in range(length - 1):
        if word not in states:
            # dead end: no successor was ever observed for this word, so the
            # chain stops here instead of failing with a KeyError
            break
        word = states[word].random()
        words.append(word)
    return words
def markov(corpus, start, length):
    """
    Produce a random walk over the words of corpus (first-order Markov chain).

    For every word the frequencies of its direct successors are counted across
    all sentences. Starting at start, each following word is drawn according
    to the successor frequencies of the current word.

    :param corpus: iterable of sentences (strings, split on whitespace)
    :param start: word the walk starts from
    :param length: upper bound on the number of words returned, start included
    :return: list of visited words, start first; the list ends early if the
             current word has no recorded successor (e.g. it was only ever
             seen as the last word of a sentence, or not seen at all)
    """
    # Counting occurrences
    next_one = defaultdict(Counter)
    for sentence in corpus:
        words = sentence.split()
        for i in range(len(words) - 1):
            next_one[words[i]][words[i + 1]] += 1

    # Initializing states
    states = {}
    for word in next_one:
        states[word] = Lea.fromValFreqsDict(next_one[word])

    # Outputting visited states
    word = start
    words = [word]
    for _ in range(length - 1):
        distribution = states.get(word)
        if distribution is None:
            # no successor known for this word: stop the walk rather than
            # raising KeyError on the missing state
            break
        word = distribution.random()
        words.append(word)
    return words
def assign_ttls_from_caida(bot_configs):
    """
    Assign realistic TTL values to bots with respect to their IP, based on the CAIDA dataset.
    If there exists an entry for a bot's IP, the TTL is chosen based on a distribution over all used TTLs by this IP.
    If there is no such entry, the TTL is chosen based on a distribution over all used TTLs and their respective
    frequency. Local bots always get the fixed TTL 128.

    The TTL is written to bot_configs[bot_id]["TTL"] and is always an int, regardless of which of the three
    sources it came from.

    :param bot_configs: the existing bot configurations (bot ID -> dict with at least "Type" and "IP")
    :raises ValueError: if a CSV line does not have exactly three fields or holds a non-integer TTL or frequency
    """

    def get_ip_ttl_distrib():
        """
        Parses the CSV file containing a mapping between IP and their used TTLs.

        :return: returns a dict with the IPs as keys and dicts for their TTL distribution as values
        """
        ip_based_distrib = {}
        with open("resources/CaidaTTL_perIP.csv", "r") as file:
            # every line consists of: IP, TTL, Frequency
            next(file)  # skip CSV header line
            for line in file:
                if not line.strip():
                    continue  # a blank (e.g. trailing) line carries no record
                ip_addr, ttl, freq = line.split(",")
                if ip_addr not in ip_based_distrib:
                    # the values for ip_based_distrib are dicts with key=TTL, value=Frequency
                    ip_based_distrib[ip_addr] = {}
                ip_based_distrib[ip_addr][ttl] = int(freq)
        return ip_based_distrib

    def get_total_ttl_distrib():
        """
        Parses the CSV file containing an overview of all used TTLs and their respective frequency.

        :return: returns a dict with the TTLs as keys and their frequencies as values
        """
        total_ttl_distrib = {}
        with open("resources/CaidaTTL_total.csv", "r") as file:
            # every line consists of: TTL, Frequency, Fraction
            next(file)  # skip CSV header line
            for line in file:
                if not line.strip():
                    continue  # a blank (e.g. trailing) line carries no record
                ttl, freq, _ = line.split(",")
                total_ttl_distrib[ttl] = int(freq)
        return total_ttl_distrib

    # get the TTL distribution for every IP that is available in "resources/CaidaTTL_perIP.csv"
    ip_ttl_distrib = get_ip_ttl_distrib()
    # build a probability dict for the total TTL distribution
    total_ttl_prob_dict = Lea.fromValFreqsDict(get_total_ttl_distrib())

    # loop over every bot id and assign a TTL to the respective bot
    for bot_id in sorted(bot_configs):
        bot_type = bot_configs[bot_id]["Type"]
        bot_ip = bot_configs[bot_id]["IP"]

        if bot_type == "local":
            bot_configs[bot_id]["TTL"] = 128
        # if there exists detailed information about the TTL distribution of this IP
        elif bot_ip in ip_ttl_distrib:
            ip_ttl_freqs = ip_ttl_distrib[bot_ip]
            # build a probability dict from this IP's TTL distribution
            source_ttl_prob_dict = Lea.fromValFreqsDict(ip_ttl_freqs)
            # the CSV yields TTLs as text; convert so the field has the same type as for local bots
            bot_configs[bot_id]["TTL"] = int(source_ttl_prob_dict.random())
        # otherwise assign a random TTL based on the total TTL distribution
        else:
            bot_configs[bot_id]["TTL"] = int(total_ttl_prob_dict.random())