コード例 #1
0
    def det_ext_and_local_ids(self, prob_rspnd_local: int=0):
        """
        Split the known IDs into a local and an external set.

        IDs from ``self.local_init_ids`` start out as local and IDs from
        ``self.external_init_ids`` start out as external. Every ID in
        ``self.respnd_ids`` that is in neither set yet is assigned by a weighted
        random draw. The outcome is stored in ``self.local_ids`` and
        ``self.external_ids``.

        :param prob_rspnd_local: the probability that a responder is local; the
                                 weights below treat it as a fraction between 0 and 1
        :return: the tuple (local IDs, external IDs)
        """
        # weighted chooser between the two localities
        locality_chooser = Lea.fromValFreqsDict({"local": prob_rspnd_local*100, "external": (1-prob_rspnd_local)*100})

        # work on fresh collections so the initial ID collections stay untouched
        assigned_local = self.local_init_ids.copy()
        assigned_external = set(self.external_init_ids)

        # draw a locality for every responder that has none yet
        for responder in self.respnd_ids:
            if responder not in assigned_local and responder not in assigned_external:
                drawn = locality_chooser.random()
                if drawn == "local":
                    assigned_local.add(responder)
                elif drawn == "external":
                    assigned_external.add(responder)

        self.local_ids = assigned_local
        self.external_ids = assigned_external
        return self.local_ids, self.external_ids
コード例 #2
0
 def assign_realistic_ttls(bot_configs: dict):
     """
     Assign a realistic TTL to every bot in ``bot_configs`` (modified in place).

     Local bots get the fixed TTL 128. For every other bot the TTL is drawn from
     the TTL distribution that the statistics report for the bot's IP. If that
     distribution is empty, the most used TTL value from the statistics database
     is used instead; when the query yields a list, one entry is picked at random.

     NOTE(review): ``self`` is not a parameter; it is read from the surrounding
     scope, so this function presumably lives inside a method - confirm.

     :param bot_configs: dict mapping each bot ID to its configuration dict; every
                         configuration needs a "Type" entry and, unless the type is
                         "local", an "IP" entry
     """
     for bot in sorted(bot_configs):
         config = bot_configs[bot]

         if config["Type"] == "local":  # fixed TTL for local bots
             config["TTL"] = 128
             continue

         # varying TTL for external bots, based on the TTL distribution of their IP
         bot_ttl_dist = self.statistics.get_ttl_distribution(config["IP"])
         if len(bot_ttl_dist) > 0:
             config["TTL"] = Lea.fromValFreqsDict(bot_ttl_dist).random()
             continue

         # no per-IP data: query the most used TTL once and reuse the result
         most_used_ttl = self.statistics.process_db_query("most_used(ttlValue)")
         if isinstance(most_used_ttl, list):
             config["TTL"] = choice(most_used_ttl)
         else:
             config["TTL"] = most_used_ttl
コード例 #3
0
def markov(corpus, n_seq=1, start=None, length=42):
    """
    Learn a first-order word Markov chain from ``corpus`` and print random walks.

    Each sentence is split on whitespace. Transitions are counted from
    BOS_TOKEN to the first word, between neighbouring words and from the last
    word to EOS_TOKEN. Sentences without any word are ignored.

    Every walk stops when it holds ``length`` states, when EOS_TOKEN is
    reached, or when the current state has no learned transitions (for example
    an unknown ``start`` word). BOS_TOKEN and EOS_TOKEN are not printed.

    :param corpus: iterable of sentences (strings)
    :param n_seq: number of sequences to print
    :param start: state to start every walk from; BOS_TOKEN if None
    :param length: maximum number of states per walk, including the start state
    """
    # Counting occurrences
    next_one = defaultdict(Counter)
    next_one[EOS_TOKEN][EOS_TOKEN] = 1  # Last state is absorbing
    for sentence in corpus:
        words = sentence.split()
        if not words:
            # blank sentence: there is no first or last word to count
            continue
        next_one[BOS_TOKEN][words[0]] += 1
        for current, following in zip(words, words[1:]):
            next_one[current][following] += 1
        next_one[words[-1]][EOS_TOKEN] += 1

    # Initializing states
    states = {state: Lea.fromValFreqsDict(freqs) for state, freqs in next_one.items()}

    # Outputting visited states
    markers = {BOS_TOKEN, EOS_TOKEN}
    for _ in range(n_seq):
        state = start if start is not None else BOS_TOKEN
        seq = [state]
        # "state in states" ends the walk on a state without known successors
        while len(seq) < length and state != EOS_TOKEN and state in states:
            state = states[state].random()
            seq.append(state)
        print(' '.join(word for word in seq if word not in markers))
コード例 #4
0
def markov(corpus, start, length):
    """
    Generate a random word sequence from a first-order Markov chain.

    The chain is learned from ``corpus`` by counting, for every word, which
    words directly follow it within a sentence (sentences are split on
    whitespace).

    :param corpus: iterable of sentences (strings)
    :param start: the first word of the generated sequence
    :param length: the desired number of words, including ``start``
    :return: list of visited words; it always begins with ``start`` and is
             shorter than ``length`` if a word without any recorded successor
             is reached
    """
    # Counting occurrences
    next_one = defaultdict(Counter)
    for sentence in corpus:
        words = sentence.split()
        for current, following in zip(words, words[1:]):
            next_one[current][following] += 1

    # Initializing states
    states = {word: Lea.fromValFreqsDict(freqs) for word, freqs in next_one.items()}

    # Outputting visited states
    word = start
    words = [word]
    for _ in range(length - 1):
        if word not in states:
            # dead end: this word was never seen with a successor
            break
        word = states[word].random()
        words.append(word)
    return words
def markov(corpus, start, length):
    """
    Generate a random word sequence from a first-order Markov chain.

    The chain is learned from ``corpus`` by counting, for every word, which
    words directly follow it within a sentence (sentences are split on
    whitespace).

    :param corpus: iterable of sentences (strings)
    :param start: the first word of the generated sequence
    :param length: the desired number of words, including ``start``
    :return: list of visited words; it always begins with ``start`` and is
             shorter than ``length`` if a word without any recorded successor
             is reached
    """
    # Counting occurrences
    next_one = defaultdict(Counter)
    for sentence in corpus:
        words = sentence.split()
        for current, following in zip(words, words[1:]):
            next_one[current][following] += 1

    # Initializing states
    states = {word: Lea.fromValFreqsDict(freqs) for word, freqs in next_one.items()}

    # Outputting visited states
    word = start
    words = [word]
    for _ in range(length - 1):
        if word not in states:
            # dead end: this word was never seen with a successor
            break
        word = states[word].random()
        words.append(word)
    return words
コード例 #6
0
        def assign_ttls_from_caida(bot_configs):
            """
            Assign realistic TTL values to bots with respect to their IP, based on the CAIDA dataset.
            Local bots always get the TTL 128.
            If there exists an entry for a bot's IP, the TTL is chosen based on a distribution over all used TTLs by
            this IP.
            If there is no such entry, the TTL is chosen based on a distribution over all used TTLs and their
            respective frequency.
            All assigned TTLs are integers. The configurations are modified in place.

            :param bot_configs: the existing bot configurations; a dict mapping each bot ID to a dict that has
                                at least the entries "Type" and "IP"
            """
            def get_ip_ttl_distrib():
                """
                Parses the CSV file containing a mapping between IP and their used TTLs.
                :return: returns a dict with the IPs as keys and dicts (key=TTL as int, value=frequency as int)
                         for their TTL distribution as values
                """
                ip_based_distrib = {}
                with open("resources/CaidaTTL_perIP.csv", "r") as file:
                    # every line consists of: IP, TTL, Frequency
                    next(file, None)  # skip CSV header line (tolerates an empty file)
                    for line in file:
                        if not line.strip():
                            continue  # ignore blank lines, e.g. at the end of the file
                        ip_addr, ttl, freq = line.split(",")
                        # store the TTL as int so that it has the same type as the fixed TTL of local bots
                        ip_based_distrib.setdefault(ip_addr, {})[int(ttl)] = int(freq)

                return ip_based_distrib

            def get_total_ttl_distrib():
                """
                Parses the CSV file containing an overview of all used TTLs and their respective frequency.
                :return: returns a dict with the TTLs (int) as keys and their frequencies (int) as values
                """
                total_ttl_distrib = {}
                with open("resources/CaidaTTL_total.csv", "r") as file:
                    # every line consists of: TTL, Frequency, Fraction
                    next(file, None)  # skip CSV header line (tolerates an empty file)
                    for line in file:
                        if not line.strip():
                            continue  # ignore blank lines, e.g. at the end of the file
                        ttl, freq, _ = line.split(",")
                        total_ttl_distrib[int(ttl)] = int(freq)

                return total_ttl_distrib

            # get the TTL distribution for every IP that is available in "resources/CaidaTTL_perIP.csv"
            ip_ttl_distrib = get_ip_ttl_distrib()
            # build a probability dict for the total TTL distribution
            total_ttl_prob_dict = Lea.fromValFreqsDict(get_total_ttl_distrib())

            # loop over every bot id and assign a TTL to the respective bot
            for bot_id in sorted(bot_configs):
                bot_config = bot_configs[bot_id]
                bot_type = bot_config["Type"]
                bot_ip = bot_config["IP"]

                if bot_type == "local":
                    bot_config["TTL"] = 128

                # if there exists detailed information about the TTL distribution of this IP
                elif bot_ip in ip_ttl_distrib:
                    # build a probability dict from this IP's TTL distribution
                    source_ttl_prob_dict = Lea.fromValFreqsDict(ip_ttl_distrib[bot_ip])
                    bot_config["TTL"] = source_ttl_prob_dict.random()

                # otherwise assign a random TTL based on the total TTL distribution
                else:
                    bot_config["TTL"] = total_ttl_prob_dict.random()