Exemple #1
0
 def __init__(self, op, lea1, nTimes=2):
     ''' initializes the instance's attributes from the given arguments:
         op is stored as _op, lea1 as _lea1 and nTimes as _nTimes;
         raises Lea.Error if nTimes is zero or negative (the check is made
         after the attributes have been stored)
     '''
     Lea.__init__(self)
     (self._op, self._lea1, self._nTimes) = (op, lea1, nTimes)
     if nTimes <= 0:
         raise Lea.Error("times method requires a strictly positive integer")
Exemple #2
0
def exact():
    """Iterate the recursion W <- fastMax(W + U, 0) and plot intermediate results.

    Starts from fastMax(W0 + U, 0), then performs 20 further updates.  At
    steps 5, 10, 15 and 20 the current distribution is plotted (before the
    update of that step is applied), labelled "k=<step>".

    Returns the pair (support, pmf) of the distribution after the last update.
    """
    dist = Lea.fastMax(W0 + U, 0)
    for step in range(20):
        k = step + 1
        if not k % 5:
            plt.plot(dist.support(), dist.pmf(), label="k={}".format(k))
        dist = Lea.fastMax(dist + U, 0)
    support = dist.support()
    pmf = dist.pmf()
    return support, pmf
Exemple #3
0
 def __init__(self,lea1,nTimes=2):
     ''' initializes the instance's attributes:
         _lea1 is the given lea1, _lea1Tuple is lea1 mapped so that each value v
         becomes the 1-tuple (v,), and _nTimes is the given nTimes;
         raises Lea.Error if nTimes is zero or negative (the check is made
         after the attributes have been stored)
     '''
     Lea.__init__(self)
     def wrapInTuple(v):
         return (v,)
     self._lea1 = lea1
     self._lea1Tuple = lea1.map(wrapInTuple)
     self._nTimes = nTimes
     if nTimes <= 0:
         raise Lea.Error("cprodTimes method requires a strictly positive integer")
Exemple #4
0
 def build(*clauses,**kwargs):
     ''' builds and returns a Blea instance from the given clauses;
         each clause is a tuple (cond,result); cond and result are converted
         through Lea.coerce; a clause having cond = None is the 'other' clause,
         which applies when none of the other conditions holds (at most one
         such clause is accepted);
         the optional keyword argument priorLea gives prior probabilities from
         which the result of the 'other' case is derived; any other keyword
         argument is currently ignored (see TODO below);
         raises Lea.Error if
         - more than one 'other' clause is given,
         - both an 'other' clause and priorLea are given,
         - two conditions can be true together,
         - priorLea is given although the conditions already cover all cases,
         - a prior probability is incompatible with the given clauses,
         - the conditions do not cover all cases and neither an 'other' clause
           nor priorLea is given
     '''
     priorLea = kwargs.get('priorLea',None)
     # TODO: check no other args !!
     # PY3: def build(*clauses,priorLea=None):
     # isolate the results of the 'other' clauses (those with a None condition)
     elseClauseResults = tuple(result for (cond,result) in clauses if cond is None)
     if len(elseClauseResults) > 1:
         raise Lea.Error("impossible to define more than one 'other' clause")
     if len(elseClauseResults) == 1:
         if priorLea is not None:
             raise Lea.Error("impossible to define together prior probabilities and 'other' clause")
         elseClauseResult = elseClauseResults[0]
     else:
         elseClauseResult = None
     # the normal clauses, as (condition Lea, result Lea) pairs, in the given order
     normClauseLeas = tuple((Lea.coerce(cond),Lea.coerce(result)) for (cond,result) in clauses if cond is not None)
     condLeas = tuple(condLea for (condLea,resultLea) in normClauseLeas)
     # check that conditions are disjoint
     for (condLea1,condLea2) in genPairs(condLeas):
         if (condLea1&condLea2).isFeasible():
             raise Lea.Error("clause conditions are not disjoint")
     # build the OR of all given conditions
     orCondsLea = Lea.reduce(or_,condLeas)
     isClauseSetComplete = orCondsLea.isTrue()
     if priorLea is not None:
         # prior distribution: determine elseClauseResult
         if isClauseSetComplete:
             # TODO check priorLea equivalent to self
             raise Lea.Error("forbidden to define prior probabilities for complete clause set")
         # presumably pTrue/count is the probability that at least one condition
         # holds -- TODO confirm against Lea._p
         (pTrue,count) = orCondsLea._p(True)
         pFalse = count - pTrue
         priorAleaDict = dict(priorLea.getAlea().genVPs())
         priorAleaCount = sum(priorAleaDict.values())
         normAleaDict = dict(Mlea(*(resultLea for (condLea,resultLea) in normClauseLeas)).getAlea().genVPs())
         normAleaCount = sum(normAleaDict.values())
         valuesSet = frozenset(chain(priorAleaDict.keys(),normAleaDict.keys()))
         vps = []
         for value in valuesSet:
              priorP = priorAleaDict.get(value,0)
              condP = normAleaDict.get(value,0)
              # p is priorP/priorAleaCount - (condP/normAleaCount)*(pTrue/count),
              # multiplied by the common denominator count*normAleaCount*priorAleaCount
              p = priorP*count*normAleaCount - condP*pTrue*priorAleaCount
              if not(0 <= p <= pFalse*normAleaCount*priorAleaCount):
                  # Infeasible : probability represented by p goes outside range from 0 to 1
                  priorPFraction = ProbFraction(priorP,priorAleaCount)
                  lowerPFraction = ProbFraction(condP*pTrue,count*normAleaCount)
                  upperPFraction = ProbFraction(condP*pTrue+pFalse*normAleaCount,count*normAleaCount)
                  raise Lea.Error("prior probability of '%s' is %s, outside the range [ %s , %s ]"%(value,priorPFraction,lowerPFraction,upperPFraction))
              vps.append((value,p))
         # the weights p found above define the result used for the 'other' case
         elseClauseResult = Lea.fromValFreqs(*vps)
     elif elseClauseResult is None:
         # check that clause set is complete
         if not isClauseSetComplete:
             # TODO? : assume a uniform prior distribution ? ... which values ? 
             raise Lea.Error("incomplete clause set requires 'other' clause or prior probabilities")
     if elseClauseResult is not None:
         # add the 'other' clause, whose condition is the negation of all given conditions
         elseCondLea = ~orCondsLea
         normClauseLeas += ((elseCondLea,Lea.coerce(elseClauseResult)),)
         # note that orCondsLea is NOT extended with orCondsLea |= elseCondLea
         # so, in case of else clause (and only in this case), orCondsLea is NOT certainly true
     # one Ilea (result given condition) per clause, in clause order
     return Blea(*(Ilea(resultLea,condLea) for (condLea,resultLea) in normClauseLeas))    
Exemple #5
0
 def __init__(self,*iLeas):
     ''' initializes the instance's attributes:
         _iLeas is the tuple of given iLeas;
         _ctxClea is a Clea built on all the leaves returned by getAleaLeavesSet()
         on the given iLeas, keeping only those having a _count greater than 1
     '''
     Lea.__init__(self)
     self._iLeas = tuple(iLeas)
     # the context built below matters only when some clauses miss variables that
     # are present in other clauses (e.g. CPT with context-specific independence):
     # a rebalancing is then needed if such missing variables admit multiple
     # values (total probability weight > 1)
     multiValuedLeaves = frozenset(leaf
                                   for clause in self._iLeas
                                   for leaf in clause.getAleaLeavesSet()
                                   if leaf._count > 1)
     self._ctxClea = Clea(*multiValuedLeaves)
Exemple #6
0
 def __init__(self,nextStateLeaPerState):
     ''' initializes Chain instance's attributes;
         nextStateLeaPerState is a sequence of tuples (stateObj,nextStateLea)
         where stateObj is a state object (e.g. a string) and nextStateLea is a Lea instance
         giving probabilities of transition from stateObj to each state object;
         the sequence is traversed twice, so it shall not be a one-shot iterator
     '''
     object.__init__(self)
     self._stateObjs = tuple(stateObj for (stateObj,nextStateLea) in nextStateLeaPerState)
     # one certain state per declared state object
     stateAleaDict = {}
     for stateObj in self._stateObjs:
         stateAleaDict[stateObj] = StateAlea(Lea.coerce(stateObj),self)
     self._stateAleaDict = stateAleaDict
     # the state built from all declared state objects
     self._state = StateAlea(Lea.fromVals(*self._stateObjs),self)
     # one clause (current state is stateObj, next state distribution) per declared state
     clauses = []
     for (stateObj,nextStateLea) in nextStateLeaPerState:
         clauses.append((self._state==stateObj,nextStateLea))
     self._nextStateBlea = Blea.build(*clauses)
Exemple #7
0
 def assign_realistic_ttls(bot_configs: dict):
     """
     Assigns a realistic TTL to each bot in bot_configs; the dict is updated in place.

     Local bots get the fixed TTL 128. For any other bot the TTL is drawn at random from the TTL
     distribution that the statistics report for the bot's IP; if that distribution is empty, the
     most used TTL value of the statistics is taken (a random one if several values are reported).

     :param bot_configs: dict mapping each bot id to its configuration dict; every configuration needs
                         a "Type" entry, non-local bots also an "IP" entry. The "TTL" entry is written.
     """
     # iterate in sorted id order so that random draws are consumed in a reproducible order
     for bot in sorted(bot_configs.keys()):
         config = bot_configs[bot]

         if config["Type"] == "local":  # Set fix TTL for local Bots
             config["TTL"] = 128
             continue

         # Set varying TTL for external Bots, based on the TTL distribution of the IP address
         bot_ttl_dist = self.statistics.get_ttl_distribution(config["IP"])
         if len(bot_ttl_dist) > 0:
             config["TTL"] = Lea.fromValFreqsDict(bot_ttl_dist).random()
             continue

         # no TTL known for this IP: fall back on the most used TTL; the query result is fetched
         # once and reused instead of querying the statistics a second time
         most_used_ttl = self.statistics.process_db_query("most_used(ttlValue)")
         if isinstance(most_used_ttl, list):
             config["TTL"] = choice(most_used_ttl)
         else:
             config["TTL"] = most_used_ttl
Exemple #8
0
def markov(corpus, n_seq=1, start=None, length=42):
    """Print random word sequences generated by a first-order Markov chain.

    The chain is learnt from ``corpus``: every sentence is split on whitespace
    and the word-to-next-word transitions are counted, with BOS_TOKEN leading
    to each first word and each last word leading to EOS_TOKEN.  Blank
    sentences carry no transition and are ignored.

    :param corpus: iterable of sentences (strings)
    :param n_seq: number of sequences to print
    :param start: state the walk starts from; BOS_TOKEN if None
    :param length: maximum number of states visited per sequence, the start
                   state included; the walk stops earlier on EOS_TOKEN
    :raises KeyError: if the walk is in a state without recorded transitions
                      (e.g. a ``start`` word absent from the corpus, or
                      BOS_TOKEN when the corpus has no non-blank sentence)
    """
    # Counting occurrences
    next_one = defaultdict(Counter)
    next_one[EOS_TOKEN][EOS_TOKEN] = 1  # Last state is absorbing
    for sentence in corpus:
        words = sentence.split()
        if not words:
            # a blank sentence has no first word: words[0] would raise IndexError
            continue
        next_one[BOS_TOKEN][words[0]] += 1
        for current, following in zip(words, words[1:]):
            next_one[current][following] += 1
        next_one[words[-1]][EOS_TOKEN] += 1

    # Initializing states: one distribution over the successors of each state
    states = {}
    for state, successors in next_one.items():
        states[state] = Lea.fromValFreqsDict(successors)

    # Outputting visited states
    hidden_tokens = {BOS_TOKEN, EOS_TOKEN}
    for _ in range(n_seq):
        state = start if start is not None else BOS_TOKEN
        seq = [state]
        while len(seq) < length and state != EOS_TOKEN:
            state = states[state].random()
            seq.append(state)
        print(' '.join(token for token in seq if token not in hidden_tokens))
Exemple #9
0
    def det_ext_and_local_ids(self, prob_rspnd_local: int=0):
        """
        Decide for every responder ID whether it is local or external.

        The IDs in self.local_init_ids and self.external_init_ids keep their locality. Every ID of
        self.respnd_ids that is in neither set is assigned "local" or "external" by a random draw
        weighted with prob_rspnd_local*100 and (1-prob_rspnd_local)*100 respectively. The resulting
        sets are stored in self.local_ids and self.external_ids.

        :param prob_rspnd_local: the probability that a responder is local
        :return: the tuple (local_ids, external_ids)
        """
        local_ids = self.local_init_ids.copy()

        # weighted chooser between the two localities
        locality_weights = {"local": prob_rspnd_local*100, "external": (1-prob_rspnd_local)*100}
        rspnd_locality = Lea.fromValFreqsDict(locality_weights)

        external_ids = set(self.external_init_ids)

        # the set to extend for each possible outcome of the draw
        ids_by_locality = {"local": local_ids, "external": external_ids}

        for id_ in self.respnd_ids:
            already_mapped = id_ in local_ids or id_ in external_ids
            if not already_mapped:
                chosen = ids_by_locality.get(rspnd_locality.random())
                if chosen is not None:
                    chosen.add(id_)

        self.local_ids = local_ids
        self.external_ids = external_ids
        return self.local_ids, self.external_ids
Exemple #10
0
 def nextState(self,fromState=None,n=1):
     ''' returns the StateAlea instance reached after n transitions, starting
         from the state given in fromState;
         if fromState is None, then the starting state is the instance's _state attribute;
         if n = 0, then the starting state itself is returned (as a new StateAlea);
         raises Lea.Error if n is negative
     '''
     if n < 0:
         raise Lea.Error("nextState method requires a positive value for argument 'n'")
     startState = self._state if fromState is None else fromState
     currentAlea = Lea.coerce(startState).getAlea()
     remaining = n
     while remaining > 0:
         # one transition: next-state distribution given that the state is currentAlea
         currentAlea = self._nextStateBlea.given(self._state==currentAlea).getAlea()
         remaining -= 1
     return StateAlea(currentAlea,self)
Exemple #11
0
def markov(corpus, start, length):
    """Return a random walk over a first-order Markov chain learnt from corpus.

    Every sentence of ``corpus`` is split on whitespace and the
    word-to-next-word transitions are counted; the walk then starts at
    ``start`` and repeatedly draws a successor according to these counts.

    :param corpus: iterable of sentences (strings)
    :param start: first word of the walk
    :param length: maximum number of words returned, ``start`` included
    :return: list of visited words; it is shorter than ``length`` when the
             walk reaches a word without any recorded successor (a word only
             seen at the end of sentences, or a ``start`` absent from corpus)
    """
    # Counting occurrences
    next_one = defaultdict(Counter)
    for sentence in corpus:
        tokens = sentence.split()
        for current, following in zip(tokens, tokens[1:]):
            next_one[current][following] += 1

    # Initializing states: a plain dict, so that lookups never create entries
    states = {}
    for word, successors in next_one.items():
        states[word] = Lea.fromValFreqsDict(successors)

    # Outputting visited states
    word = start
    words = [word]
    for _ in range(length - 1):
        if word not in states:
            # dead end: stop here instead of failing with KeyError
            break
        word = states[word].random()
        words.append(word)
    return words
def markov(corpus, start, length):
    """Generate a word sequence by walking a Markov chain built from corpus.

    The transitions are the consecutive word pairs found in the
    whitespace-split sentences of ``corpus``, weighted by their number of
    occurrences.

    :param corpus: iterable of sentences (strings)
    :param start: word the sequence begins with
    :param length: maximum number of words in the sequence, ``start`` included
    :return: list of words; generation ends early when the current word has
             no recorded successor (this used to raise KeyError)
    """
    # Counting occurrences
    next_one = defaultdict(Counter)
    for sentence in corpus:
        sentence_words = sentence.split()
        for first, second in zip(sentence_words, sentence_words[1:]):
            next_one[first][second] += 1

    # Initializing states
    states = {}
    for known_word, followers in next_one.items():
        states[known_word] = Lea.fromValFreqsDict(followers)

    # Outputting visited states
    word = start
    words = [word]
    for _ in range(length - 1):
        distribution = states.get(word)
        if distribution is None:
            # no successor known for this word: the sequence cannot go on
            break
        word = distribution.random()
        words.append(word)
    return words
Exemple #13
0
 def __init__(self,f,cleaArgs):
     ''' initializes the instance's attributes:
         f is stored as _f and cleaArgs as _cleaArgs
     '''
     Lea.__init__(self)
     (self._f, self._cleaArgs) = (f, cleaArgs)
Exemple #14
0
 def __init__(self,lea1,condLea):
     ''' initializes the instance's attributes:
         lea1 is stored as _lea1 and condLea as _condLea
     '''
     Lea.__init__(self)
     (self._lea1, self._condLea) = (lea1, condLea)
Exemple #15
0
 def __init__(self,*args):
     ''' initializes the instance's attributes:
         _leaArgs is the tuple of given args, each converted through Lea.coerce
     '''
     Lea.__init__(self)
     coercedArgs = []
     for arg in args:
         coercedArgs.append(Lea.coerce(arg))
     self._leaArgs = tuple(coercedArgs)
from __future__ import division, print_function
from lea import Lea

# P(C): cancer distribution, 1 'yes' for 99 'no'
cancer = Lea.fromValFreqs(('yes', 1), ('no', 99))

print('\nCancer Distribution', 'P(C)', cancer.asPct(), sep='\n')

# P(M|C=yes): mammogram result when cancer is present, 80 'pos' for 20 'neg'
mamm_g_cancer = Lea.fromValFreqs(('pos', 80), ('neg', 20))

print('\nProb for mammogram given cancer', 'P(M|C=yes)', mamm_g_cancer.asPct(), sep='\n')

# P(M|C=no): mammogram result when cancer is absent, 96 'pos' out of 1000
mamm_g_no_cancer = Lea.fromValFreqs(('pos', 96), ('neg', 1000-96))

print('\nProb for mammogram given NO cancer', 'P(M|C=no)', mamm_g_no_cancer.asPct(), sep='\n')
Exemple #17
0
 def __init__(self,lea1,nbValues):
     ''' initializes the instance's attributes:
         lea1 is stored as _lea1 and nbValues as _nbValues;
         raises Lea.Error, before anything is initialized, if nbValues is
         zero or negative
     '''
     if nbValues <= 0:
         raise Lea.Error("draw method requires a strictly positive integer")
     Lea.__init__(self)
     (self._lea1, self._nbValues) = (lea1, nbValues)
Exemple #18
0
def largest_n_out_of(pmf: Lea, n: int, out_of: int) -> Distribution:
    """Combine ``out_of`` repetitions of ``pmf``, keeping the ``n`` largest outcomes.

    Each outcome is first wrapped in a 1-tuple; ``times`` then combines the
    repetitions with an operation that concatenates two outcome tuples, sorts
    them and keeps the last ``n`` items.  Note that with ``n == 0`` the slice
    ``[-0:]`` keeps every item rather than none.
    """
    def as_singleton(outcome):
        return (outcome,)

    def keep_largest(outcomes1, outcomes2):
        merged = sorted(outcomes1 + outcomes2)
        return tuple(merged[-n:])

    singletons = pmf.map(as_singleton)
    return singletons.times(out_of, keep_largest)
Exemple #19
0
        def assign_realistic_timestamps(messages: list, external_ids: set,
                                        local_ids: set, avg_delay_local: list,
                                        avg_delay_external: list,
                                        zero_reference: float):
            """
            Assigns realistic timestamps to a set of messages; the messages are updated in place

            :param messages: the set of messages to be updated; a message refers to its response through
                             refer_msg_id, which is an index into this list (-1 if there is no response)
            :param external_ids: the set of bot ids, that are outside the network, i.e. external
            :param local_ids: the set of bot ids, that are inside the network, i.e. local (currently not
                              used: every communication without an external endpoint is handled as local)
            :param avg_delay_local: the avg_delay distribution between the dispatch and the reception of a packet
                                    between local computers
            :param avg_delay_external: the avg_delay distribution between the dispatch and the reception of a packet
                                       between a local and an external computer; if it is empty, the local
                                       distribution is used for external communication as well
            :param zero_reference: the timestamp which is regarded as the beginning of the pcap_file and therefore
                                   handled like a timestamp that resembles 0
            """
            updated_msgs = []

            # Dict, takes a tuple of 2 Bot_IDs as a key (requester, responder), returns the time of the last response,
            # the requester received necessary in order to make sure, that additional requests are sent only after the
            # response to the last one was received
            last_response = {}

            for m in messages:  # init
                last_response[(m.src, m.dst)] = -1

            # the delay distributions do not change between messages: each one is built on first use
            # only, so that an empty delay list still raises no error as long as it is not needed
            external_dist = None
            local_dist = None

            # update all timestamps
            for req_msg in messages:

                if req_msg in updated_msgs:
                    # message already updated
                    continue

                pair = (req_msg.src, req_msg.dst)
                shifted_time = zero_reference + req_msg.time

                # if req_msg.timestamp would be before the timestamp of the response to the last request, req_msg needs
                # to be sent later (else branch)
                if last_response[pair] == -1 or last_response[pair] < (shifted_time - 0.05):
                    # update req_msg timestamp with a variation of up to 50ms
                    req_msg.time = shifted_time + uniform(-0.05, 0.05)
                    updated_msgs.append(req_msg)

                else:
                    req_msg.time = last_response[pair] + 0.06 + uniform(-0.05, 0.05)

                # update response if necessary
                if req_msg.refer_msg_id != -1:
                    respns_msg = messages[req_msg.refer_msg_id]

                    # check for local or external communication and update response timestamp with the respective
                    # avg delay; the parentheses matter: "a or b and c" is evaluated as "a or (b and c)", which
                    # selected the external delays for an external requester even when there were none
                    is_external = (req_msg.src in external_ids
                                   or req_msg.dst in external_ids)
                    if is_external and avg_delay_external:
                        # external communication
                        if external_dist is None:
                            external_dist = Lea.fromSeq(avg_delay_external)
                        delay_dist = external_dist

                    else:
                        # local communication
                        if local_dist is None:
                            local_dist = Lea.fromSeq(avg_delay_local)
                        delay_dist = local_dist

                    # the drawn delay is multiplied by 0.001 before being added to the request time
                    respns_msg.time = req_msg.time + float(
                        delay_dist.random()) * 0.001

                    updated_msgs.append(respns_msg)
                    last_response[pair] = respns_msg.time
from __future__ import division, print_function
from lea import Lea

# prior distribution P(C): 'yes' with frequency 1, 'no' with frequency 99
cancer = Lea.fromValFreqs(
    ('yes', 1),
    ('no', 99))

print('\nCancer Distribution',
      'P(C)',
      cancer.asPct(),
      sep='\n')

# P(M|C=yes): mammogram result when cancer == 'yes'
mamm_g_cancer = Lea.fromValFreqs(
    ('pos', 80),
    ('neg', 20))

print('\nProb for mammogram given cancer',
      'P(M|C=yes)',
      mamm_g_cancer.asPct(),
      sep='\n')

# P(M|C=no): mammogram result when cancer == 'no'
mamm_g_no_cancer = Lea.fromValFreqs(
    ('pos', 96),
    ('neg', 1000 - 96))

print('\nProb for mammogram given NO cancer',
      'P(M|C=no)',
      mamm_g_no_cancer.asPct(),
      sep='\n')

# conditional probability table: one (condition, distribution) clause per cancer value
mammograms = Lea.buildCPT(
    (cancer == 'yes', mamm_g_cancer),
    (cancer == 'no', mamm_g_no_cancer))

print('\nMammograms',
      'P(M)',
      mammograms.asPct(),
      sep='\n')

# get joint probs for all events
Exemple #21
0
 def __init__(self, *args):
     ''' initializes the instance's attributes:
         _leaArgs is the tuple of given args, each converted through Lea.coerce;
         _factors gives, for each of these arguments, the least common multiple
         of all the arguments' counts (as returned by calcLCM) divided by the
         count of that argument
     '''
     Lea.__init__(self)
     self._leaArgs = tuple(Lea.coerce(arg) for arg in args)
     counts = []
     for leaArg in self._leaArgs:
         counts.append(leaArg.getAlea()._count)
     counts = tuple(counts)
     commonCount = calcLCM(counts)
     self._factors = tuple(commonCount // count for count in counts)
from __future__ import division, print_function
from lea import Lea

# coin: 'H' and 'T' with the same frequency
coin = Lea.fromValFreqs(('H', 1), ('T', 1))

print('Coin Distribution', coin, sep='\n')

# six-sided die: faces '1' to '6' (as strings), all with the same frequency
die6 = Lea.fromValFreqs(*[(str(face), 1) for face in range(1, 7)])

print('Six-sided Die Distribution', die6, sep='\n')

# four-sided die: faces '1' to '4' (as strings), all with the same frequency
die4 = Lea.fromValFreqs(*[(str(face), 1) for face in range(1, 5)])

print('Four-sided Die Distribution',
 def __init__(self,*words):
     ''' stores in the words attribute the distribution that Lea.fromVals
         builds from the given words
     '''
     wordsLea = Lea.fromVals(*words)
     self.words = wordsLea
Exemple #24
0
from collections import Counter
from matplotlib.pylab import plt
from matplotlib2tikz import save as tikz_save
from matplotlib import style
style.use('ggplot')

from lea import Lea

# Inputs shared by simulate() and exact() below.
# NOTE(review): W/S/X look like the quantities of a queueing-type recursion
# W <- max(W + S - X, 0) -- confirm the intended meaning with the author.

# starting value of W; the commented-out alternative would make it a distribution
W0 = 5  # Lea.fromVals(0, 1, 2)
# distribution built from the values 1, 2, 3
S = Lea.fromVals(1, 2, 3)
# distribution built from the values 1, 2, 4
X = Lea.fromVals(1, 2, 4)
# increment applied to W at each step: the difference S - X
U = S - X


def simulate():
    """Simulate 1000 steps of W <- max(W + U.random(), 0) and plot the visit frequencies.

    The walk starts from max(W0 + U.random(), 0); that starting value itself
    is not counted.  After each fifth of the steps (5 plots in total), the
    relative frequency of every value visited so far is plotted, labelled
    "N=<step>".

    Returns the (x, y) lists of the last plot: visited values and their
    relative frequencies.
    """
    visits = Counter()
    n_steps = 1000
    plot_every = n_steps // 5  # make 5 plots
    level = max(W0 + U.random(), 0)
    step = 0
    while step < n_steps:
        step += 1
        level = max(level + U.random(), 0)
        visits[level] += 1
        if step % plot_every == 0:
            x = list(visits)
            total = sum(visits.values())
            y = [visits[value] / total for value in x]
            plt.plot(x, y, label="N={}".format(step))
    return x, y


def exact():
    W = Lea.fastMax(W0 + U, 0)
Exemple #25
0
        def assign_ttls_from_caida(bot_configs):
            """
            Assign realistic TTL values to bots with respect to their IP, based on the CAIDA dataset.
            Local bots get the fixed TTL 128.
            If there exists an entry for a bot's IP, the TTL is chosen based on a distribution over all used TTLs by
            this IP.
            If there is no such entry, the TTL is chosen based on a distribution over all used TTLs and their
            respective frequency.
            Every assigned TTL is an int; bot_configs is updated in place.

            :param bot_configs: the existing bot configurations; every configuration needs a "Type" and an "IP"
                                entry, the "TTL" entry is written
            """
            def get_ip_ttl_distrib():
                """
                Parses the CSV file containing a mapping between IP and their used TTLs.
                :return: returns a dict with the IPs as keys and dicts for their TTL distribution as values
                """
                ip_based_distrib = {}
                with open("resources/CaidaTTL_perIP.csv", "r") as file:
                    # every line consists of: IP, TTL, Frequency
                    next(file, None)  # skip CSV header line (tolerates an empty file)
                    for line in file:
                        if not line.strip():
                            # blank line, e.g. at the end of the file: nothing to parse
                            continue
                        ip_addr, ttl, freq = line.split(",")
                        # the values for ip_based_distrib are dicts with key=TTL, value=Frequency;
                        # the TTL is converted to int to match the int TTL given to local bots
                        ip_based_distrib.setdefault(ip_addr, {})[int(ttl)] = int(freq)

                return ip_based_distrib

            def get_total_ttl_distrib():
                """
                Parses the CSV file containing an overview of all used TTLs and their respective frequency.
                :return: returns a dict with the TTLs as keys and their frequencies as values
                """

                total_ttl_distrib = {}
                with open("resources/CaidaTTL_total.csv", "r") as file:
                    # every line consists of: TTL, Frequency, Fraction
                    next(file, None)  # skip CSV header line (tolerates an empty file)
                    for line in file:
                        if not line.strip():
                            # blank line, e.g. at the end of the file: nothing to parse
                            continue
                        ttl, freq, _ = line.split(",")
                        total_ttl_distrib[int(ttl)] = int(freq)

                return total_ttl_distrib

            # get the TTL distribution for every IP that is available in "resources/CaidaTTL_perIP.csv"
            ip_ttl_distrib = get_ip_ttl_distrib()
            # build a probability dict for the total TTL distribution
            total_ttl_prob_dict = Lea.fromValFreqsDict(get_total_ttl_distrib())

            # loop over every bot id and assign a TTL to the respective bot
            for bot_id in sorted(bot_configs):
                bot_config = bot_configs[bot_id]
                bot_type = bot_config["Type"]
                bot_ip = bot_config["IP"]

                if bot_type == "local":
                    bot_config["TTL"] = 128

                # if there exists detailed information about the TTL distribution of this IP
                elif bot_ip in ip_ttl_distrib:
                    # build a probability dict from this IP's TTL distribution
                    source_ttl_prob_dict = Lea.fromValFreqsDict(ip_ttl_distrib[bot_ip])
                    bot_config["TTL"] = source_ttl_prob_dict.random()

                # otherwise assign a random TTL based on the total TTL distribution
                else:
                    bot_config["TTL"] = total_ttl_prob_dict.random()
Exemple #26
0
# PyBossa is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with PyBossa.  If not, see <http://www.gnu.org/licenses/>.

from lea import Lea
import pandas as pd
import ngram

def lower(s):
    """Return the lower-case version of s, as given by s.lower()."""
    lowered = s.lower()
    return lowered

# distribution of the possible transcriptions, weighted by their frequencies
task_runs = Lea.fromValFreqs(("hola mundo", 55), ("HoLa mundos", 45), ("algo horroroso", 10))

# 30 random transcriptions drawn from the distribution
observation = task_runs.random(30)

# compare the transcriptions regardless of case
a = [lower(w) for w in observation]

df = pd.DataFrame({'info': a})

desc = df.describe()

# 'top' entry of the description of the 'info' column
top_string = desc['info']['top']

# parenthesised so that the line is valid on Python 3 as well as on Python 2
print("The top transcribed word is: %s" % top_string)

G = ngram.NGram([lower(w) for w in a])
 def setTermsChoices(self,*termsChoices):
     ''' stores in the termsChoices attribute the distribution that
         Lea.fromValFreqs builds from the given termsChoices arguments
     '''
     termsChoicesLea = Lea.fromValFreqs(*termsChoices)
     self.termsChoices = termsChoicesLea