Python countre Examples

Programming Language: Python

Namespace/Package Name: toolkit

Method/Function: countre

Examples at hotexamples.com: 2

Python countre - 2 examples found. These are the top-rated real-world Python examples of toolkit.countre extracted from open source projects. You can rate the examples to help us improve their quality.

Example #1

Show file

File: features.py Project: pkok/MLPR-labs

def best_features():
    """
    Rank every training-set word by its mutual information with the class.

    Word bags are collected from the ham and spam training folders
    (bayes.HAM + bayes.TRAIN and bayes.SPAM + bayes.TRAIN).  Each word is
    wrapped in slashes and handed to toolkit.countre with
    smoothing=toolkit.ONE; element [0] of its result supplies one value per
    wrapped word, which is used here as p(w | class).  A word that was not
    collected for a class gets toolkit.ZERO for that class.

    The score of a word is the sum, over both classes and over the word
    being present / absent, of

        p(w | c) * p(c) * log(p(w | c) / p(w))

    where p(w) = p(w | ham) * PRIOR_HAM + p(w | spam) * PRIOR_SPAM and the
    "absent" probabilities are toolkit.ONE minus the "present" ones.

    Returns:
        A list of (wrapped_word, score) pairs sorted by score, highest
        first.
    """
    # Value substituted for a logarithm that cannot be evaluated.
    maxlog = math.log(toolkit.MAX)

    def wrapped_words(folder):
        # Real lists (not lazy map objects) so the words can be concatenated
        # and iterated more than once on Python 2 and Python 3 alike.
        bags = [toolkit.word_bag(entry) for entry in toolkit.get_files(folder)]
        # set().union(*bags) also works when the folder yields no files,
        # whereas set.union(*[]) raises TypeError.
        return ['/' + word + '/' for word in set().union(*bags)]

    def class_probabilities(folder, words):
        values = toolkit.countre(folder, words, smoothing=toolkit.ONE)[0]
        return defaultdict(lambda: toolkit.ZERO, zip(words, values))

    def log_ratio(numerator, denominator):
        # Only arithmetic failures (division by zero, overflow) and log
        # domain errors are expected here; anything else should surface.
        try:
            return math.log(numerator / denominator)
        except (ArithmeticError, ValueError):
            return maxlog

    ham_train = bayes.HAM + bayes.TRAIN
    spam_train = bayes.SPAM + bayes.TRAIN
    ham_words = wrapped_words(ham_train)
    spam_words = wrapped_words(spam_train)
    all_words = set(ham_words + spam_words)

    # p(w_i | c)
    p_w_ham = class_probabilities(ham_train, ham_words)
    p_w_spam = class_probabilities(spam_train, spam_words)

    mut_inf = {}
    for word in all_words:
        p_w_h = p_w_ham[word]
        p_nw_h = toolkit.ONE - p_w_h
        p_w_s = p_w_spam[word]
        p_nw_s = toolkit.ONE - p_w_s
        p_w = p_w_h * toolkit.PRIOR_HAM + p_w_s * toolkit.PRIOR_SPAM
        p_nw = toolkit.ONE - p_w
        # The ratio must be conditional over marginal.  With the operands
        # the other way round the sum is the negated mutual information and
        # the descending sort would list the least informative words first.
        score = p_w_h * toolkit.PRIOR_HAM * log_ratio(p_w_h, p_w)
        score += p_nw_h * toolkit.PRIOR_HAM * log_ratio(p_nw_h, p_nw)
        score += p_w_s * toolkit.PRIOR_SPAM * log_ratio(p_w_s, p_w)
        score += p_nw_s * toolkit.PRIOR_SPAM * log_ratio(p_nw_s, p_nw)
        mut_inf[word] = score
    # .items() exists on both Python 2 and Python 3 dictionaries.
    return sorted(mut_inf.items(), key=operator.itemgetter(1), reverse=True)

Example #2

Show file

File: bayes.py Project: pkok/MLPR-labs

def instance_feature_prob(instance, features, clss, train=True, smoothing=toolkit.ZERO):
    """
    Per-feature terms [p(x_i | C_k) for x_i in x] from the assignment.

    instance is one message and clss is its class (HAM or SPAM); features
    is a collection of (compiled) regular expressions.  The folder is
    clss + TRAIN when train is true and clss + TEST otherwise.

    Two per-feature sequences are combined pairwise:
      * c -- element [0] of toolkit.countre(folder, features,
        smoothing=smoothing); per the assignment this is how many
        instances in the folder match the feature, as a fraction
        (presumably; confirm against toolkit.countre).
      * p -- toolkit.presentre(instance, features), whether the feature
        occurs in this instance (presumably 0/1; confirm against
        toolkit.presentre).

    Each pair yields pow(c, p) * pow(1 - c, 1 - p), except that the pair
    (toolkit.ZERO, toolkit.ZERO) yields 1 directly.

    Returns whatever map() produces for those pairs.
    """
    def likelihood(class_prob, present):
        # Both zero: answer 1 without evaluating the powers.
        if class_prob == toolkit.ZERO and present == toolkit.ZERO:
            return 1
        matched = pow(class_prob, present)
        unmatched = pow(1 - class_prob, 1 - present)
        return matched * unmatched

    if train:
        folder = clss + TRAIN
    else:
        folder = clss + TEST
    class_probs = toolkit.countre(folder, features, smoothing=smoothing)[0]
    presence = toolkit.presentre(instance, features)
    return map(likelihood, class_probs, presence)