Exemple #1
0
def verify_classifiers(nagssifiers, fromfile, verbose=0):
    """Score the classifiers against a labelled file and print per-tag accuracy.

    Each non-blank line of ``fromfile`` is split on tabs; field 1 is parsed
    with ``literal_eval`` into the collection of tags and field 2 is
    tokenized with ``tokenize_en``.  A sample counts as correctly classified
    when the relevant probability is at least 0.5.

    Args:
        nagssifiers: Mapping from tag to a classifier object exposing
            ``get_probability(tag, tokens)``.
        fromfile: Path of the tab-delimited file to verify against.
        verbose: 0 prints only a short notice, 1 prints the per-tag
            percentages, anything above 1 also prints every probability.

    Returns:
        None.  All results are written with ``print``.
    """
    slack_tag = r"""BREAK"""

    correct_per_tag = defaultdict(int)
    incorrect_per_tag = defaultdict(int)

    # The context manager closes the file even if a line fails to parse.
    with open(fromfile, 'r') as nag_teach_file:
        for line in nag_teach_file:
            if not line.strip():
                # A blank (e.g. trailing) line carries no sample.
                continue
            parts = line.split('\t')
            tags = literal_eval(parts[1])
            tokens = list(tokenize_en(parts[2]))

            if len(tags) == 0:
                # Untagged sample: multiply every classifier's probability
                # for the "" (not tagged) class.
                prob = 1.0
                for classifier in nagssifiers.values():
                    prob *= classifier.get_probability("", tokens)

                if prob >= 0.5:
                    correct_per_tag[""] += 1
                else:
                    incorrect_per_tag[""] += 1
                if verbose > 1:
                    print("p(C|X)=%f, for C=%s and X=%s" % (prob, slack_tag, tokens))

            for tag in tags:
                # Binary classification result for this tag's own classifier.
                prob = nagssifiers[tag].get_probability(tag, tokens)
                if prob >= 0.5:
                    correct_per_tag[tag] += 1
                else:
                    incorrect_per_tag[tag] += 1
                if verbose > 1:
                    print("p(C|X)=%f, for C=%s and X=%s" % (prob, tag, tokens))

    if verbose > 0:
        # Report every tag that was seen, including tags that were never
        # classified correctly (those only appear in incorrect_per_tag).
        seen_tags = list(correct_per_tag)
        seen_tags += [t for t in incorrect_per_tag if t not in correct_per_tag]

        for tag in seen_tags:
            # .get() avoids inserting new keys into the defaultdicts.
            right = correct_per_tag.get(tag, 0)
            wrong = incorrect_per_tag.get(tag, 0)
            tot = right + wrong  # always >= 1: the tag was counted at least once

            label = slack_tag if tag == "" else tag
            print("%s: %d %% right, %d %% wrong" %
                  (label, right * 100 // tot, wrong * 100 // tot))

    if verbose == 0:
        print("They _do_ classify, up the verbiosity for more info.")
        
Exemple #2
0
def test_if_should_nag(nagssifiers, requiredTags, onActivity, requireAll):
    """Decide whether the current activity warrants a nag.

    The activity text is tokenized with ``tokenize_en`` and scored by the
    classifier of each required tag (looked up by the upper-cased tag).
    Each tag and its probability is printed as a side effect.

    Args:
        nagssifiers: Mapping from upper-case tag to a classifier object
            exposing ``get_probability(tag, tokens)``.
        requiredTags: Iterable of tag strings that the activity should match.
        onActivity: Text describing the current activity.
        requireAll: When truthy, a single tag scoring below 0.5 is enough
            to nag; otherwise one matching tag is sufficient to stay quiet.

    Returns:
        True when no required tag scored at least 0.5, or when
        ``requireAll`` is truthy and at least one tag scored below 0.5;
        False otherwise.
    """
    tokens = list(tokenize_en(onActivity))
    matched_any = False
    missed_any = False

    for tag in requiredTags:
        utag = tag.upper()
        prob = nagssifiers[utag].get_probability(utag, tokens)
        # %-formatting gives the same "TAG prob" output under both the
        # Python 2 print statement semantics and the Python 3 function.
        print("%s %s" % (utag, prob))
        if prob >= 0.5:
            # Probably doing a task with this tag.
            matched_any = True
        else:
            missed_any = True

    # bool() keeps the return value a strict True/False even when
    # requireAll is a truthy non-bool.
    return bool(not matched_any or (missed_any and requireAll))
Exemple #3
0
def teach_classifiers(fromfile):
    """Train one classifier per tag from a tab-delimited teach file.

    Each non-blank line is stripped and split on tabs; field 1 is parsed
    with ``literal_eval`` into the collection of tags and field 2 is
    tokenized with ``tokenize_en``.  Tokens of untagged lines are collected
    under the empty-string tag.

    Every tag's classifier is taught its own tokens as positive samples
    (class = the tag) and the tokens of every other tag, including the
    untagged ones, as negative samples (class = "").

    Args:
        fromfile: Path of the teach file.

    Returns:
        A ``defaultdict(NaiveBayes)`` mapping each non-empty tag to its
        trained classifier.
    """
    # Collect teach data.
    teach_data = defaultdict(list)
    # The context manager closes the file even if a line fails to parse.
    with open(fromfile, 'r') as nag_teach_file:
        for line in nag_teach_file:
            # Tab delimited line, fields as read below:
            #  <field 0> <list of tags (labels)> <tokenizable string>
            stripped = line.strip()
            if not stripped:
                # A blank (e.g. trailing) line carries no sample.
                continue
            parts = stripped.split('\t')
            tags = literal_eval(parts[1])
            tokens = list(tokenize_en(parts[2]))
            if len(tags) == 0:
                teach_data[""].extend(tokens)
            for tag in tags:
                teach_data[tag].extend(tokens)

    # Teach new set of classifiers.
    nagssifiers = defaultdict(NaiveBayes)
    for tag, tag_tokens in teach_data.items():
        # "" is shorthand for "not tagged work"; it gets no classifier of
        # its own and is only used as negative data below.
        if tag == "":
            continue

        # Positive samples.
        nagssifiers[tag].teach_tokens(tag, tag_tokens)

        # TODO: Try out negative samples training with
        #  nagssifiers[""].teach_tokens("", teach_data[""])
        #  how does it affect the accuracy?

        # Negative samples: everything that belongs to any other tag.
        for other_tag, other_tokens in teach_data.items():
            if other_tag == tag:
                continue
            nagssifiers[tag].teach_tokens("", other_tokens)
    return nagssifiers