Beispiel #1
0
def count_scoring_binary(data, url, params):
    """Return exact-match accuracy plus per-document detection details.

    A document counts as correct only if the number of censors detected
    equals the annotated total for that document.

    Args:
        data: DataFrame-like indexed by image name, exposing a
            ``total_censor`` column with the annotated censor counts.
        url: Base URL/path prefix prepended to each index entry.
        params: Dict of keyword arguments forwarded to
            ``redactometer.censor_fill``.

    Returns:
        Tuple ``(score, results)``: ``score`` is the fraction of documents
        whose detected count matches exactly; ``results`` is a list of
        ``(image, observed, actual)`` triples.
    """
    # Run detection ONCE per document. The original called censor_fill
    # twice per index (once for `correct`, once for `results`), doubling
    # the expensive image-processing work.
    observed = {i: len(redactometer.censor_fill(url + i, **params)[1])
                for i in data.index}
    results = [(i, observed[i], data.total_censor[i]) for i in data.index]
    correct = sum(obs == actual for _, obs, actual in results)

    score = float(correct) / float(len(data.index))
    return score, results
Beispiel #2
0
def eval(data, url, params):
    """Return the overall score and per-image metrics.

    Args:
        data: DataFrame-like indexed by image name, exposing a
            ``total_censor`` column with the annotated censor counts.
        url: Base URL/path prefix prepended to each index entry.
        params: Dict of keyword arguments forwarded to
            ``redactometer.censor_fill``.

    Returns:
        Tuple ``(score, metrics)``: ``score`` from ``count_scoring`` and a
        DataFrame with columns ``img``, ``observed``, ``actual``, ``error``.
    """
    # NOTE(review): the name shadows the builtin ``eval``; kept unchanged
    # for caller compatibility.
    score = count_scoring(data, url, params)

    rows = []
    for i in data.index:
        # Detect once per image; the original called censor_fill twice
        # per row (for `observed` and again inside abs_error).
        observed = len(redactometer.censor_fill(url + i, **params)[1])
        actual = data.total_censor[i]
        rows.append((i, observed, actual, abs_error(observed, actual)))

    metrics = pandas.DataFrame(rows, columns=['img', 'observed', 'actual', 'error'])

    return score, metrics
Beispiel #3
0
def f_score(data, url, params):
    """Return the F1 score of censor detection over the dataset.

    Args:
        data: DataFrame-like indexed by image name, exposing a
            ``total_censor`` column with the annotated censor counts.
        url: Base URL/path prefix prepended to each index entry.
        params: Dict of keyword arguments forwarded to
            ``redactometer.censor_fill``.

    Returns:
        Float F1 score; 0.0 when nothing is predicted or when both
        precision and recall are zero (the original raised
        ZeroDivisionError on those boundaries).
    """
    # Run detection once per document; the original called censor_fill
    # twice per index (for `predicted` and again for `truepos`).
    counts = [len(redactometer.censor_fill(url + i, **params)[1])
              for i in data.index]
    predicted = float(sum(counts))
    truepos = float(sum(c == data.total_censor[i]
                        for c, i in zip(counts, data.index)))
    # TODO(original author): verify by coordinate! Above not correct.
    # Truepos = relevant ^ retrieved.

    if predicted == 0:
        return 0.0  # no detections at all -> precision undefined, F = 0

    precision = truepos / predicted
    recall = truepos / sum(data.total_censor)

    if precision + recall == 0:
        return 0.0  # avoid 0/0 when there are no true positives

    return 2 * (precision * recall) / (precision + recall)
Beispiel #4
0
def count_scoring(data, url, params):
    """Count number of censors detected over real number.

    Computes the mean absolute error between the detected censor count
    and the annotated ``total_censor`` value across all images, and
    returns ``1 - mean_error``.
    """
    errors = []
    for img in data.index:
        detected = len(redactometer.censor_fill(url + img, **params)[1])
        errors.append(abs_error(detected, data.total_censor[img]))
    return 1 - np.mean(errors)