Python precision Examples

Programming Language: Python

Namespace/Package Name: util

Method/Function: precision

Examples at hotexamples.com: 8

Python precision - 8 examples found. These are the top-rated real-world Python examples of util.precision extracted from open-source projects. You can rate the examples to help us improve the quality of the examples.

Example #1

Show file

File: models.py Project: wsgan001/diverse-rule-set

def run_ours(Xtr,
             Ytr,
             Xt,
             Yt,
             lb,
             nsample,
             lambda_mode,
             q,
             sample_mode,
             k=None,
             rerun=True,
             eps=0.01,
             min_recall_per_class=0.8,
             log=None):
    """Train a ``DecisionSet``, predict the test rows and log run statistics.

    The model is built with ``eps`` and trained on ``Xtr``/``Ytr`` with at
    most ``k`` rules (100 when ``k`` is None). Every row of ``Xt.values`` is
    wrapped in a ``Transaction`` before prediction. Hyper-parameters, rule
    statistics and test scores are passed to ``log`` under keys prefixed with
    ``'ours0'`` or ``'ours1'`` (the integer value of ``rerun``), and the
    confusion matrix of the test predictions is printed.

    ``lb`` must provide ``transform``; it is applied to both the true and the
    predicted labels before the AUC is computed (presumably a fitted label
    binarizer -- confirm with the caller). When ``log`` is None, the ``log``
    callable of the ``logger`` module is imported and used instead.

    Returns:
        The predictions returned by ``dec.predict_all`` for the test rows.
    """
    name = 'ours{}'.format(int(rerun))
    if k is None:
        k = 100

    dec = DecisionSet(eps)
    dec.train(Xtr, Ytr,
              max_k=k,
              nsamp=nsample,
              lamb=lambda_mode,
              q=q,
              mode=sample_mode,
              rerun=rerun,
              min_recall_per_class=min_recall_per_class)
    print('default:', dec.default)

    transactions = [Transaction(feat2item(row)) for row in Xt.values]
    Y_pred = dec.predict_all(transactions)

    if log is None:
        from logger import log

    # Model shape and the hyper-parameters it was trained with.
    log('{}-default'.format(name), dec.default)
    log('{}-k'.format(name), len(dec.rules))
    log('{}-maxk'.format(name), k)
    for rule_idx, rule in enumerate(dec.rules):
        log('{}-nconds'.format(name), len(rule), rule_idx)
    log('{}-q'.format(name), q)
    log('{}-nsample'.format(name), nsample)
    log('{}-lamb'.format(name), lambda_mode)
    log('{}-seq'.format(name), dec.seq)

    # Scores on the test split.
    auc = roc_auc_score(lb.transform(Yt.values), lb.transform(Y_pred))
    log('{}-auc'.format(name), auc)
    log('{}-bacc'.format(name), balanced_accuracy_score(Yt, Y_pred))
    log('{}-disp'.format(name), dispersion(dec.rules, average=True))
    log('{}-overlap'.format(name), overlap(dec.rules))
    log('{}-mode'.format(name), sample_mode)

    # Per-label precision and recall, one log entry per label.
    for label, value in precision(dec).items():
        log('{}-precisions-tr'.format(name), value, label)
    for label, value in recall(dec.rules).items():
        log('{}-recall-tr'.format(name), value, label)

    print(confusion_matrix(Yt, Y_pred))

    return Y_pred

Example #2

Show file

File: external_sources.py Project: ArniBoy/Feedback

def evaluate_mutator(mutator, threshold, min_percent, latex=True):
    """Yield one ``(label, coordinate)`` pair per sentiment label.

    The dev split of the SemEval data is loaded, and for each of POS, NEU
    and NEG every dev tweet is predicted as that label when
    ``mutator.apply_filter(tweet, label)`` is truthy and as ``-1`` otherwise.
    The yielded coordinate is the string ``' (threshold,precision)'``; the
    precision part is the literal ``0.0`` when fewer than
    ``min_percent * len(predictions)`` tweets were assigned the label.

    ``latex`` is accepted but not used by this function.
    """
    labels = (POS, NEU, NEG)
    train_loc = root+'Data/twitterData/train_alternative.tsv'
    dev_loc = root+'Data/twitterData/dev_alternative.tsv'
    test_loc = root+'Data/twitterData/test_alternative.tsv'
    combined_labels = reduce(lambda x, y: x|y, labels)
    _, dev, _ = get_final_semeval_data(combined_labels, train_loc, dev_loc,
                                       test_loc)
    dev_x, dev_y = dev

    for label in labels:
        pred_y = [
            label if mutator.apply_filter(tweet, label) else -1
            for tweet in dev_x
        ]
        # Too few positive predictions: report zero instead of a precision
        # computed from a handful of tweets.
        if pred_y.count(label) < min_percent*len(pred_y):
            yield str(label), ' (%.3f,0.0)' % threshold
            continue
        score = precision(dev_y, pred_y, label)
        yield str(label), ' (%.3f,%.4f)' % (threshold, score)

Example #3

Show file

def calculateLSTMaccuracy(receipts, results):
    """Score LSTM field extractions against ground truth and print a report.

    For every receipt, the prediction stored at the same index in ``results``
    is cleaned up field by field, counted as "found" when it is non-empty,
    and checked with the matching ``compare.*`` helper whenever the receipt's
    ``groundTruth`` contains that field. A receipt counts as fully correct
    when every ground-truth field matched and every ground-truth product was
    paired with a predicted product.

    Per-field precision, recall and F1 are printed, followed by the micro
    average (computed from the summed counts) and the macro average (mean of
    the seven per-field scores).

    Args:
        receipts: sequence of objects exposing a ``groundTruth`` mapping.
        results: sequence of prediction mappings, index-aligned with
            ``receipts``. Every predicted product mapping has its
            ``'amount'`` entry set to 1 in place.

    Returns:
        None. The report is written to standard output.
    """
    fields = ('vendor', 'date', 'address', 'tax_rate', 'total_price',
              'currency', 'products')
    total = dict.fromkeys(fields, 0)    # values present in the ground truth
    found = dict.fromkeys(fields, 0)    # non-empty predictions
    correct = dict.fromkeys(fields, 0)  # predictions accepted by compare.*

    def clean_price(raw):
        """Reduce a predicted price to util.isInt characters and dots."""
        price = raw.replace(',', '.')
        price = ''.join(ch for ch in price if util.isInt(ch) or ch == '.')
        if price.count('.') == 2:
            # Two dots: drop the first one and keep the second.
            first_dot = price.index('.')
            price = price[:first_dot] + price[first_dot + 1:]
        elif price.count('.') == 1 and len(price.split('.')[-1]) > 2:
            # More than two characters after the only dot: remove the dot
            # (presumably a thousands separator -- confirm).
            price = price.replace('.', '')
        return price

    def clean_currency(raw):
        """Keep only the alphabetic characters of a predicted currency."""
        return ''.join(ch for ch in raw if ch.isalpha())

    def clean_date(raw):
        """Of exactly two space-separated parts keep the longer (first on ties)."""
        parts = raw.split(' ')
        if len(parts) == 2:
            return parts[1] if len(parts[1]) > len(parts[0]) else parts[0]
        return raw

    def clean_tax(raw):
        """Of exactly two space-separated parts keep the first."""
        parts = raw.split(' ')
        return parts[0] if len(parts) == 2 else raw

    def report(header, n_total, n_found, n_correct):
        """Print one report section and return its (precision, recall)."""
        print(header)
        print(n_total, n_found, n_correct)
        precision = util.precision(n_correct, n_found)
        recall = util.recall(n_total, n_correct)
        print('Precision:', precision)
        print('Recall:', recall)
        print('F1:', util.fScore(precision, recall))
        return precision, recall

    # (field, cleaner or None, comparator), in the order the fields are checked.
    scalar_checks = (
        ('total_price', clean_price, compare.totalPrice),
        ('currency', clean_currency, compare.currency),
        ('date', clean_date, compare.date),
        ('vendor', None, compare.vendor),
        ('tax_rate', clean_tax, compare.taxRate),
        ('address', None, compare.address),
    )

    count = 0
    for i, receipt in enumerate(receipts):
        result = results[i]
        truth = receipt.groundTruth
        corr = True

        for field, clean, matches in scalar_checks:
            if field in result:
                value = result[field]
                if clean is not None:
                    value = clean(value)
            else:
                value = None
            if value:
                found[field] += 1
            if field in truth:
                total[field] += 1
                if matches(truth[field], value):
                    correct[field] += 1
                else:
                    corr = False

        # A result without products must not fall back to the products of
        # the previous receipt (or be undefined on the first receipt).
        products = result['products'] if 'products' in result else []
        real_products = truth['products'] if 'products' in truth else []
        total['products'] += len(real_products)

        matched = set()  # indexes of ground-truth products already paired
        for product in products:
            product['amount'] = 1
            found['products'] += 1
            if 'name' not in product:
                continue
            for j, real_product in enumerate(real_products):
                if j in matched:
                    continue
                if compare.products(product, real_product):
                    matched.add(j)
                    correct['products'] += 1
                    break

        if len(matched) < len(real_products):
            corr = False
        if corr:
            count += 1

    totalDataPoints = sum(total.values())
    totalDataPointsFound = sum(found.values())
    totalCorrect = sum(correct.values())

    sections = (
        ('-----VENDORS-----', 'vendor'),
        ('-----DATES-----', 'date'),
        ('-----ADDRESSES-----', 'address'),
        ('-----TAX RATES-----', 'tax_rate'),
        ('-----PRICE-----', 'total_price'),
        ('-----CURRENCY-----', 'currency'),
        ('-----PRODUCTS-----', 'products'),
    )

    print('-----TOTAL CORRECT RECEIPTS-----')
    print(count, 'of', len(receipts))

    total_precision = 0
    total_recall = 0
    for header, field in sections:
        precision, recall = report(header, total[field], found[field],
                                   correct[field])
        total_precision += precision
        total_recall += recall

    # The micro average is reported only; it must not be added to the sums
    # that feed the macro average below.
    report('-----MICRO AVG-----', totalDataPoints, totalDataPointsFound,
           totalCorrect)

    print('-----MACRO AVG-----')
    print(totalDataPoints, totalDataPointsFound, totalCorrect)
    precision = total_precision / float(len(sections))
    recall = total_recall / float(len(sections))
    print('Precision:', precision)
    print('Recall:', recall)
    print('F1:', util.fScore(precision, recall))

Example #4

Show file

def calculateRuleBasedAccuracy(receipts):
    """Score rule-based field extractions against ground truth and print a report.

    For every receipt, each field of ``receipt.ruleBasedPrediction`` is
    counted as "found" when it is truthy and checked with the matching
    ``compare.*`` helper whenever ``receipt.groundTruth`` contains that
    field. A receipt counts as fully correct when every ground-truth field
    matched and every ground-truth product was paired with a predicted
    product.

    Per-field precision, recall and F1 are printed, followed by the micro
    average (computed from the summed counts) and the macro average (mean of
    the seven per-field scores).

    Args:
        receipts: sequence of objects exposing ``ruleBasedPrediction`` and
            ``groundTruth`` mappings.

    Returns:
        None. The report is written to standard output.
    """
    fields = ('vendor', 'date', 'address', 'tax_rate', 'total_price',
              'currency', 'products')
    total = dict.fromkeys(fields, 0)    # values present in the ground truth
    found = dict.fromkeys(fields, 0)    # truthy predictions
    correct = dict.fromkeys(fields, 0)  # predictions accepted by compare.*

    def report(header, n_total, n_found, n_correct):
        """Print one report section and return its (precision, recall)."""
        print(header)
        print(n_total, n_found, n_correct)
        precision = util.precision(n_correct, n_found)
        recall = util.recall(n_total, n_correct)
        print('Precision:', precision)
        print('Recall:', recall)
        print('F1:', util.fScore(precision, recall))
        return precision, recall

    # (field, comparator), in the order the fields are checked.
    scalar_checks = (
        ('total_price', compare.totalPrice),
        ('currency', compare.currency),
        ('date', compare.date),
        ('vendor', compare.vendor),
        ('tax_rate', compare.taxRate),
        ('address', compare.address),
    )

    count = 0
    for receipt in receipts:
        predicted = receipt.ruleBasedPrediction
        truth = receipt.groundTruth
        corr = True

        for field, matches in scalar_checks:
            value = predicted[field] if field in predicted else None
            if value:
                found[field] += 1
            if field in truth:
                total[field] += 1
                if matches(truth[field], value):
                    correct[field] += 1
                else:
                    corr = False

        products = predicted['products'] if 'products' in predicted else []
        real_products = truth['products'] if 'products' in truth else []
        total['products'] += len(real_products)

        matched = set()  # indexes of ground-truth products already paired
        for product in products:
            found['products'] += 1
            for j, real_product in enumerate(real_products):
                if j in matched:
                    continue
                if compare.products(product, real_product):
                    matched.add(j)
                    correct['products'] += 1
                    break

        if len(matched) < len(real_products):
            corr = False
        if corr:
            count += 1

    totalDataPoints = sum(total.values())
    totalDataPointsFound = sum(found.values())
    totalCorrect = sum(correct.values())

    sections = (
        ('-----VENDORS-----', 'vendor'),
        ('-----DATES-----', 'date'),
        ('-----ADDRESSES-----', 'address'),
        ('-----TAX RATES-----', 'tax_rate'),
        ('-----PRICE-----', 'total_price'),
        ('-----CURRENCY-----', 'currency'),
        ('-----PRODUCTS-----', 'products'),
    )

    print('-----TOTAL CORRECT RECEIPTS-----')
    print(count, 'of', len(receipts))

    total_precision = 0
    total_recall = 0
    for header, field in sections:
        precision, recall = report(header, total[field], found[field],
                                   correct[field])
        total_precision += precision
        total_recall += recall

    # The micro average is reported only; adding it to the running sums
    # would make the macro average divide eight scores by seven.
    report('-----MICRO AVG-----', totalDataPoints, totalDataPointsFound,
           totalCorrect)

    print('-----MACRO AVG-----')
    print(totalDataPoints, totalDataPointsFound, totalCorrect)
    precision = total_precision / float(len(sections))
    recall = total_recall / float(len(sections))
    print('Precision:', precision)
    print('Recall:', recall)
    print('F1:', util.fScore(precision, recall))

Example #5

Show file

File: export_results.py Project: fbr1/proyecto_twitter_utn

def kmeans(eac, removeTerms, ngram):
    """Cluster the tweet dataset repeatedly and collect 100 precision values.

    Tweets are read from ``dataset.csv``, filtered (optionally removing the
    listed terms), cut to the first 6300 entries and hashed into feature
    vectors. Each round clusters the vectors into two groups -- with EAC
    followed by K-medoids when ``eac`` is true, with plain K-means otherwise
    -- writes the cluster labels onto the tweets and asks ``util.precision``
    for a score. Only scores accepted by ``isAdable`` are kept; rounds are
    repeated until 100 scores have been collected.

    Args:
        eac: use EAC + K-medoids instead of K-means.
        removeTerms: pass the term list to the tweet filter.
        ngram: hash word 1- to 3-grams instead of the vectorizer default.

    Returns:
        List of the 100 accepted precision values.
    """
    terms = ['lazaro', 'lázaro', 'baez', 'báez', 'carlitos']

    print('Filtering tweets')
    tweets = util.read_from_file("dataset.csv")
    filter_kwargs = {'terms_to_remove': terms} if removeTerms else {}
    tweets = filter.filter_tweets(tweets, **filter_kwargs)

    # Reduce tweets list length
    tweets = tweets[:6300]

    # Class balance of the retained tweets (printed for information only).
    data = [tw.text for tw in tweets]
    carlitos = sum(1 for tw in tweets if tw.tw_type == 'Carlitos')
    lazaro = len(tweets) - carlitos
    print(carlitos, lazaro)

    print("Transform Data...")
    hasher_options = dict(non_negative=True, norm='l2', binary=False)
    if ngram:
        hasher_options.update(ngram_range=(1, 3), analyzer='word')
    vectorizer = make_pipeline(HashingVectorizer(**hasher_options))
    X = vectorizer.fit_transform(data)

    precision_list = []
    while len(precision_list) < 100:
        # Start timer
        started = time()

        if eac:
            ensemble = EAC(30, min_k=2, max_k=10)
            EAC_D = ensemble.fit(X).distance_

            # Kmedoids over EAC_D
            medoids = KMedoids(2, init='random',
                               distance_metric="precomputed")
            labels = medoids.fit(EAC_D).labels_
        else:
            km = KMeans(n_clusters=2, init='k-means++', n_init=1,
                        max_iter=100)
            labels = km.fit(X).labels_

        # Assign labels to tweets
        for idx, tw in enumerate(tweets):
            tw.label = labels[idx]

        print("Precision: ")
        score = util.precision(tweets)

        print("done in %0.3fs" % (time() - started))

        # Rounds whose score is rejected by isAdable are simply repeated.
        if isAdable(score):
            precision_list.append(score)

    return precision_list

Example #6

Show file

File: export_results.py Project: fbr1/proyecto_twitter_utn

def minhash(eac, shingle, removeTerms):
    """Cluster tweets on a MinHash Jaccard distance and collect 100 precisions.

    Tweets are read from ``dataset.csv``, filtered (optionally removing the
    listed terms) and cut to the first 6300 entries. A distance matrix over
    the tweet texts is computed once with shingles of length ``shingle``.
    Each round clusters that matrix into two groups -- with EAC followed by
    K-medoids when ``eac`` is true, with K-medoids alone otherwise -- writes
    the cluster labels onto the tweets and asks ``util.precision`` for a
    score. Only scores accepted by ``isAdable`` are kept; rounds are
    repeated until 100 scores have been collected.

    The elapsed time printed after each round is measured from just before
    the distance matrix computation, so it is cumulative.

    Returns:
        List of the 100 accepted precision values.
    """
    terms = ['lazaro', 'lázaro', 'baez', 'báez', 'carlitos']

    print('Filtering tweets')
    tweets = util.read_from_file("dataset.csv")
    filter_kwargs = {'terms_to_remove': terms} if removeTerms else {}
    tweets = filter.filter_tweets(tweets, **filter_kwargs)

    # Reduce tweets list length
    tweets = tweets[:6300]

    # Extract text from tweets
    X = [tw.text for tw in tweets]

    # Class balance of the retained tweets (printed for information only).
    carlitos = sum(1 for tw in tweets if tw.tw_type == 'Carlitos')
    lazaro = len(tweets) - carlitos
    print(carlitos, lazaro)

    # Start timer
    started = time()

    print("Calculating distance matrix...")
    D = metrics.jaccard_minhash_distance_mp(X, shingle_length=shingle)

    precision_list = []
    while len(precision_list) < 100:
        if eac:
            print("EAC clustering...")
            # EAC clustering
            base = KMedoids(init='random', distance_metric='precomputed')
            ensemble = EAC(30, min_k=2, max_k=10, clustering=base)
            EAC_D = ensemble.fit(D).distance_

            # Kmedoids over EAC_D
            medoids = KMedoids(2, init='random',
                               distance_metric="precomputed")
            labels = medoids.fit(EAC_D).labels_
        else:
            medoids = KMedoids(2, init='random',
                               distance_metric='precomputed')

            print("Kmedoids clustering...")
            labels = medoids.fit(D).labels_

        # Assign labels to tweets
        for idx, tw in enumerate(tweets):
            tw.label = labels[idx]

        print("Precision: ")
        score = util.precision(tweets)

        # Rounds whose score is rejected by isAdable are simply repeated.
        if isAdable(score):
            # Number of scores collected before this one.
            print(len(precision_list))
            precision_list.append(score)

        print("done in %0.3fs" % (time() - started))

    return precision_list

Example #7

Show file

def calculateMetrics(reciepts, result, writeToFile=False, path=None):
    """Score extracted receipt fields against ground truth and print a report.

    For every receipt, the prediction stored at the same index in ``result``
    is compared field by field with ``reciept.groundTruth``. Text fields are
    compared case-insensitively; vendor and address must have a Levenshtein
    distance of 0. A receipt counts as fully correct when every ground-truth
    field matched and every ground-truth product was paired with a predicted
    product that has the same name, price and amount.

    Per-field precision, recall and F1 are printed, followed by the micro
    average (computed from the summed counts) and the macro average (mean of
    the seven per-field scores).

    Args:
        reciepts: sequence of objects with a ``groundTruth`` mapping and,
            when ``writeToFile`` is true, a ``path`` attribute.
        result: sequence of prediction mappings, index-aligned with
            ``reciepts``; each must contain the keys 'vendor', 'date',
            'address', 'tax_rate', 'total_price', 'currency' and 'products'.
        writeToFile: when true, dump each receipt's predictions as JSON to
            ``os.path.join(path, reciept.path)``.
        path: output directory used when ``writeToFile`` is true.

    Returns:
        None. The report is written to standard output.
    """
    fields = ('vendor', 'date', 'address', 'tax_rate', 'total_price',
              'currency', 'products')
    total = dict.fromkeys(fields, 0)    # values present in the ground truth
    found = dict.fromkeys(fields, 0)    # predictions that were produced
    correct = dict.fromkeys(fields, 0)  # predictions equal to the truth

    def report(header, n_total, n_found, n_correct):
        """Print one report section and return its (precision, recall)."""
        print(header)
        print(n_total, n_found, n_correct)
        precision = util.precision(n_correct, n_found)
        recall = util.recall(n_total, n_correct)
        print('Precision:', precision)
        print('Recall:', recall)
        print('F1:', util.fScore(precision, recall))
        return precision, recall

    count = 0
    for i, reciept in enumerate(reciepts):
        predicted = result[i]
        truth = reciept.groundTruth
        # Raw predictions, kept in this key order for the optional JSON dump.
        result_dict = {
            'vendor': predicted['vendor'],
            'date': predicted['date'],
            'address': predicted['address'],
            'tax_rate': predicted['tax_rate'],
            'total_price': predicted['total_price'],
            'currency': predicted['currency'],
            'products': predicted['products'],
        }
        corr = True

        vendor = result_dict['vendor']
        if vendor:
            found['vendor'] += 1
            vendor = vendor.lower()
        if 'vendor' in truth:
            total['vendor'] += 1
            if vendor and levenshtein_distance(
                    vendor, truth['vendor'].lower()) <= 0:
                correct['vendor'] += 1
            else:
                corr = False

        date = result_dict['date']
        if date:
            found['date'] += 1
            date = date.lower()
        if 'date' in truth:
            total['date'] += 1
            real_date = truth['date'].lower()
            # Accept the ground truth with or without its spaces.
            if date == real_date or date == real_date.replace(' ', ''):
                correct['date'] += 1
            else:
                corr = False

        address = result_dict['address']
        if address:
            found['address'] += 1
            address = address.lower()
        if 'address' in truth:
            total['address'] += 1
            if address and levenshtein_distance(
                    address, truth['address'].lower()) <= 0:
                correct['address'] += 1
            else:
                corr = False

        tax = result_dict['tax_rate']
        # A tax rate of 0 is a real prediction, so only None means "missing".
        if tax is not None:
            found['tax_rate'] += 1
        if 'tax_rate' in truth:
            total['tax_rate'] += 1
            real_tax = int(float(truth['tax_rate'].lower().replace('%', '')))
            if tax == real_tax:
                correct['tax_rate'] += 1
            else:
                corr = False

        price = result_dict['total_price']
        if price:
            found['total_price'] += 1
        if 'total_price' in truth:
            total['total_price'] += 1
            real_price = float(truth['total_price'].lower())
            if price == real_price:
                correct['total_price'] += 1
            else:
                corr = False

        currency = result_dict['currency']
        if currency:
            found['currency'] += 1
            currency = currency.lower()
        if 'currency' in truth:
            total['currency'] += 1
            if currency == truth['currency'].lower():
                correct['currency'] += 1
            else:
                corr = False

        # Receipts without ground-truth products are treated as having none
        # instead of raising KeyError.
        real_products = truth['products'] if 'products' in truth else []
        total['products'] += len(real_products)

        checked = set()  # indexes of ground-truth products already paired
        for product in result_dict['products']:
            found['products'] += 1

            # The predicted price does not depend on the candidate it is
            # compared with, so parse it once per product.
            product_price = None
            if 'price' in product:
                try:
                    product_price = float(product['price'].replace(',', '.'))
                except ValueError:
                    product_price = None

            # ``j`` rather than ``i``: the outer loop's receipt index must
            # not be overwritten here.
            for j, real_product in enumerate(real_products):
                if j in checked:
                    continue
                real_product_price = float(real_product['price'])
                if (levenshtein_distance(product['name'].lower(),
                                         real_product['name'].lower()) <= 0
                        and util.floatCompare(product_price,
                                              real_product_price)
                        and product['amount'] == real_product['amount']):
                    correct['products'] += 1
                    checked.add(j)
                    break

        # A ground-truth product without a match makes the receipt incorrect.
        if len(checked) < len(real_products):
            corr = False
        if corr:
            count += 1

        if writeToFile:
            with open(os.path.join(path, reciept.path), 'w') as fp:
                json.dump(result_dict, fp, indent=1)

    totalDataPoints = sum(total.values())
    totalDataPointsFound = sum(found.values())
    totalCorrect = sum(correct.values())

    sections = (
        ('-----VENDORS-----', 'vendor'),
        ('-----DATES-----', 'date'),
        ('-----ADDRESSES-----', 'address'),
        ('-----TAX RATES-----', 'tax_rate'),
        ('-----PRICE-----', 'total_price'),
        ('-----CURRENCY-----', 'currency'),
        ('-----PRODUCTS-----', 'products'),
    )

    print('-----TOTAL CORRECT RECEIPTS-----')
    print(count, 'of', len(reciepts))

    total_precision = 0
    total_recall = 0
    for header, field in sections:
        precision, recall = report(header, total[field], found[field],
                                   correct[field])
        total_precision += precision
        total_recall += recall

    # The micro average is reported only; it does not feed the macro average.
    report('-----MICRO AVG-----', totalDataPoints, totalDataPointsFound,
           totalCorrect)

    print('-----MACRO AVG-----')
    print(totalDataPoints, totalDataPointsFound, totalCorrect)
    precision = total_precision / float(len(sections))
    recall = total_recall / float(len(sections))
    print('Precision:', precision)
    print('Recall:', recall)
    print('F1:', util.fScore(precision, recall))

Example #8

Show file

File: decisionset.py Project: wsgan001/diverse-rule-set

    def train(self,
              X,
              Y,
              max_k=100,
              nsamp=100,
              lamb=None,
              q='kl',
              mode=3,
              rerun=True,
              min_recall_per_class=0.5):
        """Build the rule list by repeated sampling and greedy selection.

        The item database is cleared and rebuilt from ``X`` and ``Y``. Then,
        while ``len(self) < max_k`` and some labels are still being sampled,
        candidate rules are sampled, handed to a ``GreedyDiv`` selector, and
        the rule it returns is either added to this object or used to stop
        sampling its label. Finally the default is set via
        ``self.set_default()``, ``self.build()`` is called, and summary
        statistics are printed.

        Args:
            X: Training data, forwarded to ``prep_db``.
            Y: Training labels, forwarded to ``prep_db``.
            max_k: Sampling stops once ``len(self)`` reaches this value.
            nsamp: Sample size passed to the sampling routines in the loop.
            lamb: Passed to ``GreedyDiv`` and ``obj``. If it is a string or
                ``None`` it is first resolved from a preliminary sample:
                ``'max'`` / ``'mean'`` take the max / mean of
                ``Rule.quality`` over that sample; anything else gives 0.
                Any other value is used as given.
            q: Metric name forwarded to ``Rule.quality`` while resolving
                ``lamb``.
            mode: ``0`` samples with ``sample_rn`` per label; any other
                value is forwarded to ``self.sample_from_each_label``.
            rerun: If true, rerun the greedy selection over every sample
                drawn and keep its result when its objective is higher.
            min_recall_per_class: Not referenced in this method body.
                NOTE(review): presumably consumed elsewhere (e.g. by
                ``self.enough``) -- confirm.

        Returns:
            None. The method works through side effects on ``self`` and
            prints progress to stdout.
        """
        print('##### START #####')
        Itemset.clear_db()
        prep_db(X, Y)

        # A string (or None) selects how lamb is derived from a preliminary
        # sample; any other value supplied by the caller is used unchanged.
        if lamb is None or isinstance(lamb, str):
            # NOTE(review): the sample size here is a fixed 100, not nsamp --
            # confirm that this is intentional.
            samp = self.sample_from_each_label(set(Itemset.labels), 100, set(),
                                               mode)
            if lamb == 'max':
                lamb = np.max([Rule.quality([r], metric=q) for r in samp])
            elif lamb == 'mean':
                lamb = np.mean([Rule.quality([r], metric=q) for r in samp])
            else:
                lamb = 0
            print('lamb:', lamb)

        greed = GreedyDiv([], lamb)
        U_all = []  # every candidate drawn so far, reused by the rerun below
        labels_samp = set(Itemset.labels)  # labels still being sampled
        while len(self) < max_k and labels_samp:
            if mode == 0:
                samps = []
                for label in labels_samp:
                    _, samp = sample_rn(nsamp, label)
                    samp = [Rule(s, label)
                            for s in list(samp)]  # Very time-consuming
                    samps.extend(samp)
                U = set(samps)
            else:
                # Transactions covered by the rules chosen so far.
                covered = {t for r in self.rules for t in r.trans()}
                U = self.sample_from_each_label(labels_samp, nsamp, covered,
                                                mode)
            print('nsamp (after):', len(U))
            if len(U) == 0:
                # Nothing left to sample: stop.
                break
            U_all.extend(U)

            # Greedy
            greed.update_univ(U)
            r = greed.greedy_once()
            # Termination criteria. Also check zero sampling above.
            if self.enough(r):
                # Include at least one rule per class, except default class.
                # set.remove raises KeyError if the label is already gone.
                labels_samp.remove(r.label)
                print('remove label:', r.label)
            else:
                # Print quality vs. dispersion. Distinct local names keep the
                # ``q`` metric parameter from being overwritten.
                q_before, d_before = obj(self.rules, lamb, sep=True)
                q_after, d_after = obj(self.rules + [r], lamb, sep=True)
                print('inc q vs. d: {}, {}'.format(q_after - q_before,
                                                   d_after - d_before))

                self.add(r)
                # Stop sampling a label once its recall is numerically 1.
                if np.abs(recall(self.rules)[r.label] - 1.0) < 1e-8:
                    labels_samp.remove(r.label)
                print('#{} '.format(len(self.rules)), end='')
                printRules([r])

        # Consecutive greedy over all samples
        if rerun:
            greed.clear()
            greed.update_univ(list(set(U_all)))
            rules = greed.greedy(len(self.rules))
            # Evaluate each objective once and reuse it for the comparison
            # and the message.
            obj_full = obj(rules, lamb)
            obj_incremental = obj(self.rules, lamb)
            if obj_full > obj_incremental:
                print('Full greedy wins: {} > {}'.format(
                    obj_full, obj_incremental))
                self.reset(rules)

        default = self.set_default()
        print('default:', default)

        self.build()

        print('precision: ', precision(self).items())
        print('recall (coverage): ', recall(self.rules).items())
        print('ave disp: ', dispersion(self.rules, average=True))
        print('##### END #####')