classifier = Classifier(products) classified_listings = get_data("classified_listings.txt") old_listings = map(lambda x: UnenforcedFrozenDict(x['listing']), classified_listings) added = 0 for listing in listings: if not UnenforcedFrozenDict(listing) in old_listings: print str(listing) suggested_product = classifier.classify(listing) if suggested_product is not None: print "Suggested product: %s" % str(suggested_product) yn = '' while not yn.lower() in ['y', 'n']: yn = raw_input('Is the suggested product correct? (y/n) ') if yn == 'y': product_name = suggested_product["product_name"] else: product_name = raw_input('Enter product name: ') else: product_name = raw_input('Enter product name: ') if product_name == '': classified_listings.append({'listing' : listing, 'product_name' : None }) else: classified_listings.append({'listing' : listing, 'product_name' : product_name }) added += 1 if added == process_at_a_time: break print_results("classified_listings.txt", classified_listings)
def _rate(count, total):
    """Return ``count / total`` as a float, or 0.0 when ``total`` is zero."""
    if not total:
        return 0.0
    return float(count) / total


def main():
    """Classify every listing against the known products and write results.txt.

    Command line flags:
      -v / --verbose     passed through to ``classifier.classify`` and also
                         prints the overall classification rate.
      -d / --diagnostic  reads classified_listings.txt instead of listings.txt,
                         compares each prediction with the recorded product
                         name, and prints error counts.

    The grouped results (one entry per product name with its listings) are
    written with ``print_results("results.txt", ...)`` in both modes.
    """
    # Parse command line arguments.
    # The exact whitespace inside the description is not significant:
    # argparse's default help formatter re-wraps it.
    parser = argparse.ArgumentParser(description="""
        Shopping listing classifier tool.
        Run in a directory containing listings.txt and products.txt.
        Produces results.txt.""")
    parser.add_argument(
        '-v', '--verbose',
        help='display extra information for debugging',
        action='store_true')
    parser.add_argument(
        '-d', '--diagnostic',
        help='use classified_listings.txt, which contains listings with correct products.',
        action='store_true')
    args = parser.parse_args()

    # Load data from files.
    products = get_data("products.txt")
    listing_to_product = {}
    if args.diagnostic:
        classified_listings = get_data("classified_listings.txt")
        listings = [entry["listing"] for entry in classified_listings]
        # Listings are wrapped in UnenforcedFrozenDict so they can be used as
        # dictionary keys (presumably a hashable dict wrapper; it is defined
        # elsewhere in the file).
        listing_to_product = {
            UnenforcedFrozenDict(entry["listing"]): entry["product_name"]
            for entry in classified_listings
        }
    else:
        listings = get_data("listings.txt")

    # Create a classifier for the provided products.
    classifier = Classifier(products)

    # Classify all listings.
    results = defaultdict(list)
    classified = 0
    correct = 0
    positive_error = 0
    negative_error = 0
    for listing in listings:
        product = classifier.classify(listing, verbose=args.verbose)
        product_name = None
        if product is not None:
            classified += 1
            product_name = product["product_name"]
            results[product_name].append(listing)

        if not args.diagnostic:
            continue

        # Diagnostic bookkeeping: compare the prediction with the known answer.
        correct_product_name = listing_to_product[UnenforcedFrozenDict(listing)]
        if product is None:
            if correct_product_name is None:
                # Nothing predicted and nothing expected.
                correct += 1
            else:
                print("NEGATIVE ERROR: None instead of %s\nListing: %s\n"
                      % (correct_product_name, str(listing)))
                negative_error += 1
        elif product_name != correct_product_name:
            positive_error += 1
            print("POSITIVE ERROR: %s instead of %s\nListing: %s\n"
                  % (product_name, correct_product_name, str(listing)))
        else:
            correct += 1

    # Summary statistics. _rate() guards against an empty listings file, which
    # previously raised ZeroDivisionError here.
    total = len(listings)
    if args.verbose:
        print("Classification rate: %.02f" % _rate(classified, total))
    if args.diagnostic:
        print("Total listings: %d" % total)
        print("Total classified: %d" % classified)
        print("Correct: %d (%.02f)" % (correct, _rate(correct, total)))
        print("Positive errors: %d (%.02f)"
              % (positive_error, _rate(positive_error, total)))
        print("Negative errors: %d (%.02f)"
              % (negative_error, _rate(negative_error, total)))

    # Process results dictionary into array of Result objects.
    processed_results = [
        {'product_name': name, 'listings': matched}
        for name, matched in results.items()
    ]

    # Print results to "results.txt".
    print_results("results.txt", processed_results)