Ejemplo n.º 1
0
def test_PriceEstimator_compute_product_occurrence_matrix():
    """Test construction of matrix for linear least square algorithm.

    Loads the example data, builds the product occurrence matrix for a
    slice of the listings and a fixed list of product IDs, and prints the
    returned values for manual inspection. No assertions are made yet.
    """
    from clair.coredata import DataStore
    from clair.prices import PriceEstimator
    # Only single-argument ``print(...)`` calls are used, so the test parses
    # on Python 3 and prints the same text on Python 2.
    print("start")

    data = DataStore()
    data.read_data(relative("../../example-data"))

    # NOTE(review): ``.ix`` is deprecated since pandas 0.20 and removed in
    # pandas 1.0. Replace it with ``.iloc`` or ``.loc`` once the index type
    # of ``data.listings`` is confirmed (the two differ in end-point handling).
    test_listings = data.listings.ix[0:20]
    print(test_listings)
    print(test_listings.to_string(columns=["products", "price"]))
    product_ids = [u'nikon-d70', u'nikon-d90', u'nikon-sb-24', u'nikon-sb-26',
                   u'nikon-18-70-f/3.5-4.5--1', u'nikon-18-105-f/3.5-5.6--1',
                   u'nikon-28-85-f/3.5-4.5--1']

    estimator = PriceEstimator()
    # ``product_ids`` is rebound to the list returned by the estimator.
    matrix, prices, listing_ids, product_ids = \
        estimator.compute_product_occurrence_matrix(test_listings, product_ids)

    print("")
    print("matrix:\n%s" % (matrix,))
    print("matrix rank: %s" % (np.linalg.matrix_rank(matrix),))
    print("number products: %s" % (len(product_ids),))
    print("prices:\n%s" % (prices,))
    print("listing_ids:\n%s" % (listing_ids,))
    print("product_ids:\n%s" % (product_ids,))

    #TODO: assertions
    print("finished")
Ejemplo n.º 2
0
def test_PriceEstimator_create_prices_lstsq_soln_1():
    """Test creation of price records with real data.

    Runs the three estimator stages in sequence on the example data:
    ``compute_product_occurrence_matrix``, ``solve_prices_lstsq`` and
    ``create_prices_lstsq_soln``. No assertions are made yet; the test only
    checks that the pipeline runs without raising.
    """
    from clair.coredata import DataStore
    from clair.prices import PriceEstimator
    # Only single-argument ``print(...)`` calls are used, so the test parses
    # on Python 3 and prints the same text on Python 2.
    print("start")

    data = DataStore()
    data.read_data(relative("../../example-data"))

    # Use every product except those whose ID starts with "xxx-unknown".
    product_ids = [p.id for p in data.products
                   if not p.id.startswith("xxx-unknown")]
    # Take a small amount of test data; use ``data.listings`` unsliced to run
    # on all data instead.
    # NOTE(review): ``.ix`` is deprecated since pandas 0.20 and removed in
    # pandas 1.0. Replace it with ``.iloc`` or ``.loc`` once the index type
    # of ``data.listings`` is confirmed (the two differ in end-point handling).
    listings = data.listings.ix[0:200]
    print(listings)

    estimator = PriceEstimator()

    # Create matrix and vectors for linear least square.
    # ``product_ids`` is rebound to the list returned by the estimator.
    matrix, listing_prices, listing_ids, product_ids = \
        estimator.compute_product_occurrence_matrix(listings, product_ids)

    # Compute average product prices.
    product_prices, good_rows, good_cols, problem_products = \
        estimator.solve_prices_lstsq(matrix, listing_prices,
                                     listing_ids, product_ids)

    # Create price records.
    prices = estimator.create_prices_lstsq_soln(matrix,
                                                listing_prices, listing_ids,
                                                product_prices, product_ids,
                                                good_rows, good_cols, listings)
    # For debugging: print(prices.to_string())

    #TODO: assertions
    print("finished")
Ejemplo n.º 3
0
def test_PriceEstimator_solve_prices_lstsq_1():
    """Test linear least square algorithm with real data.

    Builds the product occurrence matrix for a slice of the example
    listings and a fixed list of product IDs, passes it to
    ``solve_prices_lstsq`` and prints the intermediate and final values for
    manual inspection. No assertions are made yet.
    """
    from clair.coredata import DataStore
    from clair.prices import PriceEstimator
    # Only single-argument ``print(...)`` calls are used, so the test parses
    # on Python 3 and prints the same text on Python 2.
    print("start")

    data = DataStore()
    data.read_data(relative("../../example-data"))

    # Take a small amount of test data; use ``data.listings`` unsliced and
    # ``[p.id for p in data.products]`` to run on everything instead.
    # NOTE(review): ``.ix`` is deprecated since pandas 0.20 and removed in
    # pandas 1.0. Replace it with ``.iloc`` or ``.loc`` once the index type
    # of ``data.listings`` is confirmed (the two differ in end-point handling).
    listings = data.listings.ix[0:50]
    product_ids = [u'nikon-d70', u'nikon-d90', u'nikon-sb-24', u'nikon-sb-26',
                   u'nikon-18-70-f/3.5-4.5--1', u'nikon-18-105-f/3.5-5.6--1',
                   u'nikon-28-85-f/3.5-4.5--1']
    print(listings)
    print(listings.to_string(columns=["products", "price"]))

    estimator = PriceEstimator()

    # Create matrix and vectors for linear least square.
    # ``product_ids`` is rebound to the list returned by the estimator.
    matrix, listing_prices, listing_ids, product_ids = \
        estimator.compute_product_occurrence_matrix(listings, product_ids)
    print("")
    print("matrix:\n%s" % (matrix,))
    print("matrix rank: %s" % (np.linalg.matrix_rank(matrix),))
    print("number products: %s" % (len(product_ids),))
    print("listing_prices:\n%s" % (listing_prices,))
    print("listing_ids:\n%s" % (listing_ids,))
    print("product_ids:\n%s" % (product_ids,))

    product_prices, good_rows, good_cols, problem_products = \
        estimator.solve_prices_lstsq(matrix, listing_prices,
                                     listing_ids, product_ids)

    # NOTE(review): the result is scaled by 0.7 for display only; the reason
    # for this factor is not evident from this test -- confirm or remove.
    print("product_prices:\n%s" % (product_prices * 0.7,))
    #TODO: assertions
    print("finished")