def __init__(self, heirarchy, import_dir=None, export_dir=None,
             base_feature_count=None):
    """Initialise both base classes and start with an empty tree.

    Args:
        heirarchy: Sized collection whose length becomes ``max_depth``.
            (Presumably the ordered levels used to build the tree --
            confirm against the methods that populate ``self.tree``.)
        import_dir: Passed through to ``Importer.__init__``.
        export_dir: Passed through to ``Exporter.__init__``.
        base_feature_count: Stored unchanged on the instance; when it is
            ``None`` the attribute is set to 0 instead.
    """
    # The base initialisers are called explicitly, exporter first.
    Exporter.__init__(self, export_dir=export_dir)
    Importer.__init__(self, import_dir=import_dir)

    # Containers start empty.
    self.tree = {}
    self.dataset = []

    self.heirarchy = heirarchy
    self.base_feature_count = (
        0 if base_feature_count is None else base_feature_count
    )
    self.max_depth = len(heirarchy)
def __init__(self, conditions, skip_sellingprice=True, import_dir=None,
             export_dir=None):
    """Initialise both base classes, bind lookup maps and reset buffers.

    Args:
        conditions: Stored unchanged as ``self.conditions``.
        skip_sellingprice: Stored unchanged as ``self.skip_sellingprice``
            (defaults to ``True``).
        import_dir: Passed through to ``Importer.__init__``.
        export_dir: Passed through to ``Exporter.__init__``.
    """
    # The base initialisers are called explicitly, importer first.
    Importer.__init__(self, import_dir=import_dir)
    Exporter.__init__(self, export_dir=export_dir)

    self.conditions = conditions
    self.skip_sellingprice = skip_sellingprice

    # Lookup tables supplied by the ``features`` module.
    self.states = features.USA_MAP            # state: region, e.g. ca: West
    self.makes = features.MAKE_TYPE_MAP       # make: type, e.g. honda: Normal
    self.sellers = features.SELLER_TYPE_MAP   # seller: type, e.g. ford motor ca: Lease

    # Three independent, initially empty working lists.
    self.dataset, self.filtered, self.output = [], [], []
try: conditions[condition] = val['prices_centered'] except KeyError: pass results = [] for condition, offset in conditions.iteritems(): results.append([float(condition), np.average(offset)]) results = sorted(results, key = itemgetter(0)) # NOTE - there is clearly an issue with condition 4.0 selling prices :( # it doesn't make any sense by that set of prices would be above the average # while 3.9 and 4.1 are significantly below average # let's just adjust this value and interpolate between 3.9 and 4.1 results[1][1] = (results[0][1] + results[2][1]) / 2 condition_map = {} for item in results: condition_map[str(item[0])] = item[1] print item[0], item[1] #plt.scatter([item[0] for item in results], [item[1] for item in results]) #plt.savefig('./plots/condition-offset.png') EXPORT = Exporter() EXPORT.save('condition-offset.json', dataset=condition_map)
try: states[state] = states[state] + val['prices_centered'] except KeyError: try: states[state] = val['prices_centered'] except KeyError: pass results = [] for state, offset in states.iteritems(): count = len(offset) if count < 1 / pow(confidence, 2): print 'state: %s, n: %s' % (state, count) print 'not enough data to calculate reliable offset' else: results.append([state, np.average(offset)]) results = sorted(results, key = itemgetter(0)) state_map = {} for item in results: state_map[item[0]] = item[1] print item[0], item[1] # plt.scatter([i for i, item in enumerate(results)], [item[1] for item in results]) # plt.savefig('./plots/state-offset.png') EXPORT = Exporter() EXPORT.save('state-offset.json', dataset=state_map)