Ejemplo n.º 1
0
    def __init__(self, heirarchy, import_dir=None, export_dir=None, base_feature_count=None):
        """Initialise the exporter/importer bases and an empty tree.

        Args:
            heirarchy: Object supporting ``len()``; stored unchanged, and its
                length becomes ``max_depth``. (The spelling is kept because
                it is part of the public signature.)
            import_dir: Forwarded to ``Importer.__init__``.
            export_dir: Forwarded to ``Exporter.__init__``.
            base_feature_count: Stored on the instance; ``None`` is
                replaced by 0.
        """
        # Base initialisers are called explicitly, exporter first.
        Exporter.__init__(self, export_dir=export_dir)
        Importer.__init__(self, import_dir=import_dir)

        self.heirarchy = heirarchy

        # Both containers start out empty.
        self.tree = {}
        self.dataset = []

        self.base_feature_count = 0 if base_feature_count is None else base_feature_count

        # Depth limit is the number of entries in the hierarchy.
        self.max_depth = len(heirarchy)
Ejemplo n.º 2
0
    def __init__(self, conditions, skip_sellingprice=True, import_dir=None, export_dir=None):
        """Store the constructor arguments and lookup tables; start with empty lists.

        Args:
            conditions: Stored unchanged on ``self.conditions``.
            skip_sellingprice: Stored unchanged on ``self.skip_sellingprice``.
            import_dir: Forwarded to ``Importer.__init__``.
            export_dir: Forwarded to ``Exporter.__init__``.
        """
        # Base initialisers are called explicitly, importer first.
        Importer.__init__(self, import_dir=import_dir)
        Exporter.__init__(self, export_dir=export_dir)

        self.conditions = conditions
        self.skip_sellingprice = skip_sellingprice

        # Lookup tables taken from the ``features`` module:
        #   states  -- state: region, e.g. ca: West
        #   makes   -- make: type,    e.g. honda: Normal
        #   sellers -- seller: type,  e.g. ford motor ca: Lease
        self.states = features.USA_MAP
        self.makes = features.MAKE_TYPE_MAP
        self.sellers = features.SELLER_TYPE_MAP

        # Three independent lists, all empty at construction time.
        self.dataset, self.filtered, self.output = [], [], []
Ejemplo n.º 3
0
                    try:
                        conditions[condition] = val['prices_centered']
                    except KeyError:
                        pass

# Build [condition, average centred price] pairs, ordered by condition.
results = []
for condition, offset in conditions.items():
    results.append([float(condition), np.average(offset)])

results.sort(key=itemgetter(0))

# NOTE - there is clearly an issue with the condition 4.0 selling prices :(
# It doesn't make any sense why that set of prices would be above the average
# while 3.9 and 4.1 are significantly below average, so replace the 4.0 value
# with the midpoint of its two neighbours in sorted order.
#
# The entry is located by its condition value instead of a fixed position, so
# the adjustment still targets 4.0 when the set of conditions changes. It is
# skipped (instead of raising IndexError or overwriting an unrelated entry)
# when 4.0 is absent or has no neighbour on both sides.
OUTLIER_CONDITION = 4.0
for position in range(1, len(results) - 1):
    if results[position][0] == OUTLIER_CONDITION:
        results[position][1] = (results[position - 1][1] + results[position + 1][1]) / 2
        break

# Keys are the string form of the float condition, e.g. '4.0'.
condition_map = {}
for item in results:
    condition_map[str(item[0])] = item[1]
    # A formatted string prints the same "a b" text on Python 2 and Python 3.
    print('%s %s' % (item[0], item[1]))

#plt.scatter([item[0] for item in results], [item[1] for item in results])
#plt.savefig('./plots/condition-offset.png')

EXPORT = Exporter()

EXPORT.save('condition-offset.json', dataset=condition_map)
Ejemplo n.º 4
0
                try:
                    states[state] = states[state] + val['prices_centered']
                except KeyError:
                    try:
                        states[state] = val['prices_centered']
                    except KeyError:
                        pass

# Average the centred prices per state, leaving out states with too few samples.
results = []
for state, offset in states.items():
    sample_size = len(offset)
    # A state needs at least 1 / confidence**2 samples; smaller groups are
    # reported on stdout and left out of the results.
    if sample_size < 1 / pow(confidence, 2):
        print('state: %s, n: %s' % (state, sample_size))
        print('not enough data to calculate reliable offset')
        continue
    results.append([state, np.average(offset)])

# Order by state key so the printed listing below is sorted.
results.sort(key=itemgetter(0))

state_map = {}
for state_key, mean_offset in results:
    state_map[state_key] = mean_offset
    print('%s %s' % (state_key, mean_offset))

# Diagnostic scatter plot, currently disabled:
# plt.scatter([i for i, item in enumerate(results)], [item[1] for item in results])
# plt.savefig('./plots/state-offset.png')

EXPORT = Exporter()

EXPORT.save('state-offset.json', dataset=state_map)