def ZooplaPriceChanges():
    """Tally price changes of Zoopla sale listings and plot their distribution.

    Reads the first 1000 rows of the daily matched data, keeps sale listings
    with a positive price and tracks each listing ID in a ``PriceCalc``.
    Every later observation of an already-tracked listing is passed to
    ``PriceCalc.add`` and the returned ``(startDay, endDay, percent)`` triple
    is recorded in ``distribution``.  The first 1000 rows of the collated
    data are then read for deletion dates: each tracked sale listing found
    there gets a final ``add(deleted, 0)`` and is removed from the map.

    Prints the number of tracked listings that were never deleted, then the
    number of listings still at their initial price when deleted, the number
    of deleted listings and the ratio of the two (``nan`` when nothing was
    deleted), and finally calls ``plotProbability(distribution.dist)``.

    NOTE(review): this file contains a second ``ZooplaPriceChanges``
    definition further down; being defined later, it replaces this one.
    """
    total = 0
    pSame = 0
    priceMap = {}
    # NOTE(review): the assignment below is commented out in the original, so
    # `distribution` has to resolve to a module-level object -- confirm one
    # exists, otherwise re-enable this line.
    # distribution = DiscountDistribution()
    data = ds.ZooplaMatchedDaily()
    # A single fixed-size read is used; chunk-wise iteration over data.parser
    # is disabled in the original.
    chunk = data.read(1000)
    # Strip the UTF-8 byte-order mark glued to the first column header.
    chunk.rename(columns={'\xef\xbb\xbfLISTING ID': 'LISTING ID'}, inplace=True)
    # Build one mask on the unfiltered frame instead of chaining boolean
    # indexers, which makes pandas re-align the second mask with a warning.
    is_priced_sale = (chunk["MARKET"] == "SALE") & (chunk['PRICE'] > 0)
    filteredchunk = chunk.loc[is_priced_sale, ['LISTING ID', 'DAY', 'PRICE']]
    for listing_id, day, price in filteredchunk.values:
        state = priceMap.get(listing_id)
        if state is None:
            # First sighting of this listing: start tracking it.
            priceMap[listing_id] = PriceCalc(day, price)
            continue
        startDay, endDay, percent = state.add(day, price)
        distribution.add(startDay, endDay, percent)

    # now get deletion dates
    delData = ds.ZooplaMatchedCollated()
    chunk = delData.read(1000)
    chunk.rename(columns={'\xef\xbb\xbfLISTING ID': 'LISTING ID'}, inplace=True)
    filteredchunk = chunk.loc[chunk["MARKET"] == "SALE", ['LISTING ID', 'DELETED']]
    for listing_id, deleted in filteredchunk.values:
        # Look up and remove in one step; untracked listings are skipped.
        state = priceMap.pop(listing_id, None)
        if state is None:
            continue
        if state.currentprice == state.initialmarketprice:
            pSame += 1
        total += 1
        startDay, endDay, percent = state.add(deleted, 0)
        distribution.add(startDay, endDay, percent)

    # Single-argument print(...) and %-formatting emit the same text under
    # both the Python 2 print statement and the Python 3 print function.
    print(len(priceMap))
    # Guard against no tracked listing appearing in the deletion data.
    ratio = pSame * 1.0 / total if total else float('nan')
    print("%s %s %s" % (pSame, total, ratio))
    plotProbability(distribution.dist)
def ZooplaPriceChanges():
    """Estimate how often sale listings change price per 30 days on market.

    Reads up to 10,000,000 rows of the daily matched data, keeps sale
    listings with a positive price and tracks each listing ID in a
    ``PriceCalc``.  For every later observation of a tracked listing the
    number of whole 30-day periods on market is appended to ``nochange``
    (price equals the tracked current price) or to ``change`` (price
    differs); for changes the percentage difference relative to the new
    price is also stored in ``changeprice``.  The observation is then passed
    to ``PriceCalc.add`` and the result recorded in ``distribution``.

    The collated data then supplies deletion dates: the function prints how
    many deleted listings were still at their initial price, the number of
    deleted listings and their ratio (``nan`` when nothing was deleted),
    gives each such listing a final ``add(deleted, 0)``, removes it from the
    map and prints how many listings remain.

    Side effects: sets the module globals ``savedOutput1`` (no-change
    periods), ``savedOutput2`` (change periods), ``savedOutput3``
    (``[period, percent]`` pairs), ``n``, ``n1`` and ``n2``; calls
    ``plotProbability`` and draws two histograms through ``pyl.hist``.

    Returns:
        ``(n, n1, n2)`` where ``n1`` and ``n2`` are the histogram counts of
        no-change and change observations over a shared set of bins, and
        ``n = n2 / (n1 + n2)`` (NaN for bins without observations).

    Raises:
        ValueError: if no listing was observed more than once, so there is
            nothing to histogram.

    NOTE(review): an earlier definition with the same name exists in this
    file; this later one replaces it.
    """
    global savedOutput1, savedOutput2, savedOutput3
    global n, n1, n2

    total = 0
    pSame = 0
    priceMap = {}
    # NOTE(review): the assignment below is commented out in the original, so
    # `distribution` has to resolve to a module-level object -- confirm one
    # exists, otherwise re-enable this line.
    # distribution = DiscountDistribution()
    data = ds.ZooplaMatchedDaily()
    # A single large read is used; chunk-wise iteration over data.parser is
    # disabled in the original.
    chunk = data.read(10000000)
    # Strip the UTF-8 byte-order mark glued to the first column header.
    chunk.rename(columns={'\xef\xbb\xbfLISTING ID': 'LISTING ID'}, inplace=True)
    # Build one mask on the unfiltered frame instead of chaining boolean
    # indexers, which makes pandas re-align the second mask with a warning.
    is_priced_sale = (chunk["MARKET"] == "SALE") & (chunk['PRICE'] > 0)
    filteredchunk = chunk.loc[is_priced_sale, ['LISTING ID', 'DAY', 'PRICE']]

    change = []
    changeprice = []
    nochange = []
    for listing_id, day, price in filteredchunk.values:
        state = priceMap.get(listing_id)
        if state is None:
            # First sighting of this listing: start tracking it.
            priceMap[listing_id] = PriceCalc(day, price)
            continue
        # Read the tracked state before add() is called on it below.
        # Explicit floor division keeps the period an integer on Python 3,
        # which range() needs when the histogram bins are built.
        periods_on_market = int(state.daysonmarket // 30)
        if state.currentprice == price:
            nochange.append(periods_on_market)
        else:
            change.append(periods_on_market)
            # float() prevents integer division from truncating the
            # percentage; price > 0 is guaranteed by the filter above.
            percent_change = -(state.currentprice - price) / float(price) * 100
            changeprice.append([periods_on_market, percent_change])
        startDay, endDay, percent = state.add(day, price)
        distribution.add(startDay, endDay, percent)

    # now get deletion dates
    delData = ds.ZooplaMatchedCollated()
    chunk = delData.read(10000000)
    chunk.rename(columns={'\xef\xbb\xbfLISTING ID': 'LISTING ID'}, inplace=True)
    filteredchunk = chunk.loc[chunk["MARKET"] == "SALE", ['LISTING ID', 'DELETED']]
    deletions = filteredchunk.values

    # First pass only counts, so the ratio is printed before any listing is
    # closed and removed.
    for listing_id, _deleted in deletions:
        state = priceMap.get(listing_id)
        if state is None:
            continue
        if state.currentprice == state.initialmarketprice:
            pSame += 1
        total += 1
    # %-formatting emits the same text under the Python 2 print statement
    # and the Python 3 print function; guard against an empty match.
    ratio = pSame * 1.0 / total if total else float('nan')
    print("%s %s %s" % (pSame, total, ratio))

    # Second pass closes each deleted listing and stops tracking it.
    for listing_id, deleted in deletions:
        state = priceMap.pop(listing_id, None)
        if state is None:
            continue
        startDay, endDay, percent = state.add(deleted, 0)
        distribution.add(startDay, endDay, percent)
    print(len(priceMap))

    savedOutput1 = nochange
    savedOutput2 = change
    savedOutput3 = changeprice
    plotProbability(distribution.dist)

    observed = nochange + change
    if not observed:
        raise ValueError(
            "no listing was observed more than once; nothing to histogram")
    # Both histograms must share bin edges, otherwise n1 and n2 describe
    # different periods (or have different lengths) and cannot be combined.
    low = min(observed)
    high = max(max(observed), low + 1)  # at least two edges to form a bin
    # NOTE(review): as in the original, the top edge is max(observed), and
    # the last histogram bin is closed, so the two highest periods share it.
    bin_edges = list(range(low, high + 1, 1))
    n1, bins1, patches1 = pyl.hist(savedOutput1, bins=bin_edges)
    n2, bins2, patches2 = pyl.hist(savedOutput2, bins=bin_edges)
    # NOTE(review): this 2-D histogram is computed but never used or
    # returned; it is kept for the disabled plt.imshow(dist) call.
    dist, binsa, binsb = np.histogram2d([x[0] for x in savedOutput3],
                                        [x[1] for x in savedOutput3],
                                        range=[[0, 30], [-30, 0]],
                                        bins=[30, 20])
    # plt.imshow(dist)
    # Bins without any observation yield 0/0; keep the NaN, skip the warning.
    with np.errstate(divide='ignore', invalid='ignore'):
        n = n2 / (n1 + n2)
    return (n, n1, n2)