コード例 #1
0
ファイル: preprocess.py プロジェクト: psvinaya/Yelp-Kaggle
def writeToFileByAttribute(attrFilename,photosFilename, suffix):
	"""Write three CSV files per attribute index under processedInputDir.

	The attribute and photo tables are loaded once. Then, for every index
	``i`` in ``range(numAttributes)``, the following files are written to
	``processedInputDir/<i>/``:

	* ``<suffix>.csv``               -- first result of
	  ``util.getPhotosBusinessByAttribute(photos, attr, i)``
	* ``<suffix>_photo_to_biz.csv``  -- second result of the same call
	* ``<suffix>_photo_to_attr.csv`` -- result of
	  ``util.photoByAttributes`` applied to that second result and the
	  photo table

	Args:
		attrFilename: passed to ``util.readAttributesFromCSV``.
		photosFilename: passed to ``util.readPhotosFromCSV``.
		suffix: string used as the base name of every output file.

	Returns:
		None. The only effect is the files written via ``to_csv``.

	NOTE(review): this function does not create ``processedInputDir/<i>/``;
	presumably the directories must already exist -- confirm against the
	behaviour of the objects' ``to_csv``.
	"""
	attr = util.readAttributesFromCSV(attrFilename)
	photos = util.readPhotosFromCSV(photosFilename)
	# ``range`` instead of ``xrange``: same iteration on Python 2, and it
	# also exists on Python 3 (matches the loop in getLabelDistribution).
	for i in range(numAttributes):
		# b2a / p2b presumably mean business-to-attribute and
		# photo-to-business -- TODO confirm in util.
		b2a, p2b = util.getPhotosBusinessByAttribute(photos, attr, i)
		# Common prefix of all three output paths for this attribute.
		outputPrefix = processedInputDir + '/' + str(i) + '/' + suffix
		b2a.to_csv(outputPrefix + '.csv', index=False)
		p2b.to_csv(outputPrefix + '_photo_to_biz.csv', index=False)
		p2a = util.photoByAttributes(p2b, photos)
		p2a.to_csv(outputPrefix + '_photo_to_attr.csv', index=False)
コード例 #2
0
def getLabelDistribution():
	"""Print, for each attribute index, the column sum divided by row count.

	Loads the attribute table with ``util.readAttributesFromCSV()`` and, for
	every index ``i`` in ``range(numAttributes)``, prints one line of the
	form ``"<i> <value>"`` where ``value = np.sum(data[i]) * 1.0 / N`` and
	``N = data.shape[0]``.

	Presumably each column is a 0/1 label indicator, which would make the
	value the fraction of rows carrying that label -- TODO confirm.

	Returns:
		None. Results are only printed to standard output.
	"""
	data = util.readAttributesFromCSV()
	numRows = data.shape[0]
	for i in range(numAttributes):
		# ``* 1.0`` forces true division on Python 2 when both operands
		# are integers.
		fraction = np.sum(data[i]) * 1.0 / numRows
		# A single pre-formatted string prints the same "i value" text
		# under both the Python 2 print statement and the Python 3 print
		# function; the original ``print i, x`` is a SyntaxError on Python 3.
		print("%s %s" % (i, fraction))