def scatterPlot(clustering, location, fileName):
    """Draw one Gaussian per user cluster over the hours of day at which the
    cluster's users were seen at ``location`` and save the figure.

    clustering -- indexable; ``clustering[2]['clusters']`` maps a cluster id to
                  the users in that cluster and ``clustering[3]`` maps a
                  cluster id to the colour used to draw it.
    location   -- dict whose ``'users'`` entry maps user -> key -> key -> list
                  of epoch timestamps (presumably day and day-block keys --
                  TODO confirm), and whose ``'name'``, ``'location'``,
                  ``'categories'`` and ``'tags'`` entries go into the title.
    fileName   -- path handed to ``plt.savefig``.

    The current figure is cleared after saving.

    NOTE(review): ``validClusters`` is not defined inside this function; it is
    presumably a module-level collection of cluster ids -- confirm.
    """
    locationUsers = location['users']
    clusterMap = clustering[3]

    # Map every user seen at this location to the cluster containing them.
    userClusterMap = {}
    for clusterId, users in clustering[2]['clusters'].items():
        for user in users:
            if user in locationUsers:
                userClusterMap[user] = clusterId

    # scatterData[clusterId][hour] -> number of timestamps in that hour.
    scatterData = defaultdict(dict)
    for user, userVector in locationUsers.items():
        if user not in userClusterMap:
            continue
        clusterId = userClusterMap[user]
        for d in userVector:
            for db in userVector[d]:
                for ep in userVector[d][db]:
                    # Hours are taken in the machine's local time and shifted
                    # back by 6 (presumably a time-zone correction -- confirm).
                    hour = (datetime.datetime.fromtimestamp(ep).hour - 6) % 24
                    # The cluster entry is created only once a timestamp is
                    # actually seen, so no cluster ends up with empty data.
                    hourCounts = scatterData[clusterId]
                    hourCounts[hour] = hourCounts.get(hour, 0) + 1

    # Sum of all hour values over all clusters (each hour counted once per
    # occurrence).
    # NOTE(review): weights below are shares of summed hour values, not of
    # the number of timestamps -- confirm this is intended.
    total = float(sum(hour * count
                      for hourCounts in scatterData.values()
                      for hour, count in hourCounts.items()))

    for cluster, hourCounts in scatterData.items():
        if cluster not in validClusters:
            continue
        # Expand the histogram back into one sample per occurrence.
        data = [hour for hour, count in hourCounts.items()
                for _ in range(count)]
        mean, std = np.mean(data), np.std(data)
        # total is 0.0 when every recorded hour bucket is 0; the cluster's own
        # sum is then 0 as well, so use a zero weight instead of dividing.
        weight = sum(data) / total if total else 0.0
        if std == 0:
            # A zero-width Gaussian cannot be drawn; use a small random width.
            std = random.uniform(0.1, 0.5)
        plotNorm(weight, mean, std, color=clusterMap[cluster])

    plt.title('%s (%s, %s, %s)' % (location['name'], location['location'],
                                   location['categories'], location['tags']))
    plt.legend()
    plt.xlim(xmin=0, xmax=24)
    plt.savefig(fileName)
    plt.clf()
def plotLocation(locationName, locationId, locationClustering, dayBlockMeans,
                 dayBlockStandardDeviations, colorMap, fileName=None):
    """Draw one Gaussian per class of a location's clustering and save it.

    locationName, locationId   -- shown in the figure title.
    locationClustering         -- mapping whose values are passed to
                                  ``getDataDistribution`` to obtain the
                                  classes and how many items fall in each.
    dayBlockMeans,
    dayBlockStandardDeviations -- per-class mean and standard deviation,
                                  zipped against the class distribution.
    colorMap                   -- maps a class to its plot colour.
    fileName                   -- path handed to ``plt.savefig``. When omitted
                                  the module-level ``fileName`` is used, which
                                  is what this function read before it had
                                  the parameter.

    Raises NameError if no file name is available at all.

    NOTE(review): ``scale`` is not defined in this function; the only
    definition visible nearby is nested inside ``plotLocationDistribution``
    and is not reachable from here -- confirm a module-level ``scale`` exists.
    """
    if fileName is None:
        # Backward compatibility for callers relying on a global fileName.
        # Fail before any plotting rather than at savefig time.
        if 'fileName' not in globals():
            raise NameError("plotLocation: no fileName argument given and no "
                            "module-level fileName is defined")
        fileName = globals()['fileName']

    classes, classDistribution = getDataDistribution(locationClustering.values())
    totalUsers = float(sum(classDistribution))
    colors = [colorMap[c] for c in classes]
    for dist, mean, deviation, color in zip(classDistribution, dayBlockMeans,
                                            dayBlockStandardDeviations, colors):
        if deviation == 0:
            # A zero-width Gaussian cannot be drawn; use a small fixed width.
            deviation = 0.15
        plotNorm(dist / totalUsers, scale(mean), scale(deviation), color=color)

    plt.title('%s (%s)' % (locationName, locationId))
    plt.xlim(xmin=0, xmax=24)
    plt.savefig(fileName)
    plt.clf()
def plotGaussianGraphsForClusters(place):
    """Save one Gaussian-mixture figure per location of ``place``.

    For every location yielded by
    ``Analysis.iterateLocationsWithClusterDetails(place)`` a Gaussian is drawn
    for each entry of ``location['clustersInfo']`` (using its ``'mean'``,
    ``'std'``, ``'clusterSum'`` and ``'color'``), weighted by
    ``clusterSum / location['total']``. The figure is written to

        placesGaussianImagesFolder % place['name'] / <location type> / <id>.png

    where ``<id>`` is ``location['location']`` with spaces replaced by ``_``
    and dots by ``+``. The path is printed, its directory is created, and the
    figure is cleared after saving.
    """
    for location in Analysis.iterateLocationsWithClusterDetails(place):
        # Coerce to float: under Python 2, int / int truncates and would turn
        # almost every cluster weight into 0.
        total = float(location['total'])
        for clusterId, data in location['clustersInfo'].items():
            mean, std = data['mean'], data['std']
            clusterSum, color = data['clusterSum'], data['color']
            # Avoid ZeroDivisionError for a location whose total is 0.
            weight = clusterSum / total if total else 0.0
            if std == 0:
                # A zero-width Gaussian cannot be drawn; use a small random width.
                std = random.uniform(0.1, 0.5)
            plotNorm(weight, mean, std, color=color, label=str(clusterId))

        # NOTE(review): the other plots in this file use xmax=24 -- confirm 23
        # is intended here.
        plt.xlim(xmin=0, xmax=23)
        plt.legend()
        plt.title(location['name'])

        fileName = '/'.join([
            placesGaussianImagesFolder % place['name'],
            getLocationType(location),
            location['location'].replace(' ', '_').replace('.', '+') + '.png',
        ])
        print(fileName)
        FileIO.createDirectoryForFile(fileName)
        plt.savefig(fileName)
        plt.clf()
def plotLocationDistribution():
    '''Show, location by location, a Gaussian for every class of the
    location's clustering.

    Locations are read from locationClustersFile; those without a
    'clustering' entry are skipped. The figure title is the venue name looked
    up in venuesCollection by 'lid' (empty when no venue is found) followed by
    the location id.

    Types of locations seen:
    => Locations where different people have to be at same time:
        Example office, pub
    => Locations that different people choose to go at different times:
        cafe+party place
    Big cluster suggests most people who come to a location go to similar
    locations (implies similar people). Their mean suggests the most popular
    time to go to that location.
    '''
    def scale(val):
        # Affine mapping applied to both means and deviations before plotting.
        return (val * 4) + 2

    for location in FileIO.iterateJsonFromFile(locationClustersFile):
        if 'clustering' not in location:
            continue

        clustering = location['clustering']
        classes, classDistribution = getDataDistribution(clustering[1].values())
        means, deviations = clustering[2][0], clustering[2][1]
        totalUsers = float(sum(classDistribution))

        for dist, mean, deviation in zip(classDistribution, means, deviations):
            if deviation == 0:
                # A zero-width Gaussian cannot be drawn; use a small fixed width.
                deviation = 0.15
            share = dist / totalUsers
            print(share)
            plotNorm(share, scale(mean), scale(deviation))

        venue = venuesCollection.find_one({'lid': location['location']})
        if venue is None:
            title = ''
        else:
            title = unicode(venue['n']).encode("utf-8")

        plt.title('%s (%s)' % (title, location['location']))
        plt.xlim(xmin=0, xmax=24)
        print('comes here')
        plt.show()