def plotClusterOverlapByLocationCheckins(place):
    """Scatter-plot mean pairwise cluster overlap against check-in count.

    For every location yielded by ``locationClusterMeansIterator(place)``
    that has at least two clusters, the overlap of each unordered pair of
    clusters is obtained from ``getWeitzmanOVL`` and the values are averaged.
    Each such location contributes one point: x is
    ``location['details']['noOfCheckins']`` and y is the mean overlap.

    The figure is shown with ``plt.show()``; nothing is returned.
    """
    dataX, dataY = [], []
    for location in locationClusterMeansIterator(place):
        clusters = location['clusters']
        # With fewer than two clusters there is no pair to compare. Skipping
        # here guarantees we never average an empty list, which would give
        # NaN (and a RuntimeWarning) instead of a plottable value.
        if len(clusters) < 2:
            continue
        # Elements 1 and 2 of a cluster are used as its mean and standard
        # deviation; only the first item of getWeitzmanOVL's result is kept.
        locationOverlaps = [
            getWeitzmanOVL(cluster1[1], cluster2[1], cluster1[2], cluster2[2])[0]
            for cluster1, cluster2 in combinations(clusters, 2)
        ]
        dataY.append(np.mean(locationOverlaps))
        dataX.append(location['details']['noOfCheckins'])
    plt.scatter(dataX, dataY)
    plt.show()
def plotOverlapDistribution(place):
    """Show a histogram of per-location mean cluster overlap.

    Each location from ``locationClusterMeansIterator(place)`` with two or
    more clusters is reduced to the mean of the pairwise overlap values
    returned by ``getWeitzmanOVL``. Only means strictly greater than zero are
    kept. The kept values are drawn as a 100-bin green histogram and the
    figure is displayed; nothing is returned.
    """
    meanOverlaps = []
    for location in locationClusterMeansIterator(place):
        clusters = location['clusters']
        if len(clusters) < 2:
            continue
        # Elements 1 and 2 of a cluster are used as its mean and standard
        # deviation; the overlap value is the first item of the result.
        pairOverlaps = [
            getWeitzmanOVL(first[1], second[1], first[2], second[2])[0]
            for first, second in combinations(clusters, 2)
        ]
        if not pairOverlaps:
            continue
        locationMean = np.mean(pairOverlaps)
        # Locations whose clusters do not overlap at all are left out.
        if locationMean > 0:
            meanOverlaps.append(locationMean)
    plt.hist(meanOverlaps, 100, facecolor='green', alpha=0.75)
    plt.show()
def iterateLocationsByOVLAndClustersType(place, type):
    """Yield the locations of ``place`` that fall in a given category.

    A location with at least two clusters is categorised by its number of
    clusters and by the mean of its pairwise overlap values (first item of
    ``getWeitzmanOVL`` for every unordered pair of clusters), using the
    thresholds stored in ``place``:

    * ``CLUSTERS_OVL_TYPE_LOW_LOW``   - clusters <= ``place['lowClusters']``
      and mean <= ``place['lowOVL']``
    * ``CLUSTERS_OVL_TYPE_LOW_HIGH``  - clusters <= ``place['lowClusters']``
      and mean >= ``place['highOVL']``
    * ``CLUSTERS_OVL_TYPE_HIGH_LOW``  - clusters >= ``place['highClusters']``
      and mean <= ``place['lowOVL']``
    * ``CLUSTERS_OVL_TYPE_HIGH_HIGH`` - clusters >= ``place['highClusters']``
      and mean >= ``place['highOVL']``

    The rules are tried in that order and the first match wins. A location
    matching none of them gets the category ``None``, so it is only yielded
    when ``type`` is ``None``. Locations with fewer than two clusters are
    never yielded.

    ``type`` shadows the builtin of the same name; the parameter name is kept
    so existing keyword callers continue to work.
    """
    for location in locationClusterMeansIterator(place):
        clusters = location['clusters']
        if len(clusters) < 2:
            continue
        # Elements 1 and 2 of a cluster are used as its mean and standard
        # deviation.
        locationOVLValues = [
            getWeitzmanOVL(cluster1[1], cluster2[1], cluster1[2], cluster2[2])[0]
            for cluster1, cluster2 in combinations(clusters, 2)
        ]
        if not locationOVLValues:
            continue
        noOfClusters = len(clusters)
        meanOVL = np.mean(locationOVLValues)
        locationType = None
        if noOfClusters <= place['lowClusters'] and meanOVL <= place['lowOVL']:
            locationType = CLUSTERS_OVL_TYPE_LOW_LOW
        elif noOfClusters <= place['lowClusters'] and meanOVL >= place['highOVL']:
            locationType = CLUSTERS_OVL_TYPE_LOW_HIGH
        elif noOfClusters >= place['highClusters'] and meanOVL <= place['lowOVL']:
            locationType = CLUSTERS_OVL_TYPE_HIGH_LOW
        elif noOfClusters >= place['highClusters'] and meanOVL >= place['highOVL']:
            locationType = CLUSTERS_OVL_TYPE_HIGH_HIGH
        if locationType == type:
            yield location
def plotClusterOverlapInLocations(place):
    """Scatter-plot cluster count against mean overlap, coloured by category.

    Every location from ``locationClusterMeansIterator(place)`` with at least
    two clusters is drawn as one point: x is its number of clusters, y is the
    mean of its pairwise overlap values (first item of ``getWeitzmanOVL`` for
    every unordered pair of clusters). The colour is chosen from the
    thresholds stored in ``place``; rules are tried in this order and the
    first match wins:

    * green   - clusters <= ``place['lowClusters']`` and mean <= ``place['lowOVL']``
    * yellow  - clusters <= ``place['lowClusters']`` and mean >= ``place['highOVL']``
    * red     - clusters >= ``place['highClusters']`` and mean <= ``place['lowOVL']``
    * magenta - clusters >= ``place['highClusters']`` and mean >= ``place['highOVL']``
    * blue    - anything else

    The figure is shown with ``plt.show()``; nothing is returned.
    """
    for location in locationClusterMeansIterator(place):
        clusters = location['clusters']
        if len(clusters) < 2:
            continue
        # Elements 1 and 2 of a cluster are used as its mean and standard
        # deviation.
        locationOVLValues = [
            getWeitzmanOVL(cluster1[1], cluster2[1], cluster1[2], cluster2[2])[0]
            for cluster1, cluster2 in combinations(clusters, 2)
        ]
        if not locationOVLValues:
            continue
        noOfClusters = len(clusters)
        meanOVL = np.mean(locationOVLValues)
        if noOfClusters <= place['lowClusters'] and meanOVL <= place['lowOVL']:
            color = 'g'
        elif noOfClusters <= place['lowClusters'] and meanOVL >= place['highOVL']:
            color = 'y'
        elif noOfClusters >= place['highClusters'] and meanOVL <= place['lowOVL']:
            color = 'r'
        elif noOfClusters >= place['highClusters'] and meanOVL >= place['highOVL']:
            color = 'm'
        else:
            color = 'b'
        # One scatter call per location, as before, so draw order is unchanged.
        plt.scatter([noOfClusters], [meanOVL], color=color)
    plt.show()