def writeUserDistributionUsingItemsetClustering(
    minLocationsTheUserHasCheckedin, minUniqueUsersCheckedInTheLocation
):
    ''' Writes the user distribution for the spots file kept under spotsFIFolder.

    The spots file path is "<spotsFIFolder>/<minLocationsTheUserHasCheckedin>_
    <minUniqueUsersCheckedInTheLocation>". The users are the full records
    produced by filteredUserIterator for the same two thresholds. The actual
    work is delegated to SpotsAnalysis.writeUserDistributionInSpots.
    '''
    thresholds = (minLocationsTheUserHasCheckedin, minUniqueUsersCheckedInTheLocation)
    spotsFile = "%s/%s_%s" % ((spotsFIFolder,) + thresholds)
    userRecords = filteredUserIterator(
        minLocationsTheUserHasCheckedin,
        minUniqueUsersCheckedInTheLocation,
        fullRecord=True,
    )
    SpotsAnalysis.writeUserDistributionInSpots(spotsFile, userRecords)
def writeCheckinSequenceGraphFile():
    ''' Writes check-in windows of every filtered user to checkinSequenceGraphFile.

    The distinct users are collected from filteredUserIterator (full records,
    module-level thresholds). For each user, the check-ins are read from
    checkinsCollection as (_id, lid, epoch seconds) tuples, and every element
    produced by GeneralMethods.getElementsInWindow(checkins, 2) is written as
    the JSON record [user, element].
    NOTE(review): presumably each element is a window of two check-ins;
    confirm against GeneralMethods.getElementsInWindow.
    '''
    userSet = set(
        userVector['user']
        for userVector in filteredUserIterator(
            minLocationsTheUserHasCheckedin,
            minUniqueUsersCheckedInTheLocation,
            fullRecord=True,
        )
    )
    total = len(userSet)
    for count, user in enumerate(userSet, 1):
        # Progress line: user, 1-based position, number of users. A single
        # pre-formatted argument prints the same space-separated text.
        print("%s %s %s" % (user, count, total))
        checkins = []
        for c in checkinsCollection.find({'u': user}):
            checkins.append((c['_id'], c['lid'], time.mktime(c['t'].timetuple())))
        for window in GeneralMethods.getElementsInWindow(checkins, 2):
            FileIO.writeToFileAsJson([user, window], checkinSequenceGraphFile)
def locationsFromAllTransactionsIterator(minLocationsTheUserHasCheckedin, minUniqueUsersCheckedInTheLocation):
    ''' Yields getLocationFromLid(lid) for location ids not seen in earlier records.

    Iterates the records produced by filteredUserIterator for the given
    thresholds, printing the 0-based index of each record before it is
    processed. Ids already observed in a previous record are skipped.
    '''
    seenLids = set()
    transactions = filteredUserIterator(minLocationsTheUserHasCheckedin, minUniqueUsersCheckedInTheLocation)
    for index, transaction in enumerate(transactions):
        print(index)
        # The unseen ids are collected up front, before any of them is marked
        # as seen.
        unseen = [lid for lid in transaction if lid not in seenLids]
        for lid in unseen:
            seenLids.add(lid)
            yield getLocationFromLid(lid)
def run():
    ''' Prints what SpotsAnalysis.getStats returns for the spots file under spotsFIFolder.

    The file path and the user records are both derived from the module-level
    thresholds minLocationsTheUserHasCheckedin and
    minUniqueUsersCheckedInTheLocation.
    '''
    # Disabled calls, kept for reference:
    # Spots.writeUsingItemsetClustering(minLocationsTheUserHasCheckedin, minUniqueUsersCheckedInTheLocation, minCalculatedSupport=minSupport, initialNumberofLocationsInSpot=initialNumberofLocationsInSpot, extraMinSupport=5)
    # Spots.writeUsingRadius(minLocationsTheUserHasCheckedin, minUniqueUsersCheckedInTheLocation, radiusInMiles)
    # Spots.writeUserDistributionUsingItemsetClustering(minLocationsTheUserHasCheckedin, minUniqueUsersCheckedInTheLocation)
    # Spots.writeUserDistributionUsingRadius(minLocationsTheUserHasCheckedin, minUniqueUsersCheckedInTheLocation, radiusInMiles)
    spotsFile = "%s/%s_%s" % (
        spotsFIFolder,
        minLocationsTheUserHasCheckedin,
        minUniqueUsersCheckedInTheLocation,
    )
    userRecords = filteredUserIterator(
        minLocationsTheUserHasCheckedin,
        minUniqueUsersCheckedInTheLocation,
        fullRecord=True,
    )
    stats = SpotsAnalysis.getStats(spotsFile, userRecords)
    print(stats)
def writeUserDistributionUsingRadius(
    minLocationsTheUserHasCheckedin, minUniqueUsersCheckedInTheLocation, radiusInMiles
):
    ''' Writes the user distribution for the spots file kept under spotsRadiusFolder.

    The spots file path is "<spotsRadiusFolder>/<minLocationsTheUserHasCheckedin>_
    <minUniqueUsersCheckedInTheLocation>_<radiusInMiles>". The users are the
    full records produced by filteredUserIterator for the two thresholds. The
    actual work is delegated to SpotsAnalysis.writeUserDistributionInSpots.
    '''
    pathParts = (
        spotsRadiusFolder,
        minLocationsTheUserHasCheckedin,
        minUniqueUsersCheckedInTheLocation,
        radiusInMiles,
    )
    spotsFile = "%s/%s_%s_%s" % pathParts
    userRecords = filteredUserIterator(
        minLocationsTheUserHasCheckedin,
        minUniqueUsersCheckedInTheLocation,
        fullRecord=True,
    )
    SpotsAnalysis.writeUserDistributionInSpots(spotsFile, userRecords)
def locationTransactionsIterator(minLocationsTheUserHasCheckedin, minUniqueUsersCheckedInTheLocation):
    ''' Yields key lists of each filtered record until the record is exhausted.

    For every record d produced by filteredUserIterator, d.keys() is yielded,
    then every value in d is decremented by one and entries that reach zero
    are removed; this repeats until d has no keys left. A key whose value is
    a positive integer n therefore appears in n yielded transactions. The
    record is modified in place. After each record is exhausted, the 1-based
    count of processed records is printed.

    NOTE(review): an entry is removed only when its value is exactly 0, so a
    value that never hits 0 (non-positive or non-integer) keeps the inner
    loop running forever; confirm the values are always positive integers.
    '''
    def decrementDictionary(counts):
        # Walk a snapshot of the keys because entries are deleted on the way.
        for key in list(counts.keys()):
            counts[key] -= 1
            if counts[key] == 0:
                del counts[key]

    transactions = filteredUserIterator(minLocationsTheUserHasCheckedin, minUniqueUsersCheckedInTheLocation)
    for processed, d in enumerate(transactions, 1):
        # Disabled alternative condition:
        # while len(d.keys())>=minimumTransactionLength:
        while d.keys():
            yield d.keys()
            decrementDictionary(d)
        print(processed)
def getStats(): return Spots.getStats(UserGraphSpots.getSpotsFile(), filteredUserIterator(minLocationsTheUserHasCheckedin, minUniqueUsersCheckedInTheLocation, fullRecord = True)) @staticmethod
def writeUserDistribution(): Spots.writeUserDistributionInSpots(UserGraphSpots.getSpotsFile(), filteredUserIterator(minLocationsTheUserHasCheckedin, minUniqueUsersCheckedInTheLocation, fullRecord = True)) @staticmethod
def writeUserDistribution(): Spots.assignUserToSpots(RadiusSpots.getSpotsFile(), filteredUserIterator(minLocationsTheUserHasCheckedin, minUniqueUsersCheckedInTheLocation, fullRecord = True)) @staticmethod
def getUserVectors(maxLocationsPerUser=10000):
    ''' Returns a dict of user vectors restricted to each user's top locations.

    For every full record produced by filteredUserIterator (module-level
    thresholds), the record's 'locations' items are sorted by value in
    descending order and the first maxLocationsPerUser of them are kept.

    maxLocationsPerUser: how many of the highest-valued locations to keep per
        user. Defaults to 10000, the limit this function has always applied.
        None keeps every location.

    Returns a dict mapping each record's 'user' to a dict of the kept
    location -> value pairs. If a user occurs in several records, the last
    record wins.
    '''
    userVectors = {}
    for u in filteredUserIterator(minLocationsTheUserHasCheckedin, minUniqueUsersCheckedInTheLocation, fullRecord = True):
        # Highest values first; the slice then keeps only the leading entries.
        rankedLocations = sorted(u['locations'].iteritems(), key=itemgetter(1), reverse=True)
        userVectors[u['user']] = dict(rankedLocations[:maxLocationsPerUser])
    return userVectors