def analyzeDataClusters(): regex = 'cafe' neighborLocationExtractionMethod = NeighborLocationsSelection.N_LOCATIONS inputFile = checkinSequenceLocationRegexAnalysisFolder+neighborLocationExtractionMethod+'/'+regex for line in FileIO.iterateJsonFromFile(inputFile): if line['parameters']['checkinsWindow']==10: for location, data in line['locations'].iteritems(): # data = line['locations']['41.895 -87.623'] if isWithinBoundingBox(getLocationFromLid(location), us_boundary): print venuesCollection.find_one({'lid': location})['n'], location,'\n' for l, _ in data['clusters'][:5]: print [i[0] for i in l] print '\n ********** \n'
def getKMLForCluster(cluster):
    '''Return a list of (location, venue name) pairs for the lids in cluster.

    Clusters with three or fewer members yield an empty list. For larger
    clusters every lid is looked up in venuesCollection; the name is the
    UTF-8 encoded 'n' field of the venue, or '' when no venue is found.
    '''
    # Small clusters are not worth plotting: hand back nothing.
    if len(cluster)<=3: return []
    points = []
    for lid in cluster:
        venue = venuesCollection.find_one({'lid':lid})
        coordinates = getLocationFromLid(lid)
        name = unicode(venue['n']).encode("utf-8") if venue is not None else ''
        points.append((coordinates, name))
    return points
def getRandomLocationNames(file, **conf): data = defaultdict(list) for d in FileIO.iterateJsonFromFile(file): data[d['location_db_mad']].append(d['location']) for k in sorted(data): print k, for i in range(5): venue = venuesCollection.find_one({'lid':random.choice(data[k])}) if venue: print unicode(venue['n']).encode("utf-8")+', ', print
def addCheckinSequenceToDB(): count = 1 for i in FileIO.iterateJsonFromFile(checkinSequenceGraphLocationsFile): title = '' lidInfo = venuesCollection.find_one({'lid': i['lid']}, ['n']) if lidInfo: title = lidInfo['n'] print count, len(i['checkins']) count+=1 checkinSequenceLocationsCollection.insert({'_id': i['lid'], 'e': i['edges'], 'c': i['checkins'], 'n': title})
def writeLocationToUserMap(place):
    '''Write the annotated location-to-user map for one place.

    place is a dict with 'name' and 'boundary' and, optionally,
    'minLocationCheckins' (defaults to 0). Any previous output for the place
    is removed first. Every location from the filtered location/user/time
    iterator that lies within the boundary is annotated with 'name',
    'categories' and 'tags' ('' when the venue or its metadata is missing)
    and with 'noOfCheckins'; it is appended to the place's output file only
    when noOfCheckins is strictly greater than the minimum.
    '''
    name, boundary = place['name'], place['boundary']
    outputFile = placesLocationToUserMapFile%name
    minLocationCheckins = place.get('minLocationCheckins',0)
    # Start from a clean slate; writeToFileAsJson is called once per location below.
    GeneralMethods.runCommand('rm -rf %s'%outputFile)
    for location in filteredLocationToUserAndTimeMapIterator(minLocationsTheUserHasCheckedin, minUniqueUsersCheckedInTheLocation, inputFile=locationToUserAndExactTimeMapFile):
        lid = location['location']
        if not isWithinBoundingBox(getLocationFromLid(lid), boundary): continue
        location['categories'] = ''; location['tags'] = ''; location['name'] = ''
        title = venuesCollection.find_one({'lid':lid})
        if title: location['name'] = unicode(title['n']).encode("utf-8")
        meta = venuesMetaDataCollection.find_one({'_id':lid})
        if meta:
            location['categories'] = unicode(meta['c']).encode("utf-8")
            location['tags'] = unicode(meta['t']).encode("utf-8")
        # Re-key the users dict with str keys. pop() removes the old key
        # before the new one is stored, so a key that already equals its
        # str() form is kept; assigning first and deleting afterwards would
        # drop such an entry entirely.
        users = location['users']
        for user in list(users): users[str(user)] = users.pop(user)
        # Count the innermost sequences across all three nesting levels
        # (the original loop names suggest user -> day -> bucket -> epochs).
        location['noOfCheckins'] = sum(len(epochs) for userVector in users.itervalues() for dayVector in userVector.itervalues() for epochs in dayVector.itervalues())
        if location['noOfCheckins']>minLocationCheckins: FileIO.writeToFileAsJson(location, outputFile)
def plotLocationDistribution(): '''Types of locations seen: => Locations where different people have to be at same time: Example office, pub => Locations that different people choose to go at different times: cafe+party place Big cluster suggests most people who come to a location go to similar locations (implies similar people). Their mean suggests the most poplar time to go to that location. ''' def scale(val): return (val*4)+2#val*2*4+2 for location in FileIO.iterateJsonFromFile(locationClustersFile): if 'clustering' in location: classes, classDistribution = getDataDistribution(location['clustering'][1].values()) mu, sigma = location['clustering'][2][0], location['clustering'][2][1] totalUsers = float(sum(classDistribution)) for dist, mu, sigma in zip(classDistribution, mu, sigma): if sigma==0: sigma=0.15 print dist/totalUsers plotNorm(dist/totalUsers, scale(mu), scale(sigma)) title = venuesCollection.find_one({'lid':location['location']}) if title!=None: title = unicode(title['n']).encode("utf-8") else: title = '' plt.title('%s (%s)'%(title,location['location'])) plt.xlim(xmin=0,xmax=24) print 'comes here' plt.show()
def getLocationName(lid):
    '''Return the UTF-8 encoded venue name for lid, caching it in locationNames.

    The first request for a lid queries venuesCollection; the result ('' when
    no venue is found) is stored in locationNames and served from there on
    later calls.
    '''
    if lid in locationNames: return locationNames[lid]
    venue = venuesCollection.find_one({'lid':lid})
    # Misses are cached as '' too, so the lookup is not repeated.
    locationNames[lid] = unicode(venue['n']).encode("utf-8") if venue else ''
    return locationNames[lid]
def getLocationName(lid):
    '''Return the 'n' field of the venue stored for lid, or lid itself when none is found.'''
    venue = venuesCollection.find_one({'lid': lid})
    return venue['n'] if venue else lid