Example #1
0
 def analyzeDataClusters():
     regex = 'cafe'
     neighborLocationExtractionMethod = NeighborLocationsSelection.N_LOCATIONS
     inputFile = checkinSequenceLocationRegexAnalysisFolder+neighborLocationExtractionMethod+'/'+regex
     for line in FileIO.iterateJsonFromFile(inputFile):
         if line['parameters']['checkinsWindow']==10:
             for location, data in line['locations'].iteritems():
 #                data = line['locations']['41.895 -87.623']
                 if isWithinBoundingBox(getLocationFromLid(location), us_boundary):
                     print venuesCollection.find_one({'lid': location})['n'], location,'\n'
                     for l, _ in data['clusters'][:5]:
                         print [i[0] for i in l]
                     print '\n ********** \n'
def getKMLForCluster(cluster):
    '''Return (location, venue name) pairs for a cluster of more than three lids.

    A cluster with three or fewer lids yields an empty list. Otherwise each
    lid is paired with getLocationFromLid(lid) and the UTF-8 encoded 'n'
    field of its venue document, or '' when no venue document is found.
    '''
    if len(cluster) <= 3: return []
    points = []
    for lid in cluster:
        venue = venuesCollection.find_one({'lid': lid})
        point = getLocationFromLid(lid)
        label = unicode(venue['n']).encode("utf-8") if venue is not None else ''
        points.append((point, label))
    return points
def getRandomLocationNames(file, **conf):
    data = defaultdict(list)
    for d in FileIO.iterateJsonFromFile(file): data[d['location_db_mad']].append(d['location'])
    for k in sorted(data):
        print k,
        for i in range(5):
            venue = venuesCollection.find_one({'lid':random.choice(data[k])})
            if venue: print unicode(venue['n']).encode("utf-8")+', ',
        print
Example #4
0
def addCheckinSequenceToDB():
    count = 1
    for i in FileIO.iterateJsonFromFile(checkinSequenceGraphLocationsFile):
        title = ''
        lidInfo = venuesCollection.find_one({'lid': i['lid']}, ['n'])
        if lidInfo: title = lidInfo['n']
        print count, len(i['checkins'])
        count+=1
        checkinSequenceLocationsCollection.insert({'_id': i['lid'], 'e': i['edges'], 'c': i['checkins'], 'n': title})
Example #5
0
 def writeLocationToUserMap(place):
     '''Write the locations that fall inside a place's boundary to that place's map file.

     place is a dict with 'name' and 'boundary' keys and an optional
     'minLocationCheckins' key (default 0). Any existing output file for the
     place is removed first. Every location from the filtered iterator that
     lies within the boundary is annotated with 'name', 'categories', 'tags'
     (each '' when the corresponding lookup finds nothing) and
     'noOfCheckins', and is written as JSON only when 'noOfCheckins' is
     strictly greater than the minimum.
     '''
     name, boundary = place['name'], place['boundary']
     outputFile = placesLocationToUserMapFile%name
     GeneralMethods.runCommand('rm -rf %s'%outputFile)
     minLocationCheckins = place.get('minLocationCheckins',0)
     for location in filteredLocationToUserAndTimeMapIterator(minLocationsTheUserHasCheckedin, minUniqueUsersCheckedInTheLocation, inputFile=locationToUserAndExactTimeMapFile):
         lid = location['location']
         if not isWithinBoundingBox(getLocationFromLid(lid), boundary): continue
         location['categories'] = ''; location['tags'] = ''; location['name']=''
         title = venuesCollection.find_one({'lid':lid})
         if title: location['name'] = unicode(title['n']).encode("utf-8")
         meta = venuesMetaDataCollection.find_one({'_id':lid})
         if meta:
             location['categories'] = unicode(meta['c']).encode("utf-8")
             location['tags'] = unicode(meta['t']).encode("utf-8")
         # Rebuild the mapping with string keys. Inserting str(user) and then
         # deleting user in place drops the entry whenever the key already
         # compares equal to its str() form (e.g. u'42' == '42').
         location['users'] = dict((str(user), userVector) for user, userVector in location['users'].iteritems())
         location['noOfCheckins'] = sum(
             len(epochs)
             for userVector in location['users'].itervalues()
             for dayVector in userVector.itervalues()
             for epochs in dayVector.itervalues()
         )
         if location['noOfCheckins']>minLocationCheckins: FileIO.writeToFileAsJson(location, outputFile)
def plotLocationDistribution():
    '''Types of locations seen: 
        => Locations where different people have to be at same time: Example office, pub
        => Locations that different people choose to go at different times: cafe+party place
       Big cluster suggests most people who come to a location go to similar locations (implies similar people). 
        Their mean suggests the most poplar time to go to that location.
    '''
    def scale(val): return (val*4)+2#val*2*4+2
    for location in FileIO.iterateJsonFromFile(locationClustersFile):
        if 'clustering' in location:
            classes, classDistribution = getDataDistribution(location['clustering'][1].values())
            mu, sigma = location['clustering'][2][0], location['clustering'][2][1]
            totalUsers = float(sum(classDistribution))
            for dist, mu, sigma in zip(classDistribution, mu, sigma):
                if sigma==0: sigma=0.15
                print dist/totalUsers
                plotNorm(dist/totalUsers, scale(mu), scale(sigma))
            title = venuesCollection.find_one({'lid':location['location']})
            if title!=None: title = unicode(title['n']).encode("utf-8")
            else: title = ''
            plt.title('%s (%s)'%(title,location['location']))
            plt.xlim(xmin=0,xmax=24)
            print 'comes here'
            plt.show()
Example #7
0
 def getLocationName(lid):
     '''Return the UTF-8 encoded venue name for lid, caching it in locationNames.

     On a cache miss the venue document is looked up by 'lid'; its 'n' field
     is cached, or '' when no venue document is found.
     '''
     if lid in locationNames: return locationNames[lid]
     venue = venuesCollection.find_one({'lid':lid})
     locationNames[lid] = unicode(venue['n']).encode("utf-8") if venue else ''
     return locationNames[lid]
Example #8
0
 def getLocationName(lid):
     '''Return the 'n' field of the venue stored under lid, or lid itself when no venue is found.'''
     venue = venuesCollection.find_one({'lid': lid})
     return venue['n'] if venue else lid