Esempio n. 1
0
 def getStats(spotsFile, userToLocationVector):
     """Summarise how users' check-ins line up with the spot they were assigned to.

     Spot records are read from ``spotsFile + "_users"``. Each record is
     accessed through its ``"id"``, ``"lids"`` (pairs whose first element is a
     location) and ``"users"`` keys.

     Args:
         spotsFile: Path prefix; ``"_users"`` is appended to locate the input.
         userToLocationVector: Iterable of dicts with a ``"user"`` key and a
             ``"locations"`` mapping of lid -> repeat count.

     Returns:
         A dict with:
           * ``"accuracy"`` -- ``np.mean`` over spots of
             ``wrongAssignments / totalAssignments``.
             NOTE(review): as computed this is the fraction of check-ins that
             fall in a *different* spot (an error rate), despite the key name;
             confirm what callers expect before changing it.
           * ``"total_locations"`` -- number of lids that map to a spot.
           * ``"total_users"`` -- number of users that map to a spot.

     Raises:
         AssertionError: if a user that was already processed appears again.
     """
     spotsWithUsersFile = spotsFile + "_users"
     lidToSpotIdMap, userToSpotIdMap = {}, {}
     for spot in FileIO.iterateJsonFromFile(spotsWithUsersFile):
         for location, _ in spot["lids"]:
             lidToSpotIdMap[getLidFromLocation(location)] = spot["id"]
         for user in spot["users"]:
             userToSpotIdMap[user] = spot["id"]

     # spotMap: assigned spot id -> {user -> spot id of every mapped check-in}
     spotMap, observedUsers = defaultdict(dict), set()
     for userVector in userToLocationVector:
         user = userVector["user"]
         assert user not in observedUsers
         if user not in userToSpotIdMap:
             continue
         locations = userVector["locations"]
         assignments = []
         for lid in locations:
             if lid in lidToSpotIdMap:
                 # One entry per check-in at this lid.
                 assignments.extend([lidToSpotIdMap[lid]] * locations[lid])
         spotMap[userToSpotIdMap[user]][user] = assignments
         observedUsers.add(user)

     accuracyList = []
     for spotId, userMap in spotMap.items():
         totalAssignments, wrongAssignments = 0.0, 0.0
         for assignments in userMap.values():
             totalAssignments += len(assignments)
             wrongAssignments += sum(1 for a in assignments if a != spotId)
         # A spot whose users have no mapped check-ins has nothing to score;
         # skipping it avoids dividing by zero.
         if totalAssignments:
             accuracyList.append(wrongAssignments / totalAssignments)
     return {
         "accuracy": np.mean(accuracyList),
         "total_locations": len(lidToSpotIdMap),
         "total_users": len(userToSpotIdMap),
     }
 def mapper(self, key, line):
     """Emit ``(record, 1)`` for each parsed line whose location is inside ``boundary``.

     The record returned by ``parseData`` is modified in place before being
     yielded: ``'_id'`` is removed, ``'t'`` is replaced by
     ``time.mktime`` of its ``timetuple()``, and ``'lid'`` / ``'llid'`` are
     added from the record's ``'l'`` value. ``key`` is not used.
     """
     record = parseData(line)
     # Nothing to emit for unparsable lines or locations outside the boundary.
     if not record or not isWithinBoundingBox(record['l'], boundary):
         return
     del record['_id']
     timestamp = record['t']
     record['t'] = time.mktime(timestamp.timetuple())
     record['lid'] = getLidFromLocation(record['l'])
     record['llid'] = getLatticeLid(record['l'], accuracy=0.015)
     yield record, 1
Esempio n. 3
0
def addVenuesMetaToDB():
    """Insert per-venue metadata from ``venuesFile`` into ``venuesMetaDataCollection``.

    Every line is stripped and split on tabs. Columns 2 and 3 are parsed as
    floats and turned into the document ``_id`` via ``getLidFromLocation``;
    columns 10 and 11 are stored under ``'c'`` and ``'t'`` with backslashes
    removed.

    Loading is best-effort: any exception raised while handling a line is
    reported on stdout together with a running failure count, and processing
    continues with the next line.
    """
    failures = 0
    # The context manager closes the file even if iteration is interrupted.
    with open(venuesFile) as venues:
        for line in venues:
            fields = line.strip().split('\t')
            try:
                venuesMetaDataCollection.insert({
                    '_id': getLidFromLocation([float(fields[2]), float(fields[3])]),
                    'c': fields[10].replace('\\', ''),
                    't': fields[11].replace('\\', ''),
                })
            except Exception:
                # Single-argument form prints the same text under Python 2 and 3.
                print('%s %s %s' % (failures, 'Exception while processing:', fields))
                failures += 1
Esempio n. 4
0
def clusterSpot(spot):
    """Run k-means over the users of ``spot`` for k = 2..5 and print the results.

    Each user in ``spot['users']`` is turned into a ``(user, text)`` pair.
    The text is a space-separated sequence of that user's lids from
    ``userVectors`` (restricted to the lids of this spot, spaces replaced by
    underscores), with each lid repeated as many times as its stored count.
    For every k, the cluster assignment and the value returned by
    ``getDayBlockMeansForClusters`` are printed.
    """
    dimensions = [getLidFromLocation(location) for location, _ in spot['lids']]

    userVectorsToCluster = []
    for user in spot['users']:
        tokens = []
        for lid in userVectors[user]:
            if lid not in dimensions:
                continue
            # Repeat the lid once per recorded visit.
            tokens.extend(lid.replace(' ', '_') for _ in range(userVectors[user][lid]))
        userVectorsToCluster.append((user, ' '.join(tokens)))

    # Not used in the visible part of this function.
    resultsForVaryingK = []
    for k in range(2, 6):
        cluster = KMeansClustering(userVectorsToCluster, k).cluster()
        print('%s %s' % ('$$$$$$$$$$', cluster))
        userClusterMap = dict(zip(spot['users'], cluster))
        dayBlockMeansForClusters = getDayBlockMeansForClusters(spot['users'], userClusterMap)
        print(dayBlockMeansForClusters)
Esempio n. 5
0
 def assignUserToSpots(spotsFile, userToLocationVector):
     """Attach users to every spot they have a location in and write the result.

     Spots are read from ``spotsFile``; each spot's ``"spot"`` entry is
     iterated as pairs whose first element is a location. A user id is
     appended to a spot's ``"users"`` list once for every one of the user's
     lids that maps to that spot, so the same user can appear several times
     and in several spots. Each spot record is printed by id and written to
     ``spotsFile + "_users"``.

     Args:
         spotsFile: Path of the spots file; also the prefix of the output path.
         userToLocationVector: Iterable of dicts with ``"user"`` and
             ``"locations"`` keys.
     """
     spotsWithUsersFile = spotsFile + "_users"
     lidToSpotIdMap = {}
     userDistributionInSpots = defaultdict(list)

     for spot in FileIO.iterateJsonFromFile(spotsFile):
         spotLids = spot["spot"]
         for location, _ in spotLids:
             lidToSpotIdMap[getLidFromLocation(location)] = spot["id"]
         spotId = spot["id"]
         userDistributionInSpots[spotId] = {"id": spotId, "lids": spotLids, "users": []}

     for userObject in userToLocationVector:
         userId = userObject["user"]
         for lid in userObject["locations"]:
             if lid not in lidToSpotIdMap:
                 continue
             userDistributionInSpots[lidToSpotIdMap[lid]]["users"].append(userId)

     for spotId, spotRecord in userDistributionInSpots.items():
         print(spotId)
         FileIO.writeToFileAsJson(spotRecord, spotsWithUsersFile)
0
def plotHashtagSourcesOnMap(timeRange, outputFolder):
    """Plot how many hashtags have their source at each location id.

    Hashtag objects are read from ``hashtagsFile % (outputFolder,
    '%s_%s' % timeRange)``. For each object with occurrences in its highest
    active region, the source returned by ``getSourceLattice`` is printed
    with a running counter and tallied under its lid. The tallies are then
    plotted with ``plotPointsOnWorldMap``, sorted by ascending count, using
    the count as the colour value and the ``'Paired'`` colormap.

    Args:
        timeRange: Two-item value formatted as ``'%s_%s'`` into the file name.
        outputFolder: Folder component substituted into ``hashtagsFile``.

    Returns:
        None. If no hashtag yields a source, nothing is plotted.
    """
    i = 1
    distribution = defaultdict(int)
    inputFile = hashtagsFile % (outputFolder, '%s_%s' % timeRange)
    for hashtagObject in FileIO.iterateJsonFromFile(inputFile):
        occuranesInHighestActiveRegion, _ = getOccuranesInHighestActiveRegion(hashtagObject, True)
        if occuranesInHighestActiveRegion:
            source, _ = getSourceLattice(occuranesInHighestActiveRegion)
            print('%s %s' % (i, source))
            i += 1
            distribution[getLidFromLocation(source)] += 1

    # zip(*[]) cannot be unpacked into two names, so bail out when there is
    # nothing to plot instead of failing with a ValueError.
    if not distribution:
        return

    points, colors = zip(*[
        (getLocationFromLid(lid), count)
        for lid, count in sorted(distribution.items(), key=itemgetter(1))
    ])
    cm = matplotlib.cm.get_cmap('Paired')
    sc = plotPointsOnWorldMap(points, c=colors, cmap=cm, lw=0)
    plt.colorbar(sc)
    plt.show()
Esempio n. 7
0
 def writeUserDistributionInSpots(spotsFile, userToLocationVector):
     """Assign each user to the single spot holding most of their check-ins.

     Spots are read from ``spotsFile``. For every user, the counts stored in
     ``"locations"`` are summed per spot (only lids that belong to a spot are
     considered) and the user id is appended to the ``"users"`` list of the
     spot with the largest total. Users with no lid in any spot are left out.
     All spot records are then written to ``spotsFile + "_users"``.

     Args:
         spotsFile: Path of the spots file; also the prefix of the output path.
         userToLocationVector: Iterable of dicts with ``"user"`` and
             ``"locations"`` (lid -> count) keys.
     """
     spotsWithUsersFile = spotsFile + "_users"
     lidToSpotIdMap = {}
     userDistributionInSpots = defaultdict(list)

     for spot in FileIO.iterateJsonFromFile(spotsFile):
         spotLids = spot["spot"]
         for location, _ in spotLids:
             lidToSpotIdMap[getLidFromLocation(location)] = spot["id"]
         userDistributionInSpots[spot["id"]] = {"id": spot["id"], "lids": spotLids, "users": []}

     for userObject in userToLocationVector:
         userId = userObject["user"]
         userVector = userObject["locations"]
         spotDistribution = defaultdict(int)
         for lid in userVector:
             if lid not in lidToSpotIdMap:
                 continue
             spotDistribution[lidToSpotIdMap[lid]] += userVector[lid]
         if not spotDistribution:
             continue
         # Ascending sort: the last entry is the spot with the highest total.
         ranked = sorted(spotDistribution.items(), key=itemgetter(1))
         bestSpotId = ranked[-1][0]
         userDistributionInSpots[bestSpotId]["users"].append(userId)

     for spotId, spotRecord in userDistributionInSpots.items():
         FileIO.writeToFileAsJson(spotRecord, spotsWithUsersFile)
 def mapper(self, key, line):
     """Emit ``(lid, user)`` for each line that ``parseData`` accepts.

     The lid comes from ``getLidFromLocation`` applied to the record's
     ``'l'`` value; the user is the record's ``'u'`` value. ``key`` is unused.
     """
     record = parseData(line)
     if not record:
         return
     yield getLidFromLocation(record['l']), record['u']
 def collectLocationsMapper(self, key, line):
     """Emit ``(user, lid)`` for each line that ``parseData`` accepts.

     The user is the record's ``'u'`` value; the lid comes from
     ``getLidFromLocation`` applied to the record's ``'l'`` value. ``key`` is
     unused.
     """
     record = parseData(line)
     if not record:
         return
     user, location = record['u'], record['l']
     yield user, getLidFromLocation(location)
 def mapper(self, key, line):
     """Emit ``(lid, "<user>_<weekday>_<block>")`` for each parsed line.

     ``<weekday>`` is ``weekday()`` of the record's ``'t'`` value and
     ``<block>`` is its ``hour`` floor-divided by 4, i.e. the index of the
     4-hour block (0-5 for hours 0-23). ``key`` is unused.
     """
     data = parseData(line)
     if not data:
         return
     d = data['t']  # presumably a datetime; only .weekday() and .hour are used
     # Explicit floor division keeps the block index an integer regardless of
     # the interpreter's `/` semantics (Python 3, or `__future__` division).
     dayBlock = d.hour // 4
     yield getLidFromLocation(data['l']), '_'.join([str(data['u']), str(d.weekday()), str(dayBlock)])
Esempio n. 11
0
def addVenuesToDB():
    """Insert one document per line of ``venuesFile`` into ``venuesCollection``.

    Every line is stripped and split on tabs, then mapped as follows:
    column 0 -> ``_id`` (int), column 1 -> ``n``, columns 2-3 -> ``l``
    (two floats) and ``lid`` (``getLidFromLocation`` of that pair),
    columns 4 up to the last two joined with spaces -> ``m``, and the last
    two columns -> ``tp`` and ``tc`` (ints).

    Loading is best-effort: any exception raised while handling a line is
    reported on stdout together with a running failure count, and processing
    continues with the next line.
    """
    failures = 0
    # The context manager closes the file even if iteration is interrupted.
    with open(venuesFile) as venues:
        for line in venues:
            fields = line.strip().split('\t')
            try:
                lat, lon = float(fields[2]), float(fields[3])
                venuesCollection.insert({
                    '_id': int(fields[0]),
                    'n': fields[1],
                    'l': [lat, lon],
                    'lid': getLidFromLocation([lat, lon]),
                    'm': ' '.join(fields[4:-2]),
                    'tp': int(fields[-2]),
                    'tc': int(fields[-1]),
                })
            except Exception:
                # Single-argument form prints the same text under Python 2 and 3.
                print('%s %s %s' % (failures, 'Exception while processing:', fields))
                failures += 1
Esempio n. 12
0
def addCheckinsToDB():
    """Insert one document per line of ``checkinsFile`` into ``checkinsCollection``.

    Every line is stripped and split on tabs. A line that does not have
    exactly 7 fields gets one ``None`` appended; if it then has 7 fields
    (i.e. it originally had 6) it is inserted with ``pid`` set to ``None``.
    Lines with any other field count are skipped without a message.

    Field mapping: column 1 -> ``_id`` (int), column 0 -> ``u`` (int),
    columns 2-3 -> ``l`` (two floats) and ``lid`` (``getLidFromLocation`` of
    that pair), column 4 -> ``t`` (parsed with ``dateutil.parser.parse``),
    column 5 -> ``x``, column 6 -> ``pid``.

    Loading is best-effort: any exception raised while handling a line is
    reported on stdout together with a running failure count, and processing
    continues with the next line.
    """
    failures = 0
    # The context manager closes the file even if iteration is interrupted.
    with open(checkinsFile) as checkins:
        for line in checkins:
            fields = line.strip().split('\t')
            # Pad rows that are missing the trailing field.
            if len(fields) != 7:
                fields.append(None)
            if len(fields) != 7:
                continue
            try:
                lat, lon = float(fields[2]), float(fields[3])
                checkinsCollection.insert({
                    '_id': int(fields[1]),
                    'u': int(fields[0]),
                    'l': [lat, lon],
                    'lid': getLidFromLocation([lat, lon]),
                    't': dateutil.parser.parse(fields[4]),
                    'x': fields[5],
                    'pid': fields[6],
                })
            except Exception:
                # Single-argument form prints the same text under Python 2 and 3.
                print('%s %s %s' % (failures, 'Exception while processing:', fields))
                failures += 1