Example #1
0
def get_HD_from_UTMLL(u1, u2, radius=earthRadiusKMs):
    """Return the haversine distance between two '_'-separated location ids.

    Each id must split on '_' into exactly three fields; the first two are
    parsed as floats (latitude, longitude) and the third is ignored.
    ``radius`` is forwarded unchanged to ``getHaversineDistance``.
    """
    def _to_point(utm_id):
        # Three-way unpacking rejects ids with any other number of fields.
        lat_text, long_text, _ = utm_id.split('_')
        return (float(lat_text), float(long_text))

    first_point = _to_point(u1)
    second_point = _to_point(u2)
    return getHaversineDistance(first_point, second_point, radius)
Example #2
0
def getHDfromUTMLL(u1, u2, radius):
    """Compute the haversine distance between two '_'-separated location ids.

    Both ids are expected to hold exactly three '_'-separated fields, of
    which the first two are converted to floats (latitude, longitude).
    The resulting pairs and ``radius`` are handed to ``getHaversineDistance``.
    """
    points = []
    for utm_id in (u1, u2):
        lat_text, long_text, _ = utm_id.split('_')
        points.append((float(lat_text), float(long_text)))
    return getHaversineDistance(points[0], points[1], radius)
Example #3
0
 def map_line_to_user(self, key, line):
     """Yield a distance-decayed score for every (user, creator) pair in ``line``.

     Pairs come from ``ReadFile.read_json_yield_user``. For each one the
     haversine distance between the creator position (``creator[1]``,
     ``creator[2]``) and the query position stored on ``self`` is turned
     into ``(dmin / (dist + dmin)) ** 1.01``. ``key`` is not used.

     Yields ``user, [score, creator[3], creator[4]]``.
     """
     for user, creator in ReadFile.read_json_yield_user(line):
         creator_point = [creator[1], creator[2]]
         query_point = [self.query_lat, self.query_lng]
         dist = getHaversineDistance(creator_point, query_point)
         # Equals 1 when dist == 0 and shrinks towards 0 as dist grows
         # (for a positive dmin).
         decay = self.dmin / (dist + self.dmin)
         yield user, [decay ** 1.01, creator[3], creator[4]]
Example #4
0
def addLocationToLocationDistanceToDB():
    i = 0
    for data in locationGraphIterator():
        try:
            d = map(float, data['e'].split())
            d = getHaversineDistance(d[0:2],d[2:])
            locationToLocationCollection.insert({'_id': data['e'], 'u': data['w'], 'd': d})
        except Exception as e: print i, 'Exception while processing:', data; i+=1
Example #5
0
 def read_json_yield_uid1(line):
     """Yield ``user_id, [tag, distance]`` for each distinct tag in a record.

     ``line`` is decoded with ``cjson``. ``distance`` is the haversine
     distance between the list creator's position and the user's position,
     computed once and reused for every tag.
     """
     record = cjson.decode(line)
     uid = record['user_id']
     tag_list = record['tag']
     creator_point = [record['list_creator_lat'], record['list_creator_lng']]
     user_point = [record['user_lat'], record['user_lng']]
     separation = getHaversineDistance(creator_point, user_point)
     # set() collapses repeated tags so each one is emitted once.
     for tag in set(tag_list):
         yield uid, [tag, separation]
Example #6
0
 def read_json_yield_uid1(line):
     """Emit one ``user_id, [tag, distance]`` pair per unique tag of a record.

     The record is a cjson-encoded line; the distance is the haversine
     distance from the list creator's coordinates to the user's coordinates.
     """
     decoded = cjson.decode(line)
     owner = decoded['user_id']
     unique_tags = set(decoded['tag'])
     creator_lat = decoded['list_creator_lat']
     creator_lng = decoded['list_creator_lng']
     member_lat = decoded['user_lat']
     member_lng = decoded['user_lng']
     gap = getHaversineDistance(
         [creator_lat, creator_lng],
         [member_lat, member_lng],
     )
     for label in unique_tags:
         yield owner, [label, gap]
Example #7
0
 def read_json_yield_uid2(line):
     """Yield ``user_id, [tag, distance]`` using distance to ``target_loc``.

     The haversine distance between ``target_loc`` and the user's position
     is reduced by 10 and floored at 0: anything within 10 units counts as
     distance 0. One pair is yielded per distinct tag.
     """
     record = cjson.decode(line)
     uid = record['user_id']
     tag_list = record['tag']
     # NOTE(review): the creator coordinates are read but never used; the
     # distance below is measured from target_loc instead.
     creator_lat = record['list_creator_lat']
     creator_lng = record['list_creator_lng']
     user_point = [record['user_lat'], record['user_lng']]
     offset = getHaversineDistance(target_loc, user_point)
     if offset > 10:
         offset -= 10
     elif offset <= 10:
         offset = 0
     for tag in set(tag_list):
         yield uid, [tag, offset]
Example #8
0
 def read_json_yield_uid2(line):
     """Emit ``user_id, [tag, distance]`` pairs relative to ``target_loc``.

     The distance is the haversine distance from ``target_loc`` to the
     user's coordinates with the first 10 units discounted (values of 10
     or less become 0). Each distinct tag of the record yields one pair.
     """
     decoded = cjson.decode(line)
     owner = decoded['user_id']
     unique_tags = set(decoded['tag'])
     # NOTE(review): these two values are looked up but not used afterwards.
     creator_lat = decoded['list_creator_lat']
     creator_lng = decoded['list_creator_lng']
     member_lat = decoded['user_lat']
     member_lng = decoded['user_lng']
     remaining = getHaversineDistance(target_loc, [member_lat, member_lng])
     if remaining > 10:
         remaining -= 10
     elif remaining <= 10:
         remaining = 0
     for label in unique_tags:
         yield owner, [label, remaining]
Example #9
0
def getLocalityIndexAtK(occurances, kValue):
    ''' Locality index at k for a hashtag: the minimum radius that covers a fraction k of its occurrences.
            A high locality index suggests the hashtag was global; a small index suggests it was local.
        To find the locality index at k, we must find a point that is closest to a fraction k of the occurrences.
            Brute force requires nC2 complexity.
            Hence a source lattice is searched for over a sequence of lattice accuracies instead.

        occurances: iterable of points accepted by getLatticeLid.
        kValue: fraction that is compared against observed/total occurrences.
        Returns (d, lattice): lattice is the location chosen as the source, and d is the haversine
            distance from it to the last lattice visited while accumulating occurrences.
        NOTE(review): with an empty `occurances` the lattice search below raises KeyError, because
            no candidate is ever stored and the 'lattices' key is missing.
    '''
    def getLatticeThatGivesMinimumLocalityIndexAtK():
        # Refine the source lattice over successive accuracies; each pass only
        # receives the occurrences retained by the previous pass.
        # NOTE(review): presumably the accuracies go from coarse to fine - confirm against getLatticeLid.
        occurancesDict = {'occurances': occurances}
        for accuracy in [4, 2, 1, 0.5, ACCURACY]: occurancesDict = getLatticeThatGivesMinimumLocalityIndexAtKForAccuracy(occurancesDict['occurances'], accuracy)
        return occurancesDict['sourceLattice']
    def getLatticeThatGivesMinimumLocalityIndexAtKForAccuracy(occurances, accuracy):
        # Group occurrences by the lattice they fall into at this accuracy.
        occurancesDistributionInHigherLattice, distanceMatrix = defaultdict(list), defaultdict(dict)
        for oc in occurances: occurancesDistributionInHigherLattice[getLatticeLid(oc, accuracy)].append(oc)
        # Candidate source lattices, most populated first.
        higherLattices = sorted(occurancesDistributionInHigherLattice.iteritems(), key=lambda t: len(t[1]), reverse=True)
        # Symmetric pairwise haversine distances between lattice ids.
        for hl1, hl2 in combinations(occurancesDistributionInHigherLattice, 2): distanceMatrix[hl1][hl2] = distanceMatrix[hl2][hl1] = getHaversineDistance(getLocationFromLid(hl1.replace('_', ' ')), getLocationFromLid(hl2.replace('_', ' ')))
        # Replace each row by its (neighbor, distance) pairs sorted nearest first.
        for k,v in distanceMatrix.iteritems(): distanceMatrix[k] = sorted(v.iteritems(), key=itemgetter(1))
        occurancesToReturn = []
        # The empty tuple is the initial "no best candidate yet" distance.
        # NOTE(review): this relies on Python 2 mixed-type ordering, where a tuple
        # compares greater than any number - confirm before porting.
        currentHigherLatticeSet, totalOccurances = {'distance': ()}, float(len(occurances))
        for hl, occs  in higherLattices: 
            higherLatticeSet = {'distance': 0, 'observedOccurances': len(occs), 'lattices': [hl], 'sourceLattice': hl}
            # Absorb nearest neighbors until half of the occurrences are covered
            # (the 0.5 is fixed here and independent of kValue), or until the summed
            # distance is no longer below the best candidate found so far.
            while currentHigherLatticeSet['distance']>higherLatticeSet['distance'] and higherLatticeSet['observedOccurances']/totalOccurances<0.5:
                # Pop the nearest remaining neighbor of hl; this consumes the row.
                (l, d) = distanceMatrix[hl][0]; 
                distanceMatrix[hl]=distanceMatrix[hl][1:]
                higherLatticeSet['distance']+=d
                higherLatticeSet['lattices'].append(l)
                higherLatticeSet['observedOccurances']+=len(occurancesDistributionInHigherLattice[l])
            # Keep the candidate with the strictly smallest summed distance.
            # The None check can never be true: the variable always holds a dict.
            if currentHigherLatticeSet==None or currentHigherLatticeSet['distance']>higherLatticeSet['distance']: currentHigherLatticeSet=higherLatticeSet
        # Only occurrences inside the winning group of lattices are passed on.
        for l in currentHigherLatticeSet['lattices']: occurancesToReturn+=occurancesDistributionInHigherLattice[l]
    #    return {'distance': currentHigherLatticeSet['distance'], 'occurances': occurancesToReturn, 'sourceLattice': getLocationFromLid(currentHigherLatticeSet['sourceLattice'].replace('_', ' '))}
        return {'occurances': occurancesToReturn, 'sourceLattice': getLocationFromLid(currentHigherLatticeSet['sourceLattice'].replace('_', ' '))}
    # Count occurrences per lattice at the final accuracy.
    occurancesDistributionInHigherLattice = defaultdict(int)
    for oc in occurances: occurancesDistributionInHigherLattice[getLatticeLid(oc, ACCURACY)]+=1
    totalOccurances, distance, observedOccuraces = float(len(occurances)), 0, 0
    lattice = getLatticeThatGivesMinimumLocalityIndexAtK()
    # (location, distance from the source lattice, occurrence count), nearest first.
    sortedLatticeObjects = sorted([(getLocationFromLid(k.replace('_', ' ')), getHaversineDistance(lattice, getLocationFromLid(k.replace('_', ' '))), v) for k, v in occurancesDistributionInHigherLattice.iteritems()],
                 key=itemgetter(1))
    # Walk outwards until the covered fraction reaches kValue; if it never does,
    # d ends up as the distance of the farthest lattice.
    for l, d, oc in sortedLatticeObjects:
        distance=d; observedOccuraces+=oc
        if observedOccuraces/totalOccurances>=kValue: break
    # `distance` mirrors `d`; the loop variable is what gets returned.
    return (d, lattice)
Example #10
0
    def plot_correlation_between_influence_similarity_and_distance(model_ids, distance_accuracy=500):
        ''' For each model id, plot the mean influence similarity against the binned distance between locations.

            Records are read from the per-model JSON file; each one holds a location and a list of
            (neighbor location, {influence type: similarity}) entries. The haversine distance between
            the two locations is binned using distance_accuracy, similarities are averaged per bin and
            influence type, and the outgoing and incoming influence curves are drawn on two stacked
            subplots (211 and 212) with a logarithmic y axis. The figure is saved after every model.

            NOTE(review): there is no self/cls parameter, so this is presumably used as a static
            method - the decorator, if any, is outside this view.
            NOTE(review): the file name comes from GeneralMethods.get_method_id() and does not include
            model_id, so later models may overwrite earlier images - confirm.
        '''
        # NOTE(review): get_larger_lid is defined but never called in this function.
        def get_larger_lid(lid): return getLatticeLid(getLocationFromLid(lid.replace('_', ' ')), 10)
        for model_id in model_ids:
            # influence type -> list of [binned distance, similarity]; rebuilt for every model.
            mf_influence_type_to_tuo_distance_and_similarity = defaultdict(list)
            for line_count, (location, tuo_neighbor_location_and_mf_influence_type_and_similarity) in \
                    enumerate(FileIO.iterateJsonFromFile(tuo_location_and_tuo_neighbor_location_and_mf_influence_type_and_similarity_file%model_id)):
                # Progress output: index of the record being processed.
                print line_count
                for neighbor_location, mf_influence_type_to_similarity in \
                        tuo_neighbor_location_and_mf_influence_type_and_similarity:
                    distance = getHaversineDistance(getLocationFromLid(location.replace('_', ' ')), getLocationFromLid(neighbor_location.replace('_', ' ')))
                    # Bin to the upper edge of the distance_accuracy-wide bucket.
                    # NOTE(review): relies on Python 2 integer division of int/int - confirm before porting.
                    distance = int(distance)/distance_accuracy*distance_accuracy + distance_accuracy
                    for influence_type, similarity in mf_influence_type_to_similarity.iteritems():
                        mf_influence_type_to_tuo_distance_and_similarity[influence_type].append([distance, similarity])
            # First of two stacked subplots; incremented to 212 for the second influence type.
            subpot_id = 211
            for influence_type in \
                    [InfluenceMeasuringModels.TYPE_OUTGOING_INFLUENCE, InfluenceMeasuringModels.TYPE_INCOMING_INFLUENCE]:
                tuo_distance_and_similarity = mf_influence_type_to_tuo_distance_and_similarity[influence_type]
                # Group by binned distance (sorted first, as groupby requires) and keep only the
                # similarity column of each group; zip(...)[1] indexing needs Python 2's list-returning zip.
                tuo_distance_and_similarities =  [(distance, zip(*ito_tuo_distance_and_similarity)[1])
                                                    for distance, ito_tuo_distance_and_similarity in groupby(
                                                            sorted(tuo_distance_and_similarity, key=itemgetter(0)),
                                                            key=itemgetter(0)
                                                        )
                                                ]
                plt.subplot(subpot_id)
                x_distances, y_similarities = [], []
                for distance, similarities in tuo_distance_and_similarities:
#                    similarities=filter_outliers(similarities)
                    # One point per distance bin: the mean similarity in that bin.
                    x_distances.append(distance), y_similarities.append(np.mean(similarities))
    #            x_distances, y_similarities = splineSmooth(x_distances, y_similarities)
                plt.semilogy(x_distances, y_similarities, c = InfluenceMeasuringModels.INFLUENCE_PROPERTIES[influence_type]['color'], 
                         lw=2, marker = InfluenceMeasuringModels.INFLUENCE_PROPERTIES[influence_type]['marker'])
                plt.ylabel(InfluenceMeasuringModels.INFLUENCE_PROPERTIES[influence_type]['label'], fontsize=13)
                subpot_id+=1
            # The x label is set once, so it applies to the last (bottom) subplot only.
            # NOTE(review): the label says miles, but getHaversineDistance is called without an
            # explicit radius - confirm the helper's default unit.
            plt.xlabel('Distance (Miles)', fontsize=13)
#            plt.show()
            savefig('images/%s.png'%(GeneralMethods.get_method_id()))
Example #11
0
 def getLatticeThatGivesMinimumLocalityIndexAtKForAccuracy(occurances, accuracy):
     ''' Pick the lattice (at the given accuracy) whose nearest neighbors cover half of the occurrences
         with the smallest summed distance.

         occurances: iterable of points accepted by getLatticeLid.
         accuracy: lattice accuracy forwarded to getLatticeLid.
         Returns a dict with 'occurances' (the occurrences that fall in the winning group of lattices)
             and 'sourceLattice' (the location of the winning lattice, via getLocationFromLid).
         NOTE(review): with an empty `occurances` no candidate is stored and the lookup of
             'lattices' below raises KeyError.
     '''
     # Group occurrences by the lattice they fall into at this accuracy.
     occurancesDistributionInHigherLattice, distanceMatrix = defaultdict(list), defaultdict(dict)
     for oc in occurances: occurancesDistributionInHigherLattice[getLatticeLid(oc, accuracy)].append(oc)
     # Candidate source lattices, most populated first.
     higherLattices = sorted(occurancesDistributionInHigherLattice.iteritems(), key=lambda t: len(t[1]), reverse=True)
     # Symmetric pairwise haversine distances between lattice ids.
     for hl1, hl2 in combinations(occurancesDistributionInHigherLattice, 2): distanceMatrix[hl1][hl2] = distanceMatrix[hl2][hl1] = getHaversineDistance(getLocationFromLid(hl1.replace('_', ' ')), getLocationFromLid(hl2.replace('_', ' ')))
     # Replace each row by its (neighbor, distance) pairs sorted nearest first.
     for k,v in distanceMatrix.iteritems(): distanceMatrix[k] = sorted(v.iteritems(), key=itemgetter(1))
     occurancesToReturn = []
     # The empty tuple is the initial "no best candidate yet" distance.
     # NOTE(review): this relies on Python 2 mixed-type ordering, where a tuple
     # compares greater than any number - confirm before porting.
     currentHigherLatticeSet, totalOccurances = {'distance': ()}, float(len(occurances))
     for hl, occs  in higherLattices: 
         higherLatticeSet = {'distance': 0, 'observedOccurances': len(occs), 'lattices': [hl], 'sourceLattice': hl}
         # Absorb nearest neighbors until half of the occurrences are covered, or until
         # the summed distance is no longer below the best candidate found so far.
         while currentHigherLatticeSet['distance']>higherLatticeSet['distance'] and higherLatticeSet['observedOccurances']/totalOccurances<0.5:
             # Pop the nearest remaining neighbor of hl; this consumes the row.
             (l, d) = distanceMatrix[hl][0]; 
             distanceMatrix[hl]=distanceMatrix[hl][1:]
             higherLatticeSet['distance']+=d
             higherLatticeSet['lattices'].append(l)
             higherLatticeSet['observedOccurances']+=len(occurancesDistributionInHigherLattice[l])
         # Keep the candidate with the strictly smallest summed distance.
         # The None check can never be true: the variable always holds a dict.
         if currentHigherLatticeSet==None or currentHigherLatticeSet['distance']>higherLatticeSet['distance']: currentHigherLatticeSet=higherLatticeSet
     # Only occurrences inside the winning group of lattices are returned.
     for l in currentHigherLatticeSet['lattices']: occurancesToReturn+=occurancesDistributionInHigherLattice[l]
 #    return {'distance': currentHigherLatticeSet['distance'], 'occurances': occurancesToReturn, 'sourceLattice': getLocationFromLid(currentHigherLatticeSet['sourceLattice'].replace('_', ' '))}
     return {'occurances': occurancesToReturn, 'sourceLattice': getLocationFromLid(currentHigherLatticeSet['sourceLattice'].replace('_', ' '))}
Example #12
0
def getMeanDistanceFromSource(source, llids):
    """Return the mean haversine distance from ``source`` to every point in ``llids``."""
    distances = [getHaversineDistance(source, point) for point in llids]
    return np.mean(distances)

def getLocalityIndexAtK(occurances, kValue):
def getRadius(locations):
    """Return the mean distance of the non-outlier locations from their center of mass.

    The center is obtained from ``getCenterOfMass`` at ``LATTICE_ACCURACY``.
    Distances above the upper bound reported by ``getOutliersRangeUsingIRQ``
    are excluded before averaging.
    """
    meanLid = getCenterOfMass(locations, accuracy=LATTICE_ACCURACY)
    distances = [getHaversineDistance(meanLid, p) for p in locations]
    _, upperBoundForDistance = getOutliersRangeUsingIRQ(distances)
    # Build a real list: np.mean cannot average the lazy iterator that
    # filter() returns on Python 3, while a list behaves the same everywhere.
    withinBound = [d for d in distances if d <= upperBoundForDistance]
    return np.mean(withinBound)
Example #14
0
    dist=getHaversineDistance(corr1,corr2,radius)
    return dist
# Radius handed to getHaversineDistance below.
# NOTE(review): presumably this makes the returned distance come out in
# miles - confirm against the helper.
earthRadiusMiles = 3958.761

# Records whose user location lies within this distance of the reference
# point are kept.
maxDistance = 20

# Reference point every user location is measured against.
# NOTE(review): presumably Dallas, going by the output file name.
# Previously tried alternatives:
#   lat_c,lng_c = line['list_creator_lat'],line['list_creator_lng']  (per record)
#   lat_c,lng_c = 40.705631,-73.978003
#   lat_c,lng_c = 37.77493,-122.419416
#   lat_c,lng_c = 29.760193,-95.36939
#   lat_c,lng_c = 30.627977,-96.334407
lat_c, lng_c = 32.78014, -96.800451

# A disabled variant also split the remaining records into three more files:
#   50 < dist < 500    -> /spare/wei/folk/dist_greater_than_50_less_than_500_2
#   500 < dist < 3000  -> /spare/wei/folk/dist_greater_than_500_less_than_3000_2
#   otherwise          -> /spare/wei/folk/dist_greater_than_3000_2
outfile = '/spare/wei/folk/dallas_tagging'

# Context managers guarantee both files are flushed and closed, even if a
# record fails to decode. The output is opened first, as before, and the
# name `outfile` still ends up bound to the file object.
with open(outfile, 'w') as outfile:
    # `infile` must be defined earlier in the script.
    with open(infile, 'r') as source:
        for line in source:
            line = cjson.decode(line)
            lat_u, lng_u = line['user_lat'], line['user_lng']
            dist = getHaversineDistance([lat_u, lng_u], [lat_c, lng_c], earthRadiusMiles)
            if dist <= maxDistance:
                outfile.write(cjson.encode(line) + '\n')

 def _haversine_distance(self, location, neighbor_location):
     """Return the haversine distance between two location ids.

     Both ids are first converted with
     ``UTMConverter.getLatLongUTMIdInLatLongForm``; the converted values
     are passed to ``getHaversineDistance`` in the same order.
     """
     to_lat_long = UTMConverter.getLatLongUTMIdInLatLongForm
     origin = to_lat_long(location)
     neighbor = to_lat_long(neighbor_location)
     return getHaversineDistance(origin, neighbor)
Example #16
0
 def map_line_to_user(self, key, line):
     """Score each (user, creator) pair of ``line`` by closeness to the query point.

     For every pair from ``ReadFile.read_json_yield_user`` the haversine
     distance between the creator coordinates (items 1 and 2) and the
     query coordinates on ``self`` is converted to
     ``(dmin / (dist + dmin)) ** 1.01``. ``key`` is ignored.

     Yields ``user, [score, creator[3], creator[4]]``.
     """
     for uid, creator_row in ReadFile.read_json_yield_user(line):
         separation = getHaversineDistance(
             [creator_row[1], creator_row[2]],
             [self.query_lat, self.query_lng],
         )
         base = self.dmin / (separation + self.dmin)
         score = base ** 1.01
         yield uid, [score, creator_row[3], creator_row[4]]