Example #1
    def compute_latencies(self):
        self.latencies = {}
        self.wlatencies = {}
        self.max_pole_dist2 = {}

        for c in self.cluster:
            latency = 0.0
            weighted_latency = 0.0
            max_pole_dist2 = 0.0
            current_node = north_pole

            for j in self.cluster[c]:
                latency += haversine(current_node, self.X[j])
                weighted_latency += latency * self.gifts.Weight[j]
                current_node = self.X[j]
                if self.X[j][1] > max_pole_dist2:
                    max_pole_dist2 = self.X[j][1]

            latency += haversine(current_node, north_pole)
            weighted_latency += latency * sleigh_weight
            max_pole_dist2 *= 2

            self.wlatencies[c] = weighted_latency
            self.latencies[c] = latency
            self.max_pole_dist2[c] = max_pole_dist2
Example #2
def find_closest(point, segments, haversine_distance=False):
    """
    Find the linestring in segments that has a start or end point closest to point.

    Returns a tuple (segment, "start" or "end", distance); the distance is in
    metres when haversine_distance is set, otherwise in the geometry's coordinate units.
    """
    closest_segment = None
    closest_distance = 0
    closest_location = None
    for seg in segments:
        start = Point(seg.coords[0])
        end = Point(seg.coords[-1])
        if haversine_distance:
            start_distance = haversine((start.y, start.x), (point.y, point.x)) * 1000
            end_distance = haversine((end.y, end.x), (point.y, point.x)) * 1000
        else:
            start_distance = point.distance(start)
            end_distance = point.distance(end)

        if closest_segment is None or start_distance < closest_distance:
            closest_distance = start_distance
            closest_segment = seg
            closest_location = "start"

        if end_distance < closest_distance:
            closest_distance = end_distance
            closest_segment = seg
            closest_location = "end"

    return closest_segment, closest_location, closest_distance
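A hedged usage sketch for find_closest: shapely supplies the geometries (its Points are (x=lon, y=lat)), and haversine is assumed to be imported in the same module, as the function body implies.

from shapely.geometry import LineString, Point

segments = [
    LineString([(-0.1278, 51.5074), (-0.1425, 51.5010)]),  # (lon, lat) pairs
    LineString([(2.3522, 48.8566), (2.2945, 48.8584)]),
]
query = Point(-0.1280, 51.5070)

seg, which_end, dist_m = find_closest(query, segments, haversine_distance=True)
# which_end is "start" or "end"; dist_m is in metres because the
# kilometre result of haversine() is multiplied by 1000 inside find_closest.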
Example #3
def freq_counts(entries, frame):
    district = 0
    min_dis = 25.0
    time = int(entries['Hour of the day'].get()) #The .get() method is used to fetch the values from the GUI text box
    myzip = entries['Zipcode'].get()
    zipcd = geocoder.google(myzip) #Converts the entered zip code to latitudes and longitudes.
    for key in DistLatLon:
        if min_dis == 0.0:
            break
        i = DistLatLon[key]
        pzip = geocoder.google(i)
        if (pzip.latlng):
            dis = haversine(zipcd.latlng,pzip.latlng,miles = True)#Haversine function calculates the distance 
            #between two locations given their latitudes and longitudes.
            if dis < min_dis:
                min_dis = dis
                district = key#Identifying which district the given zipcode falls under.
        
        else: #The else block does the same thing as the if block. This is due to a bug in the geocoder library.
        #Sometimes, it doesn't fetch the latitudes and longitudes as expected.
            pzip = geocoder.google(i)
            dis = haversine(zipcd.latlng,pzip.latlng,miles = True)
            if dis < min_dis:
                min_dis = dis
                district = key
    #Filtering the data frames with the district and time given.
    frame= frame[(frame["DC_DIST"]==district) & (frame["DISPATCH_TIME"]==time)]
    frame=frame[["DC_DIST","DISPATCH_TIME","TEXT_GENERAL_CODE"]]
    #Calling the plotting function to set it ready for plotting.
    plot_top_crimes(frame, 'TEXT_GENERAL_CODE','Top Crime Categories','category.png')
Example #4
def location_chunk(bbox):
    # x is lat space, y is lon space
    max_x_dist = haversine((bbox["min_lat"], bbox["min_lon"]), (bbox["max_lat"], bbox["min_lon"]))
    max_y_dist = haversine((bbox["min_lat"], bbox["min_lon"]), (bbox["min_lat"], bbox["max_lon"]))

    x_nums, y_nums = 1000, 1000 # number of cells in each axis
    x_cell_size = max_x_dist / x_nums # in km
    y_cell_size = max_y_dist / y_nums # in km

    session = db_connection.SESSIONMAKER()
    pickups = session.query(db_model_2.Pickup.id, func.ST_X(cast(db_model_2.Pickup.location, Geometry)), func.ST_Y(cast(db_model_2.Pickup.location, Geometry)))    

    print(pickups.count())
    count = 0
    for pickup in pickups:
        x_dist = haversine((bbox["min_lat"], bbox["min_lon"]), (pickup[2], bbox["min_lon"]))# dist along lat
        y_dist = haversine((bbox["min_lat"], bbox["min_lon"]), (bbox["min_lat"], pickup[1]))# dist along lon

        session.query(db_model_2.Pickup).\
            filter(db_model_2.Pickup.id == pickup[0]).\
            update({
                    "x_coordinate" : x_dist // x_cell_size,
                    "y_coordinate" : y_dist // y_cell_size
                    })
        if count % 10000 == 0:
            print(count)
        count += 1
    session.commit()
Example #5
 def get_pos(self):
     walked_distance = 0.0
     if not self.is_paused:
         time_passed = time.time()
     else:
         time_passed = self._last_paused_timestamp
     time_passed_distance = self.speed * abs(time_passed - self._timestamp - self._paused_total)
     # check if there are any steps to take https://github.com/th3w4y/PokemonGo-Bot/issues/27
     if self.walk_steps(self.points):
         steps_dict = {}
         for step in self.walk_steps(self.points):
             walked_distance += haversine.haversine(*step)*1000
             steps_dict[walked_distance] = step
         for walked_end_step in sorted(steps_dict.keys()):
             if walked_end_step >= time_passed_distance:
                 break
         step_distance = haversine.haversine(*steps_dict[walked_end_step])*1000
         if walked_end_step >= time_passed_distance:
             percentage_walked = (time_passed_distance - (walked_end_step - step_distance)) / step_distance
         else:
             percentage_walked = 1.0
         result = self.calculate_coord(percentage_walked, *steps_dict[walked_end_step])
         self._last_pos = tuple(result[0])
         return self._last_pos
     else:
         # otherwise return the destination https://github.com/th3w4y/PokemonGo-Bot/issues/27
         self._last_pos = tuple(self.points[-1])
         return self._last_pos
Example #6
	def cost(job):
		pickup_tuple = (job['pickup']['lat'], job['pickup']['lng'])
		dropoff_tuple = (job['dropoff']['lat'], job['dropoff']['lng'])
		pickup_dist = haversine(pos, pickup_tuple)
		delivery_dist = haversine(pickup_tuple, dropoff_tuple)
		total_comp = travel_cost * (pickup_dist + delivery_dist) + delivery_cost * (delivery_dist)
		return total_comp / (pickup_dist + delivery_dist)
Example #7
 def get_pos(self):
     if self.speed > self.get_total_distance():
         self._last_pos = self.destination
         self._last_step = len(self._step_keys)-1
     if self.get_last_pos() == self.destination:
         return self.get_last_pos()
     distance = self.speed
     origin = Point(*self._last_pos)
     ((so_lat, so_lng), (sd_lat, sd_lng)) = self._step_dict[self._step_keys[self._last_step]]
     bearing = self._calc_bearing(so_lat, so_lng, sd_lat, sd_lng)
     while haversine.haversine(self._last_pos, (sd_lat, sd_lng))*1000 < distance:
         distance -= haversine.haversine(self._last_pos, (sd_lat, sd_lng))*1000
         self._last_pos = (sd_lat, sd_lng)
         if self._last_step < len(self._step_keys)-1:
             self._last_step += 1
             ((so_lat, so_lng), (sd_lat, sd_lng)) = self._step_dict[self._step_keys[self._last_step]]
             bearing = self._calc_bearing(so_lat, so_lng, sd_lat, sd_lng)
             origin = Point(so_lat, so_lng)
             lat, lng = self._calc_next_pos(origin, distance, bearing)
             if haversine.haversine(self._last_pos, (lat, lng))*1000 < distance:
                 distance -= haversine.haversine(self._last_pos, (lat, lng))*1000
                 self._last_pos = (lat, lng)
         else:
             return self.get_last_pos()
     else:
         lat, lng = self._calc_next_pos(origin, distance, bearing)
         self._last_pos = (lat, lng)
         return self.get_last_pos()
Example #8
    def proximity_search(self, latitude, longitude, radius):
        """
        Given a centerpoint, find everything within a radius around
        that latitude and longitude, returned in order.

        :param latitude: floating point latitude
        :param longitude: floating point longitude
        :param radius: radius in meters.
        :return: list of point ids within the radius, ordered nearest to farthest.
        """

        hashcode = geohash.encode(latitude=latitude, longitude=longitude)
        centerpoint = (latitude, longitude)

        tmp_hashcode = ''
        for x in hashcode:
            # Go through the hashcode character by character
            tmp_hashcode += x
            lat, lng, delta_lat, delta_lng = geohash.decode(tmp_hashcode,
                                                            delta=True)
            overall_lat = 2 * 1000 * haversine(
                point1=(latitude - delta_lat, longitude),
                point2=(latitude + delta_lat, longitude)
            )
            overall_lng = 2 * 1000 * haversine(
                point1=(latitude, longitude-delta_lng),
                point2=(latitude, longitude+delta_lng)
            )

            dist = min(overall_lng, overall_lat)
            if dist < radius:
                tmp_hashcode = tmp_hashcode[:-1]
                break

        if tmp_hashcode == '':
            raise ValueError('Radius larger than earth')

        precision = len(tmp_hashcode)

        search_hashes = self._get_adjoining_hashes(hashcode=hashcode,
                                                   precision=precision)
        search_hashes.append(tmp_hashcode)

        possible_points = []
        result_values = []

        for search_hash in search_hashes:
            possible_points.extend(self.storage.values(prefix=search_hash))

        for point_id in possible_points:
            point = self.points_by_id[point_id]
            dist = 1000 * haversine(centerpoint, point)
            if dist <= radius:
                result_values.append((point_id, dist))

        sorted_results = sorted(result_values, key = lambda x: x[1])
        final_results = [x[0] for x in sorted_results]
        return final_results
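The loop above sizes a geohash cell by measuring its span with haversine; a self-contained sketch of that measurement, reusing the same python-geohash decode(..., delta=True) call:

import geohash  # python-geohash, as used by proximity_search
from haversine import haversine

def cell_extent_m(lat, lon, precision):
    """Approximate north-south and east-west extent, in metres, of the
    geohash cell containing (lat, lon) at the given precision."""
    h = geohash.encode(latitude=lat, longitude=lon)[:precision]
    _, _, delta_lat, delta_lng = geohash.decode(h, delta=True)
    ns = 1000 * haversine((lat - delta_lat, lon), (lat + delta_lat, lon))
    ew = 1000 * haversine((lat, lon - delta_lng), (lat, lon + delta_lng))
    return ns, ew

# proximity_search keeps appending geohash characters until the cell is
# smaller than the search radius, then backs off one character:
print(cell_extent_m(52.5200, 13.4050, 5))  # roughly 5 km on each side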
Example #9
 def reduce(self, events):
     latitude = self.latfield
     longitude = self.longfield
     relative_distance = self.output_field
     use_haversine = bool(self.use_haversine)
     self.logger.info("[%s] - Starting geodistance instance" % str(self.metadata.searchinfo.sid))
     self.logger.debug("[%s] - Using parameters - %s" % (str(self.metadata.searchinfo.sid), str(self.metadata)))
     if self.group_by:
         position_tracker = {}
         for event in events:
             current = event
             if not (current[latitude] or current[longitude]):
                 current[relative_distance] = 0.0
                 self.logger.debug("[%s] - Using distance=0 for private IPs or unknown coordinates. "
                                   "Exclude if undesired." % str(self.metadata.searchinfo.sid))
             else:
                 current_pos = (float(current[latitude]), float(current[longitude]))
                 if current[self.group_by] not in position_tracker.keys():
                     last_pos = None
                 else:
                     last_pos = position_tracker[current[self.group_by]]
                 if last_pos is None:
                     current[relative_distance] = 0.0
                     self.logger.debug(
                         "[%s] - Initializing the first location with distance=0" % str(self.metadata.searchinfo.sid)
                     )
                 else:
                     if use_haversine:
                         current[relative_distance] = haversine(last_pos, current_pos, miles=bool(self.miles))
                     else:
                         current[relative_distance] = vincenty(last_pos, current_pos, miles=bool(self.miles))
                 position_tracker[current[self.group_by]] = current_pos
             yield current
     else:
         last_pos = None
         for event in events:
             current = event
             if not (current[latitude] or current[longitude]):
                 current[relative_distance] = 0.0
                 self.logger.debug(
                     "[%s] - Using distance=0 for private IPs or unknown coordinates. Exclude if undesired." % str(
                         self.metadata.searchinfo.sid))
             else:
                 current_pos = (float(current[latitude]), float(current[longitude]))
                 if last_pos is None:
                     current[relative_distance] = 0.0
                     self.logger.debug("[%s] - Initializing the first location with distance=0" % str(
                         self.metadata.searchinfo.sid))
                 else:
                     if use_haversine:
                         current[relative_distance] = haversine(last_pos, current_pos, miles=bool(self.miles))
                     else:
                         current[relative_distance] = vincenty(last_pos, current_pos, miles=bool(self.miles))
                 last_pos = current_pos
             self.logger.debug(current)
             yield current
         self.logger.info("[%s] - Completed successfully." % str(self.metadata.searchinfo.sid))
Example #10
def location_error(true_loc, text_loc, LocRes):
	# we create the location resolver in method.py because we don't want it to load every time we import this file
	if not true_loc: return 0.0
	# check if the location field contains coordinates
	coord = isCoord(text_loc)
	if coord: return haversine(true_loc, coord)
	# resolve to lat lon
	res = LocRes.reverse_geocode(text_loc.split()[0], text_loc.split()[1])
	if not res: return 0.0
	# tuple() so the result also works on Python 3, where map() is lazy
	res_val = tuple(map(float, res))
	return haversine(true_loc, res_val)
Example #11
def path_opt_test(llo):
    f_ = 0.0
    d_ = 0.0
    l_ = north_pole
    for i in range(len(llo)):
        d_ += haversine(l_, llo[i][1])
        f_ += d_ * llo[i][2]
        l_ = llo[i][1]
    d_ += haversine(l_, north_pole)
    f_ += d_ * 10 #sleigh weight for whole trip
    return f_
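A small hedged usage sketch for path_opt_test; north_pole and haversine come from the surrounding Kaggle script, so they are restated here (north_pole = (90.0, 0.0) is an assumption) to make the call shape clear.

from haversine import haversine

north_pole = (90.0, 0.0)  # assumed, as in the Santa's sleigh competition

# llo items carry (gift_id, (lat, lon), weight); only indices 1 and 2 are used.
trip = [
    (100, (68.0, -30.0), 12.5),
    (101, (66.5, -32.0), 7.0),
]
print(path_opt_test(trip))  # weighted latency, plus the 10-unit sleigh on the return leg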
Example #12
	def calculate_distance(self):
		latlng1 = self.location1.latlng
		latlng2 = self.location2.latlng

		if bool(latlng1 and latlng2):
			self.ok = True
			self.km = haversine(latlng1, latlng2)
			self.miles = haversine(latlng1, latlng2, miles=True)
			self.meters = int(self.km * 1000)
			self.feet = int(self.miles * 5280)
		else:
			print '<ERROR - Input is incorrect>'
Example #13
def path_opt_test(llo):
    f_ = 0.0
    d_ = 0.0
    we_ = 0.0
    l_ = north_pole
    for i in range(len(llo)):
        d_ += haversine(l_, llo[i][1])
        we_ += llo[i][2]
        f_ += d_ * llo[i][2]
        l_ = llo[i][1]
    d_ += haversine(l_, north_pole)
    f_ += d_ * 10
    return [f_,d_,we_]
Example #14
def bipartite(day, month, year, files):
    """
    Function bipartite
    ---------------------
    Creates a bipartite graph with edges across all possible combinations of waypoints through the zones.
    Each edge has attributes of speed (average speed across the edge based on wind) and time (distance/speed)

    day: day the data was collected
    month: month the data was collected
    year: year the data was collected
    files: list with tags for paths of xlsx files formatted in the way shown getFlightData

    returns: a bipartite graph with edges between zones that have the attributes of speed and time (networkx graph)
    """
    
    import geopy
    from geopy.distance import VincentyDistance


    zone = zones()  #create zones
    waypoint = waypointDict(files) #get the waypoint dict of all waypoints
    zdir = GP(day,month,year)[0] #predicted wind directions across all prediction points
    zspeed = GP(day,month,year)[1] #predicted wind speed (index 1 assumed; index 0 is the direction above)
    network = nx.DiGraph()

    for i in range(len(zone) - 1):  #Creates the edges from layer to layer in bipartite graph
        for j in range(len(zone[i])):
            for k in range(len(zone[i+1])):
                network.add_edge(zone[i][j], zone[i+1][k],  #Adds edges from one zone to another with distance as attribute
                                 distance = haversine((waypoint[zone[i][j]]), (waypoint[zone[i+1][k]]))/1.60934)
    for i in range(len(zone[0])):
        network.add_edge('source', zone[0][i], distance = haversine(waypoint['source'], waypoint[zone[0][i]])/1.60934)
    for i in range(len(zone[5])):
        network.add_edge(zone[5][i], 'sink', distance = haversine(waypoint[zone[5][i]], waypoint['sink'])/1.60934)

    p = 0 #placeholder for iterating through zdir and zspeed lists
    for i in range(network.number_of_edges()):#Goes through each edge to find intervals to calculate weather data
        b = bearing((waypoint[network.edges()[i][0]]), (waypoint[network.edges()[i][1]]))   #bearing of the edge
        origin = geopy.Point(waypoint[network.edges()[i][0]][0], waypoint[network.edges()[i][0]][1])#lat,lon of point 1
        network[network.edges()[i][0]][network.edges()[i][1]]['speed'] = 0
        k = 0 #placeholder to find total number of iteration points along each edge
        for j in range(0, int(roundDown(network[network.edges()[i][0]][network.edges()[i][1]]['distance'],20)),20):
            destination = VincentyDistance(miles=j).destination(origin, b) #geopy point after j miles along bearing b (miles assumed: the distances above are converted from km)
            b_final = (bearing((destination.latitude, destination.longitude), (waypoint[network.edges()[i][0]][0], waypoint[network.edges()[i][0]][1]))+180)%360
            network[network.edges()[i][0]][network.edges()[i][1]]['speed'] += speed_calc(destination.latitude, destination.longitude, b_final, zdir[p],zspeed[p])
            k+=1
            p+=1
        network[network.edges()[i][0]][network.edges()[i][1]]['speed'] /= k #average speed across each edge
        network[network.edges()[i][0]][network.edges()[i][1]]['time'] = (network[network.edges()[i][0]][network.edges()[i][1]]['distance'] /
                                                                         network[network.edges()[i][0]][network.edges()[i][1]]['speed']) #time across each edge
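bipartite relies on a bearing helper that is not part of this snippet; a sketch of the standard initial-bearing formula it presumably implements, with points given as (lat, lon):

import math

def bearing(point1, point2):
    """Initial great-circle bearing from point1 to point2, in degrees [0, 360)."""
    lat1, lon1 = map(math.radians, point1)
    lat2, lon2 = map(math.radians, point2)
    dlon = lon2 - lon1
    x = math.sin(dlon) * math.cos(lat2)
    y = math.cos(lat1) * math.sin(lat2) - math.sin(lat1) * math.cos(lat2) * math.cos(dlon)
    return (math.degrees(math.atan2(x, y)) + 360) % 360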
Example #15
    def add_in_tour(self,giftID,centroidID):
        i,k = giftID,centroidID
        n = len(self.clusters[k])
        if n==0:
            raise Exception('cluster was not initialized')
        lati,longi = self.X[i]
        dpole_i = self.distances_to_pole[i]
        j = n-np.searchsorted(self.latitudes_in_cluster[k][::-1],lati)
        if j==0:
            latency_i = dpole_i
        else:
            previous_gift = self.clusters[k][j-1]
            latency_i = self.latencies_in_cluster[k][j-1] + haversine(self.X[previous_gift],self.X[i])
        if j==n:
            #add gift in last position
            delta_latency = latency_i - self.latencies_in_cluster[k][j-1]
            weight_after_j = 0.
            delta_d = dpole_i - self.distances_to_pole[self.clusters[k][-1]]
        else:
            next_gift = self.clusters[k][j]
            delta_latency = latency_i +  haversine(self.X[next_gift],self.X[i]) - self.latencies_in_cluster[k][j]
            weight_after_j = sum(self.weights[self.clusters[k][j:]])
            delta_d = 0.

        self.clusters[k].insert(j,i)
        self.latitudes_in_cluster[k].insert(j,lati)

        for jj in range(j,n):
            self.latencies_in_cluster[k][jj]+=delta_latency
        
        self.latencies_in_cluster[k].insert(j,latency_i)
        
        if min(self.centroids[k][1],self.X[i][1])<-150 and max(self.centroids[k][1],self.X[i][1])>150:
            lamean = (self.centroids[k][0] * self.weight_per_cluster[k] + self.X[i][0] *
                                 self.weights[i])/(self.weights[i]+self.weight_per_cluster[k])
            lo = np.array([self.centroids[k][1],self.X[i][1]])
            lo = np.where(lo<0,lo+360,lo)
            lomean = (lo[0] * self.weight_per_cluster[k] + lo[1]*self.weights[i])/(self.weights[i]+self.weight_per_cluster[k])
            if lomean>180:
                lomean = lomean-360.
            self.centroids[k] = np.array([lamean,lomean])
            #import pdb;pdb.set_trace()
        else:
            self.centroids[k] = (self.centroids[k] * self.weight_per_cluster[k] + self.X[i] *
                                 self.weights[i])/(self.weights[i]+self.weight_per_cluster[k])
        self.weight_per_cluster[k]+= self.weights[i]
        
        self.cost_per_cluster[k] +=  self.weights[i]*latency_i+ (weight_after_j + sleigh_weight) * delta_latency + sleigh_weight * delta_d
Example #16
    def most_likely_location(self,user,location_set):
        """
        Returns the most likely location for a user of unknown locale,
        based on the social tightness model.
        """
        max_probability = float('-inf')
        best_location = None
        for neighbor_u in self.mention_network.neighbors_iter_(user):
            if neighbor_u not in location_set: continue

            location_of_neighbor_u = self.mention_network.node_data_(neighbor_u)
            probability = 0

            for neighbor_v in self.mention_network.neighbors_iter_(neighbor_u):
                if neighbor_v not in location_set: continue
                location_of_neighbor_v = self.mention_network.node_data_(neighbor_v)

                #to get the dict-lookup correct, we round to the nearest kilometer
                distance = round(haversine(location_of_neighbor_u,location_of_neighbor_v),0)

                # "" , round to two significant figures
                social_closeness = self.sij[neighbor_u][neighbor_v]
                probability += self.probability_distance_social_closeness[distance][social_closeness]

            #compare the probability of this neighbor with other possible neighbors
            #sets the highest-probability user as the most likely location
            if probability > max_probability:
                max_probability = probability
                best_location = location_of_neighbor_u
    
        return best_location
Example #17
def find_distance(start, end, miles=True):

    response1 = requests.get('https://maps.googleapis.com/maps/api/geocode/json?address='+start)
    response2 = requests.get('https://maps.googleapis.com/maps/api/geocode/json?address='+end)
    resp_json_payload1 = response1.json()
    resp_json_payload2 = response2.json()

    #Retrieve latitude and longitude of the starting and ending points
    orig_lat = resp_json_payload1['results'][0]['geometry']['location']['lat']
    orig_lon = resp_json_payload1['results'][0]['geometry']['location']['lng']
    dest_lat = resp_json_payload2['results'][0]['geometry']['location']['lat']
    dest_lng = resp_json_payload2['results'][0]['geometry']['location']['lng']

    orig_coord = (orig_lat, orig_lon)
    dest_coord = (dest_lat, dest_lng)
    #Find the distance between the starting and ending points using the Haversine equation
    distance_traveled = haversine(orig_coord, dest_coord, miles)
    

    # url = "http://maps.googleapis.com/maps/api/distancematrix/json?origins={0}&destinations={1}&language=en-EN&units=imperial".format(str(orig_coord),str(dest_coord))
    # result = requests.get(url)
    # resp_json = result.json()
    # distance_traveled= resp_json['rows'][0]['elements'][0]['distance']['text']

    return distance_traveled
Example #18
def find_near():
    """Find docs near a point, from nearest to farthest. Usually has a limit() clause.

    Results are ordered by distance, nearest first, by default.
    """
    # This is how you construct a filter
    filters = {
        "loc": {
            "$nearSphere": {
                "$geometry": {
                    "type": "Point",
                    "coordinates": [LNG, LAT],
                },
                "$maxDistance": 5 * 1000, # max distance in meter
            }
        }
    }

    array = list()
    for doc in col.find(filters).limit(5):
        lng, lat = doc["loc"]["coordinates"][0], doc["loc"]["coordinates"][1]
        dist = haversine((lat, lng), (LAT, LNG), miles=False)
        assert dist <= 5.0
        array.append(dist)

    assert_increasing(array)
Example #19
def read_pl_lns_geo(pl_lns, pl_lns_geo):
    """returns list of lns, and a dict with pairwise distances between lns."""
    lns = []
    f = open(pl_lns)
    for line in f:
        lns.append(line.split()[0].strip())
    
    lns_geo = {}
    f = open(pl_lns_geo)
    for line in f:
        tokens = line.split()
        lns_geo[tokens[0]] = [float(tokens[1]), float(tokens[2])]
    lns_dist = {}
    for lns1 in lns:
        lns_dist[lns1] = {}
        for lns2 in lns:
            if lns1 == lns2:
                lns_dist[lns1][lns2] = 0
            else:
                lns_dist[lns1][lns2] = haversine(lns_geo[lns1][0], lns_geo[lns1][1], lns_geo[lns2][0], lns_geo[lns2][1])

    lns_nbrs_ordered_bydist = {}
    for lns1 in lns:
        tuples = []
        for k,v in lns_dist[lns1].items():
            if k == lns1:
                continue
            tuples.append([k,v])
            
        tuples.sort(key = itemgetter(1))
        lns_nbrs_ordered_bydist[lns1] = [t[0] for t in tuples]
    return lns, lns_dist, lns_nbrs_ordered_bydist
Example #20
    def get_proximos_a(cls_obj, latitude, longitude):
        """
        Calculations with latitude and longitude are tricky.
        Ideally we would use the https://pt.wikipedia.org/wiki/F%C3%B3rmula_de_Haversine
        to test the distance from each property to the search address.
        But besides being complicated to implement in a Django query, it would become
        problematic once the database filled up with properties.

        I settled on the following approach:
        the get_min_max_coordenates function computes the minimum and maximum
        latitudes and longitudes at (approximately) n km of distance.
        Here n = 1 km.

        I then filter the properties whose latitude and longitude fall inside that square.
        Of course this list can return properties more than 1 km away from the
        address, since we generated a square rather than a circle.

        With the set of properties already reduced, I iterate over the properties
        returned by the ORM, testing the distance between each one and the point with the formula.
        Properties outside the circle but returned by the query are discarded,
        leaving only those inside the circle.
        """
        circle = 1  # Up to 1 km away
        bounds = get_min_max_coordenates(latitude, longitude, circle)
        candidatos = Imovel.get_disponiveis().filter(latitude__gte=bounds[0],
                                                     latitude__lte=bounds[1],
                                                     longitude__gte=bounds[2],
                                                     longitude__lte=bounds[3])
        center = (latitude, longitude)
        return [imovel for imovel in candidatos if haversine(center, (imovel.latitude, imovel.longitude)) <= circle]
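get_min_max_coordenates is not shown in this snippet; a plausible sketch of the bounding-box helper described in the docstring, using the ~111 km-per-degree approximation (an assumption, not the project's actual implementation):

import math

def get_min_max_coordenates(latitude, longitude, km):
    # Sketch only: one degree of latitude is ~111.32 km, and a degree of
    # longitude shrinks by the cosine of the latitude.
    dlat = km / 111.32
    dlon = km / (111.32 * math.cos(math.radians(latitude)))
    return (latitude - dlat, latitude + dlat,
            longitude - dlon, longitude + dlon)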
Example #21
def most_central_point(geos_array, valid_medoid=30):
    """
    Algorithm to find the point that is most central (i.e., the medoid)
    using the haversine formula. Distances are weighted by the number
    of observations (increases successful selection of a medoid from the pool by 50%).

    :param geos_array:
    :param valid_medoid: min for mean distance to all other points / number of observations.
                         Defaults to 30. If the value is still over 30 after computing the above
                         weighting metric, it is almost certainly worth removing.
    :return:
    """
    # Count the number of times each coordinate appears in `geos_array`
    geos_array_count = dict(Counter(geos_array))

    # Define a list of unique coordinates
    unique_geos = list(set(geos_array))

    # Compute the distance from each point to all of the others
    coord_dict = dict()
    for i in unique_geos:
        coord_dict[i] = [haversine(i, j) for j in unique_geos if j != i]

    # Compute the mean for each and divide by the number of times it occurred in geos_array
    coord_dict_mean = {k: mean(v) / float(geos_array_count[k]) for k, v in coord_dict.items()}

    # Use the most central point as the medoid
    medoid_mean_coord = min(coord_dict_mean, key=coord_dict_mean.get)

    # Check against threshold
    if coord_dict_mean[medoid_mean_coord] <= valid_medoid:
        return medoid_mean_coord
    else:
        return np.NaN
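A small hedged usage example (the snippet assumes collections.Counter, statistics.mean, numpy and haversine are already imported in its module):

# Hypothetical pool of (lat, lon) observations with one repeated point.
pings = [
    (40.7128, -74.0060),
    (40.7128, -74.0060),  # repeated, so the weighting favours this point
    (40.7130, -74.0055),
    (40.7200, -74.0100),
]
print(most_central_point(pings))
# The points sit within ~1 km of each other, so the weighted mean distance
# is far below the valid_medoid threshold and a coordinate is returned.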
Example #22
def get_relations_informations(user):
    min_distance = None
    nearest = None
    stale = []
    no_gps = []
    user_loc = (user.last_latitude, user.last_longitude)
    for relation in user.relationships:
        # If too old, we add the relation to the stale list
        time_diff = (datetime.utcnow() - relation.updated_at) / timedelta(minutes=1)
        if time_diff > 5:
            stale.append(relation.facebook_id)
            continue

        # If the GPS is deactivated, we add it to the no-gps list
        loc = (relation.last_latitude, relation.last_longitude)
        if loc[0] is None:
            no_gps.append(relation.facebook_id)
            continue

        # If the GPS is active and up to date, test whether the relation is the nearest so far.
        distance = haversine(user_loc, loc) * 1000.
        if distance < DISTANCE_THRESHOLD and (min_distance is None or distance < min_distance):
            min_distance = distance
            nearest = relation

    return {"user": nearest, "distance": min_distance, "stale": stale, "no_gps": no_gps}
Example #23
def medium():
    a = []
    b = []

    while len(a) < 3:
        c1 = random_coord()
        c2 = random_coord()

        d = haversine(c1, c2)
        if 0.1 <= d < 1000:
            a.append(c1)
            b.append(c2)

    print("\n\t@Test")
    print("\tpublic void mediumDistanceTest() {")
    fmtprt("testMedium", a, b)
    print("\t}")
Example #24
    def social_closeness(self):
        """
        The social tightness based model is based on the assumption
        that different friends have different importance to a user.
        The social closeness between two users is measured via cosine similarity,
        then we estimate the probability of user i and user j being located at a distance
        | l_i - l_j | with that social closeness. Then we estimate the probability of user_i
        being located at l_i and use the location with the top probability
        """
        pairs = 0
        #here we calculate social closeness
        logger.debug("Calcuating social closeness")
        for user in self.users_with_location:
            user_location = self.mention_network.node_data_(user)
            for friend in self.mention_network.neighbors_iter_(user):
                friend_location = self.mention_network.node_data_(friend)
                if not friend_location: continue

                pairs += 1
                social_closeness = round(self.cosine_similarity(user,friend),2)
                self.sij[user][friend] = social_closeness
                distance = round(haversine(user_location,friend_location),0)
                self.probability_distance_social_closeness[distance][social_closeness] += 1.0

        #the normalizing factor is the total number of social_closeness probabilities added above...
        normalizing_factor = pairs
        for distance in self.probability_distance_social_closeness:
            for social_closeness in self.probability_distance_social_closeness[distance]:
                self.probability_distance_social_closeness[distance][social_closeness] /= normalizing_factor
        logger.debug("Finished calculating the social closeness...")
Example #25
def compare_coordinates(left_lat, left_lon, right_lat, right_lon, accuracy=0.1):
    '''Compares coordinates with a specified accuracy

    The two points are considered equal when the haversine distance
    between them does not exceed ``accuracy`` kilometers.

    :param left_lat:        First coordinate latitude
    :param left_lon:        First coordinate longitude
    :param right_lat:       Second coordinate latitude
    :param right_lon:       Second coordinate longitude
    :param accuracy:        Max difference between coordinates to consider them equal
                            Default value   - 0.1
                            Possible values - float or integer value of kilometers

    :returns:               Boolean value

    :error:                 ValueError when there is a problem converting accuracy
                            into a float value. When it is caught, a warning is
                            given and accuracy is set to 0.1.

    '''
    for key, value in {'left_lat': left_lat, 'left_lon': left_lon, 'right_lat': right_lat, 'right_lon': right_lon}.iteritems():
        if not isinstance(value, NUM_TYPES):
            raise TypeError("Invalid type for coordinate '{0}'. "
                            "Expected one of {1}, got {2}".format(
                                key, str(NUM_TYPES), str(type(value))))
    distance = haversine((left_lat, left_lon), (right_lat, right_lon))
    if distance > accuracy:
        return False
    return True
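For example, two points roughly 70 m apart compare equal at the default 0.1 km accuracy, while points ~14 km apart do not (assuming NUM_TYPES accepts floats, as the type check implies):

assert compare_coordinates(40.0, -75.0, 40.0005, -75.0005)   # ~0.07 km apart
assert not compare_coordinates(40.0, -75.0, 40.1, -75.1)     # ~14 km apart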
Example #26
def isinradius(point, distance):
	"""Takes a tuple of (lat, lon) where lon and lat are floats, and a distance in miles. Returns a list of zipcodes near the point."""
	zips_in_radius = list()
	
	if not isinstance(point, tuple):
		raise TypeError('point should be a tuple of floats')
	for f in point:
		if not isinstance(f, float):
			raise TypeError('lat and lon must be of type float')

	dist_btwn_lat_deg = 69.172
	dist_btwn_lon_deg = math.cos(math.radians(point[0])) * 69.172  # point[0] is in degrees
	lat_degr_rad = float(distance)/dist_btwn_lat_deg
	lon_degr_rad = float(distance)/dist_btwn_lon_deg

	latmin = point[0] - lat_degr_rad
	latmax = point[0] + lat_degr_rad
	lonmin = point[1] - lon_degr_rad
	lonmax = point[1] + lon_degr_rad

	if latmin > latmax:
		latmin, latmax = latmax, latmin
	if lonmin > lonmax:
		lonmin, lonmax = lonmax, lonmin

	stmt = ('SELECT * FROM ZIPS WHERE LONG > {lonmin} AND LONG < {lonmax}\
	 AND LAT > {latmin} AND LAT < {latmax}')
	_cur.execute(stmt.format(lonmin=lonmin, lonmax=lonmax, latmin=latmin, latmax=latmax))
	results = _cur.fetchall()

	for row in results:
		if haversine(point, (row[_LAT], row[_LONG])) <= distance:
			zips_in_radius.append(Zip(row))
	return zips_in_radius
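69.172 is the length of one degree of latitude in miles; the per-degree longitude span shrinks with the cosine of the latitude, which must be taken in radians (the fix applied above). A quick check of the scaling:

import math

# One degree of longitude spans ~69.2 miles at the equator but only
# ~53 miles at 40 degrees north.
print(math.cos(math.radians(40.0)) * 69.172)  # ~52.99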
Example #27
def hotspots(lat, lon):
	newradius = ''
	for city in cities.values():
		# city: (lat, lon, radius threshold, radius to use)
		if haversine(float(lat), float(lon), city[0], city[1]) < city[2]:
			newradius = city[3]
			break
	return newradius
Example #28
def consultar(BT1, bt2_gps):
    a = randint(1, 125000)
    # b = r_server.get("BT"+str(a).zfill(12))
    b = r_server.get(BT1)
    b_str = str(b).split("|")
    bt1_gps = (float(b_str[0]), float(b_str[1]))
    return (haversine(bt1_gps, bt2_gps),bt1_gps)
Example #29
    def cluster_gifts(self, verbose, giftDF):
#       df=pd.read_csv(path)
        # Kaggle specific variables

        print('initializing santa data...')

        self.cluster = True
        # set demand as weight
        print("creating customer demands (gift weights)...")

#       self.d=df.Weight.values
#       self.xc=zip(df.Latitude.values,df.Longitude.values)
#       self.xf=self.xc
#       np.vstack([df.Longitude.values,df.Latitude.values]).T

        # since we are clustering, any customer can be a factory
        self.n = len(giftDF)
        self.m = self.n

        # For memory efficiency, do not create data matrix
        print("creating distance matrix...")
        self.create_distance_matrix(x=giftDF[['Latitude', 'Longitude']].values)

        # Set the start-up -- fixed -- cost to be the distance
        # to the centroid
        print("creating startup costs...")
        self.s = [haversine(xx, self.north_pole) for xx in giftDF[['Latitude', 'Longitude']].values]
        return self.gurobi_cluster(verbose, giftDF)
Example #30
 def walk(self):
     if not self.is_paused:
         walked_distance = 0.0
         time_passed = time.time()
         remaining_points = []
         time_passed_distance = self.speed * abs(time_passed - self._timestamp - self._paused_total)
         for step in self.walk_steps():
             step_distance = haversine.haversine(*step) * 1000
             if walked_distance + step_distance >= time_passed_distance:
                 if walked_distance > time_passed_distance:
                     percentage_walked = 0.0
                 else:
                     if step_distance == 0.0:
                         percentage_walked = 1.0
                     else:
                         percentage_walked = (time_passed_distance - walked_distance) / step_distance
                 remaining_points += self.calculate_coord(percentage_walked, *step)
             else:
                 percentage_walked = 1.0
             walked_distance += step_distance * percentage_walked
         self.points = remaining_points
         if self.points:
             self.lat, self.long = self.points[0][0], self.points[0][1]
             self.polyline = self.combine_polylines(self.points)
         else:
             self.lat, self.long = None, None
             self.polyline = ''
         self.reset_timestamps()
     return (self.lat, self.long)
Example #31
                'Please input the number of hospitals you want to show: '))
        ratingin = float(
            raw_input(
                'What is the minimum rating (please input an integer between 1 and 5)? '
            ))
        print "Please wait for a few seconds..."
        #latitude and longitude of input zipcode
        x = zipcode[zipcode.zip_code == zipcodein]['lat']
        y = zipcode[zipcode.zip_code == zipcodein]['lng']
        start = [x, y]

        #distance between input zipcode and hospitals
        distance = []
        for i in range(result.shape[0]):
            ending = result[['lat', 'lng']].ix[i].values
            distance.append(haversine(start, ending))

        #append distance as the last column
        result1 = result.copy()
        result1['distance_miles'] = distance
        #convert the data type of overall_rating from string into numeric
        result1['overall_rating'] = pd.to_numeric(result1['overall_rating'],
                                                  errors='coerce')
        #drop all overall_rating less than input
        result1 = result1[result1.overall_rating >= ratingin]
        #change the order of columns
        result2 = result1[[
            'ccn', 'name', 'Street', 'City', 'State', 'zip_code',
            'overall_rating', 'spending_score', 'lat', 'lng', 'distance_miles'
        ]]
        result1 = result1[[
Example #32
def haversine_distance(lat1, lon1, lat2, lon2):
    loc1 = (lat1, lon1)
    loc2 = (lat2, lon2)

    return hs.haversine(loc1, loc2)
Example #33
from math import radians, sin, cos, asin, sqrt

from haversine import haversine


def calculate_distance_between_subsystems2():
    # NOTE: the snippet was truncated here; the function name and the
    # hardcoded test coordinates are inferred from the calls below.
    my_latitude = 38.21273
    my_longitude = 85.76018
    other_latitude = 38.21232
    other_longitude = 85.76064

    if my_latitude == 0 or my_longitude == 0 or other_latitude == 0 or other_longitude == 0:
        return 0
    else:
        R = 6372.8  # Earth radius in kilometers

        dLat = radians(other_latitude - my_latitude)
        print("dLat", dLat)
        dLon = radians(other_longitude - my_longitude)
        print("dLon", dLon)
        lat1 = radians(my_latitude)
        print("lat1", lat1)
        lat2 = radians(other_latitude)
        print("lat2", lat2)

        a = sin(dLat / 2)**2 + cos(lat1) * cos(lat2) * sin(dLon / 2)**2
        print("a", a)
        c = 2 * asin(sqrt(a))
        print("c", c)

        return R * c * 1000


print("haversine", haversine((38.21273, 85.76018), (38.2126, 85.75976)))
print("haversine2", haversine2(38.21273, 85.76018, 38.21232, 85.76064))
print(calculate_distance_between_subsystems2())
Example #34
def df_haversine(lat1: float, lng1: float, lat2: float, lng2: float):
    # print(f"lat1: {lat1} is of type {type(lat1)}")
    # print(f"lat2: {lat2} is of type {type(lat2)}")
    return haversine((lat1, lng1), (lat2, lng2), Unit.MILES)
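Given the signature, df_haversine is presumably applied row-wise to a DataFrame; a hedged usage sketch with made-up column names:

import pandas as pd

# Hypothetical columns; adjust the names to the actual frame.
df = pd.DataFrame({
    "pickup_lat": [40.7580], "pickup_lng": [-73.9855],
    "dropoff_lat": [40.6413], "dropoff_lng": [-73.7781],
})
df["miles"] = df.apply(
    lambda r: df_haversine(r.pickup_lat, r.pickup_lng, r.dropoff_lat, r.dropoff_lng),
    axis=1,
)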
Example #35
def astar(apf, start, distance_target, genome, values_matrix, K, pre_matrix, x_value,
          type_astar):
    """
    Returns a list of tuples as a path from the given start to the given end in the given maze
    This is a normal a star algorithm is supposed to work

    The current version does not know the destination point
    It only knows the distance from the destination point

    :param apf: matrix representing the routing system of the area
    :param start: starting point
    :param distance_target: distance from the target (total length trip)
    :param genome: genome
    :param values_matrix: values to translate cells to coordinates
    :param K: constant for computing charge
    :param pre_matrix: pre computation of distance from the cell to the objects
    :param x_value: percentage of the total distance target used as a threshold
    :param type_astar: typology of the astar wanted
    """
    # make x_value a percentage of the total distance target
    x_value = (distance_target * x_value) / 100

    # end_node = Node(parent=None, position=end_point)
    # if it takes too long, going to stop it
    # start_time = time.time()
    # Create start and end node
    start_node = Node(parent=None, position=start)
    start_node.g = start_node.h = start_node.f = 0

    # Initialize both open and closed list
    # open_list = []
    open_queue = []
    second_open_queue = {}
    closed_list = {}

    # Add the start node
    # open_list.append(start_node)
    heapq.heappush(open_queue, (start_node.f, start_node))
    second_open_queue.update({start_node.id: 0})

    # Loop until you find the end
    while len(open_queue) > 0:

        # Get the current node
        # current_node = open_list[0]
        # current_index = 0
        # for index, item in enumerate(open_list):
        #     if item.f < current_node.f:
        #         current_node = item
        #         current_index = index
        current_node = heapq.heappop(open_queue)[1]
        del second_open_queue[current_node.id]

        # with open("a_star_{}.txt".format(LoadConfigs.configurations["name_exp"]), "a") as myfile:
        #     myfile.write("current node -> {} \n".format(current_node))

        # Pop current off open list, add to closed list
        # open_list.pop(current_index)
        # closed_list.add(current_node)
        closed_list[current_node.id] = ""

        # current_time = time.time()
        # time_from_start = current_time - start_time  # time in seconds
        # block execution astar after 10 minutes -> hardcoded function

        # end = False
        # if time_from_start >= 300:
        #     end = True
        # find the bigger node.g
        # max_id = max(second_open_queue.items(), key=operator.itemgetter(1))[0]
        #
        # for el in open_queue:
        #     if el[1].id == max_id:
        #         current_node = el[1]
        #         break
        # distance_target = current_node.g - 10

        # Found the goal
        # on the current node, the distance from the start is saved as g
        # so if the distance from the start is equals to distance_target then I found the goal
        # if current_node.id == end_node.id: # or end:
        if current_node.g >= distance_target:
            # with open("a_star_{}.txt".format(LoadConfigs.configurations["name_exp"]), "a") as myfile:
            #     myfile.write("closed list {}, open list {} \n".format(len(closed_list), len(second_open_queue)))
            #     myfile.write("------------------ \n")
            #     myfile.write("------------------ \n")
            #     myfile.write("------------------ \n")

            return return_best_path_so_far(current_node=current_node)

        # Generate children
        points = list_neighbours(x_value=current_node.position.x, y_value=current_node.position.y, apf=apf)
        # points_on_the_street = keep_only_points_on_street(apf=pre_matrix.get_apf(), points=points)
        points_on_the_street = pre_matrix.keep_only_points_on_street(points=points)

        children = [Node(parent=current_node, position=node_position) for node_position in points_on_the_street]
        # for node_position in points_on_the_street:  # Adjacent squares
        #
        #     # Create new node
        #     new_node = Node(current_node, node_position)
        #
        #     # Append
        #     children.append(new_node)

        # Loop through children
        for child in children:
            # Child is on the closed list
            if closed_list.get(child.id, None) is not None:
                # if child in closed_list:
                continue

            # Child already computed
            if second_open_queue.get(child.id, None) is not None:
                # result = list(filter(lambda x: x[1] == child, open_queue))
                # if len(result) > 0:
                continue

            # Create the f, g, and h values
            r = haversine((values_matrix[0][child.position.x], values_matrix[1][child.position.y]),
                          (values_matrix[0][current_node.position.x],
                           values_matrix[1][current_node.position.y])) * 1000  # in metres
            child.g = current_node.g + r

            # distance to end is total distance - distance from start
            distance_to_end = distance_target - child.g
            # distance_to_end = haversine((values_matrix[0][child.position.x], values_matrix[1][child.position.y]),
            #                             (values_matrix[0][end_node.position.x],
            #                              values_matrix[1][end_node.position.y])) * 1000  # in metres

            child.h = abs(_compute_h(distance_to_end=distance_to_end, genome=genome, type_astar=type_astar,
                                     current_position=child.position, K=K, pre_matrix=pre_matrix, x_value=x_value,
                                     tra_moved_so_far=return_best_path_so_far(current_node=current_node)
                                     if type_astar == 1 else None))

            total_g_normalised = _standard_normalisation(old_value=child.g, old_min=0, old_max=distance_target + 100,
                                                         new_min=0, new_max=10)
            # now higher is the most attractive
            child.f = -(total_g_normalised + child.h)
            # child.f = -child.h

            # Child is already in the open list
            # result = list(filter(lambda x: x[1] == child, open_queue))
            # if len(result) > 0 and child.g > result[0].g:
            #     continue
            res = second_open_queue.get(child.id, None)
            # if res is not None:
            #     print("S")
            if res is not None and child.g > res:
                continue

            # print(child)
            # Add the child to the open list
            # open_list.append(child)
            heapq.heappush(open_queue, (child.f, child))
            second_open_queue.update({child.id: child.g})
Example #36
data = pd.read_csv('filtered_3rdgtw_loravar.csv', sep=',')
data['received'] = pd.to_datetime(data['received'])
results = data.shape
results = results[0]

devloc = pd.read_csv('device_location.csv', sep=';')
gtwloc = pd.read_csv('gateway_location.csv', sep=';')

tableofdistance = []
indices = []
for index, row in data.iterrows():
    for index_gtw, row_gtw in gtwloc.iterrows():
        for index_dev, row_dev in devloc.iterrows():
            if (row['gtw_id'] == row_gtw['gtw_id']
                    and row['dev_id'] == row_dev['dev_id']):
                loc1 = (devloc.loc[index_dev]['dev_lat'],
                        devloc.loc[index_dev]['dev_long'])
                loc2 = (gtwloc.loc[index_gtw]['gtw_lat'],
                        gtwloc.loc[index_gtw]['gtw_long'])
                hav = haversine(loc1, loc2)
                hav = hav * 1000  #Distance in meters (By default, haversine returns in kilometers)
                distance = (hav**2 +
                            (devloc.loc[index_dev]['dev_alt'] -
                             gtwloc.loc[index_gtw]['gtw_alt'])**2)**(0.5)
                tableofdistance.append(distance)

distanceDF = pd.DataFrame(tableofdistance, columns=['dist_devgtw'])
data_distance = data.join(distanceDF)

data_distance.to_csv('loravar_distances.csv', sep=',', index=False)
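The slant-range computation buried in the loop (horizontal haversine distance combined with the altitude difference by Pythagoras) can be factored into a small helper; a sketch:

from haversine import haversine

def slant_range_m(dev, gtw):
    """3-D device-to-gateway distance in metres.

    Sketch of the loop body above; dev and gtw are (lat, lon, alt_m) tuples.
    """
    horizontal_m = haversine(dev[:2], gtw[:2]) * 1000  # km -> m
    return (horizontal_m ** 2 + (dev[2] - gtw[2]) ** 2) ** 0.5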
Example #37
def find_dist(y1, y2):
    # element-wise haversine distance between paired coordinate sequences
    return [haversine(p1, p2) for p1, p2 in zip(y1, y2)]
Example #38
def calcDistanceKm(geom1, geom2):
    coord1 = ST_AsTuple(session.query(functions.ST_AsText(geom1)).one())
    coord2 = ST_AsTuple(session.query(functions.ST_AsText(geom2)).one())

    return haversine(coord1, coord2)
Example #39
    import timeit
    import haversine as hs

    # The reason these are slightly different is that
    # haversine computes radians(x2 - x1) whereas
    # dis uses radians(x2) - radians(x1). The
    # advantage is that dis is about 15% faster
    # and the difference is in the noise:
    #   hs.haversine(pt1, pt2)=111178.14375531959
    #   perf=1.1505752360008046
    #       ptr1.dis(pt2)=111178.1437553196
    #   perf=0.9691372659999615

    hpt1: Tuple[float, float] = 1.0, 2.0
    hpt2: Tuple[float, float] = 1.0, 3.0
    hd: float = hs.haversine(hpt1, hpt2)
    print(f'hs.haversine(hpt1, hpt2)={hd}')
    #loops: int  = 1_000_000
    #print(f'perf={timeit.timeit("hs.haversine(hpt1, hpt2)", number=loops, globals=globals())}')

    pt1: TrackPoint = TrackPoint(lat=1.0, lon=2.0)
    pt2: TrackPoint = TrackPoint(lat=1.0, lon=3.0)
    d: float = pt1.disMeters(pt2)
    print(f'    pt1.disMeters(pt2)={d}')
    #print(f'perf={timeit.timeit("pt1.disMeters(pt2)", number=loops, globals=globals())}')

    import unittest

    class TestTrackPoint(unittest.TestCase):

        def test_init_default(self: TestTrackPoint):
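The comment above contrasts haversine's radians(x2 - x1) with a radians(x2) - radians(x1) formulation; a minimal sketch of the faster variant, assuming coordinates are precomputed in radians (as TrackPoint presumably does):

import math

EARTH_RADIUS_M = 6371008.8  # mean Earth radius in metres, matching the haversine package

def dis_m(lat1_rad, lon1_rad, lat2_rad, lon2_rad):
    # Haversine with coordinates already in radians: the per-call work is a
    # subtraction instead of two radians() conversions.
    dlat = lat2_rad - lat1_rad
    dlon = lon2_rad - lon1_rad
    a = math.sin(dlat / 2) ** 2 + math.cos(lat1_rad) * math.cos(lat2_rad) * math.sin(dlon / 2) ** 2
    return 2 * EARTH_RADIUS_M * math.asin(math.sqrt(a))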
Example #40
def famous_route(data, veo_data, dist):

    lon = veo_data['Lon'].to_numpy()
    lat = veo_data['Lat'].to_numpy()

    start_points_long = data['START LONG'].to_numpy()
    start_points_lat = data['START LAT'].to_numpy()
    end_points_long = data['END LONG'].to_numpy()
    end_points_lat = data['END LAT'].to_numpy()

    length = len(start_points_long)

    fro = [""] * length
    to = [""] * length
    distance = [""] * length

    destination = veo_data['STATION'].to_numpy()

    for i in range(length):

        loc2 = (float(start_points_lat[i]), float(start_points_long[i]))

        for j in range(len(lon)):

            loc1 = (float(lat[j]), float(lon[j]))

            if fro[i] == "":
                if hs.haversine(loc1, loc2, unit=Unit.METERS) <= dist:
                    fro[i] = destination[j]
                    distance[i] = hs.haversine(loc1, loc2, unit=Unit.METERS)

            elif distance[i] > hs.haversine(loc1, loc2, unit=Unit.METERS):
                distance[i] = hs.haversine(loc1, loc2, unit=Unit.METERS)
                fro[i] = destination[j]

        if fro[i] == "":
            fro[i] = "unknown"

    distance = [""] * len(start_points_long)

    for i in range(length):

        loc2 = (float(end_points_lat[i]), float(end_points_long[i]))

        for j in range(len(lon)):

            loc1 = (float(lat[j]), float(lon[j]))

            if to[i] == "":
                if hs.haversine(loc1, loc2, unit=Unit.METERS) <= dist:
                    to[i] = destination[j]
                    distance[i] = hs.haversine(loc1, loc2, unit=Unit.METERS)

            elif distance[i] > hs.haversine(loc1, loc2, unit=Unit.METERS):
                distance[i] = hs.haversine(loc1, loc2, unit=Unit.METERS)
                to[i] = destination[j]

        if to[i] == "":
            to[i] = "unknown"

    map_path = {}

    for i in range(length):

        try:
            s = map_path[fro[i] + "-" + to[i]]
            map_path[fro[i]+"-"+to[i]] = s + 1
        except Exception as e:
            map_path[fro[i]+"-"+to[i]] = 1

    #print(map_path)

    start_unknown = [False] * length
    end_unknown = [False] * length

    for i in range(len(start_points_long)):
        if fro[i] == "unknown":
            start_unknown[i] = True
        if to[i] == "unknown":
            end_unknown[i] = True

    data['FROM'] = fro
    data['TO'] = to
    data['START_UNKNOWN'] = start_unknown
    data['END_UNKNOWN'] = end_unknown

    return map_path
Example #41
    def get_best_flight_price(self, departure_airport, arrival_airports):

        final_response = self.handle_error(
            None, None, None, None,
            [])  #initialize empty array to store the results

        if departure_airport is None:
            return self.handle_error(None, None,
                                     'No departure airport is found')

        if not arrival_airports:
            return self.handle_error(None, None,
                                     'No arrival airports are found')

        url = self.flights_api

        price_distance_ratio = []
        for arrival_city in arrival_airports:
            parameters = {
                "fly_from": departure_airport["id"],
                "fly_to": arrival_city["id"],
                "v": 3,
                "date_from":
                "29/06/2020",  #str(date.today().strftime("%d/%m/%Y")),
                "date_to":
                '27/09/2020',  #str((date.today() + timedelta(days=120)).strftime("%d/%m/%Y")),
                "max_fly_duration": 6,
                "flight_type": "oneway",
                "one_for_city": 1,
                "one_per_date": 0,
                "adults": 1,
                "children": 0,
                "infants": 0,
                "partner": "picky",
                "partner_market": "us",
                "curr": "USD",
                "locale": "en",
                "limit": 30,
                "sort": "price",
                "asc": 1,
                "xml": 0
            }

            distance = round(
                haversine((departure_airport["lat"], departure_airport["lon"]),
                          (arrival_city["lat"], arrival_city["lon"])), 3)
            response = self.make_request(url, parameters)

            if response["error_message"] == None:
                response = json.loads(response["response"].content)
                if response["data"]:
                    price = list(response["data"].values())[0]
                    ratio = round(price / distance, 3)
                    price_distance_ratio.append(
                        (arrival_city["city"], ratio, distance, price))

        optimal_flight = None

        if price_distance_ratio:
            optimal_flight = sorted(price_distance_ratio, key=lambda x: x[1])
            final_response["data"] = optimal_flight
        else:
            final_response[
                "error_message"] = 'No flight information for given cities'

        return final_response
Example #42
    def get(self, request):
        try:
            store = Store.objects.select_related('category', 'address', 'open_status').\
                                  prefetch_related('menu_set', 'storeimage_set', 'metrostationstore_set', 'review_set').\
                                  get(id=request.GET['store_id'])

            review_ratings_avg = ReviewDetail.get_review_ratings_avg(store)
            review_count       = ReviewDetail.get_review_count(store)

            result = {
                    'region_1depth_name'      : store.address.region_1depth_name,
                    'region_2depth_name'      : store.address.region_2depth_name,
                    'region_3depth_name'      : store.address.region_3depth_name,
                    'store_id'                : store.id,
                    'lat'                     : store.address.latitude,
                    'lng'                     : store.address.longitude,
                    'full_address'            : store.address.full_address,
                    'store_name'              : store.name,
                    'one_line_introduction'   : store.one_line_introduction,
                    'opening_time_description': store.opening_time_description,
                    'phone_number'            : store.phone_number,
                    'sns_url'                 : store.sns_url,
                    'menu_pamphlet_image_url' : store.menu_pamphlet_image_url,
                    'is_reservation'          : 1 if store.is_reservation else 0,
                    'is_wifi'                 : 1 if store.is_wifi else 0,
                    'is_parking'              : 1 if store.is_parking else 0,
                    'category'                : store.category.name,
                    'open_status'             : store.open_status.name,
                    'menus'                   : [
                                                    {
                                                    'name'          : menu.name,
                                                    'price'         : menu.price,
                                                    'menu_image_url': menu.menu_image_url
                                                    } for menu in store.menu_set.all()
                                            ],
                    'store_images'  : [store_image.image_url for store_image in store.storeimage_set.all()],
                    'metro_stations': [
                                        {
                                        'name'                 : metro_station_store.metro_station.name,
                                        'line'                 : metro_station_store.metro_station.line,
                                        'lat'                  : metro_station_store.metro_station.latitude,
                                        'lng'                  : metro_station_store.metro_station.longitude,
                                        'distance_from_store_m': haversine(
                                            (store.address.latitude, store.address.longitude),
                                            (metro_station_store.metro_station.latitude, metro_station_store.metro_station.longitude),
                                            unit = 'm'
                                            )
                                        } for metro_station_store in store.metrostationstore_set.all()
                                    ],
                    'reviews': [
                                {
                                'review_id' : review.id,
                                'rating'    : review.rating,
                                'content'   : review.content,
                                'image_url' : review.image_url,
                                'updated_at': review.updated_at
                                }
                        for review in store.review_set.all()
                            ],
                    'visitor_photos': [review.image_url for review in store.review_set.all()],
                    'rating_average': review_ratings_avg,
                    'review_count'  : review_count
                    }
                    
            return JsonResponse({'result': result}, status=200)
        
        except KeyError:
            return JsonResponse({'message': 'KEY_ERROR'}, status=400)
        except Store.DoesNotExist:
            return JsonResponse({'message': 'STORE_DOES_NOT_EXIST'}, status=404)
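One detail worth calling out: the haversine package expects points as (latitude, longitude) tuples, and unit accepts either a Unit member or its string value ('m' is Unit.METERS). A quick standalone check with approximate Seoul coordinates (illustrative values, not taken from the view above):

from haversine import haversine, Unit

city_hall = (37.5663, 126.9779)    # (lat, lon), approximate
gwanghwamun = (37.5759, 126.9769)  # (lat, lon), approximate

# both spellings are equivalent because Unit.METERS has the value 'm'
print(haversine(city_hall, gwanghwamun, unit='m'))
print(haversine(city_hall, gwanghwamun, unit=Unit.METERS))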
def build_dataset(documents, user_location, query, collection):
    '''
    build the dataset for each query, which will be used for training and testing later on 

    Args: 
        documents: the documents returned from the mongodb cluster
        user_location: [longitude, latitude]
        query: the query's text
        collection: the mongodb collection used to look up nearby tweets
        
    Returns: 
        dataframe of the query's data, with columns: 'query', 'document', 'query_length', 'document_length',
       'jaccard_entire', 'sub_jaccard', 'prefix_match', 'elasticsearch_score',
       'distance'
    '''
    # define the list result of the documents data
    documents_data = []
    # go over each document and gather its info
    for doc in documents:

        # slice the location of the document
        coordinates = list(doc['_source']['location'].values())

        # get the tweets
        tweets = find_tweets_near_place(coordinates, collection)

        # skip documents that have no nearby tweets
        if len(tweets):

            # get the document name
            document = doc['_source']['name']

            # find the jaccard similarity
            jaccard_entire = jaccard_similarity(query.split(" "),
                                                document.split(" "))

            # slice the first 3 characters of each word and find the jaccard similarity
            sub_query = [word[:3] for word in query.split(" ")]
            sub_document = [word[:3] for word in document.split(" ")]

            sub_jaccard = jaccard_similarity(sub_query, sub_document)

            # check if the query and the document have the same prefix
            prefix_match = query[:3] == document[:3]

            # create two tuples of coordinates
            tweet_loc = (coordinates[0], coordinates[1])
            user_loc = (user_location[0], user_location[1])

            # get the distance between the user and the document (in kilometres, haversine's default unit)
            distance = hs.haversine(tweet_loc, user_loc)

            # define the dict for the document
            doc_data = {
                "query": query,
                "document": document,
                "query_length": len(query),
                "document_length": len(doc['_source']['name']),
                "jaccard_entire": jaccard_entire,
                "sub_jaccard": sub_jaccard,
                "prefix_match": prefix_match,
                "elasticsearch_score": doc['_score'],
                "distance": distance
            }

            # add the tweets info
            doc_data.update(tweets[0])

            # append the doc dict to the list result
            documents_data.append(doc_data)

    # return the documents data
    return pd.json_normalize(documents_data)
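jaccard_similarity is not shown in this excerpt; a minimal sketch that is consistent with how it is called on two token lists (an assumption, not necessarily the author's implementation):

def jaccard_similarity(tokens_a, tokens_b):
    """Jaccard index of two token lists: |A intersect B| / |A union B|."""
    set_a, set_b = set(tokens_a), set(tokens_b)
    union = set_a | set_b
    return len(set_a & set_b) / len(union) if union else 0.0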
def analyze_events(event_masks_xarray, class_masks_xarray, results_dir):
    """Analzse event masks of ARs and TCs

    Produces PNGs of
        - histograms of event lifetimes, speeds, and travel_distances
        - Frequency plots of genesus, termination, and global occurence
    
    Keyword arguements:
    class_masks_xarray -- the class masks as xarray, 0==Background, 1==TC, 2 ==AR
    event_masks_xarray -- the event masks as xarray with IDs as elements 
    results_dir -- the directory where the PNGs get saved to
    """
    # create results_dir if it doesn't exist
    pathlib.Path(results_dir).mkdir(parents=True, exist_ok=True)

    class_masks = class_masks_xarray.values
    event_masks = event_masks_xarray.values

    print('calculating centroids..', flush=True)

    def pixel_to_degree(pos):
        """Returns the (lat, lon) position of a pixel coordinate"""
        return (pos[0] * 180.0 / event_masks.shape[1] - 90,
                pos[1] * 360.0 / event_masks.shape[2] - 180)

    def average_location(coordinates_pixel):
        """Returns the average geolocation in pixel space

        Based on https://stackoverflow.com/questions/37885798/how-to-calculate-the-midpoint-of-several-geolocations-in-python
        """

        coordinates_degree = [
            pixel_to_degree(cord) for cord in coordinates_pixel
        ]

        x = 0.0
        y = 0.0
        z = 0.0

        for lat_deg, lon_deg in coordinates_degree:
            latitude = math.radians(lat_deg)
            longitude = math.radians(lon_deg)

            x += math.cos(latitude) * math.cos(longitude)
            y += math.cos(latitude) * math.sin(longitude)
            z += math.sin(latitude)

        total = len(coordinates_degree)

        x = x / total
        y = y / total
        z = z / total

        central_longitude = math.atan2(y, x)
        central_square_root = math.sqrt(x * x + y * y)
        central_latitude = math.atan2(z, central_square_root)

        average_degree = math.degrees(central_latitude), math.degrees(
            central_longitude)

        return (event_masks.shape[1] * (average_degree[0] + 90) / 180,
                event_masks.shape[2] * (average_degree[1] + 180) / 360)

    global centroids  # make function visible to pool

    def centroids(event_mask):
        """Returns a dict mapping from the IDs in event_mask to their centroids"""
        coordinates_per_id = {}

        for row in range(np.shape(event_mask)[0]):
            for col in range(np.shape(event_mask)[1]):
                this_id = event_mask[row][col]
                if this_id == 0:  # don't consider background as event
                    continue
                coordinates_per_id.setdefault(this_id, []).append((row, col))

        centroid_per_id = {}
        for this_id in coordinates_per_id:
            centroid_per_id[this_id] = average_location(
                coordinates_per_id[this_id])

        return centroid_per_id

    pool = Pool(psutil.cpu_count(logical=False))
    centroid_per_id_per_time = pool.map(centroids, event_masks)

    # %%
    print('extracting event types..', flush=True)
    global event_type_of_mask  # make function visible to pool

    def event_type_of_mask(event_mask, class_mask):
        """Returns a dict mapping from the IDs in event_mask to their type ('tc' or 'ar")"""
        event_type = {}  # event type as tring 'ar' or 'tc' per event ID
        for row in range(np.shape(event_mask)[0]):
            for col in range(np.shape(event_mask)[1]):
                this_id = event_mask[row][col]
                this_class = class_mask[row][col]
                if this_id == 0:
                    continue
                elif this_class == 1:
                    event_type[this_id] = 'tc'
                else:
                    event_type[this_id] = 'ar'
        return event_type

    pool = Pool(psutil.cpu_count(logical=False))
    pool_result = pool.starmap(event_type_of_mask, zip(event_masks,
                                                       class_masks))
    event_type = dict(i for dct in pool_result for i in dct.items())

    # %%
    print('calculating genesis and termination frequencies..', flush=True)
    genesis_time_per_id = {}
    termination_time_per_id = {}

    previous_ids = set()
    for time in range(len(event_masks)):
        for this_id in centroid_per_id_per_time[time].keys():
            if this_id not in previous_ids:
                genesis_time_per_id[this_id] = time
                previous_ids.add(this_id)
            termination_time_per_id[this_id] = time

    genesis_ids_per_time = {}
    termination_ids_per_time = {}
    for this_id, time in genesis_time_per_id.items():
        genesis_ids_per_time.setdefault(time, []).append(this_id)
    for this_id, time in termination_time_per_id.items():
        termination_ids_per_time.setdefault(time, []).append(this_id)

    genesis_count_ar = np.zeros(
        event_masks.shape[1:3])  # sum over all AR genesis events
    genesis_count_tc = np.zeros(event_masks.shape[1:3])
    termination_count_ar = np.zeros(event_masks.shape[1:3])
    termination_count_tc = np.zeros(event_masks.shape[1:3])
    for time in range(event_masks.shape[0]):
        genesis_events = np.isin(event_masks[time],
                                 genesis_ids_per_time.get(time, []))
        termination_events = np.isin(event_masks[time],
                                     termination_ids_per_time.get(time, []))
        genesis_count_tc += (class_masks[time] == 1) * genesis_events
        genesis_count_ar += (class_masks[time] == 2) * genesis_events
        termination_count_tc += (class_masks[time] == 1) * termination_events
        termination_count_ar += (class_masks[time] == 2) * termination_events

    # normalise to events per month (5 * 12 months, i.e. five years of data)
    genesis_frequency_ar = genesis_count_ar / (5 * 12)
    genesis_frequency_tc = genesis_count_tc / (5 * 12)
    termination_frequency_ar = termination_count_ar / (5 * 12)
    termination_frequency_tc = termination_count_tc / (5 * 12)

    # %%
    print('generating histograms..', flush=True)
    event_ids = set(genesis_time_per_id.keys()).union(
        set(termination_time_per_id.keys()))

    for event_class in ['tc', 'ar']:
        this_class_ids = set()
        for event_id in event_ids:
            if event_type[event_id] == event_class:
                this_class_ids.add(event_id)

        # lifetime calculation
        termination_times = np.array(
            [termination_time_per_id[event_id] for event_id in this_class_ids])
        genesis_times = np.array(
            [genesis_time_per_id[event_id] for event_id in this_class_ids])
        lifetimes = termination_times - genesis_times

        # lifetime histogram
        plt.figure(dpi=100)
        plt.hist(3 * lifetimes,
                 bins=np.arange(0, 264, 12),
                 cumulative=0,
                 rwidth=0.85,
                 color='#607c8e')  # multiplied by 3 to get result in hours
        plt.title(f"Lifetime histogram of {event_class.upper():s}s",
                  fontdict={'fontsize': 16})
        plt.rc('xtick', labelsize=8)
        plt.rc('ytick', labelsize=8)
        plt.xlabel("Lifetime in hours")
        plt.xticks(np.arange(12, 264, 48))
        plt.xlim(12, 252)
        plt.ylabel("Count")
        # plt.show()
        plt.savefig(results_dir + f"histogram_lifetime_{event_class:s}")

        # travel distance calculation
        termination_centroids = []
        genesis_centroids = []
        for i in range(len(this_class_ids)):
            termination_centroids.append(centroid_per_id_per_time[
                termination_times[i]][list(this_class_ids)[i]])
            genesis_centroids.append(centroid_per_id_per_time[genesis_times[i]]
                                     [list(this_class_ids)[i]])

        distances = np.array([
            hs.haversine(pixel_to_degree(pos1), pixel_to_degree(pos2))
            for pos1, pos2 in zip(termination_centroids, genesis_centroids)
        ])

        # travel distance histogram
        plt.figure(dpi=100)
        plt.hist(distances,
                 bins=np.arange(0, 10000, 500),
                 rwidth=0.85,
                 color='#607c8e')
        plt.title(f"Travel distance histogram of {event_class.upper():s}s",
                  fontdict={'fontsize': 16})
        plt.rc('xtick', labelsize=8)
        plt.rc('ytick', labelsize=8)
        plt.xlabel("distance in km")
        plt.xticks(np.arange(0, 10001, 2500))
        plt.xlim(0, 10000)
        plt.ylabel("Count")
        plt.savefig(results_dir + f"histogram_travel_distance_{event_class:s}")

        # speed histogram
        plt.figure(dpi=100)
        plt.hist(distances / (3 * lifetimes),
                 bins=np.arange(0, 100, 5),
                 rwidth=0.85,
                 color='#607c8e')  # lifetimes are in 3-hour steps, so km / (3 * lifetime) is km/h
        plt.title(f"Speed histogram of {event_class.upper():s}s",
                  fontdict={'fontsize': 16})
        plt.rc('xtick', labelsize=8)
        plt.rc('ytick', labelsize=8)
        plt.xlabel("speed in km/h")
        plt.xticks(np.arange(0, 101, 25))
        plt.xlim(0, 100)
        plt.ylabel("Count")
        plt.savefig(results_dir + f"histogram_speed_{event_class:s}")

    # set cartopy background dir to include blue marble
    os.environ['CARTOPY_USER_BACKGROUNDS'] = str(os.getcwd() +
                                                 '/climatenet/bluemarble')

    def map_instance(title):
        """Returns a matplotlib instance with bluemarble background"""
        plt.figure(figsize=(100, 20), dpi=100)
        plt.rc('xtick', labelsize=20)
        plt.rc('ytick', labelsize=20)
        mymap = plt.subplot(111, projection=ccrs.PlateCarree())
        mymap.set_global()
        mymap.background_img(name='BM')
        mymap.coastlines()
        mymap.gridlines(crs=ccrs.PlateCarree(),
                        linewidth=2,
                        color='k',
                        alpha=0.5,
                        linestyle='--')
        mymap.set_xticks([-180, -120, -60, 0, 60, 120, 180])
        mymap.set_yticks([-90, -60, -30, 0, 30, 60, 90])
        plt.title(title, fontdict={'fontsize': 44})
        return mymap

    def visualize_frequency_map(frequency_map, title, colorbar_text, filepath):
        """Save a PNG of frequency_map with title and colorbar_text at filepath"""

        # initialize
        mymap = map_instance(title)
        lon = np.linspace(0, 360, frequency_map.shape[1])
        lat = np.linspace(-90, 90, frequency_map.shape[0])

        # draw frequencies
        contourf = mymap.contourf(
            lon,
            lat,
            np.ma.masked_array(frequency_map, mask=(frequency_map == 0)),
            levels=np.linspace(0.0, frequency_map.max(), 11),
            alpha=0.7)

        #colorbar and legend
        cbar = mymap.get_figure().colorbar(contourf,
                                           orientation='vertical',
                                           ticks=np.linspace(
                                               0, frequency_map.max(), 3))
        cbar.ax.set_ylabel(colorbar_text, size=32)

        #save
        mymap.get_figure().savefig(filepath,
                                   bbox_inches="tight",
                                   facecolor='w')

    print('generating frequency maps..', flush=True)
    visualize_frequency_map(genesis_frequency_tc,
                            "Genesis frequency map of TCs",
                            "Frequency in events per month",
                            results_dir + "genesis_frequency_tc")
    visualize_frequency_map(genesis_frequency_ar,
                            "Genesis frequency map of ARs",
                            "Frequency in events per month",
                            results_dir + "genesis_frequency_ar")
    visualize_frequency_map(termination_frequency_tc,
                            "Termination frequency map of TCs",
                            "Frequency in events per month",
                            results_dir + "termination_frequency_tc")
    visualize_frequency_map(termination_frequency_ar,
                            "Termination frequency map of ARs",
                            "Frequency in events per month",
                            results_dir + "termination_frequency_ar")

    visualize_frequency_map(
        100 * ((class_masks == 1) * (event_masks != 0)).sum(axis=0) /
        event_masks.shape[0], "Global frequency map of TCs",
        "Frequency in % of time steps", results_dir + "global_frequency_tc")
    visualize_frequency_map(
        100 * ((class_masks == 2) * (event_masks != 0)).sum(axis=0) /
        event_masks.shape[0], "Global frequency map of ARs",
        "Frequency in % of time steps", results_dir + "global_frequency_ar")
Example #45
0
from haversine import haversine, Unit

lyon = (45.7597, 4.8422)  # (lat, lon)
paris = (48.8567, 2.3508)

haversine(lyon, paris)  # 392.2172595594006 (kilometres, the default unit)
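Unit, imported above, selects the return unit; approximate values for the same pair:

haversine(lyon, paris, unit=Unit.MILES)           # ~243.71
haversine(lyon, paris, unit=Unit.NAUTICAL_MILES)  # ~211.78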
Example #46
0
def distance(lat1, long1, lat2, long2):
    return haversine((lat1, long1), (lat2, long2), miles=True)
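miles=True is the keyword from haversine 1.x; later releases replaced it with the Unit enum, so an equivalent wrapper under the newer API (assuming a 2.x install) looks like:

from haversine import haversine, Unit

def distance(lat1, long1, lat2, long2):
    return haversine((lat1, long1), (lat2, long2), unit=Unit.MILES)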
Example #47
0
def main():
    # SEARCH AREA SIZE
    SIZE = 25

    # IDENTIFICATION CRITERIA
    forward_speed = 10  # m/s
    slp_threshold = 1000  # hPa
    ws_threshold = 15  # m/s

    # DATE CRITERIA
    start_date = datetime(2018, 10, 30, 0, 0, 0)
    stop_date = datetime(2018, 10, 30, 12, 0, 0)
    delta_hours = 1

    current_date = start_date
    L = []

    while current_date < stop_date:

        url = "/home/giangiui/Dropbox/Uni/Progetto/MedistormTracker/MEDIACANE_data/" + \
            "/wrf5_d01_" + \
              "{:04d}".format(current_date.year) + "{:02d}".format(current_date.month) + "{:02d}".format(
            current_date.day) + \
              "Z{:02d}".format(current_date.hour) + "{:02d}".format(current_date.minute) + ".nc"

        f = Dataset(url)
        lats = f.variables["latitude"][:]
        lons = f.variables["longitude"][:]
        slp = f.variables["SLP"][:][0]
        u10m = f.variables["U10M"][:][0]
        v10m = f.variables["V10M"][:][0]
        t2c = f.variables["T2C"][:][0]
        rh2 = f.variables["RH2"][:][0]
        uh = f.variables["UH"][:][0]

        for j in range(0, len(slp), SIZE):
            for i in range(0, len(slp[0]), SIZE):
                S = slp[j:j + SIZE, i:i + SIZE]
                iMin = None
                jMin = None
                for jj in range(0, len(S)):
                    for ii in range(0, len(S[0])):
                        if S[jj, ii] < slp_threshold:
                            iMin = i + ii
                            jMin = j + jj
                            ws = math.pow(
                                u10m[jMin, iMin] * u10m[jMin, iMin] +
                                v10m[jMin, iMin] * v10m[jMin, iMin], 0.5)
                            if ws >= ws_threshold:
                                L.append({
                                    "date": str(current_date),
                                    "lat": lats[jMin],
                                    "lon": lons[iMin],
                                    "data": {
                                        "slp": S[jj, ii],
                                        "ws": ws,
                                        "t2c": t2c[jMin, iMin],
                                        "rh2": rh2[jMin, iMin],
                                        "uh": uh[jMin, iMin]
                                    }
                                })

        current_date = current_date + timedelta(hours=delta_hours)

    # TEMPORAL ALGORITHM
    # Input: L, maximum storm speed Umax, minimum duration Tmin, storm identification criteria, maximum distance Dmax

    # SUCCESSOR IDENTIFICATION CRITERIA
    Umax = 15  # m/s
    mpsToKph = 3.6
    DMAX = Umax * mpsToKph * delta_hours

    # STORM IDENTIFICATION CRITERIA
    minDuration = 12  # hours
    maxSpeed = 30.0  # meters per second

    T = []
    current_date = start_date
    while current_date < stop_date:
        list_filtered = [
            item for item in L if (item['date'] == str(current_date))
        ]
        for l in list_filtered:
            track = []
            track.append(l)
            continueC = True
            current_date_exam = current_date + timedelta(hours=delta_hours)
            while continueC:
                list_filtered_exam = [
                    item for item in L
                    if item['date'] == str(current_date_exam)
                ]

                if list_filtered_exam:
                    for ll in list_filtered_exam:
                        current_distance = haversine((l["lat"], l["lon"]),
                                                     (ll["lat"], ll["lon"]))
                        if current_distance < DMAX:
                            track.append(ll)
                            current_date_exam = current_date_exam + timedelta(
                                hours=delta_hours)
                            break
                        else:
                            continueC = False
                else:
                    continueC = False

            if len(track) >= minDuration:  # delta_hours == 1, so the track length is its duration in hours
                T.append(track)
        current_date = current_date + timedelta(hours=delta_hours)

    print(T)
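The successor search radius DMAX is just the distance a storm centre can cover in one time step at the maximum allowed speed; with the constants above:

Umax = 15        # m/s, maximum storm speed
mpsToKph = 3.6   # m/s -> km/h conversion factor
delta_hours = 1  # hours between consecutive frames

DMAX = Umax * mpsToKph * delta_hours
print(DMAX)  # 54.0 km: candidates further away cannot be the same storm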
Example #48
0
    def computeFastestTrain(self):
        nodes = OrderedDict()
        edges = []
        mrtPath = []
        mrtRoutes = OrderedDict()
        mrtNodes = {}
        temp = {}
        # Retrieve all the json files under MRT directory
        path_to_json = "MRT/"
        json_files = [
            pos_json for pos_json in os.listdir(path_to_json)
            if pos_json.endswith('.geojson')
        ]
        for (index, name) in enumerate(json_files):
            with open(path_to_json + str(name)) as json_file:
                data = json.load(json_file)

            for feature in data['features']:

                #Added all the coordinates that follow the mrt path into mrtPath list
                if feature['geometry']['type'] == 'MultiLineString':
                    for y in feature['geometry']['coordinates']:
                        mrtPath.append(y)

                else:
                    coordinates = feature['geometry']['coordinates']
                    nodes[feature['properties']['node-details']] = coordinates
                    mrtNodes[tuple(
                        coordinates)] = feature['properties']['node-details']

                    lowest = 1000
                    lowestIndex = 0

                    i = 0
                    while i < len(mrtPath):
                        distance = haversine(coordinates, mrtPath[i])
                        if distance < lowest:
                            lowest = distance
                            lowestIndex = i
                        i += 1
                    mrtPath.insert(lowestIndex, coordinates)

            length = len(mrtPath)
            for i in range(length):
                c = tuple(mrtPath[i])
                k = str(i)
                mrtRoutes[k] = c
                temp[c] = k

            for i in range(length):
                if i + 1 != length:
                    distance = haversine(mrtPath[i], mrtPath[i + 1])
                    if tuple(mrtPath[i]) in mrtNodes:
                        edges.append((mrtNodes[tuple(mrtPath[i])],
                                      temp[tuple(mrtPath[i + 1])],
                                      distance / 70, "LRT"))
                    elif tuple(mrtPath[i + 1]) in mrtNodes:
                        edges.append((temp[tuple(mrtPath[i])],
                                      mrtNodes[tuple(mrtPath[i + 1])],
                                      distance / 70, "LRT"))
                    else:
                        edges.append((temp[tuple(mrtPath[i])],
                                      temp[tuple(mrtPath[i + 1])],
                                      distance / 70, "LRT"))

            temp.clear()
            mrtPath.clear()

        with open('Combined/nodes.json') as f:
            getJson = json.load(f)
            feature_access = getJson['features']

            for feature_data in feature_access:
                prop = feature_data['properties']
                if 'node-details' in prop:
                    location_name = prop['node-details']
                    nodes[location_name] = feature_data['geometry'][
                        'coordinates']

        findPath = ShortestPath(nodes)
        findPath.createEdges()
        findPath.createMrtEdgeNodes(edges, mrtNodes, mrtRoutes)
        graph = findPath.buildAGraph()
        print("Get graph: " + str(graph))
        path = findPath.findShortestPath(graph, self.comboStart.currentText(),
                                         self.comboEnd.currentText())

        print("Get Path: " + str(path))

        self.m = folium.Map(location=[1.4053, 103.9021], zoom_start=16)
        self.lblSelectedBusRoute.setText('Bus Route Displayed: ')
        folium.PolyLine(path, opacity=1, color='red').add_to(self.m)
        self.marker_cluster = MarkerCluster().add_to(self.m)
        self.initMap(self.m, self.marker_cluster)
        data = io.BytesIO()
        self.m.save(data, close_file=False)
        self.mapView.setHtml(data.getvalue().decode())
Example #49
0
            location_2 = (lat2, long2)
            thewriter.writerow({
                'node': i,
                'destination': j,
                'Lat1': format(lat1, '.4f'),
                'Long1': format(long1, '.4f'),
                'Lat2': format(lat2, '.4f'),
                'Long2': format(long2, '.4f'),
                'value': haversine(location_1, location_2, unit=Unit.KILOMETERS)
            })

with open('sequence.csv', 'w', newline='') as f:
    fieldnames = [
        'node', 'destination', 'Lat1', 'Long1', 'Lat2', 'Long2', 'value'
    ]
    thewriter = csv.DictWriter(f, fieldnames=fieldnames)
    thewriter.writeheader()
    for i in range(length):

        if i != length - 1:
            lat1 = float(df[latColumnName].values[i])
            long1 = float(df[longColumnName].values[i])
            lat2 = float(df[latColumnName].values[i + 1])
            long2 = float(df[longColumnName].values[i + 1])
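The excerpt above is cut mid-file (the writerow block at the top belongs inside a loop like the one that follows it). A compact, self-contained sketch of the same pattern, with a hypothetical frame standing in for df and its latColumnName/longColumnName columns:

import csv

import pandas as pd
from haversine import haversine, Unit

# hypothetical stand-in for df[latColumnName] / df[longColumnName]
df = pd.DataFrame({'lat': [52.52, 48.86, 41.90], 'lon': [13.40, 2.35, 12.50]})

with open('sequence.csv', 'w', newline='') as f:
    fieldnames = ['node', 'destination', 'Lat1', 'Long1', 'Lat2', 'Long2', 'value']
    thewriter = csv.DictWriter(f, fieldnames=fieldnames)
    thewriter.writeheader()
    for i in range(len(df) - 1):
        lat1, long1 = float(df['lat'][i]), float(df['lon'][i])
        lat2, long2 = float(df['lat'][i + 1]), float(df['lon'][i + 1])
        thewriter.writerow({
            'node': i,
            'destination': i + 1,
            'Lat1': format(lat1, '.4f'),
            'Long1': format(long1, '.4f'),
            'Lat2': format(lat2, '.4f'),
            'Long2': format(long2, '.4f'),
            'value': haversine((lat1, long1), (lat2, long2), unit=Unit.KILOMETERS),
        })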
Example #50
0
    sns.scatterplot(LAT_LNG[:, 0],
                    LAT_LNG[:, 1],
                    hue=y_label,
                    palette=sns.color_palette("Set1", n_colors=NUM_CLUSTERS))
    # highlight the furthest point in black

    sns.scatterplot(LAT_LNG[max_indices, 0],
                    LAT_LNG[max_indices, 1],
                    color='black')
    plt.show()
    # endregion

    distances = []
    for i, centroid in enumerate(centroids):
        distances.append(
            haversine(farthest_points[i], centroid, unit=Unit.METERS))
        if distances[i] > MAX_DISTANCE:  # generate new cluster if distance is more than a threshold
            temp = NUM_CLUSTERS + 1

    if temp == NUM_CLUSTERS:
        outlier_label = outlier_cluster(kmeans, MIN_POINTS_PER_CLUSTER)
        if outlier_label == -1:  # no outliers detected
            print("Success! No outliers detected")
            break
        else:
            print("Outlier cluster:", outlier_label)
            indices = outlier_cluster_data_point_indices(outlier_label)
            LAT_LNG = remove_outlier_cluster(LAT_LNG, indices)
    elif temp > NUM_CLUSTERS:
        NUM_CLUSTERS = temp
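farthest_points and max_indices are computed outside this excerpt; a plausible reconstruction (an assumption, not the original code) finds, per cluster, the member point with the largest haversine distance to its centroid:

import numpy as np
from haversine import haversine, Unit

def farthest_per_cluster(lat_lng, labels, centroids):
    """For each cluster, the member point farthest (haversine) from its centroid."""
    farthest_points, max_indices = [], []
    for k, centroid in enumerate(centroids):
        members = np.where(labels == k)[0]
        dists = [haversine(tuple(lat_lng[i]), tuple(centroid), unit=Unit.METERS)
                 for i in members]
        j = int(members[int(np.argmax(dists))])
        max_indices.append(j)
        farthest_points.append(tuple(lat_lng[j]))
    return farthest_points, max_indices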
Example #51
0
def mark(request):
    """ Adds a duck, location, photo and link from webform """
    # if this is a POST request we need to process the form data
    if request.method == 'POST':
        # create a form instance and populate it with data from the request:
        form = DuckForm(request.POST)
        # check whether it's valid:
        if form.is_valid():
            duck_id = form.cleaned_data['duck_id']
            try:
                duck = Duck.objects.get(pk=duck_id)
                if duck.name == 'Unnamed' and form.cleaned_data[
                        'name'] != 'Unnamed':
                    duck.name = form.cleaned_data['name']
            except Duck.DoesNotExist:
                name = form.cleaned_data['name'] if form.cleaned_data[
                    'name'] else 'Unnamed'
                duck = Duck(duck_id=duck_id,
                            name=name,
                            approved='Y',
                            create_time=datetime.datetime.now().strftime(
                                '%Y-%m-%d %H:%M:%S'),
                            comments='')

            # Calculate the distance since last location
            last_duck_location = DuckLocation.objects.filter(
                duck_id=duck_id).order_by('-date_time')[0]
            distance_travelled = haversine(
                (last_duck_location.latitude, last_duck_location.longitude),
                (form.cleaned_data['lat'], form.cleaned_data['lng']),
                unit=Unit.MILES)
            duck_location = DuckLocation(
                duck=duck,
                latitude=form.cleaned_data['lat'],
                longitude=form.cleaned_data['lng'],
                location=form.cleaned_data['location'],
                date_time=form.cleaned_data['date_time'],
                comments=form.cleaned_data['comments'],
                distance_to=round(distance_travelled, 2),
                user=request.user,
                approved='Y')
            duck_location.save()
            if request.FILES and request.FILES['image']:
                photo_info = media.handle_uploaded_file(
                    request.FILES['image'], duck_id, duck.name,
                    form.cleaned_data['comments'])
                duck_location_photo = DuckLocationPhoto(
                    duck_location=duck_location,
                    flickr_photo_id=photo_info['id'],
                    flickr_thumbnail_url=photo_info['sizes']['Small 320']
                    ['source'])
                duck_location_photo.save()

            duck.total_distance = round(
                DuckLocation.objects.filter(duck_id=duck_id).aggregate(
                    Sum('distance_to'))['distance_to__sum'], 2)
            duck.save()

            # redirect to a new URL:
            return HttpResponseRedirect('/location/' +
                                        str(duck_location.duck_location_id))
    # if a GET (or any other method) we'll create a blank form
    else:
        form = DuckForm()

    map_data = {
        'width': '100%',
        'height': '400px',
        'focus_lat': 35,
        'focus_long': -30,
        'focus_zoom': 1,
        'location_list': [],
        'duck_location_id': 0,
    }
    return render(request, 'duck/mark.html', {'form': form, 'map': map_data})
Example #52
0
    def computeFastestBus(self):
        # Fastest Bus Route
        nodes = OrderedDict()
        edges = []
        busPath = []
        busRoutes = OrderedDict()
        busNodes = {}
        temp = {}
        #Retrieve all the json files under Bus_Path directory
        path_to_json = "Bus_Path/"
        #Referenced from: https://stackoverflow.com/questions/30539679/python-read-several-json-files-from-a-folder
        json_files = [
            pos_json for pos_json in os.listdir(path_to_json)
            if pos_json.endswith('.geojson')
        ]
        for (index, name) in enumerate(json_files):
            with open(path_to_json + str(name)) as json_file:
                data = json.load(json_file)

            for feature in data['features']:

                #Added all the coordinates that follow the bus path into busPath list
                if feature['geometry']['type'] == 'MultiLineString':
                    for y in feature['geometry']['coordinates']:
                        busPath.append(y)

                #Added all the nodes that are placed on the map into the busNodes dict
                else:
                    coordinates = feature['geometry']['coordinates']
                    nodes[feature['properties']['node-details']] = coordinates
                    print("Nodes: " + str(coordinates))
                    busNodes[tuple(
                        coordinates)] = feature['properties']['node-details']

                    lowest = 1000
                    lowestIndex = 0
                    i = 0
                    while i < len(busPath):
                        distance = haversine(coordinates, busPath[i])
                        if distance < lowest:
                            lowest = distance
                            lowestIndex = i
                        i += 1
                    busPath.insert(lowestIndex, coordinates)

            length = len(busPath)
            for i in range(length):
                c = tuple(busPath[i])
                k = str(i)
                busRoutes[k] = c
                temp[c] = k

            #Added for overall edges for the other node coordinates to find out the fastest path based on speed
            for i in range(length):
                if i + 1 != length:
                    distance = haversine(busPath[i], busPath[i + 1])
                    if tuple(busPath[i]) in busNodes:
                        edges.append((busNodes[tuple(busPath[i])],
                                      temp[tuple(busPath[i + 1])],
                                      distance / 60, "Bus"))
                    elif tuple(busPath[i + 1]) in busNodes:
                        edges.append((temp[tuple(busPath[i])],
                                      busNodes[tuple(busPath[i + 1])],
                                      distance / 60, "Bus"))
                    else:
                        edges.append((temp[tuple(busPath[i])],
                                      temp[tuple(busPath[i + 1])],
                                      distance / 60, "Bus"))

            temp.clear()
            busPath.clear()

        with open('Combined/nodes.json') as f:
            getJson = json.load(f)
            feature_access = getJson['features']

            for feature_data in feature_access:
                prop = feature_data['properties']
                if 'node-details' in prop:
                    location_name = prop['node-details']
                    nodes[location_name] = feature_data['geometry'][
                        'coordinates']

        findPath = ShortestPath(nodes)
        findPath.createEdges()
        findPath.createBusEdgeNodes(edges, busNodes, busRoutes)
        graph = findPath.buildAGraph()
        print("Get graph: " + str(graph))
        path = findPath.findShortestPath(graph, self.comboStart.currentText(),
                                         self.comboEnd.currentText())

        print("Get Path: " + str(path))

        self.m = folium.Map(location=[1.4053, 103.9021], zoom_start=16)
        self.lblSelectedBusRoute.setText('Bus Route Displayed: ')
        folium.PolyLine(path, opacity=1, color='#800080').add_to(self.m)
        self.marker_cluster = MarkerCluster().add_to(self.m)
        self.initMap(self.m, self.marker_cluster)
        data = io.BytesIO()
        self.m.save(data, close_file=False)
        self.mapView.setHtml(data.getvalue().decode())
Example #53
0
def process_points(data):
    alt_dif = []
    time_dif = []
    dist_vin = []
    dist_hav = []
    dist_vin_no_alt = []
    dist_hav_no_alt = []
    dist_dif_hav_2d = []
    dist_dif_vin_2d = []

    for start, stop in zip(data[0::], data[1::]):

        distance_vin_2d = distance.geodesic((start.latitude, start.longitude),
                                            (stop.latitude, stop.longitude)).m
        dist_dif_vin_2d.append(distance_vin_2d)

        distance_hav_2d = haversine.haversine(
            (start.latitude, start.longitude),
            (stop.latitude, stop.longitude)) * 1000
        dist_dif_hav_2d.append(distance_hav_2d)

        dist_vin_no_alt.append(
            (dist_vin_no_alt[-1] if len(dist_vin_no_alt) > 0 else 0) +
            distance_vin_2d)
        dist_hav_no_alt.append(
            (dist_hav_no_alt[-1] if len(dist_hav_no_alt) > 0 else 0) +
            distance_hav_2d)

        alt_d = start.elevation - stop.elevation

        alt_dif.append(alt_d)

        distance_vin_3d = math.sqrt(distance_vin_2d ** 2 + alt_d ** 2)

        distance_hav_3d = math.sqrt(distance_hav_2d ** 2 + alt_d ** 2)

        time_delta = (stop.time - start.time).total_seconds()

        time_dif.append(time_delta)

        dist_vin.append((dist_vin[-1] if len(dist_vin) > 0 else 0) +
                        distance_vin_3d)
        dist_hav.append((dist_hav[-1] if len(dist_hav) > 0 else 0) +
                        distance_hav_3d)

    # print('Vincenty 2D : ', dist_vin_no_alt[-1])
    # print('Haversine 2D : ', dist_hav_no_alt[-1])
    # print('Vincenty 3D : ', dist_vin[-1])
    # print('Haversine 3D : ', dist_hav[-1])
    # print('Total Time : ', math.floor(sum(time_dif)/60),
    #       ' min ', int(sum(time_dif) % 60), ' sec ')
    # print('Elevation diff: ', int(sum(alt_dif)))
    # print('Elevation loss: ', abs(int(sum([a for a in alt_dif if a > 0]))))
    # print('Elevation gain: ', abs(int(sum([a for a in alt_dif if a < 0]))))

    df = pd.DataFrame()
    df['dis_vin_2d'] = dist_vin_no_alt
    df['dist_hav_2d'] = dist_hav_no_alt
    df['dis_vin_3d'] = dist_vin
    df['dis_hav_3d'] = dist_hav
    df['alt_dif'] = alt_dif
    df['time_dif'] = time_dif
    df['dis_dif_hav_2d'] = dist_dif_hav_2d
    df['dis_dif_vin_2d'] = dist_dif_vin_2d

    # clean the data set: drop zero-duration samples to avoid division by zero
    df = df[df['time_dif'] > 0.0]

    df['dist_dif_per_sec'] = df['dis_dif_hav_2d'] / df['time_dif']
    df['spd'] = (df['dis_dif_hav_2d'] / df['time_dif']) * 3.6

    df_with_timeout = df[df['dist_dif_per_sec'] > MOVEMENT_THRESHOLD]

    avg_km_h = (sum((df_with_timeout['spd'] * df_with_timeout['time_dif'])) /
                sum(df_with_timeout['time_dif']))

    # print(math.floor(60 / avg_km_h), 'minutes',
    #       round(((60 / avg_km_h - math.floor(60 / avg_km_h))*60), 0),
    #       ' seconds')

    return {
        'dist': dist_hav_no_alt[-1],
        'total_time': math.floor(sum(time_dif)),
        'moving_time': sum(df_with_timeout['time_dif']),
        'alt_loss': abs(int(sum([a for a in alt_dif if a > 0]))),
        'alt_gain': abs(int(sum([a for a in alt_dif if a < 0]))),
        'avg_km_h': avg_km_h
    }
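Each 3-D segment length above is the Pythagorean combination of the 2-D haversine distance and the elevation change between the two track points; a worked number:

import math

distance_hav_2d = 120.0  # metres along the surface (haversine * 1000)
alt_d = 9.0              # metres of elevation change

distance_hav_3d = math.sqrt(distance_hav_2d ** 2 + alt_d ** 2)
print(distance_hav_3d)   # ~120.34: barely longer than the flat distance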
Example #54
0
ON a.brewery_id = c.brewery_id
"""
df = pd.read_sql_query(query, engine)

# Concatenate beer types on brewery level for visualization
df_agg = df.copy()
group = ['brewery_id', 'brewery_name', 'latitude', 'longitude']
df_agg = df_agg.groupby(group)['beer_type'].agg(
    lambda x: '|'.join(x)).reset_index()

# Calculate distances in km between coordinates
# Discard places which are too far from starting point
# Filter distance matrix and sort by distance
df_loc = pd.concat([home, df_agg]).reset_index(drop=True)
df_loc['distance'] = df_loc.apply(
    lambda x: haversine(starting_point, [x['latitude'], x['longitude']]),
    axis=1)
df_loc = df_loc.sort_values(by='distance')
df_loc = df_loc[df_loc['distance'] < MAX_DISTANCE * 0.4]

points_coordinate = df_loc[coord].copy()
distance_matrix = pairwise_distances(X=points_coordinate, metric=haversine)
num_points = points_coordinate.shape[0]
# raise exception if total number of points is less than 8:
if num_points < 8:
    raise Exception(
        'Not enough factories in this location. Try a different location.')
num_points = num_points - 1 if num_points % 2 == 1 else num_points

def calculate_adj_points_dis(df_origin, inplace=False):
    df = df_origin if inplace else df_origin.copy()
    df.loc[:, 'x1'], df.loc[:, 'y1'] = df.x.shift(1), df.y.shift(1)
    return df.apply(lambda i: haversine((i.y, i.x), (i.y1, i.x1)) * 1000,
                    axis=1)
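calculate_adj_points_dis expects a frame with x (longitude) and y (latitude) columns and returns, row by row, the distance in metres to the previous point (NaN for the first row, where the shifted coordinates are missing). A hypothetical call:

import pandas as pd

pts = pd.DataFrame({'x': [2.3508, 4.8422], 'y': [48.8567, 45.7597]})  # Paris, Lyon
print(calculate_adj_points_dis(pts))
# 0          NaN
# 1    ~392217.0   <- metres from the previous row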
def write_crm_train_test():
    import sqlalchemy as sql
    import json
    import pandas as pd
    import datetime
    import os
    import numpy as np
    from haversine import haversine
    import glob
    import random
    from multiprocessing import Pool
    from itertools import repeat
    from dateutil.relativedelta import relativedelta
    print("Start_Part_2: %s" % str(datetime.datetime.now()))

    with open('./config.json', 'rb') as f:
        dict_config = json.load(f)

    username = dict_config['username']
    password = dict_config['password']
    database = dict_config['database']
    folder_store_list = dict_config['folder_store_list']
    path_TA_excel = dict_config['path_TA_excel']
    path_json_zip_center = dict_config['path_json_zip_center']
    pos_end_date = dict_config['pos_end_date']
    folder_store_list = dict_config['folder_store_list']
    folder_email_unsub = dict_config['folder_email_unsub']

    with open('./table_names_%s.json' % str(pos_end_date).replace("-", ""),
              'rb') as f:
        dict_table_names = json.load(f)
    table_filtered_crm = dict_table_names['table_filtered_crm']

    BL_engine = sql.create_engine("mysql+pymysql://%s:%s@localhost/%s" %
                                  (username, password, database))

    # In[3]:

    def create_index(table_name, list_of_columns):
        columns = ', '.join(list_of_columns)
        query = "CREATE INDEX id_index ON %s(%s)" % (table_name, columns)
        print(query)
        with BL_engine.connect() as connection:
            result = connection.execute(query)
            result.close()
        return

    def week_end_dt(date_input):
        weekday_int = date_input.weekday()
        if weekday_int == 6:
            return date_input + datetime.timedelta(days=6)
        else:
            return date_input + datetime.timedelta(days=5 - weekday_int)
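    # week_end_dt maps any date to its week-ending Saturday (weeks run Sunday
    # through Saturday), e.g. week_end_dt(date(2020, 10, 7)) -> date(2020, 10, 10)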

    high_date = datetime.datetime.strptime(dict_config['crm_end_date'],
                                           "%Y-%m-%d").date()
    if dict_config['recent_n_month']:
        recent_n_month = dict_config['recent_n_month']
        pos_start_date_id_filter = str(high_date - datetime.timedelta(
            days=int(np.ceil(365 * recent_n_month / 12))))
    else:
        pos_start_date_id_filter = dict_config["pos_start_date"]

    sql_str_high_date = "'%s'" % str(high_date)
    sql_str_lastweekstart_date = "'%s'" % str(high_date -
                                              datetime.timedelta(days=6))
    # sql_sign_up_start_date="'%s'"%str(sign_up_start_date)
    sql_POS_start_date = "'%s'" % str(pos_start_date_id_filter)
    str_week_end_d = str(high_date).replace("-", "")
    print("check point 1")

    path_store_list = glob.glob(folder_store_list + "*.txt")
    path_store_list.sort()
    path_store_list_ahead = [
        x for x in path_store_list
        if "MediaStormStores%s" % str_week_end_d[:6] in x
    ][0]
    # updated 2020-10-03
    str_month_after = (datetime.datetime.strptime(str_week_end_d, '%Y%m%d') +
                       relativedelta(months=1)).date()
    str_month_after = str(str_month_after).replace("-", "")
    # path_store_list_after=[x for x in path_store_list if "MediaStormStores%s"%str_month_after in x][0]

    df_store_list = pd.read_csv(path_store_list_ahead, sep="|")
    df_store_list = df_store_list[[
        'location_id', 'address_line_1', 'address_line_2', 'city_nm',
        'state_nm', 'zip_cd', 'latitude_meas', 'longitude_meas'
    ]]
    df_store_list['latitude_meas'] = df_store_list['latitude_meas'].astype(
        float)
    df_store_list['longitude_meas'] = df_store_list['longitude_meas'].astype(
        float)
    df_store_list['zip_cd'] = df_store_list['zip_cd'].apply(
        lambda x: x.split("-")[0].zfill(5))
    df_store_list = df_store_list[~df_store_list['location_id'].
                                  isin(['145', '6990'])]
    df_store_list['location_id'] = df_store_list['location_id'].astype(str)
    #
    TA_zips = pd.ExcelFile(path_TA_excel)
    TA_zips = TA_zips.parse("view_by_store", dtype=str)

    df_temporary = TA_zips[[
        'location_id', 'trans_P_zips_70_within_TA',
        'trans_S_zips_70_within_TA', 'zips_in_10'
    ]]
    df_zip_by_store = pd.DataFrame()

    for ind, row in df_temporary.iterrows():
        location_id = str(row['location_id'])
        P_zips = eval(row['trans_P_zips_70_within_TA'])
        S_zips = eval(row['trans_S_zips_70_within_TA'])
        zip_10 = eval(row['zips_in_10'])

        df_P = pd.DataFrame(zip([location_id] * len(P_zips), P_zips))
        if len(df_P) > 0:
            df_P.columns = ['location_id', 'zip_cd']
            df_P['zip_type'] = "P"

        df_S = pd.DataFrame(zip([location_id] * len(S_zips), S_zips))
        if len(df_S) > 0:
            df_S.columns = ['location_id', 'zip_cd']
            df_S['zip_type'] = "S"

        df_10 = pd.DataFrame(zip([location_id] * len(zip_10), zip_10))
        if len(df_10) > 0:
            df_10.columns = ['location_id', 'zip_cd']
            df_10['zip_type'] = "zip_10"

        df_zip_by_store = df_zip_by_store.append(df_P).append(df_S).append(
            df_10)
    df_zip_by_store['location_id'] = df_zip_by_store['location_id'].astype(str)
    df_store_list = df_store_list[[
        'location_id', 'latitude_meas', 'longitude_meas'
    ]]
    df_store_zip = pd.merge(df_store_list,
                            df_zip_by_store,
                            on="location_id",
                            how="left")
    df_store_zip_new = df_store_zip[pd.isnull(df_store_zip['zip_cd'])]
    df_store_zip_existing = df_store_zip[pd.notnull(df_store_zip['zip_cd'])]

    df_store_zip_new_no_loc = df_store_zip_new[
        df_store_zip_new['latitude_meas'] == 0]
    df_store_zip_new_with_loc = df_store_zip_new[
        df_store_zip_new['latitude_meas'] != 0]
    df_store_zip_new_with_loc = df_store_zip_new_with_loc[[
        'location_id', 'latitude_meas', 'longitude_meas'
    ]]
    df_store_zip_new_no_loc = df_store_zip_new_no_loc[[
        'location_id', 'latitude_meas', 'longitude_meas'
    ]]
    if len(df_store_zip_new_no_loc) > 0:
        store_list_later = [
            x for x in path_store_list
            if x.split("MediaStormStores")[1][:6] > str_week_end_d
        ]
        store_list_later = sorted(store_list_later,
                                  key=lambda x: os.stat(x).st_mtime)
        for file in store_list_later:
            df = pd.read_csv(
                file,
                dtype=str,
                sep="|",
                usecols=['location_id', 'latitude_meas', 'longitude_meas'])
            df = df[['location_id', 'latitude_meas', 'longitude_meas']]
            df['latitude_meas'] = df['latitude_meas'].astype(float)
            df['longitude_meas'] = df['longitude_meas'].astype(float)
            df['location_id'] = df['location_id'].astype(str)
            df = df[df['location_id'].isin(
                df_store_zip_new_no_loc['location_id'].tolist())]
            df = df[df['latitude_meas'] != 0]
            df_store_zip_new_with_loc = df_store_zip_new_with_loc.append(df)
            df_store_zip_new_no_loc = df_store_zip_new_no_loc[
                ~df_store_zip_new_no_loc['location_id'].isin(df['location_id'].
                                                             tolist())]
            if len(df_store_zip_new_no_loc) == 0:
                break
        df_store_zip_new = df_store_zip_new_with_loc.reset_index()
        del df_store_zip_new['index']
        if len(df_store_zip_new_with_loc) > 0:
            del df_store_zip_new_with_loc
        if len(df_store_zip_new_no_loc) > 0:
            del df_store_zip_new_no_loc

    zip_centers = json.load(open(path_json_zip_center, "r"))
    if len(df_store_zip_new) > 0:

        df_all_new_zip = pd.DataFrame()
        for i, row in df_store_zip_new.iterrows():
            store_coor = (row['latitude_meas'], row['longitude_meas'])
            store_num = row['location_id']
            list_store_zip = []
            for zip_cd, v in zip_centers.items():
                dist = haversine(store_coor, v, unit="mi")
                if dist <= 10:
                    list_store_zip.append(zip_cd)
            df = pd.DataFrame(
                {
                    "zip_cd": list_store_zip,
                    "zip_type": ["zip_10"] * len(list_store_zip)
                },
                index=[store_num] * len(list_store_zip))
            df = df.reset_index().rename(columns={"index": "location_id"})
            df_all_new_zip = df_all_new_zip.append(df)

        df_store_zip_new = pd.merge(df_store_zip_new,
                                    df_all_new_zip,
                                    on="location_id",
                                    how="left")

        df_store_zip = df_store_zip_existing.append(df_store_zip_new)
    else:
        df_store_zip = df_store_zip_existing
    df_zip_type = df_store_zip[['zip_cd', 'zip_type']].drop_duplicates()
    df_zip_type = df_zip_type.sort_values(['zip_cd', 'zip_type'])
    print(df_zip_type['zip_type'].unique().tolist())
    df_unique_zip_type = df_zip_type.drop_duplicates("zip_cd")

    list_P_zips = df_zip_type[df_zip_type['zip_type'] ==
                              "P"]['zip_cd'].tolist()
    list_S_zips = df_zip_type[df_zip_type['zip_type'] ==
                              "S"]['zip_cd'].tolist()
    list_10_zips = df_zip_type[df_zip_type['zip_type'] ==
                               "zip_10"]['zip_cd'].tolist()

    df_store_list = df_store_zip[[
        'location_id', 'latitude_meas', 'longitude_meas'
    ]].drop_duplicates().reset_index()
    del df_store_list['index']
    #
    print("check point 2")

    # In[5]:

    processors = 20

    list_all_zips = list(zip_centers.keys())
    len_chunk = int(np.ceil(len(list_all_zips) / processors))
    list_of_input_all_us_zip_list = []

    for i in range(processors):
        l = list_all_zips[i * len_chunk:(i + 1) * len_chunk]
        list_of_input_all_us_zip_list.append(l)

    p = Pool(processors)
    result = p.starmap(
        get_dist_output_df,
        zip(list_of_input_all_us_zip_list, repeat(df_store_list),
            repeat(zip_centers)))
    ## result = p.map(get_dist_output_df, list_of_input_all_us_zip_list)
    # get_dist_output_df is defined in the main py file, because functions passed to Pool must be defined at top level
    df_zips_with_BL_store = pd.DataFrame()
    for res in result:
        if res is not None:
            df_zips_with_BL_store = df_zips_with_BL_store.append(res)
    p.close()
    p.join()
    print("check point 3")

    print(df_zips_with_BL_store.shape,
          df_zips_with_BL_store['zip_cd'].nunique(),
          df_zips_with_BL_store['nearest_BL_store'].nunique())
    df_zips_with_BL_store['zip_cd'] = df_zips_with_BL_store['zip_cd'].astype(
        str)
    df_zips_with_BL_store['zip_cd'] = df_zips_with_BL_store['zip_cd'].apply(
        lambda x: x.zfill(5))

    # In[6]:

    # IVs
    print(datetime.datetime.now())
    df_1 = pd.read_sql(
        "select t1.customer_id_hashed, sign_up_channel, sign_up_location, customer_zip_code, t1.sign_up_date from BL_Rewards_Master as t1 right join %s as t2 on t1.customer_id_hashed=t2.customer_id_hashed;"
        % table_filtered_crm,
        con=BL_engine)
    df_1 = df_1.sort_values("sign_up_date", ascending=False)
    df_1 = df_1.drop_duplicates("customer_id_hashed")

    df_1_len = df_1.shape[0]
    df_1_id_nunique = df_1['customer_id_hashed'].nunique()
    print("df_1_len", df_1_len)
    print("df_1_id_nunique", df_1_id_nunique)
    print(datetime.datetime.now())

    df_1['customer_zip_code'] = df_1['customer_zip_code'].astype(str)
    df_1['customer_zip_code'] = df_1['customer_zip_code'].apply(
        lambda x: x.split("-")[0].split(" ")[0].zfill(5)[:5])
    # df_1['sign_up_date']=pd.to_datetime(df_1['sign_up_date'],format="%Y-%m-%d").dt.date
    # df_1['weeks_since_sign_up']=df_1['sign_up_date'].apply(lambda x: int(np.ceil((high_date-x).days/7)))
    df_1['P_zip'] = np.where(df_1['customer_zip_code'].isin(list_P_zips), 1, 0)
    df_1['S_zip'] = np.where(df_1['customer_zip_code'].isin(list_S_zips), 1, 0)
    df_1['else_10_zip'] = np.where(
        df_1['customer_zip_code'].isin(list_10_zips), 1, 0)
    # del df_1['customer_zip_code']
    df_1['signed_online'] = np.where(df_1['sign_up_channel'] == "STORE", 0, 1)
    del df_1['sign_up_channel']

    df_1['sign_up_location'] = df_1['sign_up_location'].fillna("-1")
    df_1['sign_up_location'] = df_1['sign_up_location'].astype(float)
    df_1['sign_up_location'] = df_1['sign_up_location'].astype(int).astype(str)

    df_copy_sign_up = df_1[['sign_up_location',
                            'customer_zip_code']].drop_duplicates()
    df_copy_sign_up = df_copy_sign_up.reset_index()
    del df_copy_sign_up['index']
    print("check point 4")

    # In[7]:

    # distance to sign up stores
    df_store_all = pd.DataFrame(
        columns=['location_id', 'latitude_meas', 'longitude_meas'])

    list_all_stores = glob.glob(folder_store_list + "*.txt")
    list_all_stores = [x for x in list_all_stores if "MediaStormStores" in x]
    list_all_stores = sorted(list_all_stores,
                             key=lambda x: x.split("MediaStormStores")[1][:8])
    list_all_stores = [
        x for x in list_all_stores if x.split("MediaStormStores")[1][:8] <=
        str(high_date + datetime.timedelta(days=2)).replace("-", "")
    ]
    list_all_stores.reverse()

    for file in list_all_stores:
        df = pd.read_table(
            file,
            dtype=str,
            sep="|",
            usecols=['location_id', 'latitude_meas', 'longitude_meas'])
        df = df[['location_id', 'latitude_meas', 'longitude_meas']]
        df['latitude_meas'] = df['latitude_meas'].astype(float)
        df['longitude_meas'] = df['longitude_meas'].astype(float)
        df = df[~df['location_id'].isin(['145', '6990'])]
        df = df[~df['location_id'].isin(df_store_all['location_id'].tolist())]
        df_store_all = df_store_all.append(df)
    df_store_all['store_coor'] = df_store_all[[
        'latitude_meas', 'longitude_meas'
    ]].values.tolist()
    dict_store_all = df_store_all.set_index(
        "location_id").to_dict()['store_coor']
    df_copy_sign_up['distc_to_sign_up'] = np.nan
    for i, row in df_copy_sign_up.iterrows():
        try:
            store_coor = dict_store_all[row['sign_up_location']]
            zip_center = zip_centers[row['customer_zip_code']]
            dist = haversine(store_coor, zip_center, unit="mi")
            df_copy_sign_up.loc[i, "distc_to_sign_up"] = dist

        except:
            continue
    df_1 = pd.merge(df_1,
                    df_copy_sign_up,
                    on=['sign_up_location', 'customer_zip_code'],
                    how="left")
    print("check point 5")
    #
    list_unsub = glob.glob(folder_email_unsub + "*.csv")
    df_unsub_files = pd.DataFrame({"file_path": list_unsub})
    df_unsub_files['date'] = df_unsub_files['file_path'].apply(
        lambda x: x.split("ile_Refresh__")[1][:8])
    df_unsub_files['date'] = pd.to_datetime(df_unsub_files['date']).dt.date
    df_unsub_files['day_diff'] = abs(df_unsub_files['date'] - high_date)
    path_unsub = df_unsub_files[
        df_unsub_files['day_diff'] ==
        df_unsub_files['day_diff'].min()]['file_path'].values.tolist()[0]
    ######
    list_unsubscribe_ids = pd.read_csv(
        path_unsub, dtype=str,
        usecols=['customersummary_c_primaryscnhash'
                 ])['customersummary_c_primaryscnhash'].unique().tolist()

    print(len(list_unsubscribe_ids))
    df_1['email_unsub_label'] = np.where(
        df_1['customer_id_hashed'].isin(list_unsubscribe_ids), 1, 0)
    del list_unsubscribe_ids
    df_zips_with_BL_store = df_zips_with_BL_store.rename(
        columns={"zip_cd": "customer_zip_code"})
    df_1 = pd.merge(df_1,
                    df_zips_with_BL_store,
                    on="customer_zip_code",
                    how="left")
    df_1 = df_1.reset_index(drop=True)
    # Changed to 3 weeks
    dv_start_date = high_date + datetime.timedelta(days=1)
    dv_end_date = high_date + datetime.timedelta(days=21)
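    # DV window: the three weeks immediately following the feature period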

    str_sql_dv_start_date = "'" + str(dv_start_date) + "'"
    str_sql_dv_end_date = "'" + str(dv_end_date) + "'"
    print(str_sql_dv_start_date, str_sql_dv_end_date)
    print(datetime.datetime.now())
    df_dvs = pd.read_sql(
        "select customer_id_hashed, transaction_dt from Pred_POS_Department where transaction_dt between %s and %s and sales >0"
        % (str_sql_dv_start_date, str_sql_dv_end_date),
        con=BL_engine).drop_duplicates()
    print(datetime.datetime.now())
    print("check point 6")

    # In[36]:

    df_dvs['week_end_dt'] = df_dvs['transaction_dt'].apply(week_end_dt)
    df_dvs = df_dvs[['customer_id_hashed', 'week_end_dt']].drop_duplicates()
    list_unique_weeks = df_dvs['week_end_dt'].unique().tolist()
    list_unique_weeks.sort()
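    # cumulative weekly purchase flags: DV_cumulative_week_updated_k = 1 when
    # the customer purchased in any of the first k weeks of the DV window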
    df_dv_binary = df_dvs[df_dvs['week_end_dt'] ==
                          list_unique_weeks[0]][['customer_id_hashed']]
    df_dv_binary['DV_cumulative_week_updated_1'] = 1
    for i in range(1, 3):
        w = list_unique_weeks[i]
        df = df_dvs[df_dvs['week_end_dt'] <=
                    w][['customer_id_hashed']].drop_duplicates()
        df['DV_cumulative_week_updated_%d' % (i + 1)] = 1
        df_dv_binary = pd.merge(df_dv_binary,
                                df,
                                on="customer_id_hashed",
                                how="outer")
        print(w, datetime.datetime.now())
    df_dv_binary = df_dv_binary.fillna(0)

    df_1 = pd.merge(df_dv_binary, df_1, on="customer_id_hashed", how="right")

    for i in range(3):
        col = 'DV_cumulative_week_updated_%d' % (i + 1)
        df_1[col] = df_1[col].fillna(0)

    print(df_1.shape, df_1['customer_id_hashed'].nunique())
    if "index" in df_1.columns.tolist():
        del df_1['index']

    print("check point 7")
    # output table names for this run
    table_crm_id_list_train = "crm_table_id_list_train_%s" % str_week_end_d
    table_crm_id_list_test = "crm_table_id_list_test_%s" % str_week_end_d
    table_df_1 = "table_pred_1_crm_up_to_%s" % str_week_end_d

    dict_table_names.update(
        {"table_crm_id_list_train": table_crm_id_list_train})
    dict_table_names.update({"table_crm_id_list_test": table_crm_id_list_test})
    dict_table_names.update({"table_df_1": table_df_1})
    # split
    len_df_1 = len(df_1)
    train_sample_size = 10**6
    test_ratio = 0.25
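    # cap the training set at 1M customers; below that cap, use a 75/25 split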
    if len_df_1 > train_sample_size / (1 - test_ratio):
        list_ind_train = random.sample(range(len_df_1), train_sample_size)
    else:
        list_ind_train = random.sample(range(len_df_1),
                                       int(len_df_1 * (1 - test_ratio)))

    df_1 = df_1.reset_index()
    df_1_train = df_1[df_1['index'].isin(list_ind_train)][['customer_id_hashed']]
    df_1_test = df_1[~df_1['index'].isin(list_ind_train)][['customer_id_hashed']]
    del df_1['index']

    print("df_1_train.shape", df_1_train.shape)
    print("df_1_test.shape", df_1_test.shape)
    chunksize = 10**6

    dtype_id = {"customer_id_hashed": sql.types.VARCHAR(length=64)}
    df_1_train.to_sql(name=table_crm_id_list_train,
                      chunksize=chunksize,
                      con=BL_engine,
                      index=False,
                      if_exists="replace",
                      dtype=dtype_id)
    df_1_test.to_sql(name=table_crm_id_list_test,
                     chunksize=chunksize,
                     con=BL_engine,
                     index=False,
                     if_exists="replace",
                     dtype=dtype_id)

    dtype_df_1 = {
        'customer_id_hashed': sql.types.VARCHAR(length=64),
        'DV_cumulative_week_updated_1': sql.types.Integer,
        'DV_cumulative_week_updated_2': sql.types.Integer,
        'DV_cumulative_week_updated_3': sql.types.Integer,
        # 'DV_cumulative_week_updated_4':sql.types.Integer,
        'sign_up_location': sql.types.VARCHAR(length=5),
        'customer_zip_code': sql.types.VARCHAR(length=5),
        'P_zip': sql.types.Integer,
        'S_zip': sql.types.Integer,
        'else_10_zip': sql.types.Integer,
        'signed_online': sql.types.Integer,
        'distc_to_sign_up': sql.types.Float,
        'email_unsub_label': sql.types.Integer,
        'nearest_BL_store': sql.types.VARCHAR(length=4),
        'nearest_BL_dist': sql.types.Float
    }

    df_1.to_sql(name=table_df_1,
                con=BL_engine,
                index=False,
                if_exists="replace",
                dtype=dtype_df_1,
                chunksize=chunksize)
    print("check point 8")
    create_index(table_name=table_crm_id_list_train,
                 list_of_columns=["customer_id_hashed"])
    create_index(table_name=table_crm_id_list_test,
                 list_of_columns=["customer_id_hashed"])
    create_index(table_name=table_df_1, list_of_columns=["customer_id_hashed"])
    # In[38]:

    path_json_table_names = "./table_names_%s.json" % str(high_date).replace(
        "-", "")
    with open(path_json_table_names, "w") as json_file:
        json.dump(dict_table_names, json_file)
    print("Done_of_part_2: %s" % str(datetime.datetime.now()))
import networkx as nx
from haversine import haversine

# read_shp requires networkx < 3.0 (it was removed in 3.0)
graph = nx.read_shp(
    './shape_files/tl_2013_48_prisecroads/tl_2013_48_prisecroads.shp')
edges = graph.edges()
nodes = graph.nodes()

d1 = float("inf")
p1 = None
d2 = float("inf")
p2 = None

for n in nodes:
    # shapefile nodes are (lon, lat) tuples; swap to (lat, lon) for haversine
    d = haversine((n[1], n[0]), (32.08691, -101.897681))
    if d < d1:
        d1 = d
        p1 = n
    d = haversine((n[1], n[0]), (32.759417, -97.032193))
    if d < d2:
        d2 = d
        p2 = n


def dist(a, b):
    # A* heuristic: great-circle distance between two (lon, lat) nodes
    return haversine((a[1], a[0]), (b[1], b[0]))


print(nx.astar_path(graph, p1, p2, dist))
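To turn the node sequence returned by astar_path into a route length, sum the
leg distances with the same function (a sketch reusing graph, p1, p2, and dist
from above; nodes remain (lon, lat) tuples):

path = nx.astar_path(graph, p1, p2, dist)
total_km = sum(dist(path[i], path[i + 1]) for i in range(len(path) - 1))
print("route length: %.1f km" % total_km)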
Example #58
0
    def get_graph_temp(self):
        from haversine import haversine
        from collections import defaultdict
        g = nx.Graph()
        nodes = set(self.df_train.index.tolist() + self.df_dev.index.tolist() +
                    self.df_test.index.tolist())
        assert len(nodes) == len(self.df_train) + len(self.df_dev) + len(
            self.df_test), 'duplicate target node'
        nodes_list = self.df_train.index.tolist() + self.df_dev.index.tolist(
        ) + self.df_test.index.tolist()
        node_id = {node: id for id, node in enumerate(nodes_list)}
        g.add_nodes_from(node_id.values())
        train_locs = self.df_train[['lat', 'lon']].values
        for node in nodes:
            g.add_edge(node_id[node], node_id[node])
        pattern = '(?<=^|(?<=[^a-zA-Z0-9-_\\.]))@([A-Za-z]+[A-Za-z0-9_]+)'
        pattern = re.compile(pattern)
        logging.info('adding the train graph')
        for i in range(len(self.df_train)):
            user = self.df_train.index[i]
            user_id = node_id[user]
            mentions = [m for m in pattern.findall(self.df_train.text[i])]
            idmentions = set()
            for m in mentions:
                if m in node_id:
                    idmentions.add(node_id[m])
                else:
                    id = len(node_id)
                    node_id[m] = id
                    idmentions.add(id)
            if len(idmentions) > 0:
                g.add_nodes_from(idmentions)
            for id in idmentions:
                g.add_edge(id, user_id)
        celebrities = []
        for i in range(len(nodes_list), len(node_id)):
            deg = len(g[i])
            if deg > self.celebrity_threshold:
                celebrities.append(i)
        # get neighbours of celebrities
        id_node = {v: k for k, v in node_id.items()}

        degree_distmean = defaultdict(list)
        degree_distance = defaultdict(list)
        c_distmean = {}
        for c in celebrities:
            c_name = id_node[c]
            c_nbrs = list(g[c])  # materialize the view for NumPy fancy indexing
            c_degree = len(c_nbrs)
            c_locs = train_locs[c_nbrs, :]
            c_lats = c_locs[:, 0]
            c_lons = c_locs[:, 1]
            c_median_lat = np.median(c_lats)
            c_median_lon = np.median(c_lons)
            distances = [
                haversine((c_median_lat, c_median_lon),
                          tuple(c_locs[i].tolist()))
                for i in range(c_locs.shape[0])
            ]
            degree_distance[c_degree].extend(distances)
            c_meandist = np.mean(distances)
            degree_distmean[c_degree].append(c_meandist)
            c_distmean[c_name] = [c_degree, c_meandist]
        with open('celebrity.pkl', 'wb') as fout:
            pickle.dump((c_distmean, degree_distmean, degree_distance), fout)

        logging.info('removing %d celebrity nodes with degree higher than %d' %
                     (len(celebrities), self.celebrity_threshold))
        g.remove_nodes_from(celebrities)

        self.biggraph = g
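The celebrity statistics above reduce to: take a node's neighbour coordinates,
find their median point, and average the haversine distances to it. A
standalone sketch of that computation (the three (lat, lon) pairs are
illustrative):

import numpy as np
from haversine import haversine

c_locs = np.array([[33.75, -84.39], [34.05, -118.24], [40.71, -74.01]])
median_center = (np.median(c_locs[:, 0]), np.median(c_locs[:, 1]))
distances = [haversine(median_center, tuple(loc)) for loc in c_locs]
print("degree=%d mean_dist=%.1f km" % (len(c_locs), np.mean(distances)))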
def parseResponse(gpsLine):
    gpsChars = ''.join(chr(c) for c in gpsLine)
    local_pending_redraw = False
    
    if "$GNGGA" in gpsChars:
        if ",1," not in gpsChars:
            print("Looking for fix... (GGA)")
            add_to_image.rectangle(status_icon_zone, fill="black", outline="black")
            add_to_image.rectangle(status_zone, fill="black", outline="black")
            add_to_image.text(status_icon_start, "\uf252", font=FA_solid, fill="white")
            add_to_image.text(status_start, "GPS...", fill="white")
            return False
        try:
            nmea = pynmea2.parse(gpsChars, check=True)
            print('%.6f'%(nmea.latitude), ",",'%.6f'%(nmea.longitude), ", sats:", nmea.num_sats, ", alt:", nmea.altitude) # GGA
            
            ## update altitude
            add_to_image.text(alti_icon_start, "\uf077", font=FA_solid, fill="white")
            add_to_image.rectangle(alti_zone, fill="black", outline="black")
            add_to_image.text(alti_start, str('%.0f' % (nmea.altitude)), font=text_medium, fill="white")

            ## fix found, show number of satellites
            add_to_image.rectangle(status_icon_zone, fill="black", outline="black")
            add_to_image.rectangle(status_zone, fill="black", outline="black")
            text_sats = "Sats.:" + str(nmea.num_sats)
            add_to_image.text(status_start, text_sats, fill="white")
            
            ## update total distance
            global reading_nr
            global total_km
            global prev_lat
            global prev_long
            dist = 0
            if reading_nr != 1:
                dist = haversine((float(prev_lat), float(prev_long)),
                                 (float(nmea.latitude), float(nmea.longitude)))
                total_km += dist
                print("Total KM:", total_km)
                add_to_image.text(dist_icon_start, "\uf1b9", font=FA_solid, fill="white")
                add_to_image.rectangle(dist_zone, fill="black", outline="black")
                add_to_image.text(dist_start, "%0.1f" % total_km, font=text_medium, fill="white")
            prev_lat = nmea.latitude
            prev_long = nmea.longitude
            reading_nr += 1
            
            ## log every 10th GPS coordinate in CSV file
            if reading_nr % 10 == 0:
                filename = 'data/gps/gps_' + datetime.datetime.now().strftime("%Y%m%d") + '.csv'
                with open(filename, 'a', newline='') as csvfile:
                    gps_writer = csv.writer(csvfile, delimiter=',', quotechar='|', quoting=csv.QUOTE_MINIMAL)
                    gps_writer.writerow([nmea.timestamp, nmea.latitude, nmea.longitude, nmea.altitude])
            
            local_pending_redraw = True
            
        except Exception as e:
            print("NMEA parse error (GGA)")
            print(e)
        
    if "$GNRMC" in gpsChars:
        if ",A," not in gpsChars: # 1 for GGA, A for RMC
            print("Looking for fix... (RMC)")
            return False
        try:
            nmea = pynmea2.parse(gpsChars, check=True)
            print("Speed: ", nmea.spd_over_grnd) # RMC
            ## update speed
            add_to_image.rectangle(speed_zone, fill="black", outline="black")
            add_to_image.text(speed_start, str('%.0f' % (nmea.spd_over_grnd)), font=text_largest, fill="white")
            local_pending_redraw = True
        except Exception as e:
            print("NMEA parse error (RMC)")
            print(e)
        
    if local_pending_redraw:
        global pending_redraw
        pending_redraw = True
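The running-total logic in parseResponse can be isolated from the display code
into a small tracker (a sketch; the sample coordinates are illustrative):

from haversine import haversine

class DistanceTracker:
    """Accumulate distance in km over successive GPS fixes."""

    def __init__(self):
        self.prev = None  # previous (lat, lon) fix
        self.total_km = 0.0

    def update(self, lat, lon):
        fix = (float(lat), float(lon))
        if self.prev is not None:
            self.total_km += haversine(self.prev, fix)
        self.prev = fix
        return self.total_km

tracker = DistanceTracker()
tracker.update(48.8584, 2.2945)
print("Total KM: %.3f" % tracker.update(48.8606, 2.3376))  # ~3.2 km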
def dist(a, b):
    # great-circle distance between two (lat, lon) points, in kilometers
    return haversine(a, b)