Example 1
	# note: a tuple default avoids the shared-mutable-default pitfall of sqls=[]
	def query(self, sqls=(), description=False, compare=False,
		percent=False, sep=' ', std_out=False, csv=""):
		search_hits = []

		for sql in sqls:
			
			sql = self.evaluate_sql(sql)

			try:
			
				self.cursor.execute(sql)
			
			except Exception as e:
				import sys
				self.cursor.close()
				self.conn.close()
				print(e)
				sys.exit(0)

			results = self.cursor.fetchall()
			if results:
				variables = []
		
				for key in results[0].keys():
					variables.append(key)
				if csv:
					print_variables = self.extract_variables(variables)
				if std_out:
					rg.search((0, 0))  # warm-up: the first search loads the geocoder dataset
					print(sql)
					print(sep.join(variables))
					for rank, result in enumerate(results):
						print(rank, end=' ')
						for var in variables:
							print(result[var], sep, end=' ')

						if 'trans_lat' in variables and 'trans_lon' in variables:
							lat = result['trans_lat']
							lon = result['trans_lon']
							location = rg.search((lat, lon))[0]
							print(location['name'], ',', location['admin1'], sep)
						elif 'INTPTLAT10' in variables and 'INTPTLON10' in variables:
							lat = result['INTPTLAT10']
							lon = result['INTPTLON10']
							location = rg.search((lat, lon))[0]
							print(location['name'], ',', location['admin1'], sep)
						else:
							print('')

				if csv:
					self.print_csv(variables,results,csv)
				if description:
					self.description(variables,results,sql)
				if compare:
					self.compare_to_secondary(results,sql,percent = percent)
			
			search_hits += results

		return search_hits
Example 2
def getUserLocationNewModel(userId):
	"""
	:param userId:
	:return: userCoords [lat, lon]
	Function to triangular the location of a user based on his reviewed business locations.
	"""
	maxCount = -9999  # renamed from 'max' to avoid shadowing the built-in
	userCountryKey = -1
	reviews = findReviewUserBusinessByUserId(userId)
	# print(reviews)
	bCountry = {}
	latlondeg = []
	#print reviews.count()
	reviewIndex = 0
	# reviewsCount = reviews.count()
	for review in reviews:
		# reviewIndex += 1
		# print("Processing Review", reviewIndex, "of", reviewsCount, "of user:", user["name"])
		# NOTE: the lines that set businessCityId, latBus, lonBus and
		# businessCountryObj from the review's business were lost in the source.
		try:
			bCountry[businessCityId].addCoord((latBus, lonBus))
		except KeyError:
			bCountry[businessCityId] = businessCountryObj
			bCountry[businessCityId].addCoord((latBus, lonBus))

	for k, v in bCountry.items():  # iteritems() is Python 2 only
		if v.getCount() > maxCount:
			maxCount = v.getCount()
			userCountryKey = k
	#userCountryKey = max(bCountry.iteritems(), key=operator.itemgetter(1))[0]

	for k, v in bCountry.items():
		#print v
		#print v.getCount()
		#print v.getCoords()
		if k == userCountryKey:
			#print v[1]
			for loc in v.getCoords():
				latlondeg.append({"LATIDX": loc[0], "LNGIDX": loc[1]})

	# print(latlondeg)
	# Triangulating user coords.
	userCoords = getLatLngCenter(latlondeg)
	userCountry = rg.search((userCoords[0], userCoords[1]))[0]
	userCoords.append(userCountry)
	# print(userCountry)
	return userCoords  # [lat, lon, country], per the docstring
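The snippet leans on a getLatLngCenter helper that is not shown. A minimal sketch of what such a helper could look like, given the [{"LATIDX": ..., "LNGIDX": ...}] list built above; the function name and the spherical-averaging approach are illustrative, not from the original:

import math

def latlng_center(points):
    # average the points on the unit sphere rather than averaging raw
    # degrees, so the centroid behaves sensibly near the antimeridian
    x = y = z = 0.0
    for p in points:
        lat, lon = math.radians(p["LATIDX"]), math.radians(p["LNGIDX"])
        x += math.cos(lat) * math.cos(lon)
        y += math.cos(lat) * math.sin(lon)
        z += math.sin(lat)
    n = len(points)
    x, y, z = x / n, y / n, z / n
    lon = math.atan2(y, x)
    lat = math.atan2(z, math.sqrt(x * x + y * y))
    return [math.degrees(lat), math.degrees(lon)]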
Example 3
 def add_geo_assets(self):
     # zip loops instead of map(): in Python 3 map() is lazy, so the
     # side-effecting lambdas would never run unless consumed
     waypoint_geoassets = rg.search([(waypoint.latitude, waypoint.longitude)
                                     for waypoint in self.waypoints])
     for way, geoass in zip(self.waypoints, waypoint_geoassets):
         way.add_geo_asset(geoass)
     route_start_geoassets = rg.search([(route.start_latitude,
                                         route.start_longitude)
                                        for route in self.metadata])
     route_end_geoassets = rg.search([(route.end_latitude,
                                       route.end_longitude)
                                      for route in self.metadata])
     for rte, start_geoass, end_geoass in zip(self.metadata,
                                              route_start_geoassets,
                                              route_end_geoassets):
         rte.add_geo_asset(start_geoass, end_geoass)
Example 4
def main(argv):
    try:
        input_data = sys.stdin
        csv.field_size_limit(sys.maxsize)
        csv_in = csv.DictReader(fixNulls(input_data),
                                delimiter=delimiter,
                                fieldnames=headers,
                                quoting=csv.QUOTE_MINIMAL)
        """ load formatted geocoded file (preventing it to print <Loading formatted geocoded file...> to stdout) """
        sys.stdout = open(os.devnull, "w")
        rg.search((0, 0))
        sys.stdout = sys.__stdout__

        previousLat = 0
        previousLng = 0
        for line in csv_in:
            try:
                """ CUSTOMIZE HERE """
                lat = str(line['lat'])
                lng = str(line['lng'])

                if lat != previousLat or lng != previousLng:
                    r = rg.search((lat, lng))
                    latlng2country = r[0]['cc']
                """ CUSTOMIZE HERE """
                outline = '{}{}{}{}{}'.format(lat, delimiter, lng, delimiter, latlng2country)

                print(outline)

                previousLat = lat
                previousLng = lng

            except Exception:
                latlng2country = ''
                continue

    except EOFError:
        return None

    except Exception as e:
        print(e)
        sys.exit(1)
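The devnull redirection above suppresses the "Loading formatted geocoded file..." banner that reverse_geocoder prints on its first search. A reusable sketch of the same warm-up trick using contextlib instead of reassigning sys.stdout by hand (warm_up_geocoder is a made-up name):

import contextlib
import io
import reverse_geocoder as rg

def warm_up_geocoder():
    # the first search loads the dataset into memory; swallow its banner
    with contextlib.redirect_stdout(io.StringIO()):
        rg.search((0.0, 0.0))

warm_up_geocoder()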
Example 5
    def update_name_from_location(self):
        """In the given list resolve the city and append the location city to the filename"""
        for entry in self.file_list:
            if entry.location:
                lat, lon = entry.location.split(',')
                default_city = Folder.clean_name(
                    reverse_geocoder.search([(float(lat), float(lon))
                                             ])[0]['name'])
                alt_city = []
                try:
                    alt_city = Folder.geo.reverse(
                        entry.location, timeout=60).address.split(', ')
                except geopy.exc.GeocoderTimedOut as e:
                    print(
                        f"Timeout on GeoLookup for {entry.location} of {entry.name}. Found {default_city} already."
                    )
                    raise FactoryError("Exiting.")

                name, ext = os.path.splitext(entry.name)
                known_city = ""
                if len(alt_city) > 4:
                    known_city = Folder.get_known_city(alt_city[-5])
                if not known_city and len(alt_city) > 5:
                    known_city = Folder.get_known_city(alt_city[-6])
                if not known_city:
                    known_city = Folder.get_known_city(default_city)
                if known_city:
                    entry.name = name + ' ' + known_city + ext
                    continue

                entry.name = name + ' ' + default_city + ext
Example 6
 def reverseGeocode(self, latlon):
     # call using geocoder, biggest con is performance for this option
     #geo = self.geocoder.reverse_geocode(latlon[0], latlon[1], no_annotations = '1', pretty = '1', language='pt')
     #reversed = self.removeStopWords(geo[0]['formatted'])
     # call using reverse_geocoder, nice thing about this: performance
     aux = (latlon[0], latlon[1])
     data = rg.search(aux)
     city = data[0]["name"]
     province = data[0]["admin1"] + " " + data[0]["admin2"]
     dictionary = {'AC': 'Acre','AL': 'Alagoas','AP': 'Amapá','AM': 'Amazonas','BA': 'Bahia',
      'CE': 'Ceará','DF': 'Distrito Federal','ES': 'Espírito Santo','GO': 'Goiás',
      'MA': 'Maranhão','MT': 'Mato Grosso','MS': 'Mato Grosso do Sul',
      'MG': 'Minas Gerais','PA': 'Pará','PB': 'Paraíba','PR': 'Paraná',
      'PE': 'Pernambuco','PI': 'Piauí','RJ': 'Rio de Janeiro',
      'RN': 'Rio Grande do Norte','RS': 'Rio Grande do Sul','RO': 'Rondônia',
      'RR': 'Roraima','SC': 'Santa Catarina','SP': 'São Paulo','SE': 'Sergipe',
      'TO': 'Tocantins'}
     country = data[0]["cc"].replace("BR","Brasil")
     resolved = city + " " + province + " " + country  # 'resolved' avoids shadowing the built-in reversed()
     for key in dictionary.keys():
         latlon[3] = latlon[3].upper().replace(key, dictionary[key]) if len(latlon[3]) == 2 else latlon[3]
     reported = ' '.join(latlon[2:5])
     resolved = self.removeStopWords(resolved)
     reported = self.removeStopWords(reported)
     resolved = self.removeNonAscii(resolved)
     reported = self.removeNonAscii(reported)
     similarity = 100 * textdistance.Cosine(qval=None).similarity(reported, resolved)
     return pd.Series((reported, resolved, similarity))
Example 7
def update_filter_data(country, n):
    df = pd.DataFrame(requests.get(url).json()['data'])
    station = json_normalize(df['station'])
    df.drop(['station'], axis=1, inplace=True)
    df = pd.concat([df, station], axis=1)
    df.drop(['time'], axis=1, inplace=True)
    location = rg.search(list(zip(df.lat, df.lon)))
    df['coordinates'] = location
    df['location'] = df['coordinates'].apply(lambda x: x['name'])
    df['cc'] = df['coordinates'].apply(lambda x: x['cc'])
    df.drop(['coordinates', 'name', 'uid'], axis=1, inplace=True)
    df = df.merge(countries, how="inner", left_on="cc", right_on="value")
    df.rename(columns={'label': 'country'}, inplace=True)
    df.drop(['value'], axis=1, inplace=True)
    df.aqi.replace('-', np.NaN, inplace=True)
    df.dropna(axis=0, inplace=True)
    df['aqi'] = pd.to_numeric(df['aqi'])
    df['marker_color'] = pd.cut(
        df['aqi'], [0, 50, 100, 150, 200, 300, df['aqi'].max()],
        labels=['green', 'yellow', 'orange', 'red', 'purple', 'brown'])
    df['country'] = df['country'].apply(
        lambda x: 'USA' if (x == 'United States of America') else x)
    df = df[df['cc'] == country]
    df.reset_index(drop=True, inplace=True)
    return df.to_dict('records')
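The single rg.search call over zip(df.lat, df.lon) is the key move here: one batched query for every row instead of one query per row. A self-contained sketch of the pattern on toy data (column names are illustrative):

import pandas as pd
import reverse_geocoder as rg

df = pd.DataFrame({'lat': [40.7397, 48.8566], 'lon': [-73.9946, 2.3522]})
hits = rg.search(list(zip(df.lat, df.lon)))  # one call for the whole frame
df['location'] = [h['name'] for h in hits]
df['cc'] = [h['cc'] for h in hits]
print(df)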
Example 8
 def build_geocodes(self):
     gps_tuples = [tuple(xy) for xy in self.centroids]
     geocodes = reverse_geocoder.search(gps_tuples)
     # rename rg's lat/lon/name/admin1/admin2/cc keys, relying on their order
     columns = ['latitude', 'longitude', 'city', 'state', 'region', 'country']
     geocodes = pd.DataFrame(geocodes)
     geocodes.columns = columns
     self.geocodes = geocodes
Example 9
def add_country_codes(df):
    '''
    Use latitude and longitude coordinates to determine the country of a project.

    :param df: dataframe with coordinates
    :returns: dataframe with project country codes appended
    '''
    latitude = df.Latitude.values
    longitude = df.Longitude.values
    coordinates = []

    for lat, lng in zip(latitude, longitude):
        coordinates.append((lat, lng))

    codes = []
    results = rg.search(coordinates)

    for r in results:
        try:
            code = r['cc']
            codes.append(code)
        except KeyError:
            codes.append('NA')

    df['Country'] = codes

    return df
Example 10
def home(request):

    # Current Location
    g = geocoder.ip('me')
    latlon = g.latlng
    result = rg.search((latlon[0], latlon[1]))

    # Weather
    url = "https://api.darksky.net/forecast/9229258e1491d592cda918516dd382f9/{},{}?exclude=minutely,hourly,daily,alerts,flags&units=auto".format(
        latlon[0], latlon[1])
    response = urllib.request.urlopen(url)
    data = json.loads(response.read())

    # Compliments
    compliment = [
        'You Look Stunning!', 'Hey, Gorgeous!', 'You look Sexy!',
        'Synonym for beauty is you!', 'Fair and Lovely!'
    ]
    chooser = random.randint(0, len(compliment) - 1)
    final_data = {
        'temperature': round(int(data['currently']['temperature'])),
        'summary': data['currently']['summary'],
        'place': result[0]['name'],
        'compliment': compliment[chooser]
    }
    return render(request, 'base.html', context=final_data)
Example 11
    def transformToCountry(self, x: float, y: float) -> str:
        geoms = fiona.open(
            shpreader.natural_earth(resolution='50m',
                                    category='physical',
                                    name='land'))

        land_geom = sgeom.MultiPolygon(
            [sgeom.shape(geom['geometry']) for geom in geoms])

        land = prep(land_geom)
        if x is not None and y is not None:
            on = land.contains(sgeom.Point(x, y))
            if on:
                result = rg.search((y, x))
                country_full = self.cc.convert(names=result[0]["cc"],
                                               to="name_short")
                country = self.cc.convert(names=result[0]["cc"], to="ISO3")
                print("Starting in", country_full)
                plt.title("Starting in " + str(country_full), fontsize=50)
                plt.scatter(x,
                            y,
                            color="black",
                            marker=".",
                            transform=ccrs.Geodetic())
                self.go = True
                return country
            else:
                print("Mer")  # French for "sea": the point is offshore
                plt.title("Please choose a location on land!", fontsize=50)
                return ""
Example 12
def add_missing_countries(tweets_df):
    '''
    Note: this function works 'in place'.
    '''

    print('{} tweets do not have country information!'.format(
        tweets_df['Location'].isna().sum()))

    # get tweets without a country info
    no_country = tweets_df[tweets_df['Location'].isna()][['Lat', 'Long'
                                                          ]].drop_duplicates()

    # extract coordinates
    coordinates = list(no_country.itertuples(index=False, name=None))

    # map coordinates with countries using an external package
    results = rg.search(coordinates)
    no_country['found_countries'] = [i['cc'] for i in results]
    no_country['found_countries'] = no_country['found_countries'].map(
        cf.country_abbr)
    no_country['Lat_Long'] = no_country[['Lat', 'Long']].apply(
        lambda x: '_'.join(x.map(str)), axis=1)
    # build the same Lat_Long key on the tweets frame so the mapping below has a column to map over
    tweets_df['Lat_Long'] = tweets_df[['Lat', 'Long']].apply(
        lambda x: '_'.join(x.map(str)), axis=1)

    # add mapped countries to the original tweets data
    tweets_df.loc[tweets_df['Location'].isna(), 'Location'] = list(
        tweets_df[tweets_df['Location'].isna()]['Lat_Long'].map(
            dict(zip(no_country['Lat_Long'], no_country['found_countries']))))
    tweets_df.drop(['Lat_Long'], axis=1, inplace=True)

    print('{} tweets that do not have country information will be discarded!'.
          format(tweets_df['Location'].isna().sum()))
    # drop in place so the change is visible to the caller
    tweets_df.drop(tweets_df[tweets_df['Location'].isna()].index, inplace=True)

    return None
Example 13
    def _get_state_and_county(self, lon, lat):
        result = None
        try:
            coordinates = (lat, lon)
            results = rg.search(coordinates)
            try:
                county = results[0]['admin2'].split()[:1][0]
                state = results[0]['admin1']
            except (IndexError, KeyError):
                state = None
                county = None
            if 33 < lat < 38 and -103.5 < lon < -94:
                result = True
            else:
                print("NOT AOI event")
                result = None
                raise TypeError("Not in AOI")
        except pgdb.Error as error:
            raise pgdb.Error(error)
        except Exception as error:
            print(error)
        if result is None:
            raise TypeError("Not in AOI")
        return result, county, state
Example 14
def gzworker(fullpath):
    """Worker will open the .csv file and process the information inside"""
    print('Processing {}'.format(fullpath))
    # try:
    with open(fullpath, 'r+') as f:
        reader = csv.reader(f)
        for row in reader:
            geoloc = row[3]
            geoloc = geoloc.split(',')
            lon = geoloc[0].replace('[', '')
            lat = geoloc[1].replace(']', '').replace(' ', '')
            # print('Longitude: {} \nLatitude: {}'.format(lon, lat))
            # m_obj = re.search(r"(\d+)", geoloc)
            # print(m_obj)
            coordinates = (lat,lon)
            results = rg.search(coordinates) # default mode = 2
            print(results)
            state_num = mx_ca_us_state_abbrev.get(results[0].get('admin1'))
            print(state_num)

            # state_num = us_state_abbrev.results['admin1']
            # print(state_num)
# [('lat', '29.23329'), ('lon', '-98.79641'), ('name', 'Lytle'), ('admin1', 'Texas'), ('admin2', 'Atascosa County'), ('cc', 'US')]

    # except:
    #     print("Error in {}".format(fullpath))
    #     pass

    print('Finished {}'.format(fullpath))
Example 15
 def do_print(lat, lon):
     print(
         lat, lon,
         reverse_geocode.search([lat, lon])[0], ":",
         TestLocations.a.reverse(f"{lat},{lon}").address.split(', ')[-5:-4],
         ":",
         reverse_geocoder.search([(lat, lon)])[0])
Example 16
def reverse_geocode(lat, lng):
    # use named keys instead of positional items(); rg results are
    # ordered dicts with keys lat, lon, name, admin1, admin2, cc
    res = rg.search((lat, lng))[0]
    return (res['name'], res['admin1'], res['cc'])
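A quick usage check of the helper above (coordinates and the exact place name are illustrative):

import reverse_geocoder as rg

print(reverse_geocode(40.7397, -73.9946))  # -> (name, state, country code) of the nearest geonames city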
Example 17
 def get(self, name):
     phq = Client(access_token="")
     retJ = []
     if name == '':
         name = "eminem"
     for event in phq.events.search(q=name, limit=5, sort='rank', category='concerts'):
         try:
             cood = event.location
             local = rg.search((cood[1], cood[0]))[0]['name']  # swap to (lat, lon) order
         except IndexError:
             cood = [0, 0]
             local = 'USA'
         resp = {
             "eTitle": event.title,
             "eDate": event.start.strftime('%Y-%m-%d'),
             "eCountry": event.country,
             "eRank": event.rank,
             "eLocation": local,
         }
         retJ.append(resp)
     return retJ, 200
Example 18
def predicton(image_url):
    # path to the image or video
    imagename = "static/" + image_url + ".jpg"
    print(imagename)
    # read the image data using PIL
    image = Image.open(imagename)
    exifdata = image.getexif()
    tags = {}
    # Put tags into dict
    for tag_id in exifdata:
        tag = TAGS.get(tag_id, tag_id)
        data = exifdata.get(tag_id)
        if isinstance(data, bytes):
            data = data.decode()
        tags[tag] = data
    # extract gps coordinates
    # if statements account for W or S gps values (which should be negative)
    lat = float(
        tags['GPSInfo'].get(2)[0]) + float(tags['GPSInfo'].get(2)[1]) / 60
    if tags['GPSInfo'].get(1)[0] == 'S':
        lat = lat * -1
    lng = float(
        tags['GPSInfo'].get(4)[0]) + float(tags['GPSInfo'].get(4)[1]) / 60
    if tags['GPSInfo'].get(3)[0] == 'W':
        lng = lng * -1
    locationDict = rg.search((lat, lng))[0]
    location = locationDict.get('name') + ', ' + locationDict.get('admin1')

    return render_template('image.html',
                           tags=tags,
                           image_url="../" + imagename,
                           location=location)
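The lat/lng math above converts degrees plus minutes and drops the seconds term of the EXIF GPSInfo tuple. A hedged sketch of the full degrees/minutes/seconds conversion (dms_to_decimal is a made-up helper; dms is the (deg, min, sec) tuple from GPSInfo, ref is 'N'/'S'/'E'/'W'):

def dms_to_decimal(dms, ref):
    # seconds contribute up to ~0.017 degrees, i.e. roughly 1.8 km
    value = float(dms[0]) + float(dms[1]) / 60 + float(dms[2]) / 3600
    return -value if ref in ('S', 'W') else value

print(dms_to_decimal((40, 44, 54.36), 'N'))  # ~40.7484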
Example 19
def main():

    # print(rg.search(()))

    coordinates = (40.73971301, -73.99456405)
    pprint.pprint(rg.search(coordinates))
    """
Example 20
def getlocation_drop(x):
    point_coord = (x[3], x[2])
    this_location = rg.search(point_coord, mode=1)
    if this_location[0]['admin2'] == 'Queens County':
        return ('Queens', x[0], x[1])
    else:
        return (this_location[0]['name'], x[0], x[1])
Example 21
def map_filter(country):
    user_loc = country

    # Retrieve device GPS locations
    GPS = get_GPS()

    device_map = {}  # renamed from 'map' to avoid shadowing the built-in

    for device in GPS:

        coordinates = (GPS[device]['lat'], GPS[device]['lon'])
        lat = GPS[device]['lat']
        lon = GPS[device]['lon']

        loc = rg.search(coordinates)
        country = loc[0]['cc']

        if country == user_loc:
            device_map.update({device: {'Country': country, 'lat': lat, 'lon': lon}})
    #print("Map:")
    #print(device_map)

    json_map = json.dumps(device_map)

    return json_map
Example 22
def getlocation2(x):
    point_coord_pickup = (float(x[1]), float(x[0]))
    point_coord_drop = (float(x[3]), float(x[2]))
    pickup_location = rg.search(point_coord_pickup, mode=1)
    drop_location = rg.search(point_coord_drop, mode=1)
    if pickup_location[0]['admin2'] == 'Queens County':
        pickup_name = 'Queens'
    else:
        pickup_name = pickup_location[0]['name']

    if drop_location[0]['admin2'] == 'Queens County':
        drop_name = 'Queens'
    else:
        drop_name = drop_location[0]['name']
    names = (pickup_name, drop_name)
    return names
Example 23
 def convert_geo(self):
     self._df['lat_long'] = list(
         zip(self._df['latitude'], self._df['longitude']))
     rg_result = rg.search(self._df['lat_long'].tolist())
     df_geo = json_normalize(rg_result)
     self._df['area'] = df_geo['name']
     self._df['location'] = df_geo['admin2']
Example 24
def reverseGeocode(coordinates):
    # rg.search returns a list of ordered dictionaries; return the first hit
    result = rg.search(coordinates)
    return result[0]
Example 25
def fetchsamples(db):
    query_coords_string = '-155.6811,18.91,-66.9470,44.81'

    url = 'https://stream.twitter.com/1.1/statuses/filter.json?lang=en&locations=' + query_coords_string

    response = twitterreq(url, "POST", [])

    for line in response:
        tweet_json = ujson.loads(line.strip().decode('utf8'))

        # We only process geolocated tweets and tweets in English
        if tweet_json['lang'] == 'en':
            if 'geo' in tweet_json and tweet_json['geo'] is not None:
                latitude = tweet_json['geo']['coordinates'][0]
                longitude = tweet_json['geo']['coordinates'][1]

                result_json = rg.search((latitude, longitude), verbose=False)[0]

                if result_json is not None:
                    if 'cc' in result_json and result_json['cc'] == 'US':
                        tweet_json['usa_state'] = result_json['admin1']
                        print_tweet(remove_fields(tweet_json), db)

            elif 'place' in tweet_json and tweet_json['place'] is not None \
                    and tweet_json['place'].get('country') == 'United States':
                full_name = tweet_json['place']['full_name']

                matches = re.findall('([\w+\s]+)', full_name)

                if len(matches) == 2:
                    if matches[1] == 'USA':
                        tweet_json['usa_state'] = matches[0]
                    else:
                        tweet_json['usa_state'] = str(us.states.lookup(matches[1][1:]))  # Find state full name for abbr

                    print_tweet(remove_fields(tweet_json), db)
Example 26
def reverseGeocode(a, b):
	coordinates = (a, b)
	result = rg.search(coordinates, verbose=False)
	if len(result) > 0:
		return result[0].get("cc")
	else:
		return "ASSENTE"  # Italian for "absent": no result found
Example 27
def filter_US_locations(collection):
    cursor = collection.find(
        {
            "location_dict": {
                "$exists": False
            },
            "latitude": {
                "$exists": True
            }
        },
        no_cursor_timeout=True)
    added_counter = 0
    deleted_counter = 0
    for record in cursor:
        location_data = reverse_geocoder.search(
            (record['latitude'], record['longitude']))
        if location_data[0]['cc'] == 'US':
            collection.update_one(
                {"_id": record["_id"]},
                {"$set": {
                    'location_dict': location_data[0]
                }})
            added_counter += 1
        else:
            collection.delete_one({"_id": record["_id"]})
            deleted_counter += 1
        string_report = "added {} location dicts and deleted {} records".format(
            added_counter, deleted_counter)
        print(string_report)
        time.sleep(1.5)
    cursor.close()
Example 28
def reverseGeocode(coordinates):
    result = rg.search(coordinates)
    pprint.pprint(result)
    pprint.pprint(result[0]["name"])
    address = result[0]["name"]
    print(address)
    return address
Example 29
def get_coords(x, y):
    if pd.isnull(x) | pd.isnull(y):
        return 'nan'
    else:
        coordinates = (x, y)
        results = rg.search(coordinates)
        return results
Example 30
def fetch_information(lat, long):

    final_info = []
    st.text(
        "Note : Radius used for feature collection and model building is 1km")
    st.warning("Fetching features......This might take a few minutes")
    country_code_info = rg.search((lat, long))
    country_code = country_code_info[0]['cc']
    continent = country_to_continent(country_code)
    final_info.append(continent)
    dist_to_road_collected = getDist(lat, long)
    final_info.append(dist_to_road_collected)
    population_info = getPopulationInfo(lat, long, population)
    final_info.extend(population_info)
    # test_information_model['Continent'] = country_to_continent(country_code)
    venues = getVenueInfo(lat, long)
    final_info.extend(venues)
    st.text(f"fetching population information ={population_info}")
    st.text(f"fetching nearby venue information ={venues}")
    st.text(f"fetching nearest distance to roads ={dist_to_road_collected}")
    st.success("Process is complete")
    st.text(final_info)

    final_info_numpy = np.array(final_info).reshape(1, 17)
    return final_info_numpy
Example 31
 def which_state(self, tweet):
     box = tweet.place.bounding_box.coordinates[0]
     longs = [x[0] for x in box]
     lats = [x[1] for x in box]
     # reverse-geocode the bounding-box centroid, in (lat, lon) order
     coords = (np.mean(lats), np.mean(longs))
     results = rg.search(coords)[0]
     if results['cc'] == 'US':
         return results['admin1']
     else:
         return 'Non-US'
Example 32
def get_city_and_state(lat, lon):
	cityNum, stateNum = -1, -1
	result = rg.search((lat, lon))[0]
	if 'name' in result:
		city = result['name']
		if city in cityMap:
			cityNum = cityMap[city]
	if 'admin1' in result:
		state = result['admin1']
		if state in stateToAbbr:  # stateMap is keyed by abbreviation
			stateNum = stateMap[stateToAbbr[state]]
	return cityNum, stateNum
Example 33
def get_location():
    API_KEY = "7efb02dc0ba8ee84f6f63945578cdcca"
    request_url = "http://api.ipstack.com/check?access_key={}".format(API_KEY)
    r = requests.get(request_url)
    j = json.loads(r.text)
    # print(j)
    lat = j['latitude']
    lon = j['longitude']

    print(lat)
    print(lon)

    results = rg.search((lat,lon))

    return results[0]["name"]
Example 34
def count_single_field(field, data):
    if field == 'hashtag':
        out = [i[1] for i in data if i[1] != 'null']
        out = Counter(list(itertools.chain.from_iterable(out)))
    if field == 'coordinates':
        coord = [i[3] for i in data if i[3] != 'null']
        result = rg.search(coord)
        out = Counter([i['admin1'] for i in result])
    if field == 'mention':
        out = Counter([i[4] for i in data if i[4] != 'null'])
    if field == 'source':
        out = Counter([i[5] for i in data if i[5] != 'null'])
    if field == 'tweet':
        out = Counter([i[6] for i in data if i[6] != 'null'])
    if field == 'user':
        out = Counter([i[7] for i in data if i[7] != 'null'])
    return out
Example 35
def process():
	#global scoreOfStates
	x = 0
	for i in states:
		scoreOfStates[states[i]] = 0
		tweetofStates[states[i]] = 0
	# print(scoreOfStates)
	while True:
		try:
			x += 1
			if x == 100:
				break
			TweetInfo = consumer.recv_json()
			# print(TweetInfo)
			print(x)
			if "place" in TweetInfo:
				if "bounding_box" in TweetInfo["place"]:
					if TweetInfo["place"]["bounding_box"]["coordinates"]:
						lis = TweetInfo["place"]["bounding_box"]["coordinates"]
						'''taking the average of the coordinates as the assumed
						location of the tweet and finding the corresponding state
						using reverse geo lookup'''
						a = 0
						b = 0
						for i in lis[0]:
							a += i[0]
							b += i[1]
						a /= 4.0
						b /= 4.0
						state = rg.search((b, a))[0]["admin1"]
						score = 0
						if "text" in TweetInfo:
							# print(TweetInfo['text'])
							listOfTokens = normalize(TweetInfo['text'])
							text = " ".join(listOfTokens)
							score = TextBlob(text).sentiment.polarity
						if state in states.values():
							scoreOfStates[state] += score
							tweetofStates[state] += 1
						# print(score, state)
		except Exception:
			print("Exception found")
Example 36
    def findLocation(hashtag, coordinates):
        print('Finding corresponding locations...')
        results_ = rg.search(coordinates)
        # print(results_)
        results = [i['admin1'] for i in results_]

        total_result = []
        for i in zip(results, hashtag):
            if len(i[1]) != 0:
                temp = []
                for j in i[1]:
                    temp.append(j)
                total_result.append((i[0], temp))
            else:
                total_result.append((i[0], i[1]))

        # placeFinal = [i for i in total_result if i[0] in listStates]
        placeFinal = [i for i in total_result]
        print('Done')
        return placeFinal
Example 37
my_postpaid = int(math.ceil(float(my_stat) * .66))
my_prepaid = int(math.ceil(float(my_stat) * .16))
my_other = int(math.ceil(float(my_stat) * .18))

# use yearly data to find the hourly amount, and create that number of points
count = 0
while count < my_postpaid:
    if my_state == 'HI' or my_state == 'AK':
        x, y = create_random_point(my_lat, my_lon, 100000)
    elif my_state == 'CA':
        x, y = create_random_point(my_lat, my_lon, 500000)
    else:
        x, y = create_random_point(my_lat, my_lon, 1000000)
    coordinates = (x, y)
    results = rg.search(coordinates, mode=1)
    # used to verify the point is in the US - do we need this?
    if results[0]['cc'] == 'US':
        my_list = {'name': my_name, 'lat': my_lat, 'lon': my_lon, 'newlat': x, 'newlon': y, 'type': 'postpaid'}
        data[my_state].append(my_list)
        count += 1
count = 0
while count < my_prepaid:
    if my_state == 'HI' or my_state == 'AK':
        x, y = create_random_point(my_lat, my_lon, 100000)
    elif my_state == 'CA':
        x, y = create_random_point(my_lat, my_lon, 500000)
    else:
        x, y = create_random_point(my_lat, my_lon, 1000000)
    coordinates = (x, y)
    results = rg.search(coordinates, mode=1)
Example 38
import random
import re
import string
import sys

import reverse_geocoder as rg
from pyzipcode import ZipCodeDatabase  # assumed source of ZipCodeDatabase

flist = str(sys.argv)
tfile = flist[12:len(flist) - 2]
#f = open(tfile, 'r')
f = open("sent9.txt", 'r')
g = open("t9.txt", 'w')

zcdb = ZipCodeDatabase()
c = 0
for line in f:
    fcoords = ()
    tweet = line.split("|")
    coords = re.search(r"\[(.*)\]", tweet[0]).group(1)
    x, y = map(float, re.findall(r'[+-]?[0-9.]+', coords))
    location = rg.search((x, y))  # a single query takes a (lat, lon) tuple
    if location[0]['cc'] == "US":
        state = location[0]['admin1']
        city = location[0]['name']
        zlist = zcdb.find_zip(city=city)
        if zlist:
            zipcode = random.choice(zlist)
            s = tweet[-1].strip('\n') + "," + zipcode.zip + "," + state + "\n"
            g.write(s)
    c += 1
    if c > 100:
        g.flush()
        c = 0
f.close()
g.close()
Example 39
    for row in fin:
        row = row.strip()
        if not row: 
            continue
        try:
            first = row.split("\t")
            _hash = int(first[0])
            rows.append(row)
            points.append(from_ord(_hash))
        except IndexError as e:
            print("IndexError: %s" % row)
            sys.stdout.flush()

    if rows:
        ret_vals = search(points, max_distance=1)
        file_name = "%s/%s" %(out_dir, file_name)
        fout = open(file_name, "w")
        for i, row in enumerate(rows):
            ret_val = ret_vals[i] or {}
            first = row.split("\t")
            _hash = first[0]
            content = first[1].split(chr(1))  # 0001 was a Python 2 octal literal
            postal_code = ret_val.get("pc", "") or content[3] 
            country_code = ret_val.get("cc", "").upper() or content[4]
            if len(country_code) > 2:
                cc = mappings.get(country_code)
                if cc: 
                    country_code = cc 
                    
            content[3] = postal_code
Example 40
def analyze_clusters():
    # if not request.json:
    # 	abort(400)

    clusters = json.loads(request.data)["ids"]

    q = {
        "size": 5000,
        "query": {"terms": {"_id": clusters}},
        "aggregations": {
            "forces": {"terms": {"field": "city"}, "aggregations": {"prices": {"terms": {"field": "rate60"}}}}
        },
    }

    es = Elasticsearch(["es_url"])
    res = es.search(body=q, index="memex_ht", doc_type="ad")
    # list comprehensions instead of Python 2 filter()/map(), so len()
    # and indexing below keep working on Python 3
    geo = [x for x in res["hits"]["hits"] if "latitude" in x["_source"]]
    geopts = [(float(x["_source"]["latitude"]), float(x["_source"]["longitude"])) for x in geo]
    ethnicity = [str(x["_source"]["ethnicity"]) for x in res["hits"]["hits"] if "ethnicity" in x["_source"]]
    city = [str(x["_source"]["city"]) for x in res["hits"]["hits"] if "city" in x["_source"]]
    ethnicity_all = dict(Counter(ethnicity))
    prices = [x for x in res["hits"]["hits"]
              if "rate60" in x["_source"] and "city" in x["_source"] and x["_source"]["rate60"] != ""]
    time = [x for x in geo if "posttime" in x["_source"]]
    time_dist = [
        (
            x["_source"]["latitude"],
            x["_source"]["longitude"],
            datetime.datetime.strptime(x["_source"]["posttime"], "%Y-%m-%dT%H:%M:%S").date(),
        )
        for x in time
    ]

    imps = []  # implied travel speed
    imps2 = []  # average distance between multiple posts at exact timestamp
    for item in window(sorted(time_dist, key=lambda item: item[2]), 2):
        dist = vincenty((item[0][0], item[0][1]), (item[1][0], item[1][1])).miles
        time = abs(item[1][2] - item[0][2]).total_seconds() / 3600.00
        try:
            imps.append(dist / time)
        except ZeroDivisionError:
            if dist != 0:
                imps2.append(dist)
            else:
                pass

    if len(ethnicity_all) > 1:
        eth = "More than one"
    else:
        eth = "One"

    if len(geopts) > 0:
        results = rg.search(geopts)  # default mode = 2
        countries = set(map(lambda x: x["cc"], results))
        states = set(map(lambda x: x["admin1"], results))
        cities = set(map(lambda x: x["name"], results))
        if len(countries) > 1:
            location = "International"
        elif len(countries) == 1 and len(states) > 1:
            location = "National"
        else:
            location = "Local"
    else:
        location = "No information"

    q2 = {
        "size": 5000,
        "query": {"terms": {"city": list(set(city))}},
        "aggregations": {
            "forces": {"terms": {"field": "city"}, "aggregations": {"prices": {"terms": {"field": "rate60"}}}}
        },
    }

    pres = es.search(body=q2, index="memex_ht", doc_type="ad")
    quantiles = pres["aggregations"]["forces"]["buckets"]
    df2 = pd.DataFrame(quantiles)

    hist = []
    for i, city in enumerate(df2["key"]):
        df = pd.DataFrame(list(dict(df2["prices"][df2["key"] == city]).values())[0]["buckets"])
        df[["key", "doc_count"]] = df[["key", "doc_count"]].astype(float)
        df.sort_values("key", inplace=True)  # DataFrame.sort() was removed from pandas
        df["doc_count"] = df["doc_count"] / df["doc_count"].sum()
        norm_cumul = 1.0 * np.array(df["doc_count"]).cumsum()
        df["quantile"] = norm_cumul
        hist.append({"city": city, "data": df})

    pq = []
    raw = []
    for item in map(lambda x: (x["_source"]["city"], x["_source"]["rate60"]), prices):
        try:
            pq.append(price_quantile(hist, item[0], float(item[1])))
            raw.append(float(item[1]))
        except:
            pass

    return json.dumps(
        {
            "avg_price_quantile": np.mean(pq),
            "loc": location,
            "ethnicity": eth,
            "price_var": np.std(raw),
            "mean_price": np.mean(raw),
            "implied_speed": np.mean(imps),
            "avg_dist_sim_posts": np.mean(imps2),
        }
    )
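The implied-speed loop relies on a window() helper that is not shown. A minimal sketch of a sliding-window generator that would satisfy the call window(iterable, 2); the implementation is an assumption based on the classic itertools recipe:

from itertools import islice

def window(seq, n=2):
    # yield successive overlapping tuples of length n
    it = iter(seq)
    result = tuple(islice(it, n))
    if len(result) == n:
        yield result
    for elem in it:
        result = result[1:] + (elem,)
        yield result

print(list(window([1, 2, 3, 4], 2)))  # [(1, 2), (2, 3), (3, 4)]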
Example 41
lat = 28.7041
lon = 77.1025
x = webMercX(lon, zoom) - cx
y = webMercY(lat, zoom) - cy

#i = 1
# while i < len(data):
#    stuff = data[i].split(' ')
#    print stuff[1], stuff[2]
#    lat = float(stuff[1])
#    lon = float(stuff[2])
#    x = webMercX(lon, zoom) - cx
#    y = webMercY(lat, zoom) - cy
#    i += 1


data = cTA.csv_to_array(
    "http://earthquake.usgs.gov/earthquakes/feed/v1.0/summary/all_hour.csv")

for row in data:  # don't use indexes, just iterate over the data
    # you already split on commas.
    print(row[1], row[2])  # again, better names
    lat = float(row[1])
    lon = float(row[2])
    x = webMercX(lon, zoom) - cx
    y = webMercY(lat, zoom) - cy
    coords = (lat, lon)
    results = rg.search(coords)

    print(results)
Example 42
def compare(points, should_filter=False, max_distance=DISTANCE_THRESHOLD):
	ret_val = rg.search([p[0] for p in points], max_distance=max_distance)

	counts = {
		"country_mismatch": 0
		, "postal_code_mismatch": 0
		, "country_new": 0
		, "postal_code_new": 0
		, "filtered": 0
		, "postal_code_match": 0
		, "internal_filtered": 0
		, "country_match": 0
	}
	
	for i, point in enumerate(points):
		if chr(1) in point[1] and "|" in point[1]:  # 0001 was a Python 2 octal literal
			expected = point[1].split(chr(1))[0].split("|")
		elif chr(1) in point[1]:
			expected = point[1].split(chr(1))
		else:
			expected = point[1].split('|')

		expected_postal_code = expected[3].upper()
		expected_country = expected[4].upper()
		

		if len(expected_country) > 2:
			temp = expected_country
			expected_country = expected_postal_code
			expected_postal_code = temp

		if not ret_val[i]:
			counts["filtered"] += 1
		elif should_filter and LatLon(point[0][0], point[0][1]).distance(LatLon(ret_val[i]["lat"], ret_val[i]["lon"])) > MAX_DISTANCE:
			counts["internal_filtered"] += 1
		else:
			
			result_postal_code = ret_val[i].get("pc", "").upper()
			result_country = ret_val[i].get("cc", "").upper()
			result_point = LatLon(ret_val[i]["lat"], ret_val[i]["lon"])
			
			def print_mismatch(mismatch_type=""):
				if random() <= sampling_rate:
					print(",".join([str(k) for k in [mismatch_type, expected_country, result_country, expected_postal_code, result_postal_code, point[0][0], point[0][1], ret_val[i]["lat"], ret_val[i]["lon"], LatLon(point[0][0], point[0][1]).distance(LatLon(ret_val[i]["lat"], ret_val[i]["lon"])), point[1]]]))
					sys.stdout.flush()

			if not expected_country and result_country:
				counts["country_new"] += 1
			elif result_country != expected_country:
				print_mismatch("c")
				counts["country_mismatch"] += 1
			elif result_country == expected_country:
				counts["country_match"] += 1


			if not expected_postal_code and result_postal_code:
				counts["postal_code_new"] += 1
			elif result_postal_code != expected_postal_code:
				# print result_postal_code, expected_postal_code, expected_country
				counts["postal_code_mismatch"] += 1
				print_mismatch("p")
			elif result_postal_code == expected_postal_code:
				counts["postal_code_match"] += 1

		# else:
			# counts["filtered"] += 1


	counts["total"] = i+1
	return counts
Example 43
df = s.tweets['geo.coordinates']
bad = df.apply(lambda x: x is None)
df = df[~bad]
s.tweets = s.tweets[~bad]

lat = df.apply(lambda x: x[0])
lon = df.apply(lambda x: x[1])
# lat, lon = zip(*df)  # Alternate

# Remove Alaska and Hawaii
del states["HI"]
del states["AK"]

# Match tweets by state
coordinates = list(zip(lat, lon))  # materialise: zip() is lazy in Python 3
results = rg.search(coordinates)  # default mode = 2
# print results
state_match = [results[row]['admin1'] for row,_ in enumerate(results)]
s.tweets['state'] = state_match
cc = pd.Series([results[row]['cc'] for row,_ in enumerate(results)])
# Check if not in US:
good = cc == 'US'
if len(cc)>0:
    s.tweets.index = range(len(s.tweets))
    lon.index = range(len(lon))
    lat.index = range(len(lat))
    s.tweets = s.tweets[good]
    lon = lon[good]
    lat = lat[good]

# Coordinate DataFrame
Example 44

from geo_hash import from_ord 
from reverse_geocoder import search

points = [1028940903367840592, 1028940741691456133, 1028940714896802299, 1028937300827959104, 1028936919208416560, 1028936496796163181, 1028936321951596184]

for point in points:
	coords = from_ord(point)
	print(point, coords)
	result = search(coords, max_distance=1)

	print(coords, result)



coords = (43.646964, -79.455647)
result = search(coords, max_distance=1)

print(coords, result)

coords = (43.6469, -79.4521)
result = search(coords, max_distance=1)

print(coords, result)
Example 45
def get_country_code(coordinate_string):
	# put method here
	coordinates = coordinate_string.split()

	location = rg.search([(coordinates[0],coordinates[1])],mode=2)
	return location[0]
Example 46
def threadRun():
    Boston = (42.359502, -71.062282)
    result = rg.search(Boston)
    return result
Example 47
	def political_boundary(self, type, latitude, longitude):
		assert type in ('name', 'admin1', 'admin2', 'cc')
		data = rg.search((latitude, longitude))[0]
		# result keys are plain str on Python 3; no ASCII encoding needed
		return data[type]
Example 48
        os.system("rm -r " + path)
    rdd.saveAsTextFile(path)

sc = SparkContext(appName="SentimentAnalysis")

rawTweets = sc.textFile("./tweets.json", 100)

# first thing to do is extract the information we need from the tweets, i.e. the coordinates and the text
parsedTweets = (rawTweets.map(lambda tweet: json.loads(tweet))
                         .filter(lambda tweet: tweet["text"] != "" and tweet["coordinates"] is not None)  # filter early
                         .map(lambda tweet: (tweet["coordinates"]["coordinates"], tweet["text"])) # project early
                         .map(lambda t: ((t[0][1], t[0][0]), t[1])))    # putting coordinates in usual lat - lon format        

# extract state from coordinates (geo here is presumably reverse_geocoder, not geopy)

state_text = (parsedTweets.map(lambda t: (geo.search(t[0])[0], t[1]))
                          .map(lambda t: ( (t[0]["cc"], t[0]["admin1"]), t[1]) )
                          .filter(lambda t: t[0][0] == "US")
                          .map(lambda t: (t[0][1], t[1]) ) )

# at this point data is like (u'state', u'tweet text') for each tweet
                
# compute sentiment for each tweet and return a list of (state, sentiment) tuples

state_sent = state_text.map(lambda t: (t[0].upper(), TextBlob(t[1]).sentiment.polarity))

# and I simply save one file
#  all the state, sentiment entries, for detailed statistical analysis in R

saveAsTextFile(state_sent.map(lambda t: ",".join(map(str, (t[0], t[1])))) , # turn into nice output to store as csv
               "./sentiments_states.csv", overwrite = True)
Example 49
from timeit import timeit
import csv
import reverse_geocoder as rg

if __name__ == '__main__':
    setup = "import csv;import reverse_geocoder as rg;print('Loading coordinates...');" + \
            "cities = [(row[0],row[1]) for row in csv.reader(open('../test/coordinates_10000000.csv','rt'),delimiter='\t')];"
    num = 3
    t = timeit(stmt="rg.search(cities,mode=1)",setup=setup,number=num)
    print('Running time: %.2f secs' % (t / num))

    print('\nLoading coordinates to compare mode 1 and mode 2...')
    cities = [(row[0],row[1]) for row in csv.reader(open('../test/coordinates_1000.csv','rt'),delimiter='\t')]
    result1 = rg.search(cities,mode=1)
    result2 = rg.search(cities,mode=2)
    if result1 != result2:
        print('Results do not match!')
    else:
        print('Both results match!')
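On the modes this benchmark compares: per the library's documentation, mode=1 runs the K-D tree query in a single process, while mode=2 (the default) parallelises it, which is what pays off on inputs like the 10M-coordinate file above. A minimal sketch, assuming only that reverse_geocoder is installed:

import reverse_geocoder as rg

coords = [(40.7397, -73.9946), (48.8566, 2.3522)]
print(rg.search(coords, mode=1))  # single-process query
print(rg.search(coords))          # mode=2 by default: parallelised query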