def query(self, sqls=[], description=False, compare=False, percent=False,
          sep=' ', std_out=False, csv=""):
    search_hits = []
    for sql in sqls:
        sql = self.evaluate_sql(sql)
        try:
            self.cursor.execute(sql)
        except Exception as e:
            import sys
            self.cursor.close()
            self.conn.close()
            print e
            sys.exit(0)
        results = self.cursor.fetchall()
        if results:
            variables = [key for key in results[0].keys()]
            if csv:
                print_variables = self.extract_variables(variables)
            if std_out:
                # Warm up reverse_geocoder so its loading message prints
                # before the query output.
                rg.search((0, 0))
                print sql
                print sep.join(variables)
                for rank, result in enumerate(results):
                    print rank,
                    for var in variables:
                        print result[var], sep,
                    if 'trans_lat' in variables and 'trans_lon' in variables:
                        lat = result['trans_lat']
                        lon = result['trans_lon']
                        location = rg.search((lat, lon))[0]
                        print location['name'], ',', location['admin1'], sep
                    elif 'INTPTLAT10' in variables and 'INTPTLON10' in variables:
                        lat = result['INTPTLAT10']
                        lon = result['INTPTLON10']
                        location = rg.search((lat, lon))[0]
                        print location['name'], ',', location['admin1'], sep
                    else:
                        print ''
            if csv:
                self.print_csv(variables, results, csv)
            if description:
                self.description(variables, results, sql)
            if compare:
                self.compare_to_secondary(results, sql, percent=percent)
            search_hits += results
    return search_hits

def getUserLocationNewModel(userId):
    """
    :param userId:
    :return: userCoords [lat, lon]
    Triangulate the location of a user based on the locations of the
    businesses they reviewed.
    """
    max_count = -9999  # renamed from `max` to avoid shadowing the builtin
    userCountryKey = -1
    reviews = findReviewUserBusinessByUserId(userId)
    bCountry = {}
    latlondeg = []
    for review in reviews:
        # reviewIndex += 1
        # print "Processing Review ", reviewIndex, " of ", reviewsCount, " of user: ", userId
        # (The source is garbled here ("******"); the elided lines evidently
        # looked up the reviewed business and set latBus, lonBus,
        # businessCityId and businessCountryObj before the block below.)
        try:
            bCountry[businessCityId].addCoord((latBus, lonBus))
        except KeyError:
            bCountry[businessCityId] = businessCountryObj
            bCountry[businessCityId].addCoord((latBus, lonBus))
    # Pick the city/country bucket with the most reviewed businesses.
    for k, v in bCountry.iteritems():
        if v.getCount() > max_count:
            max_count = v.getCount()
            userCountryKey = k
    for k, v in bCountry.items():
        if k == userCountryKey:
            for loc in v.getCoords():
                latlondeg.append({"LATIDX": loc[0], "LNGIDX": loc[1]})
    # Triangulating user coords.
    userCoords = getLatLngCenter(latlondeg)
    userCountry = rg.search((userCoords[0], userCoords[1]))[0]
    userCoords.append(userCountry)
    # NOTE: despite the docstring, the reverse-geocoded record is returned,
    # not userCoords.
    return userCountry

def add_geo_assets(self):
    # Batch-reverse-geocode every waypoint, then attach each result to its waypoint.
    map(lambda way, geoass: way.add_geo_asset(geoass),
        self.waypoints,
        rg.search([(waypoint.latitude, waypoint.longitude)
                   for waypoint in self.waypoints]))
    route_start_geoassets = rg.search([(route.start_latitude, route.start_longitude)
                                       for route in self.metadata])
    route_end_geoassets = rg.search([(route.end_latitude, route.end_longitude)
                                     for route in self.metadata])
    map(lambda rte, start_geoass, end_geoass: rte.add_geo_asset(start_geoass, end_geoass),
        self.metadata, route_start_geoassets, route_end_geoassets)

def main(argv):
    try:
        input_data = sys.stdin
        csv.field_size_limit(sys.maxsize)
        csv_in = csv.DictReader(fixNulls(input_data), delimiter=delimiter,
                                fieldnames=headers, quoting=csv.QUOTE_MINIMAL)
        # Load the formatted geocoded file, suppressing the
        # "Loading formatted geocoded file..." message it prints to stdout.
        sys.stdout = open(os.devnull, "w")
        rg.search((0, 0))
        sys.stdout = sys.__stdout__
        previousLat = 0
        previousLng = 0
        for line in csv_in:
            try:
                # CUSTOMIZE HERE
                lat = str(line['lat'])
                lng = str(line['lng'])
                # Only query again when the coordinates actually changed.
                if lat != previousLat or lng != previousLng:
                    r = rg.search((lat, lng))
                    latlng2country = r[0]['cc']
                # CUSTOMIZE HERE
                outline = '{}{}{}{}{}'.format(lat, delimiter, lng, delimiter,
                                              latlng2country)
                print(outline)
                previousLat = lat
                previousLng = lng
            except Exception:
                latlng2country = ''
                continue
    except EOFError:
        return None
    except Exception as e:
        print(e)
        sys.exit(1)

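# Note: where the input fits in memory, an alternative to the row-by-row
# lookups above is a single batched rg.search call, which is how several of
# the other snippets in this collection use the library. A minimal sketch,
# assuming a hypothetical 'points.csv' with 'lat'/'lng' columns:
import csv
import reverse_geocoder as rg

with open('points.csv') as fh:  # hypothetical input file
    rows = list(csv.DictReader(fh))

# One batched lookup instead of one rg.search call per row.
coords = [(float(r['lat']), float(r['lng'])) for r in rows]
results = rg.search(coords)

for row, res in zip(rows, results):
    print(row['lat'], row['lng'], res['cc'])
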
def update_name_from_location(self):
    """In the given list resolve the city and append the location city to the filename."""
    for entry in self.file_list:
        if entry.location:
            lat, lon = entry.location.split(',')
            default_city = Folder.clean_name(
                reverse_geocoder.search([(float(lat), float(lon))])[0]['name'])
            alt_city = []
            try:
                alt_city = Folder.geo.reverse(
                    entry.location, timeout=60).address.split(', ')
            except geopy.exc.GeocoderTimedOut:
                print(f"Timeout on GeoLookup for {entry.location} of {entry.name}. "
                      f"Found {default_city} already.")
                raise FactoryError("Exiting.")
            name, ext = os.path.splitext(entry.name)
            known_city = ""
            if len(alt_city) > 4:
                known_city = Folder.get_known_city(alt_city[-5])
            if not known_city and len(alt_city) > 5:
                known_city = Folder.get_known_city(alt_city[-6])
            if not known_city:
                known_city = Folder.get_known_city(default_city)
            if known_city:
                entry.name = name + ' ' + known_city + ext
                continue
            entry.name = name + ' ' + default_city + ext

def reverseGeocode(self, latlon):
    # Calling the opencage geocoder is accurate, but its biggest drawback
    # for this use case is performance:
    #geo = self.geocoder.reverse_geocode(latlon[0], latlon[1], no_annotations='1', pretty='1', language='pt')
    #reversed = self.removeStopWords(geo[0]['formatted'])
    # reverse_geocoder is used instead; the nice thing about it is performance.
    aux = (latlon[0], latlon[1])
    data = rg.search(aux)
    city = data[0]["name"]
    province = data[0]["admin1"] + " " + data[0]["admin2"]
    # Brazilian state abbreviations mapped to full names.
    dictionary = {'AC': 'Acre', 'AL': 'Alagoas', 'AP': 'Amapá', 'AM': 'Amazonas',
                  'BA': 'Bahia', 'CE': 'Ceará', 'DF': 'Distrito Federal',
                  'ES': 'Espírito Santo', 'GO': 'Goiás', 'MA': 'Maranhão',
                  'MT': 'Mato Grosso', 'MS': 'Mato Grosso do Sul',
                  'MG': 'Minas Gerais', 'PA': 'Pará', 'PB': 'Paraíba',
                  'PR': 'Paraná', 'PE': 'Pernambuco', 'PI': 'Piauí',
                  'RJ': 'Rio de Janeiro', 'RN': 'Rio Grande do Norte',
                  'RS': 'Rio Grande do Sul', 'RO': 'Rondônia', 'RR': 'Roraima',
                  'SC': 'Santa Catarina', 'SP': 'São Paulo', 'SE': 'Sergipe',
                  'TO': 'Tocantins'}
    country = data[0]["cc"].replace("BR", "Brasil")
    # Renamed from `reversed` to avoid shadowing the builtin.
    reversed_addr = city + " " + province + " " + country
    for key in dictionary.keys():
        latlon[3] = latlon[3].upper().replace(key, dictionary[key]) \
            if len(latlon[3]) == 2 else latlon[3]
    reported = ' '.join(latlon[2:5])
    reversed_addr = self.removeStopWords(reversed_addr)
    reported = self.removeStopWords(reported)
    reversed_addr = self.removeNonAscii(reversed_addr)
    reported = self.removeNonAscii(reported)
    similarity = 100 * textdistance.Cosine(qval=None).similarity(reported, reversed_addr)
    return pd.Series((reported, reversed_addr, similarity))

def update_filter_data(country, n):
    df = pd.DataFrame(requests.get(url).json()['data'])
    station = json_normalize(df['station'])
    df.drop(['station'], axis=1, inplace=True)
    df = pd.concat([df, station], axis=1)
    df.drop(['time'], axis=1, inplace=True)
    # Batch reverse-geocode all station coordinates in one call.
    location = rg.search(list(zip(df.lat, df.lon)))
    df['coordinates'] = location
    df['location'] = df['coordinates'].apply(lambda x: x['name'])
    df['cc'] = df['coordinates'].apply(lambda x: x['cc'])
    df.drop(['coordinates', 'name', 'uid'], axis=1, inplace=True)
    df = df.merge(countries, how="inner", left_on="cc", right_on="value")
    df.rename(columns={'label': 'country'}, inplace=True)
    df.drop(['value'], axis=1, inplace=True)
    df.aqi.replace('-', np.NaN, inplace=True)
    df.dropna(axis=0, inplace=True)
    df['aqi'] = pd.to_numeric(df['aqi'])
    df['marker_color'] = pd.cut(
        df['aqi'], [0, 50, 100, 150, 200, 300, df['aqi'].max()],
        labels=['green', 'yellow', 'orange', 'red', 'purple', 'brown'])
    df['country'] = df['country'].apply(
        lambda x: 'USA' if (x == 'United States of America') else x)
    df = df[df['cc'] == country]
    df.reset_index(drop=True, inplace=True)
    return df.to_dict('records')

def build_geocodes(self):
    gps_tuples = [tuple(xy) for xy in self.centroids]
    geocodes = reverse_geocoder.search(gps_tuples)
    # rg results come back as (lat, lon, name, admin1, admin2, cc);
    # rename to friendlier column names.
    columns = ['latitude', 'longitude', 'city', 'state', 'region', 'country']
    geocodes = pd.DataFrame(geocodes)
    geocodes.columns = columns
    self.geocodes = geocodes

def add_country_codes(df):
    '''
    Use latitude and longitude coordinates to determine the country of a project.

    :param df: dataframe with coordinates
    :returns: dataframe with project country codes appended
    '''
    latitude = df.Latitude.values
    longitude = df.Longitude.values
    coordinates = list(zip(latitude, longitude))
    codes = []
    results = rg.search(coordinates)
    for r in results:
        try:
            codes.append(r['cc'])
        except KeyError:
            codes.append('NA')
    df['Country'] = codes
    return df

def home(request):
    # Current location from the client IP.
    g = geocoder.ip('me')
    latlon = g.latlng
    result = rg.search((latlon[0], latlon[1]))
    # Weather from the Dark Sky API.
    url = ("https://api.darksky.net/forecast/9229258e1491d592cda918516dd382f9/"
           "{},{}?exclude=minutely,hourly,daily,alerts,flags&units=auto").format(
        latlon[0], latlon[1])
    response = urllib.request.urlopen(url)
    data = json.loads(response.read())
    # Compliments
    compliment = [
        'You Look Stunning!', 'Hey, Gorgeous!', 'You look Sexy!',
        'Synonym for beauty is you!', 'Fair and Lovely!'
    ]
    chooser = random.randint(0, len(compliment) - 1)
    final_data = {
        'temperature': round(int(data['currently']['temperature'])),
        'summary': data['currently']['summary'],
        'place': result[0]['name'],
        'compliment': compliment[chooser]
    }
    return render(request, 'base.html', context=final_data)

def transformToCountry(self, x: float, y: float) -> str:
    # Build a prepared geometry of all land polygons so we can test whether
    # the point is on land before reverse geocoding it.
    geoms = fiona.open(
        shpreader.natural_earth(resolution='50m', category='physical',
                                name='land'))
    land_geom = sgeom.MultiPolygon(
        [sgeom.shape(geom['geometry']) for geom in geoms])
    land = prep(land_geom)
    if x is not None and y is not None:
        on = land.contains(sgeom.Point(x, y))
        if on:
            result = rg.search((y, x))
            country_full = self.cc.convert(names=result[0]["cc"], to="name_short")
            country = self.cc.convert(names=result[0]["cc"], to="ISO3")
            print("Starting in", country_full)
            plt.title("Starting in " + str(country_full), fontsize=50)
            plt.scatter(x, y, color="black", marker=".",
                        transform=ccrs.Geodetic())
            self.go = True
            return country
        else:
            print("Mer")  # French for "sea": the point is not on land
            plt.title("Please choose a location on land !", fontsize=50)
            return ""

def add_missing_countries(tweets_df):
    '''
    Note: this function works 'inplace'.
    '''
    print('{} tweets do not have country information!'.format(
        tweets_df['Location'].isna().sum()))
    # Get tweets without country info.
    no_country = tweets_df[tweets_df['Location'].isna()][['Lat', 'Long']].drop_duplicates()
    # Extract coordinates.
    coordinates = list(no_country.itertuples(index=False, name=None))
    # Map coordinates to countries using an external package.
    results = rg.search(coordinates)
    no_country['found_countries'] = [i['cc'] for i in results]
    no_country['found_countries'] = no_country['found_countries'].map(cf.country_abbr)
    no_country['Lat_Long'] = no_country[['Lat', 'Long']].apply(
        lambda x: '_'.join(x.map(str)), axis=1)
    # Add the mapped countries back to the original tweets data.
    tweets_df.loc[tweets_df['Location'].isna(), 'Location'] = list(
        tweets_df[tweets_df['Location'].isna()]['Lat_Long'].map(
            dict(zip(no_country['Lat_Long'], no_country['found_countries']))))
    tweets_df.drop(['Lat_Long'], axis=1, inplace=True)
    print('{} tweets that do not have country information will be discarded!'.format(
        tweets_df['Location'].isna().sum()))
    tweets_df = tweets_df[~tweets_df['Location'].isna()]
    return None

def _get_state_and_county(self, lon, lat):
    # Initialize up front so the check after the try block cannot hit an
    # unbound name if an exception fires early.
    result = None
    try:
        coordinates = (lat, lon)
        results = rg.search(coordinates)
        try:
            county = results[0]['admin2'].split()[:1][0]
            state = results[0]['admin1']
        except (KeyError, IndexError):
            state = None
            county = None
        # Bounding box for the area of interest.
        if (lat > 33 and lat < 38 and lon > -103.5 and lon < -94):
            result = True
        else:
            print "NOT AOI event"
            result = None
            raise TypeError("Not in AOI")
    except pgdb.Error as error:
        raise pgdb.Error(error)
    except Exception as error:
        print error
    if result is None:
        raise TypeError("Not in AOI")
    return result, county, state

def gzworker(fullpath):
    """Worker opens the .csv file and processes the information inside."""
    print('Processing {}'.format(fullpath))
    with open(fullpath, 'r+') as f:
        reader = csv.reader(f)
        for row in reader:
            # Column 3 holds "[lon, lat]"; strip the brackets and whitespace.
            geoloc = row[3].split(',')
            lon = geoloc[0].replace('[', '')
            lat = geoloc[1].replace(']', '').replace(' ', '')
            coordinates = (lat, lon)
            results = rg.search(coordinates)  # default mode = 2
            print(results)
            # Example result: [('lat', '29.23329'), ('lon', '-98.79641'),
            # ('name', 'Lytle'), ('admin1', 'Texas'),
            # ('admin2', 'Atascosa County'), ('cc', 'US')]
            state_num = mx_ca_us_state_abbrev.get(results[0].get('admin1'))
            print(state_num)
    print('Finished {}'.format(fullpath))

def do_print(lat, lon):
    # Compare three reverse-geocoding backends for the same point.
    # Note: reverse_geocode.search expects a list of (lat, lon) tuples,
    # not a flat [lat, lon] list.
    print(
        lat, lon,
        reverse_geocode.search([(lat, lon)])[0], ":",
        TestLocations.a.reverse(f"{lat},{lon}").address.split(', ')[-5:-4], ":",
        reverse_geocoder.search([(lat, lon)])[0])

def reverse_geocode(lat, lng):
    res = rg.search((lat, lng))[0]
    # Access fields by key rather than by position in the ordered dict,
    # which is fragile if the field order ever changes.
    return (res['name'], res['admin1'], res['cc'])

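# For context (and why key-based access above is safer than positional
# items() indexing): each element rg.search returns is an ordered dict with
# the keys 'lat', 'lon', 'name', 'admin1', 'admin2' and 'cc', the same fields
# visible in the example output elsewhere in this collection. A minimal
# illustration:
import reverse_geocoder as rg

# One query point; rg.search returns a list with one ordered dict.
record = rg.search((40.7128, -74.0060))[0]

# Stable, key-based access to the standard fields.
print(record['name'], record['admin1'], record['admin2'], record['cc'])
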
def get(self, name):
    phq = Client(access_token="")
    retJ = []
    if name == '':
        name = "eminem"
    for event in phq.events.search(q=name, limit=5, sort='rank',
                                   category='concerts'):
        try:
            cood = event.location
            # event.location is (lon, lat); rg.search wants (lat, lon).
            local = rg.search((cood[1], cood[0]))[0]['name']
        except IndexError:
            cood = [0, 0]
            local = 'USA'
        resp = {
            "eTitle": event.title,
            "eDate": event.start.strftime('%Y-%m-%d'),
            "eCountry": event.country,
            "eRank": event.rank,
            "eLocation": local,
        }
        retJ.append(resp)
    return retJ, 200

def predicton(image_url):
    # Path to the image.
    imagename = "static/" + image_url + ".jpg"
    print(imagename)
    # Read the image data using PIL.
    image = Image.open(imagename)
    exifdata = image.getexif()
    tags = {}
    # Put tags into a dict.
    for tag_id in exifdata:
        tag = TAGS.get(tag_id, tag_id)
        data = exifdata.get(tag_id)
        if isinstance(data, bytes):
            data = data.decode()
        tags[tag] = data
    # Extract GPS coordinates.
    # The if statements account for W or S GPS values (which should be negative).
    lat = float(tags['GPSInfo'].get(2)[0]) + float(tags['GPSInfo'].get(2)[1]) / 60
    if tags['GPSInfo'].get(1)[0] == 'S':
        lat = lat * -1
    lng = float(tags['GPSInfo'].get(4)[0]) + float(tags['GPSInfo'].get(4)[1]) / 60
    if tags['GPSInfo'].get(3)[0] == 'W':
        lng = lng * -1
    locationDict = rg.search((lat, lng))[0]
    location = locationDict.get('name') + ', ' + locationDict.get('admin1')
    return render_template('image.html', tags=tags,
                           image_url="../" + imagename, location=location)

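# One caveat on the coordinate math above: it uses only the degrees and
# minutes entries of the EXIF GPS tuples, so any seconds component (index 2)
# is silently dropped. A small sketch of the full conversion, with a
# hypothetical helper name:
def dms_to_decimal(dms, ref):
    """Convert an EXIF (degrees, minutes, seconds) tuple to signed decimal degrees."""
    degrees = float(dms[0]) + float(dms[1]) / 60 + float(dms[2]) / 3600
    # South latitudes and west longitudes are negative.
    return -degrees if ref in ('S', 'W') else degrees

# Usage against the tags dict built above (EXIF GPSInfo tag ids:
# 1 = latitude ref, 2 = latitude, 3 = longitude ref, 4 = longitude):
# lat = dms_to_decimal(tags['GPSInfo'][2], tags['GPSInfo'][1])
# lng = dms_to_decimal(tags['GPSInfo'][4], tags['GPSInfo'][3])
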
def main():
    coordinates = (40.73971301, -73.99456405)
    pprint.pprint(rg.search(coordinates))

def getlocation_drop(x):
    point_coord = (x[3], x[2])
    this_location = rg.search(point_coord, mode=1)
    # Normalize 'Queens County' to the borough name used elsewhere.
    if this_location[0]['admin2'] == 'Queens County':
        return ('Queens', x[0], x[1])
    else:
        return (this_location[0]['name'], x[0], x[1])

def map_filter(country):
    user_loc = country
    # Retrieve device GPS locations.
    GPS = get_GPS()
    device_map = {}  # renamed from `map` to avoid shadowing the builtin
    for device in GPS:
        coordinates = (GPS[device]['lat'], GPS[device]['lon'])
        lat = GPS[device]['lat']
        lon = GPS[device]['lon']
        loc = rg.search(coordinates)
        country = loc[0]['cc']
        # Keep only devices in the requested country.
        if country == user_loc:
            device_map.update({device: {'Country': country, 'lat': lat, 'lon': lon}})
    json_map = json.dumps(device_map)
    return json_map

def getlocation2(x):
    point_coord_pickup = (float(x[1]), float(x[0]))
    point_coord_drop = (float(x[3]), float(x[2]))
    pickup_location = rg.search(point_coord_pickup, mode=1)
    drop_location = rg.search(point_coord_drop, mode=1)
    if pickup_location[0]['admin2'] == 'Queens County':
        pickup_name = 'Queens'
    else:
        pickup_name = pickup_location[0]['name']
    if drop_location[0]['admin2'] == 'Queens County':
        drop_name = 'Queens'
    else:
        drop_name = drop_location[0]['name']
    names = (pickup_name, drop_name)
    return names

def convert_geo(self):
    self._df['lat_long'] = list(
        zip(self._df['latitude'], self._df['longitude']))
    rg_result = rg.search(self._df['lat_long'].tolist())
    df_geo = json_normalize(rg_result)
    self._df['area'] = df_geo['name']
    self._df['location'] = df_geo['admin2']

def reverseGeocode(coordinates):
    # result is a list containing one ordered dictionary per coordinate.
    result = rg.search(coordinates)
    return result[0]

def fetchsamples(db):
    # Bounding box (lon/lat pairs) roughly spanning the US.
    query_coords_string = '-155.6811,18.91,-66.9470,44.81'
    url = ('https://stream.twitter.com/1.1/statuses/filter.json'
           '?lang=en&locations=' + query_coords_string)
    response = twitterreq(url, "POST", [])
    for line in response:
        tweet_json = ujson.loads(line.strip().decode('utf8'))
        # We only process geolocated tweets and tweets in English.
        if tweet_json['lang'] == 'en':
            if 'geo' in tweet_json and tweet_json['geo'] is not None:
                latitude = tweet_json['geo']['coordinates'][0]
                longitude = tweet_json['geo']['coordinates'][1]
                result_json = rg.search((latitude, longitude), verbose=False)[0]
                if result_json is not None:
                    if 'cc' in result_json and result_json['cc'] == 'US':
                        tweet_json['usa_state'] = result_json['admin1']
                        print_tweet(remove_fields(tweet_json), db)
            elif 'place' in tweet_json and tweet_json['place'] == 'United States':
                full_name = tweet_json['place']['full_name']
                matches = re.findall(r'([\w+\s]+)', full_name)
                # Was `is 2`: identity comparison on ints is a bug; use ==.
                if len(matches) == 2:
                    if matches[1] == 'USA':
                        tweet_json['usa_state'] = matches[0]
                    else:
                        # Find the state's full name from its abbreviation.
                        tweet_json['usa_state'] = str(us.states.lookup(matches[1][1:]))
                print_tweet(remove_fields(tweet_json), db)

def reverseGeocode(a, b):
    coordinates = (a, b)
    result = rg.search(coordinates, verbose=False)
    if len(result) > 0:
        return result[0].get("cc")
    else:
        return "ASSENTE"  # Italian for "absent": no result for these coordinates

def filter_US_locations(collection):
    cursor = collection.find(
        {
            "location_dict": {"$exists": False},
            "latitude": {"$exists": True}
        },
        no_cursor_timeout=True)
    added_counter = 0
    deleted_counter = 0
    for record in cursor:
        location_data = reverse_geocoder.search(
            (record['latitude'], record['longitude']))
        if location_data[0]['cc'] == 'US':
            collection.update_one(
                {"_id": record["_id"]},
                {"$set": {'location_dict': location_data[0]}})
            added_counter += 1
        else:
            collection.delete_one({"_id": record["_id"]})
            deleted_counter += 1
    string_report = "added {} location dicts and deleted {} records".format(
        added_counter, deleted_counter)
    print(string_report)
    time.sleep(1.5)
    cursor.close()

def reverseGeocode(coordinates):
    result = rg.search(coordinates)
    pprint.pprint(result)
    address = result[0]["name"]
    print(address)
    return address

def get_coords(x, y):
    # Guard against missing coordinates before calling the geocoder.
    if pd.isnull(x) or pd.isnull(y):
        return 'nan'
    coordinates = (x, y)
    results = rg.search(coordinates)
    return results

def fetch_information(lat, long):
    final_info = []
    st.text(
        "Note : Radius used for feature collection and model building is 1km")
    st.warning("Fetching features......This might take a few minutes")
    country_code_info = rg.search((lat, long))
    country_code = country_code_info[0]['cc']
    continent = country_to_continent(country_code)
    final_info.append(continent)
    dist_to_road_collected = getDist(lat, long)
    final_info.append(dist_to_road_collected)
    population_info = getPopulationInfo(lat, long, population)
    final_info.extend(population_info)
    venues = getVenueInfo(lat, long)
    final_info.extend(venues)
    st.text(f"fetching population information ={population_info}")
    st.text(f"fetching nearby venue information ={venues}")
    st.text(f"fetching nearest distance to roads ={dist_to_road_collected}")
    st.success("Process is complete")
    st.text(final_info)
    # The model expects the 17 collected features as a single row.
    final_info_numpy = np.array(final_info).reshape(1, 17)
    return final_info_numpy

def which_state(self, tweet):
    box = tweet.place.bounding_box.coordinates[0]
    longs = [x[0] for x in box]
    lats = [x[1] for x in box]
    coords = (np.mean(longs), np.mean(lats))  # centroid; note: not used below
    # Reverse the first (lon, lat) corner of the box into (lat, lon) order.
    results = rg.search(tuple(reversed(box[0])))[0]
    if results['cc'] == 'US':
        return results['admin1']
    else:
        return 'Non-US'

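# As flagged in the comment above, the centroid `coords` is computed but
# never used: the lookup runs on the first corner of the bounding box. If the
# centroid was the intent, a minimal sketch of that variant (same names as
# above; rg.search expects (lat, lon) order, so the means are swapped):
# results = rg.search((np.mean(lats), np.mean(longs)))[0]
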
def get_city_and_state(lat, lon):
    cityNum, stateNum = -1, -1
    result = rg.search((lat, lon))[0]
    if 'name' in result:
        city = result['name']
        if city in cityMap:
            cityNum = cityMap[city]
    if 'admin1' in result:
        state = result['admin1']
        if state in stateMap:
            stateNum = stateMap[stateToAbbr[state]]
    return cityNum, stateNum

def get_location():
    API_KEY = "7efb02dc0ba8ee84f6f63945578cdcca"
    request_url = "http://api.ipstack.com/check?access_key={}".format(API_KEY)
    r = requests.get(request_url)
    j = json.loads(r.text)
    lat = j['latitude']
    lon = j['longitude']
    print lat
    print lon
    results = rg.search((lat, lon))
    return results[0]["name"]

def count_single_field(field, data):
    if field == 'hashtag':
        out = [i[1] for i in data if i[1] != 'null']
        out = Counter(list(itertools.chain.from_iterable(out)))
    elif field == 'coordinates':
        coord = [i[3] for i in data if i[3] != 'null']
        result = rg.search(coord)
        out = Counter([i['admin1'] for i in result])
    elif field == 'mention':
        out = Counter([i[4] for i in data if i[4] != 'null'])
    elif field == 'source':
        out = Counter([i[5] for i in data if i[5] != 'null'])
    elif field == 'tweet':
        out = Counter([i[6] for i in data if i[6] != 'null'])
    elif field == 'user':
        out = Counter([i[7] for i in data if i[7] != 'null'])
    return out

def process():
    # global scoreOfStates
    x = 0
    for i in states:
        scoreOfStates[states[i]] = 0
        tweetofStates[states[i]] = 0
    while True:
        try:
            x += 1
            if x == 100:
                break
            TweetInfo = consumer.recv_json()
            print x
            # `has_key` replaced with the `in` operator throughout.
            if "place" in TweetInfo:
                if "bounding_box" in TweetInfo["place"]:
                    if TweetInfo["place"]["bounding_box"]["coordinates"]:
                        lis = TweetInfo["place"]["bounding_box"]["coordinates"]
                        # Take the average of the bounding-box corners as the
                        # assumed tweet location and find the corresponding
                        # state using reverse geo lookup.
                        a = 0
                        b = 0
                        for i in lis[0]:
                            a += i[0]
                            b += i[1]
                        a /= 4.0
                        b /= 4.0
                        state = rg.search((b, a))[0]["admin1"]
                        score = 0
                        if "text" in TweetInfo:
                            listOfTokens = normalize(TweetInfo['text'])
                            text = " ".join(listOfTokens)
                            score = TextBlob(text).sentiment.polarity
                        if state in states.values():
                            scoreOfStates[state] += score
                            tweetofStates[state] += 1
        except Exception:
            print "Exception found"

def findLocation(hashtag, coordinates):
    print 'Finding corresponding locations...'
    results_ = rg.search(coordinates)
    results = [i['admin1'] for i in results_]
    total_result = []
    for i in zip(results, hashtag):
        if len(i[1]) != 0:
            temp = []
            for j in i[1]:
                temp.append(j)
            total_result.append((i[0], temp))
        else:
            total_result.append((i[0], i[1]))
    # placeFinal = [i for i in total_result if i[0] in listStates]
    placeFinal = [i for i in total_result]
    print 'Done'
    return placeFinal

my_postpaid = int(math.ceil(float(my_stat) * .66))
my_prepaid = int(math.ceil(float(my_stat) * .16))
my_other = int(math.ceil(float(my_stat) * .18))

# Use yearly data to find an hourly amount, and create that number of points.
count = 0
while count < my_postpaid:
    # Spread points over a smaller radius for HI/AK, medium for CA, large otherwise.
    if my_state == 'HI' or my_state == 'AK':
        x, y = create_random_point(my_lat, my_lon, 100000)
    elif my_state == 'CA':
        x, y = create_random_point(my_lat, my_lon, 500000)
    else:
        x, y = create_random_point(my_lat, my_lon, 1000000)
    coordinates = (x, y)
    results = rg.search(coordinates, mode=1)
    # Used to verify the generated point actually falls inside the US.
    if results[0]['cc'] == 'US':
        my_list = {'name': my_name, 'lat': my_lat, 'lon': my_lon,
                   'newlat': x, 'newlon': y, 'type': 'postpaid'}
        data[my_state].append(my_list)
        count += 1

count = 0
while count < my_prepaid:
    if my_state == 'HI' or my_state == 'AK':
        x, y = create_random_point(my_lat, my_lon, 100000)
    elif my_state == 'CA':
        x, y = create_random_point(my_lat, my_lon, 500000)
    else:
        x, y = create_random_point(my_lat, my_lon, 1000000)
    coordinates = (x, y)
    results = rg.search(coordinates, mode=1)
    # (The source is truncated here; presumably this loop mirrors the
    # postpaid block above with type 'prepaid'.)

import string

flist = str(sys.argv)
tfile = flist[12:len(flist) - 2]
#f = open(tfile, 'r')
f = open("sent9.txt", 'r')
g = open("t9.txt", 'w')
zcdb = ZipCodeDatabase()
c = 0
for line in f:
    tweet = line.split("|")
    coords = re.search(r"\[(.*)\]", tweet[0]).group(1)
    x, y = map(float, re.findall(r'[+-]?[0-9.]+', coords))
    location = rg.search((x, y))  # pass a (lat, lon) tuple, not a flat list
    if location[0]['cc'] == "US":
        state = location[0]['admin1']
        city = location[0]['name']
        zlist = zcdb.find_zip(city=city)
        if zlist:  # find_zip returns a list; check that it is non-empty
            zipcode = random.choice(zlist)
            s = tweet[-1].strip('\n') + "," + zipcode.zip + "," + state + "\n"
            g.write(s.encode('utf8'))
            c += 1
            if c > 100:
                g.flush()
                c = 0
f.close()
g.close()

for row in fin:
    row = row.strip()
    if not row:
        continue
    try:
        first = row.split("\t")
        _hash = int(first[0])
        rows.append(row)
        points.append(from_ord(_hash))
    except IndexError, e:
        print "IndexError: %s" % row
        sys.stdout.flush()  # fixed typo: was sys.sdout

if rows:
    ret_vals = search(points, max_distance=1)
    file_name = "%s/%s" % (out_dir, file_name)
    fout = open(file_name, "w")
    for i, row in enumerate(rows):
        ret_val = ret_vals[i] or {}
        first = row.split("\t")
        _hash = first[0]
        content = first[1].split(chr(0001))
        # Prefer the geocoder's postal/country codes, falling back to the
        # values already in the record.
        postal_code = ret_val.get("pc", "") or content[3]
        country_code = ret_val.get("cc", "").upper() or content[4]
        if len(country_code) > 2:
            cc = mappings.get(country_code)
            if cc:
                country_code = cc
        content[3] = postal_code

def analyze_clusters():
    # if not request.json:
    #     abort(400)
    clusters = json.loads(request.data)["ids"]
    q = {
        "size": 5000,
        "query": {"terms": {"_id": clusters}},
        "aggregations": {
            "forces": {"terms": {"field": "city"},
                       "aggregations": {"prices": {"terms": {"field": "rate60"}}}}
        },
    }
    es = Elasticsearch(["es_url"])
    res = es.search(body=q, index="memex_ht", doc_type="ad")

    geo = filter(lambda x: "latitude" in x["_source"].keys(), res["hits"]["hits"])
    geopts = map(lambda x: (float(x["_source"]["latitude"]),
                            float(x["_source"]["longitude"])), geo)
    ethnicity = filter(lambda x: "ethnicity" in x["_source"].keys(), res["hits"]["hits"])
    ethnicity = map(lambda x: str(x["_source"]["ethnicity"]), ethnicity)
    city = filter(lambda x: "city" in x["_source"].keys(), res["hits"]["hits"])
    city = map(lambda x: str(x["_source"]["city"]), city)
    ethnicity_all = dict(Counter(ethnicity))
    prices = filter(lambda x: "rate60" in x["_source"].keys()
                    and "city" in x["_source"].keys(), res["hits"]["hits"])
    prices = filter(lambda x: x["_source"]["rate60"] != "", prices)
    time = filter(lambda x: "posttime" in x["_source"].keys(), geo)
    time_dist = map(
        lambda x: (
            x["_source"]["latitude"],
            x["_source"]["longitude"],
            datetime.datetime.strptime(x["_source"]["posttime"], "%Y-%m-%dT%H:%M:%S").date(),
        ),
        time,
    )

    imps = []   # implied travel speed
    imps2 = []  # average distance between multiple posts at the exact same timestamp
    for item in window(sorted(time_dist, key=lambda item: item[2]), 2):
        dist = vincenty((item[0][0], item[0][1]), (item[1][0], item[1][1])).miles
        time = abs(item[1][2] - item[0][2]).total_seconds() / 3600.00
        try:
            imps.append(dist / time)
        except ZeroDivisionError:
            if dist != 0:
                imps2.append(dist)

    if len(ethnicity_all) > 1:
        eth = "More than one"
    else:
        eth = "One"

    if len(geopts) > 0:
        results = rg.search(geopts)  # default mode = 2
        countries = set(map(lambda x: x["cc"], results))
        states = set(map(lambda x: x["admin1"], results))
        cities = set(map(lambda x: x["name"], results))
        if len(countries) > 1:
            location = "International"
        elif len(countries) == 1 and len(states) > 1:
            location = "National"
        else:
            location = "Local"
    else:
        location = "No information"

    q2 = {
        "size": 5000,
        "query": {"terms": {"city": list(set(city))}},
        "aggregations": {
            "forces": {"terms": {"field": "city"},
                       "aggregations": {"prices": {"terms": {"field": "rate60"}}}}
        },
    }
    pres = es.search(body=q2, index="memex_ht", doc_type="ad")
    quantiles = pres["aggregations"]["forces"]["buckets"]
    df2 = pd.DataFrame(quantiles)
    hist = []
    for i, city in enumerate(df2["key"]):
        df = pd.DataFrame(dict(df2["prices"][df2["key"] == city]).values()[0]["buckets"])
        df[["key", "doc_count"]] = df[["key", "doc_count"]].astype(float)
        df.sort("key", inplace=True)
        df["doc_count"] = df["doc_count"] / df["doc_count"].sum()
        norm_cumul = 1.0 * np.array(df["doc_count"]).cumsum()
        df["quantile"] = norm_cumul
        hist.append({"city": city, "data": df})

    pq = []
    raw = []
    for item in map(lambda x: (x["_source"]["city"], x["_source"]["rate60"]), prices):
        try:
            pq.append(price_quantile(hist, item[0], float(item[1])))
            raw.append(float(item[1]))
        except Exception:
            pass

    return json.dumps(
        {
            "avg_price_quantile": np.mean(pq),
            "loc": location,
            "ethnicity": eth,
            "price_var": np.std(raw),
            "mean_price": np.mean(raw),
            "implied_speed": np.mean(imps),
            "avg_dist_sim_posts": np.mean(imps2),
        }
    )

lat = 28.7041
lon = 77.1025
x = webMercX(lon, zoom) - cx
y = webMercY(lat, zoom) - cy

data = cTA.csv_to_array(
    "http://earthquake.usgs.gov/earthquakes/feed/v1.0/summary/all_hour.csv")

for row in data:
    # Don't use indexes; iterate directly over the rows, which are
    # already split on commas.
    print(row[1], row[2])
    lat = float(row[1])
    lon = float(row[2])
    x = webMercX(lon, zoom) - cx
    y = webMercY(lat, zoom) - cy
    coords = (lat, lon)
    results = rg.search(coords)
    print results

def compare(points, should_filter=False, max_distance=DISTANCE_THRESHOLD):
    ret_val = rg.search([p[0] for p in points], max_distance=max_distance)
    counts = {
        "country_mismatch": 0,
        "postal_code_mismatch": 0,
        "country_new": 0,
        "postal_code_new": 0,
        "filtered": 0,
        "postal_code_match": 0,
        "internal_filtered": 0,
        "country_match": 0
    }
    for i, point in enumerate(points):
        # The expected value may be delimited by \x01, "|", or both.
        if chr(0001) in point[1] and "|" in point[1]:
            expected = point[1].split(chr(0001))[0].split("|")
        elif chr(0001) in point[1]:
            expected = point[1].split(chr(0001))
        else:
            expected = point[1].split('|')
        expected_postal_code = expected[3].upper()
        expected_country = expected[4].upper()
        # Country codes are two letters; if the fields look swapped, swap them back.
        if len(expected_country) > 2:
            expected_country, expected_postal_code = \
                expected_postal_code, expected_country
        if not ret_val[i]:
            counts["filtered"] += 1
        elif should_filter and LatLon(point[0][0], point[0][1]).distance(
                LatLon(ret_val[i]["lat"], ret_val[i]["lon"])) > MAX_DISTANCE:
            counts["internal_filtered"] += 1
        else:
            result_postal_code = ret_val[i].get("pc", "").upper()
            result_country = ret_val[i].get("cc", "").upper()
            result_point = LatLon(ret_val[i]["lat"], ret_val[i]["lon"])

            def print_mismatch(mismatch_type=""):
                if random() <= sampling_rate:
                    print ",".join([str(k) for k in [
                        mismatch_type, expected_country, result_country,
                        expected_postal_code, result_postal_code,
                        point[0][0], point[0][1],
                        ret_val[i]["lat"], ret_val[i]["lon"],
                        LatLon(point[0][0], point[0][1]).distance(
                            LatLon(ret_val[i]["lat"], ret_val[i]["lon"])),
                        point[1]]])
                    sys.stdout.flush()

            if not expected_country and result_country:
                counts["country_new"] += 1
            elif result_country != expected_country:
                print_mismatch("c")
                counts["country_mismatch"] += 1
            elif result_country == expected_country:
                counts["country_match"] += 1

            if not expected_postal_code and result_postal_code:
                counts["postal_code_new"] += 1
            elif result_postal_code != expected_postal_code:
                counts["postal_code_mismatch"] += 1
                print_mismatch("p")
            elif result_postal_code == expected_postal_code:
                counts["postal_code_match"] += 1
    counts["total"] = i + 1
    return counts

df = s.tweets['geo.coordinates']
bad = df.apply(lambda x: x is None)
df = df[~bad]
s.tweets = s.tweets[~bad]
lat = df.apply(lambda x: x[0])
lon = df.apply(lambda x: x[1])
# lat, lon = zip(*df)  # Alternate

# Remove Alaska and Hawaii
del states["HI"]
del states["AK"]

# Match tweets to states by coordinates
coordinates = list(zip(lat, lon))
results = rg.search(coordinates)  # default mode = 2
state_match = [r['admin1'] for r in results]
s.tweets['state'] = state_match
cc = pd.Series([r['cc'] for r in results])

# Drop tweets outside the US:
good = cc == 'US'
if len(cc) > 0:
    s.tweets.index = range(len(s.tweets))
    lon.index = range(len(lon))
    lat.index = range(len(lat))
    s.tweets = s.tweets[good]
    lon = lon[good]
    lat = lat[good]
# Coordinate DataFrame

from geo_hash import from_ord
from reverse_geocoder import search

points = [1028940903367840592, 1028940741691456133, 1028940714896802299,
          1028937300827959104, 1028936919208416560, 1028936496796163181,
          1028936321951596184]

for point in points:
    coords = from_ord(point)
    print point, coords
    result = search(coords, max_distance=1)
    print coords, result

coords = (43.646964, -79.455647)
result = search(coords, max_distance=1)
print coords, result

coords = (43.6469, -79.4521)
result = search(coords, max_distance=1)
print coords, result

def get_country_code(coordinate_string):
    coordinates = coordinate_string.split()
    location = rg.search([(coordinates[0], coordinates[1])], mode=2)
    return location[0]

def threadRun():
    Boston = (42.359502, -71.062282)
    result = rg.search(Boston)
    return result

def political_boundary(self, field, latitude, longitude):
    # `field` (renamed from `type`, which shadows the builtin) selects which
    # part of the reverse-geocode record to return.
    assert field in ('name', 'admin1', 'admin2', 'cc')
    data = rg.search((latitude, longitude))[0]
    # The result keys are plain strings; the original .encode("ASCII") was
    # unnecessary (and breaks under Python 3, where it produces bytes).
    return data[field]

os.system("rm -r " + path) rdd.saveAsTextFile(path) sc = SparkContext(appName="SentimentAnalysis") rawTweets = sc.textFile("./tweets.json", 100) # first thing to do is extract the information we need from the tweets, i.e. the coordinates and the text parsedTweets = (rawTweets.map(lambda tweet: json.loads(tweet)) .filter(lambda tweet: tweet["text"] != "" and tweet["coordinates"] is not None) # filter early .map(lambda tweet: (tweet["coordinates"]["coordinates"], tweet["text"])) # project early .map(lambda t: ((t[0][1], t[0][0]), t[1]))) # putting coordinates in usual lat - lon format # extract state from coordinates using geopy state_text = (parsedTweets.map(lambda t: (geo.search(t[0])[0], t[1])) .map(lambda t: ( (t[0]["cc"], t[0]["admin1"]), t[1]) ) .filter(lambda t: t[0][0] == "US") .map(lambda t: (t[0][1], t[1]) ) ) # at this point data is like (u'state', u'tweet text') for each tweet # compute sentiment for each tweet and return a list of (state, sentiment) tuples state_sent = state_text.map(lambda t: (t[0].upper(), TextBlob(t[1]).sentiment.polarity)) # and I simply save one file # all the state, sentiment entries, for detailed statistical analysis in R saveAsTextFile(state_sent.map(lambda t: ",".join(map(str, (t[0], t[1])))) , # turn into nice output to store as csv "./sentiments_states.csv", overwrite = True)
from timeit import timeit
import csv
import reverse_geocoder as rg

if __name__ == '__main__':
    setup = "import csv;import reverse_geocoder as rg;print('Loading coordinates...');" + \
            "cities = [(row[0],row[1]) for row in csv.reader(open('../test/coordinates_10000000.csv','rt'),delimiter='\t')];"
    num = 3
    t = timeit(stmt="rg.search(cities,mode=1)", setup=setup, number=num)
    print('Running time: %.2f secs' % (t / num))

    print('\nLoading coordinates to compare mode 1 and mode 2...')
    cities = [(row[0], row[1]) for row in csv.reader(
        open('../test/coordinates_1000.csv', 'rt'), delimiter='\t')]
    result1 = rg.search(cities, mode=1)
    result2 = rg.search(cities, mode=2)
    if result1 != result2:
        print('Results do not match!')
    else:
        print('Both results match!')
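
# A usage note on the two modes this benchmark compares: as the comments
# scattered through these snippets suggest, mode=2 is the library default
# and parallelises the lookup, while mode=1 runs it in a single process, so
# both should return identical results and differ only in speed. A minimal
# sketch:
import reverse_geocoder as rg

coords = [(51.5074, -0.1278), (35.6762, 139.6503)]

# mode=1: single-process lookup; less startup overhead for small batches.
print(rg.search(coords, mode=1))

# mode=2: the default, parallelised lookup; pays off on large batches.
print(rg.search(coords, mode=2))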