def write_to_db_one(data_list, start_number):
    """Geocode new flats from *data_list* and store them in the database.

    Items whose ``href`` is already present in the ``Flat`` table are
    skipped.  Every other item is geocoded with Yandex (falling back to
    GoogleV3 when Yandex times out) and inserted as a new ``Flat`` row.

    Args:
        data_list: iterable of dicts with the keys ``type``, ``date``,
            ``metro_station``, ``obj_address``, ``rooms``, ``area``,
            ``floor``, ``href``, ``source`` and ``name``.
        start_number: initial value of the internal counter; processing
            stops once the counter exceeds ``start_number - 1 + 25000``.
    """
    field_keys = ("type", "date", "metro_station", "obj_address", "rooms",
                  "area", "floor", "href", "source", "name")
    # NOTE(review): 25000 looks like a geocoding quota -- confirm.
    limit = start_number - 1 + 25000
    counter = start_number

    for item in data_list:
        if counter > limit:
            break

        # Explicit existence check instead of relying on ``None[0]``
        # raising TypeError.
        existing = (db_session.query(Flat.href)
                    .filter(Flat.href == item["href"])
                    .first())
        if existing is not None:
            continue

        values = [item[key] for key in field_keys]
        object_address = item["obj_address"]

        try:
            location = Yandex().geocode(object_address)
        except GeocoderTimedOut:
            location = GoogleV3().geocode(object_address)

        # One increment per item that reached the geocoder.
        counter += 1

        if location is None:
            # geocode() returns None when nothing matches; without
            # coordinates the row cannot be built, so skip the item.
            continue

        db_item = Flat(*(values + [location.latitude, location.longitude]))
        db_session.add(db_item)
        db_session.commit()
def get_location(uni):
    """Geocode a university name, trying a few spellings of the query.

    The plain name is tried first, then ``"University of <name>"`` and
    finally ``"<name> University"``.  Requests are issued one at a time
    and stop at the first hit.

    Args:
        uni: university name (string).

    Returns:
        The first non-None geocoding result, or None when all three
        queries fail.
    """
    geolocator = Yandex()
    # The prefix/suffix need a separating space, otherwise the query
    # becomes e.g. "University ofOxford".
    candidates = (uni, "University of " + uni, uni + " University")

    location = None
    for query in candidates:
        location = geolocator.geocode(query)
        if location is not None:
            break
    return location
def geolocate(address, yandex=False, try_all=True):
    """Geocode *address* and split the result into address components.

    Args:
        address: free-form address string.
        yandex: use the Yandex geocoder when true, Nominatim otherwise.
        try_all: when the chosen geocoder finds nothing, retry once with
            the other geocoder.

    Returns:
        A dict.  When no geocoder finds the address it only contains
        ``{"address": address}``.  Otherwise it holds ``country``,
        ``city``, ``street``, ``address``, ``latitude`` and ``longitude``;
        Nominatim results additionally carry ``state`` and, when enough
        comma-separated parts are present, ``region`` and ``zip``.
    """
    if yandex:
        geolocator = Yandex(lang='en_US')
    else:
        geolocator = Nominatim()
    location = geolocator.geocode(address, timeout=10)

    if location is None:
        if try_all:
            return geolocate(address, not yandex, False)
        # Applies to both geocoders; previously the Nominatim branch
        # fell through and failed on ``location.address``.
        return {"address": address}

    raw_parts = location.address.split(",")

    def part(index):
        return raw_parts[index].strip()

    data = {"country": part(-1)}

    if yandex:
        try:
            data["city"] = part(-3)
        except IndexError:
            data["city"] = part(0)
        # Slicing and joining cannot fail, so no fallback is needed here.
        data["street"] = " ".join(raw_parts[:-3]).strip()
    else:
        # A purely numeric second-to-last part (ignoring "-", "_" and
        # spaces) is treated as a zip code and shifts the other parts.
        zip_candidate = part(-2)
        digits_only = (zip_candidate.replace("-", "")
                       .replace("_", "").replace(" ", ""))
        if digits_only.isdigit():
            data["zip"] = zip_candidate
            shift = 1
        else:
            shift = 0
        data["state"] = part(-2 - shift)
        try:
            data["region"] = part(-3 - shift)
            data["city"] = part(-4 - shift)
        except IndexError:
            data["city"] = part(0)
        data["street"] = location.address

    data["address"] = location.address
    data["latitude"] = location.latitude
    data["longitude"] = location.longitude
    return data
def get_03_uhouse(city=None, street=None, house=None, geocoder=None):
    """Geocode an address assembled from city, street and house number.

    The query always starts with 'РФ'; 'Нижний Новгород' is used when
    *city* is empty.

    Args:
        city: city name, optional.
        street: street name, optional.
        house: house number, optional (converted with ``str``).
        geocoder: ``'Yandex'`` selects the Yandex geocoder; any other
            value selects Nominatim.

    Returns:
        ``None`` when nothing was found; ``11`` when the geocoding
        service raised ``GeocoderServiceError``; otherwise
        ``(pos, address, name)`` when the raw response is non-empty
        (``name`` is ``None`` if the response has no ``'name'`` key) or
        ``(pos, address)`` when it is empty, with
        ``pos == (latitude, longitude)``.
    """
    # Imported locally so the handler does not depend on a module-level
    # ``import geopy``.
    from geopy.exc import GeocoderServiceError

    # The list always holds the country and a city, so the former
    # ``len(pref_addr) < 2`` guard (return code 10) could never fire.
    pref_addr = ['РФ', city if city else 'Нижний Новгород']
    if street:
        pref_addr.append(street)
    if house:
        pref_addr.append(str(house))

    if geocoder == 'Yandex':
        from geopy.geocoders import Yandex
        geolocator = Yandex()
    else:
        from geopy.geocoders import Nominatim
        geolocator = Nominatim()

    try:
        location = geolocator.geocode(', '.join(pref_addr))
    except GeocoderServiceError:
        return 11

    if location is None:
        return None

    pos = (location.latitude, location.longitude)
    if location.raw:
        return pos, location.address, location.raw.get('name')
    return pos, location.address
def get_location(city=None, street=None, house=None, geocoder=None):
    """Geocode an address and print what the geocoder returned.

    The query always starts with 'РФ'; 'Нижний Новгород' is used when
    *city* is empty.  The query, the resolved address(es), their
    coordinates and the raw ``name`` field (when present) are printed.

    Args:
        city: city name, optional.
        street: street name, optional.
        house: house number, optional (converted with ``str``).
        geocoder: ``'Yandex'`` selects the Yandex geocoder; any other
            value selects Nominatim.

    Returns:
        ``11`` when the geocoding service raised
        ``GeocoderServiceError``; ``None`` otherwise.
    """
    # Imported locally so the handler does not depend on a module-level
    # ``import geopy``.
    from geopy.exc import GeocoderServiceError

    # The list always holds the country and a city, so the former
    # ``len(pref_addr) < 2`` guard (return code 10) could never fire.
    pref_addr = ['РФ', city if city else 'Нижний Новгород']
    if street:
        pref_addr.append(street)
    if house:
        pref_addr.append(str(house))
    print(', '.join(pref_addr))

    if geocoder == 'Yandex':
        from geopy.geocoders import Yandex
        geolocator = Yandex()
    else:
        from geopy.geocoders import Nominatim
        geolocator = Nominatim()

    try:
        location = geolocator.geocode(', '.join(pref_addr))
    except GeocoderServiceError:
        return 11

    if location is None:
        return None

    # Single-argument print calls produce the same text on Python 2
    # and Python 3.
    if isinstance(location, list):
        print('len location list: %d' % len(location))
        for found in location:
            print('\t %s' % found.address)
            print('\t %s' % ((found.latitude, found.longitude),))
    else:
        print('\t %s' % location.address)
        print('\t %s' % ((location.latitude, location.longitude),))
        # Not every raw response has a 'name' key; print it only when
        # present.
        if location.raw and 'name' in location.raw:
            print('\tname %s' % location.raw['name'])
    return None
def get_location(country):
    """Geocode *country* with the Yandex geocoder (English results).

    Args:
        country: query string passed to the geocoder.

    Returns:
        Whatever ``Yandex.geocode`` returns for the query, using a
        10-second timeout.

    Raises:
        ImportError: with the message 'Cannot import geocoder.' when an
            ImportError occurs while importing or using the geocoder.
    """
    try:
        from geopy.geocoders import Yandex as yandex_geocoder
        return yandex_geocoder(lang='en_US').geocode(country, timeout=10)
    except ImportError:
        raise ImportError('Cannot import geocoder.')
def price(request):
    """Show the price form and, on a valid POST, the calculated price.

    On a valid POST the two cities are geocoded with Yandex, the
    distance between them is computed with ``vincenty`` and sent to the
    groozgo.ru ``calc_price`` endpoint; the ``find_drivers`` endpoint is
    queried as well.  The rounded ``totalPrice`` and the raw driver
    response text are rendered with ``ok.html``.

    For any other request, an invalid form, or a city that cannot be
    geocoded, ``main.html`` is rendered with the (possibly bound) form.
    """
    if request.method == 'POST':
        form = PriceForm(request.POST)
        if form.is_valid():
            url_price = 'https://groozgo.ru/api/order/calc_price'
            url_drive = 'https://groozgo.ru/api/order/find_drivers'
            headers = {'Content-Type': 'application/json; charset=utf-8',
                       'X-Requested-With': 'XMLHttpRequest'}

            cityfrom = form.cleaned_data.get('cityfrom', None)
            cityto = form.cleaned_data.get('cityto', None)
            weight = form.cleaned_data.get('weight', None)
            nds = form.cleaned_data.get('nds', None)
            nal = form.cleaned_data.get('nal', None)
            from_value = "Россия, " + str(cityfrom)
            to_value = "Россия, " + str(cityto)

            geolocator = Yandex()
            loc_from = geolocator.geocode(str(cityfrom))
            loc_to = geolocator.geocode(str(cityto))
            if loc_from is None or loc_to is None:
                # geocode() returns None for unknown places; report it
                # on the form instead of failing on ``.latitude``.
                form.add_error(
                    None,
                    'Could not determine the coordinates of the origin '
                    'or destination city.')
                return render(request, 'main.html', {'form': form})

            loc_from_coord = (loc_from.latitude, loc_from.longitude)
            loc_to_coord = (loc_to.latitude, loc_to.longitude)
            distance = vincenty(loc_from_coord, loc_to_coord).meters

            data_price = {"distance": distance,
                          "weight": str(weight),
                          "from_administrative_area": cityfrom,
                          "to_administrative_area": cityto,
                          "mkad_distance": 23813.79,
                          "is_refrigerator": False,
                          "is_isotherm": False,
                          "is_insurance": False,
                          "insurance_sum": 0,
                          "porters_count": "0",
                          "destination_points": {},
                          "destination_points_amount": 0,
                          "is_documents_back": False,
                          "is_online_tracking": False,
                          "back_doc_address": "",
                          "from_value": from_value,
                          "to_value": to_value}
            price_response = requests.post(url_price, json=data_price,
                                           headers=headers)
            # Local name differs from the view name to avoid shadowing.
            total_price = round(price_response.json()['totalPrice'])

            data_drive = {"payment_info": {"with_nds": nds,
                                           "without_nds": False,
                                           "cash": nal,
                                           "cashless": True},
                          "services": {"isotherm": False,
                                       "refrigerator": False},
                          "administrative_area_from": cityfrom,
                          "administrative_area_to": cityto,
                          "cargo_weight": str(weight)}
            drive = requests.post(url_drive, json=data_drive,
                                  headers=headers)
            return render(request, 'ok.html',
                          {'price': total_price, 'drive': drive.text})
    else:
        form = PriceForm()
    return render(request, 'main.html', {'form': form})
def parseAndSaveAdresses(object):
    """Geocode ``addr`` of every item and save the coordinates on it.

    For each item the Yandex geocoder is queried with ``item.addr``; on
    success ``lat`` and ``lon`` are set, ``save()`` is called and the
    coordinates are printed.  Items the geocoder cannot resolve are
    reported and skipped.  The loop sleeps 3 seconds after every item.

    Args:
        object: iterable of items exposing ``addr``, ``lat``, ``lon``
            and ``save()``.
    """
    geolocator = Yandex()
    for subObject in object:
        location = geolocator.geocode(subObject.addr)
        if location is None:
            # geocode() returns None when nothing matches; one bad
            # address should not abort the whole batch.
            print("No location found for address: %s" % subObject.addr)
        else:
            subObject.lat = location.latitude
            subObject.lon = location.longitude
            subObject.save()
            print(subObject.lat, subObject.lon)
        time.sleep(3)
def parseAndSaveAdresses(object):
    """Geocode ``addr`` of every item and save the coordinates on it.

    For each item the Yandex geocoder is queried with ``item.addr``; on
    success ``lat`` and ``lon`` are set, ``save()`` is called and the
    coordinates are printed.  Items the geocoder cannot resolve are
    reported and skipped.  The loop sleeps 3 seconds after every item.

    Args:
        object: iterable of items exposing ``addr``, ``lat``, ``lon``
            and ``save()``.
    """
    geolocator = Yandex()
    for subObject in object:
        location = geolocator.geocode(subObject.addr)
        if location is None:
            # geocode() returns None when nothing matches; one bad
            # address should not abort the whole batch.
            print("No location found for address: %s" % subObject.addr)
        else:
            subObject.lat = location.latitude
            subObject.lon = location.longitude
            subObject.save()
            print(subObject.lat, subObject.lon)
        time.sleep(3)
def getLocation(city):
    """Return ``(latitude, longitude)`` for *city*, or zeros on failure.

    Args:
        city: query string passed to the Yandex geocoder.

    Returns:
        A ``(latitude, longitude)`` tuple.  ``(0.0, 0.0)`` is returned
        when the geocoder raises or finds nothing.
    """
    fallback = (0.0, 0.0)
    try:
        gcode = Yandex().geocode(city)
    except Exception:
        # Best-effort lookup: any geocoder failure maps to the fallback.
        # ``Exception`` (not a bare except) lets KeyboardInterrupt and
        # SystemExit propagate.
        return fallback
    if gcode is None:
        return fallback
    return gcode.latitude, gcode.longitude
def write_to_db_one(data_list):
    """Geocode new flats from *data_list* and store them in the database.

    Items whose ``href`` is already present in the ``Flat`` table are
    skipped.  Every other item is geocoded with Yandex and inserted as a
    new ``Flat`` row; items the geocoder cannot resolve are skipped.

    Args:
        data_list: iterable of dicts with the keys ``type``, ``date``,
            ``metro_station``, ``obj_address``, ``rooms``, ``area``,
            ``floor``, ``href``, ``source`` and ``name``.
    """
    field_keys = ("type", "date", "metro_station", "obj_address", "rooms",
                  "area", "floor", "href", "source", "name")
    geolocator = Yandex()

    for item in data_list:
        # Explicit existence check instead of relying on ``None[0]``
        # raising TypeError.
        existing = (db_session.query(Flat.href)
                    .filter(Flat.href == item["href"])
                    .first())
        if existing is not None:
            continue

        values = [item[key] for key in field_keys]
        location = geolocator.geocode(item["obj_address"])
        if location is None:
            # geocode() returns None when nothing matches; without
            # coordinates the row cannot be built, so skip the item.
            continue

        db_item = Flat(*(values + [location.latitude, location.longitude]))
        db_session.add(db_item)
        db_session.commit()
def geoloc(city, adress, hnum):
    """Geocode "<city> <adress> <hnum>" with the Yandex geocoder.

    Args:
        city: city name.
        adress: street part of the address.
        hnum: house number.

    Returns:
        ``[latitude, longitude]`` of the geocoding result.

    Raises:
        AttributeError: when the geocoder returns None for the query.
    """
    service = Yandex()
    query = ' '.join((str(city), str(adress), str(hnum)))
    point = service.geocode(query)
    return [point.latitude, point.longitude]
import codecs
import json
import re
import csv
import sys
from sys import argv

if __name__ == "__main__":
    # Geocode one shard of a CSV read from stdin.
    #
    # Each input row is ``id,address``.  Only rows whose id modulo 12
    # equals the integer given as the first command-line argument are
    # processed.  For every hit one line is printed:
    #     id,latitude,longitude,"original address",<JSON address>
    # NOTE(review): the ``.decode('utf8')`` calls assume Python 2 byte
    # strings from ``csv.reader`` -- confirm the target interpreter.
    sys.stdout = codecs.getwriter('utf8')(sys.stdout)
    geolocator = Yandex()
    shard = int(argv[1])

    pairreader = csv.reader(sys.stdin, delimiter=',', quotechar='"')
    for row in pairreader:
        row_id = row[0].decode('utf8')
        if int(row_id) % 12 != shard:
            continue

        _address = row[1].decode('utf8')
        # Drop surrounding quotes and a leading numeric prefix before
        # sending the address to the geocoder.
        address = re.sub(r'^"|"$', "", _address)
        address = re.sub(r'^\d+\,?\s*', '', address)

        location = geolocator.geocode(address)
        if location is None:
            # geocode() returns None when nothing matches; report the
            # row and continue with the rest of the shard.
            sys.stderr.write("no geocoding result for id %s\n" % row_id)
            continue

        # Only the address is serialized: the former extra positional
        # arguments were consumed as json.dumps options, not as data.
        print("%s,%f,%f,\"%s\",%s" % (
            row_id, location.latitude, location.longitude, _address,
            json.dumps(location.address)))
import codecs
import csv
import json
import re
import sys
from sys import argv

if __name__ == "__main__":
    # Geocode one shard of a CSV read from stdin.
    #
    # Each input row is ``id,address``.  Only rows whose id modulo 12
    # equals the integer given as the first command-line argument are
    # processed.  For every hit one line is printed:
    #     id,latitude,longitude,"original address",<JSON address>
    # NOTE(review): the ``.decode("utf8")`` calls assume Python 2 byte
    # strings from ``csv.reader`` -- confirm the target interpreter.
    sys.stdout = codecs.getwriter("utf8")(sys.stdout)
    geolocator = Yandex()
    shard = int(argv[1])

    pairreader = csv.reader(sys.stdin, delimiter=",", quotechar='"')
    for row in pairreader:
        row_id = row[0].decode("utf8")
        if int(row_id) % 12 != shard:
            continue

        _address = row[1].decode("utf8")
        # Drop surrounding quotes and a leading numeric prefix before
        # sending the address to the geocoder.
        address = re.sub(r'^"|"$', "", _address)
        address = re.sub(r"^\d+\,?\s*", "", address)

        location = geolocator.geocode(address)
        if location is None:
            # geocode() returns None when nothing matches; report the
            # row and continue with the rest of the shard.
            sys.stderr.write("no geocoding result for id %s\n" % row_id)
            continue

        # Only the address is serialized: the former extra positional
        # arguments were consumed as json.dumps options, not as data.
        print('%s,%f,%f,"%s",%s' % (
            row_id,
            location.latitude,
            location.longitude,
            _address,
            json.dumps(location.address),
        ))
def get_latitude(location):
    """Return ``location.latitude``; a falsy *location* is returned as is."""
    if not location:
        return location
    return location.latitude


def get_longitude(location):
    """Return ``location.longitude``; a falsy *location* is returned as is."""
    if not location:
        return location
    return location.longitude


if __name__ == '__main__':
    # Add latitude/longitude columns to the sales data by geocoding
    # every distinct address once.
    df = pd.read_csv('sales_transformed.csv')
    geolocator = Yandex(api_key=YANDEX_API_KEY, timeout=5)

    address_location = {}
    for address in df['address'].unique():
        found = geolocator.geocode(address)
        address_location[address] = found
        if not found:
            print('No location {}'.format(address))

    # Every distinct address must have an entry (possibly None).
    assert set(address_location.keys()) == set(df['address'].unique()), True

    def lookup(addr):
        return address_location[addr]

    # Temporary column with the geocoding results; dropped again below.
    df['location'] = df['address'].apply(lookup)
    df['latitude'] = df['location'].apply(get_latitude)
    df['longitude'] = df['location'].apply(get_longitude)
    df = df.drop('location', axis=1)

    df.to_csv('sales_transformed_with_coords.csv')
class AccidentsScraper():
    """Scrape plane-crash records from planecrashinfo.com into rows.

    ``scrape()`` walks the per-year index pages, downloads every
    accident page and appends one list of cleaned values per accident
    to ``self.data``; the first stored row holds the feature names.
    ``data2csv()`` dumps ``self.data`` as ';'-separated text.
    """

    def __init__(self):
        self.url = "http://www.planecrashinfo.com"
        self.subdomain = "/database.htm"
        # Rows collected so far; row 0 is the header once scraping began.
        self.data = []
        self.geolocator = Yandex()
        self.reason_classifier = (
            ReasonClassifier("../train/summary_train_set.txt"))

    def __download_html(self, url):
        """Return the body of *url*, closing the connection afterwards."""
        response = urllib2.urlopen(url)
        try:
            return response.read()
        finally:
            response.close()

    def __get_accidents_links(self, html):
        """Return the accident-page links found in a year index page.

        Each link is normalised to start with '/' and is prefixed with
        '/<year>', where the year is the first 4-digit run in the href.
        """
        bs = BeautifulSoup(html, 'html.parser')
        accidents_links = []
        for td in bs.findAll('td'):
            # Has this <td> element an <a> child?
            a = td.next_element.next_element
            if a.name != 'a':
                continue
            href = a['href']
            # Prepend '/' if needed
            if href[0] != '/':
                href = '/' + href
            year = re.search(r'[0-9]{4}', href).group(0)
            accidents_links.append('/' + year + href)
        return accidents_links

    def __clean_feature_name(self, feature_name):
        """Strip ':' and all whitespace from a feature name."""
        feature_name = feature_name.replace(':', '')
        return re.sub(r'\s+', '', feature_name)

    def __clean_example_datum(self, example_datum):
        """Normalise one raw cell value to a stripped string.

        Integers are kept as digits, an embedded ``HH:MM`` time is
        extracted, parseable dates become ``day/month/year``; anything
        else is returned as stripped text.
        """
        # For features 'Aboard' and 'Fatalities', keep just the 1st number
        example_datum = re.sub(r"[^\d]*(passengers.*crew.*)", '',
                               example_datum)
        # The result of strip() has to be assigned to take effect.
        example_datum = example_datum.strip()
        # Number?
        try:
            example_datum = str(int(example_datum))
        except ValueError:
            # Time?
            time_match = re.search(r"\d\d:\d\d", example_datum)
            if time_match is not None:
                example_datum = time_match.group(0)
            else:
                # Date?
                try:
                    parsed = parser.parse(example_datum)
                    example_datum = (str(parsed.day) + '/' +
                                     str(parsed.month) + '/' +
                                     str(parsed.year))
                except ValueError:
                    # Plain string: keep as is
                    pass
        # Python 2 unicode text is encoded to a UTF-8 byte string; values
        # that already are ``str`` are left untouched.
        if not isinstance(example_datum, str):
            example_datum = example_datum.encode('utf-8')
        return example_datum.strip()

    def __get_geographical_coordinates(self, location_str):
        """Return ``(latitude, longitude)`` as strings, or ``('?', '?')``.

        The placeholder pair is returned when the geocoder raises or
        finds nothing.
        """
        try:
            location = self.geolocator.geocode(location_str)
        except Exception:
            # Best effort: a failed lookup must not stop the scrape.
            return '?', '?'
        if location is None:
            return '?', '?'
        return str(location.latitude), str(location.longitude)

    def __scrape_example_data(self, html):
        """Parse one accident page and append its row to ``self.data``.

        While ``self.data`` is still empty the feature names are
        collected too and stored as the first row.
        """
        location_row = 2   # row holding the LOCATION datum
        summary_row = 12   # row holding the SUMMARY datum

        bs = BeautifulSoup(html, 'html.parser')
        example_data = []
        features_names = []
        collect_names = len(self.data) == 0

        trs = bs.findAll('tr')
        # The first <tr> element does not provide useful info
        trs.pop(0)

        # Rows are identified by position: comparing tags with ``==``
        # matches any row with equal content, not just the intended one.
        for index, tr in enumerate(trs):
            tds = tr.findAll('td')
            if collect_names:
                feature_name = tds[0].next_element.text
                features_names.append(
                    self.__clean_feature_name(feature_name))

            raw_value = tds[1].next_element.text
            example_data.append(self.__clean_example_datum(raw_value))

            if index == location_row:
                # Add latitude and longitude right after the location
                latitude, longitude = (
                    self.__get_geographical_coordinates(raw_value))
                if collect_names:
                    features_names.append('Latitude')
                    features_names.append('Longitude')
                example_data.append(latitude)
                example_data.append(longitude)
            elif index == summary_row:
                # Assign the summary a category (reason) via the
                # text classifier
                if collect_names:
                    features_names.append('Reason')
                example_data.append(
                    self.reason_classifier.classify(raw_value))

        # Store features' names
        if features_names:
            self.data.append(features_names)
        # Store the data
        self.data.append(example_data)

    def __get_years_links(self, html):
        """Return links whose anchor text starts with a 19xx/20xx year."""
        bs = BeautifulSoup(html, 'html.parser')
        years_links = []
        for a in bs.findAll('a', href=True):
            # Match a year from 1900 to 2099
            if re.match(r"(19|20)[0-9][0-9]", a.text.strip()):
                href = a['href']
                # Prepend '/' if needed
                if href[0] != '/':
                    href = '/' + href
                years_links.append(href)
        return years_links

    def scrape(self):
        """Download every accident page and fill ``self.data``."""
        print("Web Scraping of planes' crashes data from " +
              "'" + self.url + "'...")
        print("This process could take roughly 45 minutes.\n")

        start_time = time.time()

        # Get the links of each year from the index page
        html = self.__download_html(self.url + self.subdomain)
        years_links = self.__get_years_links(html)

        # For each year, get its accidents' links
        accidents_links = []
        for year_link in years_links:
            print("Found link to a year of crash: " + self.url + year_link)
            year_html = self.__download_html(self.url + year_link)
            accidents_links.append(self.__get_accidents_links(year_html))

        # For each accident, extract its data
        for year_accidents in accidents_links:
            for accident_link in year_accidents:
                print("scraping crash data: " + self.url + accident_link)
                accident_html = self.__download_html(
                    self.url + accident_link)
                self.__scrape_example_data(accident_html)

        end_time = time.time()
        print("\nelapsed time: " +
              str(round(((end_time - start_time) / 60), 2)) + " minutes")

    def data2csv(self, filename):
        """Write ``self.data`` to ``../csv/<filename>``.

        The file is overwritten (created if missing).  Every value is
        followed by ';' and every row by a newline.
        """
        with open("../csv/" + filename, "w+") as out:
            for row in self.data:
                for value in row:
                    out.write(value + ";")
                out.write("\n")
from geopy.geocoders import Yandex
import json

# Demo: geocode "Paris" with Yandex (English results) and print the raw
# response, the resolved address and the coordinates.
geolocator = Yandex(lang='en_US')
location = geolocator.geocode("Paris", timeout=10)

if location is None:
    # Nothing found: print the None result itself.
    print(location)
else:
    print(json.dumps(location.raw, indent=4))
    print(location.address)
    print(location.latitude, " -> ", location.longitude)
from geopy.geocoders import Yandex
import json

# Demo: geocode an Arabic-language query with Yandex (English results)
# and print the raw response, the resolved address and the coordinates.
geolocator = Yandex(lang='en_US')
location = geolocator.geocode("بغداد، العراق", timeout=10)

if location is None:
    # Nothing found: print the None result itself.
    print(location)
else:
    print(json.dumps(location.raw, indent=4))
    print(location.address)
    print(location.latitude, " -> ", location.longitude)
#!/usr/bin/env python
# -*- coding: utf8 -*-
# Demo: geocode a noisy real-estate listing title with Yandex using a
# very short timeout, retry once on timeout, then print the resolved
# address, the coordinates, the raw response and its precision field.
import geopy
from geopy.geocoders import Nominatim, GoogleV3, Bing, Yandex

# Other geocoders imported above (Bing, Nominatim, GoogleV3) can be
# swapped in here.
geolocator = Yandex()

QUERY = (u"Королева/Беляева, ЦЕНА СНИЖЕНА!!! 46м2, "
         u"р-н Ворошиловский, Ростов-на-Дону")

try:
    location = geolocator.geocode(QUERY, timeout=0.1)
except geopy.exc.GeocoderTimedOut as e:
    # ``e.message`` is deprecated; formatting the exception itself
    # yields its message.
    print("Error: geocode failed with message '%s'" % (e,))
    # NOTE(review): the retry uses the same 0.1 s timeout that just
    # failed -- confirm whether a longer value was intended.
    location = geolocator.geocode(QUERY, timeout=0.10)

if location is None:
    # geocode() returns None when nothing matches.
    raise SystemExit("No geocoding result for the query")

print(location.address.encode('utf-8'))
print((location.latitude, location.longitude))
# NOTE(review): ``str.decode`` exists only on Python 2 -- confirm the
# target interpreter.
print(repr(location.raw).decode("unicode-escape").encode('utf-8'))
print(location.raw[u'metaDataProperty'][u'GeocoderMetaData'][u'precision'])