Esempio n. 1
0
def _geocode_with_fallback(address):
    """Geocode *address* with Yandex, falling back to GoogleV3.

    The fallback is used both when Yandex times out and when it finds
    nothing. Returns the location object, or None if neither service
    resolves the address.
    """
    try:
        location = Yandex().geocode(address)
    except GeocoderTimedOut:
        location = None
    if location is None:
        location = GoogleV3().geocode(address)
    return location


def write_to_db_one(data_list, start_number, limit=25000):
    """Insert not-yet-stored flats from *data_list* into the database.

    Items whose ``href`` is already present in ``Flat`` are skipped. Every
    new item is geocoded from its ``obj_address`` and committed on its own.

    Args:
        data_list: iterable of dicts with the keys ``type``, ``date``,
            ``metro_station``, ``obj_address``, ``rooms``, ``area``,
            ``floor``, ``href``, ``source`` and ``name``.
        start_number: initial value of the insert counter; the cap is
            measured relative to it.
        limit: maximum number of rows inserted by one call (default 25000,
            the value that used to be hard-coded).

    Items whose address cannot be geocoded by either service are skipped
    and do not count towards *limit*.
    """
    counter = start_number
    for item in data_list:
        if counter > start_number - 1 + limit:
            break

        # Explicit existence check instead of indexing a possible None row.
        existing = db_session.query(Flat.href).filter(
            Flat.href == item["href"]).first()
        if existing is not None:
            continue

        object_address = item["obj_address"]
        location = _geocode_with_fallback(object_address)
        if location is None:
            # No coordinates available: skip instead of raising AttributeError.
            continue

        db_item = Flat(
            item["type"], item["date"], item["metro_station"],
            object_address, item["rooms"], item["area"], item["floor"],
            item["href"], item["source"], item["name"],
            location.latitude, location.longitude)
        db_session.add(db_item)
        db_session.commit()
        counter += 1
Esempio n. 2
0
File: geo.py Progetto: NOdoff/Data
def get_location(uni):
    """Geocode a university name, trying progressively decorated queries.

    Queries are tried in order: the bare name, "University of <name>" and
    "<name> University". The first non-None result is returned; later
    queries are only sent when the earlier ones found nothing.

    Returns the location object from the Yandex geocoder, or None when no
    variant could be resolved.
    """
    geolocator = Yandex()
    # The separating spaces matter: without them the queries become
    # "University ofX" / "XUniversity".
    candidates = (uni, "University of " + uni, uni + " University")
    for query in candidates:
        location = geolocator.geocode(query)
        if location is not None:
            return location
    return None
Esempio n. 3
0
def geolocate(address, yandex=False, try_all=True):
    """Geocode *address* and split the resolved address into named parts.

    Args:
        address: free-text address to look up.
        yandex: use the Yandex geocoder when True, Nominatim otherwise.
        try_all: when the chosen geocoder finds nothing, retry once with
            the other one.

    Returns:
        A dict. When nothing is found it only contains ``"address"`` (the
        input). Otherwise it contains ``address``, ``latitude``,
        ``longitude``, ``country``, ``city`` and ``street``; Nominatim
        results may also carry ``zip``, ``state`` and ``region``, depending
        on how many comma-separated parts the resolved address has.
    """
    if yandex:
        geolocator = Yandex(lang='en_US')
    else:
        geolocator = Nominatim()
    location = geolocator.geocode(address, timeout=10)

    if location is None:
        if try_all:
            return geolocate(address, not yandex, False)
        # Both branches now report "not found" the same way instead of the
        # Nominatim branch failing on ``None.address``.
        return {"address": address}

    raw_parts = location.address.split(",")
    parts = [part.strip() for part in raw_parts]
    data = {"country": parts[-1]}

    if yandex:
        data["city"] = parts[-3] if len(parts) >= 3 else parts[0]
        data["street"] = " ".join(raw_parts[:-3]).strip()
    else:
        # A purely numeric second-to-last part (ignoring "-", "_" and
        # spaces) is treated as a zip code; it shifts every other field
        # one position to the left.
        zip_candidate = parts[-2] if len(parts) >= 2 else ""
        compact = zip_candidate.replace("-", "").replace("_", "").replace(" ", "")
        shift = 0
        if compact.isdigit():
            data["zip"] = zip_candidate
            shift = 1

        state_pos = 2 + shift
        if len(parts) >= state_pos:
            data["state"] = parts[-state_pos]
        if len(parts) >= state_pos + 1:
            data["region"] = parts[-(state_pos + 1)]
        if len(parts) >= state_pos + 2:
            data["city"] = parts[-(state_pos + 2)]
        else:
            # Too few parts: fall back to the first one.
            data["city"] = parts[0]

        data["street"] = location.address

    data["address"] = location.address
    data["latitude"] = location.latitude
    data["longitude"] = location.longitude
    return data
Esempio n. 4
0
def get_03_uhouse(city=None, street=None, house=None, geocoder=None):
    """Geocode an address in Russia built from city, street and house.

    The query is ``'РФ, <city>[, <street>][, <house>]'``; the city
    defaults to 'Нижний Новгород' when not given.

    Args:
        city: city name, or None for the default city.
        street: optional street name.
        house: optional house number (converted with ``str``).
        geocoder: ``'Yandex'`` selects the Yandex geocoder; any other
            value selects Nominatim.

    Returns:
        * ``None`` when the geocoder finds nothing;
        * ``11`` when the geocoder raises ``GeocoderServiceError``;
        * ``(pos, address, name)`` when the result has raw data, where
          ``pos`` is ``(latitude, longitude)`` and ``name`` is
          ``location.raw['name']``;
        * ``(pos, address)`` otherwise.
    """
    # The query always has at least the country and a city, so the former
    # "fewer than two parts" check (return code 10) could never trigger.
    pref_addr = ['РФ', city if city else 'Нижний Новгород']
    if street:
        pref_addr.append(street)
    if house:
        pref_addr.append(str(house))

    if geocoder == 'Yandex':
        from geopy.geocoders import Yandex
        geolocator = Yandex()
    else:
        from geopy.geocoders import Nominatim
        geolocator = Nominatim()

    # Imported here so the except clause does not depend on a module-level
    # ``import geopy`` that this block never performs itself.
    from geopy.exc import GeocoderServiceError
    try:
        location = geolocator.geocode(', '.join(pref_addr))
    except GeocoderServiceError:
        return 11

    if location is None:
        return None

    address = location.address
    pos = (location.latitude, location.longitude)
    if location.raw:
        return pos, address, location.raw['name']
    return pos, address
Esempio n. 5
0
def get_location(city=None, street=None, house=None, geocoder=None):
    pref_addr = ['РФ']  #, 'Нижний Новгород']
    if city: pref_addr.append(city)
    else: pref_addr.append('Нижний Новгород')
    if street: pref_addr.append(street)
    if house: pref_addr.append(str(house))
    print ', '.join(pref_addr)
    if len(pref_addr) < 2: return 10

    if geocoder == 'Yandex':
        from geopy.geocoders import Yandex
        geolocator = Yandex()
    else:
        from geopy.geocoders import Nominatim
        geolocator = Nominatim()
    try:
        location = geolocator.geocode(
            ', '.join(pref_addr))  #, exactly_one = False)
        if location == None: return None
        if type(location) == list:
            print 'len location list:', len(location)
            for l in location:
                print '\t', l.address
                print '\t', (l.latitude, l.longitude)
        else:
            #	print 'type(location)', type(location)
            print '\t', location.address
            print '\t', (location.latitude, location.longitude)
            if location.raw:
                #	out_dict (location.raw, 'location.raw')
                print '\tname', location.raw['name']

    except geopy.exc.GeocoderServiceError:
        return 11
Esempio n. 6
0
def get_location(country):
    """Look up *country* with the Yandex geocoder (English results).

    Returns whatever ``geocode`` returns for the query, using a 10 second
    timeout.

    Raises:
        ImportError: with the message 'Cannot import geocoder.' when an
            ImportError occurs while importing or using the geocoder.
    """
    try:
        from geopy.geocoders import Yandex
        return Yandex(lang='en_US').geocode(country, timeout=10)
    except ImportError:
        raise ImportError('Cannot import geocoder.')
Esempio n. 7
0
def price(request):
    """Django view: estimate a delivery price and list available drivers.

    GET renders the empty ``PriceForm`` in ``main.html``. A valid POST
    geocodes both cities with Yandex, computes the distance between them
    in meters, posts the order data to the groozgo.ru ``calc_price`` and
    ``find_drivers`` endpoints and renders ``ok.html`` with the rounded
    ``totalPrice`` and the raw driver response text. An invalid POST
    re-renders ``main.html`` with the bound form.

    If either city cannot be geocoded, a non-field error is added to the
    form and ``main.html`` is rendered again instead of failing with an
    AttributeError.
    """
    if request.method != 'POST':
        return render(request, 'main.html', {'form': PriceForm()})

    form = PriceForm(request.POST)
    if not form.is_valid():
        return render(request, 'main.html', {'form': form})

    url_price = 'https://groozgo.ru/api/order/calc_price'
    url_drive = 'https://groozgo.ru/api/order/find_drivers'

    headers = {'Content-Type': 'application/json; charset=utf-8',
               'X-Requested-With': 'XMLHttpRequest'}

    cityfrom = form.cleaned_data.get('cityfrom')
    cityto = form.cleaned_data.get('cityto')
    weight = form.cleaned_data.get('weight')
    nds = form.cleaned_data.get('nds')
    nal = form.cleaned_data.get('nal')
    from_value = "Россия, " + str(cityfrom)
    to_value = "Россия, " + str(cityto)

    geolocator = Yandex()
    loc_from = geolocator.geocode(str(cityfrom))
    loc_to = geolocator.geocode(str(cityto))
    if loc_from is None or loc_to is None:
        # geocode() returns None for an unknown place; report it on the
        # form rather than raising on ``None.latitude``.
        form.add_error(None, 'Не удалось определить координаты города.')
        return render(request, 'main.html', {'form': form})

    loc_from_coord = (loc_from.latitude, loc_from.longitude)
    loc_to_coord = (loc_to.latitude, loc_to.longitude)
    distance = vincenty(loc_from_coord, loc_to_coord).meters

    data_price = {"distance": distance, "weight": str(weight), "from_administrative_area": cityfrom,
                  "to_administrative_area": cityto, "mkad_distance": 23813.79,
                  "is_refrigerator": False, "is_isotherm": False, "is_insurance": False,
                  "insurance_sum": 0, "porters_count": "0", "destination_points": {},
                  "destination_points_amount": 0,
                  "is_documents_back": False, "is_online_tracking": False, "back_doc_address": "",
                  "from_value": from_value, "to_value": to_value}

    price_response = requests.post(url_price, json=data_price, headers=headers)
    # Named ``total_price`` so the local does not shadow this view function.
    total_price = round(price_response.json()['totalPrice'])

    data_drive = {"payment_info": {"with_nds": nds, "without_nds": False, "cash": nal, "cashless": True},
                  "services": {"isotherm": False, "refrigerator": False}, "administrative_area_from": cityfrom,
                  "administrative_area_to": cityto, "cargo_weight": str(weight)}

    drive = requests.post(url_drive, json=data_drive, headers=headers)
    return render(request, 'ok.html', {'price': total_price, 'drive': drive.text})
Esempio n. 8
0
def parseAndSaveAdresses(object):
    """Geocode every item's ``addr`` with Yandex and save the coordinates.

    Args:
        object: iterable of items exposing an ``addr`` attribute, writable
            ``lat`` / ``lon`` attributes and a ``save()`` method.

    For each item the coordinates are stored, the item is saved and the
    pair is printed. Items whose address cannot be resolved are reported
    and left unchanged instead of aborting the whole batch. The function
    sleeps 3 seconds after every lookup.
    """
    geolocator = Yandex()
    for subObject in object:
        location = geolocator.geocode(subObject.addr)
        if location is None:
            # geocode() returns None when nothing matches the query.
            print('No location found for {}'.format(subObject.addr))
        else:
            subObject.lat = location.latitude
            subObject.lon = location.longitude
            subObject.save()
            print(subObject.lat, subObject.lon)
        # Pause after every request, found or not.
        time.sleep(3)
Esempio n. 9
0
def parseAndSaveAdresses(object):
    """Geocode every item's ``addr`` with Yandex and save the coordinates.

    Args:
        object: iterable of items exposing an ``addr`` attribute, writable
            ``lat`` / ``lon`` attributes and a ``save()`` method.

    For each item the coordinates are stored, the item is saved and the
    pair is printed. Items whose address cannot be resolved are reported
    and left unchanged instead of aborting the whole batch. The function
    sleeps 3 seconds after every lookup.
    """
    geolocator = Yandex()
    for subObject in object:
        location = geolocator.geocode(subObject.addr)
        if location is None:
            # geocode() returns None when nothing matches the query.
            print('No location found for {}'.format(subObject.addr))
        else:
            subObject.lat = location.latitude
            subObject.lon = location.longitude
            subObject.save()
            print(subObject.lat, subObject.lon)
        # Pause after every request, found or not.
        time.sleep(3)
Esempio n. 10
0
def getLocation(city):
    """Return ``(latitude, longitude)`` for *city* using the Yandex geocoder.

    This is a best-effort lookup: any failure (geocoder error, no result
    for the query, ...) yields ``(0.0, 0.0)`` instead of an exception.
    """
    try:
        geolocator = Yandex()
        gcode = geolocator.geocode(city)
        return gcode.latitude, gcode.longitude
    except Exception:
        # ``Exception`` rather than a bare ``except`` so KeyboardInterrupt
        # and SystemExit still propagate.
        return float(0), float(0)
Esempio n. 11
0
def write_to_db_one(data_list):
    """Insert not-yet-stored flats from *data_list* into the database.

    Items whose ``href`` is already present in ``Flat`` are skipped. Every
    new item is geocoded with Yandex from its ``obj_address`` and committed
    on its own.

    Args:
        data_list: iterable of dicts with the keys ``type``, ``date``,
            ``metro_station``, ``obj_address``, ``rooms``, ``area``,
            ``floor``, ``href``, ``source`` and ``name``.

    Items whose address cannot be geocoded are skipped instead of aborting
    the run with an AttributeError.
    """
    geolocator = Yandex()
    for item in data_list:
        # Explicit existence check instead of indexing a possible None row.
        existing = db_session.query(Flat.href).filter(
            Flat.href == item["href"]).first()
        if existing is not None:
            continue

        object_address = item["obj_address"]
        location = geolocator.geocode(object_address)
        if location is None:
            # geocode() returns None when nothing matches the query.
            continue

        db_item = Flat(
            item["type"], item["date"], item["metro_station"],
            object_address, item["rooms"], item["area"], item["floor"],
            item["href"], item["source"], item["name"],
            location.latitude, location.longitude)
        db_session.add(db_item)
        db_session.commit()
Esempio n. 12
0
 def geoloc(city, adress, hnum):
     """Geocode "<city> <adress> <hnum>" with Yandex.

     The three arguments are converted with ``str`` and joined by single
     spaces to form the query. Returns ``[latitude, longitude]`` of the
     resolved location.
     """
     yandex = Yandex()
     query = ' '.join((str(city), str(adress), str(hnum)))
     found = yandex.geocode(query)
     return [found.latitude, found.longitude]
Esempio n. 13
0
import codecs
import json
import re
import csv

from sys import argv

if __name__ == "__main__":
    #  sys.stdin = codecs.getreader('utf8')(sys.stdin)
    sys.stdout = codecs.getwriter('utf8')(sys.stdout)

    geolocator = Yandex()
    #geolocator = GoogleV3(api_key="AIzaSyDEjxgSQ6KDanVIRs2_S2BUI_PwEvIklII") #, secret_key="YVUA6X1jWYVoyS44e7y9ic9u")
    #geolocator = Nominatim()

    pairreader = csv.reader(sys.stdin, delimiter=',', quotechar='"')
    for row in pairreader:
        id = row[0].decode('utf8')
        if int(id) % 12 != int(argv[1]):
            continue

        _address = row[1].decode('utf8')
        address = re.sub(r'^"|"$', "", _address)
        address = re.sub(r'^\d+\,?\s*', '', address)
        location = geolocator.geocode(address)  #"175 5th Avenue NYC")
        #print location
        print "%s,%f,%f,\"%s\",%s" % (
            id, location.latitude, location.longitude, _address,
            json.dumps(location.address, location.latitude, location.longitude,
                       location.raw))
Esempio n. 14
0
import csv

from sys import argv

if __name__ == "__main__":
    #  sys.stdin = codecs.getreader('utf8')(sys.stdin)
    sys.stdout = codecs.getwriter("utf8")(sys.stdout)

    geolocator = Yandex()
    # geolocator = GoogleV3(api_key="AIzaSyDEjxgSQ6KDanVIRs2_S2BUI_PwEvIklII") #, secret_key="YVUA6X1jWYVoyS44e7y9ic9u")
    # geolocator = Nominatim()

    pairreader = csv.reader(sys.stdin, delimiter=",", quotechar='"')
    for row in pairreader:
        id = row[0].decode("utf8")
        if int(id) % 12 != int(argv[1]):
            continue

        _address = row[1].decode("utf8")
        address = re.sub(r'^"|"$', "", _address)
        address = re.sub(r"^\d+\,?\s*", "", address)
        location = geolocator.geocode(address)  # "175 5th Avenue NYC")
        # print location
        print '%s,%f,%f,"%s",%s' % (
            id,
            location.latitude,
            location.longitude,
            _address,
            json.dumps(location.address, location.latitude, location.longitude, location.raw),
        )
Esempio n. 15
0

def get_latitude(location):
    """Return ``location.latitude``, or *location* itself when it is falsy."""
    if not location:
        return location
    return location.latitude


def get_longitude(location):
    """Return ``location.longitude``, or *location* itself when it is falsy."""
    if not location:
        return location
    return location.longitude


if __name__ == '__main__':
    # Add latitude/longitude columns to sales_transformed.csv by geocoding
    # each distinct address once with Yandex.
    df = pd.read_csv('sales_transformed.csv')
    geolocator = Yandex(
        api_key=YANDEX_API_KEY,
        timeout=5,
    )

    # Missing addresses (NaN) are not sent to the geocoder; NaN never
    # compares equal to itself, so it cannot serve as a lookup key either.
    address_location = dict()
    for address in df['address'].dropna().unique():
        location = geolocator.geocode(address)
        address_location[address] = location
        if not location:
            print('No location {}'.format(address))

    # ``.get`` yields None for rows without a usable address, which the
    # get_latitude / get_longitude helpers pass through unchanged.
    df['location'] = df['address'].apply(lambda a: address_location.get(a))
    df['latitude'] = df['location'].apply(get_latitude)
    df['longitude'] = df['location'].apply(get_longitude)
    df = df.drop('location', axis=1)

    df.to_csv('sales_transformed_with_coords.csv')
Esempio n. 16
0
class AccidentsScraper():
    """Scrape plane-crash records from planecrashinfo.com into a table.

    ``scrape()`` walks the year index, then every accident page, and
    appends one row per accident to ``self.data``. The first stored row
    holds the feature names. ``data2csv()`` dumps the table as
    ';'-terminated values.

    Every ``print`` takes a single expression, so the parenthesised form
    prints the same text under Python 2 and Python 3.
    """

    # Row positions (after dropping the header row) on an accident page.
    _LOCATION_ROW = 2
    _SUMMARY_ROW = 12

    def __init__(self):
        self.url = "http://www.planecrashinfo.com"
        self.subdomain = "/database.htm"
        self.data = []
        self.geolocator = Yandex()
        self.reason_classifier = (
            ReasonClassifier("../train/summary_train_set.txt"))

    def __download_html(self, url):
        """Return the body of *url*, always closing the HTTP response."""
        response = urllib2.urlopen(url)
        try:
            return response.read()
        finally:
            response.close()

    def __get_accidents_links(self, html):
        """Return the accident page links found in a year page.

        A link is taken from every <td> whose second following element is
        an <a>. Each href gets a leading '/' if missing and is prefixed
        with '/<year>', where <year> is the first 4-digit run in the href.
        Cells without such a link, href or year are skipped.
        """
        bs = BeautifulSoup(html, 'html.parser')
        accidents_links = []
        for td in bs.findAll('td'):
            # Has this <td> element an <a> child?
            first = td.next_element
            a = first.next_element if first is not None else None
            if getattr(a, 'name', None) != 'a':
                continue
            href = a.get('href')
            if not href:
                continue
            # Prepend '/' if needed
            if not href.startswith('/'):
                href = '/' + href
            # Extract year
            year_match = re.search(r'[0-9]{4}', href)
            if year_match is None:
                continue
            # Prepend year
            accidents_links.append('/' + year_match.group(0) + href)

        return accidents_links

    def __clean_feature_name(self, feature_name):
        """Remove every ':' and all whitespace from *feature_name*."""
        feature_name = feature_name.replace(':', '')
        feature_name = re.sub(r'\s+', '', feature_name)
        return feature_name

    def __clean_example_datum(self, example_datum):
        """Normalise one table value to a string.

        Tried in order: integer, 'HH:MM' time, date (rendered as
        'day/month/year'), otherwise the text is kept as is.
        """
        # For features 'Aboard' and 'Fatalities', extract just the 1st number
        example_datum = re.sub(
            r"[^\d]*(passengers.*crew.*)", '', example_datum)
        # The stripped value was previously computed and thrown away.
        example_datum = example_datum.strip()

        # Number?
        try:
            example_datum = str(int(example_datum))
        except ValueError:
            # Time?
            time_match = re.search(r"\d\d:\d\d", example_datum)
            if time_match is not None:
                example_datum = time_match.group(0)
            else:
                # Date?
                try:
                    parsed = parser.parse(example_datum)
                    example_datum = str(parsed.day) + \
                        '/' + str(parsed.month) + '/' + str(parsed.year)
                except (ValueError, OverflowError):
                    # Plain string
                    pass

        example_datum = str(example_datum.encode('utf-8')).strip()
        return example_datum

    def __get_geographical_coordinates(self, location_str):
        """Return (latitude, longitude) as strings, or ('?', '?').

        The placeholder pair is returned both when the geocoder fails and
        when it finds nothing.
        """
        try:
            location = self.geolocator.geocode(location_str)
        except Exception:
            # Best effort: a failed lookup must not stop the scrape, but
            # KeyboardInterrupt / SystemExit still propagate.
            return '?', '?'

        if location is None:
            return '?', '?'
        return str(location.latitude), str(location.longitude)

    def __scrape_example_data(self, html):
        """Parse one accident page and append its row to ``self.data``.

        On the very first page (``self.data`` empty) the feature names are
        collected too and stored before the values. Latitude/Longitude are
        inserted after the location row and a classified Reason after the
        summary row.
        """
        bs = BeautifulSoup(html, 'html.parser')
        example_data = []
        features_names = []

        # The first <tr> element does not provide useful info
        trs = bs.findAll('tr')[1:]
        if not trs:
            return

        # self.data is only appended to after the loop, so this is fixed
        # for the whole page.
        first_record = len(self.data) == 0

        # Rows are identified by position; comparing Tag objects with ==
        # compares their content.
        for index, tr in enumerate(trs):
            tds = tr.findAll('td')

            # Read features' names?
            if first_record:
                feature_name = tds[0].next_element.text
                features_names.append(
                    self.__clean_feature_name(feature_name))

            raw_value = tds[1].next_element.text
            example_data.append(self.__clean_example_datum(raw_value))

            if index == self._LOCATION_ROW:
                # LOCATION: add latitude and longitude
                location = self.__get_geographical_coordinates(raw_value)
                if first_record:
                    features_names.append('Latitude')
                    features_names.append('Longitude')
                example_data.append(location[0])
                example_data.append(location[1])
            elif index == self._SUMMARY_ROW:
                # SUMMARY: assign it a category (reason) using text
                # mining techniques
                if first_record:
                    features_names.append('Reason')
                example_data.append(
                    self.reason_classifier.classify(raw_value))

        # Store features' names
        if features_names:
            self.data.append(features_names)

        # Store the data
        self.data.append(example_data)

    def __get_years_links(self, html):
        """Return the hrefs of anchors whose text starts with a year.

        Years from 1900 to 2099 match. A leading '/' is added to hrefs
        that lack one.
        """
        bs = BeautifulSoup(html, 'html.parser')
        years_links = []
        for a in bs.findAll('a', href=True):
            # Match a year from 1900 to 2099
            if re.match(r"(19|20)[0-9][0-9]", a.text.strip()):
                href = a['href']
                # Prepend '/' if needed
                if not href.startswith('/'):
                    href = '/' + href
                years_links.append(href)

        return years_links

    def scrape(self):
        """Download every accident page and fill ``self.data``.

        Progress and the elapsed time are printed to stdout.
        """
        print("Web Scraping of planes' crashes data from " +
              "'" + self.url + "'...")

        print("This process could take roughly 45 minutes.\n")

        # Start timer
        start_time = time.time()

        # Download HTML
        html = self.__download_html(self.url + self.subdomain)

        # Get the links of each year
        years_links = self.__get_years_links(html)

        # For each year, get its accidents' links
        accidents_links = []
        for y in years_links:
            print("Found link to a year of crash: " + self.url + y)
            html = self.__download_html(self.url + y)
            accidents_links.append(self.__get_accidents_links(html))

            # Uncomment this break in case of debug mode
            #break

        # For each accident, extract its data
        for year_links in accidents_links:
            for link in year_links:
                print("scraping crash data: " + self.url + link)
                html = self.__download_html(self.url + link)
                self.__scrape_example_data(html)

        # Show elapsed time
        end_time = time.time()
        print("\nelapsed time: " +
              str(round(((end_time - start_time) / 60), 2)) + " minutes")

    def data2csv(self, filename):
        """Write ``self.data`` to ``../csv/<filename>``.

        The file is overwritten, or created if it does not exist. Every
        value is followed by ';' and every row ends with a newline. The
        file is closed even if a write fails.
        """
        with open("../csv/" + filename, "w+") as out:
            for row in self.data:
                out.write("".join(value + ";" for value in row) + "\n")
from geopy.geocoders import Yandex
import json
# Demo: geocode "Paris" with Yandex (English results) and print the raw
# response, the resolved address and the coordinates.
geolocator = Yandex(lang='en_US')

location = geolocator.geocode("Paris", timeout=10)

# Identity test is the idiomatic None check (PEP 8) and does not depend
# on how the location type implements ``!=``.
if location is not None:
    print(json.dumps(location.raw, indent=4))
    print(location.address)
    print(location.latitude, " -> ", location.longitude)
else:
    print(location)
Esempio n. 18
0
from geopy.geocoders import Yandex
import json
# Demo: geocode an Arabic-script query with Yandex (English results) and
# print the raw response, the resolved address and the coordinates.
geolocator = Yandex(lang='en_US')
location = geolocator.geocode("بغداد، العراق", timeout=10)
# Identity test is the idiomatic None check (PEP 8) and does not depend
# on how the location type implements ``!=``.
if location is not None:
    print(json.dumps(location.raw, indent=4))
    print(location.address)
    print(location.latitude, " -> ", location.longitude)
else:
    print(location)
Esempio n. 19
0
#!/usr/bin/env python
# -*- coding: utf8 -*-
#google_api_key = "AIzaSyAZVa_DBWv0uQ_m6-UgWTToYAY6wnA2EiQ"

import geopy
from geopy.geocoders import Nominatim, GoogleV3, Bing, Yandex

#geolocator = Bing()
#geolocator = Nominatim()
#geolocator = GoogleV3(domain = 'maps.googleapis.com')
geolocator = Yandex()
#location = geolocator.geocode(u"ул. Юпитера д.1 Ростов")
#location = geolocator.geocode(u"улица Мурлычева, 30/28,р-н Пролетарский, ,Ростов-на-Дону")
#location = geolocator.geocode(u"Ленина 42  Ростов")
#location = geolocator.geocode(u"Турмалиновская 62 Ростов")
#location = geolocator.geocode(u"Красноармейская, 200/1, Ростов-на-Дону")
try:
    location = geolocator.geocode(u"Королева/Беляева, ЦЕНА СНИЖЕНА!!! 46м2, р-н Ворошиловский, Ростов-на-Дону",timeout=0.1)
except geopy.exc.GeocoderTimedOut as e:
    print("Error: geocode failed with message '%s'"%(e.message))
    location = geolocator.geocode(u"Королева/Беляева, ЦЕНА СНИЖЕНА!!! 46м2, р-н Ворошиловский, Ростов-на-Дону",timeout=0.10)
    
print location.address.encode('utf-8')
print (location.latitude, location.longitude)
print repr(location.raw).decode("unicode-escape").encode('utf-8')
print location.raw[u'metaDataProperty'][u'GeocoderMetaData'][u'precision']