def geopycheck():
    """Interactively choose and store the geopy geocoder scheme.

    Asks the user for a manual or an automatic setup and writes either
    ``'https'`` or ``'http'`` to ``config['GEOCODER']['scheme']``.

    * Manual setup: the user types the scheme. Anything other than
      ``'https'`` is stored as ``'http'`` (the input is not validated).
    * Automatic setup (also used when the first answer is not understood):
      a test geocode is made over HTTPS and, if that raises
      ``GeocoderServiceError``, over HTTP. If both fail the scheme defaults
      to ``'http'``.
    """
    print("In version 0.6.2 beta and above, your geocoder scheme needs to get set, based on your OS.",
          "PyWeather can automatically do this now, or you can manually define your scheme.",
          "Type in 'automaticsetup' for the automatic setup, and 'manualsetup' for manual setup",
          "in the prompt below.", sep="\n")
    setupmethod = input("Input here: ").lower()
    if setupmethod == "manualsetup":
        print("Geopy's Google geocoder can work in HTTPS-enabled mode on 95% of platforms,",
              "but has a tendancy to fail on OS X, or other platforms. In the prompt below,",
              "enter 'https' for geopy to work in https mode, or 'http' for http mode.",
              "Please note: Your settings will not be validated!", sep="\n")
        geopymode = input("Input here: ").lower()
        if geopymode == "https":
            config['GEOCODER']['scheme'] = 'https'
            print("Changes saved.")
        else:
            # Both an explicit 'http' and unrecognised input end up on HTTP.
            config['GEOCODER']['scheme'] = 'http'
            if geopymode == "http":
                print("Changes saved.")
            else:
                print("Couldn't understand your input. Defaulting to 'http'.")
    else:
        if setupmethod == "automaticsetup":
            print("Starting automatic setup.")
        else:
            print("Couldn't understand your input. Defaulting to automatic setup.")

        # Imported lazily so the manual path works without touching geopy.
        import geopy
        from geopy import GoogleV3
        geocoder = GoogleV3(scheme='https')
        # Warm-up geocode: its outcome is deliberately ignored, the real
        # probe is the second request below.
        try:
            geocoder.geocode("123 5th Avenue, New York, NY")
        except Exception:
            pass
        try:
            geocoder.geocode("123 5th Avenue, New York, NY")
            print("The geocoder can operate with HTTPS enabled on your OS. Saving these changes...")
            config['GEOCODER']['scheme'] = 'https'
            print("Changes saved.")
        except geopy.exc.GeocoderServiceError:
            print("Geopy probably can't run with HTTPS (or your internet went down). Trying HTTP as the scheme...")
            geocoder = GoogleV3(scheme='http')
            try:
                geocoder.geocode("123 5th Avenue, New York, NY")
                print("The geocoder can operate, but without HTTPS enabled on your OS. Saving these changes...")
                config['GEOCODER']['scheme'] = 'http'
                print("Changes saved.")
            except geopy.exc.GeocoderServiceError:
                print("You probably don't have an internet connection, as HTTPS and HTTP validation both failed.",
                      "Defaulting to HTTP as the geopy scheme...", sep="\n")
                config['GEOCODER']['scheme'] = 'http'
                print("Changes saved.")
Exemple #2
0
def create_afs_house(soup, url, bedrooms):
    """Build an ``Accommodation`` from a parsed listing page.

    Args:
        soup: parsed listing page offering a BeautifulSoup-style ``find``.
        url: listing URL, passed through to the accommodation.
        bedrooms: number of bedrooms, passed through to the accommodation.

    Returns:
        The accommodation with its monthly price, geocoded address and
        ``lat``/``long`` attributes set. When the geocoder finds no match
        the address and both coordinates are empty strings, mirroring
        ``create_rightmove_house``.
    """
    price = soup.find("div", class_="style12x")
    price = price.text
    price = re.search('£(.*)pw', price)  # Find price per week
    price = price.group(1)
    price = int(price)
    price = int((price * 52) / 12)  # Weekly price -> monthly price
    print(price)

    # Raw string so that ``\s`` reaches the regex engine unchanged.
    pattern = re.compile(
        r'[A-Z]{1,2}[0-9][0-9A-Z]?\s?[0-9][A-Z]{2}')  # Find postcode

    location_string = soup.find(text=pattern)
    location_string = str(location_string.string)
    # Drop any parenthesised remark from the address text
    location_string = re.sub(r'\([^)]*\)', '', location_string)

    geolocator = GoogleV3()
    location = geolocator.geocode(location_string)

    # geocode() returns None when nothing matches the query
    if location is None:
        address = ""
        latitude = ""
        longitude = ""
    else:
        address = location.address
        latitude = location.latitude
        longitude = location.longitude
    print(latitude)
    print(longitude)

    house = accommodation.Accommodation(price, bedrooms, "UNSURE",
                                        address, 1, url)
    house.lat = latitude
    house.long = longitude

    return house
Exemple #3
0
def geocode(address, attempt=0):
    """Geocode ``address`` with Google, retrying on timeouts.

    Args:
        address: mapping with at least the keys ``'address'`` and
            ``'locality'``; both are combined into the query string.
        attempt: number of timeouts seen so far (used by the retry logic).

    Returns:
        The location returned by the geocoder, or ``None`` when Google
        returned nothing, raised a ``GeopyError``, or timed out three times.

    Raises:
        GeocoderQuotaExceeded: re-raised so the caller can stop the batch.
    """
    # Google is free up to 2500 requests per day, then 0.50€ per 1000. We don't use
    # Nominatim because it doesn't like bulk requests. Other services cost money.
    service = GoogleV3(api_key=GOOGLE_API_KEY)

    query = '{address}, {locality}'.format(**address)
    point = None

    if attempt > 2:
        warning('Google timed out 3 times. Giving up on %s', query)

    else:
        try:
            point = service.geocode(query)

            if not point:
                raise GeopyError('Google returned empty object')

            if 'partial_match' in point.raw.keys():
                warning('Google partly matched %s', query)
            else:
                debug('Google matched %s', query)

        except GeocoderQuotaExceeded:
            raise

        except GeocoderTimedOut:
            # Keep the retry's result; it was previously discarded, so a
            # successful retry still returned None.
            point = geocode(address, attempt=attempt + 1)

        except GeopyError as e:
            warning('Error geocoding %s (%s)', address['address'], e)

    return point
Exemple #4
0
def coordinates(address):
	"""Geocode ``address`` with GoogleV3 and return ``(latitude, longitude)``.

	The coordinates are used to determine the correct civil twilight,
	which helps with camera configuration.
	"""
	match = GoogleV3().geocode(address)
	latitude, longitude = match.latitude, match.longitude
	return (latitude, longitude)
Exemple #5
0
    def save(self, *args, **kwargs):
        """Save the instance, geocoding the zipcode first when needed.

        When ``lat_long_points`` is empty the UTF-8 encoded zipcode is
        geocoded and, if a match comes back, stored as a
        ``Point((longitude, latitude))``. The parent ``save`` always runs.
        """
        needs_coordinates = not self.lat_long_points
        if needs_coordinates:
            match = GoogleV3().geocode(self.zipcode.encode('utf-8'))
            if match:
                coordinates = (match.longitude, match.latitude,)
                self.lat_long_points = Point(coordinates)

        return super(Instructors, self).save(*args, **kwargs)
def obtain_localisation(location):
    """Geocode the joined parts of ``location`` through a rate-limited client.

    ``location`` must offer ``tolist()`` returning strings; the parts are
    joined with ``", "`` to form the query.

    Returns:
        ``(latitude, longitude)`` of the match, or ``(np.nan, np.nan)`` when
        the geocoder returns nothing.
    """
    query = ", ".join(location.tolist())
    client = GoogleV3(user_agent="estate", api_key=config["API_KEY"])
    match = RateLimiter(client.geocode)(query)
    if not match:
        return np.nan, np.nan
    return match.latitude, match.longitude
Exemple #7
0
    def _collect_label_meta(self):
        """Build ``self._label_meta``: a place name and a size for each label.

        Runs only when ``self._data`` is non-empty, ``self.locations`` is not
        None, the ``GOOGLE`` environment variable is set and
        ``self._label_meta`` is still empty; otherwise it prints
        'Cannot get label names'.

        For every label, up to five of its locations are drawn at random and
        reverse-geocoded; the most frequent resulting name becomes the label's
        ``"name"`` and the number of locations carrying the label becomes its
        ``"count"``.

        NOTE: Python 2 code (``iterkeys``/``iteritems`` and the ``print``
        statement).
        """
        if (self._data and not (self.locations is None) and os.getenv('GOOGLE')
                and not self._label_meta):
            # Group locations into lists by label
            labels = {}
            for i, val in enumerate(self._data):
                # Label -1 is skipped (presumably "no cluster" -- TODO confirm)
                if val.location.label == -1:
                    continue
                labels.setdefault(int(val.location.label),
                                  list()).append(val.location)

            # Randomly select 1-5 locations / label
            # (indices are drawn independently, so a location can repeat)
            sample_labels = {}
            for key in labels.iterkeys():
                limit = min(5, len(labels[key]))
                indices = [
                    randrange(0, len(labels[key])) for i in range(limit)
                ]
                sample_labels[key] = [labels[key][j] for j in indices]

            # Get City/Place names for each label's locations
            geocoder = GoogleV3(api_key=os.getenv('GOOGLE'))
            name_lists = {}
            # i = 0
            for key in sample_labels.iterkeys():
                for location in sample_labels[key]:
                    name_lists.setdefault(key, list()).append(
                        SocialExplorer._reverse_geocode(
                            geocoder, 'PlaceName', location.latitude,
                            location.longitude))

            # Associate the label with the most common name
            # (on a tie, the name that reached the top count first wins)
            label_names = {}
            for key in labels.iterkeys():
                place_counts = {}
                max_count = 0
                max_name = None
                for name in name_lists[key]:
                    place_counts[name] = place_counts.setdefault(name, 0) + 1
                    if place_counts[name] > max_count:
                        max_count = place_counts[name]
                        max_name = name
                label_names[key] = max_name

            # Publish the result: label -> {"name", "count"}
            self._label_meta = {}
            for key, val in label_names.iteritems():
                self._label_meta[key] = {
                    "name": val,
                    "count": len(labels[key])
                }
        else:
            print 'Cannot get label names'
def geocode_all(addresses):
    """Lazily geocode ``(name, address)`` pairs.

    Yields ``(name, address, latitude, longitude)`` for each address that
    could be geocoded. A ``GeocoderQuotaExceeded`` error is retried with
    increasing pauses (0.1s up to 6.4s); an address is dropped silently once
    every pause has been used. Any other failure, including the geocoder
    finding no match, prints a message and skips the address.
    """
    geo = GoogleV3()
    for name, address in addresses:
        for t in [.1, .2, .4, .8, 1.6, 3.2, 6.4]:
            try:
                loc = geo.geocode(address.encode('ascii', 'ignore'))
                # A None result raises AttributeError here and is reported
                # below as "couldn't find".
                coords = (loc.latitude, loc.longitude)
            except GeocoderQuotaExceeded:
                sleep(t)
                continue
            except Exception:
                print('Couldn\'t find %s - %s' % (name, repr(address)))
                break
            # Yield outside the try block: a bare ``except`` around a
            # ``yield`` swallows GeneratorExit and breaks ``close()``.
            yield (name, address) + coords
            break
Exemple #9
0
def search_street(street, lang='en'):
    """Geocode ``street``, trying Nominatim first and Google as a fallback.

    The lookup is best-effort: Google is only consulted when the Nominatim
    attempt raises, and a failure of both services yields ``None``.

    Args:
        street: free-text address to look up.
        lang: language code passed to the geocoder as ``language``.

    Returns:
        The geocoder's result (which may itself be ``None`` when nothing
        matches), or ``None`` when both services raised.
    """
    # ``except Exception`` rather than a bare ``except`` so Ctrl-C and
    # SystemExit are not swallowed by the best-effort fallback.
    try:
        return Nominatim().geocode(street, language=lang)
    except Exception:
        pass

    try:
        geolocator = GoogleV3(api_key=config.GOOGLE_API_KEY)
        return geolocator.geocode(street, language=lang)
    except Exception:
        return None
Exemple #10
0
    def get(self, request, *args, **kwargs):
        """Return the instructors near the requested location.

        Reads ``location`` and ``distance`` from the query string, geocodes
        the location (10 second timeout) and responds with the prepared
        instructor records plus the geocoded origin.
        """
        params = request.GET
        address = params.get('location')
        distance = params.get('distance')

        origin = GoogleV3().geocode(address.encode('utf-8'), timeout=10)
        nearby = Instructors.objects.get_near_instructors(
            origin.latitude, origin.longitude, distance=distance)

        payload = {
            'instructors': [
                self.prepare_response_data(entry) for entry in nearby
            ],
            'origin': {'lat': origin.latitude, 'lon': origin.longitude},
        }
        return Response(payload)
Exemple #11
0
def create_rightmove_house(item):
    """Build an ``Accommodation`` from a parsed property page.

    Args:
        item: parsed property page offering a BeautifulSoup-style ``find``.

    Returns:
        The accommodation with price (``None`` when the page has no price
        header), bedroom count, geocoded address, furnished flag and
        ``lat``/``long`` attributes. Address and coordinates are empty
        strings when the geocoder finds no match.
    """
    page_url = item.find("meta", property="og:url")
    page_url = page_url["content"]
    price = item.find(id="propertyHeaderPrice")

    if price is not None:
        price = price.text
        price = re.search('£(.*) pcm', price)  # Get price per month
        price = price.group(1)
        price = int(price)

    bedrooms = item.find(string=re.compile("bedroom"))
    bedrooms = str(bedrooms.string)
    print(page_url)
    print(bedrooms)
    # Keep only the number in front of the last "bedroom"
    bedrooms = bedrooms.rsplit("bedroom", 1)[0]
    bedrooms = bedrooms.strip()
    bedrooms = int(bedrooms)

    address = item.find("address", class_="pad-0 fs-16 grid-25")
    address_string = str(address.string)
    geolocator = GoogleV3()
    location = geolocator.geocode(address_string)

    # geocode() returns None when nothing matches the query
    if location is None:
        address = ""
        latitude = ""
        longitude = ""
    else:
        address = location.address
        latitude = location.latitude
        # Was read from ``.latitude``, which stored the latitude twice.
        longitude = location.longitude

    furnished_type = item.find(id="furnishedType")
    furnished_string = str(furnished_type.string)
    is_furnished = 0
    if furnished_string == "Furnished":
        is_furnished = 1

    house = accommodation.Accommodation(price, bedrooms, "UNSURE", address,
                                        is_furnished, page_url)
    house.lat = latitude
    house.long = longitude
    return house
    def location(self):
        """
        Resolve the stored location into a geopy location object

        A string is looked up with the Google geocoder; any other iterable
        is unpacked as latitude and longitude.

        Returns
        -------
        Location

        Raises
        ------
        ValueError
            If the stored location is neither a string nor iterable
        """
        raw = self._location

        # strings go through a google lookup
        if isinstance(raw, str):
            return GoogleV3().geocode(raw)

        # anything that cannot be iterated is rejected
        if not hasattr(raw, '__iter__'):
            raise ValueError('Invalid location')

        # an iterable holds latitude and longitude
        lat, long = raw
        return Location(point=Point(latitude=lat, longitude=long))
    def tz(self):
        """
        Resolve the local timezone of the requested location

        An explicitly stored timezone wins; otherwise it is taken from the
        existing forecasts, and only as a last resort from a Google
        timezone lookup on the location's coordinates.

        Returns
        -------
        pytz.timezone
        """
        zone_name = self._tz

        if zone_name is None:
            if self._forecasts:
                # existing forecasts already carry the timezone
                zone_name = self._lookup_timezone()
            else:
                # fall back to the Google geocoder
                lat, long, _alt = self.location.point
                lookup = GoogleV3().timezone(location=(lat, long))
                zone_name = lookup.zone

        # hand back a pytz object
        return pytz.timezone(zone_name)
Exemple #14
0
	def save(self, *args, **kwargs):
		"""Sync the profile's locations with the submitted comma-separated list.

		Names present in the form but not on the profile are attached
		(creating and geocoding the ``Location`` row when it is new); names
		on the profile but missing from the form are detached. Empty names
		are ignored. The profile is saved at the end.
		"""
		my_locations = [l.name for l in self.profile.locations.all()]
		locations = self.cleaned_data.get('locations').split(',')
		should_delete = [x for x in my_locations if x not in locations]
		should_add = [x for x in locations if x not in my_locations]
		geolocator = None
		for name in should_add:
			if name:
				loc, created = Location.objects.get_or_create(name=name.lower())
				if created:
					# Build the geocoder lazily, only once a lookup is needed
					if not geolocator:
						geolocator = GoogleV3()
					match = geolocator.geocode(name)
					# geocode() returns None when nothing matches; keep the
					# new row without coordinates instead of crashing after
					# it has already been created.
					if match is not None:
						address, (la, lo) = match
						loc.latitude = la
						loc.longitude = lo
				loc.profiles.add(self.profile)
				loc.save()
		for name in should_delete:
			if name:
				loc, _ = Location.objects.get_or_create(name=name.lower())
				self.profile.locations.remove(loc)
		self.profile.save()

			

			

				
	


	



		
Exemple #15
0
# Introspection helpers.
# NOTE(review): `ins` is presumably `import inspect as ins` from an earlier
# cell, and `object` may have been rebound there -- confirm before running.
print("RESULT 1:", ins.getsource(object))  # source code
print("RESULT 2:", ins.getmodule(object))  # module in which defined
print("RESULT 3:", ins.currentframe().f_lineno)  # own line number
lines, lnum = ins.getsourcelines(object)
print("RESULT 4:", ''.join(lines))

# %% Join / concatenate a list or tuple of strings together
''.join(['a', 'b', 'c'])
''.join(('a', 'b', 'c'))

# %%
# conda install -c conda-forge geopy
# Geocode an address with Google; `key` is a placeholder to be replaced
from geopy import GoogleV3
place = "221b Baker Street, London"
key = 'enter API key'  # From Google Cloud
location = GoogleV3(api_key=key).geocode(place)
print(location.address)
print(location.point)
location.raw

# %% Look inside python object
x = {'a': 1, 'b': 2}
dir(x)
dir(dir)

# %% Import features from future versions of python
# NOTE: a __future__ import is only legal at the top of a module, so this
# cell has to be run on its own rather than as part of the whole file.
from __future__ import print_function
print("Hello World!")

# %% any, all and not
x = [True, True, False]
Exemple #16
0
# Reads Wine mag csv and appends geocoded lat/lng coords
########################################################

import http.client
import json
import time
import sys
import collections
import csv
from geopy import GoogleV3
from geopy.exc import GeopyError

# Name of the CSV file to read
csv_name = 'winemag-data-185k-03272019.csv'
# Google API key, taken from the first command-line argument
api_key = str(sys.argv[1])

geocoder = GoogleV3(api_key=api_key)
# Request counter and cap -- presumably used to stop before the API quota is
# exhausted; TODO confirm against the code that updates them
request_count = 0
request_limit = 100000

# Positions of the location fields -- presumably column indexes into each CSV
# row; verify against the file's header
country_index = 5
province_index = 8
region_index = 6

# Filled while reading the CSV (read_csv declares header_row as global)
header_row = []
rows = []

# NOTE(review): looks like a memo of already geocoded locations -- confirm
location_cache = {}

def read_csv():
    global header_row
    with open(csv_name, encoding='utf8') as csv_file:
Exemple #17
0
import csv
from geopy import GoogleV3
from invisibleroads_macros.disk import make_folder
from os.path import join
from sys import argv

# Usage: <script> TARGET_FOLDER ADDRESS_TEXT_PATH
# Geocodes every non-empty line of the address file and writes the result to
# TARGET_FOLDER/locations.csv.
target_folder, address_text_path = argv[1:]
geocode = GoogleV3().geocode
location_table_path = join(make_folder(target_folder), 'locations.csv')
# Context managers make sure both files are flushed and closed, even when a
# geocoding request raises half-way through.
with open(address_text_path) as address_file:
    with open(location_table_path, 'w') as location_table_file:
        csv_writer = csv.writer(location_table_file)
        csv_writer.writerow(['Address', 'Latitude', 'Longitude'])
        for address in address_file:
            address = address.strip()
            if not address:
                continue  # nothing to geocode on a blank line
            location = geocode(address)
            if location is None:
                # No match: keep the address and leave the coordinates empty
                csv_writer.writerow([address, '', ''])
                continue
            csv_writer.writerow([
                address, location.latitude, location.longitude])
print('location_table_path = ' + location_table_path)
Exemple #18
0
def update_lat(max_row):
    """Fill in lat/long and a normalized address for up to ``max_row`` ads.

    Selects rows of ``tbl_property_ad`` that have no latitude yet (state NSW,
    type 'residential' or 'house land package'). For each row the coordinates
    are taken from ``saved_address_num`` when available, otherwise from the
    Google geocoder as long as today's request count stays below
    ``max_query_number``. Geocoder answers are stored in ``tbl_address_lat``
    and the result is written back to ``tbl_property_ad``.

    NOTE: Python 2 code (``print`` statements).
    """

    # Daily request cap checked before each Google lookup
    max_query_number = 2450
    with sqlite3.connect(db) as conn:
        cur = conn.cursor()
        # Count the geocoder results already stored today
        cur.execute(
            "SELECT count(*) FROM tbl_address_lat WHERE create_date > date('now')"
        )
        today_query_number = cur.fetchone()[0]

    with sqlite3.connect(db) as conn:
        # One cursor streams the rows to process, the other performs writes
        cur = conn.cursor()
        cur_update = conn.cursor()
        cur.execute(
            "SELECT address, id, state, postcode FROM tbl_property_ad "
            # "WHERE state = 'NSW' AND type = 'residential'")
            "WHERE lat is NULL and state = 'NSW' AND (type = 'residential' or type = 'house land package') LIMIT ?",
            (max_row, ))
        # rs = cur.fetchall()
        # it = iter(rs)
        # property_ = it.next()

        # Consecutive failures; reset to 0 after every row that succeeds
        retry_num = 0
        while True:
            property_ = cur.fetchone()
            if not property_:
                break
            try:
                address_text = property_[0]
                property_id = property_[1]
                state = property_[2]
                postcode = property_[3]
                normalized_address = ""
                lat_ = None
                lng_ = None

                if address_text:
                    ADDRESS_TEXT = address_text.strip().upper()
                    # presumably returns a previously stored geocoder result
                    # as a dict -- verify against saved_address_num
                    dict_geo = saved_address_num(ADDRESS_TEXT)
                    if isinstance(dict_geo, dict):
                        try:
                            lat_ = dict_geo['geometry']['location']['lat']
                            lng_ = dict_geo['geometry']['location']['lng']
                            normalized_address = dict_geo['formatted_address']
                        except KeyError:  # empty dict_geo, so go for suburb location

                            lat_ = None
                            lng_ = None

                            # set saved address to suburb
                            ADDRESS_TEXT = state + " " + str(postcode)
                            normalized_address = ADDRESS_TEXT
                            dict_geo = saved_address_num(ADDRESS_TEXT)
                            if isinstance(dict_geo, dict):
                                try:
                                    lat_ = dict_geo['geometry']['location'][
                                        'lat']
                                    lng_ = dict_geo['geometry']['location'][
                                        'lng']
                                except KeyError:
                                    lat_ = None
                                    lng_ = None

                    # NOTE(review): a stored latitude of exactly 0 is also
                    # falsy and would trigger a new lookup here.
                    if not lat_:
                        # if not found in db, go for geopy
                        if today_query_number < max_query_number:
                            # NOTE(review): API key is hard-coded in source;
                            # consider loading it from configuration.
                            geo = GoogleV3(
                                api_key=
                                "AIzaSyALRQvXf8IwBIU6HI8btqv4TtSMarfm-98",
                                timeout=20)
                            location = geo.geocode(ADDRESS_TEXT)
                            # Short pause between consecutive API requests
                            time.sleep(0.2)
                            today_query_number += 1
                            now_ = str(datetime.now())
                            print "Geocode quotation:", today_query_number, ":", address_text

                            if location:
                                lat_ = location.latitude
                                lng_ = location.longitude
                                if not normalized_address:
                                    normalized_address = location.address
                                # Store the raw answer for later reuse
                                cur_update.execute(
                                    "INSERT INTO tbl_address_lat (address_text, lat, long, api_string, create_date) "
                                    "VALUES (?, ?, ?, ?, ?)",
                                    (ADDRESS_TEXT, lat_, lng_,
                                     json.dumps(location.raw), now_))
                                conn.commit()
                            else:
                                # Record the miss too, then mark the ad with
                                # 0/0 so it no longer matches "lat is NULL"
                                cur_update.execute(
                                    "INSERT INTO tbl_address_lat (address_text, lat, long, api_string, create_date) "
                                    "VALUES (?, ?, ?, ?, ?)",
                                    (ADDRESS_TEXT, None, None, None, now_))
                                conn.commit()
                                lat_ = 0
                                lng_ = 0

                        else:
                            # Daily cap reached: leave this ad untouched. The
                            # loop still walks the remaining rows.
                            continue

                    # update back to tbl_property_ad
                    cur_update.execute(
                        "UPDATE tbl_property_ad SET lat = ?, long = ?, address_normalized = ?"
                        "WHERE id =  ?",
                        (lat_, lng_, normalized_address, property_id))
                    conn.commit()
            except Exception as err:
                print err
                retry_num += 1
                # Give up after more than 5 consecutive failures when close to
                # the daily cap, or after more than 15 in any case.
                if ((retry_num > 5) and
                    (today_query_number > max_query_number - 200)) or (
                        retry_num > 15):
                    # NOTE(review): Python 2 input() evaluates what is typed
                    print input("Enter to exit:")
                    break
                # Wait a minute before moving on. NOTE(review): the failed
                # row is not retried; the next fetchone() reads the next row.
                time.sleep(60)
            else:
                retry_num = 0
Exemple #19
0
    'postcode': '10132'
}, {
    'country': 'China',
    'city': 'Shenzhen',
    'postcode': '518012'
}, {
    'country': 'China',
    'city': 'Fuzhou',
    'postcode': '350022'
}, {
    'country': 'Russia',
    'city': 'Moscow',
    'postcode': '105122'
}]

geo_locator = GoogleV3(api_key=GOOGLE_API_KEY)
for location in postcodes:
    geo_location = geo_locator.geocode(
        components={
            'country': location.get('country'),
            'locality': location.get('city'),
            'postal_code': location.get('postcode')
        })
    if geo_location:
        #print(geo_location.raw)
        print('https://www.google.com/maps/place/?q=place_id:{}'.format(
            geo_location.raw.get('place_id', )))
    else:
        print('Location {country}, {city}, {postcode} not found'.format(
            country=location.get('country'),
            city=location.get('city'),
Exemple #20
0
# Author/source: Koosha Golmohammadi

# Convert a list of addresses from an input file to geocodes 

import sys

from geopy import GoogleV3

geolocator = GoogleV3()

# Usage: <script> INPUT_FILE OUTPUT_FILE
# Each input line is geocoded and written as "address,latitude,longitude".
# The context managers close both files even when a request raises.
with open(sys.argv[1], 'r') as input_file, \
        open(sys.argv[2], 'w') as output_file:
    for line in input_file:
        print(line)
        location = geolocator.geocode(line, timeout=10)
        if location is None:
            # geocode() returns None when nothing matches; report and go on
            # instead of failing on the tuple unpacking below.
            sys.stderr.write('No match for: %s\n' % line.strip())
            continue
        address, (latitude, longitude) = location
        output_line = '%s,%s,%s\n' % (address, latitude, longitude)
        output_file.write(output_line)

#return 0
            #address is always on the next line
            address = lines[index + 1]
            #some address (below) is on the html of every single page and it obviously does not belong (Jamaica)
            if address == "        14692 Guy R Brewer Blvd<br>Jamaica, NY 11434":
                pass
            else:
                address = address.replace("        ", "")
                address = address.replace("<br>", ", ")
                addresses.append(address)
                haveNameLFaddress = False  #found address for previous name of supermarket
                print(address)

#Map all addresses
coords = []
NoneType = []
geolocator = GoogleV3()  # Nominatim()

for i in addresses:
    try:
        try:
            location = geolocator.geocode(i)
            if type(location) != type(None):
                coords.append([location.latitude, location.longitude])
                print("Valid count: %d, address: %s" % (len(coords), i))
                time.sleep(1)
            #Improper formatting of addresses cannot be mapped (thanks Yelp)
            else:
                NoneType.append(i)
                print("Invalid count: %d, address: %s" % (len(NoneType), i))
                time.sleep(1)
        except GeocoderTimedOut as e:
Exemple #22
0
 def __init__(self):
     """Create the Google geocoder from the API key stored in ``secrets``."""
     google_key = secrets['api-key']['google']
     self.geocoder = GoogleV3(api_key=google_key)
def get_zoopla_houses(location, bedrooms, price, bills_inc):
    """Fetch rental listings from the Zoopla API as ``Accommodation`` objects.

    Args:
        location: search area passed to Zoopla as ``area``.
        bedrooms: exact number of bedrooms (used as minimum and maximum).
        price: maximum monthly price; converted to a weekly price for the API.
        bills_inc: bills flag stored on every newly created accommodation.

    Returns:
        A list with one accommodation per listing. Listings already stored
        in ``houses.db`` are rebuilt from their database row; new ones are
        geocoded, added to the database and returned.
    """
    house_list = []
    monthly_price = int(price)

    weekly_price = int((monthly_price * 12) / 52)

    # NOTE(review): the API key is hard-coded in source; consider loading it
    # from configuration instead.
    parameters = {
        'area': location,
        'radius': 5,
        'listing_status': 'rent',
        'maximum_price': weekly_price,
        'minimum_beds': bedrooms,
        'maximum_beds': bedrooms,
        'api_key': 'zwqrekb5d6zawqmxud9bnpte'
    }

    r = requests.get('http://api.zoopla.co.uk/api/v1/property_listings.js',
                     params=parameters)

    result = r.json()

    for item in result['listing']:
        house_url = item['details_url']
        print(house_url)

        # Look the listing up first and close the connection right away
        # (a new connection per listing was previously left open).
        conn = sqlite3.connect("houses.db")
        try:
            row = conn.execute(
                '''SELECT * FROM accommodations WHERE url=?''',
                (house_url, )).fetchone()
        finally:
            conn.close()

        if row is not None:
            # Already in the database: rebuild the house from its row.
            house = accommodation.Accommodation(row[1], row[2], row[3],
                                                row[6], row[7], row[0])
            house.lat = row[4]
            # Longitude is the column after latitude (was read from row[4]).
            house.long = row[5]
        else:
            house_bedrooms = int(item['num_bedrooms'])
            house_price = item['rental_prices']['per_month']

            # Prefer the geocoder's normalized address; fall back to the
            # listing's own text when nothing matches.
            house_location = GoogleV3().geocode(item['displayable_address'])
            if house_location is None:
                house_address = item['displayable_address']
            else:
                house_address = house_location.address

            house = accommodation.Accommodation(house_price, house_bedrooms,
                                                bills_inc, house_address,
                                                "UNSURE", house_url)
            house.lat = item['latitude']
            house.long = item['longitude']
            database.add_house_to_db(house)

        house_list.append(house)

    # Return after ALL listings were processed (the return used to sit
    # inside the loop, so only the first listing was ever handled).
    return house_list
Exemple #24
0
def create_app(
    schema_path='schemas/',
    rest_subdir='indexes',
    template_subdir='templates',
):
    # type: (str, str, str) -> flask.app.Flask
    """Create and fully configure the Flask application.

    Sets up logging, the JSON error handler, Elasticsearch, the database, the
    geocoder, authentication, URL rules and the schema store, then loads the
    JSON schemas found below ``schema_path``.

    Args:
        schema_path: directory holding the non-REST ``*.json`` schemas.
        rest_subdir: sub-directory of ``schema_path`` with the index schemas.
        template_subdir: sub-directory of ``rest_subdir`` with the templates.

    Returns:
        The configured Flask application.
    """

    logging.basicConfig(level=logging.DEBUG)

    app = Flask(__name__)

    # Configure
    configure_app(app)

    @app.errorhandler(Exception)
    def jsonify_exceptions(error):
        # type: (Exception) -> flask.app.Response
        # TODO put in sentry/rollbar/airbrake
        app.logger.exception('Unhandled error: %s', error)

        try:
            # ES exception
            problem = error.info['error']['reason']
        except AttributeError:
            # Not an exception carrying ``info``: use its message instead.
            # NOTE(review): ``.message`` does not exist on Python 3
            # exceptions -- confirm the target interpreter.
            problem = error.message

        return views.error_response(500, problem)

    # Connect to Elasticsearch
    es = configure_elasticsearch(app)
    app.cluster = ClusterClient(es)
    app.datastore = DataStore(es)

    # Connect to Database
    from .database import db
    db.init_app(app)
    app.db = db

    # Schema setup needs an application context for the engine
    with app.app_context():
        db.engine.execute('CREATE EXTENSION IF NOT EXISTS HSTORE')
        db.create_all()

    # Create Geocoder
    # exactly_one=False is pre-bound for every call made through app.geocode
    app.geocode = partial(GoogleV3().geocode, exactly_one=False)

    # Setup auth
    app.register_blueprint(auth)
    app.before_first_request(create_oauth_flow)
    app.before_request(authenticate_user)

    # Setup URL rules
    configure_endpoints(app)

    # Load schemas
    app.schemastore = SwaggerSchemaStore()

    # With the defaults: schemas/, schemas/indexes/, schemas/indexes/templates/
    schema_dir = Path(schema_path)
    rest_dir = schema_dir.joinpath(rest_subdir)
    template_dir = schema_dir.joinpath(rest_subdir, template_subdir)

    #  Non-REST endpoints
    for json_file in schema_dir.glob('*.json'):
        _add_schema(app, json_file)

    #  Load templates before configuring indexes
    for json_file in template_dir.glob('*.json'):
        _add_template(app, json_file)

    #  REST'ish indexes
    for json_file in rest_dir.glob('*.json'):
        index, swagger_spec = _add_schema(app, json_file, force_security=True)
        configure_index(
            index, swagger_spec,
            app.config['ELASTICSEARCH_NON_RESETTABLE_INDEX_SETTINGS'], es)
        configure_mappings(index, swagger_spec, es)

    _list_routes(app)

    app.logger.info('RelES reporting for duty...')

    return app
Exemple #25
0
from geopy import GoogleV3

import config

# Geocode the place name 'Eraclea' with Google, passing language='it',
# and print whatever the geocoder returns.
geolocator = GoogleV3(api_key=config.GOOGLE_API_KEY)
location = geolocator.geocode('Eraclea', language='it')

print(location)
Exemple #26
0
from geopy import GoogleV3

# Geocode a street address and show the matched address and its coordinates.
place = "221b Baker Street, London"
location = GoogleV3().geocode(place)

print(location.address)
# geopy's Location exposes the coordinates as ``point``; it has no
# ``location`` attribute.
print(location.point)

# There’s also a useful distance class. It calculates the distance between two locations in your favorite unit of measurement.
Exemple #27
0
 def __init__(self):
     """Create the Google geocoder from ``settings.GOOGLE_API_KEY``."""
     google_key = settings.GOOGLE_API_KEY
     self.geo_locator = GoogleV3(api_key=google_key)
# separating city and state from address to do further fact-based quantitative analysis
address = fullTableDf["Address"].str.split(", ", n = 1, expand = True)
fullTableDf["City"]= address[0]
fullTableDf["State"]= address[1]
fullTableDf = fullTableDf.loc[:, ["Symbol", "Address", 'City', 'State', 'DateAdded', 'Sector']]


# GENERATING THE MAP OF OUR HEADQUARTERS
# creating a smaller dataframe here because the complete dataframe is huge and my API times out before iterating over all entries
# 150 rows are chosen from the beginning to plot on the map
# results will be similar on the complete dataset if we upgrade our API
# .copy() makes smallDf independent of fullTableDf, so the new columns below
# are added to a real frame instead of a slice (no SettingWithCopyWarning).
smallDf = fullTableDf.iloc[:150].copy()
print("\nHead of the smaller dataframe for plotting the map: ")
print(smallDf.head())

# NOTE(review): the API key is hard-coded in source; consider loading it from
# configuration instead.
geocode = GoogleV3('AIzaSyDNqc0tWzXHx_wIp1w75-XTcCk4BSphB5w').geocode
addresses = smallDf['Address'].tolist()

# generating latitudes and longitudes of all the HQ addresses in the dataset
print("\n\nPlease wait... generating coordinates for our map! It takes some time.\n")
latitudes = []
longitudes = []
for address in addresses:
    x = geocode(address)
    # geocode() returns None when nothing matches; store NaN so the lists
    # stay aligned with the rows of smallDf
    latitudes.append(x.latitude if x is not None else float('nan'))
    longitudes.append(x.longitude if x is not None else float('nan'))

# Assign the lists positionally; going through a fresh DataFrame aligned on
# the index and would yield NaN for any non-default index.
smallDf['Latitude'] = latitudes
smallDf['Longitude'] = longitudes
print("\nColumns in the small dataframe after adding the coordinates: ")
print(smallDf.columns)
Exemple #29
0
def demo():
	"""Run the complete demo pipeline from tweet collection to visualization.

	The stages run in order, each announced by a printed heading:

	1. Fetch new tweets for a fixed keyword set and save them to
	   'tweets/new_dataset.pickle'.
	2. Try to add a location to every fetched tweet and save the dataset again.
	3. Train a Complement Naive Bayes and a Decision Tree classifier on
	   'tweets/train_dataset.pickle', save both, then refit the more accurate
	   of the two on all training data and save it as 'models/best_model.pickle'.
	4. Predict the ``denier`` label for every tweet in 'tweets/test_dataset.pickle'.
	5. Call each filter / sort / group helper once on the test dataset.
	6. Count located tweets per country and per continent and plot the counts.

	Credentials are read from 'tokens/twitter_tokens.txt' and
	'tokens/google_token.txt'. The function takes no arguments and returns None.
	"""
	##########################
	# 1. GET NEW DATASET     #
	# 2. ADD LOCATIONS       #
	# 3. TRAIN CLASSIFIERS   #
	# 4. MAKE PREDICTIONS    #
	# 5. FILTER, SORT, GROUP #
	# 6. VISUALIZE           #
	##########################
	print()

	######################
	# 1. GET NEW DATASET #
	######################
	print('\n1. GET NEW DATASET')
	# read Twitter tokens
	consumer_key, consumer_secret, access_token, access_token_secret = read_twitter_tokens('tokens/twitter_tokens.txt')
	# connect with the Twitter API
	twitter_api: tweepy.API = connect_to_twitter_api(consumer_key, consumer_secret, access_token, access_token_secret)
	# define keywords
	# (the two commented-out lists below are unused alternatives kept for reference)
	# COVID_KEYWORDS: List[str] = [
	# 	'corona', 'covid', 'quaranteen', 'home', 'stay', 'inside', 'virology', 'doctor', 'nurse', 'virus', 'grandma',
	# 	'vaccin', 'sars', 'alone', 'strongtogether', 'elbow', 'mouth mask', 'protective equipment', 'hospitalization',
	# 	'increas', 'death', 'dead', 'impact', 'ICU', 'intensive care', 'applause', 'stay healthy', 'take care', 'risk',
	# 	'risk group', 'environment',
	# 	'U+1F637',  # Medical Mask Emoji
	# 	'U+1F691',  # Ambulance Emoji
	# 	'U+1F92E',  # Vomiting Emoji
	# 	'U+1F912',  # Thermometer Emoji
	# ]
	# COVID_FAKE_KEYWORDS: List[str] = [
	# 	'coronascam', 'fakecorona', 'fake', 'coronahoax', 'hoaxcorona', 'gooutside', 'donotstayhome', 'fuckvirology',
	# 	'donttrustvirologists', 'coronadoesntexist', 'chinesevirushoax',
	# ]
	keywords: Dict[str, int] = {
		'covid': 100,  # get 100 tweets with 'covid' in it
		'corona': 100,  # get 100 tweets with 'corona' in it
		'coronahoax': 100,  # get 100 tweets with 'coronahoax' in it
	}
	# get new dataset
	new_dataset: List[Tweet] = get_new_tweets(twitter_api, keywords)
	# the search may return nothing; indexing [0] on an empty list would raise IndexError
	if new_dataset:
		print(f'First tweet:\n{new_dataset[0]}')
	else:
		print('No tweets were retrieved.')
	# save new dataset
	save_tweets(new_dataset, 'tweets/new_dataset.pickle')

	####################
	# 2. ADD LOCATIONS #
	####################
	print('\n2. ADD LOCATION TO THOSE TWEETS')
	# read Google token
	geocoding_api_key: str = read_google_token('tokens/google_token.txt')
	# initialize Google API
	google_api: GoogleV3 = GoogleV3(api_key=geocoding_api_key)
	# add location to tweets when possible, counting located tweets before and after
	num_tweets_with_location_before: int = 0
	num_tweets_with_location_after: int = 0
	for tweet in new_dataset:
		if tweet.country_code is not None and tweet.continent is not None:
			num_tweets_with_location_before += 1
		tweet.add_location(google_api)
		if tweet.country_code is not None and tweet.continent is not None:
			num_tweets_with_location_after += 1
	print(f'Number of tweets with location before: {num_tweets_with_location_before}')
	print(f'Number of tweets with location after: {num_tweets_with_location_after}')
	# save new dataset with locations included (overwrites the file written in step 1)
	save_tweets(new_dataset, 'tweets/new_dataset.pickle')

	########################
	# 3. TRAIN CLASSIFIERS #
	########################
	print('\n3. TRAIN CLASSIFIERS')
	# load train dataset
	train_dataset = load_tweets('tweets/train_dataset.pickle')
	# pre-process train dataset
	train_corpus: List[str] = preprocess_corpus([tweet.text for tweet in train_dataset])
	labels: List[bool] = [tweet.denier for tweet in train_dataset]

	# train on part of the data
	# train, validation split (20% held out for validation)
	X_train, X_test, y_train, y_test = train_test_split(train_corpus, labels, test_size=0.2)
	# vectorize: fit the vocabulary on the training part only
	vectorizer: CountVectorizer = CountVectorizer()
	X_train = vectorizer.fit_transform(X_train)
	X_test = vectorizer.transform(X_test)

	# create Complement Naive Bayes classifier
	naive_bayes_classifier = ComplementNB()
	# train Complement Naive Bayes classifier
	naive_bayes_classifier = naive_bayes_classifier.fit(X_train, y_train)
	# validate Complement Naive Bayes classifier
	naive_bayes_accuracy: float = naive_bayes_classifier.score(X_test, y_test)
	print(f'Naive Bayes accuracy:\t{naive_bayes_accuracy * 100:>3.2f}%')
	# save Naive Bayes classifier
	save_model(naive_bayes_classifier, 'models/naive_bayes.pickle')

	# create Decision Tree classifier
	decision_tree_classifier = DecisionTreeClassifier()
	# train Decision Tree classifier
	decision_tree_classifier = decision_tree_classifier.fit(X_train, y_train)
	# validate Decision Tree classifier
	decision_tree_accuracy: float = decision_tree_classifier.score(X_test, y_test)
	print(f'Decision Tree accuracy:\t{decision_tree_accuracy * 100:>3.2f}%')
	# save Decision Tree classifier
	save_model(decision_tree_classifier, 'models/decision_tree.pickle')

	# retrain best model on all of the data
	# vectorize with a fresh vectorizer fitted on the whole training corpus;
	# this is the vectorizer reused for the predictions in step 4
	vectorizer = CountVectorizer()
	X_all = vectorizer.fit_transform(train_corpus)
	# ties go to Naive Bayes
	if naive_bayes_accuracy >= decision_tree_accuracy:
		best_model = ComplementNB().fit(X_all, labels)
	else:
		best_model = DecisionTreeClassifier().fit(X_all, labels)
	# save best model
	save_model(best_model, 'models/best_model.pickle')

	#######################
	# 4. MAKE PREDICTIONS #
	#######################
	print('\n4. USE CLASSIFIERS')
	# load test dataset
	test_dataset = load_tweets('tweets/test_dataset.pickle')

	# pre-processing
	test_corpus: List[str] = preprocess_corpus([tweet.text for tweet in test_dataset])
	# vectorize
	X_new = vectorizer.transform(test_corpus)
	# make predictions
	y = best_model.predict(X_new)

	# add predictions to tweet
	for tweet, label in zip(test_dataset, y):
		tweet.denier = label

	##########################
	# 5. FILTER, SORT, GROUP #
	##########################
	print('\n5. USE VARIOUS FILTERS')
	# use filters
	# (the results are not used further in this function; each helper is called once as a demonstration)
	tweets_filtered_by_hashtag: List[Tweet] = filter_by_hashtag(test_dataset, '#coronahoax')
	tweets_filtered_by_hashtags_all: List[Tweet] = filter_by_hashtags_all(test_dataset, ['#corona', '#coronahoax'])
	tweets_filtered_by_hashtags_any: List[Tweet] = filter_by_hashtags_any(test_dataset, ['#corona', '#coronahoax', '#coronavirus', '#covid19'])
	tweets_filtered_before: List[Tweet] = filter_before(test_dataset, datetime(2020, 4, 19, 18, 58, 46))
	tweets_filtered_at: List[Tweet] = filter_at(test_dataset, datetime(2020, 4, 19, 18, 58, 46))
	tweets_filtered_after: List[Tweet] = filter_after(test_dataset, datetime(2020, 4, 19, 18, 58, 46))
	tweets_filtered_between: List[Tweet] = filter_between(test_dataset, datetime(2020, 4, 19, 18, 0, 0), datetime(2020, 4, 19, 19, 0, 0))
	tweets_filtered_by_country_code: List[Tweet] = filter_by_country_code(test_dataset, 'US')
	tweets_filtered_by_country_codes: List[Tweet] = filter_by_country_codes(test_dataset, ['US', 'GB'])
	tweets_filtered_by_continent: List[Tweet] = filter_by_continent(test_dataset, 'Europe')
	tweets_filtered_by_continents: List[Tweet] = filter_by_continents(test_dataset, ['Europe', 'North America'])
	tweets_sorted_by_date_ascending: List[Tweet] = sort_by_date_ascending(test_dataset)
	tweets_sorted_by_date_descending: List[Tweet] = sort_by_date_descending(test_dataset)
	tweets_grouped_by_country_code: defaultdict = group_by_country_code(test_dataset)
	tweets_grouped_by_continent: defaultdict = group_by_continent(test_dataset)

	################
	# 6. VISUALIZE #
	################
	print('\n6. VISUALIZE')
	# continents: maps the continent name stored on a tweet to the key used in the per-continent series
	# NOTE(review): 'antartica' is presumably the spelling the plotting backend expects — confirm before "correcting" it
	CONTINENTS: Dict[str, str] = {
		'Asia': 'asia',
		'Europe': 'europe',
		'Africa': 'africa',
		'North America': 'north_america',
		'South America': 'south_america',
		'Oceania': 'oceania',
		'Antarctica': 'antartica',
	}

	# create series to plot: nested counters of the form series[group][key] -> number of tweets
	num_tweets_per_country_per_continent_absolute = defaultdict(lambda: defaultdict(int))
	num_tweets_per_country_absolute = defaultdict(lambda: defaultdict(int))
	num_tweets_per_continent_absolute = defaultdict(lambda: defaultdict(int))
	for tweet in test_dataset:
		# tweets without a location are left out of every series
		if tweet.has_location():
			country_code: str = tweet.country_code.lower()
			continent: str = CONTINENTS[tweet.continent]

			num_tweets_per_country_per_continent_absolute[tweet.continent][country_code] += 1
			num_tweets_per_country_absolute['World'][country_code] += 1
			num_tweets_per_continent_absolute['World'][continent] += 1

	# visualize plots
	title = 'Absolute number of tweets per country and per continent'
	series = num_tweets_per_country_per_continent_absolute
	filename = 'num_tweets_per_country_per_continent_absolute'
	visualize(title, series, filename, per_continent=False)

	title = 'Absolute number of tweets per country'
	series = num_tweets_per_country_absolute
	filename = 'num_tweets_per_country_absolute'
	visualize(title, series, filename, per_continent=False)

	title = 'Absolute number of tweets per continent'
	series = num_tweets_per_continent_absolute
	filename = 'num_tweets_per_continent_absolute'
	visualize(title, series, filename, per_continent=True)
Exemple #30
0
    # Begin to determine geocoder scheme
    print("Attempting to detect a geocoder scheme for your system, this should only take a few moments.")

    try:
        import geopy
        geopy_installed = True
    except ImportError:
        print("Failed to import geopy. It's recommended that you install geopy for PyWeather to work.",
              "Skipping geocoder scheme detection and defaulting to 'http' as the scheme.", sep="\n")
        geopy_installed = False

    if geopy_installed is True:
        # HTTPS validation
        from geopy import GoogleV3

        geocoder = GoogleV3(scheme='https')
        # I've found that one "warm up request", and then waiting ~15 seconds somehow helps determine if a platform is HTTP/HTTPS compatible.
        try:
            geocoder.geocode("123 5th Avenue, New York, NY")
        except:
            didthewarmupgeocodefail = "you bet"

        print("A warmup geocode has just been completed which helps with determining which scheme will work",
              "on your OS. Waiting 5 seconds before making another geocode requests (to prevent rate limiting).", sep="\n")
        time.sleep(5)

        try:
            geocoder.geocode("123 5th Avenue, New York, NY")
            print("The geocoder can operate with HTTPS enabled on your OS. Saving these changes...")
            config['GEOCODER']['scheme'] = 'https'
            print("Changes saved.")