Ejemplo n.º 1
0
    def _prepare_geo_db(self):
        """
        Open the geolocation reference database and keep its reader on the instance.

        The database path is read from ``app_settings['geodb_filename']``; on success the
        ``geoip2.database.Reader`` is stored as ``self.reader``.

        :raises GracefulException: If the database file does not exist, or if opening it
            raises ``ValueError``.
        """

        db_path = app_settings['geodb_filename']
        logging.debug("Preparing geolocation database '{}' file.".format(db_path))
        try:
            db_reader = geoip2.database.Reader(db_path)
        except FileNotFoundError:
            raise GracefulException("Could not find geolocation reference database!")
        except ValueError:
            raise GracefulException("Invalid geolocation reference database!")
        else:
            # Only publish the reader once it was opened successfully
            self.reader = db_reader
Ejemplo n.º 2
0
    def _process_ip_list_slice(self, p, list_slice):
        """
        Processes an IP list slice, and writes locations to location database.

        Every IP in the slice is looked up with ``self.reader.city()`` and one row per IP is
        written with ``INSERT OR IGNORE``. IPs that could not be geolocated are still written,
        with ``geolocated = 0`` and default (empty / zero) location values.

        :param p: Multiprocess process number.
        :param list_slice: IP list slice to process.
        :raises GracefulException: If the geolocation reader object is unusable (``AttributeError``).
        """

        logging.debug("Process {} started to process IP list slice containing {} IPs.".format(p, len(list_slice)))
        location_db = LocationDB()

        # The statement is constant and fully parameterized, so build it once outside the loop
        # (the original applied a no-op `.format(ip)` to it on every iteration).
        insert_sql = '''
            INSERT OR IGNORE INTO locations
            (ip, ip_epoch, geolocated, latitude, longitude, city, region, country,
            forecast_temperature, forecast_epoch)
            VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
            '''

        for ip in list_slice:

            # Pseudo schema and Default values for location entry
            # Epoch when location was written. `strftime('%s')` is a platform-specific extension
            # that Python does not document; `timestamp()` gives the same epoch portably.
            ip_epoch = int(datetime.datetime.now().timestamp())
            geolocated = 0  # Whether or not the IP was geolocated
            latitude = 0.0  # Latitude in decimal degrees format
            longitude = 0.0  # Longitude in decimal degrees format
            city = ""  # City name
            region = ""  # ISO 3166-2 principal subdivision code
            country = ""  # ISO 3166-1 alpha-2 country code
            forecast_temperature = 0.0
            forecast_epoch = 0  # Set initial epoch to effectively "none"

            # Geolocate IP address
            try:
                response = self.reader.city(ip)
                geolocated = 1
                latitude = response.location.latitude
                longitude = response.location.longitude
                city = response.city.name
                region = response.subdivisions.most_specific.iso_code
                country = response.country.iso_code
                logging.debug(
                    "Successfully geolocated IP address '{}' to latitude: '{}' longitude: '{})"
                    .format(ip, latitude, longitude))
            except AttributeError:
                # This is not likely to happen, but should 'geodb' object fail, handle it
                raise GracefulException(
                    "Geolocation reference database object is invalid! Database was either not found previously, and "
                    "exception handling failed, or the database is no longer locked to us.")
            except AddressValueError:
                # NOTE(review): geoip2 documents `geoip2.errors.AddressNotFoundError` for addresses that are
                # missing from the database - confirm `AddressValueError` covers that case here.
                logging.debug(
                    "Failed to geolocate IP address '{}'! IP not found in geolocation reference database."
                    .format(ip))

            # Write location to location database
            location_db.insert_(
                insert_sql,
                ip, ip_epoch, geolocated, latitude, longitude, city, region, country,
                forecast_temperature, forecast_epoch)
Ejemplo n.º 3
0
 def display_histogram(self):
     """
     Log the contents of the previously saved histogram file.

     Reads ``self.tsv_file`` as text and emits it as a single info-level log message.

     :raises GracefulException: If ``self.tsv_file`` does not exist.
     """
     try:
         with open(self.tsv_file, 'rt') as histogram_file:
             saved_content = str(histogram_file.read())
             logging.info("Histogram tsv file as saved:\n" + saved_content)
     except FileNotFoundError:
         not_found_message = "File '{}' was not found! Couldn't display histogram results.".format(self.tsv_file)
         raise GracefulException(not_found_message)
Ejemplo n.º 4
0
    def __init__(self, temperature_list):
        """
        Store the temperatures and read histogram settings from ``app_settings``.

        :param temperature_list: List of temperatures.
        :raises GracefulException: If ``temperature_list`` is empty (or otherwise falsy).
        """
        self.temperature_list = temperature_list
        # Bucket count and output path both come from the application settings
        self.bins, self.tsv_file = app_settings['buckets'], app_settings['tsv_output']
        self.histogram_array = None

        if temperature_list:
            return
        raise GracefulException("Unable to build location temperature list! List was empty.")
Ejemplo n.º 5
0
    def update_(self, sql, *args):
        """
        Update row in table.

        Opens a connection to ``self.db_file``, executes the statement inside a transaction
        (committed on success, rolled back on error) and always closes the connection.

        :param sql: SQL statement; may contain ``?`` placeholders.
        :param args: Optional values bound to the placeholders in ``sql``.
        :raises GracefulException: On sqlite3 integrity, programming or operational errors.
        """

        try:
            connection = sqlite3.connect(self.db_file)
            try:
                # The connection context manager only commits/rolls back; it does not close.
                with connection:
                    connection.execute(sql, args)
            finally:
                connection.close()
        # A tuple is required here: `except A or B or C` evaluates to `A` and catches nothing else.
        except (sqlite3.IntegrityError, sqlite3.ProgrammingError, sqlite3.OperationalError) as e:
            logging.error("Locations database exception: {}".format(e))
            raise GracefulException("Locations database failure!") from e
Ejemplo n.º 6
0
    def insert_(self, sql, *args):
        """
        Write (insert) row to table.

        Opens a connection to ``self.db_file``, executes the statement inside a transaction
        (committed on success, rolled back on error) and always closes the connection.

        :param sql: SQL statement with ``?`` placeholders.
        :param args: Table values vars, bound to the placeholders in order.
        :raises GracefulException: On sqlite3 integrity, programming or operational errors.
        """

        try:
            connection = sqlite3.connect(self.db_file)
            try:
                # The connection context manager only commits/rolls back; it does not close.
                with connection:
                    connection.execute(sql, args)
            finally:
                connection.close()
        # A tuple is required here: `except A or B or C` evaluates to `A` and catches nothing else.
        except (sqlite3.IntegrityError, sqlite3.ProgrammingError, sqlite3.OperationalError) as e:
            logging.error("Locations database exception: {}".format(e))
            raise GracefulException("Locations database failure!") from e
Ejemplo n.º 7
0
    def select_(self, sql, *args):
        """
        Read (fetch) row from table.

        Opens a connection to ``self.db_file``, runs the query, fetches all rows and always
        closes the connection afterwards.

        :param sql: SQL query; may contain ``?`` placeholders.
        :param args: Optional values bound to the placeholders in ``sql``.
        :return: Table rows matching query (list of tuples, as returned by ``fetchall()``).
        :raises GracefulException: On ``sqlite3.OperationalError``.
        """

        try:
            connection = sqlite3.connect(self.db_file)
            try:
                # The connection context manager does not close the connection, so close explicitly.
                return connection.execute(sql, args).fetchall()
            finally:
                connection.close()
        except sqlite3.OperationalError as e:
            logging.error("Locations database exception: {}".format(e))
            raise GracefulException("Locations database failure!") from e
Ejemplo n.º 8
0
 def save_histogram(self):
     """
     Save histogram as a tab delimited file.

     Writes ``self.histogram_array`` to ``self.tsv_file`` with a title line, a
     ``bucketMin / bucketMax / count`` header and one row per bucket, using CRLF line endings.

     :raises GracefulException: If the file cannot be written (any ``OSError``).
     """
     try:
         np.savetxt(
             fname=self.tsv_file,
             X=self.histogram_array,
             # `comments` is the prefix savetxt writes in front of the header; used here as a title
             comments="Exercise tsv content with a bucket count of {}:\n\n".format(self.bins),
             header='{}\t{}\t{}'.format("bucketMin", "bucketMax", "count"),
             # No tabs inside the formats: `delimiter` already separates the columns, and embedding
             # them produced double tabs (empty columns) that did not line up with the header.
             fmt=['%.2f', '%.2f', '%d'],
             delimiter='\t',
             newline='\r\n')
         logging.info("Completed producing and saving histogram to '{}'.".format(self.tsv_file))
     except OSError as e:
         # OSError also covers its subclasses such as FileNotFoundError and PermissionError
         raise GracefulException("Could not save histogram to '{}'".format(self.tsv_file)) from e
Ejemplo n.º 9
0
    def initialize_(self):
        """
        Initialize location database and 'locations' table.

        Connecting creates the database file if needed, and ``CREATE TABLE IF NOT EXISTS`` makes
        the call idempotent: running it against an already initialized database changes nothing.

        :raises GracefulException: On ``sqlite3.OperationalError``.
        """

        try:
            connection = sqlite3.connect(self.db_file)
            try:
                # The connection context manager only commits/rolls back; it does not close.
                with connection:
                    connection.execute(
                        '''
                        CREATE TABLE IF NOT EXISTS locations (
                        ip TEXT PRIMARY KEY, ip_epoch INTEGER,
                        geolocated INTEGER, latitude REAL, longitude REAL, city TEXT, region TEXT, country TEXT,
                        forecast_temperature REAL, forecast_epoch INTEGER
                        )
                        ''')
            finally:
                connection.close()
        except sqlite3.OperationalError as e:
            logging.error("Locations database exception: {}".format(e))
            raise GracefulException("Unable to setup location database!") from e
Ejemplo n.º 10
0
def update_forecast_high_temperatures():
    """
    Ascertain next day's high temperature relative to a geographic location. Uses OpenWeatherMap API
    (https://openweathermap.org/api) via PyOWM library.

    PyOWM reference: https://pyowm.readthedocs.io/en/latest/usage-examples-v2/weather-api-usage-examples.html

    Using '/data/2.5/forecast/daily' route referenced here: https://openweathermap.org/forecast5

    If a location's `forecast_epoch` is less than the next day's offered epoch, the forecast temperature is stale and
    should be updated.

    We'll accept the first forecast day's information if the epoch is newer than a location's own, i.e., catch the next
    day's forecast before considering the day after next, etc.

    A location whose forecast could not be fetched, or for which no newer forecast day was offered, is left
    untouched in the database.

    :raises GracefulException: On fatal OpenWeatherMap failures (bad API key, connection, SSL, timeout, API error).
    """

    # Create common database object
    location_db = LocationDB()

    # Find locations update eligible
    current_epoch = gen_epoch(0)
    logging.info(
        "Querying locations with a forecast epoch older than `{}`".format(
            current_epoch))
    rows = location_db.select_(
        'SELECT ip, latitude, longitude, forecast_epoch FROM locations WHERE forecast_epoch < {}'
        .format(current_epoch))
    if not rows:
        logging.info("No locations with stale forecast data.")
        return

    # Update locations with latest forecast temperature
    owm = pyowm.OWM(app_settings['owm_api_key'], use_ssl=True)
    owm_rpm = app_settings['owm_rpm']
    use_faux_data = app_settings['faux_temperature_data'] == 1
    location_count = len(rows)
    rate_limiter = 0  # Counter for rpm
    sleep_time = 60  # Time to sleep per rpm intervals. Defaults to 1min. interval

    # Estimate time to complete queries: one minute of querying per `owm_rpm` locations, plus one sleep per interval
    intervals = location_count / owm_rpm if owm_rpm else 0
    time_to_complete = int(intervals + intervals * sleep_time / 60)
    logging.info(
        "It is estimated to take {}min. to complete fetching forecasts for {} location(s) due to OpenWeatherMap "
        "API rate limiter: {} queries/min. + {}sec. sleep timer each interval"
        .format(time_to_complete, location_count, owm_rpm, sleep_time))

    for ip, latitude, longitude, forecast_epoch in rows:
        rate_limiter += 1
        if use_faux_data:
            accepted = (random.uniform(70, 79), gen_epoch(1))
        else:
            accepted = _fetch_forecast_high(owm, ip, latitude, longitude, forecast_epoch)

        if accepted is None:
            # Nothing new for this location: never write an unbound or previous-row temperature
            logging.debug(
                "No newer forecast available for IP '{}'; leaving location unchanged."
                .format(ip))
        else:
            forecast_temperature, forecast_epoch = accepted

            # Update location database entry
            logging.debug(
                "Updating location IP '{}' with forecast high temperature '{}'"
                .format(ip, forecast_temperature))
            # Numeric values are written bare and the IP as a single-quoted string literal; SQLite only
            # accepts double-quoted string literals as a legacy quirk.
            location_db.update_(
                "UPDATE locations SET forecast_temperature = {}, forecast_epoch = {} WHERE ip = '{}'"
                .format(forecast_temperature, forecast_epoch, ip))

        # Basic rate limiting capability in lieu of a more complex object, or context manager
        if rate_limiter == owm_rpm:
            logging.debug(
                "OpenWeatherMap API rate limiter sleeping for {}sec.".
                format(sleep_time))
            rate_limiter = 0
            time.sleep(sleep_time)


def _fetch_forecast_high(owm, ip, latitude, longitude, forecast_epoch):
    """
    Fetch the first offered daily forecast that is newer than a location's current forecast epoch.

    :param owm: PyOWM client used to query the daily forecast.
    :param ip: IP address of the location (used for log messages only).
    :param latitude: Location latitude.
    :param longitude: Location longitude.
    :param forecast_epoch: Epoch of the forecast currently stored for the location.
    :return: Tuple ``(high temperature in fahrenheit, forecast day epoch)``, or ``None`` when no newer forecast
        day was offered or a non-fatal API failure occurred.
    :raises GracefulException: On fatal OpenWeatherMap failures (bad API key, connection, SSL, timeout, API error).
    """

    try:
        # Fetch weather forecast
        logging.debug(
            "Fetching daily weather forecast for IP '{}'".format(ip))
        forecast = owm.daily_forecast_at_coords(latitude, longitude, limit=2)
        forecast_json = json.loads(forecast.get_forecast().to_JSON())  # Load weather forecast

        # Loop forecast days and see if one is newer than our existing forecast epoch
        for forecast_day in forecast_json['weathers']:
            forecast_day = dict(forecast_day)
            rt = int(forecast_day['reference_time'])
            logging.debug(
                "Comparing IP '{}' forecast epoch '{}' to offered forecast day epoch '{}'"
                .format(ip, forecast_epoch, rt))
            if rt > forecast_epoch:
                logging.debug(
                    "Accepting offered forecast day epoch '{}' for IP '{}'"
                    .format(rt, ip))
                kelvin = float(forecast_day['temperature']['max'])
                return (kelvin - 273.15) * 9 / 5 + 32, rt  # Convert to fahrenheit
            logging.debug(
                "Rejecting offered forecast day epoch '{}' for IP '{} as location epoch is newer"
                .format(rt, ip))
    except ValueError:
        logging.debug(
            "OpenWeatherMap API failure for '{}: Invalid latitude, longitude '{},{}'"
            .format(ip, latitude, longitude))
    # The exception classes live on the `pyowm` package, not on the OWM client instance.
    except pyowm.exceptions.api_response_error.UnauthorizedError:
        # Catch OWM API key failures.
        raise GracefulException("Invalid OpenWeatherMap API key!")
    except pyowm.exceptions.api_response_error.NotFoundError as e:
        # Catch OWM API returning a location not found. Must precede the APIResponseError handler,
        # which would otherwise shadow it.
        logging.error(
            "OpenWeatherMap API returned 'not found' for IP '{}'. Cause: '{}'"
            .format(ip, e.cause))
    except pyowm.exceptions.api_response_error.APIResponseError as e:
        # Catch OWM API abnormal returns for a location.
        logging.error(
            "OpenWeatherMap API returned '{}' for IP '{}'".format(
                e.status_code, ip))
    except NewConnectionError:
        # Catch OWM API new connection failures.
        raise GracefulException(
            "Couldn't establish connection with OpenWeatherMap API! Please check network connectivity, and "
            "API service.")
    except pyowm.exceptions.api_call_error.APIInvalidSSLCertificateError:
        # Catch OWM API invalid SSL certification failures.
        raise GracefulException(
            "Couldn't establish session with OpenWeatherMap API! Invalid SSL certificate."
        )
    # A tuple is required here: `except A or B or C` evaluates to `A` and catches nothing else.
    except (timeout, ReadTimeoutError, pyowm.exceptions.api_call_error.APICallTimeoutError):
        # Catch OWM API connection breakdowns.
        raise GracefulException(
            "Connection with OpenWeatherMap API timed out!")
    except pyowm.exceptions.api_call_error.APICallError as e:
        # Catch OWM API internal errors, i.e., 500s, et al.
        raise GracefulException(
            "OpenWeatherMap API returned error: {}".format(
                e.cause))
    return None
Ejemplo n.º 11
0
    def _eval_log_file(self, filename, multiple_ips):
        """
        Evaluate log and produce IP list.

        Each line is scanned for dotted-quad candidates; every candidate is passed to ``self._eval_ip`` and
        kept only when that call returns a truthy value. When ``app_settings['reduce_sample_size']`` is 1,
        at most ``app_settings['max_sample_size']`` lines are parsed.

        :param filename: Log input filename.
        :param multiple_ips: Whether (True) or not (False) to consider multiple unique IPs within a single log entry.
        :return: List of public IP addresses (unique, sorted as strings); empty if none were found.
        :raises GracefulException: If ``filename`` does not exist.
        """

        ip_list = []
        logging.debug("Evaluating log file '" + filename + "'.")
        if app_settings['reduce_sample_size'] == 1:
            max_sample = app_settings['max_sample_size']
        else:
            max_sample = float('inf')

        ip_pattern = re.compile(r'[0-9]+(?:\.[0-9]+){3}')  # Compiled once for all lines
        parsed = 0  # Number of lines actually parsed; stays 0 for an empty file
        try:
            with open(filename, 'rt') as f:
                for n, l in enumerate(f, 1):
                    # Stop only after `max_sample` lines have been parsed (not one line early)
                    if n > max_sample:
                        logging.info(
                            "Reached reduced sample size limit of {} log lines."
                            .format(max_sample))
                        break
                    parsed = n
                    try:
                        m = list(set(ip_pattern.findall(l)))  # Find IP(s) in line, remove duplicates
                        if not m:
                            continue
                        if len(m) > 1:
                            if not multiple_ips:
                                logging.debug(
                                    "Conflicting IPs ('{}') in line {}".
                                    format(m, n))
                                continue
                            logging.debug(
                                "Multiple IPs ('{}') in line {}".
                                format(m, n))
                        else:
                            logging.debug(
                                "Found IP '{}' in line {}".format(m[0], n))
                        for ip in m:
                            # Use the evaluation result itself; wrapping it in `staticmethod(...)` made the
                            # test always truthy, so no IP was ever filtered out.
                            if self._eval_ip(ip, n):
                                ip_list.append(ip)
                    except TypeError:
                        logging.debug("Unable to process line {}.".format(n))
        except FileNotFoundError:
            raise GracefulException(
                "File '{}' was not found!".format(filename))

        # Remove duplicates, sort list pseudo numerically, return list
        ips_total = len(ip_list)
        ip_list = sorted(set(ip_list))
        ips_unique = len(ip_list)
        logging.info(
            "Parsed {} log entries, evaluated {} IPs, and built list composed of {} unique public IPs."
            .format(parsed, ips_total, ips_unique))
        # Guard the ratios so an empty log, or a log without any IP, cannot divide by zero
        log_percent = round(((ips_total / parsed) * 100), 2) if parsed else 0.0
        ips_percent = round(((ips_unique / ips_total) * 100), 2) if ips_total else 0.0
        logging.info(
            "{}% of log entries contained an IP address, and of those {}% are geolocation eligible."
            .format(log_percent, ips_percent))
        logging.debug("Produced IP list:\n{}".format(ip_list))
        return ip_list
Ejemplo n.º 12
0
 def build_ip_list(self):
     """
     Build the IP list from the configured log file.

     Delegates to ``self._eval_log_file`` with ``self.filename`` and ``self.multiple_ips``.

     :return: The IP list produced by ``_eval_log_file``.
     :raises GracefulException: If the produced list is empty (or otherwise falsy).
     """
     addresses = self._eval_log_file(self.filename, self.multiple_ips)
     if addresses:
         return addresses
     raise GracefulException(
         "Parsing and evaluating log produced no geolocation results!")