def setUp(self):
     """Build the GeolocationFinder under test, then delegate to setup_mocks()."""
     finder = GeolocationFinder()
     self.test_geolocation_finder = finder
     self.setup_mocks()
Example #2
0
 def __init__(self):
     """Create the validator, geolocation finder and database handler.

     The pickle path and the database connection settings are names
     defined outside this method.
     """
     self.validator = ValidatorClass(pathToPickleFiles)
     self.geo_finder = GeolocationFinder()
     self.database_handler = DatabaseHandler(
         dbURL,
         dbPort,
         dbUser,
         dbPasswd,
     )
class GeolocationFinderTests(unittest.TestCase):
    """Unit tests for GeolocationFinder.

    Covers get_location (cache hit / geolocator fallback), the two
    set_geolocation_* helpers and get_addr_lat_long.
    """

    def setUp(self):
        """Create a fresh finder and install the mock geolocations."""
        self.test_geolocation_finder = GeolocationFinder()
        self.setup_mocks()

    def setup_mocks(self):
        """Build a mock geolocation and give the finder an equivalent one."""
        self.attrs = {'address': 'test_address', 'latitude': '0000', 'longitude': '0000'}
        # Stand-in for a geolocation returned by the cache or the geolocator.
        self.test_geolocation = Mock(**self.attrs)
        # Separate mock with the same attributes acting as the finder's current geolocation.
        self.test_geolocation_finder.geolocation = Mock(**self.attrs)

    def test_GetLocation_ReturnsNoneNoneNone_IfLocationIsNone(self):
        """get_location(None) returns a triple of None values."""
        # Checks
        self.assertEqual((None, None, None), self.test_geolocation_finder.get_location(None))

    def test_get_location_sets_geolocation_from_cache_When_geolocation_exists_in_cache(self):
        """A cached location is served through set_geolocation_from_cache."""
        # Arrange
        self.test_geolocation_finder.location_cache['Dublin'] = self.test_geolocation

        # Execute
        with patch.object(GeolocationFinder, 'set_geolocation_from_cache') as mock_method:
            self.test_geolocation_finder.get_location('Dublin')

        # Checks
        mock_method.assert_called_once_with('Dublin')

    def test_get_location_sets_geolocation_from_geolocator_when_not_present_in_cache(self):
        """An uncached location is resolved through set_geolocation_from_geolocator."""
        # Execute
        with patch.object(GeolocationFinder, 'set_geolocation_from_geolocator',
                          return_value=self.test_geolocation) as mock_method:
            self.test_geolocation_finder.get_location('Dublin')

        # Checks
        mock_method.assert_called_once_with('Dublin')

    def test_get_location_sets_geolocation_from_geolocator_adds_geolocation_to_cache(self):
        """A location resolved by the geolocator ends up in location_cache."""
        # Arrange
        self.test_geolocation_finder.location_cache = {}

        # Execute (geocode is patched so no real lookup is performed)
        with patch.object(Nominatim, 'geocode', return_value=self.test_geolocation):
            self.test_geolocation_finder.get_location('Dublin')

        # Checks
        self.assertIn('Dublin', self.test_geolocation_finder.location_cache)

    def test_set_geolocation_from_cache_sets_the_geolocation(self):
        """set_geolocation_from_cache stores the cached entry as the current geolocation."""
        # Arrange
        self.test_geolocation_finder.location_cache['Dublin'] = self.test_geolocation

        # Execute
        self.test_geolocation_finder.set_geolocation_from_cache('Dublin')

        # Checks
        self.assertEqual(self.test_geolocation_finder.geolocation, self.test_geolocation)

    def test_set_geolocation_from_geolocator_sets_geolocation(self):
        """set_geolocation_from_geolocator stores whatever geocode returns."""
        # Execute
        with patch.object(Nominatim, 'geocode', return_value=self.test_geolocation):
            self.test_geolocation_finder.set_geolocation_from_geolocator('Dublin')

        # Checks
        self.assertEqual(self.test_geolocation_finder.geolocation, self.test_geolocation)

    def test_set_geolocation_from_geolocator_sets_geolocation_to_None_when_GeocoderTimedOut_exception_raised(self):
        """A GeocoderTimedOut raised by geocode leaves the geolocation set to None."""
        # Execute
        with patch.object(Nominatim, 'geocode', side_effect=GeocoderTimedOut):
            self.test_geolocation_finder.set_geolocation_from_geolocator('Dublin')

        # Checks
        self.assertIsNone(self.test_geolocation_finder.geolocation)

    def test_get_addr_lat_long_raises_AttributeError_and_returns_None_None_None_when_no_geolocation_passed(self):
        """get_addr_lat_long returns (None, None, None) when the finder has no geolocation."""
        # Arrange
        self.test_geolocation_finder.geolocation = None

        # Execute
        address, latitude, longitude = self.test_geolocation_finder.get_addr_lat_long()

        # Checks
        self.assertIsNone(address)
        self.assertIsNone(latitude)
        self.assertIsNone(longitude)

    def test_get_addr_lat_long(self):
        """get_addr_lat_long returns the attributes of the current geolocation."""
        # Execute
        address, latitude, longitude = self.test_geolocation_finder.get_addr_lat_long()

        # Checks
        self.assertEqual(address, 'test_address')
        self.assertEqual(latitude, '0000')
        self.assertEqual(longitude, '0000')
Example #4
0
class DataCollector(StreamListener):
    """Stream listener that validates incoming tweets and stores them.

    Each raw message is decoded from JSON, its text and user location are
    checked with the validator, the location is resolved through the
    geolocation finder, and the resulting records are handed to the
    database handler.
    """

    # Placeholder assigned to the tweet text when the payload has no 'text'
    # key; on_data rejects it before any further processing.
    _MISSING_TEXT = 'invalid text'

    def __init__(self):
        """Create the validator, geolocation finder and database handler."""
        # NOTE(review): the base-class __init__ is not invoked here -- confirm
        # that StreamListener needs no initialisation of its own.
        self.validator = ValidatorClass(pathToPickleFiles)
        self.geo_finder = GeolocationFinder()
        self.database_handler = DatabaseHandler(dbURL, dbPort, dbUser,
                                                dbPasswd)

    def on_data(self, raw_data):
        """
        When Listener detects a tweet with the keywords this method is called to handle the tweet.
        Sequence:
        - Load the json data
        - Validate tweet
        - Store if valid
        TypeError and ValueError raised while processing are logged and
        swallowed, so a malformed message does not propagate to the caller.
        :param raw_data: JSON-encoded message
        :return: nothing
        """
        try:
            # Load the raw data
            json_data = json.loads(raw_data)

            # Get some required details from json data (the user id is not used here)
            _, text, language, location, timestamp = \
                self.get_data_from_json_data(json_data)

            # Reject the missing-text placeholder explicitly; the bare word
            # 'invalid' is still rejected for backward compatibility.
            if text in ('invalid', self._MISSING_TEXT):
                return
            if not self.validator.validate_text_from_tweet(text):
                return

            record = {'created': timestamp, 'user_language': language}

            # Check if tweet contains a valid location
            if self.validator.validate_location(location) \
                    and location != 'None':
                # get location details of user
                address, latitude, longitude = self.geo_finder.get_location(
                    location)

                # Both None and the string 'None' mean "no coordinate"; only
                # record a map point when both coordinates are usable.
                if (latitude is not None) and (longitude is not None) \
                        and (latitude != 'None') and (longitude != 'None'):
                    self.add_to_record(address, latitude, longitude, record)
                    self.record_map_point(latitude, longitude, timestamp,
                                          text)

            # Only tweets from English-language users are stored as records
            if self.language_is_english(language):
                self.database_handler.write_english_tweet_to_database(record)
        except (TypeError, ValueError):
            logger.logging.exception('Error during on_data method')

    def language_is_english(self, language):
        """
        Checks if the language provided is english
        :param language: language code taken from the tweet's user data
        :return boolean value True/False
        """
        return language in ('en', 'en-gb')

    def add_to_record(self, address, latitude, longitude, record):
        """
        Add location values to record which is a dictionary (modified in place)
        :param address: string value for address
        :param latitude: float value for latitude
        :param longitude: float value for longitude
        :param record: dictionary
        :return: nothing
        """
        record['address'] = address
        record['latitude'] = latitude
        record['longitude'] = longitude

    def record_map_point(self, latitude, longitude, timestamp, text):
        """
        creates a record(dictionary) for map point and calls the database handler to store it
        :param latitude: float value for latitude
        :param longitude: float value for longitude
        :param timestamp: string value for timestamp, stored as int(timestamp)
        :param text: string value for text
        :return: nothing
        """
        map_point_record = {
            'date': int(timestamp),
            'lat': latitude,
            'long': longitude,
            'text': text,
        }
        self.database_handler.write_map_point_to_database(map_point_record)

    def get_data_from_json_data(self, json_data):
        """
        extracts appropriate data from json data; a KeyError is logged and the
        attribute falls back to 'unknown' (user id, language), None (location)
        or the missing-text placeholder (text)
        :param json_data: decoded tweet payload
        :return: user_id(string), text(string), user_language(string), location(string), timestamp(string)
        """
        try:
            user_id = json_data['user']['id_str']
        except KeyError:
            logger.logging.exception('KeyError while accessing user ID')
            user_id = 'unknown'
        try:
            user_language = json_data['user']['lang']
        except KeyError:
            logger.logging.exception('KeyError while accessing user language')
            user_language = 'unknown'
        try:
            location = json_data['user']['location']
        except KeyError:
            logger.logging.exception('KeyError while accessing user location')
            location = None
        try:
            text = json_data['text'].lower()
        except KeyError:
            # if KeyError is raised use the placeholder so the tweet will not be accepted
            text = self._MISSING_TEXT
            logger.logging.exception('KeyError while accessing tweet text')
        # Get time tweet picked up
        timestamp = self.get_timestamp()

        return user_id, text, user_language, location, timestamp

    def get_timestamp(self):
        """
        creates a timestamp for the current local date in 'YYYYMMDD' string format
        :return: timestamp(string)
        """
        # strftime zero-pads month and day, replacing the manual padding.
        return datetime.datetime.now().strftime('%Y%m%d')

    def on_error(self, status_code):
        """
        Log the status code reported by the stream
        :param status_code: status code passed in by the caller
        :return: nothing
        """
        # Same logger.logging entry point as the rest of the class.
        logger.logging.error('Twitter Stream returned status code:' +
                             str(status_code))
Example #5
0
 def __init__(self):
     """Create the validator, geolocation finder and database handler.

     The pickle path and the database connection settings are names
     defined outside this method.
     """
     self.validator = ValidatorClass(pathToPickleFiles)
     self.geo_finder = GeolocationFinder()
     self.database_handler = DatabaseHandler(
         dbURL,
         dbPort,
         dbUser,
         dbPasswd,
     )
Example #6
0
class DataCollector(StreamListener):
    """Stream listener that validates incoming tweets and stores them.

    Each raw message is decoded from JSON, its text and user location are
    checked with the validator, the location is resolved through the
    geolocation finder, and the resulting records are handed to the
    database handler.
    """

    # Placeholder assigned to the tweet text when the payload has no 'text'
    # key; on_data rejects it before any further processing.
    _MISSING_TEXT = 'invalid text'

    def __init__(self):
        """Create the validator, geolocation finder and database handler."""
        # NOTE(review): the base-class __init__ is not invoked here -- confirm
        # that StreamListener needs no initialisation of its own.
        self.validator = ValidatorClass(pathToPickleFiles)
        self.geo_finder = GeolocationFinder()
        self.database_handler = DatabaseHandler(dbURL, dbPort, dbUser, dbPasswd)

    def on_data(self, raw_data):
        """
        When Listener detects a tweet with the keywords this method is called to handle the tweet.
        Sequence:
        - Load the json data
        - Validate tweet
        - Store if valid
        TypeError and ValueError raised while processing are logged and
        swallowed, so a malformed message does not propagate to the caller.
        :param raw_data: JSON-encoded message
        :return: nothing
        """
        try:
            # Load the raw data
            json_data = json.loads(raw_data)

            # Get some required details from json data (the user id is not used here)
            _, text, language, location, timestamp = self.get_data_from_json_data(json_data)

            # Reject the missing-text placeholder explicitly; the bare word
            # 'invalid' is still rejected for backward compatibility.
            if text in ('invalid', self._MISSING_TEXT):
                return
            if not self.validator.validate_text_from_tweet(text):
                return

            record = {'created': timestamp, 'user_language': language}

            # Check if tweet contains a valid location
            if self.validator.validate_location(location) and location != 'None':
                # get location details of user
                address, latitude, longitude = self.geo_finder.get_location(location)

                # Both None and the string 'None' mean "no coordinate"; only
                # record a map point when both coordinates are usable.
                if (latitude is not None) and (longitude is not None) \
                        and (latitude != 'None') and (longitude != 'None'):
                    self.add_to_record(address, latitude, longitude, record)
                    self.record_map_point(latitude, longitude, timestamp, text)

            # Only tweets from English-language users are stored as records
            if self.language_is_english(language):
                self.database_handler.write_english_tweet_to_database(record)
        except (TypeError, ValueError):
            logger.logging.exception('Error during on_data method')

    def language_is_english(self, language):
        """
        Checks if the language provided is english
        :param language: language code taken from the tweet's user data
        :return boolean value True/False
        """
        return language in ('en', 'en-gb')

    def add_to_record(self, address, latitude, longitude, record):
        """
        Add location values to record which is a dictionary (modified in place)
        :param address: string value for address
        :param latitude: float value for latitude
        :param longitude: float value for longitude
        :param record: dictionary
        :return: nothing
        """
        record['address'] = address
        record['latitude'] = latitude
        record['longitude'] = longitude

    def record_map_point(self, latitude, longitude, timestamp, text):
        """
        creates a record(dictionary) for map point and calls the database handler to store it
        :param latitude: float value for latitude
        :param longitude: float value for longitude
        :param timestamp: string value for timestamp, stored as int(timestamp)
        :param text: string value for text
        :return: nothing
        """
        map_point_record = {'date': int(timestamp), 'lat': latitude, 'long': longitude, 'text': text}
        self.database_handler.write_map_point_to_database(map_point_record)

    def get_data_from_json_data(self, json_data):
        """
        extracts appropriate data from json data; a KeyError is logged and the
        attribute falls back to 'unknown' (user id, language), None (location)
        or the missing-text placeholder (text)
        :param json_data: decoded tweet payload
        :return: user_id(string), text(string), user_language(string), location(string), timestamp(string)
        """
        try:
            user_id = json_data['user']['id_str']
        except KeyError:
            logger.logging.exception('KeyError while accessing user ID')
            user_id = 'unknown'
        try:
            user_language = json_data['user']['lang']
        except KeyError:
            logger.logging.exception('KeyError while accessing user language')
            user_language = 'unknown'
        try:
            location = json_data['user']['location']
        except KeyError:
            logger.logging.exception('KeyError while accessing user location')
            location = None
        try:
            text = json_data['text'].lower()
        except KeyError:
            # if KeyError is raised use the placeholder so the tweet will not be accepted
            text = self._MISSING_TEXT
            logger.logging.exception('KeyError while accessing tweet text')
        # Get time tweet picked up
        timestamp = self.get_timestamp()

        return user_id, text, user_language, location, timestamp

    def get_timestamp(self):
        """
        creates a timestamp for the current local date in 'YYYYMMDD' string format
        :return: timestamp(string)
        """
        # strftime zero-pads month and day, replacing the manual padding.
        return datetime.datetime.now().strftime('%Y%m%d')

    def on_error(self, status_code):
        """
        Log the status code reported by the stream
        :param status_code: status code passed in by the caller
        :return: nothing
        """
        # Same logger.logging entry point as the rest of the class.
        logger.logging.error('Twitter Stream returned status code:' + str(status_code))
 def setUp(self):
     """Build the GeolocationFinder under test, then delegate to setup_mocks()."""
     finder = GeolocationFinder()
     self.test_geolocation_finder = finder
     self.setup_mocks()
class GeolocationFinderTests(unittest.TestCase):
    """Unit tests for GeolocationFinder.

    Covers get_location (cache hit / geolocator fallback), the two
    set_geolocation_* helpers and get_addr_lat_long.
    """

    def setUp(self):
        """Create a fresh finder and install the mock geolocations."""
        self.test_geolocation_finder = GeolocationFinder()
        self.setup_mocks()

    def setup_mocks(self):
        """Build a mock geolocation and give the finder an equivalent one."""
        self.attrs = {
            'address': 'test_address',
            'latitude': '0000',
            'longitude': '0000'
        }
        # Stand-in for a geolocation returned by the cache or the geolocator.
        self.test_geolocation = Mock(**self.attrs)
        # Separate mock with the same attributes acting as the finder's
        # current geolocation.
        self.test_geolocation_finder.geolocation = Mock(**self.attrs)

    def test_GetLocation_ReturnsNoneNoneNone_IfLocationIsNone(self):
        """get_location(None) returns a triple of None values."""
        # Checks
        self.assertEqual((None, None, None),
                         self.test_geolocation_finder.get_location(None))

    def test_get_location_sets_geolocation_from_cache_When_geolocation_exists_in_cache(
            self):
        """A cached location is served through set_geolocation_from_cache."""
        # Arrange
        self.test_geolocation_finder.location_cache[
            'Dublin'] = self.test_geolocation

        # Execute
        with patch.object(GeolocationFinder,
                          'set_geolocation_from_cache') as mock_method:
            self.test_geolocation_finder.get_location('Dublin')

        # Checks
        mock_method.assert_called_once_with('Dublin')

    def test_get_location_sets_geolocation_from_geolocator_when_not_present_in_cache(
            self):
        """An uncached location goes through set_geolocation_from_geolocator."""
        # Execute
        with patch.object(GeolocationFinder,
                          'set_geolocation_from_geolocator',
                          return_value=self.test_geolocation) as mock_method:
            self.test_geolocation_finder.get_location('Dublin')

        # Checks
        mock_method.assert_called_once_with('Dublin')

    def test_get_location_sets_geolocation_from_geolocator_adds_geolocation_to_cache(
            self):
        """A location resolved by the geolocator ends up in location_cache."""
        # Arrange
        self.test_geolocation_finder.location_cache = {}

        # Execute (geocode is patched so no real lookup is performed)
        with patch.object(Nominatim,
                          'geocode',
                          return_value=self.test_geolocation):
            self.test_geolocation_finder.get_location('Dublin')

        # Checks
        self.assertIn('Dublin', self.test_geolocation_finder.location_cache)

    def test_set_geolocation_from_cache_sets_the_geolocation(self):
        """set_geolocation_from_cache stores the cached entry as current."""
        # Arrange
        self.test_geolocation_finder.location_cache[
            'Dublin'] = self.test_geolocation

        # Execute
        self.test_geolocation_finder.set_geolocation_from_cache('Dublin')

        # Checks
        self.assertEqual(self.test_geolocation_finder.geolocation,
                         self.test_geolocation)

    def test_set_geolocation_from_geolocator_sets_geolocation(self):
        """set_geolocation_from_geolocator stores whatever geocode returns."""
        # Execute
        with patch.object(Nominatim,
                          'geocode',
                          return_value=self.test_geolocation):
            self.test_geolocation_finder.set_geolocation_from_geolocator(
                'Dublin')

        # Checks
        self.assertEqual(self.test_geolocation_finder.geolocation,
                         self.test_geolocation)

    def test_set_geolocation_from_geolocator_sets_geolocation_to_None_when_GeocoderTimedOut_exception_raised(
            self):
        """A GeocoderTimedOut from geocode leaves the geolocation as None."""
        # Execute
        with patch.object(Nominatim, 'geocode', side_effect=GeocoderTimedOut):
            self.test_geolocation_finder.set_geolocation_from_geolocator(
                'Dublin')

        # Checks
        self.assertIsNone(self.test_geolocation_finder.geolocation)

    def test_get_addr_lat_long_raises_AttributeError_and_returns_None_None_None_when_no_geolocation_passed(
            self):
        """get_addr_lat_long returns (None, None, None) without a geolocation."""
        # Arrange
        self.test_geolocation_finder.geolocation = None

        # Execute
        address, latitude, longitude = \
            self.test_geolocation_finder.get_addr_lat_long()

        # Checks
        self.assertIsNone(address)
        self.assertIsNone(latitude)
        self.assertIsNone(longitude)

    def test_get_addr_lat_long(self):
        """get_addr_lat_long returns the current geolocation's attributes."""
        # Execute
        address, latitude, longitude = \
            self.test_geolocation_finder.get_addr_lat_long()

        # Checks
        self.assertEqual(address, 'test_address')
        self.assertEqual(latitude, '0000')
        self.assertEqual(longitude, '0000')