Example #1
0
class ValidatorTests(unittest.TestCase):
    def setUp(self):
        self.test_validator = ValidatorClass(path_to_pickle_files)

    def test_validate_location_returns_False_when_location_is_None(self):
        self.assertFalse(self.test_validator.validate_location(None))

    def test_validate_location_returns_False_when_location_is_string_None(
            self):
        self.assertFalse(self.test_validator.validate_location('None'))

    def test_validate_location_returns_True_when_location_is_not_None(self):
        self.assertTrue(self.test_validator.validate_location('Dublin'))

    def test_validate_text_returns_True_when_text_contains_no_word_in_banned_word_list(
            self):
        self.assertTrue(
            self.test_validator.validate_text_from_tweet('I have the flu!'))

    def test_validate_text_returns_False_when_text_is_empty(self):
        self.assertFalse(self.test_validator.validate_text_from_tweet(''))

    def test_validate_text_returns_False_when_text_contains_word_in_banned_word_list(
            self):
        self.assertFalse(
            self.test_validator.validate_text_from_tweet(
                'invalid text because has jab'))

    def test_validate_text_returns_True_when_text_is_valid(self):
        self.assertTrue(
            self.test_validator.validate_text_from_tweet('I have manflu'))
Example #2
0
class ValidatorTests(unittest.TestCase):
    def setUp(self):
        self.test_validator = ValidatorClass(path_to_pickle_files)

    def test_validate_location_returns_False_when_location_is_None(self):
        self.assertFalse(self.test_validator.validate_location(None))

    def test_validate_location_returns_False_when_location_is_string_None(self):
        self.assertFalse(self.test_validator.validate_location('None'))

    def test_validate_location_returns_True_when_location_is_not_None(self):
        self.assertTrue(self.test_validator.validate_location('Dublin'))

    def test_validate_text_returns_True_when_text_contains_no_word_in_banned_word_list(self):
        self.assertTrue(self.test_validator.validate_text_from_tweet('I have the flu!'))

    def test_validate_text_returns_False_when_text_is_empty(self):
        self.assertFalse(self.test_validator.validate_text_from_tweet(''))

    def test_validate_text_returns_False_when_text_contains_word_in_banned_word_list(self):
        self.assertFalse(self.test_validator.validate_text_from_tweet('invalid text because has jab'))

    def test_validate_text_returns_True_when_text_is_valid(self):
        self.assertTrue(self.test_validator.validate_text_from_tweet('I have manflu'))
Example #3
0
class DataCollector(StreamListener):
    def __init__(self):
        self.validator = ValidatorClass(pathToPickleFiles)
        self.geo_finder = GeolocationFinder()
        self.database_handler = DatabaseHandler(dbURL, dbPort, dbUser,
                                                dbPasswd)

    def on_data(self, raw_data):
        """
        When Listener detects a tweet with the keywords this method is called to handle the tweet.
        Sequence:
        - Load the json data
        - Validate tweet
        - Store if valid
        :param raw_data:
        :return: nothing
        """
        try:
            # Load the raw data
            json_data = json.loads(raw_data)

            # Get some required details from json data
            user_id, text, language, location, timestamp = self.get_data_from_json_data(
                json_data)

            # Check if text in tweet is valid before processing
            if text != 'invalid' and self.validator.validate_text_from_tweet(
                    text):
                record = {'created': timestamp, 'user_language': language}

                # Check if tweet contains a valid location
                if self.validator.validate_location(
                        location) and location != 'None':
                    # get location details of user
                    address, latitude, longitude = self.geo_finder.get_location(
                        location)

                    # If location has not returned None for lat and long, construct and record the map point in database
                    if (latitude is not None) and (longitude is not None) \
                            and (latitude != 'None') and (longitude != 'None'):
                        self.add_to_record(address, latitude, longitude,
                                           record)
                        self.record_map_point(latitude, longitude, timestamp,
                                              text)
                # Check if language is english
                if self.language_is_english(language):
                    self.database_handler.write_english_tweet_to_database(
                        record)
        except TypeError:
            logger.logging.exception('Error during on_data method')
        except ValueError:
            logger.logging.exception('Error during on_data method')

    def language_is_english(self, language):
        """
        Checks is language provided is english
        :param language:
        :return boolean value True/False
        """
        return (language == 'en') or (language == 'en-gb')

    def add_to_record(self, address, latitude, longitude, record):
        """
        Add location values to record which is a dictionary
        :param address: string value for address
        :param latitude: float value for latitude
        :param longitude: float value for longitude
        :param record: dictionary
        :return: nothing
        """
        record['address'] = address
        record['latitude'] = latitude
        record['longitude'] = longitude

    def record_map_point(self, latitude, longitude, timestamp, text):
        """
        creats a record(dictionary) for map point and calls the database handler to store it
        :param latitude: float value for latitude
        :param longitude: float value for longitude
        :param timestamp: string value for timestamp
        :param text: string value for text
        :return: nothing
        """
        map_point_record = {
            'date': int(timestamp),
            'lat': latitude,
            'long': longitude,
            'text': text
        }
        self.database_handler.write_map_point_to_database(map_point_record)

    def get_data_from_json_data(self, json_data):
        """
        extracts appropriate data from json data, if KeyError occurs sets attribute to unknown or none
        :param json_data:
        :return: user_id(string), text(string), user_language(string), location(string), timestamp(string)
        """
        try:
            user_id = json_data['user']['id_str']

        except KeyError:
            logger.logging.exception('KeyError while accessing user ID')
            user_id = 'unknown'
        try:
            user_language = json_data['user']['lang']
        except KeyError:
            logger.logging.exception('KeyError while accessing user language')
            user_language = 'unknown'
        try:
            location = json_data['user']['location']
        except KeyError:
            logger.logging.exception('KeyError while accessing user location')
            location = None
        try:
            text = json_data['text'].lower()
        except KeyError:
            # if keyError is raised set the text to a banned word so it will not be accepted
            text = 'invalid text'
            logger.logging.exception('KeyError while accessing tweet text')
        # Get time tweet picked up
        timestamp = self.get_timestamp()

        return user_id, text, user_language, location, timestamp

    def get_timestamp(self):
        """
        creates a timestamp in string format
        :return: timestamp(string)
        """
        now = datetime.datetime.now()
        day = str(now.day)
        month = str(now.month)
        year = str(now.year)

        if len(day) == 1:
            day = '0' + day
        if len(month) == 1:
            month = '0' + month
        timestamp = year + month + day
        return timestamp

    def on_error(self, status_code):
        logging.error('Twitter Stream returned status code:' +
                      str(status_code))
Example #4
0
class DataCollector(StreamListener):
    def __init__(self):
        self.validator = ValidatorClass(pathToPickleFiles)
        self.geo_finder = GeolocationFinder()
        self.database_handler = DatabaseHandler(dbURL, dbPort,dbUser, dbPasswd)

    def on_data(self, raw_data):
        """
        When Listener detects a tweet with the keywords this method is called to handle the tweet.
        Sequence:
        - Load the json data
        - Validate tweet
        - Store if valid
        :param raw_data:
        :return: nothing
        """
        try:
            # Load the raw data
            json_data = json.loads(raw_data)

            # Get some required details from json data
            user_id, text, language, location, timestamp = self.get_data_from_json_data(json_data)

            # Check if text in tweet is valid before processing
            if text != 'invalid' and self.validator.validate_text_from_tweet(text):
                record = {'created': timestamp, 'user_language': language}

                # Check if tweet contains a valid location
                if self.validator.validate_location(location) and location != 'None':
                    # get location details of user
                    address, latitude, longitude = self.geo_finder.get_location(location)

                    # If location has not returned None for lat and long, construct and record the map point in database
                    if (latitude is not None) and (longitude is not None) \
                            and (latitude != 'None') and (longitude != 'None'):
                        self.add_to_record(address, latitude, longitude, record)
                        self.record_map_point(latitude, longitude, timestamp, text)
                # Check if language is english
                if self.language_is_english(language):
                    self.database_handler.write_english_tweet_to_database(record)
        except TypeError:
            logger.logging.exception('Error during on_data method')
        except ValueError:
            logger.logging.exception('Error during on_data method')

    def language_is_english(self, language):
        """
        Checks is language provided is english
        :param language:
        :return boolean value True/False
        """
        return (language == 'en') or (language == 'en-gb')

    def add_to_record(self, address, latitude, longitude, record):
        """
        Add location values to record which is a dictionary
        :param address: string value for address
        :param latitude: float value for latitude
        :param longitude: float value for longitude
        :param record: dictionary
        :return: nothing
        """
        record['address'] = address
        record['latitude'] = latitude
        record['longitude'] = longitude

    def record_map_point(self, latitude, longitude, timestamp, text):
        """
        creats a record(dictionary) for map point and calls the database handler to store it
        :param latitude: float value for latitude
        :param longitude: float value for longitude
        :param timestamp: string value for timestamp
        :param text: string value for text
        :return: nothing
        """
        map_point_record = {'date': int(timestamp), 'lat': latitude, 'long': longitude, 'text': text}
        self.database_handler.write_map_point_to_database(map_point_record)

    def get_data_from_json_data(self, json_data):
        """
        extracts appropriate data from json data, if KeyError occurs sets attribute to unknown or none
        :param json_data:
        :return: user_id(string), text(string), user_language(string), location(string), timestamp(string)
        """
        try:
            user_id = json_data['user']['id_str']

        except KeyError:
            logger.logging.exception('KeyError while accessing user ID')
            user_id = 'unknown'
        try:
            user_language = json_data['user']['lang']
        except KeyError:
            logger.logging.exception('KeyError while accessing user language')
            user_language = 'unknown'
        try:
            location = json_data['user']['location']
        except KeyError:
            logger.logging.exception('KeyError while accessing user location')
            location = None
        try:
            text = json_data['text'].lower()
        except KeyError:
            # if keyError is raised set the text to a banned word so it will not be accepted
            text = 'invalid text'
            logger.logging.exception('KeyError while accessing tweet text')
        # Get time tweet picked up
        timestamp = self.get_timestamp()

        return user_id, text, user_language, location, timestamp

    def get_timestamp(self):
        """
        creates a timestamp in string format
        :return: timestamp(string)
        """
        now = datetime.datetime.now()
        day = str(now.day)
        month = str(now.month)
        year = str(now.year)

        if len(day) == 1:
            day = '0' + day
        if len(month) == 1:
            month = '0' + month
        timestamp = year + month + day
        return timestamp

    def on_error(self, status_code):
        logging.error('Twitter Stream returned status code:' + str(status_code))