Esempio n. 1
0
    def write_to_file_chunks(file_path, lines, chunk_size=100):
        """
        Append a list of lines to the file at file_path, chunk by chunk.

        Every line gets a trailing newline. Inputs shorter than chunk_size
        are written with a single writelines call; longer inputs are split
        with FileUtil.chunks and written one chunk at a time.

        Parameters
        ----------
        file_path : str
            The file path. Its parent directory is created if missing
            (via FileUtil.check_directory).
        lines : list
            The lines we want to write to the file (without newlines).
        chunk_size : int, default 100
            The number of lines to write in one IO operation.

        Returns
        -------
        None
            Errors are not raised; they are passed to ErrorWrapper and the
            result of its handle() is printed.

        """
        try:
            FileUtil.check_directory(file_path)
            # The context manager closes the handle even when a write fails.
            with io.open(file_path, 'a', encoding="utf-8") as f:
                # Compare against the caller's chunk_size rather than a
                # fixed 100 so the threshold follows the parameter.
                if len(lines) < chunk_size:
                    f.writelines(line + '\n' for line in lines)
                else:
                    # presumably FileUtil.chunks yields consecutive slices of
                    # at most chunk_size items -- defined elsewhere, verify.
                    for chunk in FileUtil.chunks(lines, chunk_size):
                        f.writelines(line + '\n' for line in chunk)
        except Exception as e:
            print(ErrorWrapper(e).handle())
Esempio n. 2
0
    def csv_reader(file_path, columns_names, index_col_name):
        """
        Read a comma separated file and return its data rows as tuples.

        The file is parsed without a header row, using columns_names as the
        column labels. The first parsed row is discarded, and every remaining
        row is returned with its index value placed in front.

        Parameters
        ----------
        file_path : str
            The file path.
        columns_names : list
            The column names to assign to the parsed columns.
        index_col_name : str
            The name of the column used as the index.

        Returns
        -------
        list of tuple
            One tuple per row after the first: the index value followed by
            the remaining column values. None if reading fails (the error is
            passed to ErrorWrapper and printed).

        """
        try:
            frame = pandas.read_csv(file_path, sep="[,]", lineterminator='\n', engine='python', header=None, names=columns_names, index_col=index_col_name, quoting=csv.QUOTE_NONE)
            # Drop the first record from both the values and the index.
            value_rows = [tuple(record) for record in frame.values][1:]
            index_values = list(frame.index.values)[1:]
            return [(index_value,) + record for record, index_value in zip(value_rows, index_values)]
        except Exception as e:
            print(ErrorWrapper(e).handle())
Esempio n. 3
0
    def check_directory(file_path):
        """
        Make sure the directory part of file_path exists, creating it
        (including missing parents) when it does not.

        A path with no directory component is left alone. A path that ends
        with a separator is treated as a directory and created as such.

        Parameters
        ----------
        file_path : str
            The file path.

        Returns
        -------
        None
            Errors are not raised; they are passed to ErrorWrapper and the
            result of its handle() is printed.

        """
        try:
            the_directory = os.path.dirname(file_path)
            if the_directory:
                # exist_ok avoids the check-then-create race and makes
                # repeated calls harmless.
                os.makedirs(the_directory, exist_ok=True)
        except Exception as e:
            print(ErrorWrapper(e).handle())
Esempio n. 4
0
    def handle_status(self, status, config):
        """
        Route an incoming status: buffer it while the current hourly window
        is open, otherwise flush the buffers.

        While hourly_elapsed_time is below 60 the status is parsed and both
        elapsed counters are refreshed from the current clock. Once the
        window is over, the hourly window is restarted and handle_batching
        is called; if daily_elapsed_time is above 3, the daily window is
        restarted too and handle_reactions is called. Note that the status
        received on a flushing call is not parsed.

        Parameters
        ----------
        status : tweepy.Status
            The status to handle.
        config : str
            The application keys file path (for reactions collecting).

        Returns
        -------
        None
            Errors are passed to ErrorWrapper.

        """
        try:
            now = datetime.now()
            window_open = self.hourly_elapsed_time < 60
            if window_open:
                self.parse(status)
                # Minutes since the hourly window started.
                minutes_passed = (now - self.hourly_start_time).total_seconds() / 60
                self.hourly_elapsed_time = minutes_passed
                # Whole days since the daily window started.
                days_passed = (now - self.daily_start_time).days
                self.daily_elapsed_time = days_passed
                return
            # Hourly window is over: restart it and flush the buffers.
            self.hourly_elapsed_time = 0
            self.hourly_start_time = now
            self.handle_batching()
            if self.daily_elapsed_time > 3:
                self.daily_elapsed_time = 0
                self.daily_start_time = now
                self.handle_reactions(config, self.days_range)
        except Exception as e:
            ErrorWrapper(e).handle()
Esempio n. 5
0
    def get_ids(self, days_range):
        """
        Collect the stored status ids for each day produced by DatesHandler
        for today and today minus days_range.

        For every date, the regular files inside <location>/1/<date> are
        read with FileUtil.file_reader.

        Parameters
        ----------
        days_range : int
            The number of days range to count back.

        Returns
        -------
        ids : list
            One list per date; each holds a dict per file with the file's
            lines under "data" and the file name under "index". None if an
            error occurs (it is passed to ErrorWrapper).

        """
        try:
            today = datetime.today()
            collected = []
            for date in DatesHandler(today, today - timedelta(days_range)).date_range():
                day_dir = self.location + "/1/" + str(date.date())
                # Keep regular files only; sub-directories are skipped.
                file_names = [name for name in listdir(day_dir) if isfile(join(day_dir, name))]
                collected.append([
                    {"data": FileUtil.file_reader(day_dir + "/" + name), "index": name}
                    for name in file_names
                ])
            return collected
        except Exception as e:
            ErrorWrapper(e).handle()
Esempio n. 6
0
    def handle_reactions(self, config, days_range):
        """
        Fetch the favourites and retweets counts and append them to files.

        The counts come from get_reactions, which returns one entry per day.
        Each day's entries are written under <location>/16/<date>/ for
        favourites and <location>/17/<date>/ for retweets, using the "index"
        of each entry as the file name.

        Parameters
        ----------
        config : str
            The Twitter application configuration file path.
        days_range : int
            The previous days range to get.

        Returns
        -------
        None
            Errors are passed to ErrorWrapper.

        """
        try:
            (favourites_count, retweets_count) = self.get_reactions(config, days_range)
            today = datetime.today()
            dates = DatesHandler(today, today - timedelta(days_range)).date_range()
            # Pair every date with its own per-day results starting from the
            # first entry. Starting the index at self.days_range picked the
            # wrong day and then ran past the end of the lists.
            for date, day_favourites, day_retweets in zip(dates, favourites_count, retweets_count):
                day = str(date.date())
                favourites_files_path = self.location + "/16/" + day + "/"
                retweets_files_path = self.location + "/17/" + day + "/"
                for hour_favourites, hour_retweets in zip(day_favourites, day_retweets):
                    FileUtil.write_to_file_chunks(favourites_files_path + hour_favourites["index"], hour_favourites["data"], 500)
                    FileUtil.write_to_file_chunks(retweets_files_path + hour_retweets["index"], hour_retweets["data"], 500)
        except Exception as e:
            ErrorWrapper(e).handle()
Esempio n. 7
0
    def build_api(self):
        """
        Build a tweepy API instance from the credentials file in self.config.

        The first four lines of the file are used, in order, as the two
        OAuthHandler arguments and the two set_access_token arguments.

        Parameters
        ----------

        Returns
        -------
        Api
            The twitter API instance, or None if building it fails (the
            error is passed to ErrorWrapper and printed).

        """
        try:
            config = FileUtil.file_reader(self.config)
            auth = OAuthHandler(config[0], config[1])
            auth.set_access_token(config[2], config[3])
            # retry_errors must be a collection of HTTP status codes; a bare
            # int cannot be used in a membership test.
            # NOTE(review): the original value was 5, presumably meaning
            # "5xx server errors" -- confirm the intended codes.
            return API(auth,
                       wait_on_rate_limit=True,
                       wait_on_rate_limit_notify=True,
                       retry_count=10,
                       retry_delay=5,
                       retry_errors={500, 502, 503, 504})
        except Exception as e:
            print(ErrorWrapper(e).handle())
Esempio n. 8
0
    def write_to_file(file_path, lines):
        """
        Append a list of lines to the file at file_path, one per line.

        Trailing whitespace is removed from every line before it is written
        and a newline is added. The caller's list is not modified.

        Parameters
        ----------
        file_path : str
            The file path. Its parent directory is created if missing
            (via FileUtil.check_directory).
        lines : list
            The lines we want to write to the file.

        Returns
        -------
        None
            Errors are not raised; they are passed to ErrorWrapper and the
            result of its handle() is printed.

        """
        try:
            FileUtil.check_directory(file_path)
            # The context manager closes the handle even when a write fails.
            with io.open(file_path, 'a', encoding="utf-8") as f:
                for line in lines:
                    # Write the stripped text itself. Previously it was only
                    # stored back into the input list while the unstripped
                    # line went to disk.
                    f.write(line.rstrip() + '\n')
        except Exception as e:
            print(ErrorWrapper(e).handle())
Esempio n. 9
0
    def parse(self, status):
        """
        Extract the stored attributes of a status and append each one to its
        column in self.statuses.

        All values are converted to strings. Column layout:
        0 id, 1 text (whitespace collapsed to single spaces), 2 is-retweet
        flag, 3 place country code, 4 created_at, 5 author id, 6 author
        screen name, 7-10 retweeted status id / author screen name /
        retweet count / favourite count, 11 number of urls, 12 number of
        hashtags, 13 author followers count, 14 author friends count.
        Missing retweet or place data is stored as the string "None".

        Every value is computed before anything is appended, so a status
        that fails to parse leaves all columns untouched and aligned.

        Parameters
        ----------
        status : tweepy.Status
            The status to parse.

        Returns
        -------
        None
            Errors are passed to ErrorWrapper.

        """
        try:
            text = status.text
            text = re.sub(u"\n", u" ", text)
            text = re.sub(u"\\s+", u" ", text)

            if hasattr(status, 'retweeted_status'):
                source = status.retweeted_status
                retweet_columns = (
                    str(True),
                    str(source.id),
                    str(source.user.screen_name),
                    str(source.retweet_count),
                    str(source.favorite_count),
                )
            else:
                retweet_columns = (str(False),) + (str(None),) * 4
            is_retweet, source_id, source_author, source_retweets, source_favourites = retweet_columns

            if hasattr(status, 'place') and hasattr(status.place, 'country_code'):
                country_code = str(status.place.country_code)
            else:
                country_code = str(None)

            # One value per column, in column order (index == column number).
            row = [
                str(status.id),
                text,
                is_retweet,
                country_code,
                str(status.created_at),
                str(status.user.id),
                str(status.user.screen_name),
                source_id,
                source_author,
                source_retweets,
                source_favourites,
                str(len(status.entities['urls'])),
                str(len(status.entities['hashtags'])),
                str(status.user.followers_count),
                str(status.user.friends_count),
            ]
            # Append only after every value was computed successfully.
            for column, value in enumerate(row):
                self.statuses[column].append(value)
        except Exception as e:
            ErrorWrapper(e).handle()
            
Esempio n. 10
0
    def get_reactions(self, config, days_range):
        """
        Look up the favourites and retweets counts of every id returned by
        get_ids(days_range).

        The two results mirror the structure returned by get_ids: a list per
        day, holding a dict per file with the counts (as strings) under
        "data" and the file name under "index".

        Parameters
        ----------
        config : str
            The Twitter application configuration file path.
        days_range : int
            The number of days range to count back.

        Returns
        -------
        favourites_count : list
            The favourites counts, grouped by day and then by file.
        retweets_count : list
            The retweets counts, grouped by day and then by file.
            None is returned instead of the pair if an error occurs (it is
            passed to ErrorWrapper).

        """
        try:
            ids_by_day = self.get_ids(days_range)
            favourites_count, retweets_count = [], []
            twitter = TwitterApiWrapper(config)
            for day_batches in ids_by_day:
                favourites_of_day, retweets_of_day = [], []
                for batch in day_batches:
                    favourite_values, retweet_values = [], []
                    for tweet_id in batch["data"]:
                        favourite_values.append(str(twitter.get_favourites(tweet_id)))
                        retweet_values.append(str(twitter.get_retweets(tweet_id)))
                    favourites_of_day.append({"data": favourite_values, "index": batch["index"]})
                    retweets_of_day.append({"data": retweet_values, "index": batch["index"]})
                favourites_count.append(favourites_of_day)
                retweets_count.append(retweets_of_day)
            return favourites_count, retweets_count
        except Exception as e:
            ErrorWrapper(e).handle()
Esempio n. 11
0
    def handle_batching(self):
        """
        Flush every column of self.statuses to its own file and empty it.

        Column i is appended to
        <location>/<i+1>/<date>/<hour>__<i>.txt. The date and hour are taken
        from a single clock reading, so all files of one batch share the
        same date folder and hour prefix.

        Parameters
        ----------

        Returns
        -------
        None
            Errors are passed to ErrorWrapper.

        """
        try:
            # Read the clock once; reading it per file could split one batch
            # across two hours or two days.
            now = datetime.now()
            day_folder = str(now.date())
            hour_prefix = str(now.hour)
            for i in range(len(self.statuses)):
                store_path = self.location + "/" + str(i + 1) + "/" + day_folder + "/" + hour_prefix + "__" + str(i) + ".txt"
                FileUtil.write_to_file_chunks(store_path, self.statuses[i], 500)
                self.statuses[i] = []
        except Exception as e:
            ErrorWrapper(e).handle()
Esempio n. 12
0
    def date_range(self):
        """
        Generate every date from start_date to end_date, both included, in
        steps of one day.

        Nothing is produced when end_date is earlier than start_date.

        Parameters
        ----------

        Yields
        -------
        datetime
            start_date plus 0, 1, 2, ... days, up to the whole number of
            days between start_date and end_date.

        """
        try:
            total_days = int((self.end_date - self.start_date).days)
            offset = 0
            while offset <= total_days:
                yield self.start_date + timedelta(offset)
                offset += 1
        except Exception as e:
            print(ErrorWrapper(e).handle())
Esempio n. 13
0
    def file_reader(file_path):
        """
        Read the UTF-8 text file at file_path and return its lines.

        Parameters
        ----------
        file_path : str
            The file path.

        Returns
        -------
        list of str
            The file's lines without line terminators. None if reading
            fails (the error is passed to ErrorWrapper and printed).

        """
        try:
            # The context manager closes the file once it has been read; it
            # was previously left open.
            with open(file_path, encoding="utf-8", buffering=(2 << 16) + 8) as f:
                return f.read().splitlines()
        except Exception as e:
            print(ErrorWrapper(e).handle())
Esempio n. 14
0
    def get_stream_api_instance(self):
        """
        Create a Stream that uses the auth of a freshly built API instance
        and this object's listener.

        Parameters
        ----------

        Returns
        -------
        Stream
            The twitter streaming API instance, or None if building it
            fails (the error is passed to ErrorWrapper and printed).

        """
        try:
            return Stream(auth=self.build_api().auth, listener=self.listener)
        except Exception as e:
            print(ErrorWrapper(e).handle())
Esempio n. 15
0
    def get_user(self, status_id):
        """
        Return the author of the status with the given ID.

        Parameters
        ----------
        status_id : int
            The status ID.

        Returns
        -------
        object
            The `author` attribute of the status returned by
            get_status_obj. None if an error occurs (it is passed to
            ErrorWrapper).

        """
        try:
            return self.get_status_obj(status_id).author
        except Exception as e:
            ErrorWrapper(e).handle()
Esempio n. 16
0
    def append_files(read_file_path, write_file_path):
        """
        Append the lines of one file to another file.

        The lines are read with FileUtil.file_reader and written with
        FileUtil.write_to_file.

        Parameters
        ----------
        read_file_path : str
            The read file path.
        write_file_path : str
            The write file path.

        Returns
        -------
        None
            Errors are not raised; they are passed to ErrorWrapper and the
            result of its handle() is printed.

        """
        try:
            FileUtil.write_to_file(write_file_path, FileUtil.file_reader(read_file_path))
        except Exception as e:
            print(ErrorWrapper(e).handle())
Esempio n. 17
0
    def get_status_obj(self, status_id):
        """
        Fetch a status by its ID.

        A new API instance is built through build_api on every call.

        Parameters
        ----------
        status_id : int
            The status ID.

        Returns
        -------
        Status
            The resulting Status object. None if an error occurs (it is
            passed to ErrorWrapper).

        """
        try:
            return self.build_api().get_status(status_id)
        except Exception as e:
            ErrorWrapper(e).handle()
Esempio n. 18
0
    def get_friends(self, status_id):
        """
        Count the friends of the author of the given status.

        The author is obtained through get_user, and the count is the
        length of what the author's friends_ids call returns for the
        author's own id.

        Parameters
        ----------
        status_id : int
            The status ID.

        Returns
        -------
        int
            The status's author friends count. None if an error occurs (it
            is passed to ErrorWrapper).

        """
        try:
            author = self.get_user(status_id)
            friend_ids = author.friends_ids(author.id)
            return len(friend_ids)
        except Exception as e:
            ErrorWrapper(e).handle()
Esempio n. 19
0
    def __init__(self, start_time, location, days_range, config, batcher=None):
        """
        Set up the batching state, either from the given arguments or by
        copying it from another batcher.

        When batcher is given, all other arguments are ignored and every
        attribute is taken from it; the statuses list is shared with the
        batcher, not duplicated.

        Parameters
        ----------
        start_time : datetime
            The batch start time (used for both the hourly and the daily
            window).
        location : str
            The batch requested location in file system.
        days_range : int
            The number of previous days to look for when collecting reactions (favourites and retweets).
        config : str
            The application keys file path (for reactions collection).
        batcher : Batcher or None
            The object to copy the state from.

        """
        try:
            if batcher is not None:
                # Copy mode: mirror each attribute of the given batcher.
                for attribute in ("daily_start_time", "hourly_start_time",
                                  "hourly_elapsed_time", "daily_elapsed_time",
                                  "location", "days_range", "config", "statuses"):
                    setattr(self, attribute, getattr(batcher, attribute))
                return
            self.daily_start_time = self.hourly_start_time = start_time
            self.hourly_elapsed_time = self.daily_elapsed_time = 0
            self.location, self.days_range, self.config = location, days_range, config
            # One independent buffer per stored status attribute.
            self.statuses = [[] for _ in range(15)]
        except Exception as e:
            ErrorWrapper(e).handle()
Esempio n. 20
0
    def write_item(file_path, line):
        """
        Append a string to the file at file_path.

        The string is written as given; no newline is added.

        Parameters
        ----------
        file_path : str
            The write file path.
        line : str
            The item we want to write to the file.

        Returns
        -------
        None
            Errors are not raised; they are passed to ErrorWrapper and the
            result of its handle() is printed.

        """
        try:
            # The context manager closes the handle even when the write fails.
            with io.open(file_path, 'a', encoding="utf-8") as f:
                f.write(line)
        except Exception as e:
            print(ErrorWrapper(e).handle())
Esempio n. 21
0
    def filter(self, keywords, stream, languages=None):
        """
        Start filtering the stream on the given keywords without blocking
        the caller.

        Parameters
        ----------
        keywords : list
            The keywords to filter on list.
        stream : Stream
            The stream instance.
        languages : list
            The requested tweets language

        Returns
        -------
        None
            Errors are not raised; they are passed to ErrorWrapper and the
            result of its handle() is printed.

        """
        try:
            # `async` is a reserved word since Python 3.7, so `async=True`
            # no longer compiles. tweepy 3.7+ accepts the same flag as
            # `is_async`.
            stream.filter(track=keywords, is_async=True, languages=languages)
        except Exception as e:
            print(ErrorWrapper(e).handle())
Esempio n. 22
0
    def __init__(self, config_file_path, listener_class=None):
        """
        Initializer for TwitterApiWrapper class.

        Stores the keys file path and, when a listener is given, checks
        that it is a StreamListener before storing it. A listener of any
        other type is reported as a ParametersError (code 50) and is not
        stored. When no listener is given, self.listener is not set.

        Parameters
        ----------
        config_file_path : str
            The application's keys file path.
        listener_class : object or None
            The listener object; must be an instance of StreamListener or
            of one of its subclasses.

        """
        try:
            self.config = config_file_path
            if listener_class is not None:
                # isinstance also accepts indirect subclasses and classes
                # that list StreamListener after another base, which the
                # first-direct-base comparison rejected.
                if not isinstance(listener_class, StreamListener):
                    raise ParametersError(
                        Exception("wrong parameters class type"), 50)
                self.listener = listener_class
        except ParametersError as e:
            print(ErrorWrapper(e.ex, e.code).handle())
Esempio n. 23
0
    def extract_date_info(read_file_path, hours_write_file_path,
                          week_days_write_file_path):
        """
        Split a file of date strings into an hours file and a week days
        file.

        Each input line is split on single spaces: the first field is taken
        as the week day, and the part of the fourth field before the first
        ":" is taken as the hour. Nothing is written unless every line was
        processed.

        Parameters
        ----------
        read_file_path : str
            The date data file path.

        hours_write_file_path : str
            The desired hours only data file path.

        week_days_write_file_path : str
            The desired week days only data file path.

        Returns
        -------
        None
            Errors are not raised; they are passed to ErrorWrapper and the
            result of its handle() is printed.

        """
        try:
            hours, week_days = [], []
            for record in FileUtil.file_reader(read_file_path):
                fields = record.split(" ")
                clock = fields[3]
                hours.append(clock.split(":")[0])
                week_days.append(fields[0])
            FileUtil.write_to_file(hours_write_file_path, hours)
            FileUtil.write_to_file(week_days_write_file_path, week_days)
        except Exception as e:
            print(ErrorWrapper(e).handle())
Esempio n. 24
0
    def get_retweets(self, status_id):
        """
        Return how many times the status with the given ID was retweeted.

        Parameters
        ----------
        status_id : int
            The status ID.

        Returns
        -------
        int
            The status retweeting count, or -1 when get_status_obj returns
            no status. None if an error occurs (it is passed to
            ErrorWrapper).

        """
        try:
            status = self.get_status_obj(status_id)
            if status is None:
                return -1
            return status.retweet_count
        except Exception as e:
            ErrorWrapper(e).handle()