Ejemplo n.º 1
0
class Crawler(object):
    """Crawler that starts from a seed user.

    Construction builds a ``Client`` from the given configuration path and
    immediately dumps the basic and extended statistics for the seed user.
    """

    def __init__(self, config_path, seed):
        """Create the client, report the seed user's stats, set up a printer.

        Args:
            config_path: Path handed to ``Client``.
            seed: User passed as ``user=`` to both stats calls.
        """
        self.seed = seed
        client = Client(config_path, log=True)
        self.client = client
        seed_user = self.seed
        client.dump_stats(user=seed_user)
        client.extended_stats(user=seed_user)
        printer = pprint.PrettyPrinter(depth=6)
        self.pp = printer

    def start_crawler(self):
        """Placeholder entry point; currently does nothing."""
        return None
Ejemplo n.º 2
0
def user_lookup(args):
    """Dump basic and extended stats for the user named in ``args``.

    Args:
        args: Object providing ``config_path`` (passed to ``Client``) and
            ``user_lookup`` (passed as ``user=`` to both stats calls).
    """
    client = Client(args.config_path, log=True)
    client.dump_stats(user=args.user_lookup)
    client.extended_stats(user=args.user_lookup)
Ejemplo n.º 3
0
def get_my_stats(args):
    """Dump stats using the client's default (no ``user`` argument).

    Args:
        args: Object providing ``config_path``, which is passed to ``Client``.
    """
    Client(args.config_path, log=True).dump_stats()
Ejemplo n.º 4
0
class Daemon(object):
    """Background bot built on top of ``Client``.

    One worker posts lines from a tweets file (``async_tweet``); another
    searches a watched hashtag and favourites / follows / retweets the first
    result that passes every configured filter (``async_retweet``).
    Timer-based watchers periodically reload the configuration file and the
    tweets file and refresh a human-readable uptime string.
    """

    # String types accepted when matching a tweet field against a blocked
    # item. Python 2 text is usually ``unicode``, which ``str`` alone misses.
    try:
        _TEXT_TYPES = (basestring,)
    except NameError:
        _TEXT_TYPES = (str,)

    # Visual separator written (five times) to the debug log before a tweet.
    _SEPARATOR = '----------------------------------'

    class LogHandler(object):
        """Keep short rolling histories of recent info and error records."""

        MAX_LOGS = 15
        MAX_ERRORS = 10

        def __init__(self):
            self.recent_logs = []
            self.recent_errors = []

        def emit(self, record, rec_type='info'):
            """Store ``record`` in the history selected by ``rec_type``.

            Info records that repeat the most recent info record are dropped.
            Each history discards its oldest entry once it is full. Any
            ``rec_type`` other than ``'info'`` or ``'error'`` is ignored.
            """
            if rec_type == 'info':
                if self.recent_logs and self.recent_logs[-1] == record:
                    return
                self._push(self.recent_logs, record, self.MAX_LOGS)
            elif rec_type == 'error':
                self._push(self.recent_errors, record, self.MAX_ERRORS)

        @staticmethod
        def _push(history, record, limit):
            """Append ``record``, evicting the oldest entry at ``limit``."""
            if len(history) >= limit:
                del history[0]
            history.append(record)

    @staticmethod
    def _default_filters():
        """Return a fresh filter table holding the default values.

        Each entry maps a config key to the tweet attribute path it inspects
        (``tweet_suffix``) and the threshold or block list (``value``).
        """
        return {
            'following_limit': {'tweet_suffix': 'author.friends_count', 'value': 0},
            'favourites_limit': {'tweet_suffix': 'author.favourites_count', 'value': 0},
            'followers_limit': {'tweet_suffix': 'author.followers_count', 'value': 0},
            'statuses_limit': {'tweet_suffix': 'author.statuses_count', 'value': 0},
            'retweeted_limit': {'tweet_suffix': 'retweet_count', 'value': 0},
            'blocked_hashtags': {'tweet_suffix': "entities['hashtags']", 'value': []},
            'blocked_user_mentions': {'tweet_suffix': "entities['user_mentions']", 'value': []},
            'blocked_users': {'tweet_suffix': 'user.screen_name', 'value': []},
            'trigger_phrases': {'tweet_suffix': 'n/a', 'value': []},
        }

    def __init__(self, config_path, log=True):
        """Build the client, load configuration and tweets, install SIGUSR1.

        Args:
            config_path: Path of the configuration file; also given to
                ``Client``.
            log: Forwarded to ``Client``; when true, ``logging.basicConfig``
                is called with the client's log level.
        """
        self.config_path = config_path
        self.client = Client(config_path, log=log)
        # Must exist before reload_config(), which reports syntax errors
        # through it.
        self.log_handler = self.LogHandler()
        # Defaults, so a config that fails to parse on the very first load
        # still leaves every setting defined.
        self.filters = self._default_filters()
        self.watched_hashtags = []
        self.tweets_path = None
        self.min_hour = None
        self.max_hour = None
        self.follow_users = False
        self.retweet_sleep = 300
        self.tweet_sleep = 1200
        self.config_reload_time = 30
        self.tweets_reload_time = 30
        self.reload_config()
        self.tweets = self.load_tweets()
        self.running = True
        self.start_time = datetime.now()
        self.total_retweets = 0
        self.total_tweets = 0
        self.total_follows = 0
        self.total_favourites = 0
        self.uptime = ''
        self.last_tweet_user = ''
        self.last_tweet_text = ''
        signal(SIGUSR1, self.catch_signal)
        if log:
            logging.basicConfig(
                level=self.client.log_level,
                format='[%(levelname)s] (%(threadName)-10s) %(message)s'
            )

    def spawn_watchers(self):
        """Start the config, tweets and uptime timers (each re-arms itself)."""
        Timer(self.config_reload_time, self.config_watcher).start()
        Timer(self.tweets_reload_time, self.tweet_watcher).start()
        Timer(2, self.track_uptime).start()

    def _spawn_daemon_thread(self, name, target):
        """Start ``target`` on a daemon thread called ``name``."""
        worker = Thread(name=name, target=target)
        worker.daemon = True
        worker.start()

    def spawn_tweet_thread(self):
        """Run ``async_tweet`` on a daemon thread named 'tweeter'."""
        self._spawn_daemon_thread('tweeter', self.async_tweet)

    def spawn_retweet_thread(self):
        """Run ``async_retweet`` on a daemon thread named 'retweeter'."""
        self._spawn_daemon_thread('retweeter', self.async_retweet)

    @staticmethod
    def _int_setting(data, key, default):
        """Return ``int(data[key])``, or ``default`` when missing or zero."""
        try:
            return int(data[key]) or default
        except KeyError:
            return default

    def reload_config(self):
        """Re-read the configuration file and refresh every setting.

        If the file cannot be parsed, the error is recorded and all current
        values, filters included, are kept.

        Raises:
            BadConfiguration: if ``watched_hashtags`` is not a list.
        """
        with open(self.config_path, 'r') as f:
            try:
                # NOTE(review): `load` is defined outside this block. If it
                # is yaml.load, prefer yaml.safe_load for this file - confirm.
                data = load(f.read())
            except Exception:
                msg = "Error in configuration syntax, keeping old values"
                self.log_handler.emit(msg, rec_type='error')
                logging.info(msg)
                return
        if not isinstance(data['watched_hashtags'], list):
            raise BadConfiguration
        self.watched_hashtags = data['watched_hashtags'] or []
        try:
            candidate = data['tweets_path']
            self.tweets_path = candidate if path.isfile(candidate) else None
        except (KeyError, TypeError, ValueError):
            self.tweets_path = None
        # Both hours must be present; a value of 0 is treated as "unset".
        try:
            min_hour = int(data['minimum_hour']) or None
            max_hour = int(data['maximum_hour']) or None
        except KeyError:
            min_hour = max_hour = None
        self.min_hour, self.max_hour = min_hour, max_hour
        try:
            self.follow_users = bool(data['follow_users'])
        except KeyError:
            self.follow_users = False
        self.retweet_sleep = self._int_setting(data, 'retweet_sleep', 300)
        self.tweet_sleep = self._int_setting(data, 'tweet_sleep', 1200)
        self.config_reload_time = self._int_setting(data, 'config_reload_time', 30)
        self.tweets_reload_time = self._int_setting(data, 'tweets_reload_time', 30)
        # Build the new table on the side and publish it in one assignment,
        # so a failed reload never wipes the filters already in use.
        filters = self._default_filters()
        for key, spec in filters.items():
            try:
                config = data[key]
            except KeyError:
                continue
            if not isinstance(config, type(spec['value'])):
                logging.info("Unexpected data type for %s. Using default %s" % (
                    key, spec
                ))
                continue
            # Assign directly instead of exec()-ing text built from config.
            # List items are kept verbatim and an empty list stays empty.
            if isinstance(config, int):
                spec['value'] = int(config)
            elif isinstance(config, list):
                spec['value'] = list(config)
        self.filters = filters
        self.last_config_reload = datetime.now()

    def _log_separator(self):
        """Write five separator lines to the debug log."""
        for _ in range(5):
            logging.debug(self._SEPARATOR)

    def async_tweet(self):
        """Worker loop: post one random queued tweet per ``tweet_sleep``.

        Runs until ``self.running`` is false. A posted line (or a blank
        ``'\\n'`` line, which is never posted) is removed from the queue and
        from the tweets file.
        """
        while self.running:
            if not self.is_operating_time():
                sleep(60)
                continue
            try:
                tweet = choice(self.tweets)
            except IndexError:
                # choice() on an empty queue.
                tweet = None
            if not tweet:
                logging.info("No more tweets...")
                sleep(300)
                continue
            try:
                self._log_separator()
                logging.info(tweet)
                if tweet != '\n':
                    self.client.tweet(tweet)
                    self.total_tweets += 1
                    self.update_tweets(tweet)
                    self.log_handler.emit("\tTweeted: %s" % tweet)
                    sleep(self.tweet_sleep)
                else:
                    self.update_tweets(tweet)
            except tweepy.TweepError as e:
                logging.info(e.reason)
                sleep(300)

    def async_retweet(self):
        """Worker loop: act on the first acceptable hit for a random hashtag.

        Runs until ``self.running`` is false. For each search result that
        passes ``is_worth_while_tweet`` the tweet is favourited, its author
        optionally followed, and the tweet retweeted; the search is then
        abandoned and a new hashtag is picked.
        """
        while self.running:
            try:
                if not self.is_operating_time():
                    sleep(60)
                    continue
                if not self.watched_hashtags:
                    # choice() on an empty list would kill this thread.
                    logging.info("No watched hashtags configured")
                    sleep(60)
                    continue
                tag = choice(self.watched_hashtags)
                for tweet in self.client.search(tag):
                    self._log_separator()
                    logging.info('\nTweet by: @' + tweet.user.screen_name)
                    logging.info(tweet.text)
                    if self.is_worth_while_tweet(tweet):
                        self._engage_with_tweet(tweet)
                        break
            except StopIteration:
                logging.debug("Hit query end")
                sleep(30)

    def _engage_with_tweet(self, tweet):
        """Favourite, optionally follow, then retweet ``tweet``."""
        try:
            self.client.favourite(tweet)
            self.total_favourites += 1
            sleep(3)
        except tweepy.TweepError as e:
            sleep(3)
            logging.info(e.reason)
            sleep(3)
        if self.follow_users:
            if tweet.user.following:
                logging.info("Already following the user")
            else:
                try:
                    self.client.follow(tweet.user)
                    self.total_follows += 1
                    sleep(3)
                except tweepy.TweepError as e:
                    logging.info(e.reason)
                    sleep(3)
        try:
            self.client.retweet(tweet)
        except tweepy.TweepError as e:
            logging.info(e.reason)
            sleep(5)
            self.log_handler.emit("\tSkipped: %s" % e.reason)
            return
        self.total_retweets += 1
        self.log_handler.emit(
            "\tRetweeted: retweeter sleeping for %s seconds" % self.retweet_sleep
        )
        sleep(self.retweet_sleep)

    @staticmethod
    def _resolve_tweet_field(tweet, suffix):
        """Follow an attribute path such as ``a.b`` or ``a['k']`` on ``tweet``.

        Replaces ``eval("tweet.<suffix>")``: dotted names are read with
        ``getattr`` and quoted ``[...]`` parts are used as item keys.
        """
        current = tweet
        for part in suffix.split('.'):
            name, bracket, rest = part.partition('[')
            current = getattr(current, name)
            while bracket:
                key, _, rest = rest.partition(']')
                current = current[key.strip('\'"')]
                _, bracket, rest = rest.partition('[')
        return current

    @staticmethod
    def _entity_text(entry):
        """Return the comparable text of one list entry.

        Dict entries use their ``'text'`` value, falling back to
        ``'screen_name'`` (mention entities carry no ``'text'`` key);
        anything else is returned unchanged.
        """
        if isinstance(entry, dict):
            return entry['text'] if 'text' in entry else entry['screen_name']
        return entry

    def _matches_blocked_item(self, tweet, key, suffix, item):
        """Return True when ``item`` of list filter ``key`` hits ``tweet``."""
        needle = str(item).lower()
        if key == 'trigger_phrases':
            return needle in tweet.text.lower()
        field = self._resolve_tweet_field(tweet, suffix)
        if isinstance(field, self._TEXT_TYPES):
            return field.lower() == needle.replace('@', '').replace('#', '')
        if isinstance(field, list):
            try:
                return any(
                    needle in self._entity_text(entry).lower()
                    for entry in field
                )
            except Exception:
                # A filter that cannot be evaluated rejects the tweet rather
                # than letting it through unchecked.
                self.log_handler.emit(
                    "filter_error: %s - %s" % (key, item),
                    rec_type='error'
                )
                return True
        return False

    def is_worth_while_tweet(self, tweet):
        """Return True when ``tweet`` passes every configured filter.

        The client's own tweets are rejected. Integer filters reject a tweet
        whose field is below the configured value; list filters reject it
        when any listed item matches. Every rejection goes through
        ``log_filtered`` and yields False.
        """
        self.client.dump_tweet_stats(tweet)
        self.log_handler.emit(
            "Tweet by @%s: %s" % (tweet.user.screen_name, tweet.text)
        )
        if tweet.user.screen_name == self.client.name:
            return self.log_filtered("self_tweet")
        for key, spec in self.filters.items():
            limit = spec['value']
            suffix = spec['tweet_suffix']
            if isinstance(limit, int):
                actual = int(self._resolve_tweet_field(tweet, suffix))
                if actual < int(limit):
                    return self.log_filtered(key, data=actual)
            elif isinstance(limit, list):
                for item in limit:
                    if self._matches_blocked_item(tweet, key, suffix, item):
                        return self.log_filtered(key)
            else:
                self.log_handler.emit("Unfixed filter: %s" % key, rec_type='error')
        return True

    def get_run_metrics(self):
        """Log the uptime string and the four activity counters."""
        metrics = (
            ("[*] Uptime: %s", self.uptime),
            ("[*] Total Tweets: %s", self.total_tweets),
            ("[*] Total Retweets: %s", self.total_retweets),
            ("[*] Total Follows: %s", self.total_follows),
            ("[*] Total Favourites: %s", self.total_favourites),
        )
        for template, value in metrics:
            logging.info(template % value)

    @staticmethod
    def _format_uptime(total_seconds):
        """Render seconds as e.g. ``'1 hour, 0 minutes, 2 seconds'``.

        Fractions are truncated before the units are pluralised, so the
        wording always agrees with the number shown.
        """
        hours, remainder = divmod(int(total_seconds), 3600)
        minutes, seconds = divmod(remainder, 60)
        parts = []
        for amount, unit in ((hours, 'hour'), (minutes, 'minute'), (seconds, 'second')):
            parts.append('%s %s' % (amount, unit if amount == 1 else unit + 's'))
        return ', '.join(parts)

    def track_uptime(self):
        """Refresh ``self.uptime`` and re-arm a 2-second timer while running."""
        if not self.running:
            return
        elapsed = datetime.now() - self.start_time
        self.uptime = self._format_uptime(elapsed.total_seconds())
        Timer(2, self.track_uptime).start()

    def is_operating_time(self):
        """Return True when the current hour is inside the allowed window.

        The window is the open interval (min_hour, max_hour). If either
        bound is unset (``None`` or 0) there is no restriction.
        """
        if not self.min_hour or not self.max_hour:
            return True
        hour = int(datetime.now().hour)
        if int(self.min_hour) < hour < int(self.max_hour):
            return True
        logging.info("We are sleeping right now...")
        self.log_handler.emit("We are sleeping...")
        return False

    def log_filtered(self, ftype, data=None):
        """Record that a tweet failed filter ``ftype``; always returns False.

        ``data`` (the offending value) is appended to the history message
        whenever it is given, including a legitimate ``0``.
        """
        logging.info("Failed to meet %s filter" % ftype)
        if data is None:
            message = "\tSkipped: Failed to meet %s filter" % ftype
        else:
            message = "\tSkipped: Failed to meet %s filter: %s" % (ftype, data)
        self.log_handler.emit(message)
        return False

    def update_tweets(self, tweet):
        """Remove ``tweet`` from the queue and rewrite the tweets file."""
        self.tweets.remove(tweet)
        if self.tweets_path is None:
            # No backing file configured (any more); keep the in-memory queue.
            return
        with open(self.tweets_path, 'w') as f:
            f.writelines(self.tweets)

    def load_tweets(self):
        """Return the lines of the tweets file, or ``[]`` if unavailable."""
        tweets_path = getattr(self, 'tweets_path', None)
        if tweets_path is None:
            return []
        try:
            with open(tweets_path, 'r') as f:
                return f.readlines()
        except (IOError, OSError, UnicodeError) as e:
            logging.debug("Could not load tweets from %s: %s" % (tweets_path, e))
            return []

    def stop_threads(self):
        """Ask the worker loops and watchers to stop at their next check."""
        self.running = False

    def catch_signal(self, signum, frame):
        """Signal handler: log run metrics and dump the client's stats."""
        logging.info("[*] Caught signal. Dumping stats...")
        self.get_run_metrics()
        self.client.dump_stats()

    def config_watcher(self):
        """Reload the configuration and re-arm the timer while running."""
        if not self.running:
            return
        self.reload_config()
        logging.info("Configuration reloaded")
        Timer(self.config_reload_time, self.config_watcher).start()

    def tweet_watcher(self):
        """Reload the tweets file and re-arm the timer while running."""
        if not self.running:
            return
        self.tweets = self.load_tweets()
        logging.info("Tweets reloaded")
        Timer(self.tweets_reload_time, self.tweet_watcher).start()