コード例 #1
0
ファイル: twitter_api.py プロジェクト: xinyiZzz/tweetf0rm
    def __init__(self, *args, **kwargs):
        """
        Build the client from an ``apikeys`` dict.

        Keyword arguments consumed here (all others are forwarded to the
        parent constructor unchanged):

        * apikeys: dict of credentials. ``app_key`` and ``app_secret`` are
          read when ``oauth2`` is true; whatever entries remain afterwards
          are merged into the keyword arguments of the parent constructor.
        * oauth2: defaults to True (application level access, read-only).
          When true, an access token is obtained via
          ``twython.Twython(..., oauth_version=2).obtain_access_token()``
          and passed on as ``access_token``.

        Raises MissingArgs when ``apikeys`` is absent or empty.
        """
        # NOTE(review): kwargs still holds the raw credentials here, so this
        # line writes them to the log -- consider redacting before logging.
        logger.info(kwargs)
        import copy

        apikeys = copy.copy(kwargs.pop('apikeys', None))

        if not apikeys:
            raise MissingArgs('apikeys is missing')

        # Keep an unmodified copy; the working dict below is trimmed in place.
        self.apikeys = copy.copy(apikeys)

        # default to use oauth2 (application level access, read-only)
        oauth2 = kwargs.pop('oauth2', True)

        if oauth2:
            # The user-level tokens are discarded in this mode, so a key set
            # that never contained them must not fail with KeyError.
            apikeys.pop('oauth_token', None)
            apikeys.pop('oauth_token_secret', None)
            twitter = twython.Twython(apikeys['app_key'],
                                      apikeys['app_secret'],
                                      oauth_version=2)
            kwargs['access_token'] = twitter.obtain_access_token()
            # Only app_key + access_token are forwarded to the parent.
            apikeys.pop('app_secret')

        kwargs.update(apikeys)

        super(TwitterAPI, self).__init__(*args, **kwargs)
コード例 #2
0
    def __init__(self,
                 node_id,
                 crawler_id,
                 apikeys,
                 handlers,
                 redis_config,
                 proxies=None):
        """
        Set up the crawler state and create its user API client.

        * node_id, crawler_id, redis_config, handlers: forwarded to the
          parent constructor; ``redis_config`` is also used to build the
          ``NodeQueue`` for this node.
        * apikeys: credentials; a shallow copy is stored on the instance.
        * handlers: must not be None (an empty collection is accepted).
        * proxies: optional iterable of proxies; stored as an iterator, or
          None when not given or empty.

        Raises MissingArgs when ``handlers`` is None.
        """
        # Identity check: `== None` can be hijacked by a custom __eq__.
        if handlers is None:
            raise MissingArgs("you need a handler to write the data to...")

        super(UserRelationshipCrawler, self).__init__(node_id, crawler_id,
                                                      redis_config, handlers)

        self.apikeys = copy.copy(apikeys)
        # Command name -> handler name(s). For the two network crawls the
        # value is a dict selecting a name per data type ("users" / "ids").
        # NOTE(review): the string values are presumably method names on the
        # user API object -- confirm against the dispatch code.
        self.tasks = {
            "TERMINATE": "TERMINATE",
            "CRAWL_FRIENDS": {
                "users": "find_all_friends",
                "ids": "find_all_friend_ids",
                "network_type": "friends"
            },
            "CRAWL_FOLLOWERS": {
                "users": "find_all_followers",
                "ids": "find_all_follower_ids",
                "network_type": "followers"
            },
            "CRAWL_USER_TIMELINE": "fetch_user_timeline",
            "CRAWL_TWEET": "fetch_tweet_by_id"
        }
        self.node_queue = NodeQueue(self.node_id, redis_config=redis_config)
        self.client_args = {"timeout": 300}
        self.proxies = iter(proxies) if proxies else None
        self.user_api = None

        # Replaces the None placeholder above (defined elsewhere in the class).
        self.init_user_api()
コード例 #3
0
ファイル: streaming_api.py プロジェクト: CoolDarran/tweetf0rm
    def __init__(self, *args, **kwargs):
        """
        Build the streamer from ``apikeys`` and an ``output`` path.

        Keyword arguments consumed here (all others are forwarded to the
        parent constructor unchanged):

        * apikeys: dict of credentials; its entries are merged into the
          keyword arguments of the parent constructor.
        * output: path of the file opened in append mode and kept on
          ``self.output``.

        Raises MissingArgs when ``apikeys`` or ``output`` is absent or empty.
        """
        # NOTE(review): kwargs still holds the raw credentials here, so this
        # line writes them to the log -- consider redacting before logging.
        logger.info(kwargs)
        import copy

        apikeys = copy.copy(kwargs.pop('apikeys', None))
        output = copy.copy(kwargs.pop('output', None))

        if not apikeys:
            raise MissingArgs('apikeys is missing')
        if not output:
            raise MissingArgs('output is missing')

        self.apikeys = copy.copy(apikeys)  # keep a copy
        self.output = open(output, 'a')
        self.counter = 0

        kwargs.update(apikeys)

        try:
            super(Streamer, self).__init__(*args, **kwargs)
        except Exception:
            # The instance is unusable if the parent constructor fails, so
            # release the file handle instead of leaking it.
            self.output.close()
            raise
コード例 #4
0
ファイル: twitter_api.py プロジェクト: xinyiZzz/tweetf0rm
    def find_all_friend_ids(self,
                            user_id=None,
                            write_to_handlers=None,
                            cmd_handlers=None,
                            bucket="friend_ids"):
        """
        Page through ``get_friends_ids`` for ``user_id`` until the cursor
        returned by the API is 0, appending every page to the handlers.

        * user_id: required; a falsy value raises MissingArgs.
        * write_to_handlers, cmd_handlers: optional iterables of objects
          exposing ``append(data, bucket=..., key=...)``. Each page is
          appended as a JSON string, write handlers first.
        * bucket: bucket name passed to every ``append`` call.

        Rate-limit errors are delegated to ``rate_limit_error_occured`` and
        do not consume retries. Any other exception consumes one retry from
        a budget of MAX_RETRY_CNT that is shared across all pages; when the
        budget is exhausted MaxRetryReached is raised.
        """
        if not user_id:
            raise MissingArgs("user_id cannot be None")

        # None defaults avoid sharing one mutable list between calls.
        handlers = list(write_to_handlers or []) + list(cmd_handlers or [])

        retry_cnt = MAX_RETRY_CNT
        cursor = -1
        # The guard must allow retry_cnt to reach 0 inside the loop, or the
        # MaxRetryReached branch below can never fire and a failed crawl
        # would be reported as finished.
        while cursor != 0 and retry_cnt > 0:
            try:
                friend_ids = self.get_friends_ids(user_id=user_id,
                                                  cursor=cursor,
                                                  count=200)

                if handlers:
                    # Serialize once per page rather than once per handler.
                    payload = json.dumps(friend_ids)
                    for handler in handlers:
                        handler.append(payload, bucket=bucket, key=user_id)

                cursor = int(friend_ids['next_cursor'])

                logger.debug("find #%d friend_ids... NEXT_CURSOR: %d" %
                             (len(friend_ids["ids"]), cursor))

                # Pause between pages.
                time.sleep(2)
            except twython.exceptions.TwythonRateLimitError:
                self.rate_limit_error_occured('friends', '/friends/ids')
            except Exception as exc:
                logger.debug("exception: %s" % exc)
                retry_cnt -= 1
                if retry_cnt == 0:
                    raise MaxRetryReached("max retry reached due to %s" %
                                          (exc))
                # Back off only when another attempt will follow.
                time.sleep(10)

        logger.debug("finished find_all_friend_ids for %s..." % (user_id))