def __follower_following__(self, username, limit=1000, type_="followers", proxy=None, interval=0):
    """
    Followers/Followings scraper.

    :param username: target account screen name
    :param limit: maximum number of accounts to return (<= 0 disables the cap)
    :param type_: which list to scrape; used directly in the URL path (default "followers")
    :param proxy: optional proxy passed to the request handler
    :param interval: seconds to sleep between paginated requests
    :return: list of extracted followers/followings
    """
    result = []
    cursor = str()
    first_request: bool = True
    has_more: bool = True
    # mode = FF -> followers/followings user-agent
    req = RequestHandler(user_agent="FF")
    # if a proxy is given, set it on the request handler
    if proxy:
        req.proxy = proxy
    while has_more:
        if first_request:
            url = MOBILE_URL + f"/{username}/{type_}/?lang=en"
            first_request = False
        else:
            url = MOBILE_URL + f"/{username}/{type_}/?lang=en&cursor={cursor}"
        res = req.get(url)
        if res:
            # extract the pagination cursor; an empty cursor means the last page
            cursor = self.__extract_cursor__(res)
            has_more = bool(cursor)
            # parse followers/followings from the page
            extracted_ff = self.__extract_ff__(res)
            result.extend(extracted_ff)
            # enforce the limit, if one was given
            if int(limit) > 0:
                if len(result) > int(limit):
                    return result[:int(limit)]
                else:
                    sleep(interval)
                    continue
        else:
            return result
        # wait between requests
        sleep(interval)
    return result
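
# Usage sketch (assumption: the enclosing class name `Scraper` is hypothetical;
# this private method is normally reached through a public wrapper defined
# elsewhere in the class):
#
#   scraper = Scraper()
#   followers = scraper.__follower_following__("some_user", limit=200,
#                                              type_="followers", interval=1)
#   print(len(followers))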
def __profile__(self, username, proxy):
    """ get user profile """
    req = RequestHandler(user_agent="TIMELINE")
    if proxy:
        req.proxy = proxy
    url = BASE_URL + username + "/?lang=en"
    res = req.get(url=url)
    if res:
        return self.__extract_profile__(res)
    else:
        return None
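
# Usage sketch (hypothetical class name `Scraper`; the shape of the returned
# profile depends on __extract_profile__, defined elsewhere in the class):
#
#   profile = Scraper().__profile__("some_user", proxy=None)
#   if profile:
#       print(profile)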
def __timeline__(self, username, limit=1000, proxy=None, interval=0):
    """
    Timeline scraper.

    :param username: target account screen name
    :param limit: maximum number of tweets to return (<= 0 disables the cap)
    :param proxy: optional proxy passed to the request handler
    :param interval: seconds to sleep between paginated requests
    :return: list of extracted tweets
    """
    result: list = []
    cursor = "-1"
    has_more = True
    req = RequestHandler(user_agent="TIMELINE", ret="json")
    if proxy:
        req.proxy = proxy
    while has_more:
        # rebuild the URL with the current cursor on every request
        url = BASE_URL + TIMELINE_WITH_TOKEN_QUERY + f"+from:{username}"
        url = url.replace("%TOKEN%", cursor)
        res = req.get(url)
        if res:
            cursor, has_more = self.__extract_timeline_cursor__(response=res)
            extracted_tweets = self.__extract_timeline__(res['items_html'])
            result.extend(extracted_tweets)
            # enforce the limit, if one was given
            if int(limit) > 0:
                if len(result) > int(limit):
                    return result[:int(limit)]
                else:
                    sleep(interval)
                    continue
        else:
            return result
        sleep(interval)
    return result
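
# Usage sketch (hypothetical class name `Scraper`): fetch up to 100 tweets from
# a single account, pausing one second between paginated requests:
#
#   tweets = Scraper().__timeline__("some_user", limit=100, interval=1)
#   for tweet in tweets:
#       print(tweet)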
def searchTweets(self, username="", since="", until="", query="", limit=1000, verified=False, proxy="", interval=0): """Advanced search engine""" cursor = "-1" has_more: bool = True result: list = [] req = RequestHandler(user_agent="TIMELINE", ret="json") if proxy: req.proxy = proxy if since: since = int( time.mktime(datetime.strptime(since, "%Y-%m-%d").timetuple())) if until: if len(until) == 4: until = f"{until}-01-01" query_structure = { "from": f"+from:{username}", "since": f"+since:{since}", "verified": ":verified", "until": f"+until:{until}", "query": f"+{query}" } if username and query: """ not allowed """ raise QueryError( "`username` and `query` parameter not allowed together.") if since and until: """ not allowed """ raise QueryError( "`since` and `until` parameter not allowed together.") url = BASE_URL + TIMELINE_WITH_TOKEN_QUERY url = url.replace("%TOKEN%", cursor) # if there was username or query if username or query: if username: url += query_structure['from'] else: url += query_structure['query'] # if username and query aren't set properly raise error else: raise ParameterRequired( "`username` or `query` required for search.") if since or until: if since: url += query_structure['since'] elif until: url += query_structure['until'] if verified: url += query_structure['verified'] while has_more: res = req.get(url=url) if res: cursor, has_more = self.__extract_timeline_cursor__( response=res) if cursor: extracted_tweets = self.__extract_timeline__( res['items_html']) result.extend(extracted_tweets) url = url.replace("%TOKEN%", cursor) # check limitation if int(limit) > 0: if len(result) > int(limit): return result[:int(limit)] else: sleep(interval) continue else: break sleep(interval) else: return result return result