import time

from selenium.common.exceptions import NoSuchElementException
from twitter_scraper import Profile

# `tool` (random sleep intervals) and `mysql` (DB writes) are assumed to be
# project-local helper modules importable from this file's package.


def crawler_item(browser, user_name: str, media_id: int, media_name: str, mt,
                 xpath):
    """ 抓取单个账号用户信息

    :param user_name: <str> 账号名称
    :param media_id: <int> 媒体ID
    :param media_name: <str> 媒体名称
    :return: <None> 已将结果存入数据库
    """
    # 使用twitter-scraper包抓取账户信息(关注数+正在关注数可能错误)
    try:
        profile = Profile(user_name)
    except Exception:
        print("Account does not exist!")
        return
    writing_item = profile.to_dict()
    writing_item["media_id"] = media_id
    writing_item["media_name"] = media_name

    # Fetch the account's followers and following counts via Selenium
    browser.get("https://twitter.com/" + user_name)
    time.sleep(tool.get_scope_random(12))
    following_count = None
    followers_count = None
    try:
        following_count = browser.find_element_by_xpath(
            xpath["following_count"][0]).get_attribute("title")
        followers_count = browser.find_element_by_xpath(
            xpath["followers_count"][0]).get_attribute("title")
    except NoSuchElementException:
        try:
            following_count = browser.find_element_by_xpath(
                xpath["following_count"][1]).get_attribute("title")
            followers_count = browser.find_element_by_xpath(
                xpath["followers_count"][1]).get_attribute("title")
        except NoSuchElementException:
            print("Selenium failed to fetch followers/following counts!")

    # Patch the scraped values with the Selenium results
    if following_count is not None:
        following_count = following_count.replace(",", "")
        print("修正正在关注数量:", writing_item["following_count"], "→",
              following_count)
        writing_item["following_count"] = following_count
    if followers_count is not None:
        followers_count = followers_count.replace(",", "")
        print("修正关注者数量:", writing_item["followers_count"], "→",
              followers_count)
        writing_item["followers_count"] = followers_count

    # Write the data to the database
    writing_list = [writing_item]
    write_num = mysql.insert_pure(mt, writing_list)
    print("存储记录数:", write_num)
    print(writing_list)
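
For context, here is a minimal sketch of how crawler_item might be invoked. The XPath expressions, media values, and table name below are illustrative assumptions, not values from the original project; the important detail is that the xpath argument maps each field to a list of candidate XPath expressions, tried in order as fallbacks.

from selenium import webdriver

# Hypothetical config: real XPath strings and the `mt` table handle come
# from the original project's settings, not from this sketch.
xpath = {
    "following_count": [
        "//a[contains(@href, '/following')]//span[@title]",  # assumed layout
        "//a[contains(@href, '/following')]//span",          # assumed fallback
    ],
    "followers_count": [
        "//a[contains(@href, '/followers')]//span[@title]",
        "//a[contains(@href, '/followers')]//span",
    ],
}

browser = webdriver.Chrome()
try:
    crawler_item(browser, "TwitterDev", media_id=1, media_name="Twitter Dev",
                 mt="twitter_account_info", xpath=xpath)
finally:
    browser.quit()
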
def get_user_data(username: str) -> dict:
    """Gets user data.

    Args:
        username (str): Twitter username (handle) to look up.

    Returns:
        dict: dictionary containing user info.
    """
    profile = Profile(username=username)
    return profile.to_dict()
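
A quick usage sketch, assuming the twitter-scraper package is installed; the exact keys in the returned dict come from Profile.to_dict() and may vary across package versions.

info = get_user_data("TwitterDev")  # placeholder handle
# 'name' and 'followers_count' are typical Profile.to_dict() keys,
# but treat them as assumptions rather than a stable contract.
print(info.get("name"), info.get("followers_count"))
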
def twitter_scraper(username):
    try:
        profile = Profile(username)
        data = profile.to_dict()
        data['pfp_url'] = data.pop('profile_photo')
        data['website'] = ('https://' + data['website']
                           if data['website'] else '')
        data['url'] = f'https://twitter.com/{username}'
        data.pop('username')
        return data
    except Exception:
        return "User not found"
Example #4
import sys
from twitter_scraper import Profile, get_tweets
from twitter_functions import process_tweets

try:
    opts = [opt for opt in sys.argv[1:] if opt.startswith("-")]
    args = [arg for arg in sys.argv[1:] if not arg.startswith("-")]
    profile_name = args[0]
    tweets = {}

    if "--profilename" or "-p" in opts:
        # Constructing Profile validates that the handle exists (it raises
        # for a missing account); the profile data itself is not used below.
        Profile(profile_name)

        for tweet in get_tweets(profile_name, pages=1):
            if tweet['entries']['photos']:
                # Strip the trailing "pic.twitter.com/..." link from the text
                tweet_text = tweet['text'].split(' pic', 1)[0]
                tweets[tweet_text] = tweet['entries']['photos'][0]
        process_tweets(tweets)
except Exception:
    raise SystemExit(f"Usage: {sys.argv[0]} -p twitter_handle")
Example #5
def scrap_bio(username):
    profile = Profile(username)
    return profile.to_dict()['biography']