Code Example #1
def get_user_info(twitter_user):
    """
    An example of using the query_user_info method
    :param twitter_user: the twitter user to capture user data
    :return: twitter_user_data: returns a dictionary of twitter user data
    """
    user_info = query_user_info(user=twitter_user)
    twitter_user_data = {}
    twitter_user_data["user"] = user_info.user  # feature: screen_name_length
    twitter_user_data["fullname"] = user_info.full_name  # user name
    twitter_user_data["location"] = user_info.location  # feature: location
    twitter_user_data["blog"] = user_info.blog  # feature: url
    twitter_user_data["date_joined"] = user_info.date_joined  # feature: age
    twitter_user_data["id"] = user_info.id  # twitter account id
    twitter_user_data["num_tweets"] = user_info.tweets  # feature: statuses_count
    twitter_user_data["following"] = user_info.following  # feature: friends_count
    twitter_user_data["followers"] = user_info.followers  # feature: followers_count
    twitter_user_data["likes"] = user_info.likes  # feature: favourites_count
    twitter_user_data["lists"] = user_info.lists  # feature: listed_count
    #twitter_user_data["is_verified"] = user_info.is_verified
    twitter_user_data["description"] = user_info.description

    # TODO: also capture verified status and quoted tweets

    max_num_tweets = 10
    latest_tweets = query_tweets_from_user(
        twitter_user, limit=max_num_tweets)  # list of up to max_num_tweets tweets
    (tweets, tweets_html, hashtags, has_media, num_retweets, num_likes,
     links, num_replies, reply_to_users, timestamp_epochs, is_quoted_tweet,
     quoted_user, quoted_text) = get_tweet_attribute(latest_tweets)

    twitter_user_data["tweets"] = tweets
    twitter_user_data["tweets_html"] = tweets_html
    twitter_user_data["hashtags"] = hashtags
    twitter_user_data["has_media"] = has_media
    twitter_user_data["num_retweets"] = num_retweets
    twitter_user_data["num_likes"] = num_likes
    twitter_user_data['links'] = links
    twitter_user_data['num_replies'] = num_replies
    twitter_user_data['reply_to_users'] = reply_to_users
    twitter_user_data['timestamp_epochs'] = timestamp_epochs
    twitter_user_data['is_quoted_tweet'] = is_quoted_tweet
    #twitter_user_data['is_retweet'] = is_retweet
    twitter_user_data['quoted_user'] = quoted_user
    twitter_user_data['quoted_text'] = quoted_text
    #twitter_user_data['retweet_user'] = retweet_user
    #twitter_user_data['retweet_text'] = retweet_text

    return twitter_user_data
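
A minimal driver for this example, assuming get_user_info and its twitterscraper imports live in the same module (the screen name "jack" is purely illustrative):

if __name__ == '__main__':
    data = get_user_info("jack")  # illustrative screen name
    print(data["followers"], data["num_tweets"])
    print(data["tweets"][:2])  # first two of the scraped tweet texts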
Code Example #2
def get_user_info(twitter_user):
    user_info = query_user_info(user=twitter_user)
    twitter_user_data = {}
    twitter_user_data["user"] = user_info.user
    twitter_user_data["fullname"] = user_info.full_name
    twitter_user_data["location"] = user_info.location
    twitter_user_data["blog"] = user_info.blog
    twitter_user_data["date_joined"] = user_info.date_joined
    twitter_user_data["id"] = user_info.id
    twitter_user_data["num_tweets"] = user_info.tweets
    twitter_user_data["following"] = user_info.following
    twitter_user_data["followers"] = user_info.followers
    twitter_user_data["likes"] = user_info.likes
    twitter_user_data["lists"] = user_info.lists
    
    return twitter_user_data
Code Example #3
def get_user_info(twitter_user):
    """
    An example of using the query_user_info method
    :param twitter_user: the twitter user to capture user data
    :return: twitter_user_data: returns a dictionary of twitter user data
    """
    userinfo = query_user_info(user=twitter_user)
    # Default every field to 0 so a complete dict is returned even when the
    # profile lookup fails and query_user_info returns None.
    twitter_user_data = {"user": 0, "fullname": 0, "date_joined": 0, "id": 0,
                         "num_tweets": 0, "following": 0, "followers": 0,
                         "likes": 0, "lists": 0}
    twitter_user_data["user"] = userinfo.user if userinfo else 0
    twitter_user_data["fullname"] = userinfo.full_name if userinfo else 0
    twitter_user_data["date_joined"] = userinfo.date_joined if userinfo else 0
    twitter_user_data["id"] = userinfo.id if userinfo else 0
    twitter_user_data["num_tweets"] = userinfo.tweets if userinfo else 0
    twitter_user_data["following"] = userinfo.following if userinfo else 0
    twitter_user_data["followers"] = userinfo.followers if userinfo else 0
    twitter_user_data["likes"] = userinfo.likes if userinfo else 0
    
    return twitter_user_data
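
The repeated "if userinfo else 0" guards above can be collapsed into a single loop. A minimal alternative sketch, assuming the same attribute names on the object returned by query_user_info (the function name get_user_info_compact is ours):

def get_user_info_compact(twitter_user):
    userinfo = query_user_info(user=twitter_user)
    # Map output keys to attribute names on the user-info object.
    fields = {"user": "user", "fullname": "full_name",
              "date_joined": "date_joined", "id": "id",
              "num_tweets": "tweets", "following": "following",
              "followers": "followers", "likes": "likes", "lists": "lists"}
    # Fall back to 0 for every field when the lookup returns None.
    return {key: getattr(userinfo, attr) if userinfo else 0
            for key, attr in fields.items()}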
Code Example #4
def get_user_info(twitter_user):
    try:
        user_info = query_user_info(user=twitter_user)
        twitter_user_data = {}
        twitter_user_data["screen_name"] = user_info.user
        twitter_user_data["username"] = user_info.full_name
        twitter_user_data["location"] = user_info.location
        twitter_user_data["blog"] = user_info.blog
        twitter_user_data["date_joined"] = dt.datetime.strptime(
            replace(user_info.date_joined), '%d/%m/%Y')
        twitter_user_data["user_id"] = user_info.id
        twitter_user_data["num_tweets"] = user_info.tweets
        twitter_user_data["following"] = user_info.following
        twitter_user_data["followers"] = user_info.followers
        twitter_user_data["likes"] = user_info.likes
        twitter_user_data["lists"] = user_info.lists
    except Exception:
        # Signal a failed lookup with None so callers can test for it.
        twitter_user_data = None
    return twitter_user_data
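
Callers of this variant must check the failure value before indexing into the result; a short usage sketch (screen name illustrative):

profile = get_user_info("jack")
if profile is None:
    print("Profile could not be scraped.")
else:
    print(profile["screen_name"], profile["followers"])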
Code Example #5
def get_user_info(twitter_user):
    """
    An example of using the query_user_info method
    :param twitter_user: the twitter user to capture user data
    :return: twitter_user_data: returns a dictionary of twitter user data
    """
    user_info = query_user_info(user=twitter_user)
    twitter_user_data = {}
    twitter_user_data["user"] = user_info.user
    twitter_user_data["fullname"] = user_info.full_name
    twitter_user_data["location"] = user_info.location
    twitter_user_data["blog"] = user_info.blog
    twitter_user_data["date_joined"] = user_info.date_joined
    twitter_user_data["id"] = user_info.id
    twitter_user_data["num_tweets"] = user_info.tweets
    twitter_user_data["following"] = user_info.following
    twitter_user_data["followers"] = user_info.followers
    twitter_user_data["likes"] = user_info.likes
    twitter_user_data["lists"] = user_info.lists

    return twitter_user_data
Code Example #6
def get_user_info(twitter_user):
    """
    An example of using the query_user_info method
    :param twitter_user: the twitter user to capture user data
    :return: twitter_user_data: returns a dictionary of twitter user data
    """
    user_info = query_user_info(user=twitter_user)
    twitter_user_data = {}
    twitter_user_data["user"] = user_info.user
    twitter_user_data["fullname"] = user_info.full_name
    twitter_user_data["location"] = user_info.location
    twitter_user_data["blog"] = user_info.blog
    twitter_user_data["date_joined"] = user_info.date_joined
    twitter_user_data["id"] = user_info.id
    twitter_user_data["num_tweets"] = user_info.tweets
    twitter_user_data["following"] = user_info.following
    twitter_user_data["followers"] = user_info.followers
    twitter_user_data["likes"] = user_info.likes
    twitter_user_data["lists"] = user_info.lists
    
    return twitter_user_data
Code Example #7
def queringUserInfo(username):
    user_info = query_user_info(user=username)
    twitter_user_data = {}
    twitter_user_data["Username"] = user_info.user
    twitter_user_data["Full name"] = user_info.full_name
    twitter_user_data["Tweet count"] = user_info.tweets
    twitter_user_data["Following"] = user_info.following
    twitter_user_data["Followers"] = user_info.followers

    # Write the profile summary in a single pass; utf-8 is needed for
    # non-ASCII characters in names.
    with open(username + ".txt", "w", encoding="utf-8") as f:
        f.write(
            "Username:{} \n Tweet count:{} \n Following:{} \n Followers:{}\n"
            .format(twitter_user_data["Username"],
                    twitter_user_data["Tweet count"],
                    twitter_user_data["Following"],
                    twitter_user_data["Followers"]))
        f.write("Full name:{} \n".format(twitter_user_data["Full name"]))

    # queringTweets is defined elsewhere in the original project.
    queringTweets(username)
    return twitter_user_data
Code Example #8
def main():
    try:
        parser = argparse.ArgumentParser(
            formatter_class=argparse.RawTextHelpFormatter, description=__doc__)

        parser.add_argument("query", type=str, help="Advanced twitter query")
        parser.add_argument("-o",
                            "--output",
                            type=str,
                            default="tweets.json",
                            help="Path to a JSON file to store the gathered "
                            "tweets to.")
        parser.add_argument("-l",
                            "--limit",
                            type=int,
                            default=None,
                            help="Number of minimum tweets to gather.")
        parser.add_argument(
            "-a",
            "--all",
            action='store_true',
            help="Set this flag if you want to get all tweets "
            "in the history of twitter. Begindate is set to 2006-03-01. "
            "This may take a while. You can increase the number of parallel "
            "processes depending on the computational power you have.")
        parser.add_argument(
            "-c",
            "--csv",
            action='store_true',
            help=
            "Set this flag if you want to save the results to a CSV format.")
        parser.add_argument(
            "-u",
            "--user",
            action='store_true',
            help=
            "Set this flag if you want to scrape tweets from a specific user. "
            "The query should then consist of the profile name you want to scrape without @."
        )
        parser.add_argument(
            "--profiles",
            action='store_true',
            help=
            "Set this flag if you want to scrape profile info of all the users you "
            "have previously scraped tweets from. After all of the tweets have been "
            "scraped it will start a new process of scraping profile pages.")
        parser.add_argument(
            "--lang",
            type=str,
            default=None,
            help=
            "Set this flag if you want to query tweets in \na specific language. You can choose from:\n"
            "en (English)\nar (Arabic)\nbn (Bengali)\n"
            "cs (Czech)\nda (Danish)\nde (German)\nel (Greek)\nes (Spanish)\n"
            "fa (Persian)\nfi (Finnish)\nfil (Filipino)\nfr (French)\n"
            "he (Hebrew)\nhi (Hindi)\nhu (Hungarian)\n"
            "id (Indonesian)\nit (Italian)\nja (Japanese)\n"
            "ko (Korean)\nmsa (Malay)\nnl (Dutch)\n"
            "no (Norwegian)\npl (Polish)\npt (Portuguese)\n"
            "ro (Romanian)\nru (Russian)\nsv (Swedish)\n"
            "th (Thai)\ntr (Turkish)\nuk (Ukranian)\n"
            "ur (Urdu)\nvi (Vietnamese)\n"
            "zh-cn (Chinese Simplified)\n"
            "zh-tw (Chinese Traditional)")
        parser.add_argument(
            "-d",
            "--dump",
            action="store_true",
            help=
            "Set this flag if you want to dump the tweets \nto the console rather than outputting to a file"
        )
        parser.add_argument(
            "-ow",
            "--overwrite",
            action="store_true",
            help=
            "Set this flag if you want to overwrite the existing output file.")
        parser.add_argument(
            "-bd",
            "--begindate",
            type=valid_date,
            default="2006-03-21",
            help=
            "Scrape for tweets starting from this date. Format YYYY-MM-DD. \nDefault value is 2006-03-21",
            metavar='\b')
        parser.add_argument(
            "-ed",
            "--enddate",
            type=valid_date,
            default=dt.date.today(),
            help=
            "Scrape for tweets until this date. Format YYYY-MM-DD. \nDefault value is the date of today.",
            metavar='\b')
        parser.add_argument(
            "-p",
            "--poolsize",
            type=int,
            default=20,
            help="Specify the number of parallel processes you want to run. \n"
            "Default value is set to 20. \nYou can change this number if you have more computing power available. \n"
            "Set to 1 if you don't want to run any parallel processes.",
            metavar='\b')
        parser.add_argument(
            "--loglevel",
            type=valid_loglevel,
            default=logging.INFO,
            help="Specify the level for logging. \n"
            "Must be a valid value from https://docs.python.org/2/library/logging.html#logging-levels. \n"
            "Default log level is set to INFO.")
        parser.add_argument(
            "-dp",
            "--disableproxy",
            action="store_true",
            default=False,
            help=
            "Set this flag if you want to disable use of proxy servers when scrapping tweets and user profiles. \n"
        )
        args = parser.parse_args()

        logging.basicConfig()
        logger.setLevel(args.loglevel)

        if isfile(args.output) and not args.dump and not args.overwrite:
            logger.error("Output file already exists! Aborting.")
            exit(-1)

        if args.all:
            args.begindate = dt.date(2006, 3, 1)

        if args.user:
            tweets = query_tweets_from_user(user=args.query,
                                            limit=args.limit,
                                            use_proxy=not args.disableproxy)
        else:
            tweets = query_tweets(query=args.query,
                                  limit=args.limit,
                                  begindate=args.begindate,
                                  enddate=args.enddate,
                                  poolsize=args.poolsize,
                                  lang=args.lang,
                                  use_proxy=not args.disableproxy)

        if args.dump:
            pprint([tweet.__dict__ for tweet in tweets])
        else:
            if tweets:
                with open(args.output, "w", encoding="utf-8") as output:
                    if args.csv:
                        f = csv.writer(output,
                                       delimiter=";",
                                       quoting=csv.QUOTE_NONNUMERIC)
                        f.writerow([
                            "screen_name", "username", "user_id", "tweet_id",
                            "tweet_url", "timestamp", "timestamp_epochs",
                            "text", "text_html", "links", "hashtags",
                            "has_media", "img_urls", "video_url", "likes",
                            "retweets", "replies", "is_replied", "is_reply_to",
                            "parent_tweet_id", "reply_to_users"
                        ])
                        for t in tweets:
                            f.writerow([
                                t.screen_name, t.username, t.user_id,
                                t.tweet_id, t.tweet_url, t.timestamp,
                                t.timestamp_epochs, t.text, t.text_html,
                                t.links, t.hashtags, t.has_media, t.img_urls,
                                t.video_url, t.likes, t.retweets, t.replies,
                                t.is_replied, t.is_reply_to, t.parent_tweet_id,
                                t.reply_to_users
                            ])
                    else:
                        json.dump(tweets, output, cls=JSONEncoder)
            if args.profiles and tweets:
                list_users = list(set([tweet.username for tweet in tweets]))
                list_users_info = [
                    query_user_info(elem, not args.disableproxy)
                    for elem in list_users
                ]
                filename = 'userprofiles_' + args.output
                with open(filename, "w", encoding="utf-8") as output:
                    json.dump(list_users_info, output, cls=JSONEncoder)
    except KeyboardInterrupt:
        logger.info("Program interrupted by user. Quitting...")
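
The same scrape can be driven without the CLI. A minimal sketch of the non-CLI path, assuming query_tweets is importable from the twitterscraper package as in its documented usage (the query, dates, and file name are illustrative):

import datetime as dt
import json

from twitterscraper import query_tweets

tweets = query_tweets(query="machine learning",  # illustrative query
                      limit=100,
                      begindate=dt.date(2019, 1, 1),
                      enddate=dt.date(2019, 12, 31),
                      poolsize=20,
                      lang="en")

with open("tweets.json", "w", encoding="utf-8") as output:
    # Serialize via __dict__, mirroring the script's --dump branch;
    # default=str covers datetime fields json cannot encode natively.
    json.dump([t.__dict__ for t in tweets], output, default=str)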
Code Example #9
File: main.py  Project: cenguix/twitterscraper
def main():
    try:
        parser = argparse.ArgumentParser(formatter_class=argparse.RawTextHelpFormatter,
            description=__doc__
        )

        parser.add_argument("query", type=str, help="Advanced twitter query")
        parser.add_argument("-o", "--output", type=str, default="tweets.json",
                            help="Path to a JSON file to store the gathered "
                                 "tweets to.")
        parser.add_argument("-l", "--limit", type=int, default=None,
                            help="Number of minimum tweets to gather.")
        parser.add_argument("-a", "--all", action='store_true',
                            help="Set this flag if you want to get all tweets "
                                 "in the history of twitter. Begindate is set to 2006-03-01."
                                 "This may take a while. You can increase the number of parallel"
                                 "processes depending on the computational power you have.")
        parser.add_argument("-c", "--csv", action='store_true',
                                help="Set this flag if you want to save the results to a CSV format.")
        parser.add_argument("-u", "--user", action='store_true',
                            help="Set this flag to if you want to scrape tweets from a specific user"
                                 "The query should then consist of the profilename (user) you want to scrape without @")
        parser.add_argument("--profiles", action='store_true',
                            help="Set this flag to if you want to scrape profile info of all the users where you" 
                            "have previously scraped from. After all of the tweets have been scraped it will start"
                            "a new process of scraping profile pages.")
        parser.add_argument("--lang", type=str, default=None,
                            help="Set this flag if you want to query tweets in \na specific language. You can choose from:\n"
                                 "en (English)\nar (Arabic)\nbn (Bengali)\n"
                                 "cs (Czech)\nda (Danish)\nde (German)\nel (Greek)\nes (Spanish)\n"
                                 "fa (Persian)\nfi (Finnish)\nfil (Filipino)\nfr (French)\n"
                                 "he (Hebrew)\nhi (Hindi)\nhu (Hungarian)\n"
                                 "id (Indonesian)\nit (Italian)\nja (Japanese)\n"
                                 "ko (Korean)\nmsa (Malay)\nnl (Dutch)\n"
                                 "no (Norwegian)\npl (Polish)\npt (Portuguese)\n"
                                 "ro (Romanian)\nru (Russian)\nsv (Swedish)\n"
                                 "th (Thai)\ntr (Turkish)\nuk (Ukranian)\n"
                                 "ur (Urdu)\nvi (Vietnamese)\n"
                                 "zh-cn (Chinese Simplified)\n"
                                 "zh-tw (Chinese Traditional)"
                                 )
        parser.add_argument("-d", "--dump", action="store_true",
                            help="Set this flag if you want to dump the tweets \nto the console rather than outputting to a file")
        parser.add_argument("-bd", "--begindate", type=valid_date, default="2006-03-21",
                            help="Scrape for tweets starting from this date. Format YYYY-MM-DD. \nDefault value is 2006-03-21", metavar='\b')
        parser.add_argument("-ed", "--enddate", type=valid_date, default=dt.date.today(),
                            help="Scrape for tweets until this date. Format YYYY-MM-DD. \nDefault value is the date of today.", metavar='\b')
        parser.add_argument("-p", "--poolsize", type=int, default=20, help="Specify the number of parallel process you want to run. \n"
                            "Default value is set to 20. \nYou can change this number if you have more computing power available. \n"
                            "Set to 1 if you dont want to run any parallel processes.", metavar='\b')
        args = parser.parse_args()

        if isfile(args.output) and not args.dump:
            logger.error("Output file already exists! Aborting.")
            exit(-1)

        if args.all:
            args.begindate = dt.date(2006, 3, 1)
            args.enddate = dt.date.today()

        if args.user:
            tweets = query_tweets_from_user(user=args.query, limit=args.limit)
        else:
            tweets = query_tweets(query=args.query, limit=args.limit,
                                  begindate=args.begindate, enddate=args.enddate,
                                  poolsize=args.poolsize, lang=args.lang)

        if args.dump:
            print(json.dumps(tweets, cls=JSONEncoder))
        else:
            if tweets:
                with open(args.output, "w", encoding="utf-8") as output:
                    if args.csv:
                        f = csv.writer(output)
                        f.writerow(["user", "fullname", "tweet-id", "timestamp", "url", "likes", "replies", "retweets", "text", "html"])
                        for x in tweets:
                            f.writerow([x.user, x.fullname, x.id, x.timestamp, x.url,
                                        x.likes, x.replies, x.retweets,
                                        x.text, x.html])
                    else:
                        json.dump(tweets, output, cls=JSONEncoder)

            if args.profiles and tweets:
                list_users = list(set([tweet.user for tweet in tweets]))
                # list_users_info = [query_user_info(elem) for elem in list_users]
                filename = 'userprofiles_' + args.output

                with open(filename, "w", encoding="utf-8") as output:
                    if args.csv:
                        f = csv.writer(output)
                        f.writerow(["user","fullname","location","blog","date_joined","id","num_tweets","following","followers","likes","lists"])
                        for elem in list_users:
                            u = query_user_info(elem)
                            if u is None:
                                continue
                            else:
                                f.writerow([u.user, u.full_name, u.location, u.blog,
                                            u.date_joined, u.id, u.tweets, u.following,
                                            u.followers, u.likes, u.lists])

                    else:
                        for elem in list_users:
                            u = query_user_info(elem)
                            if u is None:
                                continue
                            else:
                                json.dump(u, output, cls=JSONEncoder, indent=2)

    except KeyboardInterrupt:
        logger.info("Program interrupted by user. Quitting...")
Code Example #10
File: main.py  Project: noproxy/twitterscraper
def get_follower(user: str):
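    # `User` is assumed to be the user model exposed by twitterscraper,
    # i.e. the type returned by query_user_info.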
    info: User = query_user_info(user)
    return info.followers