def getRepliesAndOthers(self, output_path, maxTweets, account_handle,
                            tweet_id):

        print("Getting Replies and Others for id=" + str(tweet_id))

        # STEP 1: scrape all tweets mentioning the account handle
        possible_related = rest.rest_scrape_single("@" + account_handle,
                                                   maxTweets, self.api)

        # STEP 2: filter by reply status (isRelatedToTweet may recurse),
        # deduplicating by tweet id
        related = []
        seen_ids = set()
        for r in possible_related:
            if r._json["id"] not in seen_ids and self.isRelatedToTweet(
                    r._json, tweet_id):
                seen_ids.add(r._json["id"])
                related.append(r)

        related = list(map(lambda t: json.dumps(t._json), related))

        if not os.path.exists(output_path):
            os.makedirs(output_path)

        self.writeFile(output_path,
                       str(tweet_id) + "_replies_others.json", related)

        return related
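
getRepliesAndOthers depends on an isRelatedToTweet helper that is not included in this listing. Below is a minimal sketch of what such a predicate could look like, assuming a tweet counts as related when it replies to, retweets, or quotes the target id; the name matches the call above, but the body is an assumption, not the original implementation.

    def isRelatedToTweet(self, tweet_json, tweet_id):
        # Hypothetical sketch: treat a tweet as related if it replies to,
        # retweets, or quotes the target tweet. The real helper may also
        # follow reply chains recursively, as the STEP 2 comment suggests.
        if tweet_json.get("in_reply_to_status_id") == tweet_id:
            return True
        if "retweeted_status" in tweet_json and \
                tweet_json["retweeted_status"]["id"] == tweet_id:
            return True
        if "quoted_status" in tweet_json and \
                tweet_json["quoted_status"]["id"] == tweet_id:
            return True
        return False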
Example #2
def rpr():

	idN = 1069475079195713536
	appN = 4
	auth = cred.getAuth(appN, "app")
	api = tweepy.API(auth, wait_on_rate_limit=True, wait_on_rate_limit_notify=True)
	tool = NewsTools(api)

	now = datetime.datetime.now()
	path = "data/@nytimes_opinion_" + str(idN) # "data/" + str(idN) + "_" + now.strftime("%Y-%m-%d-%H-%M")

	# get tweet object by ID
	tweet = api.get_status(id=idN, tweet_mode='extended')

	print("Created at: " + tweet._json["created_at"])
	print("Retweet count = " + str(tweet._json["retweet_count"]))
	print("favorite count = " + str(tweet._json["favorite_count"]))

	scraped = []
	terms = []
	terms.append("A few genetically modified people already walk among us.")
	terms.append("nytimes genetically")
	terms.append("nytimes modified")
	terms.append("nytimes china")
	terms.append("https://twitter.com/nytimes/status/1069475079195713536") # full url of tweet
	terms.append("https://t.co/kxRdkjfFDc")
	for t in terms:
		scraped.extend(rest.rest_scrape_single(t, 1000000, api))

	scraped = list(map(lambda x: x._json, scraped))
	retweets = []
	quotes = []
	replies = []

	retweets_seen, quotes_seen, replies_seen = set(), set(), set()

	for t in scraped:
		if "retweeted_status" in t and t["retweeted_status"]["id"] == idN:
			if t["id"] not in retweets_seen:
				retweets_seen.add(t["id"])
				retweets.append(t)

		if "quoted_status" in t and t["quoted_status"]["id"] == idN:
			if t["id"] not in quotes_seen:
				quotes_seen.add(t["id"])
				quotes.append(t)

		if t["in_reply_to_status_id"] != None:
			if t["in_reply_to_status_id"] in replies_seen or t["in_reply_to_status_id"] in quotes_seen or t["in_reply_to_status_id"] in retweets_seen:
				if t["id"] not in replies_seen:
					replies_seen.add(t["id"])
					replies.append(t)

	writeFile(path, "retweets.json", retweets)
	writeFile(path, "quotes.json", quotes)
	writeFile(path, "replies.json", replies)

	readAndCategorize(path + "/@nytimes", idN, retweets_seen, quotes_seen, replies_seen, path+"/retweets.json", path+"/quotes.json", path+"/replies.json")
    def getQuotes(self, output_path, maxTweets, tweet_url, tweet_id):

        print("Getting Quotes for id=" + str(tweet_id))

        # STEP 1: scrape using the tweet url
        possible_quotes = rest.rest_scrape_single(tweet_url, maxTweets,
                                                  self.api)

        # STEP 2: filter by quote status

        quotes = [
            q for q in possible_quotes
            if "quoted_status" in q._json
            and q._json["quoted_status"]["id"] == tweet_id
        ]
        quotes = list(map(lambda t: json.dumps(t._json), quotes))

        if not os.path.exists(output_path):
            os.makedirs(output_path)

        self.writeFile(output_path, str(tweet_id) + "_quotes.json", quotes)

        return quotes
    def getRetweets(self, output_path, maxTweets, tweet_segment, tweet_id):

        print("Getting Retweets for id=" + str(tweet_id))

        # STEP 1: scrape using a quoted segment of the tweet text
        possible_retweets = rest.rest_scrape_single(
            "\"" + tweet_segment + "\"", maxTweets, self.api)

        # STEP 2: filter by retweet status
        retweets = []
        for rt in possible_retweets:
            if "retweeted_status" in rt._json and rt._json["retweeted_status"][
                    "id"] == tweet_id:
                retweets.append(rt)

        retweets = list(map(lambda t: json.dumps(t._json), retweets))

        if not os.path.exists(output_path):
            os.makedirs(output_path)

        self.writeFile(output_path, str(tweet_id) + "_retweets.json", retweets)

        return retweets
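
For reference, a NewsTools instance would drive these methods much like the rpr() scripts in this listing. A usage sketch, assuming the constructor stores the tweepy client as self.api; the output path and tweet count are illustrative, while the id and search strings are taken from Example #2.

auth = cred.getAuth(4, "app")
api = tweepy.API(auth, wait_on_rate_limit=True, wait_on_rate_limit_notify=True)
tool = NewsTools(api)

idN = 1069475079195713536
tool.getRetweets("data/out", 100000,
                 "A few genetically modified people already walk among us.", idN)
tool.getQuotes("data/out", 100000,
               "https://twitter.com/nytimes/status/1069475079195713536", idN)
tool.getRepliesAndOthers("data/out", 100000, "nytimes", idN)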
Example #5
def rpr():

    idN = 1067738896979644416
    appN = 3
    auth = cred.getAuth(appN, "app")
    api = tweepy.API(auth,
                     wait_on_rate_limit=True,
                     wait_on_rate_limit_notify=True)
    tool = NewsTools(api)

    now = datetime.datetime.now()
    path = "data/@nytimes_" + str(
        idN)  # "data/" + str(idN) + "_" + now.strftime("%Y-%m-%d-%H-%M")

    # get tweet object by ID
    tweet = api.get_status(id=idN, tweet_mode='extended')

    print("Created at: " + tweet._json["created_at"])
    print("Retweet count = " + str(tweet._json["retweet_count"]))
    print("favorite count = " + str(tweet._json["favorite_count"]))

    scraped = []
    terms = []
    terms.append("I feel proud, actually")
    terms.append("nytimes genetically")
    terms.append("nytimes modified")
    terms.append("https://twitter.com/nytimes/status/1067738896979644416"
                 )  # full url of tweet
    terms.append("https://t.co/9v4nxxN96s")
    for t in terms:
        scraped.extend(rest.rest_scrape_single(t, 1000000, api))

    scraped = list(map(lambda x: x._json, scraped))
    retweets = []
    quotes = []
    replies = []

    retweets_seen, quotes_seen, replies_seen = set(), set(), set()

    for t in scraped:
        if "retweeted_status" in t and t["retweeted_status"]["id"] == idN:
            if t["id"] not in retweets_seen:
                retweets_seen.add(t["id"])
                retweets.append(t)

        if "quoted_status" in t and t["quoted_status"]["id"] == idN:
            if t["id"] not in quotes_seen:
                quotes_seen.add(t["id"])
                quotes.append(t)

        if t["in_reply_to_status_id"] != None:
            if t["in_reply_to_status_id"] in replies_seen or t[
                    "in_reply_to_status_id"] in quotes_seen or t[
                        "in_reply_to_status_id"] in retweets_seen:
                if t["id"] not in replies_seen:
                    replies_seen.add(t["id"])
                    replies.append(t)

    writeFile(path, "retweets.json", retweets)
    writeFile(path, "quotes.json", quotes)
    writeFile(path, "replies.json", replies)
Example #6
def rpr():

    idN = 1067244642155094019  # "A Chinese scientist claimed he created the world’s first genetically edited babies, a step ethicists have feared. But he offered no proof."
    appN = 4
    auth = cred.getAuth(appN, "app")
    api = tweepy.API(auth,
                     wait_on_rate_limit=True,
                     wait_on_rate_limit_notify=True)
    tool = NewsTools(api)

    now = datetime.datetime.now()
    path = "data/@nytimes_factual_" + str(
        idN)  # "data/" + str(idN) + "_" + now.strftime("%Y-%m-%d-%H-%M")

    # get tweet object by ID
    tweet = api.get_status(id=idN, tweet_mode='extended')

    print("Created at: " + tweet._json["created_at"])
    print("Retweet count = " + str(tweet._json["retweet_count"]))
    print("favorite count = " + str(tweet._json["favorite_count"]))

    scraped = []
    terms = []
    terms.append("A Chinese scientist claimed he created")
    terms.append("nytimes genetically")
    terms.append("nytimes modified")
    terms.append("https://twitter.com/nytimes/status/1067244642155094019"
                 )  # full url of tweet
    terms.append("https://t.co/z0fChZxgyM")
    for t in terms:
        scraped.extend(rest.rest_scrape_single(t, 1000000, api))

    scraped = list(map(lambda x: x._json, scraped))
    retweets = []
    quotes = []
    replies = []

    retweets_seen, quotes_seen, replies_seen = set(), set(), set()

    for t in scraped:
        if "retweeted_status" in t and t["retweeted_status"]["id"] == idN:
            if t["id"] not in retweets_seen:
                retweets_seen.add(t["id"])
                retweets.append(t)

        if "quoted_status" in t and t["quoted_status"]["id"] == idN:
            if t["id"] not in quotes_seen:
                quotes_seen.add(t["id"])
                quotes.append(t)

        if t["in_reply_to_status_id"] != None:
            if t["in_reply_to_status_id"] in replies_seen or t[
                    "in_reply_to_status_id"] in quotes_seen or t[
                        "in_reply_to_status_id"] in retweets_seen:
                if t["id"] not in replies_seen:
                    replies_seen.add(t["id"])
                    replies.append(t)

    writeFile(path, "self.json", [json.dumps(tweet._json)])
    writeFile(path, "retweets.json", retweets)
    writeFile(path, "quotes.json", quotes)
    writeFile(path, "replies.json", replies)

    readAndCategorize(path + "/@nytimes", idN, retweets_seen, quotes_seen,
                      replies_seen, path + "/retweets.json",
                      path + "/quotes.json", path + "/replies.json")
Example #7
def rpr():

    idN = 1069463438391459840
    appN = 3
    auth = cred.getAuth(appN, "app")
    api = tweepy.API(auth,
                     wait_on_rate_limit=True,
                     wait_on_rate_limit_notify=True)
    tool = NewsTools(api)

    now = datetime.datetime.now()
    path = "data/@nytopinion_" + str(
        idN)  # "data/" + str(idN) + "_" + now.strftime("%Y-%m-%d-%H-%M")

    # get tweet object by ID
    tweet = api.get_status(id=idN, tweet_mode='extended')

    print("Created at: " + tweet._json["created_at"])
    print("Retweet count = " + str(tweet._json["retweet_count"]))
    print("favorite count = " + str(tweet._json["favorite_count"]))

    scraped = []
    terms = []
    terms.append("He was all noblesse oblige")
    terms.append("nytopinion Bush")
    terms.append("nytimes Bush")
    terms.append("https://twitter.com/nytopinion/status/1069463438391459840"
                 )  # full url of tweet
    terms.append("https://t.co/pDfFORy45z")
    for t in terms:
        scraped.extend(rest.rest_scrape_single(t, 1000000, api))

    scraped = list(map(lambda x: x._json, scraped))
    retweets = []
    quotes = []
    replies = []

    retweets_seen, quotes_seen, replies_seen = set(), set(), set()

    for t in scraped:
        if "retweeted_status" in t and t["retweeted_status"]["id"] == idN:
            if t["id"] not in retweets_seen:
                retweets_seen.add(t["id"])
                retweets.append(t)

        if "quoted_status" in t and t["quoted_status"]["id"] == idN:
            if t["id"] not in quotes_seen:
                quotes_seen.add(t["id"])
                quotes.append(t)

        if t["in_reply_to_status_id"] != None:
            if t["in_reply_to_status_id"] in replies_seen or t[
                    "in_reply_to_status_id"] in quotes_seen or t[
                        "in_reply_to_status_id"] in retweets_seen:
                if t["id"] not in replies_seen:
                    replies_seen.add(t["id"])
                    replies.append(t)

    writeFile(path, "retweets.json", retweets)
    writeFile(path, "quotes.json", quotes)
    writeFile(path, "replies.json", replies)

    readAndCategorize(path + "/@nytopinion", idN, retweets_seen, quotes_seen,
                      replies_seen, path + "/retweets.json",
                      path + "/quotes.json", path + "/replies.json")
def network(idN, terms, path, appN):

    auth = cred.getAuth(appN, "app")
    api = tweepy.API(auth,
                     wait_on_rate_limit=True,
                     wait_on_rate_limit_notify=True)

    now = datetime.datetime.now()
    path = path + "/" + str(idN) + "/" + now.strftime("%Y-%m-%d-%H-%M")

    # get tweet object by ID
    tweet = api.get_status(id=idN, tweet_mode='extended')

    print("Created at: " + tweet._json["created_at"])
    print("Retweet count = " + str(tweet._json["retweet_count"]))
    print("Favorite count = " + str(tweet._json["favorite_count"]))

    scraped = []
    for t in terms:
        scraped.extend(rest.rest_scrape_single(t, 1000000, api))

    scraped = list(map(lambda x: x._json, scraped))
    basics.writeFile(path, "all.json",
                     list(map(lambda x: json.dumps(x), scraped)))

    retweets = []
    quotes = []
    replies = []

    retweets_seen, quotes_seen, replies_seen = set(), set(), set()

    for t in scraped:
        if "retweeted_status" in t and t["retweeted_status"]["id"] == idN:
            if t["id"] not in retweets_seen:
                retweets_seen.add(t["id"])
                retweets.append(t)

        if "quoted_status" in t and t["quoted_status"]["id"] == idN:
            if t["id"] not in quotes_seen:
                quotes_seen.add(t["id"])
                quotes.append(t)

        if t["in_reply_to_status_id"] != None:
            if t["in_reply_to_status_id"] in replies_seen or t[
                    "in_reply_to_status_id"] in quotes_seen or t[
                        "in_reply_to_status_id"] in retweets_seen:
                if t["id"] not in replies_seen:
                    replies_seen.add(t["id"])
                    replies.append(t)

    ids = list(map(lambda x: x["id"], retweets))
    ids.extend(list(map(lambda x: x["id"], quotes)))
    ids.extend(list(map(lambda x: x["id"], replies)))
    ids = list(map(lambda x: str(x), ids))
    ids.append(str(idN))

    basics.writeFile(path, "ids.csv", [",".join(ids)])
    basics.writeFile(path, "retweets.json",
                     list(map(lambda x: json.dumps(x), retweets)))
    basics.writeFile(path, "quotes.json",
                     list(map(lambda x: json.dumps(x), quotes)))
    basics.writeFile(path, "replies.json",
                     list(map(lambda x: json.dumps(x), replies)))