Code Example #1
File: one.py Project: joyceeexinyiwang/SCSThesis
def isNewsProfessional(inputPath, outputPath):

    idN = 1070755071271555072
    appN = 6
    auth = cred.getAuth(appN, "app")
    api = tweepy.API(auth,
                     wait_on_rate_limit=True,
                     wait_on_rate_limit_notify=True)
    tool = NewsTools(api)

    userIDs = set()
    result = []

    tweets = None
    print("Processing..." + inputPath)
    with open(inputPath) as i_file:
        tweets = i_file.read().strip().split("\n")

    for t in tweets:
        t = json.loads(t)
        userID = t["user"]["id"]
        screen_name = t["user"]["screen_name"]
        if (userID not in userIDs):
            userIDs.add(userID)
            isNewsProf = tool.isNewsProfessional(t["user"]["screen_name"])
            result.append(screen_name + "\t" + str(isNewsProf))
            print(screen_name + " " + str(isNewsProf))

    # write one "<screen_name>\t<isNewsProfessional>" record per line
    with open(outputPath, "w") as o_file:
        o_file.write("\n".join(result).strip())
Code Example #2
def filter(tool, inputFolder, inputFile, outputFolder, handle, maxTweets,
           appN):

    print("## Filtering by relevance to @" + handle)

    inTweets = ""
    with open(inputFolder + "/" + inputFile) as i_file:
        inTweets = i_file.read()
    tweets = inTweets.split("\n")

    auth = cred.getAuth(appN, "app")
    api = tweepy.API(auth,
                     wait_on_rate_limit=True,
                     wait_on_rate_limit_notify=True)

    relevantTweets = []
    irrelevantTweets = []

    for tweet in tweets:
        print(".", end="")
        (result, accum) = tool.isRelatedToAgency(json.loads(tweet), handle)
        if result:
            relevantTweets.append(tweet)
            relevantTweets.extend(accum)
        else:
            irrelevantTweets.append(tweet)
            irrelevantTweets.extend(accum)

    writeFile(outputFolder, "relevant.json", relevantTweets)
    writeFile(outputFolder, "irrelevant.json", irrelevantTweets)

    return (relevantTweets, irrelevantTweets)
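writeFile is called throughout these examples but its implementation is not included in the listings. Purely as an assumption about what such a helper might do (one record per line, creating the folder if needed), a minimal sketch:

import json
import os

def writeFile(folder, filename, records):
    # Hypothetical sketch, not the project's actual helper: write one record per
    # line, serializing dicts to JSON and passing strings through unchanged.
    os.makedirs(folder, exist_ok=True)
    with open(os.path.join(folder, filename), "w") as o_file:
        for r in records:
            o_file.write(r if isinstance(r, str) else json.dumps(r))
            o_file.write("\n")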
Code Example #3
File: scrape.py Project: joyceeexinyiwang/SCSThesis
def run(qFile, outputFolder, maxTweets, appN):

    #Create Query list
    qlist = ""
    with open(qFile) as i_file:
        qlist = i_file.read()
        print(qlist)
    queries = qlist.split(",")

    qFilename = qFile[qFile.rindex("/") + 1:qFile.index(".")]
    print("## Running scrape on file {a}".format(a=qFilename))

    i = appN

    auth = cred.getAuth(i, "app")
    api = tweepy.API(auth,
                     wait_on_rate_limit=True,
                     wait_on_rate_limit_notify=True)

    now = datetime.datetime.now()
    path = outputFolder + "/" + qFilename + "_" + now.strftime(
        "%Y-%m-%d-%H-%M")

    ## Start scraping
    print('## Running scrape "{keywords}" on app #{a}'.format(
        keywords=str(queries), a=i))
    ScrapeResults = rest.rest_scrape(queries, api, path + "/by_keywords",
                                     int(maxTweets))

    allTweets = basics.dedup(path + "/by_keywords", path + "/cleaned",
                             "deduped.json")
    basics.separateByDate(allTweets, path + "/by_dates")
Code Example #4
def filter(inputFolder, inputFile, handle, maxTweets, appN):
    inTweets = ""
    with open(inputFolder + "/" + inputFile) as i_file:
        inTweets = i_file.read()
    tweets = inTweets.split("\n")

    auth = cred.getAuth(appN, "app")
    api = tweepy.API(auth,
                     wait_on_rate_limit=True,
                     wait_on_rate_limit_notify=True)

    relevantTweets = []
    irrelevantTweets = []
    for tweet in tweets:
        if news.isRelatedToAgency(api, json.loads(tweet), handle):
            relevantTweets.append(tweet)
        else:
            irrelevantTweets.append(tweet)

    outpath = inputFolder + "/result"

    if not os.path.exists(outpath):
        os.makedirs(outpath)

    writeFile(outpath, "relevant.json", relevantTweets)
    writeFile(outpath, "irrelevant.json", irrelevantTweets)
    clean.dedup(inputFolder, outpath)
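These filter variants read tweets with read().split("\n"), which leaves a trailing empty string that json.loads rejects. A small loader sketch (loadTweets is a hypothetical name, not part of the project) that skips blank lines:

import json

def loadTweets(inputFolder, inputFile):
    # Hypothetical helper: return one parsed tweet per non-empty line.
    tweets = []
    with open(inputFolder + "/" + inputFile) as i_file:
        for line in i_file:
            line = line.strip()
            if line:
                tweets.append(json.loads(line))
    return tweets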
Code Example #5
def scrape_from_tweets(inputFolder, inputFile, maxTweets, appN):   

	#Create Query list
	inTweets = ""
	with open(inputFolder + "/" + inputFile) as i_file:
		inTweets = i_file.read()
	tweets = inTweets.split("\n")

	auth = cred.getAuth(appN, "app")
	api = tweepy.API(auth, wait_on_rate_limit=True, wait_on_rate_limit_notify=True)

	outpath = inputFolder + "/result"

	for tweet in tweets:
		jt = json.loads(tweet)
		tweet_id = jt["id"]
		account_handle = jt["user"]["screen_name"]

		op = outpath

		tweet_segment = " ".join((jt["full_text"].split(" "))[2:min(10, len(jt["full_text"].split(" "))-2)])

		if (tweet_segment != ""):
			retweets = news.getRetweets(api, op, maxTweets, tweet_segment, tweet_id)

		try:
			# entities["urls"] is a list of URL objects; take the first one
			tweet_url = jt["entities"]["urls"][0]["url"]
			quotes = news.getQuotes(api, op, maxTweets, tweet_url, tweet_id)
		except (KeyError, IndexError):
			print("\t\tCan't find tweet url for id=" + str(tweet_id))
		
		replies_and_others = news.getRepliesAndOthers(api, op, maxTweets, account_handle, tweet_id)

	clean.clean(outpath, outpath)
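This example and Code Example #17 both pull a single URL out of the tweet's entities; in the v1.1 payload, entities["urls"] is a list of URL objects. A slightly more defensive extraction, sketched against the variables used in the loop above (not the project's code), could be:

# Sketch: prefer the expanded URL and handle missing entities without an exception.
url_objects = jt.get("entities", {}).get("urls", [])
urls = [u.get("expanded_url") or u.get("url") for u in url_objects]
if urls:
    quotes = news.getQuotes(api, op, maxTweets, urls[0], tweet_id)
else:
    print("\t\tCan't find tweet url for id=" + str(tweet_id))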
Code Example #6
def getSourceID(user_id):
    i = 0
    auth = credentials.getAuth(i, "app")
    api = tweepy.API(auth,
                     wait_on_rate_limit=True,
                     wait_on_rate_limit_notify=True)
    # resolve the user that was passed in to its numeric ID
    user = api.get_user(id=user_id)
    return user.id
Code Example #7
def rpr():

	idN = 1069475079195713536
	appN = 4
	auth = cred.getAuth(appN, "app")
	api = tweepy.API(auth, wait_on_rate_limit=True, wait_on_rate_limit_notify=True)
	tool = NewsTools(api)

	now = datetime.datetime.now()
	path = "data/@nytimes_opinion_" + str(idN) # "data/" + str(idN) + "_" + now.strftime("%Y-%m-%d-%H-%M")

	# get tweet object by ID
	tweet = api.get_status(id=idN, tweet_mode='extended')

	print("Created at: " + tweet._json["created_at"])
	print("Retweet count = " + str(tweet._json["retweet_count"]))
	print("favorite count = " + str(tweet._json["favorite_count"]))

	scraped = []
	terms = []
	terms.append("A few genetically modified people already walk among us.")
	terms.append("nytimes genetically")
	terms.append("nytimes modified")
	terms.append("nytimes china")
	terms.append("https://twitter.com/nytimes/status/1069475079195713536") # full url of tweet
	terms.append("https://t.co/kxRdkjfFDc")
	for t in terms:
		scraped.extend(rest.rest_scrape_single(t, 1000000, api))

	scraped = list(map(lambda x: x._json, scraped))
	retweets = []
	quotes = []
	replies = []

	retweets_seen, quotes_seen, replies_seen = set(), set(), set()

	for t in scraped:
		if "retweeted_status" in t and t["retweeted_status"]["id"] == idN:
			if t["id"] not in retweets_seen:
				retweets_seen.add(t["id"])
				retweets.append(t)

		if "quoted_status" in t and t["quoted_status"]["id"] == idN:
			if t["id"] not in quotes_seen:
				quotes_seen.add(t["id"])
				quotes.append(t)

		if t["in_reply_to_status_id"] != None:
			if t["in_reply_to_status_id"] in replies_seen or t["in_reply_to_status_id"] in quotes_seen or t["in_reply_to_status_id"] in retweets_seen:
				if t["id"] not in replies_seen:
					replies_seen.add(t["id"])
					replies.append(t)

	writeFile(path, "retweets.json", retweets)
	writeFile(path, "quotes.json", quotes)
	writeFile(path, "replies.json", replies)

	readAndCategorize(path + "/@nytimes", idN, retweets_seen, quotes_seen, replies_seen, path+"/retweets.json", path+"/quotes.json", path+"/replies.json")
Code Example #8
def printThread(path, startN, endN, appN):

    input_str = ""
    with open(path) as i_file:
        input_str = i_file.read()

    tweets_l = input_str.split("\n")
    folderName = path[:path.rindex(".")] + "_thread"
    if not os.path.exists(folderName):
        os.makedirs(folderName)

    count = 0
    size = 10
    fileCount = 0

    filename = "thread_" + str(count) + "_" + str(count + size) + ".txt"
    f = open(folderName + "/" + filename, "w")
    f.write(
        "Extracted threads from {a} and writing into {b}\n\n---\n---\n---\n\n".
        format(a=folderName, b=filename))

    i = appN
    auth = cred.getAuth(i, "user")
    api = tweepy.API(auth,
                     wait_on_rate_limit=True,
                     wait_on_rate_limit_notify=True)

    tool = NewsTools(api)

    agents = {}

    for t_str in tweets_l[startN:endN]:

        print("woop")
        t = json.loads(t_str)
        f.write("\n-\n")
        a = printTweet(tool, t, f, agents)
        agents.update(a)

        count += 1
        if count >= size:
            count = 0
            fileCount += 1
            f.close()  # close the finished chunk before opening the next file
            filename = "thread_" + str(startN + fileCount * size) + "_" + str(
                startN + fileCount * size + size) + ".txt"
            f = open(folderName + "/" + filename, "w")
            f.write(
                "Extracted threads from {a} and writing into {b}\n\n---\n---\n---\n\n"
                .format(a=folderName, b=filename))

    f.close()

    filename_a = "thread_agents.txt"
    with open(folderName + "/" + filename_a, "w") as f_a:
        f_a.write(json.dumps(agents))
Code Example #9
File: agents.py Project: joyceeexinyiwang/SCSThesis
def run(handle, qFile, maxTweets, appN):

    #Create Query list
    qlist = ""
    with open(qFile) as i_file:
        qlist = i_file.read()
    qlist = qlist.split(",")

    queries = []

    for q in qlist:
        queries.append(handle + " " + q)

    qFilename = qFile[qFile.rindex("/") + 1:qFile.index(".")]
    print("## Running scrape on file {a}".format(a=qFilename))

    i = appN

    auth = cred.getAuth(i, "app")
    api = tweepy.API(auth,
                     wait_on_rate_limit=True,
                     wait_on_rate_limit_notify=True)

    now = datetime.datetime.now()
    path = "data/" + handle + "_" + qFilename + "/" + now.strftime(
        "%Y-%m-%d-%H-%M")

    ## Start scraping
    print('## Running scrape "{keywords}" on app #{a}'.format(
        keywords=str(queries), a=i))
    ScrapeResults = rest.rest_scrape(queries, api, path + "/by_keywords",
                                     int(maxTweets))

    ## Clean up

    i = appN

    auth = cred.getAuth(i, "app")
    api = tweepy.API(auth,
                     wait_on_rate_limit=True,
                     wait_on_rate_limit_notify=True)
Code Example #10
def filter(inputFolder, inputFile, maxTweets, appN):
	inTweets = ""
	with open(inputFolder + "/" + inputFile) as i_file:
		inTweets = i_file.read()
	tweets = inTweets.split("\n")

	auth = cred.getAuth(appN, "app")
	api = tweepy.API(auth, wait_on_rate_limit=True, wait_on_rate_limit_notify=True)

	outpath = inputFolder + "/result"

	for tweet in tweets:
		# (loop body truncated in the source listing)
		pass
Code Example #11
File: func.py Project: joyceeexinyiwang/SCSThesis
def main(argv):
    print("\nRunning...")

    idN = int(argv[0])
    i = int(argv[1])
    auth = credentials.getAuth(i, "app")
    api = tweepy.API(auth,
                     wait_on_rate_limit=True,
                     wait_on_rate_limit_notify=True)

    tweet = gen.getTweet(idN, api)

    print(tweet)
Code Example #12
def getFollowers(user_id):
    i = 0
    auth = credentials.getAuth(i, "app")
    api = tweepy.API(auth,
                     wait_on_rate_limit=True,
                     wait_on_rate_limit_notify=True)

    followers = []
    for page in tweepy.Cursor(api.followers_ids,
                              id=user_id,
                              tweet_mode='extended').pages():
        followers.extend(page)

    return followers
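A hypothetical usage of getFollowers; the ID below is a placeholder, and the Cursor inside the function pages through follower IDs in batches (up to 5,000 per request for followers/ids):

# Hypothetical usage; SOME_USER_ID is a placeholder, not a real account.
SOME_USER_ID = 123456789
follower_ids = getFollowers(SOME_USER_ID)
print(str(len(follower_ids)) + " follower IDs collected")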
Code Example #13
File: one.py Project: joyceeexinyiwang/SCSThesis
def bigFile(term):
    appN = 4
    auth = cred.getAuth(appN, "app")
    api = tweepy.API(auth,
                     wait_on_rate_limit=True,
                     wait_on_rate_limit_notify=True)
    now = datetime.datetime.now()
    path = "data/" + term + "_" + now.strftime("%Y-%m-%d-%H-%M")
    rest.rest_scrape([term],
                     api,
                     path,
                     100000000,
                     file_size=100000,
                     fileName=None,
                     max_num_errors=5)
Code Example #14
File: one.py Project: joyceeexinyiwang/SCSThesis
def one():

    idN = 1070755071271555072
    appN = 6
    auth = cred.getAuth(appN, "app")
    api = tweepy.API(auth,
                     wait_on_rate_limit=True,
                     wait_on_rate_limit_notify=True)
    tool = NewsTools(api)

    # get tweet object by ID
    tweet = api.get_status(id=idN, tweet_mode='extended')

    print(str(tweet._json))
    o_file = open("this.json", "w")
    o_file.write(str(tweet._json))
Code Example #15
def run(queryFile, appNumber):

    #Create Query list
    queries = ""
    with open(queryFile) as i_file:
        queries = i_file.read()
    queries_list = queries.split(",")

    # choose which Twitter app to use for this query
    i = appNumber

    print('Streaming for ' + queryFile)

    auth = cred.getAuth(i, "user")
    now = datetime.now()
    out_Path = "data/" + now.strftime("%Y-%m-%d-%H-%M") + "-stream"

    stream.stream_tweets(auth, queries_list, out_Path)
Code Example #16
def getTimeline(user_id):

    i = 0
    auth = credentials.getAuth(i, "app")
    api = tweepy.API(auth,
                     wait_on_rate_limit=True,
                     wait_on_rate_limit_notify=True)

    tweets = []
    for page in tweepy.Cursor(api.user_timeline,
                              id=user_id,
                              tweet_mode='extended',
                              count=20).pages():
        tweets.extend(page)

    tweet_str_list = []
    for t in tweets:
        tweet_str_list.append(t._json)

    return tweet_str_list
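The filter and scrape_from_tweets functions above read newline-delimited JSON, while getTimeline returns a list of tweet dicts. A short sketch (the user ID and output path are placeholders) that bridges the two formats:

import json

# Hypothetical usage: dump a timeline in the newline-delimited JSON format
# that the other functions on this page read back in.
SOME_USER_ID = 123456789
timeline = getTimeline(SOME_USER_ID)
with open("data/timeline.json", "w") as o_file:
    for t in timeline:
        o_file.write(json.dumps(t) + "\n")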
Code Example #17
def scrape_from_id(inputFolder, inputFile, maxTweets, appN):   

	#Create Query list
	ids = ""
	with open(inputFolder + "/" + inputFile) as i_file:
		inTweets = i_file.read()
	ids = ids.split("\n")

	auth = cred.getAuth(appN, "app")
	api = tweepy.API(auth, wait_on_rate_limit=True, wait_on_rate_limit_notify=True)

	outpath = inputFolder + "/result"

	for curID in ids:

		try:
			tweet = api.get_status(id=int(curID), tweet_mode='extended')
		except:
			print("get_status error with id=" + curID)
			continue  # skip IDs that could not be fetched

		jt = tweet._json
		tweet_id = jt["id"]
		account_handle = jt["user"]["screen_name"]
		tweet_segment = " ".join((jt["full_text"].split(" "))[2:])

		op = outpath

		retweets = news.getRetweets(api, op, maxTweets, tweet_segment, tweet_id)

		try:
			# entities["urls"] is a list of URL objects; take the first one
			tweet_url = jt["entities"]["urls"][0]["url"]
			quotes = news.getQuotes(api, op, maxTweets, tweet_url, tweet_id)
		except (KeyError, IndexError):
			print("Can't find tweet url for id=" + str(tweet_id))
		
		replies_and_others = news.getRepliesAndOthers(api, op, maxTweets, account_handle, tweet_id)

	clean.clean(outpath, outpath)
Code Example #18
def getSource(inputFolder, outputFolder, appNumber):

    auth = cred.getAuth(appNumber, "app")
    api = tweepy.API(auth,
                     wait_on_rate_limit=True,
                     wait_on_rate_limit_notify=True)

    if not os.path.exists(outputFolder + "/NEW/"):
        os.makedirs(outputFolder + "/NEW/")

    f = open(outputFolder + "/NEW/source.json", "w")

    for (dirpath, dirnames, filenames) in os.walk(inputFolder):
        for filename in filenames:
            if filename.endswith('.json'):
                print("Currently on " + filename)
                with open(dirpath + "/" + filename) as i_file:

                    iDs = set()

                    for line in i_file:
                        t = json.loads(line)
                        # (per-tweet processing appears truncated in the source listing)

    f.close()
Code Example #19
def network(idN, terms, path, appN):

    auth = cred.getAuth(appN, "app")
    api = tweepy.API(auth,
                     wait_on_rate_limit=True,
                     wait_on_rate_limit_notify=True)

    now = datetime.datetime.now()
    path = path + "/" + str(idN) + "/" + now.strftime("%Y-%m-%d-%H-%M")

    # get tweet object by ID
    tweet = api.get_status(id=idN, tweet_mode='extended')

    print("Created at: " + tweet._json["created_at"])
    print("Retweet count = " + str(tweet._json["retweet_count"]))
    print("Favorite count = " + str(tweet._json["favorite_count"]))

    scraped = []
    for t in terms:
        scraped.extend(rest.rest_scrape_single(t, 1000000, api))

    scraped = list(map(lambda x: x._json, scraped))
    basics.writeFile(path, "all.json",
                     list(map(lambda x: json.dumps(x), scraped)))

    retweets = []
    quotes = []
    replies = []

    retweets_seen, quotes_seen, replies_seen = set(), set(), set()

    for t in scraped:
        if "retweeted_status" in t and t["retweeted_status"]["id"] == idN:
            if t["id"] not in retweets_seen:
                retweets_seen.add(t["id"])
                retweets.append(t)

        if "quoted_status" in t and t["quoted_status"]["id"] == idN:
            if t["id"] not in quotes_seen:
                quotes_seen.add(t["id"])
                quotes.append(t)

        if t["in_reply_to_status_id"] != None:
            if t["in_reply_to_status_id"] in replies_seen or t[
                    "in_reply_to_status_id"] in quotes_seen or t[
                        "in_reply_to_status_id"] in retweets_seen:
                if t["id"] not in replies_seen:
                    replies_seen.add(t["id"])
                    replies.append(t)

    ids = list(map(lambda x: x["id"], retweets))
    ids.extend(list(map(lambda x: x["id"], quotes)))
    ids.extend(list(map(lambda x: x["id"], replies)))
    ids = list(map(lambda x: str(x), ids))
    ids.append(str(idN))

    basics.writeFile(path, "ids.csv", [",".join(ids)])
    basics.writeFile(path, "retweets.json",
                     list(map(lambda x: json.dumps(x), retweets)))
    basics.writeFile(path, "quotes.json",
                     list(map(lambda x: json.dumps(x), quotes)))
    basics.writeFile(path, "replies.json",
                     list(map(lambda x: json.dumps(x), replies)))
Code Example #20
def rpr():

    idN = 1067738896979644416
    appN = 3
    auth = cred.getAuth(appN, "app")
    api = tweepy.API(auth,
                     wait_on_rate_limit=True,
                     wait_on_rate_limit_notify=True)
    tool = NewsTools(api)

    now = datetime.datetime.now()
    path = "data/@nytimes_" + str(
        idN)  # "data/" + str(idN) + "_" + now.strftime("%Y-%m-%d-%H-%M")

    # get tweet object by ID
    tweet = api.get_status(id=idN, tweet_mode='extended')

    print("Created at: " + tweet._json["created_at"])
    print("Retweet count = " + str(tweet._json["retweet_count"]))
    print("favorite count = " + str(tweet._json["favorite_count"]))

    scraped = []
    terms = []
    terms.append("I feel proud, actually")
    terms.append("nytimes genetically")
    terms.append("nytimes modified")
    terms.append("https://twitter.com/nytimes/status/1067738896979644416"
                 )  # full url of tweet
    terms.append("https://t.co/9v4nxxN96s")
    for t in terms:
        scraped.extend(rest.rest_scrape_single(t, 1000000, api))

    scraped = list(map(lambda x: x._json, scraped))
    retweets = []
    quotes = []
    replies = []

    retweets_seen, quotes_seen, replies_seen = set(), set(), set()

    for t in scraped:
        if "retweeted_status" in t and t["retweeted_status"]["id"] == idN:
            if t["id"] not in retweets_seen:
                retweets_seen.add(t["id"])
                retweets.append(t)

        if "quoted_status" in t and t["quoted_status"]["id"] == idN:
            if t["id"] not in quotes_seen:
                quotes_seen.add(t["id"])
                quotes.append(t)

        if t["in_reply_to_status_id"] != None:
            if t["in_reply_to_status_id"] in replies_seen or t[
                    "in_reply_to_status_id"] in quotes_seen or t[
                        "in_reply_to_status_id"] in retweets_seen:
                if t["id"] not in replies_seen:
                    replies_seen.add(t["id"])
                    replies.append(t)

    writeFile(path, "retweets.json", retweets)
    writeFile(path, "quotes.json", quotes)
    writeFile(path, "replies.json", replies)
Code Example #21
def rpr():

    idN = 1067244642155094019  # "A Chinese scientist claimed he created the world’s first genetically edited babies, a step ethicists have feared. But he offered no proof."
    appN = 4
    auth = cred.getAuth(appN, "app")
    api = tweepy.API(auth,
                     wait_on_rate_limit=True,
                     wait_on_rate_limit_notify=True)
    tool = NewsTools(api)

    now = datetime.datetime.now()
    path = "data/@nytimes_factual_" + str(
        idN)  # "data/" + str(idN) + "_" + now.strftime("%Y-%m-%d-%H-%M")

    # get tweet object by ID
    tweet = api.get_status(id=idN, tweet_mode='extended')

    print("Created at: " + tweet._json["created_at"])
    print("Retweet count = " + str(tweet._json["retweet_count"]))
    print("favorite count = " + str(tweet._json["favorite_count"]))

    scraped = []
    terms = []
    terms.append("A Chinese scientist claimed he created")
    terms.append("nytimes genetically")
    terms.append("nytimes modified")
    terms.append("https://twitter.com/nytimes/status/1067244642155094019"
                 )  # full url of tweet
    terms.append("https://t.co/z0fChZxgyM")
    for t in terms:
        scraped.extend(rest.rest_scrape_single(t, 1000000, api))

    scraped = list(map(lambda x: x._json, scraped))
    retweets = []
    quotes = []
    replies = []

    retweets_seen, quotes_seen, replies_seen = set(), set(), set()

    for t in scraped:
        if "retweeted_status" in t and t["retweeted_status"]["id"] == idN:
            if t["id"] not in retweets_seen:
                retweets_seen.add(t["id"])
                retweets.append(t)

        if "quoted_status" in t and t["quoted_status"]["id"] == idN:
            if t["id"] not in quotes_seen:
                quotes_seen.add(t["id"])
                quotes.append(t)

        if t["in_reply_to_status_id"] != None:
            if t["in_reply_to_status_id"] in replies_seen or t[
                    "in_reply_to_status_id"] in quotes_seen or t[
                        "in_reply_to_status_id"] in retweets_seen:
                if t["id"] not in replies_seen:
                    replies_seen.add(t["id"])
                    replies.append(t)

    writeFile(path, "self.json", [json.dumps(tweet._json)])
    writeFile(path, "retweets.json", retweets)
    writeFile(path, "quotes.json", quotes)
    writeFile(path, "replies.json", replies)

    readAndCategorize(path + "/@nytimes", idN, retweets_seen, quotes_seen,
                      replies_seen, path + "/retweets.json",
                      path + "/quotes.json", path + "/replies.json")
Code Example #22
def rpr():

    idN = 1069463438391459840
    appN = 3
    auth = cred.getAuth(appN, "app")
    api = tweepy.API(auth,
                     wait_on_rate_limit=True,
                     wait_on_rate_limit_notify=True)
    tool = NewsTools(api)

    now = datetime.datetime.now()
    path = "data/@nytopinion_" + str(
        idN)  # "data/" + str(idN) + "_" + now.strftime("%Y-%m-%d-%H-%M")

    # get tweet object by ID
    tweet = api.get_status(id=idN, tweet_mode='extended')

    print("Created at: " + tweet._json["created_at"])
    print("Retweet count = " + str(tweet._json["retweet_count"]))
    print("favorite count = " + str(tweet._json["favorite_count"]))

    scraped = []
    terms = []
    terms.append("He was all noblesse oblige")
    terms.append("nytopinion Bush")
    terms.append("nytimes Bush")
    terms.append("https://twitter.com/nytopinion/status/1069463438391459840"
                 )  # full url of tweet
    terms.append("https://t.co/pDfFORy45z")
    for t in terms:
        scraped.extend(rest.rest_scrape_single(t, 1000000, api))

    scraped = list(map(lambda x: x._json, scraped))
    retweets = []
    quotes = []
    replies = []

    retweets_seen, quotes_seen, replies_seen = set(), set(), set()

    for t in scraped:
        if "retweeted_status" in t and t["retweeted_status"]["id"] == idN:
            if t["id"] not in retweets_seen:
                retweets_seen.add(t["id"])
                retweets.append(t)

        if "quoted_status" in t and t["quoted_status"]["id"] == idN:
            if t["id"] not in quotes_seen:
                quotes_seen.add(t["id"])
                quotes.append(t)

        if t["in_reply_to_status_id"] != None:
            if t["in_reply_to_status_id"] in replies_seen or t[
                    "in_reply_to_status_id"] in quotes_seen or t[
                        "in_reply_to_status_id"] in retweets_seen:
                if t["id"] not in replies_seen:
                    replies_seen.add(t["id"])
                    replies.append(t)

    writeFile(path, "retweets.json", retweets)
    writeFile(path, "quotes.json", quotes)
    writeFile(path, "replies.json", replies)

    readAndCategorize(path + "/@nytopinion", idN, retweets_seen, quotes_seen,
                      replies_seen, path + "/retweets.json",
                      path + "/quotes.json", path + "/replies.json")