def getAllReplies(tweets, attributes, screen_name): parsedData = tweetParser.parsetweet(tweets,["id","in_reply_to_status_id","usermentions"]) idList = map(long, [a for a,b,c in parsedData]) replyToIdList = [b for a,b,c in parsedData] usermentions = list(set([item for sublist in [c for a,b,c in parsedData] for item in sublist.split(",")])) if "" in usermentions: usermentions.remove("") print "making the file" # dumping the data filename = '../Data/%s_replyTo_extracted_tweets_data.csv' % screen_name if not path.isfile(filename): with open(filename, 'wb') as f: writer = csv.writer(f) writer.writerow(attributes) retrievedones = [] for screenNames in usermentions: names = screenNames.split(",") repliesTo = [] for c in names: print "Doing for ",c retrievedTweets = allTweetRetriever.get_all_tweets(c) for tweet in retrievedTweets: if tweet.in_reply_to_status_id is not None: if long(tweet.in_reply_to_status_id) in idList: if tweet.id not in retrievedones: retrievedones.append(tweet.id) repliesTo.append(tweet) parsedReplies = tweetParser.parsetweet(repliesTo,attributes) appendToCsv.appendToCsvFile(filename,parsedReplies)
def getAllReplies(tweets, attributes, screen_name): parsedData = tweetParser.parsetweet( tweets, ["id", "in_reply_to_status_id", "usermentions"]) idList = map(long, [a for a, b, c in parsedData]) replyToIdList = [b for a, b, c in parsedData] usermentions = list( set([ item for sublist in [c for a, b, c in parsedData] for item in sublist.split(",") ])) if "" in usermentions: usermentions.remove("") print "making the file" # dumping the data filename = '../Data/%s_replyTo_extracted_tweets_data.csv' % screen_name if not path.isfile(filename): with open(filename, 'wb') as f: writer = csv.writer(f) writer.writerow(attributes) retrievedones = [] for screenNames in usermentions: names = screenNames.split(",") repliesTo = [] for c in names: print "Doing for ", c retrievedTweets = allTweetRetriever.get_all_tweets(c) for tweet in retrievedTweets: if tweet.in_reply_to_status_id is not None: if long(tweet.in_reply_to_status_id) in idList: if tweet.id not in retrievedones: retrievedones.append(tweet.id) repliesTo.append(tweet) parsedReplies = tweetParser.parsetweet(repliesTo, attributes) appendToCsv.appendToCsvFile(filename, parsedReplies)
# NOTE(review): this snippet starts in the middle of a statement sequence.
# `tweet`, `c`, `line`, `outtweets` and the first use of `filename` are not
# bound anywhere in the visible text -- presumably the enclosing loops and
# setup were cut off. The statements are kept in their visible order; confirm
# the intended nesting against the complete file.
element = getattr(tweet, c, "")
try:
    element = element.encode('utf8', 'ignore')
except (AttributeError, TypeError, UnicodeError):
    # Best effort: a value that cannot be encoded as text is stored as is.
    # Narrower than a bare `except`, so Ctrl-C and SystemExit still propagate.
    pass
line.append(element)
outtweets.append(line)
# Rows are written in the reverse of the order in which they were collected.
appendToCsv.appendToCsvFile(filename, outtweets[::-1])

# Specifying the list of attributes to be extracted
attributes = [
    "id", "in_reply_to_status_id", "retweeted", "created_at", "text",
    "favorite_count", "retweet_count", "geo", "usermentions", "hashtags",
    "current_author", "original_authour"
]  #,"retweeters"]

# creating file if not present to store extracted data
filename = '../Data/%s_extracted_tweets_data.csv' % screen_name
if not os.path.exists(filename):
    # 'wb' is the mode the Python 2 csv module expects for writers.
    with open(filename, 'wb') as f:
        writer = csv.writer(f)
        writer.writerow(attributes)  # header row

# Parsing the twitter data
parsedData = tweetParser.parsetweet(tweets, attributes)

# Storing the twitter data (reverse of the parsed order)
appendToCsv.appendToCsvFile(filename, parsedData[::-1])

# Retrieving the replies
reply.getAllReplies(tweets, attributes, screen_name)
# Flatten every tweet into one row holding the requested attributes.
# NOTE(review): `tweets`, `listOfAttributes`, `screen_name` and the first use
# of `filename` (before the assignment further down) are not bound anywhere
# in the visible text -- presumably supplied by surrounding code; confirm.
outtweets = []
for tweet in tweets:
    line = []
    for c in listOfAttributes:
        # Attributes missing on the tweet become an empty string.
        element = getattr(tweet, c, "")
        try:
            element = element.encode('utf8', 'ignore')
        except (AttributeError, TypeError, UnicodeError):
            # Best effort: a value that cannot be encoded as text is stored
            # as is. Narrower than a bare `except`, so Ctrl-C and SystemExit
            # still propagate.
            pass
        line.append(element)
    outtweets.append(line)
# Rows are written in the reverse of the order in which they were collected.
appendToCsv.appendToCsvFile(filename, outtweets[::-1])

# Specifying the list of attributes to be extracted
attributes = [
    "id", "in_reply_to_status_id", "retweeted", "created_at", "text",
    "favorite_count", "retweet_count", "geo", "usermentions", "hashtags",
    "current_author", "original_authour"
]  #,"retweeters"]

# creating file if not present to store extracted data
filename = '../Data/%s_extracted_tweets_data.csv' % screen_name
if not os.path.exists(filename):
    # 'wb' is the mode the Python 2 csv module expects for writers.
    with open(filename, 'wb') as f:
        writer = csv.writer(f)
        writer.writerow(attributes)  # header row

# Parsing the twitter data
parsedData = tweetParser.parsetweet(tweets, attributes)

# Storing the twitter data (reverse of the parsed order)
appendToCsv.appendToCsvFile(filename, parsedData[::-1])

# Retrieving the replies
reply.getAllReplies(tweets, attributes, screen_name)