def extractJson(fileName):
    """Extract tweets from a JSON file into a list of Tweets.

    The file is read as UTF-8 and is expected to hold a JSON object whose
    ``results`` entry is a sequence of tweet dicts. For every tweet the keys
    ``id``, ``from_user_id``, ``text``, ``from_user``, ``from_user_name``,
    ``profile_image_url``, ``source``, ``to_user``, ``created_at`` and
    ``entities.user_mentions`` are read.

    Args:
        fileName: Path of the JSON file to read.

    Returns:
        A list holding one ``Tweet`` per entry of ``results``, in file order.
        Each tweet's ``userMentions`` receives the ``id`` of every mention.

    Raises:
        SystemExit: If the file content is not valid UTF-8 encoded JSON.
        OSError: If the file cannot be opened.
        KeyError: If a tweet lacks one of the keys listed above.
    """
    try:
        # ``with`` guarantees the handle is closed even when parsing fails.
        with open(fileName, encoding='utf-8') as jsonFile:
            data = json.load(jsonFile)
    except ValueError:
        # Both JSON decoding errors and UTF-8 decoding errors derive from
        # ValueError, so either one ends up here.
        sys.exit("Error while parsing {0} Not a valid JSON file".format(fileName))

    tweetList = []
    for tweet in data['results']:
        nTweet = Tweet()
        nTweet.id = tweet['id']
        nTweet.userId = tweet['from_user_id']
        nTweet.text = tweet['text']
        nTweet.user = tweet['from_user']
        nTweet.userName = tweet['from_user_name']
        nTweet.profileImgUrlHttp = tweet['profile_image_url']
        nTweet.source = tweet['source']
        nTweet.toUser = tweet['to_user']
        nTweet.date = tweet['created_at']
        # NOTE(review): assumes Tweet() starts with an empty, appendable
        # ``userMentions`` container - confirm against the Tweet class.
        for mention in tweet['entities']['user_mentions']:
            nTweet.userMentions.append(mention["id"])
        tweetList.append(nTweet)
    return tweetList
def _resolveFinalUrl(url):
    """Return the address ``url`` finally resolves to, or None on failure.

    The URL is opened and the address of the response actually received is
    reported. On failure the same two diagnostic lines as before are printed.
    """
    # Imported locally so the helper works regardless of how the enclosing
    # module imports urllib (``urllib.urlopen`` does not exist on Python 3).
    from urllib.request import urlopen

    try:
        # The context manager closes the HTTP response once the URL is read.
        # NOTE(review): no timeout is set, so a stalled server blocks here.
        with urlopen(url) as response:
            return response.url
    except (OSError, ValueError):
        # Network and HTTP errors derive from OSError; a malformed URL makes
        # urlopen raise ValueError. Either way only this URL is skipped.
        print("Error urllib.urlopen")
        print("---> URL = {}".format(url))
        return None


def extractTweets(fileName):
    """Extract tweets from a JSON file into a list of Tweets.

    The file is read as UTF-8 and is expected to hold a JSON object whose
    ``results`` entry is a sequence of tweet dicts. Besides the plain tweet
    fields, every ``expanded_url`` under ``entities.urls`` (when that key is
    present) is opened over the network and the resolved address is stored;
    URLs that cannot be opened are reported on stdout and left out.

    Args:
        fileName: Path of the JSON file to read.

    Returns:
        A list holding one ``Tweet`` per entry of ``results``, in file order.
        Each tweet's ``urls`` receives the resolved URLs and ``userMentions``
        receives one dict per mention with the keys ``id``, ``name`` and
        ``screenName``.

    Raises:
        SystemExit: If the file content is not valid UTF-8 encoded JSON.
        OSError: If the file cannot be opened.
        KeyError: If a tweet lacks one of the keys that are read.
    """
    try:
        # ``with`` guarantees the handle is closed even when parsing fails.
        with open(fileName, encoding="utf-8") as jsonFile:
            data = json.load(jsonFile)
    except ValueError:
        # Both JSON decoding errors and UTF-8 decoding errors derive from
        # ValueError, so either one ends up here.
        sys.exit("Error while parsing {0}".format(fileName) + " Not a valid JSON file")

    print("Parsing " + fileName + " ...")
    tweetList = []
    for tweet in data["results"]:
        nTweet = Tweet()
        nTweet.id = tweet["id"]
        nTweet.userId = tweet["from_user_id"]
        nTweet.text = tweet["text"]
        nTweet.user = tweet["from_user"]
        nTweet.userName = tweet["from_user_name"]
        nTweet.profileImgUrlHttp = tweet["profile_image_url"]
        nTweet.source = tweet["source"]
        nTweet.toUser = tweet["to_user"]
        nTweet.date = tweet["created_at"]

        entities = tweet["entities"]
        # NOTE(review): assumes Tweet() starts with empty, appendable ``urls``
        # and ``userMentions`` containers - confirm against the Tweet class.
        for urlEntry in entities.get("urls", ()):
            finalUrl = _resolveFinalUrl(urlEntry["expanded_url"])
            if finalUrl is not None:
                nTweet.urls.append(finalUrl)
        for mention in entities["user_mentions"]:
            nTweet.userMentions.append(
                {
                    "id": mention["id"],
                    "name": mention["name"],
                    "screenName": mention["screen_name"],
                }
            )
        tweetList.append(nTweet)
    return tweetList