def getInfo(self, uName):
    """Collect a redditor's text and subreddit usage into this instance.

    Looks up ``uName`` through PRAW, walks every comment body and every
    submission title of that user, and updates the instance in place:

    * ``self.userSubreddits`` -- lower-cased subreddit display name mapped
      to the number of comments/submissions seen in that subreddit.
    * ``self.srList`` -- the names in ``self.userSubreddits``, most-used
      first, are placed in front of whatever the list already holds.
    * ``self.allWords`` -- the words ``Parser`` extracts from the combined
      text, each with at most one leading and one trailing character from
      ``self.punc`` removed; words that end up empty are dropped.

    If the redditor lookup raises, ``self.isValid`` is set to ``False`` and
    nothing else is modified.

    Args:
        uName: Reddit user name to look up.

    Returns:
        None.
    """
    r = praw.Reddit('Reddit User Data Scraper')
    try:
        user = r.get_redditor(uName)
    except Exception:
        # Any lookup failure marks the user invalid; unlike a bare
        # ``except:`` this lets KeyboardInterrupt/SystemExit propagate.
        self.isValid = False
        return

    # Text fragments are gathered in a list and joined once at the end
    # rather than growing one string with repeated concatenation.
    pieces = []

    def consume(items, text_key):
        """Record the text and the subreddit of every item in ``items``."""
        for item in items:
            # vars() reads the attribute straight from the instance dict,
            # exactly as the original code did.
            text = vars(item)[text_key]
            text = text.encode('ascii', 'ignore').strip()
            pieces.append(text + " ")

            # Tally the subreddit; the first sighting counts as 1.
            srName = vars(vars(item)['subreddit'])['display_name'].lower()
            self.userSubreddits[srName] = self.userSubreddits.get(srName, 0) + 1

    consume(user.get_comments(limit=None), 'body')
    consume(user.get_submitted(limit=None), 'title')
    totStr = "".join(pieces)

    # Sort ascending and then reverse (instead of sorting with
    # reverse=True) so that subreddits with equal counts keep the same
    # relative order that inserting one by one at the front produced.
    by_count = sorted(self.userSubreddits.items(), key=operator.itemgetter(1))
    self.srList[:0] = [name for name, _ in reversed(by_count)]

    parser = Parser(totStr)
    cleaned = []
    for word in parser.getWordList():
        if word and word[0] in self.punc:
            word = word[1:]
        if word and word[-1] in self.punc:
            word = word[:-1]
        # A token made only of punctuation is empty by now; skip it so
        # that no empty strings end up in the word list.
        if word:
            cleaned.append(word)
    self.allWords = cleaned
if submissions is None: continue sys.stderr.write("Processing subreddit # " + str(i) + ": " + subreddit[:-1] + "\n") text = "" for submission in submissions: data = vars(submission)['title'] data = data.encode('ascii', 'ignore') data = data.strip() text = text + data + " " if text == "": continue; parser = Parser(text) wordList = parser.getWordList() text = " ".join(wordList) srName = subreddit[:-1] srName = srName.lower() srDatum = { "name" : srName, "text" : text } pprint(wordList[:10]) if srData.find_one({"name": srName}) == None: srData.insert(srDatum) else: srData.update({ "name": srName }, srDatum) except: sys.stderr.write("Error in processing subreddit # " + str(i) + ": " + subreddit[:-1] + "\n")