Exemple #1
0
    def getInfo(self, uName):
        """Collect a redditor's text and subreddit activity into this object.

        Walks every comment and every submission returned for ``uName`` and:

        * counts, per lower-cased subreddit name, how many comments and
          submissions were seen, in ``self.userSubreddits``;
        * pushes the subreddit names onto the front of ``self.srList`` so
          that the most frequent subreddit ends up first;
        * feeds the comment bodies and submission titles to ``Parser`` and
          stores the resulting words, with one leading and one trailing
          punctuation character (as listed in ``self.punc``) removed, in
          ``self.allWords``.

        If looking up the redditor raises, ``self.isValid`` is set to
        ``False`` and the method returns without touching anything else.

        Args:
            uName: Reddit user name to look up.
        """
        r = praw.Reddit('Reddit User Data Scraper')
        try:
            user = r.get_redditor(uName)
        except Exception:
            # Deliberately broad: any lookup failure means "not a usable
            # user".  Unlike a bare ``except:`` this still lets
            # KeyboardInterrupt / SystemExit propagate.
            self.isValid = False
            return

        # Gather text fragments in a list and join once, instead of growing
        # one string with repeated concatenation.
        textParts = []
        self._tallyItems(user.get_comments(limit=None), 'body', textParts)
        self._tallyItems(user.get_submitted(limit=None), 'title', textParts)
        totStr = "".join(textParts)

        # Sorted ascending by count; inserting each name at the front leaves
        # the list in descending order of count.
        ranked = sorted(self.userSubreddits.items(), key=operator.itemgetter(1))
        for srName, _count in ranked:
            self.srList.insert(0, srName)

        parser = Parser(totStr)
        trimmedWords = []
        for word in parser.getWordList():
            if word and word[0] in self.punc:
                word = word[1:]
            if word and word[-1] in self.punc:
                word = word[:-1]
            # A word consisting only of punctuation is empty by now; skip it
            # just like words that were empty to begin with.
            if word:
                trimmedWords.append(word)
        self.allWords = trimmedWords

    def _tallyItems(self, items, textField, textParts):
        """Record the text and the subreddit of every item in ``items``.

        Args:
            items: Iterable of comment or submission objects.
            textField: Name of the instance attribute holding the item's
                text (``'body'`` for comments, ``'title'`` for submissions).
            textParts: List that receives each item's ASCII-only, stripped
                text followed by a single space.
        """
        for item in items:
            fields = vars(item)
            data = fields[textField].encode('ascii', 'ignore').strip()
            textParts.append(data + " ")

            # Count the subreddit; the first sighting counts as 1.
            srName = vars(fields['subreddit'])['display_name'].lower()
            self.userSubreddits[srName] = self.userSubreddits.get(srName, 0) + 1
Exemple #2
0
        if submissions is None:
            continue
    
        sys.stderr.write("Processing subreddit # " + str(i) + ": " + subreddit[:-1] + "\n")
        text = "" 
        for submission in submissions:
            data = vars(submission)['title']
            data = data.encode('ascii', 'ignore')
            data = data.strip()
            text = text + data + " "
            
        if text == "":
            continue;

        parser = Parser(text)    
        wordList = parser.getWordList()
        
        text = " ".join(wordList)
        srName = subreddit[:-1]
        srName = srName.lower()
        
        srDatum = { "name" : srName,
                    "text" : text }
        pprint(wordList[:10])
            
        if srData.find_one({"name": srName}) == None:
            srData.insert(srDatum)
        else:
            srData.update({ "name": srName }, srDatum)
    except:
        sys.stderr.write("Error in processing subreddit # " + str(i) + ": " + subreddit[:-1] + "\n")