def importHTML(self, rawhtml, url=''):
        """Populate object by scraping chunk of HTML

        rawhtml : May be a string or a list of strings.
                When a list is given, the thread-level fields
                (id, url, forum, title, numpages) are scraped
                from the first element only, while posts are
                scraped from every element.
        url : Optional param, useful to specify URL explicitly
                in situations where the URL is known.
                Many vB installations use only relative links
                so it can be hard to discover a URL from code.

        Raises IndexError if rawhtml is an empty list, because
        there is no first chunk to read the thread fields from.
        """

        # Normalise the input into a list of cleaned chunks.
        # isinstance (rather than an exact type comparison) lets
        # list subclasses take the multi-chunk path as well.
        if isinstance(rawhtml, list):
            html = [vbutils.cleanEncoding(h) for h in rawhtml]
        else:
            html = [vbutils.cleanEncoding(rawhtml)]

        # Thread-level fields are read from the first chunk only
        first = html[0]
        self.id = vbscrape.scrapeThreadID(first)
        if url:
            # An explicitly supplied URL wins over a scraped one
            self.url = url
        else:
            self.url = vbscrape.scrapeThreadURL(self.id, first)
        self.forum = vbutils.makeSlug(vbscrape.scrapeForumName(first))
        self.title = vbutils.makeSlug(vbscrape.scrapeThreadTitle(first))
        self.numpages = vbscrape.scrapeNumPages(first)

        # Fold the result of scrapePosts for each chunk into self.post
        self.post = {}
        for h in html:
            self.post.update(vbscrape.scrapePosts(h))
    def importJSON(self, jsondata):
        """Populate object from a string of JSON data

        jsondata : A JSON string, or a list of strings that
                is joined together before parsing.

        If the input cannot be parsed as JSON, an error is
        printed and None is returned before any attribute of
        the object has been modified.
        Lookups of the expected keys ("post", "lastupdate",
        "forum", "id", "numpages", "title", "url") are not
        guarded; a missing key propagates as an exception.
        """

        # ''.join is used to accommodate list input
        clean = vbutils.cleanEncoding(''.join(jsondata), isHTML = False)

        # Try to load JSON data from the input str.
        # json.loads reports malformed JSON with ValueError and
        # non-string input with TypeError; only those are caught so
        # that unrelated errors (and Ctrl-C) are not swallowed.
        # The single-argument print(...) form used below prints the
        # same text as the Python 2 print statement.
        try:
            j = json.loads(clean)
        except (TypeError, ValueError):
            print("Error: Could not find JSON data.")
            return None

        # Loop over posts creating Post objs
        self.post = {}

        print("Found %s posts." % len(j["post"]))
        for post_id, p in j["post"].iteritems():
            print("Importing Post #%s ..." % str(post_id))
            if isinstance(p, dict):
                # Keyword args must be str
                # but our JSON dict has unicode keys
                # so we must convert before passing
                # into the Post.__init__() method
                kw = vbutils.convertKeysToStr(p)
                self.post[post_id] = vbpost.Post(**kw)
            elif isinstance(p, (str, unicode)):
                # A post stored as a JSON string is handed to Post as-is
                self.post[post_id] = vbpost.Post(jsonstr = p)
            # Entries of any other type are skipped

        # Thread-level fields are copied straight from the parsed data
        self.lastupdate = j["lastupdate"]
        self.forum = j["forum"]
        self.id = j["id"]
        self.numpages = j["numpages"]
        self.title = j["title"]
        self.url = j["url"]
Example #3
0
    def importHTML(self, rawhtml):
        """Fill in this object's fields by scraping a chunk of HTML

        rawhtml : The HTML to scrape; it is passed through
                vbutils.cleanEncoding before any scraping.
        """

        # TODO clean up the HTML
        # Converting to UTF-8 bc JSON uses UTF-8
        cleaned = vbutils.cleanEncoding(rawhtml)

        # Numeric fields are forced to int.
        # postcount is scraped using the id found just above.
        self.id = int(vbscrape.scrapePostID(cleaned))
        self.postcount = int(vbscrape.scrapePostCount(cleaned, self.id))
        self.authorid = int(vbscrape.scrapePostAuthorID(cleaned))

        # The remaining fields are stored exactly as scraped,
        # in the order listed here.
        scrapers = (
            ("permalink", vbscrape.scrapePostPermalink),
            ("dateposted", vbscrape.scrapePostDate),
            ("title", vbscrape.scrapePostTitle),
            ("message", vbscrape.scrapePostMessage),
            ("sig", vbscrape.scrapePostSig),
            ("editnote", vbscrape.scrapePostEditNote),
        )
        for field, scrape in scrapers:
            setattr(self, field, scrape(cleaned))
Example #4
0
    def importJSON(self, jsondata):
        """Populate object from JSON string

        jsondata : A JSON string, or a list of strings that
                is joined together before parsing.

        If the input cannot be parsed as JSON, an error is
        printed and None is returned before any attribute of
        the object has been modified.
        Lookups of the expected keys are not guarded; a missing
        key propagates as an exception.
        """

        # ''.join is used to accommodate list input
        clean = vbutils.cleanEncoding(''.join(jsondata), isHTML = False)

        # Try to load JSON data from the input str.
        # json.loads reports malformed JSON with ValueError (not
        # TypeError, which it raises for non-string input), so both
        # must be caught for the error message to be reachable.
        # The single-argument print(...) form prints the same text
        # as the Python 2 print statement.
        try:
            j = json.loads(clean)
        except (TypeError, ValueError):
            print("Error: Could not find JSON data.")
            return None

        # Copy each field straight from the parsed data
        self.permalink = j["permalink"]
        self.id = j["id"]
        self.postcount = j["postcount"]
        self.dateposted = j["dateposted"]
        self.title = j["title"]
        self.authorid = j["authorid"]
        self.message = j["message"]
        self.sig = j["sig"]
        self.editnote = j["editnote"]