Ejemplo n.º 1
0
    def readWebPage(self, urlString, depth=1, isExternal=False):
        """Rebuild a stored ``WebPage`` and its direct links from the database.

        The ``websites`` table is searched for a row whose ``address`` equals
        the parsed form of ``urlString``. When nothing matches, ``None`` is
        returned. Otherwise the first matching row is used: column 0 is taken
        as the page id and column 1 as the page URL.

        Args:
            urlString: URL to look up; it is normalised through
                ``WebPage.parseUrl`` before the search.
            depth: Depth assigned to the page when ``self.db.session`` has no
                ``depth`` entry for it. A stored entry takes precedence.
            isExternal: Forwarded unchanged to the returned ``WebPage``.

        Returns:
            A ``WebPage`` whose ``links`` attribute holds one child
            ``WebPage`` per row of the links query (each with ``parent`` set
            to the returned page and ``depth`` one greater), or ``None`` if
            the URL is not stored.
        """
        address = WebPage.parseUrl(urlString).string
        matches = self.db.websites.search(
            filters=all(eq('address', address))).rows()

        if len(matches) == 0:
            return None

        record = matches[0]
        websiteId = record[0]

        # A depth recorded for this page overrides the caller's value.
        storedDepth = self.db.session.search(
            'depth', all(eq('website_id', websiteId)))
        if len(storedDepth) > 0:
            depth = storedDepth[0][0]

        page = WebPage(url=record[1], depth=depth, isExternal=isExternal)

        # Column names come from the table definitions; only the page id is
        # a data value in the statement.
        sql = 'SELECT w.{0}, r.{0} from links join websites as w on links.{1} = w.id join websites as r on links.{2} = r.id WHERE w.id = {3};'.format(
            self.db.websites.fields[1],
            self.db.links.fields[1],
            self.db.links.fields[2],
            websiteId)
        linkRows = self.db.execute(sql)

        # The second selected column (the ``r`` side of the join) is the
        # linked page's URL.
        page.links = [
            WebPage(url=linkRow[1], parent=page, depth=depth + 1)
            for linkRow in linkRows
        ]

        return page