Example #1
0
 def date_as_datetime(self):
     """Convert this document's textual date into a ``datetime``.

     The date is split by ``self._split_date()``. Group 1 of the result is
     used as the day, group 2 as a month name (resolved through
     ``DateManager.month_index``) and group 3 as the year.

     Returns:
         A ``datetime`` built from (year, month, day).

     Raises:
         ValueError: if ``_split_date()`` returns ``None``; the message
             carries the offending date and the full document.
     """
     parts = self._split_date()
     if parts is None:
         # Fail with a descriptive error here rather than going on to call
         # ``.group`` on None.
         raise ValueError(
             "date_as_datetime() - No RE for %r (full doc: %r)"
             % (self._json['date'], self._json))
     year = int(parts.group(3))
     month = DateManager.month_index(parts.group(2))
     day = int(parts.group(1))
     return datetime(year, month, day)
Example #2
0
class MsgFactory(object):
    """Build plain-dict messages out of the HTML page held by a web client.

    The page source comes from ``webclient.sourceCode()`` and is parsed with
    BeautifulSoup; each element matched by ``find_all("div", "post_wrapper")``
    is turned into one message dict.
    """

    def __init__(self, webclient=None):
        """Store the web client and create the date helper.

        webclient -- object providing ``sourceCode()`` and ``load(url)``; it
                     is only touched by the methods that load or parse pages.
        """
        self.webclient = webclient
        # Parsed page read by createListOfMsgs() and nextUrl().
        self.soup = None
        # Parsed page written by changeUrl().
        self.soupfragment = None
        self.date_manager = DateManager()

    def createMsg(self, soupfragment):
        """Return a dict with "user", "date", "body" and "id" for one post.

        soupfragment -- the parsed element of a single post.
        """
        return {
            "user": soupfragment.find('a').contents[0],
            "date": self._get_date(soupfragment),
            "body": self._build_msg(soupfragment),
            "id": soupfragment.find('div', 'inner')['id'],
        }

    def _get_date(self, soupfragment):
        """Return the date shown in the post's 'smalltext' div.

        The date is child 2 of that div. When that child is a single space
        (per the original docstring, the 'hoy' / today case) the date is
        rebuilt as ``date_manager.hoy() + "," + child 4``.
        """
        header = soupfragment.find('div', 'smalltext').contents
        date = header[2]
        if date == ' ':
            # The rebuilt string always contains ",", so it can never be the
            # lone-space placeholder again; no further check is needed.
            date = self.date_manager.hoy() + "," + header[4]
        return date

    def _build_msg(self, fullmsg):
        """Return the flattened text of the post's 'inner' div."""
        inner = fullmsg.find("div", "inner")
        return self._get_content_recursively(inner)

    def createListOfMsgs(self):
        """Parse the client's current page and return its messages as dicts.

        Stores the parsed page in ``self.soup`` (used later by nextUrl()).
        """
        self.soup = BeautifulSoup(self.webclient.sourceCode())
        return [self.createMsg(post)
                for post in self.soup.find_all("div", "post_wrapper")]

    def nextUrl(self):
        """Return the href of the ">>" navigation link, or "" if there is none.

        Searches the links matched by ``find_all("a", "navPages")`` in
        ``self.soup``. Also returns "" when no page has been parsed yet.
        """
        if self.soup is None:
            return ""
        for link in self.soup.find_all("a", "navPages"):
            # Compare the node directly instead of str(node): str() can raise
            # UnicodeEncodeError on non-ASCII link text under Python 2.
            # Links without any content are skipped rather than indexed.
            if link.contents and link.contents[0] == ">>":
                return link['href']
        return ""

    def changeUrl(self, url):
        """Load *url* in the web client and parse the resulting page."""
        self.webclient.load(url)
        # NOTE(review): the parsed page goes to ``soupfragment``, while
        # nextUrl() reads ``soup`` (refreshed only by createListOfMsgs()).
        # Confirm whether ``soup`` should be updated here as well.
        self.soupfragment = BeautifulSoup(self.webclient.sourceCode())

    def _get_content_recursively(self, bs):
        """Return the text below *bs*, depth first.

        Every non-Tag child contributes its text followed by a newline; Tag
        children contribute the text of their own subtree.
        """
        pieces = []
        for child in bs.contents:
            if isinstance(child, Tag):
                # The recursive result is already decoded text, so it is
                # appended as-is.
                pieces.append(self._get_content_recursively(child))
            else:
                pieces.append(UnicodeDammit(child).unicode_markup + '\n')
        return "".join(pieces)
Example #3
0
 def __init__(self, webclient=None):
     """Keep the web client, clear both parsed-page slots, build the date helper.

     webclient -- client object kept on the instance for later use
                  (defaults to None).
     """
     self.webclient = webclient
     # Nothing has been parsed yet, so both page slots start out empty.
     self.soup = self.soupfragment = None
     self.date_manager = DateManager()
Example #4
0
 def test_month_index(self):
     # Both the capitalised and the lower-case spelling must map to month 1.
     for spelling in ("Enero", "enero"):
         self.assertEqual(DateManager.month_index(spelling), 1)