def date_as_datetime(self):
    """Convert this object's date into a ``datetime``.

    The date is split by ``self._split_date()``. Groups 1, 2 and 3 of the
    returned match are used as day, month name and year; the month name is
    turned into a number by ``DateManager.month_index``.

    Returns:
        ``datetime(year, month, day)`` built from the three groups.

    Raises:
        ValueError: if ``self._split_date()`` returns ``None``. Previously
            the code printed its diagnostics and then failed with an
            ``AttributeError`` on ``None.group``.
    """
    s = self._split_date()
    if s is None:
        # Single-argument print calls produce the same text under both the
        # Python 2 print statement and the Python 3 print function.
        print("date_as_datetime() - No RE for  %s" % (self._json['date'],))
        print("Full doc:  %s" % (self._json,))
        raise ValueError(
            "date_as_datetime(): unable to split date %r" % (self._json['date'],))
    return datetime(int(s.group(3)),
                    DateManager.month_index(s.group(2)),
                    int(s.group(1)))
class MsgFactory(object):
    """Build plain message dictionaries from the page held by a web client.

    The page source is obtained from ``webclient.sourceCode()`` and parsed
    with BeautifulSoup. Messages are the ``div`` elements with class
    ``post_wrapper``; pagination links are ``a`` elements with class
    ``navPages``.
    """

    def __init__(self, webclient=None):
        """Keep a reference to *webclient* and start with no parsed page.

        Args:
            webclient: object providing ``load(url)`` and ``sourceCode()``.
        """
        self.webclient = webclient
        self.soup = None
        self.soupfragment = None
        self.date_manager = DateManager()

    # NOTE: meant as an internal helper of createListOfMsgs; the public name
    # is kept so existing callers keep working.
    def createMsg(self, soupfragment):
        """Return one message as a dict.

        Args:
            soupfragment: parsed element wrapping a single message.

        Returns:
            dict with the keys ``"user"`` (first child of the first ``a``
            element), ``"date"``, ``"body"`` (flattened text of the ``div``
            with class ``inner``) and ``"id"`` (``id`` attribute of that
            same ``div``).
        """
        return {
            "user": soupfragment.find('a').contents[0],
            "date": self._get_date(soupfragment),
            "body": self._build_msg(soupfragment),
            "id": soupfragment.find('div', 'inner')['id'],
        }

    def _get_date(self, soupfragment):
        """Return the date text of a message.

        The date is the third child of the ``div`` with class ``smalltext``.
        When that child is a single space, the date is rebuilt from
        ``self.date_manager.hoy()``, a comma and the fifth child.
        """
        header = soupfragment.find('div', 'smalltext')
        date = header.contents[2]
        if date == ' ':
            # The rebuilt value always contains a comma, so it can never be
            # ' ' again; the former second check for ' ' was unreachable and
            # has been removed.
            date = self.date_manager.hoy() + "," + header.contents[4]
        return date

    def _build_msg(self, fullmsg):
        """Return the flattened text of the ``div`` with class ``inner``."""
        return self._get_content_recursively(fullmsg.find("div", "inner"))

    def createListOfMsgs(self):
        """Parse the client's current page and return its messages.

        Stores the parsed page in ``self.soup`` as a side effect.

        Returns:
            list of dicts as produced by ``createMsg``, in page order.
        """
        self.soup = BeautifulSoup(self.webclient.sourceCode())
        return [self.createMsg(msg)
                for msg in self.soup.find_all("div", "post_wrapper")]

    def nextUrl(self):
        """Return the ``href`` of the ``>>`` pagination link, or ``""``.

        Reads the page stored in ``self.soup``.
        """
        for link in self.soup.find_all("a", "navPages"):
            # Compare the text node directly: str() on it could raise
            # UnicodeEncodeError on Python 2 for non-ASCII link text, and
            # links without children used to raise IndexError.
            if link.contents and link.contents[0] == ">>":
                return link['href']
        return ""

    def changeUrl(self, url):
        """Load *url* in the web client and parse the new page.

        The parsed page is stored in both ``self.soupfragment`` (as before)
        and ``self.soup``, so that ``nextUrl`` reads the page just loaded
        instead of the previous one.
        """
        self.webclient.load(url)
        page = BeautifulSoup(self.webclient.sourceCode())
        self.soupfragment = page
        self.soup = page

    def _get_content_recursively(self, bs):
        """Flatten the children of *bs* into a single string.

        Every non-``Tag`` child contributes its text followed by a newline;
        every ``Tag`` child contributes the flattened text of its own
        children. Each piece is passed through ``UnicodeDammit``.
        """
        pieces = []
        for child in bs.contents:
            if isinstance(child, Tag):
                nested = self._get_content_recursively(child)
                pieces.append(UnicodeDammit(nested).unicode_markup)
            else:
                pieces.append(UnicodeDammit(child).unicode_markup + '\n')
        return "".join(pieces)
def __init__(self, webclient=None):
    """Initialise the instance.

    Stores *webclient*, leaves ``soup`` and ``soupfragment`` unset (``None``)
    and creates a fresh ``DateManager`` as ``date_manager``.
    """
    self.webclient = webclient
    # Nothing has been parsed yet.
    self.soup = self.soupfragment = None
    self.date_manager = DateManager()
def test_month_index(self):
    """Both capitalisations of "Enero" must map to month index 1."""
    for spelling in ("Enero", "enero"):
        self.assertEqual(DateManager.month_index(spelling), 1)