def test_authors(self):
    """Check that the first two authors returned by the analyser match.

    The first expected author carries a name and an e-mail address; the
    second carries only a name. ``role`` is expected to be ``None`` for
    both.
    """
    author = Author()
    author.name = u'Susanne Lysvold'
    author.email = u'*****@*****.**'
    author.role = None

    author2 = Author()
    author2.name = u'Emil Indsetviken'
    author2.email = None
    author2.role = None

    authors = self.analyser.authors()

    # assertEqual is the supported spelling; the assertEquals alias is
    # deprecated and no longer exists in recent unittest versions.
    self.assertEqual(author, authors[0])
    self.assertEqual(author2, authors[1])
def authors(self):
    """Return the authors listed in the article's ``div.byline``.

    Each ``<address>`` element in the byline is paired, in document order,
    with an ``<li class="icon-email">`` element; the pair yields one
    ``Author`` with ``name``, ``mail`` and ``role`` set.

    Returns:
        A list of ``Author`` objects. The list is empty when the page has
        no byline. ``role`` is ``None`` for an author whose ``<address>``
        has no usable ``role`` element.
    """
    byline = self.soup.find('div', 'byline')
    if byline is None:
        # find() returns None when nothing matches; a page without a
        # byline simply has no authors.
        return []

    # For some reason the mail is not connected to the name in the
    # markup, so names and mail links are matched up by position.
    # NOTE(review): if an author has no mail link the pairing shifts and
    # zip() silently drops the unmatched tail -- confirm against real pages.
    addresses = byline.find_all('address')
    mail_items = byline.find_all('li', 'icon-email')

    authors = []
    for address, li in zip(addresses, mail_items):
        author = Author()
        author.name = address.find(class_='fn').string

        # NRK is still trying to hide the email address from spammers:
        # the href is sliced, URL-unquoted and sliced again.
        # Presumably href looks like "javascript:location='mailto%3A...'":
        # [21:-1] would drop that wrapper and [7:] the "mailto:" prefix
        # -- TODO confirm against the live markup.
        href = li.a['href']
        author.mail = unquote(href[21:-1])[7:]

        # The role element may be absent, or may not contain a single
        # string (in which case .string is None); report no role then.
        role_tag = address.find(class_='role')
        role_text = role_tag.string if role_tag is not None else None
        author.role = role_text.strip() if role_text is not None else None

        authors.append(author)
    return authors
def authors(self):
    """Return the authors listed in the article's ``ul.byline``.

    One ``Author`` is produced per ``<address>`` element. ``name`` is taken
    from the address's ``<span>``. ``email`` is only assigned when the
    address contains a ``<script>`` whose obfuscated payload can be decoded
    down to an ``<a>`` element; otherwise the attribute is left untouched.

    Returns:
        A list of ``Author`` objects; empty when the page has no byline.
    """
    byline = self.soup.find('ul', 'byline')
    if byline is None:
        # find() returns None when nothing matches; no byline, no authors.
        return []

    # Captures everything between the first and the last single quote.
    quoted = ".*?'(.*)'.*$"

    authors = []
    for address in byline.find_all('address'):
        author = Author()
        author.name = address.span.string

        # To find the mail we need to do some unquoting, as NRK is trying
        # to protect the mail address from spammers behind a quoted
        # string: the script holds a quoted, URL-quoted payload which
        # itself holds a quoted HTML fragment containing the mail link.
        if address.script:
            # .string is None for a script without a single text child.
            script = address.script.string or ''
            outer = search(quoted, script)
            inner = search(quoted, unquote(outer.group(1))) if outer else None
            if inner:
                anchor = BeautifulSoup(inner.group(1)).a
                # Skip the mail rather than crash when the decoded
                # fragment does not contain a link after all.
                if anchor is not None:
                    author.email = anchor.string

        authors.append(author)
    return authors
def test_authors(self):
    """Check that the first author returned by the analyser matches.

    The expected author carries a name, an e-mail address (``mail``) and
    the role ``Journalist``.
    """
    author = Author()
    author.name = u'Håkon Rysst Heilmann'
    author.mail = u'*****@*****.**'
    author.role = u'Journalist'

    # assertEqual is the supported spelling; the assertEquals alias is
    # deprecated and no longer exists in recent unittest versions.
    self.assertEqual(author, self.analyser.authors()[0])