def test_authors(self):
    """Check that the analyser returns the two expected authors, in order."""
    # First expected author: has a name and an e-mail address, no role.
    author = Author()
    author.name = u'Susanne Lysvold'
    author.email = u'*****@*****.**'
    author.role = None

    # Second expected author: only a name is set.
    author2 = Author()
    author2.name = u'Emil Indsetviken'
    author2.email = None
    author2.role = None

    authors = self.analyser.authors()

    # assertEqual is used instead of the deprecated assertEquals alias,
    # which no longer exists in unittest on Python 3.12+.
    self.assertEqual(author, authors[0])
    self.assertEqual(author2, authors[1])
def authors(self):
    """Return the authors listed in the page's byline.

    Looks up the ``ul`` element with class ``byline`` in ``self.soup`` and
    builds one ``Author`` per ``address`` element inside it. The name is
    taken from the address' ``span``. The e-mail is set only when the
    address carries a ``script`` element from which it can be decoded.

    Returns:
        A list of ``Author`` objects in document order; an empty list when
        the page has no byline.
    """
    byline = self.soup.find('ul', 'byline')
    if byline is None:
        # No byline on this page: there is nothing to extract.
        return []

    # Captures whatever sits between the first and the last single quote.
    quoted = r".*?'(.*)'.*$"

    authors = []
    for address in byline.find_all('address'):
        author = Author()
        author.name = address.span.string

        # To find the mail we need to do some unquoting, as nrk is trying
        # to protect the mail address from spammers behind a quoted string.
        script = address.script.string if address.script else None
        if script:
            # First pass: pull the quoted payload out of the script text.
            outer = search(quoted, script)
            # Second pass: the unquoted payload again wraps markup in quotes.
            inner = search(quoted, unquote(outer.group(1))) if outer else None
            if inner:
                link = BeautifulSoup(inner.group(1)).a
                if link is not None:
                    author.email = link.string
            # If any step fails to match, the e-mail is left untouched
            # rather than aborting the whole extraction.

        authors.append(author)
    return authors