def main():
    """Build an RSS feed of starred tweets and print it to stdout.

    The tweet HTML is read from the file named by the first command-line
    argument when one is given, otherwise fetched via retrieve_url().
    """
    if len(sys.argv) > 1:
        page = retrieve_file(sys.argv[1])
    else:
        page = retrieve_url()
    soup = BeautifulSoup(page)

    rss_items = []
    # Each tweet is rendered as a <table class="... tweet ...">.
    tweets = soup.findAll('table', { 'class': re.compile(r'\btweet\b') })
    for tweet in tweets:
        author = tweet.find('strong', { 'class': 'fullname' }).text
        username = tweet.find('span', { 'class': 'username' }).text
        body = tweet.find('div', { 'class': 'dir-ltr' })
        # The timestamp cell's anchor carries both the tweet id (name attr)
        # and the human-readable date; look it up once.
        timestamp_anchor = tweet.find('td', { 'class': 'timestamp' }).find('a')
        tweet_id = timestamp_anchor['name']
        date = parse_date(timestamp_anchor.text)

        # Absolutize tweet-internal links; remember the first link pointing
        # outside twitter.com to use as the RSS item's link.
        first_link = None
        for link in body.findAll('a'):
            href = link.get('data-url') or link.get('href')
            if not href:
                # BUG FIX: an anchor with neither attribute used to crash
                # on href.startswith(); skip it instead.
                continue
            if href.startswith('/'):
                href = 'https://www.twitter.com' + href
            link['href'] = href
            if not first_link and not href.startswith('https://www.twitter.com'):
                first_link = href

        description = '<a href="https://www.twitter.com/%s">%s - %s</a><br/>%s' % (
            username, username, author, body)
        rss_items.append(PyRSS2Gen.RSSItem(
            title = body.text,
            link = first_link,
            description = description,
            pubDate = date,
            # Tweet ids are opaque identifiers, not resolvable URLs.
            guid = PyRSS2Gen.Guid(tweet_id, isPermaLink = False)
        ))

    rss = PyRSS2Gen.RSS2(
        title = 'Starred Tweets',
        link = 'http://dropbox.kuijjer.com/starred_tweets.rss',
        description = 'Starred Tweets',
        lastBuildDate = datetime.datetime.now(),
        items = rss_items,
    )
    # Parenthesized call form works under both Python 2 and Python 3.
    print(xmlpp.get_pprint(rss.to_xml()))
def m_mniReformatClick( self, event ):
    """Menu handler: reformat the editor's XML content via pretty print."""
    # Clear any stale status message first.
    self.m_statusBar.SetStatusText('')
    # Round-trip the editor text through the (slightly modified) xmlpp
    # pretty printer and put the result back into the control.
    raw = self.m_txtMain.GetValue()
    self.m_txtMain.SetValue(xmlpp.get_pprint(raw))
def prettyPrintXML(xmlString):
    """Return *xmlString* reformatted by the xmlpp pretty printer."""
    pretty = xmlpp.get_pprint(xmlString)
    return pretty
def testSpecialTokens(self):
    """Colons inside tag names, attributes and text must survive printing."""
    specialtokens = """<xml:test foo="b:ar">foo::bar:/adf32</xml:test>"""
    result = """<xml:test foo="b:ar">\n  foo::bar:/adf32\n</xml:test>\n"""
    # assertEquals is a deprecated alias (removed in Python 3.12);
    # assertEqual is the canonical name.
    self.assertEqual(xmlpp.get_pprint(specialtokens), result)
def testBasic(self): basic = """<xml><test><test>foo bar</test></test><test>foo</test></xml>""" result = """<xml>\n <test>\n <test>\n foo bar\n </test>\n </test>\n <test>\n foo\n </test>\n</xml>\n""" self.assertEquals(xmlpp.get_pprint(basic), result)
def testEncodingWithCDATA(self):
    """XML declarations and CDATA sections must pass through untouched."""
    encodingWithCDATA = """<?xml version="1.0" encoding="UTF-8" ?><testcase><system-out><![CDATA[<fdaa>fda>]]></system-out><system-err><![CDATA[]]></system-err></testcase>"""
    result = """<?xml version="1.0" encoding="UTF-8" ?>\n<testcase>\n  <system-out>\n    <![CDATA[<fdaa>fda>]]>\n  </system-out>\n  <system-err>\n    <![CDATA[]]>\n  </system-err>\n</testcase>\n"""
    # assertEquals is a deprecated alias (removed in Python 3.12);
    # assertEqual is the canonical name.
    self.assertEqual(xmlpp.get_pprint(encodingWithCDATA), result)
def addToLibraryFile(libraryFile, epubData):
    """Add an EPUB to an ATOM library file.

    Entries are kept ordered by author, then title; an entry with the same
    author and title as the new one is replaced in place.

    libraryFile -- path of the ATOM XML file (read, then rewritten).
    epubData    -- dict of EPUB metadata; must provide 'creator', 'title',
                   'identifier', 'published', 'language', 'subject',
                   'description', 'authorHash' and 'titleHash'.
    """
    ### TODO: Finish implementing functionality
    newAuthor = epubData["creator"]
    newTitle = epubData["title"]
    baseEntry = [
        " <entry>",
        " <id>{0}</id>".format(epubData["identifier"]),
        " <title>{0}</title>".format(epubData["title"]),
        " <author>",
        " <name>{0}</name>".format(epubData["creator"]),
        " </author>",
        ' <content type="xhtml">',
        ' <div xmlns="http://www.w3.org/1999/xhtml">Published: {0}, Language: {1}, Subject: {2}</div>'.format(
            epubData["published"], epubData["language"], epubData["subject"]
        ),
        " </content>",
        " <summary>{0}</summary>".format(epubData["description"]),
        " <updated>{0}Z</updated>".format(strftime("%Y-%m-%d %H:%M:%S")),
        ' <link type="application/epub+zip" href="{0}/{1}.epub" />'.format(
            epubData["authorHash"], epubData["titleHash"]
        ),
        ' <link rel="http://opds-spec.org/opds-cover-image-thumbnail" type="image/jpeg" title="cover thumbnail" href="{0}/{1}_tn.jpg" />'.format(
            epubData["authorHash"], epubData["titleHash"]
        ),
        ' <link rel="http://opds-spec.org/opds-cover-image" type="image/jpeg" title="cover image" href="{0}/{1}.jpg" />'.format(
            epubData["authorHash"], epubData["titleHash"]
        ),
        ' <link rel="x-stanza-cover-image-thumbnail" type="image/jpeg" href="{0}/{1}_tn.jpg" />'.format(
            epubData["authorHash"], epubData["titleHash"]
        ),
        ' <link rel="x-stanza-cover-image" type="image/jpeg" href="{0}/{1}.jpg" />'.format(
            epubData["authorHash"], epubData["titleHash"]
        ),
        " </entry>",
    ]

    with open(libraryFile, "r") as libFile:
        atom = libFile.read()

    # Pretty print and standardize the ATOM file to make parsing a little easier
    atom = xmlpp.get_pprint(atom)
    atom = atom.replace("\r\n", "\n")
    entries = atom.split("\n")

    # Consider using xml rather than RegEx to parse the file
    for i, line in enumerate(entries):
        if line.strip() != "<entry>":
            continue
        # NOTE(review): assumes the pretty-printed file puts the title on
        # the line right after <entry> and the author name seven lines in —
        # verify this matches the entry layout actually stored in the file.
        match = re.search("<.*>(.*)</.*>", entries[i + 1])
        curTitle = match.groups()[0]
        match = re.search("<.*>(.*)</.*>", entries[i + 7])
        curAuthor = match.groups()[0]
        if newAuthor > curAuthor:
            # New entry sorts after this one; keep scanning.
            pass
        elif newAuthor == curAuthor:
            if newTitle > curTitle:
                pass
            elif newTitle == curTitle:
                # Same book already present: replace its whole <entry> block.
                # BUG FIX: original used the Python-2-only '<>' operator.
                end = i + 1
                while entries[end].strip() != "</entry>":
                    end += 1
                entries[i : end + 1] = baseEntry
                break
            else:
                # BUG FIX: original wrote entries[i:1], which deletes the
                # first element when i == 0; entries[i:i] inserts the new
                # entry before the current one as intended.
                entries[i:i] = baseEntry
                break
        elif newAuthor < curAuthor:
            entries[i:i] = baseEntry
            break

    # Now write the file out (the [1:] drops the leading newline the
    # pretty printer leaves at the start — presumably intentional; verify).
    with open(libraryFile, "w") as f:
        atom = "\n".join(entries)[1:]
        f.write(atom)