def track(func):
    """Decorator that tracks the references yielded for an entry.

    The decorated callable is invoked as ``func(entry, **kw)``.  Every item
    of its result is handed to ``append(entry, item)`` and then passed on
    unchanged through ``map``.  See :class:`Translation` in
    :mod:`acrylamid.views.entry` for an example.
    """

    def recorder(entry):
        # Bind the entry once so ``map`` can call the result per item.
        def record(item):
            append(entry, item)
            return item

        return record

    return lambda entry, **kw: map(recorder(entry), func(entry, **kw))
def wordpress(xml):
    """WordPress to Acrylamid, inspired by the Astraeus project.

    Parses a WordPress export (WXR) document and converts its items.

    :param xml: the export as text; it is encoded to UTF-8 before parsing
    :returns: a ``(defaults, entries)`` tuple.  ``defaults`` holds the
        channel ``title`` and ``www_root``; ``entries`` is a list of dicts
        restricted to items whose type is ``page`` or ``entry`` (WordPress
        ``post`` items are renamed to ``entry``).
    :raises InputError: if ``xml`` does not declare the ``wp`` namespace,
        is not well-formed, or contains no ``wxr_version`` element in any
        of the probed ``1.1`` .. ``1.9`` export namespaces.
    """
    if 'xmlns:wp' not in xml:
        raise InputError('not a WP dump')

    global USED_WORDPRESS
    USED_WORDPRESS = True

    def generate(item):
        # ``wpns``/``excerptns`` are bound by the version probe below
        # before this helper is ever called.
        entry = {
            'title': item.find('title').text,
            'link': item.find('link').text,
            'content': (item.find('%sencoded' % cons).text or '').replace('\n', '<br />\n'),
            'description': item.find('%sencoded' % excerptns).text or '',
            'date': datetime.strptime(item.find('%spost_date' % wpns).text,
                                      "%Y-%m-%d %H:%M:%S"),
            'author': item.find('%screator' % dcns).text,
            'tags': [tag.text for tag in item.findall('category')],
        }

        # attachment, nav_menu_item, page, post
        entry['type'] = item.find('%spost_type' % wpns).text
        if entry['type'] == 'post':
            entry['type'] = 'entry'

        if item.find('%sstatus' % wpns).text != 'publish':
            entry['draft'] = True

        return entry

    try:
        tree = ElementTree.fromstring(xml.encode('utf-8'))
    except ElementTree.ParseError:
        raise InputError('no well-formed XML')

    # wordpress name spaces
    dcns = '{http://purl.org/dc/elements/1.1/}'
    cons = '{http://purl.org/rss/1.0/modules/content/}'

    defaults = {
        'title': tree.find('channel/title').text,
        'www_root': tree.find('channel/link').text,
    }

    # The export namespace embeds the WXR version; probe until one matches.
    for version in range(1, 10):
        wpns = '{http://wordpress.org/export/1.%i/}' % version
        excerptns = '{http://wordpress.org/export/1.%i/excerpt/}' % version
        if tree.find('channel/%swxr_version' % wpns) is None:
            continue
        entries = [generate(item) for item in tree.findall('channel/item')]
        return defaults, [entry for entry in entries
                          if entry['type'] in ('page', 'entry')]

    # Previously the function fell through here and returned None, which
    # made callers fail while unpacking the result.
    raise InputError('unsupported WXR version')
def atom(xml):
    """Convert an Atom feed into Acrylamid defaults and entries.

    :param xml: the feed as text; it is encoded to UTF-8 before parsing
    :returns: a ``(defaults, entries)`` tuple.  ``defaults`` contains
        ``sitename``, ``author`` (``None`` when the feed has no feed-level
        author name) and, if an alternate link exists, ``www_root``.
        Each entry is a dict with ``title``, ``content``, ``date``,
        ``link`` and ``tags``.
    :raises InputError: if ``xml`` is not well-formed or its root element
        is not an Atom ``feed``.
    :raises AcrylamidException: if an entry lacks title, link, content or
        updated.
    """

    def generate(item):
        try:
            title = item.find(ns + 'title').text
            updated = item.find(ns + 'updated').text
            link = item.find(ns + 'link')
            # Atom stores the target in the ``href`` attribute; the element
            # text is only kept as a fallback for non-conforming feeds.
            href = link.get('href') or link.text
            content = item.find(ns + 'content')
            body = content.text
        except (AttributeError, TypeError):
            raise AcrylamidException(
                'invalid Atom feed: provide at least title, '
                + 'link, content and updated!')

        if content.get('type', 'text') == 'html':
            body = unescape(body)

        return {
            'title': title,
            'content': body,
            'date': datetime.strptime(updated, "%Y-%m-%dT%H:%M:%SZ"),
            'link': href,
            'tags': [x.get('term') for x in item.findall(ns + 'category')],
        }

    try:
        tree = ElementTree.fromstring(xml.encode('utf-8'))
    except ElementTree.ParseError:
        raise InputError('no well-formed XML')

    if not tree.tag.endswith('/2005/Atom}feed'):
        raise InputError('no Atom feed')

    # etree Y U have stupid namespace handling?
    ns = '{http://www.w3.org/2005/Atom}'

    defaults = {}
    defaults['sitename'] = tree.find(ns + 'title').text

    # A missing feed-level author used to crash with AttributeError.
    author = tree.find(ns + 'author')
    name = author.find(ns + 'name') if author is not None else None
    defaults['author'] = name.text if name is not None else None

    # A link without ``rel`` counts as ``alternate``.
    www_root = [a for a in tree.findall(ns + 'link')
                if a.attrib.get('rel', 'alternate') == 'alternate']
    if www_root:
        defaults['www_root'] = www_root[0].attrib.get('href')

    return defaults, list(map(generate, tree.findall(ns + 'entry')))
def determine_version(self, bundle, env, hunk=None):
    """Return a truncated hex digest that serves as the bundle version.

    When ``hunk`` is given, only its data is hashed.  Otherwise the
    resolved output file is hashed, unless ``bundle.output`` contains a
    placeholder; in that case the resolved sources and dependencies are
    hashed instead.

    The digest comes from ``self.hasher()`` and is cut to ``self.length``
    characters.
    """
    if hunk:
        to_hash = [hunk]
    elif has_placeholder(bundle.output):
        # The output name cannot be resolved yet, so hash the inputs.
        sources = sum(map(env.resolver.resolve_source, bundle.contents), [])
        to_hash = [FileHunk(path)
                   for path in sources + bundle.resolve_depends(env)]
    else:
        to_hash = [FileHunk(bundle.resolve_output(env))]

    digest = self.hasher()
    for piece in to_hash:
        digest.update(piece.data())
    return digest.hexdigest()[:self.length]
def atom(xml):
    """Import an Atom feed.

    :param xml: the feed as text; it is encoded to UTF-8 before parsing
    :returns: ``(defaults, entries)``.  ``defaults`` carries ``sitename``,
        ``author`` (``None`` if the feed names no feed-level author) and
        ``www_root`` when an alternate link is present.  Every entry is a
        dict with the keys ``title``, ``content``, ``date``, ``link`` and
        ``tags``.
    :raises InputError: for input that is not well-formed XML or whose
        root element is not an Atom ``feed``.
    :raises AcrylamidException: for entries missing title, link, content
        or updated.
    """

    def generate(item):
        entry = {}
        try:
            entry['title'] = item.find(ns + 'title').text
            entry['date'] = item.find(ns + 'updated').text
            link_el = item.find(ns + 'link')
            # The link target is the ``href`` attribute in Atom; reading
            # only the element text yielded ``None`` for regular feeds.
            entry['link'] = link_el.get('href') or link_el.text
            content_el = item.find(ns + 'content')
            entry['content'] = content_el.text
        except (AttributeError, TypeError):
            raise AcrylamidException('invalid Atom feed: provide at least title, '
                                     + 'link, content and updated!')

        if content_el.get('type', 'text') == 'html':
            entry['content'] = unescape(entry['content'])

        entry['date'] = datetime.strptime(entry['date'], "%Y-%m-%dT%H:%M:%SZ")
        entry['tags'] = [x.get('term') for x in item.findall(ns + 'category')]
        return entry

    try:
        tree = ElementTree.fromstring(xml.encode('utf-8'))
    except ElementTree.ParseError:
        raise InputError('no well-formed XML')

    if not tree.tag.endswith('/2005/Atom}feed'):
        raise InputError('no Atom feed')

    # etree Y U have stupid namespace handling?
    ns = '{http://www.w3.org/2005/Atom}'

    defaults = {'sitename': tree.find(ns + 'title').text, 'author': None}

    # Tolerate feeds without a feed-level <author><name> instead of
    # failing with AttributeError.
    author_el = tree.find(ns + 'author')
    if author_el is not None:
        name_el = author_el.find(ns + 'name')
        if name_el is not None:
            defaults['author'] = name_el.text

    # ``rel`` defaults to ``alternate`` when absent.
    for candidate in tree.findall(ns + 'link'):
        if candidate.attrib.get('rel', 'alternate') == 'alternate':
            defaults['www_root'] = candidate.attrib.get('href')
            break

    return defaults, list(map(generate, tree.findall(ns + 'entry')))
def wordpress(xml):
    """WordPress to Acrylamid, inspired by the Astraeus project.

    :param xml: a WordPress export (WXR) as text; it is encoded to UTF-8
        before parsing
    :returns: a ``(defaults, entries)`` tuple.  ``defaults`` holds the
        channel ``title`` and ``www_root``.  Each entry is a dict with
        ``title``, ``link``, ``content``, ``date``, ``author`` and
        ``tags``; pages additionally get ``type`` set to ``'page'`` and
        unpublished items get ``draft`` set to ``True``.
    :raises InputError: if ``xml`` does not declare the ``wp`` namespace,
        is not well-formed, or has no ``wxr_version`` element in any of
        the probed ``1.1`` .. ``1.9`` export namespaces.
    """
    if 'xmlns:wp' not in xml:
        raise InputError('not a WP dump')

    global USED_WORDPRESS
    USED_WORDPRESS = True

    def generate(item):
        entry = {
            'title': item.find('title').text,
            'link': item.find('link').text,
            'content': (item.find('%sencoded' % cons).text or '').replace('\n', '<br />\n'),
            'date': datetime.strptime(item.find('%spost_date' % wpns).text,
                                      "%Y-%m-%d %H:%M:%S"),
            'author': item.find('%screator' % dcns).text,
            'tags': [tag.text for tag in item.findall('category')],
        }

        if item.find('%spost_type' % wpns).text == 'page':
            entry['type'] = 'page'

        if item.find('%sstatus' % wpns).text != 'publish':
            entry['draft'] = True

        return entry

    try:
        tree = ElementTree.fromstring(xml.encode('utf-8'))
    except ElementTree.ParseError:
        raise InputError('no well-formed XML')

    # wordpress name spaces
    dcns = '{http://purl.org/dc/elements/1.1/}'
    cons = '{http://purl.org/rss/1.0/modules/content/}'

    defaults = {
        'title': tree.find('channel/title').text,
        'www_root': tree.find('channel/link').text,
    }

    # The export namespace depends on the WXR version.  The loop used to
    # settle on a fixed namespace without looking at the document, so
    # exports of any other version failed on the first ``wp:`` lookup.
    wpns = None
    for version in range(1, 10):
        candidate = '{http://wordpress.org/export/1.%i/}' % version
        if tree.find('channel/%swxr_version' % candidate) is not None:
            wpns = candidate
            break

    if wpns is None:
        raise InputError('unsupported WXR version')

    return defaults, list(map(generate, tree.findall('channel/item')))
def determine_version(self, bundle, env, hunk=None):
    """Compute the version string for ``bundle``.

    The data of one or more hunks is fed into a fresh ``self.hasher()``
    object and the first ``self.length`` characters of the hex digest are
    returned.  Which hunks are hashed:

    * the given ``hunk``, if one was passed;
    * otherwise the resolved output file, if ``bundle.output`` has no
      placeholder;
    * otherwise all resolved source files plus the resolved dependencies.
    """
    if not hunk:
        if not has_placeholder(bundle.output):
            candidates = [FileHunk(bundle.resolve_output(env))]
        else:
            resolved = sum(map(env.resolver.resolve_source, bundle.contents), [])
            filenames = resolved + bundle.resolve_depends(env)
            candidates = [FileHunk(name) for name in filenames]
    else:
        candidates = [hunk]

    hash_obj = self.hasher()
    for candidate in candidates:
        hash_obj.update(candidate.data())

    hexdigest = hash_obj.hexdigest()
    return hexdigest[:self.length]
def rss(xml):
    """Convert an RSS 2.0 feed into Acrylamid defaults and entries.

    :param xml: the feed as text; it is encoded to UTF-8 before parsing
    :returns: a ``(defaults, entries)`` tuple.  ``defaults`` always has an
        ``author`` key (``None`` unless the channel provides one) plus
        ``sitename``, ``www_root`` and ``lang`` for the channel elements
        that exist.  Each entry is a dict with ``title``, ``content``,
        ``date``, ``link`` and ``tags``.
    :raises InputError: if ``xml`` is a WordPress dump, is not well-formed,
        or is not an RSS 2.0 document with a ``channel``.
    :raises AcrylamidException: if an item lacks title, link, description
        or pubDate.
    """
    if 'xmlns:wp' in xml:
        raise InputError('WordPress dump')

    def parse_date_time(stamp):
        ts = parsedate_tz(stamp)
        ts = mktime_tz(ts)
        return datetime.fromtimestamp(ts)

    def generate(item):
        entry = {}
        for key, tag in (('title', 'title'), ('date', 'pubDate'),
                         ('link', 'link'), ('content', 'description')):
            try:
                text = item.find(tag).text
                entry[key] = unescape(text) if key == 'content' else text
            except (AttributeError, TypeError):
                # Missing element (or unusable content): reported below.
                pass

        if any(key not in entry for key in ('title', 'date', 'link', 'content')):
            raise AcrylamidException('invalid RSS 2.0 feed: provide at least title, '
                                     + 'link, content and pubDate!')

        return {
            'title': entry['title'],
            'content': entry['content'],
            'date': parse_date_time(entry['date']),
            'link': entry['link'],
            'tags': [cat.text for cat in item.findall('category')],
        }

    try:
        tree = ElementTree.fromstring(xml.encode('utf-8'))
    except ElementTree.ParseError:
        raise InputError('no well-formed XML')

    if tree.tag != 'rss' or tree.attrib.get('version') != '2.0':
        raise InputError('no RSS 2.0 feed')

    # ``Element.getchildren()`` no longer exists on Python 3.9+; look the
    # channel up by name and reject documents that have none.
    channel = tree.find('channel')
    if channel is None:
        raise InputError('no RSS 2.0 feed')

    defaults = {'author': None}
    for tag, key in (('title', 'sitename'), ('link', 'www_root'),
                     ('language', 'lang'), ('author', 'author')):
        try:
            defaults[key] = channel.find(tag).text
        except AttributeError:
            # Optional channel element is absent.
            pass

    return defaults, list(map(generate, channel.findall('item')))
def rss(xml):
    """Import an RSS 2.0 feed.

    :param xml: the feed as text; it is encoded to UTF-8 before parsing
    :returns: ``(defaults, entries)``.  ``defaults`` maps the channel's
        ``title``, ``link``, ``language`` and ``author`` elements to
        ``sitename``, ``www_root``, ``lang`` and ``author``; only
        ``author`` is always present (``None`` by default).  Every entry
        is a dict with ``title``, ``content``, ``date``, ``link`` and
        ``tags``.
    :raises InputError: for WordPress dumps, XML that is not well-formed,
        and documents that are not RSS 2.0 or lack a ``channel``.
    :raises AcrylamidException: for items without title, link, description
        or pubDate.
    """
    if 'xmlns:wp' in xml:
        raise InputError('WordPress dump')

    required = ('title', 'date', 'link', 'content')
    item_fields = (
        ('title', 'title'),
        ('date', 'pubDate'),
        ('link', 'link'),
        ('content', 'description'),
    )
    channel_fields = (
        ('title', 'sitename'),
        ('link', 'www_root'),
        ('language', 'lang'),
        ('author', 'author'),
    )

    def parse_date_time(stamp):
        ts = parsedate_tz(stamp)
        ts = mktime_tz(ts)
        return datetime.fromtimestamp(ts)

    def generate(item):
        entry = {}
        for key, tag in item_fields:
            try:
                value = item.find(tag).text
                if key == 'content':
                    value = unescape(value)
            except (AttributeError, TypeError):
                # Element missing or content not unescapable; the check
                # below turns this into an error.
                continue
            entry[key] = value

        if any(key not in entry for key in required):
            raise AcrylamidException('invalid RSS 2.0 feed: provide at least title, '
                                     + 'link, content and pubDate!')

        return {
            'title': entry['title'],
            'content': entry['content'],
            'date': parse_date_time(entry['date']),
            'link': entry['link'],
            'tags': [cat.text for cat in item.findall('category')],
        }

    try:
        tree = ElementTree.fromstring(xml.encode('utf-8'))
    except ElementTree.ParseError:
        raise InputError('no well-formed XML')

    if tree.tag != 'rss' or tree.attrib.get('version') != '2.0':
        raise InputError('no RSS 2.0 feed')

    # Replaces ``tree.getchildren()[0]``: ``getchildren`` is gone since
    # Python 3.9, and a missing channel should be a clean InputError.
    channel = tree.find('channel')
    if channel is None:
        raise InputError('no RSS 2.0 feed')

    defaults = {'author': None}
    for tag, key in channel_fields:
        node = channel.find(tag)
        if node is not None:
            defaults[key] = node.text

    return defaults, list(map(generate, channel.findall('item')))