Ejemplo n.º 1
0
def track(func):
    """A syntactic-sugar decorator to automatically track yielded
    references from an entry. See :class:`Translation` in
    :mod:`acrylamid.views.entry` for an example.

    :param func: callable invoked as ``func(entry, **kw)`` that returns an
        iterable of items
    :returns: a wrapper with the same call signature; it returns ``map``
        applied over ``func``'s items, where every item is first passed to
        ``append(entry, item)`` and then handed on unchanged
    """
    # imported locally: only ``partial`` is known to be in module scope
    from functools import wraps

    def dec(entry, item):
        append(entry, item)
        return item

    @wraps(func)  # keep the decorated callable's name and docstring
    def wrapper(entry, **kw):
        return map(partial(dec, entry), func(entry, **kw))

    return wrapper
Ejemplo n.º 2
0
def wordpress(xml):
    """WordPress to Acrylamid, inspired by the Astraeus project.

    :param xml: WordPress export (WXR) document as text
    :returns: ``(defaults, entries)``: ``defaults`` holds the channel's
        ``title`` and ``www_root``; ``entries`` lists one dict per item
        whose type is ``page`` or ``entry`` (the WordPress type ``post`` is
        renamed to ``entry``)
    :raises InputError: if ``xml`` has no ``xmlns:wp`` declaration, is not
        well-formed, or has no ``wxr_version`` element in any of the
        export namespaces 1.1 to 1.9
    """

    if 'xmlns:wp' not in xml:
        raise InputError('not a WP dump')

    global USED_WORDPRESS
    USED_WORDPRESS = True

    def generate(item):
        # cons, dcns, wpns and excerptns are read from the enclosing scope;
        # they are bound before any item is converted in the loop below

        entry = {
            'title': item.find('title').text,
            'link': item.find('link').text,

            'content': (item.find('%sencoded' % cons).text or '').replace('\n', '<br />\n'),
            # a missing excerpt element and an empty one both yield ''
            'description': item.findtext('%sencoded' % excerptns) or '',
            'date': datetime.strptime(item.find('%spost_date' % wpns).text,
                "%Y-%m-%d %H:%M:%S"),

            'author': item.find('%screator' % dcns).text,
            'tags': [tag.text for tag in item.findall('category')]
        }

        # attachment, nav_menu_item, page, post
        entry['type'] = item.find('%spost_type' % wpns).text

        if entry['type'] == 'post':
            entry['type'] = 'entry'

        if item.find('%sstatus' % wpns).text != 'publish':
            entry['draft'] = True

        return entry

    try:
        tree = ElementTree.fromstring(xml.encode('utf-8'))
    except ElementTree.ParseError:
        raise InputError('no well-formed XML')

    # wordpress name spaces
    dcns = '{http://purl.org/dc/elements/1.1/}'
    cons = '{http://purl.org/rss/1.0/modules/content/}'

    defaults = {
        'title': tree.find('channel/title').text,
        'www_root': tree.find('channel/link').text
    }

    # probe the export namespaces 1.1 .. 1.9 and use the first one in which
    # the channel carries a wxr_version element
    for version in range(1, 10):
        wpns = '{http://wordpress.org/export/1.%i/}' % version
        excerptns = '{http://wordpress.org/export/1.%i/excerpt/}' % version
        if tree.find('channel/%swxr_version' % wpns) is None:
            continue
        entries = [generate(item) for item in tree.findall('channel/item')]
        return defaults, [entry for entry in entries if entry['type'] in ('page', 'entry')]

    # nothing matched: report it like the other input errors instead of
    # implicitly returning None
    raise InputError('unsupported WXR version')
Ejemplo n.º 3
0
def track(func):
    """A syntactic-sugar decorator to automatically track yielded
    references from an entry. See :class:`Translation` in
    :mod:`acrylamid.views.entry` for an example.

    :param func: callable taking ``(entry, **kw)`` and returning an iterable
    :returns: a callable taking ``(entry, **kw)``; its result is ``map``
        over the items of ``func(entry, **kw)``, each of which is passed to
        ``append(entry, item)`` before being passed through unchanged
    """
    # function-scope import because only ``partial`` is visibly imported
    from functools import wraps

    def dec(entry, item):
        append(entry, item)
        return item

    # wraps() copies func's metadata (name, docstring) onto the wrapper
    @wraps(func)
    def tracked(entry, **kw):
        items = func(entry, **kw)
        return map(partial(dec, entry), items)

    return tracked
Ejemplo n.º 4
0
def atom(xml):
    """Parse an Atom feed into site defaults and a list of entries.

    :param xml: Atom document as text
    :returns: ``(defaults, entries)``: ``defaults`` holds ``sitename`` and
        ``author`` and, if the feed has an alternate link, ``www_root``;
        each entry is a dict with ``title``, ``content``, ``date``,
        ``link`` and ``tags``
    :raises InputError: if ``xml`` is not well-formed or its root element
        is not an Atom ``feed``
    :raises AcrylamidException: if an entry lacks title, updated, link or
        content
    """

    def find_link(item):
        # prefer the alternate link (``rel`` defaults to "alternate"), the
        # same rule used for the feed-level www_root below
        links = item.findall(ns + 'link')
        for candidate in links:
            if candidate.get('rel', 'alternate') == 'alternate':
                return candidate
        return links[0] if links else None

    def generate(item):

        entry = {}

        try:
            entry['title'] = item.find(ns + 'title').text
            entry['date'] = item.find(ns + 'updated').text
            link = find_link(item)
            # Atom links carry their target in ``href``; the element text
            # is only used as a fallback
            entry['link'] = link.get('href') or link.text
            entry['content'] = item.find(ns + 'content').text
        except (AttributeError, TypeError):
            raise AcrylamidException(
                'invalid Atom feed: provide at least title, ' +
                'link, content and updated!')

        if item.find(ns + 'content').get('type', 'text') == 'html':
            entry['content'] = unescape(entry['content'])

        return {
            'title': entry['title'],
            'content': entry['content'],
            # only the UTC "Z" form without fractional seconds is accepted
            'date': datetime.strptime(entry['date'], "%Y-%m-%dT%H:%M:%SZ"),
            'link': entry['link'],
            'tags': [x.get('term') for x in item.findall(ns + 'category')]
        }

    try:
        tree = ElementTree.fromstring(xml.encode('utf-8'))
    except ElementTree.ParseError:
        raise InputError('no well-formed XML')

    if not tree.tag.endswith('/2005/Atom}feed'):
        raise InputError('no Atom feed')

    ns = '{http://www.w3.org/2005/Atom}'  # etree Y U have stupid namespace handling?
    defaults = {}

    # NOTE: a feed without a top-level title or author/name fails here with
    # AttributeError
    defaults['sitename'] = tree.find(ns + 'title').text
    defaults['author'] = tree.find(ns + 'author').find(ns + 'name').text

    www_root = [
        a for a in tree.findall(ns + 'link')
        if a.attrib.get('rel', 'alternate') == 'alternate'
    ]
    if www_root:
        defaults['www_root'] = www_root[0].attrib.get('href')

    return defaults, [generate(item) for item in tree.findall(ns + 'entry')]
Ejemplo n.º 5
0
    def determine_version(self, bundle, env, hunk=None):
        """Return a truncated hex digest of the data that makes up ``bundle``.

        What gets hashed depends on the arguments:

        * a truthy ``hunk`` is hashed on its own;
        * otherwise, if ``bundle.output`` contains no placeholder, the
          resolved output file is hashed;
        * otherwise every resolved source of ``bundle.contents`` followed
          by the bundle's resolved dependencies is hashed.

        The hash object comes from ``self.hasher()`` and the hex digest is
        cut to ``self.length`` characters.
        """
        if hunk:
            pieces = [hunk]
        elif has_placeholder(bundle.output):
            # resolve_source returns one list per content item; flatten them
            sources = sum(map(env.resolver.resolve_source, bundle.contents), [])
            paths = sources + bundle.resolve_depends(env)
            pieces = [FileHunk(path) for path in paths]
        else:
            pieces = [FileHunk(bundle.resolve_output(env))]

        digest = self.hasher()
        for piece in pieces:
            digest.update(piece.data())
        full_hex = digest.hexdigest()
        return full_hex[:self.length]
Ejemplo n.º 6
0
def atom(xml):
    """Convert an Atom feed into ``(defaults, entries)``.

    :param xml: Atom document as text
    :returns: a tuple of the site defaults (``sitename``, ``author`` and,
        when the feed has an alternate link, ``www_root``) and a list with
        one dict per entry (``title``, ``content``, ``date``, ``link``,
        ``tags``)
    :raises InputError: if ``xml`` is not well-formed XML or the root
        element is not an Atom ``feed``
    :raises AcrylamidException: if an entry has no title, updated, link or
        content element
    """

    def generate(item):

        entry = {}

        try:
            entry['title'] = item.find(ns + 'title').text
            entry['date'] = item.find(ns + 'updated').text

            # An entry may carry several links; take the alternate one
            # (``rel`` defaults to "alternate") and fall back to the first.
            links = item.findall(ns + 'link')
            alternates = [a for a in links
                          if a.get('rel', 'alternate') == 'alternate']
            link = (alternates or links or [None])[0]
            # The target lives in ``href``; element text is a fallback.
            # A missing link raises AttributeError here and is reported
            # below like any other missing element.
            entry['link'] = link.get('href') or link.text

            entry['content'] = item.find(ns + 'content').text
        except (AttributeError, TypeError):
            raise AcrylamidException('invalid Atom feed: provide at least title, '
                                     + 'link, content and updated!')

        if item.find(ns + 'content').get('type', 'text') == 'html':
            entry['content'] = unescape(entry['content'])

        return {'title': entry['title'],
                'content': entry['content'],
                # only the UTC "Z" form without fractional seconds parses
                'date': datetime.strptime(entry['date'], "%Y-%m-%dT%H:%M:%SZ"),
                'link': entry['link'],
                'tags': [x.get('term') for x in item.findall(ns + 'category')]}

    try:
        tree = ElementTree.fromstring(xml.encode('utf-8'))
    except ElementTree.ParseError:
        raise InputError('no well-formed XML')

    if not tree.tag.endswith('/2005/Atom}feed'):
        raise InputError('no Atom feed')

    ns = '{http://www.w3.org/2005/Atom}'  # etree Y U have stupid namespace handling?
    defaults = {}

    # NOTE: raises AttributeError when the feed has no top-level title or
    # author/name element
    defaults['sitename'] = tree.find(ns + 'title').text
    defaults['author'] = tree.find(ns + 'author').find(ns + 'name').text

    www_root = [a for a in tree.findall(ns + 'link')
                if a.attrib.get('rel', 'alternate') == 'alternate']
    if www_root:
        defaults['www_root'] = www_root[0].attrib.get('href')

    return defaults, [generate(item) for item in tree.findall(ns + 'entry')]
Ejemplo n.º 7
0
def wordpress(xml):
    """WordPress to Acrylamid, inspired by the Astraeus project.

    :param xml: WordPress export (WXR) document as text
    :returns: ``(defaults, entries)``: ``defaults`` holds the channel's
        ``title`` and ``www_root``; ``entries`` lists one dict per channel
        item with ``title``, ``link``, ``content``, ``date``, ``author``
        and ``tags``, plus ``type`` for pages and ``draft`` for items that
        are not published
    :raises InputError: if ``xml`` has no ``xmlns:wp`` declaration, is not
        well-formed, or has no ``wxr_version`` element in any of the
        export namespaces 1.1 to 1.9
    """

    if 'xmlns:wp' not in xml:
        raise InputError('not a WP dump')

    global USED_WORDPRESS
    USED_WORDPRESS = True

    def generate(item):
        # cons, dcns and wpns are read from the enclosing scope; they are
        # bound before any item is converted in the loop below

        entry = {
            'title': item.find('title').text,
            'link': item.find('link').text,

            'content': (item.find('%sencoded' % cons).text or '').replace('\n', '<br />\n'),
            'date': datetime.strptime(item.find('%spost_date' % wpns).text,
                "%Y-%m-%d %H:%M:%S"),

            'author': item.find('%screator' % dcns).text,
            'tags': [tag.text for tag in item.findall('category')]
        }

        if item.find('%spost_type' % wpns).text == 'page':
            entry['type'] = 'page'

        if item.find('%sstatus' % wpns).text != 'publish':
            entry['draft'] = True

        return entry

    try:
        tree = ElementTree.fromstring(xml.encode('utf-8'))
    except ElementTree.ParseError:
        raise InputError('no well-formed XML')

    # wordpress name spaces
    dcns = '{http://purl.org/dc/elements/1.1/}'
    cons = '{http://purl.org/rss/1.0/modules/content/}'

    defaults = {
        'title': tree.find('channel/title').text,
        'www_root': tree.find('channel/link').text
    }

    for version in range(1, 10):
        wpns = '{http://wordpress.org/export/1.%i/}' % version
        # skip namespaces the dump does not use; without this check the
        # loop always returned with the 1.1 namespace
        if tree.find('channel/%swxr_version' % wpns) is None:
            continue
        return defaults, [generate(item) for item in tree.findall('channel/item')]

    # nothing matched: report it like the other input errors instead of
    # implicitly returning None
    raise InputError('unsupported WXR version')
Ejemplo n.º 8
0
    def determine_version(self, bundle, env, hunk=None):
        """Compute a short content hash for ``bundle``.

        When a truthy ``hunk`` is passed, only its data is hashed. Without
        one, the resolved output file is hashed if ``bundle.output`` has no
        placeholder; if it has one, all resolved sources of
        ``bundle.contents`` plus the bundle's resolved dependencies are
        hashed instead. Returns the first ``self.length`` characters of the
        hex digest produced by ``self.hasher()``.
        """
        if not hunk:
            if has_placeholder(bundle.output):
                # one list per content item, concatenated into a flat list
                resolved = sum(
                    map(env.resolver.resolve_source, bundle.contents), [])
                resolved = resolved + bundle.resolve_depends(env)
                to_hash = list(map(FileHunk, resolved))
            else:
                to_hash = [FileHunk(bundle.resolve_output(env))]
        else:
            to_hash = [hunk]

        state = self.hasher()
        for part in to_hash:
            state.update(part.data())
        return state.hexdigest()[:self.length]
Ejemplo n.º 9
0
def rss(xml):
    """Parse an RSS 2.0 feed into site defaults and a list of entries.

    :param xml: RSS document as text
    :returns: ``(defaults, entries)``: ``defaults`` always has ``author``
        (``None`` unless the channel provides one) and, when present in
        the channel, ``sitename``, ``www_root`` and ``lang``; each entry
        is a dict with ``title``, ``content``, ``date``, ``link`` and
        ``tags``
    :raises InputError: if ``xml`` is a WordPress dump, is not well-formed,
        is not an ``rss`` document of version 2.0, or has no child element
    :raises AcrylamidException: if an item lacks title, link, description
        or pubDate
    """

    if 'xmlns:wp' in xml:
        raise InputError('WordPress dump')

    def parse_date_time(stamp):
        # RFC 822 style date -> POSIX timestamp -> naive datetime
        ts = parsedate_tz(stamp)
        ts = mktime_tz(ts)
        return datetime.fromtimestamp(ts)

    def generate(item):

        entry = {}
        for k, v in {'title': 'title', 'date': 'pubDate',
                     'link': 'link', 'content': 'description'}.items():
            try:
                text = item.find(v).text
                entry[k] = unescape(text) if k == 'content' else text
            except (AttributeError, TypeError):
                # a missing element is reported collectively below
                pass

        if any(k not in entry for k in ('title', 'date', 'link', 'content')):
            raise AcrylamidException('invalid RSS 2.0 feed: provide at least title, '
                                     + 'link, content and pubDate!')

        return {'title': entry['title'],
                'content': entry['content'],
                'date': parse_date_time(entry['date']),
                'link': entry['link'],
                'tags': [cat.text for cat in item.findall('category')]}

    try:
        tree = ElementTree.fromstring(xml.encode('utf-8'))
    except ElementTree.ParseError:
        raise InputError('no well-formed XML')
    if tree.tag != 'rss' or tree.attrib.get('version') != '2.0':
        raise InputError('no RSS 2.0 feed')

    defaults = {'author': None}

    # Element.getchildren() no longer exists on Python 3.9+; iterating the
    # element yields the same children
    children = list(tree)
    if not children:
        raise InputError('no RSS 2.0 feed')
    channel = children[0]

    for k, v in {'title': 'sitename', 'link': 'www_root',
                 'language': 'lang', 'author': 'author'}.items():
        node = channel.find(k)
        if node is not None:
            defaults[v] = node.text

    return defaults, [generate(item) for item in channel.findall('item')]
Ejemplo n.º 10
0
def rss(xml):
    """Convert an RSS 2.0 feed into ``(defaults, entries)``.

    :param xml: RSS document as text
    :returns: a tuple of the site defaults and a list of entry dicts.
        ``defaults`` always contains ``author`` (``None`` unless the
        channel provides one) and, when the channel has them,
        ``sitename``, ``www_root`` and ``lang``. Every entry has
        ``title``, ``content``, ``date``, ``link`` and ``tags``.
    :raises InputError: if ``xml`` is a WordPress dump, is not well-formed,
        is not an ``rss`` document of version 2.0, or has no child element
    :raises AcrylamidException: if an item lacks title, link, description
        or pubDate
    """

    if 'xmlns:wp' in xml:
        raise InputError('WordPress dump')

    def parse_date_time(stamp):
        # RFC 822 style date -> POSIX timestamp -> naive datetime
        ts = parsedate_tz(stamp)
        ts = mktime_tz(ts)
        return datetime.fromtimestamp(ts)

    def generate(item):

        # entry key -> name of the RSS element it is read from
        fields = {
            'title': 'title',
            'date': 'pubDate',
            'link': 'link',
            'content': 'description'
        }

        entry = {}
        for k, v in fields.items():
            try:
                text = item.find(v).text
                entry[k] = unescape(text) if k == 'content' else text
            except (AttributeError, TypeError):
                # a missing element is reported collectively below
                pass

        missing = [k for k in ('title', 'date', 'link', 'content')
                   if k not in entry]
        if missing:
            raise AcrylamidException('invalid RSS 2.0 feed: provide at least title, '
                                     + 'link, content and pubDate!')

        return {
            'title': entry['title'],
            'content': entry['content'],
            'date': parse_date_time(entry['date']),
            'link': entry['link'],
            'tags': [cat.text for cat in item.findall('category')]
        }

    try:
        tree = ElementTree.fromstring(xml.encode('utf-8'))
    except ElementTree.ParseError:
        raise InputError('no well-formed XML')
    if tree.tag != 'rss' or tree.attrib.get('version') != '2.0':
        raise InputError('no RSS 2.0 feed')

    defaults = {'author': None}

    # Element.getchildren() no longer exists on Python 3.9+; len() and
    # indexing on the element give access to the same children
    if len(tree) == 0:
        raise InputError('no RSS 2.0 feed')
    channel = tree[0]

    # channel element name -> key it is stored under in ``defaults``
    mapping = {
        'title': 'sitename',
        'link': 'www_root',
        'language': 'lang',
        'author': 'author'
    }
    for k, v in mapping.items():
        node = channel.find(k)
        if node is not None:
            defaults[v] = node.text

    return defaults, [generate(item) for item in channel.findall('item')]