Example #1
0
def basic_asset_for_element(asset_el):
    """Build an Asset from an Atom entry element without saving it.

    The asset is looked up by the entry's Atom ``id``; if no such Asset
    exists yet a new, unsaved one is created.  The entry's ``published``
    timestamp, ``content`` (for ``html`` and ``xhtml`` content types only)
    and author are copied onto it.

    Args:
        asset_el: an ElementTree element for one Atom entry.  It is expected
            to contain Atom ``id``, ``published``, ``content`` and ``author``
            children; a missing one raises from the attribute access or
            parsing call that needs it.

    Returns:
        The populated Asset.  Saving it is left to the caller.
    """
    atom_id = asset_el.findtext('{http://www.w3.org/2005/Atom}id')
    logging.debug('Parsing asset %s', atom_id)

    try:
        asset = Asset.objects.get(atom_id=atom_id)
    except Asset.DoesNotExist:
        asset = Asset(atom_id=atom_id)
    asset.imported = True

    publ = asset_el.findtext('{http://www.w3.org/2005/Atom}published')
    asset.published = datetime.strptime(publ, '%Y-%m-%dT%H:%M:%SZ')

    content_el = asset_el.find('{http://www.w3.org/2005/Atom}content')
    content_type = content_el.get('type')
    if content_type == 'html':
        asset.content = content_el.text
    elif content_type == 'xhtml':
        # XHTML content is wrapped in a single <div>; the stored content is
        # that div's leading text followed by each child serialized in turn.
        html_el = content_el.find('{http://www.w3.org/1999/xhtml}div')
        parts = [html_el.text or u'']
        # Iterate the element directly: Element.getchildren() is deprecated
        # and no longer exists on Python 3.9+.
        # NOTE(review): on Python 3, ElementTree.tostring() returns bytes
        # unless encoding='unicode' is passed -- confirm the target version.
        parts.extend(ElementTree.tostring(child) for child in html_el)
        asset.content = u''.join(parts)
    # Any other content type leaves asset.content as it was.

    author_el = asset_el.find('{http://www.w3.org/2005/Atom}author')
    author_name = author_el.findtext('{http://www.w3.org/2005/Atom}name')
    openid = author_el.findtext('{http://www.w3.org/2005/Atom}uri')
    # Import "gone" folks' comments anonymously.
    if openid != 'http://www.vox.com/gone/':
        asset.author = person_for_openid(openid, author_name)

    return asset
Example #2
0
def import_assets(assets):
    """Create or update a local Asset for each remote asset in *assets*.

    Every item is matched to an Asset by ``atom_id`` (a new Asset is made
    when none exists), marked as imported, given its author and publication
    time, filled in according to its ``object_type`` ('Post' or 'Comment')
    and saved.  Items with any other object type are logged as errors and
    skipped without being saved.
    """
    for remote in assets:
        logging.debug('Parsing asset %s', remote.url_id)

        try:
            asset = Asset.objects.get(atom_id=remote.id)
        except Asset.DoesNotExist:
            asset = Asset(atom_id=remote.id)
        asset.imported = True

        author = remote.author
        if not author or author.url_id == '6p0000000000000014':
            # No author, or the one hard-coded excluded url_id: no author.
            asset.author = None
        else:
            avatar = author.avatar_link
            if avatar.url_template:
                userpic_url = avatar.url_template.replace('{spec}', '50si')
            else:
                userpic_url = avatar.url
            profile_url = author.profile_page_url
            shown_name = author.display_name or author.preferred_username
            asset.author = person_for_openid(profile_url, shown_name, userpic_url)

        asset.published = remote.published

        kind = remote.object_type
        if kind not in ('Post', 'Comment'):
            # what
            logging.error('Unexpected object type %r for asset %s', kind, remote.url_id)
            continue

        if kind == 'Post':
            asset.title = remote.title
            asset.summary = remote.excerpt
            asset.content = remote.rendered_content
            asset.slug = remote.filename
        else:
            assert remote.text_format == 'html_convert_linebreaks', (
                'This comment %s has unexpected text formatting %r'
                % (remote.url_id, remote.text_format))
            asset.content = remote.content.replace('\n', '<br>\n')

            # Both the direct parent and the thread root must already exist
            # locally; Asset.objects.get() raises otherwise.
            parent_id = remote.in_reply_to.id
            asset.in_reply_to = Asset.objects.get(atom_id=parent_id)
            root_id = remote.api_data['root']['id']
            asset.in_thread_of = Asset.objects.get(atom_id=root_id)

        logging.debug('Hello, %s %s (%s)!', kind.lower(), remote.url_id, asset.title)
        asset.save()
Example #3
0
def _unwrap_lj_tags(content_root):
    """Replace every lj-raw / lj-cut element in the soup with its children."""
    for el in content_root.findAll(re.compile(r"lj-(?:raw|cut)")):
        el_parent = el.parent
        el_index = el_parent.contents.index(el)
        el.extract()
        # Inserting in reverse at a fixed index keeps the children in order.
        for child in reversed(list(el.contents)):
            el_parent.insert(el_index, child)


def _groups_for_allowmask(mask, all_friends_group, group_objs, ditemid):
    """Return the list of groups selected by a "usemask" allowmask.

    A mask of exactly 1 selects the all-friends group.  Otherwise bit ``i``
    (for i in 1..29) selects ``group_objs[i]`` when such a group is known;
    set bits with no known group are ignored.  ``ditemid`` is only used in
    the debug log messages.
    """
    logging.debug("Post %s has mask %s?", ditemid, format(mask, "b"))

    if mask == 1:
        # All the other bits are 0, so the loop below adds no other groups.
        mask_groups = [all_friends_group]
    else:
        mask_groups = []

    for i in range(1, 30):
        mask >>= 1
        if mask == 0:
            break
        logging.debug("    Remaining mask %s", format(mask, "b"))
        if mask & 0x01:
            logging.debug("    Yay %s has group %d!", ditemid, i)
            if i in group_objs:
                logging.debug("    And group %d exists woohoo!!", i)
                mask_groups.append(group_objs[i])

    logging.debug("So post %s gets %d groups", ditemid, len(mask_groups))
    return mask_groups


def import_events(source, atomid_prefix, foafsource):
    """Import friend groups, friends, posts and comments from an XML export.

    Args:
        source: anything ``ElementTree.parse`` accepts.  The root element
            must carry ``username`` and ``server`` attributes and may contain
            ``friends/group``, ``friends/friend`` and ``events/event``
            elements.
        atomid_prefix: prefix for the Atom ids and group tags generated
            here.  When None, ``"urn:lj:<domain>:atom1:<username>:"`` is
            used.
        foafsource: if truthy, handed to ``import_foaf`` before anything
            else is imported.

    Returns:
        None.  Groups, posts and their privacy settings are written through
        the models as a side effect.

    Raises:
        AssertionError: if an event has no ``date`` text.
        Lookup errors from the model layer propagate unchanged, e.g. when a
        private post is found but no group tagged "private" exists.
    """
    tree = ElementTree.parse(source)
    root = tree.getroot()

    username = root.get("username")
    server = root.get("server")
    # Keep the last two dot-separated labels ("www.livejournal.com" ->
    # "livejournal.com").  Slicing with [-2:] rather than [1:] gives the same
    # answer for hosts with three or more labels, and keeps a bare
    # "livejournal.com" intact instead of reducing it to "com".
    server_domain = ".".join(server.rsplit(".", 2)[-2:])
    openid_for = partial(generate_openid, server_domain)
    if atomid_prefix is None:
        atomid_prefix = "urn:lj:%s:atom1:%s:" % (server_domain, username)

    post_author = make_my_openid(openid_for(username))

    # First, if there's a FOAF, learn all my friends' names and faces.
    if foafsource:
        import_foaf(foafsource, server_domain)

    group_model = giraffe.friends.models.Group

    # Now update groups and friends, so we can knit the posts together right.
    # Paths are relative to the root element; a leading "/" on an ElementTree
    # search is deprecated and emits a FutureWarning.
    group_objs = {}
    for group in root.findall("friends/group"):
        group_id = int(group.findtext("id"))
        name = group.findtext("name")

        tag = "%sgroup:%d" % (atomid_prefix, group_id)
        group_obj, _created = group_model.objects.get_or_create(
            tag=tag, defaults={"display_name": name}
        )
        group_objs[group_id] = group_obj

    all_friends_tag = "%sfriends" % atomid_prefix
    all_friends_group, _created = group_model.objects.get_or_create(
        tag=all_friends_tag, defaults={"display_name": "Friends"}
    )

    for friend in root.findall("friends/friend"):
        friendname = friend.findtext("username")
        openid = openid_for(friendname)

        ident_person = person_for_openid(openid, friend.findtext("fullname"))

        # Update their groups.
        group_ids = tuple(int(groupnode.text) for groupnode in friend.findall("groups/group"))
        logging.debug("Setting %s's groups to %r", friendname, group_ids)
        ident_person.groups = [all_friends_group] + [group_objs[gid] for gid in group_ids]

    # Fetched on first use, so exports without private posts never query it.
    private_group = None

    # Import the posts.
    for event in root.findall("events/event"):
        ditemid = event.get("ditemid")
        logging.debug("Parsing event %s", ditemid)
        atom_id = "%s%s" % (atomid_prefix, ditemid)

        try:
            post = Asset.objects.get(atom_id=atom_id)
        except Asset.DoesNotExist:
            post = Asset(atom_id=atom_id)

        event_props = {}
        for prop in event.findall("props/prop"):
            event_props[prop.get("name")] = prop.get("value")

        post.title = event.findtext("subject") or ""
        post.author = post_author

        publ = event.findtext("date")
        assert publ, "event has no date :("
        # TODO: is this in the account's timezone or what?
        post.published = datetime.strptime(publ, "%Y-%m-%d %H:%M:%S")

        content_root = BeautifulSoup(event.findtext("event"))
        # Add line breaks to the post if it's not preformatted.
        if not int(event_props.get("opt_preformatted", 0)):
            format_soup(content_root)
        # Strip the lj-raw and lj-cut wrappers but keep what is inside them.
        _unwrap_lj_tags(content_root)
        # TODO: handle opt_nocomments prop
        # TODO: put music and mood in the post content
        # TODO: handle taglist prop
        post.content = str(content_root)

        post.imported = True
        post.save()
        logging.info("Saved new post %s (%s) as #%d", ditemid, post.title, post.pk)

        # Privacy is assigned after save(), once the post has a primary key.
        security = event.get("security")
        if security == "private":
            logging.debug("Oh ho post %s is all fancy private", ditemid)
            if private_group is None:
                private_group = group_model.objects.get(tag="private")
            post.private_to = [private_group]
        elif security == "usemask":
            mask = int(event.get("allowmask"))
            post.private_to = _groups_for_allowmask(
                mask, all_friends_group, group_objs, ditemid
            )

        # Import the comments.
        for comment in event.findall("comments/comment"):
            import_comment(comment, post, openid_for)