def basic_asset_for_element(asset_el):
    """Return an ``Asset`` populated from one Atom entry element.

    The asset is looked up by the entry's Atom id; if none exists a new
    ``Asset`` is constructed with that id.  It is flagged as imported and
    given the entry's published time, its content (only for the ``html``
    and ``xhtml`` content types) and, unless the author URI is the Vox
    "gone" placeholder, an author resolved through ``person_for_openid``.

    The asset is *not* saved here; the caller gets it back to finish and
    save.

    ``asset_el`` is the ElementTree element of the entry.  It must carry
    Atom ``published``, ``content`` and ``author`` children: a missing one
    makes the corresponding lookup fail with an exception.
    """
    atom_ns = '{http://www.w3.org/2005/Atom}'

    atom_id = asset_el.findtext(atom_ns + 'id')
    logging.debug('Parsing asset %s', atom_id)

    try:
        asset = Asset.objects.get(atom_id=atom_id)
    except Asset.DoesNotExist:
        asset = Asset(atom_id=atom_id)
    asset.imported = True

    publ = asset_el.findtext(atom_ns + 'published')
    asset.published = datetime.strptime(publ, '%Y-%m-%dT%H:%M:%SZ')

    content_el = asset_el.find(atom_ns + 'content')
    content_type = content_el.get('type')
    if content_type == 'html':
        asset.content = content_el.text
    elif content_type == 'xhtml':
        html_el = content_el.find('{http://www.w3.org/1999/xhtml}div')
        # The wrapper div itself is dropped: keep its leading text plus the
        # serialized form of each child element.
        html = html_el.text or u''
        # Iterate the element directly (getchildren() is gone in Python 3.9)
        # and decode explicitly: tostring() defaults to US-ASCII bytes, which
        # Python 3 will not join into text implicitly.
        html += u''.join(
            ElementTree.tostring(child).decode('ascii') for child in html_el)
        asset.content = html

    author_el = asset_el.find(atom_ns + 'author')
    author_name = author_el.findtext(atom_ns + 'name')
    openid = author_el.findtext(atom_ns + 'uri')
    # Import "gone" folks' comments anonymously.
    if openid != 'http://www.vox.com/gone/':
        asset.author = person_for_openid(openid, author_name)

    return asset
def import_events(source, atomid_prefix, foafsource):
    """Import friend groups, friends, posts and comments from an XML export.

    ``source`` is handed to ``ElementTree.parse``.  The root element's
    ``username`` and ``server`` attributes identify the journal owner.

    ``atomid_prefix`` is prepended to every generated tag / atom id.  When
    it is ``None`` it defaults to ``urn:lj:<server_domain>:atom1:<username>:``.

    ``foafsource``, when truthy, is passed to ``import_foaf`` before anything
    else so friends' details are known first.

    The work happens in three passes over the document:

    1. ``friends/group`` elements become ``Group`` rows (get-or-create), and
       an extra all-friends group is ensured.
    2. ``friends/friend`` elements become people whose groups are set to the
       all-friends group plus their listed groups.
    3. ``events/event`` elements become saved ``Asset`` posts, with their
       visibility groups applied and their comments imported recursively
       through ``import_comment``.

    Returns ``None``.  Raises ``AssertionError`` for an event without a date.
    """
    Group = giraffe.friends.models.Group

    def as_bits(value):
        # Binary digits without a "0b" prefix; only used in debug logging.
        return str(value) if value <= 1 else as_bits(value >> 1) + str(value & 1)

    tree = ElementTree.parse(source)
    root = tree.getroot()

    username = root.get("username")
    server = root.get("server")
    # Drop everything before the last two dot-separated labels when there
    # are three or more (e.g. "www.example.com" -> "example.com").
    server_domain = ".".join(server.rsplit(".", 2)[1:])
    openid_for = partial(generate_openid, server_domain)
    if atomid_prefix is None:
        atomid_prefix = "urn:lj:%s:atom1:%s:" % (server_domain, username)

    post_author = make_my_openid(openid_for(username))

    # First, if there's a FOAF, learn all my friends' names and faces.
    if foafsource:
        import_foaf(foafsource, server_domain)

    # Now update groups and friends, so we can knit the posts together right.
    # Paths are written relative ("./...") because ElementTree treats a
    # leading "/" the same way but emits a FutureWarning for it.
    group_objs = {}
    for group in tree.findall("./friends/group"):
        group_id = int(group.findtext("id"))
        name = group.findtext("name")

        tag = "%sgroup:%d" % (atomid_prefix, group_id)
        group_obj, _ = Group.objects.get_or_create(
            tag=tag, defaults={"display_name": name}
        )
        group_objs[group_id] = group_obj

    all_friends_tag = "%sfriends" % atomid_prefix
    all_friends_group, _ = Group.objects.get_or_create(
        tag=all_friends_tag, defaults={"display_name": "Friends"}
    )

    for friend in tree.findall("./friends/friend"):
        friendname = friend.findtext("username")
        openid = openid_for(friendname)

        ident_person = person_for_openid(openid, friend.findtext("fullname"))

        # Update their groups.
        group_ids = tuple(
            int(groupnode.text) for groupnode in friend.findall("groups/group")
        )
        logging.debug("Setting %s's groups to %r", friendname, group_ids)
        ident_person.groups = [all_friends_group] + [
            group_objs[gid] for gid in group_ids
        ]

    # The "private" group is only needed for private posts, so it is looked
    # up the first time one is seen rather than once per event.
    private_group = None

    # Import the posts.
    for event in tree.findall("./events/event"):
        ditemid = event.get("ditemid")
        logging.debug("Parsing event %s", ditemid)
        atom_id = "%s%s" % (atomid_prefix, ditemid)

        try:
            post = Asset.objects.get(atom_id=atom_id)
        except Asset.DoesNotExist:
            post = Asset(atom_id=atom_id)

        event_props = {}
        for prop in event.findall("props/prop"):
            event_props[prop.get("name")] = prop.get("value")

        post.title = event.findtext("subject") or ""
        post.author = post_author

        publ = event.findtext("date")
        assert publ, "event has no date :("
        # TODO: is this in the account's timezone or what?
        post.published = datetime.strptime(publ, "%Y-%m-%d %H:%M:%S")

        content_root = BeautifulSoup(event.findtext("event"))
        # Add line breaks to the post if it's not preformatted.
        if not int(event_props.get("opt_preformatted", 0)):
            format_soup(content_root)
        # Unwrap any lj-raw / lj-cut tags.
        for el in content_root.findAll(re.compile(r"lj-(?:raw|cut)")):
            # Replace it with its children: inserting them in reverse at the
            # tag's old index leaves them in their original order.
            el_parent = el.parent
            el_index = el_parent.contents.index(el)
            el.extract()
            for child in reversed(list(el.contents)):
                el_parent.insert(el_index, child)
        # TODO: handle opt_nocomments prop
        # TODO: put music and mood in the post content
        # TODO: handle taglist prop
        post.content = str(content_root)

        post.imported = True
        # Saved before private_to is assigned below.
        post.save()
        logging.info("Saved new post %s (%s) as #%d", ditemid, post.title, post.pk)

        security = event.get("security")
        if security == "private":
            logging.debug("Oh ho post %s is all fancy private", ditemid)
            if private_group is None:
                private_group = Group.objects.get(tag="private")
            post.private_to = [private_group]
        elif security == "usemask":
            mask = int(event.get("allowmask"))
            logging.debug("Post %s has mask %s?", ditemid, as_bits(mask))

            if mask == 1:
                mask_groups = [all_friends_group]
                # Plus all the other bits are 0, so we'll add no other groups.
            else:
                # NOTE(review): when bit 0 is set together with other bits the
                # all-friends group is not added; confirm that is intended.
                mask_groups = []

            # Bit 0 is handled above; custom groups occupy bits 1 through 30
            # per the LiveJournal protocol's allowmask definition, so the
            # range must include 30.
            for i in range(1, 31):
                mask = mask >> 1
                if mask == 0:
                    break
                logging.debug(" Remaining mask %s", as_bits(mask))
                if mask & 0x01:
                    logging.debug(" Yay %s has group %d!", ditemid, i)
                    if i in group_objs:
                        logging.debug(" And group %d exists woohoo!!", i)
                        mask_groups.append(group_objs[i])

            logging.debug("So post %s gets %d groups", ditemid, len(mask_groups))
            post.private_to = mask_groups

        # Import the comments.
        for comment in event.findall("comments/comment"):
            import_comment(comment, post, openid_for)
def import_comment(comment_el, asset, openid_for):
    """Import one comment element, then its nested replies, under ``asset``.

    ``comment_el`` is the comment's XML element, ``asset`` is the post or
    comment being replied to, and ``openid_for`` maps a poster name to an
    OpenID.  The comment's atom id is ``<asset.atom_id>:talk:<jtalkid>``; an
    existing ``Asset`` with that id is updated, otherwise a new one is made.

    The comment is saved, inherits ``asset``'s ``private_to`` groups, and
    each ``comments/comment`` child is imported recursively as a reply to it.
    """
    talk_id = comment_el.get("jtalkid")
    comment_atom_id = "%s:talk:%s" % (asset.atom_id, talk_id)
    logging.debug("Yay importing comment %s", talk_id)

    try:
        comment = Asset.objects.get(atom_id=comment_atom_id)
    except Asset.DoesNotExist:
        comment = Asset(atom_id=comment_atom_id)

    # name -> value for every <prop> under <props>; later duplicates win.
    props = dict(
        (prop_el.get("name"), prop_el.get("value"))
        for prop_el in comment_el.findall("props/prop")
    )

    subject = comment_el.findtext("subject")
    comment.title = subject if subject else ""

    raw_body = comment_el.findtext("body")
    is_preformatted = int(props.get("opt_preformatted") or 0)
    if not is_preformatted:
        logging.debug(" Oops, comment not preformatted, let's parse it")
        soup = BeautifulSoup(raw_body)
        format_soup(soup)
        comment.content = str(soup)
    else:
        # Preformatted bodies are stored verbatim.
        comment.content = raw_body

    comment.in_reply_to = asset
    # Replies to a reply stay in the thread of the original root asset.
    comment.in_thread_of = asset.in_thread_of or asset

    poster = comment_el.get("poster")
    if not poster:
        logging.debug(" Oh huh this comment was anonymous, fancy that")
    else:
        poster_openid = openid_for(poster)
        logging.debug(" Saving %s as comment author", poster_openid)
        comment.author = person_for_openid(poster_openid)

    comment.imported = True
    comment.save()

    # Assigned after save(): a comment is visible to the same groups as the
    # asset it replies to.
    comment.private_to = asset.private_to.all()

    for child_el in comment_el.findall("comments/comment"):
        import_comment(child_el, comment, openid_for)
def import_assets(assets):
    """Create or update a saved ``Asset`` for each item in ``assets``.

    Each item is matched to an ``Asset`` by its ``id`` (used as the atom id),
    flagged as imported, and given an author and published time.  ``Post``
    items also get title, summary, content and slug; ``Comment`` items get
    line-break-converted content plus their reply and thread parents, which
    must already exist as ``Asset`` rows.  Items of any other object type are
    logged as an error and skipped without saving.

    Raises ``AssertionError`` for a comment whose text format is not
    ``html_convert_linebreaks``.
    """
    for tpasset in assets:
        logging.debug('Parsing asset %s', tpasset.url_id)

        try:
            asset = Asset.objects.get(atom_id=tpasset.id)
        except Asset.DoesNotExist:
            asset = Asset(atom_id=tpasset.id)
        asset.imported = True

        author = tpasset.author
        # NOTE(review): '6p0000000000000014' is presumably a placeholder
        # account that should import as anonymous; confirm.
        if not author or author.url_id == '6p0000000000000014':
            asset.author = None
        else:
            avatar = author.avatar_link
            if avatar.url_template:
                userpic_url = avatar.url_template.replace('{spec}', '50si')
            else:
                userpic_url = avatar.url
            profile_url = author.profile_page_url
            shown_name = author.display_name or author.preferred_username
            asset.author = person_for_openid(profile_url, shown_name, userpic_url)

        asset.published = tpasset.published

        kind = tpasset.object_type
        if kind == 'Post':
            asset.title = tpasset.title
            asset.summary = tpasset.excerpt
            asset.content = tpasset.rendered_content
            asset.slug = tpasset.filename
        elif kind == 'Comment':
            text_format = tpasset.text_format
            assert text_format == 'html_convert_linebreaks', \
                'This comment %s has unexpected text formatting %r' % (
                    tpasset.url_id, text_format)
            # Put a <br> in front of every newline.
            asset.content = '<br>\n'.join(tpasset.content.split('\n'))
            asset.in_reply_to = Asset.objects.get(atom_id=tpasset.in_reply_to.id)
            thread_root_id = tpasset.api_data['root']['id']
            asset.in_thread_of = Asset.objects.get(atom_id=thread_root_id)
        else:
            # Neither a post nor a comment: report it and move on unsaved.
            logging.error('Unexpected object type %r for asset %s',
                          kind, tpasset.url_id)
            continue

        logging.debug('Hello, %s %s (%s)!', kind.lower(), tpasset.url_id,
                      asset.title)
        asset.save()