def break_apart_object(outputobject):
    """Break apart an object and prepare it for entry into the database.

    Walks every ``<comment>`` element under *outputobject*, pulls out its
    attributes and the text of its ``date``/``body``/``subject`` children,
    and passes each comment to ``add_to_db`` together with the module-level
    ``cursor``.

    Args:
        outputobject: A DOM node exposing ``getElementsByTagName`` (for
            example a parsed ``xml.dom.minidom`` document).

    Returns:
        The largest comment id encountered, or 0 if there were no
        ``<comment>`` elements.

    Raises:
        ValueError: If an ``id``, ``jitemid``, ``posterid`` or non-empty
            ``parentid`` attribute is not a valid integer.
    """
    maxid = 0
    for comment in outputobject.getElementsByTagName("comment"):
        comment_id = int(comment.getAttribute("id"))
        jitemid = int(comment.getAttribute("jitemid"))
        posterid = int(comment.getAttribute("posterid"))
        state = comment.getAttribute("state")

        # A missing or empty parentid attribute is stored as an empty
        # string rather than an integer.
        raw_parentid = comment.getAttribute("parentid")
        parentid = int(raw_parentid) if raw_parentid else ""

        date = dwump.gettext(comment.getElementsByTagName("date"))
        body = unicode(dwump.gettext(comment.getElementsByTagName("body")))
        subject = unicode(dwump.gettext(comment.getElementsByTagName("subject")))

        add_to_db(comment_id, jitemid, posterid, state, parentid,
                  date, body, subject, cursor)

        # Keep the true maximum instead of whichever id happened to come
        # last, so the result is right even if comments arrive unordered.
        maxid = max(maxid, comment_id)
    return maxid
def execute_startup_metadata(startid, session):
    """Gets meta information. Meta can change, grab it all every run.

    Requests comment metadata from the Dreamwidth export endpoint starting
    at *startid*, records the reported ``maxid`` via ``set_comment_maxid``
    and passes the parsed document to ``populate_dwids`` (both using the
    module-level ``cursor``). While *startid* is still below the reported
    maxid, it is advanced by 10000 and the request is repeated.

    Args:
        startid: Comment id to start fetching metadata from.
        session: Value sent as the ``ljsession`` cookie.

    Returns:
        None.
    """
    # Iterate instead of recursing: one stack frame per 10000-id step would
    # eventually hit the interpreter's recursion limit on a large journal.
    while True:
        # NOTE(review): the session cookie is sent over plain HTTP here.
        url = "http://www.dreamwidth.org/export_comments.bml?get=comment_meta&startid=%s&authas=hs_worldcup" % startid
        request = urllib2.Request(url, headers={'Cookie': "ljsession=%s" % session})
        response = urllib2.urlopen(request)
        try:
            meta = xml.dom.minidom.parse(response)
        finally:
            # Always release the connection, even if the XML is malformed.
            response.close()

        maxmetaid = int(dwump.gettext(meta.getElementsByTagName("maxid")))
        set_comment_maxid(maxmetaid, cursor)
        populate_dwids(meta, cursor)

        if startid >= maxmetaid:
            return
        # Presumably the server returns at most 10000 entries per request;
        # the step size is kept exactly as the original had it.
        startid += 10000