def loadbill(fn, maplightid=None):
    """Load a bill XML file into the `bill` table and rebuild its
    `position` rows from the bill's roll-call votes.

    fn -- path to the bill XML file (xmltramp-parseable)
    maplightid -- optional MAPLight identifier stored on the bill row
                  (stored even when None)
    """
    bill = xmltramp.load(fn)
    d = bill2dict(bill)
    d.maplightid = maplightid
    try:
        bill_id = d.id
        db.insert('bill', seqname=False, **d)
    except IntegrityError:
        # Bill row already exists: update it in place instead.
        # `id` must be popped so it isn't part of the SET clause.
        bill_id = d.pop('id')
        db.update('bill', where="id=" + web.sqlquote(bill_id), **d)
    # Collect each politician's most recent position across all rolls.
    positions = {}
    for vote in bill.actions['vote':]:
        if not vote().get('roll'):
            continue  # action has no associated roll call
        rolldoc = '/us/%s/rolls/%s%s-%s.xml' % (
            d.session, vote('where'), vote('datetime')[:4], vote('roll'))
        roll = xmltramp.load(GOVTRACK_CRAWL + rolldoc)
        for voter in roll['voter':]:
            positions[govtrackp(voter('id'))] = fixvote(voter('vote'))
    # govtrackp returns None for unrecognized voters; drop that bucket.
    if None in positions:
        del positions[None]
    # Replace the bill's positions atomically.
    with db.transaction():
        db.delete('position', where='bill_id=$bill_id', vars=locals())
        for p, v in positions.iteritems():
            db.insert('position', seqname=False,
                      bill_id=bill_id, politician_id=p, vote=v)
def loadroll(fn):
    """Load a roll-call XML file into the `roll` table and rebuild its
    per-politician `vote` rows.

    fn -- path to the roll XML file; the roll id is derived from the
          filename (basename without extension).

    Raises NotDone when the roll cannot be linked to a bill row.
    """
    roll = web.storage()
    roll.id = fn.split('/')[-1].split('.')[0]
    vote = xmltramp.load(fn)
    if vote['bill':]:
        b = vote.bill
        roll.bill_id = 'us/%s/%s%s' % (b('session'), b('type'), b('number'))
    else:
        roll.bill_id = None
    roll.type = str(vote.type)
    roll.question = str(vote.question)
    roll.required = str(vote.required)
    roll.result = str(vote.result)
    try:
        db.insert('roll', seqname=False, **roll)
    except IntegrityError:
        # Insert failed -- either the roll already exists or its bill
        # is missing.  The update distinguishes the two: zero rows
        # updated means the roll row isn't there, i.e. the bill FK was
        # the problem.  NOTE(review): assumed interpretation -- confirm.
        if not db.update('roll', where="id=" + web.sqlquote(roll.id),
                         bill_id=roll.bill_id):
            print "\nMissing bill:", roll.bill_id
            raise NotDone
    # Replace the roll's individual votes atomically.
    with db.transaction():
        db.delete('vote', where="roll_id=$roll.id", vars=locals())
        for voter in vote['voter':]:
            rep = govtrackp(voter('id'))
            if rep:
                db.insert('vote', seqname=False, politician_id=rep,
                          roll_id=roll.id, vote=fixvote(voter('vote')))
            else:
                pass #@@!--check again after load_everyone
def cosmos(url, tipe, start=0, format="xml", version=None):
    """Get incoming links (cosmos) for url from the Technorati API."""
    params = {
        'url': url,
        'type': tipe,
        'start': start,
        'format': format,
        'key': LICENSE_KEY,
    }
    if version:
        params['version'] = version
    request = "http://api.technorati.com/cosmos?" + urllib.urlencode(params)
    return xmltramp.load(request)
def get_senate_offices_from_senators_cfm_file():
    """Return a dict keyed by district/state name whose values are
    lists of email-contact URLs, read from senators_cfm.xml."""
    offices = {}
    doc = xmltramp.load('senators_cfm.xml')
    for senator in doc:
        state = str(senator.state)
        if state not in offices:
            offices[state] = []
        offices[state].append(str(senator.email))
    return offices
def method(_self=self, _method=method, **params):
    """Proxy a dynamically-named API call.

    Translates the attribute-style name into the API's dotted method
    name (underscores become dots), issues the HTTP request, and parses
    the response.  Returns None if the request or parse fails.
    `_self` and `_method` are bound as defaults at definition time so
    the closure state is captured per-attribute.
    """
    _method = _method.replace("_", ".")
    # NOTE: `%` binds tighter than `+`, so only the literal is formatted
    # and HOST + PATH are prepended afterwards (preserved behavior).
    url = HOST + PATH + "?method=%s&%s&api_key=%s" % \
        (_method, urlencode(params), _self.api_key)
    try:
        rsp = xmltramp.load(url)
    except Exception:
        # Was a bare `except:`, which also swallowed KeyboardInterrupt
        # and SystemExit; narrowed to Exception.  Still best-effort:
        # any request/parse failure yields None.
        return None
    return _self._parseResponse(rsp)
def loadbill(fn, maplightid=None):
    """Load a bill XML file into the `bill` table, then record each
    politician's vote and the yea/nay tallies from its roll calls.

    fn -- path to the bill XML file
    maplightid -- optional MAPLight identifier (stored only if truthy)
    """
    bill = xmltramp.load(fn)
    d = bill2dict(bill)
    if maplightid:
        d["maplightid"] = maplightid
    db.insert("bill", seqname=False, **d)
    # Progress indicator: overwrite the current console line.
    print "\r %-25s" % d["id"],
    sys.stdout.flush()
    done = []
    for vote in bill.actions["vote":]:
        if not vote().get("roll"):
            continue  # action has no associated roll call
        if vote("where") in done:
            continue  # don't count veto overrides
        done.append(vote("where"))
        votedoc = "%s/rolls/%s%s-%s.xml" % (
            d["session"], vote("where"), vote("datetime")[:4], vote("roll"))
        vote = xmltramp.load("../data/crawl/govtrack/us/" + votedoc)
        yeas = 0
        neas = 0
        for voter in vote["voter":]:
            if fixvote(voter("vote")) == 1:
                yeas += 1
            elif fixvote(voter("vote")) == -1:
                neas += 1
            rep = govtrackp(voter("id"))
            if rep:
                # UGLY HACK: if a politician (bob_menendez for instance) voted
                # for the same bill in both chambers of congress the insert
                # fails.
                if not db.select("vote",
                                 where="bill_id=$d['id'] AND politician_id=$rep",
                                 vars=locals()):
                    db.insert("vote", seqname=False, politician_id=rep,
                              bill_id=d["id"], vote=fixvote(voter("vote")))
                else:
                    print
                    print "Updating:", votedoc, rep, d["id"], fixvote(voter("vote"))
                    db.update(
                        "vote",
                        where="bill_id=$d['id'] AND politician_id=$rep",
                        vote=fixvote(voter("vote")),
                        vars=locals(),
                    )
        # Store this roll's tallies on the bill row.  NOTE(review): when
        # a bill has rolls in both chambers, the last roll processed
        # wins -- confirm that is intended.
        db.update("bill", where="id = $d['id']",
                  yeas=yeas, neas=neas, vars=locals())
def handle(self, per_page = 20, force = False, **options):
    """Sync the latest Flickr favourites into local "stream" pages.

    per_page -- how many favourites to request from the Flickr API
    force -- re-save pages even when Flickr's last-update timestamp
             says they are unchanged
    """
    # NOTE(review): `stream` is fetched but never used below -- confirm
    # before removing.
    stream = PageType.objects.get( folder = "stream" )
    url = "http://api.flickr.com/services/rest/?method=flickr.favorites.getPublicList&api_key=%s&user_id=%s&extras=date_taken,tags,last_update,geo&per_page=%s"%( config.get("flickr", "key"), config.get("flickr", "me"), per_page )
    xml = xmltramp.load(url)
    def mirror( url, name ):
        # Download `url` into MEDIA_ROOT/thumbnail/<name> once and
        # return the media-relative path.  NOTE(review): defined but
        # not called in this handler -- confirm before removing.
        filename = os.path.join( MEDIA_ROOT, "thumbnail", name )
        if not os.path.exists( filename ):
            print "Fetching %s to %s"%( url, filename )
            urllib.urlretrieve( url, filename )
        return "thumbnail/%s"%name
    for entry in xml['photos']['photo':]:
        page = self.getPage( "stream", u"fave:///%s"%entry('id') )
        updated = datetime.fromtimestamp( int(entry('lastupdate')) )
        if page.updated and page.updated >= updated and not force:
            # page is unchanged
            continue
        page.slug = entry("id")
        page.latitude = entry('latitude')
        page.longitude = entry('longitude')
        page.format = "markdown"
        icon = """<a href="http://www.flickr.com/photos/jerakeen/favorites"><img src="%sweb/icons/flickr.png" title="flickr favourites"></a> """%MEDIA_URL
        page.link = "http://flickr.com/photos/%s/%s"%( entry('owner'), entry('id') )
        # Body is the icon plus a short sentence linking the photo.
        if entry('title'):
            page.body = icon + """Marked "<a href="%s">%s</a>" as a favourite on Flickr"""%( page.link, entry('title') )
        else:
            page.body = icon + """Marked an <a href="%s">untitled photo</a> as a favourite on Flickr"""%( page.link )
        # Title is the body with all HTML tags stripped out.
        page.title = re.sub(r'<.*?>','', page.body ).strip()
        if not page.title:
            page.title = page.slug
        page.excerpt = ""
        page.guid = None # explicit - a favouriting is not the object.
        page.date = datetime.fromtimestamp(int(entry('date_faved')))
        # don't download images, I don't own them. Not that anyone really cares
        page.thumbnail = "http://farm%s.static.flickr.com/%s/%s_%s_s.jpg"%( entry('farm'), entry('server'), entry('id'), entry('secret') )
        page.image = "http://farm%s.static.flickr.com/%s/%s_%s_m.jpg"%( entry('farm'), entry('server'), entry('id'), entry('secret') )
        page.updated = updated
        page.save()
        # Tagging requires a saved page.
        page.set_tags( entry('tags').split(' ') + ["cougar:syndicate=flickr-favourites"] )
def fetch(self, user):
    """Fetch the user's loved tracks from Last.fm and hand each one to
    handle_if_newer, keyed by its 'uts' timestamp."""
    self.user = user
    self.loved = []
    query = urllib.urlencode({
        'method': 'user.getlovedtracks',
        'user': user,
        'api_key': LASTFM_KEY,
    })
    self.url = LASTFM_URL + query
    response = xmltramp.load(self.url)
    if response('status') != 'ok':
        return
    self.open_key(self.url)
    for track in response.lovedtracks['track':]:
        self.handle_if_newer(int(track.date('uts')), track)
    self.close_key()
def fetch_weekly_charts(user_id): params = { 'user': user_id, 'api_key': LASTFM_KEY, 'method': 'user.gettopartists', 'period': '7day', 'limit': 200, } url = LASTFM_URL + urllib.urlencode(params) try: lfm = xmltramp.load(url) if lfm('status') == 'ok': return lfm[0] # first child element return lfm except Exception, e: return xmltramp.Element('error', value=str(e), attrs={'class': e.__class__.__name__})
def parse_can(opensecretsid, year=2008):
    """Summarize a candidate's fundraising data from OpenSecrets.

    opensecretsid -- the candidate's OpenSecrets identifier
    year -- election cycle (default 2008)

    Returns a web.storage with:
      opensecretsid -- the id passed in
      total         -- total receipts
      business_pac  -- receipts from Business-type PAC sources
      badmoney      -- summed PAC money across all sectors except Labor
    """
    out = web.storage()
    out.opensecretsid = opensecretsid
    d = xmltramp.load(CANSUM % (year, opensecretsid))
    out.total = int(d.candidate.totals('total_receipts'))
    # Fix: initialize up front so the field exists even when the feed
    # has no 'PAC' source at all (it was previously set only inside
    # that branch, leaving the attribute missing).
    out.business_pac = 0
    for source in d.candidate.totals.sources:
        if source('type') == 'PAC':
            for sd in source:
                if sd('type') == "Business":
                    out.business_pac = int(sd('total_receipts'))
    bad = 0
    for sector in d.candidate.totals.sectors:
        if sector('name') not in ['Labor']:
            bad += int(sector('pac'))
    out.badmoney = bad
    return out
def outbound(url, format="xml", version=None):
    """Query the Technorati API for links going out from url."""
    args = {'url': url, 'format': format, 'key': LICENSE_KEY}
    if version:
        args['version'] = version
    endpoint = "http://api.technorati.com/outbound?" + urllib.urlencode(args)
    return xmltramp.load(endpoint)
def get_senate_offices():
    """Map each state name in senators_cfm.xml to its list of senator
    email-contact URLs."""
    doc = xmltramp.load('senators_cfm.xml')
    offices = {}
    for member in doc:
        key = str(member.state)
        offices.setdefault(key, [])
        offices[key].append(str(member.email))
    return offices
def handle(self, per_page=20, force_update=False, **options):
    """Sync recent Flickr photos into local "photos" pages, then
    refresh photoset pages and re-tag their member photos.

    per_page -- how many photos to request from the Flickr API
    force_update -- re-save pages even when Flickr's last-update
                    timestamp says they are unchanged
    """
    # NOTE(review): `pagetype` is fetched but never used below --
    # confirm before removing.
    pagetype = PageType.objects.get(folder="photos")
    url = (
        "http://api.flickr.com/services/rest/?method=flickr.photos.search&api_key=%s&user_id=%s&extras=date_taken,tags,last_update,geo,original_format&per_page=%s"
        % (config.get("flickr", "key"), config.get("flickr", "me"), per_page)
    )
    xml = xmltramp.load(url)
    did_something = False
    for entry in xml["photos"]["photo":]:
        page = self.getPage("photos", "flickr:///%s" % entry("id"))
        updated = datetime.fromtimestamp(int(entry("lastupdate")))
        if page.updated and page.updated >= updated and not force_update:
            # page is unchanged
            continue
        did_something = True  # something has changed
        page.date = iso8601.parse_date(entry("datetaken"))
        # Strip tzinfo: http://code.djangoproject.com/ticket/5304
        page.date = page.date.replace(tzinfo=None)
        # TODO - this is the local time when the camera took the photo.
        # We need to remove UTCOFFSET seconds from this time to get the GMT photo time
        # so it lines up properly in the stream. We can do this by finding out
        # where we were through dopplr.
        utcoffset = 3600  # TODO - but will do till I get this done properly.
        page.date -= timedelta(seconds=utcoffset)
        page.slug = entry("id")
        page.guid = "tag:flickr.com,2004:/photo/%s" % (entry("id"))
        page.format = "markdown"
        page.title = entry("title")
        page.latitude = entry("latitude")
        page.longitude = entry("longitude")
        page.link = "http://flickr.com/photos/jerakeen/%s" % entry("id")
        # link files off flickr
        page.thumbnail = "http://farm%s.static.flickr.com/%s/%s_%s_s.jpg" % (
            entry("farm"),
            entry("server"),
            entry("id"),
            entry("secret"),
        )
        page.image = "http://farm%s.static.flickr.com/%s/%s_%s_m.jpg" % (
            entry("farm"),
            entry("server"),
            entry("id"),
            entry("secret"),
        )
        # local mirror
        # page.image = self.mirror("http://farm%s.static.flickr.com/%s/%s_%s_m.jpg"%( entry('farm'), entry('server'), entry('id'), entry('secret') ), "flickr/%s_240.jpg"%entry('id') )
        # page.thumbnail = self.mirror("http://farm%s.static.flickr.com/%s/%s_%s_s.jpg"%( entry('farm'), entry('server'), entry('id'), entry('secret') ), "flickr/%s_75.jpg"%entry('id') )
        # download the orivginal
        # self.mirror(
        #     "http://farm%s.static.flickr.com/%s/%s_%s_o.%s"%( entry('farm'), entry('server'), entry('id'), entry('originalsecret'), entry('originalformat') ),
        #     "flickr/%s_original.%s"%( entry('id'), entry('originalformat') )
        # )
        # Second API call: getInfo supplies the photo description.
        extra = xmltramp.load(
            "http://api.flickr.com/services/rest/?method=flickr.photos.getInfo&api_key=%s&photo_id=%s"
            % (config.get("flickr", "key"), entry("id"))
        )
        page.body = extra["photo"]["description"]
        page.excerpt = extra["photo"]["description"]
        # only mark page as updated if this actually worked
        page.updated = updated
        page.save()
        # must be done after page save
        page.set_tags(entry("tags").split(" ") + ["cougar:syndicate=flickr"])
    # avoid doing photoset updates if we didn't do anytihng
    if not did_something:
        return
    # list photosets
    url = "http://api.flickr.com/services/rest/?method=flickr.photosets.getList&api_key=%s&user_id=%s" % (
        config.get("flickr", "key"),
        config.get("flickr", "me"),
    )
    photosets = xmltramp.load(url)
    for photoset in photosets["photosets"]:
        photos = xmltramp.load(
            "http://api.flickr.com/services/rest/?method=flickr.photosets.getPhotos&api_key=%s&photoset_id=%s&per_page=%s"
            % (config.get("flickr", "key"), photoset("id"), 100)
        )
        setpage = self.getPage("sets", "flickr:///set/%s" % photoset("id"))
        setpage.title = unicode(photoset.title)
        setpage.excerpt = ""
        setpage.body = unicode(photoset.description)
        setpage.body += (
            "\n\nPhotos from <a href='http://flickr.com/photos/jerakeen/sets/%s/'>this flickr set</a>."
            % photoset("id")
        )
        if not setpage.slug:
            setpage.slug = slugify(setpage.title)
        # flickr let me set a photo as the photoset primary photo
        # setpage.thumbnail = self.mirror("http://farm%s.static.flickr.com/%s/%s_%s_s.jpg"%( photoset('farm'), photoset('server'), photoset('primary'), photoset('secret') ), "flickr/photoset-%s_75.jpg"%photoset('id') )
        setpage.thumbnail = "http://farm%s.static.flickr.com/%s/%s_%s_s.jpg" % (
            photoset("farm"),
            photoset("server"),
            photoset("primary"),
            photoset("secret"),
        )
        setpage.image = setpage.thumbnail
        # use machine tags to associate syndicated photos with this set.
        for photo in photos["photoset"]:
            try:
                photopage = Page.objects.get(extref="flickr:///%s" % photo("id"))
            except ObjectDoesNotExist:
                continue
            photopage.add_tag("flickr:set=%s" % photoset("id"))
            photopage.add_tag("cougar:set=%s" % setpage.slug)
            if photopage.status != "draft":
                photopage.status = "draft"
                photopage.save()
        # do this after we add the photos, so the set can get it's date, etc properly.
        setpage.save()
    if did_something:
        PageType.objects.get(folder="photos").ping()
def bloginfo(url, format="xml", version=None):
    """Get information about a blog via the Technorati bloginfo API."""
    params = {'url': url, 'format': format, 'key': LICENSE_KEY}
    if version:
        params['version'] = version
    request = "http://api.technorati.com/bloginfo?" + urllib.urlencode(params)
    return xmltramp.load(request)