Esempio n. 1
0
def loadbill(fn, maplightid=None):
    """Store the bill parsed from `fn` and rebuild its `position` rows.

    The bill is inserted into the `bill` table; when the insert raises
    IntegrityError the row with the same id is updated instead.  Every vote
    action carrying a `roll` attribute is resolved to a roll file under
    GOVTRACK_CRAWL, and the votes read from those files replace all
    `position` rows for this bill inside one transaction.
    """
    bill = xmltramp.load(fn)
    d = bill2dict(bill)
    d.maplightid = maplightid

    bill_id = d.id
    try:
        db.insert('bill', seqname=False, **d)
    except IntegrityError:
        # Presumably the row already exists: update it, keeping the id out
        # of the column values passed to the UPDATE.
        d.pop('id')
        db.update('bill', where="id=" + web.sqlquote(bill_id), **d)

    positions = {}
    roll_actions = (a for a in bill.actions['vote':] if a().get('roll'))
    for action in roll_actions:
        roll_path = '/us/%s/rolls/%s%s-%s.xml' % (
            d.session, action('where'), action('datetime')[:4], action('roll'))
        roll_doc = xmltramp.load(GOVTRACK_CRAWL + roll_path)
        for voter in roll_doc['voter':]:
            # A later roll overwrites an earlier vote by the same key.
            positions[govtrackp(voter('id'))] = fixvote(voter('vote'))

    # Drop the entry keyed by None, if any (presumably voters that govtrackp
    # could not resolve -- confirm against govtrackp).
    positions.pop(None, None)

    with db.transaction():
        db.delete('position', where='bill_id=$bill_id',
                  vars={'bill_id': bill_id})
        for politician_id, vote_value in positions.iteritems():
            db.insert('position', seqname=False,
                      bill_id=bill_id,
                      politician_id=politician_id,
                      vote=vote_value)
Esempio n. 2
0
def loadbill(fn, maplightid=None):
    """Load one bill file into the database and refresh its vote positions.

    Inserts the bill (falling back to an UPDATE of the same id when the
    insert raises IntegrityError), collects one vote per key returned by
    govtrackp from every roll referenced by the bill's vote actions, and
    rewrites the bill's rows in `position` within a single transaction.
    """
    bill = xmltramp.load(fn)
    d = bill2dict(bill)
    d.maplightid = maplightid

    bill_id = d.id
    try:
        db.insert('bill', seqname=False, **d)
    except IntegrityError:
        # The id goes into the WHERE clause, so remove it from the values.
        d.pop('id')
        id_clause = "id=" + web.sqlquote(bill_id)
        db.update('bill', where=id_clause, **d)

    positions = {}
    for action in bill.actions['vote':]:
        if action().get('roll'):
            year = action('datetime')[:4]
            rolldoc = '/us/%s/rolls/%s%s-%s.xml' % (
                d.session, action('where'), year, action('roll'))
            for voter in xmltramp.load(GOVTRACK_CRAWL + rolldoc)['voter':]:
                positions[govtrackp(voter('id'))] = fixvote(voter('vote'))

    # Discard whatever was filed under a None key (presumably voters that
    # govtrackp could not map -- confirm against govtrackp).
    positions.pop(None, None)

    with db.transaction():
        db.delete('position',
                  where='bill_id=$bill_id',
                  vars={'bill_id': bill_id})
        for who, how in positions.iteritems():
            db.insert('position',
                      seqname=False,
                      bill_id=bill_id,
                      politician_id=who,
                      vote=how)
Esempio n. 3
0
def loadroll(fn):
    roll = web.storage()
    roll.id = fn.split('/')[-1].split('.')[0]
    vote = xmltramp.load(fn)
    if vote['bill':]:
        b = vote.bill
        roll.bill_id = 'us/%s/%s%s' % (b('session'), b('type'), b('number'))
    else:
        roll.bill_id = None
    roll.type = str(vote.type)
    roll.question = str(vote.question)
    roll.required = str(vote.required)
    roll.result = str(vote.result)
    
    try:
        db.insert('roll', seqname=False, **roll)
    except IntegrityError:
        if not db.update('roll', where="id=" + web.sqlquote(roll.id), bill_id=roll.bill_id):
            print "\nMissing bill:", roll.bill_id
            raise NotDone
    
    with db.transaction():
        db.delete('vote', where="roll_id=$roll.id", vars=locals())
        for voter in vote['voter':]:
            rep = govtrackp(voter('id'))
            if rep:
                db.insert('vote', seqname=False, 
                  politician_id=rep, roll_id=roll.id, vote=fixvote(voter('vote')))
            else:
                pass #@@!--check again after load_everyone
Esempio n. 4
0
def loadroll(fn):
    roll = web.storage()
    roll.id = fn.split('/')[-1].split('.')[0]
    vote = xmltramp.load(fn)
    if vote['bill':]:
        b = vote.bill
        roll.bill_id = 'us/%s/%s%s' % (b('session'), b('type'), b('number'))
    else:
        roll.bill_id = None
    roll.type = str(vote.type)
    roll.question = str(vote.question)
    roll.required = str(vote.required)
    roll.result = str(vote.result)

    try:
        db.insert('roll', seqname=False, **roll)
    except IntegrityError:
        if not db.update('roll',
                         where="id=" + web.sqlquote(roll.id),
                         bill_id=roll.bill_id):
            print "\nMissing bill:", roll.bill_id
            raise NotDone

    with db.transaction():
        db.delete('vote', where="roll_id=$roll.id", vars=locals())
        for voter in vote['voter':]:
            rep = govtrackp(voter('id'))
            if rep:
                db.insert('vote',
                          seqname=False,
                          politician_id=rep,
                          roll_id=roll.id,
                          vote=fixvote(voter('vote')))
            else:
                pass  #@@!--check again after load_everyone
Esempio n. 5
0
def cosmos(url, tipe, start=0, format="xml", version=None):
    """Get incoming links (cosmos) for url.

    Sends url, type, start, format and LICENSE_KEY to the Technorati cosmos
    endpoint (plus `version` when it is truthy) and returns the response as
    parsed by xmltramp.load.
    """
    query = {
        'url': url,
        'type': tipe,
        'start': start,
        'format': format,
        'key': LICENSE_KEY,
    }
    if version:
        query['version'] = version
    request_url = "http://api.technorati.com/cosmos?" + urllib.urlencode(query)
    return xmltramp.load(request_url)
Esempio n. 6
0
def get_senate_offices_from_senators_cfm_file():
    """Group the contact entries of senators_cfm.xml by state.

    Returns a dict whose keys are str(member.state) and whose values are
    lists of str(member.email), one per member with that state, in document
    order.
    """
    offices = {}
    for member in xmltramp.load('senators_cfm.xml'):
        state = str(member.state)
        contact = str(member.email)
        if state not in offices:
            offices[state] = []
        offices[state].append(contact)
    return offices
Esempio n. 7
0
 def method(_self=self, _method=method, **params):
     """Call the remote API method named by `_method` with `params`.

     Underscores in `_method` are turned into dots to form the remote
     method name.  Returns whatever `_self._parseResponse` makes of the
     parsed reply, or None when the request could not be loaded or parsed.
     """
     _method = _method.replace("_", ".")
     # NOTE(review): the key is read from the enclosing `self`, not `_self`;
     # they are the same object when the default argument is used.
     url = HOST + PATH + "?method=%s&%s&api_key=%s" % (
         _method, urlencode(params), self.api_key)
     try:
         rsp = xmltramp.load(url)
     except Exception:
         # Best effort: a failed request yields None.  Unlike a bare
         # `except:`, this lets KeyboardInterrupt and SystemExit propagate.
         return None
     return _self._parseResponse(rsp)
Esempio n. 8
0
 def method(_self=self, _method=method, **params):
     """Invoke a remote API method and return its parsed response.

     `_method` has its underscores replaced by dots before being sent;
     `params` are URL-encoded into the query string together with the API
     key.  Returns the result of `_self._parseResponse`, or None if loading
     the URL raised an exception.
     """
     _method = _method.replace("_", ".")
     # NOTE(review): `self.api_key` comes from the enclosing scope rather
     # than `_self`; identical when `_self` keeps its default.
     url = HOST + PATH + "?method=%s&%s&api_key=%s" % (
         _method, urlencode(params), self.api_key)
     try:
         rsp = xmltramp.load(url)
     except Exception:
         # Deliberately best-effort, but no longer a bare `except:` so that
         # KeyboardInterrupt / SystemExit are not swallowed.
         return None
     return _self._parseResponse(rsp)
Esempio n. 9
0
def loadbill(fn, maplightid=None):
    bill = xmltramp.load(fn)
    d = bill2dict(bill)
    if maplightid:
        d["maplightid"] = maplightid
    db.insert("bill", seqname=False, **d)
    print "\r  %-25s" % d["id"],
    sys.stdout.flush()

    done = []
    for vote in bill.actions["vote":]:
        if not vote().get("roll"):
            continue
        if vote("where") in done:
            continue  # don't count veto overrides
        done.append(vote("where"))

        votedoc = "%s/rolls/%s%s-%s.xml" % (d["session"], vote("where"), vote("datetime")[:4], vote("roll"))
        vote = xmltramp.load("../data/crawl/govtrack/us/" + votedoc)
        yeas = 0
        neas = 0
        for voter in vote["voter":]:
            if fixvote(voter("vote")) == 1:
                yeas += 1
            elif fixvote(voter("vote")) == -1:
                neas += 1
            rep = govtrackp(voter("id"))
            if rep:
                # UGLY HACK: if a politician (bob_menendez for instance) voted
                # for the same bill in both chambers of congress the insert
                # fails.
                if not db.select("vote", where="bill_id=$d['id'] AND politician_id=$rep", vars=locals()):
                    db.insert("vote", seqname=False, politician_id=rep, bill_id=d["id"], vote=fixvote(voter("vote")))
                else:
                    print
                    print "Updating:", votedoc, rep, d["id"], fixvote(voter("vote"))
                    db.update(
                        "vote",
                        where="bill_id=$d['id'] AND politician_id=$rep",
                        vote=fixvote(voter("vote")),
                        vars=locals(),
                    )
        db.update("bill", where="id = $d['id']", yeas=yeas, neas=neas, vars=locals())
Esempio n. 10
0
    def handle(self, per_page = 20, force = False, **options):
        stream = PageType.objects.get( folder = "stream" )

        url = "http://api.flickr.com/services/rest/?method=flickr.favorites.getPublicList&api_key=%s&user_id=%s&extras=date_taken,tags,last_update,geo&per_page=%s"%( config.get("flickr", "key"), config.get("flickr", "me"), per_page )
        xml = xmltramp.load(url)

        def mirror( url, name ):
            filename = os.path.join( MEDIA_ROOT, "thumbnail", name )
            if not os.path.exists( filename ):
                print "Fetching %s to %s"%( url, filename )
                urllib.urlretrieve( url, filename )
            return "thumbnail/%s"%name
            
        for entry in xml['photos']['photo':]:
            page = self.getPage( "stream", u"fave:///%s"%entry('id') )
            
            updated = datetime.fromtimestamp( int(entry('lastupdate')) )
            if page.updated and page.updated >= updated and not force:
                # page is unchanged
                continue

            page.slug = entry("id")

            page.latitude = entry('latitude')
            page.longitude = entry('longitude')

            page.format = "markdown"

            icon = """<a href="http://www.flickr.com/photos/jerakeen/favorites"><img src="%sweb/icons/flickr.png" title="flickr favourites"></a> """%MEDIA_URL
            page.link = "http://flickr.com/photos/%s/%s"%( entry('owner'), entry('id') )
            if entry('title'):
                page.body = icon + """Marked "<a href="%s">%s</a>" as a favourite on Flickr"""%( page.link, entry('title') )
            else:
                page.body = icon + """Marked an <a href="%s">untitled photo</a> as a favourite on Flickr"""%( page.link )
            
            page.title = re.sub(r'<.*?>','', page.body ).strip()
            if not page.title:
                page.title = page.slug
            page.excerpt = ""
            page.guid = None # explicit - a favouriting is not the object.
            page.date = datetime.fromtimestamp(int(entry('date_faved')))
            
            # don't download images, I don't own them. Not that anyone really cares
            page.thumbnail = "http://farm%s.static.flickr.com/%s/%s_%s_s.jpg"%( entry('farm'), entry('server'), entry('id'), entry('secret') )
            page.image = "http://farm%s.static.flickr.com/%s/%s_%s_m.jpg"%( entry('farm'), entry('server'), entry('id'), entry('secret') )
            
            page.updated = updated
            page.save()

            page.set_tags( entry('tags').split(' ') + ["cougar:syndicate=flickr-favourites"] )
Esempio n. 11
0
 def fetch(self, user):
     """Fetch `user`'s loved tracks and hand each one to handle_if_newer.

     Stores the user, an empty `loved` list and the request URL on the
     instance.  When the response status is 'ok', the tracks are processed
     between open_key(self.url) and close_key(); otherwise nothing more is
     done.
     """
     self.user = user
     self.loved = []
     query = {
         'method': 'user.getlovedtracks',
         'user': user,
         'api_key': LASTFM_KEY,
     }
     self.url = LASTFM_URL + urllib.urlencode(query)
     lfm = xmltramp.load(self.url)
     if lfm('status') != 'ok':
         return

     self.open_key(self.url)
     for track in lfm.lovedtracks['track':]:
         # The track's `uts` date attribute is passed as an int.
         self.handle_if_newer(int(track.date('uts')), track)
     self.close_key()
Esempio n. 12
0
def fetch_weekly_charts(user_id):
    """Fetch `user_id`'s top artists for the '7day' period (limit 200).

    Returns the first child element of the response when its `status`
    attribute is 'ok', otherwise the response root itself.  Any exception
    raised while loading or inspecting the response is not propagated; an
    `error` element describing it is returned instead.
    """
    params = {
        'user': user_id,
        'api_key': LASTFM_KEY,
        'method': 'user.gettopartists',
        'period': '7day',
        'limit': 200,
    }
    url = LASTFM_URL + urllib.urlencode(params)
    try:
        lfm = xmltramp.load(url)
        if lfm('status') == 'ok':
            return lfm[0]   # first child element
        return lfm
    except Exception as e:  # `as` form works on Python 2.6+ and 3.x
        # NOTE(review): confirm that the xmltramp version in use accepts a
        # `value` keyword on Element; otherwise this line itself raises.
        return xmltramp.Element('error', value=str(e), attrs={'class': e.__class__.__name__})
Esempio n. 13
0
def parse_can(opensecretsid, year=2008):
    """Summarise a candidate's receipts from the CANSUM document.

    Returns a web.storage with `opensecretsid`, `total` (the totals'
    total_receipts), `business_pac` (only set when a source of type 'PAC'
    exists; 0 unless it contains a 'Business' entry) and `badmoney` (sum of
    the `pac` attribute over every sector not named 'Labor').
    """
    out = web.storage()
    out.opensecretsid = opensecretsid

    doc = xmltramp.load(CANSUM % (year, opensecretsid))
    totals = doc.candidate.totals
    out.total = int(totals('total_receipts'))

    for source in totals.sources:
        if source('type') != 'PAC':
            continue
        out.business_pac = 0  # in case it doesn't appear
        for detail in source:
            if detail('type') == "Business":
                out.business_pac = int(detail('total_receipts'))

    out.badmoney = sum(
        int(sector('pac'))
        for sector in totals.sectors
        if sector('name') != 'Labor'
    )

    return out
Esempio n. 14
0
def parse_can(opensecretsid, year=2008):
    """Build a funding summary for one candidate and year.

    Loads the document at CANSUM % (year, opensecretsid) and returns a
    web.storage holding the id, the overall `total` receipts, the
    `business_pac` receipts (set only if a 'PAC' source is present, 0 when
    that source has no 'Business' entry) and `badmoney`, the PAC money
    summed over all sectors except 'Labor'.
    """
    summary = web.storage()
    summary.opensecretsid = opensecretsid

    d = xmltramp.load(CANSUM % (year, opensecretsid))
    summary.total = int(d.candidate.totals('total_receipts'))

    pac_sources = [s for s in d.candidate.totals.sources if s('type') == 'PAC']
    for pac in pac_sources:
        business = [int(sd('total_receipts')) for sd in pac if sd('type') == "Business"]
        # Last Business entry wins; 0 in case it doesn't appear.
        summary.business_pac = business[-1] if business else 0

    excluded = ('Labor',)
    bad = 0
    for sector in d.candidate.totals.sectors:
        if sector('name') in excluded:
            continue
        bad += int(sector('pac'))
    summary.badmoney = bad

    return summary
Esempio n. 15
0
def outbound(url, format="xml", version=None):
    """Query the Technorati outbound endpoint for `url`.

    Sends url, format and LICENSE_KEY (plus `version` when it is truthy)
    and returns the response as parsed by xmltramp.load.
    """
    query = {
        'url': url,
        'format': format,
        'key': LICENSE_KEY,
    }
    if version:
        query['version'] = version
    request_url = "http://api.technorati.com/outbound?" + urllib.urlencode(query)
    return xmltramp.load(request_url)
def get_senate_offices():
    """Return a dict mapping each state in senators_cfm.xml to its emails.

    Keys are str(member.state); each value is the list of str(member.email)
    for the members with that state, in document order.
    """
    by_state = {}
    for member in xmltramp.load('senators_cfm.xml'):
        state = str(member.state)
        email = str(member.email)
        try:
            by_state[state].append(email)
        except KeyError:
            by_state[state] = [email]
    return by_state
Esempio n. 17
0
    def handle(self, per_page=20, force_update=False, **options):
        """Syndicate the configured user's Flickr photos and photosets as pages.

        Step 1: fetch up to `per_page` photos via flickr.photos.search and,
        for each one whose page is new or older than Flickr's `lastupdate`
        (or for all of them when `force_update` is true), fill in the page,
        fetch its description via flickr.photos.getInfo, save it and set
        its tags.

        Step 2 (skipped when no photo page changed): list the user's
        photosets, create/update one "sets" page per photoset, tag the
        already-syndicated photo pages that belong to it, and finally ping
        the "photos" page type.
        """
        # NOTE(review): `pagetype` is not used again in this method; the
        # lookup itself still runs.
        pagetype = PageType.objects.get(folder="photos")

        url = (
            "http://api.flickr.com/services/rest/?method=flickr.photos.search&api_key=%s&user_id=%s&extras=date_taken,tags,last_update,geo,original_format&per_page=%s"
            % (config.get("flickr", "key"), config.get("flickr", "me"), per_page)
        )
        xml = xmltramp.load(url)

        # Becomes True as soon as one photo page is (re)written.
        did_something = False

        for entry in xml["photos"]["photo":]:
            page = self.getPage("photos", "flickr:///%s" % entry("id"))

            updated = datetime.fromtimestamp(int(entry("lastupdate")))
            if page.updated and page.updated >= updated and not force_update:
                # page is unchanged
                continue

            did_something = True  # something has changed

            page.date = iso8601.parse_date(entry("datetaken"))
            page.date = page.date.replace(tzinfo=None)  # http://code.djangoproject.com/ticket/5304

            # TODO - this is the local time when the camera took the photo.
            # We need to remove UTCOFFSET seconds from this time to get the GMT photo time
            # so it lines up properly in the stream. We can do this by finding out
            # where we were through dopplr.
            utcoffset = 3600  # TODO - but will do till I get this done properly.
            page.date -= timedelta(seconds=utcoffset)

            page.slug = entry("id")
            page.guid = "tag:flickr.com,2004:/photo/%s" % (entry("id"))
            page.format = "markdown"

            page.title = entry("title")

            page.latitude = entry("latitude")
            page.longitude = entry("longitude")

            page.link = "http://flickr.com/photos/jerakeen/%s" % entry("id")

            # link files off flickr
            page.thumbnail = "http://farm%s.static.flickr.com/%s/%s_%s_s.jpg" % (
                entry("farm"),
                entry("server"),
                entry("id"),
                entry("secret"),
            )
            page.image = "http://farm%s.static.flickr.com/%s/%s_%s_m.jpg" % (
                entry("farm"),
                entry("server"),
                entry("id"),
                entry("secret"),
            )

            # local mirror
            # page.image = self.mirror("http://farm%s.static.flickr.com/%s/%s_%s_m.jpg"%( entry('farm'), entry('server'), entry('id'), entry('secret') ), "flickr/%s_240.jpg"%entry('id') )
            # page.thumbnail = self.mirror("http://farm%s.static.flickr.com/%s/%s_%s_s.jpg"%( entry('farm'), entry('server'), entry('id'), entry('secret') ), "flickr/%s_75.jpg"%entry('id') )

            # download the original
            # self.mirror(
            #    "http://farm%s.static.flickr.com/%s/%s_%s_o.%s"%( entry('farm'), entry('server'), entry('id'), entry('originalsecret'), entry('originalformat') ),
            #    "flickr/%s_original.%s"%( entry('id'), entry('originalformat') )
            # )

            # One extra request per changed photo: the description is used
            # for both the body and the excerpt.
            extra = xmltramp.load(
                "http://api.flickr.com/services/rest/?method=flickr.photos.getInfo&api_key=%s&photo_id=%s"
                % (config.get("flickr", "key"), entry("id"))
            )
            page.body = extra["photo"]["description"]
            page.excerpt = extra["photo"]["description"]

            # only mark page as updated if this actually worked
            page.updated = updated
            page.save()

            # must be done after page save
            page.set_tags(entry("tags").split(" ") + ["cougar:syndicate=flickr"])

        # avoid doing photoset updates if we didn't do anything
        if not did_something:
            return

        # list photosets
        url = "http://api.flickr.com/services/rest/?method=flickr.photosets.getList&api_key=%s&user_id=%s" % (
            config.get("flickr", "key"),
            config.get("flickr", "me"),
        )
        photosets = xmltramp.load(url)
        for photoset in photosets["photosets"]:
            # Only the first 100 photos of each set are requested.
            photos = xmltramp.load(
                "http://api.flickr.com/services/rest/?method=flickr.photosets.getPhotos&api_key=%s&photoset_id=%s&per_page=%s"
                % (config.get("flickr", "key"), photoset("id"), 100)
            )

            setpage = self.getPage("sets", "flickr:///set/%s" % photoset("id"))
            setpage.title = unicode(photoset.title)
            setpage.excerpt = ""
            setpage.body = unicode(photoset.description)
            setpage.body += (
                "\n\nPhotos from <a href='http://flickr.com/photos/jerakeen/sets/%s/'>this flickr set</a>."
                % photoset("id")
            )
            # Keep an existing slug; derive one from the title otherwise.
            if not setpage.slug:
                setpage.slug = slugify(setpage.title)

            # flickr lets me set a photo as the photoset primary photo
            # setpage.thumbnail = self.mirror("http://farm%s.static.flickr.com/%s/%s_%s_s.jpg"%( photoset('farm'), photoset('server'), photoset('primary'), photoset('secret') ), "flickr/photoset-%s_75.jpg"%photoset('id') )
            setpage.thumbnail = "http://farm%s.static.flickr.com/%s/%s_%s_s.jpg" % (
                photoset("farm"),
                photoset("server"),
                photoset("primary"),
                photoset("secret"),
            )
            setpage.image = setpage.thumbnail

            # use machine tags to associate syndicated photos with this set.
            for photo in photos["photoset"]:
                try:
                    photopage = Page.objects.get(extref="flickr:///%s" % photo("id"))
                except ObjectDoesNotExist:
                    # This photo has not been syndicated as a page; skip it.
                    continue
                photopage.add_tag("flickr:set=%s" % photoset("id"))
                photopage.add_tag("cougar:set=%s" % setpage.slug)
                # NOTE(review): every photo page found in a set is switched
                # to "draft" here - confirm this is intended.
                if photopage.status != "draft":
                    photopage.status = "draft"
                    photopage.save()

            # do this after we add the photos, so the set can get its date, etc. properly.
            setpage.save()

        # Always true at this point because of the early return above.
        if did_something:
            PageType.objects.get(folder="photos").ping()
Esempio n. 18
0
def bloginfo(url, format="xml", version=None):
    """Get information about a blog.

    Sends url, format and LICENSE_KEY (plus `version` when it is truthy) to
    the Technorati bloginfo endpoint and returns the response as parsed by
    xmltramp.load.
    """
    query = {
        'url': url,
        'format': format,
        'key': LICENSE_KEY,
    }
    if version:
        query['version'] = version
    request_url = "http://api.technorati.com/bloginfo?" + urllib.urlencode(query)
    return xmltramp.load(request_url)