Example #1
import glob
import xmltramp

def parse_rules():
    rules = glob.glob('../src/chrome/content/rules/*.xml')
    out = []
    for rule in rules:
        ruleset = xmltramp.seed(open(rule))
        #print ruleset('name')
        hosts = []
        for k in ruleset['target':]:
            hosts.append(k('host'))

        for k in ruleset['rule':]:
            for host in hosts:
                escaped_host = host.replace('.', '\\.')
                if (k('from') == "^http://(www\.)?%s/" %
                        escaped_host) and (k('to') == "https://www.%s/" % host
                                           or k('to') == "https://%s/" % host):
                    out.extend([(host, False), ('www.' + host, False)])
                    break
                elif k('from') == "^http://([^/:@]*)\.%s/" % escaped_host and \
                  k('to') == "https://$1.%s/" % host:
                    out.extend([(host, True)])
                elif k('from') == '^http://%s/' % escaped_host and \
                  k('to') == "https://%s/" % host:
                    out.append((host, False))
            else:
                # for-else: reached only when the host loop finished without 'break'
                pass  #print '  ', host, k('from').encode('utf8'), k('to').encode('utf8')
        #print '  ', out
    return out
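
All of these examples lean on the same three xmltramp access patterns: calling an element reads an attribute, slicing an element with a name iterates its same-named children, and str()/unicode() flattens an element to its text. A minimal sketch of those patterns, assuming the classic xmltramp module:

import xmltramp

doc = xmltramp.parse(
    "<ruleset name='Example'>"
    "<target host='example.com' />"
    "<rule from='^http://example\\.com/' to='https://example.com/' />"
    "</ruleset>"
)
print doc('name')              # attribute access via call: 'Example'
for target in doc['target':]:  # slice by name: every <target> child
    print target('host')       # 'example.com'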
Example #2
import glob
import xmltramp

def parse_rules():
    rules = glob.glob('../src/chrome/content/rules/*.xml')
    out = []
    for rule in rules:
        ruleset = xmltramp.seed(open(rule))
        #print ruleset('name')
        hosts = []
        for k in ruleset['target':]:
            hosts.append(k('host'))

        for k in ruleset['rule':]:
            for host in hosts:
                escaped_host = host.replace('.', '\\.')
                if (
                  k('from') == "^http://(www\.)?%s/" % escaped_host
                ) and (
                  k('to') == "https://www.%s/" % host or 
                  k('to') == "https://%s/" % host
                ):
                    out.extend([(host, False), ('www.' + host, False)])
                    break
                elif k('from') == "^http://([^/:@]*)\.%s/" % escaped_host and \
                  k('to') == "https://$1.%s/" % host:
                    out.extend([(host, True)])
                elif k('from') == '^http://%s/' % escaped_host and \
                  k('to') == "https://%s/" % host:
                    out.append((host, False))
            else:
                # for-else: reached only when the host loop finished without 'break'
                pass #print '  ', host, k('from').encode('utf8'), k('to').encode('utf8')
        #print '  ', out
    return out
Example #3
    def handle(self, **options):
        url = "http://upcoming.yahooapis.com/services/rest/?api_key=%s&method=user.getWatchlist&user_id=%s&show=upcoming"%(UPCOMING_KEY, UPCOMING_ID)
        request = urllib2.urlopen( url )
        xml = xmltramp.seed( request )
        
        for event in xml["event":]:
            page = self.getPage( "stream", u"upcoming:///%s"%event('id') )

            page.slug = str(event('id'))
            page.format = "markdown"
            page.excerpt = ""
            
            # the date the event happens, not the date it was added. Use implicit 'now' add date
            date = iso8601.parse_date( event('start_date') + "T00:00:00" ).date() # stupid

            page.link = "http://upcoming.yahoo.com/event/%s/"%( event('id') )
            page.guid = page.link

            icon = """<a href="http://upcoming.yahoo.com/user/123407" title="upcoming"><img src="%sweb/icons/upcoming.png" title="upcoming"></a> """%MEDIA_URL
            if event('status') == 'attend':
                page.body = icon + """I will be attending <a href="%s">%s</a> on %s"""%( page.link, event('name'), date.strftime("%A %B %d") )
            else:
                page.body = icon + """I'm considering <a href="%s">%s</a> on %s"""%( page.link, event('name'), date.strftime("%A %B %d") )
            
            page.title = "Upcoming" # re.sub(r'<.*?>','', page.body ).strip()
            
            if not page.id: # only the first time
                # http://code.google.com/apis/maps/documentation/services.html#Geocoding_Direct
                if event('venue_zip'):
                    address = ", ".join([
                        event('venue_zip'),
                        event("venue_country_name"),
                    ])
                else:
                    address = ", ".join( [
                        event('venue_address'),
                        re.sub(r',.*','', event('venue_city')),
                        event("venue_country_name")
                    ] )
                geocode = "http://maps.google.com/maps/geo?key=%s&q=%s&output=json"%( self.getConfig("google", "maps_key"), urllib.quote( address.encode('utf-8'), "" ) )
                request = urllib2.urlopen( geocode )
                data = json.load( request )
                if "Placemark" in data:
                    print "Got location for %s"%address
                    page.longitude, page.latitude, altitude = data["Placemark"][0]["Point"]['coordinates']  # coordinates come back as [lng, lat, altitude]
                else:
                    print "Can't find location for %s"%address

            page.save()
            
            page.set_tags( ["cougar:syndicate=upcoming"] )
            
            if not page.human_tags():
                text = event('description') or event('name')
                if text:
                    url = "http://search.yahooapis.com/ContentAnalysisService/V1/termExtraction?appid=%s&output=json&context=%s"%( config.get('yahoo','appid'), urllib.quote(text.encode('utf8'), "") )
                    keywords = json.load( urllib2.urlopen( url ) )
                    tags = keywords['ResultSet']['Result']
                    page.set_human_tags( tags )
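
Examples #3, #4, and #6 call a self.getPage helper whose definition is not shown in any of these snippets. A plausible sketch (an assumption, not the author's code), modelled on the explicit get-or-create pattern that Example #5 inlines:

    def getPage(self, folder, extref):
        # Hypothetical reconstruction: fetch-or-create a Page keyed by extref,
        # mirroring the inline version in Example #5.
        pagetype = PageType.objects.get(folder=folder)
        try:
            page = Page.objects.get(extref=extref)
        except ObjectDoesNotExist:
            print "Creating page %s" % extref
            page = Page(pagetype=pagetype, extref=extref, status="published")
            page.save()  # gets round some initial date setting bugs
        return page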
Example #4
    def handle(self, **other):
        pagetype = PageType.objects.get( folder = "stream" )

        realm = 'Nordrassil'
        character = 'Granark'

        # TODO - US armoury address
        #url = "http://armory.wow-europe.com/character-sheet.xml?r=%s&n=%s"%( urllib.quote(realm,''), urllib.quote(character,'') )
        url = "http://eu.wowarmory.com/character-achievements.xml?r=%s&n=%s"%( urllib.quote(realm,''), urllib.quote(character,'') )

        # Need to specify firefox as user agent as this makes the server return an XML file.
        opener = urllib2.build_opener()
        opener.addheaders = [ ('user-agent', 'Mozilla/5.0 (Windows; U; Windows NT 5.0; en-GB; rv:1.8.1.4) Gecko/20070515 Firefox/2.0.0.4') ]


        # timeout in seconds - the armoury falls over a lot
        socket.setdefaulttimeout(10)
        req = urllib2.Request( url )
        try:
            data = opener.open(req)
        except (urllib2.HTTPError, urllib2.URLError):
            return

        xml = xmltramp.seed( data )

        for ach in xml['achievements']['summary']['achievement':]:
            page = self.getPage( "stream", "armory:///%s"%ach('id') )
            if not page.id:
                # new page
                page.date = iso8601.parse_date( ach('dateCompleted')[0:10] + "T21:00:00+00:00" ) # default to evening sometime, that's when I'm usually playing
                if page.date.date() == datetime.now().date():
                    page.date = datetime.now()
                page.date = page.date.replace(tzinfo=None) # http://code.djangoproject.com/ticket/5304
                    
            desc = unicode(ach('desc'))
            page.format = "markdown"

            page.slug = slugify( ach('title') )
            page.link = url # TODO - can't link directly to armory, apparently.
            icon = """<a href="%s"><img src="%sweb/icons/wow.png" title="wow armory"></a> """%( url, MEDIA_URL )
            page.body = icon + """Gained the achievement "<a href="%s" title="%s">%s</a>" """%( "http://www.wowhead.com/?achievement=%s"%ach('id'), desc, ach('title') )
            page.title = re.sub(r'<.*?>','', page.body ).strip()
            page.excerpt = ""
            
            page.save()

            page.set_tags( ["cougar:syndicate=wow-armory", "warcraft"] )
Example #5
    def handle(self, **options):
        pagetype = PageType.objects.get(folder="stream")

        url = "http://ws.audioscrobbler.com/2.0/?method=user.getlovedtracks&user=jerakeen&api_key=%s" % LASTFM_KEY
        request = urllib2.urlopen(url)
        xml = xmltramp.seed(request)

        for track in xml.lovedtracks["track":]:
            link = "http://%s" % str(track.url)
            extref = u"lastfm:///%s" % hashlib.md5(link).hexdigest()

            try:
                page = Page.objects.get(extref=extref)
            except ObjectDoesNotExist:
                print("Creating page %s" % extref)
                page = Page(pagetype=pagetype, extref=extref, status="published")
                page.save()  # gets round some initial date setting bugs

            page.date = datetime.fromtimestamp(int(track.date("uts")))
            page.slug = hashlib.md5(link).hexdigest()[0:8]
            page.format = "markdown"
            page.excerpt = ""
            icon = (
                """<a href="http://www.last.fm/user/jerakeen" title="last.fm"><img src="%sweb/icons/lastfm.png" title="last.fm"></a> """
                % MEDIA_URL
            )

            page.body = icon + """Loved the track <a href="%s">%s</a> by <a href="%s">%s</a>""" % (
                link,
                track.name,
                track.artist.url,
                track.artist.name,
            )

            page.title = re.sub(r"<.*?>", "", page.body)
            page.link = link
            try:
                page.image = filter(lambda i: i("size") == "large", track["image":])[0]
            except IndexError:
                page.image = (
                    "http://cdn.last.fm/depth/catalogue/noimage/cover_large.gif"
                )  # "http://cdn.last.fm/flatness/catalogue/noimage/cover_large.gif"

            page.save()
            page.set_tags(["music", "cougar:syndicate=lastfm"])
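
A portability note on the image lookup above: under Python 2, filter() returns a list, so indexing it with [0] works; on Python 3 it returns an iterator and the same line would fail. A sketch of an equivalent that runs on both, folding in the same fallback URL the except branch uses:

            page.image = next(
                (i for i in track["image":] if i("size") == "large"),
                "http://cdn.last.fm/depth/catalogue/noimage/cover_large.gif",
            )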
Example #6
    def handle(self, url = "http://jerakeen.tumblr.com/api/read", **options):

        opener = urllib2.build_opener()
        opener.addheaders = [ ('user-agent', 'Mozilla/5.0 (Windows; U; Windows NT 5.0; en-GB; rv:1.8.1.4) Gecko/20070515 Firefox/2.0.0.4') ]
        req = urllib2.Request( url )
        data = opener.open(req)
        xml = xmltramp.seed( data )

        for entry in xml.posts:
            #print entry.__repr__(1)
            extref = "tumblr:///%s"%entry('id')
            
            page = self.getPage( 'notes', extref )

            page.date = datetime.fromtimestamp( int(entry("unix-timestamp") ) )
            page.slug = entry("id")
            page.format = "raw"
            page.excerpt = ""
            
            tags = [ "cougar:syndicate=tumblr" ]
            
            for tag in entry['tag':]:
                tags += [ unicode(tag) ]
            
            if entry("type") == 'photo':
                page.title = re.sub(r'[\.\|].*', '', re.sub(r'<.*?>','', unicode(entry['photo-caption']) ) )
                if not page.title:
                    page.title = "Photo"
                    
                page.body = entry['photo-caption']
                
                photos = {}
                for p in entry['photo-url':]:
                    photos[ int(p('max-width')) ] = unicode(p)
                
                def mirror( size, photos = photos ):
                    url = photos[size]
                    filename = os.path.join( MEDIA_ROOT, "thumbnail", "tumblr", re.sub(r'^.*/','',url) )
                    if not os.path.exists( filename ):
                        print "Fetching %s to %s"%( url, filename )
                        urllib.urlretrieve( url, filename )
                    return re.sub(r'^.*/',"thumbnail/tumblr/",url)
                    
                page.image = mirror( 250 )
                page.thumbnail = mirror( 75 )
                tags += ["tumblr:type=photo"]

            elif entry("type") == 'link':
                try:
                    page.title = self.text_of( entry['link-text'] )
                except KeyError:
                    page.title = "Link"
                page.link = self.text_of( entry['link-url'] )
                page.body = self.text_of( entry['link-description'] )
                tags += ["tumblr:type=link"]
                #page.pagetype = PageType.objects.get( folder = "links" )

            elif entry("type") == 'quote':
                page.title = "Quote" # TODO
                source = re.sub( r'^\&mdash;\s+', '', self.text_of( entry['quote-source'] ) ) # strip leading mdash
                page.body = "<blockquote>%s</blockquote>\n\n<cite>%s</cite>\n"%( self.text_of(entry['quote-text']), source )
                tags += ["tumblr:type=quote"]
                
            elif entry("type") == 'regular':
                try:
                    page.title = self.text_of(entry['regular-title'])
                except KeyError:
                    page.title = "Page"
                try:
                    page.body = self.text_of(entry['regular-body'])
                except KeyError:
                    page.body = ""
                tags += ["tumblr:type=regular"]

            elif entry("type") == 'conversation':
                try:
                    page.title = self.text_of(entry['conversation-title'])
                except Exception:
                    page.title = "Conversation"

                lines = entry['conversation'][:]
                def line_to_tr(line):
                    return """<tr><td valign="top" align="right" class="name">%s</td><td valign="top" class="words">%s</td></tr>"""%( line('name'), self.text_of(line) )
                page.body = """<table border="0" margin="0" padding="0" class="conversation">%s</table>"""%( "\n".join(map(line_to_tr, lines)) )

                tags += ["tumblr:type=conversation"]

            elif entry("type") == 'video':
                page.title = unicode(entry['video-caption'])
                page.body = unicode(entry['video-player'])
                tags += ["tumblr:type=video"]

            else:
                print "Can't handle page type '%s'"%entry('type')
                continue
                
            if len(page.title) > 50:
                page.title = page.title[0:45] + "..."

            page.save()
            page.set_tags( tags )
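
Example #6 also depends on a self.text_of helper that is not shown. A reasonable guess, given how it is used, is a thin wrapper that flattens an xmltramp element to stripped unicode text (the KeyError the callers catch comes from the entry['...'] child lookup itself, before text_of runs):

    def text_of(self, elem):
        # Hypothetical reconstruction: unicode() on an xmltramp element yields
        # its character data; strip surrounding whitespace for use in titles.
        return unicode(elem).strip()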
Example #7
import socket
import sys
import urllib
import urllib2

import xmltramp

# url, wowserver, and realm are assumed to be defined earlier in this script.
# Need to specify firefox as user agent as this makes the server return an XML file.
opener = urllib2.build_opener()
opener.addheaders = [ ('user-agent', 'Mozilla/5.0 (Windows; U; Windows NT 5.0; en-GB; rv:1.8.1.4) Gecko/20070515 Firefox/2.0.0.4') ]

# timeout in seconds - the armoury falls over a lot
socket.setdefaulttimeout(10)
req = urllib2.Request( url )
try:
    data = opener.open(req)
except (urllib2.HTTPError, urllib2.URLError):
    sys.exit(1)
    
xml = xmltramp.seed( data )

achievements = []

for character in xml['guildInfo']['guild']['members']['character':]:
    char_url = "http://%s/character-achievements.xml?r=%s&n=%s"%( wowserver, urllib.quote(realm,''), urllib.quote(character('name').encode('utf-8'),'') )

    char_req = urllib2.Request(char_url)
    try:
        char_data = opener.open(char_req)
    except (urllib2.HTTPError, urllib2.URLError):
        sys.exit(1)
    char_xml = xmltramp.seed( char_data )
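    # The fragment ends without ever using char_xml. A plausible continuation
    # (an assumption, not in the source) would reuse Example #4's
    # ['achievements']['summary']['achievement':] traversal to fill the
    # 'achievements' list declared above:
    for ach in char_xml['achievements']['summary']['achievement':]:
        achievements.append(( character('name'), ach('id'), ach('title') ))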