Example #1
0
 def obj_id(self):
     """Return the identifier for this item.

     The identifier is the group captured from the link's ``href`` by the
     france.tv URL pattern.  When nothing (or an empty value) is captured,
     a synthetic id is built instead: ``vid_`` followed by the MD5 hex
     digest of the text produced by ``CleanText('.')``, with non-ASCII
     characters dropped before hashing.
     """
     url_slug = Regexp(CleanText('./a/@href'), '//www.france.tv/(.*)/', default=None)(self)
     if url_slug:
         return url_slug

     # Fallback: derive the id from the item's own text.
     # md5 needs bytes, and characters that cannot be encoded as ASCII are
     # silently discarded by the 'ignore' error handler.
     ascii_text = CleanText('.')(self).encode('ascii', 'ignore')
     return u'vid_%s' % hashlib.md5(ascii_text).hexdigest()
Example #2
0
 def __init__(self, *args, **kwargs):
     """Initialise the page, then swap ``self.doc`` for the embedded JSON.

     After the ``HTMLPage`` initialiser has run, the text captured from the
     ``window["initialData"] = JSON.parse("...")`` assignment is
     unescaped, parsed with ``json.loads`` and stored back on ``self.doc``,
     replacing the document that was there before.

     All positional and keyword arguments are forwarded unchanged to
     ``HTMLPage.__init__``.
     """
     HTMLPage.__init__(self, *args, **kwargs)

     # Captures the ``{...}`` text passed to JSON.parse, up to the
     # following ``window["tags"]`` statement.
     extract_payload = Regexp(
         CleanText('//script'),
         r"window\[\"initialData\"\] = JSON.parse\(\"({.*})\"\);window\[\"tags\"\]"
     )
     escaped_payload = extract_payload(self.doc)

     # The payload is still backslash-escaped; unicode_escape_decode
     # returns an (output, length) pair and only the output is needed.
     unescaped_payload, _ = codecs.unicode_escape_decode(escaped_payload)

     # NOTE(review): 'surrogatepass' is applied on encode only, and the
     # decode step uses the default error handling -- confirm this round
     # trip behaves as intended when the payload contains surrogates.
     utf8_payload = unescaped_payload.encode('utf-8', 'surrogatepass')
     self.doc = json.loads(utf8_payload.decode('utf-8'))