Esempio n. 1
0
 def __init__(self, *args, **kwargs):
     HTMLPage.__init__(self, *args, **kwargs)
     json_content = Regexp(
         CleanText('//script'),
         r"window\[\"initialData\"\] = JSON.parse\(\"({.*})\"\);window\[\"tags\"\]"
     )(self.doc)
     json_content = codecs.unicode_escape_decode(json_content)[0]
     json_content = json_content.encode('utf-8',
                                        'surrogatepass').decode('utf-8')
     self.doc = json.loads(json_content)
Esempio n. 2
0
 def __init__(self, *args, **kwargs):
     HTMLPage.__init__(self, *args, **kwargs)
     json_content = Regexp(
         CleanText('//script'),
         r"window\[\"initialData\"\] = JSON.parse\(\"({.*})\"\);")(self.doc)
     json_content = codecs.unicode_escape_decode(json_content)[0]
     json_content = json_content.encode('utf-8',
                                        'surrogatepass').decode('utf-8')
     self.doc = {
         "advert":
         json.loads(json_content).get('advert', {}).get('mainAdvert', {}),
         "agency":
         json.loads(json_content).get('agency', {})
     }