Esempio n. 1
0
	def get_target(self,url,target_prefix=""):
		"""Fetch ``url`` and pass the ``data`` member of its JSON body to ``parser``.

		``self.get_html(url)`` must return a two-item result; only the first
		item (the content) is used, and a content of ``-1`` is treated as a
		failed fetch.

		``target_prefix`` is accepted for signature compatibility but is not
		used by this implementation.

		Returns whatever ``parser`` returns for the decoded ``data`` value, or
		``-1`` when the fetch failed, the body could not be decoded into a
		mapping with a ``data`` key, ``data`` is empty, or ``parser`` raised.
		"""
		content, _ = self.get_html(url)
		if content == -1:
			return -1
		try:
			data = json.loads(content)['data']
		except (ValueError, KeyError, TypeError) as e:
			# Malformed JSON, a missing "data" key, or a body that is not a
			# JSON object: report it and use the same -1 sentinel as every
			# other failure path instead of letting the exception escape.
			print(e)
			return -1
		if not data:
			# Previously this case fell off the end and returned None.
			return -1
		try:
			return parser(data)
		except Exception as e:
			print(e)
			return -1
Esempio n. 2
0
	def get_target(self,url,target_prefix=""):
		"""Download ``url``, decode it as JSON and run ``parser`` on its ``data`` entry.

		The fetch goes through ``self.get_html(url)``, which is unpacked as a
		pair; the second item is ignored and a first item equal to ``-1``
		means the download failed.

		``target_prefix`` is not consulted by this implementation; it is kept
		so the signature stays unchanged.

		Returns the result of ``parser(data)`` on success. Returns ``-1`` if
		the download failed, if the body is not valid JSON or has no ``data``
		key, if ``data`` is empty, or if ``parser`` raises.
		"""
		content, _ = self.get_html(url)
		if content == -1:
			return -1
		try:
			data = json.loads(content)['data']
		except (ValueError, KeyError, TypeError) as e:
			# Bad or unexpected JSON is reported and mapped to the -1
			# sentinel, matching how parser failures are handled below.
			print(e)
			return -1
		if not data:
			# Explicit sentinel; the old code implicitly returned None here.
			return -1
		try:
			return parser(data)
		except Exception as e:
			print(e)
			return -1
	def get_target(self,url,target_prefix=""):
		"""Fetch ``url``, pull the embedded ``html`` payload out of it and parse it.

		``self.get_html(url)`` must return a two-item result; only the first
		item (the content) is used, and a content of ``-1`` means the fetch
		failed.

		The content is scanned line by line. The first line that starts with
		``target_prefix`` and contains the marker ``html":"`` is used: the
		text after the marker, minus the last 12 characters of the line, is
		stripped of escaped tab/newline/carriage-return sequences and of all
		remaining backslashes, then handed to ``parser``.

		Returns what ``parser`` returns, or ``-1`` when the fetch failed,
		``parser`` returned ``None``, or no usable line was found. In the
		last case the raw content is also written to ``tmp.html`` through
		``save_to_disk``.
		"""
		marker = 'html":"'
		# Number of characters dropped from the end of the line; presumably the
		# closing wrapper around the payload -- confirm against a real response.
		tail_len = 12
		content, _ = self.get_html(url)
		if content == -1:
			return -1
		for line in content.splitlines():
			if not line.startswith(target_prefix):
				continue
			n = line.find(marker)
			if n == -1:
				# str.find signals "not found" with -1; slicing from that
				# offset would feed garbage to parser, so keep scanning.
				continue
			target = line[n + len(marker):-tail_len]
			# Order matters: drop the escaped whitespace sequences first, then
			# every backslash that is left.
			for escape in ("\\t", "\\n", "\\r", "\\"):
				target = target.replace(escape, "")
			ps = parser(target)
			if ps is None:
				#logger.logger_error("bad content")
				print("why here...")
				return -1
			return ps
		#logger.logger_error("bad content, please check the resource")
		print("bad content, please check the resource")
		save_to_disk("tmp.html",content)
		return -1
Esempio n. 4
0
 def get_target(self, url, target_prefix=""):
     """Extract and parse the ``html`` payload embedded in the page at ``url``.

     The page is obtained from ``self.get_html(url)``, which is unpacked as
     a pair; the second item is ignored and a first item equal to ``-1``
     means the download failed.

     Each line of the content is examined in order. A line qualifies when it
     starts with ``target_prefix`` and contains the marker ``html":"``. For
     the first qualifying line, the text after the marker (excluding the
     final 12 characters of the line) has its escaped ``\\t``, ``\\n`` and
     ``\\r`` sequences and then all remaining backslashes removed, and the
     result is passed to ``parser``.

     Returns the value produced by ``parser``. Returns ``-1`` if the
     download failed, if ``parser`` produced ``None``, or if no line
     qualified; in that last case the content is also saved to ``tmp.html``
     via ``save_to_disk``.
     """
     marker = 'html":"'
     # Trailing characters cut from the line; presumably the wrapper that
     # closes the payload -- verify against an actual response.
     tail_len = 12
     content, _ = self.get_html(url)
     if content == -1:
         return -1
     for line in content.splitlines():
         if not line.startswith(target_prefix):
             continue
         start = line.find(marker)
         if start == -1:
             # Marker absent: the old code sliced from a bogus offset here.
             continue
         target = line[start + len(marker):-tail_len]
         # Escaped whitespace sequences go first, bare backslashes last.
         for escape in ("\\t", "\\n", "\\r", "\\"):
             target = target.replace(escape, "")
         ps = parser(target)
         if ps is None:
             #logger.logger_error("bad content")
             print("why here...")
             return -1
         return ps
     #logger.logger_error("bad content, please check the resource")
     print("bad content, please check the resource")
     save_to_disk("tmp.html", content)
     return -1