コード例 #1
0
ファイル: web.py プロジェクト: matcher/glacier
class Web(object):
    """
    Helper for HTTP operations: checking that a URL is reachable, loading
    the content behind a URL and downloading a URL to a file.
    """

    def __init__(self):
        #get logger
        self.logger = Logger(self.__class__.__name__)

        #get configuration
        self.config = Config.getconfig("WEB")

        #URL faults are NOT logged by default; set this flag to True to
        #have log() forward fault messages to the logger
        self._log_url_faults = False

    def ping(self, url):
        """
        Check whether a URL is accessible.

        @type url:      str
        @param url:     URL to test

        @rtype: L{cimri.system.web.WebReport}
        @return: WebReport object holding the result of the ping
        """

        return self.get(url, ping=True)

    def get(self, url, unicode=True, download=False, file=None, ping=False, cache=None, timeout=None):
        """
        Load the content behind a URL or download it to a file.

        Failures are never raised to the caller from the fetch itself: they
        are recorded in the C{error} attribute of the returned report.

        @type url:      str
        @param url:     URL to open

        @type unicode:  bool
        @param unicode: intended to control whether the content is treated as
                        unicode. In this implementation the only difference is
                        that, when False, the URL is explicitly opened before
                        it is downloaded.

        @type download: bool
        @param download:if True the content is written to C{file}; otherwise
                        the content is stored on the report

        @type file:     str
        @param file:    path of the file to write when C{download} is True

        @type ping:     bool
        @param ping:    only check that the URL can be opened; no content is
                        loaded or downloaded. Takes precedence over
                        C{download}.

        @type cache:    dict
        @param cache:   controls caching; if None no caching is done.
                        If it has a "read" key, the content is first looked
                        up in the cache section named by that value and the
                        URL is only fetched when nothing is found.
                        If it has a "write" key, content fetched from the URL
                        is stored in the cache section named by that value.

        @type timeout:  int
        @param timeout: timeout passed to the URL open/download calls; when
                        None the "url_open_timeout" configuration value is
                        used

        @rtype: L{cimri.system.web.WebReport}
        @return: WebReport object holding the results
        """

        #initialize report
        report = WebReport(url)

        #bound before the try block so that the post-processing at the end
        #is safe even when creating the URL resource itself fails
        res = None

        try:
            #get timeout
            if timeout is None:
                timeout = int(self.config.get("url_open_timeout"))

            #create url resource
            res = URL(url)

            if ping:
                #open and check if url is accessible
                res.open(timeout=timeout)

            elif download:
                #record file name
                report.file = file

                #binary mode: the payload is written exactly as received,
                #without any newline translation
                with open(file, 'wb') as target:
                    target.write(res.download(timeout=timeout, cached=False))

            else:
                self._load_content(report, res, url, cache, timeout, open_first=not unicode)

        except HTTP400BadRequest as e:
            self._fail(report, WebError("exception ocurred opening url. bad request", 400, url), e)

        except HTTP401Authentication as e:
            self._fail(report, WebError("exception ocurred opening url. url requires authentication", 401, url), e)

        except HTTP403Forbidden as e:
            self._fail(report, WebError("exception ocurred opening url. url not accessible", 403, url), e)

        except HTTP404NotFound as e:
            self._fail(report, WebError("exception ocurred opening url. not found", 404, url), e)

        except HTTPError as e:
            self._fail(report, WebError("exception ocurred opening url", None, url), e)

        #URLTimeout has to be tested before URLError: if it derives from
        #URLError (as it does in pattern.web) the broader handler below
        #would otherwise shadow it and timeouts would be misreported
        except URLTimeout as e:
            self._fail(report, WebError("exception ocurred opening url. url load timed out", None, url), e)

        except URLError as e:
            self._fail(report, WebError("exception ocurred opening url. url contains errors", None, url), e)

        except IOError as e:
            self._fail(report, FileError("exception ocurred writing to file", file), e)

        except Exception as e:
            #last-resort boundary: callers expect a report, not an exception
            self._fail(report, WebError("exception ocurred", None, url), e)

        #add information (only available when the URL resource was created)
        if res is not None:
            report.headers = res.headers
            report.query = res.query
            if res.redirect is not None:
                report.content_url = res.redirect
        else:
            report.query = None

        return report

    def _load_content(self, report, res, url, cache, timeout, open_first):
        """
        Fill C{report.content} from the cache or, failing that, from the URL.

        The content is assigned to the report before it is written to the
        cache, so a failing cache write does not lose fetched content.
        """

        #use cached version?
        if cache is not None and "read" in cache:
            report.content = Cache(cache["read"]).get("web.url." + hash_url(url))

        #already have content (e.g. cache hit): nothing to download
        if report.content is not None:
            return

        #download url (not looking for a cached version, or none was found)
        if open_first:
            res.open(timeout=timeout)
        report.content = res.download(timeout=timeout, cached=False)

        #write to cache?
        if cache is not None and "write" in cache:
            Cache(cache["write"]).set("web.url." + hash_url(url), report.content)

    def _fail(self, report, error, exc):
        """
        Record C{error} on the report and log both it and the causing exception.
        """

        report.error = error
        self.log(str(report.error))
        self.log(str(exc))

    def log(self, msg):
        """
        Send C{msg} to the logger as an error, but only when URL fault
        logging has been switched on.

        @type msg:      str
        @param msg:     message to log
        """

        if self._log_url_faults is True:
            self.logger.error(msg)