def test_url_matcher(self):
    """Exercise url_matcher with %-wildcard patterns against one URL."""
    url = "http://www.google.com.ar"
    # Patterns that must match the URL.
    matching = [
        "%www.google.com%",
        "http://www.google.com%",
        "%www.google.com.ar",
        "http://www.google.com.ar",
    ]
    # Patterns that must NOT match (wrong anchor or wrong host).
    non_matching = [
        "%www.google.com",
        "www.google.com%",
        "%www.goo.com%",
        "http://www.goo.com.ar",
    ]
    for pattern in matching:
        self.assertTrue(url_matcher(url, pattern))
    for pattern in non_matching:
        self.assertFalse(url_matcher(url, pattern))
def _search_in_urls_list(self, urls_list, url, default=True):
    """Return True if *url* matches any pattern in *urls_list*.

    An empty (or falsy) *urls_list* yields *default* instead.
    """
    if not urls_list:
        return default
    # any() short-circuits on the first matching pattern, exactly like
    # the explicit first-match loop it replaces.
    return any(url_matcher(url, pattern) for pattern in urls_list)
def _get_response(self, url, data=None):
    """Return the response data for *url*.

    params:
        data: if this param is present (or a configured POST pattern
              matches the url) the request is made as a POST.
    """
    # Scan every configured (pattern, payload) pair; deliberately no
    # break, so the LAST matching pattern's payload wins.
    for candidate, payload in self.post_urls:
        if url_matcher(url, candidate):
            data = payload
    return self._make_request(url, data)
def _validate(self, response):
    """
    Override this method in order to provide more validations before
    the data extraction with the given scraper class.

    Returns silently when any pattern in self.matching_urls matches
    the response URL; otherwise delegates to self.on_cannot_scrape().
    """
    for pattern in self.matching_urls:
        if url_matcher(response.url, pattern):
            if self.debug:
                # print(...) with a single argument is valid and
                # behaves identically on Python 2 and Python 3,
                # unlike the bare `print` statement it replaces.
                print("%s matches the url %s"
                      % (self.__class__.__name__, response.url))
            return
    # No pattern matched: hand off to the cannot-scrape hook.
    self.on_cannot_scrape(response)
def _test_url_matcher_with_regex(self):
    """Exercise url_matcher with full regex patterns against one URL."""
    url = "http://www.google.com.ar"
    # Regexes expected to match the whole URL.
    accepted = [
        "http://([a-z.]+)",
        "http://(([a-z]+.){4})",
        "[a-z/:.]+",
    ]
    # Regexes expected to fail (missing dots, slashes, or repetitions).
    rejected = [
        "http://([a-z]+)",
        "http://(([a-z]+.){1})",
        "[a-z:.]+",
    ]
    for regex in accepted:
        self.assertTrue(url_matcher(url, regex))
    for regex in rejected:
        self.assertFalse(url_matcher(url, regex))
def _validate(self, response):
    """
    Override this method in order to provide more validations before
    the data extraction with the given scraper class.

    Returns silently when any pattern in self.matching_urls matches
    the response URL.

    Raises:
        ScraperCantParseError: when no pattern matches the URL.
    """
    for pattern in self.matching_urls:
        if url_matcher(response.url, pattern):
            if self.debug:
                # print(...) with a single argument is valid and
                # behaves identically on Python 2 and Python 3,
                # unlike the bare `print` statement it replaces.
                print("%s matches the url %s"
                      % (self.__class__.__name__, response.url))
            return
    raise ScraperCantParseError(
        "The Scraper %s can't parse the html from %s"
        % (self.__class__.__name__, response.url))
def test_url_matcher(self):
    """Check %-wildcard matching and non-matching patterns for one URL."""
    url = "http://www.google.com.ar"
    cases = [
        ("%www.google.com%", True),
        ("http://www.google.com%", True),
        ("%www.google.com.ar", True),
        ("http://www.google.com.ar", True),
        ("%www.google.com", False),
        ("www.google.com%", False),
        ("%www.goo.com%", False),
        ("http://www.goo.com.ar", False),
    ]
    for pattern, should_match in cases:
        if should_match:
            self.assertTrue(url_matcher(url, pattern))
        else:
            self.assertFalse(url_matcher(url, pattern))
def _test_url_matcher_with_regex(self):
    """Check regex-based matching and non-matching patterns for one URL."""
    url = "http://www.google.com.ar"
    cases = [
        ("http://([a-z.]+)", True),
        ("http://(([a-z]+.){4})", True),
        ("[a-z/:.]+", True),
        ("http://([a-z]+)", False),
        ("http://(([a-z]+.){1})", False),
        ("[a-z:.]+", False),
    ]
    for regex, should_match in cases:
        if should_match:
            self.assertTrue(url_matcher(url, regex))
        else:
            self.assertFalse(url_matcher(url, regex))