예제 #1
0
파일: utils_test.py 프로젝트: 4iji/crawley
	def test_url_matcher(self):

		# Every pattern is checked against the same url.
		url = "http://www.google.com.ar"

		# Patterns the matcher is expected to accept: a "%" stands where
		# extra text of the url is tolerated, no "%" means that end is exact.
		accepted = (
			"%www.google.com%",
			"http://www.google.com%",
			"%www.google.com.ar",
			"http://www.google.com.ar",
		)

		# Patterns the matcher is expected to reject: a missing "%" on a side
		# where the url has more text, or a different host name.
		rejected = (
			"%www.google.com",
			"www.google.com%",
			"%www.goo.com%",
			"http://www.goo.com.ar",
		)

		for pattern in accepted:
			self.assertTrue(url_matcher(url, pattern))

		for pattern in rejected:
			self.assertFalse(url_matcher(url, pattern))
예제 #2
0
파일: base.py 프로젝트: cosmospham/crawley
    def _search_in_urls_list(self, urls_list, url, default=True):
        """
            Searches an url in a list of urls
        """

        if not urls_list:
            return default

        for pattern in urls_list:
            if url_matcher(url, pattern):
                return True

        return False
예제 #3
0
파일: base.py 프로젝트: cosmospham/crawley
    def _get_response(self, url, data=None):
        """
            Returns the response data from a request

            params:
                data: if this param is present it makes a POST.
        """

        for pattern, post_data in self.post_urls:
            if url_matcher(url, pattern):
                data = post_data

        return self._make_request(url, data)
예제 #4
0
파일: base.py 프로젝트: wgfi110/crawley
    def _search_in_urls_list(self, urls_list, url, default=True):
        """
            Searches an url in a list of urls
        """

        if not urls_list:
            return default

        for pattern in urls_list:
            if url_matcher(url, pattern):
                return True

        return False
예제 #5
0
파일: base.py 프로젝트: wgfi110/crawley
    def _get_response(self, url, data=None):
        """
            Returns the response data from a request

            params:
                data: if this param is present it makes a POST.
        """

        for pattern, post_data in self.post_urls:
            if url_matcher(url, pattern):
                data = post_data

        return self._make_request(url, data)
예제 #6
0
    def _validate(self, response):
        """
            Override this method in order to provide more validations before the data extraction with the given scraper class
        """

        for pattern in self.matching_urls:

            if url_matcher(response.url, pattern):

                if self.debug:
                    print "%s matches the url %s" % (self.__class__.__name__, response.url)
                return

        self.on_cannot_scrape(response)
예제 #7
0
파일: utils_test.py 프로젝트: 4iji/crawley
	def _test_url_matcher_with_regex(self):

		# NOTE(review): the leading underscore keeps unittest's default loader
		# from collecting this method (it only picks names starting with
		# "test"), so this check looks intentionally disabled - confirm.
		url = "http://www.google.com.ar"

		# Regular expressions expected to match the url.
		accepted = (
			"http://([a-z.]+)",
			"http://(([a-z]+.){4})",
			"[a-z/:.]+",
		)

		# Regular expressions expected not to match the url.
		rejected = (
			"http://([a-z]+)",
			"http://(([a-z]+.){1})",
			"[a-z:.]+",
		)

		for regex in accepted:
			self.assertTrue(url_matcher(url, regex))

		for regex in rejected:
			self.assertFalse(url_matcher(url, regex))
예제 #8
0
파일: base.py 프로젝트: hammadk373/crawley
 def _validate(self, response):
     """
         Checks that the response url matches one of self.matching_urls

         Override this method in order to provide more validations before the data extraction with the given scraper class

         Raises ScraperCantParseError when no pattern matches the
         response url.
     """

     for pattern in self.matching_urls:

         if not url_matcher(response.url, pattern):
             continue

         if self.debug:
             # Parenthesized single argument: prints the same text under
             # Python 2 and is also valid syntax under Python 3, where the
             # bare print statement does not compile.
             print("%s matches the url %s" % (self.__class__.__name__, response.url))

         # the first matching pattern is enough
         return

     raise ScraperCantParseError("The Scraper %s can't parse the html from %s" % (self.__class__.__name__, response.url))
예제 #9
0
    def test_url_matcher(self):

        # Every pattern is checked against the same url.
        target = "http://www.google.com.ar"

        # Accepted: "%" marks a side where extra url text is tolerated;
        # without it that end of the pattern has to line up exactly.
        self.assertTrue(url_matcher(target, "%www.google.com%"))
        self.assertTrue(url_matcher(target, "http://www.google.com%"))
        self.assertTrue(url_matcher(target, "%www.google.com.ar"))
        self.assertTrue(url_matcher(target, "http://www.google.com.ar"))

        # Rejected: missing "%" on a side where the url has more text,
        # or a different host name.
        self.assertFalse(url_matcher(target, "%www.google.com"))
        self.assertFalse(url_matcher(target, "www.google.com%"))
        self.assertFalse(url_matcher(target, "%www.goo.com%"))
        self.assertFalse(url_matcher(target, "http://www.goo.com.ar"))
예제 #10
0
    def _test_url_matcher_with_regex(self):

        # NOTE(review): the leading underscore keeps unittest's default
        # loader from collecting this method (it only picks names starting
        # with "test"), so this check looks intentionally disabled - confirm.
        target = "http://www.google.com.ar"

        # (regex, whether url_matcher is expected to accept it)
        expectations = [
            ("http://([a-z.]+)", True),
            ("http://(([a-z]+.){4})", True),
            ("[a-z/:.]+", True),
            ("http://([a-z]+)", False),
            ("http://(([a-z]+.){1})", False),
            ("[a-z:.]+", False),
        ]

        for regex, should_match in expectations:
            outcome = url_matcher(target, regex)

            if should_match:
                self.assertTrue(outcome)
            else:
                self.assertFalse(outcome)