Ejemplo n.º 1
0
 def render_POST(self, request, merge=False):
     """Validate a posted JSON resource and hand it to the project spec.

     The first segment of ``request.postpath`` selects the schema.  For
     ``spiders`` the remaining segments decide what happens:

     * one segment (or an empty second segment): the object is passed to
       ``handle_spider_command`` and that result is returned;
     * two segments: the object is run through ``clean_spider`` and
       validated as a ``spider``;
     * three segments: the object is validated as a ``template`` after
       its ``original_body`` is filled in (when missing) and plugin data
       is added.

     Any other first segment is used as the schema name as-is.

     ``merge`` is accepted but not used by this method.

     Returns an empty string after a successful ``savejson`` call.
     """
     obj = self.read_json(request)
     project_spec = self.spec_manager.project_spec(request.project,
                                                   request.auth_info)
     resource = None
     try:
         # validate the request path and data
         rpath = request.postpath
         resource = rpath[0]
         if resource == 'spiders':
             resource = 'spider'
             if len(rpath) == 1 or not rpath[1]:
                 return self.handle_spider_command(project_spec, obj)
             elif len(rpath) == 2:
                 clean_spider(obj)
             elif len(rpath) == 3:
                 resource = 'template'
                 if obj.get('original_body') is None:
                     # Client sent no body: reuse the one from the spec
                     templ = project_spec.template_json(rpath[1], rpath[2])
                     obj['original_body'] = templ.get('original_body', '')
                 obj = add_plugin_data(obj, project_spec.plugins)
         get_schema_validator(resource).validate(obj)
     except (KeyError, IndexError):
         self.not_found()
     except (AssertionError, ValidationError) as ex:
         # ``.message`` is not available on every exception type (plain
         # AssertionError lacks it on Python 3); fall back to str(ex)
         # instead of failing with AttributeError inside the handler.
         detail = getattr(ex, 'message', None)
         if detail is None:
             detail = str(ex)
         self.bad_request(
             "The %s data was not valid. Validation failed with the error: %s."
             % (resource or 'input', detail))
     except BaseHTTPError as ex:
         self.error(ex.status, ex.title, ex.body)
     else:
         # Reached only when no exception was raised above
         project_spec.savejson(obj, request.postpath)
         return ''
Ejemplo n.º 2
0
 def render_POST(self, request, merge=False):
     """Validate the JSON body of a POST and pass it to ``savejson``.

     ``request.postpath`` determines the schema used for validation.  A
     path starting with ``spiders`` is handled specially: with no spider
     name the body is forwarded to ``handle_spider_command`` (whose
     result is returned); with two segments the body is cleaned with
     ``clean_spider`` and validated as ``spider``; with three segments
     it is validated as ``template`` after ``original_body`` and plugin
     data have been filled in.

     ``merge`` is part of the signature but is not read here.

     Returns ``''`` once the validated object has been handed to
     ``project_spec.savejson``.
     """
     obj = self.read_json(request)
     project_spec = self.spec_manager.project_spec(
         request.project, request.auth_info)
     resource = None
     try:
         # validate the request path and data
         rpath = request.postpath
         resource = rpath[0]
         if resource == 'spiders':
             resource = 'spider'
             if len(rpath) == 1 or not rpath[1]:
                 return self.handle_spider_command(project_spec, obj)
             elif len(rpath) == 2:
                 clean_spider(obj)
             elif len(rpath) == 3:
                 resource = 'template'
                 if obj.get('original_body') is None:
                     # No body supplied: take it from the stored template
                     templ = project_spec.template_json(rpath[1], rpath[2])
                     obj['original_body'] = templ.get('original_body', '')
                 obj = add_plugin_data(obj, project_spec.plugins)
         get_schema_validator(resource).validate(obj)
     except (KeyError, IndexError):
         self.not_found()
     except (AssertionError, ValidationError) as ex:
         # AssertionError has no ``.message`` attribute on Python 3, so
         # look it up defensively and fall back to the string form.
         detail = getattr(ex, 'message', None)
         if detail is None:
             detail = str(ex)
         self.bad_request(
             "The %s data was not valid. Validation failed with the error: %s."
             % (resource or 'input', detail))
     except BaseHTTPError as ex:
         self.error(ex.status, ex.title, ex.body)
     else:
         # Only runs when validation completed without an exception
         project_spec.savejson(obj, request.postpath)
         return ''
Ejemplo n.º 3
0
 def render_POST(self, request, merge=False):
     """Check a posted JSON document against its schema, then save it.

     The schema is chosen from ``request.postpath``.  Paths under
     ``spiders`` are either forwarded to ``handle_spider_command`` (no
     spider name), cleaned and validated as a ``spider`` (two segments),
     or annotated and validated as a ``template`` (three segments).

     ``merge`` is accepted but unused.  Returns ``''`` after saving.
     """
     data = self.read_json(request)
     spec = self.spec_manager.project_spec(
         request.project, request.auth_info)
     try:
         # Work out which schema applies and prepare the data for it
         segments = request.postpath
         kind = segments[0]
         if kind == 'spiders':
             kind = 'spider'
             depth = len(segments)
             if depth == 1 or not segments[1]:
                 return self.handle_spider_command(spec, data)
             if depth == 2:
                 clean_spider(data)
             elif depth == 3:
                 kind = 'template'
                 # Take the body from the stored template only when the
                 # posted document does not carry one itself
                 if data.get('original_body') is None:
                     body_source = spec.template_json(
                         segments[1], segments[2])
                 else:
                     body_source = data
                 data['original_body'] = body_source.get('original_body', '')
                 annotate_template(data)
                 # Remove annotations field which is not used by slybot
                 data.pop('annotations', None)
         get_schema_validator(kind).validate(data)
     except (KeyError, IndexError):
         self.error(404, "Not Found", "No such resource")
     except ValidationError as ex:
         self.bad_request("Json failed validation: %s" % ex.message)
     except BaseHTTPError as ex:
         self.error(ex.status, ex.title, ex.body)
     else:
         spec.savejson(data, request.postpath)
         return ''
Ejemplo n.º 4
0
    def test_valid_url(self):
        """Valid start URLs pass the spider schema in both accepted forms.

        The same URLs are validated once as plain strings and once after
        being converted with ``start_url_schema``.
        """
        legacy_start_urls = [
            'http://www.example.com/',
            'http://www.example.com/經濟',
            'http://www.example.com/?q=經濟',
            'http://www.example.com/#經濟',
            'http://faß.de',
            'http://例.jp/',
            'http://[2001:0000:1234:0000:0000:C1C0:ABCD:0876]/foo/bar',
            'http://[2001::]/foo/bar',
            'http://8.8.8.8/foo/bar',
            'http://*****:*****@localhost:8080/foo/bar',
            'http://*****:*****@localhost:8080/foo/bar',
            'http://domain.com/path/file.html?param=FOO^111&param2=bar&param3=true&_param4=on', # Anonymized URL from sentry d46840d2457c4042b1b58f2fa40e984b
            'https://domain.com/path/file.htm?param=foo#hash/foo/bar/baz:foo|bar:baz',          # Anonymized URL from sentry 01dd2fa09d9540b69ebd33372b2b3a2d
            'https://domain.com/path/file.htm?param=foo#hash/foo/bar/baz:foo|bar%5B%5D:12345',  # Anonymized URL from sentry 87d49ee751494c90a8941dcbdacea634
            'http://domain.com/path?bar[foo]=baz&foo[bar]=12345',                               # Anonymized URL from sentry 9f6835f5decd4d57b9475f04f0a58bd4
        ]

        # Build a concrete list: on Python 3 ``map`` returns a lazy
        # iterator, so the spider JSON would not receive a list.
        start_urls = [start_url_schema(url) for url in legacy_start_urls]
        validator = get_schema_validator("spider")

        self.assertIsNone(validator.validate(spider_json(legacy_start_urls)))
        self.assertIsNone(validator.validate(spider_json(start_urls)))
Ejemplo n.º 5
0
    def test_valid_fragments(self):
        """Generated start URLs validate for each sample fragment list."""
        fragment_sets = (
            [
                {'type': 'fixed', 'value': 'domain.com'},
                {'type': 'range', 'value': '0-10'},
            ],
            [{'type': 'range', 'value': '0-10'}],
            [{'type': 'list', 'value': 'a b c'}],
            [{'type': 'list', 'value': 'one_element'}],
        )

        validator = get_schema_validator("spider")
        for fragment_set in fragment_sets:
            generated = {
                'url': 'http://domain.com',
                'type': 'generated',
                'fragments': fragment_set,
            }
            outcome = validator.validate(spider_json([generated]))
            self.assertEqual(outcome, None)
Ejemplo n.º 6
0
 def test_regex_formatting_ok(self):
     """A well-formed extractor regular expression passes validation."""
     obj = {
         "0": {
             # Raw string: "\d" is not a valid escape in a normal literal
             # and triggers a warning on recent Python versions.
             "regular_expression": r"Item: (\d+)"
         }
     }
     validator = get_schema_validator("extractors")
     self.assertIsNone(validator.validate(obj))
Ejemplo n.º 7
0
 def test_valid_url(self):
     """A spider with a single absolute http start URL validates."""
     spider = dict(
         start_urls=['http://www.example.com/'],
         links_to_follow="none",
         respect_nofollow=True,
         templates=[],
     )
     outcome = get_schema_validator("spider").validate(spider)
     self.assertEqual(outcome, None)
Ejemplo n.º 8
0
 def test_regex_formatting_wrong(self):
     """An unbalanced extractor regex is rejected as invalid."""
     obj = {
         "0": {
             # Raw string avoids the invalid "\d" escape in a normal
             # literal; the pattern is deliberately missing its ")".
             "regular_expression": r"Item: (\d+"
         }
     }
     validator = get_schema_validator("extractors")
     self.assertRaisesRegexp(ValidationError, "Invalid regular expression",
                             validator.validate, obj)
Ejemplo n.º 9
0
 def test_regex_formatting_wrong(self):
     """Validation raises ``ValidationError`` for an unbalanced regex."""
     obj = {
         "0": {
             # Raw string avoids the invalid "\d" escape in a normal
             # literal; the closing parenthesis is missing on purpose.
             "regular_expression": r"Item: (\d+"
         }
     }
     validator = get_schema_validator("extractors")
     with self.assertRaises(ValidationError):
         validator.validate(obj)
Ejemplo n.º 10
0
 def render_POST(self, request):
     """Validate the posted JSON and save it under the request path.

     The first path segment names the schema.  For ``spiders`` with no
     spider name the body is passed to ``handle_spider_command`` and
     that result is returned; otherwise the body is processed by
     ``annotate_templates`` and validated as a ``spider``.

     Returns an empty string.
     """
     obj = self.read_json(request)
     project_spec = self.spec_manager.project_spec(request.project)
     try:
         # validate the request path and data
         resource = request.postpath[0]
         if resource == 'spiders':
             if len(request.postpath) == 1 or not request.postpath[1]:
                 return self.handle_spider_command(project_spec, obj)
             annotate_templates(obj)
             resource = 'spider'
         get_schema_validator(resource).validate(obj)
     except (KeyError, IndexError):
         self.error(404, "Not Found", "No such resource")
     except ValidationError as ex:
         self.bad_request("Json failed validation: %s" % ex.message)
     else:
         # Save only when validation succeeded, so an error handler that
         # returns normally can never lead to unvalidated data being
         # written.
         project_spec.savejson(obj, request.postpath)
     return ''
Ejemplo n.º 11
0
 def render_POST(self, request):
     """Validate a posted JSON object and store it in the project spec.

     ``request.postpath[0]`` selects the schema.  A ``spiders`` path
     without a spider name is treated as a command and delegated to
     ``handle_spider_command``; any other ``spiders`` path has its body
     passed through ``annotate_templates`` and is validated with the
     ``spider`` schema.

     Returns ``''``.
     """
     obj = self.read_json(request)
     project_spec = self.spec_manager.project_spec(request.project)
     try:
         # validate the request path and data
         resource = request.postpath[0]
         if resource == 'spiders':
             if len(request.postpath) == 1 or not request.postpath[1]:
                 return self.handle_spider_command(project_spec, obj)
             annotate_templates(obj)
             resource = 'spider'
         get_schema_validator(resource).validate(obj)
     except (KeyError, IndexError):
         self.error(404, "Not Found", "No such resource")
     except ValidationError as ex:
         self.bad_request("Json failed validation: %s" % ex.message)
     else:
         # Keep the write on the success path: data that failed the
         # checks above must not be saved even if the error helper
         # returns instead of raising.
         project_spec.savejson(obj, request.postpath)
     return ''
Ejemplo n.º 12
0
 def verify_data(self, path=None, obj=None, project_spec=None):
     """Validate ``obj`` against the schema selected by ``path``.

     Raises ``self.errors.BadRequest`` when ``path`` is empty or when
     ``obj`` or ``project_spec`` is ``None``.  A ``spiders`` path with
     no spider name is delegated to ``handle_spider_command``.

     Returns the (possibly replaced) object after validation, or the
     result of ``handle_spider_command``.
     """
     inputs_missing = obj is None or project_spec is None
     if not path or inputs_missing:
         raise self.errors.BadRequest('No path received')

     schema_name = path[0]
     if schema_name == 'spiders':
         depth = len(path)
         if depth == 1 or not path[1]:
             return self.handle_spider_command(project_spec, obj)
         schema_name = 'spider'
         if depth == 2:
             clean_spider(obj)
         elif depth == 3:
             schema_name = 'template'
             # Fill in the body from the stored template when absent
             if obj.get('original_body') is None:
                 stored = project_spec.template_json(path[1], path[2])
                 obj['original_body'] = stored.get('original_body', '')
             obj = add_plugin_data(obj, project_spec.plugins)

     validator = get_schema_validator(schema_name)
     validator.validate(obj)
     return obj
Ejemplo n.º 13
0
 def verify_data(self, path=None, obj=None, project_spec=None):
     """Run schema validation for ``obj`` based on the resource ``path``.

     ``self.errors.BadRequest`` is raised if ``path`` is falsy or if
     either ``obj`` or ``project_spec`` is ``None``.  For ``spiders``
     paths the object is dispatched as a command, cleaned as a spider,
     or completed as a template depending on the number of segments.

     Returns the validated object, or whatever
     ``handle_spider_command`` returns for command requests.
     """
     if not path or obj is None or project_spec is None:
         raise self.errors.BadRequest('No path received')

     is_spider_path = path[0] == 'spiders'
     resource = 'spider' if is_spider_path else path[0]
     if is_spider_path:
         segment_count = len(path)
         if segment_count == 1 or not path[1]:
             return self.handle_spider_command(project_spec, obj)
         if segment_count == 2:
             clean_spider(obj)
         elif segment_count == 3:
             resource = 'template'
             body_missing = obj.get('original_body') is None
             if body_missing:
                 # Reuse the body kept in the project's template JSON
                 templ = project_spec.template_json(path[1], path[2])
                 obj['original_body'] = templ.get('original_body', '')
             obj = add_plugin_data(obj, project_spec.plugins)

     get_schema_validator(resource).validate(obj)
     return obj
Ejemplo n.º 14
0
 def test_invalid_url(self):
     """A start URL without a scheme fails with an "Invalid url:" error."""
     spider = dict(
         start_urls=['www.example.com'],
         links_to_follow="none",
         respect_nofollow=True,
         templates=[],
     )
     validator = get_schema_validator("spider")
     with self.assertRaisesRegexp(ValidationError, "Invalid url:"):
         validator.validate(spider)
Ejemplo n.º 15
0
 def test_valid_url(self):
     """Validation returns ``None`` for a spider with a valid start URL."""
     start_urls = ['http://www.example.com/']
     spider = {
         "templates": [],
         "respect_nofollow": True,
         "links_to_follow": "none",
         "start_urls": start_urls,
     }
     validator = get_schema_validator("spider")
     result = validator.validate(spider)
     self.assertEqual(result, None)
Ejemplo n.º 16
0
    def test_valid_mixed_fragments(self):
        """A plain URL entry and a generated entry validate together."""
        plain_entry = {'type': 'url', 'url': 'http://www.example.com/'}
        generated_entry = {
            'type': 'generated',
            'url': 'http://',
            'fragments': [{'type': 'fixed', 'value': 'http://'}],
        }
        validator = get_schema_validator("spider")

        spider = spider_json([plain_entry, generated_entry])
        self.assertEqual(validator.validate(spider), None)
Ejemplo n.º 17
0
 def test_invalid_url(self):
     """Validating a scheme-less start URL raises "Invalid url:"."""
     bad_start_urls = ['www.example.com']
     spider = {
         "start_urls": bad_start_urls,
         "links_to_follow": "none",
         "respect_nofollow": True,
         "templates": [],
     }
     validator = get_schema_validator("spider")
     with self.assertRaisesRegexp(ValidationError, "Invalid url:"):
         validator.validate(spider)
Ejemplo n.º 18
0
 def test_schema_format(self):
     """Each entry of ``self.specs`` validates as ``generated_urls``.

     The test passes as long as ``validate`` does not raise.
     """
     validator = get_schema_validator('spider')
     spider = dict(
         start_urls_type='generated_urls',
         start_urls=[],
         links_to_follow='none',
         respect_nofollow=True,
     )
     for generated_spec in self.specs.values():
         # Reuse the same spider dict, swapping in the next spec
         spider.update(generated_urls=generated_spec)
         validator.validate(spider)
Ejemplo n.º 19
0
 def test_schema_format(self):
     """Validate a spider once per spec stored in ``self.specs``.

     No explicit assertion is made; a raised error fails the test.
     """
     spider = {
         'respect_nofollow': True,
         'links_to_follow': 'none',
         'start_urls': [],
         'start_urls_type': 'generated_urls',
     }
     check = get_schema_validator('spider').validate
     for url_spec in self.specs.values():
         spider['generated_urls'] = url_spec
         check(spider)
Ejemplo n.º 20
0
    def test_invalid_url(self):
        """Every listed bad start URL is rejected by the spider schema."""
        rejected_urls = [
            12345,  # Not a string
            'example.com',  # Lacks protocol
            'http://[:::1]/foo/bar',  # Bad IPv6 addr
            'http://http://foo.com/bar',  # Double protocol
            'spotify:foobar',  # Not http/s protocol
            '/foo',  # relative
            '?foo',  # relative
            '#foo',  # relative
        ]

        validator = get_schema_validator("spider")
        for candidate in rejected_urls:
            single_url = [candidate]
            with self.assertRaises(ValidationError):
                validator.validate(spider_json(single_url))
Ejemplo n.º 21
0
 def test_invalid_url(self):
     """Each malformed or relative start URL fails spider validation."""
     # The validator is requested with a constant schema name, so build
     # it once instead of on every loop iteration.
     validator = get_schema_validator("spider")
     for invalid_url in (
             12345,  # Not a string
             'example.com',  # Lacks protocol
             'http://[:::1]/foo/bar',  # Bad IPv6 addr
             '/foo',  # relative
             '?foo',  # relative
             '#foo',  # relative
     ):
         obj = {
             "start_urls": [invalid_url],
             "links_to_follow": "none",
             "respect_nofollow": True,
             "templates": [],
         }
         with self.assertRaises(ValidationError):
             validator.validate(obj)
 def test_invalid_url(self):
     """Spider validation raises for every invalid start URL listed."""
     invalid_urls = (
         12345,  # Not a string
         'example.com',  # Lacks protocol
         'http://[:::1]/foo/bar',  # Bad IPv6 addr
         '/foo',  # relative
         '?foo',  # relative
         '#foo',  # relative
     )
     # Same schema for every URL: fetch the validator a single time
     # rather than once per iteration.
     validator = get_schema_validator("spider")
     for invalid_url in invalid_urls:
         obj = {
             "start_urls": [invalid_url],
             "links_to_follow": "none",
             "respect_nofollow": True,
             "templates": [],
         }
         with self.assertRaises(ValidationError):
             validator.validate(obj)
Ejemplo n.º 23
0
    def test_valid_fragments(self):
        """Each sample fragment list validates in a generated start URL."""
        fixed_then_range = [
            {'type': 'fixed', 'value': 'domain.com'},
            {'type': 'range', 'value': '0-10'},
        ]
        range_only = [{'type': 'range', 'value': '0-10'}]
        multi_item_list = [{'type': 'list', 'value': 'a b c'}]
        single_item_list = [{'type': 'list', 'value': 'one_element'}]

        validator = get_schema_validator("spider")
        for fragment_set in (fixed_then_range, range_only,
                             multi_item_list, single_item_list):
            entry = dict(url='http://domain.com', type='generated',
                         fragments=fragment_set)
            spider = spider_json([entry])
            self.assertEqual(validator.validate(spider), None)
 def test_valid_url(self):
     """A range of well-formed http start URLs passes validation.

     The samples include non-ASCII paths, queries, fragments and hosts,
     bracketed IPv6 hosts and a dotted IPv4 host.
     """
     accepted_urls = [
         'http://www.example.com/',
         'http://www.example.com/經濟',
         'http://www.example.com/?q=經濟',
         'http://www.example.com/#經濟',
         'http://faß.de',
         'http://例.jp/',
         'http://[2001:0000:1234:0000:0000:C1C0:ABCD:0876]/foo/bar',
         'http://[2001::]/foo/bar',
         'http://8.8.8.8/foo/bar',
     ]
     spider = dict(
         start_urls=accepted_urls,
         links_to_follow="none",
         respect_nofollow=True,
         templates=[],
     )
     outcome = get_schema_validator("spider").validate(spider)
     self.assertEqual(outcome, None)
Ejemplo n.º 25
0
 def test_valid_url(self):
     """Validation returns ``None`` for a spider with valid start URLs.

     Covers plain, non-ASCII, IPv6-literal and IPv4-literal hosts.
     """
     spider = {
         "templates": [],
         "respect_nofollow": True,
         "links_to_follow": "none",
         "start_urls": [
             'http://www.example.com/',
             'http://www.example.com/經濟',
             'http://www.example.com/?q=經濟',
             'http://www.example.com/#經濟',
             'http://faß.de',
             'http://例.jp/',
             'http://[2001:0000:1234:0000:0000:C1C0:ABCD:0876]/foo/bar',
             'http://[2001::]/foo/bar',
             'http://8.8.8.8/foo/bar',
         ],
     }
     validator = get_schema_validator("spider")
     result = validator.validate(spider)
     self.assertEqual(result, None)
Ejemplo n.º 26
0
 def test_regex_formatting_wrong(self):
     """An extractor regex with an unclosed group is reported as invalid."""
     # Raw string: "\d" is an invalid escape in a normal string literal.
     obj = {"0": {"regular_expression": r"Item: (\d+"}}
     validator = get_schema_validator("extractors")
     self.assertRaisesRegexp(ValidationError, "Invalid regular expression",
                             validator.validate, obj)
Ejemplo n.º 27
0
 def validump_resource(jsonres, restype):
     """Validate ``jsonres`` against the ``restype`` schema and dump it.

     Returns the JSON-encoded string of ``jsonres``; validation errors
     raised by the schema validator propagate to the caller.
     """
     validator = get_schema_validator(restype)
     validator.validate(jsonres)
     serialized = json.dumps(jsonres)
     return serialized
Ejemplo n.º 28
0
 def validump_resource(jsonres, restype):
     """Return ``jsonres`` as a JSON string after validating it.

     The object is checked with the validator for ``restype`` first;
     any error raised by that check is not caught here.
     """
     check = get_schema_validator(restype).validate
     check(jsonres)
     return json.dumps(jsonres)