def from_url(self, url, discard=None, data_found_at=None,
             next_url_generators=None):
    """Build the URL extractor, data sanitizer and informer for ``url``.

    Args:
        url: Forwarded as the first argument to ``UrlExtractor``.
        discard: Forwarded as the first argument to ``DataSanitizer``.
        data_found_at: Forwarded as the second argument to
            ``DataSanitizer``.
        next_url_generators: Forwarded as the second argument to
            ``UrlExtractor``. When omitted (or ``None``) a new empty list
            is created for this call.

    Returns:
        ``self``, so the call can be chained.
    """
    # A literal ``[]`` default is evaluated once at definition time and would
    # be shared by every call that omits the argument; create one per call.
    if next_url_generators is None:
        next_url_generators = []

    self._url_extractor = UrlExtractor(url, next_url_generators)
    self._data_sanitizer = DataSanitizer(discard, data_found_at)
    self._informer = Informer(self._username, self._password)
    return self
def test_should_return_None_if_cleaned_dta_is_blank_list(self):
    """Expect ``clean`` to give ``None`` when the list under the data key is empty."""
    sanitizer = DataSanitizer(discard_value=['val1', 'val2'], data_key='key')

    data = sanitizer.clean(' val1 val2 {"key" : []}')

    # ``assertEquals`` is a deprecated alias (removed in Python 3.12);
    # ``assertIsNone`` also checks identity rather than mere equality.
    self.assertIsNone(data)
def test_should_remove_all_specified_in_discard_value(self):
    """Expect every ``discard_value`` entry to be dropped before the data key is read."""
    sanitizer = DataSanitizer(discard_value=['val1', 'val2'], data_key='key')

    data = sanitizer.clean(' val1 val2 {"key" : ["hello", "test"]}')

    # ``assertEqual`` replaces the deprecated ``assertEquals`` alias
    # (removed in Python 3.12).
    self.assertEqual(["hello", "test"], data)
def test_should_return_None_if_exception_occurs(self):
    """Expect ``clean`` to swallow a failure raised by ``simplejson.loads``."""
    with patch.object(simplejson, 'loads') as mock_loads:
        # Force the JSON parsing step to fail regardless of the input text.
        mock_loads.side_effect = Exception('problem')
        sanitizer = DataSanitizer()

        data = sanitizer.clean("{'key' : [{'hello':1, 'world':2}]}")

        # NOTE(review): the test name says ``None`` but the expected value
        # here is ``[]`` -- confirm which one ``clean`` is meant to return.
        # ``assertEqual`` replaces the deprecated ``assertEquals`` alias
        # (removed in Python 3.12).
        self.assertEqual([], data)
def test_should_return_data_as_is_if_data_key_not_defined(self):
    """Expect the whole parsed object back when no ``data_key`` is configured."""
    sanitizer = DataSanitizer()

    data = sanitizer.clean('{"key" : [{"hello":1, "world":2}]}')

    # ``assertEqual`` replaces the deprecated ``assertEquals`` alias
    # (removed in Python 3.12).
    self.assertEqual({'key': [{'hello': 1, 'world': 2}]}, data)
def test_should_return_data_without_truncation_if_no_discard_value(self):
    """Expect the value under ``data_key`` when no ``discard_value`` is given."""
    sanitizer = DataSanitizer(data_key='key')

    data = sanitizer.clean('{"key" : [{"hello":1, "world":2}]}')

    # ``assertEqual`` replaces the deprecated ``assertEquals`` alias
    # (removed in Python 3.12).
    self.assertEqual([{'hello': 1, 'world': 2}], data)
def test_should_clean_data_passed_in(self):
    """Expect the discard value to be removed and the list under ``data_key`` returned."""
    sanitizer = DataSanitizer(discard_value=['val'], data_key='key')

    data = sanitizer.clean(' val {"key" : [1,2]}')

    # ``assertEqual`` replaces the deprecated ``assertEquals`` alias
    # (removed in Python 3.12).
    self.assertEqual([1, 2], data)