def __init__(self, model: dict):
    """Initialise the model from an inline definition or an external one.

    :param model: either a mapping that describes the model inline
        (``name``, ``fields`` and ``mappers`` entries are read from it)
        or a string, which is handed to ``detect_actions`` to load the
        definition. A falsy value leaves the instance untouched.
    :raises CrawlinoValueError: if ``detect_actions`` yields an empty
        result for a string input, or if an inline definition has no
        ``name``.
    """
    if not model:
        return

    # Load external
    if isinstance(model, str):
        _model = detect_actions(model)

        # Validate the *loaded* value. ``model`` itself is already known
        # to be truthy at this point, so testing it could never fail.
        if not _model:
            raise CrawlinoValueError("Invalid model input values",
                                     exc_info=True,
                                     extra={"input_model": model})

        # NOTE(review): the loaded definition is only bound to the local
        # name; name/fields/mappers are not populated on this path.
        # Confirm whether that is intended.
        model = _model

    # Inline declaration
    else:
        self.name = gt(model, "name", None)
        if not self.name:
            # The property being validated is 'name', so say so.
            raise CrawlinoValueError("Error in Models: Models must "
                                     "have 'name' property.")

        self.fields = CMModelsFields(gt(model, "fields", None))
        self.mappers = CMModelsMappers(gt(model, "mappers", None))
def __init__(self, crawler_file: File):
    """Build the crawler model from an already-parsed crawler file.

    Reads the ``name``, ``description``, ``tags``, ``config``,
    ``sources``, ``input``, ``extractors`` and ``hooks`` entries of
    ``crawler_file.parsed`` and wraps each of them in its model object.
    When no hook is declared, a single ``print`` hook is installed.

    :param crawler_file: object whose ``parsed`` attribute holds the
        crawler definition (read through ``gt`` and ``.get``).
    :raises CrawlinoValueError: if ``name`` is missing or empty, if
        ``tags`` is not a list of strings, or if the ``sources`` or
        ``input`` section is missing.
    """

    def _build(factory, raw):
        # Every section entry shares the same type/config/name layout.
        return factory(type=raw.get("type", None),
                       config=raw.get("config", None),
                       name=raw.get("name", None))

    self.crawler_file = crawler_file
    parsed = self.crawler_file.parsed

    self.name = gt(parsed, "name", None)
    self.description = gt(parsed, "description", "")
    self.tags = gt(parsed, "tags", [])

    if not self.name:
        raise CrawlinoValueError("Error in self.models: self.models must "
                                 "have 'name' property.")
    if not isinstance(self.tags, list) or \
            not all(isinstance(x, str) for x in self.tags):
        raise CrawlinoValueError("tags must be a list of strings")

    # Optional section: ``or []`` also covers a key present with a null
    # value, which would otherwise fail when iterated.
    self.config = [_build(CMConfig, x) for x in parsed.get("config") or []]

    # Iterating a missing section used to fail with an opaque TypeError;
    # report it the same way as a missing name instead.
    raw_sources = parsed.get("sources")
    if raw_sources is None:
        raise CrawlinoValueError("Error in crawler: crawler must "
                                 "have 'sources' property.")
    self.sources = [_build(CMSource, x) for x in raw_sources]

    # NOTE(review): ``model`` is built with CMSource from the "sources"
    # section, exactly like ``sources`` above. This looks like a
    # copy-paste; confirm the intended class and section. Behaviour is
    # kept unchanged here.
    self.model = [_build(CMSource, x) for x in raw_sources]

    # Same reasoning as for "sources": fail with a clear message rather
    # than an AttributeError on None.
    _input = parsed.get("input")
    if _input is None:
        raise CrawlinoValueError("Error in crawler: crawler must "
                                 "have 'input' property.")
    self.input = [_build(CMInput, _input)]

    # Only entries that actually declare a rule set are kept.
    _extractors = parsed.get("extractors") or []
    self.extractors = [
        CMRuleSet(x.get("ruleSet")) for x in _extractors if "ruleSet" in x
    ]

    self.hooks = [_build(CMHook, x) for x in parsed.get("hooks") or []]

    # Setting default hook
    if not self.hooks:
        self.hooks = [CMHook(type="print", config=None, name=None)]
def __init__(self, fields: List[Dict]):
    """Index the declared fields by name.

    The raw input is kept in ``_raw_data``; ``fields`` becomes a dict
    that maps each field name to its ``CMModelsField``. A falsy input
    leaves that dict empty.

    :param fields: iterable of mappings, each providing a ``"name"``
        entry and optionally a ``"key"`` entry (``False`` when absent).
    :raises CrawlinoFormatError: if two entries produce the same field
        name.
    """
    self._raw_data = fields
    # Same dict object under both names, so entries registered before a
    # failure remain visible on the instance.
    registry = self.fields = {}

    for entry in fields or ():
        entry_name = entry["name"]
        # NOTE(review): the name is passed as both the first and the
        # second positional argument. Confirm against CMModelsField's
        # signature.
        parsed = CMModelsField(entry_name,
                               entry_name,
                               gt(entry, "key", False))

        if parsed.name in registry:
            raise CrawlinoFormatError("Repeated property type for fields",
                                      exc_info=True,
                                      extra={'repeated_field': parsed.name})

        registry[parsed.name] = parsed
def test_gt_dict_return_ok():
    """gt yields the stored value when the key is present in the dict."""
    source = {"hello": "world"}

    found = gt(source, "hello", None)

    assert found == "world"
def test_gt_non_dict_ok():
    """gt is expected to give back a non-dict input, not the default."""
    source = 1

    found = gt(source, "xxx", "world")

    assert found == 1
def test_gt_dict_return_default():
    """gt falls back to the supplied default when the key is absent."""
    source = {"hello": "world"}

    found = gt(source, "xxx", "world")

    assert found == "world"