def __init__(self, model: dict):
    """Build the model definition from an external reference or inline dict.

    :param model: either a string handled by ``detect_actions()`` (external
        model reference) or an inline dict declaration.
    :raises CrawlinoValueError: when the external model cannot be loaded or
        the resulting model carries no 'name'.
    """
    # Load external
    if model and isinstance(model, str):
        _model = detect_actions(model)
        # BUG FIX: the original tested `not model` here — always truthy at
        # this point — instead of the loaded result, so a failed external
        # load was never reported.
        if not _model:
            raise CrawlinoValueError("Invalid model input values",
                                     exc_info=True,
                                     extra={"input_model": model})
        model = _model

    # Inline declaration.
    # BUG FIX: the original only assigned self.name in the empty-model
    # branch, so a non-empty dict input crashed with AttributeError on the
    # check below; the name is now read for every input.
    self.name = gt(model, "name", None)
    if not self.name:
        raise CrawlinoValueError("Error in Models: Models must "
                                 "have 'type' property.")

    self.fields = CMModelsFields(gt(model, "fields", None))
    self.mappers = CMModelsMappers(gt(model, "mappers", None))
def extractor_regex(prev_step: PluginReturnedData, **kwargs) \
        -> PluginReturnedData:
    """Apply regular expressions to the previous step's content.

    With a 'reportGroup' config value, returns the requested group of the
    first expression that matches; otherwise returns the first matching line.

    :raises CrawlinoValueError: when no 'expressions' are given, or when
        'reportGroup' is not an integer.
    """
    # BUG FIX: the original debug message said "extractor::xpath" — this is
    # the regex extractor.
    log.debug("Starting plugin - extractor::regex")

    #
    # Applying expressions
    #
    try:
        expressions = kwargs["expressions"]
    except KeyError:
        raise CrawlinoValueError("You can't run a rule without expressions")

    content_to_analyze = kwargs["content"]
    regex_group = kwargs.get("reportGroup", None)

    result = None
    if content_to_analyze:
        content_to_analyze = str(content_to_analyze)

        for expression in expressions:
            # Remove last \n.
            # BUG FIX: the original indexed expression[-1], which raises
            # IndexError for an empty expression string.
            if expression.endswith("\n"):
                expression = expression[:-1]

            if regex_group:
                try:
                    found = re.search(expression, content_to_analyze)
                except TypeError as e:
                    log.debug(e)
                    continue

                if found:
                    try:
                        regex_group = int(regex_group)
                    except ValueError:
                        raise CrawlinoValueError(
                            "Invalid 'reportGroup'. Value must be an integer")
                    result = found.group(regex_group)
                    break
            else:
                # NOTE(review): the inner break only leaves the line loop, so
                # a later expression can overwrite 'result' — preserved as-is,
                # confirm whether last-match-wins is intended.
                for line in content_to_analyze.splitlines():
                    try:
                        if re.search(expression, line):
                            result = line
                            break
                    except TypeError as e:
                        log.error(e)

    return PluginReturnedData(content=result)
def __init__(self, type: str, config: Dict or None, name: str = None):
    """Hook definition: validates the hook type against the module store.

    :param type: hook type; must resolve via CrawlinoModulesStore.
    :param config: optional plugin configuration; defaults to {}.
    :param name: optional display name; defaults to "".
    :raises CrawlinoValueError: when 'type' does not map to a hooks module.
    """
    self.type = type
    self.name = name or ""
    self.config = config or {}

    if CrawlinoModulesStore.find_module("hooks", self.type) is None:
        raise CrawlinoValueError("Invalid 'type' property value",
                                 exc_info=True,
                                 extra={
                                     "given_source_type": self.type
                                 })
    # BUG FIX: the original ended with `if self.config is None: raise ...`,
    # which was unreachable because self.config is normalized to {} above;
    # the dead check (and its never-raised error) has been removed.
def __init__(self, fields: List[dict]):
    """Build mapper objects from a list of single-key dicts.

    :param fields: list where each item is {mapper_name: {kwargs...}};
        None or empty is accepted and produces no mappers.
    :raises CrawlinoFormatError: when an item has more than one key.
    :raises CrawlinoValueError: for an unknown mapper name or wrong kwargs.
    """
    self._raw_data = fields
    self.mappers = {}

    # BUG FIX: callers pass gt(model, "mappers", None), so 'fields' may be
    # None — iterate an empty list instead of crashing with TypeError.
    for m in fields or []:
        # Get the key: each map must be a dict with exactly one entry
        ks = list(m.keys())
        if len(ks) != 1:
            raise CrawlinoFormatError(
                "Invalid mapper format. Each map, "
                "only can have one dictionary value",
                exc_info=True,
                extra={"map_value": str(m)})

        key_action = ks[0]

        # Determinate what sub-class build
        try:
            map_obj = self.MAPPERS[key_action](**m[key_action])
        except KeyError:
            # NOTE(review): a KeyError raised inside the mapper constructor
            # itself would also land here and be reported as "Invalid mapper".
            raise CrawlinoValueError("Invalid mapper",
                                     exc_info=True,
                                     extra={"mapper_name": key_action})
        except TypeError as e:
            # Recover the offending argument name from the TypeError text
            invalid_arg = e.args[0][e.args[0].rfind("argument") +
                                    len("argument"):]
            raise CrawlinoValueError(
                "Invalid mapper. Mapper destination "
                "doesn't required property",
                exc_info=True,
                extra={
                    "invalid_property": invalid_arg,
                    "mapper_name": key_action
                })

        # Storage the object, keyed by its name
        self.mappers[map_obj.name] = map_obj
def generator_numeric(*args, **kwargs):
    """Yield the integer sequence from arg[0] (inclusive) to arg[1]
    (exclusive), as produced by ``range(start, end)``.

    (Docstring fixed: the original said "art[0]" and did not state that the
    end value is excluded.)

    :raises CrawlinoValueError: when start is greater than end.
    """
    log.debug("Numeric generator plugin")

    start, end, *_ = args

    if start > end:
        raise CrawlinoValueError(
            f"Start range in higher than lower, no data could be generated - "
            f"start: {start} - end: {end}")

    yield from range(start, end)
def _build(**kwargs):
    """Build a SourceData from the 'config' kwarg.

    Uses the first of 'url' | 'domain' | 'ip' found in the config as the
    target.

    :raises CrawlinoValueError: when none of the accepted keys is present.
    """
    # BUG FIX: default to {} so a missing/None 'config' raises the
    # descriptive CrawlinoValueError below instead of TypeError on `in None`.
    config = kwargs.get("config") or {}

    valid_properties = ("url", "domain", "ip")
    for x in valid_properties:
        if x in config:
            target = config[x]
            break
    else:
        raise CrawlinoValueError(
            f"Selected source type must have any of these properties: "
            f"{'|'.join(valid_properties)}")

    return SourceData(target)
def hook_print(prev_step: PluginReturnedData, **kwargs):
    """Print hook: dump the previous step's data as JSON or a CSV-like line.

    Does nothing when there is no data, or when the data comes from an
    extractor step that produced no results.
    """
    log.debug("Hooks Module :: print plugin")

    allowed_output_format = ("json", "csv")

    data = prev_step.to_dict
    if not data:
        return

    config = kwargs.get("config", {})
    output_format = config.get("format", "json")

    # Data produced by STEP_EXTRACTORS carries the key 'extractor_results';
    # when the extractor found nothing, there is nothing worth printing.
    if "extractor_results" in data and not data["extractor_results"]:
        return

    if output_format not in allowed_output_format:
        raise CrawlinoValueError(
            f"Invalid output format value '{output_format}'. Allowed values "
            f"are: {'|'.join(allowed_output_format)}")

    if output_format == "json":
        # 'default' is required because the json module sometimes raises
        # TypeError for dict subclasses / objects exposing 'to_dict'
        serializer = (lambda x: dict(x.to_dict)
                      if hasattr(x, "to_dict") else dict(x))
        print(json.dumps(data,
                         default=serializer,
                         indent=4,
                         sort_keys=True))
    elif output_format == "csv":
        print(", ".join(f"'{k}:{v}'" for k, v in data.items()))
def generator_random(*args, **kwargs):
    """
    Input parameters:

        arg[0]: generated type: string, number
        arg[1]: generated value len
        arg[2]: total of random values generated
    """
    kind, value_len, total = args

    # Digits only by default; "string" widens the alphabet with letters
    alphabet = string.digits
    if kind == "string":
        alphabet = alphabet + string.ascii_letters

    if total <= 0:
        raise CrawlinoValueError(
            "Total generated values must be bigger than 0")

    for _ in range(total):
        yield "".join(random.choice(alphabet) for _ in range(value_len))
def input_web(prev_step: PluginReturnedData, **kwargs) -> PluginReturnedData:
    """Perform an HTTP request against the target carried by the previous
    step and return status, headers and body.

    Network failures are best-effort: any exception yields an empty
    PluginReturnedData.

    :raises CrawlinoValueError: when the previous step's source type is not
        'web' or 'domain'.
    """
    log.debug("Starting plugin - input::web")

    allowed_inputs = ("web", "domain")

    # Load data
    prev_config = dict_to_object(prev_step.to_dict)
    if prev_config.source_type not in allowed_inputs:
        raise CrawlinoValueError(f"This plugin only works with: "
                                 f"{'|'.join(allowed_inputs)}")

    # --------------------------------------------------------------------------
    # Extract config
    # --------------------------------------------------------------------------
    config = kwargs.get("config", {})
    # BUG FIX: the original called .lower() on the timeout, which crashes
    # when the config carries a numeric value (and is meaningless anyway);
    # normalize through str() — float() below does the real parsing.
    timeout = str(config.get("timeout", "0.5"))
    http_method = config.get("httpMethod", "GET").lower()
    http_type = config.get("contentType", None)
    http_headers = {
        x: y
        for x, y in config.get("httpHeaders", {}).items()
    }
    http_url = config.get("url", "")

    post_data = None
    if config.get("data", None) and http_method in ("post", "put", "delete"):
        # BUG FIX: contentType defaults to None, so the original
        # `http_type == ""` branch never ran when the key was absent and the
        # payload was silently dropped; treat any falsy contentType as the
        # form-encoded case.
        if not http_type:
            # Request only accept post data as format:
            # [("id", "value"), ("user", "value2")]
            post_data = [
                x.split("=") for x in config.get("data", "").split("&")
            ]
        elif http_type == "json" or http_type == "application/json":
            post_data = config.get("data", "")
            http_headers["Content-Type"] = "application/json"

    #
    # Fix target: default to http:// when no scheme was given
    #
    url_parsed = urllib.parse.urlparse(prev_config.target)
    if not url_parsed.scheme:
        target = f"http://{url_parsed.netloc}"
    else:
        target = f"{url_parsed.scheme}://{url_parsed.netloc}"

    #
    # Fix target URL
    #
    url = f"{target}{http_url}"

    try:
        response = requests.request(
            method=http_method,
            url=url,
            headers=http_headers,
            data=post_data,
            timeout=float(timeout)
        )
    except Exception as e:
        # Deliberate best-effort: swallow network errors, return empty data
        log.debug(e)
        return PluginReturnedData()
    else:
        result = dict(
            status_code=response.status_code,
            headers=response.headers,
            content=response.text,
            request=dict(
                method=http_method,
                url=url,
                headers=http_headers,
                data=post_data
            )
        )
        return PluginReturnedData(**result)
def input_raw_socket(prev_step: PluginReturnedData, **kwargs) \
        -> PluginReturnedData:
    """Probe a TCP/UDP port on the previous step's target and try to read an
    initial banner from it.

    :raises CrawlinoValueError: for an unsupported source type, an invalid
        'proto', or a non-float 'timeout'.
    """
    log.debug("Starting plugin - input::raw-socket")

    allowed_inputs = ("ip", "web", "domain", "url")
    allowed_proto = ("tcp", "udp")

    # Load data
    prev_config = dict_to_object(prev_step.to_dict)
    if prev_config.source_type not in allowed_inputs:
        raise CrawlinoValueError(f"This plugin only works with: "
                                 f"{'|'.join(allowed_inputs)}")

    # -------------------------------------------------------------------------
    # Extract config
    # -------------------------------------------------------------------------
    config = kwargs.get("config", {})
    port_to_test = config.get("port", None)
    data_to_send = config.get("data", None)
    connection_timeout = config.get("timeout", "0.5")
    port_proto = "tcp"

    #
    # Check proto
    #
    if config.get("proto", None):
        if config.get("proto", None) not in allowed_proto:
            raise CrawlinoValueError(f"This plugin only works with: "
                                     f"{'|'.join(allowed_proto)}")
        port_proto = config.get("proto")

    if port_proto == "tcp":
        proto = socket.SOCK_STREAM
    else:
        proto = socket.SOCK_DGRAM

    #
    # Checking timeout
    #
    try:
        timeout = float(connection_timeout)
    except ValueError:
        # BUG FIX: typo 'falue' -> 'value' in the error message
        raise CrawlinoValueError(
            "Invalid timeout value. It must be a float value")

    #
    # Extract target
    #
    if prev_config.source_type == "ip":
        ip = prev_config.target
    else:
        ip, *_ = urllib.parse.urlparse(prev_config.target).netloc.split(":")

    #
    # Do connection
    #
    if not data_to_send:
        data_to_send = b"\r\n\r\n"
    else:
        data_to_send = data_to_send.encode()

    log.debug(f"Connecting to {ip}:{port_to_test}...")

    with socket.socket(socket.AF_INET, proto) as s:
        s.settimeout(timeout)
        s.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)

        code = s.connect_ex((ip, int(port_to_test)))
        if code == 0:  # 0 = Open
            # BUG FIX: on socket.timeout the original left both 'status' and
            # 'received_data' unbound, crashing with NameError when building
            # the result below; initialize them before attempting the read.
            status = "open"
            received_data = None
            try:
                s.sendall(data_to_send)
                d, _, _, _ = s.recvmsg(100000)
                received_data = d.decode(errors="ignore")
            except socket.timeout:
                log.error(f"Port {port_to_test} is open but it got a "
                          f"timeout when try to get data from socket")
        else:
            received_data = None
            status = "closed/filtered"

    return PluginReturnedData(**dict(
        host=ip,
        status=status,
        data=received_data,
        port=port_to_test
    ))