Exemple #1
0
    def __init__(self, model: dict):
        """Build the model from an external reference or an inline mapping.

        :param model: either a string, which is handed to ``detect_actions``
            to load an external definition, or a mapping from which the
            ``name``, ``fields`` and ``mappers`` keys are read. A falsy value
            leaves the instance untouched.
        :raises CrawlinoValueError: if ``detect_actions`` returns nothing for
            a string input, or if an inline model has no ``name``.
        """
        # Nothing to load
        if not model:
            return

        # Load external
        if isinstance(model, str):
            _model = detect_actions(model)

            # Validate the *loaded* value: 'model' itself is always truthy at
            # this point, so testing it could never detect a failed load.
            if not _model:
                raise CrawlinoValueError("Invalid model input values",
                                         exc_info=True,
                                         extra={"input_model": model})

            # NOTE(review): the loaded definition is not parsed into
            # name / fields / mappers here - confirm whether it should be.
            return

        # Inline declaration
        self.name = gt(model, "name", None)

        if not self.name:
            # The property being checked is 'name', so the message says so
            raise CrawlinoValueError("Error in Models: Models must "
                                     "have 'name' property.")

        self.fields = CMModelsFields(gt(model, "fields", None))
        self.mappers = CMModelsMappers(gt(model, "mappers", None))
Exemple #2
0
def extractor_regex(prev_step: PluginReturnedData, **kwargs) \
        -> PluginReturnedData:
    """Search ``kwargs["content"]`` with a list of regular expressions.

    Two modes, selected by the optional ``reportGroup`` keyword:

    * with ``reportGroup``: every expression is searched against the whole
      content; the requested group of the first expression that matches is
      the result.
    * without it: the content is scanned line by line and the result is the
      first line matching an expression. Every expression is tried, so a
      later expression that matches replaces the result of an earlier one.

    :param prev_step: data returned by the previous step (not read here).
    :param kwargs: must contain ``expressions`` and ``content``; may contain
        ``reportGroup``.
    :return: a ``PluginReturnedData`` whose ``content`` is the match, or
        ``None`` when nothing matched or the content was empty.
    :raises CrawlinoValueError: if ``expressions`` is missing or
        ``reportGroup`` can't be converted to an integer.
    """
    log.debug("Starting plugin - extractor::regex")

    #
    # Applying expressions
    #
    try:
        expressions = kwargs["expressions"]
    except KeyError:
        raise CrawlinoValueError("You can't run a rule without expressions")

    content_to_analyze = kwargs["content"]
    regex_group = kwargs.get("reportGroup", None)

    result = None
    if content_to_analyze:
        content_to_analyze = str(content_to_analyze)

        for expression in expressions:
            # Remove last \n. Slicing (instead of indexing) keeps an empty
            # expression from raising IndexError.
            if expression[-1:] == "\n":
                expression = expression[:-1]

            if regex_group:
                try:
                    found = re.search(expression, content_to_analyze)
                except TypeError as e:
                    log.debug(e)
                    continue

                if found:
                    try:
                        regex_group = int(regex_group)
                    except (TypeError, ValueError):
                        # TypeError covers values such as lists or dicts
                        raise CrawlinoValueError(
                            "Invalid 'reportGroup'. Value must be an integer")

                    result = found.group(regex_group)
                    break
            else:
                for line in content_to_analyze.splitlines():
                    try:
                        if re.search(expression, line):
                            result = line
                            # Only leaves the line loop: the remaining
                            # expressions are still evaluated
                            break
                    except TypeError as e:
                        log.error(e)

    return PluginReturnedData(**dict(content=result))
Exemple #3
0
    def __init__(self, type: str, config: Dict or None, name: str = None):
        """Store the hook definition and check that its type is known.

        :param type: hook type; it must be resolvable through
            ``CrawlinoModulesStore.find_module("hooks", type)``.
        :param config: hook configuration. A falsy value (``None`` included)
            is stored as an empty dict.
        :param name: optional name; stored as ``""`` when not given.
        :raises CrawlinoValueError: if no "hooks" module matches ``type``.
        """
        self.type = type
        self.name = name or ""
        # Normalised here, so 'self.config' is always a dict (never None)
        # from this point on and needs no further None check.
        self.config = config or {}

        if CrawlinoModulesStore.find_module("hooks", self.type) is None:
            raise CrawlinoValueError("Invalid 'type' property value",
                                     exc_info=True,
                                     extra={
                                         "given_source_type": self.type
                                     })
Exemple #4
0
    def __init__(self, fields: List[dict]):
        """Build one mapper object for each entry of *fields*.

        Every entry must be a dict with exactly one key: the key selects the
        mapper class in ``self.MAPPERS`` and its value holds the keyword
        arguments passed to that class. The objects built are stored in
        ``self.mappers`` under their ``name`` attribute.

        :param fields: list of single-key dicts; ``None`` is accepted and
            treated as an empty list.
        :raises CrawlinoFormatError: if an entry doesn't have exactly one key.
        :raises CrawlinoValueError: if the key is not a known mapper or the
            mapper rejects the given properties.
        """
        self._raw_data = fields

        self.mappers = {}

        # 'fields or []' keeps a missing (None) list from failing in the loop
        for m in fields or []:
            # Get the key
            ks = list(m.keys())

            if len(ks) != 1:
                raise CrawlinoFormatError(
                    "Invalid mapper format. Each map, "
                    "only can have one dictionary value",
                    exc_info=True,
                    extra={"map_value": str(m)})

            key_action = ks[0]

            # Determinate what sub-class build
            try:
                map_obj = self.MAPPERS[key_action](**m[key_action])
            except KeyError:
                raise CrawlinoValueError("Invalid mapper",
                                         exc_info=True,
                                         extra={"mapper_name": key_action})
            except TypeError as e:
                # The offending property is whatever follows the last
                # "argument" word of the TypeError text. When that word is
                # absent, report the whole text instead of a meaningless
                # slice of it.
                message = str(e)
                marker = "argument"
                position = message.rfind(marker)
                if position == -1:
                    invalid_arg = message
                else:
                    invalid_arg = message[position + len(marker):]

                raise CrawlinoValueError(
                    "Invalid mapper. Mapper destination "
                    "doesn't required property",
                    exc_info=True,
                    extra={
                        "invalid_property": invalid_arg,
                        "mapper_name": key_action
                    })

            # Storage the object
            self.mappers[map_obj.name] = map_obj
Exemple #5
0
def generator_numeric(*args, **kwargs):
    """Yield the numbers of ``range(args[0], args[1])``.

    Only the first two positional arguments are used; any further positional
    argument and every keyword argument is ignored. Being a generator,
    nothing (logging and validation included) runs before the first value is
    requested.

    :raises CrawlinoValueError: when args[0] is greater than args[1].
    """
    log.debug("Numeric generator plugin")

    # Lower and upper bound of the sequence
    start, end = args[:2]

    if start > end:
        message = (
            f"Start range in higher than lower, no data could be generated - "
            f"start: {start} - end: {end}")
        raise CrawlinoValueError(message)

    for number in range(start, end):
        yield number
Exemple #6
0
def _build(**kwargs):
    """Create a ``SourceData`` from the target found in the ``config`` keyword.

    The properties "url", "domain" and "ip" are looked up in that order and
    the value of the first one present is used.

    :raises CrawlinoValueError: if ``config`` is missing, empty or has none
        of the supported properties.
    """
    # A missing (or None) config must end in the descriptive error below
    # rather than in a TypeError raised by the 'in' test.
    config = kwargs.get("config") or {}
    valid_properties = ("url", "domain", "ip")

    for x in valid_properties:
        if x in config:
            return SourceData(config[x])

    raise CrawlinoValueError(
        f"Selected source type must have any of these properties: "
        f"{'|'.join(valid_properties)}")
Exemple #7
0
def hook_print(prev_step: PluginReturnedData, **kwargs):
    """Print the data of the previous step on stdout.

    The output format is read from ``config["format"]`` ("json" by default,
    "csv" also accepted). Nothing is printed when the previous step has no
    data, or when the data has an empty 'extractor_results' entry.

    :raises CrawlinoValueError: if the configured format is not supported.
    """
    log.debug("Hooks Module :: print plugin")

    allowed_output_format = ("json", "csv")

    data = prev_step.to_dict
    if not data:
        return

    output_format = kwargs.get("config", {}).get("format", "json")

    # Data coming from the extractors step carries the 'extractor_results'
    # property: when it is present but empty there is nothing to display.
    if "extractor_results" in data and not data["extractor_results"]:
        return

    if output_format not in allowed_output_format:
        raise CrawlinoValueError(
            f"Invalid output format value '{output_format}'. Allowed values "
            f"are: {'|'.join(allowed_output_format)}")

    if output_format == "json":

        def _as_plain_dict(x):
            # Fallback for the objects the json module can't serialize by
            # itself (dict subclasses sometimes make it raise TypeError)
            if hasattr(x, "to_dict"):
                return dict(x.to_dict)
            return dict(x)

        serialized = json.dumps(data,
                                default=_as_plain_dict,
                                indent=4,
                                sort_keys=True)
        print(serialized)
        return

    # After the validation above the only format left is "csv"
    print(", ".join(f"'{k}:{v}'" for k, v in data.items()))
Exemple #8
0
def generator_random(*args, **kwargs):
    """
    Yield random text values.

    Exactly three positional arguments are expected:

    arg[0]: generated type. "string" draws from digits and ASCII letters,
            any other value draws from digits only
    arg[1]: length of each generated value
    arg[2]: how many values to generate (must be bigger than 0)

    Being a generator, the validation runs when the first value is requested.
    """
    generated_type, maximum, total = args

    if total <= 0:
        raise CrawlinoValueError(
            "Total generated values must be bigger than 0")

    # Alphabet the characters are picked from
    if generated_type == "string":
        space = string.digits + string.ascii_letters
    else:
        space = string.digits

    for _ in range(total):
        picked = [random.choice(space) for _ in range(maximum)]
        yield "".join(picked)
Exemple #9
0
def input_web(prev_step: PluginReturnedData, **kwargs) -> PluginReturnedData:
    """Send one HTTP request to the target of the previous step.

    Keys read from the ``config`` keyword (all optional):

    * ``timeout``: seconds, as a number or numeric string (default 0.5).
    * ``httpMethod``: HTTP verb (default "GET").
    * ``contentType``: empty / missing for form data, "json" or
      "application/json" for a JSON body.
    * ``httpHeaders``: mapping of extra request headers.
    * ``url``: text appended to ``scheme://host`` of the target.
    * ``data``: request body, only used with post, put and delete.

    :return: a ``PluginReturnedData`` with ``status_code``, ``headers``,
        ``content`` and a ``request`` summary, or an empty
        ``PluginReturnedData`` when the request fails.
    :raises CrawlinoValueError: if the source type of the previous step is
        not supported or the timeout is not a number.
    """
    log.debug("Starting plugin - input::web")

    allowed_inputs = ("web", "domain")

    # Load data
    prev_config = dict_to_object(prev_step.to_dict)

    if prev_config.source_type not in allowed_inputs:
        raise CrawlinoValueError(f"This plugin only works with: "
                                 f"{'|'.join(allowed_inputs)}")

    # --------------------------------------------------------------------------
    # Extract config
    # --------------------------------------------------------------------------
    config = kwargs.get("config", {})

    # The timeout may come as text or as a number; a bad value is a
    # configuration error and is reported as such instead of being swallowed
    # together with the network errors below.
    try:
        timeout = float(config.get("timeout", "0.5"))
    except (TypeError, ValueError):
        raise CrawlinoValueError(
            "Invalid timeout value. It must be a float value")

    http_method = config.get("httpMethod", "GET").lower()
    http_type = config.get("contentType", None)
    http_headers = dict(config.get("httpHeaders", {}))
    http_url = config.get("url", "")

    raw_data = config.get("data", None)
    post_data = None
    if raw_data and http_method in ("post", "put", "delete"):
        # A missing contentType (None) is handled like an empty one;
        # otherwise the configured data would be silently dropped.
        if not http_type:
            # Request only accept post data as format:
            # [("id", "value"), ("user", "value2")]
            # partition() splits on the first '=' only, so values holding
            # '=' survive and every item is always a (key, value) pair.
            post_data = [
                tuple(pair.partition("=")[::2])
                for pair in raw_data.split("&")
            ]

        elif http_type in ("json", "application/json"):
            post_data = raw_data
            http_headers["Content-Type"] = "application/json"

    #
    # Fix target
    #
    url_parsed = urllib.parse.urlparse(prev_config.target)
    if not url_parsed.netloc:
        # Without a leading scheme/'//' urlparse puts the host in the path
        # ("example.com" -> netloc ""), so parse it again as a network
        # location.
        url_parsed = urllib.parse.urlparse(
            f"//{prev_config.target.lstrip('/')}")

    target = f"{url_parsed.scheme or 'http'}://{url_parsed.netloc}"

    #
    # Fix target URL
    #
    url = f"{target}{http_url}"

    try:
        response = requests.request(
            method=http_method,
            url=url,
            headers=http_headers,
            data=post_data,
            timeout=timeout
        )
    except Exception as e:
        # Best effort: any failure doing the request gives an empty result
        log.debug(e)
        return PluginReturnedData()

    result = dict(
        status_code=response.status_code,
        headers=response.headers,
        content=response.text,
        request=dict(
            method=http_method,
            url=url,
            headers=http_headers,
            data=post_data
        )
    )

    return PluginReturnedData(**result)
Exemple #10
0
def input_raw_socket(prev_step: PluginReturnedData, **kwargs) \
        -> PluginReturnedData:
    """Connect to a port of the target, send some data and read the answer.

    Keys read from the ``config`` keyword:

    * ``port``: port to connect to (required).
    * ``data``: text to send; ``"\\r\\n\\r\\n"`` is sent when missing.
    * ``timeout``: seconds, as a number or numeric string (default 0.5).
    * ``proto``: "tcp" (default) or "udp".

    :return: a ``PluginReturnedData`` with ``host``, ``status`` ("open" or
        "closed/filtered"), ``data`` (decoded answer, ``None`` when nothing
        was received) and ``port``.
    :raises CrawlinoValueError: if the source type of the previous step, the
        protocol, the timeout or the port is not valid.
    """
    log.debug("Starting plugin - input::raw-socket")
    allowed_inputs = ("ip", "web", "domain", "url")
    allowed_proto = ("tcp", "udp")

    # Load data
    prev_config = dict_to_object(prev_step.to_dict)

    if prev_config.source_type not in allowed_inputs:
        raise CrawlinoValueError(f"This plugin only works with: "
                                 f"{'|'.join(allowed_inputs)}")

    # -------------------------------------------------------------------------
    # Extract config
    # -------------------------------------------------------------------------
    config = kwargs.get("config", {})
    port_to_test = config.get("port", None)
    data_to_send = config.get("data", None)
    connection_timeout = config.get("timeout", "0.5")

    #
    # Check proto
    #
    port_proto = config.get("proto", None) or "tcp"
    if port_proto not in allowed_proto:
        raise CrawlinoValueError(f"This plugin only works with: "
                                 f"{'|'.join(allowed_proto)}")

    if port_proto == "tcp":
        proto = socket.SOCK_STREAM
    else:
        proto = socket.SOCK_DGRAM

    #
    # Checking timeout
    #
    try:
        timeout = float(connection_timeout)
    except (TypeError, ValueError):
        # TypeError: the value was explicitly set to None in the config
        raise CrawlinoValueError(
            "Invalid timeout value. It must be a float falue")

    #
    # Checking port. Done before opening the socket so a missing or invalid
    # port is reported as a configuration error.
    #
    try:
        port_number = int(port_to_test)
    except (TypeError, ValueError):
        raise CrawlinoValueError(
            "Invalid port value. It must be an integer")

    #
    # Extract target
    #
    if prev_config.source_type == "ip":
        ip = prev_config.target
    else:
        netloc = urllib.parse.urlparse(prev_config.target).netloc
        if not netloc:
            # Without a leading scheme/'//' urlparse puts the host in the
            # path ("example.com" -> netloc ""), so parse it again as a
            # network location.
            netloc = urllib.parse.urlparse(
                f"//{prev_config.target.lstrip('/')}").netloc

        ip, *_ = netloc.split(":")

    #
    # Do connection
    #
    if not data_to_send:
        data_to_send = b"\r\n\r\n"
    else:
        data_to_send = data_to_send.encode()

    # Defaults for every path that doesn't get an answer, so the result can
    # always be built (a receive timeout used to leave both names unbound).
    received_data = None
    status = "closed/filtered"

    log.debug(f"Connecting to {ip}:{port_to_test}...")
    with socket.socket(socket.AF_INET, proto) as s:
        s.settimeout(timeout)
        s.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
        code = s.connect_ex((ip, port_number))

        if code == 0:  # 0 = Open
            # The port is open even if the data exchange times out later
            status = "open"
            try:
                s.sendall(data_to_send)
                raw_answer, _, _, _ = s.recvmsg(100000)
                received_data = raw_answer.decode(errors="ignore")
            except socket.timeout:
                log.error(f"Port {port_to_test} is open but it got a "
                          f"timeout when try to get data from socket")

    return PluginReturnedData(**dict(
        host=ip,
        status=status,
        data=received_data,
        port=port_to_test
    ))