Example #1
    def fit(self, X, Y, **kwargs):
        """Fit the internal keras classifier given as input in the constructor.
        
        Arguments:
            X (numpy ndarray) : training data
            Y (numpy ndarray) : training label

        Raises:
            TypeError: X, Y not numpy ndarray
            ModelNotLoadedError: calling function without having loaded or passed model as arg

        Returns:
            self
        """
        if self._keras_classifier is None:
            raise ModelNotLoadedError()
        type_check(X, np.ndarray, "X")
        type_check(Y, np.ndarray, "Y")
        try:
            self._keras_classifier.fit(X, Y, **kwargs)
            return self
        except Exception as e:
            raise KerasInternalError(
                "Keras internal error. See inner exception for details."
            ) from e
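
A minimal usage sketch (hedged: the h5 path is hypothetical, and X_train / y_train are assumed numpy arrays shaped for the wrapped network; extra keyword arguments are forwarded to the underlying Keras fit):

    model = WafBrainWrapper("waf-brain.h5")  # any pretrained h5 model on disk
    model.fit(X_train, y_train, epochs=5, batch_size=32)
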
    def __init__(self, filepath: str):
        """Constructs model by loading pretrained net.

        Arguments:
            filepath (str) : the path to the pretrained h5 net

        Raises:
        TypeError: filepath not  string
        FileNotFoundError: filepath not pointing to anything
        NotKerasModelError: filepath not pointing to h5 keras model
        """
        type_check(filepath, str, "filepath")
        from wafamole.models.custom.pytorch_models.ModelClass import SentimentLSTM

        self.filepath = filepath
        p = re.compile(r'.*ModelWAF(\d+).*')
        model_num = p.findall(filepath)
        self.model_number = model_num[0]
        self.vocabfile = './vocab' + self.model_number + '.json'
        with open(self.vocabfile) as f:
            self.vocab_to_int = json.load(f)
        vocab_size = len(self.vocab_to_int) + 1  # +1 for the 0 padding
        output_size = 1
        embedding_dim = 100
        hidden_dim = 32
        n_layers = 2
        net = SentimentLSTM(vocab_size, output_size, embedding_dim, hidden_dim,
                            n_layers)
        self.load_model(filepath, net)
        super(PyTorchExample, self).__init__(self._pytorch_classifier)
    def __init__(self, model: Model):
        """Initialize an evasion object.
        Arguments:
            model: the input model to evaluate

        Raises:
            TypeError: model is not Model
        """
        type_check(model, Model, "model")
        super(EvasionEngine, self).__init__(model)
def replace_nth(candidate, sub, wanted, n):
    """Replace the n-th occurrence of a portion of the candidate with wanted.

    Arguments:
        candidate (str) : the string to be modified
        sub (str) 		: regexp containing what to substitute
        wanted (str) 	: the string that will replace sub
        n (int)			: the index of the occurrence to replace

    Raises:
        TypeError : bad type passed as arguments

    Returns:
        (str) : the modified string
    """
    type_check(candidate, str, "candidate")
    type_check(sub, str, "sub")
    type_check(wanted, str, "wanted")
    type_check(n, int, "n")
    where = [m.start() for m in re.finditer(re.escape(sub), candidate)][n - 1]
    before = candidate[:where]
    after = candidate[where:]
    after = after.replace(sub, wanted, 1)
    result = before + after
    return result
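
For instance, a quick check of the 1-based indexing (expected output shown in the comment):

    replace_nth("SELECT a FROM t WHERE x IN (SELECT b)", "SELECT", "select", 2)
    # -> "SELECT a FROM t WHERE x IN (select b)"  (only the second "SELECT" changes)
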
Example #5
    def extract_features(self, value: str):
        """No feature extraction
        
        Arguments:
            value (str) : input query string
        
        Raises:
        TypeError: value is not string

        Returns:
            str : the input value
        """
        type_check(value, str, "value")
        return value
Example #6
    def __init__(self, filepath: str):
        """Constructs model by loading pretrained net.
        
        Arguments:
            filepath (str) : the path to the pretrained h5 net

        Raises:
        TypeError: filepath not  string
        FileNotFoundError: filepath not pointing to anything
        NotKerasModelError: filepath not pointing to h5 keras model
        """
        type_check(filepath, str, "filepath")
        file_exists(filepath)
        self.load(filepath)
        super(WafBrainWrapper, self).__init__(self._keras_classifier)
Example #7
    def classify(self, value: str):
        """Produce probability of being sql injection.
        
        Arguments:
            value (str) : input query
        
        Raises:
        TypeError: value is not string

        Returns:
           float : probability of being a sql injection
        """
        type_check(value, str, "value")
        malicious = process_payload(self._keras_classifier, "",
                                    [value])["score"]
        return malicious
def filter_candidates(symbols, payload):
    """It removes all the symbols that are not contained inside the input payload string.

    Arguments:
        symbols (dict)  : dictionary of symbols to filter (using the key)
        payload (str)   : the payload to use for the filtering

    Raises:
        TypeError : bad types passed as argument

    Returns:
        list : a list containing all the symbols that are contained inside the payload.

    """
    type_check(symbols, dict, "symbols")
    type_check(payload, str, "payload")

    return [s for s in symbols.keys() if s in payload]
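
A small sketch of the filtering; note that the membership test is case-sensitive:

    symbols = {"union": 1, "or": 2, "--": 3}
    filter_candidates(symbols, "1 or 1=1 -- ")
    # -> ["or", "--"]  ("union" does not occur in the payload)
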
def random_char(spaces=True):
    """Returns a random character.

    Keyword Arguments:
        spaces (bool) : include whitespace characters [default = True]

    Raises:
        TypeError: spaces is not a bool

    Returns:
        str : random character
    """

    type_check(spaces, bool, "spaces")
    chars = list(string.printable)
    chars_no_space = [c for c in chars if c not in string.whitespace]
    return random.choice(chars if spaces else chars_no_space)
def random_string(max_len=5, spaces=True):
    """It creates a random string.

    Keyword Arguments:
        max_len (int) : the maximum length of the string [default=5]
        spaces (bool) : if True, all printable characters may be produced; otherwise whitespace is excluded [default=True]

    Raises:
        TypeError: bad type passed as argument

    Returns:
        (str) : random string

    """
    type_check(max_len, int, "max_length")
    type_check(spaces, bool, "spaces")

    return "".join(
        [random_char(spaces=spaces) for i in range(random.randint(1, max_len))]
    )
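
A usage sketch; seeding the module-level PRNG makes the call reproducible, though the exact output still depends on the Python version:

    random.seed(42)
    s = random_string(max_len=8, spaces=False)
    # s holds between 1 and 8 printable, non-whitespace characters
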
Example #11
    def load_model(self, filepath, ModelClass):
        """Loads a PyTorch classifier stored in filepath.

        Arguments:
            filepath (string) : The path of the PyTorch classifier.

        Raises:
            TypeError: filepath is not string.
            FileNotFoundError: filepath not pointing to any file.
            NotPyTorchModelError: model can not be loaded.

        Returns:
            self
        """
        type_check(filepath, str, "filepath")
        file_exists(filepath)
        ModelClass.load_state_dict(torch.load(filepath))
        ModelClass.eval()
        self._pytorch_classifier = ModelClass
        return self
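
This mirrors the constructor above: the caller builds the architecture first, then loads the weights into it. A hedged sketch (the weight file name is hypothetical, and wrapper stands for a PyTorchExample-like instance):

    net = SentimentLSTM(vocab_size, output_size, embedding_dim, hidden_dim, n_layers)
    wrapper.load_model("ModelWAF1.pt", net)
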
    def extract_features(self, value: str):
        """Extract feature vector using SQLiGoT extractor.

        Arguments:
            value (str) : the input SQL query.

        Raises:
            TypeError: value is not string
            ModelNotLoadedError: calling function without having loaded or passed model as arg

        Returns:
            numpy ndarray : the feature vector
        """
        if self._pytorch_classifier is None:
            raise ModelNotLoadedError()
        type_check(value, str, "value")
        # print("Modified String", value)
        new_value = ut.PreProc(value, self.model_number, self.vocab_to_int)
        # print("pre processed value", new_value)
        return new_value
Example #13
    def extract_features(self, value: str):
        """Extract feature vector using SQLiGoT extractor.
        
        Arguments:
            value (str) : the input SQL query.

        Raises:
            TypeError: value is not a string
            ModelNotLoadedError: the model was neither loaded nor passed to the constructor

        Returns:
            numpy ndarray : the feature vector, or None if extraction fails
        """
        if self._sklearn_classifier is None:
            raise ModelNotLoadedError()
        type_check(value, str, "value")
        query = self._sklearn_classifier.preprocess_single_query(
            value, undirected=self._undirected, proportional=self._proportional
        )

        return query
Example #14
    def load(self, filepath):
        """Loads a sklearn classifier stored in filepath.
        
        Arguments:
            filepath (string) : The path of the sklearn classifier.

        Raises:
            TypeError: filepath is not a string.
            FileNotFoundError: filepath not pointing to any file.
            NotSklearnModelError: model cannot be loaded.

        Returns:
            self
        """
        type_check(filepath, str, "filepath")
        file_exists(filepath)
        try:
            self._sklearn_classifier = joblib.load(filepath)
        except Exception as e:
            raise NotSklearnModelError("Error in loading model.") from e
        return self
def replace_random(candidate, sub, wanted):
    """Replace one picked at random of the occurrence of sub inside candidate with wanted.

    Arguments:
        candidate (str) : the string to be modified
        sub (str) 		: regexp containing what to substitute
        wanted (str) 	: the string that will replace sub

    Raises:
        TypeError : bad type passed as arguments

    Returns:
        (str) : the modified string
    """
    type_check(candidate, str, "candidate")
    type_check(sub, str, "sub")
    type_check(wanted, str, "wanted")
    occurrences = [m.start() for m in re.finditer(re.escape(sub), candidate)]
    if not occurrences:
        return candidate

    pos = random.choice(occurrences)

    before = candidate[:pos]
    after = candidate[pos:]
    after = after.replace(sub, wanted, 1)

    result = before + after
    return result
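
Note that the helper returns its input unchanged when sub does not occur at all:

    replace_random("a OR b OR c", "OR", "||")  # one of the two "OR"s, chosen at random
    replace_random("a OR b", "AND", "&&")      # -> "a OR b" (no occurrence, unchanged)
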
Example #16
    def __init__(
        self, sqligot_classifier: SQLiGoT = None, undirected=True, proportional=True
    ):
        """Constructs the wrapper.
        
        Arguments:

        Keyword Arguments:
            sqligot_classifier (SQLiGoT) : SQLiGoT object (default: None)
            undirected (bool) : set undirection for feature extraction (default: (True))
            proportional (bool) : set weights for edges in graph (default: (True))

        Raises:
            TypeError: wrong input types
        
        Returns:
            SQLiGoTWrapper : the object
        """
        if sqligot_classifier is not None:
            type_check(sqligot_classifier, SQLiGoT, "sqligot_classifier")
        type_check(undirected, bool, "undirected")
        type_check(proportional, bool, "proportional")

        self._undirected = undirected
        self._proportional = proportional
        super(SQLiGoTWrapper, self).__init__(sqligot_classifier)
Example #17
    def load(self, filepath):
        """Loads a keras classifier stored in filepath.
        
        Arguments:
            filepath (string) : The path of the keras classifier.
        
        Returns:
            self
        Raises:
            TypeError: filepath is not string.
            FileNotFoundError: filepath not pointing to any file.
            NotKerasModelError: model can not be loaded.
        """
        type_check(filepath, str, "filepath")
        file_exists(filepath)

        try:
            self._keras_classifier = keras.models.load_model(filepath)
        except Exception as e:
            raise NotKerasModelError(
                "Cannot load keras model. See inner exception for details."
            ) from e
        return self
    def produce_feat_vector(self, sql_query: str, normalize=False):
        """It returns the feature vector as histogram of tokens, produced from the input query.
        
        Arguments:
            sql_query (str) : An input SQL query
        
        Keyword Arguments:
            normalize (bool) : True for producing a normalized hitogram. (default: (False))
        
        Raises:
            TypeError: params has wrong types
        
        Returns:
            numpy ndarray : histogram of tokens
        """
        type_check(sql_query, str, "sql_query")
        if normalize is not None:
            type_check(normalize, bool, "normalize")

        parsed = list(sqlparse.parse(sql_query)[0].flatten())
        allowed = self._allowed_tokens
        tokens = self._produce_tokens(parsed)
        dict_token = OrderedDict(zip(allowed, [0 for _ in range(len(allowed))]))
        for t in tokens:
            if t in dict_token:
                dict_token[t] += 1
            else:
                parent = t
                while parent is not None and parent not in dict_token:
                    parent = parent.parent
                if parent is None:
                    continue
                dict_token[parent] += 1
        values = dict_token.values()
        feature_vector = np.array(list(values))
        if normalize:
            norm = np.linalg.norm(feature_vector)
            if norm > 0:
                # Avoid dividing by zero for an all-zero histogram.
                feature_vector = feature_vector / norm
        return feature_vector
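
A usage sketch, assuming this method lives on the Tokenizer class used elsewhere in this listing:

    tokenizer = Tokenizer()
    vec = tokenizer.produce_feat_vector("SELECT name FROM users WHERE id = 1", normalize=True)
    # vec is a numpy histogram over the allowed token classes, L2-normalized
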
    def create_dataset_from_file(
        self, filepath: str, label: int, limit: int = None, unique_rows=True
    ):
        """Create dataset from fil containing sql queries.
        
        Arguments:
            filepath (str) : path of sql queries dataset
            label (int) : labels to assign to each sample
        
        Keyword Arguments:
            limit (int) : if not None, how many queries to use at most (default: (None))
            unique_rows (bool) : True for removing all the duplicates (default: (True))
        
        Raises:
            TypeError: params has wrong types
            FileNotFoundError: filepath not pointing to regular file
            TypeError: limit is not None and not int
        
        Returns:
            (numpy ndarray, list) : X and y
        """
        type_check(filepath, str, "filepath")
        type_check(label, int, "label")
        type_check(unique_rows, bool, "unique_rows")
        if limit is not None:
            type_check(limit, int, "limit")

        file_exists(filepath)
        X = []
        with open(filepath, "r") as f:
            i = 0
            for line in f:
                if limit is not None and i >= limit:
                    break
                line = line.strip()
                X.append(self.produce_feat_vector(line))
                i += 1
        if unique_rows:
            X = np.unique(X, axis=0)
        else:
            X = np.array(X)
        y = [label for _ in X]
        return X, y
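
A hedged sketch of building a labeled dataset from two query files (the paths and label values are illustrative):

    X_sane, y_sane = tokenizer.create_dataset_from_file("sane.sql", label=0, limit=1000)
    X_sqli, y_sqli = tokenizer.create_dataset_from_file("sqlia.sql", label=1, limit=1000)
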
Example #20
    def preprocess_single_query(
        self, sql_query: str, undirected: bool = False, proportional: bool = True
    ):
        """Create feature vector from input query.
        
        Arguments:
            sql_query (str) : input sql query
        
        Keyword Arguments:
            undirected (bool) : create undirected graph if true (default: (False))
            proportional (bool) : create weighted graph if true (default: (True))

        Raises:
            TypeError: arguments are not typed correctly

        Returns:
            numpy ndarray : the feature vector extracted from the query, or None if the graph cannot be built
        """
        type_check(sql_query, str, "sql_query")
        type_check(undirected, bool, "undirected")
        type_check(proportional, bool, "proportional")

        graph = self._create_graph_from_sql_query(
            sql_query, proportional=proportional, undirected=undirected
        )
        if graph is None:
            return None
        extract_feat = (
            self._extract_feature_vector_from_undirected_graph
            if undirected
            else self._extract_feature_vector_from_directed_graph
        )
        feature_vector = extract_feat(graph)
        if feature_vector is None:
            return None
        return feature_vector
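
This is the call that SQLiGoTWrapper.extract_features above delegates to; a direct sketch, where sqligot stands for a trained SQLiGoT instance:

    vec = sqligot.preprocess_single_query("SELECT * FROM t", undirected=True, proportional=True)
    if vec is None:
        pass  # the graph could not be built for this query
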
    def extract_features(self, value: str):
        """Extract the token-histogram feature vector from the input query."""
        type_check(value, str, "value")
        tokenizer = Tokenizer()
        feature_vector = tokenizer.produce_feat_vector(value)
        return feature_vector
Example #22
    def create_dataset(
        self,
        benign_filepath: str,
        sqlia_filepath: str,
        undirected=False,
        proportional=True,
        normalize=True,
        limit_samples=10000,
        balance=True,
        dump_to_file=True,
        check_cache=True,
        save_keyword_append="",
    ):
        """Create dataset of both sqli and sane queries, using the input paths.
        If check_cache is true, it tries to load previously computed dataset.
        
        Arguments:
            benign_filepath (str) : path to sane queries
            sqlia_filepath (str) : path to sqli queries

        Keyword Arguments:
            undirected (bool) : true for undirected graphs (default: (False))
            proportional (bool) : true for weighted graphs (default: (True))
            normalize (bool) : true for normalizing weights of edges (default: (True))
            limit_samples (int) : if not None, how many queries per file to consider (default: (10000))
            balance (bool) : true for balancing the number of sane and sqli queries (default: (True))
            dump_to_file (bool) : true for storing the computed queries to file (default: (True))
            check_cache (bool) : enable loading a previously saved dump (default: (True))
            save_keyword_append (str) : prepended to both file names when saving results (default: (''))

        Raises:
            TypeError: arguments are not typed correctly
        
        Returns:
            (numpy ndarray, numpy ndarray) : X and y
        """

        type_check(benign_filepath, str, "benign_path")
        type_check(undirected, bool, "undirected")
        type_check(proportional, bool, "proportional")
        type_check(normalize, bool, "normalize")
        type_check(balance, bool, "balance")
        type_check(dump_to_file, bool, "dump_to_file")
        type_check(save_keyword_append, str, "save_keyword_append")

        kind = "undirected" if undirected else "directed"
        prop = "proportional" if proportional else "unprop"
        if (
            check_cache
            and os.path.isfile(benign_filepath)
            and os.path.isfile(sqlia_filepath)
        ):
            X_benign = np.load(benign_filepath)
            X_sqlia = np.load(sqlia_filepath)
            if balance:
                X_benign, X_sqlia = self._balance_data(X_benign, X_sqlia)
            X = np.vstack((X_benign, X_sqlia))
            y = np.ones(len(X_benign) + len(X_sqlia))
            y[: len(X_benign)] = -1
            return X, y
        else:
            X_benign = self._create_feature_vectors_from_file(
                benign_filepath,
                undirected=undirected,
                proportional=proportional,
                normalize=normalize,
                limit_samples=limit_samples,
            )
            X_sqlia = self._create_feature_vectors_from_file(
                sqlia_filepath,
                undirected=undirected,
                proportional=proportional,
                normalize=normalize,
                limit_samples=limit_samples,
            )
        X_benign = np.unique(X_benign, axis=0)
        X_sqlia = np.unique(X_sqlia, axis=0)
        if dump_to_file:
            np.save(
                "{}graph_{}_{}_sane.npy".format(save_keyword_append, kind, prop), X_benign
            )
            np.save(
                "{}graph_{}_{}_sqlia.npy".format(save_keyword_append, kind, prop), X_sqlia
            )
        if balance:
            X_benign, X_sqlia = self._balance_data(X_benign, X_sqlia)
        X = np.vstack((X_benign, X_sqlia))
        y = np.ones(len(X_benign) + len(X_sqlia))
        y[: len(X_benign)] = -1
        return X, y
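
A hedged sketch; the same two paths double as the cache location when check_cache is True, so on a cache hit they must point to previously saved .npy dumps:

    X, y = sqligot.create_dataset(
        "graph_directed_proportional_sane.npy",
        "graph_directed_proportional_sqlia.npy",
        undirected=False,
        proportional=True,
    )
    # y uses -1 for sane queries and +1 for sqlia
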
    def evaluate(
        self,
        payload: str,
        max_rounds: int = 1000,
        round_size: int = 20,
        timeout: int = 14400,
        threshold: float = 0.5,
    ):
        """It tries to produce a payloads that should be classified as a benign payload.

        Arguments:
            payload (str) : the initial payload
            max_rounds (int) : maximum number of mutation rounds
            round_size (int) : how many mutation for each round
            timeout (int) : number of seconds before the timeout
            threshold (float) : default 0.5, customizable for different results

        Raises:
            TypeError : input arguments are mistyped.

        Returns:
            float, str : minimum confidence and correspondent payload that achieve that score
        """

        type_check(payload, str, "payload")
        type_check(max_rounds, int, "max_rounds")
        type_check(round_size, int, "round_size")
        type_check(timeout, int, "timeout")
        type_check(threshold, float, "threshold")

        def _signal_handler(signum, frame):
            raise TimeoutError()

        # Timeout setup
        signal.signal(signal.SIGALRM, _signal_handler)
        signal.alarm(timeout)

        evaluation_results = []
        min_confidence, min_payload = self._mutation_round(payload, round_size)
        evaluation_results.append((min_confidence, min_payload))

        try:
            while max_rounds > 0 and min_confidence > threshold:
                for candidate_confidence, candidate_payload in sorted(
                    evaluation_results
                ):
                    max_rounds -= 1

                    confidence, payload = self._mutation_round(
                        candidate_payload, round_size
                    )
                    if confidence < candidate_confidence:
                        evaluation_results.append((confidence, payload))
                        min_confidence, min_payload = min(evaluation_results)
                        break

            if min_confidence < threshold:
                print("[+] Threshold reached")
            elif max_rounds <= 0:
                print("[!] Max number of iterations reached")

        except TimeoutError:
            print("[!] Execution timed out")

        print(
            "Reached confidence {}\nwith payload\n{}".format(
                min_confidence, min_payload
            )
        )

        return min_confidence, min_payload
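
Putting the pieces together, an end-to-end evasion sketch (the model path is hypothetical):

    model = WafBrainWrapper("waf-brain.h5")
    engine = EvasionEngine(model)
    confidence, adv_payload = engine.evaluate(
        "admin' OR 1=1 #", max_rounds=500, round_size=10, timeout=60
    )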