Esempio n. 1
0
    def select(self):
        """Return the feature schema filtered by LightGBM feature importance.

        A feature matrix and a target vector are built from ``self.data``,
        an ``lgb.LGBMRegressor`` is fitted on them, and every schema entry
        whose importance is at least ``self.threshold`` is kept. When
        ``self.force_base_entries`` is set, the ``__days_diff``,
        ``__ref_number`` and ``__cvss_expl`` entries are kept regardless of
        their importance.

        Returns:
            Whatever ``Utils.get_filtered_schema`` produces for the schema
            and the computed keep-flags.
        """
        schema = Utils.get_available_feature_schema(
            self.data, force_base_entries=self.force_base_entries)

        samples = [
            Utils.get_element_feature(schema, entry.details, entry.date)
            for entry in self.data
        ]
        targets = [
            Utils.get_target_function_value(self.data, entry)
            for entry in self.data
        ]

        regressor = lgb.LGBMRegressor(boosting_type='gbdt',
                                      num_leaves=31,
                                      max_depth=-1,
                                      learning_rate=0.1,
                                      n_estimators=100,
                                      objective='regression')
        regressor.fit(samples, targets)

        # Overwrite each importance score in place with a keep/drop flag.
        features = regressor.feature_importances_
        for position, importance in enumerate(features):
            features[position] = bool(importance >= self.threshold)

        if self.force_base_entries:
            for forced in ('__days_diff', '__ref_number', '__cvss_expl'):
                features[schema.index(forced)] = True

        return Utils.get_filtered_schema(schema, features)
Esempio n. 2
0
 def _get_generator(self, data_gen):
     """Yield shuffled ``(features, targets)`` mini-batches forever.

     Args:
         data_gen: either a ``(X, Y)`` tuple of already extracted features
             and targets, or an iterable of events from which features and
             targets are extracted through ``Utils``.

     Yields:
         A pair of numpy arrays, each holding ``self.batch_size`` entries.
         Samples are reshuffled every time the whole set has been consumed,
         so a batch may span two consecutive passes.

     Raises:
         ValueError: if there are no samples to draw from.
     """
     import random
     if isinstance(data_gen, tuple):
         X = data_gen[0]
         Y = data_gen[1]
     else:
         # Materialise once so one-shot iterables are not exhausted by the
         # first of the two passes below.
         events = list(data_gen)
         X = [
             np.array(
                 Utils.get_element_feature(self.schema, event.details,
                                           event.date))
             for event in events
         ]
         Y = [
             Utils.get_target_function_value(self.data, event)
             for event in events
         ]
     samples = list(zip(X, Y))
     if not samples:
         raise ValueError("Can't generate batches from an empty data set")
     random.shuffle(samples)
     # The pass length is the number of samples, not len(data_gen): for the
     # tuple form len(data_gen) is always 2.
     total = len(samples)
     cursor = 0
     while True:
         samples_X = []
         samples_Y = []
         for _ in range(self.batch_size):
             if cursor == total:
                 cursor = 0
                 random.shuffle(samples)
             sample_x, sample_y = samples[cursor]
             samples_X.append(sample_x)
             samples_Y.append(sample_y)
             cursor += 1
         yield np.array(samples_X), np.array(samples_Y)
Esempio n. 3
0
 def learn_by_data(self):
     """Fit ``self.model`` on the events stored in ``self.data``.

     Features are extracted for every event using ``self.schema``; when
     ``self.use_reduction`` is set they are first fitted to and transformed
     by ``self.pca``. Targets are computed per event and the model is then
     fitted on the resulting pair.

     Raises:
         ValueError: if either ``self.data`` or ``self.schema`` is None.
     """
     if not (self.data is not None and self.schema is not None):
         raise ValueError("You can't fit the model without having a data and schema")

     samples = [
         Utils.get_element_feature(self.schema, entry.details, entry.date)
         for entry in self.data
     ]
     if self.use_reduction:
         self.pca.fit(samples)
         samples = self.pca.transform(samples)

     targets = [
         Utils.get_target_function_value(self.data, entry)
         for entry in self.data
     ]
     self.model.fit(samples, targets)
Esempio n. 4
0
    def get_perf(data,
                 schema,
                 n_splits=5,
                 selection_method=ValidationMethod.ShuffleSplit,
                 is_nn=False,
                 epochs=100,
                 batch_size=1):
        """Cross-validate a model and return its metrics averaged over folds.

        Args:
            data: iterable of events exposing ``details`` and ``date``.
            schema: feature schema used to turn each event into a vector.
            n_splits: number of train/test splits to evaluate.
            selection_method: ``ValidationMethod.KFold`` selects a shuffled
                ``KFold``; any other value selects ``ShuffleSplit`` with a
                25% test share and ``random_state=0``.
            is_nn: evaluate a ``TessNeuralModel`` instead of a
                ``TessSVRModel``.
            epochs: forwarded to ``TessNeuralModel`` only.
            batch_size: forwarded to ``TessNeuralModel`` only.

        Returns:
            dict mapping each metric name to the mean of the values reported
            by ``PerformanceValidator.get_perf_model`` across the splits.
            A metric missing from a fold's report counts as 0 for that fold.
        """
        metric_names = (
            'exp_var',
            'max_error',
            'mean_abs_error',
            'mean_squared_error',
            'mean_squared_log_error',
            'median_abs_error',
            'r2',
        )
        totals = dict.fromkeys(metric_names, 0)

        X = np.array([
            np.array(
                Utils.get_element_feature(schema, event.details, event.date))
            for event in data
        ])
        Y = np.array(
            [Utils.get_target_function_value(data, event) for event in data])

        if selection_method == ValidationMethod.KFold:
            selector = KFold(n_splits=n_splits, shuffle=True)
        else:
            selector = ShuffleSplit(n_splits=n_splits,
                                    test_size=.25,
                                    random_state=0)

        for train_index, test_index in selector.split(X):
            train_index = train_index.astype(int)
            test_index = test_index.astype(int)
            X_train, X_test = X[train_index], X[test_index]
            y_train, y_test = Y[train_index], Y[test_index]
            # A fresh model per fold so no state leaks between splits.
            if is_nn:
                model = TessNeuralModel(schema=schema,
                                        epochs=epochs,
                                        batch_size=batch_size)
            else:
                model = TessSVRModel(schema=schema)
            model.learn(X_train, y_train)
            partial_res = PerformanceValidator.get_perf_model(
                model, X_test, y_test)
            for name in metric_names:
                totals[name] += partial_res.get(name, 0)

        # Average over the number of splits actually requested rather than
        # a fixed 5, so the result is correct for any n_splits.
        return {name: total / n_splits for name, total in totals.items()}
Esempio n. 5
0
 def learn_by_data(self):
     """Train ``self.model`` on ``self.data`` using the batch generator.

     Features are extracted for every event using ``self.schema`` and, when
     ``self.use_reduction`` is set, fitted to and transformed by
     ``self.pca``. The features and targets are handed as a tuple to
     ``self._get_generator`` and the model is trained with
     ``fit_generator`` for ``self.epochs`` epochs of
     ``len(self.data) // self.batch_size`` steps each.

     Raises:
         ValueError: if either ``self.data`` or ``self.schema`` is None.
     """
     if not (self.data is not None and self.schema is not None):
         raise ValueError(
             "You can't fit the model without having a data and schema")

     steps_per_epoch = len(self.data) // self.batch_size

     samples = [
         Utils.get_element_feature(self.schema, entry.details, entry.date)
         for entry in self.data
     ]
     if self.use_reduction:
         self.pca.fit(samples)
         samples = self.pca.transform(samples)

     targets = [
         Utils.get_target_function_value(self.data, entry)
         for entry in self.data
     ]

     batches = self._get_generator((samples, targets))
     self.model.fit_generator(generator=batches,
                              epochs=self.epochs,
                              steps_per_epoch=steps_per_epoch)
Esempio n. 6
0
def main():
    """Command-line entry point with two modes, ``evaluate`` and ``learn``.

    The mode is taken from the first command-line argument and removed from
    ``sys.argv`` before the mode-specific argument parser runs. ``evaluate``
    prints cross-validation metrics for the parsed history; ``learn`` fits a
    model on the parsed history and saves it to ``<args.o>.tess``. With a
    missing or unknown mode, ``usage()`` is called and the process exits
    with status 1.
    """
    if len(sys.argv) < 2 or sys.argv[1] not in ('evaluate', 'learn'):
        usage()
        sys.exit(1)

    mode = sys.argv[1]
    sys.argv.remove(mode)
    args = getparser(mode).parse_args()

    def pick_schema(events):
        # Either the full available schema or the importance-filtered one.
        if args.skip_selection:
            return Utils.get_available_feature_schema(events)
        return FeatureSelection(events, threshold=args.ts).select()

    if mode == 'evaluate':
        if args.cm.lower() == 'shuffle':
            cross_mode = ValidationMethod.ShuffleSplit
        else:
            cross_mode = ValidationMethod.KFold

        print('Parsing data...')
        history = HistoryParser(abspath(args.d))
        history.load()

        print('Selecting features...')
        schema = pick_schema(history.data)

        print('Starting validation...')
        report = PerformanceValidator.get_perf(history.data,
                                               schema,
                                               selection_method=cross_mode,
                                               n_splits=5,
                                               is_nn=args.nn,
                                               epochs=args.e,
                                               batch_size=args.bs)
        print(report)
    elif mode == 'learn':
        history = HistoryParser(abspath(args.d))
        history.load()
        schema = pick_schema(history.data)

        if args.nn:
            model = TessNeuralModel(history.data,
                                    schema,
                                    epochs=args.e,
                                    batch_size=args.bs,
                                    n_components=args.nc)
        else:
            model = TessSVRModel(history.data, schema, n_components=args.nc)

        model.learn_by_data()
        model.save(abspath(args.o + '.tess'))
0
    def load(self):
        """Parse the CSV history at ``self.data_path`` into vulnerability events.

        Each row must provide the ``id``, ``data`` and ``outcome`` columns;
        a ``target`` column is optional. Rows whose CVE was published fewer
        than ``self.min_age`` days ago are reported and skipped, as are rows
        for which no vulnerability details can be obtained. Details are
        fetched once per CVE id and shared by every event for that id.

        The parsed list is stored in ``self.data`` only after the whole file
        has been read, so a failure part-way through does not leave a
        partial result cached. Later calls return the cached list.

        Returns:
            The list of ``VulnerabilityEvent`` objects (also ``self.data``).

        Raises:
            AttributeError: if capec, keywords and cwe are all skipped.
        """
        if self.skip_capec and self.skip_keywords and self.skip_cwe:
            raise AttributeError(
                "Can't skip capec entries, cwe elements and keywords all together!"
            )
        if self.data is not None:
            return self.data

        key_parser = KeywordsParser()
        cve = CVESearch()
        events = []
        # Details already resolved, keyed by CVE id, so repeated ids do not
        # need a scan over every event collected so far.
        details_by_id = {}

        # newline='' lets the csv module do its own newline handling.
        with open(self.data_path, mode='r', newline='') as csv_file:
            csv_reader = csv.DictReader(csv_file, delimiter=',')
            today = datetime.now(tz=None)
            for row in csv_reader:
                cve_id = row['id']
                info = cve.find_cve_by_id(cve_id)
                published = datetime.strptime(info['publishedDate'],
                                              '%Y-%m-%dT%H:%MZ')
                if (today - published).days < self.min_age:
                    print('Ignoring event for {}'.format(cve_id))
                    continue

                vuln_details = details_by_id.get(cve_id)
                if vuln_details is None:
                    vuln_details = Utils.get_vulnerability(
                        cve_id,
                        cve,
                        key_parser,
                        self.skip_capec,
                        self.skip_keywords,
                        self.skip_cwe,
                        target=row.get('target'))
                    if vuln_details is None:
                        continue
                    details_by_id[cve_id] = vuln_details

                events.append(
                    VulnerabilityEvent(cve_id, row['data'], row['outcome'],
                                       vuln_details))

        self.data = events
        return self.data
Esempio n. 8
0
 def get_exploitability(self, vulnerability, time):
     """Scale the vulnerability's ``e_score`` by the model's prediction.

     The prediction is requested for a single feature vector built from
     ``self.schema``, the vulnerability and ``time``.

     Raises:
         ValueError: if ``self.model`` is None.
     """
     if self.model is not None:
         base_score = vulnerability.e_score
         predict = self.model.predict
         feature_row = Utils.get_element_feature(self.schema, vulnerability,
                                                 time)
         return base_score * predict([feature_row])
     raise ValueError("Model is not set")
Esempio n. 9
0
 def get_exploitability(self, vulnerability, time):
     """Scale the vulnerability's ``e_score`` by the model's prediction.

     The prediction is requested for a single feature vector built from
     ``self.schema``, the vulnerability and ``time``.

     Raises:
         ValueError: if ``self.model`` is None.
     """
     # Fail with an explicit message instead of an AttributeError on
     # ``None.predict`` when no model has been set.
     if self.model is None:
         raise ValueError("Model is not set")
     features = Utils.get_element_feature(self.schema, vulnerability, time)
     return vulnerability.e_score * self.model.predict([features])