Esempio n. 1
0
    def select(self):
        """Return the feature schema filtered by LightGBM importance.

        Builds one feature row and one target value per event in
        ``self.data``, fits an ``LGBMRegressor`` on them, and flags every
        position whose importance is at least ``self.threshold``. When
        ``self.force_base_entries`` is set, the ``__days_diff``,
        ``__ref_number`` and ``__cvss_expl`` schema entries are flagged
        regardless of their importance. The schema and the flags are then
        passed to ``Utils.get_filtered_schema`` and its result is returned.
        """
        events = self.data
        schema = Utils.get_available_feature_schema(
            events, force_base_entries=self.force_base_entries)

        samples = [
            Utils.get_element_feature(schema, ev.details, ev.date)
            for ev in events
        ]
        targets = [
            Utils.get_target_function_value(events, ev) for ev in events
        ]

        regressor = lgb.LGBMRegressor(
            boosting_type='gbdt',
            num_leaves=31,
            max_depth=-1,
            learning_rate=0.1,
            n_estimators=100,
            objective='regression',
        )
        regressor.fit(samples, targets)

        # The importance container is reused: each score is overwritten in
        # place by its keep/drop flag.
        selected = regressor.feature_importances_
        for position, importance in enumerate(selected):
            selected[position] = bool(importance >= self.threshold)

        if self.force_base_entries:
            # Base entries are kept no matter how they scored.
            for base_name in ('__days_diff', '__ref_number', '__cvss_expl'):
                selected[schema.index(base_name)] = True
        return Utils.get_filtered_schema(schema, selected)
Esempio n. 2
0
def _load_data_and_schema(args, verbose=False):
    """Load the history data at ``args.d`` and choose the feature schema.

    The full available schema is used when ``args.skip_selection`` is set;
    otherwise ``FeatureSelection`` is run with threshold ``args.ts``.
    Progress messages are printed only when ``verbose`` is true.

    Returns:
        A ``(data, schema)`` tuple.
    """
    if verbose:
        print('Parsing data...')
    history = HistoryParser(abspath(args.d))
    history.load()
    if verbose:
        print('Selecting features...')
    if args.skip_selection:
        schema = Utils.get_available_feature_schema(history.data)
    else:
        schema = FeatureSelection(history.data, threshold=args.ts).select()
    return history.data, schema


def main():
    """Command-line entry point for the ``evaluate`` and ``learn`` modes.

    The first command-line argument selects the mode; anything else prints
    the usage text and exits with status 1. The remaining arguments are
    parsed by the mode-specific parser returned by ``getparser``.

    * ``evaluate`` prints the result of ``PerformanceValidator.get_perf``
      on the loaded data with 5 splits.
    * ``learn`` trains a ``TessNeuralModel`` (when ``args.nn`` is set) or a
      ``TessSVRModel`` and saves it to ``args.o`` with a ``.tess`` suffix.
    """
    if len(sys.argv) < 2 or sys.argv[1] not in ('evaluate', 'learn'):
        usage()
        sys.exit(1)
    mode = sys.argv[1]
    # Drop the mode token by position. list.remove() deletes the first
    # element equal to its argument, which would be argv[0] whenever the
    # script path itself equals the mode name.
    del sys.argv[1]
    args = getparser(mode).parse_args()

    if mode == 'evaluate':
        if args.cm.lower() == 'shuffle':
            cross_mode = ValidationMethod.ShuffleSplit
        else:
            cross_mode = ValidationMethod.KFold
        data, schema = _load_data_and_schema(args, verbose=True)
        print('Starting validation...')
        print(PerformanceValidator.get_perf(
            data, schema, selection_method=cross_mode, n_splits=5,
            is_nn=args.nn, epochs=args.e, batch_size=args.bs))
    else:
        # Only 'learn' can reach here; the mode was validated above.
        data, schema = _load_data_and_schema(args)
        if args.nn:
            model = TessNeuralModel(data, schema, epochs=args.e,
                                    batch_size=args.bs, n_components=args.nc)
        else:
            model = TessSVRModel(data, schema, n_components=args.nc)
        model.learn_by_data()
        model.save(abspath(args.o + '.tess'))