Esempio n. 1
0
class PredictionPipeline():
    """Periodically fetch data, make a prediction, apply it and export metrics.

    Settings ``interval`` and ``single_threshold`` are read from
    ``./config/predictionconfig.yml``. The Pushgateway host is taken from the
    ``PUSHGATEWAY_URL`` environment variable; port 9091 is appended when the
    timing metric is pushed.
    """

    def __init__(self):
        """Load the configuration and set up all collaborators.

        Raises:
            OSError: if the config file cannot be opened.
            KeyError: if ``interval`` or ``single_threshold`` is missing.
        """
        # load config file
        # NOTE(review): FullLoader can build more than plain data types. If
        # this file could ever come from an untrusted source, switch to
        # yaml.safe_load.
        with open("./config/predictionconfig.yml", "r") as ymlfile:
            cfg = yaml.load(ymlfile, Loader=yaml.FullLoader)
        self.interval = cfg['interval']
        self.threshold = cfg['single_threshold']
        # init DataPreprocessor
        self.data_preprocessor = DataPreprocessor()
        # init PredictionMaker
        self.prediction_maker = PredictionMaker()
        self.registry = CollectorRegistry()
        self.pushgateway_url = os.getenv('PUSHGATEWAY_URL')
        # Register the timing gauge exactly once. Creating it again on every
        # loop iteration would fail with a duplicate-registration error.
        self._speed_gauge = Gauge('prediction_making_speed',
                                  'Time in ms for making Prediction.',
                                  registry=self.registry)

    def run(self):
        """Run the pipeline forever, sleeping ``interval`` between rounds.

        A round whose preprocessed data frame is empty skips prediction and
        metric export and goes straight back to sleep.
        """
        while True:
            start_millis = int(round(time.time() * 1000))
            print("Starting pipeline...")

            # get data
            df = self.data_preprocessor.get_data()
            df = self.data_preprocessor.preprocess_data(df)

            if not df.empty:
                self._predict_and_publish(df, start_millis)

            # sleep until next interval
            print("Going back to sleep for {} sec...".format(self.interval))
            time.sleep(self.interval)

    def _predict_and_publish(self, df, start_millis):
        """Predict on ``df``, apply the result and push metrics.

        Args:
            df: non-empty, preprocessed data frame.
            start_millis: wall-clock time in ms at which the round started.
                The reported duration is measured from this point, so it
                includes fetching and preprocessing the data.
        """
        # predict
        result = self.prediction_maker.make_prediction(df)
        end_millis = int(round(time.time() * 1000))
        prediction_millis = end_millis - start_millis
        prediction = Prediction(result)

        # apply changes to K8s Cluster
        prediction.apply(self.threshold)

        # push to prometheus gateway
        prediction.push_to_prometheus(self.registry, self.pushgateway_url)
        self._speed_gauge.set(prediction_millis)
        push_to_gateway('{}:9091'.format(self.pushgateway_url),
                        job='prediction-maker',
                        registry=self.registry)
        print("Prediction took {} ms.".format(prediction_millis))
Esempio n. 2
0
class TestDataPreprocessor(unittest.TestCase):
    """ Unit tests for DataPreprocessor.preprocess_data
    """
    def setUp(self):
        """ Create a fresh DataPreprocessor before every test
        """
        self.data_preprocessor = DataPreprocessor()

    def test_preprocess_empty_data(self):
        """ An empty input frame must come back as an empty frame with the
        columns sessionid, nextcluster and starttime
        """
        input_columns = [
            'traceid', 'sessionid', 'servicessequence', 'starttime'
        ]
        expected_columns = ['sessionid', 'nextcluster', 'starttime']

        raw = pd.DataFrame(columns=input_columns)
        actual = self.data_preprocessor.preprocess_data(raw)
        expected = pd.DataFrame(columns=expected_columns)

        assert_frame_equal(actual, expected)

    def test_preprocess_data(self):
        """ A single trace row must be turned into the expected output row
        (compared with columns sorted and dtypes ignored)
        """
        input_columns = [
            'traceid', 'sessionid', 'servicessequence', 'starttime'
        ]
        input_rows = [
            ['1234', '1234', 'service-1,service-2,service-1', '1234'],
        ]
        expected_columns = [
            'index', 'traceid', 'sessionid', 'servicessequence',
            'starttime', 'currentclusternumber', 'clustersequence',
            'nextcluster'
        ]
        expected_rows = [
            [0.0, '1234', '1234', 'service-1,service-2', '1234', 0, 0, 0],
        ]

        raw = pd.DataFrame(input_rows, columns=input_columns)
        actual = self.data_preprocessor.preprocess_data(raw)
        expected = pd.DataFrame(expected_rows, columns=expected_columns)

        # Column order is irrelevant here, so both frames are sorted by
        # column label before comparing.
        actual_sorted = actual.sort_index(axis=1)
        expected_sorted = expected.sort_index(axis=1)
        assert_frame_equal(actual_sorted,
                           expected_sorted,
                           check_dtype=False,
                           check_index_type=False)
Esempio n. 3
0
class MechansimCreator():
    ''' Class responsible for creating the mechanism for the prediction

    Trains a classifier on the preprocessed data and stores it, together
    with the preprocessor's ``le`` attribute and a creation timestamp, as a
    ``Mechansim``.
    '''

    # Values accepted for ``algorithm`` (compared case-insensitively).
    SUPPORTED_ALGORITHMS = ('decisiontree', 'randomforest')

    def __init__(self, algorithm):
        ''' init data preprocessor and classifier

        Args:
            algorithm: name of the classifier to train; one of
                ``SUPPORTED_ALGORITHMS``, in any letter case.

        Raises:
            ValueError: if ``algorithm`` is not supported. Without this
                check ``self.clf`` would stay unset and ``run`` would only
                fail later with an AttributeError.
        '''
        algorithm_name = str(algorithm).lower()
        # Validate first so a bad name fails before anything is constructed.
        if algorithm_name not in self.SUPPORTED_ALGORITHMS:
            raise ValueError(
                "Unsupported algorithm {!r}; expected one of: {}".format(
                    algorithm, ', '.join(self.SUPPORTED_ALGORITHMS)))

        self.data_preprocessor = DataPreprocessor()
        if algorithm_name == 'decisiontree':
            self.clf = DecisionTreeClassifier()
        else:
            self.clf = RandomForestClassifier()

    def run(self):
        ''' create and store mechanism

        Fetches and preprocesses the data, fits the classifier with
        ``currentclusternumber`` as the only feature and ``nextcluster`` as
        the label, prints the accuracy on the held-out split and saves the
        resulting ``Mechansim``.
        '''
        df = self.data_preprocessor.get_data()
        df = self.data_preprocessor.preprocess_data(df)
        # nextcluster as label for ML algorithm
        y = df['nextcluster'].values
        y = y.astype('int')

        X_train, X_test, y_train, y_test = train_test_split(
            df[['currentclusternumber']].values, y)

        # TODO: imbalanced classes?
        print(df.nextcluster.value_counts())

        # Classifier
        self.clf = self.clf.fit(X_train, y_train)
        y_pred = self.clf.predict(X_test)
        print("Accuracy of Classifier:",
              metrics.accuracy_score(y_test, y_pred))

        # save the model to db
        # NOTE(review): `le` is presumably the label encoder fitted during
        # preprocessing - confirm against DataPreprocessor.
        created_millis = int(round(time.time() * 1000))
        mechanism = Mechansim(self.clf, self.data_preprocessor.le,
                              created_millis)
        mechanism.save()