Esempio n. 1
0
class PredictionPipeline():
    ''' Endless loop that fetches data, makes a prediction, applies it and
    pushes the prediction plus a timing metric to a Prometheus Pushgateway.
    '''

    def __init__(self):
        ''' Load the configuration and set up the pipeline components.

        Reads 'interval' and 'single_threshold' from
        ./config/predictionconfig.yml and the Pushgateway host from the
        PUSHGATEWAY_URL environment variable.
        '''
        # load config file
        # NOTE(review): FullLoader can still build some non-trivial objects;
        # yaml.safe_load is probably sufficient for this config - confirm.
        with open("./config/predictionconfig.yml", "r") as ymlfile:
            cfg = yaml.load(ymlfile, Loader=yaml.FullLoader)
        self.interval = cfg['interval']
        self.threshold = cfg['single_threshold']
        # init DataPreprocessor
        self.data_preprocessor = DataPreprocessor()
        # init PredictionMaker
        self.prediction_maker = PredictionMaker()
        self.registry = CollectorRegistry()
        self.pushgateway_url = os.getenv('PUSHGATEWAY_URL')
        # Register the timing gauge exactly once. Creating it inside the
        # loop would try to register the same metric name on the same
        # registry every cycle, which prometheus_client rejects.
        self._speed_gauge = Gauge('prediction_making_speed',
                                  'Time in ms for making Prediction.',
                                  registry=self.registry)

    def run(self):
        ''' Run the pipeline forever, sleeping `interval` seconds between
        cycles. This method never returns.
        '''
        while True:
            self._run_once()
            print("Going back to sleep for {} sec...".format(self.interval))
            time.sleep(self.interval)

    def _run_once(self):
        ''' Execute a single cycle: fetch data, predict, apply, push metrics.

        Does nothing beyond fetching/preprocessing when the preprocessed
        data frame is empty.
        '''
        start_millis = int(round(time.time() * 1000))
        print("Starting pipeline...")

        # get data
        df = self.data_preprocessor.get_data()
        df = self.data_preprocessor.preprocess_data(df)

        if df.empty:
            return

        # predict; the measured time spans data retrieval, preprocessing
        # and the prediction itself
        result = self.prediction_maker.make_prediction(df)
        end_millis = int(round(time.time() * 1000))
        prediction_millis = end_millis - start_millis
        prediction = Prediction(result)

        # apply changes to K8s Cluster
        prediction.apply(self.threshold)

        # push to prometheus gateway
        prediction.push_to_prometheus(self.registry,
                                      self.pushgateway_url)
        self._speed_gauge.set(prediction_millis)
        push_to_gateway('{}:9091'.format(self.pushgateway_url),
                        job='prediction-maker',
                        registry=self.registry)
        print("Prediction took {} ms.".format(prediction_millis))
Esempio n. 2
0
class MechansimCreator():
    ''' Class responsible for creating the mechanism for the prediction
    '''

    def __init__(self, algorithm):
        ''' init data preprocessor and classifier

        algorithm: name of the classifier to train, compared
            case-insensitively; 'decisiontree' or 'randomforest'.

        Raises ValueError for any other algorithm name, instead of leaving
        `self.clf` unset and failing later inside run().
        '''
        name = str(algorithm).lower()
        if name not in ('decisiontree', 'randomforest'):
            raise ValueError(
                "Unsupported algorithm {!r}; expected 'decisiontree' or "
                "'randomforest'.".format(algorithm))

        self.data_preprocessor = DataPreprocessor()
        if name == 'decisiontree':
            self.clf = DecisionTreeClassifier()
        else:
            self.clf = RandomForestClassifier()

    def run(self):
        ''' create and store mechanism

        Fits the classifier on the preprocessed data, prints the label
        distribution and the accuracy on the held-out split, then saves
        the fitted classifier wrapped in a Mechansim.
        '''
        df = self.data_preprocessor.get_data()
        df = self.data_preprocessor.preprocess_data(df)

        # nextcluster as label for ML algorithm
        y = df['nextcluster'].values.astype('int')
        # the current cluster number is the only feature
        X = df[['currentclusternumber']].values

        X_train, X_test, y_train, y_test = train_test_split(X, y)

        # TODO: imbalanced classes?
        print(df.nextcluster.value_counts())

        # Classifier
        self.clf = self.clf.fit(X_train, y_train)
        y_pred = self.clf.predict(X_test)
        print("Accuracy of Classifier:", metrics.accuracy_score(y_test, y_pred))

        # save the model to db, stamped with the current time in ms
        created_millis = int(round(time.time() * 1000))
        mechanism = Mechansim(self.clf, self.data_preprocessor.le, created_millis)
        mechanism.save()