def test_timeseries(self, tmp_path):
        """Train a timeseries predictor on generated data and check its output.

        Columns come from ``generate_value_cols(['date', 'int'], ...)`` and a
        single label column from ``generate_timeseries_labels``.  Element 0 of
        every column is used as its header.  The values before the 3/4 mark
        (header excluded) of every feature and label column are written to the
        training file; the feature values from the 3/4 mark onward are written
        to the test file without labels.

        The test passes when every prediction row contains the label column
        and its ``_confidence`` companion, and the model data fetched for the
        first entry of ``F.get_models()`` is truthy.
        """
        data_len = 120
        ts_hours = 12
        out_dir = str(tmp_path)
        train_file_name = os.path.join(out_dir, 'train_data.csv')
        test_file_name = os.path.join(out_dir, 'test_data.csv')

        features = generate_value_cols(
            ['date', 'int'], data_len, ts_hours * 3600)
        labels = [generate_timeseries_labels(features)]

        feature_headers = [column[0] for column in features]
        label_headers = [column[0] for column in labels]

        def split_index(column):
            # Position separating the training part from the testing part.
            return int(len(column) * 3 / 4)

        # Training set: features followed by labels, header element skipped.
        columns_train = (
            [column[1:split_index(column)] for column in features]
            + [column[1:split_index(column)] for column in labels]
        )
        columns_to_file(columns_train,
                        train_file_name,
                        headers=[*feature_headers, *label_headers])

        # Testing set: the remaining feature values only.
        columns_test = [column[split_index(column):] for column in features]
        columns_to_file(columns_test, test_file_name, headers=feature_headers)

        timeseries_settings = {
            'order_by': [feature_headers[0]],
            'window': 3
        }
        mdb = Predictor(name='test_timeseries')
        mdb.learn(from_data=train_file_name,
                  to_predict=label_headers,
                  timeseries_settings=timeseries_settings,
                  stop_training_in_x_seconds=10,
                  use_gpu=False,
                  advanced_args={'force_predict': True})

        results = mdb.predict(when_data=test_file_name, use_gpu=False)

        # Every prediction row must expose the value and its confidence.
        target = label_headers[0]
        expect_columns = [target, target + '_confidence']
        for row in results:
            for column_name in expect_columns:
                assert column_name in row

        first_model = F.get_models()[0]
        model_data = F.get_model_data(first_model['name'])
        assert model_data
Beispiel #2
0
    def get_models(self, status='any'):
        """Return the records from ``F.get_models()`` with parsed timestamps.

        Args:
            status: When different from ``'any'``, only records whose
                ``'status'`` entry equals this value are kept.

        Returns:
            The (optionally filtered) records.  Each present, non-None
            ``'train_end_at'``, ``'updated_at'`` and ``'created_at'`` entry is
            replaced in place by the result of ``parse_datetime``.
        """
        timestamp_keys = ('train_end_at', 'updated_at', 'created_at')

        models = F.get_models()
        if status != 'any':
            models = [
                record for record in models if record['status'] == status
            ]

        for record in models:
            for key in timestamp_keys:
                if key not in record or record[key] is None:
                    continue
                text = str(record[key])
                try:
                    # First attempt: parse only the text before the first '.'.
                    record[key] = parse_datetime(text.partition('.')[0])
                except Exception:
                    # Fall back to parsing the complete string.
                    record[key] = parse_datetime(text)
        return models
Beispiel #3
0
    def test_multilabel_prediction(self, tmp_path):
        """Train a predictor with two targets and check both are predicted.

        Four feature columns come from ``generate_value_cols`` and two label
        columns from ``generate_log_labels`` and
        ``generate_timeseries_labels``.  Element 0 of every column is used as
        its header.  The values before the 3/4 mark (header excluded) of all
        columns go to the training file; the feature values from the 3/4 mark
        onward go to the test file without labels.

        The test passes when the model data fetched for the first entry of
        ``F.get_models()`` is truthy and every prediction row contains each
        label column together with its ``_confidence`` companion.
        """
        data_len = 60
        out_dir = str(tmp_path)
        train_file_name = os.path.join(out_dir, 'train_data.csv')
        test_file_name = os.path.join(out_dir, 'test_data.csv')

        features = generate_value_cols(
            ['int', 'float', 'int', 'float'], data_len)
        labels = [
            generate_log_labels(features),
            generate_timeseries_labels(features),
        ]

        feature_headers = [column[0] for column in features]
        label_headers = [column[0] for column in labels]

        def split_index(column):
            # Position separating the training part from the testing part.
            return int(len(column) * 3 / 4)

        # Training set: features followed by labels, header element skipped.
        columns_train = (
            [column[1:split_index(column)] for column in features]
            + [column[1:split_index(column)] for column in labels]
        )
        columns_to_file(columns_train, train_file_name,
                        headers=[*feature_headers, *label_headers])

        # Testing set: the remaining feature values only.
        columns_test = [column[split_index(column):] for column in features]
        columns_to_file(columns_test, test_file_name,
                        headers=feature_headers)

        mdb = Predictor(name='test_multilabel_prediction')
        mdb.learn(
            from_data=train_file_name,
            to_predict=label_headers,
            stop_training_in_x_seconds=1,
            use_gpu=False,
            advanced_args={'force_predict': True}
        )

        results = mdb.predict(when_data=test_file_name)

        first_model = F.get_models()[0]
        model_data = F.get_model_data(first_model['name'])
        assert model_data

        # Value column followed by confidence column, for each label in order.
        expect_columns = [
            name
            for label in label_headers
            for name in (label, label + '_confidence')
        ]
        # Rows are fetched by index, as the result object's type is not
        # visible here and direct iteration may not be equivalent.
        for position in range(len(results)):
            row = results[position]
            for column_name in expect_columns:
                assert column_name in row
Beispiel #4
0
    def get_models(self, status='any'):
        """Return the records from ``F.get_models()`` with parsed timestamps.

        Records whose status is ``'training'`` are kept only when their
        ``'created_at'`` value parses to a moment later than
        ``self.config['mindsdb_last_started_at']``.
        NOTE(review): presumably this hides training runs left over from
        before the last start -- confirm.

        Args:
            status: When different from ``'any'``, only records whose
                ``'status'`` entry equals this value are kept.

        Returns:
            A list of the remaining records.  Each present, non-None
            ``'train_end_at'``, ``'updated_at'`` and ``'created_at'`` entry is
            replaced in place by the result of ``parse_datetime``.
        """
        timestamp_keys = ('train_end_at', 'updated_at', 'created_at')

        def is_relevant(record):
            # Non-training records always pass; training ones must be newer
            # than the recorded last start.
            if record['status'] != 'training':
                return True
            created_at = parse_datetime(record['created_at'])
            last_started_at = parse_datetime(
                self.config['mindsdb_last_started_at'])
            return created_at > last_started_at

        models = F.get_models()
        if status != 'any':
            models = [
                record for record in models if record['status'] == status
            ]
        models = [record for record in models if is_relevant(record)]

        for record in models:
            for key in timestamp_keys:
                if key not in record or record[key] is None:
                    continue
                text = str(record[key])
                try:
                    # First attempt: parse only the text before the first '.'.
                    record[key] = parse_datetime(text.partition('.')[0])
                except Exception:
                    # Fall back to parsing the complete string.
                    record[key] = parse_datetime(text)
        return models