Esempio n. 1
0
    def execute_sequence(self, _id):
        """
        Run the create / read / delete cycle for a dataset named ``_id``.

        The dataset is created twice: first with a PUT on its own URL, then
        with a POST on the datasets collection. After each creation it must
        be readable both through the returned Location header and through
        its URL, and once deleted a GET on the URL must answer 404.
        """
        url = '/v1/datasets/' + quote(_id, safe='')
        mldb.log(url)

        def verify_then_delete(creation):
            # Both the Location header and the percent-encoded URL must lead
            # to an entity carrying the requested id.
            for target in (creation.headers['Location'], url):
                self.assertEqual(mldb.get(target).json()['id'], _id)

            mldb.delete(url)
            with self.assertMldbRaises(status_code=404):
                mldb.get(url)

        verify_then_delete(mldb.put(url, {'type': 'sparse.mutable'}))
        verify_then_delete(
            mldb.post('/v1/datasets', {'id': _id, 'type': 'sparse.mutable'}))
Esempio n. 2
0
    def test_set_return_0(self):
        """
        A route handler that sets return code 0 answers 500 on every verb.
        """
        plugin_config = {
            "type": "python",
            "params": {
                "source": {"routes": """request.set_return("", 0)"""}
            }
        }
        mldb.put("/v1/plugins/mldb2114", plugin_config)

        route = '/v1/plugins/mldb2114/routes/foo'
        # Same expectation for GET, POST, PUT and DELETE, in that order.
        for call in (mldb.get, mldb.post, mldb.put, mldb.delete):
            with self.assertRaises(ResponseException) as caught:
                call(route)
            self.assertEqual(caught.exception.response.status_code, 500)
Esempio n. 3
0
    def test_no_set_return(self):
        mldb.put(
            "/v1/plugins/mldb2114", {
                "type": "python",
                "params": {
                    "source": {
                        "routes":
                        """
from mldb import mldb
mldb.log('no return')
"""
                    }
                }
            })

        msg = "Return value is required for route handlers but not set"

        with self.assertRaisesRegex(ResponseException, msg) as e:
            mldb.get('/v1/plugins/mldb2114/routes/foo')
        self.assertEqual(e.exception.response.status_code, 500)

        with self.assertRaisesRegex(ResponseException, msg) as e:
            mldb.post('/v1/plugins/mldb2114/routes/foo')
        self.assertEqual(e.exception.response.status_code, 500)

        with self.assertRaisesRegex(ResponseException, msg) as e:
            mldb.put('/v1/plugins/mldb2114/routes/foo')
        self.assertEqual(e.exception.response.status_code, 500)

        with self.assertRaisesRegex(ResponseException, msg) as e:
            mldb.delete('/v1/plugins/mldb2114/routes/foo')
        self.assertEqual(e.exception.response.status_code, 500)
 def test_delete(self):
     """
     Create a mutable sparse dataset, commit it, then delete it.
     """
     # NOTE: this test relies on PUT and POST working.
     dataset_url = '/v1/datasets/ds'
     dataset_config = {'type' : 'sparse.mutable'}

     mldb.put(dataset_url, dataset_config)
     mldb.post(dataset_url + '/commit')
     mldb.delete(dataset_url)
Esempio n. 5
0
    def test_empty_str_json(self):
        """
        A route handler returning an empty string with code 200 yields a 200
        response whose JSON body is "" on every verb.
        """
        plugin_config = {
            "type": "python",
            "params": {
                "source": {"routes": """request.set_return("", 200)"""}
            }
        }
        mldb.put("/v1/plugins/mldb2114", plugin_config)

        route = '/v1/plugins/mldb2114/routes/foo'
        # Same expectation for GET, POST, PUT and DELETE, in that order.
        for call in (mldb.get, mldb.post, mldb.put, mldb.delete):
            response = call(route)
            self.assertEqual(response.status_code, 200)
            self.assertEqual(response.json(), "")
Esempio n. 6
0
    def test_empty_json(self):
        """
        Empty JSON returns the proper code
        """
        mldb.put(
            "/v1/plugins/mldb2114", {
                "type": "python",
                "params": {
                    "source": {
                        "routes":
                        """
if request.verb in ['GET', 'DELETE']:
    request.set_return({}, 200)
else:
    request.set_return({}, 201)
"""
                    }
                }
            })

        res = mldb.get('/v1/plugins/mldb2114/routes/foo')
        self.assertEqual(res.status_code, 200)
        self.assertEqual(res.json(), {})

        res = mldb.post('/v1/plugins/mldb2114/routes/foo')
        self.assertEqual(res.status_code, 201)
        self.assertEqual(res.json(), {})

        res = mldb.put('/v1/plugins/mldb2114/routes/foo')
        self.assertEqual(res.status_code, 201)
        self.assertEqual(res.json(), {})

        res = mldb.delete('/v1/plugins/mldb2114/routes/foo')
        self.assertEqual(res.status_code, 200)
        self.assertEqual(res.json(), {})
Esempio n. 7
0
    def test_random_sequences(self):
        """
        Compare MLDB's stddev() with numpy on random sequences.

        For every size from 2 to 99, a tabular dataset is filled with random
        values, the sample standard deviation is computed by both MLDB and
        numpy, and the relative difference is checked against a threshold.
        A report table is logged at the end and no size may exceed the
        threshold.
        """
        import numpy

        header = ('| {:^4} | {:^20} | {:^20} | {:^20} | {:^10} | {:^3} |'
                  .format('size', 'MLDB', 'Numpy', 'Diff (%)', 'Diff', 'Err'))
        row_format = \
            '| {:>4} | {:>20f} | {:>20f} | {:>.18f} | {:>10f} | {:^3} |'
        report = ["", header]

        # The diff % above which MLDB is considered "too" off compared to
        # numpy.
        max_pct_diff = 0.0000000000005
        failures = 0

        for size in range(2, 100):
            ds = mldb.create_dataset({'id' : 'rand', 'type' : 'tabular'})
            values = []
            for row in range(size):
                value = random.random() * 1000000
                values.append(value)
                ds.record_row(row, [['a', value, 0]])
            ds.commit()
            if size == 20:
                mldb.log(values)

            from_mldb = mldb.query("SELECT stddev(a) FROM rand")[1][1]
            from_numpy = float(numpy.std(values, ddof=1))
            if from_numpy != 0:
                abs_diff = abs(from_mldb - from_numpy)
                pct_diff = abs_diff / from_numpy * 100
                flag = ''
                if pct_diff > max_pct_diff:
                    failures += 1
                    flag = 'ERR'
                    # Log the offending input so the failure can be replayed.
                    mldb.log(values)
                report.append(row_format.format(
                    size, from_mldb, from_numpy, pct_diff, abs_diff, flag))
            else:
                # A zero reference would make the relative diff undefined.
                mldb.log("Skipping case where numpy_re == 0")
            mldb.delete('/v1/datasets/rand')

        mldb.log('\n'.join(report))
        mldb.log(failures)
        self.assertEqual(failures, 0)
Esempio n. 8
0
    def test_delete(self):
        """
        MLDB-1468: a deleted credential can be created again under the same
        name, while creating it twice without deleting is rejected.
        """
        url = '/v1/credentials/test_delete'
        credential = {
            "provider": "Credential collections",
            "protocol": "http",
            "location": "s3.amazonaws.com",
            "id": "dummy",
            "secret": "dummy"
        }
        config = {
            "store": {
                "resourceType": "aws:s3",
                "resource": "s3://dev.mldb.datacratic.com/test_delete",
                "credential": credential
            }
        }

        mldb.put(url, config)

        # A second PUT on the same name must be refused.
        already_exists = "entry 'test_delete' already exists"
        with self.assertRaisesRegex(ResponseException, already_exists):
            mldb.put(url, config)

        # Once deleted, the name is available again.
        mldb.delete(url)
        mldb.put(url, config)
Esempio n. 9
0
    def test_creation_of_dummy_creds(self):
        """
        Create a dummy S3 credential with PUT, read it, delete it, then
        create one again with POST on the collection.
        """
        cred_url = "/v1/credentials/s3cred"

        def dummy_creds():
            # Built fresh for each request so the two calls never share a
            # payload object.
            return {
                "store": {
                    "resourceType": "aws:s3",
                    "resource": "s3://",
                    "credential": {
                        "provider": "Credentials collection",
                        "protocol": "http",
                        "location": "s3.amazonaws.com",
                        "id": "this is my key",
                        "secret": "this is my secret"
                    }
                }
            }

        def assert_missing():
            with self.assertRaisesRegex(ResponseException, "doesn't exist"):
                mldb.get(cred_url)

        # The credential must not exist before it is created.
        assert_missing()

        mldb.log(mldb.put(cred_url, dummy_creds()))

        # try something that should work
        # mldb.get asserts the result status_code is >= 200 and < 400
        mldb.get(cred_url)

        mldb.delete(cred_url)
        assert_missing()

        mldb.log(mldb.post("/v1/credentials", dummy_creds()))
#
# MLDB-592-bs-training-failure.py
# mldb.ai inc, 2015
# this file is part of mldb. copyright 2015 mldb.ai inc. all rights reserved.
#
import csv, datetime

from mldb import mldb

######################
##  Create toy dataset
######################
# Drop the "toy" dataset first, then create it again from its config.
mldb.delete("/v1/datasets/toy")

# The toy dataset is a mutable sparse dataset.
datasetConfig = {"type": "sparse.mutable", "id": "toy"}
dataset = mldb.create_dataset(datasetConfig)


def feat_proc(k, v):
    """
    Normalize one raw feature value according to its column name.

    Args:
        k: The column name.
        v: The raw value, as a string.

    Returns:
        ``"c" + v`` for the "Pclass" column, the first character of ``v``
        for the "Cabin" column (an empty string when ``v`` is empty), and
        ``v`` unchanged for any other column.
    """
    if k == "Pclass":
        return "c" + v
    if k == "Cabin":
        # Slice instead of indexing: an empty cabin value yields "" rather
        # than raising IndexError.
        return v[:1]
    return v


ts = datetime.datetime.now()
titanic_dataset = \
    def test_it(self):
        """
        Read back, through a continuous.window dataset, a row recorded in a
        continuous dataset whose storage dataset has been deleted.

        A "recorder" continuous dataset backed by a sqlite metadata dataset
        receives one row and is committed. Every dataset listed before the
        commit, other than the recorder and the metadata dataset, is then
        deleted, and a "window" dataset built on the same metadata must
        still return the recorded row.
        """
        create_storage_js = """
        var config = { type: "beh.binary.mutable" };
        var dataset = mldb.createDataset(config);
        var output = { config: dataset.config() };
        output;
        """

        save_storage_js = """
        var uri = "file://tmp/MLDB-530-" + new Date().toISOString() + ".beh";
        var addr = "/v1/datasets/" + args.datasetId;
        var res = mldb.post(addr + "/routes/saves", { dataFileUrl: uri });
        var output = { metadata: mldb.get(addr).json.status, config: res.json};
        output;
        """

        def script_run(js_source):
            # Both storage hooks are javascript script.run procedures that
            # differ only by their source.
            return {
                "type": "script.run",
                "params": {
                    "language": "javascript",
                    "scriptConfig": {"source": js_source}
                }
            }

        filename = 'tmp/MLDB-530-metadata.sqlite'
        metadata_url = "file://" + filename

        try:
            os.mkdir('tmp')
        except OSError:
            # mkdir failed (the directory presumably exists already): remove
            # any metadata file left there.
            try:
                os.unlink(filename)
            except OSError:
                # Nothing to remove.
                pass

        recorder_config = {
            "id": "recorder",
            "type": "continuous",
            "params": {
                "commitInterval": "0s",
                "metadataDataset": {
                    "type": "sqliteSparse",
                    "id": "metadataDb",
                    "params": {"dataFileUrl": metadata_url}
                },
                "createStorageDataset": script_run(create_storage_js),
                "saveStorageDataset": script_run(save_storage_js)
            }
        }
        mldb.post('/v1/datasets', recorder_config)

        timestamp = datetime.datetime.now(tzlocal()).isoformat()
        mldb.post('/v1/datasets/recorder/rows', {
            'rowName': 'row1',
            'columns': [['colA', 1, timestamp]]
        })

        # The listing is taken before the commit on purpose.
        datasets = mldb.get('/v1/datasets').json()
        mldb.post('/v1/datasets/recorder/commit')

        mldb.log(mldb.query("SELECT * FROM metadataDb"))

        # This simulates the restart of mldb. (The previously committed
        # dataset is no longer loaded.) If you remove it, the test works.
        for ds in datasets:
            if ds in ('metadataDb', 'recorder'):
                continue
            mldb.delete('/v1/datasets/' + ds)

        # At the moment of creating this test, this call fails with
        # Error initializing continuous window dataset in metadata query:
        # Attempt to refer to nonexistant dataset with id
        # auto-ece4a39e4e8ee8bc-1d16ff0d5362be47
        # (Of course the id is random.)
        window_config = {
            'id': 'window',
            "type": "continuous.window",
            "params": {
                "metadataDataset": {
                    "id": 'metadataDb',
                    "params": {"dataFileUrl": metadata_url}
                },
                'from': '1980-01-01T00:00:00Z',
                'to': '2020-01-01T00:00:00Z'
            }
        }
        mldb.post("/v1/datasets", window_config)

        rows = mldb.query("SELECT * FROM window")
        self.assertEqual(rows[1:], [["row1", 1]])
Esempio n. 12
0
        rnd = random.random()
        if rnd < x / 25. or (label is True and rnd < 0.4):
            feats.append(["feat%d" % x, 1, now])
        #else:
        #    feats.append(["feat%d" % x, 0, now])

    feats.append(["LABEL", "true" if label else "false", now])
    dataset.record_row("example-%d" % i, feats)

# The example rows have been recorded above; log the step and commit the
# dataset.
mldb.log("Committing dataset")
dataset.commit()

for cls in ["bdt", "glz", "bs"]:
    ############
    ### train a cls
    mldb.delete("/v1/procedures/tng_classif")
    rez = mldb.put(
        "/v1/procedures/tng_classif", {
            "type": "classifier.train",
            "params": {
                "trainingData": {
                    "where": "rowHash() % 3 != 1",
                    "select":
                    "{* EXCLUDING(LABEL)} as features, LABEL = 'true' as label",
                    "from": {
                        "id": "toy"
                    }
                },
                "configuration": {
                    "glz": {
                        "type": "glz",