예제 #1
0
def test_load_dataverse_dataset_file(dataset_client, dataset_name):
    """Check that a dataset document read via the client loads as a DataFrame.

    ``dataset_client`` and ``dataset_name`` are supplied by the caller
    (presumably pytest fixtures -- confirm against the project's conftest).
    """
    # None is passed as the second argument to ``read``; this test only
    # exercises loading the returned document.
    dataset_document = dataset_client.read(dataset_name, None)
    df = load_dataset(dataset_document)
    # Check against the public pandas name rather than reaching through
    # ``pd.pandas.core.frame``: ``pd.pandas`` is an import artifact and
    # ``core.frame`` is an internal module path. Both name the same class.
    assert isinstance(df, pd.DataFrame)
예제 #2
0
import mlflow
import json
import sys

import pandas as pd

from burdock.client import get_dataset_client
from burdock.data.adapters import load_reader, load_metadata, load_dataset
from burdock.query.sql.private.query import PrivateQuery
from pandasql import sqldf

if __name__ == "__main__":
    # Command line: DATASET_NAME BUDGET QUERY (all three are required).
    # Fail with a usage message instead of a bare IndexError when one is
    # missing.
    if len(sys.argv) < 4:
        sys.exit("usage: {} DATASET_NAME BUDGET QUERY".format(sys.argv[0]))

    dataset_name = sys.argv[1]
    budget = float(sys.argv[2])
    query = sys.argv[3]

    with mlflow.start_run():
        dataset_document = get_dataset_client().read(dataset_name, budget)
        # NOTE(review): ``dataset`` is never read below. The call is kept
        # because it is not visible here whether ``load_dataset`` has side
        # effects the reader depends on -- confirm and drop if it does not.
        dataset = load_dataset(dataset_document)
        reader = load_reader(dataset_document)
        schema = load_metadata(dataset_document)
        private_reader = PrivateQuery(reader, schema, budget)
        rowset = private_reader.execute(query)

        # The first row of the rowset supplies the column names; the
        # remaining rows are the data.
        df = pd.DataFrame(rowset[1:], columns=rowset[0])
        with open("result.json", "w", encoding="utf-8") as stream:
            json.dump(df.to_dict(), stream)
        # Attach the written file to the active MLflow run.
        mlflow.log_artifact("result.json")
예제 #3
0
import mlflow
import json
import sys

from statistic import Count
from burdock.client import get_dataset_client
from burdock.data.adapters import load_dataset

if __name__ == "__main__":
    # Command line: [DATASET_NAME [COLUMN_NAME [BUDGET]]].
    # Every argument is optional, so each index needs its own length guard;
    # checking only ``> 1`` for all three raised IndexError whenever fewer
    # than three arguments were supplied.
    dataset_name = sys.argv[1] if len(sys.argv) > 1 else "example"
    column_name = sys.argv[2] if len(sys.argv) > 2 else "a"
    budget = float(sys.argv[3]) if len(sys.argv) > 3 else 1

    with mlflow.start_run():
        df = load_dataset(get_dataset_client().read(dataset_name, budget))
        statistic = Count(column_name, budget).release(df)

        with open("result.json", "w", encoding="utf-8") as stream:
            json.dump(statistic.as_dict(), stream)
        # Attach the written file to the active MLflow run.
        mlflow.log_artifact("result.json")