def test_dataset_to_json():
    """Every listed dataset should serialize to a valid JSON payload."""
    files = DatasetManager.list_datasets()
    for file in files:
        # renamed from `id` to avoid shadowing the builtin
        dataset_id = file['id']
        dataset = DatasetManager.get_dataset(dataset_id)
        assert dataset is not None
        payload = json.dumps(dataset.get_payload())
        # removed dead commented-out file-writing code (was a no-op
        # triple-quoted string literal in the middle of the test)
        assert is_json(payload)
def test_query2dataset():
    """Materializing a SQL query result as a new dataset must not raise.

    Creates a derived dataset from `iris`, then deletes it again.
    """
    query_args = {
        "source_dataset_id": "iris",
        "query_type": "sql",
        "query": "SELECT * FROM dataset LIMIT 20;",
        "dataset_id": "query2dataset",
        "dataset_name": "query2dataset",
        "dataset_description": "test query to dataset",
    }
    try:
        DatasetManager.query2dataset(**query_args)
        DatasetManager.delete_dataset(query_args["dataset_id"])
    except Exception:
        pytest.fail('query to dataset should not raise error')
def test_adddataset():
    """Adding a base64-encoded CSV dataset and deleting it should succeed."""
    encoded_data = base64.b64encode(b'A,B,C,D\n1,2,3,4')
    payload = {
        "id": "b64test",
        "name": "b64test",
        "payload": encoded_data,
        "description": "b64test dataset",
    }
    try:
        DatasetManager.add_dataset(payload)
        DatasetManager.delete_dataset(payload['id'])
    except Exception:
        # fixed typo in the failure message ("execption" -> "exception")
        pytest.fail('should not raise exception')
def predict(job_id, payload):
    """Run the trained model of a job against input data.

    Args:
        job_id: identifier used to look up the trained model via
            ``MLJob.get_model``.
        payload: dict with keys ``'data'`` and ``'input_type'``.
            ``'csv'``: data is base64-encoded CSV text.
            ``'dataset'``: data is a dataset id resolved via DatasetManager.

    Returns:
        For ``'csv'``: ``{'data': <base64-encoded CSV of predictions>}``.
        For ``'dataset'``: ``{'cols': ..., 'rows': ...}`` as produced by
        ``df_to_cols_rows``.

    Raises:
        RuntimeError: if ``input_type`` is not supported.
        Exception: any failure during lookup or prediction is logged
            with traceback and re-raised.
    """
    data = payload['data']
    input_type = payload['input_type']
    try:
        model = MLJob.get_model(job_id)
        if input_type == 'csv':
            csv_data = BytesIO(base64.b64decode(data))
            df = pd.read_csv(csv_data, sep=",")
            df_prediction = model.predict(df)
            output_data = df_prediction.to_csv(index=False)
            result = {}
            result['data'] = base64.b64encode(output_data.encode('utf-8'))
            return result
        elif input_type == 'dataset':
            dataset = DatasetManager.get_dataset(data)
            df = dataset.get_df()
            df_prediction = model.predict(df)
            # fix: build a fresh result dict instead of clobbering the
            # `payload` parameter (shadowing made the branch confusing
            # and made the logged `data` refer to a rebound name's source)
            result = {}
            result["cols"], result["rows"] = df_to_cols_rows(df_prediction)
            return result
        else:
            message = f'input type {input_type} is not supported for prediction'
            logger.error(message)
            raise RuntimeError(message)
    except Exception as e:
        logger.exception(
            f'failed to do prediction for data={data} id={job_id} error={e}'
        )
        # bare re-raise preserves the original traceback
        raise
def test_job_auto_regression():
    """Train an auto-regression job on the housing dataset and predict."""
    dataset_id = 'housing'
    dataset = DatasetManager.get_dataset(dataset_id)
    assert dataset is not None
    df = dataset.get_df()
    assert df is not None

    features = [
        'crime_rate',
        'business_acres',
        'avg_rooms_per_dwelling',
        'distance_to_employment_center',
    ]
    targets = ['median_house_value']
    job_option = {
        'time_left_for_this_task': 30,
        'per_run_time_limit': 10,
    }

    job = AutoRegressionJob('testregression', dataset_id, features, targets,
                            job_option, None)
    job.train()
    predict_result = job.predict(df[features])
    predict_result[targets] = df[targets]
    assert job.get_status() == MLJobStatus.SUCCESS
    job.clean()
def test_sqlquery():
    """A LIMIT 10 SQL query against iris should yield exactly 10 rows."""
    iris = DatasetManager.get_dataset('iris')
    result = iris.query('SELECT * FROM dataset LIMIT 10;', 'sql')
    assert result is not None
    _, rows = df_to_cols_rows(result)
    assert rows is not None
    assert len(rows) == 10
def __init__(self, name, dataset):
    """Create a job bound to a dataset.

    Args:
        name: human-readable job name.
        dataset: dataset id, resolved through DatasetManager.
    """
    self.id = str(uuid.uuid4())  # unique job identifier
    self.name = name
    self.dataset_id = dataset
    self.dataset = DatasetManager.get_dataset(dataset)
    # NOTE(review): if get_dataset returns None this raises AttributeError
    # rather than a clear error — confirm whether that is intended
    self.df = self.dataset.get_df()
    self.job_dir = os.path.join(MLJob.base_dir, self.id)  # per-job working dir
    self.metadata = {}
    self._init()  # subclass/shared setup hook
def test_job_time_serials():
    """Train a time-series forecast job on air_passengers and predict."""
    dataset_id = 'air_passengers'
    dataset = DatasetManager.get_dataset(dataset_id)
    assert dataset is not None
    df = dataset.get_df()
    assert df is not None

    features = ['Date']
    targets = ['Number']
    job = TimeSerialsForecastsJob('testtimeserials', dataset_id, features,
                                  targets, {})
    job.train()
    if hasattr(job, 'training_error'):
        print(f'training error was detected {job.training_error}')
    assert job.get_status() == MLJobStatus.SUCCESS

    predict_result = job.predict(df[features])
    assert predict_result is not None
    predict_result.to_csv('/tmp/tt.csv', encoding='utf-8')
def test_job_auto_multi_classification():
    """Train a multi-class classification job on iris and predict."""
    dataset_id = 'iris'
    dataset = DatasetManager.get_dataset(dataset_id)
    assert dataset is not None
    df = dataset.get_df()
    assert df is not None

    features = ['sepal_length', 'sepal_width']
    targets = ['species']
    job_option = {
        'time_left_for_this_task': 30,
        'per_run_time_limit': 10,
    }

    job = AutoClassificationJob('testclassification', dataset_id, features,
                                targets, job_option, None)
    job.train()
    predict_result = job.predict(df[features])
    predict_result[targets] = df[targets]
    assert job.get_status() == MLJobStatus.SUCCESS
    job.clean()
def test_job_auto_classification():
    """Train a binary classification job on the churn dataset and predict."""
    dataset_id = 'churn'
    dataset = DatasetManager.get_dataset(dataset_id)
    assert dataset is not None
    df = dataset.get_df()
    assert df is not None

    features = ['Account Length', 'Area Code', 'Day Calls', 'State']
    targets = ['Churn?']
    job_option = {
        'time_left_for_this_task': 30,
        'per_run_time_limit': 10,
    }

    job = AutoClassificationJob('testclassification', dataset_id, features,
                                targets, job_option, None)
    job.train()
    predict_result = job.predict(df[features])
    predict_result[targets] = df[targets]
    assert job.get_status() == MLJobStatus.SUCCESS
    job.clean()
def test_query():
    """A pandas-style filter query on iris should return a result."""
    result = DatasetManager.get_dataset('iris').query(
        'sepal_length > sepal_width')
    assert result is not None
def test_data_list_count():
    """All five bundled sample datasets should be listed.

    Renamed from ``test_data_list``: a later function in this file used
    the same name and shadowed this one, so pytest never collected or
    ran this assertion.
    """
    files = DatasetManager.list_datasets()
    assert len(files) == 5
def test_data_list():
    """Listing datasets should succeed and return a value.

    Previously this duplicated the name of an earlier test (shadowing it)
    and asserted nothing; it now at least checks the call yields a result.
    """
    files = DatasetManager.list_datasets()
    assert files is not None