def load_pipeline(pipeline_path):
    # Load a pipeline description from a JSON or YAML file,
    # dispatching on the file extension.
    with open(pipeline_path) as pipeline_file:
        if pipeline_path.endswith('.json'):
            pipeline = Pipeline.from_json(pipeline_file)
        else:
            pipeline = Pipeline.from_yaml(pipeline_file)
    return pipeline
def generate_pipeline(pipeline_path: str, dataset_path: str,
                      problem_doc_path: str, resolver: Resolver = None) -> Runtime:
    """
    Simplified interface that fits a pipeline with a dataset.

    Parameters
    ----------
    pipeline_path:
        Path to the pipeline description.
    dataset_path:
        Path to the datasetDoc.json.
    problem_doc_path:
        Path to the problemDoc.json.
    resolver : Resolver
        Resolver to use.
    """
    # Pipeline description
    if pipeline_path.endswith('.json'):
        with open(pipeline_path) as pipeline_file:
            pipeline_description = Pipeline.from_json(
                string_or_file=pipeline_file, resolver=resolver)
    else:
        with open(pipeline_path) as pipeline_file:
            pipeline_description = Pipeline.from_yaml(
                string_or_file=pipeline_file, resolver=resolver)

    # Problem doc
    problem_doc = load_problem_doc(problem_doc_path)

    # Dataset: normalize a bare local path into a file:// URI
    if not dataset_path.startswith('file:'):
        dataset_path = 'file://{dataset_path}'.format(
            dataset_path=os.path.abspath(dataset_path))
    dataset = D3MDatasetLoader().load(dataset_uri=dataset_path)

    # Add target-column metadata from the problem doc to the dataset
    dataset = add_target_columns_metadata(dataset, problem_doc)

    # Build and fit the pipeline
    pipeline_runtime = Runtime(pipeline_description)
    pipeline_runtime.fit(inputs=[dataset])

    return pipeline_runtime
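The dataset-path normalization step above can be isolated into a small helper. This is a minimal stdlib-only sketch; `to_dataset_uri` is a hypothetical name, not part of the d3m API:

```python
import os


def to_dataset_uri(dataset_path):
    # Hypothetical helper mirroring the normalization in generate_pipeline:
    # leave existing file: URIs untouched, and turn bare local paths into
    # absolute file:// URIs as expected by D3MDatasetLoader.load().
    if dataset_path.startswith('file:'):
        return dataset_path
    return 'file://{}'.format(os.path.abspath(dataset_path))
```

Checking `startswith('file:')` (rather than `'file:' in dataset_path`) avoids false positives when `file:` appears mid-path.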
def load_pipeline(pipeline_path):
    with open(pipeline_path, 'r') as pipeline_file:
        if pipeline_path.endswith('.json'):
            return Pipeline.from_json(pipeline_file)
        else:
            return Pipeline.from_yaml(pipeline_file)
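Both `load_pipeline` variants dispatch on the file extension. The same pattern can be sketched with a loader table, which extends more cleanly to new formats; the stub loaders below stand in for `Pipeline.from_json` / `Pipeline.from_yaml` (the real ones live in `d3m.metadata.pipeline`), and `load_description` is a hypothetical name:

```python
import json
import pathlib

# Stub loaders keyed by suffix; in real code these would be
# Pipeline.from_json and Pipeline.from_yaml.
LOADERS = {
    '.json': json.loads,
    '.yaml': lambda text: text,  # placeholder: a YAML parser would go here
    '.yml': lambda text: text,
}


def load_description(path_str, text):
    # Pick a loader from the file suffix, mirroring load_pipeline's
    # endswith('.json') check but covering both .yaml and .yml.
    suffix = pathlib.PurePath(path_str).suffix
    return LOADERS[suffix](text)
```

A table lookup also fails loudly (`KeyError`) on an unknown extension, whereas the `else` branch silently assumes YAML.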
def test_convert_openml_task(self):
    self.maxDiff = None

    with open(os.path.join(PIPELINE_DIR, 'data-preparation-train-test-split.yml'),
              'r') as data_pipeline_file:
        data_pipeline = Pipeline.from_yaml(data_pipeline_file, resolver=Resolver())

    data_params = {
        'train_score_ratio': '0.8',
        'shuffle': 'true',
        'stratified': 'true',
    }
    task_id = 8
    save_dir = os.path.join(self.test_dir, 'single_dataset')
    save_dir_path = pathlib.PurePath(save_dir)
    datasets = {}

    crawler.crawl_openml_task(
        datasets=datasets,
        task_id=task_id,
        save_dir=save_dir,
        data_pipeline=data_pipeline,
        data_params=data_params,
        context=metadata_base.Context.TESTING,
    )

    self.assertEqual(datasets, {
        'openml_dataset_8': str(
            save_dir_path / pathlib.PurePosixPath('openml_dataset_8/datasetDoc.json')),
        'openml_dataset_8_TRAIN': str(
            save_dir_path / pathlib.PurePosixPath('TRAIN/dataset_TRAIN/datasetDoc.json')),
        'openml_dataset_8_TEST': str(
            save_dir_path / pathlib.PurePosixPath('TEST/dataset_TEST/datasetDoc.json')),
        'openml_dataset_8_SCORE': str(
            save_dir_path / pathlib.PurePosixPath('SCORE/dataset_SCORE/datasetDoc.json')),
    })

    self._assert_dir_structure(save_dir, [
        'SCORE',
        'TEST',
        'TRAIN',
        'openml_dataset_8',
        'openml_problem_8',
        'data_preparation_pipeline_run.pkl',
        'SCORE/dataset_SCORE',
        'SCORE/problem_SCORE',
        'SCORE/dataset_SCORE/tables',
        'SCORE/dataset_SCORE/datasetDoc.json',
        'SCORE/dataset_SCORE/tables/learningData.csv',
        'SCORE/problem_SCORE/problemDoc.json',
        'TEST/dataset_TEST',
        'TEST/problem_TEST',
        'TEST/dataset_TEST/tables',
        'TEST/dataset_TEST/datasetDoc.json',
        'TEST/dataset_TEST/tables/learningData.csv',
        'TEST/problem_TEST/problemDoc.json',
        'TRAIN/dataset_TRAIN',
        'TRAIN/problem_TRAIN',
        'TRAIN/dataset_TRAIN/tables',
        'TRAIN/dataset_TRAIN/datasetDoc.json',
        'TRAIN/dataset_TRAIN/tables/learningData.csv',
        'TRAIN/problem_TRAIN/problemDoc.json',
        'openml_dataset_8/tables',
        'openml_dataset_8/datasetDoc.json',
        'openml_dataset_8/tables/learningData.csv',
        'openml_problem_8/problemDoc.json',
    ])
def test_ignore_openml_task(self):
    self.maxDiff = None

    with open(os.path.join(PIPELINE_DIR, 'data-preparation-train-test-split.yml'),
              'r') as data_pipeline_file:
        data_pipeline = Pipeline.from_yaml(data_pipeline_file, resolver=Resolver())

    data_params = {
        'train_score_ratio': '0.8',
        'shuffle': 'true',
        'stratified': 'true',
    }
    save_dir = os.path.join(self.test_dir, 'ignore_dataset')
    max_tasks = 1

    has_errored = crawler.crawl_openml(
        save_dir=save_dir,
        task_types=(problem_module.OpenMLTaskType.SUPERVISED_CLASSIFICATION,),
        data_pipeline=data_pipeline,
        data_params=data_params,
        context=metadata_base.Context.TESTING,
        max_tasks=max_tasks,
        ignore_tasks=[3],
        ignore_datasets=[2],
    )

    self.assertFalse(has_errored)
    self._assert_dir_structure(save_dir, [
        'openml_task_4',
        'openml_task_4/SCORE',
        'openml_task_4/TEST',
        'openml_task_4/TRAIN',
        'openml_task_4/openml_dataset_4',
        'openml_task_4/openml_problem_4',
        'openml_task_4/data_preparation_pipeline_run.pkl',
        'openml_task_4/SCORE/dataset_SCORE',
        'openml_task_4/SCORE/problem_SCORE',
        'openml_task_4/SCORE/dataset_SCORE/tables',
        'openml_task_4/SCORE/dataset_SCORE/datasetDoc.json',
        'openml_task_4/SCORE/dataset_SCORE/tables/learningData.csv',
        'openml_task_4/SCORE/problem_SCORE/problemDoc.json',
        'openml_task_4/TEST/dataset_TEST',
        'openml_task_4/TEST/problem_TEST',
        'openml_task_4/TEST/dataset_TEST/tables',
        'openml_task_4/TEST/dataset_TEST/datasetDoc.json',
        'openml_task_4/TEST/dataset_TEST/tables/learningData.csv',
        'openml_task_4/TEST/problem_TEST/problemDoc.json',
        'openml_task_4/TRAIN/dataset_TRAIN',
        'openml_task_4/TRAIN/problem_TRAIN',
        'openml_task_4/TRAIN/dataset_TRAIN/tables',
        'openml_task_4/TRAIN/dataset_TRAIN/datasetDoc.json',
        'openml_task_4/TRAIN/dataset_TRAIN/tables/learningData.csv',
        'openml_task_4/TRAIN/problem_TRAIN/problemDoc.json',
        'openml_task_4/openml_dataset_4/tables',
        'openml_task_4/openml_dataset_4/datasetDoc.json',
        'openml_task_4/openml_dataset_4/tables/learningData.csv',
        'openml_task_4/openml_problem_4/problemDoc.json',
    ])
import logging
import pkg_resources
import random
from multiprocessing import Manager, Process

import d3m.metadata.base
import d3m.runtime
from d3m.container import Dataset
from d3m.metadata.pipeline import Pipeline
from d3m.metadata.problem import PerformanceMetric, TaskKeyword
from sqlalchemy.orm import joinedload

from d3m_ta2_nyu.workflow import database, convert
from d3m_ta2_nyu.utils import is_collection, get_dataset_sample

logger = logging.getLogger(__name__)

# Load the data-preparation and scoring pipelines shipped as package resources.
with pkg_resources.resource_stream(
        'd3m_ta2_nyu', '../resource/pipelines/kfold_tabular_split.yaml') as fp:
    kfold_tabular_split = Pipeline.from_yaml(fp)

with pkg_resources.resource_stream(
        'd3m_ta2_nyu', '../resource/pipelines/kfold_timeseries_split.yaml') as fp:
    kfold_timeseries_split = Pipeline.from_yaml(fp)

with pkg_resources.resource_stream(
        'd3m_ta2_nyu', '../resource/pipelines/train-test-tabular-split.yaml') as fp:
    train_test_tabular_split = Pipeline.from_yaml(fp)

with pkg_resources.resource_stream(
        'd3m_ta2_nyu', '../resource/pipelines/scoring.yaml') as fp:
    scoring_pipeline = Pipeline.from_yaml(fp)
def get_scoring_pipeline() -> Pipeline:
    with open(SCORING_PIPELINES_DIR, 'r') as pipeline_file:
        with d3m_utils.silence():
            pipeline = Pipeline.from_yaml(pipeline_file)
    return pipeline