def save_container(container: typing.Any, output_dir: str) -> None:
    # Saving data.
    if isinstance(container, container_module.Dataset):
        dataset_root_metadata = container.metadata.query(())

        missing_metadata: typing.Dict = {}
        for d3m_path, (dataset_path, required) in dataset_module.D3M_TO_DATASET_FIELDS.items():
            if not required:
                continue

            if utils.get_dict_path(dataset_root_metadata, dataset_path) is None:
                # TODO: Use some better value instead of this random value?
                utils.set_dict_path(missing_metadata, dataset_path, str(uuid.uuid4()))

        if missing_metadata:
            container = container.copy()
            container.metadata = container.metadata.update((), missing_metadata)

        # Dataset saver creates any missing directories.
        dataset_uri = utils.path_to_uri(os.path.abspath(os.path.join(output_dir, 'datasetDoc.json')))
        container.save(dataset_uri)
    else:
        # We do not want to overwrite anything.
        os.makedirs(output_dir, exist_ok=False)
        dataframe_path = os.path.join(output_dir, 'data.csv')

        if isinstance(container, container_module.DataFrame):
            container.to_csv(dataframe_path)
        elif isinstance(container, (container_module.List, container_module.ndarray)):
            container = container_module.DataFrame(container)
            container.to_csv(dataframe_path)
        else:
            raise exceptions.NotSupportedError(
                "Value with type '{value_type}' cannot be saved as a container type.".format(value_type=type(container)),
            )

    # Saving metadata. This is just for debugging purposes, so we are
    # using "to_json_structure" and not "to_internal_json_structure".
    input_metadata = container.metadata.to_json_structure()
    metadata_path = os.path.join(output_dir, 'metadata.json')
    with open(metadata_path, 'w') as outfile:
        json.dump(input_metadata, outfile, indent=2, sort_keys=True, allow_nan=False)
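# A minimal usage sketch for "save_container" (illustrative, not part of the
# original module; the helper name "_example_save_container" and the output
# path are hypothetical). Saving a plain DataFrame container takes the
# non-Dataset branch above: "data.csv" plus a debugging "metadata.json" are
# written into a directory which must not already exist.
def _example_save_container(output_dir: str) -> None:
    # "generate_metadata=True" populates "container.metadata", so the
    # "metadata.json" dump is non-trivial.
    predictions = container_module.DataFrame(
        {'d3mIndex': [0, 1, 2], 'prediction': ['a', 'b', 'a']},
        generate_metadata=True,
    )
    save_container(predictions, os.path.join(output_dir, 'predictions'))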
def get_class(self) -> typing.Any:
    """
    Returns a class suitable for computing this metric.
    """

    # Importing here to prevent import cycle.
    from d3m import metrics

    if self in metrics.class_map:
        return metrics.class_map[self]  # type: ignore

    if self in self._additional_score_class_map:
        return self._additional_score_class_map[self]  # type: ignore

    raise exceptions.NotSupportedError("Computing metric {metric} is not supported.".format(metric=self))
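# A hedged usage sketch for "get_class" (assumes this method lives on d3m's
# "PerformanceMetric" enum and that "truth" and "predictions" are DataFrames
# in the layout "d3m.metrics" expects):
#
#     metric_class = PerformanceMetric.ACCURACY.get_class()
#     score = metric_class().score(truth, predictions)
#
# A metric registered in neither map raises "exceptions.NotSupportedError".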
def crawl_openml_task(
    datasets: typing.Dict[str, str], task_id: int, save_dir: str, *,
    data_pipeline: pipeline_module.Pipeline,
    data_params: typing.Dict[str, str] = None,
    context: metadata_base.Context,
    random_seed: int = 0,
    volumes_dir: str = None,
    scratch_dir: str = None,
    runtime_environment: pipeline_run_module.RuntimeEnvironment = None,
    dataset_resolver: typing.Callable = None,
    problem_resolver: typing.Callable = None,
    compute_digest: dataset_module.ComputeDigest = dataset_module.ComputeDigest.ONLY_IF_MISSING,
    strict_digest: bool = False,
) -> None:
    """
    A function that crawls an OpenML task and the corresponding dataset, does the split
    using a data preparation pipeline, and stores the splits as a D3M dataset and
    problem description.

    Parameters
    ----------
    datasets:
        A mapping between known dataset IDs and their paths. Is updated in-place.
    task_id:
        An integer representing an OpenML task ID to crawl and convert.
    save_dir:
        A directory where to save datasets and problems.
    data_pipeline:
        A data preparation pipeline used for splitting.
    data_params:
        A dictionary that contains the hyper-parameters for the data preparation pipeline.
    context:
        In which context to run pipelines.
    random_seed:
        A random seed to use for every run. This controls all randomness during the run.
    volumes_dir:
        Path to a directory with static files required by primitives.
    scratch_dir:
        Path to a directory to store any temporary files needed during execution.
    runtime_environment:
        A description of the runtime environment.
    dataset_resolver:
        A dataset resolver to use.
    problem_resolver:
        A problem description resolver to use.
    compute_digest:
        Compute a digest over the data?
    strict_digest:
        If computed digest does not match the one provided in metadata, raise an exception?
    """

    if dataset_resolver is None:
        dataset_resolver = dataset_module.get_dataset
    if problem_resolver is None:
        problem_resolver = problem_module.get_problem

    number_of_folds = runtime._get_number_of_folds(data_params)
    assert number_of_folds != 0

    problem_uri = f'https://www.openml.org/t/{task_id}'
    problem_description = problem_resolver(problem_uri, strict_digest=strict_digest)

    if len(problem_description['inputs']) != 1:
        raise exceptions.NotSupportedError("OpenML problem descriptions with multiple inputs are not supported.")

    problem_description_input = problem_description['inputs'][0]
    input_dataset_id = problem_description_input['dataset_id']

    known_datasets_set = set(datasets.keys())
    needed_splits_set = set()

    # We make sure when splitting that the output dataset has the same ID as the input dataset
    # with an additional suffix for split type, and we are taking advantage of this here.
    # The naming scheme matches "runtime._get_split_dataset_id".
    if number_of_folds == 1:
        needed_splits_set.add(f'{input_dataset_id}_TRAIN')
        needed_splits_set.add(f'{input_dataset_id}_TEST')
        needed_splits_set.add(f'{input_dataset_id}_SCORE')

        dataset_view_maps = [{
            'train': [
                {
                    'from': input_dataset_id,
                    'to': f'{input_dataset_id}_TRAIN',
                },
            ],
            'test': [
                {
                    'from': input_dataset_id,
                    'to': f'{input_dataset_id}_TEST',
                },
            ],
            'score': [
                {
                    'from': input_dataset_id,
                    'to': f'{input_dataset_id}_SCORE',
                },
            ],
        }]
    else:
        dataset_view_maps = []
        for fold_index in range(number_of_folds):
            needed_splits_set.add(f'{input_dataset_id}_FOLD_{fold_index}_TRAIN')
            needed_splits_set.add(f'{input_dataset_id}_FOLD_{fold_index}_TEST')
            needed_splits_set.add(f'{input_dataset_id}_FOLD_{fold_index}_SCORE')

            dataset_view_maps.append({
                'train': [
                    {
                        'from': input_dataset_id,
                        'to': f'{input_dataset_id}_FOLD_{fold_index}_TRAIN',
                    },
                ],
                'test': [
                    {
                        'from': input_dataset_id,
                        'to': f'{input_dataset_id}_FOLD_{fold_index}_TEST',
                    },
                ],
                'score': [
                    {
                        'from': input_dataset_id,
                        'to': f'{input_dataset_id}_FOLD_{fold_index}_SCORE',
                    },
                ],
            })

    # We already have these splits, so we can just reuse them.
    if problem_description_input['dataset_id'] in known_datasets_set and needed_splits_set <= known_datasets_set:
        logger.debug("Copying existing splits.")

        # Copy splits.
        if number_of_folds == 1:
            view_maps = dataset_view_maps[0]
            for split_type in ['train', 'test', 'score']:
                shutil.copytree(
                    os.path.dirname(datasets[runtime._get_dataset_id_from_view_maps(view_maps, split_type, input_dataset_id)]),
                    os.path.join(save_dir, split_type.upper(), f'dataset_{split_type.upper()}'),
                )

                # Save problem description for the split. We do not copy because we copy only datasets.
                problem_path = os.path.abspath(os.path.join(save_dir, split_type.upper(), f'problem_{split_type.upper()}', 'problemDoc.json'))
                runtime._save_problem_description(problem_description, problem_path, dataset_view_maps=view_maps)
        else:
            for fold_index, view_maps in enumerate(dataset_view_maps):
                for split_type in ['train', 'test', 'score']:
                    shutil.copytree(
                        os.path.dirname(datasets[runtime._get_dataset_id_from_view_maps(view_maps, split_type, input_dataset_id)]),
                        os.path.join(save_dir, 'folds', str(fold_index), split_type.upper(), f'dataset_{split_type.upper()}'),
                    )

                    # Save problem description for the split. We do not copy because we copy only datasets.
                    problem_path = os.path.abspath(os.path.join(save_dir, 'folds', str(fold_index), split_type.upper(), f'problem_{split_type.upper()}', 'problemDoc.json'))
                    runtime._save_problem_description(problem_description, problem_path, dataset_view_maps=view_maps)

        # Copy data preparation pipeline run pickle.
        shutil.copy2(
            os.path.join(os.path.dirname(datasets[input_dataset_id]), '..', runtime.DATA_PIPELINE_RUN_FILENAME),
            os.path.join(save_dir, runtime.DATA_PIPELINE_RUN_FILENAME),
        )

        # Copy full dataset.
        shutil.copytree(
            os.path.dirname(datasets[input_dataset_id]),
            os.path.join(save_dir, input_dataset_id),
        )
    else:
        logger.debug("Running a data preparation pipeline.")

        openml_dataset_id = int(input_dataset_id.split('_')[-1])
        dataset_uri = f'https://www.openml.org/d/{openml_dataset_id}'
        dataset = dataset_resolver(
            dataset_uri,
            compute_digest=compute_digest,
            strict_digest=strict_digest,
        )

        dataset_id = dataset.metadata.query_field((), 'id')
        if input_dataset_id != dataset_id:
            raise exceptions.InvalidDatasetError(f"Loaded dataset (\"{dataset_id}\") does not have the expected dataset ID (\"{input_dataset_id}\").")

        # Make splits and save them. This saves the pipeline run made by the data preparation pipeline, too.
        runtime.prepare_data_and_save(
            save_dir=save_dir,
            inputs=[dataset],
            data_pipeline=data_pipeline,
            problem_description=problem_description,
            data_params=data_params,
            context=context,
            random_seed=random_seed,
            volumes_dir=volumes_dir,
            scratch_dir=scratch_dir,
            runtime_environment=runtime_environment,
            # We provide "dataset_view_maps" to force split dataset IDs.
            dataset_view_maps=dataset_view_maps,
        )

        # Save full dataset.
        dataset_path = os.path.abspath(os.path.join(save_dir, dataset_id, 'datasetDoc.json'))
        dataset_uri = utils.path_to_uri(dataset_path)
        dataset.save(dataset_uri)

        # Updating known datasets.
        datasets[dataset_id] = dataset_path

        # We make sure when splitting that the output dataset has the same ID as the input dataset
        # with an additional suffix for split type, and we are taking advantage of this here.
        # The naming scheme matches "runtime._get_split_dataset_id".
        if number_of_folds == 1:
            for split_type in ['TRAIN', 'TEST', 'SCORE']:
                datasets[f'{dataset_id}_{split_type}'] = os.path.join(save_dir, split_type, f'dataset_{split_type}', 'datasetDoc.json')
        else:
            for fold_index in range(number_of_folds):
                for split_type in ['TRAIN', 'TEST', 'SCORE']:
                    datasets[f'{dataset_id}_FOLD_{fold_index}_{split_type}'] = os.path.join(save_dir, 'folds', str(fold_index), split_type, f'dataset_{split_type}', 'datasetDoc.json')

    # Save problem description. For splits, the problem description is saved by "runtime.prepare_data_and_save".
    problem_path = os.path.abspath(os.path.join(save_dir, problem_description['id'], 'problemDoc.json'))
    # We do not save "dataset_view_maps" for this problem description.
    runtime._save_problem_description(problem_description, problem_path)
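# A hedged sketch of driving "crawl_openml_task" (the task ID, save directory,
# and helper name "_example_crawl_openml_task" are illustrative; "data_pipeline"
# must be an already resolved data preparation pipeline):
def _example_crawl_openml_task(data_pipeline: pipeline_module.Pipeline) -> None:
    known_datasets: typing.Dict[str, str] = {}
    crawl_openml_task(
        known_datasets,
        task_id=31,  # An arbitrary OpenML task, for illustration.
        save_dir='/tmp/openml_crawl/31',
        data_pipeline=data_pipeline,
        context=metadata_base.Context.TESTING,
    )
    # "known_datasets" now maps the full dataset ID and every split ID
    # (e.g. "<dataset_id>_TRAIN") to its "datasetDoc.json" path, so a later
    # call for the same dataset takes the copy branch instead of re-splitting.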
def produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> base.CallResult[Outputs]:
    # Obtain the path to the dataset.
    temp_json = inputs.to_json_structure()
    datasetDoc_uri = temp_json['location_uris'][0][7:]
    location_base_uri = '/'.join(datasetDoc_uri.split('/')[:-1])

    with open(datasetDoc_uri) as json_file:
        datasetDoc_json = json.load(json_file)
    dataResources = datasetDoc_json['dataResources']

    # Get the task type from the problem doc.
    temp_path = datasetDoc_uri.split('/')
    problemDoc_uri = '/'.join(temp_path[:-2]) + '/' + '/'.join(temp_path[-2:]).replace('dataset', 'problem')
    with open(problemDoc_uri) as json_file:
        task_types = json.load(json_file)['about']['taskKeywords']

    # TODO: Consider avoiding explicit use of problem type throughout the pipeline.
    TASK = ""
    for task in task_types:
        if task in ["communityDetection", "linkPrediction", "vertexClassification", "graphMatching"]:
            TASK = task
    if TASK == "":
        raise exceptions.NotSupportedError("only graph tasks are supported")

    # Load the graphs and convert them to networkx objects.
    graphs = []
    nodeIDs = []
    for i in dataResources:
        if i['resType'] == "table":
            if i['resID'] == 'learningData':
                df = inputs['learningData']
            else:
                node_list = pd.read_csv(location_base_uri + "/" + i['resPath'])

                # Assume it is a nodeList otherwise. Currently, there
                # aren't any D3M nodeList datasets that have more than one
                # graph. Furthermore, even if there were, there isn't
                # a way to match an edgeList to a nodeList. Hence, we have
                # to assume that the nodeList corresponds to the first graph.
                graph = graphs[0]

                # The following block essentially catches the VXTC synthetic
                # dataset and overwrites nodeList indices with the edgeList's.
                # Without a doubt not an AutoML way, but it is necessary.
                first_idx_edge = str(sorted(list(graph.nodes(data=False)))[0])
                first_idx_node = str(sorted(list(node_list['nodeID']))[0])
                if (first_idx_edge.isdigit() and first_idx_node.isdigit() and int(first_idx_edge) != int(first_idx_node)):
                    node_list = node_list.sort_values('nodeID').reset_index(drop=True)
                    d3m_indices = np.sort(np.array(list(graph.nodes(data=False))).astype(int))
                    node_list['nodeID'] = d3m_indices

                # Make nodeID an index (so it is not used as an attribute).
                node_list = node_list.set_index('nodeID')
                node_list.index = node_list.index.astype(str)

                # Iterate over attributes and assign them to nodes.
                for attribute in node_list.columns.tolist():
                    series = pd.Series(node_list[attribute], index=node_list.index)
                    nx.set_node_attributes(graph, series.to_dict(), attribute)
        elif i['resType'] == 'graph':
            graph_temp = nx.read_gml(location_base_uri + "/" + i['resPath'])
            graphs.append(graph_temp)
            if TASK in ["communityDetection", "vertexClassification"]:
                nodeIDs_temp = list(nx.get_node_attributes(graphs[0], 'nodeID').values())
                nodeIDs_temp = np.array([str(i) for i in nodeIDs_temp])
                nodeIDs_temp = container.ndarray(nodeIDs_temp)
                nodeIDs.append(nodeIDs_temp)
        elif i['resType'] == "edgeList":
            temp_graph = self._read_edgelist(
                location_base_uri + "/" + i['resPath'],
                i["columns"],
            )
            graphs.append(temp_graph)
            if TASK in ["communityDetection", "vertexClassification"]:
                nodeIDs_temp = list(temp_graph.nodes)
                nodeIDs_temp = np.array([str(i) for i in nodeIDs_temp])
                nodeIDs_temp = container.ndarray(nodeIDs_temp)
                nodeIDs.append(nodeIDs_temp)

    return base.CallResult(container.List([df, graphs, nodeIDs, TASK]))
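# A hedged sketch of consuming this primitive's output (assumes "result" is
# the CallResult returned by "produce" above):
#
#     learning_df, graphs, node_ids, task = result.value
#
# "graphs" is a list of networkx graphs, "node_ids" holds per-graph node ID
# arrays for community detection and vertex classification tasks, and "task"
# is one of the four supported graph task keywords.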
def main(argv: typing.Sequence) -> None:
    raise exceptions.NotSupportedError("This CLI has been removed. Use \"python3 -m d3m problem describe\" instead.")