예제 #1
0
def _update_metadata(metadata: DataMetadata,
                     resource_id: SelectorSegment) -> DataMetadata:
    """Build standalone metadata for one DataFrame resource of a dataset.

    The metadata stored under ``(resource_id,)`` is used as the top-level
    entry of a new ``DataMetadata`` object, with its ``'schema'`` key set to
    ``CONTAINER_SCHEMA_VERSION``. ``metadata.copy_to`` is then called with
    the new object and the resource selector (presumably to carry over the
    metadata nested below the resource -- confirm against the metadata API).
    Finally the ``DatasetEntryPoint`` semantic type is removed from the
    top-level selector ``()``.

    Args:
        metadata: Metadata that is queried for the resource.
        resource_id: Selector segment identifying the resource.

    Returns:
        The new metadata object for the resource.

    Raises:
        TypeError: If the resource metadata has no ``'structural_type'`` or
            that type is not a subclass of ``container.DataFrame``.
    """
    selector = (resource_id, )
    attrs = dict(metadata.query(selector))

    structural_type = attrs.get('structural_type', None)
    is_dataframe = ('structural_type' in attrs
                    and issubclass(structural_type, container.DataFrame))
    if not is_dataframe:
        raise TypeError(
            "The Dataset resource is not a DataFrame, but \"{type}\".".format(
                type=structural_type))

    attrs['schema'] = CONTAINER_SCHEMA_VERSION

    converted = metadata.copy_to(DataMetadata(attrs), selector)

    # Once extracted, the resource is no longer a dataset entry point.
    return converted.remove_semantic_type(
        (), 'https://metadata.datadrivendiscovery.org/types/DatasetEntryPoint')
예제 #2
0
    jsonCall = json.load(inputFile)
    inputFile.close()

# Build the dataset description path once: it is needed both to read the
# JSON document and to form the URI the dataset is loaded from.
dataset_doc_path = path.join(jsonCall['train_data'], 'dataset_TRAIN', 'datasetDoc.json')

# Load the problem description schema.
# The `with` block closes the file on exit, so no explicit close() is needed.
with open(path.join(jsonCall['train_data'], 'problem_TRAIN', 'problemDoc.json'), 'r') as inputFile:
    problemSchema = json.load(inputFile)

# Load the JSON dataset description file.
with open(dataset_doc_path, 'r') as inputFile:
    datasetSchema = json.load(inputFile)

# Load the dataset from the same description file, addressed as a file:// URI.
ds_uri = 'file://' + dataset_doc_path
ds = container.Dataset(resources=dict(), metadata=DataMetadata())
ds = ds.load(ds_uri)

# Profile the dataset with a sampled hyperparameter configuration.
param = Hyperparams.sample()
prof = Profiler(hyperparams=param)
ds2 = prof.produce(inputs=ds)

# Get resource IDs; returns ['0'] for this dataset.
print(ds.metadata.get_elements(()))

# Get available columns; returns [0, 1, 2, ..., 30] for the 38_sick dataset.
print(ds.metadata.get_elements(('0', ALL_ELEMENTS)))

# Metadata for column 1 of resource '0'.
column_one_metadata = ds.metadata.query(('0', ALL_ELEMENTS, 1))