def _update_metadata(metadata: DataMetadata, resource_id: SelectorSegment) -> DataMetadata:
    """Promote the metadata of a single Dataset resource to top-level container metadata.

    The selected resource must be a ``container.DataFrame``; its metadata is copied
    into a fresh ``DataMetadata`` with the container schema version set, and the
    resource's "dataset entry point" marker is removed.

    :param metadata: metadata of the whole Dataset.
    :param resource_id: selector segment identifying the resource to extract.
    :returns: standalone metadata for the extracted resource.
    :raises TypeError: if the resource is not a DataFrame.
    """
    selected_meta = dict(metadata.query((resource_id,)))

    # Guard: only DataFrame resources can be promoted.
    if 'structural_type' not in selected_meta or not issubclass(selected_meta['structural_type'], container.DataFrame):
        raise TypeError("The Dataset resource is not a DataFrame, but \"{type}\".".format(
            type=selected_meta.get('structural_type', None),
        ))

    # Stamp the extracted metadata with the container-level schema version.
    selected_meta.update({'schema': CONTAINER_SCHEMA_VERSION})

    fresh = DataMetadata(selected_meta)
    fresh = metadata.copy_to(fresh, (resource_id,))

    # Resource is not anymore an entry point.
    fresh = fresh.remove_semantic_type((), 'https://metadata.datadrivendiscovery.org/types/DatasetEntryPoint')

    return fresh
# Read the runner's call configuration (inputFile is opened by the caller/earlier code).
jsonCall = json.load(inputFile)
inputFile.close()

# Load the problem description schema.
# NOTE: `with` closes the file automatically — the explicit close() calls that
# previously followed these blocks were redundant no-ops and have been removed.
with open(path.join(jsonCall['train_data'], 'problem_TRAIN', 'problemDoc.json'), 'r') as inputFile:
    problemSchema = json.load(inputFile)

# Load the json dataset description file.
with open(path.join(jsonCall['train_data'], 'dataset_TRAIN', 'datasetDoc.json'), 'r') as inputFile:
    datasetSchema = json.load(inputFile)

# Load dataset from the datasetDoc.json URI.
ds_uri = 'file://' + path.join(jsonCall['train_data'], 'dataset_TRAIN', 'datasetDoc.json')
ds = container.Dataset(resources=dict(), metadata=DataMetadata())
ds = ds.load(ds_uri)

# Profile dataset using sampled hyperparameters.
param = Hyperparams.sample()
prof = Profiler(hyperparams=param)
ds2 = prof.produce(inputs=ds)

# Get resource Ids, return ['0'] for this dataset
print(ds.metadata.get_elements(()))

# Get available columns, returns [0, 1, 2, ..., 30] for 38_sick dataset
print(ds.metadata.get_elements(('0', ALL_ELEMENTS)))

# Metadata for column 1
column_one_metadata = ds.metadata.query(('0', ALL_ELEMENTS, 1))