def load_dataset(dataset, datasets_path=None, bucket=None):
    """Load *dataset* and return its ``Metadata``.

    If the stored metadata lacks a ``modality`` or ``name`` entry, infer
    them (multi-table vs. timeseries vs. single-table; folder name) and
    backfill both the raw dict and the attribute.
    """
    path = _get_dataset_path(dataset, datasets_path, bucket)
    metadata = Metadata(str(path / 'metadata.json'))
    table_names = metadata.get_tables()

    if not hasattr(metadata, 'modality'):
        if len(table_names) > 1:
            modality = 'multi-table'
        else:
            first_table = metadata.get_table_meta(table_names[0])
            # Any populated timeseries field marks the dataset as timeseries.
            is_timeseries = any(first_table.get(f) for f in TIMESERIES_FIELDS)
            modality = 'timeseries' if is_timeseries else 'single-table'

        metadata._metadata['modality'] = modality
        metadata.modality = modality

    if not hasattr(metadata, 'name'):
        metadata._metadata['name'] = path.name
        metadata.name = path.name

    return metadata
def load_dataset(dataset, datasets_path=None, bucket=None, aws_key=None,
                 aws_secret=None, max_columns=None):
    """Load *dataset* and return its ``Metadata``.

    Args:
        dataset: Name (or path) of the dataset to load.
        datasets_path: Optional local folder where datasets are stored.
        bucket: Optional bucket to download the dataset from.
        aws_key: Optional AWS access key for the bucket.
        aws_secret: Optional AWS secret key for the bucket.
        max_columns: If given, restrict the metadata to at most this many
            columns. Only supported for single-table datasets.

    Returns:
        Metadata: The dataset metadata, with ``modality`` and ``name``
        backfilled when missing from the stored metadata.

    Raises:
        ValueError: If ``max_columns`` is passed for a multi-table dataset.
    """
    dataset_path = _get_dataset_path(dataset, datasets_path, bucket, aws_key, aws_secret)
    # Fix: read metadata.json with an explicit encoding; the bare open()
    # used the platform locale default, which can break on non-UTF-8 systems
    # (PEP 597). JSON files are UTF-8 by specification (RFC 8259).
    with open(dataset_path / 'metadata.json', encoding='utf-8') as metadata_file:
        metadata_content = json.load(metadata_file)

    if max_columns:
        if len(metadata_content['tables']) > 1:
            raise ValueError(
                'max_columns is not supported for multi-table datasets')

        # Presumably trims metadata_content in place — defined elsewhere.
        _apply_max_columns_to_metadata(metadata_content, max_columns)

    metadata = Metadata(metadata_content, dataset_path)
    tables = metadata.get_tables()

    if not hasattr(metadata, 'modality'):
        if len(tables) > 1:
            modality = 'multi-table'
        else:
            table = metadata.get_table_meta(tables[0])
            # Any populated timeseries field marks the dataset as timeseries.
            if any(table.get(field) for field in TIMESERIES_FIELDS):
                modality = 'timeseries'
            else:
                modality = 'single-table'

        metadata._metadata['modality'] = modality
        metadata.modality = modality

    if not hasattr(metadata, 'name'):
        metadata._metadata['name'] = dataset_path.name
        metadata.name = dataset_path.name

    return metadata