Ejemplo n.º 1
0
def load_dataset(dataset, datasets_path=None, bucket=None):
    dataset_path = _get_dataset_path(dataset, datasets_path, bucket)
    metadata = Metadata(str(dataset_path / 'metadata.json'))
    tables = metadata.get_tables()
    if not hasattr(metadata, 'modality'):
        if len(tables) > 1:
            modality = 'multi-table'
        else:
            table = metadata.get_table_meta(tables[0])
            if any(table.get(field) for field in TIMESERIES_FIELDS):
                modality = 'timeseries'
            else:
                modality = 'single-table'

        metadata._metadata['modality'] = modality
        metadata.modality = modality

    if not hasattr(metadata, 'name'):
        metadata._metadata['name'] = dataset_path.name
        metadata.name = dataset_path.name

    return metadata
Ejemplo n.º 2
0
def load_dataset(dataset,
                 datasets_path=None,
                 bucket=None,
                 aws_key=None,
                 aws_secret=None,
                 max_columns=None):
    dataset_path = _get_dataset_path(dataset, datasets_path, bucket, aws_key,
                                     aws_secret)
    with open(dataset_path / 'metadata.json') as metadata_file:
        metadata_content = json.load(metadata_file)

    if max_columns:
        if len(metadata_content['tables']) > 1:
            raise ValueError(
                'max_columns is not supported for multi-table datasets')

        _apply_max_columns_to_metadata(metadata_content, max_columns)

    metadata = Metadata(metadata_content, dataset_path)
    tables = metadata.get_tables()
    if not hasattr(metadata, 'modality'):
        if len(tables) > 1:
            modality = 'multi-table'
        else:
            table = metadata.get_table_meta(tables[0])
            if any(table.get(field) for field in TIMESERIES_FIELDS):
                modality = 'timeseries'
            else:
                modality = 'single-table'

        metadata._metadata['modality'] = modality
        metadata.modality = modality

    if not hasattr(metadata, 'name'):
        metadata._metadata['name'] = dataset_path.name
        metadata.name = dataset_path.name

    return metadata