def CheckSlice(warnings, slice, dsid):
    """Validate a single slice node, appending any problems to `warnings`.

    The checks run in this order: id present, type is ``DataSlice``, the
    ``dataset`` URL check (which is handed `dsid`), ``dimension`` present with
    a URL for every value, ``measure`` present with a URL for every value, and
    ``data`` present. Inline ``data`` (a dict, or a list of items) is passed
    on to `CheckSliceData`; any other kind of ``data`` value is not inspected
    further here.
    """
    kind = 'Slice'
    _CheckIdPresent(warnings, kind, slice)
    slice_id = GetSchemaId(slice)
    _CheckType(warnings, kind, slice, ['DataSlice'])
    _CheckUrlPresent(warnings, kind, slice, 'dataset',
                     'required for id ' + slice_id, dsid)

    _CheckPropertyPresent(warnings, kind, slice, 'dimension', 'required')
    for dim_ref in AsList(GetSchemaProp(slice, 'dimension')):
        if GetUrl(dim_ref) is not None:
            continue
        warnings.append(
            f'Slice property "dimension" values must have URLs for {slice_id}'
        )

    _CheckPropertyPresent(warnings, kind, slice, 'measure', 'required')
    for measure_ref in AsList(GetSchemaProp(slice, 'measure')):
        if GetUrl(measure_ref) is not None:
            continue
        warnings.append(
            f'Slice property "measure" values must have URLs for {slice_id}'
        )

    _CheckPropertyPresent(warnings, kind, slice, 'data', 'required')
    payload = GetSchemaProp(slice, 'data')
    # Only inline data is checked; a string can be neither a dict nor a list,
    # so string-valued data falls through both branches untouched.
    if isinstance(payload, dict):
        CheckSliceData(warnings, payload, slice_id)
    elif isinstance(payload, list):
        for item in payload:
            CheckSliceData(warnings, item, slice_id)
def _ExpandCodeList(self, dim):
    """Load a code list from CSV and return a list of JSON-LD objects.

    The CSV named by the dimension's ``codeList`` property is fetched through
    ``self.getter`` and every row becomes one entry. An entry starts as a copy
    of the row, then gains ``@type``, ``@id`` (dimension id, ``=``, the row's
    ``codeValue``) and ``dimension``. Each ``dimensionProperty`` with a
    ``propertyID`` is then applied to the entry:

    * a property with a fixed ``value`` is set to that value;
    * otherwise its column (the mapped ``columnIdentifier`` if a table mapping
      for the property supplies one, else the property id) is renamed to the
      property id, and columns named ``<column>.<suffix>`` are folded into a
      nested object keyed by ``<suffix>``.

    Args:
        dim: The dimension node to expand.

    Returns:
        A list with one dict per CSV row.

    Raises:
        KeyError: If a row has no ``codeValue`` column, or a property with
            dotted sub-columns has no ``propertyType``.
    """
    dim_props = AsList(GetSchemaProp(dim, 'dimensionProperty'))
    table_mappings = {
        GetUrl(mapping['sourceEntity']): mapping
        for mapping in AsList(GetSchemaProp(dim, 'tableMapping'))
    }
    # These depend only on the dimension, so look them up once, not per row.
    dim_id = GetSchemaId(dim)
    equivalent_type = GetSchemaProp(dim, 'equivalentType')

    code_list = []
    with self.getter.Fetch(GetSchemaProp(dim, 'codeList')) as f:
        for row in DictReader(f):
            entry = dict(row)
            if equivalent_type:
                entry['@type'] = ['DimensionValue'] + AsList(equivalent_type)
            else:
                entry['@type'] = 'DimensionValue'
            entry['@id'] = dim_id + '=' + row['codeValue']
            entry['dimension'] = dim_id

            for dim_prop in dim_props:
                prop_id = GetSchemaProp(dim_prop, 'propertyID')
                value = dim_prop.get('value')
                if not prop_id:
                    continue
                if value:
                    # A fixed value applies to every row; no column involved.
                    entry[prop_id] = value
                    continue

                column_id = prop_id
                dim_prop_id = GetSchemaId(dim_prop)
                if dim_prop_id:
                    mapping = table_mappings.get(dim_prop_id)
                    if mapping and 'columnIdentifier' in mapping:
                        column_id = mapping.get('columnIdentifier')

                nested_prefix = column_id + '.'
                for field in row:
                    if field == column_id:
                        if column_id != prop_id:
                            entry[prop_id] = entry[column_id]
                            del entry[column_id]
                    elif field.startswith(nested_prefix):
                        # NOTE(review): the nested object is stored under the
                        # column id, not the property id, even when a table
                        # mapping makes them differ - confirm this is intended.
                        nested = entry.get(
                            column_id, {'@type': dim_prop['propertyType']})
                        if isinstance(nested, str):
                            # The plain column was already copied in as a
                            # string; keep it as the nested object's name.
                            # (Previously indexed the literal key 'columnId'.)
                            nested = {
                                '@type': dim_prop['propertyType'],
                                'name': row[column_id],
                            }
                        entry[column_id] = nested
                        nested[field[len(nested_prefix):]] = entry[field]
                        del entry[field]
            code_list.append(entry)
    return code_list
def Expand(self):
    """Frame the getter's graph and inline its string-valued references.

    Three kinds of property are expanded when their value is still a string:
    each dimension's ``codeList`` (via `_ExpandCodeList`), the top-level
    ``footnote`` (via `_ExpandFootnotes`) and each slice's ``data`` (via
    `_ExpandSliceData`). Values that are not strings are left as they are.

    Returns:
        The framed JSON value, modified in place with the expanded content.
    """
    json_val = FrameGraph(self.getter.graph, frame=_DataFileFrame)

    for dim in AsList(GetSchemaProp(json_val, 'dimension')):
        if isinstance(dim.get('codeList'), str):
            dim['codeList'] = self._ExpandCodeList(dim)

    footnote = GetSchemaProp(json_val, 'footnote')
    if isinstance(footnote, str):
        json_val['footnote'] = self._ExpandFootnotes(footnote, json_val)

    # The id -> dimension lookup is identical for every slice, so build it
    # once here (after code lists are expanded) instead of once per slice.
    dim_defs_by_id = MakeIdKeyedDict(
        AsList(GetSchemaProp(json_val, 'dimension')))
    for slice in AsList(GetSchemaProp(json_val, 'slice')):
        if isinstance(GetSchemaProp(slice, 'data'), str):
            slice['data'] = self._ExpandSliceData(slice, dim_defs_by_id)
    return json_val
def _ExpandSliceData(self, slice, dim_defs_by_id):
    """Load a slice's CSV data and return it as a list of observations.

    The slice's ``data`` property is fetched through ``self.getter`` and each
    row becomes one ``Observation`` dict holding a ``DimensionValue`` per
    slice dimension and a ``MeasureValue`` per slice measure. The CSV column
    for a dimension or measure is the fragment of its URL. A column named
    ``<measure column>*`` with a non-empty value is split on ``;`` into
    ``StatisticalAnnotation`` footnotes on that measure value.

    Args:
        slice: The slice node whose ``data`` is to be expanded.
        dim_defs_by_id: Mapping from dimension URL to its definition. A
            dimension with no definition gets a value carrying only its
            ``@type`` and ``dimension``.

    Returns:
        A list with one observation dict per CSV row.

    Raises:
        KeyError: If a row lacks a column needed for a categorical or time
            dimension, or for a measure.
    """
    data = []
    with self.getter.Fetch(GetSchemaProp(slice, 'data')) as f:
        for row in DictReader(f):
            val = {
                '@type': 'Observation',
                'slice': GetSchemaId(slice),
                'dimensionValues': [],
                'measureValues': [],
            }

            for dim in AsList(GetSchemaProp(slice, 'dimension')):
                dim = GetUrl(dim)
                fragment = urlparse(dim).fragment
                dim_val = {
                    '@type': 'DimensionValue',
                    'dimension': dim,
                }
                dim_def = dim_defs_by_id.get(dim)
                if dim_def:
                    dim_type = GetSchemaProp(dim_def, '@type')
                    if dim_type == 'CategoricalDimension':
                        dim_val['codeValue'] = row[fragment]
                    elif dim_type == 'TimeDimension':
                        equivalent_type = GetSchemaProp(
                            dim_def, 'equivalentType')
                        if equivalent_type:
                            dim_val['value'] = {
                                '@type': equivalent_type,
                                '@value': row[fragment],
                            }
                        else:
                            # Set the value on this dimension's own entry.
                            # The old code wrote to dimensionValues[-1], i.e.
                            # the previous dimension (or raised IndexError
                            # when this was the first one).
                            dim_val['value'] = row[fragment]
                val['dimensionValues'].append(dim_val)

            for measure in AsList(GetSchemaProp(slice, 'measure')):
                measure = GetUrl(measure)
                fragment = urlparse(measure).fragment
                measure_val = {
                    '@type': 'MeasureValue',
                    'measure': measure,
                    'value': row[fragment],
                }
                footnotes = row.get(fragment + '*')
                if footnotes:
                    measure_val['footnote'] = [
                        {
                            '@type': 'StatisticalAnnotation',
                            'codeValue': footnote,
                        }
                        for footnote in footnotes.split(';')
                    ]
                val['measureValues'].append(measure_val)

            data.append(val)
    return data
def CheckStatisticalDataset(warnings, dataset):
    """Validate a StatisticalDataset node, appending problems to `warnings`.

    After the type and id checks, each of ``dimension``, ``measure`` and
    ``slice`` is checked for presence, and every value of that property is
    handed to its own checker together with the dataset's id.
    """
    kind = 'StatisticalDataset'
    _CheckType(warnings, kind, dataset, ['StatisticalDataset'])
    _CheckIdPresent(warnings, kind, dataset)

    # Property name -> checker for each value, in the order they are run.
    per_property_checks = (
        ('dimension', CheckDimension),
        ('measure', CheckMeasure),
        ('slice', CheckSlice),
    )
    for prop_name, check_item in per_property_checks:
        _CheckPropertyPresent(warnings, kind, dataset, prop_name, 'required')
        for item in AsList(GetSchemaProp(dataset, prop_name)):
            check_item(warnings, item, GetSchemaId(dataset))
def FrameGraph(graph, frame=_FullFrame):
    """Serialize `graph` to JSON-LD, frame it, and flatten the result.

    The graph is serialized as JSON-LD and wrapped in a document that uses the
    module's `_Context`, then framed with `frame` (embedding always on). The
    framed document's ``@context`` is replaced with ``http://schema.org`` and
    the members of every ``@graph`` item are merged into the top level, after
    which the ``@graph`` key is removed.

    Returns:
        The framed, flattened dict.
    """
    nodes = json.loads(graph.serialize(format='json-ld'))
    document = {'@context': _Context, '@graph': AsList(nodes)}

    framed = jsonld.frame(document, frame, {'embed': '@always'})
    framed['@context'] = 'http://schema.org'

    # Hoist every framed node's properties to the top level; later nodes
    # overwrite keys set by earlier ones.
    graph_nodes = framed['@graph']
    for node in graph_nodes:
        framed.update(node)
    del framed['@graph']
    return framed
def test_AsList(self):
    """AsList maps None to [], keeps lists as-is and wraps a scalar."""
    cases = (
        (None, []),
        ([], []),
        ([1], [1]),
        (1, [1]),
    )
    for given, expected in cases:
        self.assertEqual(AsList(given), expected)
def _ExpandSliceData(self, slice, dim_defs_by_id, meas_defs_by_id):
    """Load a slice's CSV data and return it as a list of observations.

    The slice's ``data`` property is fetched through ``self.getter`` and each
    row becomes one ``Observation`` dict holding a ``DimensionValue`` per
    slice dimension and a ``MeasureValue`` per slice measure. The CSV column
    for a dimension or measure is the ``columnIdentifier`` of the slice's
    table mapping for it when there is one, otherwise the fragment of its
    URL. A column named ``<measure column>*`` with a non-empty value is split
    on ``;`` into ``StatisticalAnnotation`` footnotes on that measure value.

    Args:
        slice: The slice node whose ``data`` is to be expanded.
        dim_defs_by_id: Mapping from dimension URL to its definition.
        meas_defs_by_id: Mapping from measure URL to its definition. Not
            consulted at present; kept so existing callers keep working.

    Returns:
        A list with one observation dict per CSV row.

    Raises:
        RuntimeError: If a slice dimension has no entry in `dim_defs_by_id`
            (raised while processing the first row).
        KeyError: If a row lacks a column needed for a categorical or time
            dimension, or for a measure.
    """
    table_mappings = {
        GetUrl(mapping['sourceEntity']): mapping
        for mapping in AsList(GetSchemaProp(slice, 'tableMapping'))
    }

    def column_for(url):
        # A mapping that names no column falls back to the URL fragment
        # instead of raising KeyError.
        mapping = table_mappings.get(url)
        if mapping and 'columnIdentifier' in mapping:
            return mapping['columnIdentifier']
        return urlparse(url).fragment

    # The column layout is the same for every row, so resolve it once here
    # rather than re-parsing every URL for every row.
    dim_columns = [
        (url, column_for(url))
        for url in map(GetUrl, AsList(GetSchemaProp(slice, 'dimension')))
    ]
    measure_columns = [
        (url, column_for(url))
        for url in map(GetUrl, AsList(GetSchemaProp(slice, 'measure')))
    ]
    slice_id = GetSchemaId(slice)

    data = []
    with self.getter.Fetch(GetSchemaProp(slice, 'data')) as f:
        for row in DictReader(f):
            dimension_values = []
            for dim, col_id in dim_columns:
                dim_def = dim_defs_by_id.get(dim)
                # Checked per row, as before, so a data file with no rows
                # still expands to an empty list.
                if dim_def is None:
                    raise RuntimeError(
                        "Unable to find definition for dimension " + dim)
                dim_val = {
                    '@type': 'DimensionValue',
                    'dimension': dim,
                }
                dim_type = GetSchemaProp(dim_def, '@type')
                if dim_type == 'CategoricalDimension':
                    dim_val['codeValue'] = row[col_id]
                elif dim_type == 'TimeDimension':
                    equivalent_type = GetSchemaProp(dim_def, 'equivalentType')
                    if equivalent_type:
                        dim_val['value'] = {
                            '@type': equivalent_type,
                            '@value': row[col_id],
                        }
                    else:
                        dim_val['value'] = row[col_id]
                dimension_values.append(dim_val)

            measure_values = []
            for measure, col_id in measure_columns:
                measure_val = {
                    '@type': 'MeasureValue',
                    'measure': measure,
                    'value': row[col_id],
                }
                footnotes = row.get(col_id + '*')
                if footnotes:
                    measure_val['footnote'] = [
                        {
                            '@type': 'StatisticalAnnotation',
                            'codeValue': footnote,
                        }
                        for footnote in footnotes.split(';')
                    ]
                measure_values.append(measure_val)

            data.append({
                '@type': 'Observation',
                'slice': slice_id,
                'dimensionValue': dimension_values,
                'measureValue': measure_values,
            })
    return data