Beispiel #1
0
def CheckSlice(warnings, slice, dsid):
    """Validate one slice object, appending any problems to `warnings`.

    Checks that the slice has an id, is typed as a DataSlice, points at the
    expected dataset, and has "dimension", "measure" and "data" properties.
    Every dimension/measure value must resolve to a URL, and inline data
    (a dict or a list of dicts) is validated with CheckSliceData.

    Args:
      warnings: list that warning strings are appended to.
      slice: the slice object to validate (the name shadows the builtin but
        is kept so keyword callers keep working).
      dsid: value the slice's "dataset" URL is expected to equal.
    """
    _CheckIdPresent(warnings, 'Slice', slice)
    slice_id = GetSchemaId(slice)
    _CheckType(warnings, 'Slice', slice, ['DataSlice'])
    # Format the id rather than concatenating it: if GetSchemaId yields a
    # non-string (presumably None when the id is missing -- already reported
    # above), `'...' + slice_id` would raise TypeError and abort validation.
    _CheckUrlPresent(warnings, 'Slice', slice, 'dataset',
                     f'required for id {slice_id}', dsid)

    # Dimensions and measures follow the same rule: the property is required
    # and each of its values must be resolvable to a URL.
    for prop in ('dimension', 'measure'):
        _CheckPropertyPresent(warnings, 'Slice', slice, prop, 'required')
        for ref in AsList(GetSchemaProp(slice, prop)):
            if GetUrl(ref) is None:
                warnings.append(
                    f'Slice property "{prop}" values must have URLs for {slice_id}'
                )

    _CheckPropertyPresent(warnings, 'Slice', slice, 'data', 'required')
    data = GetSchemaProp(slice, 'data')
    # A string value is not inspected here; only inline observations are.
    if isinstance(data, dict):
        CheckSliceData(warnings, data, slice_id)
    elif isinstance(data, list):
        for datum in data:
            CheckSliceData(warnings, datum, slice_id)
Beispiel #2
0
def CheckStatisticalDataset(warnings, dataset):
    """Validate a StatisticalDataset, appending any problems to `warnings`.

    Verifies the type and id of the dataset, then for each of "dimension",
    "measure" and "slice" checks that the property is present and runs the
    matching per-member checker on every value.
    """
    kind = 'StatisticalDataset'
    _CheckType(warnings, kind, dataset, [kind])
    _CheckIdPresent(warnings, kind, dataset)

    # (property name, checker applied to each value of that property)
    member_checks = (
        ('dimension', CheckDimension),
        ('measure', CheckMeasure),
        ('slice', CheckSlice),
    )
    for prop, check_member in member_checks:
        _CheckPropertyPresent(warnings, kind, dataset, prop, 'required')
        for member in AsList(GetSchemaProp(dataset, prop)):
            check_member(warnings, member, GetSchemaId(dataset))
Beispiel #3
0
 def _ExpandCodeList(self, dim):
   """Fetch the dimension's CSV code list and return its rows as JSON-LD dicts.

   Each row dict produced by the CSV reader is annotated in place with
   '@type', '@id' (dimension id + '=' + the row's codeValue) and 'dimension'.
   """
   expanded = []
   with self.getter.Fetch(GetSchemaProp(dim, 'codeList')) as csv_file:
     for record in DictReader(csv_file):
       equivalent = GetSchemaProp(dim, 'equivalentType')
       # With an equivalent type the entry carries both types; otherwise
       # '@type' stays a plain string.
       record['@type'] = (['DimensionValue', equivalent]
                          if equivalent else 'DimensionValue')
       record['@id'] = GetSchemaId(dim) + '=' + record['codeValue']
       record['dimension'] = GetSchemaId(dim)
       expanded.append(record)
   return expanded
Beispiel #4
0
def _CheckUrlPresent(warnings, name, obj, prop, category, expected=None):
    """Warn when `prop` of `obj` has no URL, or a URL other than `expected`.

    Args:
      warnings: list that warning strings are appended to.
      name: label for the kind of object, used as the message prefix.
      obj: object whose property is inspected.
      prop: property name to look up.
      category: text describing the requirement (appended after "is").
      expected: optional URL the property must equal; a falsy value
        disables the comparison.
    """
    url = GetUrl(GetSchemaProp(obj, prop))
    if url is None:
        warnings.append(f'{name} property "{prop}" is {category}')
        return
    if expected and url != expected:
        mismatch = (
            f'{name} property "{prop}" has value "{url}" but expected "{expected}"'
        )
        warnings.append(mismatch)
Beispiel #5
0
    def _ExpandCodeList(self, dim):
        """Load a code list from CSV and return a list of JSON-LD objects.

        Every CSV row becomes a dict holding the row's columns plus '@type',
        '@id' (dimension id + '=' + the row's codeValue) and 'dimension'.
        Each of the dimension's 'dimensionProperty' entries that has a
        propertyID is then applied to the entry:

          * if the property carries a truthy 'value', that value is stored
            under the propertyID and nothing else is done for it;
          * otherwise the source column is the 'columnIdentifier' of the
            tableMapping registered for the property's id (falling back to
            the propertyID itself).  A column with exactly that name is
            renamed to the propertyID, and columns named '<column>.<suffix>'
            are folded into a nested object typed with the property's
            'propertyType'.
        """
        codeList = []
        dimProps = list(AsList(GetSchemaProp(dim, 'dimensionProperty')))
        tableMappings = {
            GetUrl(mapping['sourceEntity']): mapping
            for mapping in AsList(GetSchemaProp(dim, 'tableMapping'))
        }
        # These do not depend on the row, so resolve them once.
        dimId = GetSchemaId(dim)
        equivalentType = GetSchemaProp(dim, 'equivalentType')

        with self.getter.Fetch(GetSchemaProp(dim, 'codeList')) as f:
            for row in DictReader(f):
                entry = dict(row)
                if equivalentType:
                    # Fresh list per entry so entries never share state.
                    entry['@type'] = ['DimensionValue',
                                      *AsList(equivalentType)]
                else:
                    entry['@type'] = 'DimensionValue'
                entry['@id'] = dimId + '=' + row['codeValue']
                entry['dimension'] = dimId

                for dimProp in dimProps:
                    propId = GetSchemaProp(dimProp, 'propertyID')
                    if not propId:
                        continue
                    value = dimProp.get('value')
                    if value:
                        # Use the id resolved above; indexing
                        # dimProp['propertyID'] directly fails when the key
                        # is only present in its prefixed spelling.
                        entry[propId] = value
                        continue

                    columnId = propId
                    dimPropId = GetSchemaId(dimProp)
                    if dimPropId:
                        tableMapping = tableMappings.get(dimPropId)
                        if tableMapping and 'columnIdentifier' in tableMapping:
                            columnId = tableMapping['columnIdentifier']

                    # Iterate the untouched row while mutating the copy.
                    for field in row:
                        if field == columnId:
                            if columnId != propId:
                                entry[propId] = entry[columnId]
                                del entry[columnId]
                        elif field.startswith(columnId + '.'):
                            propType = dimProp['propertyType']
                            if columnId not in entry:
                                entry[columnId] = {'@type': propType}
                            elif isinstance(entry[columnId], str):
                                # Promote the plain string to an object,
                                # keeping the string as its name.  (The old
                                # code looked up the literal key 'columnId'
                                # in the row instead of this column.)
                                entry[columnId] = {
                                    '@type': propType,
                                    'name': entry[columnId],
                                }
                            # NOTE(review): nested values are stored under
                            # columnId even when it differs from propId --
                            # confirm that is intended.
                            entry[columnId][field[len(columnId) +
                                                  1:]] = entry[field]
                            del entry[field]
                codeList.append(entry)
        return codeList
Beispiel #6
0
  def _ExpandSliceData(self, slice, dim_defs_by_id):
    """Load a slice's CSV data and return it as a list of Observation dicts.

    Args:
      slice: slice object; its 'data' property is fetched as CSV, and the URL
        fragment of each 'dimension' / 'measure' value names the column read.
      dim_defs_by_id: mapping from dimension URL to dimension definition,
        used to decide how a dimension value is encoded.

    Returns:
      One dict per CSV row with '@type', 'slice', 'dimensionValues' and
      'measureValues'.  A measure whose '<column>*' cell is non-empty also
      gets a 'footnote' list, one annotation per ';'-separated code.
    """
    data = []
    with self.getter.Fetch(GetSchemaProp(slice, 'data')) as f:
      for row in DictReader(f):
        val = {
            '@type': 'Observation',
            'slice': GetSchemaId(slice),
            'dimensionValues': [],
            'measureValues': [],
        }

        for dim in AsList(GetSchemaProp(slice, 'dimension')):
          dim = GetUrl(dim)
          fragment = urlparse(dim).fragment
          dim_val = {
              '@type': 'DimensionValue',
              'dimension': dim,
          }
          dim_def = dim_defs_by_id.get(dim)
          if dim_def:
            dim_type = GetSchemaProp(dim_def, '@type')
            if dim_type == 'CategoricalDimension':
              dim_val['codeValue'] = row[fragment]
            elif dim_type == 'TimeDimension':
              equivalent_type = GetSchemaProp(dim_def, 'equivalentType')
              if equivalent_type:
                dim_val['value'] = {
                    '@type': equivalent_type,
                    '@value': row[fragment]
                }
              else:
                # Write to the value being built.  It has not been appended
                # yet, so indexing dimensionValues[-1] here would hit the
                # previous dimension (or raise IndexError on the first one).
                dim_val['value'] = row[fragment]
          val['dimensionValues'].append(dim_val)

        for measure in AsList(GetSchemaProp(slice, 'measure')):
          measure = GetUrl(measure)
          fragment = urlparse(measure).fragment
          measure_val = {
              '@type': 'MeasureValue',
              'measure': measure,
              'value': row[fragment]
          }
          # Footnote codes live in a sibling column named '<column>*'.
          footnotes = row.get(fragment + '*')
          if footnotes:
            measure_val['footnote'] = [
                {
                    '@type': 'StatisticalAnnotation',
                    'codeValue': footnote
                }
                for footnote in footnotes.split(';')
            ]
          val['measureValues'].append(measure_val)
        data.append(val)
    return data
Beispiel #7
0
 def Expand(self):
   """Frame the loaded graph and inline every string-valued data reference.

   String values of a dimension's 'codeList', the top-level 'footnote' and a
   slice's 'data' are replaced by the result of the matching _Expand* helper.
   Returns the framed (and mutated) JSON value.
   """
   expanded = FrameGraph(self.getter.graph, frame=_DataFileFrame)

   for dimension in AsList(GetSchemaProp(expanded, 'dimension')):
     code_list = dimension.get('codeList')
     if isinstance(code_list, str):
       dimension['codeList'] = self._ExpandCodeList(dimension)

   footnote_ref = GetSchemaProp(expanded, 'footnote')
   if isinstance(footnote_ref, str):
     expanded['footnote'] = self._ExpandFootnotes(footnote_ref, expanded)

   for data_slice in AsList(GetSchemaProp(expanded, 'slice')):
     # Built after the code lists above were expanded, so the definitions
     # handed to the slice expander already carry them.
     dims_by_id = MakeIdKeyedDict(
         AsList(GetSchemaProp(expanded, 'dimension')))
     data_ref = GetSchemaProp(data_slice, 'data')
     if isinstance(data_ref, str):
       data_slice['data'] = self._ExpandSliceData(data_slice, dims_by_id)

   return expanded
Beispiel #8
0
def _CheckAnyPropertyPresent(warnings, name, obj, props, category):
    """Warn unless at least one of `props` has a truthy value on `obj`."""
    for candidate in props:
        if GetSchemaProp(obj, candidate):
            # One populated property is enough; nothing to report.
            return
    warnings.append(f'{name}: One property of {props} is {category}')
Beispiel #9
0
 def test_GetSchemaProp(self):
   # The bare key and its 'schema:'-prefixed spelling must both resolve.
   for obj in ({'id': 'val'}, {'schema:id': 'val'}):
     self.assertEqual(GetSchemaProp(obj, 'id'), 'val')
Beispiel #10
0
    def _ExpandSliceData(self, slice, dim_defs_by_id, meas_defs_by_id):
        """Load a slice's CSV data and return it as a list of Observation dicts.

        The CSV column for a dimension or measure is the 'columnIdentifier'
        of the slice tableMapping whose sourceEntity is that URL, or the
        URL's fragment when no mapping exists.

        Args:
          slice: slice object supplying 'data', 'dimension', 'measure' and
            optional 'tableMapping' properties.
          dim_defs_by_id: mapping from dimension URL to its definition.
          meas_defs_by_id: mapping from measure URL to its definition; kept
            for interface compatibility, it is not consulted here.

        Returns:
          One dict per CSV row with '@type', 'slice', 'dimensionValue' and
          'measureValue'.  A measure whose '<column>*' cell is non-empty also
          gets a 'footnote' list, one annotation per ';'-separated code.

        Raises:
          RuntimeError: if a slice dimension has no entry in dim_defs_by_id.
        """
        tableMappings = {
            GetUrl(mapping['sourceEntity']): mapping
            for mapping in AsList(GetSchemaProp(slice, 'tableMapping'))
        }

        def column_for(url):
            # An explicit table mapping wins over the URL fragment.
            mapping = tableMappings.get(url)
            if mapping:
                return mapping['columnIdentifier']
            return urlparse(url).fragment

        # Row-independent lookups, resolved once instead of per CSV row.
        slice_id = GetSchemaId(slice)
        dims = AsList(GetSchemaProp(slice, 'dimension'))
        measures = AsList(GetSchemaProp(slice, 'measure'))

        data = []
        with self.getter.Fetch(GetSchemaProp(slice, 'data')) as f:
            for row in DictReader(f):
                val = {
                    '@type': 'Observation',
                    'slice': slice_id,
                    'dimensionValue': [],
                    'measureValue': [],
                }

                for dim in dims:
                    dim = GetUrl(dim)
                    dim_def = dim_defs_by_id.get(dim)
                    if dim_def is None:
                        # f-string so a None URL still yields the intended
                        # RuntimeError rather than a TypeError.
                        raise RuntimeError(
                            f"Unable to find definition for dimension {dim}")
                    col_id = column_for(dim)
                    dim_val = {
                        '@type': 'DimensionValue',
                        'dimension': dim,
                    }
                    # An empty (falsy) definition contributes no value.
                    dim_type = (GetSchemaProp(dim_def, '@type')
                                if dim_def else None)
                    if dim_type == 'CategoricalDimension':
                        dim_val['codeValue'] = row[col_id]
                    elif dim_type == 'TimeDimension':
                        equivalent_type = GetSchemaProp(
                            dim_def, 'equivalentType')
                        if equivalent_type:
                            dim_val['value'] = {
                                '@type': equivalent_type,
                                '@value': row[col_id],
                            }
                        else:
                            dim_val['value'] = row[col_id]
                    val['dimensionValue'].append(dim_val)

                for measure in measures:
                    measure = GetUrl(measure)
                    col_id = column_for(measure)
                    measure_val = {
                        '@type': 'MeasureValue',
                        'measure': measure,
                        'value': row[col_id],
                    }
                    # Footnote codes live in a sibling '<column>*' column.
                    footnotes = row.get(col_id + '*')
                    if footnotes:
                        measure_val['footnote'] = [{
                            '@type': 'StatisticalAnnotation',
                            'codeValue': footnote,
                        } for footnote in footnotes.split(';')]
                    val['measureValue'].append(measure_val)
                data.append(val)
        return data